from collections import Counter
from glob import iglob
import re
import os
def remove_garbage(text):
    """Normalize raw text for word counting.

    Every run of non-word characters (anything the regex class ``\\W``
    matches, i.e. not a letter, digit or underscore) is collapsed into a
    single space, and the result is lower-cased.

    Args:
        text: The string to clean.

    Returns:
        The cleaned, lower-cased string. Leading/trailing separators become
        a single space rather than being stripped.
    """
    # Collapse each run of non-word characters into one space so that a
    # later ``split()`` yields clean word tokens.
    text = re.sub(r'\W+', ' ', text)
    return text.lower()
# Maximum number of distinct words (most frequent first) written to the report.
topwords = 50000
# Folder whose *.txt files are scanned for words.
folderpath = 'd:/jktextall/'

# Accumulate word frequencies across every matching file.
counter = Counter()
for filepath in iglob(os.path.join(folderpath, '*.txt')):
    with open(filepath) as file:
        counter.update(remove_garbage(file.read()).split())

# Write one "count,word" line per word, most frequent first.
# The output file is managed by a ``with`` block so it is always flushed and
# closed; the previous version ended with ``file1.close`` (no parentheses),
# which only referenced the method without calling it, so the file was never
# explicitly closed and buffered output could fail to reach the disk.
with open("jkwords1.txt", "w") as file1:
    for word, count in counter.most_common(topwords):
        file1.write(str(count) + "," + word + "\n")
我修改了上面的代码,想把输出写入文本文件 jkwords1.txt,
但文本文件中没有写入任何内容。不过,print (word,count)
确实会在 Python 控制台中产生输出。问题:Python 循环的输出没有写入文本文件。
但是,如果不使用 file1.write,而是改用 print(count,word),
则屏幕上会有输出。
请使用 `with open(...) as file1: ...`,以确保所有内容都被正确写入磁盘。 –