实例9:基本统计值计算
代码:
def getNum():
    """Collect numbers typed by the user until an empty line is entered.

    Prompts repeatedly; each non-empty entry is evaluated and appended to
    the result. Pressing Enter on an empty line ends the input.

    Returns:
        list: The evaluated entries, in the order they were typed.
    """
    nums = []
    iNumStr = input("请输入数字(回车退出): ")
    while iNumStr != "":
        # SECURITY NOTE(review): eval() executes arbitrary expressions typed
        # by the user. It is kept so that both ints and floats (and simple
        # expressions) are still accepted as before; consider
        # ast.literal_eval() or float() if input may be untrusted.
        nums.append(eval(iNumStr))
        # Re-prompt inside the loop, otherwise the loop never terminates.
        iNumStr = input("请输入数字(回车退出): ")
    return nums
def mean(numbers):
    """Compute the arithmetic mean of a sequence of numbers.

    Args:
        numbers: A sized collection of numeric values.

    Returns:
        float: The sum of the values divided by their count.

    Raises:
        ZeroDivisionError: If ``numbers`` is empty.
    """
    # Start from 0.0 so the accumulator (and the result) is always a float.
    s = sum(numbers, 0.0)
    return s / len(numbers)
实例10:文本词频统计
英文词频统计
程序代码:
#CalHamletV1.py
def getText():
    """Load hamlet.txt and normalise it for word counting.

    The text is lower-cased and every character in the punctuation list
    below is replaced by a single space, so that a later ``split()`` yields
    bare words.

    Returns:
        str: The normalised file contents.
    """
    # The context manager closes the file handle even if read() fails.
    with open("hamlet.txt", "r") as f:
        txt = f.read()
    txt = txt.lower()
    # NOTE(review): the list contains the curly quote '‘' where a backtick
    # might have been intended; kept unchanged pending confirmation.
    for ch in '!"#$%&()*+,-./:;<=>?@[\\]^_‘{|}~':
        txt = txt.replace(ch, " ")
    return txt
# Count how often each word occurs in the normalised Hamlet text and print
# the most frequent ones.
hamletTxt = getText()
words = hamletTxt.split()
counts = {}
for word in words:
    counts[word] = counts.get(word, 0) + 1
items = list(counts.items())
# Sort (word, count) pairs by count, highest first.
items.sort(key=lambda x: x[1], reverse=True)
# Slicing (rather than indexing range(10)) avoids an IndexError when the
# text contains fewer than ten distinct words.
for word, count in items[:10]:
    print("{0:<10}{1:>5}".format(word, count))
程序代码分析(略)
中文词频统计
程序代码:
#CalThreeKingdomsV1.py
import jieba

# Read the whole novel; the context manager guarantees the file is closed.
with open("threekingdoms.txt", "r", encoding="utf-8") as f:
    txt = f.read()
# Segment the text into a list of words with jieba.
words = jieba.lcut(txt)
counts = {}
for word in words:
    # Skip single-character tokens.
    if len(word) == 1:
        continue
    counts[word] = counts.get(word, 0) + 1
items = list(counts.items())
# Sort (word, count) pairs by count, highest first.
items.sort(key=lambda x: x[1], reverse=True)
# Slicing (rather than indexing range(15)) avoids an IndexError when fewer
# than fifteen distinct words were counted.
for word, count in items[:15]:
    print("{0:<10}{1:>5}".format(word, count))
出场人物统计:
#CalThreeKingdomsV2.py
import jieba

# Read the whole novel; the context manager guarantees the file is closed.
with open("threekingdoms.txt", "r", encoding="utf-8") as f:
    txt = f.read()
# Words removed from the tally before the ranking is printed.
excludes = {"将军", "却说", "荆州", "二人", "不可", "不能", "如此"}
# Alternative tokens that are counted under a single canonical name.
aliases = {
    "诸葛亮": "孔明",
    "孔明曰": "孔明",
    "关公": "关羽",
    "云长": "关羽",
    "玄德": "刘备",
    "玄德曰": "刘备",
    "孟德": "曹操",
    "丞相": "曹操",
}
# Segment the text into a list of words with jieba.
words = jieba.lcut(txt)
counts = {}
for word in words:
    # Skip single-character tokens.
    if len(word) == 1:
        continue
    # Map an alias to its canonical name; other words count as themselves.
    rword = aliases.get(word, word)
    counts[rword] = counts.get(rword, 0) + 1
for word in excludes:
    # pop() with a default tolerates excluded words that never occurred,
    # where `del counts[word]` would raise KeyError.
    counts.pop(word, None)
items = list(counts.items())
# Sort (word, count) pairs by count, highest first.
items.sort(key=lambda x: x[1], reverse=True)
# Slicing (rather than indexing range(10)) avoids an IndexError when fewer
# than ten distinct words remain.
for word, count in items[:10]:
    print("{0:<10}{1:>5}".format(word, count))

