Skip to content

Instantly share code, notes, and snippets.

@kaminomisosiru
Last active May 31, 2017 02:59
Show Gist options
  • Save kaminomisosiru/1d4e47f021d649864008ed25d934affb to your computer and use it in GitHub Desktop.
Save kaminomisosiru/1d4e47f021d649864008ed25d934affb to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
'''
英文ファイルを読み込んで頻度分析を行うサンプル
'''
from collections import Counter, OrderedDict
def frequency_analysis(text):
    '''
    Print the relative frequency of each lowercase ASCII letter in ``text``.

    Only the characters 'a' through 'z' are counted; uppercase letters,
    digits, punctuation, whitespace and non-ASCII characters are ignored.
    For every letter that occurs, one line of the form
    ``<letter>:<percentage>`` is written to standard output, in
    alphabetical order. The percentage is relative to the total number of
    counted letters, not to ``len(text)``.

    Nothing is printed when ``text`` contains no lowercase letters.
    The function returns None.
    '''
    occurrences = Counter(ch for ch in text if 'a' <= ch <= 'z')
    total = sum(occurrences.values())
    # Sort alphabetically. Single-character keys are unique, so sorting the
    # (letter, count) pairs orders them by letter.
    for letter, count in sorted(occurrences.items()):
        # float() forces true division so the percentage is not truncated
        # to 0 by integer division on Python 2; on Python 3 the result is
        # identical to count / total * 100.
        print(letter + ':' + str(float(count) / total * 100))
if __name__ == '__main__':
    # Load the file to analyse. The context manager closes the handle even
    # if reading raises.
    with open('freq_sample') as f:
        text = f.read()
    frequency_analysis(text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment