Last active
May 31, 2017 02:59
-
-
Save kaminomisosiru/1d4e47f021d649864008ed25d934affb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
'''
Sample that reads an English text file and performs a frequency analysis on it.
'''
from collections import Counter, OrderedDict
def frequency_analysis(text):
    '''
    Run a letter-frequency analysis on ``text`` and print the result.

    Only the lowercase ASCII letters 'a' to 'z' are counted; every other
    character (uppercase letters, digits, whitespace, punctuation, ...)
    is ignored.

    For each letter that occurs at least once, one line of the form
    ``<letter>:<percentage>`` is written to standard output, in
    alphabetical order.  The percentage is that letter's share of all
    counted letters.  Nothing is printed when ``text`` contains no
    lowercase ASCII letter.

    Returns None.
    '''
    occurrences = Counter(ch for ch in text if 'a' <= ch <= 'z')
    total = sum(occurrences.values())

    # Tuples sort by their first element, so this orders by letter.
    for letter, count in sorted(occurrences.items()):
        # float() forces true division; on Python 2 a plain int / int
        # would truncate every percentage to 0.
        print(letter + ':' + str(float(count) / total * 100))
if __name__ == '__main__':
    # Load the text to analyse.  The `with` block closes the file even if
    # reading raises, which the manual open()/close() pair did not.
    with open('freq_sample') as f:
        text = f.read()
    frequency_analysis(text)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment