Skip to content

Instantly share code, notes, and snippets.

@wtsnjp
Created July 25, 2017 05:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wtsnjp/c86071ffd855d16683a181fb4b073ff5 to your computer and use it in GitHub Desktop.
Save wtsnjp/c86071ffd855d16683a181fb4b073ff5 to your computer and use it in GitHub Desktop.
#
# usage: python ccs.py {file path}
#
import sys
import re
import collections
from matplotlib import pyplot as plt
def get_cs(fn):
    """Return every TeX control sequence found in the file *fn*, in order.

    A control sequence is a backslash followed by either a run of ASCII
    letters and ``@`` (a control word such as ``\\section`` or ``\\@foo``)
    or one single non-letter character (a control symbol such as ``\\\\``
    or ``\\%``).  The leading backslash is not part of the returned names.

    Args:
        fn: Path of the text file to scan.  It is opened in text mode with
            the platform's default encoding.

    Returns:
        A flat list of control-sequence names (without the backslash) in
        the order they appear in the file, duplicates included.

    Raises:
        OSError: If the file cannot be opened.
    """
    pattern = re.compile(r'\\([a-zA-Z@]+|[^a-zA-Z])')
    # The context manager guarantees the handle is closed even if reading
    # fails part-way; the file is scanned line by line, so a control
    # sequence is never matched across a line boundary.
    with open(fn) as src:
        return [cs for line in src for cs in pattern.findall(line)]
def frequency_ranking(ls):
    """Rank the distinct items of *ls* from most to least frequent.

    Args:
        ls: An iterable of hashable items (here: control-sequence names).

    Returns:
        A list of two-element lists ``[item, count]`` ordered by
        descending count, in the order produced by
        ``collections.Counter.most_common()``.  An empty input gives an
        empty list.
    """
    counts = collections.Counter(ls)
    # Callers index the pairs positionally (pair[1] is the count), so each
    # (item, count) tuple is converted to a list to keep the return shape.
    return [[item, count] for item, count in counts.most_common()]
if __name__ == '__main__':
    # Script entry point: plot the rank/frequency curve of the control
    # sequences found in the file named on the command line.
    if len(sys.argv) < 2:
        # Fail with the documented usage line instead of a bare IndexError.
        sys.exit('usage: python ccs.py {file path}')
    fn = sys.argv[1]
    cls = get_cs(fn)
    ls = frequency_ranking(cls)
    # Normalise by the total number of occurrences so that each value is a
    # relative frequency in (0, 1]; dividing by the number of distinct
    # control sequences would not give a frequency.
    total = len(cls)
    # Ranks start at 1: a log-scaled x axis cannot display x = 0, so using
    # the implicit 0-based index would drop the most frequent entry.
    ranks = list(range(1, len(ls) + 1))
    plt.plot(ranks, [count / total for _, count in ls])
    plt.xscale('log')
    plt.yscale('log')
    # Axis labels are Japanese for "rank" and "frequency of occurrence".
    plt.xlabel('順位')
    plt.ylabel('出現頻度')
    plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment