Created
September 12, 2014 15:13
-
-
Save tosh1ki/193f77bf11ea0873b007 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Draw violin plots of per-faculty grade distributions.

For every faculty in ``gakubu_list`` the script reads
``./txt/<faculty>-u.txt``, parses the table whose id is
``rdlGrid_gridList`` into a pandas DataFrame, and then draws one violin
plot per entry of ``column_list`` with one violin per faculty.
"""
import pdb

import BeautifulSoup as bs
import matplotlib.pyplot as plt
import pandas as pd

if __name__ == '__main__':
    # Faculty names; they select the input files and label the x axis.
    gakubu_list = [
        u'文学部', u'教育学部', u'法学部', u'理学部', u'医学部',
        u'歯学部', u'薬学部', u'工学部', u'農学部', u'獣医学部',
        u'水産学部']
    # DataFrame columns to plot, one figure each.
    column_list = [u'秀(%)', u'優(%)', u'良(%)', u'可(%)', u'不可(%)', u'GPA']

    gakubu_dict = {}

    # Read one file per faculty and store the parsed table in gakubu_dict.
    for gakubu in gakubu_list:
        filename = './txt/' + gakubu + '-u.txt'
        # Context manager so the file handle is closed deterministically.
        with open(filename) as infile:
            htmltext = infile.read()
        print(filename)

        soup = bs.BeautifulStoneSoup(
            htmltext,
            convertEntities=bs.BeautifulStoneSoup.HTML_ENTITIES)
        table = soup.find('table', attrs={'id': 'rdlGrid_gridList'})

        data = []
        # The last <tr> of the table is skipped.
        for i, tr in enumerate(table.findAll('tr')[0:-1]):
            rowlist = []
            for j, td in enumerate(tr.findAll('td')):
                text = td.find(text=True)
                # The first row (i == 0) and the first five columns
                # (j <= 4) are kept as text; every other cell is parsed
                # as a number.
                if i == 0 or j <= 4:
                    rowlist.append(text)
                else:
                    rowlist.append(float(text))
            data.append(rowlist)

        # data[0] supplies the column names.
        # NOTE(review): data[1:-1] drops the last parsed row in addition to
        # the <tr> already skipped above -- confirm both trailing rows are
        # meant to be excluded.
        gakubu_dict[gakubu] = pd.DataFrame(data[1:-1], columns=data[0])

    # Draw one violin plot per column of interest.
    for hyougo in column_list:
        # Collect the series in gakubu_list order so that every violin lines
        # up with its tick label (dict iteration order is not guaranteed to
        # match the list).
        result_list = [gakubu_dict[gakubu][hyougo] for gakubu in gakubu_list]

        # Use a fresh figure per column; reusing subplot(111) on the current
        # figure would draw every column onto the same axes.
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.violinplot(result_list,
                      showmeans=False,
                      showmedians=True)

        # Violins are placed at positions 1..N by default, so put one tick
        # at each of those positions and label it with the faculty name.
        ax.set_xticks(range(1, len(gakubu_list) + 1))
        ax.set_xticklabels(gakubu_list, rotation=45)

        # GPA gets its own y range; the other columns are percentages.
        if hyougo == u'GPA':
            ax.set_ylim(-0.1, 4.1)
        else:
            ax.set_ylim(-10, 110)
        ax.set_title(hyougo)

    # NOTE(review): nothing here shows or saves the figures; presumably the
    # script is run from an interactive session -- confirm, or add
    # plt.show() / fig.savefig(...) as appropriate.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment