Skip to content

Instantly share code, notes, and snippets.

@tosh1ki
Created September 12, 2014 15:13
Show Gist options
  • Save tosh1ki/193f77bf11ea0873b007 to your computer and use it in GitHub Desktop.
Save tosh1ki/193f77bf11ea0873b007 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import BeautifulSoup as bs
import pandas as pd
import matplotlib.pyplot as plt
import pdb
def parse_row(cells, is_header, n_text_columns=5):
    """Convert the text cells of one table row into DataFrame-ready values.

    Args:
        cells: Sequence of cell texts (one per ``<td>``), in column order.
        is_header: True for the header row, which is kept entirely as text.
        n_text_columns: Number of leading columns that hold text rather
            than numbers; those cells are passed through unchanged.

    Returns:
        A list with the leading ``n_text_columns`` cells unchanged and every
        remaining cell converted with ``float``.  For a header row, a plain
        copy of ``cells``.

    Raises:
        ValueError, TypeError: if a numeric cell cannot be converted.
    """
    if is_header:
        return list(cells)
    return [cell if j < n_text_columns else float(cell)
            for j, cell in enumerate(cells)]


def load_grade_table(filename):
    """Read one saved HTML page and return its grade table as a DataFrame.

    The table is located by its element id ``rdlGrid_gridList``.  The first
    row supplies the column names.

    Args:
        filename: Path of the saved HTML text file.

    Returns:
        pandas.DataFrame built from the table body.

    Raises:
        ValueError: if the file contains no such table or the table is empty.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(filename) as f:
        htmltext = f.read()

    soup = bs.BeautifulStoneSoup(
        htmltext,
        convertEntities=bs.BeautifulStoneSoup.HTML_ENTITIES)
    table = soup.find('table', attrs={'id': 'rdlGrid_gridList'})
    if table is None:
        raise ValueError(
            'table "rdlGrid_gridList" not found in {0!r}'.format(filename))

    # The last <tr> is skipped here and one more trailing row is dropped
    # below (data[1:-1]), exactly as the original script did.
    # NOTE(review): presumably these are footer/summary rows -- confirm.
    data = []
    for i, tr in enumerate(table.findAll('tr')[0:-1]):
        cells = [td.find(text=True) for td in tr.findAll('td')]
        # Columns 0-4 and the header row (row 0) are not numeric.
        data.append(parse_row(cells, is_header=(i == 0)))
    if not data:
        raise ValueError('table in {0!r} has no rows'.format(filename))

    return pd.DataFrame(data[1:-1], columns=data[0])


def plot_metric(metric, series_list, labels):
    """Draw one violin plot per entry of ``series_list`` on a new figure.

    Args:
        metric: Column name being plotted; used as the title and to pick
            the y-axis range.
        series_list: One sequence of values per violin, in the same order
            as ``labels``.
        labels: X tick labels, one per violin.

    Returns:
        The matplotlib Axes that was drawn on.
    """
    # A fresh figure per metric keeps the metrics from being drawn on top of
    # each other on a single shared axes.
    fig, ax = plt.subplots()
    ax.violinplot(series_list,
                  showmeans=False,
                  showmedians=True)
    # violinplot places the violins at x = 1..N by default, so put one tick
    # (and label) under each of them, whatever N is.
    ax.set_xticks(range(1, len(labels) + 1))
    ax.set_xticklabels(labels, rotation=45)
    if metric == u'GPA':
        ax.set_ylim(-0.1, 4.1)
    else:
        # Every other plotted column is a percentage.
        ax.set_ylim(-10, 110)
    ax.set_title(metric)
    return ax


if __name__ == '__main__':
    gakubu_list = [
        u'文学部', u'教育学部', u'法学部', u'理学部', u'医学部',
        u'歯学部', u'薬学部', u'工学部', u'農学部', u'獣医学部',
        u'水産学部']
    column_list = [u'秀(%)', u'優(%)', u'良(%)', u'可(%)', u'不可(%)', u'GPA']

    # Read the file of each faculty and store its table in gakubu_dict.
    gakubu_dict = {}
    for gakubu in gakubu_list:
        filename = './txt/' + gakubu + '-u.txt'
        print(filename)
        gakubu_dict[gakubu] = load_grade_table(filename)

    # Plot each column of column_list.
    for hyougo in column_list:
        # Collect the series in gakubu_list order so that every violin lines
        # up with its x tick label (dict iteration order is not guaranteed
        # to match the list order).
        result_list = [gakubu_dict[gakubu][hyougo] for gakubu in gakubu_list]
        plot_metric(hyougo, result_list, gakubu_list)

    # Display all figures; without this nothing is shown when run as a script.
    plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment