Skip to content

Instantly share code, notes, and snippets.

@skyzh
Last active August 9, 2018 03:25
Show Gist options
  • Save skyzh/1b2ce3c739724d12b6ba714067f61263 to your computer and use it in GitHub Desktop.
Save skyzh/1b2ce3c739724d12b6ba714067f61263 to your computer and use it in GitHub Desktop.
2017 上海市综合评价招生参考数据

数据来源: 阳光高考

上海交通大学 2017 年综合评价物理专业组 已录取考生高考成绩与面试分数的关系

screen shot 2018-06-23 at 3 32 33 pm

上海交通大学 2017 年综合评价物理专业组 已录取考生面试成绩分布

screen shot 2018-06-23 at 3 33 21 pm

上海交通大学 2017 年综合评价所有专业组 已录取考生高考成绩与面试分数的关系

screen shot 2018-06-23 at 3 34 13 pm

上海交通大学 2017 年综合评价所有专业组 已录取考生面试成绩分布

screen shot 2018-06-23 at 3 34 17 pm

复旦大学 2017 年综合评价所有专业组 已录取考生高考成绩与面试分数的关系

screen shot 2018-06-23 at 3 34 35 pm

复旦大学 2017 年综合评价所有专业组 已录取考生面试成绩分布

screen shot 2018-06-23 at 3 34 38 pm

复旦大学 2017 年综合评价物理专业组 已录取考生高考成绩与面试分数的关系

screen shot 2018-06-23 at 3 35 20 pm

复旦大学 2017 年综合评价物理专业组 已录取考生面试成绩分布

screen shot 2018-06-23 at 3 35 23 pm

复旦大学 2017 年综合评价物理专业组 已录取考生综合评价成绩和高考成绩的关系

screen shot 2018-06-23 at 3 41 47 pm

上海交通大学 2017 年综合评价物理专业组 已录取考生综合评价成绩和高考成绩的关系

screen shot 2018-06-23 at 3 42 24 pm

上海交通大学 2017 年综合评价所有专业组 同高考成绩下考生最终录取率

image

复旦大学 2017 年综合评价所有专业组 同高考成绩下考生最终录取率

image

// Converts the <td> cells of a saved HTML table into a CSV file that sits
// next to this script (same base name, `.csv` extension).
const fs = require('fs')
const _ = require('lodash')

// Base name (without extension) of the HTML file to convert.
const __process_file = '2017fdu.admitted'
// Number of cells per CSV row: 13 for sjtuadmitted, otherwise 10.
const __column_count = 13

fs.readFile(`${__dirname}/${__process_file}.html`, 'utf-8', (err, data) => {
  if (err) {
    // Without this guard `data` is undefined and the regex loop below would
    // silently produce an empty result.
    console.error(`failed to read ${__process_file}.html:`, err)
    process.exitCode = 1
    return
  }

  // Collect the inner text of every <td> element, in document order.
  const cellPattern = /<td.*?>(.*?)<\/td>/g
  const cells = []
  let match = cellPattern.exec(data)
  while (match !== null) {
    cells.push(match[1])
    match = cellPattern.exec(data)
  }

  // Drop the single cell that directly follows the first row so the remaining
  // cells line up in rows of `__column_count`.
  // NOTE(review): presumably a stray/spanning cell in the source table --
  // confirm against the HTML.
  cells.splice(__column_count, 1)

  // join() also copes with an empty cell list, where a seedless reduce would
  // yield undefined.
  const csv = _.chunk(cells, __column_count)
    .map(row => row.join(','))
    .join('\n')

  fs.writeFile(`${__dirname}/${__process_file}.csv`, csv, writeErr => {
    if (writeErr) {
      console.error(`failed to write ${__process_file}.csv:`, writeErr)
      process.exitCode = 1
      return
    }
    console.log('succeed!')
  })
})
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from collections import Counter
import math
# Prefix shared by the two input CSV files: <prefix>.admitted.csv and
# <prefix>.qualified.csv.
FILE_PRE = '2018sjtu'

# `sep` must be passed by keyword: recent pandas versions accept only the path
# positionally and raise TypeError otherwise.
ALL_STUDENTS = pd.read_csv(FILE_PRE + '.admitted.csv', sep=',')
QULIFIED_STUDENTS = pd.read_csv(FILE_PRE + '.qualified.csv', sep=',')

# Plain copy of the Gaokao score column under the name 'A'.
ALL_STUDENTS['A'] = ALL_STUDENTS['test_score']

# The interview score is not in the data; it is recovered from the
# comprehensive-score formula.
#
# SJTU (comprehensive score on a 660-point scale):
#   GAOKAO / 660 * 0.6 + INTERVIEW / 100 * 0.3 + 0.1 = ALL / 660
#   => INTERVIEW = ((ALL / 660) - GAOKAO / 660 * 0.6 - 0.1) / 0.3 * 100
#
# FDU (comprehensive score on a 1000-point scale):
#   GAOKAO / 660 * 0.6 + INTERVIEW / 100 * 0.3 + 0.1 = ALL / 1000
#   => INTERVIEW = ((ALL / 1000) - GAOKAO / 660 * 0.6 - 0.1) / 0.3 * 100
ALL = ALL_STUDENTS['comprehensive_score']
GAOKAO = ALL_STUDENTS['test_score']
ALL_STUDENTS['interview_score'] = ((ALL / 660) - GAOKAO / 660 * 0.6 - 0.1) / 0.3 * 100 # SJTU
# ALL_STUDENTS['interview_score'] = ((ALL / 1000) - GAOKAO / 660 * 0.6 - 0.1) / 0.3 * 100 # FDU
def draw_scatter(students, x_label, y_label, ax):
    """Scatter-plot column `y_label` against column `x_label` on `ax`.

    Rows that share the same (x, y) pair are drawn with a larger marker: each
    marker's size is 15 times the number of rows located at that point.
    """
    points = list(zip(students[x_label], students[y_label]))
    multiplicity = Counter(points)
    sizes = [15 * multiplicity[point] for point in points]
    students.plot(kind='scatter', x=x_label, y=y_label, s=sizes, ax=ax)
def draw_bar(students, label, ax):
    """Draw a bar chart on `ax` of how many rows fall on each integer value of `label`.

    The column is cast to int64 before counting, so fractional values lose
    their fractional part. Values that never occur are left out of the chart.
    If there is nothing to count (e.g. an empty selection), `ax` is left
    untouched instead of raising.
    """
    counts = pd.Series(np.bincount(students[label].astype(np.int64)))
    counts = counts[counts > 0]
    if counts.empty:
        # min()/max() of an empty series would raise ValueError.
        return
    # Integer-only ticks on the y axis, from the smallest to the largest count.
    # Set on the axes we were given rather than on pyplot's "current" axes, so
    # the function also works when `ax` is not the active one.
    ax.set_yticks(list(range(int(counts.min()), int(counts.max()) + 1)))
    counts.plot.bar(color='#1f77b4', ax=ax)
def new_plot():
    """Create a fresh 12x9 inch figure and return it with its single axes as (fig, ax)."""
    figure, axes = plt.subplots(figsize=(12, 9))
    return figure, axes
def draw(students, title, identifier):
    """Render and save the four standard charts for one group of students.

    students   -- DataFrame with 'test_score', 'comprehensive_score' and
                  'interview_score' columns.
    title      -- title placed on every chart.
    identifier -- prefix of the output file names.

    Files written (in the current working directory):
      <identifier>test_comprehensive_scatter.png
      <identifier>test_interview_scatter.png
      <identifier>interview_hist.png
      <identifier>comprehensive_hist.png
    """
    # Gaokao score vs. comprehensive score.
    fig, ax = new_plot()
    draw_scatter(students, 'test_score', 'comprehensive_score', ax=ax)
    ax.set_xlabel('Gaokao Score')
    ax.set_ylabel('Comprehensive Score')
    ax.set_title(title)
    fig.savefig(identifier + 'test_comprehensive_scatter.png')

    # Gaokao score vs. interview score. The x axis is the Gaokao score here
    # (it used to be mislabelled as the comprehensive score).
    fig, ax = new_plot()
    draw_scatter(students, 'test_score', 'interview_score', ax=ax)
    ax.set_xlabel('Gaokao Score')
    ax.set_ylabel('Interview Score')
    ax.set_title(title)
    fig.savefig(identifier + 'test_interview_scatter.png')

    # Distribution of interview scores.
    fig, ax = new_plot()
    draw_bar(students, 'interview_score', ax=ax)
    ax.set_xlabel('Interview Score')
    ax.set_ylabel('Count')
    ax.set_title(title)
    fig.savefig(identifier + 'interview_hist.png')

    # Distribution of comprehensive scores.
    fig, ax = new_plot()
    draw_bar(students, 'comprehensive_score', ax=ax)
    ax.set_xlabel('Comprehensive Score')
    ax.set_ylabel('Count')
    ax.set_title(title)
    fig.savefig(identifier + 'comprehensive_hist.png')
# Charts for the physics subject group, for everyone, and for one major.
draw(ALL_STUDENTS[ALL_STUDENTS['test_subject'] == '校本部物理专业组'], 'SJTU Physics Major Group', 'physics_')
draw(ALL_STUDENTS, 'SJTU All Major Group', 'all_')
# '工科试验班类' is the engineering experimental class major, so it gets its own
# title instead of reusing the physics-group one.
draw(ALL_STUDENTS[ALL_STUDENTS['major'] == '工科试验班类'], 'SJTU Engineering Experimental Class', 'gk_')

# Admission rate per Gaokao score: admitted head-count divided by qualified
# head-count at the same score.
admitted_counts = np.bincount(ALL_STUDENTS['test_score'], minlength=700)
qualified_counts = np.bincount(QULIFIED_STUDENTS['test_score'], minlength=700)
# Most scores have no qualified students at all; silence the resulting
# 0/0 and x/0 warnings and drop those entries below.
with np.errstate(divide='ignore', invalid='ignore'):
    series = pd.Series(admitted_counts / qualified_counts)
# Keep only scores with a real, non-zero rate (NaN fails `> 0`; inf is
# filtered explicitly so it cannot reach the bar chart).
series = series[(series > 0) & np.isfinite(series)]
fig, ax = new_plot()
series.plot.bar(color='#1f77b4', ax=ax)
plt.xlabel('Gaokao Score')
plt.ylabel('Percent')
plt.title('SJTU Comprehensive Examination')
fig.savefig('all_hist.png')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment