Last active
October 23, 2018 15:38
-
-
Save lawlite19/cd6dd13c15f8aa96238ac87af3ae38d2 to your computer and use it in GitHub Desktop.
画学习曲线的方法
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.font_manager import FontProperties

try:
    # scikit-learn >= 0.18 ships learning_curve in model_selection.
    from sklearn.model_selection import learning_curve
except ImportError:
    # Fallback for older scikit-learn releases.
    from sklearn.learning_curve import learning_curve
from sklearn.metrics import make_scorer, mean_absolute_error  # scoring helpers

# CJK-capable font so Chinese labels render correctly when plotting on Windows.
font = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=14)
def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None, scoring=None, n_jobs=1,
                        train_sizes=None, verbose=0, plot=True):
    """Compute a learning curve with scikit-learn and optionally plot it.

    Calls ``learning_curve`` to obtain training and cross-validation scores
    for a range of training-set sizes, then (when ``plot`` is true) draws the
    mean curves with a +/- one standard deviation band using matplotlib.

    Parameters
    ----------
    estimator : the model to evaluate; passed straight to ``learning_curve``.
    title : title of the figure.
    X : input features.
    y : target vector.
    ylim : optional ``(ymin, ymax)`` tuple fixing the y-axis limits.
    cv : cross-validation setting forwarded to ``learning_curve``. ``None``
        selects scikit-learn's default splitting (the number of folds depends
        on the installed scikit-learn version).
    scoring : scoring function or name, e.g. one built with
        ``sklearn.metrics.make_scorer``.
    n_jobs : number of parallel jobs (default 1).
    train_sizes : training-set sizes to evaluate, forwarded to
        ``learning_curve``. ``None`` (the default) means
        ``np.linspace(.05, 1., 20)``.
    verbose : verbosity level forwarded to ``learning_curve``.
    plot : when false, skip all plotting and only compute the return values.

    Returns
    -------
    (midpoint, diff) : computed at the largest training size, where
        ``upper = train_mean + train_std`` and ``lower = cv_mean - cv_std``;
        ``midpoint = (upper + lower) / 2`` and ``diff = upper - lower``.
    """
    # Resolve the default here rather than in the signature so that a single
    # ndarray is not created at definition time and shared between calls.
    if train_sizes is None:
        train_sizes = np.linspace(.05, 1., 20)

    # learning_curve returns:
    #   train_sizes  - the training-set sizes actually used, shape (n_ticks,)
    #   train_scores - scores on the training sets, shape (n_ticks, n_cv_folds)
    #   test_scores  - scores on the held-out (cv) sets
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, scoring=scoring, n_jobs=n_jobs,
        train_sizes=train_sizes, verbose=verbose)

    # Aggregate across the cross-validation folds.
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)

    if plot:
        # Capture the axes once. Calling plt.gca() again after plt.show() would
        # create a brand-new empty figure (the shown one is already closed) and
        # leave it with an inverted axis for the next call to draw on.
        ax = plt.gca()
        ax.set_title(title, fontproperties=font)
        if ylim is not None:
            ax.set_ylim(*ylim)
        ax.set_xlabel(u"训练样本数", fontproperties=font)
        ax.set_ylabel(u"得分", fontproperties=font)
        ax.invert_yaxis()
        ax.grid()

        # Shaded bands: mean +/- one standard deviation across folds.
        ax.fill_between(train_sizes, train_scores_mean - train_scores_std,
                        train_scores_mean + train_scores_std, alpha=0.1, color="b")
        ax.fill_between(train_sizes, test_scores_mean - test_scores_std,
                        test_scores_mean + test_scores_std, alpha=0.1, color="r")
        ax.plot(train_sizes, train_scores_mean, 'o-', color="b", label=r"score of training set")
        ax.plot(train_sizes, test_scores_mean, 'o-', color="r", label=r'score of cv set')
        ax.legend(loc="best")

        plt.draw()
        plt.show()
        # Undo the inversion on the same axes so a figure that is still open
        # (e.g. non-blocking show) is left in its original orientation.
        ax.invert_yaxis()

    # Summarise the gap between the two curves at the largest training size.
    upper = train_scores_mean[-1] + train_scores_std[-1]
    lower = test_scores_mean[-1] - test_scores_std[-1]
    midpoint = (upper + lower) / 2
    diff = upper - lower
    return midpoint, diff
# Usage example:
# mean_absolute_error_score = make_scorer(mean_absolute_error)
# plot_learning_curve(model, u"学习曲线", X_train, Y_train[:, i], scoring=mean_absolute_error_score)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
谢楼主,解释的很详细