Skip to content

Instantly share code, notes, and snippets.

@TomHortons
Last active July 19, 2017 02:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save TomHortons/d7e9dc9382f6fcd4763163e1a7db9f99 to your computer and use it in GitHub Desktop.
Save TomHortons/d7e9dc9382f6fcd4763163e1a7db9f99 to your computer and use it in GitHub Desktop.
import warnings

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import fbeta_score, precision_score, make_scorer, average_precision_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer, MinMaxScaler
# Baseline experiment: one-vs-rest logistic regression on down-scaled raw
# pixels, evaluated with the F2 score on a random hold-out split.

# Number of rows of the training table to use, and the side length (in
# pixels) every image is resized to before being flattened.
n_samples = 5000
rescaled_dim = 20

# Load the label table; 'tags' holds space-separated tag names per image.
df = pd.read_csv('../input/train_v2.csv')
df['split_tags'] = df['tags'].map(lambda row: row.split(" "))

# The binarizer is fitted on the whole table (so lb.classes_ covers every
# tag that occurs in it); only the first n_samples rows are kept as targets.
lb = MultiLabelBinarizer()
y = lb.fit_transform(df['split_tags'])
y = y[:n_samples]

# One flattened, resized image per row. The interpolation flag must be
# passed by keyword: the third positional parameter of cv2.resize is `dst`.
# Flattening each image to 1-D yields a 2-D matrix directly, so the sample
# axis is preserved even when only a single image is loaded.
X = np.array([
    cv2.resize(
        plt.imread('../input/train-jpg/{}.jpg'.format(name)),
        (rescaled_dim, rescaled_dim),
        interpolation=cv2.INTER_LINEAR,
    ).reshape(-1)
    for name in df.head(n_samples)['image_name'].values
])
# NOTE(review): the scaler is fitted on all samples before the train/test
# split, so test-set statistics leak into preprocessing. Kept as in the
# original experiment.
X = MinMaxScaler().fit_transform(X)
print(X.shape, y.shape, lb.classes_)

# NOTE(review): no random_state is given, so the split (and therefore the
# reported scores) differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
clf = OneVsRestClassifier(LogisticRegression(C=10, penalty='l2'))

# Silence library warnings while fitting and scoring. The original
# indentation of this block was lost; the scoring calls are assumed to
# belong inside it as well.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    clf.fit(X_train, y_train)
    # Predict once and reuse the result for both metrics.
    y_pred = clf.predict(X_test)
    score = fbeta_score(y_test, y_pred, beta=2, average=None)
    avg_sample_score = fbeta_score(y_test, y_pred, beta=2, average='samples')

print('Average F2 test score {}'.format(avg_sample_score))
print('F2 test scores per tag:')
# Print the per-tag scores, best first (a bare expression would be discarded
# when this runs as a script rather than in a notebook cell).
for tag_index in score.argsort()[::-1]:
    print((lb.classes_[tag_index], score[tag_index]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment