Skip to content

Instantly share code, notes, and snippets.

@catalystfrank
Last active December 21, 2018 05:13
Show Gist options
  • Save catalystfrank/3c6c82c5565eb0cbc0a03a180960c002 to your computer and use it in GitHub Desktop.
Save catalystfrank/3c6c82c5565eb0cbc0a03a180960c002 to your computer and use it in GitHub Desktop.
F1_Score
import numpy as np
import pandas as pd
import bottleneck as bn
# Read And Count
# Load the three CSV inputs (same order as before) and record the row counts
# of the train and submission tables.
trainDF = pd.read_csv('train.csv', sep=',', header=0)
valDF = pd.read_csv('submission.csv', sep=',', header=0)
trainFold = pd.read_csv('train_5fold_20181219.csv', sep=',', header=0)
lenTrain, lenVal = len(trainDF), len(valDF)

# Fake Result, Put True Result in trainPred and valPred
# Uniform random numbers stand in for real model scores. Each frame has 29
# columns labelled 0..28; column 0 is overwritten below, columns 1..28 carry
# the per-class values. The *Out frames start as all zeros of the same shape.
trainPred = pd.DataFrame(np.random.uniform(size=(lenTrain, 29)))
valPred = pd.DataFrame(np.random.uniform(size=(lenVal, 29)))
trainOut = pd.DataFrame(np.zeros(trainPred.shape))
valOut = pd.DataFrame(np.zeros(valPred.shape))

# Column 0 of every frame mirrors the first column of the matching CSV
# (presumably the sample id -- confirm against the CSV headers).
trainFirstCol = trainDF.iloc[:, 0]
valFirstCol = valDF.iloc[:, 0]
for _frame in (trainPred, trainOut):
    _frame[0] = trainFirstCol
for _frame in (valPred, valOut):
    _frame[0] = valFirstCol
# F1 Score Thresholder and Estimator
def Schumacher(s):
    """Pick which items to flag positive by greedily maximising an F-style score.

    Items are ranked by score, highest first, and added one at a time while
    the running ratio ``A / (P + R)`` keeps strictly increasing, where

    * ``A`` is the sum of the scores selected so far,
    * ``P`` is the number of items selected so far,
    * ``R`` is the sum of *all* scores.

    NOTE(review): this presumably treats the scores as probabilities, so
    ``A`` and ``R`` act as expected true positives / expected positives --
    confirm with the author. The ratio has no factor of 2, i.e. it is half of
    the conventional ``2*TP / (pred + actual)`` F1; it is left un-doubled so
    existing callers see the same scale as before.

    Args:
        s: object exposing ``.values`` as a 1-D array of numeric scores
            (the script passes pandas Series).

    Returns:
        Tuple ``(givePred, lastF)``: ``givePred`` is an integer numpy array of
        0/1 flags in the original item order, ``lastF`` is the best ratio
        reached (0 when nothing is selected, including for empty input).
    """
    sv = np.asarray(s.values, dtype=float)
    sortindex = np.argsort(-sv)  # positions ordered by descending score
    R = sv.sum()
    runningA = 0.0
    lastF = 0.0
    # K counts the items accepted so far. Tracking it explicitly (instead of
    # reading the loop index after the loop) keeps the final item when the
    # loop runs to completion, and keeps empty input from failing.
    K = 0
    for P, idx in enumerate(sortindex, start=1):
        runningA = runningA + sv[idx]
        runningF = runningA / (P + R)
        if runningF > lastF:
            lastF = runningF
            K = P
        else:
            # First item that does not improve the ratio ends the search.
            break
    givePred = np.zeros(len(sv), dtype=int)
    givePred[sortindex[:K]] = 1
    return (givePred, lastF)
# Giving Train Estimator, For Specific FoldID
# For each of the 28 classes: threshold the selected fold's scores with
# Schumacher, store the 0/1 decisions in trainOut, and accumulate both the
# estimated score returned by Schumacher and the score measured against the
# ground-truth labels parsed from the 'Target' column.
FoldID = 4
sumtotalF1Estimate = 0.0
sumtotalF1 = 0.0
# The fold selection does not depend on the class, so compute it once.
foldMask = trainFold['inpool'] == FoldID
foldIndex = trainFold[foldMask].index
# 'Target' holds space-separated class labels; split each row a single time.
foldLabels = trainFold[foldMask]['Target'].map(lambda x: x.split(' '))
for i in range(28):
    # Class i lives in column i+1 (column 0 is not a class column).
    seqPred = trainPred[foldMask][i+1]
    seqGT = foldLabels.map(lambda labels: int(str(i) in labels))
    outDetail, F = Schumacher(seqPred)
    # Label-based write of the whole fold at once. `.loc` replaces the `.ix`
    # indexer, which is no longer available in current pandas releases.
    trainOut.loc[foldIndex, i+1] = outDetail
    sumtotalF1Estimate += F
    # Same un-doubled ratio TP / (predicted + actual) that Schumacher reports.
    denominator = outDetail.sum() + seqGT.sum()
    if denominator > 0:
        sumtotalF1 += (outDetail * seqGT.values).sum() * 1.0 / denominator
    # else: nothing predicted and nothing labelled -- contribute 0 rather
    # than letting a 0/0 NaN poison the running total.
# Formatted so the output text is identical under Python 2 and Python 3.
print('%s %s' % (sumtotalF1Estimate, sumtotalF1))
# Giving Val Estimator
# No ground truth is used here: for each of the 28 classes the scores in
# valPred are thresholded with Schumacher, the 0/1 decisions are written to
# the matching valOut column, and only the estimated score is accumulated.
sumtotalF1Estimate = 0.0
for i in range(28):
    # Class i lives in column i+1 (column 0 is not a class column).
    seqPred = valPred[i+1]
    outDetail, F = Schumacher(seqPred)
    valOut[i+1] = outDetail
    sumtotalF1Estimate += F
# Single-argument call form prints the same text on Python 2 and Python 3.
print(sumtotalF1Estimate)
# valOut is for Submission
################ReWeight###########
# Build one 0/1 indicator column per class on trainDF (named '0'..'27'),
# count the positives per class, and derive a per-class weight from the counts.
classColumns = [str(i) for i in range(28)]
# 'Target' holds space-separated class labels; split each row a single time.
targetLabels = trainDF.Target.map(lambda x: x.split(' '))
for i in range(28):
    trainDF[str(i)] = targetLabels.map(lambda labels: int(str(i) in labels))
# Sum only the indicator columns, selected by name. This avoids summing the
# non-numeric columns and does not depend on how many columns precede the
# indicators in train.csv.
value_counts = trainDF[classColumns].sum(axis=0)
# For class who had least total weight, adjust its weight to resample_bottomline * largest total weight
resample_bottomline = 0.25
value_max, value_min = value_counts.max(), value_counts.min()
# Target total weight per class (count * weight) is a linear map of the count:
#   (count - min) * (1 - bottomline) + max * bottomline
# so the rarest class ends up with total weight bottomline * max. Dividing by
# the count turns that total into a per-sample weight.
# NOTE(review): a class with zero positives would divide by zero here.
raw_weight = ((value_counts - value_min) * (1 - resample_bottomline)
              + value_max * resample_bottomline) / value_counts
## Use adj_weight
# Normalised so the weights average to 1 across classes.
adj_weight = raw_weight/raw_weight.mean()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment