Skip to content

Instantly share code, notes, and snippets.

@NhuanTDBK
Created December 9, 2016 03:26
Show Gist options
  • Save NhuanTDBK/14989f19f450c8ad675d52e8452517ad to your computer and use it in GitHub Desktop.
Save NhuanTDBK/14989f19f450c8ad675d52e8452517ad to your computer and use it in GitHub Desktop.
def map_col(dat, col):
    """Prefix every value of *dat* with the column name and a dash.

    Each element of ``dat`` is converted with ``str`` through ``dat.map`` and
    the result is ``col + "-" + <value as text>``, element by element.
    """
    prefix = col + "-"
    as_text = dat.map(str)
    return prefix + as_text
def gen_hash_item(field, feat):
    """Render one feature as a ``<field>:<hash>:1`` token.

    ``feat`` is passed through ``hashstr`` (defined elsewhere in this file);
    the third component of the token is always the literal ``1``.
    """
    hashed = hashstr(feat)
    return '{0}:{1}:1'.format(field, hashed)
def gen_hash_row(feats, label):
    """Build one output line: a signed label followed by feature tokens.

    Args:
        feats: iterable of feature strings. The text after the last ``-`` in
            each string is treated as the value.
        label: the row's target. ``0`` is written as ``-1``; any other value
            is written as ``1``.

    Returns:
        ``"<label> <token> <token> ...\\n"``. Features whose value is the
        string ``'nan'`` are left out. The field number given to
        ``gen_hash_item`` is the feature's position in ``feats``, so a
        skipped feature leaves a gap in the numbering instead of shifting
        the fields that follow it.
    """
    tokens = [
        gen_hash_item(idx, item)
        for idx, item in enumerate(feats)
        # Only the last dash-separated segment matters, so split once from
        # the right rather than splitting the whole string.
        if item.rsplit('-', 1)[-1] != 'nan'
    ]
    lbl = -1 if label == 0 else 1
    return str(lbl) + ' ' + ' '.join(tokens) + '\n'
# Turn the merged frame into one text line per row and write it to
# `output_name`.
#
# Every column except 'display_id' and 'clicked' is converted by map_col into
# "<column>-<value>" strings, and each row is then rendered by gen_hash_row
# together with its entry from `targets`.
merge_dat_val = merge_dat.drop(['display_id', 'clicked'], axis=1)
cols = merge_dat_val.columns
# Build one sequence of prefixed strings per column, then transpose so that
# iterating `features` yields one row of feature strings at a time.
features = np.array([map_col(merge_dat_val[col], col) for col in cols]).T
with open(output_name, 'w') as f_tr:
    # NOTE(review): zip() stops at the shorter input, so this assumes
    # `targets` lines up row-for-row with `merge_dat` -- confirm upstream.
    for i, (item, label) in enumerate(zip(features, targets)):
        if i % 200000 == 0:
            # Progress marker. The call form of print behaves the same on
            # Python 2 and Python 3 for a single argument.
            print(i)
        row = gen_hash_row(item, label)
        f_tr.write(row)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment