Skip to content

Instantly share code, notes, and snippets.

@dhwajraj
Last active October 24, 2018 07:02
Show Gist options
  • Save dhwajraj/cf4e796ab67925ad0d89d6a015fc05fc to your computer and use it in GitHub Desktop.
Save dhwajraj/cf4e796ab67925ad0d89d6a015fc05fc to your computer and use it in GitHub Desktop.
Iteratively extract the noisy samples from the training data that hamper the classifier's learning
# Identify likely-noisy (mislabeled) training rows by repeated ensemble voting:
# over 100 different random train/test splits, train 7 diverse classifiers and
# count how often each held-out row's label is contradicted by a strong
# majority of the ensemble. Rows accumulating high counts in
# classifier_votes.txt are candidates for removal from the training data.
#
# NOTE(review): indentation was lost in the original paste; this reconstruction
# assumes the final write-out happens once, after all 100 rounds — confirm.
# Assumes X, y, indices, df and the sklearn imports are defined by the
# surrounding script/notebook.
counter = {}
with open('classifier_votes.txt', 'w') as fout:
    for ll in range(100):
        print(ll)
        # Fresh random split each round (random_state=ll) so every row lands
        # in many different test sets across the 100 iterations.
        X_train, X_test, y_train, y_test, ix_train, ix_test = train_test_split(
            X, y, indices, test_size=0.2, random_state=ll)

        # A deliberately diverse ensemble; hyperparameters kept as tuned.
        classifiers = [
            LogisticRegression(class_weight='balanced'),
            RandomForestClassifier(n_estimators=10, max_depth=4,
                                   random_state=0, max_features=None,
                                   criterion="entropy",
                                   class_weight='balanced'),
            svm.SVC(C=100, gamma=0.001, kernel='rbf',
                    class_weight={0.0: 4.0}),
            DecisionTreeClassifier(class_weight='balanced',
                                   criterion="entropy", max_depth=3,
                                   min_samples_leaf=19, min_samples_split=11),
            BaggingClassifier(),
            linear_model.SGDClassifier(max_iter=100,
                                       class_weight='balanced'),
            svm.LinearSVC(class_weight='balanced'),
        ]
        n_clf = len(classifiers)  # was hard-coded as 7 in the vote loop

        predictions = []
        for clf in classifiers:
            clf.fit(X_train, y_train)
            predictions.append(clf.predict(X_test))

        for i, (_, row) in enumerate(df.loc[ix_test].iterrows()):
            key = "\t".join(str(p) for p in row.values)
            # Labels are 0/1, so the sum over classifiers is the number of
            # "1" votes for this row. (Renamed from `sum`, which shadowed
            # the builtin.)
            votes = sum(predictions[j][i] for j in range(n_clf))
            # A row is suspicious when a strong majority disagrees with its
            # label: labeled 1 but at most 2 of 7 vote 1, or labeled 0 but
            # at least 4 of 7 vote 1.
            if row['result'] == 1.0 and votes <= 2.0:
                counter[key] = counter.get(key, 0) + 1
            if row['result'] == 0.0 and votes >= 4.0:
                counter[key] = counter.get(key, 0) + 1

    # Dump the accumulated per-row noise counts once all rounds finish.
    for key, count in counter.items():
        fout.write(str(count) + "\t" + key + "\n")
    fout.flush()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment