Created
June 9, 2014 15:10
-
-
Save abinashpanda/c4f57a0acb6c0e6998c5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Benchmarking test with time taken for multilabel learning using Shogun's | |
Structured Output (SO) learning framework. | |
""" | |
from __future__ import print_function | |
from __future__ import division | |
import numpy as np | |
from sklearn.datasets import make_multilabel_classification | |
from sklearn.multiclass import OneVsRestClassifier | |
from sklearn.svm import SVC | |
from sklearn.metrics import jaccard_similarity_score | |
from sklearn.preprocessing import MultiLabelBinarizer | |
from time import time | |
from os import system | |
def generate_svm_light_sample(x_sample, y_sample):
    """Render one sample as a single text line (without a trailing newline).

    The line consists of the labels joined by commas, a single space, and
    then one ``index:value`` token per feature, separated by spaces.
    Feature indices are 1-based and every value is formatted with ``%f``.

    Args:
        x_sample: Iterable of numeric feature values for the sample.
        y_sample: Iterable of labels for the sample; each one is passed
            through ``str``.

    Returns:
        The formatted line as a string.
    """
    label_field = ','.join(str(label) for label in y_sample)

    feature_tokens = []
    # Count positions from 1 so the first feature is written as "1:...".
    for position, value in enumerate(x_sample, 1):
        feature_tokens.append("%d:%f" % (position, value))

    return "%s %s" % (label_field, ' '.join(feature_tokens))
def generate_multilabel_data(output_file, X, y):
    """Write every sample of a dataset to ``output_file``, one per line.

    Row ``X[i]`` is paired with ``y[i]``, rendered with
    ``generate_svm_light_sample`` and written followed by a newline.

    Args:
        output_file: Writable text file-like object (must provide
            ``write``).
        X: Two-dimensional array-like exposing ``shape`` and supporting
            integer row indexing.
        y: Sequence supporting ``len`` and integer indexing, holding the
            labels for each row of ``X``.

    Raises:
        AssertionError: If the number of rows in ``X`` differs from
            ``len(y)``.
    """
    m, _ = X.shape
    assert m == len(y)
    for row in range(m):
        # Use write(), not writelines(): writelines() expects an iterable of
        # strings, so handing it one string makes it iterate (and write) the
        # line character by character.
        output_file.write("%s\n" % generate_svm_light_sample(X[row], y[row]))
if __name__ == '__main__':
    # Show the module docstring as a banner when run as a script.
    print(__doc__)

    # Parameters of the synthetic multilabel dataset, gathered in one place.
    dataset_options = dict(
        n_features=1000,
        n_samples=10000,
        n_classes=10,
        n_labels=3,
        allow_unlabeled=False,
    )
    X, y = make_multilabel_classification(**dataset_options)

    # Dump the generated samples to disk, one sample per line.
    with open("sample_main.svm", "w+") as output_file:
        generate_multilabel_data(output_file, X, y)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment