Skip to content

Instantly share code, notes, and snippets.

@abinashpanda
Created June 9, 2014 15:10
Show Gist options
  • Save abinashpanda/c4f57a0acb6c0e6998c5 to your computer and use it in GitHub Desktop.
Save abinashpanda/c4f57a0acb6c0e6998c5 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""
Benchmarking test with time taken for multilabel learning using Shogun's
Structured Output (SO) learning framework.
"""
from __future__ import print_function
from __future__ import division
import numpy as np
from sklearn.datasets import make_multilabel_classification
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
from sklearn.metrics import jaccard_similarity_score
from sklearn.preprocessing import MultiLabelBinarizer
from time import time
from os import system
def generate_svm_light_sample(x_sample, y_sample):
    """Format one sample as a single SVMlight-style multilabel line.

    The line has the form ``"<labels> <features>"`` where ``<labels>`` is the
    comma-joined ``str()`` of every item in ``y_sample`` and ``<features>`` is
    a space-joined list of ``index:value`` pairs.

    Args:
        x_sample: iterable of numeric feature values for one sample.
        y_sample: iterable of labels for the same sample.

    Returns:
        The formatted line, without a trailing newline.
    """
    labels = ','.join(map(str, y_sample))
    # Feature indices are written 1-based; values use "%f" (six decimals).
    # Every feature is emitted, including zeros.
    features = ' '.join(["%d:%f" % (index + 1, val)
                         for index, val in enumerate(x_sample)])
    return "%s %s" % (labels, features)
def generate_multilabel_data(output_file, X, y):
    """Write every sample of ``(X, y)`` to ``output_file``, one line each.

    Each line is produced by ``generate_svm_light_sample`` and terminated
    with a newline.

    Args:
        output_file: writable text file object (anything with ``write``).
        X: two-dimensional array exposing ``.shape``; one row per sample.
        y: sequence of per-sample labels, with ``len(y)`` equal to the
            number of rows of ``X``.

    Raises:
        AssertionError: if the number of rows of ``X`` differs from
            ``len(y)``.
    """
    m, _ = X.shape
    # Raised explicitly (instead of a bare ``assert``) so the check still runs
    # under ``python -O``; the exception type is unchanged for callers.
    if m != len(y):
        raise AssertionError(
            "X has %d samples but y has %d label entries" % (m, len(y)))

    for x_sample, y_sample in zip(X, y):
        # ``write`` rather than ``writelines``: the argument is one string,
        # and ``writelines`` would iterate it character by character.
        output_file.write(
            "%s\n" % generate_svm_light_sample(x_sample, y_sample))
if __name__ == '__main__':
    print(__doc__)

    # Synthetic multilabel problem: 10000 samples, 1000 features, 10 classes.
    X, y = make_multilabel_classification(
        n_features=1000,
        n_samples=10000,
        n_classes=10,
        n_labels=3,
        allow_unlabeled=False)

    # Depending on the scikit-learn version, ``y`` is either a sequence of
    # per-sample label lists or a dense (n_samples, n_classes) 0/1 indicator
    # matrix. The writer expects label ids, so turn indicator rows into the
    # indices of their non-zero entries; label lists are passed through as-is.
    if isinstance(y, np.ndarray) and y.ndim == 2:
        y = [np.flatnonzero(row) for row in y]

    # The file is only written, never read back, so plain "w" is sufficient.
    with open("sample_main.svm", "w") as output_file:
        generate_multilabel_data(output_file, X, y)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment