/inputs Secret

Created October 23, 2013 02:06
5
B 0.99813803 -0.00263872 -0.00464602
A 2.09441750 -0.00242373 0.00417336
A 0.63238996 1.03082951 0.00417296
A 0.62561232 -0.52974905 0.88151021
A 0.64010219 -0.50924801 -0.90858051
8
B 0.99566434 -0.00295079 -0.00645530
B 2.52433599 -0.00704005 0.00062949
A 0.59642533 1.02180902 -0.00238364
A 0.58817563 -0.51880627 0.87523331
A 0.59641749 -0.50854984 -0.89780318
A 2.92359554 0.50116660 0.89048719
A 2.93182660 0.50621547 -0.88257380
A 2.92355907 -1.03181414 -0.00043407
6
B 0.98946692 0.00007550 0.00000000
B 2.32461012 -0.00013585 0.00000000
A 0.41663940 0.92974933 0.00000000
A 0.41634287 -0.92941467 0.00000000
A 2.89773268 0.92935495 0.00000000
A 2.89743801 -0.92980927 0.00000000
4
B 0.98972410 0.00000000 0.00000000
B 2.20043588 0.00000000 0.00000000
A -0.08202857 0.00000000 0.00000000
A 3.27218859 0.00000000 0.00000000
9
B -0.02685201 0.87078057 -0.05692871
B -0.73928196 -0.46068850 -0.05716072
B 0.76666931 -0.41017260 0.04288779
A 0.01672181 1.42791936 -0.99323318
A -0.10696079 1.49356439 0.83453787
A -1.17833564 -0.80568159 -0.99374138
A -1.30207225 -0.73988932 0.83406126
A 1.22416472 -0.65503779 1.00190446
A 1.34779680 -0.72084452 -0.82587738
11
B 0.97845955 -0.02768979 0.00389766
B 2.50774209 0.01819297 0.00831353
B 3.05889059 1.44544974 0.00669378
A 0.60211829 -1.06093922 0.00127122
A 0.56966716 0.47973329 -0.88389480
A 0.56477020 0.47699441 0.89098372
A 2.89423058 -0.52569991 -0.86988889
A 2.88881752 -0.52182571 0.89125798
A 2.72423469 1.99802663 -0.88517904
A 4.15845772 1.45737554 0.01421023
A 2.71204162 2.00483205 0.88962461
9
B 0.96797053 -0.01219310 0.03712852
B 2.45732001 0.07634869 0.17261662
B 3.20126278 1.13572502 -0.16570673
A 0.67869402 -0.85110128 -0.61581754
A 0.54471661 0.91181994 -0.38057026
A 0.49232629 -0.19891972 1.01345648
A 2.95758301 -0.80754447 0.58493744
A 2.74826498 2.04015326 -0.58074711
A 4.28498177 1.13572165 -0.04105743
7
B 0.98318823 0.00302913 0.01438752
B 2.43760578 0.00077500 0.00182832
B 3.65128941 -0.00043559 -0.00398891
A 0.59037348 1.03032089 0.02906828
A 0.59576925 -0.51948910 0.90133246
A 0.57943309 -0.49978425 -0.87673930
A 4.72201076 -0.00130608 -0.00807838
8
B -0.05920440 0.87474790 -0.06163405
B -0.76054880 -0.43623157 -0.06104497
C 0.71652180 -0.38332116 -0.10884352
A -0.01756435 1.46779128 -0.97780601
A -0.06928588 1.45695877 0.86320280
A -1.23099070 -0.80076026 -0.97679165
A -1.25002057 -0.75040924 0.86420328
A 1.09557290 -0.58557574 0.82162412
10
B 0.99851568 -0.00462126 0.00528468
B 2.52332056 0.01617598 -0.01385443
C 3.01772873 1.40240938 0.03161740
A 0.61728676 -1.03560805 0.01853973
A 0.58694446 0.49012320 -0.88884712
A 0.61763227 0.52532798 0.88954162
A 2.88460715 -0.55996332 -0.89060400
A 2.90788005 -0.50009589 0.88078080
A 2.72754636 1.89770018 -0.81671631
A 4.04047798 1.41093181 0.02678764
6
B 0.98647308 -0.00002291 -0.00000091
B 2.44148072 -0.00000876 0.00000033
C 3.60751248 0.00002109 0.00000640
A 0.60763620 1.03029421 0.01179759
A 0.60759942 -0.52535096 0.88638192
A 0.60758811 -0.50493268 -0.89816533
10
B 0.99473361 -0.01020972 -0.01877491
C 2.45253321 0.02462639 0.04911034
B 2.98026566 1.38421790 -0.01381381
A 0.64860759 -1.05298966 -0.04235117
A 0.56612260 0.52152103 -0.89604511
A 0.57695153 0.45771185 0.88663649
A 2.83753164 -0.52082428 -0.72456982
A 2.67825786 1.93412560 0.89152731
A 2.62901551 1.96967610 -0.89128121
A 4.07871080 1.35601480 -0.03309812
7
B -0.11615305 0.89582221 -0.06281006
B -0.81009471 -0.40118034 -0.06316902
D 0.62563398 -0.33518528 0.01255526
A -0.00902635 1.44795992 -1.00218591
A -0.10911202 1.50110455 0.84951253
A -1.20973989 -0.79640510 -1.00275757
A -1.30982796 -0.74328596 0.84895478
9
B 0.98680622 -0.00556068 0.00384512
B 2.50927358 0.01966334 -0.01547778
D 3.05304559 1.34611733 0.02324657
A 0.61774476 -1.04209472 0.02517547
A 0.56972738 0.47651307 -0.89423471
A 0.60027421 0.52068553 0.88771373
A 2.89022911 -0.53230218 -0.89526213
A 2.91144160 -0.47770691 0.87951913
A 2.73168755 1.81986522 -0.76397539
7
B 0.99247987 0.01741937 -0.01355008
B 2.49414341 0.06610944 0.00915377
D 3.17309403 0.89062911 -0.57434571
A 0.66535275 -0.96243285 -0.39615644
A 0.57967416 0.82299921 -0.63114005
A 0.60911841 0.09008858 1.01672297
A 2.98212737 -0.73830286 0.62150554
9
B 0.98729036 -0.00489977 0.00422631
D 2.40628482 0.01941219 0.00000800
B 2.90288012 1.34890404 -0.00422560
A 0.67992688 -1.05837153 0.00430684
A 0.56972709 0.49412138 -0.89181424
A 0.57522891 0.49273287 0.90359931
A 3.99851510 1.28696524 -0.00431284
A 2.57185326 1.90908978 0.89181076
A 2.57131345 1.90342581 -0.90359854
12
B 0.34153625 1.03857962 -0.09372648
B 1.01486254 -0.33510713 0.17613063
B -0.33739712 -1.01387267 -0.17610970
B -1.03666848 0.34678097 0.09370222
A 0.59452012 1.87967860 0.56690688
A 0.48861590 1.35500735 -1.13748229
A 1.25482289 -0.45728386 1.24319677
A 1.90081240 -0.60437811 -0.41588935
A -0.38267652 -1.27933338 -1.24318636
A -0.65076215 -1.88523276 0.41587614
A -1.86226272 0.64633799 -0.56696828
A -1.37831311 0.41786337 1.13743982
12
B 1.00830484 0.00711468 -0.07133905
B 2.51625940 -0.06194947 0.00735422
B 3.30600801 -0.73146853 -1.09255786
B 3.36606231 0.77151100 -0.92269603
A 0.61362586 0.85320471 0.51210278
A 0.54309530 -0.91373668 0.31306051
A 0.67601273 0.13757955 -1.11260087
A 2.91607814 -0.19203053 1.01705448
A 2.74768497 -1.12222934 -1.94536411
A 4.18589975 -1.31651467 -0.82273057
A 4.28700730 1.21036876 -0.53724307
A 2.84795139 1.38516054 -1.66209043
14
B 0.95639262 -0.07346266 0.00118236
B 2.48371193 0.00778067 -0.00386041
B 3.01367094 1.44456330 0.01515929
B 4.54100360 1.52573544 0.01009677
A 0.60520325 -1.11530998 -0.01271911
A 0.52921178 0.43481182 -0.87725104
A 0.53647907 0.40867267 0.89768087
A 2.88026883 -0.51082700 -0.89450044
A 2.88742000 -0.53678977 0.86786838
A 2.61717450 1.96318352 0.90581339
A 2.60995944 1.98914980 -0.85655901
A 4.96815948 1.01751471 0.88858078
A 4.89226886 2.56755766 0.02388590
A 4.96087569 1.04347983 -0.88635773
14
B 0.97964482 -0.01169469 0.00638349
B 2.51253780 0.01164099 -0.01159439
B 3.06181110 -0.75617544 -1.21961844
B 3.04263670 1.44999610 0.01511682
A 0.58034233 0.51014802 0.88907598
A 0.59377959 -1.04215183 0.01977554
A 0.57203289 0.48611689 -0.88887711
A 2.86914417 -0.49711494 0.90298336
A 2.73256492 -0.28735337 -2.16157028
A 2.71117932 -1.79921515 -1.22587419
A 4.16214340 -0.76956514 -1.22056606
A 2.71364642 2.00304552 -0.88007371
A 4.14263935 1.47232373 0.03536037
A 2.67729721 1.99639933 0.89765861

# Python 2 / scikit-learn 0.14-era script: featurize each input block with the
# eigenvalues of a Coulomb-matrix-style descriptor, then tune an RBF SVR
# against the reference targets using stratified cross-validation.
# Usage: python <script> [inputs_file] [targets_file]
import sys, math, random
import numpy as np
from sklearn import cross_validation
from sklearn.svm import SVR
from sklearn.metrics import make_scorer
from sklearn.grid_search import GridSearchCV

inputs_file = sys.argv[1] if len(sys.argv) > 1 else "inputs"
targets_file = sys.argv[2] if len(sys.argv) > 2 else "targets"
## ----------------------------------------------------------------------------
print("Reading inputs...")
with open(inputs_file) as f:
cs = []
inputs_ize = f.readline()
while inputs_ize:
input_ = []
for i in xrange(int(inputs_ize)):
line = f.readline().split()
a = (line[0], np.array(line[1:], dtype=float))
input_.append(a)
cs.append(input_)
inputs_ize = f.readline()
inputs_ = dict(zip(xrange(len(cs)), cs))
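# At this point inputs_[0] is a list of (label, coordinates) pairs, e.g.
#   [('B', array([ 0.99813803, -0.00263872, -0.00464602])),
#    ('A', array([ 2.0944175 , -0.00242373,  0.00417336])), ...]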
## ----------------------------------------------------------------------------
print("Reading reference targets...")
with open(targets_file) as f:
lines = f.readlines()
target = dict(zip(xrange(len(lines)), iter(float(e) for e in lines)))
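# target maps each input index to its reference value, e.g.
#   {0: -18.2656438106035, 1: -31.1744765392964, ...}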
## ----------------------------------------------------------------------------
print("Calculating eigenvalues...")
def CM(input_):
def c(a_i, a_j):
Z = {'A': 1.0, 'B': 2.0, 'C': 3.0, 'D': 4.0}
if np.array_equal(a_i[1], a_j[1]):
return 0.5*math.pow(Z[a_i[0]], 2.4)
else:
return Z[a_i[0]] * Z[a_j[0]] / np.linalg.norm(a_i[1] - a_j[1])
return np.array([[c(a_i, a_j) for a_j in input_] for a_i in input_])
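# Worked example (hypothetical two-atom input): for [('A', r1), ('B', r2)]
# with |r1 - r2| == 1.0,
#   CM = [[0.5 * 1**2.4, 1*2 / 1.0   ]     [[0.5, 2.0  ],
#         [1*2 / 1.0,    0.5 * 2**2.4]] ~=  [2.0, 2.639]]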
inputs_used = [(dn, inputs_[dn]) for dn in target.keys()]
CMs = ((dn, CM(input_)) for (dn, input_) in inputs_used)
# CM is symmetric, so the eigenvalues are real (eigvalsh would also return
# them in a guaranteed order).
eigenvalues = dict((dn, np.linalg.eigvals(cm)) for (dn, cm) in CMs)
# Zero-pad every eigenvalue vector to the largest input's size so all
# feature vectors share the same dimension.
max_dim = max(len(ev) for ev in eigenvalues.values())
for i in eigenvalues:
    eigenvalues[i] = np.lib.pad(eigenvalues[i], (0, max_dim - len(eigenvalues[i])), 'constant')
## ----------------------------------------------------------------------------
print("Preparing training/testing data...")
num_folds = 5
randomized_inputs = target.keys()
random.shuffle(randomized_inputs)
k = 20
inputs_of_interest = randomized_inputs[:k]
targets_of_interest = [(input_, target[input_]) for input_ in inputs_of_interest]
# Try to ensure each fold covers the whole target range.
# After grouping the inputs of interest approximately according to their target,
# each of these groups is given a label to be used in the stratified k-fold
# cross-validation scheme.
sorted_targets = sorted(targets_of_interest, key=lambda (_, e): e)
labels = iter([label] * num_folds for label in range(k / num_folds))
stratified_labels = [label for labelclass in labels for label in labelclass]
skf = cross_validation.StratifiedKFold(stratified_labels, n_folds=num_folds)
X = np.array([eigenvalues[input_] for (input_, _) in sorted_targets])
Y = np.array([target for (_, target) in sorted_targets])
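# With k == 20 and num_folds == 5, stratified_labels == [0]*5 + [1]*5 + [2]*5 + [3]*5,
# so each fold's test set draws one input from each quarter of the sorted
# target range.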
## ----------------------------------------------------------------------------
print("Training model...")
tuned_parameters = [{'kernel': ['rbf'], 'C': np.logspace(-3, 3, 7),
                     'epsilon': np.logspace(-2, 2, 5)}]
def mae_scorer(Y, Y_pred):
    # Mean absolute error.
    return np.mean(np.absolute(Y - Y_pred))
my_scorer = make_scorer(mae_scorer, greater_is_better=False)
scores = ['mean_squared_error', my_scorer]
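# make_scorer with greater_is_better=False flips the sign of mae_scorer, so
# GridSearchCV still maximizes its score while effectively minimizing MAE.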
for score in scores:
    print("")
    print("Tuning hyper-parameters for %s" % score)
    print("")
    reg = GridSearchCV(SVR(kernel='rbf'), tuned_parameters, cv=skf, n_jobs=-1, scoring=score)
    reg.fit(X, Y)
    print("Best parameters set found on development set:")
    print("")
    print(reg.best_estimator_)
    print("")
    print("Grid scores on development set:")
    print("")
    for params, mean_score, fold_scores in reg.grid_scores_:
        print("%0.3f (+/-%0.03f) for %r"
              % (mean_score, fold_scores.std() / 2, params))
    print("")
    # Caveat: best_estimator_ has been refit on all of X, so these per-fold
    # errors are computed on data the model has already seen.
    for train, test in skf:
        Y_pred = reg.best_estimator_.predict(X[test])
        rms_error = math.sqrt(np.sum((Y[test] - Y_pred)**2) / len(test))
        ma_error = np.sum(np.absolute(Y[test] - Y_pred)) / len(test)
        print("RMSE: %f " % rms_error)
        print("MAE: %f " % ma_error)

targets

-18.2656438106035
-31.1744765392964
-24.7351783251961
-17.8036116284256
-37.7023801308192
-44.1444146275594
-37.8657090471788
-31.1090946857785
-31.9531454818111
-38.6094777852318
-27.2619987197509
-38.2686804639616
-28.9798776002281
-35.6903401104491
-30.0188847046984
-35.1692713055691
-50.7218554324618
-50.7428545764524
-57.1130709473027
-57.1466796410123