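# Illustrative setup only, not part of the original gist: the script below assumes
# that `features`, X_train, X_test, y_train and y_test already exist. One way to
# obtain them, using a toy sklearn dataset, is sketched here.
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

data = load_breast_cancer()
features = data.feature_names
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.3, random_state=420)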
import numpy as np
from sklearn import ensemble, metrics
# from sklearn import tree  # only needed for the commented-out DecisionTreeClassifier variant

# `features`, X_train, X_test, y_train and y_test must be defined before this
# point (for example as in the illustrative setup above).

def next_candidate(current_x, bounds):
    # Propose a neighbouring integer point: each coordinate moves by at most 1,
    # stays within its bounds, and the proposal must differ from the current point.
    found = False
    while not found:
        candidate = [np.random.randint(np.max([x - 1, b[0]]), high=np.min([x + 2, b[1] + 1]))
                     for x, b in zip(current_x, bounds)]
        if np.sum(np.abs(np.subtract(current_x, candidate))) >= 1:
            found = True
    return candidate

def likelihood_ratio(chi1, chi2, sigma):
    # Metropolis-style acceptance ratio for Gaussian likelihoods with scale sigma.
    return np.exp((-chi2 + chi1) / (2.0 * (sigma ** 2.0)))

def cost_function(x1, x2):
    # Penalise train and test ROC AUC scores that fall short of 1.
    return (1.0 - x1) ** 2.0 + (1.0 - x2) ** 2.0

def roc_score(clf, X_set, y_set):
    return metrics.roc_auc_score(y_set, clf.predict(X_set))

def param_mapping(x):
    # Map an integer state vector to classifier keyword arguments.
    max_depth = int(x[0])
    max_features = int(x[1])
    min_samples_split = int(x[2])
    min_samples_leaf = int(x[3])
    params = {'max_depth': max_depth,
              'max_features': max_features,
              'min_samples_split': min_samples_split,
              'min_samples_leaf': min_samples_leaf,
              'random_state': 420}
    return params

def fit_clf(x):
    # Fit a classifier with the proposed parameters and return its cost.
    params = param_mapping(x)
    # cvc = ensemble.ExtraTreesClassifier(**params)
    # cvc = tree.DecisionTreeClassifier(**params)
    cvc = ensemble.RandomForestClassifier(**params)
    cvc = cvc.fit(X_train, y_train)
    x1 = roc_score(cvc, X_train, y_train)
    x2 = roc_score(cvc, X_test, y_test)
    return cost_function(x1, x2)

def accept_ratio(accept_reject, check_step, i):
    # Fraction of proposals accepted over the last check_step iterations.
    return np.sum(accept_reject[i - check_step + 1:i + 1] > 0) * 1.0 / check_step

bounds = ((2, 30), (2, len(features)), (1, 30), (1, 30))
initial_guess = [np.ceil((b[0] + b[1]) / 2) for b in bounds]
iterations = 10000
burn_in = 3000
check_step = 100
optimal_accept = 0.4
accept_reject = np.zeros(iterations)
sigma = 0.004
results = np.zeros([iterations, len(initial_guess) + 1])

for i in range(iterations):
    if i == 0:
        current_x = initial_guess
        current_cost = fit_clf(current_x)
        accept_reject[i] = 1
        print('Starting params', current_x)
        print('Starting cost', current_cost)
    else:
        proposed_x = next_candidate(current_x, bounds)
        proposed_cost = fit_clf(proposed_x)
        ll_ratio = likelihood_ratio(current_cost, proposed_cost, sigma)
        rn = np.random.random()
        if ll_ratio > rn:
            current_x = proposed_x
            current_cost = proposed_cost
            accept_reject[i] = 1
        else:
            accept_reject[i] = -1
        if (i + 1) % check_step == 0:
            if i < burn_in:
                # During burn-in, tune sigma towards the target acceptance rate.
                if accept_ratio(accept_reject, check_step, i) > optimal_accept:
                    sigma = sigma - sigma * 0.05
                else:
                    sigma = sigma + sigma * 0.05
    results[i, 0:4] = current_x
    results[i, 4] = current_cost

print('Accept ratio', accept_ratio(accept_reject, check_step, i))
print('Finishing params', current_x)
print('Finishing cost', current_cost)
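
# A rough usage sketch, not part of the original gist: once the chain has run,
# one way to choose a final parameter set is to take the post-burn-in sample with
# the lowest recorded cost and map it back to classifier keyword arguments.
post_burn = results[burn_in:]
best_row = post_burn[np.argmin(post_burn[:, 4])]
best_params = param_mapping(best_row[0:4])
print('Best post-burn-in cost', best_row[4])
print('Best post-burn-in params', best_params)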