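# Illustrative setup only, not part of the original gist: the script below assumes
# that `features`, X_train, X_test, y_train and y_test already exist. One way to
# obtain them, using a toy sklearn dataset, is sketched here.
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

data = load_breast_cancer()
features = data.feature_names
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.3, random_state=420)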
import numpy as np
from sklearn import ensemble, metrics
# from sklearn import tree  # only needed for the commented-out DecisionTreeClassifier variant

# `features`, X_train, X_test, y_train and y_test must be defined before this
# point (for example as in the illustrative setup above).

def next_candidate(current_x, bounds):
    # Propose a neighbouring integer point: each coordinate moves by at most 1,
    # stays within its bounds, and the proposal must differ from the current point.
    found = False
    while not found:
        candidate = [np.random.randint(np.max([x - 1, b[0]]), high=np.min([x + 2, b[1] + 1]))
                     for x, b in zip(current_x, bounds)]
        if np.sum(np.abs(np.subtract(current_x, candidate))) >= 1:
            found = True
    return candidate

def likelihood_ratio(chi1, chi2, sigma):
    # Metropolis-style acceptance ratio for Gaussian likelihoods with scale sigma.
    return np.exp((-chi2 + chi1) / (2.0 * (sigma ** 2.0)))

def cost_function(x1, x2):
    # Penalise train and test ROC AUC scores that fall short of 1.
    return (1.0 - x1) ** 2.0 + (1.0 - x2) ** 2.0

def roc_score(clf, X_set, y_set):
    return metrics.roc_auc_score(y_set, clf.predict(X_set))

def param_mapping(x):
    # Map an integer state vector to classifier keyword arguments.
    max_depth = int(x[0])
    max_features = int(x[1])
    min_samples_split = int(x[2])
    min_samples_leaf = int(x[3])
    params = {'max_depth': max_depth,
              'max_features': max_features,
              'min_samples_split': min_samples_split,
              'min_samples_leaf': min_samples_leaf,
              'random_state': 420}
    return params

def fit_clf(x):
    # Fit a classifier with the proposed parameters and return its cost.
    params = param_mapping(x)
    # cvc = ensemble.ExtraTreesClassifier(**params)
    # cvc = tree.DecisionTreeClassifier(**params)
    cvc = ensemble.RandomForestClassifier(**params)
    cvc = cvc.fit(X_train, y_train)
    x1 = roc_score(cvc, X_train, y_train)
    x2 = roc_score(cvc, X_test, y_test)
    return cost_function(x1, x2)

def accept_ratio(accept_reject, check_step, i):
    # Fraction of proposals accepted over the last check_step iterations.
    return np.sum(accept_reject[i - check_step + 1:i + 1] > 0) * 1.0 / check_step

bounds = ((2, 30), (2, len(features)), (1, 30), (1, 30))
initial_guess = [np.ceil((b[0] + b[1]) / 2) for b in bounds]
iterations = 10000
burn_in = 3000
check_step = 100
optimal_accept = 0.4
accept_reject = np.zeros(iterations)
sigma = 0.004
results = np.zeros([iterations, len(initial_guess) + 1])

for i in range(iterations):
    if i == 0:
        current_x = initial_guess
        current_cost = fit_clf(current_x)
        accept_reject[i] = 1
        print('Starting params', current_x)
        print('Starting cost', current_cost)
    else:
        proposed_x = next_candidate(current_x, bounds)
        proposed_cost = fit_clf(proposed_x)
        ll_ratio = likelihood_ratio(current_cost, proposed_cost, sigma)
        rn = np.random.random()
        if ll_ratio > rn:
            current_x = proposed_x
            current_cost = proposed_cost
            accept_reject[i] = 1
        else:
            accept_reject[i] = -1
        if (i + 1) % check_step == 0:
            if i < burn_in:
                # During burn-in, tune sigma towards the target acceptance rate.
                if accept_ratio(accept_reject, check_step, i) > optimal_accept:
                    sigma = sigma - sigma * 0.05
                else:
                    sigma = sigma + sigma * 0.05
    results[i, 0:4] = current_x
    results[i, 4] = current_cost

print('Accept ratio', accept_ratio(accept_reject, check_step, i))
print('Finishing params', current_x)
print('Finishing cost', current_cost)
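
# A rough usage sketch, not part of the original gist: once the chain has run,
# one way to choose a final parameter set is to take the post-burn-in sample with
# the lowest recorded cost and map it back to classifier keyword arguments.
post_burn = results[burn_in:]
best_row = post_burn[np.argmin(post_burn[:, 4])]
best_params = param_mapping(best_row[0:4])
print('Best post-burn-in cost', best_row[4])
print('Best post-burn-in params', best_params)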