ageitgey/gist:c40fba50b6fece4ee1e7

## gistfile1.py
import numpy as np
from sklearn import grid_search
from sklearn import cross_validation
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDRegressor

# Load your starting coefficient data as an array called X. This can be as big
# as your computer's memory. If that's still not big enough, you can load it in
# segments and use partial_fit instead of fit.

# Replace this with code to load data from a CSV or something.
# Each inner list is one sample (row); each position is one input feature.
X = [
  [4,3,1,0,1],
  [5,2,1,0,1],
  [4,2,1,1,1],
  [3,1,0,1,1],
  [1,1,0,1,1],
  [4,3,1,0,1],
  [5,2,1,0,1],
  [4,2,1,1,1],
  [3,1,0,1,1],
  [1,1,0,1,1],
  [4,3,1,0,1],
  [5,2,1,0,1],
  [4,2,1,1,1],
  [3,1,0,1,1],
  [1,1,0,1,1]
]

# Load the "answers" to your equation into a vector named y (one value per
# row of X, in the same order).
y = [4, 6, 6, 3, 1, 4, 6, 6, 3, 1, 4, 6, 6, 3, 1]

# To find the best parameters for the SGD regressor, we are just going to try
# them all and see which get the most accurate results.
# To do that, we need to split the data into a training set and a test set.
# 30% of the rows are held back so the chosen model can be scored on data it
# never saw while being fitted.
# NOTE: sklearn.cross_validation and sklearn.grid_search were deprecated in
# scikit-learn 0.18 and removed in 0.20; on newer releases the same functions
# live in sklearn.model_selection.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.3)

# Create the model. Shuffling is important with SGD because the order you see
# each array row affects the output.
# penalty='elasticnet' is required for the l1_ratio values searched below to
# have any effect: with the default 'l2' penalty, SGDRegressor ignores
# l1_ratio and every l1_ratio candidate would fit the exact same model.
model = SGDRegressor(shuffle=True, penalty='elasticnet')

# A list of param ranges we want to guess and check and a list of values to try
# for each one.
#   alpha    - regularization strength
#   l1_ratio - elastic-net mix between L2 (towards 0) and L1 (1) penalties
param_grid = [{
      'alpha': [0.1, 0.3, 0.01, 0.03, 0.001, 0.003],
      'l1_ratio': [.05, .15, .5, .7, .9, .95, .99, 1]
      }]

# Try each param pair and pick the best! This might take several min with a big data set.
# The candidates are cross-validated on the training rows only, using up to
# 8 parallel jobs.
gs = grid_search.GridSearchCV(model, param_grid, n_jobs=8, verbose=1)
gs.fit(X_train, y_train)

# gs.best_estimator_ will be the SGDRegressor instance refit with the best-scoring params
print("Best estimator:")
print(gs.best_estimator_)
print(gs.best_estimator_.coef_)

# Finally use the held-out rows: score the best estimator on data that played
# no part in the search (for a regressor the default score is R^2).
print("Held-out test score:")
print(gs.score(X_test, y_test))
	import numpy as np
	from sklearn import grid_search
	from sklearn import cross_validation
	from sklearn.preprocessing import StandardScaler
	from sklearn.linear_model import SGDRegressor

	# Load your starting coefficient data as an array called X. This can be as big
	# as your computer's memory. If that's still not big enough, you can load it in
	# segments and use partial_fit instead of fit.

	# Replace this with code to load data from a CSV or something.
	# Each inner list is one sample (row); each position is one input feature.
	X = [
	[4,3,1,0,1],
	[5,2,1,0,1],
	[4,2,1,1,1],
	[3,1,0,1,1],
	[1,1,0,1,1],
	[4,3,1,0,1],
	[5,2,1,0,1],
	[4,2,1,1,1],
	[3,1,0,1,1],
	[1,1,0,1,1],
	[4,3,1,0,1],
	[5,2,1,0,1],
	[4,2,1,1,1],
	[3,1,0,1,1],
	[1,1,0,1,1]
	]

	# Load the "answers" to your equation into a vector named y (one value per
	# row of X, in the same order).
	y = [4, 6, 6, 3, 1, 4, 6, 6, 3, 1, 4, 6, 6, 3, 1]

	# To find the best parameters for the SGD regressor, we are just going to try
	# them all and see which get the most accurate results.
	# To do that, we need to split the data into a training set and a test set.
	# 30% of the rows are held back so the chosen model can be scored on data it
	# never saw while being fitted.
	# NOTE: sklearn.cross_validation and sklearn.grid_search were deprecated in
	# scikit-learn 0.18 and removed in 0.20; on newer releases the same functions
	# live in sklearn.model_selection.
	X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.3)

	# Create the model. Shuffling is important with SGD because the order you see
	# each array row affects the output.
	# penalty='elasticnet' is required for the l1_ratio values searched below to
	# have any effect: with the default 'l2' penalty, SGDRegressor ignores
	# l1_ratio and every l1_ratio candidate would fit the exact same model.
	model = SGDRegressor(shuffle=True, penalty='elasticnet')

	# A list of param ranges we want to guess and check and a list of values to try
	# for each one.
	#   alpha    - regularization strength
	#   l1_ratio - elastic-net mix between L2 (towards 0) and L1 (1) penalties
	param_grid = [{
	'alpha': [0.1, 0.3, 0.01, 0.03, 0.001, 0.003],
	'l1_ratio': [.05, .15, .5, .7, .9, .95, .99, 1]
	}]

	# Try each param pair and pick the best! This might take several min with a big data set.
	# The candidates are cross-validated on the training rows only, using up to
	# 8 parallel jobs.
	gs = grid_search.GridSearchCV(model, param_grid, n_jobs=8, verbose=1)
	gs.fit(X_train, y_train)

	# gs.best_estimator_ will be the SGDRegressor instance refit with the best-scoring params
	print("Best estimator:")
	print(gs.best_estimator_)
	print(gs.best_estimator_.coef_)

	# Finally use the held-out rows: score the best estimator on data that played
	# no part in the search (for a regressor the default score is R^2).
	print("Held-out test score:")
	print(gs.score(X_test, y_test))