from sklearn.cluster import AgglomerativeClustering
model = AgglomerativeClustering(n_clusters=2)
model.fit(X)
model.labels_ #cluster label assigned to each sample
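The snippet above assumes a feature matrix X already exists; a minimal setup sketch using synthetic data (make_blobs and the fit_predict shortcut are standard scikit-learn, not from the original):
from sklearn.datasets import make_blobs
from sklearn.cluster import AgglomerativeClustering
X, _ = make_blobs(n_samples=500, centers=2, random_state=0) #toy two-cluster data
labels = AgglomerativeClustering(n_clusters=2).fit_predict(X) #fit and label in one step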
from sklearn.cluster import DBSCAN
model = DBSCAN(eps=0.30, min_samples=9)
model.fit(X)
model.labels_ #cluster label for each sample (-1 marks noise points)
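Unlike agglomerative clustering, DBSCAN flags outliers with the label -1; a short follow-up sketch for counting clusters and noise points (the numpy bookkeeping is mine, not from the original):
import numpy as np
labels = model.labels_
n_clusters = len(set(labels)) - (1 if -1 in labels else 0) #-1 is the noise label, not a cluster
n_noise = int(np.sum(labels == -1)) #points DBSCAN classified as noise
print(f'{n_clusters} clusters, {n_noise} noise points')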
{'learner': GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                                       learning_rate=0.009132299586303643, loss='deviance',
                                       max_depth=None, max_features='sqrt',
                                       max_leaf_nodes=None, min_impurity_decrease=0.0,
                                       min_impurity_split=None, min_samples_leaf=1,
                                       min_samples_split=2, min_weight_fraction_leaf=0.0,
                                       n_estimators=342, n_iter_no_change=None,
                                       presort='auto', random_state=2,
                                       subsample=0.6844206624548879, tol=0.0001,
                                       validation_fraction=0.1, verbose=0,
                                       warm_start=False)}
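A dictionary like the one above, keyed by 'learner', is the kind of result hyperopt-sklearn's best_model() returns; a minimal sketch of a search that produces it, assuming the hpsklearn API (X_train, y_train, X_test, y_test are assumed to exist):
from hpsklearn import HyperoptEstimator, any_classifier
from hyperopt import tpe
estim = HyperoptEstimator(classifier=any_classifier('my_clf'), #search across classifier types
                          algo=tpe.suggest, #tree-structured Parzen estimator search
                          max_evals=50, #number of models to evaluate
                          trial_timeout=120) #seconds allowed per trial
estim.fit(X_train, y_train)
print(estim.score(X_test, y_test))
print(estim.best_model()) #dict describing the winning model, as shown above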
import tpot
pipeline_optimizer = tpot.TPOTClassifier(generations=5, #number of iterations of the optimization process
                                         population_size=20, #number of individuals (candidate pipelines) per generation
                                         cv=5) #number of folds in StratifiedKFold
pipeline_optimizer.fit(X_train, y_train) #fit the pipeline optimizer - can take a long time
print(pipeline_optimizer.score(X_test, y_test)) #print scoring for the pipeline
pipeline_optimizer.export('tpot_exported_pipeline.py') #export the pipeline - in Python code!
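The TPOT snippet assumes the train/test splits already exist; a minimal setup sketch (load_digits is just an example dataset, not from the original):
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
X, y = load_digits(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.75,
                                                    test_size=0.25, random_state=42)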
import autosklearn.classification as ask #the submodule must be imported explicitly
#autosklearn.regression.AutoSklearnRegressor() for regression tasks
model = ask.AutoSklearnClassifier(ensemble_size=10, #size of the final ensemble (minimum is 1)
                                  time_left_for_this_task=120, #total seconds the search runs for
                                  per_run_time_limit=30) #maximum seconds allocated per model
model.fit(X_train, y_train) #begin fitting the search model
print(model.sprint_statistics()) #print statistics for the search
y_predictions = model.predict(X_test) #get predictions from the model
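The predictions can be scored with ordinary scikit-learn metrics; a short follow-up (accuracy_score is standard scikit-learn, assumed here rather than taken from the original):
from sklearn.metrics import accuracy_score
print('Accuracy:', accuracy_score(y_test, y_predictions)) #compare predictions to held-out labels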
#assembling the full multi-input, multi-output model; the Input shapes below are example assumptions
from keras.models import Model
from keras.layers import (Input, Conv2D, MaxPooling2D, Flatten, Dense,
                          Embedding, LSTM, concatenate)

image_input = Input(shape=(64, 64, 3)) #e.g. 64x64 RGB images (assumed shape)
numerical_input = Input(shape=(10,)) #e.g. 10 numerical features (assumed shape)
text_input = Input(shape=(100,)) #e.g. sequences of 100 token ids (assumed shape)

#image branch: convolution/pooling stages, then flatten
image_1 = Conv2D(32, kernel_size=(3, 3), activation="relu")(image_input) #convolution
image_2 = MaxPooling2D(pool_size=(2, 2))(image_1) #max pooling
image_3 = Conv2D(64, kernel_size=(3, 3), activation="relu")(image_2) #convolution
image_4 = MaxPooling2D(pool_size=(2, 2))(image_3) #max pooling
image_5 = Flatten()(image_4) #flatten

#numerical branch: two small dense layers
numerical_1 = Dense(16, activation='relu')(numerical_input)
numerical_2 = Dense(8, activation='relu')(numerical_1)

#text branch: embedding followed by an LSTM
text_1 = Embedding(10_000, 64)(text_input)
text_2 = LSTM(128)(text_1)

#merge the branches and add a shared dense layer
image_text_3 = concatenate([image_5, text_2]) #join image and text branches (definition inferred)
concat_1 = concatenate([image_text_3, numerical_2])
concat_2 = Dense(16, activation='relu')(concat_1)

outputs_1 = Dense(1, activation='linear', name='continuous')(concat_2) #continuous variable output
outputs_2 = Dense(3, activation='softmax', name='categorical')(concat_2) #categorical variable output

model = Model(inputs=[image_input, numerical_input, text_input],
              outputs=[outputs_1, outputs_2], name='complex_model')
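A hedged follow-up compiling the two-headed model with one loss per output, keyed by the output names above; the optimizer and loss choices are illustrative assumptions:
model.compile(optimizer='adam',
              loss={'continuous': 'mse', #regression head
                    'categorical': 'sparse_categorical_crossentropy'}, #classification head, integer labels
              metrics={'categorical': ['accuracy']})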