Skip to content

Instantly share code, notes, and snippets.

View alinazhanguwo's full-sized avatar

Alina Zhang alinazhanguwo

View GitHub Profile
def train_classifier(X_train, y_train):
X_train, y_train — training text and sentiment
return: trained classifier
# Create and fit LogisticRegression wrapped into OneVsRestClassifier.
model = OneVsRestClassifier(LogisticRegression(penalty='l2', C=1.0))
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import average_precision_score
from sklearn.metrics import recall_score
def evaluation_scores(y_val, predicted):
print ("Accracy={}".format(accuracy_score(y_val, predicted)))
print ("F1_macro={}".format(f1_score(y_val, predicted, average='macro')))
max_fatures = 2000
tokenizer = Tokenizer(num_words=max_fatures, split=' ')
X = tokenizer.texts_to_sequences(X)
X = pad_sequences(X)
embed_dim = 128
lstm_out = 196
model = Sequential()
model.add(Embedding(max_fatures, embed_dim,input_length = X.shape[1]))
model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
model.compile(loss = 'categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])
# create train and test datasets
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size = 0.33, random_state = 42)
print("Trianing ", X_train.shape,Y_train.shape)
print("Testing ",X_test.shape,Y_test.shape)
batch_size = 32, Y_train, epochs = 20, batch_size=batch_size, verbose = 2)
# set up the parameters
n_init = 12
max_iter = 225
tol = 0.0001
random_state = 42
n_jobs = -1
n_clusters = 3
t0 =
print("========= Start training ... ")
def overallAccuracy(clusterDF, labelsDF):
countByCluster = pd.DataFrame(data=clusterDF['cluster'].value_counts())
countByCluster.reset_index(inplace=True, drop=False)
countByCluster.columns = ['cluster', 'clusterCount']
# print('countByCluster \n', countByCluster)
preds = pd.concat([labelsDF, clusterDF], axis=1)
preds.columns = ['trueLabel', 'cluster']
# print('preds \n', preds)
def overallAccuracy(clusterDF, labelsDF):
countByCluster = pd.DataFrame(data=clusterDF['cluster'].value_counts())
countByCluster.reset_index(inplace=True, drop=False)
countByCluster.columns = ['cluster', 'clusterCount']
# print('countByCluster \n', countByCluster)
preds = pd.concat([labelsDF, clusterDF], axis=1)
preds.columns = ['trueLabel', 'cluster']
# print('preds \n', preds)
# set up the parameters
n_init = 12
max_iter = 225
tol = 0.0001
random_state = 42
n_jobs = -1
t0 =
print("========= Start training ... ")
alinazhanguwo /
Last active October 15, 2020 18:39
Number of deaths and confirmed cases in top 10 states of America (2020-09-13)
fig1, (ax1, ax2) = plt.subplots(1,2, figsize=(12,8), sharey=False)
sns.barplot(x="Deaths", y="Province/State", data=bar_plot_df, label="num of Deaths", color="b", ax=ax1)
ax1.legend(ncol=2, loc="lower left", frameon=True)
ax1.set(xlim=(0, 33323), ylabel="", xlabel="Num of Deaths by states")
sns.despine(left=True, bottom=True,ax=ax1)