Michel Kana (michelkana), GitHub gists
# Use train_test_split to split our data into train and validation sets for training
import torch
from sklearn.model_selection import train_test_split

train_inputs, validation_inputs, train_labels, validation_labels = train_test_split(
    input_ids, labels, random_state=2018, test_size=0.1)
train_masks, validation_masks, _, _ = train_test_split(
    attention_masks, input_ids, random_state=2018, test_size=0.1)

# Convert all of our data into torch tensors, the required datatype for our model
train_inputs = torch.tensor(train_inputs)
validation_inputs = torch.tensor(validation_inputs)
train_labels = torch.tensor(train_labels)
validation_labels = torch.tensor(validation_labels)
train_masks = torch.tensor(train_masks)
validation_masks = torch.tensor(validation_masks)
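# Next step in this pipeline (a minimal sketch, not in the original gist):
# wrap the tensors in DataLoaders; batch_size=32 is an assumption.
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

batch_size = 32
train_data = TensorDataset(train_inputs, train_masks, train_labels)
train_dataloader = DataLoader(train_data, sampler=RandomSampler(train_data), batch_size=batch_size)
validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)
validation_dataloader = DataLoader(validation_data, sampler=SequentialSampler(validation_data), batch_size=batch_size)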
# Load BertForSequenceClassification, the pretrained BERT model with a single linear classification layer on top.
# Import assumed from the transformers package; nb_labels (the number of target classes) is defined earlier in the gist.
from transformers import BertForSequenceClassification

model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=nb_labels)
model.cuda()
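# Hedged fine-tuning sketch (not in the original gist): AdamW and lr=2e-5 are assumptions,
# and the .loss attribute assumes a recent transformers version.
from torch.optim import AdamW

optimizer = AdamW(model.parameters(), lr=2e-5)
model.train()
for batch in train_dataloader:
    input_ids_b, mask_b, labels_b = (t.cuda() for t in batch)
    optimizer.zero_grad()
    loss = model(input_ids_b, attention_mask=mask_b, labels=labels_b).loss
    loss.backward()
    optimizer.step()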
# BERT model summary (truncated)
BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      ...
# packages needed
# !pip install nltk
# !pip install stanfordnlp
# !pip install --upgrade bleu
import nltk
from nltk.tokenize import sent_tokenize
import re
import stanfordnlp
from bleu import list_bleu
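# Minimal usage sketch (not in the original gist): sentence tokenization and BLEU scoring.
# The example strings are placeholders; the list_bleu call shape follows the bleu package README.
nltk.download('punkt')
sentences = sent_tokenize("BERT encodes text. GPT-2 generates text.")
ref = ['the cat sat on the mat .']
hyp = ['a cat sat on the mat .']
print(list_bleu([ref], hyp))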
# pip install scipy==1.1.0  (keras-vis needs an older scipy release)
# pip install keras-vis
import numpy as np
from vis.visualization import visualize_saliency

def plot_saliency(img_idx=None):
    # plot_features_map, cnn_saliency, cnn, x_test, ytest and labels are defined earlier in the gist
    img_idx = plot_features_map(img_idx)
    # saliency of the last layer (-1) with respect to the true class of the chosen test image
    grads = visualize_saliency(cnn_saliency, -1, filter_indices=ytest[img_idx][0],
                               seed_input=x_test[img_idx], backprop_modifier=None,
                               grad_modifier="absolute")
    predicted_label = labels[np.argmax(cnn.predict(x_test[img_idx].reshape(1, 32, 32, 3)), 1)[0]]
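    # Hedged continuation (the gist is truncated here): display the saliency overlay,
    # assuming matplotlib.pyplot is imported as plt.
    fig, axes = plt.subplots(1, 2, figsize=(8, 4))
    axes[0].imshow(x_test[img_idx])
    axes[0].set_title('predicted: {}'.format(predicted_label))
    axes[1].imshow(grads, cmap='jet')
    axes[1].set_title('saliency map')
    plt.show()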
# prediction with and without dropout
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import r2_score

y_pred_without_dropout = model_without_dropout.predict(x_test)
y_pred_with_dropout = model_with_dropout.predict(x_test)

# plotting (the true relationship is y = x, so x_test doubles as the ground truth)
fig, ax = plt.subplots(1, 1, figsize=(10, 5))
ax.scatter(x_train, y_train, s=10, label='train data')
ax.plot(x_test, x_test, ls='--', label='test data', color='green')
ax.plot(x_test, y_pred_without_dropout,
        label='predicted ANN - R2 {:.2f}'.format(r2_score(x_test, y_pred_without_dropout)), color='red')
ax.plot(x_test, y_pred_with_dropout,
        label='predicted ANN Dropout - R2 {:.2f}'.format(r2_score(x_test, y_pred_with_dropout)), color='black')
ax.legend()
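# model_without_dropout and model_with_dropout come from earlier in this gist;
# a hedged sketch of the dropout variant (layer sizes and rates are assumptions):
from keras.models import Sequential
from keras.layers import Dense, Dropout

model_with_dropout = Sequential([
    Dense(128, activation='relu', input_dim=1),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1),
])
model_with_dropout.compile(optimizer='adam', loss='mse')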
# importing R functions via rpy2
# !pip install rpy2
import rpy2.robjects as robjects
r_predict = robjects.r["predict"]
r_lm = robjects.r["lm"]
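# Minimal usage sketch (not in the original gist): fit an R linear model on toy data
# by binding variables into the formula's environment.
from rpy2.robjects import FloatVector

fmla = robjects.Formula('y ~ x')
env = fmla.environment
env['x'] = FloatVector([1.0, 2.0, 3.0, 4.0])
env['y'] = FloatVector([2.1, 3.9, 6.2, 7.8])
fit = r_lm(fmla)
print(r_predict(fit))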
# prepare train data: average price per day, with date expressed as days since the first date
df_train_summary = df_train_2.groupby(['date']) \
                             .agg({'price': np.mean}).reset_index()
min_date = df_train_summary.date.min()
df_train_summary.date = df_train_summary.date - min_date
df_train_summary.date = df_train_summary.date.dt.days
# preview a random 1% sample of the summary
df_train_summary.sample(frac=.01)

# prepare test data
df_test_2 = convert_date(df_test)
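# Hedged sketch (not in the original gist): the summarized data could be handed to
# R's lm via rpy2's pandas converter; price ~ date as the formula is an assumption.
from rpy2.robjects import pandas2ri
pandas2ri.activate()
fit = r_lm(robjects.Formula('price ~ date'), data=df_train_summary)
print(r_predict(fit))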
import pandas as pd
import numpy as np

# load the data
df_train = pd.read_csv('calendar_train.csv')
df_test = pd.read_csv('calendar_test.csv')

# convert dates: drop rows without a price and parse the date column
def convert_date(df):
    df = df[~ df.price.isnull()]
    # the gist is truncated here; parsing dates is implied by the .dt.days usage elsewhere in the gist
    df.date = pd.to_datetime(df.date)
    return df
# load GloVe files (Windows-style paths as in the original gist)
from gensim.test.utils import datapath, get_tmpfile
from gensim.scripts.glove2word2vec import glove2word2vec

glove_file = datapath('glove.6B\\glove.6B.100d.txt')
glove_file_300 = datapath('glove.6B\\glove.6B.300d.txt')

# convert from GloVe to word2vec format so gensim can load the vectors
word2vec_glove_file = get_tmpfile("glove.6B.100d.word2vec.txt")
glove2word2vec(glove_file, word2vec_glove_file)
word2vec_glove_file_300 = get_tmpfile("glove.6B.300d.word2vec.txt")
glove2word2vec(glove_file_300, word2vec_glove_file_300)
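# Minimal usage sketch (not in the original gist): load the converted vectors and query them.
from gensim.models import KeyedVectors

glove_100d = KeyedVectors.load_word2vec_format(word2vec_glove_file)
print(glove_100d.most_similar('king', topn=3))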
from keras.datasets import cifar10
from keras.utils import np_utils
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras import regularizers
from keras.layers import BatchNormalization
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
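# Hedged sketch (not the gist's actual architecture): a small CIFAR-10 CNN wired from the
# imports above; layer sizes, regularization strength and hyperparameters are assumptions.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
y_train = np_utils.to_categorical(y_train, 10)
y_test = np_utils.to_categorical(y_test, 10)

cnn = Sequential([
    Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=(32, 32, 3)),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), padding='same', activation='relu',
           kernel_regularizer=regularizers.l2(1e-4)),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])
cnn.compile(optimizer=RMSprop(), loss='categorical_crossentropy', metrics=['accuracy'])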