Michel Kana (michelkana), GitHub gists
# Use train_test_split to split our data into train and validation sets for training
import torch
from sklearn.model_selection import train_test_split

train_inputs, validation_inputs, train_labels, validation_labels = train_test_split(
    input_ids, labels, random_state=2018, test_size=0.1)
train_masks, validation_masks, _, _ = train_test_split(
    attention_masks, input_ids, random_state=2018, test_size=0.1)

# Convert all of our data into torch tensors, the required datatype for our model
train_inputs = torch.tensor(train_inputs)
validation_inputs = torch.tensor(validation_inputs)
train_labels = torch.tensor(train_labels)
validation_labels = torch.tensor(validation_labels)
train_masks = torch.tensor(train_masks)
validation_masks = torch.tensor(validation_masks)
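# Next step in this pipeline (a minimal sketch, not in the original gist):
# wrap the tensors in DataLoaders; batch_size=32 is an assumption.
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

batch_size = 32
train_data = TensorDataset(train_inputs, train_masks, train_labels)
train_dataloader = DataLoader(train_data, sampler=RandomSampler(train_data), batch_size=batch_size)
validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)
validation_dataloader = DataLoader(validation_data, sampler=SequentialSampler(validation_data), batch_size=batch_size)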
# Load BertForSequenceClassification, the pretrained BERT model with a single linear classification layer on top.
# Import assumed from the transformers package; nb_labels (the number of target classes) is defined earlier in the gist.
from transformers import BertForSequenceClassification

model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=nb_labels)
model.cuda()
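# Hedged fine-tuning sketch (not in the original gist): AdamW and lr=2e-5 are assumptions,
# and the .loss attribute assumes a recent transformers version.
from torch.optim import AdamW

optimizer = AdamW(model.parameters(), lr=2e-5)
model.train()
for batch in train_dataloader:
    input_ids_b, mask_b, labels_b = (t.cuda() for t in batch)
    optimizer.zero_grad()
    loss = model(input_ids_b, attention_mask=mask_b, labels=labels_b).loss
    loss.backward()
    optimizer.step()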
# BERT model summary (truncated)
BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      ...
# packages needed
# !pip install nltk
# !pip install stanfordnlp
# !pip install --upgrade bleu
import nltk
from nltk.tokenize import sent_tokenize
import re
import stanfordnlp
from bleu import list_bleu
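# Minimal usage sketch (not in the original gist): sentence tokenization and BLEU scoring.
# The example strings are placeholders; the list_bleu call shape follows the bleu package README.
nltk.download('punkt')
sentences = sent_tokenize("BERT encodes text. GPT-2 generates text.")
ref = ['the cat sat on the mat .']
hyp = ['a cat sat on the mat .']
print(list_bleu([ref], hyp))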
# pip install scipy==1.1.0  (keras-vis needs an older scipy release)
# pip install keras-vis
import numpy as np
from vis.visualization import visualize_saliency

def plot_saliency(img_idx=None):
    # plot_features_map, cnn_saliency, cnn, x_test, ytest and labels are defined earlier in the gist
    img_idx = plot_features_map(img_idx)
    # saliency of the last layer (-1) with respect to the true class of the chosen test image
    grads = visualize_saliency(cnn_saliency, -1, filter_indices=ytest[img_idx][0],
                               seed_input=x_test[img_idx], backprop_modifier=None,
                               grad_modifier="absolute")
    predicted_label = labels[np.argmax(cnn.predict(x_test[img_idx].reshape(1, 32, 32, 3)), 1)[0]]
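    # Hedged continuation (the gist is truncated here): display the saliency overlay,
    # assuming matplotlib.pyplot is imported as plt.
    fig, axes = plt.subplots(1, 2, figsize=(8, 4))
    axes[0].imshow(x_test[img_idx])
    axes[0].set_title('predicted: {}'.format(predicted_label))
    axes[1].imshow(grads, cmap='jet')
    axes[1].set_title('saliency map')
    plt.show()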
# prediction with and without dropout
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import r2_score

y_pred_without_dropout = model_without_dropout.predict(x_test)
y_pred_with_dropout = model_with_dropout.predict(x_test)

# plotting (the true relationship is y = x, so x_test doubles as the ground truth)
fig, ax = plt.subplots(1, 1, figsize=(10, 5))
ax.scatter(x_train, y_train, s=10, label='train data')
ax.plot(x_test, x_test, ls='--', label='test data', color='green')
ax.plot(x_test, y_pred_without_dropout,
        label='predicted ANN - R2 {:.2f}'.format(r2_score(x_test, y_pred_without_dropout)), color='red')
ax.plot(x_test, y_pred_with_dropout,
        label='predicted ANN Dropout - R2 {:.2f}'.format(r2_score(x_test, y_pred_with_dropout)), color='black')
ax.legend()
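# model_without_dropout and model_with_dropout come from earlier in this gist;
# a hedged sketch of the dropout variant (layer sizes and rates are assumptions):
from keras.models import Sequential
from keras.layers import Dense, Dropout

model_with_dropout = Sequential([
    Dense(128, activation='relu', input_dim=1),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1),
])
model_with_dropout.compile(optimizer='adam', loss='mse')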
# importing R functions via rpy2
# !pip install rpy2
import rpy2.robjects as robjects
r_predict = robjects.r["predict"]
r_lm = robjects.r["lm"]
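# Minimal usage sketch (not in the original gist): fit an R linear model on toy data
# by binding variables into the formula's environment.
from rpy2.robjects import FloatVector

fmla = robjects.Formula('y ~ x')
env = fmla.environment
env['x'] = FloatVector([1.0, 2.0, 3.0, 4.0])
env['y'] = FloatVector([2.1, 3.9, 6.2, 7.8])
fit = r_lm(fmla)
print(r_predict(fit))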
# prepare train data: average price per day, with date expressed as days since the first date
df_train_summary = df_train_2.groupby(['date']) \
                             .agg({'price': np.mean}).reset_index()
min_date = df_train_summary.date.min()
df_train_summary.date = df_train_summary.date - min_date
df_train_summary.date = df_train_summary.date.dt.days
# preview a random 1% sample of the summary
df_train_summary.sample(frac=.01)

# prepare test data
df_test_2 = convert_date(df_test)
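# Hedged sketch (not in the original gist): the summarized data could be handed to
# R's lm via rpy2's pandas converter; price ~ date as the formula is an assumption.
from rpy2.robjects import pandas2ri
pandas2ri.activate()
fit = r_lm(robjects.Formula('price ~ date'), data=df_train_summary)
print(r_predict(fit))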
import pandas as pd
import numpy as np

# load the data
df_train = pd.read_csv('calendar_train.csv')
df_test = pd.read_csv('calendar_test.csv')

# convert dates: drop rows without a price and parse the date column
def convert_date(df):
    df = df[~ df.price.isnull()]
    # the gist is truncated here; parsing dates is implied by the .dt.days usage elsewhere in the gist
    df.date = pd.to_datetime(df.date)
    return df
# load GloVe files (Windows-style paths as in the original gist)
from gensim.test.utils import datapath, get_tmpfile
from gensim.scripts.glove2word2vec import glove2word2vec

glove_file = datapath('glove.6B\\glove.6B.100d.txt')
glove_file_300 = datapath('glove.6B\\glove.6B.300d.txt')

# convert from GloVe to word2vec format so gensim can load the vectors
word2vec_glove_file = get_tmpfile("glove.6B.100d.word2vec.txt")
glove2word2vec(glove_file, word2vec_glove_file)
word2vec_glove_file_300 = get_tmpfile("glove.6B.300d.word2vec.txt")
glove2word2vec(glove_file_300, word2vec_glove_file_300)
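# Minimal usage sketch (not in the original gist): load the converted vectors and query them.
from gensim.models import KeyedVectors

glove_100d = KeyedVectors.load_word2vec_format(word2vec_glove_file)
print(glove_100d.most_similar('king', topn=3))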
from keras.datasets import cifar10
from keras.utils import np_utils
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras import regularizers
from keras.layers import BatchNormalization
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
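# Hedged sketch (not the gist's actual architecture): a small CIFAR-10 CNN wired from the
# imports above; layer sizes, regularization strength and hyperparameters are assumptions.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
y_train = np_utils.to_categorical(y_train, 10)
y_test = np_utils.to_categorical(y_test, 10)

cnn = Sequential([
    Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=(32, 32, 3)),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), padding='same', activation='relu',
           kernel_regularizer=regularizers.l2(1e-4)),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])
cnn.compile(optimizer=RMSprop(), loss='categorical_crossentropy', metrics=['accuracy'])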