Michel Kana michelkana

## senv_setup.py
# install gensim
# pip install --upgrade gensim
import numpy as np
import gensim
import matplotlib.pyplot as plt
plt.style.use('ggplot')
from sklearn.decomposition import PCA
from gensim.test.utils import datapath, get_tmpfile
from gensim.models import KeyedVectors
from gensim.scripts.glove2word2vec import glove2word2vec

## load_glove.py
# Resolve the GloVe files through gensim's datapath helper.
# A forward slash is used as the directory separator: it is accepted on
# Windows as well as POSIX systems, whereas the previous hard-coded
# backslash only resolved on Windows.
glove_file = datapath('glove.6B/glove.6B.100d.txt')
glove_file_300 = datapath('glove.6B/glove.6B.300d.txt')

# Convert each GloVe file to word2vec text format, written to a temp file
word2vec_glove_file = get_tmpfile("glove.6B.100d.word2vec.txt")
glove2word2vec(glove_file, word2vec_glove_file)
word2vec_glove_file_300 = get_tmpfile("glove.6B.300d.word2vec.txt")
glove2word2vec(glove_file_300, word2vec_glove_file_300)

## plot_glove.py
# TSNE was used below without ever being imported
from sklearn.manifold import TSNE

# Load the converted vectors so that `model[word]` lookups work.
# NOTE(review): `model` was not defined anywhere before this point; the 100-d
# file is assumed here -- use word2vec_glove_file_300 if the 300-d vectors
# were intended.
model = KeyedVectors.load_word2vec_format(word2vec_glove_file)

# get word2vec embeddings
words = ['russia', 'moscow', 'france', 'paris']
word_vectors = np.array([model[w] for w in words])
# pca transformation, keeping the first two components
twodim = PCA().fit_transform(word_vectors)[:,:2]
# t-sne transformation
# perplexity has to be smaller than the number of samples; the default (30)
# is rejected by recent scikit-learn releases when only four words are given
twodim_tsne = TSNE(perplexity=min(30, len(words) - 1)).fit_transform(word_vectors)[:,:2]
# pca plot
plt.figure(figsize=(3,3))
plt.scatter(twodim[:,0], twodim[:,1], edgecolors='k', c='r')

## analogy_glove.py
# function to compute and display analogies between a group of word-pairs.
def plot_analogy(word_groups, model, func, colors, title, ax=None):
    """Compute the analogy word and the word vectors for each word group.

    For every group, ``model.most_similar`` is queried with the first two
    words as positives and the third word as negative; the top hit is added
    to a copy of the group and the vectors of all its words are looked up.

    Args:
        word_groups: iterable of word sequences with at least three words each.
            The sequences are not modified.
        model: object providing ``most_similar(positive=, negative=, topn=)``
            and ``model[word]`` vector lookup.
        func: accepted but not used by the code in this function.
        colors: accepted but not used by the code in this function.
        title: accepted but not used by the code in this function.
        ax: when ``None``, a figure and axes are created with
            ``plt.subplots(1, 1, figsize=(5, 5))``.
    """
    # `is None` instead of `== None`: comparing an array of axes with `==`
    # is element-wise and makes the `if` raise a ValueError.
    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=(5, 5))
    for words in word_groups:
        analogical_word = model.most_similar(positive=[words[0], words[1]],
                                             negative=[words[2]],
                                             topn=1)[0][0]
        # Build a new list rather than appending in place, so the caller's
        # groups are left untouched and repeated calls do not keep growing them.
        words = [*words, analogical_word]
        word_vectors = np.array([model[w] for w in words])

## sample_line.py
import numpy as np
import matplotlib.pyplot as plt

# Draw 100 abscissae uniformly from [-2, 3) and put them in ascending order
x = np.random.uniform(-2.0, 3.0, 100)
x.sort()

# Points on the line y = 2x + 1, perturbed by standard normal noise
y = 2 * x + 1 + np.random.normal(0, 1, 100)

## linear_regression_keras.py
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from sklearn.metrics import r2_score

# Sequential network made of a single dense unit with a linear activation,
# taking a one-dimensional input
model = Sequential([Dense(1, input_dim=1, activation='linear')])

## one_hump_data.py
import pandas as pd
import matplotlib.pyplot as plt

# Read the one-hump data set from the CSV file hosted on GitHub
one_hump_df = pd.read_csv(
    'https://raw.githubusercontent.com/michelkana/medium/master/data/one_hump_df.csv'
)

# Scatter plot of the y column against the x column, with labelled axes
plt.scatter(one_hump_df['x'], one_hump_df['y'], lw=0.5)
plt.xlabel('$x$', fontsize=10)
plt.ylabel('$y$', fontsize=10)

## one_hump_ann.py
def affine(x, w, b):
    """Return the affine transformation ``w * x + b`` of input ``x``.

    ``w`` is the weight and ``b`` the bias.
    """
    scaled = w * x
    return scaled + b

def sigmoid(z):
    """Return the sigmoid activation ``1 / (1 + exp(-z))`` of ``z``.

    ``z`` is meant to be the output of an affine transformation.
    """
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator


## one_hump_keras.py
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from sklearn.metrics import r2_score

# Start of the two-layer network: the first layer has 2 sigmoid neurons
# and takes a one-dimensional input.
# (Only this first layer is added here; the 1-neuron second layer is not
# part of this snippet.)
model = Sequential([Dense(2, input_dim=1, activation='sigmoid')])

## classification_data.py
import numpy as np
import matplotlib.pyplot as plt

# Sorted sample of 100 points drawn uniformly from [-2, 3)
x = np.random.uniform(-2.0, 3.0, 100)
x.sort()
# Steep sigmoid of x, perturbed by Gaussian noise with standard deviation 0.5
y = 1.0 / (1.0 + np.exp(-5 * x)) + np.random.normal(0, 0.5, 100)
# Threshold at 0.5 to obtain binary labels (0.0 or 1.0)
y = np.where(y >= 0.5, 1.0, 0.0)
# scatter plot of the binary labels y against x
plt.scatter(x,y)
	# install gensim
	# pip install --upgrade gensim
	import numpy as np
	import gensim
	import matplotlib.pyplot as plt
	plt.style.use('ggplot')
	from sklearn.decomposition import PCA
	from gensim.test.utils import datapath, get_tmpfile
	from gensim.models import KeyedVectors
	from gensim.scripts.glove2word2vec import glove2word2vec
	# Resolve the GloVe files through gensim's datapath helper.
	# A forward slash is used as the directory separator: it is accepted on
	# Windows as well as POSIX systems, whereas the previous hard-coded
	# backslash only resolved on Windows.
	glove_file = datapath('glove.6B/glove.6B.100d.txt')
	glove_file_300 = datapath('glove.6B/glove.6B.300d.txt')

	# Convert each GloVe file to word2vec text format, written to a temp file
	word2vec_glove_file = get_tmpfile("glove.6B.100d.word2vec.txt")
	glove2word2vec(glove_file, word2vec_glove_file)
	word2vec_glove_file_300 = get_tmpfile("glove.6B.300d.word2vec.txt")
	glove2word2vec(glove_file_300, word2vec_glove_file_300)
	# TSNE was used below without ever being imported
	from sklearn.manifold import TSNE

	# Load the converted vectors so that `model[word]` lookups work.
	# NOTE(review): `model` was not defined anywhere before this point; the 100-d
	# file is assumed here -- use word2vec_glove_file_300 if the 300-d vectors
	# were intended.
	model = KeyedVectors.load_word2vec_format(word2vec_glove_file)

	# get word2vec embeddings
	words = ['russia', 'moscow', 'france', 'paris']
	word_vectors = np.array([model[w] for w in words])
	# pca transformation, keeping the first two components
	twodim = PCA().fit_transform(word_vectors)[:,:2]
	# t-sne transformation
	# perplexity has to be smaller than the number of samples; the default (30)
	# is rejected by recent scikit-learn releases when only four words are given
	twodim_tsne = TSNE(perplexity=min(30, len(words) - 1)).fit_transform(word_vectors)[:,:2]
	# pca plot
	plt.figure(figsize=(3,3))
	plt.scatter(twodim[:,0], twodim[:,1], edgecolors='k', c='r')
	# function to compute and display analogies between a group of word-pairs.
	def plot_analogy(word_groups, model, func, colors, title, ax=None):
		"""Compute the analogy word and the word vectors for each word group.

		For every group, ``model.most_similar`` is queried with the first two
		words as positives and the third word as negative; the top hit is added
		to a copy of the group and the vectors of all its words are looked up.

		Args:
			word_groups: iterable of word sequences with at least three words
				each. The sequences are not modified.
			model: object providing ``most_similar(positive=, negative=, topn=)``
				and ``model[word]`` vector lookup.
			func: accepted but not used by the code in this function.
			colors: accepted but not used by the code in this function.
			title: accepted but not used by the code in this function.
			ax: when ``None``, a figure and axes are created with
				``plt.subplots(1, 1, figsize=(5, 5))``.
		"""
		# `is None` instead of `== None`: comparing an array of axes with `==`
		# is element-wise and makes the `if` raise a ValueError.
		if ax is None:
			fig, ax = plt.subplots(1, 1, figsize=(5, 5))
		for words in word_groups:
			analogical_word = model.most_similar(
				positive=[words[0], words[1]],
				negative=[words[2]],
				topn=1,
			)[0][0]
			# Build a new list rather than appending in place, so the caller's
			# groups are left untouched and repeated calls do not keep growing them.
			words = [*words, analogical_word]
			word_vectors = np.array([model[w] for w in words])
	import numpy as np
	import matplotlib.pyplot as plt

	# Draw 100 abscissae uniformly from [-2, 3) and put them in ascending order
	x = np.random.uniform(-2.0, 3.0, 100)
	x.sort()

	# Points on the line y = 2x + 1, perturbed by standard normal noise
	y = 2 * x + 1 + np.random.normal(0, 1, 100)
	import keras
	from keras.models import Sequential
	from keras.layers import Dense
	from keras.optimizers import SGD
	from sklearn.metrics import r2_score

	# Sequential network made of a single dense unit with a linear activation,
	# taking a one-dimensional input
	model = Sequential([Dense(1, input_dim=1, activation='linear')])
	import pandas as pd
	import matplotlib.pyplot as plt

	# Read the one-hump data set from the CSV file hosted on GitHub
	one_hump_df = pd.read_csv(
		'https://raw.githubusercontent.com/michelkana/medium/master/data/one_hump_df.csv'
	)

	# Scatter plot of the y column against the x column, with labelled axes
	plt.scatter(one_hump_df['x'], one_hump_df['y'], lw=0.5)
	plt.xlabel('$x$', fontsize=10)
	plt.ylabel('$y$', fontsize=10)
	# affine transformation of an input x
	# using weight w and bias b
	def affine(x, w, b):
		"""Return ``w * x + b`` for input ``x``, weight ``w`` and bias ``b``."""
		# the return statement is indented under the def again; it had lost
		# its indentation, which left the function without a body
		return w * x + b

	# sigmoidal activation
	# on output from affine transformation
	def sigmoid(z):
		"""Return the sigmoid activation ``1 / (1 + exp(-z))`` of ``z``."""
		# the return statement is indented under the def again; it had lost
		# its indentation, which left the function without a body
		return 1.0 / (1.0 + np.exp(-z))
	import numpy as np
	import matplotlib.pyplot as plt

	# Sorted sample of 100 points drawn uniformly from [-2, 3)
	x = np.random.uniform(-2.0, 3.0, 100)
	x.sort()
	# Steep sigmoid of x, perturbed by Gaussian noise with standard deviation 0.5
	y = 1.0 / (1.0 + np.exp(-5 * x)) + np.random.normal(0, 0.5, 100)
	# Threshold at 0.5 to obtain binary labels (0.0 or 1.0)
	y = np.where(y >= 0.5, 1.0, 0.0)
	# scatter plot of the binary labels y against x
	plt.scatter(x,y)