Skip to content

Instantly share code, notes, and snippets.

View michelkana's full-sized avatar

Michel Kana michelkana

View GitHub Profile
@michelkana
michelkana / senv_setup.py
Created July 14, 2019 19:57
NLP text representation - env setup
# install gensim
# pip install --upgrade gensim
import numpy as np
import gensim
import matplotlib.pyplot as plt
plt.style.use('ggplot')
from sklearn.decomposition import PCA
from gensim.test.utils import datapath, get_tmpfile
from gensim.models import KeyedVectors
from gensim.scripts.glove2word2vec import glove2word2vec
# Resolve the GloVe vector files (100- and 300-dimensional) through gensim's
# `datapath` helper. Forward slashes are used as the directory separator
# because they resolve on Windows as well as on POSIX systems, whereas a
# backslash is a path separator on Windows only.
glove_file = datapath('glove.6B/glove.6B.100d.txt')
glove_file_300 = datapath('glove.6B/glove.6B.300d.txt')
# Convert each GloVe file to word2vec text format, writing the result to a
# temporary file obtained from `get_tmpfile`.
# NOTE(review): recent gensim releases mark glove2word2vec as deprecated in
# favour of KeyedVectors.load_word2vec_format(..., no_header=True) -- confirm
# against the installed gensim version.
word2vec_glove_file = get_tmpfile("glove.6B.100d.word2vec.txt")
glove2word2vec(glove_file, word2vec_glove_file)
word2vec_glove_file_300 = get_tmpfile("glove.6B.300d.word2vec.txt")
glove2word2vec(glove_file_300, word2vec_glove_file_300)
# Look up the embedding vector of each word and stack them into one array.
# NOTE(review): `model` is not defined above this line in the visible
# source -- presumably the loaded KeyedVectors; confirm.
words = ['russia', 'moscow', 'france', 'paris']
word_vectors = np.array([model[w] for w in words])
# PCA transformation; keep only the first two output columns.
twodim = PCA().fit_transform(word_vectors)[:,:2]
# t-SNE transformation; keep only the first two output columns.
# NOTE(review): `TSNE` is not imported in the visible source -- TODO confirm
# where it comes from.
twodim_tsne = TSNE().fit_transform(word_vectors)[:,:2]
# Scatter plot of the PCA projection: red markers with black edges.
plt.figure(figsize=(3,3))
plt.scatter(twodim[:,0], twodim[:,1], edgecolors='k', c='r')
# function to compute and display analogies between a group of word-pairs.
def plot_analogy(word_groups, model, func, colors, title, ax=None):
if ax==None:
fig, ax = plt.subplots(1,1,figsize=(5,5))
for i, words in enumerate(word_groups):
analogical_word = model.most_similar(positive=[words[0], words[1]],
negative=[words[2]],
topn=1)[0][0]
words.append(analogical_word)
word_vectors = np.array([model[w] for w in words])
import numpy as np
import matplotlib.pyplot as plt
# Draw 100 abscissae uniformly from [-2, 3) and put them in ascending order.
x = np.random.uniform(low=-2.0, high=3.0, size=100)
x.sort()
# Points on the line y = 2x + 1, perturbed by standard-normal noise.
y = 2 * x + 1
y += np.random.normal(loc=0, scale=1, size=100)
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from sklearn.metrics import r2_score
# Build a sequential network consisting of a single Dense layer:
# 1 neuron, linear activation, one-dimensional input (input_dim = 1).
model = Sequential()
model.add(Dense(1, activation = 'linear', input_dim = 1))
import pandas as pd
import matplotlib.pyplot as plt
# Load the one-hump dataset from its CSV file on GitHub.
one_hump_df = pd.read_csv(
    'https://raw.githubusercontent.com/michelkana/medium/master/data/one_hump_df.csv'
)
# Scatter the y column against the x column and label both axes.
plt.scatter(one_hump_df['x'], one_hump_df['y'], lw=0.5)
plt.xlabel('$x$', fontsize=10)
plt.ylabel('$y$', fontsize=10)
def affine(x, w, b):
    """Apply an affine transformation to an input.

    Args:
        x: Input value.
        w: Weight that multiplies the input.
        b: Bias added to the weighted input.

    Returns:
        The value of ``w * x + b``.
    """
    return w * x + b
def sigmoid(z):
    """Apply the sigmoid activation to the output of an affine transformation.

    Args:
        z: Value (or NumPy array of values) to squash.

    Returns:
        ``1 / (1 + exp(-z))``, evaluated with ``np.exp``.
    """
    return 1.0 / (1.0 + np.exp(-z))
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from sklearn.metrics import r2_score
# Network with two layers:
#   - the first layer has 2 neurons with sigmoid activation and takes a
#     one-dimensional input (input_dim = 1)
#   - the second layer has 1 neuron
# NOTE(review): only the first layer is added in the lines visible here; the
# second layer is presumably added further on -- confirm.
model = Sequential()
model.add(Dense(2, activation = 'sigmoid', input_dim = 1))
import numpy as np
import matplotlib.pyplot as plt
# Generate binary data: 100 sorted abscissae drawn uniformly from [-2, 3),
# a steep sigmoid of x plus Gaussian noise, thresholded at 0.5.
x = np.random.uniform(low=-2.0, high=3.0, size=100)
x.sort()
y = 1.0 / (1.0 + np.exp(-5 * x)) + np.random.normal(loc=0, scale=0.5, size=100)
# Values below 0.5 become 0, everything else becomes 1.
y = np.where(y < 0.5, 0.0, 1.0)
plt.scatter(x, y)