Skip to content

Instantly share code, notes, and snippets.

View michelkana's full-sized avatar

Michel Kana michelkana

View GitHub Profile
@michelkana
michelkana / senv_setup.py
Created July 14, 2019 19:57
NLP text representation - env setup
# install gensim
# pip install --upgrade gensim
import numpy as np
import gensim
import matplotlib.pyplot as plt
plt.style.use('ggplot')
from sklearn.decomposition import PCA
from gensim.test.utils import datapath, get_tmpfile
from gensim.models import KeyedVectors
from gensim.scripts.glove2word2vec import glove2word2vec
# get word2vec embeddings for four country/capital words
# NOTE(review): `model` is not defined in the visible lines; it is indexed by
# word below, so presumably a loaded gensim KeyedVectors -- confirm.
words = ['russia', 'moscow', 'france', 'paris']
word_vectors = np.array([model[w] for w in words])
# pca transformation: fit PCA on the word vectors and keep the first two
# columns of the transformed data
twodim = PCA().fit_transform(word_vectors)[:,:2]
# t-sne transformation, again keeping the first two output columns
# NOTE(review): `TSNE` is not imported in the visible lines -- confirm it is
# brought into scope (presumably from sklearn.manifold) before this runs.
twodim_tsne = TSNE().fit_transform(word_vectors)[:,:2]
# pca plot: one red, black-edged marker per word in a 3x3 figure
plt.figure(figsize=(3,3))
plt.scatter(twodim[:,0], twodim[:,1], edgecolors='k', c='r')
# function to compute and display analogies between a group of word-pairs.
def plot_analogy(word_groups, model, func, colors, title, ax=None):
if ax==None:
fig, ax = plt.subplots(1,1,figsize=(5,5))
for i, words in enumerate(word_groups):
analogical_word = model.most_similar(positive=[words[0], words[1]],
negative=[words[2]],
topn=1)[0][0]
words.append(analogical_word)
word_vectors = np.array([model[w] for w in words])
import numpy as np
import matplotlib.pyplot as plt
# Draw 100 x-values uniformly from [-2, 3) and put them in ascending order
x = np.random.uniform(low=-2.0, high=3.0, size=100)
x.sort()
# Points on the line y = 2x + 1 ...
y = 1 + 2 * x
# ... perturbed by standard-normal noise
y += np.random.normal(loc=0, scale=1, size=100)
import pandas as pd
import matplotlib.pyplot as plt
# load the one-hump dataset
one_hump_df = pd.read_csv(
    'https://raw.githubusercontent.com/michelkana/medium/master/data/one_hump_df.csv'
)
# scatter plot of the y column against the x column
plt.scatter(one_hump_df['x'], one_hump_df['y'], lw=0.5)
# axis labels
plt.xlabel('$x$', fontsize=10)
plt.ylabel('$y$', fontsize=10)
# affine transformation of an input x
# using weight w and bias b
def affine(x, w, b):
    """Return the affine transformation ``w * x + b`` of the input.

    Args:
        x: Input value.
        w: Weight multiplied with ``x``.
        b: Bias added to the product.

    Returns:
        The value of ``w * x + b``.
    """
    return w * x + b
# sigmoidal activation
# on output from affine transformation
def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of ``z``.

    Args:
        z: Input value, e.g. the output of an affine transformation.

    Returns:
        The sigmoid of ``z``, a value between 0 and 1.
    """
    # exp(-log(1 + exp(-z))) equals 1 / (1 + exp(-z)); np.logaddexp evaluates
    # the inner logarithm without overflowing when z is a large negative number.
    return np.exp(-np.logaddexp(0.0, -z))
import numpy as np
import matplotlib.pyplot as plt
# generate binary data: noisy samples of a steep sigmoid, thresholded at 0.5
x = np.random.uniform(low=-2.0, high=3.0, size=100)
x.sort()
y = 1.0 / (1.0 + np.exp(-5 * x))
y += np.random.normal(loc=0, scale=0.5, size=100)
# values below 0.5 become class 0, everything else class 1
y = np.where(y < 0.5, 0.0, 1.0)
plt.scatter(x,y)
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import accuracy_score
# sequential model with one layer: a single dense unit with sigmoid activation
# on a 1-dimensional input
model = Sequential()
model.add(Dense(1, activation = 'sigmoid', input_dim = 1))
# configure training: SGD optimizer, binary cross-entropy loss, accuracy metric
model.compile(optimizer='sgd', loss='binary_crossentropy', metrics = ['acc'])
# fit with one sample per batch for 100 epochs, shuffling enabled, verbosity 0
# NOTE(review): x and y are not defined in the visible lines of this snippet --
# confirm they hold the inputs and binary labels expected here.
history = model.fit(x, y, batch_size=1, epochs=100, shuffle=True, verbose=0)
import numpy as np
from math import cos, sin, pi
import matplotlib.pyplot as plt
# function for creating points randomly distributed around a given center
def get_cloud(x0, y0, nb):
radius = np.random.normal(0, 1, nb)
angle = np.random.uniform(0, 2*pi, nb)
x = np.array([r*cos(angle[i]) for i, r in enumerate(radius)])
y = np.array([r*sin(angle[i]) for i, r in enumerate(radius)])
import keras
from keras.models import Sequential
from keras.layers import Dense, Input
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# turn the x,y coordinates into a predictors matrix
X = np.array([x, y]).T
# turn the labels into 1-hot encodings