This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# install gensim
# pip install --upgrade gensim
import numpy as np | |
import gensim | |
import matplotlib.pyplot as plt | |
plt.style.use('ggplot') | |
from sklearn.decomposition import PCA | |
from gensim.test.utils import datapath, get_tmpfile | |
from gensim.models import KeyedVectors | |
from gensim.scripts.glove2word2vec import glove2word2vec |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the GloVe files.
# The relative path is built with os.path.join so that the separator is
# correct on every platform; a hard-coded backslash is only a path separator
# on Windows and would become part of the file name elsewhere.
import os

glove_file = datapath(os.path.join('glove.6B', 'glove.6B.100d.txt'))
glove_file_300 = datapath(os.path.join('glove.6B', 'glove.6B.300d.txt'))

# Convert from GloVe to word2vec format. The converted files are written to
# the paths returned by get_tmpfile, one per embedding size.
word2vec_glove_file = get_tmpfile("glove.6B.100d.word2vec.txt")
glove2word2vec(glove_file, word2vec_glove_file)
word2vec_glove_file_300 = get_tmpfile("glove.6B.300d.word2vec.txt")
glove2word2vec(glove_file_300, word2vec_glove_file_300)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# TSNE is used below but is not imported anywhere in the visible code.
from sklearn.manifold import TSNE

# get word2vec embeddings
# NOTE(review): `model` is not defined in this snippet; presumably it is the
# KeyedVectors object loaded from the converted GloVe file -- confirm.
words = ['russia', 'moscow', 'france', 'paris']
word_vectors = np.array([model[w] for w in words])

# pca transformation: keep the first two components
twodim = PCA().fit_transform(word_vectors)[:,:2]

# t-sne transformation
# The perplexity must be smaller than the number of samples; the default (30)
# is too large for a handful of words, so cap it at len(words) - 1.
twodim_tsne = TSNE(perplexity=min(30, len(words) - 1)).fit_transform(word_vectors)[:,:2]

# pca plot
plt.figure(figsize=(3,3))
plt.scatter(twodim[:,0], twodim[:,1], edgecolors='k', c='r')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# function to compute and display analogies between a group of word-pairs. | |
def plot_analogy(word_groups, model, func, colors, title, ax=None): | |
if ax==None: | |
fig, ax = plt.subplots(1,1,figsize=(5,5)) | |
for i, words in enumerate(word_groups): | |
analogical_word = model.most_similar(positive=[words[0], words[1]], | |
negative=[words[2]], | |
topn=1)[0][0] | |
words.append(analogical_word) | |
word_vectors = np.array([model[w] for w in words]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
# Let's create a sample line: 100 x-values drawn uniformly from [-2, 3),
# sorted in ascending order, mapped through y = 2x + 1.
x = np.sort(np.random.uniform(-2.0, 3.0, 100))
y = 2*x + 1

# Let's add normal noise (mean 0, standard deviation 1) to the line
y = y + np.random.normal(0, 1, 100)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import keras | |
from keras.models import Sequential | |
from keras.layers import Dense | |
from keras.optimizers import SGD | |
from sklearn.metrics import r2_score | |
# neural network with 1 neuron and a linear activation:
# a single Dense unit fed by a one-dimensional input
model = Sequential()
model.add(Dense(1, activation = 'linear', input_dim = 1))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import matplotlib.pyplot as plt | |
# load the one-hump data (CSV fetched over HTTP; the plot below uses its
# `x` and `y` columns)
one_hump_df = pd.read_csv('https://raw.githubusercontent.com/michelkana/medium/master/data/one_hump_df.csv')

# plot data
plt.scatter(one_hump_df.x, one_hump_df.y, lw=0.5)
plt.xlabel('$x$', fontsize=10)
plt.ylabel('$y$', fontsize=10)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# affine transformation of an input x
# using weight w and bias b
def affine(x, w, b):
    """Return the affine transformation ``w * x + b``.

    The expression is evaluated as written, so any operands that support
    ``*`` and ``+`` (plain numbers, NumPy arrays, ...) are accepted.
    """
    return w * x + b
# sigmoidal activation
# on output from affine transformation
def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of ``z``.

    ``z`` may be a scalar or a NumPy array; the formula is applied
    element-wise by NumPy.
    """
    # For large negative z, exp(-z) overflows to inf. The quotient is still
    # the correct limit (0.0), so only the overflow warning is suppressed;
    # the returned values are unchanged.
    with np.errstate(over='ignore'):
        return 1.0 / (1.0 + np.exp(-z))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from keras.models import Sequential | |
from keras.layers import Dense | |
from keras.optimizers import SGD | |
from sklearn.metrics import r2_score | |
# network with two layers
# first layer has 2 neurons (sigmoid activation, one-dimensional input)
# second layer has 1 neuron
model = Sequential()
model.add(Dense(2, activation = 'sigmoid', input_dim = 1))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
# generate binary data
# 100 x-values drawn uniformly from [-2, 3), sorted in ascending order
x = np.sort(np.random.uniform(-2.0, 3.0, 100))
# sigmoid of 5x, perturbed with normal noise (mean 0, standard deviation 0.5)
y = 1.0 / (1.0 + np.exp(-5*x))
y = y + np.random.normal(0, 0.5, 100)
# threshold at 0.5 to obtain 0/1 labels
y[y < 0.5] = 0
y[y >= 0.5] = 1
plt.scatter(x,y)
OlderNewer