Skip to content

Instantly share code, notes, and snippets.

View Tony607's full-sized avatar

Chengwei Zhang Tony607

View GitHub Profile
@Tony607
Tony607 / .py
Created February 23, 2018 09:26
source: How to generate realistic yelp restaurant reviews with Keras | DLology
# Read the two CSV files into pandas DataFrames.
df_business = pd.read_csv('../dataset/business.csv')
df_review = pd.read_csv('../dataset/review.csv')
# Keep only businesses whose 'categories' text mentions 'Restaurants'.
# na=False treats a missing 'categories' value as "no match"; without it a
# NaN in the mask makes the boolean indexing below fail.
restaurants = df_business[
    df_business['categories'].str.contains('Restaurants', na=False)
]
# Keep only the 5-star reviews.
five_star = df_review[df_review['stars'] == 5]
# Join reviews to restaurants on 'business_id' (pd.merge defaults to an
# inner join), which leaves only 5-star reviews of restaurants.
# NOTE(review): the original passed `restaurants_clean`, a name that is never
# defined in this snippet; `restaurants` (computed above and otherwise unused)
# is assumed to be the intended frame -- confirm no cleaning step is missing.
combo = pd.merge(restaurants, five_star, on='business_id')
@Tony607
Tony607 / .py
Created February 23, 2018 09:30
source: How to generate realistic yelp restaurant reviews with Keras | DLology
# Strip every run of newline characters from the review text.
rnn_fivestar_reviews_only = rnn_fivestar_reviews_only.replace(
    to_replace=r'\n+', value='', regex=True
)
# Drop duplicated reviews.
final = rnn_fivestar_reviews_only.drop_duplicates()
@Tony607
Tony607 / model.py
Created February 23, 2018 09:36
source: How to generate realistic yelp restaurant reviews with Keras | DLology
import keras
from keras import layers
# Character-level language model: two stacked LSTM layers followed by a
# softmax layer with 95 outputs.
# The input is a window of 60 time steps with 95 features each.
# NOTE(review): 95 is presumably the number of unique characters -- confirm.
model = keras.models.Sequential()
# return_sequences=True so the second LSTM receives the whole sequence
# rather than only the last hidden state.
model.add(layers.LSTM(1024, input_shape=(60, 95), return_sequences=True))
# Only the first layer of a Sequential model needs `input_shape`; the
# original repeated it here, where it has no effect.
model.add(layers.LSTM(1024))
model.add(layers.Dense(95, activation='softmax'))
@Tony607
Tony607 / char_indices.py
Created February 23, 2018 09:38
source: How to generate realistic yelp restaurant reviews with Keras | DLology
# Sorted list of the distinct characters in the corpus `text`.
chars = sorted(set(text))
print('Unique characters:', len(chars))
# Map each character to its position in `chars`. enumerate() yields the same
# indices as chars.index(char) (every entry is unique) without rescanning the
# list for each character.
char_indices = {char: index for index, char in enumerate(chars)}
@Tony607
Tony607 / getDataFromChunk.py
Created February 23, 2018 09:38
source: How to generate realistic yelp restaurant reviews with Keras | DLology
def getDataFromChunk(txtChunk, maxlen=60, step=1):
    """Cut a text chunk into fixed-length windows and allocate their buffers.

    Starting at every `step`-th position, a window of `maxlen` characters is
    collected together with the single character that follows it.

    Args:
        txtChunk: the text to slice (indexed and sliced like a string).
        maxlen: number of characters per window.
        step: distance between the starts of consecutive windows.

    Reads the module-level `chars` to size the last axis of the buffers.

    NOTE(review): this excerpt ends right after the zero-filled `X` and `y`
    arrays are allocated. The caller unpacks `X, y` from the result, so the
    code that fills the arrays and returns them presumably follows but is
    not visible here -- restore it from the full gist before use.
    """
    window_starts = range(0, len(txtChunk) - maxlen, step)
    sentences = [txtChunk[i:i + maxlen] for i in window_starts]
    next_chars = [txtChunk[i + maxlen] for i in window_starts]
    print('nb sequences:', len(sentences))
    print('Vectorization...')
    # The `np.bool` alias was removed in NumPy 1.24; the builtin `bool`
    # selects the same boolean dtype on every NumPy version.
    X = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
    y = np.zeros((len(sentences), len(chars)), dtype=bool)
@Tony607
Tony607 / callbacks_list.py
Created February 23, 2018 09:39
source: How to generate realistic yelp restaurant reviews with Keras | DLology
# Callbacks for unattended training: write a checkpoint whenever the
# monitored training loss improves, and scale the learning rate by 0.5
# (down to a floor of 0.00001) when the loss stops improving.
filepath = "Feb-22-all-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    verbose=1,
    save_best_only=True,
    mode='min',
)
reduce_lr = ReduceLROnPlateau(
    monitor='loss',
    factor=0.5,
    patience=1,
    min_lr=0.00001,
)
callbacks_list = [checkpoint, reduce_lr]
@Tony607
Tony607 / train.py
Created February 23, 2018 09:40
source: How to generate realistic yelp restaurant reviews with Keras | DLology
# Make repeated passes over the shuffled review file, training on one chunk
# of text at a time. range(1, 20) gives 19 passes, numbered 1..19.
for iteration in range(1, 20):
    print('Iteration', iteration)
    with open("../dataset/short_reviews_shuffle.txt") as f:
        # iter(callable, sentinel): keep reading 90000-character chunks
        # until read() returns the empty string at end of file.
        for chunk in iter(lambda: f.read(90000), ""):
            X, y = getDataFromChunk(chunk)
            # A trailing chunk no longer than the window length produces no
            # sequences; skip it rather than hand model.fit an empty batch.
            if len(X) == 0:
                continue
            model.fit(X, y, batch_size=128, epochs=1, callbacks=callbacks_list)
@Tony607
Tony607 / sample.py
Created February 23, 2018 09:44
source: How to generate realistic yelp restaurant reviews with Keras | DLology
def sample(preds, temperature=1.0):
    '''
    Rescale a list of prediction values by a temperature.

    The stated purpose is to add randomness when choosing an index from
    `preds`: with a very small temperature the index with the highest
    value should always be picked.

    preds -- sequence of numbers; converted to a float64 array, the
             caller's object is not modified
    temperature -- divisor applied to the log of the predictions

    NOTE(review): this excerpt ends after `exp_preds` is computed; the
    steps that turn it into a chosen index and return it are not shown.
    The caller assigns the result to `next_index`, so restore them from
    the full gist before use.
    '''
    preds = np.asarray(preds).astype('float64')
    # Dividing the log-values by the temperature and exponentiating again
    # is equivalent to raising each value to the power 1 / temperature.
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
@Tony607
Tony607 / generate.py
Created February 23, 2018 09:44
source: How to generate realistic yelp restaurant reviews with Keras | DLology
# Run 300 generation steps.
for i in range(300):
    # One-hot encode the current text as a (1, maxlen, len(chars)) batch.
    sampled = np.zeros((1, maxlen, len(chars)))
    # Set the slot of each character's index at its time step.
    # NOTE(review): assumes generated_text has at most `maxlen` characters;
    # a longer string would index past the second axis -- confirm.
    for t, char in enumerate(generated_text):
        sampled[0, t, char_indices[char]] = 1.
    # Predict the next-character probabilities ([0] selects the only batch
    # entry).
    preds = model.predict(sampled, verbose=0)[0]
    # Add some randomness by sampling from the given probabilities.
    next_index = sample(preds, temperature)
@Tony607
Tony607 / model.py
Created March 2, 2018 09:02
How to do Real Time Trigger Word Detection with Keras | DLology
def model(input_shape):
"""
Function creating the model's graph in Keras.
Argument:
input_shape -- shape of the model's input data (using Keras conventions)
Returns:
model -- Keras model instance
"""