Skip to content

Instantly share code, notes, and snippets.

@shantanuo
Created February 10, 2019 07:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shantanuo/4e35baba144ee658e4dd4d1f87e19f3a to your computer and use it in GitHub Desktop.
Save shantanuo/4e35baba144ee658e4dd4d1f87e19f3a to your computer and use it in GitHub Desktop.
tensorflow code
# https://towardsdatascience.com/transfer-learning-using-elmo-embedding-c4a7e415103c
import pandas as pd
import numpy as np
import re
import tensorflow_hub as hub
import tensorflow as tf
import keras
from tensorflow.python.keras.layers import Input, Dense, Lambda
from keras.models import Model
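# One-time setup (notebook shell commands, left commented out): download the
# ELMo v2 TF-Hub module into module/module_elmo2 and unzip the dataset archive.
#!mkdir module/
#!mkdir module/module_elmo2
#!curl -L "https://tfhub.dev/google/elmo/2?tf-hub-format=compressed" | tar -zxvC module/module_elmo2
#!unzip first-gop-debate-twitter-sentiment.zip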
# Demo 1: feed whole, untokenized sentences through the module's "default"
# signature and fetch the output stored under the "elmo" key.
elmo = hub.Module("module/module_elmo2/", trainable=False)
elmo_outputs = elmo(
    ["the cat is on the mat", "what are you doing in evening"],
    signature="default",
    as_dict=True,
)
embeddings = elmo_outputs["elmo"]

# Variables and lookup tables must both be initialized before the embedding
# tensor can be evaluated.
with tf.Session() as session:
    session.run([tf.global_variables_initializer(), tf.tables_initializer()])
    message_embeddings = session.run(embeddings)
# Demo 2: feed pre-tokenized sentences through the module's "tokens"
# signature, which takes the tokens plus the length of each sequence.
elmo = hub.Module("module/module_elmo2/", trainable=False)
tokens_input = [
    ["the", "cat", "is", "on", "the", "mat"],
    ["what", "are", "you", "doing", "in", "evening"],
]
# Derive each length from the tokens themselves: both sentences hold 6 tokens,
# and a hand-written list such as [6, 5] would make the module treat the last
# token of the second sentence as padding.
tokens_length = [len(tokens) for tokens in tokens_input]
embeddings = elmo(
    inputs={"tokens": tokens_input, "sequence_len": tokens_length},
    signature="tokens",
    as_dict=True,
)["elmo"]

# Variables and lookup tables must both be initialized before the embedding
# tensor can be evaluated.
with tf.Session() as session:
    session.run([tf.global_variables_initializer(), tf.tables_initializer()])
    message_embeddings = session.run(embeddings)
def embed_elmo2(module):
    """Return a callable that runs a TF-Hub module on a batch of strings.

    The module is loaded into its own ``tf.Graph`` and bound to a
    ``tf.train.MonitoredSession`` that stays open for as long as the returned
    callable is alive, so repeated calls reuse the same graph and session.

    Args:
        module: Path or handle handed to ``hub.Module``.

    Returns:
        A one-argument function; its argument is fed to a string placeholder
        and the module's output for that input is returned.
    """
    graph = tf.Graph()
    with graph.as_default():
        text_in = tf.placeholder(tf.string)
        hub_module = hub.Module(module)
        text_out = hub_module(text_in)
        # Created inside the graph context so the session is tied to this
        # graph rather than to the global default graph.
        sess = tf.train.MonitoredSession()

    def run_embedding(x):
        return sess.run(text_out, {text_in: x})

    return run_embedding
# Smoke-test the helper: embed a single sentence and look at the result's
# shape. The bare expression is only displayed in an interactive session.
embed_fn = embed_elmo2("module/module_elmo2")
embed_fn(["i am sambit"]).shape

# Load the labelled tweets and keep only the two polar sentiment classes.
df = pd.read_csv("Sentiment.csv", encoding="latin")
# .copy() gives the filtered rows their own frame, so the label assignments
# below write to it directly instead of to a slice of the frame that was read
# (this is the pattern that triggers pandas' SettingWithCopyWarning).
df = df[df["sentiment"] != "Neutral"].copy()
# Encode the labels for binary classification: Negative -> 0, Positive -> 1.
df.loc[df["sentiment"] == "Negative", "sentiment"] = 0
df.loc[df["sentiment"] == "Positive", "sentiment"] = 1
def cleanText(text):
    """Normalize one piece of raw text for embedding.

    Surrounding whitespace is stripped, embedded line breaks become single
    spaces, a fixed set of punctuation characters is removed, and the result
    is lower-cased.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned, lower-cased string.
    """
    flattened = text.strip()
    for line_break in ("\n", "\r"):
        flattened = flattened.replace(line_break, " ")
    # Disabled preprocessing steps, kept for reference:
    #   replace_contraction(text), replace_links(text, "link"),
    #   remove_numbers(text)
    without_punctuation = re.sub(
        r'[,!@#$%^&*)(|/><";:.?\'\\}{]', "", flattened
    )
    return without_punctuation.lower()
# ELMo module instance used by the Keras Lambda layer (see ELMoEmbedding).
embed = hub.Module("module/module_elmo2")

# Model inputs and targets as NumPy arrays: cleaned tweet text and the
# sentiment labels taken from the data frame.
cleaned_text = df["text"].apply(cleanText)
X = np.array(cleaned_text)
y = np.array(df["sentiment"])
def ELMoEmbedding(x):
    """Embed a batch of sentences with the module-level ELMo ``embed`` module.

    Intended to be wrapped in a Keras ``Lambda`` layer, as done in
    ``build_model``.

    Args:
        x: Tensor of sentences; ``build_model`` feeds it from
            ``Input(shape=(1,), dtype="string")``, i.e. one string per row.

    Returns:
        The value stored under the "default" key of the module's "default"
        signature output for the given sentences.
    """
    sentences = tf.cast(x, tf.string)
    # Flatten to a 1-D batch of strings. An axis-less tf.squeeze would also
    # drop the batch axis when the batch holds a single sentence, handing the
    # module a scalar instead of a batch.
    sentences = tf.reshape(sentences, [-1])
    return embed(sentences, signature="default", as_dict=True)["default"]
def build_model():
    """Build and compile the ELMo-based binary sentiment classifier.

    Architecture: string input -> ``ELMoEmbedding`` (Lambda layer, declared
    output width 1024) -> Dense(256, relu, L2-regularized) -> Dense(1,
    sigmoid).

    Returns:
        The compiled model (binary cross-entropy loss, RMSprop optimizer,
        accuracy metric).
    """
    input_text = Input(shape=(1,), dtype="string")
    embedding = Lambda(ELMoEmbedding, output_shape=(1024,))(input_text)
    # The layers above come from TensorFlow's bundled Keras, so the
    # regularizer and the Model wrapper are taken from tf.keras as well;
    # combining tf.keras layers with the standalone `keras` package's Model
    # is fragile because the two implementations do not share internals.
    # NOTE(review): callers managing sessions should likewise go through
    # tf.keras.backend - confirm against the training code.
    hidden = Dense(
        256,
        activation="relu",
        kernel_regularizer=tf.keras.regularizers.l2(0.001),
    )(embedding)
    pred = Dense(1, activation="sigmoid")(hidden)
    model = tf.keras.Model(inputs=[input_text], outputs=pred)
    model.compile(
        loss="binary_crossentropy", optimizer="rmsprop", metrics=["accuracy"]
    )
    return model
# Build and compile the classifier once, at module level.
model_elmo = build_model()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment