# shantanuo/tf_learn.py

## tf_learn.py
# https://towardsdatascience.com/transfer-learning-using-elmo-embedding-c4a7e415103c
import pandas as pd
import numpy as np
import re

import tensorflow_hub as hub
import tensorflow as tf
import keras
from tensorflow.python.keras.layers import Input, Dense, Lambda

from keras.models import Model

#!mkdir module/
#!mkdir module/module_elmo2

#!curl -L "https://tfhub.dev/google/elmo/2?tf-hub-format=compressed" | tar -zxvC module/module_elmo2
#!unzip first-gop-debate-twitter-sentiment.zip

# Demo 1: feed whole sentences to the locally stored ELMo module through its
# "default" signature.
elmo = hub.Module("module/module_elmo2/", trainable=False)

# as_dict=True exposes the signature's outputs by name; only the "elmo"
# entry is kept.
embeddings = elmo(
    ["the cat is on the mat", "what are you doing in evening"],
    signature="default",
    as_dict=True,
)["elmo"]

with tf.Session() as session:
    # Variables and lookup tables both have to be initialised before the
    # embedding tensor can be evaluated.
    session.run(tf.global_variables_initializer())
    session.run(tf.tables_initializer())
    message_embeddings = session.run(embeddings)

# Demo 2: feed pre-tokenised sentences through the "tokens" signature.
# The `elmo` module instance created earlier in this file is reused; loading
# the same module a second time only adds another copy of it to the graph.
tokens_input = [
    ["the", "cat", "is", "on", "the", "mat"],
    ["what", "are", "you", "doing", "in", "evening"],
]
# Derive the lengths from the token lists. Both sentences have six tokens;
# the previously hard-coded [6, 5] under-reported the second one, which
# tells the module to leave its last token ("evening") out of the sequence.
tokens_length = [len(sentence_tokens) for sentence_tokens in tokens_input]
embeddings = elmo(
    inputs={"tokens": tokens_input, "sequence_len": tokens_length},
    signature="tokens",
    as_dict=True,
)["elmo"]

with tf.Session() as session:
    # Variables and lookup tables must be initialised before evaluation.
    session.run([tf.global_variables_initializer(), tf.tables_initializer()])
    message_embeddings = session.run(embeddings)

def embed_elmo2(module):
    """Return a callable that embeds sentences with a TF-Hub module.

    A dedicated graph is built containing a string placeholder and the
    module applied to it. A ``tf.train.MonitoredSession`` is opened on that
    graph and captured by the returned function; this function never closes
    it.

    Args:
        module: Value passed unchanged to ``hub.Module``.

    Returns:
        A one-argument function that feeds its argument to the placeholder
        and returns the evaluated module output.
    """
    graph = tf.Graph()
    with graph.as_default():
        sentence_input = tf.placeholder(tf.string)
        hub_module = hub.Module(module)
        output_tensor = hub_module(sentence_input)
        sess = tf.train.MonitoredSession()

    def run_embedding(x):
        return sess.run(output_tensor, {sentence_input: x})

    return run_embedding


# Try the embedding helper on a single sentence. Outside an interactive
# session the resulting shape is computed and then discarded.
embed_fn = embed_elmo2("module/module_elmo2")
embed_fn(["i am sambit"]).shape

# Load Sentiment.csv, drop the "Neutral" rows and overwrite the remaining
# labels in place: "Negative" becomes 0 and "Positive" becomes 1.
df = pd.read_csv("Sentiment.csv", encoding="latin")
df = df[df["sentiment"].ne("Neutral")]
df.loc[df["sentiment"].eq("Negative"), "sentiment"] = 0
df.loc[df["sentiment"].eq("Positive"), "sentiment"] = 1

def cleanText(text):
    """Normalise one piece of text for embedding.

    Surrounding whitespace is stripped, line breaks inside the text become
    spaces, the punctuation characters listed in the pattern below are
    removed and the result is lower-cased. The contraction, link and number
    handling steps (replace_contraction, replace_links, remove_numbers) are
    not applied.

    Args:
        text: The string to clean.

    Returns:
        The cleaned, lower-cased string.
    """
    flattened = text.strip()
    for line_break in ("\n", "\r"):
        flattened = flattened.replace(line_break, " ")
    without_punctuation = re.sub(
        r'[,!@#$%^&*)(|/><";:.?\'\\}{]', "", flattened
    )
    return without_punctuation.lower()

# Targets: the encoded sentiment column.
# NOTE(review): the column is filled by overwriting string labels with 0/1,
# so this array may come out with object dtype — confirm the training code
# accepts that or cast explicitly.
y = np.array(df["sentiment"])
# Features: every entry of the text column passed through cleanText.
X = np.array(df["text"].apply(cleanText))


# Module instance that ELMoEmbedding applies inside the Keras model.
embed = hub.Module("module/module_elmo2")

def ELMoEmbedding(x):
    """Embed a batch of strings with the module-level ``embed`` ELMo module.

    Used as the function of the Keras ``Lambda`` layer in ``build_model``,
    where the input is declared as ``Input(shape=(1,), dtype="string")``.

    Args:
        x: Tensor of strings; it is cast to ``tf.string`` and flattened to
            one dimension before being handed to the module.

    Returns:
        The "default" entry of the module's "default" signature output.
    """
    # Flatten to a 1-D batch of sentences. A bare tf.squeeze would remove
    # every size-1 axis, so a batch holding a single sentence would collapse
    # to a scalar instead of staying a batch of one.
    sentences = tf.reshape(tf.cast(x, tf.string), [-1])
    return embed(sentences, signature="default", as_dict=True)["default"]

def build_model():
    """Assemble and compile the ELMo-based binary classifier.

    Architecture: one string input -> ``Lambda(ELMoEmbedding)`` declared
    with output shape (1024,) -> Dense(256, relu, L2 0.001) ->
    Dense(1, sigmoid). The model is compiled with binary cross-entropy,
    the "rmsprop" optimizer and the accuracy metric.

    NOTE(review): Input/Dense/Lambda are imported from
    ``tensorflow.python.keras`` while Model and the regularizer come from
    the standalone ``keras`` package — confirm this mix works with the
    installed versions.

    Returns:
        The compiled model.
    """
    text_in = Input(shape=(1,), dtype="string")

    # Create the layers first, then chain them over the input.
    elmo_layer = Lambda(ELMoEmbedding, output_shape=(1024,))
    hidden_layer = Dense(
        256,
        activation="relu",
        kernel_regularizer=keras.regularizers.l2(0.001),
    )
    output_layer = Dense(1, activation="sigmoid")
    probability = output_layer(hidden_layer(elmo_layer(text_in)))

    classifier = Model(inputs=[text_in], outputs=probability)
    classifier.compile(
        loss="binary_crossentropy",
        optimizer="rmsprop",
        metrics=["accuracy"],
    )
    return classifier

# Instantiate the compiled classifier returned by build_model().
model_elmo = build_model()
	# https://towardsdatascience.com/transfer-learning-using-elmo-embedding-c4a7e415103c
	import pandas as pd
	import numpy as np
	import re

	import tensorflow_hub as hub
	import tensorflow as tf
	import keras
	from tensorflow.python.keras.layers import Input, Dense, Lambda

	from keras.models import Model

	#!mkdir module/
	#!mkdir module/module_elmo2

	#!curl -L "https://tfhub.dev/google/elmo/2?tf-hub-format=compressed" | tar -zxvC module/module_elmo2
	#!unzip first-gop-debate-twitter-sentiment.zip

	# Demo 1: feed whole sentences to the locally stored ELMo module through
	# its "default" signature.
	elmo = hub.Module("module/module_elmo2/", trainable=False)

	# as_dict=True exposes the signature's outputs by name; only the "elmo"
	# entry is kept.
	embeddings = elmo(
	    ["the cat is on the mat", "what are you doing in evening"],
	    signature="default",
	    as_dict=True,
	)["elmo"]

	with tf.Session() as session:
	    # Variables and lookup tables must be initialised before evaluation.
	    session.run([tf.global_variables_initializer(), tf.tables_initializer()])
	    message_embeddings = session.run(embeddings)

	# Demo 2: feed pre-tokenised sentences through the "tokens" signature.
	# The `elmo` module instance created just above is reused; loading the
	# same module a second time only adds another copy of it to the graph.
	tokens_input = [
	    ["the", "cat", "is", "on", "the", "mat"],
	    ["what", "are", "you", "doing", "in", "evening"],
	]
	# Derive the lengths from the token lists. Both sentences have six
	# tokens; the previously hard-coded [6, 5] under-reported the second one.
	tokens_length = [len(sentence_tokens) for sentence_tokens in tokens_input]
	embeddings = elmo(
	    inputs={"tokens": tokens_input, "sequence_len": tokens_length},
	    signature="tokens",
	    as_dict=True,
	)["elmo"]

	with tf.Session() as session:
	    # Variables and lookup tables must be initialised before evaluation.
	    session.run([tf.global_variables_initializer(), tf.tables_initializer()])
	    message_embeddings = session.run(embeddings)

	def embed_elmo2(module):
	    """Return a callable that embeds sentences with a TF-Hub module.

	    A dedicated graph is built containing a string placeholder and the
	    module applied to it. A ``tf.train.MonitoredSession`` is opened on
	    that graph and captured by the returned function; this function
	    never closes it.

	    Args:
	        module: Value passed unchanged to ``hub.Module``.

	    Returns:
	        A one-argument function that feeds its argument to the
	        placeholder and returns the evaluated module output.
	    """
	    with tf.Graph().as_default():
	        sentences = tf.placeholder(tf.string)
	        embed = hub.Module(module)
	        embeddings = embed(sentences)
	        session = tf.train.MonitoredSession()
	    return lambda x: session.run(embeddings, {sentences: x})


	# Try the embedding helper on a single sentence. Outside an interactive
	# session the resulting shape is computed and then discarded.
	embed_fn = embed_elmo2("module/module_elmo2")
	embed_fn(["i am sambit"]).shape

	# Load Sentiment.csv, drop the "Neutral" rows and overwrite the remaining
	# labels in place: "Negative" becomes 0 and "Positive" becomes 1.
	df = pd.read_csv("Sentiment.csv", encoding="latin")
	df = df[df["sentiment"].ne("Neutral")]
	df.loc[df["sentiment"].eq("Negative"), "sentiment"] = 0
	df.loc[df["sentiment"].eq("Positive"), "sentiment"] = 1

	def cleanText(text):
	    """Normalise one piece of text for embedding.

	    Surrounding whitespace is stripped, line breaks inside the text
	    become spaces, the punctuation characters listed in the pattern
	    below are removed and the result is lower-cased. The contraction,
	    link and number handling steps (replace_contraction, replace_links,
	    remove_numbers) are not applied.

	    Args:
	        text: The string to clean.

	    Returns:
	        The cleaned, lower-cased string.
	    """
	    text = text.strip().replace("\n", " ").replace("\r", " ")
	    # Inside a character class the escaped "\|" matches a plain "|".
	    text = re.sub(r'[,!@#$%^&*)(\|/><";:.?\'\\}{]', "", text)
	    text = text.lower()
	    return text

	# Targets: the encoded sentiment column.
	# NOTE(review): the column is filled by overwriting string labels with
	# 0/1, so this array may come out with object dtype — confirm the
	# training code accepts that or cast explicitly.
	y = np.array(df["sentiment"])
	# Features: every entry of the text column passed through cleanText.
	X = np.array(df["text"].apply(cleanText))


	# Module instance that ELMoEmbedding applies inside the Keras model.
	embed = hub.Module("module/module_elmo2")

	def ELMoEmbedding(x):
	    """Embed a batch of strings with the module-level ``embed`` module.

	    Used as the function of the Keras ``Lambda`` layer in
	    ``build_model``, where the input is declared as
	    ``Input(shape=(1,), dtype="string")``.

	    Args:
	        x: Tensor of strings; it is cast to ``tf.string`` and flattened
	            to one dimension before being handed to the module.

	    Returns:
	        The "default" entry of the module's "default" signature output.
	    """
	    # Flatten to a 1-D batch of sentences. A bare tf.squeeze would remove
	    # every size-1 axis, so a batch holding a single sentence would
	    # collapse to a scalar instead of staying a batch of one.
	    sentences = tf.reshape(tf.cast(x, tf.string), [-1])
	    return embed(sentences, signature="default", as_dict=True)["default"]

	def build_model():
	    """Assemble and compile the ELMo-based binary classifier.

	    Architecture: one string input -> ``Lambda(ELMoEmbedding)`` declared
	    with output shape (1024,) -> Dense(256, relu, L2 0.001) ->
	    Dense(1, sigmoid). The model is compiled with binary cross-entropy,
	    the "rmsprop" optimizer and the accuracy metric.

	    NOTE(review): Input/Dense/Lambda are imported from
	    ``tensorflow.python.keras`` while Model and the regularizer come from
	    the standalone ``keras`` package — confirm this mix works with the
	    installed versions.

	    Returns:
	        The compiled model.
	    """
	    input_text = Input(shape=(1,), dtype="string")
	    embedding = Lambda(ELMoEmbedding, output_shape=(1024,))(input_text)
	    dense = Dense(
	        256, activation="relu", kernel_regularizer=keras.regularizers.l2(0.001)
	    )(embedding)
	    pred = Dense(1, activation="sigmoid")(dense)
	    model = Model(inputs=[input_text], outputs=pred)
	    model.compile(
	        loss="binary_crossentropy", optimizer="rmsprop", metrics=["accuracy"]
	    )
	    return model

	# Instantiate the compiled classifier returned by build_model().
	model_elmo = build_model()