import easyocr
import spacy

reader = easyocr.Reader(['en'])        # OCR reader; English assumed
nlp = spacy.load('en_core_web_sm')     # spaCy pipeline (model name assumed, not shown in the gist)

def get_individual_resume(image):
    # OCR the resume image and join the detected fragments into one string
    result = reader.readtext(image)
    text = [r[1] for r in result]      # each r is (bounding_box, text, confidence)
    content = ' '.join(text)
    doc = nlp(content)
    # collect PERSON entities; the gist truncates here, so this completion is a best guess
    person = [ent.text for ent in doc.ents if ent.label_ == 'PERSON']
    return person
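A hedged usage sketch (the image path is hypothetical):

names = get_individual_resume('resume.png')  # hypothetical path to a resume image
print(names)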
import re
from sklearn import metrics

def preprocess_text():
    # read a search query from stdin and normalise it the same way as the titles
    text = input()
    text = text.lower()
    text = re.sub('[^A-Za-z0-9]+', ' ', text)
    return text

query_text = preprocess_text()
query_encoding = get_bert_embeddings(query_text, preprocessor, encoder)
# cosine similarity between each stored title embedding and the query embedding
df_yt['similarity_score'] = df_yt['encodings'].apply(
    lambda x: metrics.pairwise.cosine_similarity(x, query_encoding)[0][0]
)
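With the scores in place, the best matches can be read off directly; a minimal follow-up (uses the Title column created in the loading step further down):

print(df_yt.sort_values('similarity_score', ascending=False)[['Title', 'similarity_score']].head(5))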
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text  # noqa: F401 -- registers the custom ops the BERT preprocessor needs

preprocessor = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3")
encoder = hub.KerasLayer("https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-128_A-2/1", trainable=True)

def get_bert_embeddings(text, preprocessor, encoder):
    # build a tiny Keras model that maps a raw string to BERT's pooled embedding
    text_input = tf.keras.layers.Input(shape=(), dtype=tf.string)
    encoder_inputs = preprocessor(text_input)
    outputs = encoder(encoder_inputs)
    embedding_model = tf.keras.Model(text_input, outputs['pooled_output'])
    sentences = tf.constant([text])
    return embedding_model(sentences)  # the gist omits the return; yields a (1, 128) tensor
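A quick sanity check that the helper returns a fixed-size vector (the sentence is illustrative):

vec = get_bert_embeddings('machine learning tutorial', preprocessor, encoder)
print(vec.shape)  # (1, 128) for this small BERT variant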
import pandas as pd

# load the YouTube video metadata and keep one row per title
df_yt = pd.read_csv('/content/GB_videos_data.csv')
df_yt = df_yt.drop_duplicates(subset=['title'])
df_yt = df_yt[['title', 'description']]
df_yt.columns = ['Title', 'Description']
# normalise titles the same way the query is normalised above
df_yt['cleaned_title'] = df_yt['Title'].apply(lambda x: x.lower())
df_yt['cleaned_title'] = df_yt['cleaned_title'].apply(lambda x: re.sub('[^A-Za-z0-9]+', ' ', x))
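The gist never shows how the encodings column used in the similarity step is built; a plausible sketch reusing get_bert_embeddings (the column name comes from the code above, the construction itself is an assumption):

df_yt['encodings'] = df_yt['cleaned_title'].apply(
    lambda t: get_bert_embeddings(t, preprocessor, encoder)  # one (1, 128) vector per title
)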
import networkx as nx
import matplotlib.pyplot as plt

# build a directed multigraph from the "declared" triples in the news knowledge graph
kg_df = news_df.copy()
G = nx.from_pandas_edgelist(kg_df[kg_df['links'] == "declared"], "subject", "object",
                            edge_attr=True, create_using=nx.MultiDiGraph())
plt.figure(figsize=(7, 7))
pos = nx.spring_layout(G, k=0.5)
nx.draw(G, with_labels=True, node_color='skyblue', node_size=700, edge_cmap=plt.cm.Blues, pos=pos)
plt.savefig('links.jpg')  # save before show(), which clears the current figure
plt.show()
{"object": "patrolling island lack boats",
"original-source": "Mumbai cops stop patrolling island due to lack of boats",
"score": "100.0"}
def query(news_df, conf):
    # answer a question against the knowledge graph; conf is a 0-1 threshold rescaled to 0-100
    try:
        conf = conf * 100
        question = input()
        question_kg = get_KG(nlp(question))
        query_param = [i for i, j in enumerate(question_kg) if j != '']
        columns = ["subject", "links", "object"]
        col_dict = {}
        # the gist truncates here; the matching against news_df follows in the original
    except Exception as e:
        print(e)  # the except clause is missing from the gist; added so the try is valid
def get_KG(doc):
    # collect nouns and verbs in order, wrapping each verb in '<' markers
    text = []
    for tok in doc:
        if tok.tag_ in ["NN", "NNP", "NNPS", "NNS"]:
            text.append(tok.text)
        if tok.tag_ in ["VB", "VBD", "VBG", "VBN", "VBP", "VBZ"]:
            text.append('<' + tok.text + '<')
    text = '-'.join(text)
    # splitting on '<' yields a rough [subject, link, object, ...] list
    text_list = text.split('<')
    return text_list  # the gist omits the return, but query() above expects a list
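An illustrative call (the sentence is made up, and the exact output depends on the tagger's POS labels):

doc = nlp("Mumbai cops stop patrolling island")
print(get_KG(doc))  # roughly ['Mumbai-cops-', 'stop', '-patrolling-island']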
import yaml

def create_twitter_url(handle, max_results):
    # build a Twitter API v2 recent-search URL for tweets from a given handle
    mrf = "max_results={}".format(max_results)
    q = "query=from:{}".format(handle)
    url = "https://api.twitter.com/2/tweets/search/recent?{}&{}".format(
        mrf, q
    )
    return url

def process_yaml():
    # the gist shows only the signature; loading credentials from a local
    # YAML file is an assumed completion (file name hypothetical)
    with open("config.yaml") as file:
        return yaml.safe_load(file)
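A sketch of how the two helpers would typically be wired together (the config key, handle, and count are assumptions, not from the gist):

import requests

config = process_yaml()
bearer_token = config["search_tweets_api"]["bearer_token"]  # key name assumed
url = create_twitter_url("jack", 10)                        # illustrative handle and count
response = requests.get(url, headers={"Authorization": "Bearer {}".format(bearer_token)})
print(response.json())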
from simpletransformers.classification import ClassificationModel, ClassificationArgs

model_args = ClassificationArgs()
model_args.num_train_epochs = 4                    # train for four epochs
model_args.reprocess_input_data = True             # re-tokenise instead of using cached features
model_args.save_best_model = True
model_args.save_optimizer_and_scheduler = False    # keep checkpoints small
model_args.overwrite_output_dir = True
model_args.manual_seed = 4                         # fixed seed for reproducibility
model_args.use_multiprocessing = True
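A sketch of how these args would typically be used (the model choice and training DataFrame are assumptions, not from the gist):

# train_df is assumed to have the ['text', 'labels'] columns simpletransformers expects
model = ClassificationModel("roberta", "roberta-base", args=model_args, use_cuda=False)
model.train_model(train_df)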