import easyocr
import spacy

reader = easyocr.Reader(['en'])        # OCR reader; English assumed
nlp = spacy.load('en_core_web_sm')     # spaCy pipeline (model name assumed, not shown in the gist)

def get_individual_resume(image):
    # OCR the resume image and join the detected fragments into one string
    result = reader.readtext(image)
    text = [r[1] for r in result]      # each r is (bounding_box, text, confidence)
    content = ' '.join(text)
    doc = nlp(content)
    # collect PERSON entities; the gist truncates here, so this completion is a best guess
    person = [ent.text for ent in doc.ents if ent.label_ == 'PERSON']
    return person
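A hedged usage sketch (the image path is hypothetical):

names = get_individual_resume('resume.png')  # hypothetical path to a resume image
print(names)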
import re
from sklearn import metrics

def preprocess_text():
    # read a search query from stdin and normalise it the same way as the titles
    text = input()
    text = text.lower()
    text = re.sub('[^A-Za-z0-9]+', ' ', text)
    return text

query_text = preprocess_text()
query_encoding = get_bert_embeddings(query_text, preprocessor, encoder)
# cosine similarity between each stored title embedding and the query embedding
df_yt['similarity_score'] = df_yt['encodings'].apply(
    lambda x: metrics.pairwise.cosine_similarity(x, query_encoding)[0][0]
)
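With the scores in place, the best matches can be read off directly; a minimal follow-up (uses the Title column created in the loading step further down):

print(df_yt.sort_values('similarity_score', ascending=False)[['Title', 'similarity_score']].head(5))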
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text  # noqa: F401 -- registers the custom ops the BERT preprocessor needs

preprocessor = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3")
encoder = hub.KerasLayer("https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-128_A-2/1", trainable=True)

def get_bert_embeddings(text, preprocessor, encoder):
    # build a tiny Keras model that maps a raw string to BERT's pooled embedding
    text_input = tf.keras.layers.Input(shape=(), dtype=tf.string)
    encoder_inputs = preprocessor(text_input)
    outputs = encoder(encoder_inputs)
    embedding_model = tf.keras.Model(text_input, outputs['pooled_output'])
    sentences = tf.constant([text])
    return embedding_model(sentences)  # the gist omits the return; yields a (1, 128) tensor
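A quick sanity check that the helper returns a fixed-size vector (the sentence is illustrative):

vec = get_bert_embeddings('machine learning tutorial', preprocessor, encoder)
print(vec.shape)  # (1, 128) for this small BERT variant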
import pandas as pd

# load the YouTube video metadata and keep one row per title
df_yt = pd.read_csv('/content/GB_videos_data.csv')
df_yt = df_yt.drop_duplicates(subset=['title'])
df_yt = df_yt[['title', 'description']]
df_yt.columns = ['Title', 'Description']
# normalise titles the same way the query is normalised above
df_yt['cleaned_title'] = df_yt['Title'].apply(lambda x: x.lower())
df_yt['cleaned_title'] = df_yt['cleaned_title'].apply(lambda x: re.sub('[^A-Za-z0-9]+', ' ', x))
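The gist never shows how the encodings column used in the similarity step is built; a plausible sketch reusing get_bert_embeddings (the column name comes from the code above, the construction itself is an assumption):

df_yt['encodings'] = df_yt['cleaned_title'].apply(
    lambda t: get_bert_embeddings(t, preprocessor, encoder)  # one (1, 128) vector per title
)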
import networkx as nx
import matplotlib.pyplot as plt

# build a directed multigraph from the "declared" triples in the news knowledge graph
kg_df = news_df.copy()
G = nx.from_pandas_edgelist(kg_df[kg_df['links'] == "declared"], "subject", "object",
                            edge_attr=True, create_using=nx.MultiDiGraph())
plt.figure(figsize=(7, 7))
pos = nx.spring_layout(G, k=0.5)
nx.draw(G, with_labels=True, node_color='skyblue', node_size=700, edge_cmap=plt.cm.Blues, pos=pos)
plt.savefig('links.jpg')  # save before show(), which clears the current figure
plt.show()
{"object": "patrolling island lack boats",
"original-source": "Mumbai cops stop patrolling island due to lack of boats",
"score": "100.0"}
def query(news_df, conf):
    # answer a question against the knowledge graph; conf is a 0-1 threshold rescaled to 0-100
    try:
        conf = conf * 100
        question = input()
        question_kg = get_KG(nlp(question))
        query_param = [i for i, j in enumerate(question_kg) if j != '']
        columns = ["subject", "links", "object"]
        col_dict = {}
        # the gist truncates here; the matching against news_df follows in the original
    except Exception as e:
        print(e)  # the except clause is missing from the gist; added so the try is valid
def get_KG(doc):
    # collect nouns and verbs in order, wrapping each verb in '<' markers
    text = []
    for tok in doc:
        if tok.tag_ in ["NN", "NNP", "NNPS", "NNS"]:
            text.append(tok.text)
        if tok.tag_ in ["VB", "VBD", "VBG", "VBN", "VBP", "VBZ"]:
            text.append('<' + tok.text + '<')
    text = '-'.join(text)
    # splitting on '<' yields a rough [subject, link, object, ...] list
    text_list = text.split('<')
    return text_list  # the gist omits the return, but query() above expects a list
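An illustrative call (the sentence is made up, and the exact output depends on the tagger's POS labels):

doc = nlp("Mumbai cops stop patrolling island")
print(get_KG(doc))  # roughly ['Mumbai-cops-', 'stop', '-patrolling-island']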
import yaml

def create_twitter_url(handle, max_results):
    # build a Twitter API v2 recent-search URL for tweets from a given handle
    mrf = "max_results={}".format(max_results)
    q = "query=from:{}".format(handle)
    url = "https://api.twitter.com/2/tweets/search/recent?{}&{}".format(
        mrf, q
    )
    return url

def process_yaml():
    # the gist shows only the signature; loading credentials from a local
    # YAML file is an assumed completion (file name hypothetical)
    with open("config.yaml") as file:
        return yaml.safe_load(file)
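A sketch of how the two helpers would typically be wired together (the config key, handle, and count are assumptions, not from the gist):

import requests

config = process_yaml()
bearer_token = config["search_tweets_api"]["bearer_token"]  # key name assumed
url = create_twitter_url("jack", 10)                        # illustrative handle and count
response = requests.get(url, headers={"Authorization": "Bearer {}".format(bearer_token)})
print(response.json())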
from simpletransformers.classification import ClassificationModel, ClassificationArgs

model_args = ClassificationArgs()
model_args.num_train_epochs = 4                    # train for four epochs
model_args.reprocess_input_data = True             # re-tokenise instead of using cached features
model_args.save_best_model = True
model_args.save_optimizer_and_scheduler = False    # keep checkpoints small
model_args.overwrite_output_dir = True
model_args.manual_seed = 4                         # fixed seed for reproducibility
model_args.use_multiprocessing = True
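A sketch of how these args would typically be used (the model choice and training DataFrame are assumptions, not from the gist):

# train_df is assumed to have the ['text', 'labels'] columns simpletransformers expects
model = ClassificationModel("roberta", "roberta-base", args=model_args, use_cuda=False)
model.train_model(train_df)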