This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def get_individual_resume(image): | |
| result = reader.readtext(image) | |
| text = [] | |
| for r in result: | |
| text.append(r[1]) | |
| content = (' ').join(text) | |
| doc = nlp(content) | |
| person = [] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def preprocess_text(text=None):
    """Lower-case a query and replace non-alphanumeric runs with single spaces.

    Args:
        text: Raw query string. When omitted (``None``), one line is read
            from standard input with ``input()``, which is what the original
            zero-argument call always did.

    Returns:
        The lower-cased text in which every run of characters outside
        ``[A-Za-z0-9]`` is replaced by one space. A leading or trailing
        space produced by that replacement is kept as-is.
    """
    if text is None:
        text = input()
    # Lower-case first, then collapse punctuation/whitespace runs to ' '.
    return re.sub(r'[^A-Za-z0-9]+', ' ', text.lower())
# Read and clean the user's query, then embed it with the BERT layers.
query_text = preprocess_text()
query_encoding = get_bert_embeddings(query_text, preprocessor, encoder)

# Score each stored encoding against the query; cosine_similarity returns a
# 2-D array, so [0][0] extracts the single scalar score for that row.
df_yt['similarity_score'] = df_yt['encodings'].apply(
    lambda row_encoding: metrics.pairwise.cosine_similarity(
        row_encoding, query_encoding
    )[0][0]
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# TF-Hub Keras layers passed to get_bert_embeddings: the text preprocessing
# model and the small uncased BERT encoder (L-2, H-128, A-2 per its URL).
preprocessor = hub.KerasLayer(
    "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
)
encoder = hub.KerasLayer(
    "https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-128_A-2/1",
    trainable=True,
)
| def get_bert_embeddings(text, preprocessor, encoder): | |
| text_input = tf.keras.layers.Input(shape=(), dtype=tf.string) | |
| encoder_inputs = preprocessor(text_input) | |
| outputs = encoder(encoder_inputs) | |
| embedding_model = tf.keras.Model(text_input, outputs['pooled_output']) | |
| sentences = tf.constant([text]) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the video metadata.
df_yt = pd.read_csv('/content/GB_videos_data.csv')

# Rows with a missing title cannot be cleaned (NaN is a float and has no
# .lower()), so drop them before keeping one row per distinct title.
df_yt = df_yt.dropna(subset=['title']).drop_duplicates(subset=['title'])

# Take an explicit copy of the two columns of interest so the assignments
# below write to an independent frame rather than a view of the original.
df_yt = df_yt[['title', 'description']].copy()
df_yt.columns = ['Title', 'Description']

# Lower-case the title and replace every run of non-alphanumeric characters
# with a single space.
df_yt['cleaned_title'] = (
    df_yt['Title']
    .str.lower()
    .str.replace('[^A-Za-z0-9]+', ' ', regex=True)
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build a directed multigraph from the triples whose relation is "declared";
# remaining columns are attached to the edges as attributes.
kg_df = news_df.copy()
G = nx.from_pandas_edgelist(
    kg_df[kg_df['links'] == "declared"],
    "subject",
    "object",
    edge_attr=True,
    create_using=nx.MultiDiGraph(),
)

plt.figure(figsize=(7, 7))
pos = nx.spring_layout(G, k=0.5)
nx.draw(
    G,
    with_labels=True,
    node_color='skyblue',
    node_size=700,
    edge_cmap=plt.cm.Blues,
    pos=pos,
)

# Save BEFORE showing: once plt.show() returns the figure may already be
# closed, in which case savefig would write an empty image.
plt.savefig('links.jpg')
plt.show()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| {"object": "patrolling island lack boats", | |
| "original-source": "Mumbai cops stop patrolling island due to lack of boats", | |
| "score": "100.0"} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def query(news_df, conf): | |
| try: | |
| conf = conf*(100) | |
| question = input() | |
| question_kg = get_KG(nlp(question)) | |
| query_param = [i for i,j in enumerate(question_kg) if j != ''] | |
| columns = ["subject", "links", "object"] | |
| col_dict = {} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def get_KG(doc): | |
| text = [] | |
| for tok in doc: | |
| if tok.tag_ in ["NN","NNP","NNPS","NNS"]: | |
| text.append(tok.text) | |
| if tok.tag_ in ["VB","VBD","VBG","VBN","VBP","VBZ"]: | |
| text.append('<' + tok.text + '<') | |
| text = ('-').join(text) | |
| text_list = text.split('<') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_twitter_url(handle, max_results):
    """Build the Twitter API v2 recent-search URL for tweets from one account.

    Args:
        handle: Account name placed after the ``from:`` search operator.
        max_results: Value for the ``max_results`` query parameter.

    Returns:
        The request URL as a string, with ``max_results`` first and
        ``query`` second.
    """
    from urllib.parse import quote

    # Percent-encode the caller-supplied values so characters such as
    # spaces, '&' or '#' cannot break the query string. Letters, digits and
    # '_' pass through unchanged, so ordinary handles give the same URL.
    mrf = "max_results={}".format(quote(str(max_results), safe=""))
    q = "query=from:{}".format(quote(str(handle), safe=""))
    return "https://api.twitter.com/2/tweets/search/recent?{}&{}".format(mrf, q)
| def process_yaml(): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from simpletransformers.classification import ClassificationModel, ClassificationArgs | |
# Training configuration for the simpletransformers classifier, passed as
# constructor keywords instead of attribute-by-attribute assignment.
model_args = ClassificationArgs(
    num_train_epochs=4,
    reprocess_input_data=True,
    save_best_model=True,
    save_optimizer_and_scheduler=False,
    overwrite_output_dir=True,
    manual_seed=4,
    use_multiprocessing=True,
)