View keyword-extraction-textrank-20.py
# Evaluating on test set: micro-averaged scores of the predicted keyphrases
# (df_test['pred_keys']) against the reference keyphrases (df_test['keys']).
test_precision, test_recall, test_Fmeasure = evaluate_micro_average(
    df_test['keys'].values, df_test['pred_keys']
)
print('Precision=>', test_precision)
print('Recall=>', test_recall)
print('F-measure=>', test_Fmeasure)
View keyword-extraction-textrank-19.py
# Evaluating on validation set: micro-averaged scores of the predicted
# keyphrases (df_val['pred_keys']) against the reference keyphrases (df_val['keys']).
val_precision, val_recall, val_Fmeasure = evaluate_micro_average(
    df_val['keys'].values, df_val['pred_keys']
)
print('Precision=>', val_precision)
print('Recall=>', val_recall)
print('F-measure=>', val_Fmeasure)
View keyword-extraction-textrank-18.py
def evaluate_micro_average(actual_keys,predicted_keys): | |
# Combining actual keywords | |
ground_truth=[] | |
for i in actual_keys: | |
ground_truth.extend(i) | |
# Combining extracted keywords | |
extracted_keywords=[] | |
for i in predicted_keys: | |
extracted_keywords.extend(i) |
View keyword-extraction-textrank-17.py
# Extracting keyphrases for abstracts in test set: one list of keyphrases per
# row, stored in a new 'pred_keys' column.
df_test['pred_keys'] = df_test['abstract'].apply(extract_keyphrase)
# Notebook display of the first rows, to eyeball the new column
df_test.head()
View keyword-extraction-textrank-16.py
# Function for extracting keyphrases
def extract_keyphrase(text, topn=0.33):
    """Return the TextRank keyphrases extracted from *text*.

    Args:
        text: Raw text (e.g. a paper abstract) to extract keyphrases from.
        topn: Forwarded unchanged to ``textacy.ke.textrank``. Per the textacy
            documentation an int is an absolute number of keyphrases, while a
            float in (0.0, 1.0] is a fraction of the candidate terms. The
            default of 0.33 is the value that was previously hard-coded here.

    Returns:
        A list of keyphrase strings, in the order ``textrank`` returns them,
        with the ranking scores dropped.
    """
    # Creating Spacy's Doc object; `en` is the spaCy pipeline loaded at module level
    doc = textacy.make_spacy_doc(text, lang=en)
    # Ranking candidate phrases with TextRank; normalize='lower' is passed so
    # terms are lower-cased before ranking
    keyphrases = textacy.ke.textrank(doc, normalize='lower', topn=topn)
    # textrank yields (phrase, score) pairs; callers only need the phrases
    return [phrase for phrase, _score in keyphrases]
# Extracting keyphrases for abstracts in validation set |
View keyword-extraction-textrank-15.py
# Getting the top-ranked keyphrases from the text. topn is a float here, which
# textacy documents as a fraction of the candidate terms (0.33 -> roughly the
# top third) rather than an absolute count.
keyphrases = textacy.ke.textrank(doc, normalize='lower', topn=0.33)
# Notebook display of the (phrase, score) pairs
keyphrases
View keyword-extraction-textrank-14.py
# Creating Spacy's Doc object for the validation abstract labelled 3,
# using the `en` pipeline loaded earlier
doc = textacy.make_spacy_doc(df_val['abstract'][3], lang=en)
# Notebook display of the parsed document
doc
View keyword-extraction-textrank-13.py
# Importing textaCy | |
import textacy | |
# Importing for keyword extraction(mandatory) | |
import textacy.ke | |
# Loading spacy model | |
en = textacy.load_spacy_lang("en_core_web_sm") |
View keyword-extraction-textrank-12.py
# Installing textacy | |
!pip install textacy |
View keyword-extraction-textrank-11.py
# Getting list of all the files containing abstracts | |
abs_list=glob.glob('/content/test/*.abstr') | |
# Getting index value | |
index=sorted([int(i.split('.')[0].split('/')[-1]) for i in abs_list]) | |
# Getting abstracts | |
abstracts=dict.fromkeys(index,None) | |
for text_file in abs_list: | |
file=open(text_file,encoding='utf8').read() |
NewerOlder