Skip to content

Instantly share code, notes, and snippets.

Avatar

ABHISHEK SHARMA abhishek-shrm

  • New Delhi, India
View GitHub Profile
View rfc_vs_dt-1.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
# Load the dataset from CSV and preview its first rows
df = pd.read_csv('dataset.csv')
df.head()
View eta_on_text_data-16.py
# Creating Document Term Matrix
from sklearn.feature_extraction.text import CountVectorizer

# Count word-level tokens of every lemmatized document
cv = CountVectorizer(analyzer='word')
data = cv.fit_transform(df_grouped['lemmatized'])

# get_feature_names() was removed in scikit-learn 1.2; prefer its replacement
# and fall back to the old name on releases that predate get_feature_names_out()
if hasattr(cv, 'get_feature_names_out'):
    feature_names = cv.get_feature_names_out()
else:
    feature_names = cv.get_feature_names()

# Dense frame with one column per vocabulary term, re-using df_grouped's index
df_dtm = pd.DataFrame(data.toarray(), columns=feature_names)
df_dtm.index = df_grouped.index
df_dtm.head(3)
View keplergl-01.py
import pandas as pd
# Read the trips CSV from the kepler.gl-data folder, report its dimensions
# and preview the first rows
df = pd.read_csv('kepler.gl-data/nyctrips/data.csv')
print('Shape=>', df.shape)
df.head()
View keyword-extraction-textrank-20.py
# Score the predicted keyphrases of the test set against the reference keys
test_precision, test_recall, test_Fmeasure = evaluate_micro_average(
    df_test['keys'].values,
    df_test['pred_keys'],
)
print('Precision=>', test_precision)
print('Recall=>', test_recall)
print('F-measure=>', test_Fmeasure)
View keyword-extraction-textrank-19.py
# Score the predicted keyphrases of the validation set against the reference keys
val_precision, val_recall, val_Fmeasure = evaluate_micro_average(
    df_val['keys'].values,
    df_val['pred_keys'],
)
print('Precision=>', val_precision)
print('Recall=>', val_recall)
print('F-measure=>', val_Fmeasure)
View keyword-extraction-textrank-18.py
def evaluate_micro_average(actual_keys,predicted_keys):
# Combining actual keywords
ground_truth=[]
for i in actual_keys:
ground_truth.extend(i)
# Combining extracted keywords
extracted_keywords=[]
for i in predicted_keys:
extracted_keywords.extend(i)
View keyword-extraction-textrank-17.py
# Run the keyphrase extractor over every test-set abstract and keep the
# results in a new 'pred_keys' column
df_test['pred_keys'] = df_test['abstract'].apply(extract_keyphrase)
df_test.head()
View keyword-extraction-textrank-16.py
# Function for extracting keyphrases
def extract_keyphrase(text, topn=0.33):
    """Return the top-ranked TextRank keyphrases of *text*.

    Args:
        text: Raw text to extract keyphrases from.
        topn: Forwarded to ``textacy.ke.textrank``. Per textacy's API an int
            is an absolute number of phrases, while a float in (0.0, 1.0] is
            the fraction of ranked candidates to keep. The default 0.33
            keeps roughly the top third.

    Returns:
        A list of keyphrase strings, in the order textacy ranked them.
    """
    # Creating Spacy's Doc object (`en` is expected to be defined elsewhere
    # in the notebook -- presumably the English language pipeline)
    doc = textacy.make_spacy_doc(text, lang=en)
    # Rank candidate phrases; normalize='lower' lower-cases the terms
    keyphrases = textacy.ke.textrank(doc, normalize='lower', topn=topn)
    # textrank yields (phrase, score) pairs; callers only need the phrases
    return [phrase for phrase, _score in keyphrases]
# Extracting keyphrases for abstracts in validation set
View keyword-extraction-textrank-15.py
# Rank candidate keyphrases with TextRank; the float topn=0.33 asks textacy
# for a fraction of the ranked candidates (about the top third), not a count
keyphrases = textacy.ke.textrank(doc, normalize='lower', topn=0.33)
keyphrases
View keyword-extraction-textrank-14.py
# Build a spaCy Doc from the validation abstract labelled 3
# (`en` must already be defined; presumably the English language pipeline)
doc = textacy.make_spacy_doc(df_val['abstract'][3], lang=en)
doc