Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Key-phrase extraction and Integration in Power BI
"""
@author: Jayant Kumar Kodwani
"""
# 'dataset' holds the input data for this script
import RAKE
import pandas as pd
"""Add stopwords list, REPLACE path as required"""
stop_dir = r"C:\Users\Jayant\.spyder-py3\stopwords.txt"
# Build the RAKE extractor from the stop-word file configured above.
rake_object = RAKE.Rake(stop_dir)

# Start with an empty DataFrame to store the output.
Rake_Final_Output = pd.DataFrame()

# Assign your dataset to a variable.
df = dataset
def Sort_Tuple(tup):
    """Sort a list of tuples in place by each tuple's second element.

    Args:
        tup: A mutable list whose items are indexable and have at least
            two elements (the loop below passes the RAKE result, whose
            second element is the keyword score).

    Returns:
        The same list object that was passed in, now in ascending order
        of element 1, so the highest-scored item ends up last.
    """
    # list.sort mutates the list; returning it lets callers chain a slice.
    tup.sort(key=lambda item: item[1])
    return tup
# Loop through all the field/column values and apply RAKE.
# Column order of the final table: the RAKE result first, then the source
# fields copied from the input row.
output_columns = ['Word', 'KeywordScore', 'Keywords', 'Date', 'Question',
                  'Answer', 'Index']
keyword_rows = []
# Iterate the column values positionally instead of looking rows up by
# label with df.Column[x], so this also works when the DataFrame index is
# not the default 0..n-1 range.
for date, question, answer, index in zip(df['Date'], df['Question'],
                                         df['Answer'], df['Index']):
    print(answer)
    # Run the RAKE algorithm. Sort_Tuple orders the results ascending by
    # score, so [-1:] keeps the single best key-phrase; widen the slice
    # (e.g. [-3:]) to get more than one key-phrase from the text.
    keywords = Sort_Tuple(rake_object.run(answer))[-1:]
    for pair in keywords:
        word, score = pair
        keyword_rows.append({
            'Word': word,
            'KeywordScore': score,
            'Keywords': pair,
            'Date': date,
            'Question': question,
            'Answer': answer,
            'Index': index,
        })
# Build the result once at the end: DataFrame.append was removed in
# pandas 2.0, and appending inside the loop re-copied the table each time.
# Passing `columns` keeps the schema stable even when no key-phrase was found.
Rake_Final_Output = pd.DataFrame(keyword_rows, columns=output_columns)
Rake_Final_Output['KeywordScore'] = Rake_Final_Output['KeywordScore'].astype('float')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment