Key-phrase extraction and Integration in Power BI
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
@author: Jayant Kumar Kodwani | |
""" | |
# 'dataset' holds the input data for this script
import RAKE
import pandas as pd

# Stop-word list handed to RAKE. REPLACE this path as required.
stop_dir = r"C:\Users\Jayant\.spyder-py3\stopwords.txt"
rake_object = RAKE.Rake(stop_dir)

# Create an empty DataFrame to store the output.
Rake_Final_Output = pd.DataFrame()

# Assign your dataset to a variable.
df = dataset
def Sort_Tuple(tup):
    """Sort a list of tuples in place by their second element and return it.

    Args:
        tup: A list of indexable items (e.g. ``(phrase, score)`` pairs).
            The list itself is reordered; no copy is made.

    Returns:
        The same list object, ordered ascending by element ``[1]``. The sort
        is stable, so items with equal keys keep their relative order.
    """
    tup.sort(key=lambda pair: pair[1])
    return tup
# Loop through all the field/column values and apply RAKE.
# The columns are walked positionally (zip) rather than by label, so the
# script also works when the DataFrame index is not the default 0..n-1 range.
keyphrase_frames = []
for subtitles, date, question, index in zip(
    df['Answer'], df['Date'], df['Question'], df['Index']
):
    print(subtitles)
    # Run the RAKE algorithm. Sort_Tuple orders the results ascending by
    # their second element, so [-1:] keeps the last (largest-keyed) entry.
    # Change the slice [-1:] to get more than 1 keyphrase from the text.
    keywords = Sort_Tuple(rake_object.run(subtitles))[-1:]
    # Create a DataFrame using the RAKE output data.
    Output = pd.DataFrame(keywords, columns=['Word', 'KeywordScore'])
    Output['Keywords'] = keywords
    Output['KeywordScore'] = Output['KeywordScore'].astype('float')
    Output['Date'] = date
    Output['Question'] = question
    Output['Answer'] = subtitles
    Output['Index'] = index
    keyphrase_frames.append(Output)

# DataFrame.append was removed in pandas 2.0; concatenating once after the
# loop also avoids re-copying the accumulated frame on every iteration.
if keyphrase_frames:
    Rake_Final_Output = pd.concat(keyphrase_frames, ignore_index=True)
else:
    # No input rows: keep the result an empty DataFrame.
    Rake_Final_Output = pd.DataFrame()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment