Last active
June 30, 2020 03:20
-
-
Save aniruddha27/0decb9b6f8632a209b9ab054b68b0099 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split every extracted "noun preposition noun" phrase into its parts and
# tally how often each preposition occurs.
#
# Reads df_show3 (defined earlier) through its 'Sent', 'Year' and 'Output'
# columns; each 'Output' cell is iterated as a collection of phrase strings.
# Produces:
#   prep_dict - preposition -> number of phrases that use it
#   dis_list  - one record dict per phrase
#   df_sep3   - DataFrame built from dis_list
prep_dict = dict()
dis_dict = dict()
dis_list = []

# Walk the three columns in lockstep instead of using df_show3.loc[i, ...]
# with i in range(len(df_show3)): label lookups only line up with positions
# while the index is exactly 0..n-1, which is no longer true once the frame
# has been filtered without a reset_index().
for sentence, year, output in zip(df_show3['Sent'], df_show3['Year'], df_show3['Output']):
    # a single sentence can yield several phrases
    for sent in output:
        # tokenise once: first token is the first noun, second token is the
        # preposition, and the remaining tokens (kept as a list) form the
        # second noun. A phrase with fewer than two tokens raises IndexError.
        tokens = sent.split()
        n1, p, n2 = tokens[0], tokens[1], tokens[2:]

        # keep the phrase parts together with the sentence they came from
        dis_dict = {'Sent': sentence, 'Year': year, 'Noun1': n1, 'Preposition': p, 'Noun2': n2}
        dis_list.append(dis_dict)

        # count the phrases that use this preposition
        prep_dict[p] = prep_dict.get(p, 0) + 1

df_sep3 = pd.DataFrame(dis_list)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment