Last active
June 8, 2020 10:36
-
-
Save aniruddha27/0c2f90071cdce515c255807179b07a81 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split every extracted output string into subject / verb / object parts and
# collect them, one row per output, into `df_sep`. Also count how many
# outputs contain each verb in `verb_dict`.
#
# Reads `df_show`, which must provide the columns 'Sent', 'Year' and
# 'Output'; each 'Output' cell is iterated and every item is whitespace-split,
# so it is expected to be an iterable of strings.

# verb -> number of outputs whose second token is that verb
verb_dict = dict()
# most recently built row (kept at module level for backward compatibility)
dis_dict = dict()
# one dict per extracted output; becomes the rows of `df_sep`
dis_list = []

# Iterate over the columns in lockstep instead of `df_show.loc[i, ...]` with
# `i in range(len(df_show))`: `.loc` is label-based, so the positional counter
# only works when the index is exactly 0..n-1 and breaks on filtered or
# re-indexed frames.
for sentence, year, output in zip(df_show['Sent'], df_show['Year'], df_show['Output']):
    # iterating over all the outputs from the sentence
    for sent in output:
        # tokenize once and reuse the result
        tokens = sent.split()

        # An output needs at least a first token and a verb; skip shorter
        # strings instead of failing with IndexError on tokens[1].
        if len(tokens) < 2:
            continue

        # First token, second token (verb) and the remaining tokens.
        # Noun1 and Noun2 are kept as lists of tokens, as before.
        n1, v, n2 = tokens[:1], tokens[1], tokens[2:]

        # append to list, along with the sentence and its year
        dis_dict = {'Sent': sentence, 'Year': year, 'Noun1': n1, 'Verb': v, 'Noun2': n2}
        dis_list.append(dis_dict)

        # counting the number of outputs containing the verb
        verb = v
        verb_dict[verb] = verb_dict.get(verb, 0) + 1

# Explicit columns keep the schema stable even when no output was extracted.
df_sep = pd.DataFrame(dis_list, columns=['Sent', 'Year', 'Noun1', 'Verb', 'Noun2'])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment