-
-
Save amankharwal/786d2cfd2c5be8b30ee017ced01b46a4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Get English and Hindi Vocabulary
# `lines` is defined earlier in the script (presumably a pandas DataFrame with
# string columns 'english_sentence' and 'hindi_sentence' -- confirm upstream).

# Collect every distinct whitespace-separated token of each language.
# set.update() already discards duplicates, so no membership pre-check is needed.
all_eng_words = set()
for eng in lines['english_sentence']:
    all_eng_words.update(eng.split())

all_hindi_words = set()
for hin in lines['hindi_sentence']:
    all_hindi_words.update(hin.split())

# Per-sentence token counts, stored as new columns on `lines`.
# Tokenize with split() -- the same rule used for the vocabulary above --
# because split(" ") yields an empty string for every repeated, leading or
# trailing space and would therefore over-count the tokens.
lines['length_eng_sentence'] = lines['english_sentence'].apply(lambda x: len(x.split()))
lines['length_hin_sentence'] = lines['hindi_sentence'].apply(lambda x: len(x.split()))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment