Skip to content

Instantly share code, notes, and snippets.

Mohd Sanad Zaki Rizvi mohdsanadzakirizvi

Block or report user

Report or block mohdsanadzakirizvi

Hide content and notifications from this user.

Learn more about blocking users

Contact Support about this user’s behavior.

Learn more about reporting abuse

Report abuse
View GitHub Profile
View indic_stanfordnlp.py
#dictionary that contains pos tags and their explanations
pos_dict = {
'CC': 'coordinating conjunction','CD': 'cardinal digit','DT': 'determiner',
'EX': 'existential there (like: \"there is\" ... think of it like \"there exists\")',
'FW': 'foreign word','IN': 'preposition/subordinating conjunction','JJ': 'adjective \'big\'',
'JJR': 'adjective, comparative \'bigger\'','JJS': 'adjective, superlative \'biggest\'',
'LS': 'list marker 1)','MD': 'modal could, will','NN': 'noun, singular \'desk\'',
'NNS': 'noun plural \'desks\'','NNP': 'proper noun, singular \'Harrison\'',
'NNPS': 'proper noun, plural \'Americans\'','PDT': 'predeterminer \'all the kids\'',
'POS': 'possessive ending parent\'s','PRP': 'personal pronoun I, he, she',
View indic_syllables.py
from indicnlp.syllable import syllabifier
# Input word that will be split into orthographic syllables
w='जगदीशचंद्र'
# Language code; 'hi' selects Hindi
lang='hi'
# Syllabify the word first, then print the pieces separated by single spaces
syllables = syllabifier.orthographic_syllabify(w, lang)
print(' '.join(syllables))
View indic_similar.py
from indicnlp.script import indic_scripts as isc
from indicnlp.script import phonetic_sim as psim
c1='क'
c2='ख'
c3='भ'
lang='hi'
print('Similarity between {} and {}'.format(c1,c2))
print(psim.cosine(
View indic_phonetic.py
from indicnlp.langinfo import *
# Character whose phonetic properties are inspected
c='आ'
# Language code: 'hi' is Hindi
lang='hi'
# Pair every report template with the predicate that fills it in, then
# evaluate and print them in order (vowel, consonant, velar).
checks = (
    ('Is vowel?: {}', is_vowel),
    ('Is consonant?: {}', is_consonant),
    ('Is velar?: {}', is_velar),
)
for template, predicate in checks:
    print(template.format(predicate(c, lang)))
View indic_roman.py
from indicnlp.transliterate.unicode_transliterate import ItransTransliterator
# Hindi text to be romanized
input_text='आज मौसम अच्छा है। इसलिए हम आज खेल सकते हैं!'
# Convert the Hindi ('hi') text to its ITRANS (Roman) form, then print it
romanized = ItransTransliterator.to_itrans(input_text, 'hi')
print(romanized)
View indic_transliterate.py
from indicnlp.transliterate.unicode_transliterate import UnicodeIndicTransliterator
# Hindi input; in English: "Today the weather is good. Sun is bright and there are no signs of rain. Hence we can play today."
input_text='आज मौसम अच्छा है। सूरज उज्ज्वल है और बारिश के कोई संकेत नहीं हैं। इसलिए हम आज खेल सकते हैं!'
# Source and target language codes: Hindi -> Telugu
source_lang, target_lang = "hi", "te"
# Transliterate the text between the two scripts and print the result
print(UnicodeIndicTransliterator.transliterate(input_text, source_lang, target_lang))
View indic_sentence_split.py
from indicnlp.tokenize import sentence_tokenize
# Hindi paragraph to split; the trailing backslashes continue the literal
# across physical lines, so the resulting string contains no newlines.
indic_string="""तो क्या विश्व कप 2019 में मैच का बॉस टॉस है? यानी मैच में हार-जीत में \
टॉस की भूमिका अहम है? आप ऐसा सोच सकते हैं। विश्वकप के अपने-अपने पहले मैच में बुरी तरह हारने वाली एशिया की दो टीमों \
पाकिस्तान और श्रीलंका के कप्तान ने हालांकि अपने हार के पीछे टॉस की दलील तो नहीं दी, लेकिन यह जरूर कहा था कि वह एक अहम टॉस हार गए थे।"""
# Split the paragraph into sentences; the language code "hi" is passed for Hindi
sentences=sentence_tokenize.sentence_split(indic_string, lang='hi')
# print the sentences
View indic_setup.py
import sys
from indicnlp import common
# Path to the local git clone of the Indic NLP library
# (a relative path here; adjust it to where the repo is checked out)
INDIC_NLP_LIB_HOME=r"indic_nlp_library"
# Path to the local git clone of the Indic NLP Resources repository
# (also a relative path here)
INDIC_NLP_RESOURCES=r"indic_nlp_resources"
# Add library to Python path
View inltk_sent_similar.py
from inltk.inltk import get_sentence_similarity
# The two Hindi sentences to compare
first_sentence = 'मुझे भोजन पसंद है।'
second_sentence = 'मैं ऐसे भोजन की सराहना करता हूं जिसका स्वाद अच्छा हो।'
# Similarity is computed between the sentence encodings by a cmp function,
# whose default is cosine similarity
get_sentence_similarity(first_sentence, second_sentence, 'hi')
View inltk_text_complete.py
from inltk.inltk import setup
from inltk.inltk import predict_next_words
# Language code shared by the model download and the prediction call;
# 'bn' is Bengali, matching the Bengali prompt below
lang_code = 'bn'
# Download the models for that language
setup(lang_code)
# Predict a continuation of the Bengali prompt; 10 and 0.7 are passed positionally
# NOTE(review): presumably the number of words and the randomness — confirm against the iNLTK docs
predict_next_words("আবহাওয়া চমৎকার", 10, lang_code, 0.7)
You can’t perform that action at this time.