Mohd Sanad Zaki Rizvi mohdsanadzakirizvi

## re_nlp.py
import re
import nltk
# Fetch the 'stopwords' corpus data through nltk
nltk.download('stopwords')

# Import the stopwords corpus from nltk
from nltk.corpus import stopwords

# Keep the English stop words in a set
# NOTE(review): presumably the download above has to run before this lookup - confirm
stop_words = set(stopwords.words('english'))

def clean_text(text):

## re_dates.py
import re

# Collect one (year, month, day) tuple of digit strings per YYYY-MM-DD match.
# `date` is not defined in this snippet; it must already hold the text to scan.
re.compile(r"(\d{4})-(\d{2})-(\d{2})").findall(date)

## html.py
html = """<table class="vertical-navbox nowraplinks" style="float:right;clear:right;width:22.0em;margin:0 0 1.0em 1.0em;background:#f9f9f9;border:1px solid #aaa;padding:0.2em;border-spacing:0.4em 0;text-align:center;line-height:1.4em;font-size:88%"><tbody><tr><th style="padding:0.2em 0.4em 0.2em;font-size:145%;line-height:1.2em"><a href="/wiki/Machine_learning" title="Machine learning">Machine learning</a> and<br /><a href="/wiki/Data_mining" title="Data mining">data mining</a></th></tr><tr><td style="padding:0.2em 0 0.4em;padding:0.25em 0.25em 0.75em;"><a href="/wiki/File:Kernel_Machine.svg" class="image"><img alt="Kernel Machine.svg" src="//upload.wikimedia.org/wikipedia/commons/thumb/f/fe/Kernel_Machine.svg/220px-Kernel_Machine.svg.png" decoding="async" width="220" height="100" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/f/fe/Kernel_Machine.svg/330px-Kernel_Machine.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/f/fe/Kernel_Machine.svg/440px-Kernel_Machine.svg.png 2x" data-file-widt

## re_file_email.py
import re

# Set the name of the file to scan here
with open("filename.txt", mode="r") as fp:
    text = fp.read()

# List every run of word characters, dots or hyphens found on both sides of an "@"
re.compile(r"[\w.-]+@[\w.-]+").findall(text)

## re_email.py
import re

# Put the text to scan between the quotes
text = ""

# List every run of word characters, dots or hyphens found on both sides of an "@"
re.compile(r"[\w.-]+@[\w.-]+").findall(text)

## indic_similar.py
from indicnlp.script import  indic_scripts as isc
from indicnlp.script import  phonetic_sim as psim

c1='क'
c2='ख'
c3='भ'
lang='hi'

print('Similarity between {} and {}'.format(c1,c2))
print(psim.cosine(

## indic_phonetic.py
from indicnlp.langinfo import *

# Character to inspect
c='आ'
# Language code ('hi' is Hindi)
lang='hi'

# Pair each output template with its check, then run and print them in order
checks = (
    ('Is vowel?:  {}', is_vowel),
    ('Is consonant?:  {}', is_consonant),
    ('Is velar?:  {}', is_velar),
)
for template, check in checks:
    print(template.format(check(c, lang)))

## indic_transliterate.py
from indicnlp.transliterate.unicode_transliterate import UnicodeIndicTransliterator

# Hindi input; in English: "Today the weather is good. Sun is bright and there are no signs of rain. Hence we can play today."
input_text='आज मौसम अच्छा है। सूरज उज्ज्वल है और बारिश के कोई संकेत नहीं हैं। इसलिए हम आज खेल सकते हैं!'

# Transliterate from Hindi ("hi") to Telugu ("te"), then print the result
transliterated = UnicodeIndicTransliterator.transliterate(input_text, "hi", "te")
print(transliterated)

## indic_sentence_split.py
from indicnlp.tokenize import sentence_tokenize

# Hindi sample text; each trailing backslash joins the next source line
# onto the literal without inserting a newline
indic_string="""तो क्या विश्व कप 2019 में मैच का बॉस टॉस है? यानी मैच में हार-जीत में \
टॉस की भूमिका अहम है? आप ऐसा सोच सकते हैं। विश्वकप के अपने-अपने पहले मैच में बुरी तरह हारने वाली एशिया की दो टीमों \
पाकिस्तान और श्रीलंका के कप्तान ने हालांकि अपने हार के पीछे टॉस की दलील तो नहीं दी, लेकिन यह जरूर कहा था कि वह एक अहम टॉस हार गए थे।"""

# Split the text into sentences; the language code "hi" is passed for Hindi
sentences=sentence_tokenize.sentence_split(indic_string, lang='hi')

# print the sentences

## indic_setup.py
import sys
from indicnlp import common

# The path to the local git repo for Indic NLP library
INDIC_NLP_LIB_HOME=r"indic_nlp_library"

# The path to the local git repo for Indic NLP Resources
INDIC_NLP_RESOURCES=r"indic_nlp_resources"

# Add library to Python path
	import re
	import nltk
	# Fetch the 'stopwords' corpus data through nltk
	nltk.download('stopwords')

	# Import the stopwords corpus from nltk
	from nltk.corpus import stopwords

	# Keep the English stop words in a set
	# NOTE(review): presumably the download above has to run before this lookup - confirm
	stop_words = set(stopwords.words('english'))

	def clean_text(text):
	import re

	# Set the name of the file to scan here
	with open("filename.txt", "r") as fp:
		# The read belongs inside the `with` body so it runs while the file is open
		text = fp.read()

	# List every run of word characters, dots or hyphens found on both sides of an "@"
	re.findall(r"[\w.-]+@[\w.-]+", text)
	import re

	# Put the text to scan between the quotes
	text = ""

	# List every run of word characters, dots or hyphens found on both sides of an "@"
	re.findall(r"[\w.-]+@[\w.-]+", text)
	from indicnlp.script import indic_scripts as isc
	from indicnlp.script import phonetic_sim as psim

	c1='क'
	c2='ख'
	c3='भ'
	lang='hi'

	print('Similarity between {} and {}'.format(c1,c2))
	print(psim.cosine(
	from indicnlp.langinfo import *

	# Character to inspect
	c='आ'
	# Language code ('hi' is Hindi)
	lang='hi'

	# Run each check on the character for the given language and print the result
	print('Is vowel?: {}'.format(is_vowel(c,lang)))
	print('Is consonant?: {}'.format(is_consonant(c,lang)))
	print('Is velar?: {}'.format(is_velar(c,lang)))
	from indicnlp.transliterate.unicode_transliterate import UnicodeIndicTransliterator

	# Hindi input; in English: "Today the weather is good. Sun is bright and there are no signs of rain. Hence we can play today."
	input_text='आज मौसम अच्छा है। सूरज उज्ज्वल है और बारिश के कोई संकेत नहीं हैं। इसलिए हम आज खेल सकते हैं!'

	# Transliterate from Hindi ("hi") to Telugu ("te") and print the result
	print(UnicodeIndicTransliterator.transliterate(input_text,"hi","te"))
	from indicnlp.tokenize import sentence_tokenize

	# Hindi sample text; each trailing backslash joins the next source line
	# onto the literal without inserting a newline
	indic_string="""तो क्या विश्व कप 2019 में मैच का बॉस टॉस है? यानी मैच में हार-जीत में \
	टॉस की भूमिका अहम है? आप ऐसा सोच सकते हैं। विश्वकप के अपने-अपने पहले मैच में बुरी तरह हारने वाली एशिया की दो टीमों \
	पाकिस्तान और श्रीलंका के कप्तान ने हालांकि अपने हार के पीछे टॉस की दलील तो नहीं दी, लेकिन यह जरूर कहा था कि वह एक अहम टॉस हार गए थे।"""

	# Split the text into sentences; the language code "hi" is passed for Hindi
	sentences=sentence_tokenize.sentence_split(indic_string, lang='hi')

	# print the sentences
	import sys
	from indicnlp import common

	# The path to the local git repo for Indic NLP library
	INDIC_NLP_LIB_HOME=r"indic_nlp_library"

	# The path to the local git repo for Indic NLP Resources
	INDIC_NLP_RESOURCES=r"indic_nlp_resources"

	# Add library to Python path