# kajal yadav techykajal

## filter_tokens_POS_tags.py
def Pos_tagging(text):
    """
    Tag every token of every article with its part of speech using NLTK.

    arguments:
         text: iterable of articles; each article is handed unchanged to
               ``nltk.pos_tag`` (presumably a list of token strings --
               confirm against the caller).

    return:
        value: list with one entry per article, each entry being the result
               of ``nltk.pos_tag`` for that article.
    """
    # The previous version filled a local list but never returned it, so
    # every caller received None.
    return [nltk.pos_tag(article) for article in text]

## call_func.py
# Pre-processing for Content
List_Content = DF['Content_nGrams'].to_list()
Final_Article = []
for article in List_Content:
    # Cleaned text of the Content attribute after pre-processing
    Processed_Content = text_preprocessing(article)
    Final_Article.append(Processed_Content)
# Independent list holding the same cleaned articles; stored as a new column.
Complete_Content = [*Final_Article]
DF['Updated_content'] = Complete_Content
#print(Complete_Content)

## combined_func.py
# Writing main function to merge all the preprocessing steps.
def text_preprocessing(text, punctuations=True, token=True,
                       stop_words=True, apostrophe=False, verbs=False):
    """
    Entry point intended to merge the individual pre-processing steps.

    NOTE(review): the body visible here only builds the English stop-word
    set; none of the flags are consulted and nothing is returned, so the
    function currently yields None. The remaining steps are presumably
    defined elsewhere -- confirm before relying on the return value.
    """
    # De-duplicated English stop words taken from ``stopwords``.
    stoplist = set(stopwords.words('english'))


## tokenization.py
def tokenize_text(Updated_content):
    """
    Split the given text into tokens with ``word_tokenize``.

    arguments:
         Updated_content: text handed unchanged to ``word_tokenize``.

    return:
        value: list of the tokens produced by ``word_tokenize``.
    """
    tokens = word_tokenize(Updated_content)
    return list(tokens)

## remove_punct.py
def removing_special_characters(text):
    """Remove all special characters except the ones passed within the regex
    to match, as they have important meaning in the text provided.

    arguments:
         input_text: "text" of type "String".

    return:
        value: Text with the special characters that are not required removed.

    NOTE(review): this docstring was previously left unterminated, which
    swallowed the code that follows it, and no implementation is present
    in this file. Until the regex-based body is restored the function does
    nothing and returns None.
    """

## remove_stopwords.py
def removing_stopwords(text):
    """Remove stopwords, which do not add much meaning to a sentence and can
    be removed safely without compromising the meaning of the sentence.

    arguments:
         input_text: "text" of type "String".

    return:
        value: Text after all stopwords have been omitted.

    NOTE(review): this docstring was previously left unterminated, which
    swallowed the code that follows it, and no implementation is present
    in this file. Until the body is restored the function does nothing and
    returns None.
    """


## add_nGrams.py
def add_ngrams_to_input(Processed_content, Mapping):
    """
    Substitute every n-gram found in the text entries with its combined form.

    arguments:
         Processed_content: mutable sequence of strings; updated in place.
         Mapping: dict whose keys are the original n-grams and whose values
                  are the combined n-grams that replace them.

    return:
        value: the same ``Processed_content`` object after the replacements.
    """
    for position, _ in enumerate(Processed_content):
        # Replacements run in Mapping's iteration order, so an earlier key can
        # rewrite text that a later, overlapping key would otherwise match.
        for original, combined in Mapping.items():
            Processed_content[position] = (
                Processed_content[position].replace(original, combined)
            )
    return Processed_content
# Apply the n-gram mapping to the pre-processed content; the input is updated
# in place and the same object is bound to content_nGrams.
content_nGrams = add_ngrams_to_input(Processed_Content,Mapping)

## mapping_combined_grams.py
def mapping(n_grams_to_use, Combined_nGrams):
    """
    Build a lookup from each original n-gram to its combined counterpart.

    arguments:
         n_grams_to_use: sequence of original n-grams (used as keys).
         Combined_nGrams: sequence of combined n-grams (used as values).

    return:
        value: dict pairing the two sequences by position; the number of
               pairs is driven by the length of ``Combined_nGrams``.
    """
    return {
        n_grams_to_use[position]: joined
        for position, joined in enumerate(Combined_nGrams)
    }
# Lookup from each original n-gram to its combined form.
Mapping = mapping(n_grams_to_use, Combined_nGrams)
# Bare expression: echoes the dict in an interactive session; no effect in a script.
Mapping

## generating_nGrams.py
# Combine each n_Gram using '_'
def combined_n_Grams(n_grams_to_use):
    """
    Join the words of every n-gram with '_'.

    arguments:
         n_grams_to_use: sequence of n-gram strings whose words are
                         separated by single spaces.

    return:
        value: new list, in the same order, with every space replaced by '_'.
    """
    return [n_gram.replace(' ', '_') for n_gram in n_grams_to_use]
# Underscore-joined version of every n-gram, in the same order as n_grams_to_use.
Combined_nGrams = combined_n_Grams(n_grams_to_use)

## read_bigrams_trigrams.py
def read_nGrams():
    """
    This function will read bigrams & trigrams and
    return  list of n_Grams.

    NOTE(review): the body visible here only loads the two files through
    ``readFile`` and returns nothing; the step that combines and returns
    the n_Grams appears to be missing -- confirm against the full source.
    """
    # read  bigrams
    original_bigram = readFile("bigram.txt")
    # read trigrams
    original_trigram = readFile("trigram.txt")
	def Pos_tagging(text):
		"""
		Tag every token of every article with its part of speech using NLTK.

		arguments:
			text: iterable of articles; each article is handed unchanged to
				``nltk.pos_tag`` (presumably a list of token strings --
				confirm against the caller).

		return:
			value: list with one entry per article, each entry being the
				result of ``nltk.pos_tag`` for that article.
		"""
		# Body indentation restored. The previous version also never returned
		# the list it built, so callers always received None.
		return [nltk.pos_tag(article) for article in text]
	# Pre-processing for Content
	List_Content = DF['Content_nGrams'].to_list()
	Final_Article = []
	Complete_Content = []
	for article in List_Content:
		# Loop body indentation restored: both statements run once per article.
		# Cleaned text of the Content attribute after pre-processing
		Processed_Content = text_preprocessing(article)
		Final_Article.append(Processed_Content)
	# Copy the cleaned articles over and store them as a new column.
	Complete_Content.extend(Final_Article)
	DF['Updated_content'] = Complete_Content
	#print(Complete_Content)
	# Writing main function to merge all the preprocessing steps.
	def text_preprocessing(text, punctuations=True, token=True,
			stop_words=True, apostrophe=False, verbs=False):
		"""
		Entry point intended to merge the individual pre-processing steps.

		NOTE(review): the body visible here only builds the English
		stop-word set; none of the flags are consulted and nothing is
		returned, so the function currently yields None. The remaining
		steps are presumably defined elsewhere -- confirm before relying
		on the return value.
		"""
		# Body indentation restored so the statements belong to the function.
		stoplist = stopwords.words('english')
		stoplist = set(stoplist)
	def tokenize_text(Updated_content):
		"""
		Split the given text into tokens with ``word_tokenize``.

		arguments:
			Updated_content: text handed unchanged to ``word_tokenize``.

		return:
			value: list of the tokens produced by ``word_tokenize``.
		"""
		# Body indentation restored; list() replaces the identity comprehension.
		return list(word_tokenize(Updated_content))
	def removing_special_characters(text):
		"""Remove all special characters except the ones passed within the
		regex to match, as they have important meaning in the text provided.

		arguments:
			input_text: "text" of type "String".

		return:
			value: Text with the special characters that are not required removed.

		NOTE(review): this docstring was previously left unterminated and
		unindented, and no implementation is present in this file. Until
		the regex-based body is restored the function does nothing and
		returns None.
		"""
	def removing_stopwords(text):
		"""Remove stopwords, which do not add much meaning to a sentence and
		can be removed safely without compromising the meaning of the sentence.

		arguments:
			input_text: "text" of type "String".

		return:
			value: Text after all stopwords have been omitted.

		NOTE(review): this docstring was previously left unterminated and
		unindented, and no implementation is present in this file. Until
		the body is restored the function does nothing and returns None.
		"""
	def add_ngrams_to_input(Processed_content, Mapping):
		"""
		Substitute every n-gram found in the text entries with its combined form.

		arguments:
			Processed_content: mutable sequence of strings; updated in place.
			Mapping: dict whose keys are the original n-grams and whose
				values are the combined n-grams that replace them.

		return:
			value: the same ``Processed_content`` object after the replacements.
		"""
		# Nesting restored: the inner loop applies every mapping entry to each item.
		for position, _ in enumerate(Processed_content):
			# Replacements run in Mapping's iteration order, so an earlier key
			# can rewrite text that a later, overlapping key would otherwise match.
			for original, combined in Mapping.items():
				Processed_content[position] = (
					Processed_content[position].replace(original, combined)
				)
		return Processed_content
	# Apply the n-gram mapping to the pre-processed content and keep the result.
	content_nGrams = add_ngrams_to_input(Processed_Content,Mapping)
	def mapping(n_grams_to_use, Combined_nGrams):
		"""
		Build a lookup from each original n-gram to its combined counterpart.

		arguments:
			n_grams_to_use: sequence of original n-grams (used as keys).
			Combined_nGrams: sequence of combined n-grams (used as values).

		return:
			value: dict pairing the two sequences by position; the number of
				pairs is driven by the length of ``Combined_nGrams``.
		"""
		# Body indentation restored. Keys are still looked up by index, so a
		# shorter n_grams_to_use raises IndexError as before.
		return {
			n_grams_to_use[position]: joined
			for position, joined in enumerate(Combined_nGrams)
		}
	# Lookup from each original n-gram to its combined form.
	Mapping = mapping(n_grams_to_use, Combined_nGrams)
	# Bare expression: echoes the dict in an interactive session; no effect in a script.
	Mapping
	# Combine each n_Gram using '_'
	def combined_n_Grams(n_grams_to_use):
		"""
		Join the words of every n-gram with '_'.

		arguments:
			n_grams_to_use: sequence of n-gram strings whose words are
				separated by single spaces.

		return:
			value: new list, in the same order, with every space replaced by '_'.
		"""
		# Body indentation restored; the index loop is now a comprehension.
		return [n_gram.replace(' ', '_') for n_gram in n_grams_to_use]
	# Underscore-joined version of every n-gram, in the same order as n_grams_to_use.
	Combined_nGrams = combined_n_Grams(n_grams_to_use)
	def read_nGrams():
	"""
	This function will read bigrams & trigrams and
	return list of n_Grams.
	"""
	# read bigrams
	original_bigram = readFile("bigram.txt")
	# read trigrams
	original_trigram = readFile("trigram.txt")