def freq_counting_bigram(bigramFinder):
    """
    This function will count how many times adjacent words co-occur as bigrams.
    arguments:
        input_text: "bigramFinder" of type "nltk.collocations.BigramCollocationFinder".
    return:
        value: "bigramFreqTable" of type "pandas DataFrame"
        containing bigrams and their corresponding frequencies.
    """
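The same adjacent-pair counting can be sketched without NLTK at all. The version below is a dependency-free stand-in that returns a `collections.Counter` keyed by bigram tuples rather than the pandas DataFrame the original builds; the token list and the return type are assumptions for illustration.

```python
from collections import Counter

def freq_counting_bigram(tokens):
    # Pair each token with its right neighbour and count the pairs.
    return Counter(zip(tokens, tokens[1:]))

table = freq_counting_bigram(["the", "cat", "sat", "the", "cat"])
# ("the", "cat") appears twice in the sample above
```

Sorting the Counter with `table.most_common()` gives the same frequency-ranked view that the DataFrame version would be sorted into.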
def initialize_ngram_finder(list_of_tokens):
    """
    This function will initialize n-gram finders for both bigrams & trigrams.
    arguments:
        input_text: "list_of_tokens" of type "list" containing all alphanumeric tokens for each article.
    return:
        value: "bigrams", "trigrams", "bigramFinder", "trigramFinder"
    """
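The bigram and trigram tuples themselves can be built with plain `zip`; the sketch below shows only that construction and omits the two NLTK finder objects (which the original presumably creates with `BigramCollocationFinder.from_words` and its trigram counterpart).

```python
def initialize_ngram_finder(list_of_tokens):
    # Slide windows of width 2 and 3 over the token list.
    bigrams = list(zip(list_of_tokens, list_of_tokens[1:]))
    trigrams = list(zip(list_of_tokens, list_of_tokens[1:], list_of_tokens[2:]))
    return bigrams, trigrams
```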
def list_tokens(Data):
    """
    This function will convert articles into a list of tokens (either alphabetic or numeric).
    arguments:
        input_text: "Data" of type "pandas DataFrame".
    return:
        value: "list_of_tokens" after splitting all the articles in the
        "Processed_Content" attribute into lists of tokens.
    """
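A minimal sketch of the same tokenization, assuming a plain list of article strings in place of the DataFrame's "Processed_Content" column: split on whitespace and keep only purely alphanumeric tokens.

```python
def list_tokens(articles):
    # Split each article on whitespace; drop tokens containing
    # punctuation or other non-alphanumeric characters.
    list_of_tokens = []
    for article in articles:
        list_of_tokens.extend(t for t in article.split() if t.isalnum())
    return list_of_tokens
```

With a real DataFrame the loop would iterate over `Data["Processed_Content"]` instead of a list.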
# The code for lemmatization
w_tokenizer = nltk.tokenize.WhitespaceTokenizer()
lemmatizer = nltk.stem.WordNetLemmatizer()

def lemmatization(text):
    """This function converts words to their root forms
    without explicitly cutting them down as done in stemming.
    arguments:
        input_text: "text" of type "String".
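`WordNetLemmatizer` consults the WordNet lexicon (and requires `nltk.download('wordnet')`). As a self-contained illustration of the idea, the toy stand-in below strips a couple of regular noun endings with hand-written rules; it is not real lemmatization, just a sketch of the word-to-root mapping.

```python
def lemmatization(text):
    # Toy rule-based stand-in for WordNetLemmatizer: undo two
    # regular plural patterns ("stories" -> "story", "cats" -> "cat").
    lemmas = []
    for word in text.split():
        if word.endswith("ies") and len(word) > 4:
            lemmas.append(word[:-3] + "y")
        elif word.endswith("s") and not word.endswith("ss") and len(word) > 3:
            lemmas.append(word[:-1])
        else:
            lemmas.append(word)
    return " ".join(lemmas)
```

The real lemmatizer also handles irregular forms ("mice" -> "mouse") that no suffix rule can capture, which is exactly why it needs a lexicon.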
# The code for spelling corrections
def spelling_correction(text):
    '''
    This function will correct spellings.
    arguments:
        input_text: "text" of type "String".
    return:
        value: Text after spellings are corrected.
    '''
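The docstring does not say which corrector the body uses (libraries such as TextBlob are common here). As a self-contained sketch of the principle, the toy version below snaps each word to a tiny vocabulary entry that is at most one substitution or one deletion away; the vocabulary and the `close` helper are illustrative assumptions.

```python
def spelling_correction(text, vocabulary=("their", "there", "where", "which")):
    # Toy corrector: replace a word with the first vocabulary entry
    # within edit distance one (substitution or deletion only).
    def close(a, b):
        if a == b:
            return True
        if len(a) == len(b):                      # one substitution
            return sum(x != y for x, y in zip(a, b)) == 1
        if len(a) == len(b) + 1:                  # one deletion
            return any(a[:i] + a[i + 1:] == b for i in range(len(a)))
        return False

    corrected = []
    for word in text.split():
        corrected.append(next((v for v in vocabulary if close(word, v)), word))
    return " ".join(corrected)
```

A production corrector would rank candidates by corpus frequency rather than take the first match.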
# The code for removing stopwords
stoplist = stopwords.words('english')
stoplist = set(stoplist)

def removing_stopwords(text):
    """This function will remove stopwords, which don't add much meaning to a sentence
    and can be removed safely without compromising the meaning of the sentence.
    arguments:
        input_text: "text" of type "String".
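The filtering step itself can be sketched with a hardcoded set standing in for NLTK's English list (which has roughly 180 entries and requires `nltk.download('stopwords')`):

```python
# Small stand-in for set(stopwords.words('english')).
stoplist = {"a", "an", "the", "is", "are", "to", "of", "and"}

def removing_stopwords(text):
    # Keep only tokens whose lowercase form is not a stopword.
    return " ".join(w for w in text.split() if w.lower() not in stoplist)
```

Converting the list to a `set` first, as the original does, makes each membership test O(1) instead of a linear scan.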
# The code for removing special characters
def removing_special_characters(text):
    """Removes all special characters except those matched by the regex,
    as they carry important meaning in the text provided.
    arguments:
        input_text: "text" of type "String".
    return:
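Since the snippet does not show the actual regex, the character class below (keeping letters, digits, whitespace, and a few meaning-bearing marks) is an assumption; the negated-class pattern is the standard shape for this kind of filter.

```python
import re

def removing_special_characters(text):
    # Delete every character NOT in the keep-set: letters, digits,
    # whitespace, and a few marks that carry meaning (. , ! ? % $).
    return re.sub(r"[^a-zA-Z0-9\s.,!?%$]", "", text)
```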
CONTRACTION_MAP = {
    "ain't": "is not",
    "aren't": "are not",
    "can't": "cannot",
    "can't've": "cannot have",
    "'cause": "because",
    "could've": "could have",
    "couldn't": "could not",
    "couldn't've": "could not have",
    "didn't": "did not",
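A map like this is typically applied with a single compiled regex. The `expand_contractions` helper below is a hypothetical name (the snippet does not show how the map is consumed); note the keys are sorted longest-first so that "can't've" wins over its prefix "can't".

```python
import re

CONTRACTION_MAP = {"can't": "cannot", "can't've": "cannot have", "didn't": "did not"}

def expand_contractions(text, mapping=CONTRACTION_MAP):
    # Build one alternation over all keys, longest first, and replace
    # each match with its expansion via the map.
    pattern = re.compile(
        "|".join(re.escape(k) for k in sorted(mapping, key=len, reverse=True)),
        flags=re.IGNORECASE,
    )
    return pattern.sub(lambda m: mapping[m.group(0).lower()], text)
```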
# Code for removing repeated characters and punctuations
def reducing_incorrect_character_repeatation(text):
    """
    This function will reduce repetition to two characters
    for alphabets and to one character for punctuation.
    arguments:
        input_text: "text" of type "String".
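The two-characters-for-letters, one-for-punctuation rule described above can be sketched with two backreference substitutions (the exact regexes are assumptions, since the body is not shown):

```python
import re

def reducing_incorrect_character_repeatation(text):
    # Cap runs of the same letter at two ("heyyyy" -> "heyy") ...
    text = re.sub(r"([A-Za-z])\1{2,}", r"\1\1", text)
    # ... and runs of the same punctuation mark at one ("!!!" -> "!").
    text = re.sub(r"([.,!?'\"])\1+", r"\1", text)
    return text
```

`\1` in the pattern matches another copy of whatever the group captured, which is what lets one rule cover every letter without enumerating them.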
# Code for text lowercasing
def lower_casing_text(text):
    """
    The function will convert text into lower case.
    arguments:
        input_text: "text" of type "String".
    return:
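The body is presumably a one-liner over `str.lower()`; a minimal sketch:

```python
def lower_casing_text(text):
    # Normalize case so "Apple" and "apple" count as the same token.
    return text.lower()
```

For multilingual text, `str.casefold()` is the more aggressive normalization (it handles cases like German "ß" -> "ss"), but for English preprocessing `lower()` is the conventional choice.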