prafulgondane

## stem.py
# Stemming
from nltk.stem.porter import PorterStemmer
# Shared stemmer instance; stemming() below calls its .stem() on each token.
porter_stemmer = PorterStemmer()

def stemming(text):
    """Return a list holding the Porter stem of every item in ``text``.

    ``text`` is iterated item by item, so it is expected to be a sequence
    of tokens; iterating a plain string would stem each character on its
    own.  The module-level ``porter_stemmer`` does the actual stemming.
    """
    stemmed = []
    for token in text:
        stemmed.append(porter_stemmer.stem(token))
    return stemmed

## stopwords.py
# Remove stopwords from tokenized text
def remove_stopwords(text):
    """Return the tokens of ``text`` that are not among the first 140 stopwords.

    ``text`` is an iterable of tokens.  A new list is returned; the input is
    not modified.  ``stopwords`` is a module-level sequence defined outside
    this function.
    """
    # Only the first 140 entries are used.  Per the original author's note,
    # the entries after index 140 are the "don't" / "haven't" kind of words,
    # which are deliberately left in the text -- TODO confirm against the
    # stopword list actually in use.
    # The slice is taken once instead of once per token, and stored as a set
    # so each membership test is constant-time (tokens must be hashable).
    active_stopwords = frozenset(stopwords[0:140])
    return [token for token in text if token not in active_stopwords]

## token.py
# Tokenization
def tokenization(text):
    """Return the list produced by ``text.split()``.

    ``split`` is called without arguments, which for a ``str`` separates on
    runs of whitespace and yields no empty strings.
    """
    return text.split()

## punctuation.py
# Remove punctuation characters such as . , ! $ ( ) * % @

def remove_punctuation(text):
    """Return ``text`` with its punctuation removed.

    ``text`` is walked item by item (character by character for a ``str``);
    items found in ``string.punctuation`` are dropped and the remaining ones
    are joined back into a single string.
    """
    kept = []
    for char in text:
        if char in string.punctuation:
            continue
        kept.append(char)
    return "".join(kept)

## gist:6087ca268b058d0d084c5f967a56b1b7
# Presumably maps a column name to the aggregation function names to apply
# to that column (e.g. for a pandas ``groupby().agg()``) -- confirm at the
# call site.
# NOTE(review): 'NUM_INSTALMENT_VERSION' used to be listed twice (first with
# ['nunique'], then with ['max']).  A dict literal keeps only the last value
# of a repeated key, so 'nunique' was silently dropped.  Both aggregations are
# merged into one entry here; if the second key was meant to name a different
# column, rename it instead.
# NOTE(review): the closing brace was missing after the last entry; confirm
# that no further entries were lost.
aggregation_dict = {
    'NUM_INSTALMENT_VERSION': ['nunique', 'max'],
    'DPD': ['max', 'mean', 'median', 'sum'],
    'DBD': ['max', 'mean', 'median', 'sum'],
    'PAYMENT_PERCENTAGE': ['max', 'mean', 'median', 'sum', 'var'],
    'PAYMENT_DIFFERENCE': ['max', 'mean', 'median', 'sum', 'var'],
    'AMT_INSTALMENT': ['max', 'mean', 'median', 'sum'],
    'AMT_PAYMENT': ['min', 'max', 'mean', 'median', 'sum'],
    'DAYS_ENTRY_PAYMENT': ['max', 'mean', 'median', 'sum'],
}
	# Stemming
	from nltk.stem.porter import PorterStemmer
	# Shared stemmer instance; stemming() calls its .stem() on each token.
	porter_stemmer = PorterStemmer()

	def stemming(text):
		"""Return a list holding the Porter stem of every item in ``text``.

		``text`` is iterated item by item, so it is expected to be a sequence
		of tokens.  The body's indentation, lost in the original paste, is
		restored here.
		"""
		return [porter_stemmer.stem(word) for word in text]
	# Remove stopwords from tokenized text
	def remove_stopwords(text):
		"""Return the tokens of ``text`` that are not among the first 140 stopwords.

		``text`` is an iterable of tokens; a new list is returned.  ``stopwords``
		is a sequence defined outside this function.
		"""
		# Only the first 140 entries are used.  Per the original author's note,
		# later entries are the "don't" / "haven't" kind of words, which are
		# deliberately left in the text -- TODO confirm against the list in use.
		# The slice is taken once (not once per token) and stored as a set for
		# constant-time membership tests (tokens must be hashable).
		active_stopwords = frozenset(stopwords[0:140])
		return [token for token in text if token not in active_stopwords]
	# Tokenization
	def tokenization(text):
		"""Return the list produced by ``text.split()``.

		``split`` is called without arguments, which for a ``str`` separates
		on runs of whitespace and yields no empty strings.
		"""
		return text.split()
	# Remove punctuation characters such as . , ! $ ( ) * % @

	def remove_punctuation(text):
		"""Return ``text`` with its punctuation removed.

		Items of ``text`` (characters, for a ``str``) that appear in
		``string.punctuation`` are dropped; the rest are joined into one string.
		"""
		return "".join([char for char in text if char not in string.punctuation])
	aggregation_dict = {
	'NUM_INSTALMENT_VERSION': ['nunique'],
	'NUM_INSTALMENT_VERSION': ['max'],
	'DPD': ['max', 'mean', 'median', 'sum'],
	'DBD': ['max', 'mean', 'median', 'sum'],
	'PAYMENT_PERCENTAGE': ['max', 'mean', 'median', 'sum', 'var'],
	'PAYMENT_DIFFERENCE': ['max', 'mean', 'median', 'sum', 'var'],
	'AMT_INSTALMENT': ['max', 'mean', 'median', 'sum'],
	'AMT_PAYMENT': ['min', 'max', 'mean', 'median', 'sum'],
	'DAYS_ENTRY_PAYMENT': ['max', 'mean', 'median', 'sum']