Skip to content

Instantly share code, notes, and snippets.

View prafulgondane's full-sized avatar

prafulgondane

View GitHub Profile
# Stemming
from nltk.stem.porter import PorterStemmer
# One shared stemmer instance, reused by every call to stemming().
porter_stemmer = PorterStemmer()


def stemming(text):
    """Return the Porter stem of every word in *text*.

    Args:
        text: Iterable of words; each item is passed to
            ``porter_stemmer.stem`` unchanged.

    Returns:
        A list with one stemmed entry per input word, in input order.
    """
    return [porter_stemmer.stem(word) for word in text]
# Remove stopwords from tokenized text
def remove_stopwords(text, *, limit=140):
    """Return the tokens of *text* that are not among the leading stopwords.

    Only the first *limit* entries of the module-level ``stopwords``
    sequence are treated as stopwords; later entries are ignored.

    Args:
        text: Iterable of tokens to filter.
        limit: Number of leading ``stopwords`` entries to match against.
            Defaults to 140, the cut-off this function has always used.

    Returns:
        A list of the surviving tokens in their original order.
    """
    # Build the lookup once per call instead of re-slicing the stopword
    # sequence for every token, and use a set so each test is O(1).
    # NOTE(review): assumes stopwords entries and tokens are hashable
    # (e.g. str) -- confirm against where `stopwords` is defined.
    active_stopwords = frozenset(stopwords[:limit])
    return [token for token in text if token not in active_stopwords]
# Tokenization
def tokenization(text):
    """Split *text* into tokens on runs of whitespace.

    Args:
        text: The string to tokenize.

    Returns:
        A list of the whitespace-separated pieces of *text*. Leading,
        trailing and repeated whitespace produce no empty tokens, so an
        empty or all-whitespace string yields an empty list.
    """
    return text.split()
# Removing punctuations like . , ! $( ) * % @
def remove_punctuation(text):
    """Return *text* with every ASCII punctuation character removed.

    The characters dropped are exactly those in ``string.punctuation``;
    all other characters, including whitespace, are kept in order.

    Args:
        text: The string to clean.

    Returns:
        A new string without punctuation characters.
    """
    # A deletion-only translation table strips all punctuation in a
    # single pass instead of testing and re-joining character by character.
    deletion_table = str.maketrans("", "", string.punctuation)
    return text.translate(deletion_table)
aggregation_dict = {
'NUM_INSTALMENT_VERSION': ['nunique'],
'NUM_INSTALMENT_VERSION': ['max'],
'DPD': ['max', 'mean', 'median', 'sum'],
'DBD': ['max', 'mean', 'median', 'sum'],
'PAYMENT_PERCENTAGE': ['max', 'mean', 'median', 'sum', 'var'],
'PAYMENT_DIFFERENCE': ['max', 'mean', 'median', 'sum', 'var'],
'AMT_INSTALMENT': ['max', 'mean', 'median', 'sum'],
'AMT_PAYMENT': ['min', 'max', 'mean', 'median', 'sum'],
'DAYS_ENTRY_PAYMENT': ['max', 'mean', 'median', 'sum']