This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Stemming
from nltk.stem.porter import PorterStemmer

# Single module-level stemmer instance, looked up by stemming() on each call.
porter_stemmer = PorterStemmer()
def stemming(text):
    """Return the stem of every token in ``text``.

    Args:
        text: Iterable of word tokens. A plain string would be stemmed
            character by character, because the comprehension iterates
            over ``text`` directly.

    Returns:
        A list with one stemmed token per input element, in input order.
    """
    return [porter_stemmer.stem(word) for word in text]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Remove stopwords from tokenized text
def remove_stopwords(text):
    """Drop every token found in the first 140 entries of ``stopwords``.

    Args:
        text: Iterable of tokens.

    Returns:
        A list of the tokens that are not in ``stopwords[0:140]``, in
        input order.
    """
    # Only the first 140 entries are treated as stopwords.
    # NOTE(review): the original comment suggests the entries after index 140
    # are contraction-style words (e.g. "don't", "haven't") that should be
    # kept -- confirm against the stopword list actually loaded.
    # Build the lookup once instead of re-slicing the list for every token.
    active_stopwords = set(stopwords[0:140])
    return [token for token in text if token not in active_stopwords]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Tokenization
def tokenization(text):
    """Split ``text`` into tokens on runs of whitespace.

    Args:
        text: The string to tokenize.

    Returns:
        A list of the whitespace-separated tokens. The list is empty when
        ``text`` is empty or contains only whitespace.
    """
    return text.split()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Remove punctuation such as . , ! $ ( ) * % @
def remove_punctuation(text):
    """Return ``text`` with every ``string.punctuation`` character removed.

    Args:
        text: The string to clean. It is iterated element by element, so
            each character is tested individually.

    Returns:
        A new string made of the elements of ``text`` that are not found in
        ``string.punctuation``, in their original order.
    """
    kept_chars = [char for char in text if char not in string.punctuation]
    return "".join(kept_chars)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Maps a column name to the list of aggregation function names to compute
# for it.
# NOTE(review): presumably passed to a pandas ``groupby(...).agg(...)`` call --
# confirm against the caller.
aggregation_dict = {
    # A dict literal keeps only the last value for a repeated key, so both
    # aggregations for this column must live in a single entry.
    'NUM_INSTALMENT_VERSION': ['nunique', 'max'],
    'DPD': ['max', 'mean', 'median', 'sum'],
    'DBD': ['max', 'mean', 'median', 'sum'],
    'PAYMENT_PERCENTAGE': ['max', 'mean', 'median', 'sum', 'var'],
    'PAYMENT_DIFFERENCE': ['max', 'mean', 'median', 'sum', 'var'],
    'AMT_INSTALMENT': ['max', 'mean', 'median', 'sum'],
    'AMT_PAYMENT': ['min', 'max', 'mean', 'median', 'sum'],
    'DAYS_ENTRY_PAYMENT': ['max', 'mean', 'median', 'sum'],
}
NewerOlder