This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def j_score(y_true, y_pred):
    """Mean sample-wise Jaccard similarity between two binary indicator matrices.

    Parameters
    ----------
    y_true, y_pred : array-like / DataFrame of shape (n_samples, n_labels)
        Binary (0/1) multi-label indicator rows.

    Returns
    -------
    float
        Mean per-row intersection-over-union, scaled to [0, 100].
    """
    intersection = np.asarray(np.minimum(y_true, y_pred).sum(axis=1), dtype=float)
    union = np.asarray(np.maximum(y_true, y_pred).sum(axis=1), dtype=float)
    # Guard the 0/0 case: a row where both truth and prediction are all-zero
    # previously produced NaN (poisoning the mean); score such an exact empty
    # match as a perfect 1.0 instead.
    safe_union = np.where(union == 0, 1.0, union)
    jaccard = np.where(union == 0, 1.0, intersection / safe_union)
    return jaccard.mean() * 100
def print_score(y_pred, y_test, clf):
    """Report a fitted classifier's multi-label scores to stdout.

    Prints the classifier's class name, the sample-wise Jaccard score
    (via ``j_score`` on DataFrame-wrapped inputs), and the macro-averaged F1.
    Returns nothing; output goes to stdout only.
    """
    clf_name = clf.__class__.__name__
    print("Clf: ", clf_name)
    jaccard = j_score(pd.DataFrame(y_test), pd.DataFrame(y_pred))
    print("Jaccard score: {}".format(jaccard))
    macro_f1 = f1_score(y_test, y_pred, average='macro')
    print("F1 Score : {}".format(macro_f1))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.multiclass import OneVsRestClassifier | |
from sklearn import model_selection | |
from sklearn.metrics import accuracy_score, classification_report, f1_score, roc_auc_score | |
from sklearn.metrics import multilabel_confusion_matrix | |
### OneVsRestClassifier | |
def train_model(classifier,X, y, max_feature = 1000, embedding= 'bow' ):
    """Train `classifier` on text features built from X against labels y.

    Parameters
    ----------
    classifier : estimator to fit (presumably wrapped in OneVsRestClassifier
        per the import above — TODO confirm; body is truncated in this view).
    X, y : raw text inputs and multi-label targets.
    max_feature : vocabulary cap forwarded to the vectorizer.
    embedding : feature type, defaults to 'bow'.
    """
    # Train-test split
    print("... Performing train test split")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn import feature_extraction | |
def get_embeddings(X_train, X_test, max_feature = 1000, embedding_type = "tfidf"):
    """Vectorize train/test text into dense feature arrays.

    Only the 'bow' branch is visible in this fragment; the default
    'tfidf' path is presumably handled below — TODO confirm.
    """
    if embedding_type == "bow":
        vectorizer = feature_extraction.text.CountVectorizer(max_features= max_feature)
        # NOTE(review): the returned array is discarded here — only the `fit`
        # side effect is used; the dense matrices come from the transform
        # calls below. `vectorizer.fit(X_train)` would avoid the extra work.
        vectorizer.fit_transform(X_train).toarray()
        train_feat = vectorizer.transform(X_train).toarray()
        test_feat = vectorizer.transform(X_test).toarray()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk | |
from nltk.tokenize import word_tokenize | |
from nltk.corpus import stopwords | |
def process_txt(input, stemm = False,lemm = True):
    """Clean and tokenize a raw text string.

    NOTE(review): the parameter name `input` shadows the builtin; the
    `stemm`/`lemm` flags are presumably consumed further down — the body
    is truncated in this view.
    """
    # Clean input data (regex substitutions, lowercasing — see clean())
    processed_text = clean(input)
    # Tokenization
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import string | |
def clean(input_str):
    """Lowercase `input_str` and apply the module-level SUBSTITUTIONS rules.

    Each entry of SUBSTITUTIONS is a (pattern, replacement) pair applied
    with re.sub in list order, so earlier rules can shadow later ones.
    """
    input_str = input_str.lower()
    for sub in SUBSTITUTIONS:
        input_str = re.sub(sub[0], sub[1], input_str)
    # Eliminate punctuation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SUBSTITUTIONS = [ | |
(r'\d+', ''), # Delete digits | |
(r"n't", " not "), # Replace pattern n't -> not | |
(r"can't", "cannot "), # Replace pattern can't -> cannot | |
(r"what's", "what is "), # Replace pattern what's -> what is | |
(r"\'s", " "), # Delete pattern 's | |
(r"\'ve", " have "), # Replace pattern 've -> have | |
(r"\'re", " are "), # Replace pattern 're -> are | |
(r"\'d", " would "), # Replace pattern 'd -> would | |
(r"\'ll", " will "), # Replace pattern 'll -> will |