# rachid elfermi (rachidelfermi)

## transformTrna.py
# Module-level accumulators.  Only ``seq`` is filled by this snippet; the
# other three lists are created empty and left untouched here.
seq = []
inf = []
typ = []
information = []

# Collect the text of every line that starts with "Se", with the "Seq: "
# label removed.
with open('secondary1.txt', "r") as file:
    # Iterate the file object directly so lines are streamed instead of being
    # loaded into memory all at once.
    for line in file:
        if line.startswith('Se'):
            # Strip only the line terminator: slicing with [:-1] would cut a
            # real character off a final line that has no trailing newline.
            data = line.rstrip('\n').replace('Seq: ', "")
            seq.append(data)

## svmacc.py
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix
# Accuracy score of the predictions against the true test labels.
print("Accuracy:", metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

# Confusion matrix first, then the textual classification report.
print(confusion_matrix(y_true=y_test, y_pred=y_pred))
print(classification_report(y_true=y_test, y_pred=y_pred))

## svm.py
from sklearn import svm

# Support-vector classifier with a linear kernel, fitted on the filtered
# training features (fit() returns the estimator itself).
clf = svm.SVC(kernel='linear').fit(X_train_filter, y_train)

# Labels predicted for the filtered test features.
y_pred = clf.predict(X_test_filter)

## drop.py
# DataFrame.drop returns a new frame and leaves the original untouched, so the
# result must be bound back to the name for the columns to actually be removed.
X_train_filter = X_train_filter.drop(corr_features, axis=1)
X_test_filter = X_test_filter.drop(corr_features, axis=1)

## cor.py
def correlation(dataset, threshold):
    """Return the names of columns that correlate strongly with an earlier column.

    The pairwise correlation matrix of ``dataset`` is computed with
    ``dataset.corr()``.  A column is reported when the absolute value of its
    correlation with at least one column positioned before it in the matrix
    is strictly greater than ``threshold``.

    Args:
        dataset: Object exposing ``corr()`` (e.g. a pandas DataFrame).
        threshold: Cut-off applied to the absolute correlation coefficient.

    Returns:
        A set with the names of the flagged columns.
    """
    strength = dataset.corr().abs()  # the sign of the coefficient is irrelevant
    flagged = set()
    for position, name in enumerate(strength.columns):
        # Only the entries left of the diagonal: each pair is inspected once
        # and the later column of the pair is the one that gets flagged.
        if (strength.iloc[position, :position] > threshold).any():
            flagged.add(name)
    return flagged

## heatmap.py
import seaborn as sns
import matplotlib.pyplot as plt

# Annotated heatmap of the Pearson correlations between the training features,
# drawn on a 12x10 inch figure.
plt.figure(figsize=(12, 10))
cor = X_train_filter.corr(method="pearson")
sns.heatmap(data=cor, annot=True, cmap=plt.cm.CMRmap_r)
plt.show()

## constant.py
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.feature_selection import VarianceThreshold
import pandas as pd

# Remove zero-variance (constant) features.  The filter is fitted on the
# training split only and then applied to both splits.
constant_filter = VarianceThreshold(threshold=0)
constant_filter.fit(X_train)

# transform() returns a bare NumPy array by default, which drops the column
# labels.  The later snippets call .corr() and .drop(..., axis=1) on these
# objects, so rebuild labelled DataFrames using the mask of retained columns.
_kept_columns = X_train.columns[constant_filter.get_support()]
X_train_filter = pd.DataFrame(
    constant_filter.transform(X_train),
    columns=_kept_columns,
    index=X_train.index,
)
X_test_filter = pd.DataFrame(
    constant_filter.transform(X_test),
    columns=_kept_columns,
    index=X_test.index,
)

## test_split.py
# separate dataset into train and test
from sklearn.model_selection import train_test_split
# Hold out 30% of the samples for testing; the fixed random_state makes the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

## split.py
# Feature matrix: every column except "target".
X = data.drop(columns="target")
# Label vector: the "target" column on its own.
y = data.loc[:, "target"]

## pandas.py
import pandas as pd
# The first entry of featuresExtraction supplies the column names; the
# remaining entries become the rows.
data = pd.DataFrame(data=featuresExtraction[1:], columns=featuresExtraction[0])
# Attach the labels as an extra "target" column.
data["target"] = target
	# Module-level accumulators.  Only ``seq`` is filled by this snippet; the
	# other three lists are created empty and left untouched here.
	seq = []
	inf = []
	typ = []
	information = []

	# Collect the text of every line that starts with "Se", with the "Seq: "
	# label removed.  The with/for/if nesting is restored here; the flattened
	# form was not valid Python.
	with open('secondary1.txt', "r") as file:
	    # Iterate the file object directly so lines are streamed.
	    for line in file:
	        if line.startswith('Se'):
	            # Strip only the line terminator: slicing with [:-1] would cut
	            # a real character off a final line without a trailing newline.
	            data = line.rstrip('\n').replace('Seq: ', "")
	            seq.append(data)
	from sklearn import metrics
	from sklearn.metrics import classification_report, confusion_matrix
	# Accuracy score of the predictions against the true test labels.
	print("Accuracy:", metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

	# Confusion matrix first, then the textual classification report.
	print(confusion_matrix(y_true=y_test, y_pred=y_pred))
	print(classification_report(y_true=y_test, y_pred=y_pred))
	from sklearn import svm

	# Support-vector classifier with a linear kernel, fitted on the filtered
	# training features (fit() returns the estimator itself).
	clf = svm.SVC(kernel='linear').fit(X_train_filter, y_train)

	# Labels predicted for the filtered test features.
	y_pred = clf.predict(X_test_filter)
	# DataFrame.drop returns a new frame and leaves the original untouched, so
	# the result must be bound back to the name for the columns to be removed.
	X_train_filter = X_train_filter.drop(corr_features, axis=1)
	X_test_filter = X_test_filter.drop(corr_features, axis=1)
	def correlation(dataset, threshold):
	    """Return the names of columns that correlate strongly with an earlier column.

	    The pairwise correlation matrix of ``dataset`` is computed with
	    ``dataset.corr()``.  A column is reported when the absolute value of
	    its correlation with at least one column positioned before it in the
	    matrix is strictly greater than ``threshold``.

	    Args:
	        dataset: Object exposing ``corr()`` (e.g. a pandas DataFrame).
	        threshold: Cut-off applied to the absolute correlation coefficient.

	    Returns:
	        A set with the names of the flagged columns.
	    """
	    strength = dataset.corr().abs()  # the sign of the coefficient is irrelevant
	    flagged = set()
	    for position, name in enumerate(strength.columns):
	        # Only the entries left of the diagonal: each pair is inspected
	        # once and the later column of the pair is the one flagged.
	        if (strength.iloc[position, :position] > threshold).any():
	            flagged.add(name)
	    return flagged
	import seaborn as sns
	import matplotlib.pyplot as plt

	# Annotated heatmap of the Pearson correlations between the training
	# features, drawn on a 12x10 inch figure.
	plt.figure(figsize=(12, 10))
	cor = X_train_filter.corr(method="pearson")
	sns.heatmap(data=cor, annot=True, cmap=plt.cm.CMRmap_r)
	plt.show()
	from sklearn.feature_selection import SelectKBest, f_classif
	from sklearn.feature_selection import VarianceThreshold
	import pandas as pd

	# Remove zero-variance (constant) features.  The filter is fitted on the
	# training split only and then applied to both splits.
	constant_filter = VarianceThreshold(threshold=0)
	constant_filter.fit(X_train)

	# transform() returns a bare NumPy array by default, which drops the column
	# labels.  The other snippets call .corr() and .drop(..., axis=1) on these
	# objects, so rebuild labelled DataFrames using the mask of retained columns.
	_kept_columns = X_train.columns[constant_filter.get_support()]
	X_train_filter = pd.DataFrame(
	    constant_filter.transform(X_train),
	    columns=_kept_columns,
	    index=X_train.index,
	)
	X_test_filter = pd.DataFrame(
	    constant_filter.transform(X_test),
	    columns=_kept_columns,
	    index=X_test.index,
	)
	# separate dataset into train and test
	from sklearn.model_selection import train_test_split
	# Hold out 30% of the samples for testing; the fixed random_state makes the
	# split repeatable between runs.
	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
	# Feature matrix: every column except "target".
	X = data.drop(columns="target")
	# Label vector: the "target" column on its own.
	y = data.loc[:, "target"]
	import pandas as pd
	# The first entry of featuresExtraction supplies the column names; the
	# remaining entries become the rows.
	data = pd.DataFrame(data=featuresExtraction[1:], columns=featuresExtraction[0])
	# Attach the labels as an extra "target" column.
	data["target"] = target