This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read 'secondary1.txt' and collect the payload of every line that starts
# with "Se" (the "Seq: " records) into `seq`.
# `inf`, `typ` and `information` are initialised here but are not filled in
# by the part of the script that is visible.
seq = []
inf = []
typ = []
information = []
with open('secondary1.txt', "r") as file:
    # Iterate the file object directly; readlines() would load every line
    # into memory first for no benefit.
    for line in file:
        if line.startswith('Se'):
            # Strip only the line terminator. Slicing with [:-1] also cut a
            # real character off the final line when the file did not end
            # with a newline.
            data = line.rstrip("\n").replace('Seq: ', "")
            seq.append(data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix

# Score the predictions against the held-out labels: overall accuracy
# first, then the confusion matrix and the per-class report.
print("Accuracy:", metrics.accuracy_score(y_true=y_test, y_pred=y_pred))
print(confusion_matrix(y_true=y_test, y_pred=y_pred))
print(classification_report(y_true=y_test, y_pred=y_pred))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn import svm

# Support vector classifier with a linear kernel, fitted on the filtered
# training features (fit() returns the estimator itself).
clf = svm.SVC(kernel='linear').fit(X_train_filter, y_train)

# Predicted labels for the filtered test features.
y_pred = clf.predict(X_test_filter)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Remove the columns listed in `corr_features` from both splits.
# DataFrame.drop returns a new frame rather than modifying in place, so the
# result has to be bound back to the name; the bare calls discarded it and
# left both frames unchanged.
X_train_filter = X_train_filter.drop(corr_features, axis=1)
X_test_filter = X_test_filter.drop(corr_features, axis=1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def correlation(dataset, threshold):
    """Return the names of columns that are highly correlated with an earlier column.

    The pairwise correlation matrix of *dataset* is computed with
    ``dataset.corr()``. A column is reported when the absolute value of its
    correlation with at least one column positioned before it exceeds
    *threshold*. The first column of each correlated group is therefore
    never reported, only the later ones.

    Args:
        dataset: Object exposing ``corr()`` that returns a square,
            column-labelled correlation matrix (e.g. a pandas DataFrame).
        threshold: Absolute correlation value that must be strictly
            exceeded for a column to be reported.

    Returns:
        A set with the names of the correlated columns (empty when none
        qualify).
    """
    corr_matrix = dataset.corr()
    col_corr = set()  # names of the correlated columns
    for i, colname in enumerate(corr_matrix.columns):
        # Row i restricted to columns 0..i-1 is the strictly lower triangle:
        # each pair is examined once and the diagonal (always 1) is skipped.
        earlier = corr_matrix.iloc[i, :i]
        # The sign of the coefficient is irrelevant, only its magnitude.
        if (earlier.abs() > threshold).any():
            col_corr.add(colname)
    return col_corr
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import seaborn as sns
import matplotlib.pyplot as plt

# Pairwise correlation of the filtered training features
# (DataFrame.corr() defaults to the Pearson coefficient).
cor = X_train_filter.corr()

# Draw the matrix as an annotated heatmap on a 12x10 inch figure.
plt.figure(figsize=(12, 10))
sns.heatmap(cor, annot=True, cmap=plt.cm.CMRmap_r)
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.feature_selection import VarianceThreshold

# Remove constant (zero-variance) features. The selector is fitted on the
# training split only and then applied to both splits.
constant_filter = VarianceThreshold(threshold=0)
constant_filter.fit(X_train)

# transform() returns a plain NumPy array, which loses the column names and
# has no .corr() / .drop(..., axis=1). Rebuild DataFrames with the names of
# the surviving columns so later steps can keep using the pandas API.
# NOTE(review): assumes X_train / X_test are pandas DataFrames - confirm.
kept_columns = X_train.columns[constant_filter.get_support()]
X_train_filter = pd.DataFrame(
    constant_filter.transform(X_train),
    columns=kept_columns,
    index=X_train.index,
)
X_test_filter = pd.DataFrame(
    constant_filter.transform(X_test),
    columns=kept_columns,
    index=X_test.index,
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.model_selection import train_test_split

# Split features and labels into train and test parts: 30% of the rows go
# to the test set, and the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Label vector: the "target" column.
y = data["target"]
# Feature matrix: every column except "target".
X = data.drop(columns="target")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd

# Build the feature table: the first entry of featuresExtraction supplies
# the column names, the remaining entries are the rows.
data = pd.DataFrame(data=featuresExtraction[1:], columns=featuresExtraction[0])

# Attach the labels as an extra "target" column.
data['target'] = target
NewerOlder