Skip to content

Instantly share code, notes, and snippets.

View rachidelfermi's full-sized avatar

rachid elfermi rachidelfermi

View GitHub Profile
@rachidelfermi
rachidelfermi / transformTrna.py
Created October 5, 2021 21:28
Transform tRNA secondary-structure records into a multi-FASTA file
# Accumulators for the records read from the tRNA secondary-structure dump.
seq = []          # sequences taken from the lines that start with "Se" ("Seq: ...")
inf = []          # not filled in the visible part of the script
typ = []          # not filled in the visible part of the script
information = []  # not filled in the visible part of the script

with open('secondary1.txt', "r") as file:
    # Iterate the file object directly: lines are streamed instead of the
    # whole file being loaded into memory by readlines().
    for line in file:
        if line.startswith('Se'):
            # Remove only the line terminator. Slicing with [:-1] would also
            # cut the last base of a final line that has no trailing newline.
            data = line.rstrip("\n").replace('Seq: ', "")
            seq.append(data)
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix

# Evaluation of the predictions (y_pred) against the held-out labels (y_test).
# Accuracy answers "how often is the classifier correct?".
print("Accuracy:", metrics.accuracy_score(y_true=y_test, y_pred=y_pred))
# Confusion matrix of true labels versus predicted labels.
print(confusion_matrix(y_true=y_test, y_pred=y_pred))
# Text report produced by scikit-learn's classification_report.
print(classification_report(y_true=y_test, y_pred=y_pred))
from sklearn import svm

# Support-vector classifier with a linear kernel.
clf = svm.SVC(kernel='linear')

# fit() returns the estimator itself, so training on the filtered training
# split and predicting the filtered test split can be chained in one step.
y_pred = clf.fit(X_train_filter, y_train).predict(X_test_filter)
# DataFrame.drop returns a new frame unless inplace=True is passed, so the
# result must be bound back to the name; otherwise the columns listed in
# corr_features are never actually removed from either split.
X_train_filter = X_train_filter.drop(corr_features, axis=1)
X_test_filter = X_test_filter.drop(corr_features, axis=1)
def correlation(dataset, threshold):
    """Return the names of columns that are strongly correlated with an earlier column.

    The pairwise correlation matrix of ``dataset`` is computed with
    ``dataset.corr()``. A column is reported when the absolute value of its
    correlation with at least one column positioned before it exceeds
    ``threshold``. Only the later column of each correlated pair is reported.

    Args:
        dataset: Object exposing ``.corr()`` that returns a square, labelled
            correlation matrix (e.g. a pandas DataFrame).
        threshold: Absolute correlation above which (strictly) a column is
            flagged.

    Returns:
        A set with the names of the flagged columns (empty if none).
    """
    col_corr = set()  # names of the correlated columns
    corr_matrix = dataset.corr()
    for i, colname in enumerate(corr_matrix.columns):
        # Only the lower triangle (j < i) is inspected, so each pair is checked
        # once and the diagonal (self-correlation) is never considered.
        # The sign is ignored: strong negative correlation counts as well.
        if any(abs(corr_matrix.iloc[i, j]) > threshold for j in range(i)):
            col_corr.add(colname)
    return col_corr
import seaborn as sns
import matplotlib.pyplot as plt

# Pairwise correlation of the filtered training features
# (DataFrame.corr uses Pearson correlation by default).
cor = X_train_filter.corr()

# Draw the matrix as an annotated heatmap on a 12x10 inch figure.
plt.figure(figsize=(12, 10))
sns.heatmap(data=cor, cmap=plt.cm.CMRmap_r, annot=True)
plt.show()
import pandas as pd
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.feature_selection import VarianceThreshold

# Remove constant (zero-variance) features. The filter is fitted on the
# training split only and then applied to both splits.
constant_filter = VarianceThreshold(threshold=0)
constant_filter.fit(X_train)

# transform() returns a plain NumPy array by default, which loses the column
# names and has neither .corr() nor .drop(), both of which are called on the
# filtered splits elsewhere in this file. Rebuild DataFrames that keep the
# surviving column labels and the original row index.
kept_columns = X_train.columns[constant_filter.get_support()]
X_train_filter = pd.DataFrame(
    constant_filter.transform(X_train),
    columns=kept_columns,
    index=X_train.index,
)
X_test_filter = pd.DataFrame(
    constant_filter.transform(X_test),
    columns=kept_columns,
    index=X_test.index,
)
from sklearn.model_selection import train_test_split

# Split features and labels into train and test parts: 30 % of the rows go to
# the test part, and random_state=0 makes the shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
# Label vector: the "target" column.
y = data["target"]
# Feature matrix: every column except "target".
X = data.drop(columns="target")
import pandas as pd

# The first entry of featuresExtraction supplies the column names and the
# remaining entries are the rows; the labels are appended as a "target" column.
data = pd.DataFrame(
    data=featuresExtraction[1:],
    columns=featuresExtraction[0],
).assign(target=target)