Skip to content

Instantly share code, notes, and snippets.

@jsun
jsun / calc_cdna_len.py
Last active February 6, 2019 09:20
Calculation of non-overlapping exon length with GTF file.
import sys
import re
## Description:
## Calculation of non-overlapping exon length with GFF file for each gene.
##
## e.g.) Gene G has four transcripts: Ga, Gb, Gc, and Gd. The four transcripts
## have different numbers of exons and different combinations of exons.
## Some regions are shared by the four transcripts, and some regions
## are used by only a single transcript. This information is saved in
@jsun
jsun / convert_gtf.py
Last active August 31, 2018 03:46
Convert coordinates in GTF file with VCF file.
import sys
import os
## Description:
## This script is used for converting the coordinates in GTF file
## with the VCF file.
##
## Usage:
## python convert_gtf.py sampl.vcf sampl.gtf > modified_sampl.gtf
# NOTE(review): fetch_mldata was deprecated in scikit-learn 0.20 and removed in
# 0.22; fetch_openml is the documented replacement. The call that uses this
# import is not visible in this excerpt and fetch_openml takes different
# arguments, so the import is left unchanged here — TODO migrate with the call.
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
# get training and test sets
# Hold out 20% of the samples for testing (test_size=0.2); random_state=0 fixes
# the shuffle so the split is reproducible across runs.
# NOTE(review): `mnist` is not defined in the visible part of this snippet —
# presumably the result of a dataset fetch; confirm against the full script.
x_train, x_test, y_train, y_test = train_test_split(mnist.data, mnist.target, test_size=0.2, random_state=0)
# Report the shape of the training portion of mnist.data.
print(x_train.shape)
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
# Load scikit-learn's bundled breast cancer dataset.
cancer = datasets.load_breast_cancer()
# x takes the dataset's `data` attribute and y its `target` attribute
# (feature matrix and class labels, per the load_breast_cancer documentation).
x = cancer.data
y = cancer.target
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
# Load scikit-learn's bundled breast cancer dataset.
cancer = datasets.load_breast_cancer()
# x takes the dataset's `data` attribute (the feature matrix, per the
# load_breast_cancer documentation).
x = cancer.data
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
# Load scikit-learn's bundled breast cancer dataset.
cancer = datasets.load_breast_cancer()
# x takes the dataset's `data` attribute and y its `target` attribute
# (feature matrix and class labels, per the load_breast_cancer documentation).
x = cancer.data
y = cancer.target
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
# learning_curve lives in sklearn.model_selection; the old sklearn.learning_curve
# module was deprecated in scikit-learn 0.18 and removed in 0.20.
from sklearn.model_selection import learning_curve
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
# validation_curve lives in sklearn.model_selection; the old
# sklearn.learning_curve module was deprecated in scikit-learn 0.18 and
# removed in 0.20.
from sklearn.model_selection import validation_curve
import matplotlib.pyplot as plt
import numpy as np
import numpy as np
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
# GridSearchCV lives in sklearn.model_selection; the old sklearn.grid_search
# module was deprecated in scikit-learn 0.18 and removed in 0.20.
from sklearn.model_selection import GridSearchCV
# load data
import numpy as np
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
# GridSearchCV lives in sklearn.model_selection; the old sklearn.grid_search
# module was deprecated in scikit-learn 0.18 and removed in 0.20.
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score