Skip to content

Instantly share code, notes, and snippets.

View neksa's full-sized avatar

Alexander Goncearenco neksa

View GitHub Profile
import numpy as np
import matplotlib.pyplot as plt
from sklearn import manifold
%matplotlib inline
# Symmetric pairwise dissimilarity matrix for three objects (zero diagonal).
D = np.array(
    [
        [0, 10, 6],
        [10, 0, 5],
        [6, 5, 0],
    ]
)

# Metric MDS into two dimensions; the input is passed as a precomputed
# dissimilarity matrix, and a single initialisation is used (n_init=1).
M = manifold.MDS(
    n_components=2,
    n_init=1,
    max_iter=10000,
    metric=True,
    dissimilarity="precomputed",
)

# K holds the embedded 2-D coordinates, one row per object in D.
K = M.fit_transform(D)
print("Stress", M.stress_)
@neksa
neksa / Mahalanobis_Outliers.ipynb
Created June 8, 2016 21:43 — forked from kevindavenport/Mahalanobis_Outliers.ipynb
An IPython notebook created for my blog post on http://kldavenport.com about the Mahalanobis Distance function and outlier detection.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@neksa
neksa / chr_rs_acc_via_eutils.py
Created April 25, 2017 20:53 — forked from eweitz/chr_rs_acc_via_eutils.py
NCBI EUtils demo: get a chromosome's RefSeq accession given its name and assembly
'''
This simple script shows how to use NCBI E-Utilities to get a chromosome's
RefSeq accession given the chromosome's name and its genome assembly.
Example:
$ python3 chr_rs_acc_via_eutils.py
RefSeq accession for chromosome 6 on genome assembly GRCh38 (GCF_000001405.26):
NC_000006.12
'''
# test the newly generated machine learning algorithm
def testAlgorithm(algorithm, trainSet, trainSetAnswers):
print("Type of trainSetAnswers is " + str(type(trainSetAnswers)))
for i in range(len(trainSetAnswers)):
y_predicted = algorithm.predict(trainSet[index])
correctAnswer = trainSetAnswers[index]
print("Correct Answer is " + str(correctAnswer))
correctAnswer = correctAnswer.reshape(1,-1)
print("Shape of CorrectAnswer is " + str(correctAnswer.shape))
import requests
import json
MUTAGENE_URL = "https://www.ncbi.nlm.nih.gov/research/mutagene"
def get_profile(fname, assembly=37):
"""
Calling MutaGene REST API to convert a VCF file into a mutational profile (96 context-dependent mutational probabilities)
and profile_counts (counts of mutations for each of the 96 context-dependent mutations)
3a4
> M1L 0.539601 0 0
5a7
> M1V 1.030088 0 0
172,175c174,177
< L25L 3.182415 0 0
< L25P 0.907837 0 0
< L25Q 0.440429 0 0
< L25R 0.202944 0 0
---
import zlib
from pyasn1.codec.ber import decoder
def decode_residues(res):
residues = []
try:
code, rest = decoder.decode(zlib.decompress(res, 16 + zlib.MAX_WBITS))
except:
pass
for i in range(len(code)):
try:
import twobitreader as tbr
except:
print("twobitreader module required")
nucleotides = "ACGT" # this order of nucleotides is important for reversing
complementary_nucleotide = dict(zip(nucleotides, reversed(nucleotides)))
TWOBIT_GENOMES_PATH = '/net/pan1/mutagene/data/genomes/'
import requests
import json
# MUTAGENE_URL = "https://www.ncbi.nlm.nih.gov/research/mutagene"
# MUTAGENE_URL = "https://dev.ncbi.nlm.nih.gov/research/mutagene"
MUTAGENE_URL = "https://mwebdev2/research/mutagene"
# MUTAGENE_URL = "http://localhost:5000"
def get_motifs(fname, assembly=37):
%matplotlib inline
import seaborn as sns
sns.set(style="ticks")
df = sns.load_dataset("iris")

# Regression scatter plots: petal measurements (rows) against sepal
# measurements (columns).
sns.pairplot(
    df,
    x_vars=["sepal_length", "sepal_width"],
    y_vars=["petal_length", "petal_width"],
    markers=".",
    kind="reg",
)

# The iris frame also carries a string "species" column. pandas >= 2.0 no
# longer drops non-numeric columns in DataFrame.corr() and raises instead,
# so restrict the correlation to the numeric columns explicitly (this form
# also works on older pandas versions).
numeric_corr = df.select_dtypes(include="number").corr()
# round(numeric_corr**2, 2)

# Annotated correlation heatmap on a diverging palette centred at zero.
sns.heatmap(
    numeric_corr,
    cmap=sns.color_palette("RdBu_r", 7),
    annot=True,
    fmt=".2f",
    center=0,
    vmin=-1.0,
    vmax=1.0,
)