Skip to content

Instantly share code, notes, and snippets.

Avatar

André F. Rendeiro afrendeiro

View GitHub Profile
@afrendeiro
afrendeiro / pairwiseAlign.py
Created Jun 4, 2014
Exact pairwise alignment by dynamic programming. Forward recursion with pruning
View pairwiseAlign.py
#! /usr/bin/python
# Forward-recursion with pruning
# An algorithm for doing exact alignment of two sequences
# Forward recursion is used, with pruning of cells
# two sequences to align
S1 = 'APPLR'
S2 = 'APPLS'
@afrendeiro
afrendeiro / mast2tsv.py
Last active Aug 29, 2015
Parses MAST xml output, extracts relevant info and outputs tab-delimited file
View mast2tsv.py
#!/usr/bin/env python
from argparse import ArgumentParser
from BeautifulSoup import BeautifulSoup
import csv
# argparser
parser = ArgumentParser(description = 'Parses MEME-MAST xml output.',
usage = 'python mast2tsv.py mast.output.xml mast.output.tsv')
# positional arguments
@afrendeiro
afrendeiro / cosmic.py
Last active Aug 29, 2015
Mining cosmicDB
View cosmic.py
#!/usr/bin/env python
# Mining "COSMIC: Catalogue Of Somatic Mutations In Cancer" database
import os
from subprocess import call
import pandas as pd
# Cosmic DB requires login to download data, so neither BioMart nor wget actually works.
call(["wget", "http://cancer.sanger.ac.uk/files/cosmic/current_release/CosmicCompleteExport.tsv.gz"])
@afrendeiro
afrendeiro / hdf5_to_csv.py
Last active Aug 29, 2015
Playing with CellProfiler
View hdf5_to_csv.py
#!/usr/bin/env python
import os
import h5py
import pandas as pd
projectDir = '/home/afr/workspace/cellprofiler/'
# open hdf5
hdf5 = h5py.File(projectDir + '1315001__2014-01-25T18_26_59-Measurement1.h5', 'r')
@afrendeiro
afrendeiro / read_distances.py
Last active Aug 29, 2015
Counting read spacing
View read_distances.py
#!/usr/bin/env python
from argparse import ArgumentParser
import os, re
from pybedtools import BedTool
import HTSeq
import numpy as np
import pandas as pd
import string
import itertools
@afrendeiro
afrendeiro / taxon_distribution_interpro_domains.py
Last active Aug 29, 2015
Get taxonomic distribution of interpro domains
View taxon_distribution_interpro_domains.py
import pandas as pd
from biomart import BiomartServer, BiomartDataset
from Bio import Entrez
def get_tax_id(specie):
"""Get taxon ID for specie."""
specie = specie.replace(" ", "+").strip()
search = Entrez.esearch(term=specie, db="taxonomy", retmode="xml")
record = Entrez.read(search)
View fit_negative_exponential.py
from scipy.optimize import curve_fit
from scipy import stats
import matplotlib.pyplot as plt
def fit_exponential_neg(x, a, b, c):
    """Evaluate an exponential with offset: ``a * exp(-b * x) + c``.

    Parameters
    ----------
    x : number or NumPy array
        Point(s) at which the curve is evaluated; passed through ``np.exp``
        after being scaled by ``-b``.
    a : number
        Multiplier applied to the exponential term.
    b : number
        Rate inside the exponent (the exponent is ``-b * x``).
    c : number
        Constant added to the result.

    Returns
    -------
    The value of ``a * np.exp(-b * x) + c``; element-wise when ``x`` is an
    array.

    NOTE(review): the (x, *params) signature looks intended as the model
    function for ``scipy.optimize.curve_fit`` (imported alongside) — the
    actual fitting call is not visible here, so confirm against the caller.
    """
    return a * np.exp(-b * x) + c
X = np.array(rpkm_log['mean'])
Y = np.array(rpkm_log['qv2'])
View networkx_play.py
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
# Getting a specific node
G['PAX5']
# Getting a specific edge
G['PAX5']['NFKB1']
# Getting all edge weights
@afrendeiro
afrendeiro / quantilize_bigwigs.py
Last active Oct 21, 2015
Produce bigWig files with the quantiles/mean of signal across a number of bigWig files.
View quantilize_bigwigs.py
import sys
from argparse import ArgumentParser
import pyBigWig
import numpy as np
import multiprocessing
import parmap
"""
Produce bigWig files with the quantiles/mean of signal across a number of bigWig files.
"""
@afrendeiro
afrendeiro / pubmed2wordcloud.py
Last active Nov 8, 2015
Get all the words in the titles of publications for a particular PubMed search to make a wordcloud.
View pubmed2wordcloud.py
import sys
import json
import urllib2
import re
from collections import Counter
def get_ids(term, ids=list(), retstart=0, retmax=1000):
"""
Return all Pubmed Ids of articles containing a term, in a recursive fashion.