Skip to content

Instantly share code, notes, and snippets.

View afrendeiro's full-sized avatar

André F. Rendeiro afrendeiro

View GitHub Profile
@afrendeiro
afrendeiro / pairwiseAlign.py
Created June 4, 2014 22:28
Exact pairwise alignment by dynamic programming. Forward recursion with pruning
#! /usr/bin/python
# Forward-recursion with pruning
# An algorithm for doing exact alignment of two sequences
# Forward recursion is used, with pruning of cells
# two sequences to align
S1 = 'APPLR'
S2 = 'APPLS'
@afrendeiro
afrendeiro / mast2tsv.py
Last active August 29, 2015 14:04
Parses MAST xml output, extracts relevant info and outputs tab-delimited file
#!/usr/bin/env python
from argparse import ArgumentParser
from BeautifulSoup import BeautifulSoup
import csv
# argparser
parser = ArgumentParser(description = 'Parses MEME-MAST xml output.',
usage = 'python mast2tsv.py mast.output.xml mast.output.tsv')
# positional arguments
@afrendeiro
afrendeiro / cosmic.py
Last active August 29, 2015 14:12
Mining cosmicDB
#!/usr/bin/env python
# Mining "COSMIC: Catalogue Of Somatic Mutations In Cancer" database
import os
from subprocess import call
import pandas as pd
# Cosmic DB requires a login to download data, so neither BioMart nor wget actually works.
call(["wget", "http://cancer.sanger.ac.uk/files/cosmic/current_release/CosmicCompleteExport.tsv.gz"])
@afrendeiro
afrendeiro / hdf5_to_csv.py
Last active August 29, 2015 14:12
Playing with CellProfiler
#!/usr/bin/env python
import os
import h5py
import pandas as pd
projectDir = '/home/afr/workspace/cellprofiler/'
# open hdf5
hdf5 = h5py.File(projectDir + '1315001__2014-01-25T18_26_59-Measurement1.h5', 'r')
@afrendeiro
afrendeiro / read_distances.py
Last active August 29, 2015 14:13
Counting read spacing
#!/usr/bin/env python
from argparse import ArgumentParser
import os, re
from pybedtools import BedTool
import HTSeq
import numpy as np
import pandas as pd
import string
import itertools
@afrendeiro
afrendeiro / taxon_distribution_interpro_domains.py
Last active August 29, 2015 14:15
Get taxonomic distribution of interpro domains
import pandas as pd
from biomart import BiomartServer, BiomartDataset
from Bio import Entrez
def get_tax_id(specie):
"""Get taxon ID for specie."""
specie = specie.replace(" ", "+").strip()
search = Entrez.esearch(term=specie, db="taxonomy", retmode="xml")
record = Entrez.read(search)
from scipy.optimize import curve_fit
from scipy import stats
import matplotlib.pyplot as plt
def fit_exponential_neg(x, a, b, c):
    """Evaluate the offset exponential ``a * exp(-b * x) + c``.

    Parameters
    ----------
    x
        Point(s) at which to evaluate; a scalar or a NumPy array
        (the expression is applied element-wise by ``np.exp``).
    a
        Multiplier of the exponential term.
    b
        Rate inside the exponent; the exponent used is ``-b * x``.
    c
        Constant added to the result.

    Returns
    -------
    The value of ``a * np.exp(-b * x) + c``, scalar or array to match ``x``.

    NOTE(review): presumably intended as the model function for
    ``scipy.optimize.curve_fit`` (imported next to it); the call site is
    not visible here -- confirm.
    """
    return a * np.exp(-b * x) + c
X = np.array(rpkm_log['mean'])
Y = np.array(rpkm_log['qv2'])
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
# Getting a specific node
G['PAX5']
# Getting a specific edge
G['PAX5']['NFKB1']
# Getting all edge weights
@afrendeiro
afrendeiro / quantilize_bigwigs.py
Last active October 21, 2015 07:56
Produce bigWig files with the quantiles/mean of signal across a number of bigWig files.
import sys
from argparse import ArgumentParser
import pyBigWig
import numpy as np
import multiprocessing
import parmap
"""
Produce bigWig files with the quantiles/mean of signal across a number of bigWig files.
"""
@afrendeiro
afrendeiro / pubmed2wordcloud.py
Last active November 8, 2015 10:23
Get all the words in the titles of publications for a particular PubMed search to make a word cloud.
import sys
import json
import urllib2
import re
from collections import Counter
def get_ids(term, ids=list(), retstart=0, retmax=1000):
"""
Return all Pubmed Ids of articles containing a term, in a recursive fashion.