André F. Rendeiro afrendeiro

## pairwiseAlign.py
#! /usr/bin/python

# Forward-recursion with pruning
# An algorithm for doing exact alignment of two sequences
# Forward recursion is used, with pruning of cells

# two sequences to align
S1 = 'APPLR'
S2 = 'APPLS'

## mast2tsv.py
#!/usr/bin/env python

from argparse import ArgumentParser
from BeautifulSoup import BeautifulSoup
import csv

# argparser
parser = ArgumentParser(description = 'Parses MEME-MAST xml output.',
    usage = 'python mast2tsv.py mast.output.xml mast.output.tsv')
# positional arguments

## cosmic.py
#!/usr/bin/env python

# Mining "COSMIC: Catalogue Of Somatic Mutations In Cancer" database

import os
from subprocess import call
import pandas as pd

# Cosmic DB requires login to download data, so neither BioMart nor wget actually works.
call(["wget", "http://cancer.sanger.ac.uk/files/cosmic/current_release/CosmicCompleteExport.tsv.gz"])

## hdf5_to_csv.py
#!/usr/bin/env python

import os
import h5py
import pandas as pd

projectDir = '/home/afr/workspace/cellprofiler/'

# open hdf5
hdf5 = h5py.File(projectDir + '1315001__2014-01-25T18_26_59-Measurement1.h5', 'r')

## read_distances.py
#!/usr/bin/env python

from argparse import ArgumentParser
import os, re
from pybedtools import BedTool
import HTSeq
import numpy as np
import pandas as pd
import string
import itertools

## taxon_distribution_interpro_domains.py
import pandas as pd
from biomart import BiomartServer, BiomartDataset
from Bio import Entrez


def get_tax_id(specie):
    """Get taxon ID for specie.

    Queries the NCBI "taxonomy" database through ``Entrez.esearch`` using
    the given name as the search term and parses the XML response with
    ``Entrez.read``.

    :param specie: Name to search for; must support ``str.replace`` and
        ``str.strip`` (presumably a species name string -- confirm with
        callers).
    """
    # Spaces are replaced with "+" before building the search term.
    # NOTE(review): strip() runs *after* the replacement, so leading/trailing
    # spaces have already become "+" and are not removed -- confirm whether
    # the strip was meant to happen first.
    specie = specie.replace(" ", "+").strip()
    # Search the taxonomy database, asking for an XML response.
    search = Entrez.esearch(term=specie, db="taxonomy", retmode="xml")
    # Parse the response handle into a Python structure.
    # NOTE(review): nothing is returned in the visible portion of this
    # function; the remainder of the body is not shown here.
    record = Entrez.read(search)

## fit_negative_exponential.py
from scipy.optimize import curve_fit
from scipy import stats
import matplotlib.pyplot as plt


def fit_exponential_neg(x, a, b, c):
    """Evaluate the negative exponential ``a * exp(-b * x) + c``.

    :param x: Point(s) at which to evaluate; anything ``np.exp`` accepts
        once multiplied by ``-b``.
    :param a: Multiplier applied to the exponential term.
    :param b: Rate inside the exponent (applied as ``-b * x``).
    :param c: Constant offset added to the result.
    :returns: The model value(s) at ``x``.
    """
    # Independent variable first, then the parameters -- presumably so the
    # function can be handed to ``curve_fit`` as the model (usage not shown).
    decay = np.exp(-b * x)
    return a * decay + c

X = np.array(rpkm_log['mean'])
Y = np.array(rpkm_log['qv2'])

## networkx_play.py
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns

# Getting a specific node
G['PAX5']
# Getting a specific edge
G['PAX5']['NFKB1']

# Getting all edge weights

## quantilize_bigwigs.py
import sys
from argparse import ArgumentParser
import pyBigWig
import numpy as np
import multiprocessing
import parmap

"""
Produce bigWig files with the quantiles/mean of signal across a number of bigWig files.
"""

## pubmed2wordcloud.py
import sys
import json
import urllib2
import re
from collections import Counter


def get_ids(term, ids=list(), retstart=0, retmax=1000):
    """
    Return all Pubmed Ids of articles containing a term, in a recursive fashion.
	#! /usr/bin/python

	# Forward-recursion with pruning
	# An algorithm for doing exact alignment of two sequences
	# Forward recursion is used, with pruning of cells

	# two sequences to align
	S1 = 'APPLR'
	S2 = 'APPLS'
	#!/usr/bin/env python

	from argparse import ArgumentParser
	from BeautifulSoup import BeautifulSoup
	import csv

	# argparser
	parser = ArgumentParser(description = 'Parses MEME-MAST xml output.',
	usage = 'python mast2tsv.py mast.output.xml mast.output.tsv')
	# positional arguments
	#!/usr/bin/env python

	# Mining "COSMIC: Catalogue Of Somatic Mutations In Cancer" database

	import os
	from subprocess import call
	import pandas as pd

	# Cosmic DB has requires login to download data, so neither BioMart or wget actually work.
	call(["wget", "http://cancer.sanger.ac.uk/files/cosmic/current_release/CosmicCompleteExport.tsv.gz"])
	#!/usr/env/python

	import os
	import h5py
	import pandas as pd

	projectDir = '/home/afr/workspace/cellprofiler/'

	# open hdf5
	hdf5 = h5py.File(projectDir + '1315001__2014-01-25T18_26_59-Measurement1.h5', 'r')
	#!/usr/env python

	from argparse import ArgumentParser
	import os, re
	from pybedtools import BedTool
	import HTSeq
	import numpy as np
	import pandas as pd
	import string
	import itertools
	import pandas as pd
	from biomart import BiomartServer, BiomartDataset
	from Bio import Entrez


	def get_tax_id(specie):
	"""Get taxon ID for specie."""
	specie = specie.replace(" ", "+").strip()
	search = Entrez.esearch(term=specie, db="taxonomy", retmode="xml")
	record = Entrez.read(search)
	from scipy.optimize import curve_fit
	from scipy import stats
	import matplotlib.pyplot as plt


	def fit_exponential_neg(x, a, b, c):
	return a * np.exp(-b * x) + c

	X = np.array(rpkm_log['mean'])
	Y = np.array(rpkm_log['qv2'])
	import networkx as nx
	import matplotlib.pyplot as plt
	import seaborn as sns

	# Getting a specific node
	G['PAX5']
	# Geting a specific edge
	G['PAX5']['NFKB1']

	# Getting all edge weights
	import sys
	from argparse import ArgumentParser
	import pyBigWig
	import numpy as np
	import multiprocessing
	import parmap

	"""
	Produce bigWig files with the quantiles/mean of signal across a number of bigWig files.
	"""
	import sys
	import json
	import urllib2
	import re
	from collections import Counter


	def get_ids(term, ids=list(), retstart=0, retmax=1000):
	"""
	Return all Pubmed Ids of articles containing a term, in a recursive fashion.