Skip to content

Instantly share code, notes, and snippets.

View afrendeiro's full-sized avatar

André F. Rendeiro afrendeiro

View GitHub Profile
@afrendeiro
afrendeiro / plotNucleosomeFits.py
Last active May 24, 2021 14:15
From a paired-end bam file, plot the frequency of insert sizes and a fit
def plot_fragment_sizes_fit(bam, plot, outputCSV, maxInsert=1500, smallestInsert=30):
"""
Heavy inspiration from here:
https://github.com/dbrg77/ATAC/blob/master/ATAC_seq_read_length_curve_fitting.ipynb
"""
try:
import pysam
import numpy as np
import matplotlib.mlab as mlab
from scipy.optimize import curve_fit
from scipy import stats
import matplotlib.pyplot as plt
def fit_exponential_neg(x, a, b, c):
    """
    Evaluate a decaying exponential with an offset: ``a * exp(-b * x) + c``.

    :param x: Point(s) at which to evaluate the curve; passed to ``np.exp``,
        so a scalar or a NumPy array both work.
    :param a: Multiplier applied to the exponential term.
    :param b: Decay rate; it is negated before being multiplied by ``x``.
    :param c: Constant added to the result.
    :returns: The value of the curve at ``x``.
    """
    return a * np.exp(-b * x) + c
X = np.array(rpkm_log['mean'])
Y = np.array(rpkm_log['qv2'])
@afrendeiro
afrendeiro / sra2bam.py
Last active February 9, 2017 19:41
SRA sample to unaligned bam using sra tools and sambamba, job submission through slurm
from argparse import ArgumentParser
import sys
from pypiper import NGSTk
import textwrap
global tk
tk = NGSTk()
def sra2bam(sra_acession, output_bam):
# Slurm header
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
# Getting a specific node
G['PAX5']
# Getting a specific edge
G['PAX5']['NFKB1']
# Getting all edge weights
@afrendeiro
afrendeiro / quantilize_bigwigs.py
Last active October 21, 2015 07:56
Produce bigWig files with the quantiles/mean of signal across a number of bigWig files.
import sys
from argparse import ArgumentParser
import pyBigWig
import numpy as np
import multiprocessing
import parmap
"""
Produce bigWig files with the quantiles/mean of signal across a number of bigWig files.
"""
@afrendeiro
afrendeiro / pubmed2wordcloud.py
Last active November 8, 2015 10:23
Get all the words in the titles of publications for a particular PubMed search to make a word cloud.
import sys
import json
import urllib2
import re
from collections import Counter
def get_ids(term, ids=list(), retstart=0, retmax=1000):
"""
Return all Pubmed Ids of articles containing a term, in a recursive fashion.
@afrendeiro
afrendeiro / kde_2d_weighted.py
Last active October 17, 2022 07:33
2D weighted kernel density estimation (KDE)
import numpy as np
import matplotlib.pyplot as plt
# class from here: http://nbviewer.ipython.org/gist/tillahoffmann/f844bce2ec264c1c8cb5
class gaussian_kde(object):
"""Representation of a kernel-density estimate using Gaussian kernels.
Kernel density estimation is a way to estimate the probability density
function (PDF) of a random variable in a non-parametric way.
import numpy as np
import pandas as pd
class DifferentialRegions(object):
"""
Compute two-tailed empirical p-value for difference between values of two variables.
"""
def __init__(self, df, a, b, permutations=100, alpha=0.05, correct=True):
super(DifferentialRegions, self).__init__()
@afrendeiro
afrendeiro / mass_rename.sh
Created February 4, 2016 09:43
Mass rename files cheatsheet
# I just need to have these somewhere to remember them later
# Rename every path under the current directory that contains "CM" followed by
# two or more digits and an "s", replacing that "s" with "-"
# (e.g. CM12s_x -> CM12-_x). The old and new names are printed before each move.
# NOTE: the list produced by find is still split on whitespace by the for-loop,
# so paths containing spaces are not handled here.
for F in $(find . | grep -e 'CM[0-9]\{2,\}s')
do
    # Compute the target once so the printed name and the mv destination agree.
    NEW=$(echo "$F" | sed 's/CM\([0-9]\{2,\}\)s/CM\1-/g')
    echo "$F" "$NEW"
    mv "$F" "$NEW"
done
for F in `find . | grep -e '_[1-2]_' | grep -v PBMC`
do
@afrendeiro
afrendeiro / enrichr.py
Last active May 3, 2016 09:08
Function to use Enrichr's API
def enrichr(dataframe, gene_set_libraries=None, kind="genes"):
"""
Use Enrichr on a list of genes (currently only genes supported through the API).
"""
import json
import requests
import pandas
ENRICHR_ADD = 'http://amp.pharm.mssm.edu/Enrichr/addList'