André F. Rendeiro afrendeiro

## kde_2d_weighted.py
import numpy as np
import matplotlib.pyplot as plt


# class from here: http://nbviewer.ipython.org/gist/tillahoffmann/f844bce2ec264c1c8cb5
class gaussian_kde(object):
    """Representation of a kernel-density estimate using Gaussian kernels.

    Kernel density estimation is a way to estimate the probability density
    function (PDF) of a random variable in a non-parametric way.

## pubmed2wordcloud.py
import sys
import json
import urllib2
import re
from collections import Counter


def get_ids(term, ids=list(), retstart=0, retmax=1000):
    """
    Return all Pubmed Ids of articles containing a term, in a recursive fashion.

## quantilize_bigwigs.py
import sys
from argparse import ArgumentParser
import pyBigWig
import numpy as np
import multiprocessing
import parmap

"""
Produce bigWig files with the quantiles/mean of signal across a number of bigWig files.
"""

## networkx_play.py
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns

# Getting a specific node
G['PAX5']
# Getting a specific edge
G['PAX5']['NFKB1']

# Getting all edge weights

## sra2bam.py
from argparse import ArgumentParser
import sys
from pypiper import NGSTk
import textwrap

global tk
tk = NGSTk()

def sra2bam(sra_acession, output_bam):
    # Slurm header

## fit_negative_exponential.py
from scipy.optimize import curve_fit
from scipy import stats
import matplotlib.pyplot as plt


def fit_exponential_neg(x, a, b, c):
    """Evaluate the negative exponential model ``a * exp(-b * x) + c``.

    ``x`` is the point (or array of points) at which to evaluate the model;
    ``a``, ``b`` and ``c`` are the model coefficients. The result is whatever
    NumPy produces for the expression, so array input is handled element-wise.
    """
    decay = np.exp(-b * x)
    return a * decay + c

X = np.array(rpkm_log['mean'])
Y = np.array(rpkm_log['qv2'])

## plotNucleosomeFits.py

def plot_fragment_sizes_fit(bam, plot, outputCSV, maxInsert=1500, smallestInsert=30):
    """
    Heavy inspiration from here:
    https://github.com/dbrg77/ATAC/blob/master/ATAC_seq_read_length_curve_fitting.ipynb
    """
    try:
        import pysam
        import numpy as np
        import matplotlib.mlab as mlab

## ngs_101.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              1 star
            
          
                afrendeiro
                / ngs_101.md
            
            
              Last active
              September 26, 2022 13:19
            
              
                NGS for dummies
              
          
    Introduction to next-generation sequencing (NGS)

General workflow

The currently used technology for next-generation sequencing is Illumina sequencing. All other technologies cannot compete with its speed, price and output power, and have therefore specialized in niche applications (not discussed here).
Nevertheless, no sequencing technology can simply start sequencing at one end of a chromosome and continue until the other end.
The approach therefore is:

  
## run_zinba.R
# Install zinba
R
install.packages(c("R.oo"))
install.packages(c("R.utils", "quantreg","doParallel","doMC","foreach"))  # for R>3.0
# only version that works with R 3.0:
# get it from here: https://code.google.com/p/zinba/issues/detail?id=69
install.packages("zinba_2.03.1.tar.gz", repos=NULL)

# Make bed files from bams
# system("bedtools bamtobed -i /data/mapped/sample.bam > ~/zinba/reads/sample.bed")

## get_bioinfo_files.sh
for GENOME in hg19 mm10 danRer10
do
    # Static files
    mkdir -p resources/${GENOME}
    cd resources/${GENOME}
    ### Genome
    wget http://hgdownload.cse.ucsc.edu/goldenPath/${GENOME}/bigZips/${GENOME}.2bit
    twoBitToFa ${GENOME}.2bit ${GENOME}.fa
    samtools faidx ${GENOME}.fa
    cd ../..
	import numpy as np
	import matplotlib.pyplot as plt


	# class from here: http://nbviewer.ipython.org/gist/tillahoffmann/f844bce2ec264c1c8cb5
	class gaussian_kde(object):
	"""Representation of a kernel-density estimate using Gaussian kernels.

	Kernel density estimation is a way to estimate the probability density
	function (PDF) of a random variable in a non-parametric way.
	import sys
	import json
	import urllib2
	import re
	from collections import Counter


	def get_ids(term, ids=list(), retstart=0, retmax=1000):
	"""
	Return all Pubmed Ids of articles containing a term, in a recursive fashion.
	import sys
	from argparse import ArgumentParser
	import pyBigWig
	import numpy as np
	import multiprocessing
	import parmap

	"""
	Produce bigWig files with the quantiles/mean of signal across a number of bigWig files.
	"""
	import networkx as nx
	import matplotlib.pyplot as plt
	import seaborn as sns

	# Getting a specific node
	G['PAX5']
	# Getting a specific edge
	G['PAX5']['NFKB1']

	# Getting all edge weights
	from argparse import ArgumentParser
	import sys
	from pypiper import NGSTk
	import textwrap

	global tk
	tk = NGSTk()

	def sra2bam(sra_acession, output_bam):
	# Slurm header
	from scipy.optimize import curve_fit
	from scipy import stats
	import matplotlib.pyplot as plt


	def fit_exponential_neg(x, a, b, c):
	return a * np.exp(-b * x) + c

	X = np.array(rpkm_log['mean'])
	Y = np.array(rpkm_log['qv2'])

	def plot_fragment_sizes_fit(bam, plot, outputCSV, maxInsert=1500, smallestInsert=30):
	"""
	Heavy inspiration from here:
	https://github.com/dbrg77/ATAC/blob/master/ATAC_seq_read_length_curve_fitting.ipynb
	"""
	try:
	import pysam
	import numpy as np
	import matplotlib.mlab as mlab
	# Install zinba
	R
	install.packages(c("R.oo"))
	install.packages(c("R.utils", "quantreg","doParallel","doMC","foreach")) # for R>3.0
	# only version that works with R 3.0:
	# get it from here: https://code.google.com/p/zinba/issues/detail?id=69
	install.packages("zinba_2.03.1.tar.gz", repos=NULL)

	# Make bed files from bams
	# system("bedtools bamtobed -i /data/mapped/sample.bam > ~/zinba/reads/sample.bed")
	for GENOME in hg19 mm10 danRer10
	do
	# Static files
	mkdir -p resources/${GENOME}
	cd resources/${GENOME}
	### Genome
	wget http://hgdownload.cse.ucsc.edu/goldenPath/${GENOME}/bigZips/${GENOME}.2bit
	twoBitToFa ${GENOME}.2bit ${GENOME}.fa
	samtools faidx ${GENOME}.fa
	cd ../..