Skip to content

Instantly share code, notes, and snippets.

@kgori
kgori / discretize.py
Last active October 11, 2020 11:54
How to discretize a gamma or lognormal distribution, for modelling rates over sites in a phylogenetic model
import scipy
import scipy.stats as ss
import numpy as np
def discretize(alpha, ncat, dist=ss.gamma):
if dist == ss.gamma:
dist = dist(alpha, scale=1 / alpha)
elif dist == ss.lognorm:
dist = dist(s=alpha, scale=np.exp(0.5 * alpha**2))
quantiles = dist.ppf(np.arange(0, ncat) / ncat)
@kgori
kgori / Makefile
Last active July 12, 2019 09:49
bcftools make consensus with insertion
# Replace these with the full path to the executables, if necessary
BWA=bwa
SAMTOOLS=samtools
BCFTOOLS=bcftools
# Makefile targets
all: ref.fa reads.fq aln.bam aln.bam.bai pileup.vcf filtered_calls.bcf consensus.fa
.PHONY: all
simulation: ref.fa reads.fq
@kgori
kgori / standardised_variables.R
Created November 27, 2018 17:21
Convert regression line coefficient and intercept of standardised variables back to the original scale
# Standardisation formula
z <- function(x) (x - mean(x)) / sd(x)
#' Convert regression parameters obtained from standardised variables
#' back to the scale of the original data.
#'
#' For a regression y ~ x*, where x* denotes the standardised form of x:
#' Result: y = m(x*) + c
#' Transformed: y = (m / sd(x))(x) + (c - (m*mean(x)/sd(x))
#'
@kgori
kgori / revcomp.py
Created April 6, 2018 16:42
DNA reverse complement
def revcomp(s):
complements = dict(
A='T', C='G', G='C', T='A',
M='K', R='Y', W='W', S='S',
Y='R', K='M', V='B', H='D',
D='H', B='V', N='N'
)
return ''.join(complements[char] for char in s.upper()[::-1])
@kgori
kgori / insert_size.py
Last active March 23, 2018 11:31
Estimate insert size distribution of proper pairs in bam file
import pysam
import math
from itertools import islice
def filterfn(read):
return (read.is_proper_pair and
read.is_paired and
read.tlen > 0 and
not read.is_supplementary and
not read.is_duplicate and
@kgori
kgori / progress.py
Created November 17, 2017 11:32
Simple progress bar
import sys
class Progress():
def __init__(self, capacity, label = '', width=80, fill='#', blank=' '):
self.capacity = capacity
self.label = label
self.width = width
self.fill = fill
self.blank = blank
self.completed = 0
@kgori
kgori / discrete_gamma.c
Created July 6, 2015 15:25
Cython wrapper of PAML discrete gamma rates function
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "discrete_gamma.h"
// PAML code for discrete gamma
// Forward decs
double QuantileChi2(double prob, double v);
double LnGammaFunction(double alpha);
double IncompleteGamma(double x, double alpha, double ln_gamma_alpha);
@kgori
kgori / sigfit_pymc3.py
Last active January 23, 2017 18:47
PYMC3 model code to do signature fitting using NUTS
"""
PYMC3 model code to do signature fitting using NUTS
"""
import pymc3
from theano import tensor
mytest = pymc3.Model()
counts = data
with mytest:
mixing_proportions = pymc3.Dirichlet('mixing_proportions', a = np.ones(30), shape = 30)
@kgori
kgori / fasta.py
Last active October 27, 2016 16:41
Fasta parser - python oneliners
import re
# Fasta one-liners
# Parse fasta from a string
def fasta_parse_string(s):
return dict([(lambda x:(x[0].strip(), re.sub(r'\s+','',x[2])))(l.partition('\n')) for l in s.split('>')[1:]])
# Parse fasta from a file
def fasta_parse_file(f):
with open(f) as h:return fasta_parse_string(h.read())
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.