Skip to content

Instantly share code, notes, and snippets.

import argparse as ap
import sys
import vcf
import yaml
VERSION = 0.1
def filter_variants(yaml_variants):
variants = []
@nickloman
nickloman / gist:2fcab98c353d07521ca5016dd7b779b6
Created April 29, 2019 14:51
Ubuntu 18.10 missing dependencies
root@6cc3ac9c89c3:/# dpkg -i --ignore-depends=libboost-log1.58.0 ont_guppy_2.3.7-1~xenial_amd64.deb
(Reading database ... 15231 files and directories currently installed.)
Preparing to unpack ont_guppy_2.3.7-1~xenial_amd64.deb ...
Unpacking ont-guppy (2.3.7-1~xenial) over (2.3.7-1~xenial) ...
dpkg: dependency problems prevent configuration of ont-guppy:
ont-guppy depends on libcurl4-openssl-dev; however:
Package libcurl4-openssl-dev is not installed.
ont-guppy depends on libssl-dev; however:
Package libssl-dev is not installed.
In bwa/bwamem.c, reduce chunk_size to a smaller value; otherwise it will not return for ages:
o->chunk_size = 10000;
import subprocess
import select
p = subprocess.Popen(['bin/bwa/bwa', 'mem', 'ref/NC_018143.fna', '/dev/stdin'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
@nickloman
nickloman / gist:f6b2d1f100b2fb3e4791
Created November 30, 2015 08:43
Stream a directory of files to an FTP server, remembering the MD5 hash
# Archive $dir to stdout, then use tee to fan the same tar stream out to two
# consumers: an ncftpput process (via process substitution) that stores it
# as ${dir}.tar on HOST, and md5sum, whose output is saved to ${dir}.md5.
# NOTE(review): presumably -c makes ncftpput read the upload from stdin and
# -F selects passive mode — confirm against the ncftpput man page.
# USER, PASS and HOST are placeholders to be filled in.
tar -cf - $dir | tee >(ncftpput -v -F -u USER -p PASS -c HOST ${dir}.tar) | md5sum > ${dir}.md5
# Create three empty FASTA files to act as sample input.
touch a.fasta
touch b.fasta
touch c.fasta
# For every *.fasta file found, print (echo, not run) a `union` command line
# that would write <file>.union.fasta — here using parallel, with {} replaced
# by each file name.
find . -name "*.fasta" | parallel echo union -sequence {} -sformat fasta -outseq {}.union.fasta -osformat fasta -auto
# Same output using xargs: one input line per invocation (-L 1), with '{}'
# as the replacement token (-I).
find . -name "*.fasta" | xargs -L 1 -I '{}' echo union -sequence {} -sformat fasta -outseq {}.union.fasta -osformat fasta -auto
@nickloman
nickloman / mapping_stats.py
Created June 10, 2014 15:56
mapping_stats.py - for BLASR SAM output, requires Pysam
import pysam
import sys
samfile = pysam.Samfile(sys.argv[1], "rb")
fields = ['Name', 'QueryLen', 'AlignLen', 'NumMismatches']
print "\t".join(fields)
for read in samfile:
t = dict(read.tags)
@nickloman
nickloman / fast5tofasta.py
Created June 9, 2014 16:01
fast5tofasta.py
import h5py
from Bio import SeqIO
from StringIO import StringIO
import sys
keys = {'template' : '/Analyses/Basecall_2D_000/BaseCalled_template/Fastq',
'complement' : '/Analyses/Basecall_2D_000/BaseCalled_complement/Fastq',
'twodirections' : '/Analyses/Basecall_2D_000/BaseCalled_2D/Fastq'}
for fn in sys.argv[1:]:
@nickloman
nickloman / hdf5.rb
Last active August 29, 2015 14:02
hdf5.rb
require 'formula'
class Hdf5 < Formula
homepage 'http://www.hdfgroup.org/HDF5'
url 'http://www.hdfgroup.org/ftp/HDF5/releases/hdf5-1.8.12/src/hdf5-1.8.12.tar.bz2'
sha1 '8414ca0e6ff7d08e423955960d641ec5f309a55f'
version '1.8.12'
# TODO - warn that these options conflict
option :universal
@nickloman
nickloman / gist:8031817
Last active December 31, 2015 19:09
make_blobolog_file.py
import sys
from collections import defaultdict
from itertools import izip_longest
def grouper(n, iterable, fillvalue=None):
    """Collect items from *iterable* into consecutive tuples of length *n*.

    When the input does not divide evenly, the final tuple is padded with
    *fillvalue*; e.g. ``grouper(2, "abcde", "x")`` yields
    ``('a', 'b'), ('c', 'd'), ('e', 'x')``.

    Args:
        n: Number of items per group.
        iterable: Any iterable to be chunked.
        fillvalue: Padding used to complete the last group.

    Returns:
        A lazy iterator of ``n``-tuples.
    """
    # ``izip_longest`` only exists on Python 2; Python 3 renamed it to
    # ``zip_longest``. Resolve whichever is available so this helper works
    # on both interpreters.
    try:
        from itertools import izip_longest as _zip_longest
    except ImportError:
        from itertools import zip_longest as _zip_longest
    # n references to the SAME iterator: building one output tuple pulls
    # n consecutive items from the underlying iterable.
    args = [iter(iterable)] * n
    return _zip_longest(*args, fillvalue=fillvalue)
contigs = defaultdict(dict)
@nickloman
nickloman / pullblast.py
Last active December 22, 2015 20:09
pullblast.py - retrieve BLAST HSP database hits in FASTA format, returning results in the same strand orientation as the query
#Usage:
#blastall -p blastn -d db -i query -m 8 | python pullblast.py db.fasta
import sys
from Bio import SeqIO
records = SeqIO.to_dict(SeqIO.parse(open(sys.argv[1]), "fasta"))
for ln in sys.stdin:
cols = ln.rstrip().split("\t")