Skip to content

Instantly share code, notes, and snippets.

View kdm9's full-sized avatar
💭
Remind me if I don't respond

Dr. K. D. Murray kdm9

💭
Remind me if I don't respond
View GitHub Profile
@kdm9
kdm9 / tst.py
Created November 5, 2015 02:15
from __future__ import print_function, division
from frisk.kmerhash import *
import screed
from matplotlib import pyplot as plt
from functools import partial
import multiprocessing as mp
from os import path
def seq_ivom(gen):
@kdm9
kdm9 / tst.py
Created October 29, 2015 09:40
from frisk.kmerhash import *
import screed
r = screed.open('/home/kevin/ws/seqs/TAIR10_gen/TAIR10.fasta.gz')
for x in r:
seq = x.sequence[:100000]
break
winsz = 3000
ivs = []
# Given a sequence, find the k-mers of a given length that occur at least
# `thresh` times within a window of a given length in that sequence.
from collections import deque
def kmer_frequency(text, k, window, thresh=3):
counts = dict()
kmers = deque()
for w in range(len(text)-window+1):
@kdm9
kdm9 / gengen
Last active August 29, 2015 14:25
#!/usr/bin/env python
from __future__ import print_function
import random
import docopt
DOC = """
USAGE:
gengen <length> ...
Give a length for each chromosome you want
This file has been truncated, but you can view the full file.
dpkg-buildpackage -rfakeroot -D -us -uc -i -I
dpkg-buildpackage: source package seqan
dpkg-buildpackage: source version 2.0.0+dfsg-1
dpkg-buildpackage: source distribution UNRELEASED
dpkg-buildpackage: source changed by Andreas Tille <tille@debian.org>
dpkg-source -i -I --before-build seqan
dpkg-buildpackage: host architecture amd64
fakeroot debian/rules clean
dh clean --parallel
dh_testdir -O--parallel
fastq=~/ws/gbstrim/data/em_10B8.fq
for ext in 'uncomp' 'gz' 'zstd'
do
for i in {1..4}
do
echo $ext $i
/usr/bin/time -f 'TIME %U %S %e %M' load-into-counting.py \
-T 1 \
-N 4 \
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
Compression;User;Sys;Wall;Rss
uncomp;4.15;0.4;13.67;408252
uncomp;4.19;0.34;22.8;408120
uncomp;4.14;0.37;19.5;408060
uncomp;4.2;0.34;20.34;408284
mean_uncomp;4.17;0.3625;19.0775;408179
gz;9.44;0.1;9.54;408604
gz;9.43;0.12;9.55;408592
gz;9.49;0.12;9.61;408660
gz;9.4;0.14;9.54;408660
#include "read_parsers.hh"
int main(int argc, char **argv)
{
khmer::read_parsers::Read read;
size_t n_reads = 0;
if (argc < 2) {
return 1;
}
#include <seqan/seq_io.h>
using namespace seqan;
int main(int argc, char **argv)
{
std::string id, seq, qual;
if (argc < 2) {
return 1;
#include <seqan/seq_io.h>
int main(int argc, char **argv)
{
std::string id, seq, qual;
if (argc < 2) {
return 1;
}