Skip to content

Instantly share code, notes, and snippets.

View brentp's full-sized avatar

Brent Pedersen brentp

View GitHub Profile
library(SKAT)
set.seed(1)
hap.skat.null = function(formula, covs, ...){
covs = read.delim(covs)
assign("y", covs[[as.character(formula[[2]])]], envir = .GlobalEnv)
m = SKAT_Null_Model(formula, data=covs, out_type="D")
return(m)
}
@brentp
brentp / README.md
Last active August 29, 2015 14:10
automatic running bedtools shuffle

usage:

shuf.sh "bedtools cmd" "metric" $genome

where metric must accept the output from bedtools cmd and return a single numeric value.

@brentp
brentp / irelate.go
Last active August 29, 2015 14:10
Streaming relation (overlap, distance, KNN) testing of (any number of) sorted files of intervals.
package main
import (
"bufio"
"compress/gzip"
"container/heap"
"fmt"
"io"
"os"
"strconv"
@brentp
brentp / compare.sh
Created March 11, 2015 21:00
compare different variant normalization methods
set -eo pipefail
REF=human_g1k_v37_decoy.fasta
VCF=Mills_and_1000G_gold_standard.indels.b37.vcf.gz
VCF=/usr/local/src/gemini/test/test.mult-alts.fb.vep.vcf.gz
java -Xmx4G -jar /usr/local/src/gatk/GenomeAnalysisTK.jar -T LeftAlignAndTrimVariants --splitMultiallelics --trimAlleles \
-R $REF \
--variant $VCF -o gatk.norm.vcf
@brentp
brentp / preprocess.py
Last active August 29, 2015 14:18
preprocess a (non GATK/Freebayes) VCF to set the ref, alt and total depth tags so that gemini can use it.
"""
This will standardize input file genotype fields to have AD=REF,ALT1,ALT2 ... (a
la GTK).
For platypus: --depth NR --alt-depth NV
varscan: --ref-depth RD --alt-depth AD
samtools: --ref-depth DP4 (untested)
"""
import argparse
from bx.bbi import bigwig_file
from bx.bbi import bigbed_file
print bigwig_file
import time
from hashlib import md5
prefix = '/data/gemini/data/hg19_fitcons_fc-i6-0_V1-01'
print "before bw: 8.6 seconds"
@brentp
brentp / format.md
Last active August 29, 2015 14:19
scratch

$ gemini de_novo --columns "chrom, start, end, ref, alt" test.de_novo.db --min-kindreds 1 --gt-pl-max 1000 | cols

chrom  start      end        ref  alt  gene    variant_id    family_id  family_members                                                     family_genotypes  affected_samples  family_count
chr10  48003991   48003992   C    T    ASAH2C  variant_id:2  2          2_dad(dad;unaffected),2_mom(mom;unaffected),2_kid(child;affected)  C/C,C/C,C/T       2_kid             2
chr10  48004991   48004992   C    T    ASAH2C  variant_id:3  3          3_dad(dad;unaffected),3_mom(mom;unaffected),3_kid(child;affected)  C/C,C/C,C/T       3_kid             2
chr10  135336655  135336656  G    A    SPRN    variant_id:4  2          2_dad(dad;unaffected),2_mom(mom;unaffected),2_kid(child;affected)  G/G,G/G,G/A       2_kid             2
chr10  135336655  135336656  G    A    SPRN    variant_id:4  1          1_dad(dad;unaffected),1_mom(mom;unaffected),1_kid(child;affected)  G/G,G/G,G/A       1_kid             2
chr10  135
with-bcolz time (seconds) bcolz-time md5sum num_lines filter
True 2.8 0.62 5281fecb8571cc7c8e832e5682e865bb 16778 (gt_phred_ll_homalt).(*).(==0).(all)
False 155.2 NA 5281fecb8571cc7c8e832e5682e865bb 16778 (gt_phred_ll_homalt).(*).(==0).(all)
True 3.1 0.74 b2b75c9a56e128410a0e19708f096e2f 16750 ((gt_phred_ll_homalt).().(==0).(all)) and (gt_types).().(==HOM_ALT).(all)
False 212.8 NA b2b75c9a56e128410a0e19708f096e2f 16750 ((gt_phred_ll_homalt).().(==0).(all)) and (gt_types).().(==HOM_ALT).(all)
True 1.4 0.42 c4885f35d03bed862d1e9d0ece71bc7c 2367 (gt_depths.{dad} > 20 and gt_depths.{mom} > 20 and gt_depths.{kid} > 20) and (gt_types.{dad} == HOM_REF and gt_types.{mom} == HOM_REF and gt_types.{kid} != HOM_REF)
False 203.0 NA c4885f35d03bed862d1e9d0ece71bc7c 2367 (gt_depths.{dad} > 20 and gt_depths.{mom} > 20 and gt_depths.{kid} > 20) and (gt_types.{dad} == HO
diff --git a/src/extended/gff3_visitor.c b/src/extended/gff3_visitor.c
index 8246680..9e87612 100644
--- a/src/extended/gff3_visitor.c
+++ b/src/extended/gff3_visitor.c
@@ -197,32 +197,52 @@ static int store_ids(GtGenomeNode *gn, void *data, GtError *err)
AddIDInfo add_id_info;
int had_err = 0;
GtStr *id;
+ /* Q: is this const ok? */
+ const char *idstring;
diff --git a/src/extended/gff3_out_stream.c b/src/extended/gff3_out_stream.c
index 6dbbc23..f813e8f 100644
--- a/src/extended/gff3_out_stream.c
+++ b/src/extended/gff3_out_stream.c
@@ -18,6 +18,7 @@
#include "extended/gff3_out_stream.h"
#include "extended/gff3_visitor.h"
#include "extended/node_stream_rep.h"
+#include "core/cstr_table.h"