Skip to content

Instantly share code, notes, and snippets.

View srynobio's full-sized avatar

Shawn Rynearson srynobio

View GitHub Profile
##fileformat=VCFv4.2
##ALT=<ID=NON_REF,Description="Represents any possible alternative allele at this location">
##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum DP observed within the GVCF block">
##FORMAT=<ID=PGT,Number=1,Type=String,Description="Physical phasing haplotype information, describing how the alternate alleles are phased in relation to one another">
##FORMAT=<ID=PID,Number=1,Type=String,Description="Physical phasing ID information, where each unique ID within a given sample (but not across samples) connects records within a phasing group">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="The phred-scaled genotype likelihoods rounded to the closest integer">
@srynobio
srynobio / Preposed changes for GRCh38.md
Last active December 19, 2018 22:14
Proposed changes for GRCh38

# Proposed changes for GRCh38

Reference/ 
	GRCh37/
	GRCh38/

GATK_Bundle
	GRCh37/
	GRCh38/
package main
import (
"crypto/sha1"
"encoding/base64"
"fmt"
"log"
"runtime"
"sync"
package main
import (
"crypto/sha1"
"encoding/base64"
"fmt"
"log"
"runtime"
"sync"
#!/usr/bin/env perl
use strict;
use warnings;
use feature 'say';
use autodie;

# Script setup: open the input file named by the first command-line
# argument for reading, derive the individual's name from that file name,
# and open "<individual>.align.txt" for appending.
#
# Arguments:
#   $ARGV[0]  path to the input file; any literal ".data.prep.txt" in the
#             path is removed to form the individual's name.
#
# Dies with a usage message when no argument is supplied. Failures to open
# either file are raised by autodie, which reports the file name and the
# system error, so no explicit "or die" is needed on the open calls.
die "Usage: $0 <individual>.data.prep.txt\n"
    unless @ARGV && defined $ARGV[0] && length $ARGV[0];

open( my $FILE, '<', $ARGV[0] );

# Every dot is escaped so that only the literal text ".data.prep.txt" is
# stripped; an unescaped dot would match any character.
( my $individual = $ARGV[0] ) =~ s/\.data\.prep\.txt//g;

# Append mode: output from earlier runs for the same individual is kept.
open( my $OUT, '>>', "$individual.align.txt" );
$> ls NA06985/
NA06985/ERR050082_1.filt.fastq.gz NA06985/ERR068361_1.filt.fastq.gz NA06985/SRR400038_1.filt.fastq.gz NA06985/SRR400039_1.filt.fastq.gz NA06985/SRR709972_1.filt.fastq.gz
NA06985/ERR050082_2.filt.fastq.gz NA06985/ERR068361_2.filt.fastq.gz NA06985/SRR400038_2.filt.fastq.gz NA06985/SRR400039_2.filt.fastq.gz NA06985/SRR709972_2.filt.fastq.gz
NA06985/ERR050082.filt.fastq.gz NA06985/ERR068361.filt.fastq.gz NA06985/SRR400038.filt.fastq.gz NA06985/SRR400039.filt.fastq.gz NA06985/SRR709972.filt.fastq.gz
$> data_prep.pl NA06985/*fastq.gz > NA06985.data.prep.txt
$> perl alignMaker.pl NA06985.data.prep.txt
$> ls
##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=PGT,Number=1,Type=String,Description="Physical phasing haplotype information, describing how the alternate alleles are phased in relation to one another">
##FORMAT=<ID=PID,Number=1,Type=String,Description="Physical phasing ID information, where each unique ID within a given sample (but not across samples) connects records within a phasing group">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="The phred-scaled genotype likelihoods rounded to the closest integer">
##FORMAT=<ID=SAC,Number=.,Type=Integer,Description="Number of reads on the forward and reverse strand supporting each allele (including reference)">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for
cmdline: /sentieon-genomics-201808.03/libexec/driver --thread_count 30 -r /scratch/ucgd/lustre/common/data/Reference/GRCh38/human_g1k_v38_decoy_phix.fasta --interval /scratch/ucgd/lustre/common/data/Regions/GRCh38/UCGD.CDS.Region.bed --algo VarCal --vcf 17-10-27_Test-Yandell-Disease_merged.vcf.gz --var_type SNP --tranches_file 17-10-27_Test-Yandell-Disease_recal.tranches.snp --resource /scratch/ucgd/lustre/common/data/GATK_Bundle/GRCh38/hapmap_3.3.hg38.vcf.gz --resource_param hapmap,known=false,training=true,truth=true,prior=15.0 --resource /scratch/ucgd/lustre/common/data/GATK_Bundle/GRCh38/1000G_omni2.5.hg38.vcf.gz --resource_param omni,known=false,training=true,truth=true,prior=12.0 --resource /scratch/ucgd/lustre/common/data/GATK_Bundle/GRCh38/1000G_phase1.snps.high_confidence.hg38.vcf.gz --resource_param 1000G,known=false,training=true,truth=false,prior=10.0 --resource /scratch/ucgd/lustre/common/data/GATK_Bundle/GRCh38/dbsnp_138.hg38.vcf.gz --resource_param dbsnp,known=true,training=false,truth=false,pr
REMOVE RAW_MQ:
...
progress 100% @chrX:154320758, elapsed 5.6s, est remaining 0.0s, peak mem 281MB, user 156.8s, sys 3.6s, output queue size 887
data size = 175996
There is no training data for annotation SAC for var_type SNP. Please remove SAC from annotations
REMOVE SAC:
...
progress 100% @chrX:154320758, elapsed 9.5s, est remaining 0.0s, peak mem 321MB, user 154.3s, sys 3.8s, output queue size 887
data size = 175996
Error: Package: kmod-lustre-client-2.10.6-1.el7.x86_64 (/kmod-lustre-client-2.10.6-1.el7.x86_64)
Requires: kernel(clear_page_dirty_for_io) = 0x2e1a09e1
Installed: kernel-4.14.88-88.73.amzn2.x86_64 (installed)
kernel(clear_page_dirty_for_io) = 0xba614b0b
Installed: kernel-4.14.101-91.76.amzn2.x86_64 (@amzn2-core)
kernel(clear_page_dirty_for_io) = 0x49960ae2
Available: kernel-4.9.62-10.57.amzn2.x86_64 (amzn2-core)
kernel(clear_page_dirty_for_io) = 0xd3ad0089
Available: kernel-4.9.70-2.243.amzn2.x86_64 (amzn2-core)
kernel(clear_page_dirty_for_io) = 0x69b783ca