Skip to content

Instantly share code, notes, and snippets.

@nh13
nh13 / README.md
Created October 25, 2023 07:23
Split the value in a SAM tag

Split a SAM tag value and store in multiple SAM tags

This is motivated by fulcrumgenomics/fgsv#25, whereby the be tag stores a semicolon-delimited list of break-end values, and we want each value to be in its own tag. Also supports the case where the be tag may contain multiple such delimited values, each separated by a different delimiter (in this case, a comma).

python split_sam_tag.py \
  --in-bam fgsv.bam \
  --out-bam split.bam \
 --in-tag be \
# Truncate every sequence and quality line of a gzipped FASTQ to 26 characters.
# Assumes the input uses strict four-line records, so sequence and quality are
# the even-numbered lines; header and '+' lines pass through unchanged.
# substr() starts at 1: awk strings are 1-based, and a start index of 0 yields
# 25 or 26 characters depending on the awk implementation.
pigz -d -c in.fastq.gz \
  | awk 'NR % 2 == 0 { print substr($0, 1, 26); next } { print }' \
  | pigz -c - > out.fastq.gz
from collections import defaultdict
from pathlib import Path
from typing import Any
from typing import Callable
from typing import Dict
from typing import List
from typing import Optional
import snakemake
# Delimited-data files: no empty columns please
# Align a whitespace-delimited file into columns and page it without wrapping.
#   $1 - optional path to the file; reads stdin when omitted.
# ${1:+"$1"} quotes the path (safe for names with spaces) but expands to
# nothing when no argument is given, so piping into the function still works.
function columnit { column -t ${1:+"$1"} | less -S; }
# Align a tab-delimited file into columns and page it without wrapping.
#   $1 - optional path to the file; reads stdin when omitted.
# ${1:+"$1"} quotes the path (safe for names with spaces) but expands to
# nothing when no argument is given, so piping into the function still works.
function tabit { column -t -s $'\t' ${1:+"$1"} | less -S; }
# Delimited-data files: empty column values are nasty
# Like columnit, but first fills empty tab-delimited fields with "NA" so that
# `column -t` does not collapse them and shift later columns to the left.
#   $1 - optional path to the file; reads stdin when omitted.
function columnit-empty {
  local tab=$'\t'
  # Loop until no adjacent tabs remain: a single /g pass cannot fix runs of
  # three or more tabs because substitution matches do not overlap.
  sed -E -e ':fill' -e "s_${tab}${tab}_${tab}NA${tab}_g" -e 't fill' ${1:+"$1"} \
    | column -t \
    | less -S
}
# Like tabit, but first fills empty tab-delimited fields with "NA" so that
# `column -t -s TAB` does not collapse them and shift later columns to the left.
#   $1 - optional path to the file; reads stdin when omitted.
function tabit-empty {
  local tab=$'\t'
  # Loop until no adjacent tabs remain: a single /g pass cannot fix runs of
  # three or more tabs because substitution matches do not overlap.
  sed -E -e ':fill' -e "s_${tab}${tab}_${tab}NA${tab}_g" -e 't fill' ${1:+"$1"} \
    | column -t -s "$tab" \
    | less -S
}
# Show me the money grep, now
# Run grep with unbuffered stdout (via GNU coreutils' gstdbuf) so matches
# appear immediately when grep sits in the middle of a pipeline.
#   $@ - arguments forwarded verbatim to grep.
# "$@" keeps each argument intact; unquoted $@ would split patterns and paths
# on whitespace and expand glob characters.
function grep-nobuff { gstdbuf -o0 grep "$@"; }
@nh13
nh13 / michael_hoffmans_super_special_wishmaker.dict
Last active May 22, 2020 02:31
Michael Hoffmans Super Special Wishmaker
@HD VN:1.6
@SQ SN:chr1 LN:249250621 am:1 ga:CM000663.1 sn:1 ra:NC_000001.10 un:chr1 AN:1,CM000663.1,NC_000001.10 sr:assembled-molecule
@SQ SN:chr2 LN:243199373 am:2 ga:CM000664.1 sn:2 ra:NC_000002.11 un:chr2 AN:2,CM000664.1,NC_000002.11 sr:assembled-molecule
@SQ SN:chr3 LN:198022430 am:3 ga:CM000665.1 sn:3 ra:NC_000003.11 un:chr3 AN:3,CM000665.1,NC_000003.11 sr:assembled-molecule
@SQ SN:chr4 LN:191154276 am:4 ga:CM000666.1 sn:4 ra:NC_000004.11 un:chr4 AN:4,CM000666.1,NC_000004.11 sr:assembled-molecule
@SQ SN:chr5 LN:180915260 am:5 ga:CM000667.1 sn:5 ra:NC_000005.9 un:chr5 AN:5,CM000667.1,NC_000005.9 sr:assembled-molecule
@SQ SN:chr6 LN:171115067 am:6 ga:CM000668.1 sn:6 ra:NC_000006.11 un:chr6 AN:6,CM000668.1,NC_000006.11 sr:assembled-molecule
@SQ SN:chr7 LN:159138663 am:7 ga:CM000669.1 sn:7 ra:NC_000007.13 un:chr7 AN:7,CM000669.1,NC_000007.13 sr:assembled-molecule
@SQ SN:chr8 LN:146364022 am:8 ga:CM000670.1 sn:8 ra:NC_000008.10 un:chr8 AN:8,CM000670.1,NC_000008.10 sr:assembled-molecule
@SQ SN:chr9 LN:14121343
@nh13
nh13 / primer_pairs_example.sam
Last active May 21, 2020 07:20
Example SAM file with Primer Pairs
@HD VN:1.6 GO:query SO:unsorted SS:unsorted:template-coordinate
@SQ SN:chr1 LN:248956422 M5:6aef897c3d6ff0c78aff06ac189178dd AS:hg38 UR:ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_genomic.fna.gz SP:Homo sapiens AN:NC_000001.11,CM000663.2
@SQ SN:chr2 LN:242193529 M5:f98db672eb0993dcfdabafe2a882905c AS:hg38 UR:ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_genomic.fna.gz SP:Homo sapiens AN:NC_000002.12,CM000664.2
@SQ SN:chr3 LN:198295559 M5:76635a41ea913a405ded820447d067b0 AS:hg38 UR:ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_genomic.fna.gz SP:Homo sapiens AN:NC_000003.12,CM000665.2
@SQ SN:chr4 LN:190214555 M5:3210fecf1eb92d5489da4346b3fddc6e AS:hg38 UR:ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_genomic.fna.gz SP:Homo sapiens AN:NC_000004.12,CM00066
@nh13
nh13 / add_rg_per_read.sc
Created April 17, 2020 03:21
Add a read group (and sample) per read in a SAM/BAM
import $ivy.`com.fulcrumgenomics::fgbio:1.1.0`
import com.fulcrumgenomics.FgBioDef._
import java.nio.file.{Path, Paths}
import com.fulcrumgenomics.commons.util.{LazyLogging, Logger}
import com.fulcrumgenomics.util.ProgressLogger
import com.fulcrumgenomics.bam.api._
import htsjdk.samtools.SAMReadGroupRecord
// So that a [[Path]] can be built from a [[String]] on the command line
import enum
from typing import List
from pathlib import Path
class Alignment(enum.Enum):
    """Enumeration with two members, ``BWA`` and ``Bowtie``.

    NOTE(review): presumably these name the supported read-alignment tools;
    confirm against the code that consumes this enum.
    """

    BWA = 1
    Bowtie = 2
class VariantCalling(enum.Enum):
@nh13
nh13 / .bash_profile
Created December 23, 2019 18:23
Bash function for switching AWS profiles
function aws_switch {
last_profile=$(cat ~/.aws/.last_profile | xargs)
last_profile_i=1;
i=1
profiles=()
for profile in $(grep "^\[.*\]$" ~/.aws/credentials | sed -e 's_^\[__' -e 's_\]$__' | sort)
do
echo "[$i] $profile";
profiles+=( "$profile" );
@nh13
nh13 / README.md
Created August 23, 2019 16:18
Mill issues