Skip to content

Instantly share code, notes, and snippets.

Avatar
😀
Things are going good

Daniel E Cook danielecook

😀
Things are going good
View GitHub Profile
@danielecook
danielecook / realign_bam.sh
Created Aug 7, 2021
Remapping an aligned bam
View realign_bam.sh
# Source: https://lh3.github.io/2021/07/06/remapping-an-aligned-bam
#
# Remap an already-aligned BAM in one streaming pipeline (no intermediate files
# other than the collate temp files):
#   1. samtools collate - group reads by name; -O writes to stdout, -u keeps
#                         the stream uncompressed, -n sets the temp-file count.
#   2. samtools fastq   - convert back to FASTQ; -O prefers original qualities
#                         when present, -T carries the RG and BC tags along.
#   3. bwa mem          - realign; -p reads interleaved pairs from stdin, -C
#                         copies the FASTQ comment (the tags) into the SAM
#                         records, -H re-inserts the @RG header lines taken
#                         from the input BAM.
#   4. samtools sort    - sort the alignments and write out.bam.
samtools collate -O -u -n 128 in.bam \
  | samtools fastq -O -T RG,BC - \
  | bwa mem -p -t 8 -C -H <(samtools view -H in.bam | grep '^@RG') ref.fa - \
  | samtools sort -@ 4 -m 4g -o out.bam -
@danielecook
danielecook / get_header_and_combine.sh
Last active Jun 17, 2021
get header from first file and drop it from other files #tsv
View get_header_and_combine.sh
# get header from first file and drop it from other files
#   NR == FNR -> still inside the first file: print every line, header included
#   FNR > 1   -> any later file: print everything except its first (header) line
# The ./ prefix stops awk from treating an unusual file name (leading '-', or
# one containing '=') as an option or a var=value assignment.
# NOTE: the output file must not itself match ./*.tsv, otherwise it becomes one
# of its own inputs.
awk 'NR == FNR || FNR > 1' ./*.tsv > "${output_file}"
# Add a column for filename and filename column
# Same merge as above, but every data row gets a trailing tab-separated field
# holding the name of the file it came from, and the single retained header row
# gets the literal column name "filename".
#   NR == 1 -> very first line overall: emit the header once, then skip the
#              data rule so the line is not printed a second time
#   FNR > 1 -> data rows of every file (headers of later files are dropped)
# Writes to stdout; redirect as needed.
awk -v OFS='\t' 'NR == 1 { print $0, "filename"; next } FNR > 1 { print $0, FILENAME }' *.tsv
@danielecook
danielecook / fix_bam_index.sh
Created Aug 11, 2020
Fix missing BAM indices
View fix_bam_index.sh
# This quick script compares each BAM's modification time with that of its .bai index and submits a re-indexing job when the BAM is much newer than the index.
for i in *.bam; do
bam=${i}
bai=${i}.bai
bam_date=$(stat -c %Y ${bam})
bai_date=$(stat -c %Y ${bai})
diff=`expr ${bam_date} - ${bai_date}`
if [ ${diff} -gt 100000 ]; then
echo "${bam} ${diff}";
sbatch --job-name=${bam} --partition=cpu --time=3:00:00 --cpus-per-task=32 --wrap "samtools index -@ 32 ${bam}"
@danielecook
danielecook / split_bed.sh
Created Aug 11, 2020
Split bedfile by chromosome and chunk size #bed
View split_bed.sh
function split_bed() {
# This function will split a bed file by chromosome and chunk_size=1000
# In other words, split files will only possess one chromosome max.
# File sizes may be variable.
awk -v chunk_size=1000 'NR == 1 { chrom=$1; iter=0; fname_iter=0; print chrom }
{
if(chrom == $1 && iter <= chunk_size) {
print > sprintf("x%04d_%s.segment.txt", fname_iter, $1);
iter++;
} else {
@danielecook
danielecook / resolve_symbolic.sh
Created Aug 5, 2020
Resolve symbolic link #bash
View resolve_symbolic.sh
#######################################
# Replace a symbolic link with a real copy of whatever it points to.
# Arguments: $1 - path to examine. Anything that is not a symbolic link
#                 (including an empty argument) is left untouched.
# Outputs:   a diagnostic on stderr when the link target does not exist
# Returns:   0 on success or when $1 is not a symlink; non-zero when the
#            target is missing or the remove/copy step fails
#######################################
resolve-symbolic-link() {
  local link=${1-}
  local target

  # Quoted on purpose: unquoted, a path with spaces breaks the test and an
  # empty argument degenerates to `[ -L ]`, which is always true.
  [ -L "$link" ] || return 0

  target=$(readlink -- "$link") || return

  # readlink reports a relative target relative to the directory that holds
  # the link, not relative to the caller's working directory.
  case "$target" in
    /*) ;;
    *) target="$(dirname -- "$link")/$target" ;;
  esac

  # Refuse to delete a dangling link: removing it first and then failing to
  # copy would lose the only record of where it pointed.
  if [ ! -e "$target" ]; then
    printf 'resolve-symbolic-link: target of %s does not exist: %s\n' \
      "$link" "$target" >&2
    return 1
  fi

  rm -f -- "$link" && cp -rf -- "$target" "$link"
}
@danielecook
danielecook / node.sh
Created Jul 2, 2020
ssh into node and retain the current directory #hpc
View node.sh
#######################################
# ssh into a node and start an interactive bash in the caller's current
# directory.
# Usage:     node ca015
# Arguments: $1 - host name handed to ssh
# Outputs:   usage message on stderr when no host is given
# Returns:   2 when the host is missing, otherwise ssh's exit status
#######################################
node() {
  if [ -z "${1-}" ]; then
    printf 'usage: node <host>\n' >&2
    return 2
  fi

  local remote_dir
  # The command string is re-parsed by the shell on the remote side, so the
  # path has to be escaped here; otherwise a directory containing spaces or
  # shell metacharacters is split (or executed) remotely.
  printf -v remote_dir '%q' "$PWD"

  # -t forces a tty so the trailing `bash` is interactive. `;` (not `&&`) is
  # kept so a shell is still opened when the directory is absent on the node.
  ssh -t "$1" "cd ${remote_dir}; bash"
}
@danielecook
danielecook / setup_script.sh
Created Jun 15, 2020
pipeline setup script
View setup_script.sh
#!/usr/bin/bash
#######################################
# Print a highlighted error message and terminate the script.
# Arguments: $1 - message text (backslash escapes are interpreted by echo -e)
# Outputs:   blank line, tab-indented message in tput foreground colour 1
#            (typically red), blank line - all on stderr
# Returns:   never; exits the shell with status 1
#######################################
error() {
  local color_on color_off
  # `|| true`: a failing tput (e.g. TERM unset) must stay as harmless as it is
  # inside an inline command substitution, even when the caller uses `set -e`.
  color_on=$(tput setaf 1) || true
  color_off=$(tput sgr0) || true
  echo -e "\n\t${color_on}${1}${color_off}\n" >&2
  exit 1
}
#######################################
# Print an informational message without stopping the script.
# Arguments: $1 - message text (backslash escapes are interpreted by echo -e)
# Outputs:   the message in tput foreground colour 3 (typically yellow),
#            on stderr
#######################################
msg() {
  local color_on color_off
  # `|| true`: a failing tput (e.g. TERM unset) must stay as harmless as it is
  # inside an inline command substitution, even when the caller uses `set -e`.
  color_on=$(tput setaf 3) || true
  color_off=$(tput sgr0) || true
  echo -e "${color_on}${1}${color_off}" >&2
}
@danielecook
danielecook / vectorize.R
Created Jun 6, 2020
vectorize a string list; Useful for setting constants
View vectorize.R
# Turn a newline-separated block of text into a character vector with one
# element per non-empty line. Handy for declaring constants:
#
#   ITEMS <- vectorize("
#   a
#   b
#   c
#   ")
#
# Only the first element of `x` is split; lines are kept verbatim (no
# whitespace trimming), and empty or NA entries are dropped.
vectorize <- function(x) {
  entries <- strsplit(x, "\n")[[1]]
  is_blank <- is.na(entries) | entries == ""
  entries[!is_blank]
}
@danielecook
danielecook / parse_format.R
Last active May 12, 2020
Parse format column in R
View parse_format.R
# Expand a colon-delimited value column into one column per field, using a
# parallel colon-delimited column that supplies the field names.
# NOTE(review): assumes `df` is a data.table with character columns
# `vcf_format` (field names, e.g. "GT:DP") and `vcf_gt` (matching values,
# e.g. "0/1:12") - confirm against the caller.
# NOTE(review): despite its name this is not a function; the expression is
# evaluated immediately and the resulting table is assigned.
parse_format_column <-
df[, .(rbindlist(
# Walk the two split columns in parallel, one (names, values) pair per row.
purrr::map2(
# fixed=TRUE: split on a literal ":" rather than treating it as a regex.
strsplit(vcf_format, ":", fixed=TRUE),
strsplit(vcf_gt, ":", fixed=TRUE),
# Name the values after their fields; t() turns the named vector into a
# one-row matrix, so each input row becomes a one-row data.table.
function(x, y) { names(y) <- x; data.table(t(y)) }
)
)
),
]
@danielecook
danielecook / stop_containers.sh
Created May 7, 2020
Stop and remove running containers #docker
View stop_containers.sh
# Stop, then remove, every container known to the local Docker daemon.
# `docker ps -a -q` prints the IDs of all containers (running or not), one per
# line. The list is captured once so both commands act on the same set, and
# the guard avoids calling `docker stop` / `docker rm` with no arguments,
# which fails with a usage error when there are no containers.
container_ids=$(docker ps -a -q)
if [ -n "${container_ids}" ]; then
  # shellcheck disable=SC2086 # word splitting is intended: one argument per ID
  docker stop ${container_ids}
  # shellcheck disable=SC2086 # word splitting is intended: one argument per ID
  docker rm ${container_ids}
fi