Skip to content

Instantly share code, notes, and snippets.

View danielecook's full-sized avatar
😀
Things are going good

Daniel E Cook danielecook

😀
Things are going good
View GitHub Profile
@danielecook
danielecook / realign_bam.sh
Created August 7, 2021 00:49
Remapping an aligned bam
# Source: https://lh3.github.io/2021/07/06/remapping-an-aligned-bam
#
# Re-map an already aligned BAM without writing intermediate files:
# in.bam is streamed through four stages and the result lands in out.bam.
#   1. samtools collate  - streams in.bam to stdout (per the samtools manual
#                          this groups reads by name so mates are adjacent;
#                          verify the -Oun128 flags against your version).
#   2. samtools fastq    - converts the stream to FASTQ, carrying the RG and BC
#                          tags along (-T RG,BC).
#   3. bwa mem           - aligns the FASTQ read from stdin ("-") to ref.fa with
#                          8 threads; -H receives the @RG header lines that are
#                          extracted from in.bam by the process substitution.
#   4. samtools sort     - sorts the alignments (4 extra threads, 4g of memory
#                          per thread) and writes out.bam.
# Requires bash: the <( ... ) process substitution is not POSIX sh.
samtools collate -Oun128 in.bam | samtools fastq -OT RG,BC - \
| bwa mem -pt8 -CH <(samtools view -H in.bam|grep ^@RG) ref.fa - \
| samtools sort -@4 -m4g -o out.bam -
@danielecook
danielecook / get_header_and_combine.sh
Last active June 17, 2021 14:03
get header from first file and drop it from other files #tsv
# Concatenate every *.tsv in the current directory into ${output_file},
# keeping the header line of the first file and dropping it from the others.
#   NR == FNR is true only while awk is still reading the first file;
#   FNR > 1 skips line 1 (the header) of every later file.
# NOTE: keep ${output_file} outside the *.tsv glob (different directory or
# extension); otherwise a file left over from a previous run is picked up as
# an input.
awk 'NR == FNR || FNR > 1' *.tsv > "${output_file}"

# Same merge, but with an extra trailing column that records which file each
# row came from. The result is written to stdout.
#   NR == 1 -> the very first line overall: the header, plus the literal
#              column name "filename".
#   FNR > 1 -> every data row of every file, plus awk's FILENAME.
# The awk program is single-quoted, so $0 needs no backslash and the header
# label must be a double-quoted awk string.
awk -v OFS='\t' 'NR == 1 { print $0, "filename" } FNR > 1 { print $0, FILENAME }' *.tsv
@danielecook
danielecook / fix_bam_index.sh
Created August 11, 2020 19:35
Fix missing BAM indices
# This quick script compares each BAM file's modification time with that of its index (.bai) and re-indexes the BAM when the BAM is much newer than the index.
for i in *.bam; do
bam=${i}
bai=${i}.bai
bam_date=$(stat -c %Y ${bam})
bai_date=$(stat -c %Y ${bai})
diff=`expr ${bam_date} - ${bai_date}`
if [ ${diff} -gt 100000 ]; then
echo "${bam} ${diff}";
sbatch --job-name=${bam} --partition=cpu --time=3:00:00 --cpus-per-task=32 --wrap "samtools index -@ 32 ${bam}"
@danielecook
danielecook / split_bed.sh
Created August 11, 2020 14:09
Split bedfile by chromosome and chunk size #bed
function split_bed() {
# Split a BED file by chromosome and by chunk size (chunk_size=1000).
# In other words, each output file contains records from at most one chromosome.
# Output file sizes may vary.
awk -v chunk_size=1000 'NR == 1 { chrom=$1; iter=0; fname_iter=0; print chrom }
{
if(chrom == $1 && iter <= chunk_size) {
print > sprintf("x%04d_%s.segment.txt", fname_iter, $1);
iter++;
} else {
@danielecook
danielecook / resolve_symbolic.sh
Created August 5, 2020 15:22
Resolve symbolic link #bash
#######################################
# Replace a symbolic link with a real copy of whatever it points to.
# Arguments:
#   $1 - path to the symbolic link
# Outputs:
#   Writes a diagnostic to stderr when the link target does not exist.
# Returns:
#   0 if the link was replaced, or if $1 is missing / not a symbolic link
#   (nothing to do); non-zero if the target is missing or the copy fails.
#######################################
resolve-symbolic-link() {
  local link=${1-}
  local target

  # Nothing to do for a missing argument or for a path that is not a symlink.
  [[ -n "$link" && -L "$link" ]] || return 0

  target=$(readlink -- "$link") || return

  # A relative target is relative to the directory that holds the link, not
  # to the caller's working directory.
  if [[ "$target" != /* ]]; then
    target="$(dirname -- "$link")/${target}"
  fi

  # Check before deleting: removing a dangling link would leave nothing behind.
  if [[ ! -e "$target" ]]; then
    printf 'resolve-symbolic-link: target of %s does not exist: %s\n' \
      "$link" "$target" >&2
    return 1
  fi

  rm -f -- "$link" || return
  cp -rf -- "$target" "$link"
}
@danielecook
danielecook / node.sh
Created July 2, 2020 20:23
ssh into node and retain the current directory #hpc
#######################################
# ssh into a node and start bash in the caller's current directory.
# Usage:
#   node ca015
# Arguments:
#   $1 - host to connect to
# Returns:
#   2 when no host is given; otherwise the exit status of ssh.
#######################################
function node() {
  if [[ -z "${1-}" ]]; then
    printf 'Usage: node <host>\n' >&2
    return 2
  fi

  local remote_dir
  # The command string is parsed again by the remote shell, so the path is
  # shell-quoted here; otherwise directories with spaces or metacharacters
  # break the cd. (%q may emit $'...' for unusual characters, which needs a
  # bash-compatible login shell on the remote side.)
  printf -v remote_dir '%q' "${PWD}"

  # -t forces a tty so the remote bash is interactive.
  ssh -t "${1}" "cd ${remote_dir}; bash"
}
@danielecook
danielecook / setup_script.sh
Created June 15, 2020 22:24
pipeline setup script
#!/usr/bin/bash
# Report a fatal problem and stop.
# Writes $1 to stderr, preceded by a blank line and a tab and followed by a
# blank line, wrapped in the terminal sequences from `tput setaf 1` and
# `tput sgr0`, then exits the shell with status 1.
# Backslash escapes inside $1 are expanded.
error() {
  local colour_on colour_off
  # A failing tput (e.g. no usable TERM) must not stop the message.
  colour_on=$(tput setaf 1) || true
  colour_off=$(tput sgr0) || true
  printf '%b\n' "\n\t${colour_on}${1}${colour_off}\n" 1>&2
  exit 1
}
# Print an informational message.
# Writes $1 to stderr on a single line, wrapped in the terminal sequences
# from `tput setaf 3` and `tput sgr0`. Backslash escapes inside $1 are
# expanded. Does not exit.
msg() {
  local colour_on colour_off
  # A failing tput (e.g. no usable TERM) must not stop the message.
  colour_on=$(tput setaf 3) || true
  colour_off=$(tput sgr0) || true
  printf '%b\n' "${colour_on}${1}${colour_off}" 1>&2
}
@danielecook
danielecook / vectorize.R
Created June 6, 2020 15:26
vectorize a string list; Useful for setting constants
# Turn a newline-separated string into a character vector, one element per
# non-empty line. Handy for declaring constants:
#
#   ITEMS <- vectorize("
#   a
#   b
#   c
#   ")
#
# Only the first element of `x` is used. Lines are not trimmed, so any
# leading or trailing whitespace on a line is kept.
vectorize <- function(x) {
  pieces <- strsplit(x, "\n")
  items <- pieces[[1]]
  # Drop missing values and empty lines.
  drop <- is.na(items) | items == ""
  items[!drop]
}
@danielecook
danielecook / parse_format.R
Last active May 12, 2020 16:28
Parse format column in R
# Expand a colon-delimited VCF FORMAT/sample pair into one column per key.
#
# For every row of `df`, `vcf_format` (e.g. "GT:DP") is split into keys and
# `vcf_gt` (e.g. "0/1:12") into the matching values; the values are named by
# the keys and turned into a one-row data.table, and all rows are stacked with
# rbindlist(). The stacked table is assigned to `parse_format_column` - note
# that this is a value computed once from `df`, not a function.
#
# NOTE(review): assumes `df` is a data.table with character columns
# `vcf_format` and `vcf_gt`, and that data.table is attached (rbindlist and
# data.table are called without a namespace prefix) - confirm at the call site.
parse_format_column <-
df[, .(rbindlist(
purrr::map2(
# fixed=TRUE: split on the literal ":" rather than treating it as a regex.
strsplit(vcf_format, ":", fixed=TRUE),
strsplit(vcf_gt, ":", fixed=TRUE),
# x = keys, y = values for one row; t(y) makes a 1-row matrix whose column
# names are the keys.
function(x, y) { names(y) <- x; data.table(t(y)) }
)
)
),
]
@danielecook
danielecook / stop_containers.sh
Created May 7, 2020 18:50
Stop and remove running containers #docker
# Stop and then remove every container reported by `docker ps -a -q`
# (-a lists all containers, not only the running ones).
#
# `docker stop` and `docker rm` both fail when called with no container IDs,
# so each step is skipped when the list is empty. The list is queried again
# before removal because stopping can change the set of containers
# (for example, containers started with --rm remove themselves).
container_ids=$(docker ps -a -q)
if [ -n "${container_ids}" ]; then
  # shellcheck disable=SC2086 # one ID per word: word splitting is intended
  docker stop ${container_ids}
fi

container_ids=$(docker ps -a -q)
if [ -n "${container_ids}" ]; then
  # shellcheck disable=SC2086 # one ID per word: word splitting is intended
  docker rm ${container_ids}
fi