opplatek /
Last active May 6, 2024 12:55
Subset a BAM file by a number or fraction of mappings or reads
# Subsample BAM to $SUBSAMPLE_MAPPINGS mappings (approximately)
# TODO: Subsample by number of reads, not mappings (a single read can be mapped multiple times, so the final number
# of "lines" will differ from $SUBSAMPLE_READS)
# check out
$ diff /etc/skel/.bashrc ~/.bashrc # to apply the diff run $ patch bashrcToBeChanged bashrc_diff
> ## Add git branch name to prompt
> parse_git_branch() {
> git branch 2> /dev/null | sed -e '/^[^*]/d' -e 's/* \(.*\)/ (\1)/'
> }
> #export PS1="\u@\h \[\033[32m\]\w\[\033[33m\]\$(parse_git_branch)\[\033[00m\] $ "
< PS1='${debian_chroot:+($debian_chroot)}\[\033[01;32m\]\u@\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ '
# Short aliases for frequently used commands
alias 'h'='head'
alias haed="head" # My most common typo
alias 'gitl'='git log -p' # git log including the patch of every commit
alias 'p'='pwd'
alias 'dockeri'='docker images'
alias 'cleanswap'='sudo swapoff -a; sudo swapon -a' # turn all swap off, then back on
alias 'python3.8unit'='python3.8 -m unittest'
function gitreset() {
local git_branch_name=$(git rev-parse --abbrev-ref HEAD)
opplatek /
Created September 28, 2023 08:22
Pad a number with leading zeros to a predefined number of digits
# Easier
# Left-pad ${DIGIT} with zeros to a width of ${TARGET_DIGITS} characters.
# The width is passed through '*' rather than interpolated into the format
# string, and both expansions are quoted so they are never word-split.
printf "%0*d\n" "${TARGET_DIGITS}" "${DIGIT}"
# More complicated
opplatek /
Created September 28, 2023 08:18
Length of string in Bash
echo ${#VAR}
opplatek /
Created August 9, 2023 13:01
Parse GitLab API raw JSON for projects/groups/subgroups, filter and extract a specific field
#!/usr/bin/env python3
# Read GitLab raw JSON and extract ssh url
import sys
import json
def filter_dicts(list_of_dicts, value_to_check):
filtered_list = [item for item in list_of_dicts if not item.get(value_to_check, True)]
opplatek / read_file_list.R
Created August 3, 2023 08:17
Read a list of files (TSV) and merge them to one data.frame
# Read a list of files (TSV) and merge them to one data.frame
read_tsv <- function(x){
df.file <- read.table(file = x, header = T, stringsAsFactors = F, sep = "\t")
files <- c("a.tsv", "b.tsv", "c.tsv")
opplatek /
Last active August 3, 2023 08:17
Calculate median and mean in AWK from a specific column
# Calculate median and mean with AWK from a specific column
# Both pipelines read the same tab-separated column and both print nothing
# when there is no input, so MEAN and MEDIAN always describe the same values.
COL_NUMBER=1 # column number (1-based, tab-separated) with values to calculate median/mean from
# Mean: sum of the column divided by the number of input lines.
MEAN=$(echo -e "1\n10\n5\n4\n3" | awk -F'\t' -v N="$COL_NUMBER" '{ sum += $N } END { if (NR > 0) print sum / NR }')
# Median: sort the column numerically and store it in a[0..i-1]; for an even
# count average the two middle values, for an odd count take the middle one.
# The i == 0 guard stops empty input from being reported as a median of 0.
MEDIAN=$(echo -e "1\n10\n5\n4\n3" | cut -f"${COL_NUMBER}" | sort -n | awk '{ a[i++] = $1 } END { if (i == 0) exit; x = int((i + 1) / 2); if (x < (i + 1) / 2) print (a[x - 1] + a[x]) / 2; else print a[x - 1] }')
opplatek / extract_subcolumn_info_awk
Last active July 19, 2023 13:46
Extract column by name from another column split by separator using AWK
# We have a file that has a column with multiple additional "subcolumns" separated by the ; character
# We want to extract a specific subcolumn from the "merged" column that starts with a specific word
# Get 1000 Genomes project VCF
# This VCF has an INFO column (column no. 8) that stores additional information
zcat ALL.wgs.mergedSV.v8.20130502.svs.genotypes.GRCh38.vcf.gz | grep -v "^#"| cut -f8 | head
# Convert apply() result to data.frame
# Sample data frame
df <- data.frame(A = c(1, 2, 3),
B = c(4, 5, 6))
# Sample function
calculate_sum <- function(row) {