Last active June 10, 2023 00:05
Bash basics, so I don't have to look them up all the time.
### Conditionals
if [[ -d "${DIRECTORY}" ]]; then ... fi # Returns true if the directory exists
if [[ ! -d "${DIRECTORY}" ]]; then ... fi # Returns true if the directory does not exist
if [[ ! -e "${file}" ]]; then ... fi # Returns true if the file/directory does not exist
if [[ -z ${variable} ]]; then ... fi # Returns true if variable is unset or empty.
if [[ $? -ne 0 ]]; then .. fi # Returns true if the previous command has failed.
# Create directory if not already exists:
[ -d foo ] || mkdir foo
mkdir -p foo
# Or a more sophisticated version:
if [[ ! -e $dir ]]; then
mkdir $dir
elif [[ ! -d $dir ]]; then
echo "$dir already exists but is not a directory" 1>&2
# Arithmetic conditions:
if (( $var % 4 == 0 )); then echo "The number is dividable by four."; fi
# Chromosome names are usually read from job indices, so the value has to be fixed: the 23rd chromosome is referred to as X
if [[ $chr == 23 ]]; then chr="X"; fi;
# Checking if variable is an integer:
if [[ $window =~ ^-?[0-9]+$ ]]; then echo "Variable is integer!"; fi
### For/while loops
for cohort in `cat ${workingDir}/../../cohort.list.txt` ; do # looping through a list read from a file (words separated by whitespace)
for i in $(seq 1 12); do # looping through a list of numbers defined by a sequence
for i in {1..22}; do # looping through a list of numbers defined by a brace expansion
# Reading a file line by line:
while read p; do
echo $p
done < file
# The same as above, but more intuitive:
cat file | while read line; do
echo $line
# Looping through the indices of arrays:
for i in "${!foo[@]}"; do
# Loop control:
break # Breaks the execution of a loop (no more rounds).
continue # Continues to the next element of the loop
### Arrays
declare -a arrayname=(element1 element2 element3)
echo ${#arrayname[@]} #Number of elements in the array
echo ${#arrayname} #Number of characters in the first element of the array
echo ${#arrayname[3]} # length of the element located at index 3
echo ${arrayname[@]:3:2} # Extracting certain elements of the array
# Splitting string to get an array:
array=(${string//;/ })
# Splitting a string and access the resulting array:
arrIN=(${IN//;/ })
# Using associative arrays in Bash:
declare -A aa # -A shows it will be an associative array
# assigning key/value pairs of an associative array:
aa=([hello]=world [ab]=cd)
# Retrieving element of an associative array:
if [[ ${aa[hello]} ]]; then
echo "equal"
# Iterating over an associative array in bash
for i in "${!aa[@]}"; do
echo "key : $i"
echo "value: ${aa[$i]}"
### Simple variables
# put command output into a variable:
var=$(ls -la)
# Exporting shell variable into awk:
# Substituting string in variable:
mv ${i} ${i/cica/kutya}
cica="cirmos cica hajj, hova lett a vaj"
echo ${cica/ /_} # one replace
echo ${cica// /_} # Global replace
### Tips & tricks
# Finding lines in file, that do not contain the pattern
cat <file> | grep -v -E "pattern"
# Creating small text file with cat:
cat > new_file # Do typing, and once you are ready hit ctrl-d
# Get the first 20k entries from a vcf file plus the header indicated by # tags:
zcat file.vcf.gz | perl -lane 'if( $_ =~ /#/){print $_ } else {$a++; print $_; die if $a == 20000;}' | bgzip > new.vcf.gz
# find only non unique lines in file:
cat <file> | sort | uniq -d
# get the number of occurrences of each distinct line in file
cat <file> | sort | uniq -c
# Get a sorted list of unique items in a list:
# Source:
sort -u # The -u switch makes the output unique
sort -n # Numerical sort
sort -r <file> # Sort file in a reverse order, can be used with other commands.
sort <file1> <file2> # Sorting multiple files together
sort -nu <file1> <file2> # Sorting two files numerically, removing duplicates
sort -t"," file # Sorting by multiple fields, definition of the delimiter
sort -t"," -km,n # Sorting file by multiple columns, starting from column m to n
sort -t"," -k2,2 # Sorting file by second column
sort -t"," -k
# Print out line only if the exported pattern is matched.
more file | awk -v ref="$var" 'match($0, ref) {print $0}'
awk -v ref1="$var1" -v ref2="$var2" # Exporting more variables into awk
# Get current date:
date "+%Y.%m.%d"
# if you want to echo tab separated characters in bash:
echo -e "cica\tkutyus"
# redirecting messages to stderr:
echo "This message will be printed to the standard error." 1>&2
# printing to stdout without trailing newline:
echo -n "Some text "
echo "this text will be in the same line!"
# Using watch for commands with pipes:
watch -n 1 'bjobs | wc -l'
# if you want to print a table to the screen with its columns aligned, then use:
column -t
# Piping arguments into a command:
echo "url" | xargs curl
# Creating archive with tar:
tar cf archive_name.tar dirname/
tar czf archive_name.tar.gz dirname/
# Moving files at the same time:
tar czvf archive_name.tar.gz dirname/ | xargs rm -fr
# Extracting a single file from a tar archive:
tar xvf test.tar -C anotherDirectory/ testfile1
# Get list of files of the archive:
tar tf test.tar
# joining together gzipped files:
# -a # - keep unmatched rows from this file.
# -1 # - join the first file by this column.
# -e <str> - string for missing values.
# -o order of output column.
join <(zcat file1.gz) <(zcat file2.gz) -1 1 -2 1 -a1 -e "NA" -o '1.1 1.2 1.3 2.2'
# Some special joins:
paste -d" " <(zcat ${inputDir}/${legendFile} | cut -d" " -f1-4 | tail -n +2) <(zcat ${inputDir}/${hapFile}) | gzip > ${temp}/${outfile}
# -d" " - We specify the character we use to join the lines
# <() - Files are piped into the paste process
# zcat File1.gz - The first file is gzipped, so as we join files, we have to unzip as well
# | cut -d" " -f1-4 - We define space as column separator, then we cut the first four columns
# | tail -n +2 - We get rid of the first row.
# Pattern for grep can be piped in:
cut -f2 /tmp/chr22_1_temp | grep -f - <(zcat /lustre/scratch113/projects/helic/Reference_panel/uk10k_legend_files/chr22.legend.gz)
# We pipe the second column of a file into grep as the search patterns.
# We search for these patterns in a file, which has to be decompressed with zcat
# Grepping multiple patterns:
grep -E 'foo|bar' *.txt
egrep 'foo|bar' *txt # egrep is equal to grep -E
# Checking file size:
size=$(stat -c '%s' /uk10k_vs_helic/chr${chr}_chunk${chunk}_counts.csv )
if [ ! ${size} -ge 50 ]; then fi
# shuffle lines in files:
cat file | shuf > file_shuffled
# chmod switches:
1 = ..x
2 = .w.
4 = r..
3 = .wx
5 = r.x
6 = rw.
7 = rwx
# Recursively change permission on folders:
chmod -R g+w folder
chmod -R o-x folder
# Killing process running in the background:
ps -aux | grep script_name
# Submitting mathematical expression into the Bash shell:
cica=1231422; kutya=2345
echo `expr ${cica} - ${kutya}`
echo $(( $cica - $kutya ))
# Keep stuff downloading:
wget -c --tries=0 --read-timeout=20 -O ${path_to_file}/filename ${URL} # -O names the output file (lowercase -o would name the log file)
# Modification of the files by a chain of commands in place:
cat file | awk '$4 > 12' | sponge file
# Substitute string in file using sed:
sed -i 's@{pattern1}@'"{pattern2}"'@' <file>
sed -i -e 's/pattern1/pattern2/g' <file>
sed -f script.file.sed <targetfile>
# Compressing a whole folder using gzip:
tar -zcvf archive-name.tar.gz directory-name # Creating archive
tar -zxvf archive-name.tar.gz # Extracting archive
# Multiple echoes into the same line:
echo -n "cica"
echo "ful"
# Gemma does not read files where the phenotype is missing. The values in the 6th column cannot be -9
# So it has to be replaced:
awk '$6 = 1' ${path}/${prefix}.fam | sponge ${path}/${prefix}.fam
# Nice top and loop hack:
tmuxes=$(ps -e | grep tmux | cut -d" " -f1) # list of PIDs for all running tmux sessions.
top $(for id in $tmuxes ; do echo -p $id; done) # top only these PIDs.
# Visualize hidden stuffs:
type ll # show the alias of the command ll
type mkcdir # echo the content of the mkcdir function
# Join two files by matching columns:
join -j1 -a 1 -e NA -o 1.1 1.2 2.1 <(sort -k1 file1 ) <(sort -k1 file2)
# -j join by this column, -o print these columns, -a print unpaired lines from this file, -e replace empty fields with this string.
# Very fast grep from files
LC_ALL=C grep pattern file
### awk
'BEGIN {total=0}{
print itemno," ", book,"\t","$"bookamount;
} END { print "Total Amount = $"total }'
# awk special variables:
# NR - line number
# NF - number of fields
# FS - input field separator
# OFS - output field separator
# $NF - last column
# $(NF - 1) - column second from the last
awk '{print $(NF -1), $NF }' # printing out the last two columns
### Farm stuffs:
# Killing all bsubbed jobs on farm:
bjobs | cut -d" " -f1 | sort -u | xargs -n1 bkill
# Accessing jobindex within script:
# More information on the available queues:
bqueues -l normal
# Submit script to bsub (also selecting queue):
bsub -G helic \
-J "Hvs1kg_${chr}_${chunk}" -M8000 -R'select[mem>8000] rusage[mem=8000]' \
-e ${logdir}/Hvs1kg_${chr}_${chunk}.error \
-o ${logdir}/Hvs1kg_${chr}_${chunk}.log \
-q normal \
perl ${outdir}/ ${chr} ${chunk}
# Submitting job-array:
-J "Jobname_[1-22]" # the job will be submitted for all chromosomes
-e "Jobname_chr%I.error" # Referring to jobindex
# Efficiently submitting jobs to the farm:
for i in {1..22}; do echo tabix -f -p vcf chr$i.snps.vcf.gz; done | ~ag15/array 1g index
# Switching queues:
bswitch ${queue} ${job_ID}
# Compressing bjobs:
bjobs -A
### bash scripts
# importing command line parameter within a script:
# bash handling command line options:
while getopts "h?vf:" opt; do
case "$opt" in
# Writing functions in bash:
# Defines a function and calls it; $1 is the first argument passed to the function.
# In a one-line definition the last command must end with ';' before the closing '}',
# otherwise '}' is taken as another argument and the function body never closes.
function cica { echo "parameter passed to function: " "$1"; }
cica "pocok"
# Get the path of the script is located:
scriptDir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# using references in bash:
eval echo \$${file}
# parameter processing:
$0 - name of the script (not counted among the positional parameters).
$1..$9 - argument list
${10}..${N} - argument list above 9
$* - all positional parameters except $0; "$*" joins them into a single word
$@ - all positional parameters except $0; "$@" keeps each one as a separate word
$# - the number of positional parameters excluding $0
# Removing elements from the argument list:
# vim commands:
:w filename - Current document will be saved as filename
:u - undo last command
# A small function to set up an ssh tunnel (for ipython notebooks)
# Usage: python_tunel <port> <remote-host>
# Forwards localhost:<port> to localhost:<port> on <remote-host> via ssh:
#   -L  local port forwarding, -N  run no remote command,
#   -f  go to the background after authentication, -Y  trusted X11 forwarding.
# Returns 1 (without calling ssh) when fewer than two arguments are given.
function python_tunel {
  if [[ $# -lt 2 ]]; then
    echo "Usage: python_tunel <port> <remote-host>" 1>&2
    return 1
  fi
  # Quoted so that an odd port/host value cannot be word-split or globbed.
  ssh -Y -N -f -L "localhost:${1}:localhost:${1}" "ds26@${2}"
}
# PS1 stuffs:
\h - host name to the first dot
\H - full host name
\A - current time in hh:mm format
\u - user name
\w - full path of the current working directory
\W - basename of the current working directory
\# - command number of this command
\e[1;32m - color starts
\e[m - color ends
\e[1;31m- light red
\e[1;32m - light green
\e[1;34m - light blue
# Example PS1:
export PS1="\[\e[1;34m\]\A \u@\h\[\e[m\]:\[\e[1;32m\]\W$\[\e[m\] "
# Using a env to get bash or python location:
#!/usr/bin/env bash
#!/usr/bin/env python
# Piping output into an other command as a command line parameter:
cat signals.tsv | perl -lane 'if ($F[0] =~ /rs/){print $F[0]} else {printf "%s_%s/%s\n", $F[3],$F[4],$F[5]}' | xargs -n1 -I % /nfs/team144/ds26/FunctionalAnnotation/v2.2/ -i %
# monitoring the progression of a command output. With pipes (surrounded by single quotes):
watch -n2 'bjobs | grep tabix'
# Bash find command useful arguments:
find . -regex ''
find . -type d # Find directories
find . -type f # Find files.
find . -type f -name '*.sh' # Find only files with sh extension (pattern quoted so the shell does not expand it).
find . -type f -newermt 2007-06-07 ! -newermt 2007-06-08 # finding files between specific dates
find . -type f -name '*.sh' -exec grep -n -H HELIC5102819 {} \; # finding files and execute command.
# password-less ssh login:
ssh-keygen # generating public/private keys on local host
ssh-copy-id -i ~/.ssh/ remote-host # copy public key to remote host
# Important grep parameters:
grep -E # Use extended regular expression.
grep -P # Use Perl like regular expression.
grep -x # Pattern match the whole line
grep -o # Output matching string (potentially more words per line)
grep -v # Reverse pattern
grep -w # Match word
grep -c # Count matches
grep -i # Ignore case.
grep -f # Pattern read from file
grep -H # Always print file name in front of the line.
grep -m 1 # Stop after # matches.
grep -n # Output line number in which the pattern was found.
grep -h # Suppress file name from output.
grep -A 3 # print 3 lines after the match
grep -B 3 # print 3 lines before the match
# Lower/upper casing in bash:
echo "A string to change case" | tr "[:lower:]" "[:upper:]"
string="A string to change case"
echo ${string^^}
echo ${string,,}
# git commands:
git clone # Cloning github repository
# Tabix parameters:
# From the absolute path of a file get file name and folder:
basename "${file}" #
dirname "${file}" # /nfs/team144/ds26/tools/
# Extracting specific file from tar archive:
tar -xf etc.tar etc/apt/sources.list
# Referencing variables:
eval ls -al \$$ref
# Find all files in a folder owned by me and add write permission for the group:
find . -user ds26 | xargs -n1 -I % chmod g+w %
# Default variable value:
echo ${varName:-DefaultValue}
# Cases in bash:
case $var1 in
"cica" ) var2="cica" ;;
"kutya" ) var2="kutya" ;;
* ) var2="other";;
# check if a string has a substring:
if [[ $string == *substring* ]]; then echo "Substring has been found!"; fi
if [[ $string =~ .*substring.* ]]; then echo "Substring has been found!"; fi
# Stream redirection:
>&2 # Stdout to stderr
2>&1 # Stderr to stdout
# Repeating a command until it returns successfully:
while [ $? -ne 0 ]; do !!; done
