Skip to content

Instantly share code, notes, and snippets.

@chrisamiller
Created October 30, 2019 17:51
Show Gist options
  • Save chrisamiller/ec6904c7fac2d91352685f57324a167e to your computer and use it in GitHub Desktop.
Save chrisamiller/ec6904c7fac2d91352685f57324a167e to your computer and use it in GitHub Desktop.
notes from the "bash tips and tricks" workshop
#notes from the "bash tips and tricks" workshop
#there are two output streams coming out of every command - stdout and stderr
#by default both write to the screen
date #output to stdout
date --asdf #invalid option, so the error message goes to stderr
#you can redirect stdout to a file
date >file.txt
#you can also redirect stderr to a file
date --asdf 2>err.log
#you can get fancy and send both to the same file: point stdout at the file
#first, then point stderr at wherever stdout now goes (the order matters)
date --asdf >both.log 2>&1
#use wc -l to count the number of lines in a file
wc -l genes1
#use sort to reorder a file
sort genes1
#set operations: comm compares two sorted files line by line, and sort -u
#drops repeats inside each file so a gene listed twice is not miscounted
#find shared genes (-12 hides the lines unique to either file)
comm -12 <(sort -u genes1) <(sort -u genes2)
#find genes1 specific (-23 hides genes2-only and shared lines)
comm -23 <(sort -u genes1) <(sort -u genes2)
#find genes2 specific (-13 hides genes1-only and shared lines)
comm -13 <(sort -u genes1) <(sort -u genes2)
#get a certain column from a file
cut -f 10 tcga.aml.tsv | head
#find the most frequent items in a certain column: count each distinct value,
#then sort the counts in descending numeric order so the biggest come first
cut -f 10 tcga.aml.tsv | sort | uniq -c | sort -rn | head
#find lines in one file that match genes from another
grep -wf genes1 tcga.aml.tsv | less
#what if we want to find lines in genes1 that match the genes column in our complicated file?
# Nested expressions! <(command) treats the output of that command as a file
grep -wf <(cut -f 10 tcga.aml.tsv | sort | uniq) genes1
#we can also use the output of a command just as a text string or variable:
#the unquoted $(...) is split on whitespace into one word per gene, while "$i" is
#quoted (and preceded by --) so each gene reaches grep as a single pattern argument
for i in $(cut -f 10 tcga.aml.tsv | sort | uniq); do grep -- "$i" genes2; done
#loop over known values
for i in 1 2 3 4; do echo "$i"; done
#loop over values from a file, one line at a time
#(IFS= and -r make read keep leading/trailing whitespace and backslashes as-is)
grep IDH genes1 | while IFS= read -r i; do printf '%s\n' "$i"; done
#do something more complicated in the loop
grep IDH genes1 | while IFS= read -r i; do echo "my favorite gene is $i"; done
#nested loops: for every number, walk through every gene in genes1
for num in 1 2 3 4; do
  while IFS= read -r gene; do
    printf '%s %s\n' "$gene" "$num"
  done <genes1
done
#xargs turns its input into arguments for a command, running that command
#once per item (-n 1) or once per batch of up to five items (-n 5)
xargs -n 1 echo <genes1
xargs -n 5 echo <genes1
#pull several columns out of a file at once
cut -f 1,2 tcga.aml.tsv | head -n 10
#cut always emits columns in file order, so asking for 2,1 still gives 1 then 2
cut -f 2,1 tcga.aml.tsv | head -n 10
#awk prints fields in whatever order you list them
awk '{ print $2, $1 }' tcga.aml.tsv | head -n 10
#some extra parameters can be used to keep things tab-delimited; wrapping them
#in a function (rather than an alias) means tawk also works inside scripts,
#where bash does not expand aliases by default
#drop any older alias of the same name first so the definition below parses
#(the error is silenced on purpose: usually there is no such alias)
unalias tawk 2>/dev/null
tawk() {
  # -F sets the input field separator, OFS the output one; both are a tab
  awk -F'\t' -v OFS='\t' "$@"
}
tawk '{print $2,$1}' tcga.aml.tsv | head
#use CTRL-R to search backwards through your history, or type something like `history | less`
#pushd and popd can be used to navigate through directories saving your place
#pushd changes into the given directory and remembers where you came from
pushd /gscuser/cmiller
#popd takes you back to the directory you were in before the pushd
popd
#let's change a gzipped VCF file from having "chr" prefixes to having no prefixes using only one line of code:
#a single sed pass means the file is only decompressed once:
#  - lines that do not start with "#" are records, so strip their leading "chr"
#  - "##contig" header lines get the same treatment so the header still matches the records
gunzip -c asdf.vcf.gz | sed -e '/^#/!s/^chr//' -e 's/^##contig=<ID=chr/##contig=<ID=/' | gzip >asdf.fixed.vcf.gz
#get the full path to your current directory
#(-P prints the physical path, with any symbolic links resolved)
pwd -P
#get the full path to a particular file
#(-f follows symbolic links and prints the canonical absolute path;
# NOTE(review): -f comes from GNU readlink - confirm it exists on other platforms)
readlink -f genes1
#my ~cmiller/.bashrc and ~cmiller/usr/bin contain lots of other useful commands: "header", "binfo", "bwait", "urlify", etc. Do some exploring!
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment