Created
October 30, 2019 17:51
-
-
Save chrisamiller/ec6904c7fac2d91352685f57324a167e to your computer and use it in GitHub Desktop.
notes from the "bash tips and tricks" workshop
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#notes from the "bash tips and tricks" workshop
#there are two paths coming out of every command - stdout and stderr
#by default both write to the screen
date #output to stdout
date --asdf #error to stderr
#you can redirect stdout to a file
date >file.txt
#you can also redirect stderr to a file
date --asdf 2>err.log
#you can get fancy and send both to the same file
#(order matters: point stdout at the file first, then duplicate stderr onto it with 2>&1)
date >both.log 2>&1
#use wc -l to count the number of lines in a file
wc -l genes1
#use sort to reorder a file (the sorted lines go to stdout; genes1 itself is not modified)
sort genes1
#set operations
#these tricks rely on each input file listing a gene at most once, so that after
#merging and sorting, how often a line repeats tells you which file(s) it came from
#find shared genes (uniq -d prints only lines that occur more than once)
cat genes1 genes2 | sort | uniq -d
#find genes1 specific (genes2 is fed in twice, so only genes1-only lines occur exactly once)
cat genes1 genes2 genes2 | sort | uniq -u
#find genes2 specific (same trick with genes1 doubled)
cat genes1 genes1 genes2 | sort | uniq -u
#get a certain column from a file
cut -f 10 tcga.aml.tsv | head
#find the most frequent items in a certain column
#(uniq -c prefixes each distinct value with its count; sort -rn puts the largest counts first)
cut -f 10 tcga.aml.tsv | sort | uniq -c | sort -rnk 1 | head
#find lines in one file that match genes from another
#(-f reads the patterns from genes1, -w only accepts whole-word matches)
grep -wf genes1 tcga.aml.tsv | less
#what if we want to find lines in genes1 that match the genes column in our complicated file?
#Nested expressions! <(command) treats the output of that command as a file
grep -wf <(cut -f 10 tcga.aml.tsv | sort | uniq) genes1
#we can also use the output of a command just as a text string or variable:
#(quote "$i" so each value reaches grep as exactly one pattern; -- stops it being read as an option)
for i in $(cut -f 10 tcga.aml.tsv | sort | uniq);do grep -- "$i" genes2;done
#loop over known values
for i in 1 2 3 4;do echo "$i";done
#loop over values from a file
#(IFS= and -r make read hand back each line exactly as written: no trimming, no backslash processing)
grep IDH genes1 | while IFS= read -r i;do echo "$i";done
#do something more complicated in the loop
grep IDH genes1 | while IFS= read -r i;do echo "my favorite gene is $i";done
#nested loops (every gene in genes1 is printed once for each number)
for num in 1 2 3 4;do cat genes1 | while IFS= read -r gene;do echo "$gene $num";done;done
#xargs can also be used to split things out into separate commands, individually or in chunks
#(-n 1 runs echo once per item; -n 5 runs it once per group of up to five items)
cat genes1 | xargs -n 1 echo
cat genes1 | xargs -n 5 echo
#grab multiple columns from a file
cut -f 1,2 tcga.aml.tsv | head
#this doesn't change the order of the columns
cut -f 2,1 tcga.aml.tsv | head
#but awk can help!
awk '{print $2,$1}' tcga.aml.tsv | head
#some extra parameters (via an alias) can be used to keep things tab-delimited
#(-F sets the input field separator and OFS the output one, both to a tab)
#(aliases are expanded in interactive shells; inside a script run `shopt -s expand_aliases` first)
alias tawk='awk -F"\t" -v OFS="\t"'
tawk '{print $2,$1}' tcga.aml.tsv | head
#use CTRL-R to search backwards through your history, or type something like `history | less`
#pushd and popd can be used to navigate through directories saving your place
#(pushd changes into the directory and remembers where you were; popd takes you back)
pushd /gscuser/cmiller
popd
#let's change a gzipped VCF file from having "chr" prefixes to having no prefixes using only one line of code:
#(first <(...) emits the "#" header lines untouched, second strips a leading "chr" from every other line;
# cat joins them in that order and gzip recompresses the result)
#NOTE: header lines are passed through as-is, so any "chr" names mentioned inside the header keep their prefix
cat <(gunzip -c asdf.vcf.gz | grep "^#") <(gunzip -c asdf.vcf.gz | grep -v "^#" | sed 's/^chr//') | gzip >asdf.fixed.vcf.gz
#get the full path to your current directory
#(-P prints the physical path, with symlinks resolved)
pwd -P
#get the full path to a particular file
readlink -f genes1
#in ~cmiller/.bashrc and my ~cmiller/usr/bin there are lots of other useful commands. "header", "binfo", "bwait", "urlify", etc. Do some exploring!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment