@aaronsaunders
Last active January 25, 2021 06:54
Quick reference for nix command line commands that I can never remember...
# Unix shell
# run if zero exit
cd tmp/a/b/c && tar xvf ~/archive.tar # untar if dir exists
# run if non-zero exit
cd tmp/a/b/c || mkdir -p tmp/a/b/c
cd tmp/a/b/c || mkdir -p tmp/a/b/c && tar xvf ~/archive.tar -C tmp/a/b/c
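# to check the exit status of the last command yourself (0 = success), e.g.:
false ; echo $? # prints 1
true ; echo $?  # prints 0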
which cmd   # show the full path of a command
whereis cmd # locate the binary, source, and man pages for a command
############################
# Filesystem
passwd # change password
mkdir -p tmp/a/b/c
mkdir -p project/{lib/ext,bin,src,doc/{html,info,pdf},demo/stat/a} # use curly brackets for list
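# brace expansion works with any command, e.g. (hypothetical filenames):
echo file_{a,b,c}.txt # expands to file_a.txt file_b.txt file_c.txt
cp config.yml{,.bak}  # copies config.yml to config.yml.bak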
some_cmd 2>&1 | more # redirect stderr to stdout and pipe both to more
some_cmd > output.outerr 2>&1 # send stdout and stderr to the same file
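# related redirections (bash), e.g.:
some_cmd 2> errors.log           # stderr only to a file
some_cmd > out.log 2> errors.log # stdout and stderr to separate files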
cat -n /etc/logrotate.conf # prepend line number to output
head file.txt # default -n 10
head -n 20 file.txt # specify number of lines
tail -n 100 filename.txt
tail -f file.log # updates
tail -f file1.log -f file2.log
less file
# -N      show line numbers
# -g      highlight search
# b       previous page
# g       start of file
# G       end of file
# q       quit less
# /string search in file
# n       next match
grep string example.txt | less
cat file1       # print a file to stdout
cat file1 file2 # concatenate files to stdout
# Compression
gzip file # compress file (creates file.gz and removes the original)
tar cvf archive_name.tar dirname/ # Create a new tar archive.
tar tvf archive_name.tar # View an existing tar archive.
gunzip <FILE> # unzip a gzipped file
tar xvf file.tar -C path/to/destination # extract a tar file into a directory
tar -xvzf <FILE> # extract a gzipped tarball
## encrypt file ##
openssl aes-256-cbc -salt -in financial-records-fy-2011-12.dbx -out financial-records-fy-2011-12.dbx.aes
## decrypt file ##
openssl aes-256-cbc -d -in financial-records-fy-2011-12.dbx.aes -out financial-records-fy-2011-12.dbx
# env variable
export ORACLE_HOME=/u01/app/oracle/product/10.2.0
export PATH=/my/path:$PATH # prepend a directory to PATH
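# a small sketch for checking and persisting variables (assuming a bash shell):
echo $PATH # inspect the current value
echo 'export PATH=/my/path:$PATH' >> ~/.bashrc # persist for future shells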
###################
# Network
ssh -l username remotehost.com
# copying files between machines (file transfer)
scp user@remote-computer:/remote/path/remote-file /local/path/file
# curl
curl www.bio.aau.dk # prints the HTML to stdout
curl www.bio.aau.dk -o example.html
# wget
# source: http://bit.ly/HiIcL7
wget -O taglist.zip http://www.vim.org/download.php?src_id=7701 # rename long filename
wget -c http://www.web.org/repos/strx.tar.bz2 # resume
wget -b http://www.web.org/repos/strx.tar.bz2 # download in background; progress by: tail -f wget-log
wget -i url-list.txt
###################
# SEARCH
locate aaron # find string in filenames and paths
find . -name '*.txt' # find all files with extension as "*.txt" (files can exist in subdirectory)
find . -type d # find all directories
find . -iname "bigfile.fa" -exec md5sum {} \; # execute a command on each file found by find
find . -name "*.txt" | sed "s/\.txt$//" | xargs -i echo mv {}.txt {}.bak | sh # rename *.txt as *.bak
find . -name "*.txt" | xargs rm # delete all *.txt files in a directory
find . -name "*.fa" | xargs tar -zcf .fa.tar.gz # package all *.fa files in a directory
grep "string" input # best to enclose the string in quotes
# -i, case insensitive; -w, match words; -v, NOT match; -c, return count
grep -A 1 -i "^>" file.fasta # Print the matched line, along with the 1 line after it.
grep -A 3 -i "^@" file.fastq # Print the matched line, along with the 3 lines after it.
grep -v 'spam' bar > /dev/null # filter out lines that match
################################
# Text manipulation
sort names.txt # default: alpha sort
sort -n names.txt # sort by number
sort -k2 names.txt # -k defines the sort column, e.g. -k2 for the 2nd column
sort -t'|' names.txt # -t defines the field delimiter, e.g. -t'|' or -t','
sort -r names.txt # reverse
# cut extracts sections from each line of input, usually from a file.
# Sections can be selected by character position (-c) or by field (-f); the field delimiter is set with -d (tab by default). Ranges are 1-based.
cut -c 4-10 file # outputs the 4th to 10th character of each line
cut -d "," -f 5- file # output 5th to final field in lines
cut -d "\t" -f 1
# uniq is a filter that collapses adjacent identical lines to one. Often used after sort
sort file | uniq
sort file | uniq -c # each line is preceded by a count of the number of times it occurred
sort file | uniq -u # returns only the lines that are not repeated
#sed
# examples: http://bit.ly/1dWUTcO
sed -e 's/find/replace/g' infile > outfile
sed -f file.sed infile > outfile # .sed file contains a list of commands
sed '/spam/d' bar > /dev/null # delete lines that match a string
grep -v 'spam' bar > /dev/null # faster
# Batch file extension rename
# change .htm files to .html
for file in *.htm ; do mv $file `echo $file | sed 's/\(.*\.\)htm/\1html/'` ; done
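# an equivalent rename using bash parameter expansion instead of sed, e.g.:
for file in *.htm ; do mv "$file" "${file%.htm}.html" ; done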
# awk
The awk utility is a data extraction and reporting tool: a data-driven scripting language in which a set of actions is applied to textual data (in files or streams) to produce formatted reports. The language makes heavy use of strings, associative arrays (arrays indexed by key strings), and regular expressions.
It is like a mini Perl.
http://en.wikipedia.org/wiki/AWK
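A couple of minimal awk sketches (file.tsv and file.log are hypothetical input files):
awk -F'\t' '{print $1, $3}' file.tsv # print fields 1 and 3 of a tab-separated file
awk -F'\t' '{sum += $2} END {print sum}' file.tsv # sum column 2
awk '/error/ {n++} END {print n}' file.log # count lines matching a regular expression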
Example
Download the list of completed and in progress genomes from NCBI
cat complete.txt incomplete.txt | cut -f 4 | cut -d " " -f 1,2 | sort | uniq -c | sort -r | head -50
cat complete.txt incomplete.txt | grep -v 'Organism' | awk 'BEGIN{FS=OFS="\t"}{x[$3]++}END{for (i in x){print i,x[i]}}'
Find and replace from the command line
find . \( -name "*.php" -or -name "*.html" \) | xargs grep -l 'ENTER STRING OR TEXT TO SEARCH FOR' | xargs sed -i -e 's/ENTER OLD STRING OR TEXT TO REPLACE/ENTER REPLACEMENT STRING OR TEXT/g'
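For example, a hypothetical instantiation replacing oldtext with newtext in place (GNU sed; note that filenames with spaces would break this xargs pipeline):
find . \( -name "*.php" -or -name "*.html" \) | xargs grep -l 'oldtext' | xargs sed -i -e 's/oldtext/newtext/g'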
# quoting variables
~ $ ls tmp/
a b
~ $ VAR="tmp/*"
~ $ echo $VAR
tmp/a tmp/b
~ $ echo "$VAR"
tmp/*
~ $ echo $VARa
~ $ echo "$VARa"
~ $ echo "${VAR}a"
tmp/*a
~ $ echo ${VAR}a
tmp/a
~ $
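# in scripts, quote variables to avoid unwanted globbing and word splitting, e.g.:
FILE="my file.txt"
cat "$FILE" # one argument: my file.txt
cat $FILE   # two arguments: "my" and "file.txt" (usually not what you want)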
# use a backslash to continue long lines
cd tmp/a/b/c || \
mkdir -p tmp/a/b/c && \
tar xvf ~/archive.tar -C tmp/a/b/c