Sum over a column in a csv:
# Pull out field 2 of every row, join the values with "+", and let bc evaluate the sum.
# cut reads the file itself, so no extra cat process is needed.
cut -d, -f2 file.csv | paste -sd+ - | bc
Pretty view a csv in less:
# Align the comma-separated fields into a table and page it; less -S chops long
# lines instead of wrapping them. column takes the file as an operand directly.
column -t -s, file.csv | less -S
Colored prompt and ls output outside of the bash profile:
# Prompt: user (cyan) @ host (green) : working directory (bold yellow), then the
# prompt character. Single quotes keep the \$ escape intact so bash itself renders
# "#" for root and "$" otherwise; inside double quotes \$ collapses to a literal "$".
export PS1='\[\033[36m\]\u\[\033[m\]@\[\033[32m\]\h:\[\033[33;1m\]\w\[\033[m\]\$ '
# CLICOLOR / LSCOLORS are the color switches read by BSD/macOS ls.
export CLICOLOR=1
export LSCOLORS=ExFxBxDxCxegedabagacad
# --color=auto colors output only when writing to a terminal.
alias ls='ls --color=auto'
Average length of a contig in a multi-FASTA file:
# Mean contig length = total length of all non-header lines / number of header lines.
awk '
  />/ { ++n_contigs; next }          # line containing ">": one more contig
  { total_len += length() }          # any other line: add its length
  END { print total_len / n_contigs }
' file.fa
Export environment from history:
# Write the environment spec of the active conda environment to stdout.
# --from-history: per conda's documentation, builds the spec from the package
# specs recorded in the environment's history rather than from every installed package.
conda env export --from-history
rsync from UC Davis Genome Center
# Copy from the "slims" module of the rsync daemon (host::module syntax) into the
# current directory. -a archive mode, -v verbose, -L copy what symlinks point to
# instead of the links themselves.
# NOTE(review): "random_string" is presumably a placeholder for the run-specific
# directory name -- replace it before running.
rsync -avL slimsdata.genomecenter.ucdavis.edu::slims/random_string .
Set permissions for farm files (make directories traversable and files readable by everyone):
# Under every non-hidden top-level entry of the home directory (~/* does not match
# dotfiles): give all users execute (traverse) permission on directories and read
# permission on regular files.
# "{} +" hands many paths to each chmod call instead of forking one chmod per path.
find ~/* -type d -exec chmod a+x {} +
find ~/* -type f -exec chmod a+r {} +
Use umask to set the default permissions of all newly created files:
# Symbolic spelling of mask 0022: the owner keeps full access, group and others
# lose write permission on anything created from now on in this shell.
umask u=rwx,g=rx,o=rx
Print directories that do not contain a certain file:
# Print each *_k31_r1_search_oh0/ directory that lacks the expected reads file.
for d in *_k31_r1_search_oh0/; do
  # When nothing matches, the glob is left as the literal pattern; skip it so a
  # non-existent "directory" is not reported.
  [ -d "$d" ] || continue
  # $d already ends in "/", so the file name is appended directly.
  [ -f "${d}ERS235531_43.fna.gz.cdbg_ids.reads.fa.gz" ] || printf '%s\n' "$d"
done
split a multifasta into single fastas
# Write every record of file.fa to its own file named "<header text>suffix.fa".
# awk reads the file directly, and each output file is closed before the next one
# is opened so inputs with many records do not run out of file descriptors.
awk '
  /^>/ {
    if (filename != "") close(filename)
    filename = substr($0, 2) "suffix.fa"
    # First time this name is seen: start from an empty file. A repeated header
    # keeps appending to the same file, as it did when the file was left open.
    if (!(filename in seen)) {
      seen[filename] = 1
      printf "" > filename
      close(filename)
    }
  }
  { print >> filename }
' file.fa
Remove all text after first space in a fasta header
# Header lines (starting with ">") are cut down to their first whitespace-delimited
# field; every other line is printed unchanged. Without the /^>/ pattern the first
# rule ran on all lines, so the pass-through rule was never reached.
awk '/^>/ { print $1; next } { print }' file.fa
Make all FASTA headers unique by appending _x (x = occurrence count) to duplicated headers:
# The first occurrence of a header is left alone; the 2nd, 3rd, ... occurrences get
# "_2", "_3", ... appended. Non-header lines pass through untouched.
awk '
  /^>/ {
    # seen[] is keyed by the original header text; the count is read before $0 changes.
    if (seen[$0]++) $0 = $0 "_" seen[$0]
  }
  { print }
' file.fasta
Prepend filename to all fasta headers, and separate from original header with --
# On lines containing ">": insert "<input file name>--" right after the first ">",
# then delete the first ".faa" on the line (the extension of the inserted name).
awk '
  />/ {
    sub(">", "&" FILENAME "--")
    sub(/\.faa/, "")
  }
  { print }
' sample1.faa
remove FASTA sequence lines from a GFF file (e.g. like those included by prokka or bakta)
# Delete everything from the "##FASTA" marker line through the end of the file.
# genome.gff and genome.nofasta.gff are placeholder names; they are written without
# angle brackets because "<name> > <name>" is a shell syntax error when pasted.
sed '/^##FASTA$/,$d' genome.gff > genome.nofasta.gff
Size of directories including all files they contain:
# Total size of each non-hidden subdirectory of the current directory, smallest first.
# The explicit "." start point is required by BSD/macOS find; only GNU find defaults it.
# The name pattern '[!.]*' also filters out "." itself.
find . -maxdepth 1 -type d -name '[!.]*' -exec du -sh {} + | sort -h
Switch to an Intel (i386/x86_64) shell from arm64:
# Start a new /bin/zsh through /usr/bin/arch with the x86_64 architecture selected;
# exit that shell to get back to the original one.
# NOTE(review): on Apple silicon this presumably requires Rosetta 2 -- confirm.
env /usr/bin/arch -x86_64 /bin/zsh
Count the number of files in a directory when a glob fails with "Argument list too long":
# Count regular files named *.csv under the current directory (recursively). find
# does the matching itself, so no huge argument list is built by the shell.
# The explicit "." start point is required by BSD/macOS find.
find . -type f -name '*.csv' | wc -l