Lisa K. Johnson johnsolk

## add_path.sh
# Permanently append the current directory to PATH for future bash sessions.

# Print an `export` line that appends directory $1 to PATH when it is sourced.
path_export_line() {
  # The single-quoted format keeps $PATH literal, so it is expanded each time
  # ~/.bashrc is read instead of being frozen to the value PATH has right now.
  # %q shell-escapes the directory so spaces or metacharacters survive.
  # shellcheck disable=SC2016
  printf 'export PATH="$PATH":%q\n' "$1"
}

path_line=$(path_export_line "$(pwd)")
# Append only when this exact line is not already present, so running the
# script twice from the same directory does not pile up duplicate entries.
# (grep errors are silenced because ~/.bashrc may not exist yet.)
if ! grep -qxF -- "$path_line" ~/.bashrc 2>/dev/null; then
  printf '%s\n' "$path_line" >> ~/.bashrc
fi
# Reload so the new entry takes effect in the current shell as well.
source ~/.bashrc

## Notes.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                johnsolk
                / Notes.md
            
            
              Created
              October 31, 2017 16:14
            
          
labeled text MA plot


Fun fact: there is enough gold suspended in the ocean, mostly from hydrothermal vents, for each person on Earth to have 9 lbs. - Microbial Diversity, 2017


"You don't have to be great at something to do it or enjoy doing it!" You can keep doing it. - Tracy Teal


## mmetsp_loops.sh
# Print the number of distinct values in column 1 (the contig ID) of file $1.
count_contigs() {
  # sort -u rather than a bare uniq: uniq only collapses *adjacent* duplicates,
  # so a contig whose rows are not contiguous would be counted more than once.
  cut -f1 "$1" | LC_ALL=C sort -u | wc -l
}

# For every *.fasta.dammit.gff3 file below the current directory, write the
# file path followed by its contig count to the total_contigs report.
# find -print0 with read -d '' keeps paths containing whitespace intact.
while IFS= read -r -d '' file; do
  printf '%s\n' "$file"
  count_contigs "$file"
done < <(find . -name '*.fasta.dammit.gff3' -print0) > ~/MMETSP/assembly_evaluation_data/total_contigs


# Map a path ending in ".renamed.fasta.dammit.gff3" to the shortened file name
# "<prefix>.dammit.gff3" (directory part removed).
dammit_gff3_name() {
  local name=${1##*/}
  printf '%s.dammit.gff3\n' "${name%.renamed.fasta.dammit.gff3}"
}

# Dry run: for every *.fasta.renamed.fasta.dammit.gff3 file below the current
# directory (reported with absolute paths), print the cp command that would
# copy it under its shortened name. Nothing is copied; the command is only echoed.
# find -print0 with read -d '' keeps paths containing whitespace intact.
while IFS= read -r -d '' file; do
  new=$(dammit_gff3_name "$file")
  echo cp "$file" "/mnt/scratch/ljcohen/mmetsp_dammit/cp/gff3/$new"
done < <(find "$(pwd)" -name '*.fasta.renamed.fasta.dammit.gff3' -print0)

## jupyter_farm.sh
# Environment setup for Jupyter on the farm cluster.
# Written 10/19/2017, with help from Luiz Irber.
#
# Steps performed by hand beforehand:
#   ssh ljcohen@farm.cse.ucdavis.edu
#   srun -p high -t 24:00:00 --mem=20000 --pty bash
#     (starts an interactive job and assigns a designated compute node)
#
# On the farm cluster, jupyter notebook is installed in anaconda, in the
# home directory.

# Activate the py3.dammit environment.
. ~/anaconda2/envs/py3.dammit/bin/activate

# Point XDG_RUNTIME_DIR elsewhere to change where tmp files are stored.
XDG_RUNTIME_DIR=/home/ljcohen/tmp
export XDG_RUNTIME_DIR

## fasta_to_table.py
# Write the name of every record in trinity.nema.full.fasta to table.txt,
# one name per line.
#
# Requires screed. Either run from within a py3 virtualenv
#   source ~/bin/py3/bin/activate
# or install it with
#   sudo pip install screed
import screed

# Both the output file and the screed reader are managed by the `with`
# statement, so each is closed even if reading or writing fails part-way.
with open("table.txt", "w") as fq, screed.open("trinity.nema.full.fasta") as records:
    for r in records:
        fq.write(r.name + "\n")

## replace.sh
# Replace IUPAC ambiguity characters with N in all fasta files in a directory.

# Rewrite file $1 in place, turning each of Y R W S K M D V H B X into N on
# sequence lines only.
mask_ambiguous_bases() {
  # '/^>/!' skips FASTA header lines; without it the substitution also rewrites
  # those letters inside record names and corrupts the headers.
  sed -i -e '/^>/!s/[YRWSKMDVHBX]/N/g' -- "$1"
}

# Glob directly instead of parsing `ls`, so names with spaces stay intact.
for fasta in *.fa; do
  # With no matches the pattern stays literal; skip it rather than fail in sed.
  [ -e "$fasta" ] || continue
  mask_ambiguous_bases "$fasta"
done

## msu_hpcc_commands.sh
# Delete a block of queued jobs whose Job IDs all start with specific digits.

# Filter qstat-style lines on stdin: print the part before the first "." of
# every line whose Job ID (the text before that ".") starts with prefix $1.
job_ids_with_prefix() {
  # index(...) == 1 anchors the match to the start of the Job ID, so the digits
  # appearing elsewhere on a line (or mid-ID) do not select unrelated jobs.
  awk -F'[.]' -v prefix="$1" 'index($1, prefix) == 1 { print $1 }'
}

# xargs -r: do not invoke qdel at all when no job matched.
qstat -u ljcohen | job_ids_with_prefix 472526 | xargs -r qdel

## oysterriver_readnums
[ljcohen@dev-intel14 trim]$ for i in $(find /mnt/scratch/ljcohen/oysterriver/ -name "*left.fq"); do echo $i; done
/mnt/scratch/ljcohen/oysterriver/ERR1674585/trinity/ERR1674585.left.fq
/mnt/scratch/ljcohen/oysterriver/ERR489297/trinity/ERR489297.left.fq
/mnt/scratch/ljcohen/oysterriver/DRR036858/trinity/DRR036858.left.fq
/mnt/scratch/ljcohen/oysterriver/DRR069093/trinity/DRR069093.left.fq
/mnt/scratch/ljcohen/oysterriver/DRR053698/trinity/DRR053698.left.fq
/mnt/scratch/ljcohen/oysterriver/DRR031870/trinity/DRR031870.left.fq
/mnt/scratch/ljcohen/oysterriver/SRR2016923/trinity/SRR2016923.left.fq
/mnt/scratch/ljcohen/oysterriver/ERR058009/trinity/ERR058009.left.fq
/mnt/scratch/ljcohen/oysterriver/SRR2086412/trinity/SRR2086412.left.fq

## ncbi_urlprediction.py
"""
From Luiz Irber.
Takes an SRA accession and determines the location of the .sra data file for automated downloading.
Follows this format: ftp://ftp-trace.ncbi.nih.gov/sra/sra-instant/reads/ByRun/sra/{SRR|ERR|DRR}/<first 6 characters of accession>/<accession>/<accession>.sra
Format according to NCBI utility handbook: https://www.ncbi.nlm.nih.gov/books/NBK158899/
"""

def sra_url(accession):
    """Return the predicted FTP URL of the .sra file for an SRA accession.

    The accession is upper-cased and placed into the layout
    ``<base>/<first 3 characters>/<first 6 characters>/<accession>/<accession>.sra``,
    e.g. ``"srr2016923"`` -> ``".../SRR/SRR201/SRR2016923/SRR2016923.sra"``.

    No check is made that the accession is well-formed or that the file exists.
    """
    accession = accession.upper()
    base = "ftp://ftp-trace.ncbi.nih.gov/sra/sra-instant/reads/ByRun/sra"
    # The first three characters are the archive prefix (SRR, ERR or DRR).
    return "{0}/{1}/{2}/{3}/{3}.sra".format(
        base, accession[:3], accession[:6], accession
    )

## fastq_to_fasta.py
# checks for redundancies in record headers
# converts fastq to fasta format
import screed

f = open('porecamp_killifish.fasta','wb')

for n,r in enumerate(screed.open('porecamp_killifish.fastq')):
  if r.name in s:
    continue
  else:
	# add current directory to ~/.bashrc

	echo export PATH=$PATH:$(pwd) >> ~/.bashrc
	source ~/.bashrc
	for file in $(find . -name "*.fasta.dammit.gff3"); do echo $file; cut -f1 $file \| uniq \| wc -l; done > ~/MMETSP/assembly_evaluation_data/total_contigs



	for file in $(find `pwd` -name "*.fasta.renamed.fasta.dammit.gff3");
	do
	base=$(basename $file .renamed.fasta.dammit.gff3)
	new=$base.dammit.gff3
	echo cp $file /mnt/scratch/ljcohen/mmetsp_dammit/cp/gff3/$new
	done
	# 10/19/2017, with help from Luiz Irber
	# ssh ljcohen@farm.cse.ucdavis.edu
	# start interactive job, get assigned a designated compute node
	# srun -p high -t 24:00:00 --mem=20000 --pty bash
	# on the farm cluster
	# jupyter notebook is installed in anaconda, in home dir
	# activate virtualenv
	source ~/anaconda2/envs/py3.dammit/bin/activate
	# change environment variable to change where tmp files are stored
	export XDG_RUNTIME_DIR=/home/ljcohen/tmp
	# From within a py3 virtualenv
	# or
	# sudo pip install screed
	# source ~/bin/py3/bin/activate
	import screed
	with open("table.txt",'w') as fq:
	for r in screed.open('trinity.nema.full.fasta'):
	fq.write(r.name+"\n")
	#replace IUPAC ambiguous characters in all fasta files in a directory
	for i in $(ls *.fa); do sed -i -e 's/[YRWSKMDVHBX]/N/g' $i; done
	# delete block of jobs in queue all starting with specific digits in Job ID
	qstat -u ljcohen \| grep "472526" \| cut -d "." -f1 \| xargs qdel
	[ljcohen@dev-intel14 trim]$ for i in $(find /mnt/scratch/ljcohen/oysterriver/ -name "*left.fq"); do echo $i; done
	/mnt/scratch/ljcohen/oysterriver/ERR1674585/trinity/ERR1674585.left.fq
	/mnt/scratch/ljcohen/oysterriver/ERR489297/trinity/ERR489297.left.fq
	/mnt/scratch/ljcohen/oysterriver/DRR036858/trinity/DRR036858.left.fq
	/mnt/scratch/ljcohen/oysterriver/DRR069093/trinity/DRR069093.left.fq
	/mnt/scratch/ljcohen/oysterriver/DRR053698/trinity/DRR053698.left.fq
	/mnt/scratch/ljcohen/oysterriver/DRR031870/trinity/DRR031870.left.fq
	/mnt/scratch/ljcohen/oysterriver/SRR2016923/trinity/SRR2016923.left.fq
	/mnt/scratch/ljcohen/oysterriver/ERR058009/trinity/ERR058009.left.fq
	/mnt/scratch/ljcohen/oysterriver/SRR2086412/trinity/SRR2086412.left.fq
	"""
	From Luiz Irber.
	Takes an SRA accession and determines the location of the .sra data file for automated or downloading.
	Follows this format: ftp://ftp-trace.ncbi.nih.gov/sra/sra-instant/reads/ByRun/sra/{SRR\|ERR\|DRR}/<first 6 characters of accession>/<accession>/<accession>.sra
	Format according to NCBI utility handbook: https://www.ncbi.nlm.nih.gov/books/NBK158899/
	"""

	def sra_url(accession):
	"""Returns predicted URL given SRA accession as input."""
	accession = accession.upper()
	# checks for redundancies in record headers
	# converts fastq to fasta format
	import screed

	f = open('porecamp_killifish.fasta','wb')

	for n,r in enumerate(screed.open('porecamp_killifish.fastq')):
	if r.name in s:
	continue
	else: