# Skip to content
#
# Instantly share code, notes, and snippets.

for B in `ls -1 *fastq* | perl -ne 'm#^(.+?)_#;print "$1\n"' | sort | uniq`; do
echo $B;
java -jar ~/bin/picard.jar FastqToSam F1=$(ls -1 ${B}_*R1*.fastq.gz) F2=$(ls -1 ${B}_*R2*fastq.gz) O=$B.bam SAMPLE_NAME=$B;
#in case of a failure, we'll get a file of zero bytes
touch $B.bam
done;
# Scrape the Medicinal Genomics "additional data" page for FASTQ download
# links and fetch each one; `wget -c` resumes partial downloads.
# The URL pattern stays inside one pair of double quotes ([^"\s]) so a match
# cannot run from an earlier quoted link on the same line into the FASTQ
# link, and the dot in "fastq.gz" is matched literally.
wget -O - -q 'http://www.medicinalgenomics.com/janeome-additional-data/' \
  | perl -e '$page=join "",<>;@fq=$page =~ m#"(https?://[^"\s]+?fastq\.gz)"#g;print join "\n",@fq' \
  | xargs wget -c
# Download both parts of the MNPR01 assembly from NCBI, decompress them,
# concatenate them into MNPR01.fa and upload the result to the bucket.
# Only the two downloaded archives are decompressed and merged, so other
# *.gz files in this directory (e.g. the *.fastq.gz inputs) are left alone.
# Each step runs only if the previous one succeeded, so a partial FASTA is
# never uploaded.
if wget ftp://ftp.ncbi.nlm.nih.gov/sra/wgs_aux/MN/PR/MNPR01/MNPR01.1.fsa_nt.gz \
  && wget ftp://ftp.ncbi.nlm.nih.gov/sra/wgs_aux/MN/PR/MNPR01/MNPR01.2.fsa_nt.gz \
  && gunzip MNPR01.1.fsa_nt.gz MNPR01.2.fsa_nt.gz \
  && cat MNPR01.1.fsa_nt MNPR01.2.fsa_nt > MNPR01.fa; then
  if [[ -n "${BUCKET:-}" ]]; then
    # Accept BUCKET with or without a leading gs:// and a trailing slash.
    bucket=${BUCKET#gs://}
    gsutil cp MNPR01.fa "gs://${bucket%/}/cannabis-reference-cannatonic/"
  else
    echo "BUCKET is not set; MNPR01.fa was built but not uploaded." >&2
  fi
else
  echo "Building MNPR01.fa failed; nothing was uploaded." >&2
fi
# Show the zero-byte files in the current directory in `ls -l` format.
# Sizes are tested directly instead of grepping `ls -l` for a standalone
# "0", which would also match a "0" elsewhere on the line (e.g. in a name).
empty_files=()
for f in *; do
  [[ -f "$f" && ! -s "$f" ]] && empty_files+=("$f")
done
if (( ${#empty_files[@]} > 0 )); then
  ls -l -- "${empty_files[@]}"
fi
# Example output:
# -rw-r--r-- 1 root root 0 Feb 4 08:25 Black84.bam
# -rw-r--r-- 1 root root 0 Feb 4 08:25 C4SCC.bam
# -rw-r--r-- 1 root root 0 Feb 4 08:25 DutchHaze.bam
# -rw-r--r-- 1 root root 0 Feb 4 08:30 GrandDaddyPurpleSCC.bam
# -rw-r--r-- 1 root root 0 Feb 4 09:01 NorthernLightAutoFeminizedSeed.bam
# -rw-r--r-- 1 root root 0 Feb 4 09:04 OGKushXBubbaKushSCC.bam
# -rw-r--r-- 1 root root 0 Feb 4 09:10 OGXPurpleBubbleGumXC4SCC.bam
# -rw-r--r-- 1 root root 0 Feb 4 09:50 SuperCritical.bam
# -rw-r--r-- 1 root root 0 Feb 4 09:52 SuperSilverHaze.bam
# Retry every sample whose BAM is empty, this time as single-end data
# (only the R1 FASTQ file is handed to Picard).
for B in *.bam; do
  # Only regular, zero-byte .bam files in this directory are retried; this
  # also skips the literal pattern when no .bam file exists.
  [[ -f "$B" && ! -s "$B" ]] || continue
  # Strip the .bam suffix only (not the first ".bam" anywhere in the name).
  C=${B%.bam}
  printf '%s\n' "$C"
  # An unmatched glob stays literal, hence the -e check.
  r1=("$C"_*R1*.fastq.gz)
  if [[ ${#r1[@]} -ne 1 || ! -e "${r1[0]}" ]]; then
    printf 'Skipping %s: expected exactly one R1 FASTQ file.\n' "$C" >&2
    continue
  fi
  java -jar ~/bin/picard.jar FastqToSam F1="${r1[0]}" O="$C.bam" SAMPLE_NAME="$C"
done
# Upload all BAM files in the current directory to the kannapedia/ folder
# of the bucket (-m copies in parallel).
if [[ -n "${BUCKET:-}" ]]; then
  # Accept BUCKET with or without a leading gs:// and a trailing slash.
  bucket=${BUCKET#gs://}
  gsutil -m cp *.bam "gs://${bucket%/}/kannapedia/"
else
  echo "BUCKET is not set; no BAM files were uploaded." >&2
fi
# Get the build specification file.
git clone git@github.com:allenday/bfx.git
# Edit and build only if we really are inside the build directory; otherwise
# vim and docker would act on whatever directory we happen to be in.
if cd bfx/bfx-bwa; then
  # Do whatever you need to do.
  vim Dockerfile
  # Build it with tag bfx-bwa. You probably want to rename.
  docker build -t bfx-bwa .
else
  echo "bfx/bfx-bwa is not available (did the clone fail?); skipping the build." >&2
fi
# Adjust these values to your working environment.
BUCKET="gs://your-bucket-name"
WORKSPACE="${BUCKET}/some/subdirectory"
BASE_FASTA="some.fa"
INPUT_FASTA="${BUCKET}/${BASE_FASTA}"
# The index prefix normally equals the input FASTA name,
# which is what BWA does by default.
OUTPUT_INDEX_PREFIX="${BASE_FASTA}"
OUTPUT_PATH="${WORKSPACE}/out"
OP_ID=operations/ENr3tYihKxjl8vvhnZvjmrsBIM29ta6GEioPcHJvZHVjdGlvblF1ZXVl

#######################################
# Poll a Google Genomics operation until gcloud reports it as done.
# Arguments: $1 - operation id (e.g. operations/XXXX)
#            $2 - seconds to sleep between polls (default: 30)
# Outputs:   a progress line with a timestamp on stdout for every poll that
#            finds the operation unfinished; errors on stderr
# Returns:   0 once the operation is done, 1 if gcloud itself fails
#######################################
wait_for_operation() {
  local op_id=$1
  local interval=${2:-30}
  local is_done
  while :; do
    # Call gcloud directly (no eval) and stop when it fails; otherwise a
    # missing or failing gcloud would keep this loop running forever.
    if ! is_done=$(gcloud --format='value(done)' alpha genomics operations describe "$op_id"); then
      printf 'Could not query %s with gcloud; giving up.\n' "$op_id" >&2
      return 1
    fi
    if [[ "$is_done" == "True" ]]; then
      return 0
    fi
    echo -n "$op_id not done, sleeping ${interval}s. "
    date
    sleep "$interval"
  done
}

wait_for_operation "$OP_ID"
# Example output:
# Running [operations/ENr3tYihKxjl8vvhnZvjmrsBIM29ta6GEioPcHJvZHVjdGlvblF1ZXVl]