operations/ENr3tYihKxjl8vvhnZvjmrsBIM29ta6GEioPcHJvZHVjdGlvblF1ZXVl not done, sleeping 30s. Sun Feb 5 17:59:53 PST 2017
operations/ENr3tYihKxjl8vvhnZvjmrsBIM29ta6GEioPcHJvZHVjdGlvblF1ZXVl not done, sleeping 30s. Sun Feb 5 18:00:25 PST 2017
operations/ENr3tYihKxjl8vvhnZvjmrsBIM29ta6GEioPcHJvZHVjdGlvblF1ZXVl not done, sleeping 30s. Sun Feb 5 18:00:58 PST 2017
operations/ENr3tYihKxjl8vvhnZvjmrsBIM29ta6GEioPcHJvZHVjdGlvblF1ZXVl not done, sleeping 30s. Sun Feb 5 18:01:30 PST 2017
operations/ENr3tYihKxjl8vvhnZvjmrsBIM29ta6GEioPcHJvZHVjdGlvblF1ZXVl not done, sleeping 30s. Sun Feb 5 18:02:01 PST 2017
operations/ENr3tYihKxjl8vvhnZvjmrsBIM29ta6GEioPcHJvZHVjdGlvblF1ZXVl not done, sleeping 30s. Sun Feb 5 18:02:32 PST 2017
gsutil ls -l $OUTPUT_PATH
18 2017-02-06T02:04:33Z gs://.../MNPR01.fa.amb
1576040 2017-02-06T02:04:34Z gs://.../MNPR01.fa.ann
585823748 2017-02-06T02:04:47Z gs://.../MNPR01.fa.bwt
146455918 2017-02-06T02:04:40Z gs://.../MNPR01.fa.pac
292911888 2017-02-06T02:04:38Z gs://.../MNPR01.fa.sa
TOTAL: 5 objects, 1026767612 bytes (979.2 MiB)
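The TOTAL line can be sanity-checked by summing the size column (field 1) of the listing; a minimal awk sketch over the output above:

```shell
# Sum the first (size) column of the gsutil listing; sizes copied from above.
awk '{ total += $1 } END { print total " bytes" }' <<'EOF'
18 2017-02-06T02:04:33Z gs://.../MNPR01.fa.amb
1576040 2017-02-06T02:04:34Z gs://.../MNPR01.fa.ann
585823748 2017-02-06T02:04:47Z gs://.../MNPR01.fa.bwt
146455918 2017-02-06T02:04:40Z gs://.../MNPR01.fa.pac
292911888 2017-02-06T02:04:38Z gs://.../MNPR01.fa.sa
EOF
# prints: 1026767612 bytes
```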
gcloud alpha genomics pipelines run \
--pipeline-file ~/src/bfx/bfx-bwa/pipeline-bwa-mem.yaml \
--logging gs://allenday-dev/bwa-logs/6/ \
--inputs INDEX_PATH=gs://$BUCKET/.../MNPR01.fa*,FASTQ_PATH=gs://$BUCKET/.../SRR4448180.fastq,PREFIX=MNPR01.fa \
--outputs OUTPUT_PATH=gs://$BUCKET/.../bam-SRR4448180/ \
IS_SAM="-S"
IN=this.sam
OUT=.
# get primary alignments with unmapped mate of read pair (with header)
samtools view -h -F 0x0100 -f 0x0008 $IS_SAM $IN > $OUT/prim_mate_map.sam

# get primary alignment records where the read itself is unmapped
samtools view    -F 0x0100 -f 0x0004 $IS_SAM $IN > $OUT/prim_self_map.sam
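The -f/-F options are bitwise tests against the SAM FLAG field: -F drops records with any of the given bits set, -f keeps only records with all of them set. A quick shell sketch of the first command's logic (flag value 137 is an illustrative example, not taken from the data above):

```shell
# SAM FLAG bits used by the two commands above:
#   0x0004  this read is unmapped
#   0x0008  the mate is unmapped
#   0x0100  secondary (non-primary) alignment
flag=137   # 0x1 (paired) + 0x8 (mate unmapped) + 0x80 (first in pair)
# samtools view -F 0x0100 -f 0x0008 keeps a record iff:
if [ $(( flag & 0x0100 )) -eq 0 ] && [ $(( flag & 0x0008 )) -ne 0 ]; then
  echo "record kept"
fi
```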
allenday / sra_download.yaml
Created April 9, 2017 19:42
SRA download to Google Cloud Storage
name: sra_download
description: use Google Pipeline API to download an SRA run, reformat it as unaligned BAM, and upload it to Google Cloud Storage. Run it like this: gcloud alpha genomics pipelines run --inputs SAMPLE=XXXXX --inputs RUN=XXXXX --outputs OUTPUT_FILE=gs://XXXXX --pipeline-file=sra_download.yaml
resources:
#increase boot disk from 10GB to 50GB to accommodate intermediate files
bootDiskSizeGb: 50
#specify multiple zones so this pipeline can be scheduled in whichever zone has capacity
zones:
- us-west1-a
- us-west1-b
- us-east1-b
#reserved static IP addresses on Google Cloud are referenced by name.
#specify the key EXTERNAL_IP_NAME with the correct IP name in the instance (or instance template) metadata
EXTERNAL_IP_NAME=`wget --header 'Metadata-Flavor: Google' -O - -q 'http://metadata.google.internal/computeMetadata/v1/instance/attributes/EXTERNAL_IP_NAME'`
#other metadata we can retrieve about the reserved IP and instance
EXTERNAL_IP_ADDRESS=`gcloud compute addresses list | grep "$EXTERNAL_IP_NAME" | awk '{print $3}'`
INSTANCE_NAME=`wget --header 'Metadata-Flavor: Google' -O - -q 'http://metadata.google.internal/computeMetadata/v1/instance/name'`
INSTANCE_ZONE=`wget --header 'Metadata-Flavor: Google' -O - -q 'http://metadata.google.internal/computeMetadata/v1/instance/zone' | cut -d/ -f4`
#delete the current IP (old access config)
yes | gcloud compute instances delete-access-config $INSTANCE_NAME --access-config-name "`yes | gcloud compute instances describe $INSTANCE_NAME --format='flattened' | grep networkInterfaces | grep accessConfigs | grep name | tai
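The `cut -d/ -f4` above works because the metadata server's zone endpoint returns a full resource path rather than a bare zone name; a minimal sketch of that extraction with a hypothetical path:

```shell
# The zone endpoint returns e.g. projects/<number>/zones/<zone>;
# field 4 (slash-delimited) is the bare zone name.
zone_path='projects/123456789/zones/us-west1-a'   # hypothetical example value
INSTANCE_ZONE=$(echo "$zone_path" | cut -d/ -f4)
echo "$INSTANCE_ZONE"   # prints: us-west1-a
```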
name: align_bam
description: use Google Pipeline API to retrieve BAMs for a given sample, align them to a reference, merge them (one BAM per RG), and upload to cloud storage
resources:
minimumCpuCores: 16
bootDiskSizeGb: 100
zones:
- us-west1-a
- us-west1-b
- us-east1-b
- us-east1-c
name: samtools_index
description: Run samtools index to generate a BAM index file
inputParameters:
- name: INPUT_FILE
localCopy:
disk: data
path: input.bam
outputParameters:
- name: OUTPUT_FILE
localCopy:
name: freebayes_vcf
description: create .vcf, .vcf.gz, and .vcf.gz.tbi files from .bam and .bam.bai files
resources:
minimumCpuCores: 1
disks:
- name: data
type: PERSISTENT_HDD
sizeGb: 50
mountPoint: /mnt/data
zones:
name: gatk_vcf
description: create .vcf, .vcf.gz, and .vcf.gz.tbi files from .bam and .bam.bai files
resources:
minimumCpuCores: 1
disks:
- name: data
type: PERSISTENT_HDD
sizeGb: 50
mountPoint: /mnt/data
zones: