Brad Chapman chapmanb

## sacCer3.chrom.sizes
chrI	230218
chrII	813184
chrIII	316620
chrIV	1531933
chrV	576874
chrVI	270161
chrVII	1090940
chrVIII	562643
chrIX	439888
chrX	745751

## vagrant_pallet.clj
(ns distblast-cluster.vagrant
  (:require [pallet.compute :as compute]
            [pallet.phase :as phase]
            [pallet.core :as core]
            [pallet.utils :as utils]
            [pallet.compute.node-list :as node-list]
            [pallet.action.exec-script :as exec-script]))

(defn test-script [session]
  (-> session

## gist:727625
from Bio import Entrez

def fetch_gene_coordinates(search_term):
    handle = Entrez.esearch(db="gene", term=search_term)
    rec = Entrez.read(handle)
    gene_id = rec["IdList"][0] # assuming best match works

    handle = Entrez.efetch(db="gene", id=gene_id, retmode="xml")
    rec = Entrez.read(handle)[0]
    gene_locus = rec["Entrezgene_locus"][0]

## arvados_add_update.py
def simple_add_update():
    """Test adding and updating a collection to reproduce certificate error.
    """
    project_uuid = "pirca-j7d0g-h13mew3i5ya8cqq"
    name = "save_new_test"
    fname = "seqs.fa"
    avoid_an_error = True
    arvados_conn = get_connection_config()
    c = arvados.collection.Collection(apiconfig=arvados_conn)
    with c.open(fname, "wb") as writer:

## tcga_capture.R
#!/usr/bin/env Rscript

library(GenomicDataCommons)
library(dplyr)

# Build a GDC files query restricted to project "TCGA-LUAD", experimental
# strategy "WXS" and data format "BAM", then hand that query to manifest().
manifest <- GenomicDataCommons::manifest(
  GenomicDataCommons::files() %>%
    GenomicDataCommons::filter(
      ~ cases.project.project_id == "TCGA-LUAD" &
        experimental_strategy == "WXS" &
        data_format == "BAM"
    )
)

## gist:626765
# Read every line of dna.txt and keep the second one (index 1) as the mRNA
# string; readlines() leaves any trailing newline on that line in place.
with open("dna.txt", "r") as myfile:
    data = myfile.readlines()
mRNA = data[1]

def next_transcript(mRNA, cur_pos):
    """Locate the next "AUG" ... "UAG" span in ``mRNA`` at or after ``cur_pos``.

    Finds the first "AUG" at or after ``cur_pos``, then steps forward from it
    three characters at a time looking for a "UAG" triplet.

    Args:
        mRNA: Sequence string to scan.
        cur_pos: Index at which the search for "AUG" begins.

    Returns:
        A tuple ``(initial, i)`` holding the index of the "AUG" and the index
        of the first "UAG" found in that frame, or ``None`` when either the
        "AUG" or a following "UAG" is missing.
    """
    initial = mRNA.find("AUG", cur_pos)
    if initial == -1:
        # str.find reports "not found" as -1; scanning from -1 would walk an
        # unrelated frame and could return a bogus (-1, i) pair.
        return None
    # NOTE(review): only "UAG" is treated as a terminator; "UAA" and "UGA" are
    # not checked -- confirm whether that is intentional.
    for i in range(initial, len(mRNA), 3):
        if mRNA[i:i + 3] == "UAG":
            return initial, i
    return None

## omixon_optitype_hla_calls.csv

          
            sample
            locus
            alleles
            expected
            validates

            
              NA12878
              A
              HLA-A*01:01;HLA-A*11:01
              HLA-A*01:01;HLA-A*11:01
              yes

            
              NA12878
              B
              HLA-B*56:01;HLA-B*08:01
              HLA-B*08:01;HLA-B*56:01
              yes

            
              NA12878
              C
              HLA-C*07:01;HLA-C*01:02
              HLA-C*01:02;HLA-C*07:01
              yes

            
              NA12891
              A
              HLA-A*24:02;HLA-A*01:01
              HLA-A*01:01;HLA-A*24:02
              yes

            
              NA12891
              B
              HLA-B*08:01;HLA-B*07:02
              HLA-B*07:02;HLA-B*08:01
              yes

            
              NA12891
              C
              HLA-C*07:01;HLA-C*07:02
              HLA-C*07:01;HLA-C*07:02
              yes

            
              NA18526
              A
              HLA-A*24:02;HLA-A*33:03
              HLA-A*24:02;HLA-A*33:03
              yes

            
              NA18526
              B
              HLA-B*40:01;HLA-B*58:01
              HLA-B*40:01;HLA-B*58:01;HLA-B*58:02
              no

            
              NA18526
              C
              HLA-C*03:02;HLA-C*07:02
              HLA-C*03:02;HLA-C*07:02
              yes

## snpeff
#!/bin/bash
# snpEff executable shell script
# http://snpeff.sourceforge.net/
# Extracts memory and system property Java arguments from the list of provided arguments
# (ie -Xms 1g -Xmx 2g)

jardir="$(cd "$(dirname "$0")" && pwd -P)"

java=java
if [ -e "$JAVA_HOME/bin/java" ]

## met_cnvs.txt
2-584759 no MET
2-584751 MET (FISH and ddPCR)
2-584699 MET (FISH and ddPCR)

## bcbio_vm_install.sh
#!/bin/bash
# Install bcbio-nextgen and bcbio-nextgen-vm with conda into a Miniconda2
# prefix at ./anaconda, and create a ./bin directory in the current directory.
set -eu -o pipefail

# Assign first, export afterwards: `export VAR=$(cmd)` hides a failing cmd
# from `set -e` because only export's own exit status is checked.
TARGETDIR="$(pwd)/anaconda"
BINDIR="$(pwd)"
export TARGETDIR BINDIR

# The installer is executed right after download, so fetch it over HTTPS.
wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh
# -b runs the installer non-interactively; -p sets the install prefix.
bash Miniconda2-latest-Linux-x86_64.sh -b -p "$TARGETDIR"
# Quote every expansion so a working directory containing spaces still works.
"$TARGETDIR/bin/conda" install --yes -c conda-forge -c bioconda bcbio-nextgen
"$TARGETDIR/bin/conda" install --yes -c conda-forge -c bioconda bcbio-nextgen-vm
mkdir -p "$BINDIR/bin"
	chrI 230218
	chrII 813184
	chrIII 316620
	chrIV 1531933
	chrV 576874
	chrVI 270161
	chrVII 1090940
	chrVIII 562643
	chrIX 439888
	chrX 745751
	(ns distblast-cluster.vagrant
	(:require [pallet.compute :as compute]
	[pallet.phase :as phase]
	[pallet.core :as core]
	[pallet.utils :as utils]
	[pallet.compute.node-list :as node-list]
	[pallet.action.exec-script :as exec-script]))

	(defn test-script [session]
	(-> session
	from Bio import Entrez

	def fetch_gene_coordinates(search_term):
	handle = Entrez.esearch(db="gene", term=search_term)
	rec = Entrez.read(handle)
	gene_id = rec["IdList"][0] # assuming best match works

	handle = Entrez.efetch(db="gene", id=gene_id, retmode="xml")
	rec = Entrez.read(handle)[0]
	gene_locus = rec["Entrezgene_locus"][0]
	def simple_add_update():
	"""Test adding and updating a collection to reproduce certificate error.
	"""
	project_uuid = "pirca-j7d0g-h13mew3i5ya8cqq"
	name = "save_new_test"
	fname = "seqs.fa"
	avoid_an_error = True
	arvados_conn = get_connection_config()
	c = arvados.collection.Collection(apiconfig=arvados_conn)
	with c.open(fname, "wb") as writer:
	#!/usr/bin/env Rscript

	library(GenomicDataCommons)
	library(dplyr)

	# Build a GDC files query restricted to project "TCGA-LUAD", experimental
	# strategy "WXS" and data format "BAM", then hand that query to manifest().
	manifest <- GenomicDataCommons::manifest(
	  GenomicDataCommons::files() %>%
	    GenomicDataCommons::filter(
	      ~ cases.project.project_id == "TCGA-LUAD" &
	        experimental_strategy == "WXS" &
	        data_format == "BAM"
	    )
	)
	# Read every line of dna.txt and keep the second one (index 1) as the mRNA
	# string. The body of the `with` must be indented to be valid Python.
	with open("dna.txt", "r") as myfile:
	    data = myfile.readlines()
	    mRNA = data[1]

	def next_transcript(mRNA, cur_pos):
	    """Locate the next "AUG" ... "UAG" span in ``mRNA`` at or after ``cur_pos``.

	    Finds the first "AUG" at or after ``cur_pos``, then steps forward from
	    it three characters at a time looking for a "UAG" triplet.

	    Args:
	        mRNA: Sequence string to scan.
	        cur_pos: Index at which the search for "AUG" begins.

	    Returns:
	        A tuple ``(initial, i)`` holding the index of the "AUG" and the
	        index of the first "UAG" found in that frame, or ``None`` when
	        either the "AUG" or a following "UAG" is missing.
	    """
	    initial = mRNA.find("AUG", cur_pos)
	    if initial == -1:
	        # str.find reports "not found" as -1; scanning from -1 would walk
	        # an unrelated frame and could return a bogus (-1, i) pair.
	        return None
	    # NOTE(review): only "UAG" is treated as a terminator; "UAA" and "UGA"
	    # are not checked -- confirm whether that is intentional.
	    for i in range(initial, len(mRNA), 3):
	        if mRNA[i:i + 3] == "UAG":
	            return initial, i
	    return None
sample	locus	alleles	expected	validates
NA12878	A	HLA-A*01:01;HLA-A*11:01	HLA-A*01:01;HLA-A*11:01	yes
NA12878	B	HLA-B*56:01;HLA-B*08:01	HLA-B*08:01;HLA-B*56:01	yes
NA12878	C	HLA-C*07:01;HLA-C*01:02	HLA-C*01:02;HLA-C*07:01	yes
NA12891	A	HLA-A*24:02;HLA-A*01:01	HLA-A*01:01;HLA-A*24:02	yes
NA12891	B	HLA-B*08:01;HLA-B*07:02	HLA-B*07:02;HLA-B*08:01	yes
NA12891	C	HLA-C*07:01;HLA-C*07:02	HLA-C*07:01;HLA-C*07:02	yes
NA18526	A	HLA-A*24:02;HLA-A*33:03	HLA-A*24:02;HLA-A*33:03	yes
NA18526	B	HLA-B*40:01;HLA-B*58:01	HLA-B*40:01;HLA-B*58:01;HLA-B*58:02	no
NA18526	C	HLA-C*03:02;HLA-C*07:02	HLA-C*03:02;HLA-C*07:02	yes
	#!/bin/bash
	# snpEff executable shell script
	# http://snpeff.sourceforge.net/
	# Extracts memory and system property Java arguments from the list of provided arguments
	# (ie -Xms 1g -Xmx 2g)

	jardir="$(cd "$(dirname "$0")" && pwd -P)"

	java=java
	if [ -e "$JAVA_HOME/bin/java" ]
	2-584759 no MET
	2-584751 MET (FISH and ddPCR)
	2-584699 MET (FISH and ddPCR)
	#!/bin/bash
	# Install bcbio-nextgen and bcbio-nextgen-vm with conda into a Miniconda2
	# prefix at ./anaconda, and create a ./bin directory in the current directory.
	set -eu -o pipefail

	# Assign first, export afterwards: `export VAR=$(cmd)` hides a failing cmd
	# from `set -e` because only export's own exit status is checked.
	TARGETDIR="$(pwd)/anaconda"
	BINDIR="$(pwd)"
	export TARGETDIR BINDIR

	# The installer is executed right after download, so fetch it over HTTPS.
	wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh
	# -b runs the installer non-interactively; -p sets the install prefix.
	bash Miniconda2-latest-Linux-x86_64.sh -b -p "$TARGETDIR"
	# Quote every expansion so a working directory containing spaces still works.
	"$TARGETDIR/bin/conda" install --yes -c conda-forge -c bioconda bcbio-nextgen
	"$TARGETDIR/bin/conda" install --yes -c conda-forge -c bioconda bcbio-nextgen-vm
	mkdir -p "$BINDIR/bin"