Larsson Omberg larssono

## synapse_upload_directory_tree.py
import os
import synapseclient
from synapseclient import File, Folder

# Presumably the Synapse ID of the target project; it is the value that
# START_PATH maps to in `parents` below.
PROJECT = 'syn2778315'
# Path used as the first key of `parents`.
START_PATH = '.'
# Log in to Synapse (silent=True) and keep the returned client as `syn`.
syn=synapseclient.login(silent=True)


# Lookup from a path to an ID, seeded with START_PATH -> PROJECT.
parents = {START_PATH: PROJECT}

## vcfSynapseUpload.py
import synapseclient
from synapseclient import File, Activity, Wiki
syn = synapseclient.login()

DKFZ_FOLDER = 'syn2898426'
WORKFLOW = 'oicr-sga'
WORKFLOW_VERSION  = '1-0-0'
DESCRIPTION = 'This is the variant calling for specimen 669a4076-13de-42dc-895c-85d040422042 from donor 05506f4c-e701-4a9d-ae06-97f066aade43. The results consist of one or more VCF files plus optional tar.gz files that contain additional file types. This uses the SangerPancancerCgpCnIndelSnvStr workflow, version 1.0.1 available at https://s3.amazonaws.com/oicr.workflow.bundles/released-bundles/Workflow_Bundle_SangerPancancerCgpCnIndelSnvStr_1.0.1_SeqWare_1.1.0-alpha.5.zip. This workflow can be created from source, see https://github.com/ICGC-TCGA-PanCancer/SeqWare-CGP-SomaticCore. For a complete change log see https://github.com/testproject/workflow-test-cancer/blob/1.0.0/workflow-test-cancer/CHANGELOG.md. Note the 'ANALYSIS_TYPE' is 'REFERENCE_ASSEMBLY' but a better term to describe this analysis is 'SEQUENCE_VARIATION' as defined by the

## gist:76e4aa1e7df379b533c2
import pandas as pd
import synapseclient
import os

def compare2Files(fname, originFiles, newFiles, syn):
    """Load both versions of ``fname`` and put rows and columns in sorted order.

    Args:
        fname: Key looked up in both ``originFiles`` and ``newFiles``.
        originFiles: Mapping from file name to the value handed to
            ``syn.get`` for the original version.
        newFiles: Mapping from file name to the value handed to ``syn.get``
            for the new version.
        syn: Object whose ``get(...)`` result exposes a ``path`` attribute
            naming a tab-separated file.
    """
    df1 = pd.read_csv(syn.get(originFiles[fname]).path, sep="\t")
    df2 = pd.read_csv(syn.get(newFiles[fname]).path, sep="\t")
    # Reorder rows and columns by label. ``sort_index`` replaces the former
    # ``.ix[sort(...), sort(...)]`` form, which used a bare ``sort`` name that
    # is not defined in the visible code and the ``.ix`` indexer that newer
    # pandas releases no longer provide.
    df1 = df1.sort_index(axis=0).sort_index(axis=1)
    df2 = df2.sort_index(axis=0).sort_index(axis=1)
    # NOTE(review): the visible body ends here without comparing or returning
    # the two frames -- confirm whether the rest of the function is missing.

## countTCGASamples.py
import synapseclient
syn = synapseclient.login()
import pandas as pd
import synapseHelpers
from multiprocessing.dummy import  Pool

# Adjacent string literals are joined verbatim, so every fragment except the
# last must end with a space; otherwise the clauses run together
# (previously "...'clinicalMatrix'and fileType...").
QUERY = ("select * from file where benefactorId=='syn2812961' "
         "and fileType!='clinicalMatrix' "
         "and fileType!='maf'")

## gist:e746b062b4ceed18ec8c
import pandas as pd
import synapseclient
syn = synapseclient.login()

# Load the per-sample summary table; it is parsed as tab-separated
# (sep='\t') even though the file name ends in .csv.
dcc = pd.read_csv('/Users/lom/Downloads/DCC_datatable.csv', sep='\t')
##################
#Compare BLCA methylation
###################
# Keep only the rows whose Disease is 'BLCA' and whose HumanMethylation450
# column is 'Yes'.
dcc_meth_blca=dcc[(dcc.Disease=='BLCA') & (dcc.HumanMethylation450=='Yes')]

## find_missing_provenance
import synapseclient
from multiprocessing.dummy import Pool
# Pool is imported from multiprocessing.dummy, so this is a pool of 5
# worker threads rather than processes.
mp = Pool(5)
# Log in to Synapse; the client is kept in the module-level name `syn`.
syn = synapseclient.login()

def prov(x):
    """Return ``syn.getProvenance(x)``, or ``None`` on a Synapse HTTP error.

    Only ``synapseclient.exceptions.SynapseHTTPError`` is turned into
    ``None``; any other exception propagates to the caller.
    """
    try:
        provenance = syn.getProvenance(x)
    except synapseclient.exceptions.SynapseHTTPError:
        provenance = None
    return provenance

## gist:d5212d555f3ea1d98ff0
def thisCodeInSynapse(parentId, file=None, description=''):
    """Store a source file in Synapse as a ``synapseclient.File``.

    When ``file`` is not supplied, the file of the calling code is used; it
    is resolved with ``inspect.getfile`` on the caller's frame.

    Args:
        parentId: Passed as ``parent`` to ``synapseclient.File``.
        file: Path of the file to store; ``None`` selects the caller's file.
        description: Passed as ``description`` to ``synapseclient.File``.
    """
    # Local imports keep the helper self-contained even when the enclosing
    # module does not import these standard-library modules itself.
    import inspect
    import os
    import sys

    if file is None:
        # Frame 1 is the frame of whoever called this function.
        file = inspect.getfile(sys._getframe(1))
    # The entity is named after the final path component; no other
    # characters are removed from the name.
    code = synapseclient.File(file, name=os.path.split(file)[-1],
                              parent=parentId, description=description)
    codeEntity = syn.store(code)
    # NOTE(review): the original summary promised the Synapse ID of the
    # stored entity, but the visible body ends without a return statement --
    # confirm whether a return is missing.

## IlluminaHiSeqDNASeqC_barcode_conversion.py
import tarfile
from StringIO import StringIO
import requests
import synapseclient
import re
import pandas as pd

# Build the Synapse client with skip_checks=False, then log in with
# silent=True; the client is kept in the module-level name `syn`.
syn=synapseclient.Synapse(skip_checks=False)
syn.login(silent=True)

## gist:1c2b9acfc2fba25a96fb
# Make the S3 bucket the default storage location for the project you set up. Let's assume that this project has ID syn123.
#In R run:

library(synapseClient)
synapseLogin()

# Set up a storage location for this bucket in Synapse. Only need to do this once per bucket.
AWSbucketName = "your-bucket-name-here"

storageLocation <- synRestPOST("/storageLocation", list(

## filterRecords.py
import synapseclient
import pandas as pd

# Build the Synapse client with skip_checks=True, then log in with
# silent=True.
syn=synapseclient.Synapse(skip_checks=True)
syn.login(silent=True)


# Read 'publicRecordIds' as a tab-separated table and keep only the rows
# whose studyId equals "parkinson".
records = pd.read_csv('publicRecordIds', sep='\t')
records = records.query('studyId=="parkinson"')
	import os
	import synapseclient
	from synapseclient import File, Folder

	# Presumably the Synapse ID of the target project; it is the value that
	# START_PATH maps to in `parents` below.
	PROJECT = 'syn2778315'
	# Path used as the first key of `parents`.
	START_PATH = '.'
	# Log in to Synapse (silent=True) and keep the returned client as `syn`.
	syn=synapseclient.login(silent=True)


	# Lookup from a path to an ID, seeded with START_PATH -> PROJECT.
	parents = {START_PATH: PROJECT}
	import synapseclient
	from synapseclient import File, Activity, Wiki
	syn = synapseclient.login()

	DKFZ_FOLDER = 'syn2898426'
	WORKFLOW = 'oicr-sga'
	WORKFLOW_VERSION = '1-0-0'
	DESCRIPTION = 'This is the variant calling for specimen 669a4076-13de-42dc-895c-85d040422042 from donor 05506f4c-e701-4a9d-ae06-97f066aade43. The results consist of one or more VCF files plus optional tar.gz files that contain additional file types. This uses the SangerPancancerCgpCnIndelSnvStr workflow, version 1.0.1 available at https://s3.amazonaws.com/oicr.workflow.bundles/released-bundles/Workflow_Bundle_SangerPancancerCgpCnIndelSnvStr_1.0.1_SeqWare_1.1.0-alpha.5.zip. This workflow can be created from source, see https://github.com/ICGC-TCGA-PanCancer/SeqWare-CGP-SomaticCore. For a complete change log see https://github.com/testproject/workflow-test-cancer/blob/1.0.0/workflow-test-cancer/CHANGELOG.md. Note the 'ANALYSIS_TYPE' is 'REFERENCE_ASSEMBLY' but a better term to describe this analysis is 'SEQUENCE_VARIATION' as defined by the
	import pandas as pd
	import synapseclient
	import os

	def compare2Files(fname, originFiles, newFiles, syn):
	    """Load both versions of ``fname`` and put rows and columns in sorted order.

	    Args:
	        fname: Key looked up in both ``originFiles`` and ``newFiles``.
	        originFiles: Mapping from file name to the value handed to
	            ``syn.get`` for the original version.
	        newFiles: Mapping from file name to the value handed to
	            ``syn.get`` for the new version.
	        syn: Object whose ``get(...)`` result exposes a ``path`` attribute
	            naming a tab-separated file.
	    """
	    # The body is indented under the def again; with the flattened
	    # indentation the function could not be parsed.
	    df1 = pd.read_csv(syn.get(originFiles[fname]).path, sep="\t")
	    df2 = pd.read_csv(syn.get(newFiles[fname]).path, sep="\t")
	    # Reorder rows and columns by label. ``sort_index`` replaces the former
	    # ``.ix[sort(...), sort(...)]`` form, which used a bare ``sort`` name
	    # that is not defined in the visible code and the ``.ix`` indexer that
	    # newer pandas releases no longer provide.
	    df1 = df1.sort_index(axis=0).sort_index(axis=1)
	    df2 = df2.sort_index(axis=0).sort_index(axis=1)
	    # NOTE(review): the visible body ends here without comparing or
	    # returning the two frames -- confirm whether the rest is missing.
	import synapseclient
	from multiprocessing.dummy import Pool
	# Pool is imported from multiprocessing.dummy, so this is a pool of 5
	# worker threads rather than processes.
	mp = Pool(5)
	# Log in to Synapse; the client is kept in the module-level name `syn`.
	syn = synapseclient.login()

	def prov(x):
	    """Return ``syn.getProvenance(x)``, or ``None`` on a Synapse HTTP error.

	    Only ``synapseclient.exceptions.SynapseHTTPError`` is turned into
	    ``None``; any other exception propagates to the caller.
	    """
	    # Nesting restored: with every line at the same depth the def and the
	    # try/except could not be parsed.
	    try:
	        return syn.getProvenance(x)
	    except synapseclient.exceptions.SynapseHTTPError:
	        return None
	def thisCodeInSynapse(parentId, file=None, description=''):
	    """Store a source file in Synapse as a ``synapseclient.File``.

	    When ``file`` is not supplied, the file of the calling code is used; it
	    is resolved with ``inspect.getfile`` on the caller's frame.

	    Args:
	        parentId: Passed as ``parent`` to ``synapseclient.File``.
	        file: Path of the file to store; ``None`` selects the caller's file.
	        description: Passed as ``description`` to ``synapseclient.File``.
	    """
	    # Local imports keep the helper self-contained even when the enclosing
	    # module does not import these standard-library modules itself.
	    import inspect
	    import os
	    import sys

	    if file is None:
	        # Frame 1 is the frame of whoever called this function.
	        file = inspect.getfile(sys._getframe(1))
	    # The entity is named after the final path component; no other
	    # characters are removed from the name.
	    code = synapseclient.File(file, name=os.path.split(file)[-1],
	                              parent=parentId, description=description)
	    codeEntity = syn.store(code)
	    # NOTE(review): the original summary promised the Synapse ID of the
	    # stored entity, but the visible body ends without a return statement
	    # -- confirm whether a return is missing.
	import tarfile
	from StringIO import StringIO
	import requests
	import synapseclient
	import re
	import pandas as pd

	# Build the Synapse client with skip_checks=False, then log in with
	# silent=True; the client is kept in the name `syn`.
	syn=synapseclient.Synapse(skip_checks=False)
	syn.login(silent=True)
	# Make the S3 bucket the default storage location for the project you set up. Let's assume that this project has ID syn123.
	#In R run:

	library(synapseClient)
	synapseLogin()

	# Set up a storage location for this bucket in Synapse. Only need to do this once per bucket.
	AWSbucketName = "your-bucket-name-here"

	storageLocation <- synRestPOST("/storageLocation", list(
	import synapseclient
	import pandas as pd

	# Build the Synapse client with skip_checks=True, then log in with
	# silent=True.
	syn=synapseclient.Synapse(skip_checks=True)
	syn.login(silent=True)


	# Read 'publicRecordIds' as a tab-separated table and keep only the rows
	# whose studyId equals "parkinson".
	records = pd.read_csv('publicRecordIds', sep='\t')
	records = records.query('studyId=="parkinson"')