Skip to content

Instantly share code, notes, and snippets.

View larssono's full-sized avatar

Larsson Omberg larssono

View GitHub Profile
@larssono
larssono / synapse_upload_directory_tree.py
Last active August 29, 2015 14:08
Recursively uploads a directory structure to Synapse
import os
import synapseclient
from synapseclient import File, Folder
PROJECT = 'syn2778315'
START_PATH = '.'
syn=synapseclient.login(silent=True)
parents = {START_PATH: PROJECT}
import synapseclient
from synapseclient import File, Activity, Wiki
syn = synapseclient.login()
DKFZ_FOLDER = 'syn2898426'
WORKFLOW = 'oicr-sga'
WORKFLOW_VERSION = '1-0-0'
DESCRIPTION = 'This is the variant calling for specimen 669a4076-13de-42dc-895c-85d040422042 from donor 05506f4c-e701-4a9d-ae06-97f066aade43. The results consist of one or more VCF files plus optional tar.gz files that contain additional file types. This uses the SangerPancancerCgpCnIndelSnvStr workflow, version 1.0.1 available at https://s3.amazonaws.com/oicr.workflow.bundles/released-bundles/Workflow_Bundle_SangerPancancerCgpCnIndelSnvStr_1.0.1_SeqWare_1.1.0-alpha.5.zip. This workflow can be created from source, see https://github.com/ICGC-TCGA-PanCancer/SeqWare-CGP-SomaticCore. For a complete change log see https://github.com/testproject/workflow-test-cancer/blob/1.0.0/workflow-test-cancer/CHANGELOG.md. Note the 'ANALYSIS_TYPE' is 'REFERENCE_ASSEMBLY' but a better term to describe this analysis is 'SEQUENCE_VARIATION' as defined by the
import pandas as pd
import synapseclient
import os
def compare2Files(fname, originFiles, newFiles, syn):
df1 = pd.read_csv(syn.get(originFiles[fname]).path, sep="\t")
df2 = pd.read_csv(syn.get(newFiles[fname]).path, sep="\t")
df1 = df1.ix[sort(df1.index), sort(df1.columns)]
df2 = df2.ix[sort(df2.index), sort(df2.columns)]
@larssono
larssono / countTCGASamples.py
Created March 7, 2015 10:21
Create a summary of the TCGA data in Synapse
import synapseclient
syn = synapseclient.login()
import pandas as pd
import synapseHelpers
from multiprocessing.dummy import Pool
# Synapse query selecting every file under benefactor syn2812961 except
# those whose fileType is 'clinicalMatrix' or 'maf'.
# Each fragment ends with a trailing space so that the implicitly
# concatenated literals do not fuse adjacent clauses together
# (previously this produced "...'clinicalMatrix'and fileType...").
QUERY = ("select * from file where benefactorId=='syn2812961' "
         "and fileType!='clinicalMatrix' "
         "and fileType!='maf'")
import pandas as pd
import synapseclient
syn = synapseclient.login()
#Read csv summary by sample
dcc = pd.read_csv('/Users/lom/Downloads/DCC_datatable.csv', sep='\t')
##################
#Compare BLCA methylation
###################
dcc_meth_blca=dcc[(dcc.Disease=='BLCA') & (dcc.HumanMethylation450=='Yes')]
import synapseclient
from multiprocessing.dummy import Pool
mp = Pool(5)
syn = synapseclient.login()
def prov(x):
    """Look up provenance for ``x`` through ``syn.getProvenance``.

    Returns the result of ``syn.getProvenance(x)``, or ``None`` when that
    call raises ``synapseclient.exceptions.SynapseHTTPError``.
    """
    try:
        record = syn.getProvenance(x)
    except synapseclient.exceptions.SynapseHTTPError:
        # The HTTP error is deliberately swallowed; None marks the failure.
        record = None
    return record
@larssono
larssono / gist:d5212d555f3ea1d98ff0
Created April 22, 2015 16:12
This code in Synapse
def thisCodeInSynapse(parentId, file=None, description=''):
"""Determines the name of the file that the code is called from
and uploads that to Synapse returning the synapseId of created codeObject.
"""
#print inspect.getfile(inspect.currentframe())
#print os.path.abspath(inspect.getfile(inspect.currentframe()))
file = inspect.getfile(sys._getframe(1)) if file==None else file
#Make sure unallowed characters are stripped out for the name
code= synapseclient.File(file, name=os.path.split(file)[-1], parent=parentId, description=description)
codeEntity = syn.store(code)
import tarfile
from StringIO import StringIO
import requests
import synapseclient
import re
import pandas as pd
syn=synapseclient.Synapse(skip_checks=False)
syn.login(silent=True)
# Make an S3 bucket the default storage location for a project you set up. Let's assume that this project has the Id syn123
#In R run:
library(synapseClient)
synapseLogin()
# Set up a storage location for this bucket in Synapse. Only need to do this once per bucket.
AWSbucketName = "your-bucket-name-here"
storageLocation <- synRestPOST("/storageLocation", list(
import synapseclient
import pandas as pd
syn=synapseclient.Synapse(skip_checks=True)
syn.login(silent=True)
records = pd.read_csv('publicRecordIds', sep='\t')
records = records.query('studyId=="parkinson"')