Skip to content

Instantly share code, notes, and snippets.

@larssono
Created March 19, 2015 17:47
Show Gist options
  • Save larssono/e746b062b4ceed18ec8c to your computer and use it in GitHub Desktop.
Save larssono/e746b062b4ceed18ec8c to your computer and use it in GitHub Desktop.
import pandas as pd
import synapseclient
# Open a Synapse session; no credentials are passed explicitly here.
syn = synapseclient.login()

# Load the per-sample DCC summary.  The file is tab-separated despite its
# .csv extension, hence the explicit delimiter.
dcc = pd.read_csv(
    '/Users/lom/Downloads/DCC_datatable.csv',
    delimiter='\t',
)
##################
# Compare BLCA methylation
##################
# DCC side: rows of the summary table for BLCA that are flagged 'Yes' in the
# HumanMethylation450 column.
dcc_meth_blca = dcc[(dcc.Disease == 'BLCA') & (dcc.HumanMethylation450 == 'Yes')]

# Synapse side: the same disease/platform slice of table syn3281840.
table = syn.tableQuery("SELECT * FROM syn3281840 where acronym='BLCA' and platform='HumanMethylation450'")
df = table.asDataFrame()

# Barcodes listed in Synapse that have no counterpart in the DCC summary.
# Only the first 12 characters of each DCC Sample_Name are compared against
# patient_barcode (presumably the patient-level prefix -- confirm).
dcc_patients = {sample_name[:12] for sample_name in dcc_meth_blca.Sample_Name}
missing_from_dcc = set(df.patient_barcode) - dcc_patients
# Printed explicitly: a bare expression only shows its value in an
# interactive session and is silently discarded when run as a script.
print(missing_from_dcc)

# Inspect the Synapse rows (and their samples column) for one specific patient.
blca_case = df[df.patient_barcode == 'TCGA-07-0227']
print(blca_case)
print(blca_case.samples)
################
# Compare GBM SNP6
################
# DCC side: rows of the summary table for GBM that are flagged 'Yes' in the
# Genome_Wide_SNP_6 column.
dcc_snp6_gbm = dcc[(dcc.Disease == 'GBM') & (dcc.Genome_Wide_SNP_6 == 'Yes')]

# Synapse side: the same disease/platform slice of table syn3281840.
df = syn.tableQuery("SELECT * FROM syn3281840 where acronym='GBM' and platform='Genome_Wide_SNP_6'").asDataFrame()

# Barcodes listed in Synapse that have no counterpart in the DCC summary.
# Only the first 12 characters of each DCC Sample_Name are compared.
differences = set(df.patient_barcode) - {x[:12] for x in dcc_snp6_gbm.Sample_Name}

# Show the samples column for every unmatched barcode.  The single-argument
# print(...) form behaves the same on Python 2 and Python 3.
for barcode in differences:
    print(df[df.patient_barcode == barcode].samples)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment