Skip to content

Instantly share code, notes, and snippets.

@larssono
Forked from chunxiangzheng/gist:e74e0c2beda0ab5d76a7
Last active August 29, 2015 14:16
Show Gist options
  • Save larssono/76e4aa1e7df379b533c2 to your computer and use it in GitHub Desktop.
Save larssono/76e4aa1e7df379b533c2 to your computer and use it in GitHub Desktop.
import pandas as pd
import synapseclient
import os
def compare2Files(fname, originFiles, newFiles, syn):
    """Load one file from both file sets and put the two tables in the same order.

    Args:
        fname: File name used as the key into both mappings.
        originFiles: Mapping of file name -> id accepted by ``syn.get`` for
            the original file set.
        newFiles: Mapping of file name -> id accepted by ``syn.get`` for the
            new file set.
        syn: Client object whose ``get(id)`` returns an object with a
            ``path`` attribute that ``pandas.read_csv`` can read as a
            tab-separated table.

    Returns:
        A tuple ``(df1, df2)``: the original and the new table, each with its
        row labels and column labels in sorted order, so the two frames can
        be compared directly.

    Raises:
        KeyError: If ``fname`` is missing from either mapping.
    """
    df1 = pd.read_csv(syn.get(originFiles[fname]).path, sep="\t")
    df2 = pd.read_csv(syn.get(newFiles[fname]).path, sep="\t")
    # The builtin ``sorted`` replaces the undefined name ``sort``, and ``.loc``
    # replaces the removed ``.ix`` indexer for label-based selection.
    df1 = df1.loc[sorted(df1.index), sorted(df1.columns)]
    df2 = df2.loc[sorted(df2.index), sorted(df2.columns)]
    # Hand the aligned frames back; previously they were silently discarded.
    return df1, df2
# Synapse ids used as the benefactorId filter for the two file sets to compare.
original = "syn2812961"
new = "syn3270657"
# NOTE(review): the two folder names are not used below; kept (including the
# existing spelling of ``orginalFolder``) in case other code refers to them.
orginalFolder = "original"
newFolder = "new"

syn = synapseclient.login()

# Map file name -> file id for every file returned for each benefactor.
originFiles = {x["file.name"]: x["file.id"] for x in syn.chunkedQuery("select name from file where benefactorId=='%s'"%original)}
newFiles = {x["file.name"]: x["file.id"] for x in syn.chunkedQuery("select name from file where benefactorId=='%s'"%new)}

fname = 'unc.edu_GBM_AgilentG4502A_07_2.geneExp.tsv'

# Read each table once: first column as the row index, the literal string
# 'null' treated as missing, every value cast to float. The cast is applied to
# the DataFrame returned by read_csv (it was previously attached to the
# na_values list, which raises AttributeError).
df1 = pd.read_csv(syn.get(originFiles[fname]).path, sep="\t", index_col=0, na_values=['null']).astype('float')
df2 = pd.read_csv(syn.get(newFiles[fname]).path, sep="\t", index_col=0, na_values=['null']).astype('float')

# Put rows and columns of both tables in the same (sorted) order; ``.loc`` is
# the label-based replacement for the removed ``.ix`` indexer.
df1 = df1.loc[sorted(df1.index), sorted(df1.columns)]
df2 = df2.loc[sorted(df2.index), sorted(df2.columns)]

# Largest absolute element-wise difference over the first 100 rows.
# NOTE(review): assumes the row index is non-integer (e.g. gene names), where
# the old ``.ix[:100, :]`` sliced by position — confirm against the data.
max_abs_diff = (df1.iloc[:100, :] - df2.iloc[:100, :]).abs().max().max()
print(max_abs_diff)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment