Forked from chunxiangzheng/gist:e74e0c2beda0ab5d76a7
Last active
August 29, 2015 14:16
-
-
Save larssono/76e4aa1e7df379b533c2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os

import pandas as pd
import synapseclient
def compare2Files(fname, originFiles, newFiles, syn):
    """Load the original and new versions of one file, sorted for comparison.

    Both files are read as tab-separated tables and then sorted by row
    index and by column label, so the two frames share the same layout
    whenever they carry the same labels.

    Args:
        fname: File name used as the lookup key in both mappings.
        originFiles: Mapping of file name to the id of the original file.
        newFiles: Mapping of file name to the id of the new file.
        syn: Client whose ``get(id)`` returns an object exposing the local
            file location as ``.path``.

    Returns:
        A ``(df1, df2)`` tuple: the sorted original table and the sorted
        new table.

    Raises:
        KeyError: If ``fname`` is missing from either mapping.
    """

    def _load_sorted(file_id):
        # Fetch the file through the client and parse it as TSV.
        table = pd.read_csv(syn.get(file_id).path, sep="\t")
        # sort_index replaces the removed ``.ix[sorted(...), sorted(...)]``
        # idiom (the original also called an undefined ``sort``).
        return table.sort_index(axis=0).sort_index(axis=1)

    df1 = _load_sorted(originFiles[fname])
    df2 = _load_sorted(newFiles[fname])
    return df1, df2
# Synapse ids substituted into the benefactorId queries below.
original = "syn2812961"
new = "syn3270657"
# NOTE(review): "orginalFolder" is misspelled; the name is kept as-is in case
# other code refers to it. Neither folder name is used in this script.
orginalFolder = "original"
newFolder = "new"

syn = synapseclient.login()

# Map each file name to its Synapse id, once per benefactor.
originFiles = {x["file.name"]: x["file.id"] for x in syn.chunkedQuery("select name from file where benefactorId=='%s'"%original)}
newFiles = {x["file.name"]: x["file.id"] for x in syn.chunkedQuery("select name from file where benefactorId=='%s'"%new)}

fname = 'unc.edu_GBM_AgilentG4502A_07_2.geneExp.tsv'


def _load_expression(file_id):
    """Read one TSV as a float table, sorted by row and column labels."""
    # 'null' cells become NaN so the whole table can be cast to float.
    # ``.astype`` belongs on the DataFrame returned by read_csv; the original
    # applied it to the ``na_values`` list, which raises AttributeError.
    table = pd.read_csv(
        syn.get(file_id).path, sep="\t", index_col=0, na_values=['null']
    ).astype('float')
    # sort_index replaces the removed ``.ix[sorted(...), sorted(...)]`` idiom.
    return table.sort_index(axis=0).sort_index(axis=1)


# Each file is fetched and parsed once; the original loaded both files twice
# and discarded the first pair.
df1 = _load_expression(originFiles[fname])
df2 = _load_expression(newFiles[fname])

# Largest absolute difference over the first 100 rows (taken by position;
# the subtraction itself still aligns on row and column labels).
# NOTE(review): assumes the old ``.ix[:100, :]`` sliced positionally because
# the row index is non-integer - confirm against the data.
max_abs_diff = (df1.iloc[:100, :] - df2.iloc[:100, :]).abs().max().max()
print(max_abs_diff)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment