Skip to content

Instantly share code, notes, and snippets.

@evilkost
Created April 5, 2014 15:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save evilkost/9993562 to your computer and use it in GitHub Desktop.
Save evilkost/9993562 to your computer and use it in GitHub Desktop.
import pandas as pd
from functools import partial
from collections import Iterable
# Directory (relative path) that do_df() writes its converted tables into.
STORE_DIR = "preprocessed"
def do_df(filename, sep=" ", new_name=None, do_transpose=False, plugin_func=None):
    """Load a delimited table, optionally reshape it, and optionally save it.

    Args:
        filename: Input passed straight to ``pd.read_table``; the first
            column becomes the index.
        sep: Field delimiter of the input.
        new_name: If truthy, the resulting frame is written to
            ``STORE_DIR/new_name`` as a space-separated file.
        do_transpose: If truthy, transpose the frame right after loading.
        plugin_func: Either a single callable ``f(df) -> df`` or an iterable
            of such callables; they are applied in order, after the optional
            transpose and before saving.

    Returns:
        The resulting DataFrame.

    Raises:
        Whatever a plugin raises; plugin errors are propagated unchanged.
    """
    df = pd.read_table(filename, sep=sep, index_col=0)
    if do_transpose:
        df = df.T
    if plugin_func:
        # Decide "one callable vs. several" up front instead of wrapping the
        # plugin calls in a bare ``except``: that swallowed any error raised
        # inside a plugin and then failed with an unrelated "not callable"
        # TypeError when plugin_func was a list.
        try:
            funcs = iter(plugin_func)
        except TypeError:
            funcs = (plugin_func,)
        for func in funcs:
            df = func(df)
    if new_name:
        df.to_csv("%s/%s" % (STORE_DIR, new_name), sep=" ")
    return df
def skip_lines(df, rows=None, cols=None):
    """Return a copy of ``df`` without the given row/column positions.

    Args:
        df: Source DataFrame; it is not modified.
        rows: List of integer row positions to drop. Any falsy value
            (``None``, an empty list, and also a bare ``0``) drops nothing,
            so pass ``[0]`` rather than ``0`` to drop the first row.
        cols: List of integer column positions to drop; falsy values are
            treated the same way as for ``rows``.

    Returns:
        A new DataFrame.

    Note:
        Positions are translated to labels before dropping, so when a label
        occurs more than once every row/column carrying it is removed.
    """
    wdf = df.copy()
    if rows:
        wdf = wdf.drop(wdf.index[rows])
    if cols:
        # ``axis`` has to be given by keyword: pandas 2.0 removed the
        # positional form ``drop(labels, 1)``.
        wdf = wdf.drop(wdf.columns[cols], axis=1)
    return wdf
def columns_to_upper(df):
    """Return a copy of ``df`` whose column labels are upper-cased.

    The input frame is left untouched (the rename is done on a copy), which
    matches how ``skip_lines`` treats its argument.

    Args:
        df: DataFrame whose column labels all provide an ``upper()`` method
            (e.g. strings).

    Returns:
        A new DataFrame with the same data and upper-cased column labels.
    """
    result = df.copy()
    result.columns = [col.upper() for col in result.columns]
    return result
def main():
    """Run the fixed list of table conversions, one ``do_df`` call per input."""
    # (input file, keyword arguments for do_df), listed in execution order.
    jobs = (
        ("SampleCharGCT.csv",
         dict(sep=",", new_name="phenotype.csv")),
        ("GoldenGateMethGCT.txt",
         dict(sep="\t", new_name="meth.csv", do_transpose=True,
              plugin_func=partial(skip_lines, rows=[0, 1]))),
        ("score_matrix_tarbase_GCT_ext.txt",
         dict(sep="\t", new_name="interaction_TAR.csv",
              plugin_func=columns_to_upper)),
        ("score_matrix_mirwalk_GCT_05.txt",
         dict(sep="\t", new_name="interaction_MIR.csv",
              plugin_func=columns_to_upper)),
        ("StemCellqPCRGCTNormalized.txt",
         dict(sep=" ", new_name="mRNA.csv", do_transpose=True)),
        ("miRNANormalizedGCTnames.txt",
         dict(sep=" ", new_name="miRNA.csv", do_transpose=True)),
    )
    for source_file, options in jobs:
        do_df(source_file, **options)
# Run the conversions only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment