Skip to content

Instantly share code, notes, and snippets.

@fbrundu

fbrundu/tsv2gct.py

Created Feb 24, 2015
Embed
What would you like to do?
Transform a tab-separated matrix into a GCT file
import pandas as pd
import sys
import glob
import os
# Cardinality of the index columns (row names).
# FIXME: for now the first two columns are always assumed to be the index;
# the value is not yet read from the command line.
n_index_columns = 2


def _convert_to_gct(tsv_path, gct_path, n_index=n_index_columns):
    """Write *gct_path* as the contents of *tsv_path* preceded by a header.

    The header is the line ``#1.2`` followed by a line holding the number
    of data rows and data columns separated by a tab. Both counts come from
    parsing *tsv_path* with pandas, using the first *n_index* columns as the
    index so that they are not counted as data columns.
    """
    table = pd.read_table(tsv_path, sep='\t', index_col=list(range(n_index)))
    n_rows, n_cols = table.shape
    # The original text is copied verbatim below the header; pandas is only
    # used to obtain the matrix dimensions.
    with open(tsv_path) as txt:
        data = txt.read()
    with open(gct_path, 'w') as gct:
        gct.write('#1.2\n{0}\t{1}\n'.format(n_rows, n_cols))
        gct.write(data)


def main(argv=None):
    """Convert every matching file of a directory to a ``.gct`` file.

    Parameters
    ----------
    argv : list of str, optional
        ``[input_dir, input_ext]``; defaults to ``sys.argv[1:]``.
        ``input_dir`` is both the input and the output directory and
        ``input_ext`` is the extension of the files to convert. Additional
        items are ignored.

    Returns
    -------
    list of str
        Paths of the ``.gct`` files that were written, in sorted order.

    Raises
    ------
    SystemExit
        With a usage message when fewer than two arguments are given.
    """
    if argv is None:
        argv = sys.argv[1:]
    if len(argv) < 2:
        sys.exit('usage: {0} INPUT_DIR INPUT_EXT'.format(
            os.path.basename(sys.argv[0])))
    input_dir, input_ext = argv[0], argv[1]

    written = []
    for f in sorted(glob.glob(os.path.join(input_dir, '*' + input_ext))):
        output_fn = os.path.splitext(f)[0] + '.gct'
        if output_fn == f:
            # The input already carries the output extension: converting it
            # would overwrite the file with a second header stacked on top.
            sys.stderr.write('skipping {0}: already a .gct file\n'.format(f))
            continue
        _convert_to_gct(f, output_fn)
        written.append(output_fn)
    return written


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment