# Transform a tab-separated matrix into a GCT file.
"""Convert every tab-separated matrix in a directory to a ``.gct`` file.

Usage: python <script> <input_dir> <input_ext>

Each file in ``input_dir`` whose name ends with ``input_ext`` gets a sibling
file with the same base name and a ``.gct`` extension.  The output is the
original file content preceded by two header lines: ``#1.2`` and
``<n_rows>\t<n_cols>``.
"""
import glob
import os
import sys

import pandas as pd


def gct_path(path):
    """Return the output path for *path*: same base name, ``.gct`` extension."""
    return os.path.splitext(path)[0] + '.gct'


def tsv_to_gct(path, n_index_columns=2):
    """Write the GCT copy of the tab-separated matrix stored at *path*.

    Args:
        path: tab-separated input file whose first row is the column header.
        n_index_columns: number of leading columns that hold row labels
            rather than data; they are excluded from the column count
            written to the header.

    Returns:
        The path of the ``.gct`` file that was written.
    """
    # The table is parsed only to learn its dimensions; the payload itself
    # is copied verbatim below.
    table = pd.read_table(path, sep='\t',
                          index_col=list(range(n_index_columns)))
    n_rows, n_cols = table.shape

    # Copy raw bytes so the matrix is reproduced exactly, independent of the
    # locale's default text encoding and of the input's line endings.
    with open(path, 'rb') as src:
        data = src.read()

    header = '#1.2\n{}\t{}\n'.format(n_rows, n_cols).encode('ascii')
    output_fn = gct_path(path)
    with open(output_fn, 'wb') as gct:
        gct.write(header + data)
    return output_fn


def main(argv=None):
    """Convert all matching files and return the list of written paths.

    Args:
        argv: ``[input_dir, input_ext]``; defaults to ``sys.argv[1:]``.
            Any further items are ignored.

    Raises:
        SystemExit: when fewer than two arguments are supplied.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        sys.exit('usage: {} <input_dir> <input_ext>'.format(sys.argv[0]))

    # input / output directory
    input_dir = args[0]
    # input file extension
    input_ext = args[1]
    # cardinality of index columns (rownames)
    # FIXME: by now it assumes the first two columns as the index
    n_index_columns = 2

    written = []
    pattern = os.path.join(input_dir, '*' + input_ext)
    # Sorted so files are processed in a deterministic order.
    for f in sorted(glob.glob(pattern)):
        if gct_path(f) == f:
            # The output would overwrite its own input, stacking one more
            # header on the file at every run.
            sys.stderr.write(
                'skipping {}: input already has the .gct extension\n'.format(f))
            continue
        written.append(tsv_to_gct(f, n_index_columns))
    return written


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment