"""Correct a TCGA assembled TSV (tab-delimited) file.

Reformats the sample names used as the TSV column headers.
"""
import pandas as pd
import sys
import re
# "TCGA" followed by every character up to (not including) the next underscore.
_TCGA_TOKEN_RE = re.compile(r'TCGA[^_]*')


def _format_sample_name(column):
    """Return the shortened TCGA identifier for one column header.

    The first ``TCGA...`` token in *column* (everything from ``TCGA`` up to
    the next underscore) is cut down to its first four dash-separated
    fields, e.g. ``TCGA-AB-1234-01A-11R_expr`` -> ``TCGA-AB-1234-01A``.
    Presumably this is the sample-level part of the barcode -- confirm
    against the TCGA barcode specification.

    A header that contains no ``TCGA`` token is returned unchanged, so that
    non-sample columns do not abort the whole run with an ``IndexError``.
    """
    match = _TCGA_TOKEN_RE.search(column)
    if match is None:
        return column
    return '-'.join(match.group(0).split('-')[:4])


def main(argv=None):
    """Rewrite the column headers of a TCGA TSV file in place.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]``. Only the first argument (the TSV path) is
            used; any further arguments are ignored.

    The file is read with its first column as the index, every remaining
    column header is passed through ``_format_sample_name``, and the result
    is written back to the same path, overwriting the input.

    NOTE: headers that share their first four fields end up with identical
    names after shortening; no de-duplication is attempted here.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        # Fail with a readable message instead of a bare IndexError.
        sys.exit('usage: {} TCGA_TSV'.format(sys.argv[0]))
    tcga_tsv = args[0]

    tcga = pd.read_table(tcga_tsv, sep='\t', index_col=0)
    tcga.columns = [_format_sample_name(name) for name in tcga.columns]
    tcga.to_csv(tcga_tsv, sep='\t')


if __name__ == '__main__':
    main()
