Correct a TCGA-assembled TSV (tab-delimited) file by shortening the sample names in its column headers to the first four dash-separated fields of each TCGA barcode.
import re
import sys

import pandas as pd

# Rewrite the column headers of a tab-delimited table so that each header is
# reduced to the first four dash-separated fields of the TCGA barcode it
# contains.  The input file is overwritten in place.
#
# Usage: python <this script> <table.tsv>

# "TCGA" followed by everything up to (not including) the next underscore.
_BARCODE_RE = re.compile(r'TCGA[^_]*')

# Number of leading dash-separated barcode fields to keep.
_BARCODE_FIELDS = 4


def shorten_barcode(name):
    """Return *name* reduced to the first four fields of its TCGA barcode.

    The first substring matching ``TCGA[^_]*`` is located, split on ``-``
    and its first four fields are re-joined with ``-``.  Any text before
    the barcode, or after the first underscore that follows it, is dropped.

    A name that contains no ``TCGA`` substring is returned unchanged, so
    non-sample columns do not abort the whole run.
    """
    match = _BARCODE_RE.search(name)
    if match is None:
        return name
    return '-'.join(match.group(0).split('-')[:_BARCODE_FIELDS])


def main(argv=None):
    """Shorten the column headers of the table named by the first argument.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``.  Only the first argument (the table path) is
            used; any further arguments are ignored.

    Returns:
        Process exit status: 0 on success, 2 when no path was given.
    """
    if argv is None:
        argv = sys.argv[1:]
    if not argv:
        print('usage: python <script> <tcga_table.tsv>', file=sys.stderr)
        return 2

    tcga_tsv = argv[0]
    # The first column is the row index, so it is not part of the renamed
    # column headers.
    tcga = pd.read_table(tcga_tsv, sep='\t', index_col=0)
    tcga.columns = [shorten_barcode(column) for column in tcga.columns.tolist()]
    # Write back to the same path the table was read from.
    tcga.to_csv(tcga_tsv, sep='\t')
    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment