# xgrg/fs_collector.py

## fs_collector.py
from glob import glob
import pandas as pd
import os.path as osp
import os
import textwrap

def process(directory, excel_file, filename):
    """Compile one kind of FreeSurfer stats file across subjects into Excel.

    Every file matching ``<directory>/*/*/<filename>`` is parsed (e.g.
    ``<subject>/stats/aseg.stats``); the first path component below
    ``directory`` is used as the subject identifier.

    Two workbooks are written:

    * ``excel_file``: one row per data (non-comment) line, prefixed with the
      subject and labelled with the names found on the ``# ColHeaders`` line;
    * ``<excel_file without extension>_global<extension>``: one row per
      ``# Measure`` line, with columns subject, name, subfield and value.

    Args:
        directory: root folder containing one sub-folder per subject.
        excel_file: path of the main workbook to write.
        filename: name of the stats file to collect (e.g. ``aseg.stats``).

    Raises:
        IndexError: if no file matches, or if no ``# ColHeaders`` line is
            found. IndexError is kept because that is what an empty match
            list has always raised here; it now carries a usable message.
    """
    measure_prefix = '# Measure '
    header_prefix = '# ColHeaders'

    root = osp.abspath(directory)
    pattern = osp.join(directory, '*', '*', filename)
    # Sorted so that the row order does not depend on the filesystem.
    files = sorted(osp.abspath(e) for e in glob(pattern))
    if not files:
        raise IndexError('no %r file found under %s'
                         % (filename, osp.join(directory, '*', '*')))

    rows = []
    measures = []
    columns = None
    for f in files:
        print(f)
        # Read each file once and make sure the handle is closed.
        with open(f) as stats:
            content = [each.rstrip('\n') for each in stats]
        subject = osp.relpath(f, root).split(os.sep)[0]

        for each in content:
            if each.startswith(measure_prefix):
                # Slice the prefix off: str.strip(chars) removes a *set of
                # characters* from both ends and would also eat the start of
                # names such as 'rhCortex' or 'Mask'.
                fields = [e for e in
                          each[len(measure_prefix):].strip().split(', ')
                          if e != '']
                # name, subfield, value (the last field is the unit).
                measures.append([subject, fields[0], fields[1], fields[-2]])
            elif each.startswith(header_prefix):
                if columns is None:
                    columns = [e for e in
                               each[len(header_prefix):].split(' ')
                               if e != '']
            elif not each.startswith('#'):
                values = [e for e in each.split(' ') if e != '']
                # Blank lines would otherwise yield a row holding only the
                # subject.
                if values:
                    rows.append([subject] + values)

    if columns is None:
        raise IndexError('no %r line found in the %r files'
                         % (header_prefix, filename))

    df = pd.DataFrame(rows, columns=['subject'] + columns)
    df.to_excel(excel_file)

    df = pd.DataFrame(measures,
                      columns=['subject', 'name', 'subfield', 'value'])
    fp, ext = osp.splitext(excel_file)
    df.to_excel(fp + '_global' + ext)


def process_all_files(directory, outputdir):
    """Run process() for each FreeSurfer stats file name listed below.

    For every name ``<base>.stats`` the workbook is written to
    ``<outputdir>/<base>.xls``.

    Args:
        directory: root folder containing one sub-folder per subject.
        outputdir: folder receiving the generated workbooks.
    """
    # NOTE(review): the last commented-out entry looks like two names fused
    # together ('rh.w-g.pct.stats' and 'lh.aparc.a2009s.stats'); neither of
    # them is processed -- confirm whether that is intended.
    stats_names = (
        'aseg.stats',
        'lh.aparc.DKTatlas40.stats',
        'lh.BA.stats',
        #'lh.curv.stats',
        'lh.w-g.pct.stats',
        'rh.aparc.DKTatlas40.stats',
        'rh.BA.stats',
        #'rh.curv.stats',
        #'rh.w-g.pct.statslh.aparc.a2009s.stats',
        'lh.aparc.stats',
        'lh.BA.thresh.stats',
        'lh.entorhinal_exvivo.stats',
        'rh.aparc.a2009s.stats',
        'rh.aparc.stats',
        'rh.BA.thresh.stats',
        'rh.entorhinal_exvivo.stats',
        'wmparc.stats',
    )
    for stats_name in stats_names:
        print(stats_name)
        # Swap only the file extension. Replacing 'stats' in the joined path
        # would also rewrite an output directory whose name contains 'stats'.
        excel_name = osp.splitext(stats_name)[0] + '.xls'
        process(directory, osp.join(outputdir, excel_name), stats_name)


if __name__ == '__main__':
    # Command-line entry point: collect one stats file name, or all known
    # ones when the name is given as ALL.
    import argparse
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
            Compile FreeSurfer outputs into one excel file.

            If FILENAME is given as ALL, then processes all FreeSurfer outputs existing in stats/ directory.
            In that specific case, EXCEL_FILE should be provided as a directory.
            Otherwise, FILENAME should match one of the existing generated files found in stats/ (aseg.stats, lh.aparc.stats, ...)

            examples:
              python fs_collector.py -o /tmp/table.xls -d /home/data/study/freesurfer_subjects/ -n aseg.stats
              python fs_collector.py -o /tmp/tabledir/ -d /home/data/study/freesurfer_subjects/ -n ALL
            '''))
    parser.add_argument("-o", dest='excel_file', type=str, required=True,
                        help='output Excel file (an output directory when '
                             'FILENAME is ALL)')
    parser.add_argument("-d", dest='directory', type=str, required=True,
                        help='directory containing the FreeSurfer subject '
                             'folders')
    parser.add_argument("-n", dest='filename', type=str, required=True,
                        help='name of the stats file to collect '
                             '(e.g. aseg.stats), or ALL')

    args = parser.parse_args()
    if args.filename == 'ALL':
        # In this mode -o designates a directory, one workbook per stats file.
        process_all_files(args.directory, args.excel_file)
    else:
        process(args.directory, args.excel_file, args.filename)