Last active
July 28, 2016 15:22
-
-
Save xgrg/afec2be06a1ca21ae7a7fd73a40fd8c3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from glob import glob | |
import pandas as pd | |
import os.path as osp | |
import os | |
import textwrap | |
def _parse_stats_file(path):
    """Split one stats file into data rows, measure records and column names.

    Returns a tuple ``(rows, measures, headers)``:

    * ``rows``     -- one list of whitespace-separated fields per non-comment,
                      non-blank line;
    * ``measures`` -- one list of ', '-separated fields per '# Measure ' line;
    * ``headers``  -- the fields of the first '# ColHeaders' line, or None
                      when the file has no such line.
    """
    measure_prefix = '# Measure '
    header_prefix = '# ColHeaders'
    rows = []
    measures = []
    headers = None
    with open(path) as stream:
        for raw in stream:
            text = raw.rstrip('\n')
            if text.startswith(measure_prefix):
                # Slice the prefix off: str.strip() would treat it as a set
                # of characters and also eat into the values themselves
                # (e.g. 'Mask' -> 'k').
                fields = [e.strip() for e in text[len(measure_prefix):].split(', ')]
                measures.append([e for e in fields if e != ''])
            elif text.startswith(header_prefix):
                if headers is None:
                    headers = text[len(header_prefix):].split()
            elif not text.startswith('#'):
                fields = text.split()
                # Blank lines would otherwise become rows holding only the subject.
                if fields:
                    rows.append(fields)
    return rows, measures, headers


def process(directory, excel_file, filename):
    """Gather one kind of stats file across all subjects into two Excel tables.

    Every file matching ``<directory>/*/*/<filename>`` is read. The first
    path component below ``directory`` is used as the subject identifier.

    Two spreadsheets are written:

    * ``excel_file`` -- one row per data line, with columns ``subject``
      followed by the names found on the '# ColHeaders' line of the first
      file (files are processed in sorted order);
    * ``<excel_file stem>_global<ext>`` -- one row per '# Measure ' line, with
      columns ``subject``, ``name``, ``subfield`` and ``value`` (the first,
      second and second-to-last comma-separated fields of that line).

    Raises IndexError when no file matches, when the first file has no
    '# ColHeaders' line, or when a '# Measure ' line has fewer than two fields.
    """
    root = osp.abspath(directory)
    pattern = osp.join(directory, '*', '*', filename)
    # Sorted so that the row order (and the file providing the headers) does
    # not depend on the arbitrary order returned by glob.
    files = sorted(osp.abspath(e) for e in glob(pattern))
    if not files:
        raise IndexError('no file matching %s' % pattern)

    wholeset = []
    wholeset2 = []
    column_names = None
    for position, f in enumerate(files):
        print(f)
        subject = osp.relpath(f, root).split(os.sep)[0]
        rows, measures, file_headers = _parse_stats_file(f)
        if position == 0:
            column_names = file_headers
        for fields in rows:
            wholeset.append([subject] + fields)
        for fields in measures:
            wholeset2.append([subject, fields[0], fields[1], fields[-2]])

    if column_names is None:
        raise IndexError('no "# ColHeaders" line found in %s' % files[0])

    df = pd.DataFrame(wholeset, columns=['subject'] + column_names)
    df.to_excel(excel_file)

    df = pd.DataFrame(wholeset2, columns=['subject', 'name', 'subfield', 'value'])
    fp, ext = osp.splitext(excel_file)
    df.to_excel(fp + '_global' + ext)
def process_all_files(directory, outputdir):
    """Run process() once for each stats file name in a fixed list.

    For every name in the list below, the matching files under ``directory``
    are compiled into ``<outputdir>/<name with '.stats' replaced by '.xls'>``
    (process() additionally writes the companion ``*_global`` spreadsheet).
    """
    # NOTE(review): the commented-out 'rh.w-g.pct.statslh.aparc.a2009s.stats'
    # entry looks like two names fused together ('rh.w-g.pct.stats' and
    # 'lh.aparc.a2009s.stats'); confirm whether those two files were meant to
    # be processed like their lh/rh counterparts.
    stats_names = [
        'aseg.stats',
        'lh.aparc.DKTatlas40.stats',
        'lh.BA.stats',
        #'lh.curv.stats',
        'lh.w-g.pct.stats',
        'rh.aparc.DKTatlas40.stats',
        'rh.BA.stats',
        #'rh.curv.stats',
        #'rh.w-g.pct.statslh.aparc.a2009s.stats',
        'lh.aparc.stats',
        'lh.BA.thresh.stats',
        'lh.entorhinal_exvivo.stats',
        'rh.aparc.a2009s.stats',
        'rh.aparc.stats',
        'rh.BA.thresh.stats',
        'rh.entorhinal_exvivo.stats',
        'wmparc.stats',
    ]
    for stats_name in stats_names:
        print(stats_name)
        # Swap the extension on the file name only: rewriting 'stats' in the
        # whole joined path would also alter an output directory whose name
        # happens to contain 'stats'.
        excel_name = osp.splitext(stats_name)[0] + '.xls'
        process(directory, osp.join(outputdir, excel_name), stats_name)
if __name__ == '__main__':
    # Command-line entry point: compile either one kind of stats file
    # (-n <name>) or the whole predefined list (-n ALL) into Excel tables.
    import argparse
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
            Compile FreeSurfer outputs into one excel file.
            If FILENAME is given as ALL, then processes all FreeSurfer outputs existing in stats/ directory.
            In that specific case, EXCEL_FILE should be provided as a directory.
            Otherwise, FILENAME should match one of the existing generated files found in stats/ (aseg.stats, lh.aparc.stats, ...)

            examples:
                python fs_collector.py -o /tmp/table.xls -d /home/data/study/freesurfer_subjects/ -n aseg.stats
                python fs_collector.py -o /tmp/tabledir/ -d /home/data/study/freesurfer_subjects/ -n ALL
            '''))
    parser.add_argument("-o", dest='excel_file', type=str, required=True,
                        help='output Excel file (or output directory when FILENAME is ALL)')
    parser.add_argument("-d", dest='directory', type=str, required=True,
                        help='directory containing one sub-directory per subject')
    parser.add_argument("-n", dest='filename', type=str, required=True,
                        help='name of the stats file to compile (e.g. aseg.stats), or ALL')
    args = parser.parse_args()

    if args.filename == 'ALL':
        # In this mode -o designates a directory receiving one table per stats file.
        process_all_files(args.directory, args.excel_file)
    else:
        process(args.directory, args.excel_file, args.filename)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment