tacaswell/md2txt

## md2txt
#!/usr/bin/python

#Copyright 2009-2013 Thomas A Caswell
#tcaswell@uchicago.edu
#http://jfi.uchicago.edu/~tcaswell
#
#This program is free software; you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation; either version 3 of the License, or (at
#your option) any later version.
#
#This program is distributed in the hope that it will be useful, but
#WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
#General Public License for more details.
#
#You should have received a copy of the GNU General Public License
#along with this program; if not, see <http://www.gnu.org/licenses>.


import xml.dom.minidom
import subprocess
import datetime
import os.path
import sys
import argparse
import re


# list of the known metadata keys; pass whichever ones you want extracted as `write_keys`

# Every metadata key this tool knows about.  Each entry needs its own
# trailing comma: without them Python concatenates adjacent string literals
# and the set ends up holding one single, meaningless string.
VALID_KEYS = set(['Exposure',
                  'Binning',
                  'Region',
                  'Subtract',
                  'Shading',
                  'Sensor Mode',
                  'Digitizer',
                  'Gain',
                  'Camera Shutter',
                  'Clear Count',
                  'Clear Mode',
                  'Frames to Average',
                  'Trigger Mode',
                  'Temperature',
                  'MetaDataVersion',
                  'ApplicationName',
                  'ApplicationVersion',
                  'plane-type',
                  'pixel-size-x',
                  'pixel-size-y',
                  'bits-per-pixel',
                  'autoscale-state',
                  'autoscale-min-percent',
                  'autoscale-max-percent',
                  'scale-min',
                  'scale-max',
                  'spatial-calibration-state',
                  'spatial-calibration-x',
                  'spatial-calibration-y',
                  'spatial-calibration-units',
                  'image-name',
                  'threshold-state',
                  'threshold-low',
                  'threshold-high',
                  'threshold-color',
                  'zoom-percent',
                  'gamma',
                  'look-up-table-type',
                  'look-up-table-name',
                  'photonegative-mode',
                  'gray-calibration-curve-fit-algorithm',
                  'gray-calibration-values',
                  'gray-calibration-min',
                  'gray-calibration-max',
                  'gray-calibration-units',
                  'plane-guid',
                  'acquisition-time-local',
                  'modification-time-local',
                  'stage-position-x',
                  'stage-position-y',
                  'stage-label',
                  'z-position',
                  'wavelength',
                  'camera-binning-x',
                  'camera-binning-y',
                  'camera-chip-offset-x',
                  'camera-chip-offset-y',
                  '_IllumSetting_',
                  '_MagNA_',
                  '_MagRI_',
                  '_MagSetting_',
                  'number-of-planes',
                  'dtime',
                  ])


class md_out(object):
    """Common base of the metadata sinks: remembers which keys to keep."""

    def __init__(self, write_keys=None):
        """
        Parameters
        ----------
        write_keys: iterable or `None`
            The MD keys to extract; `None` selects only
            'acquisition-time-local'
        """
        selected = ['acquisition-time-local'] if write_keys is None else write_keys
        self.write_keys = set(selected)


class txtFile(md_out):
    """Sink that writes the selected metadata entries to a file as text."""

    def __init__(self, f, write_keys=None):
        """
        Parameters
        ----------
        f: file-object like
            destination of the text output, only its `write` method is used

        write_keys: iterable or `None`
            The MD keys to extract, handed on to `md_out`
        """
        super(txtFile, self).__init__(write_keys)
        self.f = f

    def add_entry(self, key, val):
        """Write one ``key: val`` line, provided `key` was selected."""
        if key not in self.write_keys:
            return
        self.f.write("".join([key, ": ", str(val), "\n"]))

    def start_frame(self, n):
        """Write the ``Frame n`` header line that opens a frame."""
        self.f.write("".join(["Frame ", str(n), "\n"]))

    def end_frame(self):
        """Write the blank line that closes a frame."""
        self.f.write("\n")


class dict_vecs(md_out):
    """Sink that collects the values of every selected key in a list."""

    def __init__(self, write_keys=None):
        """
        Parameters
        ----------
        write_keys: iterable or `None`
            The MD keys to extract, handed on to `md_out`
        """
        super(dict_vecs, self).__init__(write_keys)
        # one list per selected key, filled in the order entries arrive
        self.d = dict((name, []) for name in self.write_keys)

    def add_entry(self, key, val):
        """Append `val` to the list kept for `key`, if `key` was selected."""
        if key not in self.write_keys:
            return
        self.d[key].append(val)

    def start_frame(self, n):
        """Do nothing; frame boundaries are not recorded by this sink."""

    def end_frame(self):
        """Do nothing; frame boundaries are not recorded by this sink."""

def _write(file, key, val):
    if key == "acquisition-time-local" or key == "modification-time-local":
        tmp = int(val[18:])
        val = val[:18] + "%(#)03d" % {"#": tmp}
    file.add_entry(key, val)


def _start_group(f, n):
    f.start_frame(n)


def _end_group(f):
    f.end_frame()


def _parse_attr(file_obj, dom_obj):
    """Convert one ``prop`` element and pass it on to the sink.

    The ``Description`` property is delegated to `_parse_des`; for every
    other property the ``value`` attribute is converted according to the
    ``type`` attribute (``int``, ``float``, anything else is ASCII encoded)
    and written under the element's ``id``.
    """
    prop_id = dom_obj.getAttribute("id")
    if prop_id == "Description":
        _parse_des(file_obj, dom_obj)
        return

    prop_type = dom_obj.getAttribute("type")
    raw_value = dom_obj.getAttribute("value")
    if prop_type == "int":
        converted = int(raw_value)
    elif prop_type == "float":
        converted = float(raw_value)
    else:
        converted = raw_value.encode('ascii')
    _write(file_obj, prop_id, converted)


def _parse_des(file_obj, des_obj):
    """Split the ``Description`` property into its ``key:value`` entries.

    Every line of the description that contains exactly one ``:`` is passed
    to `_write` with the text before the colon as key and the ASCII encoded
    text after it as value; all other lines are ignored.
    """
    des_string = des_obj.getAttribute("value")
    # An XML parser resolves the ``&#13;&#10;`` character references in an
    # attribute to a real CR/LF pair, so splitting only on the literal entity
    # text never separates the entries.  Normalise the literal spelling (for
    # doubly escaped input) and then split on actual line breaks.
    des_string = des_string.replace("&#13;&#10;", "\n")

    for entry in des_string.splitlines():
        pieces = entry.split(":")
        if len(pieces) == 2:
            _write(file_obj, pieces[0], pieces[1].encode('ascii'))


def _read_frame_props(fin, frame):
    """Return the ``prop`` DOM elements of the metadata XML of one frame.

    Runs ``tiffinfo -<frame> <fin>`` and parses the ``<MetaData>`` document
    found in its output.
    """
    proc = subprocess.Popen(["tiffinfo", "-" + str(frame), fin],
                            stdout=subprocess.PIPE)
    # communicate() reads all output and waits for the child, so no
    # un-reaped process or open pipe is left behind for every frame
    tiff_string = proc.communicate()[0]
    if not isinstance(tiff_string, str):
        # Python 3 hands back bytes from the pipe
        tiff_string = tiff_string.decode("utf-8", "replace")

    closing = "MetaData>"
    xml_start = tiff_string.find("<MetaData>")
    # stop exactly at the end of the closing tag instead of one character
    # past it
    xml_stop = tiff_string.rfind(closing) + len(closing)
    dom = xml.dom.minidom.parseString(tiff_string[xml_start:xml_stop])
    return dom.getElementsByTagName("prop")


def _parse_time_stamp(stamp):
    """Turn a ``YYYYMMDD HH:MM:SS.mmm`` string into a `datetime`."""
    parsed = datetime.datetime.strptime(stamp[:17], "%Y%m%d %H:%M:%S")
    # datetime objects are immutable: replace() returns the new value
    return parsed.replace(microsecond=int(stamp[18:]) * 1000)


def parse_file(fin, f, frame_offset=0):
    """Extract the metadata of every frame in `fin` and feed it to `f`.

    Parameters
    ----------
    fin: str
        path of the tiff file, it is read through the external ``tiffinfo``
        program

    f: md_out like
        sink receiving the entries, needs `start_frame`, `add_entry` and
        `end_frame`

    frame_offset: int
        added to the frame index reported to `f.start_frame`

    Returns
    -------
    f, the sink that was passed in

    Notes
    -----
    Besides the properties found in the file a ``dtime`` entry is written
    per frame: 0.0 for the first frame, afterwards the seconds elapsed
    since the previous frame with a time stamp.  It is left out for a frame
    when no time stamp is available to compute it.  If the file does not
    exist a message is printed and the program exits.
    """
    # make sure the files exist
    if not os.path.exists(fin):
        print("file does not exist")
        sys.exit()

    _start_group(f, 0 + frame_offset)

    # without a number-of-planes property only the first frame is processed
    frame_count = 1
    previous_t = None
    for p in _read_frame_props(fin, 0):
        parent = p.parentNode.nodeName
        if parent not in ("PlaneInfo", "MetaData", "SetInfo"):
            continue
        _parse_attr(f, p)
        prop_id = p.getAttribute("id")
        if parent == "PlaneInfo" and prop_id == "acquisition-time-local":
            previous_t = _parse_time_stamp(p.getAttribute("value"))
        elif parent == "SetInfo" and prop_id == "number-of-planes":
            frame_count = int(p.getAttribute("value"))

    _write(f, "dtime", 0.0)
    _end_group(f)

    for frame in range(1, frame_count):
        _start_group(f, frame + frame_offset)
        current_t = None
        for p in _read_frame_props(fin, frame):
            if p.parentNode.nodeName != "PlaneInfo":
                continue
            _parse_attr(f, p)
            if p.getAttribute("id") == "acquisition-time-local":
                current_t = _parse_time_stamp(p.getAttribute("value"))

        if current_t is not None:
            if previous_t is not None:
                # total_seconds() also accounts for the days component
                _write(f, "dtime", (current_t - previous_t).total_seconds())
            previous_t = current_t
        _end_group(f)

    return f

def main():
    """Command line entry point.

    With ``--fname`` the metadata of that single file is written to
    ``--fout`` (or to stdout when ``--fout`` is missing).  With
    ``--path_file`` every line of the given file names a folder; for each
    folder a ``<folder name>.txt`` file is written in the current directory
    holding the metadata of all tif files in it, the digits in front of the
    extension being used as frame offset (0 when there are none).  Without
    either option the usage help is printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--fname", help="The file name of the file to strip MD from")
    parser.add_argument("--fout", help="Name of file to write results to")
    parser.add_argument("--path_file", help="File which contains a list of paths to process, generates a series of text_files")
    args = parser.parse_args()

    if args.fname is not None:
        # process a single file
        if args.fout is None:
            parse_file(args.fname, txtFile(sys.stdout))
        else:
            with open(args.fout, 'w') as f:
                parse_file(args.fname, txtFile(f))
        return

    if args.path_file is None:
        # nothing to do, tell the user how to call the program
        parser.print_help()
        return

    # matched case-insensitively so that e.g. ``IMG_0001.TIF`` is accepted
    frame_re = re.compile(r'.+?([0-9]*)\.tif', re.IGNORECASE)
    with open(args.path_file, 'r') as path_file:
        for folder in path_file:
            folder = folder.strip()
            if not folder:
                # a blank line would otherwise resolve to the current directory
                continue
            folder = os.path.abspath(os.path.expanduser(folder))
            if not os.path.isdir(folder):
                print('invalid dir: {folder} \n moving on'.format(folder=folder))
                continue
            with open(os.path.basename(folder) + '.txt', 'w') as fout:
                out_obj = txtFile(fout)
                for img_f in sorted(os.listdir(folder)):
                    m = frame_re.match(img_f)
                    if m is None:
                        # not a tif file name
                        continue
                    digits = m.group(1)
                    frame_offset = int(digits) if digits else 0
                    frame_fname = os.path.join(folder, img_f)
                    parse_file(frame_fname, out_obj, frame_offset=frame_offset)


# run the command line interface only when executed as a script, not on import
if __name__ == "__main__":
    main()
	#!/usr/bin/python

	#Copyright 2009-2013 Thomas A Caswell
	#tcaswell@uchicago.edu
	#http://jfi.uchicago.edu/~tcaswell
	#
	#This program is free software; you can redistribute it and/or modify
	#it under the terms of the GNU General Public License as published by
	#the Free Software Foundation; either version 3 of the License, or (at
	#your option) any later version.
	#
	#This program is distributed in the hope that it will be useful, but
	#WITHOUT ANY WARRANTY; without even the implied warranty of
	#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	#General Public License for more details.
	#
	#You should have received a copy of the GNU General Public License
	#along with this program; if not, see <http://www.gnu.org/licenses>.


	import xml.dom.minidom
	import subprocess
	import datetime
	import os.path
	import sys
	import argparse
	import re



	# list of the known metadata keys; pass whichever ones you want extracted as `write_keys`

	# Every metadata key this tool knows about.  Each entry needs its own
	# trailing comma: without them Python concatenates adjacent string literals
	# and the set ends up holding one single, meaningless string.
	VALID_KEYS = set(['Exposure',
	                  'Binning',
	                  'Region',
	                  'Subtract',
	                  'Shading',
	                  'Sensor Mode',
	                  'Digitizer',
	                  'Gain',
	                  'Camera Shutter',
	                  'Clear Count',
	                  'Clear Mode',
	                  'Frames to Average',
	                  'Trigger Mode',
	                  'Temperature',
	                  'MetaDataVersion',
	                  'ApplicationName',
	                  'ApplicationVersion',
	                  'plane-type',
	                  'pixel-size-x',
	                  'pixel-size-y',
	                  'bits-per-pixel',
	                  'autoscale-state',
	                  'autoscale-min-percent',
	                  'autoscale-max-percent',
	                  'scale-min',
	                  'scale-max',
	                  'spatial-calibration-state',
	                  'spatial-calibration-x',
	                  'spatial-calibration-y',
	                  'spatial-calibration-units',
	                  'image-name',
	                  'threshold-state',
	                  'threshold-low',
	                  'threshold-high',
	                  'threshold-color',
	                  'zoom-percent',
	                  'gamma',
	                  'look-up-table-type',
	                  'look-up-table-name',
	                  'photonegative-mode',
	                  'gray-calibration-curve-fit-algorithm',
	                  'gray-calibration-values',
	                  'gray-calibration-min',
	                  'gray-calibration-max',
	                  'gray-calibration-units',
	                  'plane-guid',
	                  'acquisition-time-local',
	                  'modification-time-local',
	                  'stage-position-x',
	                  'stage-position-y',
	                  'stage-label',
	                  'z-position',
	                  'wavelength',
	                  'camera-binning-x',
	                  'camera-binning-y',
	                  'camera-chip-offset-x',
	                  'camera-chip-offset-y',
	                  '_IllumSetting_',
	                  '_MagNA_',
	                  '_MagRI_',
	                  '_MagSetting_',
	                  'number-of-planes',
	                  'dtime',
	                  ])


	class md_out(object):
	    """Common base of the metadata sinks: remembers which keys to keep."""

	    def __init__(self, write_keys=None):
	        """
	        Parameters
	        ----------
	        write_keys: iterable or `None`
	            The MD keys to extract; `None` selects only
	            'acquisition-time-local'
	        """
	        if write_keys is None:
	            write_keys = ['acquisition-time-local']
	        self.write_keys = set(write_keys)


	class txtFile(md_out):
	    """Sink that writes the selected metadata entries to a file as text."""

	    def __init__(self, f, write_keys=None):
	        """
	        Parameters
	        ----------
	        f: file-object like
	            the file object to dump the results into, needs to implement `write`

	        write_keys: iterable or `None`
	            The MD keys to extract
	        """
	        md_out.__init__(self, write_keys)  # call super
	        self.f = f

	    def add_entry(self, key, val):
	        """Write one ``key: val`` line, provided `key` was selected."""
	        if key in self.write_keys:
	            self.f.write(key + ": " + str(val) + "\n")

	    def start_frame(self, n):
	        """Write the ``Frame n`` header line that opens a frame."""
	        self.f.write("Frame " + str(n) + "\n")

	    def end_frame(self):
	        """Write the blank line that closes a frame."""
	        self.f.write("\n")


	class dict_vecs(md_out):
	    """Sink that collects the values of every selected key in a list."""

	    def __init__(self, write_keys=None):
	        """
	        Parameters
	        ----------
	        write_keys: iterable or `None`
	            The MD keys to extract
	        """
	        md_out.__init__(self, write_keys)  # call super
	        # one list per selected key, filled in the order entries arrive
	        self.d = {}
	        for k in self.write_keys:
	            self.d[k] = []

	    def add_entry(self, key, val):
	        """Append `val` to the list kept for `key`, if `key` was selected."""
	        if key in self.write_keys:
	            self.d[key].append(val)

	    def start_frame(self, n):
	        """Do nothing; frame boundaries are not recorded by this sink."""
	        pass

	    def end_frame(self):
	        """Do nothing; frame boundaries are not recorded by this sink."""
	        pass

	def _write(file, key, val):
	if key == "acquisition-time-local" or key == "modification-time-local":
	tmp = int(val[18:])
	val = val[:18] + "%(#)03d" % {"#": tmp}
	file.add_entry(key, val)


	def _start_group(f, n):
	f.start_frame(n)


	def _end_group(f):
	f.end_frame()


	def _parse_attr(file_obj, dom_obj):
	    """Convert one ``prop`` element and pass it on to the sink.

	    The ``Description`` property is delegated to `_parse_des`; for every
	    other property the ``value`` attribute is converted according to the
	    ``type`` attribute (``int``, ``float``, anything else is ASCII encoded)
	    and written under the element's ``id``.
	    """
	    if dom_obj.getAttribute("id") == "Description":
	        _parse_des(file_obj, dom_obj)
	    elif dom_obj.getAttribute("type") == "int":
	        _write(file_obj, dom_obj.getAttribute("id"), int(dom_obj.getAttribute("value")))
	    elif dom_obj.getAttribute("type") == "float":
	        _write(file_obj, dom_obj.getAttribute("id"), float(dom_obj.getAttribute("value")))
	    else:
	        _write(file_obj, dom_obj.getAttribute("id"), dom_obj.getAttribute("value").encode('ascii'))


	def _parse_des(file_obj, des_obj):
	    """Split the ``Description`` property into its ``key:value`` entries.

	    Every line of the description that contains exactly one ``:`` is passed
	    to `_write` with the text before the colon as key and the ASCII encoded
	    text after it as value; all other lines are ignored.
	    """
	    des_string = des_obj.getAttribute("value")
	    # The entries are separated by a CR/LF pair (written ``&#13;&#10;`` in
	    # the XML), not by single spaces: keys such as 'Sensor Mode' contain
	    # spaces themselves.  An XML parser resolves the character references
	    # to real line breaks; the literal spelling is normalised as well in
	    # case the input is doubly escaped.
	    des_string = des_string.replace("&#13;&#10;", "\n")

	    for entry in des_string.splitlines():
	        pieces = entry.split(":")
	        if len(pieces) == 2:
	            _write(file_obj, pieces[0], pieces[1].encode('ascii'))


	def _read_frame_props(fin, frame):
	    """Return the ``prop`` DOM elements of the metadata XML of one frame.

	    Runs ``tiffinfo -<frame> <fin>`` and parses the ``<MetaData>`` document
	    found in its output.
	    """
	    proc = subprocess.Popen(["tiffinfo", "-" + str(frame), fin],
	                            stdout=subprocess.PIPE)
	    # communicate() reads all output and waits for the child, so no
	    # un-reaped process or open pipe is left behind for every frame
	    tiff_string = proc.communicate()[0]
	    if not isinstance(tiff_string, str):
	        # Python 3 hands back bytes from the pipe
	        tiff_string = tiff_string.decode("utf-8", "replace")

	    closing = "MetaData>"
	    xml_start = tiff_string.find("<MetaData>")
	    # stop exactly at the end of the closing tag instead of one character
	    # past it
	    xml_stop = tiff_string.rfind(closing) + len(closing)
	    dom = xml.dom.minidom.parseString(tiff_string[xml_start:xml_stop])
	    return dom.getElementsByTagName("prop")


	def _parse_time_stamp(stamp):
	    """Turn a ``YYYYMMDD HH:MM:SS.mmm`` string into a `datetime`."""
	    parsed = datetime.datetime.strptime(stamp[:17], "%Y%m%d %H:%M:%S")
	    # datetime objects are immutable: replace() returns the new value
	    return parsed.replace(microsecond=int(stamp[18:]) * 1000)


	def parse_file(fin, f, frame_offset=0):
	    """Extract the metadata of every frame in `fin` and feed it to `f`.

	    Parameters
	    ----------
	    fin: str
	        path of the tiff file, it is read through the external ``tiffinfo``
	        program

	    f: md_out like
	        sink receiving the entries, needs `start_frame`, `add_entry` and
	        `end_frame`

	    frame_offset: int
	        added to the frame index reported to `f.start_frame`

	    Returns
	    -------
	    f, the sink that was passed in

	    Notes
	    -----
	    Besides the properties found in the file a ``dtime`` entry is written
	    per frame: 0.0 for the first frame, afterwards the seconds elapsed
	    since the previous frame with a time stamp.  It is left out for a frame
	    when no time stamp is available to compute it.  If the file does not
	    exist a message is printed and the program exits.
	    """
	    # make sure the files exist
	    if not os.path.exists(fin):
	        print("file does not exist")
	        sys.exit()

	    _start_group(f, 0 + frame_offset)

	    # without a number-of-planes property only the first frame is processed
	    frame_count = 1
	    previous_t = None
	    for p in _read_frame_props(fin, 0):
	        parent = p.parentNode.nodeName
	        if parent not in ("PlaneInfo", "MetaData", "SetInfo"):
	            continue
	        _parse_attr(f, p)
	        prop_id = p.getAttribute("id")
	        if parent == "PlaneInfo" and prop_id == "acquisition-time-local":
	            previous_t = _parse_time_stamp(p.getAttribute("value"))
	        elif parent == "SetInfo" and prop_id == "number-of-planes":
	            frame_count = int(p.getAttribute("value"))

	    _write(f, "dtime", 0.0)
	    _end_group(f)

	    for frame in range(1, frame_count):
	        _start_group(f, frame + frame_offset)
	        current_t = None
	        for p in _read_frame_props(fin, frame):
	            if p.parentNode.nodeName != "PlaneInfo":
	                continue
	            _parse_attr(f, p)
	            if p.getAttribute("id") == "acquisition-time-local":
	                current_t = _parse_time_stamp(p.getAttribute("value"))

	        if current_t is not None:
	            if previous_t is not None:
	                # total_seconds() also accounts for the days component
	                _write(f, "dtime", (current_t - previous_t).total_seconds())
	            previous_t = current_t
	        _end_group(f)

	    return f

	def main():
	    """Command line entry point.

	    With ``--fname`` the metadata of that single file is written to
	    ``--fout`` (or to stdout when ``--fout`` is missing).  With
	    ``--path_file`` every line of the given file names a folder; for each
	    folder a ``<folder name>.txt`` file is written in the current directory
	    holding the metadata of all tif files in it, the digits in front of the
	    extension being used as frame offset (0 when there are none).  Without
	    either option the usage help is printed.
	    """
	    parser = argparse.ArgumentParser()
	    parser.add_argument("--fname", help="The file name of the file to strip MD from")
	    parser.add_argument("--fout", help="Name of file to write results to")
	    parser.add_argument("--path_file", help="File which contains a list of paths to process, generates a series of text_files")
	    args = parser.parse_args()

	    if args.fname is not None:
	        # process a single file
	        if args.fout is None:
	            parse_file(args.fname, txtFile(sys.stdout))
	        else:
	            with open(args.fout, 'w') as f:
	                parse_file(args.fname, txtFile(f))
	        return

	    if args.path_file is None:
	        # nothing to do, tell the user how to call the program
	        parser.print_help()
	        return

	    # matched case-insensitively so that e.g. ``IMG_0001.TIF`` is accepted
	    frame_re = re.compile(r'.+?([0-9]*)\.tif', re.IGNORECASE)
	    with open(args.path_file, 'r') as path_file:
	        for folder in path_file:
	            folder = folder.strip()
	            if not folder:
	                # a blank line would otherwise resolve to the current directory
	                continue
	            folder = os.path.abspath(os.path.expanduser(folder))
	            if not os.path.isdir(folder):
	                print('invalid dir: {folder} \n moving on'.format(folder=folder))
	                continue
	            with open(os.path.basename(folder) + '.txt', 'w') as fout:
	                out_obj = txtFile(fout)
	                for img_f in sorted(os.listdir(folder)):
	                    m = frame_re.match(img_f)
	                    if m is None:
	                        # not a tif file name
	                        continue
	                    digits = m.group(1)
	                    frame_offset = int(digits) if digits else 0
	                    frame_fname = os.path.join(folder, img_f)
	                    parse_file(frame_fname, out_obj, frame_offset=frame_offset)



	# run the command line interface only when executed as a script, not on import
	if __name__ == "__main__":
	    main()