sveetch/xmp_parser.py

## xmp_parser.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Simple strings extractor from a Photoshop XMP file

It extracts all <photoshop:LayerText/> text from the document then outputs it to a file.
"""
import os
from xml.etree.ElementTree import ElementTree as ET

# Namespace prefixes (mapped to their URI) used to query XMP documents
PHOTOSHOP_NAMESPACES = dict([
    ('photoshop', 'http://ns.adobe.com/photoshop/1.0/'),
    ('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'),
])

class PhotoshopTextExtractorBase(object):
    """
    Default parser to extract strings from XMP files

    Every extracted string is rendered with ``output_contentline_template``,
    then the rendered lines of a file are wrapped together with
    ``output_item_template``. The output format is driven by these two
    templates only.
    """
    description = "Default format, append each string line in a HTML paragraph"
    output_item_template = "<!-- {name} -->\n{contents}"
    output_contentline_template = "<p>{content}</p>\n"

    def __init__(self, *args, **kwargs):
        # Nothing to set up, given arguments are accepted then ignored
        pass

    def extract(self, paths, output):
        """
        Parse every file from ``paths`` and write their formatted strings
        into the ``output`` file.

        :param paths: a single file path or an iterable of file paths
        :param output: path of the file to write, an existing file is
                       overwritten
        """
        # ``str is bytes`` is only true on Python 2, where ``basestring``
        # exists and UTF-8 byte strings are the native string type
        python2 = str is bytes

        if isinstance(paths, basestring if python2 else str):
            paths = [paths]

        if python2:
            output_fp = open(output, "w")
        else:
            # Text is written as UTF-8 whatever the platform default is
            output_fp = open(output, "w", encoding="utf-8")

        # The context manager closes the file even when a parsing fails
        with output_fp:
            for filepath in paths:
                name = self.get_file_name(filepath)
                contents = "".join(
                    self.output_contentline_template.format(
                        content=self._native_string(item)
                    )
                    for item in self.xmp_parser(filepath)
                )
                output_fp.write(
                    self.output_item_template.format(name=name, contents=contents)
                )

    @staticmethod
    def _native_string(value):
        """
        Return ``value`` ready to be merged into a template: on Python 3 a
        UTF-8 byte string is decoded, anything else is returned untouched.
        """
        if bytes is not str and isinstance(value, bytes):
            return value.decode('UTF-8')
        return value

    def get_file_name(self, path):
        """
        Return the last component of ``path``, used to label the file
        strings in the output.
        """
        return os.path.basename(path)

    def xmp_parser(self, path):
        """
        Extract all strings in ``<photoshop:LayerText>`` elements from given
        path and return them encoded in UTF8

        Elements without text (like an empty ``<photoshop:LayerText/>``) are
        ignored.
        """
        tree = ET()
        tree.parse(path)
        layers = tree.getroot().findall(
            './/photoshop:LayerText', namespaces=PHOTOSHOP_NAMESPACES
        )
        # ``text`` is None on an empty element, there is nothing to encode
        return [
            item.text.encode('UTF-8')
            for item in layers
            if item.text is not None
        ]

class PhotoshopTextExtractorOptimusI18n(PhotoshopTextExtractorBase):
    """
    Optimus flavoured parser: only the output templates differ from the
    default ones, each extracted string being enclosed in a 'trans' tag
    """
    description = (
        "Optimus format, append each string line in a HTML paragraph, "
        "paragraph content is surrounded within a 'trans' tag"
    )
    # Doubled braces are turned into literal braces when the template is
    # formatted
    output_contentline_template = "    <p>{{% trans %}}{content}{{% endtrans %}}</p>\n"
    output_item_template = "<!-- {name} -->\n<div>\n{contents}\n</div>\n\n"

# Available parsers, indexed by the format name given on the command line
PARSERS = dict([
    ('default', PhotoshopTextExtractorBase),
    ('optimus', PhotoshopTextExtractorOptimusI18n),
])

if __name__ == "__main__":
    # Command line entry point: either list the available formats or extract
    # the strings from the given XMP file(s) into the output file.
    import glob
    from optparse import OptionParser

    commandline_parser = OptionParser()
    commandline_parser.add_option("-p", "--path", action='append', dest="filepaths", help=u"XMP filepath to parse", metavar="PATH")
    commandline_parser.add_option("-o", "--output", action='store', dest="output_filepath", help=u"Filepath where extracted content will be writen", metavar="PATH")
    commandline_parser.add_option('-g', '--glob', action='store_true', dest='glob_mode', default=False, help=u'Enable "Unix style pathname pattern expansion" for given filpath(s), note you will have to surround your pattern path with quotes, like \'*.xmp\'')
    # A 'choice' option makes optparse reject an unknown format with a proper
    # error message instead of a KeyError traceback later on
    commandline_parser.add_option("-f", "--format", action='store', type='choice', choices=sorted(PARSERS), default='default', dest="parser", help=u"The optionnal format to use for the ouptput, use --list option to know about available formats", metavar="STRING")
    commandline_parser.add_option('-l', '--list', action='store_true', dest='list_formats', default=False, help=u'List available formats then quit')

    (commandline_options, commandline_args) = commandline_parser.parse_args()

    if commandline_options.list_formats:
        # Single argument ``print(...)`` behaves the same on Python 2 and 3
        print("Available format are :")
        print("")
        # Sorted so the listing is the same on every run
        for name in sorted(PARSERS):
            print("  * {0} : {1}".format(name, PARSERS[name].description))
        print("")
    elif commandline_options.filepaths:
        if not commandline_options.output_filepath:
            commandline_parser.error("You must supply an output file path with the --output option.")
        extractor = PARSERS[commandline_options.parser]()
        if commandline_options.glob_mode:
            filepaths = []
            for pattern in commandline_options.filepaths:
                # glob results come in arbitrary order, sort them to get a
                # reproducible output
                filepaths += sorted(glob.glob(pattern))
            if not filepaths:
                # Avoid writing an empty output file without any notice
                commandline_parser.error("No file matches the given path pattern(s).")
        else:
            filepaths = commandline_options.filepaths[:]
        # Do job
        extractor.extract(filepaths, commandline_options.output_filepath)
    else:
        # Nothing was asked: show the usage rather than quitting silently
        commandline_parser.print_help()
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-
	"""
	Simple strings extractor from a Photoshop XMP file

	It extracts all <photoshop:LayerText/> text from the document then outputs it to a file.
	"""
	import os
	from xml.etree.ElementTree import ElementTree as ET

	# Namespace prefixes (mapped to their URI) used to query XMP documents
	PHOTOSHOP_NAMESPACES = dict([
	    ('photoshop', 'http://ns.adobe.com/photoshop/1.0/'),
	    ('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'),
	])

	class PhotoshopTextExtractorBase(object):
	    """
	    Default parser to extract strings from XMP files

	    Every extracted string is rendered with ``output_contentline_template``,
	    then the rendered lines of a file are wrapped together with
	    ``output_item_template``. The output format is driven by these two
	    templates only.
	    """
	    description = "Default format, append each string line in a HTML paragraph"
	    output_item_template = "<!-- {name} -->\n{contents}"
	    output_contentline_template = "<p>{content}</p>\n"

	    def __init__(self, *args, **kwargs):
	        # Nothing to set up, given arguments are accepted then ignored
	        pass

	    def extract(self, paths, output):
	        """
	        Parse every file from ``paths`` and write their formatted strings
	        into the ``output`` file.

	        :param paths: a single file path or an iterable of file paths
	        :param output: path of the file to write, an existing file is
	                       overwritten
	        """
	        # ``str is bytes`` is only true on Python 2, where ``basestring``
	        # exists and UTF-8 byte strings are the native string type
	        python2 = str is bytes

	        if isinstance(paths, basestring if python2 else str):
	            paths = [paths]

	        if python2:
	            output_fp = open(output, "w")
	        else:
	            # Text is written as UTF-8 whatever the platform default is
	            output_fp = open(output, "w", encoding="utf-8")

	        # The context manager closes the file even when a parsing fails
	        with output_fp:
	            for filepath in paths:
	                name = self.get_file_name(filepath)
	                contents = "".join(
	                    self.output_contentline_template.format(
	                        content=self._native_string(item)
	                    )
	                    for item in self.xmp_parser(filepath)
	                )
	                output_fp.write(
	                    self.output_item_template.format(name=name, contents=contents)
	                )

	    @staticmethod
	    def _native_string(value):
	        """
	        Return ``value`` ready to be merged into a template: on Python 3 a
	        UTF-8 byte string is decoded, anything else is returned untouched.
	        """
	        if bytes is not str and isinstance(value, bytes):
	            return value.decode('UTF-8')
	        return value

	    def get_file_name(self, path):
	        """
	        Return the last component of ``path``, used to label the file
	        strings in the output.
	        """
	        return os.path.basename(path)

	    def xmp_parser(self, path):
	        """
	        Extract all strings in ``<photoshop:LayerText>`` elements from given
	        path and return them encoded in UTF8

	        Elements without text (like an empty ``<photoshop:LayerText/>``) are
	        ignored.
	        """
	        tree = ET()
	        tree.parse(path)
	        layers = tree.getroot().findall(
	            './/photoshop:LayerText', namespaces=PHOTOSHOP_NAMESPACES
	        )
	        # ``text`` is None on an empty element, there is nothing to encode
	        return [
	            item.text.encode('UTF-8')
	            for item in layers
	            if item.text is not None
	        ]

	class PhotoshopTextExtractorOptimusI18n(PhotoshopTextExtractorBase):
	    """
	    Parser to extract strings for Optimus format

	    Only the description and the output templates are redefined, each
	    extracted string being enclosed in a 'trans' tag.
	    """
	    description = "Optimus format, append each string line in a HTML paragraph, paragraph content is surrounded within a 'trans' tag"
	    output_item_template = "<!-- {name} -->\n<div>\n{contents}\n</div>\n\n"
	    # Doubled braces are turned into literal braces when the template is
	    # formatted
	    output_contentline_template = " <p>{{% trans %}}{content}{{% endtrans %}}</p>\n"

	# Available parsers, indexed by the format name given on the command line
	PARSERS = dict([
	    ('default', PhotoshopTextExtractorBase),
	    ('optimus', PhotoshopTextExtractorOptimusI18n),
	])

	if __name__ == "__main__":
	    # Command line entry point: either list the available formats or extract
	    # the strings from the given XMP file(s) into the output file.
	    import glob
	    from optparse import OptionParser

	    commandline_parser = OptionParser()
	    commandline_parser.add_option("-p", "--path", action='append', dest="filepaths", help=u"XMP filepath to parse", metavar="PATH")
	    commandline_parser.add_option("-o", "--output", action='store', dest="output_filepath", help=u"Filepath where extracted content will be writen", metavar="PATH")
	    commandline_parser.add_option('-g', '--glob', action='store_true', dest='glob_mode', default=False, help=u'Enable "Unix style pathname pattern expansion" for given filpath(s), note you will have to surround your pattern path with quotes, like \'*.xmp\'')
	    # A 'choice' option makes optparse reject an unknown format with a proper
	    # error message instead of a KeyError traceback later on
	    commandline_parser.add_option("-f", "--format", action='store', type='choice', choices=sorted(PARSERS), default='default', dest="parser", help=u"The optionnal format to use for the ouptput, use --list option to know about available formats", metavar="STRING")
	    commandline_parser.add_option('-l', '--list', action='store_true', dest='list_formats', default=False, help=u'List available formats then quit')

	    (commandline_options, commandline_args) = commandline_parser.parse_args()

	    if commandline_options.list_formats:
	        # Single argument ``print(...)`` behaves the same on Python 2 and 3
	        print("Available format are :")
	        print("")
	        # Sorted so the listing is the same on every run
	        for name in sorted(PARSERS):
	            print(" * {0} : {1}".format(name, PARSERS[name].description))
	        print("")
	    elif commandline_options.filepaths:
	        if not commandline_options.output_filepath:
	            commandline_parser.error("You must supply an output file path with the --output option.")
	        extractor = PARSERS[commandline_options.parser]()
	        if commandline_options.glob_mode:
	            filepaths = []
	            for pattern in commandline_options.filepaths:
	                # glob results come in arbitrary order, sort them to get a
	                # reproducible output
	                filepaths += sorted(glob.glob(pattern))
	            if not filepaths:
	                # Avoid writing an empty output file without any notice
	                commandline_parser.error("No file matches the given path pattern(s).")
	        else:
	            filepaths = commandline_options.filepaths[:]
	        # Do job
	        extractor.extract(filepaths, commandline_options.output_filepath)
	    else:
	        # Nothing was asked: show the usage rather than quitting silently
	        commandline_parser.print_help()