Last active
December 26, 2015 11:49
-
-
Save sveetch/7146034 to your computer and use it in GitHub Desktop.
Command line tool to extract text layer strings from a Photoshop XMP file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
""" | |
Simple strings extractor from a Photoshop XMP file | |
It extracts the text of every <photoshop:LayerText/> element from the document, then writes it to a file.
""" | |
import io
import os
from xml.etree.ElementTree import ElementTree as ET
# XML namespace prefixes used when querying XMP documents, mapped to their URIs
PHOTOSHOP_NAMESPACES = dict(
    photoshop='http://ns.adobe.com/photoshop/1.0/',
    rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#',
)
class PhotoshopTextExtractorBase(object):
    """
    Default parser to extract strings from XMP files

    Every string returned by ``xmp_parser`` is rendered through
    ``output_contentline_template``; the rendered lines of one document are
    then wrapped with ``output_item_template``. Subclasses change the output
    format by overriding these two templates.
    """
    description = "Default format, append each string line in a HTML paragraph"
    # Wraps all the lines of one document; receives ``name`` and ``contents``
    output_item_template = "<!-- {name} -->\n{contents}"
    # Renders one extracted string; receives ``content``
    output_contentline_template = "<p>{content}</p>\n"

    def __init__(self, *args, **kwargs):
        # Arguments are accepted but ignored
        pass

    @staticmethod
    def _to_text(value):
        """
        Return ``value`` as a text string, decoding it from UTF-8 when it is
        a byte string (undecodable bytes are replaced, never raised on)
        """
        if isinstance(value, bytes):
            return value.decode("utf-8", "replace")
        return value

    def extract(self, paths, output):
        """
        Extract the strings of every XMP file in ``paths`` and write them,
        rendered with the templates, into the file at ``output``

        ``paths`` is either a single path or a list of paths. The output file
        is overwritten and always encoded in UTF-8.
        """
        # ``type(u"")`` is ``unicode`` on Python 2 and ``str`` on Python 3, so
        # this check stands in for the Python 2 only ``basestring``
        if isinstance(paths, (bytes, type(u""))):
            paths = [paths]

        # Everything is handled as text and encoded once when written, so
        # the result is the same on Python 2 and Python 3
        item_template = self._to_text(self.output_item_template)
        line_template = self._to_text(self.output_contentline_template)

        # The context manager closes the file even if a document fails to parse
        with io.open(output, "w", encoding="utf-8") as output_fp:
            for filepath in paths:
                name = self._to_text(self.get_file_name(filepath))
                contents = u"".join(
                    line_template.format(content=self._to_text(item))
                    for item in self.xmp_parser(filepath)
                )
                output_fp.write(item_template.format(name=name, contents=contents))

    def get_file_name(self, path):
        """
        Return the name displayed for a document: the last component of ``path``
        """
        return os.path.basename(path)

    def xmp_parser(self, path):
        """
        Extract all strings in ``<photoshop:LayerText>`` elements from given
        path and return them encoded in UTF8

        Elements that hold no text at all are skipped.
        """
        tree = ET()
        tree.parse(path)
        root = tree.getroot()
        layers = root.findall('.//photoshop:LayerText', namespaces=PHOTOSHOP_NAMESPACES)
        # ``text`` is None for an empty element, which has nothing to encode
        return [item.text.encode('UTF-8') for item in layers if item.text is not None]
class PhotoshopTextExtractorOptimusI18n(PhotoshopTextExtractorBase):
    """
    Extractor variant producing markup for the Optimus format

    Only the description and the two output templates differ from the base
    extractor.
    """
    description = (
        "Optimus format, append each string line in a HTML paragraph, "
        "paragraph content is surrounded within a 'trans' tag"
    )
    # Each document is wrapped in its own <div> below the name comment
    output_item_template = (
        "<!-- {name} -->\n"
        "<div>\n"
        "{contents}\n"
        "</div>\n"
        "\n"
    )
    # Doubled braces come out of str.format as literal single braces
    output_contentline_template = (
        " <p>{{% trans %}}{content}{{% endtrans %}}</p>\n"
    )
# Registry of the available output formats, keyed by the name given to --format
PARSERS = dict(
    default=PhotoshopTextExtractorBase,
    optimus=PhotoshopTextExtractorOptimusI18n,
)
def main(argv=None):
    """
    Command line entry point

    ``argv`` is the list of arguments to parse; when it is ``None`` the
    option parser falls back to the arguments of the running process.

    Depending on the options this lists the available formats, runs the
    extraction, or prints the usage help when there is nothing to do.
    """
    import glob
    from optparse import OptionParser

    commandline_parser = OptionParser()
    commandline_parser.add_option(
        "-p", "--path", action='append', dest="filepaths",
        help=u"XMP filepath to parse", metavar="PATH")
    commandline_parser.add_option(
        "-o", "--output", action='store', dest="output_filepath",
        help=u"Filepath where extracted content will be writen", metavar="PATH")
    commandline_parser.add_option(
        '-g', '--glob', action='store_true', dest='glob_mode', default=False,
        help=u'Enable "Unix style pathname pattern expansion" for given filpath(s), note you will have to surround your pattern path with quotes, like \'*.xmp\'')
    commandline_parser.add_option(
        "-f", "--format", action='store', default='default', dest="parser",
        help=u"The optionnal format to use for the ouptput, use --list option to know about available formats", metavar="STRING")
    commandline_parser.add_option(
        '-l', '--list', action='store_true', dest='list_formats', default=False,
        help=u'List available formats then quit')

    # Positional arguments are not used by this tool
    (commandline_options, _unused_args) = commandline_parser.parse_args(argv)

    if commandline_options.list_formats:
        # print() with a single argument behaves the same on Python 2 and 3
        print("Available format are :")
        # Sorted so the listing does not depend on dictionary ordering
        for format_name in sorted(PARSERS):
            print(" * {0} : {1}".format(format_name, PARSERS[format_name].description))
        return

    if not commandline_options.filepaths:
        # Nothing to do: show the usage instead of exiting silently
        commandline_parser.print_help()
        return

    if not commandline_options.output_filepath:
        commandline_parser.error("You must supply an output file path with the --output option.")

    # Report an unknown format as a usage error rather than a KeyError traceback
    if commandline_options.parser not in PARSERS:
        commandline_parser.error(
            "Unknown format '{0}', use --list option to know about available formats.".format(
                commandline_options.parser))

    extractor = PARSERS[commandline_options.parser]()

    filepaths = list(commandline_options.filepaths)
    if commandline_options.glob_mode:
        # Each given path is a pattern, replaced by the files it matches
        filepaths = [
            match
            for pattern in commandline_options.filepaths
            for match in glob.glob(pattern)
        ]

    # Do job
    extractor.extract(filepaths, commandline_options.output_filepath)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment