Skip to content

Instantly share code, notes, and snippets.

@sveetch
Last active December 26, 2015 11:49
Show Gist options
  • Save sveetch/7146034 to your computer and use it in GitHub Desktop.
Save sveetch/7146034 to your computer and use it in GitHub Desktop.
Command line tool to extract text layer strings from a Photoshop XMP file
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Simple strings extractor from a Photoshop XMP file
It extracts the text of every <photoshop:LayerText/> element from the document, then writes it to a file.
"""
import os
from xml.etree.ElementTree import ElementTree as ET
# XML namespace prefixes used within XMP documents, mapped to their URIs.
# The mapping is handed to ElementTree so that search paths can be written
# with the short prefix (``photoshop:LayerText``) instead of the full URI.
PHOTOSHOP_NAMESPACES = {
    "photoshop": "http://ns.adobe.com/photoshop/1.0/",
    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
}
class PhotoshopTextExtractorBase(object):
    """
    Default parser to extract strings from XMP files

    Every input file produces one item in the output file:
    ``output_item_template`` filled with the file base name (``{name}``) and
    the concatenation (``{contents}``) of ``output_contentline_template``
    rendered once per extracted string (``{content}``).

    Subclasses change the output format by overriding the two templates and
    ``description`` (the text shown when available formats are listed).
    """
    description = "Default format, append each string line in a HTML paragraph"
    output_item_template = "<!-- {name} -->\n{contents}"
    output_contentline_template = "<p>{content}</p>\n"

    def __init__(self, *args, **kwargs):
        # Arguments are accepted and ignored so every extractor class can be
        # instantiated the same way.
        pass

    def extract(self, paths, output):
        """
        Extract the strings of every given XMP file and write them to a file.

        :param paths: a single XMP file path, or an iterable of paths.
        :param output: path of the file to write. It is created or
            truncated, then written as UTF-8.

        Errors raised while reading or parsing an input file are propagated
        to the caller; the output file is closed in every case.
        """
        # Local import: ``io`` is not part of this module's top-level imports
        import io

        try:
            string_types = basestring  # Python 2: covers str and unicode
        except NameError:
            string_types = str  # Python 3

        # Accept a lone path as well as a collection of paths
        if isinstance(paths, string_types):
            paths = [paths]

        # The context manager closes the file even if parsing fails midway
        with io.open(output, "w", encoding="UTF-8") as output_fp:
            for filepath in paths:
                contents = "".join(
                    self.output_contentline_template.format(content=item)
                    for item in self.xmp_parser(filepath)
                )
                rendered = self.output_item_template.format(
                    name=self.get_file_name(filepath),
                    contents=contents,
                )
                # On Python 2 the rendered item is a UTF-8 byte string while
                # the text-mode file expects unicode
                if isinstance(rendered, bytes):
                    rendered = rendered.decode("UTF-8", "replace")
                output_fp.write(rendered)

    def get_file_name(self, path):
        """
        Return the name used to label a file in the output: its base name.
        """
        return os.path.basename(path)

    def xmp_parser(self, path):
        """
        Extract all strings in ``<photoshop:LayerText>`` elements from given
        path and return them as a list of native ``str``: encoded in UTF8 on
        Python 2, plain text on Python 3.

        Elements without any text (like ``<photoshop:LayerText/>``) are
        skipped since they hold no string to extract.
        """
        lines = []
        tree = ET()
        tree.parse(path)
        root = tree.getroot()

        for item in root.findall('.//photoshop:LayerText', namespaces=PHOTOSHOP_NAMESPACES):
            text = item.text
            if text is None:
                continue
            # Only a Python 2 ``unicode`` object is not a ``str`` here
            if not isinstance(text, str):
                text = text.encode('UTF-8')
            lines.append(text)

        return lines
class PhotoshopTextExtractorOptimusI18n(PhotoshopTextExtractorBase):
    """
    Extractor producing output for the Optimus format

    Only the templates differ from the base extractor: each file item is
    wrapped in a ``<div>`` and each string is enclosed in
    ``{% trans %}``/``{% endtrans %}`` tags inside its paragraph.
    """
    description = (
        "Optimus format, append each string line in a HTML paragraph, "
        "paragraph content is surrounded within a 'trans' tag"
    )
    # One block per parsed file
    output_item_template = "<!-- {name} -->\n<div>\n{contents}\n</div>\n\n"
    # Doubled braces are literal braces once passed through ``str.format``
    output_contentline_template = " <p>{{% trans %}}{content}{{% endtrans %}}</p>\n"
# Map of available output formats: the name given to the ``--format`` command
# line option is the key, the extractor class to instantiate is the value.
PARSERS = {}
PARSERS['default'] = PhotoshopTextExtractorBase
PARSERS['optimus'] = PhotoshopTextExtractorOptimusI18n
def main(argv=None):
    """
    Command line entry point.

    :param argv: list of command line arguments, without the program name.
        When ``None`` the arguments are read from ``sys.argv``.

    Depending on the options, either list the available output formats or
    extract the strings of the given XMP file(s) to the output file. Invalid
    usage (missing output path, unknown format) is reported through the
    option parser, which prints the message and exits with status 2.
    """
    import glob
    from optparse import OptionParser

    commandline_parser = OptionParser()
    commandline_parser.add_option("-p", "--path", action='append', dest="filepaths", help=u"XMP filepath to parse", metavar="PATH")
    commandline_parser.add_option("-o", "--output", action='store', dest="output_filepath", help=u"Filepath where extracted content will be writen", metavar="PATH")
    commandline_parser.add_option('-g', '--glob', action='store_true', dest='glob_mode', default=False, help=u'Enable "Unix style pathname pattern expansion" for given filpath(s), note you will have to surround your pattern path with quotes, like \'*.xmp\'')
    commandline_parser.add_option("-f", "--format", action='store', default='default', dest="parser", help=u"The optionnal format to use for the ouptput, use --list option to know about available formats", metavar="STRING")
    commandline_parser.add_option('-l', '--list', action='store_true', dest='list_formats', default=False, help=u'List available formats then quit')

    (commandline_options, commandline_args) = commandline_parser.parse_args(argv)

    if commandline_options.list_formats:
        # Single-argument print calls behave the same on Python 2 and 3
        print("Available format are :")
        print("")
        # Sorted so the listing does not depend on dict ordering
        for k, v in sorted(PARSERS.items()):
            print(" * {0} : {1}".format(k, v.description))
        print("")
        return

    if not commandline_options.filepaths:
        # Nothing to do: show the usage instead of silently exiting
        commandline_parser.print_help()
        return

    if not commandline_options.output_filepath:
        commandline_parser.error("You must supply an output file path with the --output option.")

    if commandline_options.parser not in PARSERS:
        # Report a usage error rather than a bare KeyError traceback
        commandline_parser.error("Unknown format '{0}', use --list option to know about available formats.".format(commandline_options.parser))

    extractor = PARSERS[commandline_options.parser]()

    if commandline_options.glob_mode:
        # Each given path is a pattern to expand; matches are sorted because
        # glob returns them in arbitrary order
        filepaths = []
        for pattern in commandline_options.filepaths:
            filepaths.extend(sorted(glob.glob(pattern)))
    else:
        filepaths = list(commandline_options.filepaths)

    # Do job
    extractor.extract(filepaths, commandline_options.output_filepath)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment