Skip to content

Instantly share code, notes, and snippets.

@mkweskin
Created April 16, 2020 18:32
Show Gist options
  • Save mkweskin/b0203c9d9ec0a1f748f4d2ec63e8e599 to your computer and use it in GitHub Desktop.
Save mkweskin/b0203c9d9ec0a1f748f4d2ec63e8e599 to your computer and use it in GitHub Desktop.
A general utility to do a batch find/replace. Takes a translation file with the find/replace pairs and a file to be translated.
#!/usr/bin/env python3
"""
Author: Matthew Kweskin, github: @mkweskin
A general utility that reads find/replace pairs from a two-column delimited
translation file and applies them to the contents of a text file.
"""
import argparse
from os import path
import sys
import re
import codecs
# Refuse to run under Python 2: only the major version number matters here.
if sys.version_info[0] < 3:
    raise Exception("This script requires python3. One way to install this is with miniconda3: https://docs.conda.io/en/latest/miniconda.html")
def unescaped_str(arg_str):
    """
    Interpret backslash escape sequences in a command-line argument.

    Used as an argparse ``type=`` converter so that a user can type a
    two-character sequence such as backslash + "t" and get a real tab.

    arg_str: the raw argument text.
    Returns the text with escape sequences (tab, newline, hex and unicode
    escapes, ...) replaced by the characters they denote. Characters that
    are not part of an escape sequence, including non-ASCII ones, are
    returned unchanged.
    Raises UnicodeDecodeError for a malformed escape (e.g. a trailing
    backslash).
    """
    # The 'unicode_escape' codec treats its input as Latin-1 bytes. Handing it
    # text directly garbles anything outside ASCII, so first encode to Latin-1
    # and turn characters that do not fit into backslash escapes, which the
    # decoder then restores losslessly.
    raw = str(arg_str).encode('latin-1', 'backslashreplace')
    return codecs.decode(raw, 'unicode_escape')
def get_args(argv=None):
    """
    Build the command-line parser and parse the arguments.

    argv: optional list of argument strings to parse. When None (the
          default) argparse falls back to the process's command line, so
          existing calls of get_args() behave as before.
    Returns an argparse.Namespace with the attributes TRANSLATION_FILE,
    INPUT, output, overwrite, delimiter, quiet, trim and reverse.
    """
    parser = argparse.ArgumentParser(
        description="Text replacement utility that gets values to find/replace from a translation file (tab separated file as default, other delimiters can be specified.)",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Positional arguments.
    parser.add_argument("TRANSLATION_FILE", help='File with the pairs of values to find/replace. Note: if there are >1 delimiter in a line, the text following the final delimiter character will be used as the replace value.')
    parser.add_argument("INPUT", help='File to perform the find/replace on.')

    # Output handling.
    parser.add_argument("--output", help='Output file. Use \'STDOUT\' to output to screen.', default='STDOUT')
    parser.add_argument("--overwrite", action="store_true", help='Automatically overwrite the output file if it\'s already present')

    # Translation-file parsing options. The delimiter goes through
    # unescaped_str so that escape sequences typed on the command line
    # (such as backslash-t) become the real character.
    parser.add_argument("--delimiter", help='Character(s) to separate the fields. Use \'\\t\' for tab (default).', type=unescaped_str, default='\t')
    parser.add_argument("--quiet", action="store_true", help='Disable warning messages')
    parser.add_argument("--trim", action="store_true", help='Trim out extra white space before and after each find/replace pair')
    parser.add_argument("--reverse", action="store_true", help='Reverse columns 1 and 2 in the translation file')

    return parser.parse_args(argv)
def file_checks(args):
    """
    Validate the input, translation and output paths before any work is done.

    args: parsed arguments; reads INPUT, TRANSLATION_FILE, output and
          overwrite.
    Raises Exception if the input file, the translation file, or the
    directory that should hold the output file does not exist.
    Exits the script (via sys.exit) if the output file already exists,
    --overwrite was not given, and the user does not answer 'y' to the
    interactive prompt.
    """
    if not path.exists(args.INPUT):
        raise Exception("Input file does not exist")
    if not path.exists(args.TRANSLATION_FILE):
        raise Exception("Translation file does not exist")

    # Nothing on disk to check when writing to the screen.
    if args.output == 'STDOUT':
        return

    if path.exists(args.output) and not args.overwrite:
        answer = input("Output file exists. (use --overwrite to Automatically overwrite the output file)\nOverwrite [y/n]? ")
        if answer.lower() != 'y':
            print("Exiting script.")
            sys.exit()

    # An empty dirname means the output goes to the current directory.
    outputdir = path.dirname(args.output)
    if outputdir and not path.exists(outputdir):
        raise Exception("Output directory does not exist")
def read_translation(translation_file, quiet, trim, delim, reverse):
    """
    Read a translation file into a dictionary of find -> replace pairs.

    translation_file: path of the delimited file to read, one pair per line.
    quiet:   when true, suppress the warnings written to stderr.
    trim:    when true, strip surrounding white space from both fields.
    delim:   delimiter string separating the two fields.
    reverse: when true, use the second field as the text to find and the
             first as its replacement.
    Returns a dict mapping each text to find to its replacement. If the same
    text to find appears more than once, the last occurrence wins.
    Exits the script with status 1 if a line does not contain the delimiter.
    """
    translations = {}
    with open(translation_file) as translation:
        for line in translation:
            line = line.rstrip('\r\n')
            # Split on the LAST delimiter only, so any earlier delimiters stay
            # part of the left-hand field.
            line_split = line.rsplit(delim, 1)
            if len(line_split) != 2:
                sys.stderr.write("ERROR: line is missing the delimiter character: \"" + str(line_split) + "\"\n")
                # Non-zero status so callers and shells can detect the failure.
                sys.exit(1)
            if reverse:
                line_split.reverse()
            if trim:
                line_split = [field.strip() for field in line_split]
            key, val = line_split
            if not quiet:
                if key in translations:
                    sys.stderr.write("Warning: the text to find \"" + str(key) + "\" is already defined as \"" + str(translations[key]) + "\", overwritting.\n")
                if delim in key:
                    sys.stderr.write("Warning: the line \"" + line + "\" contains an extra delimiter. All text to the left of the last delimiter will be used as the search string.\n")
            translations[key] = val
    return translations
def do_find_replace(infile, outfile, re_dict):
    """
    Apply the find/replace pairs to every line of the input.

    infile:  iterable of text lines (e.g. an open file).
    outfile: object with a write() method, or None to write to sys.stdout.
    re_dict: dict mapping each literal text to find to its replacement.
             Keys are matched literally, not as regular expressions.
    Returns None; the translated lines are written to the output.

    All keys are searched for in a single pass, so replaced text is never
    searched again. Empty keys are ignored. With no usable keys the input
    is copied through unchanged.
    """
    out = sys.stdout if outfile is None else outfile

    # An empty search string would match at every position, so skip it.
    # Longest keys go first because regex alternation takes the first
    # alternative that matches: otherwise a short key that is a prefix of a
    # longer one would always win and the longer key could never match.
    keys = sorted((key for key in re_dict if key), key=len, reverse=True)

    if not keys:
        for line in infile:
            out.write(line)
        return

    pattern = re.compile("|".join(re.escape(key) for key in keys))

    def replacement(match):
        # The matched text is exactly one of the original keys.
        return re_dict[match.group(0)]

    for line in infile:
        out.write(pattern.sub(replacement, line))
def main():
    """
    Script entry point: parse arguments, validate the paths, load the
    translation pairs and write the translated input to the chosen output.
    """
    args = get_args()
    file_checks(args)
    re_dict = read_translation(args.TRANSLATION_FILE, args.quiet, args.trim, args.delimiter, args.reverse)

    with open(args.INPUT) as infile:
        if args.output == "STDOUT":
            # None tells do_find_replace to write to the screen.
            do_find_replace(infile, None, re_dict)
            return

        # Opening the output with 'w' truncates it immediately. If it is the
        # same file as the input, read the whole input into memory first so
        # its contents are not lost before they are translated.
        if path.exists(args.output) and path.samefile(args.INPUT, args.output):
            lines = infile.readlines()
        else:
            lines = infile

        with open(args.output, 'w') as outfile:
            do_find_replace(lines, outfile, re_dict)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment