Skip to content

Instantly share code, notes, and snippets.

@edvardm
Last active July 14, 2018 09:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save edvardm/92f7fa5f433fa382ab547916a8083444 to your computer and use it in GitHub Desktop.
Save edvardm/92f7fa5f433fa382ab547916a8083444 to your computer and use it in GitHub Desktop.
A simple tool that replaces long strings matching a regular expression with short sequential tokens, which is useful when comparing two sets of data for equality.
import re
import sys
import string
def _get_fh(has_fname):
if has_fname:
return open(sys.argv[1])
else:
return sys.stdin
def _get_pattern(has_fname):
pattern = sys.argv[2] if has_fname else sys.argv[1]
return pattern
def run(input_handle, pattern, seq):
    """Print every input line with pattern matches replaced by short tokens.

    The first time a distinct matched string is seen it is assigned the
    next value drawn from ``seq``; later occurrences of the same string
    reuse that value, so equal matches always map to the same token and
    different matches map to different tokens.

    Args:
        input_handle: Iterable of text lines (an open file, ``sys.stdin``,
            ``io.StringIO``, ...).
        pattern: Regular expression (string or compiled pattern) to search
            for in each line.
        seq: Iterator yielding the replacement strings; it is advanced once
            per distinct matched string.

    Each line is written to stdout with trailing CR/LF characters removed.
    """
    regex = re.compile(pattern)  # compile once instead of once per line
    substitutions = {}

    def _token_for(match):
        # Look up (or allocate) the token for this particular match. Using
        # a callable replacement handles several different matches on one
        # line correctly, and the token is inserted literally rather than
        # being interpreted as a replacement template.
        match_string = match.group(0)
        if match_string not in substitutions:
            substitutions[match_string] = next(seq)
        return substitutions[match_string]

    # Iterate lazily instead of loading the whole input with readlines().
    for line in input_handle:
        print(regex.sub(_token_for, line).rstrip('\r\n'))
# Bail out with a usage message when no pattern argument was given.
# The check only applies when the file is executed as a script, so that
# importing the module (e.g. from tests) never terminates the interpreter.
# The message goes to stderr because stdout carries the remapped data.
if __name__ == '__main__' and len(sys.argv) < 2:
    print("Usage: pattern_to_seq [file] pattern", file=sys.stderr)
    sys.exit(1)
def make_gen(max_numval):
    """Generate short unique tokens: an uppercase letter followed by a number.

    The sequence starts ``A0, A1, ...``. Whenever the number reaches a
    non-zero multiple of ``max_numval`` the prefix advances to the next
    letter and the number restarts at 0. Once the prefix is ``Z`` there is
    no further letter to move to, so the number simply keeps growing
    (``Z0, Z1, ..., Z<max_numval>, ...``) and the generator never ends.

    Args:
        max_numval: How many numbers are used under each prefix before
            moving on to the next letter.

    Yields:
        Token strings such as ``'A0'`` or ``'B17'``.
    """
    prefixes = iter(string.ascii_uppercase)
    prefix = next(prefixes)
    # A plain integer counter is unbounded; drawing numbers from a finite
    # range iterator would eventually raise StopIteration inside the
    # generator, which Python turns into RuntimeError.
    number = 0
    while True:
        if number and number % max_numval == 0 and prefix != 'Z':
            prefix = next(prefixes)
            number = 0
        yield f'{prefix}{number}'
        number += 1
if __name__ == '__main__':
    # Three argv entries mean "script file pattern"; otherwise the input
    # comes from stdin and the pattern is the first argument.
    fname_present = len(sys.argv) == 3
    input_handle = _get_fh(fname_present)
    try:
        run(input_handle, _get_pattern(fname_present), make_gen(10000))
    finally:
        # Close a file we opened ourselves, but leave stdin alone.
        if input_handle is not sys.stdin:
            input_handle.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment