Skip to content

Instantly share code, notes, and snippets.

@omc8db
Last active July 7, 2023 07:00
Show Gist options
  • Save omc8db/47a6759b6ed06671e1e77c0c7e6ed84f to your computer and use it in GitHub Desktop.
Save omc8db/47a6759b6ed06671e1e77c0c7e6ed84f to your computer and use it in GitHub Desktop.
@spacenerd — Today at 1:59 PM "Could you make an awk script to print out the lyrics from WAP by only grouping 3 or more letters together at a time when parsing through the federal budget proposal?"
#!/usr/bin/env python3
import sys, re
# Number of leading target letters that every match must contain.
MIN_MATCH = 3
# Upper bound on how many target letters a single search tries to consume.
MAX_MATCH = 9

# Fail with a readable message instead of a bare IndexError traceback.
if len(sys.argv) < 3:
    sys.exit(f"usage: {sys.argv[0]} TARGET_FILE REFERENCE_FILE")

# The lyrics of "Wet Ass Pussy" by Cardi B
with open(sys.argv[1]) as _target_file:
    target = _target_file.read()
# United States House Resolution 2617, Omnibus spending bill for FY2023
# Lowercased once up front so the letter classes used by the search patterns
# ([a-z]) line up with the text being searched.
with open(sys.argv[2]) as _reference_file:
    reference = _reference_file.read().lower()
def sanitize(s):
    """Return ``s`` lowercased with non-word characters and underscores removed.

    Only characters matched by the regex class ``[\\W_]`` are stripped, so
    digits (and any non-ASCII word characters) are kept alongside letters.
    """
    # Raw string: a bare backslash-W in a normal literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    return re.sub(r'[\W_]+', '', s.lower())
# Sanitize the target text: lowercase it and strip every non-word character
# and underscore (digits are kept). Reuses sanitize() rather than repeating
# its regex inline.
target = sanitize(target)
# Modifiers that can be added to regex letters
# OPTIONAL_WITH_NOISE: the preceding letter may be absent, then any run of
# characters that are neither lowercase ASCII letters nor a newline.
OPTIONAL_WITH_NOISE='?[^a-z\n]*'
# WITH_NOISE: the preceding letter is required, followed by the same kind of
# run. Excluding the newline keeps a match from spanning two lines.
WITH_NOISE='[^a-z\n]*'
def regex_modify(s, option):
    """Return ``s`` with ``option`` appended after every lowercase ASCII letter.

    Args:
        s: Text (or regex fragment) whose ``a``-``z`` characters get modified.
            Any other character is copied through unchanged.
        option: Regex suffix to insert after each letter, e.g. a quantifier
            or a "noise" character class.

    Returns:
        The modified string.

    Note:
        ``option`` becomes part of an ``re.sub`` replacement template, so any
        backslash escapes it contains are interpreted by ``re.sub``.
    """
    return re.sub(r'([a-z])', r'\1' + option, s)
# ANSI escape sequences used to decorate matched text in terminal output
BOLD = "\x1b[1m"
PURPLE = "\x1b[95m"
UNDERLINE = "\x1b[4m"
# Appended after a highlighted span to switch the styling back off
END = "\x1b[0m"
# Base value added to in-slice match positions when a match is reported
offset = 0
def longest_match_regex(s):
    """Return a regex that matches any number of leading letters from s.

    Each character of ``s`` opens an optional group holding that character
    followed by a run of "noise" (anything that is not a lowercase ASCII
    letter or a newline). The groups are nested, so a later character can
    only match if every earlier one matched too; the pattern therefore
    matches the longest available prefix of ``s``, possibly empty.

    Characters are inserted into the pattern verbatim, so ``s`` is expected
    to contain only characters with no special regex meaning.

    Args:
        s: The characters to match, in order.

    Returns:
        The regex source string; ``""`` when ``s`` is empty.
    """
    # One opening "(<char><noise>" per character ...
    opening = "".join(f"({c}[^a-z\n]*" for c in s)
    # ... closed by the same number of optional-group terminators.
    return opening + ")?" * len(s)
# Consume the target text a few letters at a time. Each pass looks for a line
# of the remaining reference text that spells out the next letters of the
# target (ignoring non-letter noise in between), prints that line with the
# match highlighted, then continues after the match.
while reference and target:
    # The first MIN_MATCH letters are mandatory; the letters after them, up
    # to MAX_MATCH in total, are matched greedily if they happen to follow.
    searchstr = regex_modify(target[:MIN_MATCH], WITH_NOISE)
    searchstr += longest_match_regex(target[MIN_MATCH:MAX_MATCH])
    # Capture the whole surrounding line for display; group 1 is the match.
    line = re.search(f"\n.*({searchstr}).*\n", reference, re.IGNORECASE)
    if line is None:
        break
    # flags must be passed by keyword: the fourth positional parameter of
    # re.sub is `count`, so a positional re.IGNORECASE would silently cap the
    # number of substitutions instead of setting the flag.
    bolded_line = re.sub(
        f'({searchstr})',
        BOLD + PURPLE + UNDERLINE + r'\1' + END,
        line.group(0),
        flags=re.IGNORECASE,
    ).strip()
    # Find boundaries of word match within the line
    mstart, mend = line.span(1)
    # Only letters count as progress through the target; noise does not.
    matched_chars = len(re.sub('[^a-zA-Z]', '', line.group(1)))
    print(f"Omnibus spending bill, characters {offset + mstart} to {offset + mend}")
    print("\t" + bolded_line)
    target = target[matched_chars:]
    reference = reference[mend:]
    # The reference was cut at mend, so that is how far the base position
    # moves; advancing by mstart would make later positions drift low.
    offset += mend
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment