Created
September 16, 2016 17:36
-
-
Save geowa4/8146643d95b4f86d08c824cad8fc37eb to your computer and use it in GitHub Desktop.
Find the word with the most repeated characters.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
O Romeo, Romeo, wherefore art thou Romeo? | |
Some people feel the rain, while others just get wet. | |
ffff ----- '''''' D'd-d'-dd'-dd-' d'd-d'-dd'-dd-' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from typing import IO, Any, List, Text | |
import argparse | |
import sys | |
from collections import Counter | |
def main() -> None:
    """Print the word with the most repeated letters in the input file.

    The input file is obtained from the command line through
    ``get_file_from_args``; the winning word is selected by
    ``find_needle_in_haystack`` and written to standard output.
    """
    # Use the handle as a context manager so the file is closed even if the
    # search raises part-way through reading it.
    with get_file_from_args() as haystack:
        needle = find_needle_in_haystack(haystack)
    print(needle)
def get_file_from_args() -> IO[Any]:
    """Parse the command line and open the named file for reading.

    The command line must carry a single positional ``filename`` argument.

    Returns:
        The file opened in text mode (``'r'``). The caller is responsible
        for closing it.

    Raises:
        SystemExit: with status 1 if the file cannot be opened (a message is
            written to standard error first); argparse itself also exits on
            malformed arguments.
    """
    parser = argparse.ArgumentParser(
        description='Find the word with the most repeated letters.'
    )
    parser.add_argument(
        'filename', type=str,
        help='path to file containing input text.'
    )
    args = parser.parse_args()
    try:
        return open(args.filename, 'r')
    except OSError:
        # Only I/O failures (missing file, permissions, is-a-directory, ...)
        # are reported here; a bare ``except`` would also swallow
        # KeyboardInterrupt and genuine programming errors.
        print(
            'There was an error opening {}.'.format(args.filename),
            file=sys.stderr
        )
        sys.exit(1)
def find_needle_in_haystack(haystack: IO[Any]) -> Text:
    """Return the word in *haystack* with the highest letter repetition.

    *haystack* is iterated line by line; each line is broken into words by
    ``split_and_scrub_punctuation`` and each word is scored by
    ``get_max_letter_repitition``. The first word to reach the highest score
    wins; an empty string is returned when no word scores above zero.
    """
    best_word = ''
    best_count = 0
    # Lazily flatten the file into a single stream of scrubbed words.
    candidates = (
        candidate
        for line in haystack
        for candidate in split_and_scrub_punctuation(line)
    )
    for candidate in candidates:
        repeats = get_max_letter_repitition(candidate)
        # Ties keep the earlier word, so only a strictly larger score wins.
        if repeats <= best_count:
            continue
        best_word, best_count = candidate, repeats
    return best_word
def split_and_scrub_punctuation(line: str) -> List[Text]:
    """Split *line* on whitespace and strip punctuation from each token.

    The characters ``, . ? ! " “ ” : ;`` are removed wherever they occur in
    a token. Apostrophes and hyphens are kept. A token made up solely of
    removed characters is returned as an empty string rather than dropped.
    """
    unwanted = ',.?!"“”:;'
    return [
        ''.join(symbol for symbol in token if symbol not in unwanted)
        for token in line.split()
    ]
def get_max_letter_repitition(word: str) -> int:
    """Return how often the most frequent character occurs in *word*.

    Counting is case-insensitive, and apostrophes and hyphens are ignored so
    that they can neither win nor hide a repeated letter.

    Returns:
        The highest occurrence count among the remaining characters, or 0
        when *word* is empty or consists only of ``'`` and ``-``.
    """
    # Drop ' and - *before* counting. Filtering only the single top entry of
    # most_common(1) would report 0 for a word such as "'''aa", whose most
    # frequent character is an apostrophe even though 'a' repeats twice.
    letter_counter = Counter(
        character for character in word.lower() if character not in "'-"
    )
    return max(letter_counter.values(), default=0)
# Run the search only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
# This code was checked for errors with flake8 and mypy. | |
# | |
# $ ls -lA input | |
# -rw-r--r-- 1 geowa4 staff 2.0M Sep 15 09:03 combined | |
# | |
# $ time ./puzzle.py input | |
# D'd-d'-dd'-dd-' | |
# ./datto.py combined 2.61s user 0.01s system 99% cpu 2.631 total |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment