Skip to content

Instantly share code, notes, and snippets.

@geowa4
Created September 16, 2016 17:36
Show Gist options
  • Save geowa4/8146643d95b4f86d08c824cad8fc37eb to your computer and use it in GitHub Desktop.
Save geowa4/8146643d95b4f86d08c824cad8fc37eb to your computer and use it in GitHub Desktop.
Find the word with the most repeated characters.
O Romeo, Romeo, wherefore art thou Romeo?
Some people feel the rain, while others just get wet.
ffff ----- '''''' D'd-d'-dd'-dd-' d'd-d'-dd'-dd-'
#!/usr/bin/env python
from typing import IO, Any, List, Text
import argparse
import sys
from collections import Counter
def main() -> None:
    """Print the word with the most repeated letters in the input file.

    The input file is taken from the command line via
    ``get_file_from_args`` and searched with ``find_needle_in_haystack``.
    """
    # get_file_from_args hands back an open file object; the ``with`` block
    # guarantees it is closed even if the search raises.
    with get_file_from_args() as haystack:
        needle = find_needle_in_haystack(haystack)
    print(needle)
def get_file_from_args() -> IO[Any]:
    """Parse the command line and open the named input file for reading.

    Expects a single positional ``filename`` argument.

    Returns:
        The file opened in text read mode. The caller is responsible for
        closing it.

    Raises:
        SystemExit: with status 1 (after printing a message to stderr) when
            the file cannot be opened; argparse itself also exits on
            malformed arguments.
    """
    parser = argparse.ArgumentParser(
        description='Find the word with the most repeated letters.'
    )
    parser.add_argument(
        'filename', type=str,
        help='path to file containing input text.'
    )
    args = parser.parse_args()
    try:
        return open(args.filename, 'r')
    except OSError:
        # Only genuine open failures (missing file, permissions, ...) are
        # reported here; a bare ``except`` would also swallow
        # KeyboardInterrupt and unrelated programming errors.
        print(
            'There was an error opening {}.'.format(args.filename),
            file=sys.stderr
        )
        sys.exit(1)
def find_needle_in_haystack(haystack: IO[Any]) -> Text:
    """Return the word in *haystack* whose top letter repeats the most.

    The input is consumed line by line; each line is broken into scrubbed
    words and every word is scored by its highest single-letter count.
    The first word reaching the highest score wins. An empty string is
    returned when no word scores above zero.
    """
    best_word = ''
    best_score = 0
    # Flatten the file into one lazy stream of scrubbed words.
    candidates = (
        candidate
        for line in haystack
        for candidate in split_and_scrub_punctuation(line)
    )
    for candidate in candidates:
        score = get_max_letter_repitition(candidate)
        # Ties keep the earlier word, so only a strictly higher score wins.
        if score <= best_score:
            continue
        best_word, best_score = candidate, score
    return best_word
def split_and_scrub_punctuation(line: str) -> List[Text]:
    """Split *line* on whitespace and strip punctuation from each word.

    The characters ``, . ? ! " “ ” : ;`` are removed wherever they occur in
    a word. Apostrophes and hyphens are kept. A token made up solely of
    removed characters yields an empty string in the result, so the output
    has one entry per whitespace-separated token.

    Args:
        line: a line of input text.

    Returns:
        The scrubbed words, in their original order.
    """
    # One translate() pass per word replaces the per-character string
    # concatenation. Splitting happens first so that punctuation-only
    # tokens still produce an (empty) entry.
    strip_punctuation = str.maketrans('', '', ',.?!"“”:;')
    return [word.translate(strip_punctuation) for word in line.split()]
def get_max_letter_repitition(word: str) -> int:
    """Return how often the most frequent character occurs in *word*.

    Counting is case-insensitive, and apostrophes and hyphens are ignored.

    Args:
        word: a single word, possibly containing ``'`` or ``-``.

    Returns:
        The highest per-character count, or 0 when *word* is empty or
        consists only of ``'`` and ``-``.
    """
    # Drop ' and - BEFORE counting. Filtering after most_common(1) would
    # report 0 for a word such as "aa---", where a hyphen outnumbers the
    # repeated letter.
    letter_counter = Counter(
        character for character in word.lower() if character not in "'-"
    )
    return max(letter_counter.values(), default=0)
# Run the search only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
# This code was checked for errors with flake8 and mypy.
#
# $ ls -lA input
# -rw-r--r-- 1 geowa4 staff 2.0M Sep 15 09:03 combined
#
# $ time ./puzzle.py input
# D'd-d'-dd'-dd-'
# ./datto.py combined 2.61s user 0.01s system 99% cpu 2.631 total
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment