geowa4/input

## input
O Romeo, Romeo, wherefore art thou Romeo?

Some people feel the rain, while others just get wet.


ffff ----- '''''' D'd-d'-dd'-dd-' d'd-d'-dd'-dd-'

## puzzle.py
#!/usr/bin/env python

from typing import IO, Any, List, Text
import argparse
import sys
from collections import Counter


def main() -> None:
    """Print the word with the most repeated letters in the input file.

    The file named on the command line is opened by ``get_file_from_args``,
    scanned by ``find_needle_in_haystack``, and the winning word is written
    to standard output.
    """
    # The context manager guarantees the file is closed, even if scanning
    # raises; previously the handle was never closed explicitly.
    with get_file_from_args() as haystack:
        needle = find_needle_in_haystack(haystack)
    print(needle)


def get_file_from_args() -> IO[Any]:
    """Open the input file named by the ``filename`` command-line argument.

    Returns:
        The file, opened for reading in text mode. The caller is
        responsible for closing it.

    Raises:
        SystemExit: With status 1 when the file cannot be opened, after a
            message has been written to standard error. ``argparse`` also
            exits by itself on invalid command-line usage.
    """
    parser = argparse.ArgumentParser(
        description='Find the word with the most repeated letters.'
    )
    parser.add_argument(
        'filename', type=str,
        help='path to file containing input text.'
    )
    args = parser.parse_args()
    try:
        return open(args.filename, 'r')
    except OSError:
        # Only I/O failures (missing file, permissions, is-a-directory, ...)
        # are reported here. A bare ``except`` would also swallow
        # KeyboardInterrupt and genuine programming errors.
        print(
            'There was an error opening {}.'.format(args.filename),
            file=sys.stderr
        )
        sys.exit(1)


def find_needle_in_haystack(haystack: IO[Any]) -> Text:
    """Return the word in *haystack* whose most frequent letter repeats most.

    Args:
        haystack: An iterable of text lines, typically an open text file.

    Returns:
        The first word that reaches the highest repetition count, or an
        empty string when no word scores above zero.
    """
    best_word, best_score = '', 0

    # Stream every scrubbed word of every line without building a full list.
    candidates = (
        token
        for row in haystack
        for token in split_and_scrub_punctuation(row)
    )

    for token in candidates:
        score = get_max_letter_repitition(token)
        # Strict comparison: on a tie the earliest word keeps the title.
        if score <= best_score:
            continue
        best_word, best_score = token, score

    return best_word


# Translation table that deletes the punctuation stripped from every word.
# Apostrophes and hyphens are deliberately kept: they are part of words such
# as "it's" or "well-known".
_PUNCTUATION_TABLE = str.maketrans('', '', ',.?!"“”:;')


def split_and_scrub_punctuation(line: str) -> List[Text]:
    """Split *line* on whitespace and strip punctuation from each word.

    Args:
        line: One line of input text.

    Returns:
        The words of the line in order, with the characters ``,.?!"“”:;``
        removed. Tokens that consist only of punctuation are dropped
        instead of being returned as empty strings.
    """
    scrubbed_words = (
        naive_word.translate(_PUNCTUATION_TABLE)
        for naive_word in line.split()
    )
    # Skip tokens that became empty after scrubbing (e.g. a lone "?").
    return [word for word in scrubbed_words if word]


def get_max_letter_repitition(word: str) -> int:
    """Return how often the most frequent character of *word* occurs.

    Counting is case-insensitive, and apostrophes and hyphens are ignored.

    Args:
        word: A single word, possibly containing ``'`` or ``-``.

    Returns:
        The highest per-character count, or 0 when the word is empty or
        consists only of apostrophes and hyphens.
    """
    # Exclude ' and - *before* counting. Taking ``most_common(1)`` first and
    # filtering afterwards made words such as "a--" score 0, because the
    # single most common entry was the hyphen and nothing else was examined.
    letter_counter = Counter(
        character for character in word.lower() if character not in "'-"
    )
    return max(letter_counter.values(), default=0)


# Run the puzzle only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

# This code was checked for errors with flake8 and mypy.
#
# $ ls -lA input
# -rw-r--r--  1 geowa4  staff   2.0M Sep 15 09:03 combined
#
# $ time ./puzzle.py input
# D'd-d'-dd'-dd-'
# ./datto.py combined  2.61s user 0.01s system 99% cpu 2.631 total
	O Romeo, Romeo, wherefore art thou Romeo?

	Some people feel the rain, while others just get wet.



	ffff ----- '''''' D'd-d'-dd'-dd-' d'd-d'-dd'-dd-'
	#!/usr/bin/env python

	from typing import IO, Any, List, Text
	import argparse
	import sys
	from collections import Counter


	def main() -> None:
	    """Print the word with the most repeated letters in the input file.

	    The file named on the command line is opened by
	    ``get_file_from_args``, scanned by ``find_needle_in_haystack``, and
	    the winning word is written to standard output.
	    """
	    # The context manager guarantees the file is closed, even if
	    # scanning raises.
	    with get_file_from_args() as haystack:
	        needle = find_needle_in_haystack(haystack)
	    print(needle)


	def get_file_from_args() -> IO[Any]:
	    """Open the input file named by the ``filename`` command-line argument.

	    Returns:
	        The file, opened for reading in text mode. The caller is
	        responsible for closing it.

	    Raises:
	        SystemExit: With status 1 when the file cannot be opened, after
	            a message has been written to standard error.
	    """
	    parser = argparse.ArgumentParser(
	        description='Find the word with the most repeated letters.'
	    )
	    parser.add_argument(
	        'filename', type=str,
	        help='path to file containing input text.'
	    )
	    args = parser.parse_args()
	    try:
	        return open(args.filename, 'r')
	    except OSError:
	        # Report only I/O failures; a bare ``except`` would also swallow
	        # KeyboardInterrupt and genuine programming errors.
	        print(
	            'There was an error opening {}.'.format(args.filename),
	            file=sys.stderr
	        )
	        sys.exit(1)


	def find_needle_in_haystack(haystack: IO[Any]) -> Text:
	    """Return the word in *haystack* whose top letter repeats the most.

	    Args:
	        haystack: An iterable of text lines, typically an open text file.

	    Returns:
	        The first word that reaches the highest repetition count, or an
	        empty string when no word scores above zero.
	    """
	    needle = ''
	    max_count = 0

	    # read file line-by-line
	    for line in haystack:
	        # split line into words, ignoring punctuation
	        words = split_and_scrub_punctuation(line)

	        # count most frequent letter
	        for word in words:
	            count = get_max_letter_repitition(word)

	            # track word with most repeated letters; the strict
	            # comparison keeps the earliest word on a tie
	            if count > max_count:
	                needle = word
	                max_count = count

	    return needle


	def split_and_scrub_punctuation(line: str) -> List[Text]:
	    """Split *line* on whitespace and strip punctuation from each word.

	    Args:
	        line: One line of input text.

	    Returns:
	        The words of the line in order, with the characters
	        ``,.?!"“”:;`` removed. Tokens that consist only of punctuation
	        are dropped instead of being returned as empty strings.
	    """
	    # Apostrophes and hyphens are deliberately kept in the words.
	    punctuation_table = str.maketrans('', '', ',.?!"“”:;')
	    scrubbed_words = (
	        naive_word.translate(punctuation_table)
	        for naive_word in line.split()
	    )
	    # Skip tokens that became empty after scrubbing (e.g. a lone "?").
	    return [word for word in scrubbed_words if word]


	def get_max_letter_repitition(word: str) -> int:
	    """Return how often the most frequent character of *word* occurs.

	    Counting is case-insensitive, and apostrophes and hyphens are ignored.

	    Args:
	        word: A single word, possibly containing ``'`` or ``-``.

	    Returns:
	        The highest per-character count, or 0 when the word is empty or
	        consists only of apostrophes and hyphens.
	    """
	    # Exclude ' and - before counting, so that a word such as "a--" is
	    # scored by its letters rather than discarded because the hyphen is
	    # its single most common character.
	    letter_counter = Counter(
	        character for character in word.lower() if character not in "'-"
	    )
	    return max(letter_counter.values(), default=0)


	# Run the puzzle only when executed as a script, not when imported.
	if __name__ == '__main__':
	    main()

	# This code was checked for errors with flake8 and mypy.
	#
	# $ ls -lA input
	# -rw-r--r-- 1 geowa4 staff 2.0M Sep 15 09:03 combined
	#
	# $ time ./puzzle.py input
	# D'd-d'-dd'-dd-'
	# ./datto.py combined 2.61s user 0.01s system 99% cpu 2.631 total