# adem/word_frequency.py

## word_frequency.py
import sys
import re

def get_word_frequency(words: list[str]) -> dict[str, int]:
    """
    Count how many times each word occurs in ``words``.

    The returned dictionary maps every distinct word to its number of
    occurrences. An empty list produces an empty dictionary.

    >>> counts = get_word_frequency(["foo", "bar", "foo", "baz", "baz", "baz", "bar"])
    >>> counts["foo"]
    2
    >>> counts["bar"]
    2
    >>> counts["baz"]
    3
    """
    frequency = {}
    for token in words:
        # A word that has not been seen yet starts counting from zero.
        frequency[token] = frequency.get(token, 0) + 1
    return frequency

if __name__ == "__main__":
    # Command-line entry point: read the text file named by the single
    # argument, count how often each lower-cased word occurs, and write the
    # counts to "word_frequency.csv" as "word,count" lines, most frequent
    # first.
    if len(sys.argv) != 2:
        print("Usage: python main.py <file>")
        # sys.exit is always available; the bare exit() helper is injected by
        # the site module and is missing under "python -S" or frozen builds.
        sys.exit(1)

    # Split by various characters or strings, such as:
    # 1. whitespace characters
    # 2. punctuation marks
    # 3. blocks of strings surrounded in square brackets, denoting the
    #    speaker
    # A raw string keeps "\s" and "\[" as regex escapes instead of invalid
    # string escapes; the pattern is compiled once, outside the line loop.
    separator = re.compile(r"[\s.,?()\u200e]|\[.*?\]")

    words = []
    with open(sys.argv[1], encoding="utf8") as file:
        for line in file:
            # Consecutive separators produce empty strings; drop them here so
            # they are never counted or written out.
            words.extend(
                word.lower() for word in separator.split(line) if word
            )

    result = get_word_frequency(words)
    with open("word_frequency.csv", "w", encoding="utf8") as file:
        # sorted() is stable, so words with equal counts keep first-seen order.
        for k, v in sorted(result.items(), key=lambda item: item[1],
                           reverse=True):
            file.write("%s,%d\n" % (k, v))
	import sys
	import re

	def get_word_frequency(words: list[str]) -> dict[str, int]:
		"""
		Given a list of words, returns a dictionary containing the words as keys
		and their frequency as values.

		An empty list yields an empty dictionary.

		>>> words = ["foo", "bar", "foo", "baz", "baz", "baz", "bar"]
		>>> result = get_word_frequency(words)
		>>> result["foo"]
		2
		>>> result["bar"]
		2
		>>> result["baz"]
		3
		"""
		result = {}
		for word in words:
			if word in result:
				result[word] += 1
			else:
				# First occurrence of this word.
				result[word] = 1
		return result

	if __name__ == "__main__":
		# Command-line entry point: read the text file named by the single
		# argument, count how often each lower-cased word occurs, and write
		# the counts to "word_frequency.csv" as "word,count" lines, most
		# frequent first.
		if len(sys.argv) != 2:
			print("Usage: python main.py <file>")
			# sys.exit is always available; the bare exit() helper is only
			# injected by the site module.
			sys.exit(1)

		# Split by various characters or strings, such as:
		# 1. whitespace characters
		# 2. punctuation marks
		# 3. blocks of strings surrounded in square brackets, denoting the
		#    speaker
		# The "|" must stay unescaped so it acts as alternation between the
		# single-character separators and the bracketed blocks; written as
		# "\|" it would only match a literal pipe character. A raw string
		# keeps "\s" and "\[" as regex escapes.
		separator = re.compile(r"[\s.,?()\u200e]|\[.*?\]")

		words = []
		with open(sys.argv[1], encoding="utf8") as file:
			for line in file:
				# Consecutive separators produce empty strings; drop them
				# here so they are never counted or written out.
				words.extend(
					word.lower() for word in separator.split(line) if word
				)

		result = get_word_frequency(words)
		with open("word_frequency.csv", "w", encoding="utf8") as file:
			# sorted() is stable, so words with equal counts keep
			# first-seen order.
			for k, v in sorted(
				result.items(), key=lambda item: item[1], reverse=True
			):
				file.write("%s,%d\n" % (k, v))