Last active
December 1, 2021 18:01
-
-
Save niftycode/2f6e909614623a11ced0d1e5989806e4 to your computer and use it in GitHub Desktop.
This is an example of how to count words in a text file using Python 3.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
''' | |
Author: @niftycode | |
Version: 1.3 | |
Python 3.10 | |
Date created: February 14th, 2018 | |
Date modified: December 1st, 2021 | |
''' | |
import re | |
import argparse | |
import logging | |
# Logging is switched off by default: logging.disable(logging.CRITICAL)
# suppresses every record up to and including CRITICAL.
# For debug output, replace the two calls below with
# logging.basicConfig(level=logging.DEBUG).
logging.disable(logging.CRITICAL)
logging.basicConfig()
logger = logging.getLogger()  # root logger

# Printed by the --version switch; matches the version in the module header.
VERSION = "1.3"
FILE_NAME = "koalitionsvertrag_2021.txt"  # <- Enter your file here.
def get_parser() -> argparse.ArgumentParser:
    """
    Build the command line interface of the script.

    Two optional switches are registered: ``-w/--word`` takes the search
    term as a string, ``-v/--version`` is a boolean flag.

    Returns:
        argparse.ArgumentParser: The configured parser
    """
    cli = argparse.ArgumentParser()

    word_options = {
        "type": str,
        "required": False,
        "help": "the word to search for",
    }
    cli.add_argument("-w", "--word", **word_options)

    version_options = {
        "required": False,
        "action": "store_true",
        "help": "show the version",
    }
    cli.add_argument("-v", "--version", **version_options)

    return cli
def find_word(search_tearm: str): | |
""" | |
Open a textfile and search for the given word. | |
Show the number of words found. | |
Show the total. | |
Args: | |
search_tearm: | |
The word to search for | |
""" | |
# It is good practice to use the with keyword when dealing with file objects. | |
with open(FILE_NAME) as file_handle: | |
for line in file_handle: | |
line = line.rstrip() | |
text_line = re.findall( | |
search_tearm + "[^ ]*", line # <- define your regex here | |
) | |
logger.debug(text_line) | |
for w in text_line: | |
found_words[w] = found_words.get(w, 0) + 1 | |
logger.debug(found_words) | |
print("") | |
word_count = 0 | |
total = 0 | |
for word, count in found_words.items(): | |
print(f"Searched word: {word}\n Count: {count}") | |
word_count += 1 | |
total += count | |
print("") | |
print("Total number: {0}".format(total)) | |
def main():
    """
    Parse the command line and dispatch on the given switches.

    A search word takes precedence over the version flag; if neither is
    given, the usage help is printed.
    """
    cli = get_parser()
    options = cli.parse_args()

    if options.word:
        find_word(options.word)
        return

    if options.version:
        print(f"Koalitionsvertrag - Version: {VERSION}")
        return

    cli.print_help()
if __name__ == "__main__":
    # Start with an empty module-level dict for the words found,
    # then run the command line interface.
    found_words = {}
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment