Skip to content

Instantly share code, notes, and snippets.

@nithinivi
Last active September 9, 2019 03:06
Show Gist options
  • Save nithinivi/f34008c67f5c0f083f1051f95c0cf849 to your computer and use it in GitHub Desktop.
Save nithinivi/f34008c67f5c0f083f1051f95c0cf849 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# coding: utf-8
#Usage Example
# python Word_counter.py --folder ./fileFolder --words nithin neetha --out d.csv
import argparse
import csv
import pathlib
def count_dict(file: pathlib.PosixPath, match_words: list) -> dict:
    """
    Map every entry of match_words to how often it occurs in a file.

    The file is read in one go as text and each entry is counted with
    str.count, i.e. as non-overlapping substring occurrences.
    """
    with open(file) as handle:
        text = handle.read()
    return {word: text.count(word) for word in match_words}
def folder_name_dict(files_l, match_words: list) -> list:
    """
    Build one row of word counts for every file.

    Args:
        files_l: iterable of path objects; each one is passed to
            count_dict and must expose a ``name`` attribute.
        match_words: strings to count in every file.

    Returns:
        A list (not a dict) holding one dict per file, in iteration
        order: the counts returned by count_dict plus a "file" key set
        to the file's name.
    """
    # NOTE: a match word literally equal to "file" is overwritten by the
    # file name, because both share the same key in the row.
    return [
        {**count_dict(path, match_words), "file": path.name}
        for path in files_l
    ]
def create_csv(dict_list: list, csv_file: pathlib.Path) -> None:
    """
    Write a list of row dicts to a CSV file with a header line.

    Args:
        dict_list: iterable of dicts, one per output row.
        csv_file: path of the CSV file to create or overwrite.

    The header is the union of all row keys in first-seen order; a row
    lacking a key gets an empty cell (DictWriter's default restval).
    With no rows at all an empty file is written.
    """
    rows = list(dict_list)  # materialise: iterated twice below
    fieldnames = list(dict.fromkeys(key for row in rows for key in row))
    # newline='' lets the csv module control line endings itself.
    with open(csv_file, 'w', newline='') as f:
        if not fieldnames:
            return
        writer = csv.DictWriter(f, fieldnames)
        writer.writeheader()
        writer.writerows(rows)
def main(folder_path, csv_file, matching_words):
    """
    Count matching_words in each regular file directly inside
    folder_path and hand the per-file results to create_csv.
    """
    # Lazily keep only regular files; other directory entries are skipped.
    regular_files = filter(lambda entry: entry.is_file(),
                           folder_path.iterdir())
    per_file_counts = folder_name_dict(regular_files, matching_words)
    create_csv(per_file_counts, csv_file)
if __name__ == "__main__":
    # Command-line entry point: parse the options, validate them, then run.
    parser = argparse.ArgumentParser(
        description='Count some words in files in folder.')
    # All three options are mandatory; without required=True a missing one
    # would surface as a TypeError from pathlib.Path(None) further down.
    parser.add_argument('--folder',
                        dest='folderPath',
                        required=True,
                        help='folder path to the txt files')
    parser.add_argument('--out',
                        dest='csvFile',
                        type=str,
                        required=True,
                        help='name of output csv')
    parser.add_argument('--words',
                        dest='matchWords',
                        type=str,
                        nargs='+',
                        required=True,
                        help='words which are to be counted')
    args = parser.parse_args()
    folder_path = pathlib.Path(args.folderPath)
    csv_file = pathlib.Path(args.csvFile)
    matching_words = args.matchWords
    # parser.error prints the usage plus the message and exits with
    # status 2, so invalid input no longer falls through to main().
    if not folder_path.is_dir():
        parser.error("invalid directory")
    if csv_file.suffix != ".csv":
        parser.error("file format cannot be accepted")
    main(folder_path, csv_file, matching_words)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment