Last active
September 9, 2019 03:06
-
-
Save nithinivi/f34008c67f5c0f083f1051f95c0cf849 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
#Usage Example | |
# python Word_counter.py --folder ./fileFolder --words nithin neetha --out d.csv | |
import argparse | |
import csv | |
import pathlib | |
def count_dict(file: pathlib.Path, match_words: list) -> dict:
    """
    Create a dict mapping each of match_words to its count in a file.

    Counting uses ``str.count``: non-overlapping, case-sensitive substring
    occurrences, so "cat" is also counted inside "concatenate".

    file: path of the text file to scan.
    match_words: strings to count; duplicates collapse into a single key.

    Returns ``{match_word: occurrences}``.
    Raises OSError if the file cannot be opened or read.
    """
    # Decode explicitly as UTF-8 so the counts do not depend on the
    # platform's default encoding. Undecodable bytes are replaced instead of
    # raising, so one stray binary file does not abort a whole-folder run.
    with open(file, encoding="utf-8", errors="replace") as f:
        contents = f.read()
    # Built directly as a comprehension; a local named ``count_dict`` would
    # shadow this function's own name.
    return {word: contents.count(word) for word in match_words}
def folder_name_dict(files_l, match_words: list) -> list:
    """
    Create one dict of match_words counts for every file in files_l.

    files_l: iterable of path objects; each one is passed to count_dict and
        its ``.name`` attribute is recorded.
    match_words: strings to count in every file.

    Returns a list (not a dict, despite the function name) holding one dict
    per file in iteration order: the counts returned by count_dict plus a
    "file" key whose value is the file's name.
    """
    # The "file" entry is written after the counts, so if "file" is itself
    # one of match_words the file name overwrites that word's count.
    return [
        {**count_dict(path, match_words), "file": path.name}
        for path in files_l
    ]
def create_csv(dict_list: list, csv_file: pathlib.Path) -> None:
    """
    Write dict_list to csv_file as CSV: a header row, then one row per dict.

    dict_list: iterable of dicts, one per output row. A single dict is also
        accepted and written as one row.
    csv_file: path of the file to create or overwrite.

    The header is the union of all row keys in first-seen order; a row that
    lacks a key gets an empty cell (DictWriter's default ``restval``). When
    there are no rows at all, an empty file is produced.
    """
    # Normalise the input to a list of row dicts. Calling .keys() on the list
    # itself (as this function used to) raises AttributeError.
    rows = [dict_list] if isinstance(dict_list, dict) else list(dict_list)
    # dict.fromkeys de-duplicates while keeping first-seen key order.
    fieldnames = list(dict.fromkeys(key for row in rows for key in row))
    # newline='' lets the csv module control line endings itself; without it
    # an extra blank line appears between rows on Windows.
    with open(csv_file, 'w', newline='', encoding='utf-8') as f:
        if not rows:
            return  # nothing to write: leave the file empty
        w = csv.DictWriter(f, fieldnames)
        w.writeheader()
        w.writerows(rows)
def main(folder_path, csv_file, matching_words):
    """
    Count matching_words in the files of folder_path and write them to csv_file.

    folder_path: path object of the directory to scan; only its direct
        entries for which ``is_file()`` is true are processed.
    csv_file: destination handed to create_csv.
    matching_words: words handed to folder_name_dict for counting.
    """
    # Lazily keep only the directory entries that are files.
    regular_files = filter(lambda entry: entry.is_file(), folder_path.iterdir())
    per_file_counts = folder_name_dict(regular_files, matching_words)
    create_csv(per_file_counts, csv_file)
if __name__ == "__main__":
    # Command-line entry point: parse the options, validate them, run main().
    parser = argparse.ArgumentParser(
        description='Count some words in files in folder.')
    # All three options are required: without them pathlib.Path(None) or
    # iterating over None would fail later with an unhelpful TypeError.
    parser.add_argument('--folder',
                        dest='folderPath',
                        required=True,
                        help='folder path to the txt files')
    parser.add_argument('--out',
                        dest='csvFile',
                        type=str,
                        required=True,
                        help='name of output csv')
    parser.add_argument('--words',
                        dest='matchWords',
                        type=str,
                        nargs='+',
                        required=True,
                        help='words which are to be counted')
    args = parser.parse_args()

    folder_path = pathlib.Path(args.folderPath)
    csv_file = pathlib.Path(args.csvFile)
    matching_words = args.matchWords

    # parser.error() prints the usage and the message to stderr and exits
    # with status 2, so main() is never reached with invalid arguments
    # (previously the problem was only printed and execution continued).
    if not folder_path.is_dir():
        parser.error("invalid directory")
    if csv_file.suffix != ".csv":
        parser.error("file format cannot be accpeted")

    main(folder_path, csv_file, matching_words)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment