Last active
January 6, 2023 10:29
-
-
Save lawrenceadams/df379f4a590a30b16e1a988cd1afba0e to your computer and use it in GitHub Desktop.
Python Notebook Markdown Word Counter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Count the words in the Markdown cells of a Jupyter notebook.

Usage:
    python ipynb_word_count.py NOTEBOOK.ipynb

Lines that start with ``#`` (headings), ``<`` (raw HTML) or ``|`` (table
rows) are skipped, and stand-alone ``|`` tokens are never counted as words.
"""
import json
import argparse

"""
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
"""

# Inspired by https://stackoverflow.com/questions/71194571/word-count-of-markdown-cells-in-jupyter-notebook

__author__ = "Lawrence Adams"

# Whitespace-separated tokens that are never counted as words.
IGNORE_WORDS = frozenset({"|"})
# A line whose first character is one of these is skipped entirely.
IGNORE_LINESTART = ("#", "<", "|")


def _source_lines(source):
    """Return a cell's ``source`` as an iterable of lines.

    A cell's source may be stored either as a list of line strings or as a
    single multi-line string; iterating the latter directly would walk it
    character by character and produce a wrong count.
    """
    if isinstance(source, str):
        return source.splitlines(keepends=True)
    return source


def count_markdown_words(data):
    """Count words in the Markdown cells of a parsed notebook.

    Args:
        data: The notebook as loaded by ``json.load``; must contain a
            ``"cells"`` list whose items have ``"cell_type"`` and
            ``"source"`` keys.

    Returns:
        A ``(word_count, block_count)`` tuple, where ``block_count`` is the
        number of Markdown cells seen.

    Raises:
        KeyError: If ``"cells"``, ``"cell_type"`` or ``"source"`` is missing.
    """
    word_count = 0
    block_count = 0
    for cell in data["cells"]:
        if cell["cell_type"] != "markdown":
            continue
        block_count += 1
        for line in _source_lines(cell["source"]):
            # startswith() is safe on an empty line, where line[0] would
            # raise IndexError.
            if line.startswith(IGNORE_LINESTART):
                continue
            word_count += sum(
                1 for word in line.split() if word not in IGNORE_WORDS
            )
    return word_count, block_count


def main(argv=None):
    """Parse the command line, read the notebook and print the word count.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        prog="ipynb_word_count.py",
        description="Counts the number of words in Markdown blocks within a iPy Notebook. Ignores titles and words in the ignore_words list.",  # noqa
    )
    parser.add_argument("filename")
    args = parser.parse_args(argv)

    with open(args.filename, "r", encoding="utf-8") as json_file:
        data = json.load(json_file)

    word_count, block_count = count_markdown_words(data)
    print(
        f"Word Count → \033[01m\033[94m{word_count}\033[0m, in {block_count} blocks."  # noqa
    )


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example usage: