Skip to content

Instantly share code, notes, and snippets.

@kemsakurai
Last active September 2, 2018 14:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kemsakurai/1c36b2f5caa4959d5d6260707518b2db to your computer and use it in GitHub Desktop.
Save kemsakurai/1c36b2f5caa4959d5d6260707518b2db to your computer and use it in GitHub Desktop.
Markdown 文書の統計量を計算するスクリプト
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import markdown
from bs4 import BeautifulSoup
from markdown import markdown
# Text encoding used when reading the file list and each Markdown file.
CHARSET = 'utf-8'
# Path of the CSV report that main() writes (relative to the working directory).
MARKDOWN_STATS_FILE = "./check_results/markdown_stats.csv"
# Path of the text file read by main(); each line is the path of one Markdown file.
MARKDOWN_FILE_LIST = "./markdown_list.txt"
def main():
    """Compute per-file statistics for Markdown documents and write them as CSV.

    Reads MARKDOWN_FILE_LIST (one Markdown file path per line), renders each
    file to HTML, counts characters, Japanese punctuation and selected HTML
    elements, and writes one row per file (preceded by a header row) to
    MARKDOWN_STATS_FILE.

    Raises:
        OSError: if the list file, a listed Markdown file, or the output
            file cannot be opened.
    """
    import csv

    # Header row of the report.
    results = [
        ["ファイル名", "文字数", "句読点数", "h1の数", "h2の数", "h3の数", "h4の数", "h5の数", "h6の数", "tableの数", "liの数", "dtの数", "imgの数",
         "aの数"]]

    with open(MARKDOWN_FILE_LIST, encoding=CHARSET) as f:
        for line in f:
            path = line.rstrip('\n')
            # A blank line (e.g. a trailing newline at end of the list) is not
            # a file path; opening '' would abort the whole run.
            if not path.strip():
                continue
            results.append(_collect_markdown_stats(path))

    # Create the output directory on first run instead of failing.
    output_dir = os.path.dirname(MARKDOWN_STATS_FILE)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # newline='' is required by the csv module (otherwise Windows output gets
    # blank rows); the explicit encoding keeps the Japanese header writable
    # regardless of the locale's default encoding.
    with open(MARKDOWN_STATS_FILE, "w", encoding=CHARSET, newline='') as results_csv:
        csv_writer = csv.writer(results_csv, delimiter=',')
        csv_writer.writerows(results)


def _collect_markdown_stats(path):
    """Return one report row for the Markdown file at *path*.

    The row holds, in order: the file's base name, the length of the rendered
    text, the number of "、" and "。" characters, and the number of h1-h6,
    table, li, dt, img and a elements in the rendered HTML.
    """
    with open(path, encoding=CHARSET) as markdown_file:
        html = markdown(markdown_file.read())
    soup = BeautifulSoup(html, 'html.parser')

    # All text nodes joined together, i.e. the document without its markup.
    text = ''.join(soup.find_all(string=True))

    row = [
        os.path.basename(path),
        len(text),
        text.count("、") + text.count("。"),
    ]
    # Tag order must match the header columns in main().
    for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'table', 'li', 'dt', 'img', 'a'):
        row.append(len(soup.find_all(tag)))
    return row
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
ファイル名 文字数 句読点数 h1の数 h2の数 h3の数 h4の数 h5の数 h6の数 tableの数 liの数 dtの数 imgの数 aの数
wicketとdropwizardを連携する.md 2339 37 1 1 0 7 5 0 0 8 0 0 8
EclipseLinkのDDLにsemicolonを付与する.md 1895 19 2 0 3 0 0 0 0 1 0 0 2
Apache Wicket 6でRestAPIを使う.md 6944 57 0 8 0 0 0 0 0 14 0 0 20
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment