Skip to content

Instantly share code, notes, and snippets.

@RisingInIris2017
Created November 28, 2023 03:03
Show Gist options
  • Save RisingInIris2017/9ee52508fed267ee33febe435257445d to your computer and use it in GitHub Desktop.
Save RisingInIris2017/9ee52508fed267ee33febe435257445d to your computer and use it in GitHub Desktop.
# Generated by GPT 4
# Modified by RisingInIris2017
# Licensed under Public Domain
import os
import re
# Whether to remove blank lines. If set to True, back up the files first.
REMOVE_BLANK_LINES = True
def remove_blank_lines(root_folder, extensions=('.txt',)):
    """Remove blank lines, in place, from matching files under *root_folder*.

    Walks ``root_folder`` recursively. For every file whose name ends with
    one of ``extensions``, each line that is empty or contains only
    whitespace is dropped. Files are read and written as UTF-8. This is
    destructive, so back the files up first.

    Args:
        root_folder: Top-level directory to walk.
        extensions: A file-name suffix, or an iterable of suffixes, that
            selects the files to process. Defaults to ``('.txt',)``.

    Raises:
        UnicodeDecodeError: If a matching file is not valid UTF-8.
        OSError: If a matching file cannot be read or written.
    """
    # str.endswith accepts a str or a tuple of str, so normalise to a tuple.
    if isinstance(extensions, str):
        suffixes = (extensions,)
    else:
        suffixes = tuple(extensions)

    for root, _dirs, files in os.walk(root_folder):
        for file in files:
            if not file.endswith(suffixes):
                continue
            file_path = os.path.join(root, file)
            # newline='' disables newline translation, so every kept line
            # retains the exact line ending it had on disk.
            with open(file_path, 'r', encoding='utf-8', newline='') as f:
                lines = f.readlines()
            kept = [line for line in lines if line.strip()]
            # Skip the truncating rewrite when there is nothing to remove.
            if len(kept) == len(lines):
                continue
            with open(file_path, 'w', encoding='utf-8', newline='') as f:
                f.writelines(kept)
def count_chinese_characters_and_lines(root_folder, extensions=('.txt',)):
    """Count lines and Chinese characters in matching files under a folder.

    Walks ``root_folder`` recursively and reads every file whose name ends
    with one of ``extensions``. Every line counts towards the line total,
    blank lines included. A character counts as Chinese when it lies in the
    range U+4E00..U+9FFF.

    Args:
        root_folder: Top-level directory to walk.
        extensions: A file-name suffix, or an iterable of suffixes, that
            selects the files to count. Defaults to ``('.txt',)``.

    Returns:
        A ``(total_lines, total_chinese_characters)`` tuple of ints.

    Raises:
        UnicodeDecodeError: If a matching file is not valid UTF-8.
    """
    # str.endswith accepts a str or a tuple of str, so normalise to a tuple.
    if isinstance(extensions, str):
        suffixes = (extensions,)
    else:
        suffixes = tuple(extensions)

    total_lines = 0
    total_chinese_characters = 0
    # Files are assumed to be UTF-8 encoded; Minecraft language files
    # generally satisfy this assumption.
    chinese_regex = re.compile(r'[\u4e00-\u9fff]')
    for root, _dirs, files in os.walk(root_folder):
        for file in files:
            if not file.endswith(suffixes):
                continue
            file_path = os.path.join(root, file)
            with open(file_path, 'r', encoding='utf-8') as f:
                for line in f:
                    total_lines += 1
                    total_chinese_characters += len(chinese_regex.findall(line))
    return total_lines, total_chinese_characters
# Fill in the top-level directory that contains the files to be counted.
folder_path = r''

# Optional clean-up pass before counting, controlled by REMOVE_BLANK_LINES.
if REMOVE_BLANK_LINES:
    remove_blank_lines(folder_path)

lines, characters = count_chinese_characters_and_lines(folder_path)
print(f'总行数: {lines}')
print(f'汉字总数: {characters}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment