Skip to content

Instantly share code, notes, and snippets.

@aont
Created April 6, 2024 07:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aont/a6a9559334980a87376957cae2748c93 to your computer and use it in GitHub Desktop.
Save aont/a6a9559334980a87376957cae2748c93 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import os
import sys
CHUNK_LEN = 4000   # maximum number of characters written to each output file
MARGIN_LEN = 200   # characters shared between the end of one chunk and the start of the next


def split_with_overlap(text, chunk_len=CHUNK_LEN, margin_len=MARGIN_LEN):
    """Yield consecutive slices of *text* that overlap by *margin_len* characters.

    Each slice is at most ``chunk_len`` characters long and starts
    ``chunk_len - margin_len`` characters after the previous one.  Iteration
    stops as soon as a slice reaches the end of *text*, so no empty slice and
    no slice that merely repeats the tail of the previous one is produced.
    Empty *text* yields nothing.

    Args:
        text: The string to split.
        chunk_len: Maximum length of each slice; must be positive.
        margin_len: Overlap between neighbouring slices; must satisfy
            ``0 <= margin_len < chunk_len`` so that every step advances.

    Raises:
        ValueError: If the lengths would make the stride non-positive.  The
            check runs when iteration starts, because this is a generator.
    """
    if chunk_len <= 0:
        raise ValueError("chunk_len must be positive")
    if not 0 <= margin_len < chunk_len:
        raise ValueError("margin_len must satisfy 0 <= margin_len < chunk_len")

    stride = chunk_len - margin_len
    total = len(text)
    pos = 0
    while pos < total:
        yield text[pos:pos + chunk_len]
        # Once a slice has covered the last character, any further slice
        # would lie entirely inside the overlap margin (or be empty).
        if pos + chunk_len >= total:
            break
        pos += stride


def main(argv):
    """Split the file named by ``argv[1]`` into numbered overlapping chunk files.

    The input is read as UTF-8, every space and newline character is removed,
    and each chunk is written to ``"<input basename> NNN.txt"`` in the current
    working directory (NNN is a 1-based, zero-padded index).

    Returns:
        Process exit status: 0 on success, 2 when the input path is missing.
    """
    if len(argv) < 2:
        prog = os.path.basename(argv[0]) if argv else "script"
        print(f"usage: {prog} INPUT_FILE", file=sys.stderr)
        return 2

    input_fn = argv[1]
    with open(input_fn, "rt", encoding="UTF-8") as fp:
        input_text = fp.read()
    # Chunk boundaries are computed on the text with spaces/newlines stripped.
    input_text = input_text.replace(" ", "").replace("\n", "")

    output_fn_base = os.path.splitext(os.path.basename(input_fn))[0]
    for output_idx, chunk_text in enumerate(split_with_overlap(input_text), start=1):
        output_fn = f"{output_fn_base} {output_idx:03d}.txt"
        with open(output_fn, "wt", encoding="UTF-8") as fp:
            fp.write(chunk_text)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment