Created
July 17, 2025 15:19
-
-
Save SharedAcademia/5e73a7a375c0409f47c5cb3d380d4910 to your computer and use it in GitHub Desktop.
text to speech (tts) .docx script for python using kokoro
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # tts_from_docx.py | |
| # Before running, install the dependencies in a terminal (quote the version spec so the shell does not treat ">" as a redirect): pip install "kokoro>=0.9.2" soundfile python-docx | |
| from kokoro import KPipeline | |
| import soundfile as sf | |
| from docx import Document | |
| import numpy as np | |
| import os | |
| # CONFIGURATION | |
| ########################################## | |
DOCX_FILE = "abc.docx"       # Path to your input file
OUTPUT_DIR = "audio_parts"   # Folder for the intermediate per-segment WAV files
FINAL_AUDIO = "audio.wav"    # Final combined file
# Kokoro language code: 'a' selects American English ('b' is British English).
# It is not an "auto-detect" setting.
LANG_CODE = 'a'
# Kokoro voice names start with the language code followed by a gender letter,
# e.g. 'af_heart' is the American English "Heart" voice. The previous value
# 'en_heart' is not a published Kokoro voice, so loading it fails.
VOICE = 'af_heart'
CHUNK_SIZE = 500             # Approximate number of characters per text chunk
SAMPLE_RATE = 24000          # Sample rate (Hz) used when writing and checking WAV files
| # 1. Load DOCX | |
| ########################################## | |
def extract_text_from_docx(docx_path):
    """Return the text of the non-blank paragraphs in a .docx file.

    Args:
        docx_path: Path of the document, passed straight to ``Document``.

    Returns:
        A single string with one paragraph per line. Paragraphs whose text
        is empty or whitespace-only are left out.
    """
    document = Document(docx_path)
    kept_paragraphs = []
    for paragraph in document.paragraphs:
        # Skip blank paragraphs so they do not produce empty lines.
        if paragraph.text.strip():
            kept_paragraphs.append(paragraph.text)
    return "\n".join(kept_paragraphs)
# Read the whole document up front and report its size.
print(f"Reading DOCX: {DOCX_FILE}")
full_text = extract_text_from_docx(DOCX_FILE)
total_chars = len(full_text)
print(f"Total text length: {total_chars} characters")
| # 2. Split text into chunks | |
| ########################################## | |
def split_text(text, max_chars=CHUNK_SIZE):
    """Group the lines of *text* into chunks of roughly *max_chars* characters.

    The text is split on newlines and the resulting paragraphs are packed
    greedily: a paragraph joins the current chunk while the chunk's running
    length (each paragraph counted with one trailing newline) plus the new
    paragraph stays below *max_chars*; otherwise the current chunk is closed
    and the paragraph starts a new one. Paragraphs are never split, so a
    single paragraph longer than *max_chars* becomes a chunk of its own.

    Args:
        text: The full text, paragraphs separated by ``"\\n"``.
        max_chars: Soft upper bound on the size of a chunk.

    Returns:
        A list of non-empty chunk strings, stripped of surrounding whitespace.
    """
    chunks = []
    pending = []       # paragraphs collected for the chunk being built
    pending_len = 0    # their combined length, counting one newline each

    for paragraph in text.split("\n"):
        if pending_len + len(paragraph) >= max_chars:
            # The paragraph does not fit: close the current chunk. Skip it if
            # it is empty (e.g. the very first paragraph is already too long),
            # so that no empty chunk is ever emitted.
            finished = "\n".join(pending).strip()
            if finished:
                chunks.append(finished)
            pending = []
            pending_len = 0
        pending.append(paragraph)
        pending_len += len(paragraph) + 1

    # Flush whatever is left after the last paragraph.
    remainder = "\n".join(pending).strip()
    if remainder:
        chunks.append(remainder)
    return chunks
# Break the document text into pieces and report how many there are.
chunks = split_text(full_text)
chunk_count = len(chunks)
print(f"Split into {chunk_count} chunks of ~{CHUNK_SIZE} characters")
| # 3. Create output folder | |
| ########################################## | |
# exist_ok=True makes this a no-op when the folder is already there and avoids
# the check-then-create race of testing os.path.exists() first.
os.makedirs(OUTPUT_DIR, exist_ok=True)
| # 4. Initialize Kokoro TTS | |
| ########################################## | |
# Announce the step before constructing the pipeline so the user sees
# progress while it is being set up.
print("Loading Kokoro pipeline...")
pipeline = KPipeline(
    lang_code=LANG_CODE,
)
| # 5. Generate audio for each chunk | |
| ########################################## | |
# Paths of every WAV segment written, in playback order.
all_audio_files = []
for chunk_no, chunk_text in enumerate(chunks):
    print(f"Generating audio for chunk {chunk_no + 1}/{len(chunks)}")
    segments = pipeline(chunk_text, voice=VOICE)
    # The pipeline may yield several segments per chunk; each one is a
    # 3-tuple of which only the last item (the audio samples) is used here.
    # NOTE(review): the first two items are presumably graphemes and phonemes.
    for segment_no, (_first, _second, samples) in enumerate(segments):
        wav_path = os.path.join(OUTPUT_DIR, f"part_{chunk_no}_{segment_no}.wav")
        sf.write(wav_path, samples, SAMPLE_RATE)
        all_audio_files.append(wav_path)
        print(f"Saved {wav_path}")
| # 6. Combine all chunks into single WAV | |
| ########################################## | |
print("Combining audio files into one audiobook...")
# np.concatenate() rejects an empty list with an unhelpful message, so fail
# early and clearly when no segment was produced (e.g. an empty document).
if not all_audio_files:
    raise ValueError("No audio segments were generated; nothing to combine.")
combined_audio = []
for filename in all_audio_files:
    audio_data, sr = sf.read(filename)
    # Every segment must share the output sample rate, otherwise the joined
    # audio would play at the wrong speed.
    if sr != SAMPLE_RATE:
        raise ValueError(f"Sample rate mismatch in {filename}")
    combined_audio.append(audio_data)
# Join the segments end to end, in the order they were generated.
final_audio_data = np.concatenate(combined_audio)
sf.write(FINAL_AUDIO, final_audio_data, SAMPLE_RATE)
print(f"🎧 Audiobook created: {FINAL_AUDIO}")
| # 7. Delete individual chunk files | |
| ########################################## | |
print("🧹 Cleaning up individual audio chunks...")
for filename in all_audio_files:
    # Cleanup is best-effort: a file that cannot be removed is reported and
    # skipped. Only filesystem errors are expected from os.remove(), so only
    # OSError is caught and the try body is limited to the removal itself.
    try:
        os.remove(filename)
    except OSError as e:
        print(f"⚠️ Could not delete {filename}: {e}")
    else:
        print(f"🗑️ Deleted {filename}")
print("✅ All done! Created by SharedAcademia.org")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment