Last active
March 19, 2026 00:37
-
-
Save sawin0/34a69847983058e7790fc40a318ee9db to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from sarvamai import SarvamAI | |
| from PyPDF2 import PdfReader, PdfWriter | |
| import os | |
def split_pdf(input_path, output_dir, chunk_size=10):
    """Split a PDF into consecutive chunk files of at most ``chunk_size`` pages.

    Each chunk is written to ``output_dir`` (created if missing) as
    ``chunk_<first>_to_<last>.pdf``, where the page numbers in the file
    name are 1-based and inclusive.

    Args:
        input_path: Path of the PDF to split.
        output_dir: Directory that receives the chunk files.
        chunk_size: Maximum number of pages per chunk; must be >= 1.

    Returns:
        The paths of the written chunk files, in page order.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    # Validate before touching the filesystem: a zero step would only fail
    # later inside range(), and a negative step would silently yield no chunks.
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be >= 1, got {chunk_size!r}")

    reader = PdfReader(input_path)
    total_pages = len(reader.pages)
    os.makedirs(output_dir, exist_ok=True)

    file_paths = []
    for start in range(0, total_pages, chunk_size):
        # The final chunk may hold fewer than chunk_size pages.
        end = min(start + chunk_size, total_pages)

        writer = PdfWriter()
        for page_index in range(start, end):
            writer.add_page(reader.pages[page_index])

        output_path = os.path.join(output_dir, f"chunk_{start+1}_to_{end}.pdf")
        with open(output_path, "wb") as f:
            writer.write(f)
        file_paths.append(output_path)

    return file_paths
# Placeholder credential: substitute a real subscription key before running.
client = SarvamAI(api_subscription_key="your-api-token")

# Split the source PDF into smaller files; each one is submitted as its own job.
chunks = split_pdf("/path/to/document.pdf", "chunks")

for i, chunk in enumerate(chunks):
    job = client.document_intelligence.create_job(
        language="ne-IN",
        output_format="md"
    )
    print(f"Job created: {job.job_id}")

    job.upload_file(chunk)
    print(f"File uploaded: {chunk}")

    job.start()
    print("Job started")

    status = job.wait_until_complete()
    print(f"Job completed with state: {status.job_state}")
    # NOTE(review): the output is downloaded regardless of job_state;
    # confirm whether non-successful states should be skipped here.

    metrics = job.get_page_metrics()
    print(f"Page metrics: {metrics}")

    # Download to a fixed temporary name, then move it to a per-chunk name so
    # that later iterations do not overwrite earlier results.
    job.download_output("./output.zip")
    # os.replace (unlike os.rename) overwrites an existing destination on every
    # platform, so re-running the script does not fail on Windows.
    os.replace("./output.zip", f"./output_{i}.zip")
    print(f"Output saved to ./output_{i}.zip")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment