Skip to content

Instantly share code, notes, and snippets.

@sawin0
Last active March 19, 2026 00:37
Show Gist options
  • Select an option

  • Save sawin0/34a69847983058e7790fc40a318ee9db to your computer and use it in GitHub Desktop.

Select an option

Save sawin0/34a69847983058e7790fc40a318ee9db to your computer and use it in GitHub Desktop.
from sarvamai import SarvamAI
from PyPDF2 import PdfReader, PdfWriter
import os
def split_pdf(input_path, output_dir, chunk_size=10):
    """Split a PDF into consecutive chunk files of at most ``chunk_size`` pages.

    Each chunk is written into ``output_dir`` as
    ``chunk_<first>_to_<last>.pdf``, where ``<first>`` and ``<last>`` are the
    1-based page numbers of the input that the chunk covers.

    Args:
        input_path: Path of the PDF to split.
        output_dir: Directory that receives the chunk files; it is created
            if it does not exist yet.
        chunk_size: Maximum number of pages per chunk. Must be at least 1.

    Returns:
        List of the chunk file paths that were written, in page order. The
        list is empty when the input has no pages.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    # Validate before touching the filesystem: a zero step would make range()
    # fail only after the output directory had been created, and a negative
    # step would silently produce no chunks at all.
    if chunk_size < 1:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )

    reader = PdfReader(input_path)
    total_pages = len(reader.pages)
    os.makedirs(output_dir, exist_ok=True)

    file_paths = []
    for start in range(0, total_pages, chunk_size):
        # The last chunk may be shorter than chunk_size.
        end = min(start + chunk_size, total_pages)

        writer = PdfWriter()
        for page_index in range(start, end):
            writer.add_page(reader.pages[page_index])

        # File names use 1-based, inclusive page numbers.
        output_path = os.path.join(output_dir, f"chunk_{start+1}_to_{end}.pdf")
        with open(output_path, "wb") as f:
            writer.write(f)
        file_paths.append(output_path)

    return file_paths
# Driver script: split one PDF into chunk files and push each chunk through
# its own Sarvam AI document-intelligence job, saving one ZIP per chunk.

# Placeholder credential: substitute a real subscription key before running.
client = SarvamAI(api_subscription_key="your-api-token")

# No chunk_size is passed, so split_pdf's default page count per chunk applies.
chunks = split_pdf("/path/to/document.pdf", "chunks")

# Chunks are processed one after another; each iteration creates a new job.
for i, chunk in enumerate(chunks):
    job = client.document_intelligence.create_job(
        language="ne-IN",
        output_format="md"
    )
    print(f"Job created: {job.job_id}")

    job.upload_file(chunk)
    print(f"File uploaded: {chunk}")

    job.start()
    print("Job started")

    # NOTE(review): presumably blocks until the job reaches a terminal state;
    # the state is only printed, not checked, before downloading - confirm
    # against the SDK how a failed job should be handled.
    status = job.wait_until_complete()
    print(f"Job completed with state: {status.job_state}")

    metrics = job.get_page_metrics()
    print(f"Page metrics: {metrics}")

    job.download_output("./output.zip")
    # os.replace overwrites an existing destination on every platform, whereas
    # os.rename raises FileExistsError on Windows when output_<i>.zip is left
    # over from an earlier run.
    os.replace("./output.zip", f"./output_{i}.zip")
    print(f"Output saved to ./output_{i}.zip")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment