Skip to content

Instantly share code, notes, and snippets.

@olostep
Last active March 30, 2025 21:38
Show Gist options
  • Save olostep/e903f2e4fc28f8093b834b4df68b8031 to your computer and use it in GitHub Desktop.
Save olostep/e903f2e4fc28f8093b834b4df68b8031 to your computer and use it in GitHub Desktop.
import requests
import hashlib
import time
# Olostep API key; every request below sends it as a Bearer token in the
# Authorization header. The value here is a placeholder and must be replaced
# with a real key before the script is run.
API_KEY = "API_KEY"
# Step 1: Utilities
def create_hash_id(url: str) -> str:
    """Return a short, deterministic identifier derived from *url*.

    The identifier is the first 16 hexadecimal characters of the SHA-256
    digest of the URL's UTF-8 encoding, so the same URL always maps to the
    same ID.
    """
    digest = hashlib.sha256(url.encode("utf-8")).hexdigest()
    return digest[:16]
def compose_items_array():
    """Build the list of batch items for the example search URLs.

    Each item is a dict with two keys: ``custom_id`` (the value returned by
    ``create_hash_id`` for the URL) and ``url`` (the URL itself).
    """
    search_urls = (
        "https://www.google.com/search?q=nikola+tesla&gl=us&hl=en",
        "https://www.google.com/search?q=alexander+the+great&gl=us&hl=en",
        "https://www.google.com/search?q=google+solar+eclipse&gl=us&hl=en",
        "https://www.google.com/search?q=crispr&gl=us&hl=en",
        "https://www.google.com/search?q=genghis%20khan&gl=us&hl=en",
    )
    batch_items = []
    for search_url in search_urls:
        batch_items.append(
            {"custom_id": create_hash_id(search_url), "url": search_url}
        )
    return batch_items
# Step 2: Start batch
def start_batch(items, timeout=30):
    """Submit *items* as a new batch and return the batch ID.

    Args:
        items: List of item dicts to send under the ``"items"`` key of the
            JSON request body.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` can block indefinitely on a
            stalled connection.

    Returns:
        The ``"id"`` field of the JSON response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the request exceeds *timeout*.
    """
    response = requests.post(
        "https://api.olostep.com/v1/batches",
        headers={"Authorization": f"Bearer {API_KEY}"},
        json={"items": items},
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()["id"]
# Step 3: Wait for completion
def check_batch_status(batch_id, timeout=30):
    """Fetch the batch identified by *batch_id* and return its status.

    Args:
        batch_id: Identifier of the batch to query.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block the caller forever.

    Returns:
        The ``"status"`` field of the JSON response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the request exceeds *timeout*.
    """
    response = requests.get(
        f"https://api.olostep.com/v1/batches/{batch_id}",
        headers={"Authorization": f"Bearer {API_KEY}"},
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()["status"]
def wait_until_complete(batch_id, poll_interval=10, max_wait=None):
    """Poll the batch status until it is reported as ``"completed"``.

    Args:
        batch_id: Identifier of the batch to poll.
        poll_interval: Seconds to sleep between status checks.
        max_wait: Optional upper bound, in seconds, on the total time spent
            waiting. ``None`` (the default) waits indefinitely.

    Raises:
        TimeoutError: If *max_wait* is set and elapses before the batch is
            reported as completed.
    """
    print("Waiting for batch to complete...")
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = None if max_wait is None else time.monotonic() + max_wait
    while True:
        status = check_batch_status(batch_id)
        print("Status:", status)
        if status == "completed":
            print("Batch completed!")
            return
        # Any status other than "completed" is treated as "still running";
        # the optional deadline is the only guard against a batch that never
        # reaches that state.
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"Batch {batch_id} did not complete within {max_wait} seconds "
                f"(last status: {status})"
            )
        time.sleep(poll_interval)
# Step 4: Get items
def get_completed_items(batch_id, timeout=30):
    """Return the items recorded for the batch identified by *batch_id*.

    Args:
        batch_id: Identifier of the batch whose items are requested.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block the caller forever.

    Returns:
        The ``"items"`` field of the JSON response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the request exceeds *timeout*.
    """
    # NOTE(review): only a single response is read here; confirm whether the
    # endpoint paginates for large batches.
    response = requests.get(
        f"https://api.olostep.com/v1/batches/{batch_id}/items",
        headers={"Authorization": f"Bearer {API_KEY}"},
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()["items"]
# Step 5: Retrieve content with format specified
def retrieve_content(retrieve_id, timeout=30):
    """Fetch the stored content for *retrieve_id*, requesting markdown.

    Args:
        retrieve_id: Identifier of the content to retrieve.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block the caller forever.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the request exceeds *timeout*.
    """
    response = requests.get(
        "https://api.olostep.com/v1/retrieve",
        headers={"Authorization": f"Bearer {API_KEY}"},
        params={
            "retrieve_id": retrieve_id,
            "formats": ["markdown"],
        },
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()
# Step 6: Run end-to-end flow
if __name__ == "__main__":
    # End-to-end run: build the items, submit the batch, wait for it to
    # finish, then print the markdown retrieved for every item.
    print("Composing batch...")
    batch_items = compose_items_array()

    print("Starting batch...")
    batch_id = start_batch(batch_items)
    print("Batch ID:", batch_id)

    # NOTE(review): the status URL embeds API_KEY as a query parameter, so the
    # key is written to stdout; be careful where this output is captured.
    status_url = f"https://api.olostep.com/v1/batches/{batch_id}?token={API_KEY}"
    print(f"You can check the status of the batch at: {status_url}")

    wait_until_complete(batch_id)

    print("Fetching completed items...")
    finished_items = get_completed_items(batch_id)

    print("Retrieving content...")
    for entry in finished_items:
        rid = entry["retrieve_id"]
        print(f"\nRetrieving content for item {rid}...")
        result = retrieve_content(rid)
        print(f"\n---\nURL: {entry['url']}\nCustom ID: {entry['custom_id']}\n")
        # Fall back to a placeholder when the response has no markdown field.
        markdown = result.get("markdown_content", "[No markdown found]")
        print("Markdown:\n", markdown)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment