-
-
Save olostep/e903f2e4fc28f8093b834b4df68b8031 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import hashlib
import time

import requests
# Olostep API key, sent as a Bearer token on every request below.
# Replace the placeholder with a real key before running the script.
API_KEY = "API_KEY"
# Step 1: Utilities
def create_hash_id(url: str) -> str:
    """Return a short, deterministic identifier for *url*.

    The identifier is the first 16 hexadecimal characters of the SHA-256
    digest of the UTF-8 encoded URL, so the same URL always yields the same
    ID.
    """
    return hashlib.sha256(url.encode("utf-8")).hexdigest()[:16]
def compose_items_array(urls=None) -> list:
    """Build the list of items to submit in a batch.

    Args:
        urls: Optional iterable of URL strings. When omitted, the five
            sample Google search URLs defined below are used.

    Returns:
        A list of ``{"custom_id": ..., "url": ...}`` dicts, one per URL,
        where ``custom_id`` is ``create_hash_id(url)``.
    """
    if urls is None:
        urls = [
            "https://www.google.com/search?q=nikola+tesla&gl=us&hl=en",
            "https://www.google.com/search?q=alexander+the+great&gl=us&hl=en",
            "https://www.google.com/search?q=google+solar+eclipse&gl=us&hl=en",
            "https://www.google.com/search?q=crispr&gl=us&hl=en",
            "https://www.google.com/search?q=genghis%20khan&gl=us&hl=en",
        ]
    return [{"custom_id": create_hash_id(url), "url": url} for url in urls]
# Step 2: Start batch
def start_batch(items, timeout=30):
    """Create a batch on the Olostep API and return its ID.

    Args:
        items: List of item dicts to submit; sent as the ``"items"`` field
            of the JSON request body.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely.

    Returns:
        The value of the ``"id"`` field of the JSON response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the server does not respond within *timeout*.
        KeyError: If the response body has no ``"id"`` field.
    """
    payload = {
        "items": items,
    }
    headers = {"Authorization": f"Bearer {API_KEY}"}
    response = requests.post(
        "https://api.olostep.com/v1/batches",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()["id"]
# Step 3: Wait for completion
def check_batch_status(batch_id, timeout=30):
    """Fetch the current status of a batch.

    Args:
        batch_id: ID of the batch to query.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely.

    Returns:
        The value of the ``"status"`` field of the JSON response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the server does not respond within *timeout*.
        KeyError: If the response body has no ``"status"`` field.
    """
    headers = {"Authorization": f"Bearer {API_KEY}"}
    response = requests.get(
        f"https://api.olostep.com/v1/batches/{batch_id}",
        headers=headers,
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()["status"]
def wait_until_complete(batch_id, poll_interval=10, timeout=None):
    """Block until the batch reports the status ``"completed"``.

    Polls ``check_batch_status`` and prints each status it sees.

    Args:
        batch_id: ID of the batch to wait for.
        poll_interval: Seconds to sleep between status checks.
        timeout: Optional overall limit in seconds. ``None`` (the default)
            waits indefinitely.

    Raises:
        TimeoutError: If *timeout* is set and the batch has not completed
            before it elapses.
    """
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = None if timeout is None else time.monotonic() + timeout
    print("Waiting for batch to complete...")
    while True:
        status = check_batch_status(batch_id)
        print("Status:", status)
        if status == "completed":
            print("Batch completed!")
            return
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"Batch {batch_id} did not complete within {timeout} seconds "
                f"(last status: {status})"
            )
        time.sleep(poll_interval)
# Step 4: Get items
def get_completed_items(batch_id, timeout=30):
    """Fetch the items of a batch.

    Args:
        batch_id: ID of the batch whose items are requested.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely.

    Returns:
        The value of the ``"items"`` field of the JSON response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the server does not respond within *timeout*.
        KeyError: If the response body has no ``"items"`` field.
    """
    # NOTE(review): only a single response is read; if this endpoint
    # paginates, later pages are not fetched - confirm against the API docs.
    headers = {"Authorization": f"Bearer {API_KEY}"}
    response = requests.get(
        f"https://api.olostep.com/v1/batches/{batch_id}/items",
        headers=headers,
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()["items"]
# Step 5: Retrieve content with format specified
def retrieve_content(retrieve_id, timeout=30):
    """Retrieve the stored content for one item, requesting markdown.

    Args:
        retrieve_id: Identifier of the content to retrieve, sent as the
            ``retrieve_id`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely.

    Returns:
        The parsed JSON response body.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the server does not respond within *timeout*.
    """
    url = "https://api.olostep.com/v1/retrieve"
    headers = {"Authorization": f"Bearer {API_KEY}"}
    params = {
        "retrieve_id": retrieve_id,
        # A list value is encoded by requests as a repeated query key.
        "formats": ["markdown"],
    }
    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json()
# Step 6: Run end-to-end flow
if __name__ == "__main__":
    # End-to-end flow: compose items, start the batch, wait for it to
    # complete, then fetch each item's markdown content and print it.
    print("Composing batch...")
    items = compose_items_array()

    print("Starting batch...")
    batch_id = start_batch(items)
    print("Batch ID:", batch_id)
    # NOTE(review): this prints the API key in clear text as a query
    # parameter; avoid sharing or logging this output.
    print(f"You can check the status of the batch at: https://api.olostep.com/v1/batches/{batch_id}?token={API_KEY}")

    wait_until_complete(batch_id)

    print("Fetching completed items...")
    completed_items = get_completed_items(batch_id)

    print("Retrieving content...")
    for item in completed_items:
        retrieve_id = item["retrieve_id"]
        print(f"\nRetrieving content for item {retrieve_id}...")
        content = retrieve_content(retrieve_id)
        print(f"\n---\nURL: {item['url']}\nCustom ID: {item['custom_id']}\n")
        # Fall back to a placeholder when the response has no markdown field.
        print("Markdown:\n", content.get("markdown_content", "[No markdown found]"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment