Skip to content

Instantly share code, notes, and snippets.

@markpbaggett
Created March 15, 2024 02:26
Show Gist options
  • Save markpbaggett/1bdf6c6c9fd82de2ae85bf898bfb95a2 to your computer and use it in GitHub Desktop.
Save markpbaggett/1bdf6c6c9fd82de2ae85bf898bfb95a2 to your computer and use it in GitHub Desktop.
Experiments with Claude 3 and Student Newspapers
import anthropic
import base64
import httpx
import json
class Issue:
    """Load the page images of one newspaper issue as base64 image blocks.

    On construction the manifest JSON at ``iiif_manifest`` is downloaded and
    every entry of its top-level ``items`` list is turned into a dict of the
    form ``{"type": "image", "source": {...}}`` holding the page's JPEG as
    base64 text. The resulting list is exposed as ``pages``.

    Args:
        iiif_manifest: URL of the manifest to download.

    Raises:
        httpx.HTTPStatusError: if the manifest or any page image request
            does not return a success status.
    """

    def __init__(self, iiif_manifest):
        self.manifest = iiif_manifest
        # NOTE: construction performs network I/O (one request per page).
        self.pages = self.__get_pages()

    def __get_pages(self):
        """Return one image block per entry in the manifest's ``items``."""
        response = httpx.get(self.manifest)
        # Fail loudly instead of trying to parse an error page as a manifest.
        response.raise_for_status()
        canvases = response.json()
        pages = []
        for item in canvases['items']:
            # The id's last path segment is presumably "<namespace>:<pid>";
            # keep the piece that follows the first colon.
            pid = item['items'][0]['id'].split('/')[-1].split(':')[1]
            image_url = (
                "https://digital.lib.utk.edu/collections/islandora/object/"
                f"beacon%3A{pid}/datastream/JPG"
            )
            pages.append(
                self.__switch_image_to_hash(self.__convert(image_url))
            )
        return pages

    @staticmethod
    def __convert(image_url):
        """Download ``image_url`` and return its bytes as a base64 string."""
        response = httpx.get(image_url)
        # Without this check an HTML error body would be encoded and later
        # submitted as if it were a JPEG.
        response.raise_for_status()
        return base64.b64encode(response.content).decode("utf-8")

    @staticmethod
    def __switch_image_to_hash(image):
        """Wrap base64 image text in the image-block dict used as content."""
        return {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": "image/jpeg",
                "data": image,
            },
        }
class CostCalculator:
    """Estimate what a request cost from its token counts.

    The result is stored on ``cost`` as a human-readable string of the form
    ``"Input cost: <x>, Output cost: <y>"``.

    Args:
        input_tokens: Number of tokens sent to the model.
        output_tokens: Number of tokens returned by the model.
        model: Model identifier used to select the price per token.
    """

    # (input, output) price per million tokens, keyed by model identifier.
    # NOTE(review): values follow Anthropic's published Claude 3 price list;
    # confirm against the current pricing page before relying on them.
    PRICING = {
        "claude-3-haiku-20240307": (0.25, 1.25),
        "claude-3-sonnet-20240229": (3.0, 15.0),
        "claude-3-opus-20240229": (15.0, 75.0),
    }
    # Rates applied when the model is not listed above; these are the rates
    # this class has always used, so unknown models keep their old result.
    DEFAULT_PRICING = (0.25, 1.25)

    def __init__(self, input_tokens, output_tokens, model):
        self.input_tokens = input_tokens
        self.output_tokens = output_tokens
        self.model = model
        self.cost = self.__calculate_cost()

    def __calculate_cost(self):
        """Return the formatted input and output cost for ``self.model``."""
        input_rate, output_rate = self.PRICING.get(
            self.model, self.DEFAULT_PRICING
        )
        input_cost = self.input_tokens / 1000000 * input_rate
        output_cost = self.output_tokens / 1000000 * output_rate
        return f"Input cost: {input_cost}, Output cost: {output_cost}"
class ClaudeRequest:
    """Send page images plus a text prompt to a model and keep the reply.

    Construction builds the message content, creates the API client, issues
    the request immediately and computes a cost summary from the reported
    token usage.

    Args:
        model: Model identifier passed to the API.
        key: API key used to create the client.
        prompt: Text placed after the page images in the user message.
        pages: List of content blocks (the page images). It is not modified.

    Attributes set: ``model``, ``prompt``, ``content``, ``client``,
    ``output`` (the response as plain dicts) and ``cost``.
    """

    def __init__(self, model, key, prompt, pages):
        self.model = model
        self.prompt = prompt
        self.content = self.__add_prompt(pages, prompt)
        self.client = self.__create_client(key)
        # NOTE: the network request happens here, during construction.
        self.output = self.__request()
        self.cost = self.__determine_cost()

    @staticmethod
    def __create_client(apikey):
        """Return an ``anthropic.Anthropic`` client for ``apikey``."""
        return anthropic.Anthropic(
            api_key=apikey,
        )

    @staticmethod
    def __add_prompt(all_pages, user_prompt):
        """Return the page blocks followed by a text block with the prompt.

        A new list is returned so the caller's list is left untouched;
        appending in place would leave a stray prompt in it and stack up
        prompts if the same pages were reused for a second request.
        """
        return [
            *all_pages,
            {
                "type": "text",
                "text": user_prompt,
            },
        ]

    def __request(self):
        """Send one user message and return the response as plain dicts."""
        response = self.client.messages.create(
            model=self.model,
            max_tokens=4096,
            messages=[
                {
                    "role": "user",
                    "content": self.content,
                }
            ],
        )
        # Serialise the response object and parse it back so the rest of
        # the class can use ordinary dict/list access.
        return json.loads(response.json())

    def __determine_cost(self):
        """Return the cost summary for the token usage in ``self.output``."""
        usage = self.output['usage']
        return CostCalculator(
            input_tokens=usage['input_tokens'],
            output_tokens=usage['output_tokens'],
            model=self.model,
        ).cost

    def write_output(self, output_file, iiif_manifest):
        """Write a Markdown report of the response to ``output_file``.

        Args:
            output_file: Path of the file to create or overwrite.
            iiif_manifest: Manifest URL; its last two path segments form the
                link text of the report heading.

        Raises:
            json.JSONDecodeError: if the first content block's text is not
                valid JSON.
            KeyError: if that JSON lacks ``articles`` or ``keywords``.
        """
        # Parse before opening the file so a malformed reply does not
        # truncate an existing report and leave it half written.
        text_response = json.loads(self.output['content'][0]['text'])
        segments = iiif_manifest.split('/')
        # Explicit UTF-8: article titles may contain characters that the
        # platform default encoding cannot represent.
        with open(output_file, 'w', encoding='utf-8') as report:
            report.write(
                f"# Newspaper Issue [{segments[-2]}:{segments[-1]}]({iiif_manifest})\n\n"
            )
            report.write("## Cost:\n\nCosts to run: \n\n")
            report.write(f"{self.cost}\n\n")
            report.write("## Articles and Ads Found:\n\n")
            for item in text_response['articles']:
                report.write(f"* {item}\n")
            report.write("\n\n## Keywords:\n\n")
            for item in text_response['keywords']:
                report.write(f"* {item}\n")
if __name__ == "__main__":
    # Example run: fetch one issue, ask the model for article titles and
    # keywords, and write the result to a Markdown file.
    import os  # local import: only this script entry point needs it

    manifest = "https://digital.lib.utk.edu/assemble/manifest/beacon/5521"
    # Take the key from the environment so a real secret never has to be
    # written into the file; the placeholder is kept as the fallback.
    key = os.environ.get("ANTHROPIC_API_KEY", "my-api-key")
    model = "claude-3-haiku-20240307"
    prompt = (
        "The combined images above create an issue of a newspaper. "
        "Give me the title of the articles available in the issue and "
        "10 keywords that describe the contents of the issue overall. "
        "Please respond with the message as JSON with the following format: "
        '{"articles": ["article"], "keywords": ["keyword1", "keyword2", "keyword3"]}.'
    )
    my_output_file = "harrison.md"

    issue = Issue(manifest)
    content = issue.pages
    request = ClaudeRequest(
        model=model,
        key=key,
        prompt=prompt,
        pages=content,
    )
    request.write_output(my_output_file, manifest)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment