Created
March 15, 2024 02:26
-
-
Save markpbaggett/1bdf6c6c9fd82de2ae85bf898bfb95a2 to your computer and use it in GitHub Desktop.
Experiments with Claude 3 and Student Newspapers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import base64
import json
import os

import anthropic
import httpx
class Issue:
    """A newspaper issue loaded from a manifest URL as a list of image blocks.

    Constructing an instance performs network I/O: the manifest is fetched,
    and then one JPG per entry of the manifest's top-level ``items`` list is
    downloaded and base64-encoded.

    Attributes are set in ``__init__``:
        manifest: the manifest URL passed by the caller.
        pages: list of ``{"type": "image", "source": {...}}`` dicts, one per
            manifest item, in manifest order.
    """

    # Datastream URL for a single page; ``{pid}`` is the numeric part of the
    # object identifier taken from the manifest.
    __JPG_URL_TEMPLATE = (
        "https://digital.lib.utk.edu/collections/islandora/object/"
        "beacon%3A{pid}/datastream/JPG"
    )

    def __init__(self, iiif_manifest):
        """Store the manifest URL and eagerly download every page image."""
        self.manifest = iiif_manifest
        self.pages = self.__get_pages()

    def __get_pages(self):
        """Fetch the manifest and return one image block per listed item.

        Raises:
            httpx.HTTPStatusError: if the manifest or any page image request
                does not succeed.
        """
        response = httpx.get(self.manifest)
        # Fail loudly instead of trying to parse an error page as a manifest.
        response.raise_for_status()
        manifest = response.json()
        return [
            self.__switch_image_to_hash(
                self.__convert(self.__page_url(canvas))
            )
            for canvas in manifest['items']
        ]

    @classmethod
    def __page_url(cls, canvas):
        """Build the JPG datastream URL for one manifest item.

        The identifier is read from the ``id`` of the item's first nested
        entry: the last ``/``-separated segment is split on ``:`` and the
        second part is used (presumably ``namespace:number`` -- verify against
        a real manifest).
        """
        pid = canvas['items'][0]['id'].split('/')[-1].split(':')[1]
        return cls.__JPG_URL_TEMPLATE.format(pid=pid)

    @staticmethod
    def __convert(image_url):
        """Download ``image_url`` and return its bytes as a base64 string.

        Raises:
            httpx.HTTPStatusError: if the download does not succeed, so that
                an error page is never encoded and passed off as a JPEG.
        """
        response = httpx.get(image_url)
        response.raise_for_status()
        return base64.b64encode(response.content).decode("utf-8")

    @staticmethod
    def __switch_image_to_hash(image):
        """Wrap a base64 string in the image content-block dict sent to Claude."""
        return {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": "image/jpeg",
                "data": image,
            },
        }
class CostCalculator:
    """Estimate the cost of one request from its token usage.

    Attributes are set in ``__init__``:
        input_tokens: number of prompt tokens billed.
        output_tokens: number of completion tokens billed.
        model: model name used to select the price tier.
        cost: human-readable summary string,
            ``"Input cost: <x>, Output cost: <y>"``.
    """

    # (input, output) price per million tokens, keyed by model-name prefix.
    # Figures are the published Claude 3 list prices in USD; update them if
    # pricing changes.
    __RATES_PER_MILLION = {
        "claude-3-haiku": (0.25, 1.25),
        "claude-3-sonnet": (3.0, 15.0),
        "claude-3-opus": (15.0, 75.0),
    }
    # Unknown models keep the rates this class always used (Haiku), so
    # existing callers see unchanged numbers.
    __DEFAULT_RATES = (0.25, 1.25)

    def __init__(self, input_tokens, output_tokens, model):
        self.input_tokens = input_tokens
        self.output_tokens = output_tokens
        self.model = model
        self.cost = self.__calculate_cost()

    def __rates(self):
        """Return the ``(input, output)`` per-million rates for ``self.model``."""
        name = str(self.model)
        for prefix, rates in self.__RATES_PER_MILLION.items():
            if name.startswith(prefix):
                return rates
        return self.__DEFAULT_RATES

    def __calculate_cost(self):
        """Return the formatted input/output cost summary."""
        input_rate, output_rate = self.__rates()
        input_cost = self.input_tokens / 1000000 * input_rate
        output_cost = self.output_tokens / 1000000 * output_rate
        return f"Input cost: {input_cost}, Output cost: {output_cost}"
class ClaudeRequest:
    """Send page images plus a text prompt to Claude and report the result.

    Constructing an instance performs the API call immediately.

    Attributes are set in ``__init__``:
        model: model name passed to the API.
        prompt: the user prompt text.
        content: the message content list (page blocks followed by the prompt).
        client: the ``anthropic.Anthropic`` client.
        output: the API response decoded into plain Python data.
        cost: cost summary string produced by ``CostCalculator``.
    """

    def __init__(self, model, key, prompt, pages):
        self.model = model
        self.prompt = prompt
        self.content = self.__add_prompt(pages, prompt)
        self.client = self.__create_client(key)
        self.output = self.__request()
        self.cost = self.__determine_cost()

    @staticmethod
    def __create_client(apikey):
        """Return an Anthropic client authenticated with ``apikey``."""
        return anthropic.Anthropic(api_key=apikey)

    @staticmethod
    def __add_prompt(all_pages, user_prompt):
        """Return a new content list: every page block, then the text prompt.

        A fresh list is built so the caller's ``all_pages`` is left untouched;
        appending in place would make a reused page list accumulate prompts.
        """
        return [*all_pages, {"type": "text", "text": user_prompt}]

    def __request(self):
        """Send one user message holding ``self.content`` and return the reply.

        The response object's ``json()`` output is decoded with ``json.loads``
        so the rest of the class can use plain dict access.
        """
        response = self.client.messages.create(
            model=self.model,
            max_tokens=4096,
            messages=[
                {
                    "role": "user",
                    "content": self.content,
                }
            ],
        )
        return json.loads(response.json())

    def __determine_cost(self):
        """Compute the cost summary from the token counts in ``self.output``."""
        usage = self.output['usage']
        return CostCalculator(
            input_tokens=usage['input_tokens'],
            output_tokens=usage['output_tokens'],
            model=self.model,
        ).cost

    @staticmethod
    def __parse_model_json(text):
        """Decode the JSON object contained in the model's reply text.

        The reply is parsed as-is first. If that fails, the span from the
        first ``{`` to the last ``}`` is parsed instead, which tolerates
        prose placed before or after the JSON object.

        Raises:
            json.JSONDecodeError: if no parseable JSON object is found.
        """
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            start, end = text.find('{'), text.rfind('}')
            if start == -1 or end <= start:
                raise
            return json.loads(text[start:end + 1])

    def write_output(self, output_file, iiif_manifest):
        """Write a Markdown report of the response to ``output_file``.

        The report holds a title linking to ``iiif_manifest``, the cost
        summary, and bullet lists of the ``articles`` and ``keywords`` found
        in the model's JSON reply.

        Raises:
            json.JSONDecodeError: if the reply text contains no valid JSON.
            KeyError: if the reply lacks ``articles`` or ``keywords``.
        """
        # Build the whole report before touching the file, so a malformed
        # reply cannot truncate or half-write an existing report.
        text_response = self.__parse_model_json(self.output['content'][0]['text'])
        url_parts = iiif_manifest.split('/')
        lines = [
            f"# Newspaper Issue [{url_parts[-2]}:{url_parts[-1]}]({iiif_manifest})\n\n",
            "## Cost:\n\nCosts to run: \n\n",
            f"{self.cost}\n\n",
            "## Articles and Ads Found:\n\n",
        ]
        lines.extend(f"* {item}\n" for item in text_response['articles'])
        lines.append("\n\n## Keywords:\n\n")
        lines.extend(f"* {item}\n" for item in text_response['keywords'])

        # Explicit UTF-8: article titles may contain non-ASCII characters that
        # a locale-default encoding could fail to write.
        with open(output_file, 'w', encoding='utf-8') as report:
            report.writelines(lines)
if __name__ == "__main__":
    # Example run: download one issue, ask Claude for article titles and
    # keywords, and write the result to a Markdown file.
    manifest = "https://digital.lib.utk.edu/assemble/manifest/beacon/5521"
    # Read the key from the environment so a real secret never has to be
    # pasted into the source; the placeholder remains as the fallback.
    key = os.environ.get("ANTHROPIC_API_KEY", "my-api-key")
    model = "claude-3-haiku-20240307"
    prompt = "The combined images above create an issue of a newspaper. Give me the title of the articles available in the issue and 10 keywords that describe the contents of the issue overall. Please respond with the message as JSON with the following format: {\"articles\": [\"article\"], \"keywords\": [\"keyword1\", \"keyword2\", \"keyword3\"]}."
    my_output_file = "harrison.md"

    issue = Issue(manifest)
    request = ClaudeRequest(
        model=model,
        key=key,
        prompt=prompt,
        pages=issue.pages,
    )
    request.write_output(my_output_file, manifest)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment