Created
August 19, 2024 11:16
-
-
Save dsottimano/d934ce5c780c03d009f51102318f3840 to your computer and use it in GitHub Desktop.
delete_me
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import base64
import os
import time

import pandas as pd
import vertexai
import vertexai.preview.generative_models as generative_models
from vertexai.generative_models import GenerativeModel, SafetySetting
# Google Cloud project used by vertexai.init(). The original assignment had no
# value (a syntax error), so read it from the environment instead of
# hard-coding a project id in the source.
# NOTE(review): when the variable is unset this is None; the SDK is expected to
# fall back to the project of the default credentials -- confirm.
gcp_project_id = os.environ.get("GOOGLE_CLOUD_PROJECT")
def generate_and_save():
    """Stream Gemini's answer for three sample queries and append it to a file.

    Initialises Vertex AI for ``gcp_project_id`` in ``us-central1``, creates a
    ``gemini-1.5-flash-001`` model with ``textsi_1`` as its system instruction
    and requests a streamed response for a hard-coded prompt. Every streamed
    item is printed and appended to ``responses_test.json`` followed by a
    newline.

    Reads the module-level names ``gcp_project_id``, ``textsi_1``,
    ``generation_config`` and ``safety_settings``; they must be defined before
    this function is called.
    """
    vertexai.init(project=gcp_project_id, location="us-central1")
    model = GenerativeModel(
        "gemini-1.5-flash-001",
        system_instruction=[textsi_1],
    )

    # One "input:" line per search query, as required by the system instruction.
    prompt = (
        "\n"
        "input: kobe crash\n"
        "input: ja sex tape\n"
        "input: wizards at lakers time\n"
    )
    responses = model.generate_content(
        prompt,
        generation_config=generation_config,
        safety_settings=safety_settings,
        stream=True,
    )

    # Append mode keeps the output of earlier runs. The encoding is explicit so
    # non-ASCII model output does not fail under a non-UTF-8 locale default.
    with open("responses_test.json", "a", encoding="utf-8") as file:
        for response in responses:
            response_text = response.text
            print(response_text)
            # NOTE(review): with stream=True each item is presumably a partial
            # chunk, so this newline may land inside a JSON object -- confirm.
            file.write(response_text + "\n")
# Set up the instruction
# System prompt for the model: describes the compact JSON schema to emit for
# each "input:" line and gives few-shot examples. The examples are kept as
# valid JSON so the model is not taught a malformed output format.
textsi_1 = """
You are a professional sports analyst/expert in entity extraction. Your task is to parse search queries to identify and specific entities from diverse sources into a highly efficient & structured format, following detailed guidelines for clarity & completeness.
Given a list of inputs, extract the entities in this format:
If the content is sports-related:
{
\"o\": \"kobe and lebron played for the lakers and tom brady for the patriots\",
\"s\": true,
\"p\": [\"lebron james\", \"tom brady\"],
\"t\": [\"LA Lakers\", \"New England Patriots\"],
\"l\": [\"NBA\", \"NFL\"],
\"c\": [\"Phil Jackson\", \"Bill Belichick\"],
\"r\": [\"three-pointer\", \"touchdown\"],
\"sp\": [\"basketball\", \"football\"]
}
If the content is not sports-related:
{
\"s\": false
}
Legend:
o: original input
s: Is the content sports-related? (boolean)
p: Professional players (array of strings)
t: Professional teams (array of strings)
l: Sports leagues (array of strings)
c: Professional coaches (array of strings)
r: Sports terminology (array of strings)
sp: Sport name (array of strings)
Note: Each input will have "input:" and requires its own json output. Output the JSON for each input in the prompt. You must use the full name of the player, team or coach
Extra examples:
input: African Blazers For Ladies
output: {"o": "African Blazers For Ladies", "s": false}
input: ho you fat
output: {"o": "ho you fat", "s": true, "p": ["Steeve Ho you Fat"], "t": [], "l": [""], "c": [], "r": [""], "sp": ["basketball"]}
"""
# Configuration for LLM generation
# Passed unchanged as `generation_config` to model.generate_content().
generation_config = {
    "max_output_tokens": 8192,
    "temperature": 0.5,
    "top_p": 0.95,
    "top_k": 40,
}
# One SafetySetting per harm category, each with the BLOCK_NONE threshold.
# NOTE(review): presumably blocking is disabled so that raw search queries are
# still classified instead of being filtered -- confirm this is intended.
safety_settings = [
    SafetySetting(
        category=category,
        threshold=SafetySetting.HarmBlockThreshold.BLOCK_NONE,
    )
    for category in (
        SafetySetting.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        SafetySetting.HarmCategory.HARM_CATEGORY_HARASSMENT,
        SafetySetting.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        SafetySetting.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        SafetySetting.HarmCategory.HARM_CATEGORY_UNSPECIFIED,
    )
]
# Script entry point: runs once every module-level name the function reads
# (textsi_1, generation_config, safety_settings) has been defined above.
generate_and_save()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment