|
from typing import Mapping, Iterable |
|
from io import BytesIO |
|
import google |
|
from dotenv import load_dotenv |
|
import google.generativeai as genai |
|
import os |
|
import base64 |
|
import google.ai.generativelanguage as glm |
|
from google.ai.generativelanguage_v1beta import HarmCategory |
|
from google.generativeai import GenerationConfig |
|
from google.generativeai.types import HarmBlockThreshold |
|
from google.generativeai.types.safety_types import LooseSafetySettingDict |
|
import requests |
|
import re |
|
|
|
# Load variables from a local .env file into the process environment at import
# time; GeminiService later reads "API_KEY" and "MODEL_NAME_LATEST" via os.getenv.
load_dotenv()
|
|
|
|
|
# Compiled once at import time. No ``^``/``$`` anchors are needed because the
# function uses ``fullmatch``, which must consume the entire string.
_URL_PATTERN = re.compile(r"(http|https)://[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)+([/?].*)?")


def is_valid_url(url):
    """
    Check if a given URL is valid.

    A value is accepted when it is a string made of an ``http`` or ``https``
    scheme, a dotted host name (at least one dot, so ``localhost`` is
    rejected), and an optional remainder that starts with ``/`` or ``?``.
    Ports, credentials and bare ``#fragment`` suffixes are not recognised by
    this simple pattern.

    Args:
        url (str): The URL to be validated.

    Returns:
        bool: True if the URL is valid, False otherwise (including when
        ``url`` is not a string).
    """
    # Non-string input (None, bytes, ...) cannot be a valid URL; report False
    # rather than letting the regex engine raise TypeError.
    if not isinstance(url, str):
        return False

    # ``fullmatch`` rejects a trailing newline, which ``match`` combined with
    # a ``$`` anchor would silently accept.
    return _URL_PATTERN.fullmatch(url) is not None
|
|
|
|
|
def read_url_page(url, timeout=10):
    """
    Retrieve the content of a web page from a given URL.

    Args:
        url (str): The URL of the web page to retrieve.
        timeout (float or tuple, optional): Timeout in seconds passed to
            ``requests.get``. Defaults to 10.

    Returns:
        str or None: The text content of the web page if the server answered
        with status code 200, None if the request failed or any other status
        code was returned. Failures are reported on standard output.
    """
    try:
        # Always pass a timeout: without one, requests waits indefinitely on
        # a server that accepts the connection but never answers.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        # Covers network errors, invalid URLs and timeouts.
        print("Error fetching page:", e)
        return None

    if response.status_code != 200:
        print("Failed to retrieve page. Status code:", response.status_code)
        return None

    return response.text
|
|
|
|
|
def read_file(file_path, encodings=('utf-8', 'latin-1', 'cp1252')):
    """
    Read the content of a file using various encodings until successful decoding.

    The encodings are tried in order and the text produced by the first one
    that decodes the whole file is returned.

    NOTE: 'latin-1' maps every possible byte value, so with the default order
    the 'cp1252' entry is never reached.

    Args:
        file_path (str): The path to the file to be read.
        encodings (iterable of str, optional): Encodings to try for decoding.
            Defaults to ('utf-8', 'latin-1', 'cp1252').

    Returns:
        str: The content of the file if decoding was successful.

    Raises:
        UnicodeDecodeError: If the file cannot be decoded using any of the
            specified encodings. The error is chained to the failure of the
            last encoding that was tried.
        ValueError: If ``encodings`` is empty.
    """
    last_error = None
    for encoding in encodings:
        try:
            with open(file_path, "r", encoding=encoding) as file:
                return file.read()
        except UnicodeDecodeError as error:
            # Remember the failure and fall through to the next encoding.
            last_error = error

    if last_error is None:
        # The loop never ran, so there was nothing to try.
        raise ValueError("No encodings were provided to decode the file.")

    # UnicodeDecodeError needs (encoding, object, start, end, reason); building
    # it from a single message string would itself raise TypeError.
    raise UnicodeDecodeError(
        last_error.encoding,
        last_error.object,
        last_error.start,
        last_error.end,
        "Unable to decode the file using the specified encodings.",
    ) from last_error
|
|
|
|
|
def built_blob_part(data):
    """
    Wrap binary data in a ``glm.Part`` whose inline blob holds Base64 text.

    Args:
        data (bytes): The binary data to be encoded.

    Returns:
        glm.Part: A part whose ``inline_data`` is a ``glm.Blob`` with MIME
        type "text/basic" and the Base64-encoded payload as its data.
    """
    # Round-trip through an in-memory buffer to obtain the payload as bytes.
    raw_bytes = BytesIO(data).getvalue()

    # Base64-encode the payload and keep it as text.
    encoded_text = base64.b64encode(raw_bytes).decode("utf-8")

    # NOTE(review): "text/basic" is not a commonly registered MIME type;
    # "text/plain" may have been intended -- confirm against what the API accepts.
    inline_blob = glm.Blob(mime_type="text/basic", data=encoded_text)
    return glm.Part(inline_data=inline_blob)
|
|
|
|
|
def get_document(path_or_url):
    """
    Get the content of a document, either from a URL, a file, or as plain text.

    The input is classified in this order: a string accepted by
    ``is_valid_url`` is downloaded, an existing regular file is read as
    bytes, and anything else is used verbatim as the document text.

    Args:
        path_or_url (str): The path to a local file, a URL, or plain text.

    Returns:
        glm.Part: A MIME part containing the content of the document.

    Raises:
        RuntimeError: If the input is a URL but the page could not be retrieved.
    """
    if is_valid_url(path_or_url):
        page_text = read_url_page(path_or_url)
        # read_url_page signals failure with None; surface that as a clear
        # error instead of crashing on ``None.encode()``.
        if page_text is None:
            raise RuntimeError(f"Unable to retrieve document from URL: {path_or_url}")
        return built_blob_part(page_text.encode())

    # isfile (not exists): a directory path cannot be opened for reading, so
    # it falls through to the plain-text branch.
    if os.path.isfile(path_or_url):
        with open(path_or_url, 'rb') as file:
            file_content = file.read()
        return built_blob_part(file_content)

    # Neither a URL nor a file: treat the input itself as the document text.
    return built_blob_part(path_or_url.encode())
|
|
|
|
|
class GeminiService:
    """
    Wrapper around a ``genai.GenerativeModel`` configured from the environment.

    The API key is read from the "API_KEY" environment variable and the model
    name from "MODEL_NAME_LATEST".
    """

    # Generation parameters handed to the model at construction time.
    generation_config: GenerationConfig = {
        'temperature': 0.9,
        'top_p': 1,
        'top_k': 40,
        'max_output_tokens': 2048,
        'stop_sequences': [],
    }

    # One entry per harm category, each with the "BLOCK_NONE" threshold.
    safety_settings: Mapping[str | int | HarmCategory, str | int | HarmBlockThreshold] | Iterable[
        LooseSafetySettingDict] = [
        {"category": harm_category, "threshold": "BLOCK_NONE"}
        for harm_category in (
            "HARM_CATEGORY_HARASSMENT",
            "HARM_CATEGORY_HATE_SPEECH",
            "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "HARM_CATEGORY_DANGEROUS_CONTENT",
        )
    ]

    def __init__(self, system_instruction):
        """
        Configure the client library and build the model.

        Args:
            system_instruction: System instruction forwarded unchanged to
                ``genai.GenerativeModel``.
        """
        genai.configure(api_key=os.getenv("API_KEY"))
        model_name = os.getenv("MODEL_NAME_LATEST")
        self.model = genai.GenerativeModel(
            model_name=model_name,
            system_instruction=system_instruction,
            generation_config=self.generation_config,
            safety_settings=self.safety_settings,
        )

    def single_completion(self, message: str,
                          uploaded_document: google.ai.generativelanguage_v1beta.types.content.Part = None) -> str:
        """
        Send one prompt to the model and return the text of its response.

        Args:
            message (str): The prompt text.
            uploaded_document: Optional document part. When truthy it is
                placed before the message in the content sent to the model.

        Returns:
            str: ``response.text`` on success. If the call raises
            ``FailedPrecondition``, a string is returned instead of raising:
            "Error: User location not accepted" when the error text mentions
            "User location", otherwise the error text itself.
        """
        # With a document attached, the document goes first so the message
        # applies to it; otherwise the message is sent as-is.
        if uploaded_document:
            prompt = [uploaded_document, message]
        else:
            prompt = message

        try:
            reply = self.model.generate_content(prompt)
            return reply.text
        except google.api_core.exceptions.FailedPrecondition as error:
            details = str(error)
            if "User location" in details:
                return "Error: User location not accepted"
            return details
|
|
|
|
|
class GeminiSingleCompletion:
    """Facade exposing a single question/answer call on top of ``GeminiService``."""

    def __init__(self, system_instruction) -> None:
        """
        Create the underlying service.

        Args:
            system_instruction: System instruction passed to ``GeminiService``.
        """
        self.service = GeminiService(system_instruction)

    def single_answer(self, message: str,
                      uploaded_document: google.ai.generativelanguage_v1beta.types.content.Part = None) -> str:
        """
        Ask the model one question, optionally about an attached document.

        Args:
            message (str): The prompt text.
            uploaded_document: Optional document part forwarded to the service.

        Returns:
            str: Whatever ``GeminiService.single_completion`` returns.
        """
        reply = self.service.single_completion(
            message=message,
            uploaded_document=uploaded_document,
        )
        return reply
|
|
|
|
|
def main():
    """
    Demo entry point: request five bullet-point facts for three documents.

    The documents are a local file path ("test.txt"), a Wikipedia URL and a
    short inline text; each answer is printed to standard output.
    """
    # Adjacent string literals are concatenated with no separator, so every
    # fragment must end with a space to keep words and sentences apart.
    gemini_service = GeminiSingleCompletion(
        "You are 'Corporate Synthesizer' AI: Your expert alchemist of information. "
        "Trained to distill complex texts into concise summaries for management. "
        "Analyzes, identifies key insights, and crafts actionable summaries. "
        "Ensures executives grasp crucial details swiftly. Your strategic partner "
        "in navigating the sea of data for informed decisions. "
        "I will give you a text and your job is to digest it down to at most a few "
        "paragraphs. "
    )

    file_path = "test.txt"
    url = "https://en.wikipedia.org/wiki/Pacific_parrotlet"

    # Same request for each kind of input: local file, web page, inline text.
    for source in (file_path, url, "Just some text"):
        doc = get_document(source)
        answer = gemini_service.single_answer("Give me five bullet point facts", doc)
        print(answer)
|
|
|
|
|
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()