Last active
May 13, 2024 10:21
-
-
Save JGalego/67a3db0005f5ed052224e549e8ee627c to your computer and use it in GitHub Desktop.
Bedrock Scraper with ScrapeGraphAI 🕷️
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
r""" | |
Bedrock Scraper with ScrapeGraphAI
/\ \ / /\ | |
//\\ .. //\\ | |
//\(( ))/\\ | |
/ < `' > \ | |
""" | |
import boto3 | |
import streamlit as st | |
from scrapegraphai.graphs import SmartScraperGraph | |
from scrapegraphai.helpers import models_tokens | |
from langchain_core.exceptions import OutputParserException | |
# Page metadata is configured before any other Streamlit call in this script.
st.set_page_config(
    page_title="Bedrock Scraper",
    page_icon="🕷️",
)
st.title("Bedrock Scraper 🕷️")

# 0a. Check supported models
# Bedrock client used by bedrock_models() to list foundation models.
bedrock = boto3.client("bedrock")
@st.cache_data
def bedrock_models(output_modality, inference_type="ON_DEMAND"):
    """Return the IDs of the Bedrock foundation models matching the filters.

    Args:
        output_modality: Value forwarded as ``byOutputModality``.
        inference_type: Value forwarded as ``byInferenceType``.

    Returns:
        A list with the ``modelId`` of every entry in the response's
        ``modelSummaries``.
    """
    response = bedrock.list_foundation_models(
        byOutputModality=output_modality,
        byInferenceType=inference_type,
    )
    return [summary['modelId'] for summary in response['modelSummaries']]
@st.cache_data
def supported_bedrock_models():
    """Return the model names under the 'bedrock' entry of ``models_tokens``."""
    bedrock_entries = models_tokens['bedrock']
    # Iterating a dict yields its keys, so this is the list of model names.
    return list(bedrock_entries)
supported_models = supported_bedrock_models()
# Only offer models that Bedrock reports for the given output modality AND
# that ScrapeGraphAI knows about. The intersections are sorted because plain
# set iteration order is not stable across interpreter restarts, which would
# shuffle the select-box options (and their default) from one launch to the next.
text_models = sorted(set(bedrock_models("TEXT")) & set(supported_models))
embed_models = sorted(set(bedrock_models("EMBEDDING")) & set(supported_models))
# 0b. Get user input
# With an empty option list st.selectbox returns None, which makes the
# models_tokens lookup below raise KeyError (and would produce a
# "bedrock/None" model name), so stop early with a readable message instead.
if not text_models or not embed_models:
    st.error("No supported Bedrock text and/or embedding models are available.")
    st.stop()

llm = st.selectbox(
    label="Select model",
    options=text_models,
)

temperature = st.sidebar.slider(
    label="> Temperature",
    min_value=0.0,
    max_value=1.0,
)

# The value stored in ScrapeGraphAI's models_tokens table for the selected
# model serves as both the slider's upper bound and its default.
max_model_tokens = models_tokens['bedrock'][llm]
model_tokens = st.sidebar.slider(
    label="> Model Tokens",
    min_value=0,
    max_value=max_model_tokens,
    value=max_model_tokens,
)

embedder = st.selectbox(
    label="Select embedder",
    options=embed_models,
)

source = st.text_input(
    label="Link to scrape",
)

prompt = st.text_area(
    label="Write the prompt",
)
# 1. Define graph configuration
# NOTE(review): the "> Model Tokens" slider value (`model_tokens`) is not
# passed along here — confirm whether it should be part of the LLM settings.
llm_settings = {
    "model": f"bedrock/{llm}",
    "temperature": temperature,
    "format": "json",
}
embedding_settings = {"model": f"bedrock/{embedder}"}
config = {
    "llm": llm_settings,
    "embeddings": embedding_settings,
}

# 2. Create graph instance
# Built from the current widget values for the prompt and the link.
graph = SmartScraperGraph(prompt=prompt, source=source, config=config)
# 3. Scrape away!
def run():
    """Run the scraping graph and store its result in the session state.

    Used as the "Run" button callback. ``st.session_state.output`` is reset
    first, so a previous result is not shown again when this run produces
    nothing. Parser failures raised by the graph are reported with
    ``st.error`` instead of propagating.
    """
    st.session_state.output = None

    # Nothing sensible can be scraped without a link and a prompt; tell the
    # user instead of launching the graph with empty inputs.
    if not source.strip() or not prompt.strip():
        st.warning("Please provide both a link to scrape and a prompt.")
        return

    try:
        st.session_state.output = graph.run()
    except OutputParserException as ex:
        st.error(ex)
# The click is handled entirely by the callback, so the button's boolean
# return value is not needed. Not assigning it also avoids rebinding the
# name `run`, which would shadow the callback function defined above.
st.button(
    label="Run",
    on_click=run,
)

# Show the most recent result stored by the callback, if there is one.
if st.session_state.get('output'):
    st.json(st.session_state.output)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment