Skip to content

Instantly share code, notes, and snippets.

@JGalego
Last active May 13, 2024 10:21
Show Gist options
  • Save JGalego/67a3db0005f5ed052224e549e8ee627c to your computer and use it in GitHub Desktop.
Save JGalego/67a3db0005f5ed052224e549e8ee627c to your computer and use it in GitHub Desktop.
Bedrock Scraper with ScrapeGraphAI 🕷️
r"""
Bedrock Scraper with ScrapeGraphAI
/\ \ / /\
//\\ .. //\\
//\(( ))/\\
/ < `' > \
"""
import boto3
import streamlit as st
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.helpers import models_tokens
from langchain_core.exceptions import OutputParserException
# Page setup: configure the page first, then render the heading.
st.set_page_config(
    page_title="Bedrock Scraper",
    page_icon="🕷️",
)
st.title("Bedrock Scraper 🕷️")

# 0a. Check supported models
# Module-level Bedrock client; bedrock_models() reads it as a global.
bedrock = boto3.client("bedrock")
@st.cache_data
def bedrock_models(output_modality, inference_type="ON_DEMAND"):
    """Return the IDs of the Bedrock foundation models matching the filters.

    Args:
        output_modality: Forwarded to the API as ``byOutputModality``.
        inference_type: Forwarded to the API as ``byInferenceType``.

    Returns:
        A list holding the ``modelId`` of every entry found under the
        response's ``modelSummaries`` key.
    """
    response = bedrock.list_foundation_models(
        byOutputModality=output_modality,
        byInferenceType=inference_type,
    )
    return [summary['modelId'] for summary in response['modelSummaries']]
@st.cache_data
def supported_bedrock_models():
    """Return the Bedrock model names known to ScrapeGraphAI.

    These are the keys of the ``'bedrock'`` entry in ``models_tokens``.
    """
    bedrock_entries = models_tokens['bedrock']
    # Iterating a dict yields its keys, so this is the list of model names.
    return list(bedrock_entries)
# Offer only models that are both returned by Bedrock and known to
# ScrapeGraphAI. Set iteration order is arbitrary, so the results are sorted
# to keep the option order (and the default selection) stable across runs.
supported_models = supported_bedrock_models()
_supported = set(supported_models)
text_models = sorted(_supported.intersection(bedrock_models("TEXT")))
embed_models = sorted(_supported.intersection(bedrock_models("EMBEDDING")))
# 0b. Get user input
# With no options, st.selectbox returns None and the token lookup below would
# raise KeyError; stop early with a readable message instead.
if not text_models:
    st.error("No supported Bedrock text models are available.")
    st.stop()

llm = st.selectbox(
    label="Select model",
    options=text_models,
)
temperature = st.sidebar.slider(
    label="> Temperature",
    min_value=0.0,
    max_value=1.0,
)
# Token value registered for the selected model; it is both the slider's
# upper bound and its initial value.
max_model_tokens = models_tokens['bedrock'][llm]
# NOTE(review): model_tokens is not referenced again in the visible script,
# so moving this slider currently has no effect on the graph configuration.
model_tokens = st.sidebar.slider(
    label="> Model Tokens",
    min_value=0,
    max_value=max_model_tokens,
    value=max_model_tokens,
)
embedder = st.selectbox(
    label="Select embedder",
    options=embed_models,
)
source = st.text_input(
    label="Link to scrape",
)
prompt = st.text_area(
    label="Write the prompt",
)
# 1. Define graph configuration
# Both model names are passed with the "bedrock/" prefix.
llm_settings = {
    "model": f"bedrock/{llm}",
    "temperature": temperature,
    "format": "json",
}
embedding_settings = {"model": f"bedrock/{embedder}"}
config = {"llm": llm_settings, "embeddings": embedding_settings}

# 2. Create graph instance
# Built at module level from the current widget values; run() reads it as a
# global.
graph = SmartScraperGraph(prompt=prompt, source=source, config=config)
# 3. Scrape away!
def run():
    """Run the graph and store its result in ``st.session_state.output``.

    The stored output is cleared first, so it stays ``None`` when the run
    fails. An ``OutputParserException`` is shown with ``st.error``; any other
    exception propagates.
    """
    st.session_state.output = None
    try:
        result = graph.run()
    except OutputParserException as ex:
        st.error(ex)
    else:
        st.session_state.output = result
# The button's boolean return value is not needed; binding it to `run` would
# rebind the name of the callback function, so it is deliberately discarded.
st.button(
    label="Run",
    on_click=run,
)

# Render whatever the callback stored in session state, if anything.
scrape_result = st.session_state.get("output")
if scrape_result:
    st.json(scrape_result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment