Skip to content

Instantly share code, notes, and snippets.

@bojanbabic
Last active August 20, 2023 22:55
Show Gist options
  • Save bojanbabic/9052c007a13b7164c6031bf56b8b58cf to your computer and use it in GitHub Desktop.
Save bojanbabic/9052c007a13b7164c6031bf56b8b58cf to your computer and use it in GitHub Desktop.
import os
import re
import time

import openai
from bs4 import BeautifulSoup
from bs4.element import Comment
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
# Read the API key from the environment so it never lands in source control.
openai.api_key = os.getenv('OPENAI_API_KEY')

# Page to scrape and the questions to ask about its visible text.
url = "https://www.greatschools.org/california/san-jose/5442-Joaquin-Miller-Middle-School"
questions = [
    "where is the location of school? ",
    "how many students this school has?",
    "what percentage of the students are asian?",
]
tags = ["p_div"]
# The questions are sent to the model as one space-separated string.
question_str = " ".join(questions)

# Placeholder handed to the Selenium Service; presumably the chromedriver
# executable path -- replace before running.
chrome_driver_path = "<path_to_chromium>"
options = webdriver.ChromeOptions()
options.add_argument('--headless')
driver = webdriver.Chrome(service=Service(chrome_driver_path), options=options)
try:
    driver.get(url)
    # implicitly_wait() only applies to element lookups, so calling it here
    # never paused; sleep explicitly to let client-side JavaScript render.
    time.sleep(5)
    html = driver.page_source
finally:
    # quit() ends the whole browser session (and the driver process), while
    # close() only closes the current window. Running it in ``finally``
    # avoids leaking a headless browser when the page load fails.
    driver.quit()
soup = BeautifulSoup(html, "html.parser")  # Parse the rendered page source
def tag_visible(element):
    """Tell whether a parsed text node counts as visible page text.

    A node is rejected when its parent tag is one of the non-rendered
    containers listed below, or when the node itself is an HTML comment.

    Args:
        element: A text node from a BeautifulSoup tree; it must expose
            ``parent.name``.

    Returns:
        ``True`` if the node should be kept, ``False`` otherwise.
    """
    hidden_parents = ('style', 'script', 'head', 'title', 'meta', '[document]')
    if element.parent.name in hidden_parents:
        return False
    # Comments are text nodes too, but they are never displayed.
    return not isinstance(element, Comment)
def text_from_html(html):
    """Return the visible text of an HTML document as a single string.

    The markup passed in is parsed here, so the result depends only on the
    argument and not on any module-level parse tree.

    Args:
        html: HTML markup to extract text from.

    Returns:
        Every text node accepted by ``tag_visible``, stripped of surrounding
        whitespace and joined with single spaces.
    """
    document = BeautifulSoup(html, "html.parser")
    # string=True selects every text node in the tree, comments included;
    # tag_visible then drops the ones that are not rendered.
    text_nodes = document.find_all(string=True)
    return " ".join(node.strip() for node in text_nodes if tag_visible(node))
def get_completion(prompt, model="gpt-3.5-turbo"):
    """Ask the chat model a single question and return its reply text.

    The prompt is sent as the only message of a one-turn conversation.

    Args:
        prompt: Text sent as the content of the single ``user`` message.
        model: Name of the chat model to query.

    Returns:
        The ``content`` of the message in the first returned choice.
    """
    completion = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,  # degree of randomness of the model's output
    )
    first_choice = completion.choices[0]
    return first_choice.message["content"]
# Flatten the rendered page into plain text for the model.
target_text = text_from_html(html)

# The scraped text is fenced with ### so the model can tell it apart from the
# instructions; the wording must name the same delimiter that is actually used.
prompt = f"""
The text scraped from the web page is delimited by ###.
Answer the following questions: {question_str}
### {target_text} ###
"""
response = get_completion(prompt)
print(response)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment