Skip to content

Instantly share code, notes, and snippets.

@shawngraham
Created January 29, 2024 21:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shawngraham/e26db0d7189df12bb0ffaf992308c2dd to your computer and use it in GitHub Desktop.
Save shawngraham/e26db0d7189df12bb0ffaf992308c2dd to your computer and use it in GitHub Desktop.
Trying to build a retriever for gpt-researcher to explore Open Context.
import requests
class OpenContextSearch():
    """
    Open Context Search Retriever

    Sends a keyword query to the Open Context JSON query endpoint and
    flattens each entry of the response's 'features' list into a small
    dictionary (label, id, uri, category).
    """

    # Endpoint queried by search().
    BASE_URL = "https://opencontext.org/query/.json"

    # content_type values forwarded to the API as the 'type' parameter.
    # Any other value (e.g. 'everything') sends no 'type' filter at all.
    FILTERABLE_TYPES = ('subjects', 'media', 'projects')

    # Seconds to wait for the server. Without an explicit timeout,
    # requests waits indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30

    def __init__(self, query, content_type='subjects'):
        """
        Initializes the OpenContextSearch object

        Args:
            query: The search query (topic of interest)
            content_type: The type of content to search for
                ('subjects', 'media', 'projects', or 'everything')
        """
        self.query = query
        self.content_type = content_type

    def search(self, max_results=5):
        """
        Searches the OpenContext API for records related to the query

        Args:
            max_results: The maximum number of results to retrieve
                (sent to the API as the 'rows' parameter)

        Returns:
            A list of dictionaries with the keys 'label', 'id', 'uri' and
            'category', one per record. Returns None when the request
            fails, the server answers with a non-200 status, or the body
            is not valid JSON; a message is printed in each case.
        """
        print(f"Searching OpenContext for '{self.query}'...")

        params = {
            'q': self.query,
            'rows': max_results,
        }
        if self.content_type in self.FILTERABLE_TYPES:
            params['type'] = self.content_type
        headers = {'User-Agent': 'oc-api-client'}

        # Network-level failures (DNS, refused connection, timeout) are
        # reported the same way as HTTP failures instead of propagating.
        try:
            resp = requests.get(
                self.BASE_URL,
                params=params,
                headers=headers,
                timeout=self.REQUEST_TIMEOUT,
            )
        except requests.RequestException as e:
            print(f"Failed to retrieve data: {e}")
            return None

        print(resp.url)
        if resp.status_code != 200:
            print(f"Failed to retrieve data: HTTP {resp.status_code}")
            return None

        # Every JSON decoding error raised by requests is a ValueError.
        try:
            search_results = resp.json()
        except ValueError as e:
            print(f"Failed to parse the response: {e}")
            return None

        # Here we access the 'features' key directly. A non-object body
        # or a null 'features' value is treated as "no results".
        features = None
        if isinstance(search_results, dict):
            features = search_results.get('features')

        return [
            self._format_feature(feature)
            for feature in (features or [])
            if isinstance(feature, dict)
        ]

    @staticmethod
    def _format_feature(feature):
        """Extract and format the relevant data from one 'features' entry."""
        # 'properties' may be absent or null; fall back to an empty mapping
        # so the placeholder strings below are used.
        properties = feature.get('properties') or {}
        return {
            "label": feature.get('label', 'No label provided'),
            "id": feature.get('id', 'No ID provided'),
            "uri": properties.get('uri', 'No URI provided'),
            "category": properties.get('item category', 'No category provided'),
            # Add more fields as necessary
        }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment