Skip to content

Instantly share code, notes, and snippets.

@jaklinger
Last active September 16, 2020 16:14
Show Gist options
  • Save jaklinger/1c9596fcbc5a2c266506fede645ab1f3 to your computer and use it in GitHub Desktop.
Save jaklinger/1c9596fcbc5a2c266506fede645ab1f3 to your computer and use it in GitHub Desktop.
Tells you whether a query is related to tech
import wikipedia
from functools import lru_cache
@lru_cache()
def get_page_cats(query):
    """Return the category names of the Wikipedia page titled *query*.

    The page is looked up with ``auto_suggest=False``. If the lookup raises
    ``wikipedia.PageError`` or ``wikipedia.DisambiguationError`` an empty
    list is returned instead.

    Results are memoised by ``lru_cache``, so repeated calls with the same
    *query* reuse the previous answer (the very same list object is handed
    back, so callers should not mutate it).
    """
    try:
        wiki_page = wikipedia.page(query, auto_suggest=False)
    except (wikipedia.PageError, wikipedia.DisambiguationError):
        categories = []
    else:
        # Attribute access stays outside the ``try`` so that errors raised
        # while reading the categories are not swallowed.
        categories = wiki_page.categories
    return categories
# Whitespace-separated words that disqualify a query outright (matched
# case-sensitively). NOTE(review): these look like markers of Wikipedia
# maintenance categories -- confirm.
_SKIP_WORDS = frozenset(
    ('containing', 'needing', 'need', 'dead', 'Wikipedia', 'unsourced'))

# Lower-cased category words that mark a category as technology-related.
_TECH_WORDS = frozenset(('computer', 'computing', 'software', 'hardware'))


def is_tech(query, depth=0, max_depth=2):
    """Tell whether *query* is related to tech, judging by its categories.

    The categories of the page titled *query* are fetched with
    ``get_page_cats``. The query counts as tech when one of them is exactly
    ``'Technology-related lists'`` or contains (as a whitespace-separated,
    case-insensitive word) one of ``computer``, ``computing``, ``software``
    or ``hardware``. Otherwise each category is itself checked recursively
    as a query, until *max_depth* levels of recursion have been used.

    Args:
        query: Page (or category) title to look up.
        depth: Current recursion level; callers normally leave it at 0.
        max_depth: Deepest recursion level whose categories are still
            inspected. Categories are not followed past this level.

    Returns:
        ``True`` if a tech-related category was found, ``False`` otherwise
        (including when *query* contains one of the skip words).
    """
    # Split once and test all skip words against the resulting words.
    if not _SKIP_WORDS.isdisjoint(query.split()):
        return False

    cats = get_page_cats(query)
    if 'Technology-related lists' in cats:
        return True
    if any(word.lower() in _TECH_WORDS
           for cat in cats for word in cat.split()):
        return True

    # ``>=`` rather than ``==``: if a caller starts with depth > max_depth
    # (or passes a negative max_depth) an equality test would never fire
    # and the recursion would be bounded only by the category graph.
    if depth >= max_depth:
        return False

    return any(is_tech(cat, depth=depth + 1, max_depth=max_depth)
               for cat in cats)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment