Skip to content

Instantly share code, notes, and snippets.

@agriffis
Created March 1, 2012 17:43
Show Gist options
  • Save agriffis/1951623 to your computer and use it in GitHub Desktop.
Save agriffis/1951623 to your computer and use it in GitHub Desktop.
pysearch
import operator, os, re, urllib
from google.appengine.ext import webapp
from google.appengine.ext.webapp import template
# Public API of this module: a star-import exposes only the PySearch class.
__all__ = ['PySearch']
# Module-level cache for the parsed index. Both names stay None until
# get_anchors() has parsed the index file successfully.
anchors, lower_anchors = None, None


def get_anchors():
    """Return the ``(anchors, lower_anchors)`` dicts, parsing the index once.

    The index is read from ``genindex-all.html`` in this module's directory.
    Every ``<dt><a href="...">`` target found there is recorded twice:

    * ``anchors`` maps each href to itself.
    * ``lower_anchors`` maps the lower-cased href to the original href.

    The result is cached in the module globals, so later calls return the
    same dict objects without touching the file again.

    Raises:
        IOError/OSError: if the index file cannot be opened or read. Nothing
            is cached in that case, so a later call retries the parse.
    """
    global anchors, lower_anchors
    if anchors is None:
        genindex = os.path.join(
            os.path.dirname(__file__), 'genindex-all.html')
        genpatt = re.compile(r'<dt><a href="([^"]+)">([^<]*)')
        # try/finally (rather than ``with``) closes the handle without
        # requiring newer syntax than the rest of this file uses.
        f = open(genindex)
        try:
            html = f.read()
        finally:
            f.close()
        # Build into locals and publish only after a complete parse, so a
        # failure above never leaves empty dicts cached in the globals.
        found, lower_found = {}, {}
        for match in genpatt.finditer(html):
            ank = match.group(1)
            found[ank] = ank
            lower_found[ank.lower()] = ank
        anchors, lower_anchors = found, lower_found
    return anchors, lower_anchors
def exact_match(q, anchors):
    """Return the value of the first anchor whose key ends with a tag for *q*.

    The suffix templates are tried in priority order; for each one, every
    key of *anchors* is tested with ``str.endswith``. The value stored under
    the first matching key is returned, or ``None`` when nothing matches.
    """
    templates = (
        '#%s',         # anchor match (includes builtins)
        '#module-%s',  # module match
        '.%s',         # attribute match
        '-%s',         # something match
    )
    for template in templates:
        suffix = template % q
        for key in anchors:
            if key.endswith(suffix):
                return anchors[key]
    return None
def inner_match(q, anchors):
    """Return the value of the first anchor whose key contains *q* as a word.

    Three patterns are tried in priority order: *q* as a whole word, *q* at
    the start of a word, then *q* at the end of a word. For each pattern the
    keys of *anchors* are searched and the value of the first hit is
    returned. Returns ``None`` when no key matches any pattern.

    *q* is matched literally: it is passed through ``re.escape`` so that
    characters such as ``+``, ``(`` or ``.`` in a query neither raise
    ``re.error`` nor act as regex operators.
    """
    literal = re.escape(q)
    for patt in [r'\b%s\b' % literal,  # word match
                 r'\b%s' % literal,    # start of word match
                 r'%s\b' % literal,    # end of word match
                 ]:
        reg = re.compile(patt)
        for k, v in anchors.items():
            if reg.search(k):
                return v
    return None
class PySearch(webapp.RequestHandler):
    """Request handler that redirects a query to a docs.python.org page.

    The ``q`` request parameter is looked up in the anchor index returned by
    ``get_anchors()``. The best candidate is redirected to under
    ``http://docs.python.org/``; when the index has no candidate at all the
    request is redirected to a site-restricted Google search instead.
    """

    def get(self):
        """Run the search when ``q`` is present, else render the form."""
        if self.request.get('q'):
            return self.post()
        path = os.path.join(os.path.dirname(__file__), 'pysearch.html')
        self.response.out.write(template.render(path, {}))

    def post(self):
        """Redirect to the best match for the ``q`` request parameter.

        Matching order: exact suffix match, then word-boundary match, then
        the alphabetically first remaining candidate, then Google.
        """
        q = self.request.get('q')
        if not q:
            # Nothing to search for: send the client back to this handler's
            # own path. (Was the undefined bare name ``request``.)
            return self.redirect(self.request.path)

        # Get our anchor dicts.
        anchors, lower_anchors = get_anchors()

        # Filter down to candidates.
        cands = dict((k, v) for k, v in anchors.items() if q in k)
        if q.islower():
            # An all-lowercase query is also matched case-insensitively,
            # against the lower-cased keys.
            lower_cands = dict((k, v) for k, v in lower_anchors.items()
                               if q in k)
        else:
            lower_cands = {}
        candidate_sets = [cands, lower_cands]

        # Look for an exact match first in both lists.
        for c in candidate_sets:
            found = exact_match(q, c)
            if found:
                return self.redirect_docs(found)

        # Look for inner matches in both lists.
        for c in candidate_sets:
            found = inner_match(q, c)
            if found:
                return self.redirect_docs(found)

        # What did you search for?  Sheesh
        # Fall back to the alphabetically first candidate of each set in
        # turn. (Was ``if c in [...]``, which only ever inspected the
        # leftover loop variable, i.e. lower_cands.)
        for c in candidate_sets:
            if c:
                return self.redirect_docs(min(c.values()))

        # I give up
        return self.redirect_google(q)

    def redirect_docs(self, path):
        """Redirect to *path* under ``http://docs.python.org/``."""
        # Plain concatenation instead of os.path.join: URL building must not
        # depend on the host platform's path rules, and a leading slash in
        # *path* must not discard the host part.
        return self.redirect("http://docs.python.org/" + path.lstrip('/'))

    def redirect_google(self, q):
        """Redirect to a Google search for *q* limited to docs.python.org."""
        query = '+'.join(urllib.quote_plus(x) for x in [
            'site:docs.python.org',
            '( inurl:library/ OR inurl:reference/ )',
            q,
        ])
        return self.redirect('http://google.com/search?q=%s' % query)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment