Created
March 1, 2012 17:43
-
-
Save agriffis/1951623 to your computer and use it in GitHub Desktop.
pysearch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import operator, os, re, urllib | |
from google.appengine.ext import webapp | |
from google.appengine.ext.webapp import template | |
__all__ = ['PySearch'] | |
# Module-level cache of the parsed index. Both names stay None until
# get_anchors() has loaded the index file successfully.
anchors, lower_anchors = None, None


def get_anchors():
    """Return the ``(anchors, lower_anchors)`` dicts, loading them on first use.

    The index is read from ``genindex-all.html`` located next to this
    module.  Every ``<dt><a href="...">`` entry contributes its href:

    * ``anchors`` maps each href to itself.
    * ``lower_anchors`` maps the lower-cased href to the original href.

    The result is cached in the module globals, so the file is parsed at
    most once per successful load.

    Raises:
        IOError/OSError: if the index file cannot be opened or read.  The
            cache is left untouched in that case, so the next call retries
            instead of returning empty dicts forever.
    """
    global anchors, lower_anchors
    if anchors is None:
        genindex = os.path.join(
            os.path.dirname(__file__), 'genindex-all.html')
        genpatt = re.compile(r'<dt><a href="([^"]+)">([^<]*)')

        # try/finally rather than ``with`` so the handle is always closed
        # without relying on syntax newer than the rest of this module uses.
        f = open(genindex)
        try:
            html = f.read()
        finally:
            f.close()

        # Build into locals and publish only once parsing has finished, so a
        # failure above never leaves a half-initialised (empty) cache behind.
        loaded, loaded_lower = {}, {}
        for match in genpatt.finditer(html):
            ank = match.group(1)
            loaded[ank] = ank
            loaded_lower[ank.lower()] = ank
        anchors, lower_anchors = loaded, loaded_lower
    return anchors, lower_anchors
def exact_match(q, anchors):
    """Return the value of the first anchor whose key ends with a tag for *q*.

    Tags are tried in priority order; for each tag every key of *anchors*
    is checked with ``str.endswith``.  The value of the first matching key
    is returned, or ``None`` when no key matches any tag.
    """
    suffixes = [fmt % q for fmt in (
        '#%s',          # anchor match (includes builtins)
        '#module-%s',   # module match
        '.%s',          # attribute match
        '-%s',          # something match
    )]
    for suffix in suffixes:
        for key in anchors:
            if key.endswith(suffix):
                return anchors[key]
    return None
def inner_match(q, anchors):
    """Return the value of the first anchor whose key contains *q* at a word boundary.

    Three patterns are tried in priority order: *q* as a whole word, *q* at
    the start of a word, then *q* at the end of a word.  For each pattern
    every key of *anchors* is searched; the value of the first matching key
    is returned, or ``None`` when nothing matches.

    *q* is treated as literal text.  It is escaped before being placed in
    the pattern, so regex metacharacters in a query (``(``, ``+``, ``.``)
    neither raise ``re.error`` nor act as wildcards.
    """
    literal = re.escape(q)
    for patt in [r'\b%s\b' % literal,  # word match
                 r'\b%s' % literal,    # start of word match
                 r'%s\b' % literal,    # end of word match
                 ]:
        reg = re.compile(patt)
        for k, v in anchors.items():
            if reg.search(k):
                return v
    return None
class PySearch(webapp.RequestHandler):
    """Handler that redirects a search term to the best docs.python.org anchor.

    GET without a ``q`` parameter renders the search form; GET with ``q``
    and POST both run the search and answer with a redirect.
    """

    def get(self):
        """Render ``pysearch.html``, or run the search when ``q`` is given."""
        if self.request.get('q'):
            return self.post()
        path = os.path.join(os.path.dirname(__file__), 'pysearch.html')
        self.response.out.write(template.render(path, {}))

    def post(self):
        """Redirect to the best match for ``q``, falling back to Google.

        Matching is attempted in this order, each over the case-sensitive
        candidates first and then the lower-cased ones:

        1. ``exact_match``
        2. ``inner_match``
        3. the first candidate value in sorted order

        If nothing matches, the query is sent to a site-restricted Google
        search.
        """
        q = self.request.get('q')
        if not q:
            # Nothing to search for: go back to the form.
            return self.redirect(self.request.path)

        # Get our anchor dicts.
        anchors, lower_anchors = get_anchors()

        # Filter down to candidates.
        cands = dict((k, v) for k, v in anchors.items() if q in k)
        if q.islower():
            # Only an all-lowercase query is matched against lower-cased keys.
            lower_cands = dict((k, v) for k, v in lower_anchors.items()
                               if q in k)
        else:
            lower_cands = {}

        # Look for an exact match first in both lists.
        for c in [cands, lower_cands]:
            found = exact_match(q, c)
            if found:
                return self.redirect_docs(found)

        # Look for inner matches in both lists.
        for c in [cands, lower_cands]:
            found = inner_match(q, c)
            if found:
                return self.redirect_docs(found)

        # What did you search for?  Sheesh.  Take the first candidate in
        # sorted order, checking both lists like the steps above.
        for c in [cands, lower_cands]:
            found = sorted(c.values())[:1]
            if found:
                return self.redirect_docs(found[0])

        # I give up
        return self.redirect_google(q)

    def redirect_docs(self, path):
        """Redirect to *path* under ``http://docs.python.org/``."""
        # Plain concatenation instead of os.path.join: that function is for
        # filesystem paths, uses the platform separator, and discards the
        # base entirely when *path* starts with a slash.
        return self.redirect("http://docs.python.org/" + path.lstrip('/'))

    def redirect_google(self, q):
        """Redirect to a Google search for *q* restricted to docs.python.org."""
        terms = [
            'site:docs.python.org',
            '( inurl:library/ OR inurl:reference/ )',
            q,
        ]
        quoted = []
        for term in terms:
            # Python 2's urllib.quote_plus raises KeyError on non-ASCII
            # unicode input, so encode unicode terms to UTF-8 bytes first.
            if isinstance(term, unicode):
                term = term.encode('utf-8')
            quoted.append(urllib.quote_plus(term))
        query = '+'.join(quoted)
        return self.redirect('http://google.com/search?q=%s' % query)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment