Skip to content

Instantly share code, notes, and snippets.

@tkf
Created August 15, 2010 19:00
Show Gist options
  • Save tkf/525828 to your computer and use it in GitHub Desktop.
Save tkf/525828 to your computer and use it in GitHub Desktop.
import re
import json
import urllib
# Splits a string around its first ``{...}`` group: text before the first
# ``{``, the text between that ``{`` and the next ``}``, and everything after.
# A raw string avoids the invalid ``\{`` escape; DOTALL lets the tail contain
# newlines (without it a trailing newline was dropped and a pattern with an
# embedded newline after the group was never expanded).
RE_FIRST_PARENTHESIS = re.compile(r'^([^{]*)\{([^}]*)\}(.*)$', re.DOTALL)


def parse_parenthesis(string):
    """
    Expand every ``{a,b,...}`` group in `string` into all combinations.

    Each group is replaced by each of its comma-separated alternatives in
    turn; groups are processed left to right, so the alternatives of the
    leftmost group vary slowest in the returned list.  A group ends at the
    first ``}`` after its ``{``, so nested groups are not supported.  A
    string without any group is returned unchanged as a one-element list.

    >>> parse_parenthesis('aaa {bbb,ccc} ddd')
    ['aaa bbb ddd', 'aaa ccc ddd']
    >>> parse_parenthesis('aaa {bbb,ccc} {ddd,eee}')
    ['aaa bbb ddd', 'aaa bbb eee', 'aaa ccc ddd', 'aaa ccc eee']
    >>> parse_parenthesis('no parenthesis')
    ['no parenthesis']
    """
    match = RE_FIRST_PARENTHESIS.match(string)
    if match is None:
        return [string]

    head, alternatives, tail = match.groups()
    expanded = []
    for alternative in alternatives.split(','):
        # Substitute one alternative, then recurse to expand the groups
        # that remain in the tail.
        expanded.extend(parse_parenthesis(head + alternative + tail))
    return expanded
def get_result_count(searchfor):
    """
    Return the estimated number of web search hits for `searchfor`.

    Sends one GET request to the Google AJAX web-search endpoint with
    `searchfor` as the ``q`` parameter and returns the
    ``responseData.cursor.estimatedResultCount`` field of the JSON reply,
    exactly as decoded from the JSON.

    Raises ValueError when the reply has no ``responseData`` (instead of
    the opaque TypeError that indexing into ``None`` would produce).
    """
    # Function-scope imports so the lookup works on both Python 3 and
    # Python 2, where these helpers live directly in ``urllib``.
    try:
        from urllib.parse import urlencode
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlencode, urlopen

    query = urlencode({'q': searchfor})
    url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&%s' % query
    search_response = urlopen(url)
    try:
        search_results = search_response.read()
    finally:
        # Always release the connection, even if reading fails.
        search_response.close()

    # The body arrives as bytes; decode before parsing.
    # NOTE(review): assumes the service answers in UTF-8 -- confirm.
    results = json.loads(search_results.decode('utf-8'))
    response_data = results.get('responseData')
    if not response_data:
        # NOTE(review): error replies are presumed to explain themselves in
        # ``responseDetails``; confirm against the API documentation.
        raise ValueError('search for %r returned no data: %s'
                         % (searchfor, results.get('responseDetails')))
    return response_data['cursor']['estimatedResultCount']
def main(patterns):
    """
    Print the estimated search-result count for every expanded pattern.

    Each entry of `patterns` is expanded with ``parse_parenthesis`` and
    wrapped in double quotes; the quoted phrase is passed to
    ``get_result_count``.  One line is printed per phrase: the count,
    right-aligned to the widest count, followed by the quoted phrase.
    Nothing is printed when `patterns` is empty.
    """
    parsed_patterns = [
        '"%s"' % parsed
        for p in patterns
        for parsed in parse_parenthesis(p)]
    if not parsed_patterns:
        # max() below would raise ValueError on an empty sequence.
        return

    # The counts are used as strings (len / rjust) below.
    # NOTE(review): assumes get_result_count returns text -- confirm.
    result_count_list = [
        get_result_count(parsed) for parsed in parsed_patterns]
    count_width = max(len(count) for count in result_count_list)
    for (count, parsed) in zip(result_count_list, parsed_patterns):
        # A single pre-formatted argument prints identically whether
        # ``print`` is a statement (Python 2) or a function (Python 3).
        print('%s %s' % (count.rjust(count_width), parsed))
if __name__ == '__main__':
    # Script entry point: every command-line argument after the program
    # name is passed to main() as one search pattern.
    import sys
    cli_patterns = sys.argv[1:]
    main(cli_patterns)
$ python gstats.py 'search{, on, in} the web'
25600000 "search the web"
22600 "search on the web"
4270 "search in the web"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment