Skip to content

Instantly share code, notes, and snippets.

@Phyks
Last active May 7, 2019
Embed
What would you like to do?
Fetch Google "Now" answers from the CLI. Usage: `python3 google_now.py QUERY`.
#!/usr/bin/env python3
import sys
import urllib.parse
import html2text
import scrapy
from scrapy.crawler import CrawlerProcess
# Items collected during the crawl, in the order the pipeline received them.
# Filled by MyPipeline.process_item and read by the script entry point.
results = []


class MyPipeline:
    """Scrapy item pipeline that records every scraped item in ``results``."""

    def process_item(self, item, spider):
        """Store a plain-dict copy of *item* and pass the item along.

        Args:
            item: The scraped item; anything ``dict()`` accepts.
            spider: The spider that produced the item (unused).

        Returns:
            The same *item* object, unchanged.
        """
        results.append(dict(item))
        # Scrapy's pipeline contract expects process_item to return the item
        # (or raise DropItem); returning None would hand None to any pipeline
        # stage that runs after this one.
        return item
class GoogleNowSpider(scrapy.Spider):
    """Spider that loads one Google search page and extracts a single answer.

    The answer is returned as a dict with two keys: ``"type"`` (one of
    ``"calc"``, ``"col"`` or ``"other"``) and ``"result"`` (the answer text).
    """

    name = "googlenow"

    def __init__(self, query="", *args, **kwargs):
        """Build the search URL for *query*.

        Args:
            query: Free-text search query; it is percent-encoded before
                being placed in the URL.
            *args: Forwarded to ``scrapy.Spider.__init__``.
            **kwargs: Forwarded to ``scrapy.Spider.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.start_urls = ["https://www.google.fr/search?q=%s" %
                           urllib.parse.quote(query)]

    def parse(self, response):
        """Pick the most specific extractor that applies to *response*.

        Extractors are tried in priority order: the calculator answer, then
        the ``rhs_block`` column, then the first regular result. If the
        preferred extractor finds nothing, the next one is tried instead of
        failing.

        Returns:
            The answer dict, or ``None`` when no extractor found anything
            (in which case no item is emitted).
        """
        extractors = []
        if response.css("h2.r"):
            # Google calc
            extractors.append(self.parse_calc)
        if response.css("td#rhs_block>*"):
            extractors.append(self.parse_col)
        extractors.append(self.parse_rest)

        for extractor in extractors:
            item = extractor(response)
            if item is not None:
                return item
        return None

    def parse_calc(self, response):
        """Return the text of the first ``h2.r`` element as a "calc" item.

        Returns:
            ``{"type": "calc", "result": <text>}``, or ``None`` when the
            element has no text node.
        """
        text = response.css("h2.r::text").extract_first()
        if text is None:
            return None
        return {
            "type": "calc",
            "result": text
        }

    def parse_col(self, response):
        """Return the first entry of the ``rhs_block`` list as a "col" item.

        NOTE(review): ``rhs_block`` is presumably Google's right-hand side
        column - confirm against the current page markup.

        Returns:
            ``{"type": "col", "result": <text>}``, or ``None`` when the
            entry is missing.
        """
        return self._html_item(response, "col",
                               "//td[@id='rhs_block']/ol/*[1]")

    def parse_rest(self, response):
        """Return the first entry of the ``ires`` list as an "other" item.

        Returns:
            ``{"type": "other", "result": <text>}``, or ``None`` when the
            entry is missing.
        """
        return self._html_item(response, "other",
                               "//div[@id='ires']/ol/*[1]")

    def _html_item(self, response, kind, xpath):
        """Convert the first node matching *xpath* to text, tagged as *kind*.

        Returns ``None`` instead of raising when *xpath* matches nothing.
        """
        # extract_first() yields None on an empty match, whereas indexing
        # the selector list with [0] would raise IndexError.
        fragment = response.xpath(xpath).extract_first()
        if fragment is None:
            return None
        return {
            "type": kind,
            "result": html2text.html2text(fragment)
        }
if __name__ == "__main__":
    # Script entry point: run a single crawl for the query given on the
    # command line and print the extracted answer.
    if len(sys.argv) < 2:
        sys.exit("Usage: %s QUERY" % sys.argv[0])

    process = CrawlerProcess({
        'USER_AGENT': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',
        # MyPipeline stores every scraped item in the module-level `results`.
        'ITEM_PIPELINES': {'__main__.MyPipeline': 1},
        'LOG_LEVEL': 'ERROR'
    })
    # All command-line words are joined into one space-separated query.
    process.crawl(GoogleNowSpider, query=" ".join(sys.argv[1:]))
    # Per the Scrapy documentation, start() blocks until the crawl finishes.
    process.start()

    # The crawl may yield nothing (request failed, or no answer could be
    # extracted); report that instead of dying with an IndexError traceback.
    if not results:
        sys.exit("No answer found.")
    print(results[0]["result"])
@EvanDotPro

This comment has been minimized.

Copy link

@EvanDotPro EvanDotPro commented May 7, 2019

Hey @Phyks, I was wondering if you'd be willing to update this gist with a header comment specifying which license you intend this code to be released under, if any. Thanks!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment