Created
August 13, 2012 07:46
-
-
Save binux/3337949 to your computer and use it in GitHub Desktop.
search for a larger size of image
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# vim: set et sw=4 ts=4 sts=4 ff=unix fenc=utf8:
# Author: Binux<17175297.hk@gmail.com>
# http://binux.me
# Created on <T_CREATE_DATE>
"""Search for a larger size of an image using Google search-by-image.

Usage: python SCRIPT IMAGE_FILE

The image file is uploaded to Google's search-by-image endpoint.  Every
image URL found in the result page is printed together with the
(width, height) pair found at the same position in that page.  When the
result page contains a "More sizes" link, that link is printed and
followed, and the images listed there are printed the same way.
"""
import re
import sys
from xml.sax.saxutils import unescape

import requests

# All patterns are applied to the raw HTML text of Google's response.
# NOTE(review): confirm against a live response whether the markup contains
# a literal '&' / multiplication sign, or the entities '&amp;' / '&times;';
# the patterns are kept exactly as found in this file.
all_size_re = re.compile(r'href="([^\"]+)">More sizes</a>')
image_re = re.compile(r'href="/imgres\?imgurl=(.*?)&imgrefurl=')
image_size_re = re.compile(r'(\d+) × (\d+)')
image_size2_re = re.compile(r'(\d+) × (\d+)</span>')

_GOOGLE = "http://www.google.com"

# The same browser-like User-Agent is sent with both requests.
_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:7.0.1) Gecko/20100101 Firefox/7.0.1',
}


def _print_matches(html, size_re):
    """Print every image URL in *html* followed by its size tuple.

    URLs come from ``image_re`` and sizes from *size_re*; the two lists are
    paired purely by position, and ``zip`` stops at the shorter one, so
    unmatched trailing entries are silently dropped.
    """
    urls = image_re.findall(html)
    sizes = size_re.findall(html)
    for url, size in zip(urls, sizes):
        # Single-argument print() gives the same output on Python 2 and 3.
        print("%s %s" % (unescape(url), size))


def main(argv=None):
    """Upload the image named in *argv* and print the matching images.

    Args:
        argv: Argument list in ``sys.argv`` layout (program name first,
            image path second).  Defaults to ``sys.argv``.

    Returns:
        Process exit status: 0 on success, 1 when the response does not
        look like a result page, 2 when no image path was given.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.stderr.write("usage: python SCRIPT IMAGE_FILE\n")
        return 2
    file_path = argv[1]

    # Binary mode keeps the image bytes intact, and the context manager
    # closes the handle as soon as the upload has been sent.
    with open(file_path, 'rb') as image_file:
        r = requests.post(
            _GOOGLE + "/searchbyimage/upload",
            data={
                'image_url': '',
                'btnG': 'Search',
                'image_content': '',
                'filename': '',
                'hl': 'en',
                'bih': 704,
                'biw': 1440,
                'num': 10,
                'safe': 'off',
            },
            files={
                'encoded_image': ('1.jpg', image_file),
            },
            headers=_HEADERS,
        )

    # The script treats a response without "simg" as a failed search.  This
    # is an explicit check rather than an assert so it survives `python -O`.
    if "simg" not in r.text:
        sys.stderr.write("unexpected response: 'simg' not found in result page\n")
        return 1

    print("Pages that include matching images:")
    _print_matches(r.text, image_size_re)

    all_size = all_size_re.search(r.text)
    if all_size:
        # The href is taken from HTML, so entities are decoded before use.
        all_size_url = unescape(all_size.group(1))
        print("All size:")
        print(all_size_url)
        r_all_size = requests.get(_GOOGLE + all_size_url, headers=_HEADERS)
        _print_matches(r_all_size.text, image_size2_re)
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment