# binux/google_image_search.py

## google_image_search.py
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# vim: set et sw=4 ts=4 sts=4 ff=unix fenc=utf8:
# Author: Binux<17175297.hk@gmail.com>
#         http://binux.me
# Created on <T_CREATE_DATE>

import sys
import re
import requests
from xml.sax.saxutils import unescape

# Patterns applied to the raw (still HTML-escaped) result pages.
# Group 1: href of the "More sizes" link.
all_size_re = re.compile(r'href="([^\"]+)">More sizes</a>')
# Group 1: value of the imgurl parameter of each /imgres result link.
image_re = re.compile(r'href="/imgres\?imgurl=(.*?)&amp;imgrefurl=')
# Two numbers separated by a multiplication sign, as written on the first
# results page (&times;) ...
image_size_re = re.compile(r'(\d+)&nbsp;&times;&nbsp;(\d+)')
# ... and as written on the "More sizes" page (&#215; followed by </span>).
image_size2_re = re.compile(r'(\d+)&nbsp;&#215;&nbsp;(\d+)</span>')

# Sent with every request; shared so both requests stay in sync.
REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:7.0.1) Gecko/20100101 Firefox/7.0.1',
}


def matched_images(html, size_re):
    """Return ``(url, size)`` pairs scraped from *html*.

    ``url`` is each ``image_re`` match with HTML entities unescaped and
    ``size`` is the tuple of the two number strings captured by *size_re*.
    URLs and sizes are paired positionally with ``zip``, so the result is
    as long as the shorter of the two match lists.
    """
    urls = image_re.findall(html)
    sizes = size_re.findall(html)
    return [(unescape(url), size) for url, size in zip(urls, sizes)]


def print_matches(html, size_re):
    """Print one ``<url> <size tuple>`` line per pair found in *html*."""
    for url, size in matched_images(html, size_re):
        print("%s %s" % (url, size))


def upload_image(file_path):
    """POST the image at *file_path* to the search-by-image upload endpoint.

    The file is opened in binary mode and closed as soon as the request
    has been sent.  Returns the ``requests`` response object.
    """
    with open(file_path, 'rb') as image_file:
        return requests.post(
            "http://www.google.com/searchbyimage/upload",
            data={
                'image_url': '',
                'btnG': 'Search',
                'image_content': '',
                'filename': '',
                'hl': 'en',
                'bih': 704,
                'biw': 1440,
                'num': 10,
                'safe': 'off',
            },
            files={'encoded_image': ('1.jpg', image_file)},
            headers=REQUEST_HEADERS,
        )


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("usage: %s IMAGE_FILE" % sys.argv[0])

    r = upload_image(sys.argv[1])
    # The script requires "simg" to appear in the response before parsing it
    # (presumably a marker of a successful results page -- TODO confirm).
    # An explicit check is used because ``assert`` is stripped under -O.
    if "simg" not in r.text:
        sys.exit("unexpected response: 'simg' not found in the result page")

    print("Pages that include matching images:")
    print_matches(r.text, image_size_re)

    all_size = all_size_re.search(r.text)
    if all_size:
        all_size_url = unescape(all_size.group(1))
        print("All size:")
        print(all_size_url)
        r_all_size = requests.get("http://www.google.com" + all_size_url,
                                  headers=REQUEST_HEADERS)
        print_matches(r_all_size.text, image_size2_re)
	#!/usr/bin/env python
	# -*- encoding: utf-8 -*-
	# vim: set et sw=4 ts=4 sts=4 ff=unix fenc=utf8:
	# Author: Binux<17175297.hk@gmail.com>
	#         http://binux.me
	# Created on <T_CREATE_DATE>

# NOTE(review): everything from here on repeats the script that appears
# earlier in this file.  This copy had lost its block indentation (so it
# could not be parsed) and its patterns had HTML entities decoded --
# presumably it went through an HTML/markdown renderer; confirm whether the
# second copy should be kept at all.  Structure is restored below.

import sys
import re
import requests
from xml.sax.saxutils import unescape

# Patterns are applied to the raw, still HTML-escaped page text, so they are
# written with entities, in agreement with the definitions earlier in the
# file.  This also keeps the source ASCII-only.
# Group 1: href of the "More sizes" link.
all_size_re = re.compile(r'href="([^\"]+)">More sizes</a>')
# Group 1: value of the imgurl parameter of each /imgres result link.
image_re = re.compile(r'href="/imgres\?imgurl=(.*?)&amp;imgrefurl=')
# Two numbers separated by a multiplication sign on the first results page.
image_size_re = re.compile(r'(\d+)&nbsp;&times;&nbsp;(\d+)')
# Same, as written on the "More sizes" page (followed by </span>).
image_size2_re = re.compile(r'(\d+)&nbsp;&#215;&nbsp;(\d+)</span>')
if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("usage: %s IMAGE_FILE" % sys.argv[0])
    file_path = sys.argv[1]
    # Binary mode: the upload is image data; the handle is closed once the
    # request has been sent.
    with open(file_path, 'rb') as image_file:
        r = requests.post("http://www.google.com/searchbyimage/upload",
                data = {
                    'image_url': '',
                    'btnG': 'Search',
                    'image_content': '',
                    'filename': '',
                    'hl': 'en',
                    'bih': 704,
                    'biw': 1440,
                    'num': 10,
                    'safe': 'off'
                    },
                files = {
                    'encoded_image': ('1.jpg', image_file)
                    },
                headers = {
                    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:7.0.1) Gecko/20100101 Firefox/7.0.1'
                    },
                )
    # Explicit check instead of ``assert``, which is stripped under -O.
    if "simg" not in r.text:
        sys.exit("unexpected response: 'simg' not found in the result page")

    print("Pages that include matching images:")
    images = image_re.findall(r.text)
    images_size = image_size_re.findall(r.text)
    # URLs and sizes are paired positionally.
    for each in zip(images, images_size):
        print("%s %s" % (unescape(each[0]), each[1]))

    all_size = all_size_re.search(r.text)
    if all_size:
        all_size_url = unescape(all_size.group(1))
        print("All size:")
        print(all_size_url)
        r_all_size = requests.get("http://www.google.com"+all_size_url,
                headers = {
                    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:7.0.1) Gecko/20100101 Firefox/7.0.1'
                    },
                )
        images = image_re.findall(r_all_size.text)
        images_size = image_size2_re.findall(r_all_size.text)
        for each in zip(images, images_size):
            print("%s %s" % (unescape(each[0]), each[1]))