Skip to content

Instantly share code, notes, and snippets.

@shihyu
Created April 24, 2015 19:42
Show Gist options
  • Save shihyu/7b0e4bf8469a243ae2bb to your computer and use it in GitHub Desktop.
Save shihyu/7b0e4bf8469a243ae2bb to your computer and use it in GitHub Desktop.
import urllib2
# Fetch a Google News (Taiwan) search result page for the query "3008" and
# print the decoded HTML to stdout.

# Specify the url
url = "https://www.google.com.tw/search?newwindow=1&hl=zh-TW&gl=tw&authuser=0&tbm=nws&q=3008&oq=3008&gs_l=serp.3...0.0.0.13549.0.0.0.0.0.0.0.0..0.0.msedr...0...1c..64.serp..0.0.0.ulDDMtr-drE"

# Real HTTP request headers only.
# - 'method', 'scheme' and 'version' were browser dev-tools pseudo-headers,
#   not HTTP/1.1 headers; sending them verbatim is meaningless.
# - 'host' is filled in by urllib2 from the URL; forcing it here would also
#   carry a stale value onto any cross-host redirect.
# - 'accept-encoding' is deliberately omitted: urllib2 does not decompress
#   responses, so advertising gzip/deflate/sdch could hand compressed bytes
#   to .decode() below.
headers = {
    'accept': 'image/webp,*/*;q=0.8',
    'accept-language': 'zh-TW,zh;q=0.8,en-US;q=0.6,en;q=0.4',
    # NOTE(review): the cookie value below is a redaction placeholder, not a
    # real cookie -- replace it or drop the header before relying on it.
    'cookie': '[890 bytes were stripped]',
    'dnt': '1',
    'referer': 'https://www.google.com.tw/',
    'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.99 Safari/537.36',
}

# This packages the request (it doesn't make it).
# data must be None for a GET: any non-None value (even "") turns the
# request into a POST.
request = urllib2.Request(url, None, headers)

# Sends the request and catches the response
response = urllib2.urlopen(request)
try:
    # Prefer the charset declared in the Content-Type header; fall back to
    # Big5 (the previously hard-coded choice) when the server names none.
    charset = response.headers.getparam('charset') or 'big5'
    # Extracts the response
    html = response.read().decode(charset)
finally:
    # Always release the underlying socket, even if read/decode fails.
    response.close()

# Print it out
print(html)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment