Skip to content

Instantly share code, notes, and snippets.

@BernardOng
Created August 22, 2016 00:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save BernardOng/b893bad82ad94f7996ad7f380fa1083d to your computer and use it in GitHub Desktop.
Save BernardOng/b893bad82ad94f7996ad7f380fa1083d to your computer and use it in GitHub Desktop.
def retrieveSource(self, url):
# Retrieve raw html as beautiful soup form
headers = {
'Accept' : 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Encoding' : 'gzip, deflate, sdch',
'Accept-Language' : 'en-US,en;q=0.8,ar;q=0.6',
'Cache-Control' : 'max-age=0',
'Connection' : 'keep-alive',
'Cookie' : """x-wl-uid=1sKNa/Tu+dnJiObdvPzWmOWvHoQHs7SbunE2P1wtUNyXZoFmxflkD74oIUtjg6je4M1DNJgid9D73eSQ/1szSzyViGKhQNfYhzyrIF+0sYuCypgc4oFkknUPA4xZ0Aeienl0XdGyC/GE=; bhqns=d0sdm59meaki4gsmq4gsdpv5k3; x-main="HSxQRguP7RaA0XNnOlvYoSu?jhNN?Gbu"; b2b-main=0; s_vnum=1857890451251%26vn%3D3; appstore-devportal-locale=en_US; aws_lang=en; aws-target-data=%7B%22support%22%3A%221%22%7D; s_dslv=1428249404729; aws-target-visitor-id=1428249404407-552911.26_29; s_sq=%5B%5BB%5D%5D; _mkto_trk=id':'810-GRW-452&token':'_mch-amazon.com-1428249580944-19910; __utma=194891197.1337326270.1428250257.1428250257.1428250257.1; __utmc=194891197; __utmz=194891197.1428250257.1.1.utmccn=(referral)|utmcsr=google.com.eg|utmcct=/|utmcmd=referral; pN=1; s_pers=%20s_fid%3D3E166C526619A613-1D8EA49490B2AAB8%7C1491407981003%3B%20s_dl%3D1%7C1428251381006%3B%20gpv_page%3DUS%253AAS%253APADS-how-it-works%7C1428251381011%3B%20s_ev15%3D%255B%255B%2527NSGoogle%2527%252C%25271428249581020%2527%255D%255D%7C1586102381020%3B%20s_invisit%3Dtrue%7C1428252056959%3B%20s_nr%3D1428250256962-Repeat%7C1436026256962%3B; s_sess=%20c_m%3Dwww.google.com.egNatural%2520Search%3B%20s_ppvl%3DUS%25253AAS%25253APADS-how-it-works%252C56%252C56%252C955%252C1920%252C955%252C1920%252C1080%252C1%252CL%3B%20s_cc%3Dtrue%3B%20s_sq%3D%3B%20s_ppv%3DUS%25253AAS%25253APADS-how-it-works%252C91%252C56%252C1555%252C1920%252C955%252C1920%252C1080%252C1%252CL%3B; session-token=opZ2DsH3n42Pz4spSbxaz9oQx5kGTKq1pj4n5WE+pGWvRBk6IyjvUQWw/GrDH2dGqbwR7d97ibcno5K4B1yUnpoVceB8aEDziGrWpU+I+tHBkWWy0ZMorJZowa3DMS40gfMTAc1Gd/o4ZpYOibC1SPQEzU3Eg2OslDRNyys+e90lJaothbV12MO62XiHFsnjjhCVFk8Ztr3hRgnl5oC1NvbZ9lZ7QjmhDP9wXgnYmG6wevsgqdukBbKoohHv1kGOWg60CWLBFS/RhVqukMAO5g==; skin=noskin; ubid-main=182-4576256-0011665; session-id-time=2082787201l; session-id=180-4378028-6113727; csm-hit=1RPD28BQJKGVJNHSZKFR+s-1RPD28BQJKGVJNHSZKFR|1428854614288""",
'Host' : 'www.amazon.com',
'User-Agent' : 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.118 Safari/537a'
}
response = requests.get(url, headers=headers)
return BeautifulSoup(response.content, 'html.parser')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment