Skip to content

Instantly share code, notes, and snippets.

@eupendra
Created December 11, 2020 11:23
Show Gist options
  • Save eupendra/e7f72904d9a561adb6e44fae16af64ec to your computer and use it in GitHub Desktop.
Save eupendra/e7f72904d9a561adb6e44fae16af64ec to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
import scrapy
from scrapy.utils.response import open_in_browser
def get_headers(s, sep=': ', strip_cookie=True, strip_cl=True, strip_headers: "list | None" = None) -> dict:
    """Parse a block of raw ``name<sep>value`` header lines into a dict.

    Each line of *s* is stripped of surrounding whitespace. Blank lines and
    lines that do not contain *sep* are ignored. The line is split on the
    FIRST occurrence of *sep* only, so a value that itself contains the
    separator (for example a URL query with ``": "`` in it) is kept whole.

    Args:
        s: Multi-line text, one header per line.
        sep: Separator between header name and value.
        strip_cookie: When true, drop the ``cookie`` header (name compared
            case-insensitively).
        strip_cl: When true, drop the ``content-length`` header (name
            compared case-insensitively).
        strip_headers: Additional header names to drop; compared against the
            name exactly as written in *s* (case-sensitive).

    Returns:
        A dict mapping header names to values. When a name repeats, the
        last occurrence wins.
    """
    excluded = () if strip_headers is None else strip_headers
    headers = {}
    for line in s.split('\n'):
        line = line.strip()
        # Blank lines and lines without the separator carry no header.
        if not line or sep not in line:
            continue
        # Split once: everything after the first separator is the value.
        name, _, value = line.partition(sep)
        # A value written as two literal single quotes means "empty".
        if value == "''":
            value = ''
        lowered = name.lower()
        if strip_cookie and lowered == 'cookie':
            continue
        if strip_cl and lowered == 'content-length':
            continue
        if name in excluded:
            continue
        headers[name] = value
    return headers
class WsSpider(scrapy.Spider):
    """Spider that requests one wine-searcher.com page with browser-like headers."""

    name = 'ws'

    def start_requests(self):
        """Yield the single start request, carrying headers parsed from raw text."""
        # Raw header text in "name: value" form, one header per line.
        # get_headers() is called with its defaults, so the cookie line is
        # dropped before the headers reach the request.
        raw_headers = '''
accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9
accept-encoding: gzip, deflate, br
accept-language: en-US,en;q=0.9
sec-fetch-dest: document
sec-fetch-mode: navigate
sec-fetch-site: none
upgrade-insecure-requests: 1
user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36
cookie: cookie_enabled=true; ID=MJHDC7TT2D70BWF; IDPWD=I55722139; COOKIE_ID=MJHDC7TT2D70BWF; visit=MJHDC7TT2D70BWF%7C20201211094710%7C%2Fbiz%2Fproducers%2Fcanada%7C%7Cend%20; user_status=A%7C; fflag=flag_store_manager%3A0%2Cend; _csrf=9iauzg0wH9qtx0aJNWZfufQyKrOGIxPo; _pxhd=996f3f5938fe57b8b25d1537c6cd623b8244d92d58ffa16bc4bcc83076669d8c:d683a651-3b95-11eb-9210-31942c6b15e0; _pxvid=d683a651-3b95-11eb-9210-31942c6b15e0; _gid=GA1.2.1112469370.1607680033; _ga_M0W3BEYMXL=GS1.1.1607680032.1.1.1607680234.0; _ga=GA1.2.1736177304.1607680033; __gads=ID=03e4454d1a5b70bb:T=1607680036:S=ALNI_MbKY38CaYSCQKSCLAGi_bDFdqg_FQ; _px3=ff767f5546c27c337924d2df0becd8fdfacf7a0daf4b3a3c711597f908d693b6:Kykj5xcibzza3GO1XxKPXIRm2kpApik8o2Rbao62N1hkOI9p1zeqNHVuWq+dbkGTzZk4B9JKS0iE32PEedrqMA==:1000:nJXdHhIDcTcyI0rMJsQi87T+ABu9tKZiCOVSrHzKQodKjvMo6jf9MkpN06BHLqX4xdvRM/Fe5waFUHzxPOZOBeN/pLWwBXnVWt69BLIq7bjj0RLeTeCCLsky4lxOzxgrSZz+zd/FALzdyrTvennzpAQTDVQBeAwzMLKqJK8x9p0=; _px2=eyJ1IjoiNTBhMzNhZTAtM2I5Ni0xMWViLTlkZmMtNGYyNTM0MDhlYzU1IiwidiI6ImQ2ODNhNjUxLTNiOTUtMTFlYi05MjEwLTMxOTQyYzZiMTVlMCIsInQiOjE2MDc2ODEyNTk1NDcsImgiOiIwOGViY2U1OWIwOWM0MzA2MjQyNzRiZTQ1NzA2NWJmZTdkNTdkODMyOWQ3ZDYxNDA4OGMyYTFlNzRkNTBjZDk1In0=; _pxde=5eef4b6944a88ff4b800d3fc7d412d77240d66691e17ac42b7b9026dc4a4ab0e:eyJ0aW1lc3RhbXAiOjE2MDc2ODA5NTk1NDcsImZfa2IiOjAsImlwY19pZCI6W119
'''
        request_headers = get_headers(raw_headers)
        target_url = 'https://www.wine-searcher.com/biz/producers/canada'
        yield scrapy.Request(target_url, headers=request_headers)

    def parse(self, response):
        """Open the downloaded response in a web browser for inspection."""
        open_in_browser(response)
@ZenonH
Copy link

ZenonH commented Jan 1, 2021

Hi; I'm new to Scrapy, but not to web scraping (VBA, Python Beautiful Soup, etc.). I tried running your code in VS Code on Windows. I did not get any results or error messages. In debug mode, the code never enters the functions in the WsSpider class. The only variable that gets set is name = "ws". Any help would be greatly appreciated.

@ZenonH
Copy link

ZenonH commented Jan 1, 2021

Hi;

I got the answer to my original question by watching another video of yours: One just needs to add the process statements to run the code.
Thanks again for posting your videos.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment