Skip to content

Instantly share code, notes, and snippets.

def get_headers(s, sep=': ', strip_cookie=True, strip_cl=True, strip_headers: list = None) -> dict:
    """Parse a raw request-header blob into a dict.

    Args:
        s: Header text, one ``name: value`` pair per line.
        sep: Separator between a header's name and its value.
        strip_cookie: When True, drop the ``cookie`` header.
        strip_cl: When True, drop the ``content-length`` header.
        strip_headers: Additional header names (exact match) to drop.

    Returns:
        Dict mapping header name -> value for the headers that were kept.
    """
    # Avoid the shared-mutable-default pitfall (original used ``=[]``).
    strip_headers = strip_headers or []
    d = {}
    for kv in s.split('\n'):
        kv = kv.strip()
        # Skip blank lines and lines without the separator — the original
        # indexed split()[1] unconditionally, which raised IndexError here.
        if not kv or sep not in kv:
            continue
        # partition keeps any further separators inside the value
        # (e.g. "referer: http://x" -> value "http://x").
        k, _, v = kv.partition(sep)
        if strip_cookie and k == 'cookie':
            continue
        if strip_cl and k == 'content-length':
            continue
        if k in strip_headers:
            continue
        d[k] = v
    return d
# -*- coding: utf-8 -*-
import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
# NOTE(review): pasted snippet — indentation was lost in extraction and the
# CrawlSpider is truncated (no `rules` or callback visible); restore both
# before this can run.
class h1_tagsSpider(CrawlSpider):
# Spider identifier used by `scrapy crawl h1_tags`.
name = 'h1_tags'
# Restrict link-following to this domain.
allowed_domains = ['books.toscrape.com']
# Crawl entry point.
start_urls = ['http://books.toscrape.com/']
import scrapy
base = 'http://quotes.toscrape.com/api/quotes?page={}'
# NOTE(review): snippet truncated and indentation lost — parse() stops after
# decoding the JSON body; the pagination logic that would use `base` for the
# next page is missing from this paste.
class ScrollSpider(scrapy.Spider):
# Spider identifier used by `scrapy crawl scroll`.
name = 'scroll'
# First page of the quotes JSON API (built from the module-level `base`).
start_urls = [base.format(1)]
def parse(self, response):
# Response.json() was added in Scrapy 2.2.
data = response.json() #scrapy 2.2
import scrapy
from .config import API
from scraper_api import ScraperAPIClient
client = ScraperAPIClient(API)
# NOTE(review): truncated paste — start_requests() has no body here;
# presumably it yields requests built via the ScraperAPI `client` above,
# but that cannot be confirmed from this fragment.
class ApiSpider(scrapy.Spider):
# Spider identifier used by `scrapy crawl api`.
name = 'api'
def start_requests(self):
from selenium.webdriver import Chrome, ChromeOptions
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import csv
# Configure and launch a headless Chrome session.
options = ChromeOptions()
# NOTE(review): `Options.headless` is deprecated in Selenium 4.8+;
# prefer options.add_argument("--headless=new").
options.headless = True
# NOTE(review): the `executable_path` keyword is deprecated in Selenium 4 —
# pass a Service object (or rely on Selenium Manager) instead. The
# hard-coded Windows path will also break on any other machine.
driver = Chrome(executable_path=r"c:\Program Files (x86)\Chromedriver\chromedriver.exe",
options=options)
import scrapy
import pandas as pd
base_url = 'https://stackoverflow.com/questions/tagged/{}'
def read_csv(path: str = 'so_tags.csv') -> list:
    """Load the tag list from a CSV file.

    Args:
        path: CSV file containing a ``Tags`` column. Defaults to the
            original hard-coded ``so_tags.csv`` for backward compatibility.

    Returns:
        The values of the ``Tags`` column as a plain Python list.
    """
    df = pd.read_csv(path)
    return df['Tags'].values.tolist()
' NOTE(review): truncated paste — only the declarations are visible.
' Appears to transpose a document table by copying its text into an array
' and rebuilding a new table with rows/columns swapped; confirm against
' the full source.
Public Sub Transpose()
'Declare Variables
Dim SourceTable As Table
Dim RowCount As Long, ColumnCount As Long
Dim TableRange As Range
Dim i As Long, j As Long 'Loop Counters
Dim RowDataAsArray() As String
Dim NewTable As Table
Dim SourceTableStyle As Style
Dim TableAsArray() As String 'Will contain the table text in memory
# -*- coding: utf-8 -*-
import scrapy
from scrapy import FormRequest
# NOTE(review): truncated paste — parse() has no body; given the FormRequest
# import above, it presumably submits the login form, but that code is
# missing here.
class LoginSpider(scrapy.Spider):
# Spider identifier used by `scrapy crawl login`.
name = 'login'
# Login page fetched first; parse() receives its response.
start_urls = ['http://quotes.toscrape.com/login']
def parse(self, response):
def get_headers(s, sep=': ', strip_cookie=True, strip_cl=True, strip_headers: list = None) -> dict:
    """Parse a raw request-header blob into a dict.

    Args:
        s: Header text, one ``name: value`` pair per line.
        sep: Separator between a header's name and its value.
        strip_cookie: When True, drop the ``cookie`` header.
        strip_cl: When True, drop the ``content-length`` header.
        strip_headers: Additional header names (exact match) to drop.

    Returns:
        Dict mapping header name -> value for the headers that were kept.
    """
    # Avoid the shared-mutable-default pitfall (original used ``=[]``).
    strip_headers = strip_headers or []
    d = {}
    for kv in s.split('\n'):
        kv = kv.strip()
        # Only lines that actually contain the separator are headers;
        # everything else (blanks, request line) is skipped.
        if not kv or sep not in kv:
            continue
        # Split once so any further separators stay inside the value.
        parts = kv.split(sep, 1)
        k = parts[0]
        v = parts[1] if len(parts) > 1 else ''
        if strip_cookie and k == 'cookie':
            continue
        if strip_cl and k == 'content-length':
            continue
        if k in strip_headers:
            continue
        d[k] = v
    return d
# -*- coding: utf-8 -*-
import scrapy
from scrapy.utils.response import open_in_browser
def get_headers(s, sep=': ', strip_cookie=True, strip_cl=True, strip_headers: list = []) -> dict():
d = dict()
for kv in s.split('\n'):
kv = kv.strip()
if kv and sep in kv:
v=''
k = kv.split(sep)[0]