Skip to content

Instantly share code, notes, and snippets.

# NOTE(review): scraped gist fragment — original indentation was lost and the
# method is truncated (the code that writes `data` into the row is missing).
def process_item(self, item, spider):
# Scrapy item-pipeline hook: flattens the item to a dict and writes it into
# self.worksheet (presumably an xlsxwriter worksheet — TODO confirm).
adapter = ItemAdapter(item)
d = adapter.asdict()
if self.current_row_index == 0: # Write Header
# First item seen: emit a header row from the item's field names,
# styled with the pipeline's heading format.
fmt = self.heading_format
for column, value in enumerate(d.keys()):
self.worksheet.write(self.current_row_index, column, value, fmt)
# Values for the current item's data row (row write truncated in this fragment).
data = d.values()
import datetime
import time
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from my_project.spiders.regular import SpiderClassName
def run_spider(spider):
@eupendra
eupendra / clean_up_mp3_files.py
Created June 24, 2021 08:26
I wrote this to rename my mp3 files, removing unwanted text such as "junk" and "backup". Used `renames` instead of `rename` so that new directories can be created as well.
# Rename .mp3 files under the current directory, stripping unwanted tokens
# (e.g. "junk", "backup") from their paths. Uses os.renames rather than
# os.rename so that any new intermediate directories are created (and emptied
# source directories pruned) automatically.
strip_text = ['junk', 'backup']

for fn in glob.glob("**/*.mp3", recursive=True):
    f = os.path.abspath(fn)
    try:
        for t in strip_text:
            if t in f:
                # Remove the token and trim stray whitespace from the path.
                new_f = f.replace(t, "").strip()
                os.renames(f, new_f)
                # Keep working with the renamed path for subsequent tokens.
                f = new_f
    except OSError as e:
        # Was a bare `except:` — narrowed to OSError so real bugs
        # (KeyboardInterrupt, NameError, ...) are no longer swallowed.
        # Best-effort: report and move on to the next file.
        print(f"skipping {f!r}: {e}")
import threading
import time
import requests
from bs4 import BeautifulSoup
import csv
from multiprocessing import Pool, cpu_count
from urllib.parse import urljoin
from concurrent.futures import ThreadPoolExecutor
import pandas as pd
from plates_scrapy import PlatesSpider
# -*- coding: utf-8 -*-
import scrapy
# NOTE(review): scraped gist fragment — indentation was lost and the
# `headers` dict literal is truncated at the end of this snippet.
class AllSpider(scrapy.Spider):
# Spider targeting the NT schools directory single-page app.
name = "all"
start_urls = ["https://directory.ntschools.net/#/schools"]
# Request headers; Accept asks the endpoint for a JSON response.
headers = {
"Accept": "application/json",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "en-US,en;q=0.9,hi;q=0.8,lb;q=0.7",
@eupendra
eupendra / send_mail_scrapy.py
Created December 19, 2020 08:58
Send scrapy output as email attachment
import scrapy
from scrapy.crawler import CrawlerProcess
import config
# NOTE(review): scraped gist fragment — indentation was lost and the
# `custom_settings` dict literal is truncated at the end of this snippet.
# Per the gist title, output was sent as an email attachment.
class DetailsSpider(scrapy.Spider):
# Scrapes the "Travel" category listing of books.toscrape.com.
name = 'details'
start_urls = ['http://books.toscrape.com/catalogue/category/books/travel_2/index.html']
custom_settings = {
# -*- coding: utf-8 -*-
import scrapy
from scrapy.utils.response import open_in_browser
# Parse a raw header block (one "Key: Value" pair per line, as copy-pasted
# from browser dev tools) into a dict.
# NOTE(review): fragment is truncated below; the strip_cookie / strip_cl /
# strip_headers filtering logic is not visible here. The mutable default
# `strip_headers=[]` is shared across calls — confirm it is never mutated.
# (Return annotation fixed from `dict()` — a call — to the type `dict`.)
def get_headers(s: str, sep: str = ': ', strip_cookie: bool = True, strip_cl: bool = True, strip_headers: list = []) -> dict:
d = dict()
for kv in s.split('\n'):
# Ignore blank lines and lines without a "Key: Value" separator.
kv = kv.strip()
if kv and sep in kv:
v=''
# Key is everything before the first separator occurrence.
k = kv.split(sep)[0]
# Duplicate paste of get_headers with a few more lines visible.
# Parses a raw header block (one "Key: Value" per line) into a dict.
# NOTE(review): still truncated below (the `else:` branch and the
# strip_cookie / strip_cl / strip_headers handling are missing). The mutable
# default `strip_headers=[]` is shared across calls — confirm it is never
# mutated. (Return annotation fixed from `dict()` — a call — to `dict`.)
def get_headers(s: str, sep: str = ': ', strip_cookie: bool = True, strip_cl: bool = True, strip_headers: list = []) -> dict:
d = dict()
for kv in s.split('\n'):
kv = kv.strip()
if kv and sep in kv:
v=''
k = kv.split(sep)[0]
# A line with the separator but nothing after it gets an empty value.
if len(kv.split(sep)) == 1:
v = ''
else:
# -*- coding: utf-8 -*-
import scrapy
from scrapy import FormRequest
# NOTE(review): scraped gist fragment — indentation was lost and the body of
# parse() is truncated; presumably it submits the login form via the
# FormRequest imported above — confirm against the full gist.
class LoginSpider(scrapy.Spider):
# Logs in to quotes.toscrape.com starting from its login page.
name = 'login'
start_urls = ['http://quotes.toscrape.com/login']
def parse(self, response):
Public Sub Transpose()
'Transposes a table by reading it into an in-memory string array and
'rebuilding it with rows and columns swapped. The Table/Range/Style types
'suggest this is Word VBA — confirm against the host application.
'NOTE(review): fragment is truncated — only the declarations are visible.
'Declare Variables
Dim SourceTable As Table
Dim RowCount As Long, ColumnCount As Long
Dim TableRange As Range
Dim i As Long, j As Long 'Loop Counters
Dim RowDataAsArray() As String
Dim NewTable As Table
Dim SourceTableStyle As Style
Dim TableAsArray() As String 'Will contain the table text in memory