@CodyKochmann
Last active May 16, 2021 19:50
sqlite scraper in python
import sqlite3, requests, sys

# in-memory database; swap ':memory:' for a filename to persist results
db = sqlite3.connect(':memory:')
cur = db.cursor()

def run(*sql):
    '''execute a statement, logging progress to stderr and rows to stdout'''
    print('running:', *sql, file=sys.stderr)
    for i, row in enumerate(cur.execute(*sql)):
        if i == 0:
            print('result:', file=sys.stderr)
        print(i, end='\t', file=sys.stderr)
        print(*row)

def wget(url):
    '''fetch a url and return its body as text'''
    print('wget:', url, file=sys.stderr)
    return requests.get(url).text

# expose wget() to SQL so the trigger below can call it
db.create_function('wget', 1, wget)

schema = [
    ''' CREATE TABLE scraper_targets (
            url TEXT UNIQUE NOT NULL ON CONFLICT IGNORE,
            content JSON
        )
    ''',
    ''' CREATE TRIGGER scrape_new_target
            AFTER INSERT ON scraper_targets
            WHEN
                new.content is null
            BEGIN
                UPDATE
                    scraper_targets
                SET
                    content=wget(new.url)
                WHERE
                    url == new.url;
            END
    '''
]

# load the schema
print('loading the db schema')
for i in schema:
    run(i)

# test out the scraper bit
print('giving the scraper something to scrape')
run(
    'INSERT INTO scraper_targets (url) VALUES (?)',
    ['https://mo-powah.baby/test.json']
)

# see the result in the db
print('dumping scraper_targets')
run('SELECT * FROM scraper_targets')
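
# optional follow-up (a sketch, not part of the original gist): once the
# trigger has filled in `content`, SQLite's JSON1 functions can query it
# directly in SQL. This assumes the SQLite build includes JSON1 and that the
# fetched document has a top-level "name" key -- both are assumptions here.
print('extracting a field from the scraped JSON (hypothetical example)')
run("SELECT url, json_extract(content, '$.name') FROM scraper_targets")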