Last active
May 16, 2021 19:50
-
-
Save CodyKochmann/69c0b56be888781f81daf1f6d2d498ea to your computer and use it in GitHub Desktop.
sqlite scraper in python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sqlite3
import sys

import requests
# The whole script works against a single in-memory SQLite database
# (':memory:'), so nothing is written to disk and the data is gone once the
# connection goes away.
db = sqlite3.connect(':memory:')
# One cursor on that connection, shared by every statement run() executes.
cur = db.cursor()
def run(*sql):
    """Execute one statement on the shared cursor and echo what it returns.

    Args:
        *sql: Forwarded unchanged to ``cur.execute``: the SQL text, optionally
            followed by the parameters to bind to it.

    Returns:
        None. Output is printed instead: the statement, a ``result:`` banner
        (only when at least one row comes back) and each row's index go to
        stderr, while the row values themselves go to stdout.
    """
    print('running:', *sql, file=sys.stderr)
    for i, row in enumerate(cur.execute(*sql)):
        if i == 0:
            # Print the banner once, just before the first row.
            print('result:', file=sys.stderr)
        # The index ends with a tab rather than a newline so that, when both
        # streams share a terminal, the row values follow it on the same line.
        print(i, end='\t', file=sys.stderr)
        print(*row)
def wget(url, timeout=30):
    """Download *url* with an HTTP GET and return the response body as text.

    This module registers the function with SQLite as the one-argument SQL
    function ``wget``, so SQL invokes it with the URL only and ``timeout``
    keeps its default.

    Args:
        url: Address to fetch; it is echoed to stderr before the request.
        timeout: Seconds to wait on the server before giving up. requests
            applies no timeout unless one is passed, so without it a stalled
            server would block the calling SQL statement indefinitely.

    Returns:
        The decoded response body (``Response.text``). The HTTP status is not
        checked, so the body of an error response is returned like any other.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed, including when ``timeout`` expires.
    """
    print('wget:', url, file=sys.stderr)
    return requests.get(url, timeout=timeout).text
# Expose the Python function wget() to SQL as wget(url), taking exactly one
# argument, so that statements and triggers on this connection can call it.
db.create_function('wget', 1, wget)
# DDL statements, executed in order, that build the scraper database.
schema = [
    # One row per URL to scrape; `content` starts out NULL and is filled in
    # by the trigger below.
    # NOTE(review): in SQLite each column constraint carries its own conflict
    # clause, so ON CONFLICT IGNORE here appears to bind to NOT NULL only --
    # a NULL url is skipped silently, while a duplicate url still fails the
    # UNIQUE constraint. Confirm whether duplicates were meant to be ignored.
    ''' CREATE TABLE scraper_targets (
            url TEXT UNIQUE NOT NULL ON CONFLICT IGNORE,
            content JSON
        )
    ''',
    # Whenever a row is inserted without content, call the SQL function
    # wget() on its url and store the result in that same row.
    ''' CREATE TRIGGER scrape_new_target
        AFTER INSERT ON scraper_targets
        WHEN
            new.content is null
        BEGIN
            UPDATE
                scraper_targets
            SET
                content=wget(new.url)
            WHERE
                url == new.url;
        END
    ''',
]
# Load the schema: create the table and the scraping trigger.
print('loading the db schema')
for statement in schema:
    run(statement)

# Test out the scraper: inserting a URL without content fires the trigger,
# which fetches the page through wget() during this INSERT.
print('giving the scraper something to scrape')
run(
    'INSERT INTO scraper_targets (url) VALUES (?)',
    ['https://mo-powah.baby/test.json']
)

# See the result in the db: each row printed is (url, content).
print('dumping scraper_targets')
run('SELECT * FROM scraper_targets')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment