Created
April 18, 2017 00:26
-
-
Save jonjesbuzz/95b9f6b6616cc1081414a7a7f6bc7349 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#####################
# Jonathan Jemson
# MIT License
# Dumps all our JS into files
#####################
import contextlib
import datetime
import os

import psycopg2
# IDs of the scrapes whose JavaScript should be exported.  This must be a
# real tuple (note the trailing comma): psycopg2 adapts a Python tuple to a
# parenthesised SQL list, which is what ``IN %s`` expects.
SCRAPES = (10,)
# Name of the index file written alongside the dumped scripts.
MAPFILE = 'map.txt'


def main():
    """Dump the stored JavaScript for every scrape in SCRAPES to disk.

    Creates a directory named after the current ISO-8601 timestamp, writes
    each row's content to ``<n>.js`` (numbered from 1 in fetch order) and
    records ``<n>.js<TAB>url<TAB>domain`` in MAPFILE for every script whose
    URL has a row in ``website_javascripts``.  Scripts without such a row
    are still written, they are only omitted from the map.
    """
    if not SCRAPES:
        # An empty tuple would render as ``IN ()``, which is invalid SQL.
        return

    conn = psycopg2.connect('dbname=webcrawler user=webcrawler')
    # ``with conn`` would only end the transaction; closing() actually
    # closes the connection, even when an error is raised part-way.
    with contextlib.closing(conn), conn.cursor() as cur:
        cur.execute(
            'SELECT url, content FROM javascripts WHERE scrape_id IN %s;',
            (SCRAPES,),
        )
        jscontents = cur.fetchall()

        # NOTE(review): isoformat() contains ':' characters, which are not
        # valid in directory names on every platform - confirm the target OS.
        outdir = datetime.datetime.now().isoformat()
        os.makedirs(outdir)

        map_path = os.path.join(outdir, MAPFILE)
        with open(map_path, 'w', encoding='utf-8') as mapping:
            for i, (url, content) in enumerate(jscontents, start=1):
                name = str(i) + ".js"
                # Explicit UTF-8 so non-ASCII script bodies do not depend on
                # the platform's default encoding.
                js_path = os.path.join(outdir, name)
                with open(js_path, 'w', encoding='utf-8') as jsfile:
                    jsfile.write(content)

                cur.execute(
                    "SELECT domain FROM website_javascripts WHERE url=%s;",
                    (url,),
                )
                p = cur.fetchone()
                if p is not None:
                    mapping.write(name + "\t" + url + "\t" + p[0] + "\n")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment