Skip to content

Instantly share code, notes, and snippets.

@jonjesbuzz
Created April 18, 2017 00:26
Show Gist options
  • Save jonjesbuzz/95b9f6b6616cc1081414a7a7f6bc7349 to your computer and use it in GitHub Desktop.
Save jonjesbuzz/95b9f6b6616cc1081414a7a7f6bc7349 to your computer and use it in GitHub Desktop.
#####################
# Jonathan Jemson
# MIT License
# Dumps all our JS into files
####################
import psycopg2
import os
import datetime
# Export every stored JavaScript body for the selected scrapes into a fresh
# directory named after the current timestamp, and write a tab-separated index
# (MAPFILE) mapping each generated file name to its source URL and domain.

# Scrape IDs to export. The trailing comma matters: ``(10)`` is just the
# integer 10, whereas ``(10,)`` is a one-element tuple that can grow.
SCRAPES = (10,)
MAPFILE = 'map.txt'

conn = psycopg2.connect('dbname=webcrawler user=webcrawler')
try:
    cur = conn.cursor()
    try:
        # psycopg2 adapts a Python list to a PostgreSQL array, so
        # ``= ANY(%s)`` accepts any number of scrape IDs (even zero),
        # unlike ``IN (%s)`` which only ever took a single value.
        cur.execute(
            'SELECT url, content FROM javascripts WHERE scrape_id = ANY(%s);',
            (list(SCRAPES),)
        )
        jscontents = cur.fetchall()

        outdir = datetime.datetime.now().isoformat()
        os.makedirs(outdir)

        # Explicit UTF-8 so non-ASCII script content or URLs do not depend on
        # the platform's default encoding.
        with open(os.path.join(outdir, MAPFILE), 'w', encoding='utf-8') as mapping:
            # Files are numbered from 1 in the order the rows were returned.
            for i, js in enumerate(jscontents, start=1):
                url, content = js[0], js[1]
                js_name = str(i) + ".js"

                with open(os.path.join(outdir, js_name), 'w', encoding='utf-8') as jsfile:
                    jsfile.write(content)

                cur.execute(
                    "SELECT domain FROM website_javascripts WHERE url=%s;",
                    (url,)
                )
                p = cur.fetchone()
                # Scripts with no matching website row are still dumped to
                # disk; they are simply left out of the index.
                if p is not None:
                    mapping.write(js_name + "\t" + url + "\t" + p[0] + "\n")
    finally:
        cur.close()
finally:
    # Always release the connection, even if a query or file write failed.
    conn.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment