Last active
August 29, 2015 14:16
-
-
Save jbobrow/824fb314839b29ff4f72 to your computer and use it in GitHub Desktop.
Script for scraping processing sourcecode from openProcessing.org
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# Python script to scrape Processing sketches from openProcessing.org
# by Jonathan Bobrow, 2015
#
# This should only need to be run once to gather all of the hosted Processing sketches.
#
# The script requests each sketch page and, if there is a sketch with that number, it looks
# inside the sourceCode div and writes that code into a .pde file, saving the file inside a
# folder with the same name, so Processing will be happy when asked to run the sketch.
#
# EDIT THESE VALUES:
# start_sketch - the first sketch to begin scraping from
# end_sketch - the last sketch to scrape
# my_location - location to save the sketches in
#
# Notes for running Processing sketches from the terminal:
# in a terminal, run the Processing sketch from its folder like so
# processing-java --sketch=full_path/processing_XXXXX --output=processing_XXXXX --run
import os | |
import re | |
import urllib2 | |
from bs4 import BeautifulSoup as soup | |
from os.path import expanduser, join | |
# edit these values
start_sketch = 1  # number of the first sketch to scrape
end_sketch = 186337  # number of the last sketch to scrape
my_location = '~/MIT_local/Playful/EigenProcessing'  # folder the sketches are saved under ('~' is expanded when the path is built)
def downloadSourceCode(sketchNum):
    """Fetch one sketch's code page and save its source as a .pde file.

    Requests http://www.openprocessing.org/sketch/<sketchNum>/code and looks
    for a <div class="sourceCode"> element.  When one is found, its text is
    written, UTF-8 encoded, to
    <my_location>/processing_<sketchNum>/processing_<sketchNum>.pde,
    creating the folder if it does not exist yet.

    Args:
        sketchNum: sketch number to fetch; it is interpolated into the URL
            and into the folder and file names.

    Returns:
        None.  Nothing is written when the request fails or when the page
        has no sourceCode div.
    """
    print(sketchNum)
    url = "http://www.openprocessing.org/sketch/%s/code" % sketchNum

    # urlopen raises URLError (HTTPError is a subclass) when the request
    # fails; one bad sketch number must not abort the whole scraping run.
    try:
        response = urllib2.urlopen(url)
    except urllib2.URLError as err:
        print('skipping %s: %s' % (sketchNum, err))
        return

    # Parse while the connection is open, then release it even if parsing fails.
    try:
        web_soup = soup(response)
    finally:
        response.close()

    # get the div holding the sketch source
    result = web_soup.find(name="div", attrs={'class': 'sourceCode'})
    if not result:
        return

    print('downloading %s' % sketchNum)
    sourceCode = result.text

    # The folder and the .pde file share one name, as the header notes
    # Processing wants when it is asked to run the sketch.
    name = "processing_%s" % sketchNum
    path = expanduser('%s/%s/' % (my_location, name))
    fullpath = join(path, '%s.pde' % name)
    if not os.path.exists(path):
        os.makedirs(path)

    # 'with' guarantees the file is closed even if the write raises.
    with open(fullpath, "w") as text_file:
        text_file.write(sourceCode.encode('utf-8'))
# iterate forwards through the sketches; end_sketch is the last sketch to
# scrape, so the range is extended by one to include it
for sketchNum in xrange(start_sketch, end_sketch + 1):
    downloadSourceCode(sketchNum)

# iterate backwards through the sketches (inclusive of both ends)
# for sketchNum in xrange(end_sketch, start_sketch - 1, -1):
#     downloadSourceCode(sketchNum)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
To run this backwards, change the for loop at the bottom to: