LD48 #21 Game Downloader
# By David Serrano aka N0_Named_Guy (@n0namedguy)
# Downloads all (Linux) games to the current folder, sorted by user name
# Change the code below to match your filtering needs
# Requires BeautifulSoup; download it from http://www.crummy.com/software/BeautifulSoup/
from BeautifulSoup import BeautifulSoup
from StringIO import StringIO
import urllib, urllib2
import re, htmlentitydefs
import json
import pickle
import os, sys
import shutil

def unescape(text):
    """Removes HTML or XML character references
    and entities from a text string.

    from Fredrik Lundh
    http://effbot.org/zone/re-sub.htm#unescape-html
    """
    def fixup(m):
        text = m.group(0)
        if text[:2] == "&#":
            # character reference
            try:
                if text[:3] == "&#x":
                    return unichr(int(text[3:-1], 16))
                else:
                    return unichr(int(text[2:-1]))
            except ValueError:
                pass
        else:
            # named entity
            try:
                text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
            except KeyError:
                pass
        return text  # leave as is
    return re.sub(r"&#?\w+;", fixup, text)
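
# Illustrative example (not in the original script): with the regex and
# entity table above, unescape(u"1 &lt; 2 &amp;&amp; 3 &gt; 2") returns
# u"1 < 2 && 3 > 2"; unrecognized entities are left untouched.
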
def xmkdir(d):
    """mkdir that ignores errors (e.g. the directory already exists)."""
    try:
        os.mkdir(d)
    except:
        pass

def xmv(src, dest):
    """move that ignores errors (e.g. the destination already exists)."""
    try:
        shutil.move(src, dest)
    except:
        pass

base_url = "http://www.ludumdare.com/compo/ludum-dare-21/"

def entry_str(entry):
    return "%s (by %s) [V: %d C: %d] - %s" % (entry['name'],
                                              entry['user'],
                                              entry['votes'],
                                              entry['coolness'],
                                              base_url + entry['rate'])

def download(url, path):
    ret = urllib.urlretrieve(url)
    print "moving %s to %s" % (ret[0], path)
    xmv(ret[0], path)

def parse_entry(entry):
    # Download every Linux build of the entry into <user>/<platform>/
    for plat in entry['download']:
        if plat.lower().find('linux') >= 0:
            print "[%s] %s" % (plat, entry_str(entry))
            path = entry['user'] + os.sep + plat
            xmkdir(entry['user'])
            xmkdir(path)
            try:
                download(entry['download'][plat], path)
            except Exception as e:
                print e

# Make the request
url = base_url + "?action=misc_links"
response = urllib2.urlopen(url)
page = response.read()

# Get the soup object
soup = BeautifulSoup(page)
body = soup.html.body
div = body.find('div', {'id': 'compo2'})
table = div.find('table')
trows = table.findAll('tr')

# Each entry is a dictionary with the following keys:
# 'name'     - the entry's name
# 'user'     - the author's user name
# 'rate'     - relative link to the entry's rating page
# 'download' - dictionary where each key is a platform and each value a download link
# 'votes'    - number of votes
# 'coolness' - coolness score
count = 0
for trow in trows[1:]:
    fields = trow.findAll('td')
    aelem = fields[0].find('a')
    entry = {}
    entry['rate'] = str(aelem['href'])
    entry['name'] = str(unescape(aelem.string))
    entry['user'] = str(fields[1].string)
    entry['download'] = {}
    linkselem = fields[2].findAll('a')
    for link in linkselem:
        entry['download'][link.string] = str(link['href'])
    entry['votes'] = int(str(fields[3].string))
    entry['coolness'] = int(str(fields[4].string))
    count += 1
    print "%d out of 599" % (count)
    parse_entry(entry)
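
Usage note (not part of the original gist; the file name below is hypothetical): the script targets Python 2 with BeautifulSoup 3 installed, and should be run from the directory where the per-user folders are to be created, e.g.

    python ld21_downloader.py

Each entry with a Linux download link is then fetched with urlretrieve and moved into <user>/<platform>/.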