c7h/gist:5521563

## gistfile1.py
#! /usr/bin/env python
'''
Created on 05.05.2013

@author: christoph gerneth

ugly, short hack!
read in firefox bookmarks and
download images from maxgif.com
'''

import sys
import json
import urllib2
import re
import HTMLParser
from subprocess import call

# Usage: gistfile1.py BOOKMARKS_JSON
# BOOKMARKS_JSON is a Firefox bookmarks export in JSON format.
if len(sys.argv) < 2:
    sys.exit("usage: %s BOOKMARKS_JSON" % sys.argv[0])

with open(sys.argv[1], "r") as f:
    jout = json.load(f)

# Hard-coded path to the bookmark folder that is scanned.
# NOTE(review): the indices [1] and [10] presumably match the author's own
# bookmark tree -- adjust them for a different profile.
root = jout["children"][1]["children"][10]["children"]

# Captures an absolute http(s)/ftp URL that directly follows the text "url(".
# The match ends at the first character outside the allowed character class.
maxgif_pattern = r'(?<=url\()(((f|ht){1}tp[s]?://)[-a-zA-Z0-9@:%_\+.~#?&//=;\\]+)'

_htmlparser = HTMLParser.HTMLParser()
unescape = _htmlparser.unescape

img_list = []
for entry in root:
    # Entries without a "uri" key (presumably folders or separators) are
    # skipped instead of raising KeyError.
    entry_uri = entry.get("uri", "")
    if "maxgif" not in entry_uri:
        continue

    # this is a maxgif uri
    try:
        response = urllib2.urlopen(entry_uri, timeout=100)
    except urllib2.URLError as err:
        # One unreachable bookmark must not abort the whole batch.
        print("could not fetch %s: %s" % (entry_uri, err))
        continue
    try:
        sitedata = unescape(response.read())
    finally:
        response.close()

    found = re.findall(maxgif_pattern, sitedata)
    if not found:
        # Previously this case raised IndexError in the print statement,
        # before the try/except that was meant to guard it.
        print("no image found on site %s" % entry_uri)
        continue

    # findall yields one tuple per match (one item per capture group);
    # item 0 of the first match is the complete image URL.
    img_url = found[0][0]
    print("found %s on site %s" % (img_url, entry_uri))
    img_list.append(img_url)

# ok - now, download everything
# Pass the URLs as an argument list (no shell): scraped URLs may contain
# characters such as '&' or ';' that a shell would interpret.
if img_list:
    call(["wget"] + img_list)
	#! /usr/bin/env python
	'''
	Created on 05.05.2013

	@author: christoph gerneth

	ugly, short hack!
	read in firefox bookmarks and
	download images from maxgif.com
	'''

	import sys
	import json
	import urllib2
	import re
	import HTMLParser
	from subprocess import call

	# Usage: gistfile1.py BOOKMARKS_JSON
	# BOOKMARKS_JSON is a Firefox bookmarks export in JSON format.
	if len(sys.argv) < 2:
		sys.exit("usage: %s BOOKMARKS_JSON" % sys.argv[0])

	with open(sys.argv[1], "r") as f:
		jout = json.load(f)

	# Hard-coded path to the bookmark folder that is scanned.
	# NOTE(review): the indices [1] and [10] presumably match the author's own
	# bookmark tree -- adjust them for a different profile.
	root = jout["children"][1]["children"][10]["children"]

	# Captures an absolute http(s)/ftp URL that directly follows the text "url(".
	# The alternation must be a plain "|": an escaped "\|" would only match a
	# literal pipe character and the pattern would never find a URL.
	maxgif_pattern = r'(?<=url\()(((f|ht){1}tp[s]?://)[-a-zA-Z0-9@:%_\+.~#?&//=;\\]+)'

	_htmlparser = HTMLParser.HTMLParser()
	unescape = _htmlparser.unescape

	img_list = []
	for entry in root:
		# Entries without a "uri" key (presumably folders or separators) are
		# skipped instead of raising KeyError.
		entry_uri = entry.get("uri", "")
		if "maxgif" not in entry_uri:
			continue

		# this is a maxgif uri
		try:
			response = urllib2.urlopen(entry_uri, timeout=100)
		except urllib2.URLError as err:
			# One unreachable bookmark must not abort the whole batch.
			print("could not fetch %s: %s" % (entry_uri, err))
			continue
		try:
			sitedata = unescape(response.read())
		finally:
			response.close()

		found = re.findall(maxgif_pattern, sitedata)
		if not found:
			# Indexing an empty result list would raise IndexError.
			print("no image found on site %s" % entry_uri)
			continue

		# findall yields one tuple per match (one item per capture group);
		# item 0 of the first match is the complete image URL.
		img_url = found[0][0]
		print("found %s on site %s" % (img_url, entry_uri))
		img_list.append(img_url)

	# ok - now, download everything
	# Pass the URLs as an argument list (no shell): scraped URLs may contain
	# characters such as '&' or ';' that a shell would interpret.
	if img_list:
		call(["wget"] + img_list)