# icedraco/url-extract-photobucket.py

## url-extract-photobucket.py
###--# PhotoBucket URL Extractor 0.1 [20140730-2207]
#
# This script retrieves a library/album page from a given URL using urllib2 and
# extracts all the URLs to the full images featured in that page. The URLs are
# printed to stdout and can later be redirected through the shell.
#
# Author: IceDragon <icedragon at quickfox org>

import urllib2
import re
import sys

# Captures the text between the quotes of every "fullsizeUrl":"..." pair;
# the single group holds the still-escaped URL.
RE_FULLSIZE_URL_PART = re.compile(
        r'"fullsizeUrl":"([^"]*)"'
)


def clean_url(dirty_url):
        '''Return dirty_url with every backslash character removed.

        The value comes from the "fullsizeUrl" JSON data embedded in the
        page, where the URL is stored with backslash escapes (presumably
        JSON-escaped slashes). Dropping the backslashes yields the plain URL.

        Note that all backslashes are removed unconditionally; other escape
        sequences are not decoded, they only lose their backslash.

        dirty_url -- the raw string captured from the page.
        Returns the same string without any backslash characters.
        '''
        return dirty_url.replace('\\', '')


def find_urls(html_data):
        '''Return the list of image URLs featured in a PhotoBucket HTML page.

        Every value matched by RE_FULLSIZE_URL_PART is passed through
        clean_url(); the results keep their order of appearance in the page.

        html_data -- text of the page to scan.
        Returns a list of cleaned URL strings (empty when nothing matches).
        '''
        matches = RE_FULLSIZE_URL_PART.findall(html_data)
        # Build a real list so callers get the documented list type on any
        # Python version, rather than a lazy map object.
        return [clean_url(match) for match in matches]


def main(argv):
        '''Print the full-size image URLs found on each page listed in argv.

        argv -- iterable of page URLs to download and scan.
        Returns 0 once every page has been processed.

        Errors raised by urllib2 while fetching a page are not caught here.
        '''
        for scan_url in argv:
                response = urllib2.urlopen(scan_url)
                try:
                        html_data = response.read()
                finally:
                        # Release the connection even if reading fails.
                        response.close()
                # find_urls() already returns cleaned URLs, so they are
                # printed as-is.
                for image_url in find_urls(html_data):
                        print(image_url)
        return 0


# Script entry point: scan the URLs given on the command line and use the
# value returned by main() as the process exit status.
if __name__ == "__main__":
        sys.exit(main(sys.argv[1:]))
	###--# PhotoBucket URL Extractor 0.1 [20140730-2207]
	#
	# This script retrieves a library/album page from a given URL using urllib2 and
	# extracts all the URLs to the full images featured in that page. The URLs are
	# printed to stdout and can later be redirected through the shell.
	#
	# Author: IceDragon <icedragon at quickfox org>

	import urllib2
	import re
	import sys

	# Captures the text between the quotes of every "fullsizeUrl":"..." pair;
	# the single group holds the still-escaped URL.
	RE_FULLSIZE_URL_PART = re.compile(
		r'"fullsizeUrl":"([^"]*)"'
	)


	def clean_url(dirty_url):
		'''Return dirty_url with every backslash character removed.

		The value comes from the "fullsizeUrl" JSON data embedded in the
		page, where the URL is stored with backslash escapes (presumably
		JSON-escaped slashes). Dropping the backslashes yields the plain URL.

		Note that all backslashes are removed unconditionally; other escape
		sequences are not decoded, they only lose their backslash.

		dirty_url -- the raw string captured from the page.
		Returns the same string without any backslash characters.
		'''
		return dirty_url.replace('\\', '')


	def find_urls(html_data):
		'''Return the list of image URLs featured in a PhotoBucket HTML page.

		Every value matched by RE_FULLSIZE_URL_PART is passed through
		clean_url(); the results keep their order of appearance in the page.

		html_data -- text of the page to scan.
		Returns a list of cleaned URL strings (empty when nothing matches).
		'''
		matches = RE_FULLSIZE_URL_PART.findall(html_data)
		# Build a real list so callers get the documented list type on any
		# Python version, rather than a lazy map object.
		return [clean_url(match) for match in matches]


	def main(argv):
		'''Print the full-size image URLs found on each page listed in argv.

		argv -- iterable of page URLs to download and scan.
		Returns 0 once every page has been processed.

		Errors raised by urllib2 while fetching a page are not caught here.
		'''
		for scan_url in argv:
			response = urllib2.urlopen(scan_url)
			try:
				html_data = response.read()
			finally:
				# Release the connection even if reading fails.
				response.close()
			# find_urls() already returns cleaned URLs, so they are
			# printed as-is.
			for image_url in find_urls(html_data):
				print(image_url)
		return 0


	# Script entry point: scan the URLs given on the command line and use the
	# value returned by main() as the process exit status.
	if __name__ == "__main__":
		raise SystemExit(main(sys.argv[1:]))