mappu/gist:6971832

## gistfile1.py
# gununu.nipah.co.uk downloader

import urllib.request
import re

# Site root; every path scraped from the HTML below is relative to it.
BASE_URL = "http://gununu.nipah.co.uk/"

# Number of listing pages to walk. range() excludes its upper bound,
# hence the "+ 1" in the loop header below.
PAGE_COUNT = 334


def _fetch(url):
	"""Return the raw response body of *url* as bytes, closing the connection afterwards."""
	with urllib.request.urlopen(url) as response:
		return response.read()


for i in range(1, PAGE_COUNT + 1):

	print("Page " + str(i) + " of " + str(PAGE_COUNT) + "...")

	html = _fetch(BASE_URL + "post/list/" + str(i)).decode('UTF-8')

	# Raw string, so that "\d" reaches the regex engine instead of being
	# treated as an (invalid) string escape.
	imagepages = re.findall(r"post/view/\d+", html)

	# Post links already handled on this page; a repeated link would only
	# re-download the same image into the same file.
	seen = set()

	for u in imagepages:

		if u in seen:
			continue
		seen.add(u)

		# Download the image page...
		innerhtml = _fetch(BASE_URL + u).decode('UTF-8')

		# ... and extract the actual image link from it.
		# group(0) is the whole "_images/<hash>/" path, group(1) just the hash.
		mainimage = re.search(r"_images/([^/]+)/", innerhtml)

		if mainimage is None:
			# Nothing to download on this page; carry on with the next post
			# rather than crashing with an IndexError.
			print("No image link found on " + BASE_URL + u + ", skipping")
			continue

		imageurl = BASE_URL + mainimage.group(0)
		print(imageurl)

		# Use the hash as a unique filename.
		filename = mainimage.group(1)

		# Download the image first (raw bytes, so no .decode()), so that a
		# failed request does not leave an empty file behind.
		image = _fetch(imageurl)

		# Every file is saved as .png even though some images may be another format.
		# "wb" = [w]rite access in [b]inary mode; "with" closes the file even
		# if the write fails.
		with open(filename + ".png", "wb") as target:
			target.write(image)

	# Deliberately stop after the first listing page. Remove this statement
	# from the outer loop to keep going with the remaining pages.
	# SystemExit is raised directly because the exit() helper is only
	# installed by the "site" module.
	raise SystemExit

#untab
	# gununu.nipah.co.uk downloader

	import urllib.request
	import re

	# NOTE(review): this section appears to repeat the downloader earlier in this
	# file. Its loop bodies had lost their indentation (each "for" header was
	# followed by statements at the same level), so the nesting is restored here
	# relative to the section's own one-tab base indent. Consider removing the
	# duplicate altogether.

	# Download all 334 pages.
	# range() stops one short of its upper bound, hence the "+ 1".
	for i in range(1, 334 + 1):

		print("Page " + str(i) + " of 334...")

		html = urllib.request.urlopen("http://gununu.nipah.co.uk/post/list/" + str(i)).read().decode('UTF-8')

		# Raw string, so that "\d" reaches the regex engine instead of being
		# treated as an (invalid) string escape.
		imagepages = re.findall(r"post/view/\d+", html)

		for u in imagepages:

			# Download the image page...
			innerhtml = urllib.request.urlopen("http://gununu.nipah.co.uk/" + u).read().decode('UTF-8')

			# ... and extract the actual image link from it.
			mainimage = re.findall(r"_images/[^/]+/", innerhtml)

			if not mainimage:
				# No image link on this page; skip it instead of failing on mainimage[0].
				print("No image link found on http://gununu.nipah.co.uk/" + u + ", skipping")
				continue

			# Only the first match is used.
			print( "http://gununu.nipah.co.uk/" + mainimage[0] )

			# Use the hash as a unique filename: drop the eight-character
			# "_images/" prefix and the trailing slash.
			filename = mainimage[0][8:-1]

			# Download the image first (raw bytes, so no .decode()), so that a
			# failed request does not leave an empty file behind.
			image = urllib.request.urlopen("http://gununu.nipah.co.uk/" + mainimage[0]).read()

			# Every file is saved as .png even though some images may be another format.
			# "wb" = [w]rite access in [b]inary mode; "with" closes the file even
			# if the write fails.
			with open(filename + ".png", "wb") as target:
				target.write(image)

		# Deliberately stop after the first listing page. Remove this statement
		# from the outer loop to keep going with the remaining pages.
		raise SystemExit

	#untab