Skip to content

Instantly share code, notes, and snippets.

@mappu
Last active December 25, 2015 11:49
Show Gist options
  • Save mappu/6971832 to your computer and use it in GitHub Desktop.
# gununu.nipah.co.uk downloader
#
# Walks all 334 listing pages of the gallery, finds every image page linked
# from each listing, extracts the real image URL, and saves the image bytes
# to disk (named by the image's hash). Deliberately stops after the first
# image — remove the exit() at the bottom of the inner loop to fetch all.
import urllib.request
import re

# Download all 334 pages. range() stops one before its end value, hence +1.
for i in range(1, 334 + 1):
    print("Page " + str(i) + " of 334...")
    html = urllib.request.urlopen("http://gununu.nipah.co.uk/post/list/" + str(i)).read().decode('UTF-8')
    # Collect the relative links to each individual image page.
    # (Raw string so \d is a regex digit class, not a string escape.)
    imagepages = re.findall(r"post/view/\d+", html)
    for u in imagepages:
        # Download the image page...
        innerhtml = urllib.request.urlopen("http://gununu.nipah.co.uk/" + u).read().decode('UTF-8')
        # ... and extract the actual image link from it.
        # "mainimage" should contain exactly one result.
        mainimage = re.findall(r"_images/[^/]+/", innerhtml)
        print("http://gununu.nipah.co.uk/" + mainimage[0])
        # Use the hash as a unique filename: drop the "_images/" prefix
        # (eight characters) and the trailing "/".
        filename = mainimage[0][8:-1]
        # Download the image bytes; no .decode() here since we want the raw
        # binary data, not text.
        image = urllib.request.urlopen("http://gununu.nipah.co.uk/" + mainimage[0]).read()
        # Everything is saved as .png even though some images may be another
        # format. 'with' guarantees the file is closed even if the write
        # fails (the original opened/closed by hand and shadowed the
        # builtin name 'file').
        with open(filename + ".png", "wb") as out:
            out.write(image)
        # Stop after the first image on purpose; delete this exit() to keep
        # going with the rest of the page and all remaining pages.
        exit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment