Last active
December 25, 2015 11:49
-
-
Save mappu/6971832 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""gununu.nipah.co.uk downloader.

Walks the site's paginated post listing, opens every post page linked from
it, extracts the link to the actual image and saves that image to disk using
the image hash as the file name.
"""
import os
import re
import urllib.request

BASE_URL = "http://gununu.nipah.co.uk/"

# The listing is hard-coded to 334 pages.
TOTAL_PAGES = 334

# Raw strings, so that "\d" reaches the regex engine instead of being parsed
# as an (invalid) string escape sequence.
POST_LINK_RE = re.compile(r"post/view/\d+")
IMAGE_LINK_RE = re.compile(r"_images/[^/]+/")


def fetch_bytes(url):
    """Return the raw response body of *url*, closing the connection after."""
    with urllib.request.urlopen(url) as response:
        return response.read()


def find_post_links(html):
    """Return the distinct ``post/view/<id>`` links in *html*, in page order.

    Duplicates are dropped so that a post linked several times from the same
    listing page is only downloaded once.
    """
    seen = set()
    links = []
    for link in POST_LINK_RE.findall(html):
        if link not in seen:
            seen.add(link)
            links.append(link)
    return links


def find_image_link(html):
    """Return the first ``_images/<hash>/`` link in *html*, or None."""
    match = IMAGE_LINK_RE.search(html)
    return match.group(0) if match else None


def image_filename(image_link):
    """Return the file name under which *image_link* is saved.

    The hash is whatever sits between the leading ``_images/`` and the
    trailing slash. Every file gets a ``.png`` extension, even though some
    images might not actually be PNGs.
    """
    return image_link[len("_images/"):-1] + ".png"


def download_page(page, fetch=fetch_bytes, out_dir="."):
    """Download every image linked from listing page number *page*.

    *fetch* maps a URL to the response bytes; *out_dir* is the directory the
    images are written to. Returns the number of images saved.
    """
    listing = fetch(BASE_URL + "post/list/" + str(page)).decode("UTF-8")
    saved = 0
    for post_link in find_post_links(listing):
        # Download the image page and extract the actual image link from it.
        post_html = fetch(BASE_URL + post_link).decode("UTF-8")
        image_link = find_image_link(post_html)
        if image_link is None:
            # Skip instead of crashing with an IndexError on a post page
            # that carries no image link.
            print("No image found on " + BASE_URL + post_link + ", skipping")
            continue

        image_url = BASE_URL + image_link
        print(image_url)

        # No .decode() this time: the raw image bytes are what gets saved.
        # Fetch before opening the target so that a failed download does not
        # leave an empty file behind.
        data = fetch(image_url)
        target_path = os.path.join(out_dir, image_filename(image_link))
        with open(target_path, "wb") as target:
            target.write(data)
        saved += 1
    return saved


def main(max_pages=1, fetch=fetch_bytes, out_dir="."):
    """Download the images of the first *max_pages* listing pages.

    The default of one page keeps the original behaviour of stopping after
    the first listing page; pass ``TOTAL_PAGES`` to keep going through all
    of them. Returns the total number of images saved.
    """
    last_page = min(max_pages, TOTAL_PAGES)
    saved = 0
    # range() excludes its upper bound, hence the + 1.
    for page in range(1, last_page + 1):
        print("Page {} of {}...".format(page, TOTAL_PAGES))
        saved += download_page(page, fetch=fetch, out_dir=out_dir)
    return saved


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment