""" | |
ash_ffffind.py | |
v1.1 (September 14, 2015) | |
by me@aaronhildebrandt.com | |
Automatically downloads all images from ffffound saved by a specific user. | |
Will first try to download the image from the original source (to get the highest quality possible). | |
If that fails, it'll download the cached version from ffffound. | |
Prerequisities: | |
Beautiful Soup (http://www.crummy.com/software/BeautifulSoup/) | |
Usage: | |
python ffffind.py username | |
""" | |
import os, sys, urllib, imghdr
from BeautifulSoup import BeautifulSoup
from urlparse import urlparse
from posixpath import basename, dirname

class URLOpener(urllib.FancyURLopener):
    version = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11'

urllib._urlopener = URLOpener()

def main(user):
    offset = 0
    page = 1
    while True:
        print "Capturing page "+str(page)+" ..."
        f = urllib.urlopen("http://ffffound.com/home/"+user+"/found/?offset="+str(offset))
        s = f.read()
        f.close()
        if "<div class=\"description\">" in s:
            images = []
            offset += 25
            count = 0
            soup = BeautifulSoup(s)
            for i in soup.findAll("div", { "class" : "description" }):
                images.append({"url": urlparse("http://" + str(i).split("<br />")[0].replace("<div class=\"description\">", ""))})
            for i in soup.findAll("img"):
                if str(i).find("_m.") != -1:
                    images[count]["backup"] = str(i).split("src=\"")[1].split("\"")[0]
                    count += 1
            for i in images:
                if os.path.exists(user+"/"+basename(i["url"].path)):
                    print basename(i["url"].path) + " exists, stopping."
                    sys.exit()
                else:
                    print "Downloading " + basename(i["url"].path),
                    try:
                        urllib.urlretrieve(i["url"].geturl(), user+"/"+basename(i["url"].path))
                        print "... done."
                        if not imghdr.what(user+"/"+basename(i["url"].path)) in ["gif", "jpeg", "png", None]:
                            print "... unfortunately, it seems to be a bad image.\nDownloading backup",
                            try:
                                urllib.urlretrieve(i["backup"], user+"/"+basename(i["url"].path))
                                print "... which seems to have worked."
                            except:
                                print "... which also failed."
                        if os.path.getsize(user+"/"+basename(i["url"].path)) < 5000:
                            raise
                    except:
                        print "... failed. Downloading backup",
                        try:
                            urllib.urlretrieve(i["backup"], user+"/"+basename(i["url"].path))
                            print "... which seems to have worked."
                        except:
                            print "... which also failed."
            page += 1
        else:
            print "Reached the end of the list, stopping."
            break

if __name__ == '__main__':
    print("ffffound image downloader")
    if len(sys.argv) < 2:
        print "Usage:\n\t python ffffind.py username"
    else:
        try:
            if not os.path.exists("./"+sys.argv[1]):
                os.mkdir(sys.argv[1])
        except:
            print "Error creating directory."
            sys.exit()
        user = sys.argv[1]
        print "Downloading all pictures from user '"+user+"'"
        main(user)
Hi Aaron,
I am getting the same problems that jwmacias is experiencing. Is there a way to fix the file extension issue? That would be great.
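For anyone else hitting the missing-extension problem: the script names each file after the last path segment of the original source URL, so when that URL has no extension the saved file doesn't get one either. Here's a minimal post-processing sketch (the function name and folder argument are just illustrative, not part of the original script) that sniffs the real image type with imghdr and renames the file:

    import imghdr, os

    def add_missing_extensions(folder):
        # Rename extension-less files using the image type read from the file header.
        for name in os.listdir(folder):
            path = os.path.join(folder, name)
            if os.path.isfile(path) and "." not in name:
                kind = imghdr.what(path)  # "jpeg", "png", "gif", or None if unrecognised
                if kind:
                    os.rename(path, path + "." + ("jpg" if kind == "jpeg" else kind))

You could run it once over the download folder after the script finishes, e.g. add_missing_extensions("username").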
@ashildebrandt Hi, I have installed BS4 via pip and I get this error when I run your script
$ python ffffind.py monsieurm
Traceback (most recent call last):
  File "ffffind.py", line 22, in <module>
    from BeautifulSoup import BeautifulSoup
ImportError: No module named BeautifulSoup
However
$ pip install beautifulsoup4
Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python2.7/site-packages
Any idea?
OK, just install the previous version of BeautifulSoup:
pip install beautifulsoup
not pip install beautifulsoup4
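(For context: the script was written against BeautifulSoup 3, which installs a top-level BeautifulSoup module; beautifulsoup4 lives under the bs4 package instead, which is why the import fails. If you'd rather keep bs4 installed, changing the import at the top of the script along these lines will probably work, though I haven't tested it against the rest of the script:

    # BeautifulSoup 3, what the script expects:
    from BeautifulSoup import BeautifulSoup
    # beautifulsoup4 equivalent:
    from bs4 import BeautifulSoup

bs4 still accepts the findAll spelling used here, so that is likely the only change needed.)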
Hey. I've never worked with Python before, but I want to save my ffffounds so badly. Is there any instruction on what to do with this script? I downloaded Python already and tried to use the code, but I failed. Please, can you help?
BG - Phil :)
I updated the script to also create HTML pages in which you can view your images, plus fixed a couple of edge cases I came across: https://github.com/philgyford/ffffound-export
Anyone have a torrent of the html page dump? I totally learned about this too late :(
@Triune the entirety of ffffound.com is in fact available, via torrent, from archive.org:
https://archive.org/details/ffffound.com-warc-archive-2017-05-07
Enjoy!
@ashildebrandt when I run this it stops on page six of my 491-page account. Any reason that you can think of? I am trying to archive my account images since the site is going offline in May. It also seems to be saving quite a few images without file extensions. Any thoughts?
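A likely cause of the early stop, judging from the code above: as soon as the script sees a filename that already exists in the folder (a leftover from a previous run, or two posts whose source URLs end in the same filename), it prints "... exists, stopping." and calls sys.exit(). A minimal tweak, assuming you'd rather skip duplicates than stop the whole run, would be to replace the exit with a continue in the download loop:

    for i in images:
        target = user + "/" + basename(i["url"].path)
        if os.path.exists(target):
            print basename(i["url"].path) + " exists, skipping."
            continue
        # ... download as before, using target as the destination ...

The missing extensions are the same issue discussed above: filenames are taken verbatim from the source URL path, so URLs without an extension produce extension-less files.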