Skip to content

Instantly share code, notes, and snippets.

@daramcq
Last active December 15, 2015 11:08
Show Gist options
  • Save daramcq/5250314 to your computer and use it in GitHub Desktop.
Save daramcq/5250314 to your computer and use it in GitHub Desktop.
Work-in-progress script to download threads from 4chan using their API.
#!/usr/bin/env python
import requests
import json
import csv
import datetime
import os
from PIL import Image
from StringIO import StringIO
import fcntl
import sys
import time
def writeThreadToFile(num, j_obj):
    """Serialize one thread to '<num>.json' in the current working directory.

    num   -- thread identifier; str(num) becomes the file name stem.
    j_obj -- decoded thread data (anything json.dump can serialize,
             including None).

    The data is written as real JSON. Writing str(j_obj) would store the
    Python repr (single quotes, None/True/False), which JSON parsers reject.
    """
    # Text mode: json.dump emits text, and the context manager closes the
    # file even if serialization fails part-way.
    with open(str(num) + '.json', 'w') as f:
        json.dump(j_obj, f)
def writeThreadsToFile(threadObject):
    """Write every thread in the mapping to its own file.

    threadObject -- mapping of thread number to thread data; each pair is
                    handed to writeThreadToFile(number, data).
    """
    for number, thread in threadObject.items():
        writeThreadToFile(number, thread)
def writeImagesToFile(images):
    """Download each named image and store it under the local 'img/' directory.

    images -- iterable of image file names; each name is appended to the
              image host URL and reused as the local file name.

    A failure on one image is reported on stdout and does not stop the
    remaining downloads. KeyboardInterrupt and SystemExit are not swallowed.
    """
    # open() below fails for every image if the target directory is missing.
    if not os.path.isdir('img'):
        os.makedirs('img')
    for image in images:
        try:
            ir = requests.get('http://images.4chan.org/b/src/' + image)
            # Report an HTTP failure as such instead of letting the image
            # decoder choke on an error page.
            ir.raise_for_status()
            picture = Image.open(StringIO(ir.content))
            # Binary mode: the encoded image is raw bytes. The context
            # manager closes the file even if save() raises.
            with open('img/' + image, 'wb') as image_file:
                picture.save(image_file)
        except Exception as e:
            print("Unable to save image " + str(e))
def getThreadImages(thread):
    """Return the image file names referenced by one thread.

    thread -- decoded thread object with a 'posts' list, or None/empty for
              a thread that could not be fetched (getThread returns None
              in that case).

    Each post that carries both 'tim' and 'ext' contributes the name
    str(tim) + str(ext). Posts without an image are skipped. A malformed
    thread is reported on stdout and whatever was collected so far is
    returned.
    """
    images = []
    if not thread:
        # Nothing was downloaded for this thread, so there are no images.
        return images
    try:
        for post in thread.get('posts', []):
            # Require both fields so one odd post cannot abort the rest.
            if 'tim' in post and 'ext' in post:
                images.append(str(post['tim']) + str(post['ext']))
    except (AttributeError, TypeError) as e:
        print('Error getting images out ' + str(e))
    return images
def getAllThreadImages(threadObject):
    """Collect the image file names of every thread into one flat list.

    threadObject -- mapping whose values are thread objects; each value is
                    passed to getThreadImages and the results are
                    concatenated in iteration order.
    """
    return [
        name
        for thread in threadObject.values()
        for name in getThreadImages(thread)
    ]
def getThread(num):
    """Fetch a single thread from the JSON API.

    num -- thread number; str(num) is inserted into the request URL.

    Returns the decoded JSON object when the server answers with HTTP 200.
    Returns None when the status is anything else, the request fails, or
    the body cannot be decoded; the reason is reported on stdout.
    KeyboardInterrupt and SystemExit are not swallowed.
    """
    try:
        r = requests.get('http://api.4chan.org/b/res/' + str(num) + '.json')
        if r.status_code == 200:
            return json.loads(r.content)
        # Previously a non-200 answer fell through without any trace.
        print("Error in getting thread " + str(num) + ": HTTP " + str(r.status_code))
    except Exception as e:
        print("Error in getting thread " + str(num) + ": " + str(e))
    return None
def getThreadObject(threadNums):
    """Build a mapping of thread number to the result of getThread(number).

    threadNums -- iterable of thread numbers, fetched one by one in order.
    """
    return dict((number, getThread(number)) for number in threadNums)
def getThreadList():
    """Return the thread numbers listed by the board's threads.json endpoint.

    The response body is decoded as a list of page objects, each holding a
    'threads' list whose entries carry the thread number under 'no'.

    If the body is not valid JSON or lacks that shape, a message is printed
    and the numbers collected so far (possibly none) are returned. Errors
    raised by the HTTP request itself are not caught here.
    """
    r = requests.get('http://api.4chan.org/b/threads.json')
    threadNums = []
    try:
        for page in json.loads(r.content):
            for thread in page['threads']:
                threadNums.append(thread['no'])
    except (KeyError, TypeError, ValueError):
        # ValueError covers undecodable JSON; KeyError and TypeError cover
        # a decoded document with an unexpected shape.
        print('Decoding Threads JSON has failed')
    return threadNums
def main():
    """Run one pass: list threads, fetch them, save the JSON, save the images."""
    thread_numbers = getThreadList()
    threads = getThreadObject(thread_numbers)
    writeThreadsToFile(threads)
    image_names = getAllThreadImages(threads)
    writeImagesToFile(image_names)
if __name__ == '__main__':
    # Single-instance guard: take a non-blocking exclusive lock on the lock
    # file. The handle is kept open (never closed here) so the lock stays
    # held while main() runs.
    fl = open('naggle.lock', 'w')
    try:
        fcntl.lockf(fl, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        # Only a failed lock attempt means "already running". A bare except
        # would also turn Ctrl-C or a programming error into this message.
        sys.stderr.write('[%s] nagglebot.py already running.\n' % time.strftime('%c'))
        sys.exit(-1)
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment