Skip to content

Instantly share code, notes, and snippets.

@exaroth
Last active October 26, 2016 09:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save exaroth/e37818faa3e8a8ac7c68 to your computer and use it in GitHub Desktop.
Save exaroth/e37818faa3e8a8ac7c68 to your computer and use it in GitHub Desktop.
archive.reddit-filepicker_middleware.public.py
import json
from pprint import pprint
import urllib
import re
from mimetypes import MimeTypes
import copy
import os
from bs4 import BeautifulSoup
from flask import Flask, request, Response
from imgurpython import ImgurClient
import praw
import requests
# Constants
# Subreddit whose hot listing backs the /contents/ view.
DEFAULT_SUBREDDIT = "gonewild"
# Maximum number of submissions fetched per listing request.
ITEM_LIMIT = 100
# Initialize Flask application
app = Flask(__name__)
# Define user agent for reddit (praw asks for a descriptive, unique UA string)
r_user_agent = "Filepicker-Reddit client by /u/exaroth"
# Initialize reddit client (no credentials supplied here -- read-only use)
r_client = praw.Reddit(user_agent=r_user_agent)
# Shared MimeTypes instance, used to guess a mimetype from a url's extension.
mime = MimeTypes()
# Basic regex for checking urls in reddit links: captures the host part
# (optional scheme and leading "www." stripped) as group 1.
r_regexp = r'^(?:https?:\/\/)?(?:www\.)?([^\/]+)'
# Default contents response
# NOTE: the three dicts below are module-level *templates*; handlers call
# .copy() before mutating. The copies are shallow, so nested dicts (e.g.
# content_item["metadata"]) must be replaced, not mutated in place, or
# state would leak across requests.
contents_response = {
    "contents": [],
    "has_next": False,
    "display_name": "Reddit: /r/"+ DEFAULT_SUBREDDIT
}
# Default contents item
content_item = {
    "path": "",
    "is_dir": False,
    "filename": "",
    "metadata": {}
}
# Default metadata Item
content_metadata = {
    "thumb_exists": True,
    # Fallback mimetype when the real one cannot be guessed from the url.
    "mimetype": "application/octet-stream",
    "thumb_url": "",
    "modified": "",
    "bytes": 0
}
####################################################################
# Root handler: plain-text banner identifying this custom provider.
@app.route("/", methods=["GET"])
def index_view():
    """Return a plain-text banner naming the provider and its version."""
    banner = "Filepicker-Reddit custom provider v.0.1"
    return banner
####################################################################
# Handler for the folder/contents view in the Filepicker dialog.
# Lists hot submissions from DEFAULT_SUBREDDIT whose urls point at
# resolvable imgur images, serialized as Filepicker "contents" items.
@app.route("/contents/", methods=["GET"])
def folder_view():
    subreddit = r_client.get_subreddit(DEFAULT_SUBREDDIT)
    resp = contents_response.copy()
    file_list = []
    for submission in subreddit.get_hot(limit=ITEM_LIMIT):
        rresult = re.findall(r_regexp, submission.url, re.IGNORECASE)
        # BUG FIX: guard against urls that do not match the host regex at
        # all -- the original indexed rresult[0] unconditionally and would
        # raise IndexError on such a submission.
        if not rresult:
            continue
        if rresult[0] in ("imgur.com", "i.imgur.com"):
            filename, url = imgur_parse(submission.url, rresult[0])
            # imgur_parse returns (False, False) for gifs/unresolvable links
            if filename and url:
                item = content_item.copy()
                item["path"] = submission.id
                item["filename"] = submission.title
                # Replace (not mutate) the shared nested metadata template.
                item["metadata"] = content_metadata.copy()
                # BUG FIX: guess_type returns None for unknown extensions;
                # keep the octet-stream default instead of emitting null.
                item["metadata"]["mimetype"] = (mime.guess_type(url)[0]
                                                or content_metadata["mimetype"])
                item["metadata"]["thumb_url"] = submission.thumbnail
                item["metadata"]["modified"] = submission.created_utc
                item["metadata"]["bytes"] = get_contents_size(url)
                file_list.append(item)
    resp["contents"] = file_list
    return Response(json.dumps(resp), mimetype="application/json"), 200
####################################################################
# Handler for downloading a full version of the image from imgur.
# REQUIRES valid reddit submission id; responds with the direct image
# URL and its size in bytes, or a JSON error with 404/500.
@app.route("/contents/<id>/download/")
def download_view(id):
    try:
        submission = r_client.get_submission(submission_id=id)
        rresult = re.findall(r_regexp, submission.url, re.IGNORECASE)
        url = None
        # Guard rresult before indexing (empty when the url has no host part)
        if rresult and rresult[0] in ("imgur.com", "i.imgur.com"):
            _, url = imgur_parse(submission.url, rresult[0])
        if url:
            payload = {"URL": url, "size": get_contents_size(url)}
            return Response(json.dumps(payload),
                            mimetype="application/json"), 200
        return Response(json.dumps({"error": "Not found"})), 404
    except praw.errors.NotFound:
        return Response(json.dumps({"error": "Not found"})), 404
    except Exception as e:
        # BUG FIX: the original serialized {"error", e.message} -- a *set*,
        # which json.dumps rejects with TypeError, so the 500 handler itself
        # crashed; e.message is also Python-2-only. Use a proper dict.
        return Response(json.dumps({"error": str(e)})), 500
####################################################################
# Handler for retrieving the thumbnail for a file.
# REQUIRES valid reddit id; responds with the thumbnail URL and the
# size in bytes of the linked file, or a JSON error with 404/500.
@app.route("/contents/<id>/thumb/")
def thumbnail_view(id):
    try:
        submission = r_client.get_submission(submission_id=id)
        payload = {
            "URL": submission.thumbnail,
            "bytes": get_contents_size(submission.url)
        }
        return Response(json.dumps(payload), mimetype="application/json"), 200
    except praw.errors.NotFound:
        return Response(json.dumps({"error": "Not found"})), 404
    except Exception as e:
        # BUG FIX: the original serialized the *set* {"error", e.message},
        # which is not JSON-serializable (and e.message is Python-2-only),
        # so this branch raised instead of returning 500.
        return Response(json.dumps({"error": str(e)})), 500
####################################################################
# Handler for the per-file metadata view in the Filepicker dialog.
# REQUIRES valid reddit id; responds with a metadata dict (mimetype,
# thumb_url, modified, bytes), or a JSON error with 404/500.
@app.route("/contents/<id>/metadata/")
def metadata_view(id):
    try:
        submission = r_client.get_submission(submission_id=id)
        rresult = re.findall(r_regexp, submission.url, re.IGNORECASE)
        url = None
        # Guard rresult before indexing (empty when the url has no host part)
        if rresult and rresult[0] in ("imgur.com", "i.imgur.com"):
            _, url = imgur_parse(submission.url, rresult[0])
        if url:
            resp = content_metadata.copy()
            resp["thumb_url"] = submission.thumbnail
            resp["modified"] = submission.created_utc
            # BUG FIX: guess_type returns None for unknown extensions; keep
            # the octet-stream default from the template instead of null.
            resp["mimetype"] = mime.guess_type(url)[0] or resp["mimetype"]
            resp["bytes"] = get_contents_size(url)
            return Response(json.dumps(resp), mimetype="application/json"), 200
        return Response(json.dumps({"error": "Not found"})), 404
    except praw.errors.NotFound:
        return Response(json.dumps({"error": "Not found"})), 404
    except Exception as e:
        # BUG FIX: the original serialized the *set* {"error", e.message},
        # which json.dumps rejects; e.message is also Python-2-only.
        return Response(json.dumps({"error": str(e)})), 500
####################################################################
# Parse an imgur url retrieved from a reddit link into a
# (image_name, direct_url) pair. Returns (False, False) when the link
# is a gif/gifv or cannot be resolved to a single downloadable image.
# REQUIRES valid imgur url; `regex` is the matched host
# ("imgur.com" or "i.imgur.com").
# Thanks to Cameron Gagnon (https://github.com/cameron-gagnon)
def imgur_parse(url, regex):
    # Only get first image from a comma-separated list of urls
    fragments = url.split(",")
    if len(fragments) > 1:
        url = fragments[0]
    # If a direct i.imgur.com link carries no file extension, append .jpg.
    # BUG FIX: the original tested len(url.split(".")) == 0, which can never
    # be true (split always yields >= 1 fragment, and the host itself
    # contains dots), so the intended default extension was never added.
    # The check is restricted to direct links so album/gallery page urls,
    # which are fetched below, are not mangled.
    if regex == "i.imgur.com" and "." not in url.rsplit("/", 1)[-1]:
        url += ".jpg"
    # gifs/gifvs don't download/display properly -- reject them.
    # (rfind(".gif") already matches ".gifv"; the second test is redundant
    # but kept for explicitness.)
    if (url.rfind(".gif") != -1)\
            or (url.rfind(".gifv") != -1):
        return False, False
    # Direct i.imgur.com link: already downloadable as-is
    elif regex == "i.imgur.com":
        image_name = imgur_image_format(url)
        return image_name, url
    # imgur.com/gallery link: rebuild a direct i.imgur.com url
    elif (url.find('/gallery/') != -1):
        image_name = imgur_image_format(url)
        url = "https://i.imgur.com/" + image_name
        return image_name, url
    # /a/ means an album in imgur standards
    elif (url.find('/a/') != -1):
        # Fetch the album page and scrape the first image's direct link
        resp = requests.get(url, headers={'User-Agent': r_user_agent})
        soup = BeautifulSoup(resp.text)
        # | class=image w/ child <a> | gets href of this <a> child |
        try:
            url = "https:" + soup.select('.image a')[0].get('href')
            image_name = imgur_image_format(url)
            return image_name, url
        # BUG FIX: narrowed the bare `except:` -- IndexError when no .image
        # anchor exists on the page, TypeError when the anchor has no href
        # (get returns None and "https:" + None raises).
        except (IndexError, TypeError):
            return False, False
    # a regular imgur.com domain but no img type in url
    elif regex == "imgur.com":
        image_name = imgur_image_format(url)
        url = "https://i.imgur.com/" + image_name
        return image_name, url
    # unrecognized url format
    else:
        return False, False
####################################################################
# Extract the bare image name -- no directories, no extension --
# from an imgur-formatted url.
# REQUIRES url in imgur formatting (contains at least one '/')
def imgur_image_format(url):
    # Everything after the final '/' ...
    tail = url.rsplit('/', 1)[-1]
    # ... with any extension (first '.' onward) stripped off.
    return tail.split('.', 1)[0]
####################################################################
# Report the remote file size in bytes via a HEAD request.
# For both images and thumbnails the size is advertised in the
# Content-Length response header; a missing header counts as 0.
def get_contents_size(url):
    head = requests.head(url, allow_redirects=True)
    length = head.headers.get("Content-Length", 0)
    return int(length)
# Script entry point: serve on all interfaces, honoring $PORT (default 5000).
if __name__ == "__main__":
    app.run(host="0.0.0.0",
            port=int(os.environ.get("PORT", 5000)),
            debug=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment