Skip to content

Instantly share code, notes, and snippets.

@exaroth
Last active October 26, 2016 09:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save exaroth/e37818faa3e8a8ac7c68 to your computer and use it in GitHub Desktop.
Save exaroth/e37818faa3e8a8ac7c68 to your computer and use it in GitHub Desktop.
archive.reddit-filepicker_middleware.public.py
import json
from pprint import pprint
import urllib
import re
from mimetypes import MimeTypes
import copy
import os
from bs4 import BeautifulSoup
from flask import Flask, request, Response
from imgurpython import ImgurClient
import praw
import requests
# Constants
# Subreddit whose hot listing backs the /contents/ view.
DEFAULT_SUBREDDIT = "gonewild"
# Maximum number of submissions fetched per listing request.
ITEM_LIMIT = 100
# Initialize Flask application
app = Flask(__name__)
# Define user agent for reddit (praw asks for a descriptive, unique UA string)
r_user_agent = "Filepicker-Reddit client by /u/exaroth"
# Initialize reddit client (no credentials supplied here -- read-only use)
r_client = praw.Reddit(user_agent=r_user_agent)
# Shared MimeTypes instance, used to guess a mimetype from a url's extension.
mime = MimeTypes()
# Basic regex for checking urls in reddit links: captures the host part
# (optional scheme and leading "www." stripped) as group 1.
r_regexp = r'^(?:https?:\/\/)?(?:www\.)?([^\/]+)'
# Default contents response
# NOTE: the three dicts below are module-level *templates*; handlers call
# .copy() before mutating. The copies are shallow, so nested dicts (e.g.
# content_item["metadata"]) must be replaced, not mutated in place, or
# state would leak across requests.
contents_response = {
    "contents": [],
    "has_next": False,
    "display_name": "Reddit: /r/"+ DEFAULT_SUBREDDIT
}
# Default contents item
content_item = {
    "path": "",
    "is_dir": False,
    "filename": "",
    "metadata": {}
}
# Default metadata Item
content_metadata = {
    "thumb_exists": True,
    # Fallback mimetype when the real one cannot be guessed from the url.
    "mimetype": "application/octet-stream",
    "thumb_url": "",
    "modified": "",
    "bytes": 0
}
####################################################################
# Root handler: plain-text banner identifying this custom provider.
@app.route("/", methods=["GET"])
def index_view():
    """Return a plain-text banner naming the provider and its version."""
    banner = "Filepicker-Reddit custom provider v.0.1"
    return banner
####################################################################
# Handler for the folder/contents view in the Filepicker dialog.
# Lists hot submissions from DEFAULT_SUBREDDIT whose urls point at
# resolvable imgur images, serialized as Filepicker "contents" items.
@app.route("/contents/", methods=["GET"])
def folder_view():
    subreddit = r_client.get_subreddit(DEFAULT_SUBREDDIT)
    resp = contents_response.copy()
    file_list = []
    for submission in subreddit.get_hot(limit=ITEM_LIMIT):
        rresult = re.findall(r_regexp, submission.url, re.IGNORECASE)
        # BUG FIX: guard against urls that do not match the host regex at
        # all -- the original indexed rresult[0] unconditionally and would
        # raise IndexError on such a submission.
        if not rresult:
            continue
        if rresult[0] in ("imgur.com", "i.imgur.com"):
            filename, url = imgur_parse(submission.url, rresult[0])
            # imgur_parse returns (False, False) for gifs/unresolvable links
            if filename and url:
                item = content_item.copy()
                item["path"] = submission.id
                item["filename"] = submission.title
                # Replace (not mutate) the shared nested metadata template.
                item["metadata"] = content_metadata.copy()
                # BUG FIX: guess_type returns None for unknown extensions;
                # keep the octet-stream default instead of emitting null.
                item["metadata"]["mimetype"] = (mime.guess_type(url)[0]
                                                or content_metadata["mimetype"])
                item["metadata"]["thumb_url"] = submission.thumbnail
                item["metadata"]["modified"] = submission.created_utc
                item["metadata"]["bytes"] = get_contents_size(url)
                file_list.append(item)
    resp["contents"] = file_list
    return Response(json.dumps(resp), mimetype="application/json"), 200
####################################################################
# Handler for downloading a full version of the image from imgur.
# REQUIRES valid reddit submission id; responds with the direct image
# URL and its size in bytes, or a JSON error with 404/500.
@app.route("/contents/<id>/download/")
def download_view(id):
    try:
        submission = r_client.get_submission(submission_id=id)
        rresult = re.findall(r_regexp, submission.url, re.IGNORECASE)
        url = None
        # Guard rresult before indexing (empty when the url has no host part)
        if rresult and rresult[0] in ("imgur.com", "i.imgur.com"):
            _, url = imgur_parse(submission.url, rresult[0])
        if url:
            payload = {"URL": url, "size": get_contents_size(url)}
            return Response(json.dumps(payload),
                            mimetype="application/json"), 200
        return Response(json.dumps({"error": "Not found"})), 404
    except praw.errors.NotFound:
        return Response(json.dumps({"error": "Not found"})), 404
    except Exception as e:
        # BUG FIX: the original serialized {"error", e.message} -- a *set*,
        # which json.dumps rejects with TypeError, so the 500 handler itself
        # crashed; e.message is also Python-2-only. Use a proper dict.
        return Response(json.dumps({"error": str(e)})), 500
####################################################################
# Handler for retrieving the thumbnail for a file.
# REQUIRES valid reddit id; responds with the thumbnail URL and the
# size in bytes of the linked file, or a JSON error with 404/500.
@app.route("/contents/<id>/thumb/")
def thumbnail_view(id):
    try:
        submission = r_client.get_submission(submission_id=id)
        payload = {
            "URL": submission.thumbnail,
            "bytes": get_contents_size(submission.url)
        }
        return Response(json.dumps(payload), mimetype="application/json"), 200
    except praw.errors.NotFound:
        return Response(json.dumps({"error": "Not found"})), 404
    except Exception as e:
        # BUG FIX: the original serialized the *set* {"error", e.message},
        # which is not JSON-serializable (and e.message is Python-2-only),
        # so this branch raised instead of returning 500.
        return Response(json.dumps({"error": str(e)})), 500
####################################################################
# Handler for the per-file metadata view in the Filepicker dialog.
# REQUIRES valid reddit id; responds with a metadata dict (mimetype,
# thumb_url, modified, bytes), or a JSON error with 404/500.
@app.route("/contents/<id>/metadata/")
def metadata_view(id):
    try:
        submission = r_client.get_submission(submission_id=id)
        rresult = re.findall(r_regexp, submission.url, re.IGNORECASE)
        url = None
        # Guard rresult before indexing (empty when the url has no host part)
        if rresult and rresult[0] in ("imgur.com", "i.imgur.com"):
            _, url = imgur_parse(submission.url, rresult[0])
        if url:
            resp = content_metadata.copy()
            resp["thumb_url"] = submission.thumbnail
            resp["modified"] = submission.created_utc
            # BUG FIX: guess_type returns None for unknown extensions; keep
            # the octet-stream default from the template instead of null.
            resp["mimetype"] = mime.guess_type(url)[0] or resp["mimetype"]
            resp["bytes"] = get_contents_size(url)
            return Response(json.dumps(resp), mimetype="application/json"), 200
        return Response(json.dumps({"error": "Not found"})), 404
    except praw.errors.NotFound:
        return Response(json.dumps({"error": "Not found"})), 404
    except Exception as e:
        # BUG FIX: the original serialized the *set* {"error", e.message},
        # which json.dumps rejects; e.message is also Python-2-only.
        return Response(json.dumps({"error": str(e)})), 500
####################################################################
# Parse an imgur url retrieved from a reddit link into a
# (image_name, direct_url) pair. Returns (False, False) when the link
# is a gif/gifv or cannot be resolved to a single downloadable image.
# REQUIRES valid imgur url; `regex` is the matched host
# ("imgur.com" or "i.imgur.com").
# Thanks to Cameron Gagnon (https://github.com/cameron-gagnon)
def imgur_parse(url, regex):
    # Only get first image from a comma-separated list of urls
    fragments = url.split(",")
    if len(fragments) > 1:
        url = fragments[0]
    # If a direct i.imgur.com link carries no file extension, append .jpg.
    # BUG FIX: the original tested len(url.split(".")) == 0, which can never
    # be true (split always yields >= 1 fragment, and the host itself
    # contains dots), so the intended default extension was never added.
    # The check is restricted to direct links so album/gallery page urls,
    # which are fetched below, are not mangled.
    if regex == "i.imgur.com" and "." not in url.rsplit("/", 1)[-1]:
        url += ".jpg"
    # gifs/gifvs don't download/display properly -- reject them.
    # (rfind(".gif") already matches ".gifv"; the second test is redundant
    # but kept for explicitness.)
    if (url.rfind(".gif") != -1)\
            or (url.rfind(".gifv") != -1):
        return False, False
    # Direct i.imgur.com link: already downloadable as-is
    elif regex == "i.imgur.com":
        image_name = imgur_image_format(url)
        return image_name, url
    # imgur.com/gallery link: rebuild a direct i.imgur.com url
    elif (url.find('/gallery/') != -1):
        image_name = imgur_image_format(url)
        url = "https://i.imgur.com/" + image_name
        return image_name, url
    # /a/ means an album in imgur standards
    elif (url.find('/a/') != -1):
        # Fetch the album page and scrape the first image's direct link
        resp = requests.get(url, headers={'User-Agent': r_user_agent})
        soup = BeautifulSoup(resp.text)
        # | class=image w/ child <a> | gets href of this <a> child |
        try:
            url = "https:" + soup.select('.image a')[0].get('href')
            image_name = imgur_image_format(url)
            return image_name, url
        # BUG FIX: narrowed the bare `except:` -- IndexError when no .image
        # anchor exists on the page, TypeError when the anchor has no href
        # (get returns None and "https:" + None raises).
        except (IndexError, TypeError):
            return False, False
    # a regular imgur.com domain but no img type in url
    elif regex == "imgur.com":
        image_name = imgur_image_format(url)
        url = "https://i.imgur.com/" + image_name
        return image_name, url
    # unrecognized url format
    else:
        return False, False
####################################################################
# Extract the bare image name -- no directories, no extension --
# from an imgur-formatted url.
# REQUIRES url in imgur formatting (contains at least one '/')
def imgur_image_format(url):
    # Everything after the final '/' ...
    tail = url.rsplit('/', 1)[-1]
    # ... with any extension (first '.' onward) stripped off.
    return tail.split('.', 1)[0]
####################################################################
# Report the remote file size in bytes via a HEAD request.
# For both images and thumbnails the size is advertised in the
# Content-Length response header; a missing header counts as 0.
def get_contents_size(url):
    head = requests.head(url, allow_redirects=True)
    length = head.headers.get("Content-Length", 0)
    return int(length)
# Script entry point: serve on all interfaces, honoring $PORT (default 5000).
if __name__ == "__main__":
    app.run(host="0.0.0.0",
            port=int(os.environ.get("PORT", 5000)),
            debug=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment