Last active
October 26, 2016 09:22
-
-
Save exaroth/e37818faa3e8a8ac7c68 to your computer and use it in GitHub Desktop.
archive.reddit-filepicker_middleware.public.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
from pprint import pprint | |
import urllib | |
import re | |
from mimetypes import MimeTypes | |
import copy | |
import os | |
from bs4 import BeautifulSoup | |
from flask import Flask, request, Response | |
from imgurpython import ImgurClient | |
import praw | |
import requests | |
# ------------------------------------------------------------------
# Configuration and shared module-level state.
# ------------------------------------------------------------------

# Subreddit queried when the dialog opens, and how many hot posts to scan.
DEFAULT_SUBREDDIT = "gonewild"
ITEM_LIMIT = 100

# Flask application instance.
app = Flask(__name__)

# User agent sent with every reddit request.
r_user_agent = "Filepicker-Reddit client by /u/exaroth"

# PRAW client used for all reddit lookups.
r_client = praw.Reddit(user_agent=r_user_agent)

# Shared mimetype guesser.
mime = MimeTypes()

# Captures the host part of a submission url (scheme and "www." optional).
r_regexp = r'^(?:https?:\/\/)?(?:www\.)?([^\/]+)'

# Response/item templates; the handlers below take shallow copies of these.
contents_response = {
    "contents": [],
    "has_next": False,
    "display_name": "Reddit: /r/" + DEFAULT_SUBREDDIT,
}
content_item = {
    "path": "",
    "is_dir": False,
    "filename": "",
    "metadata": {},
}
content_metadata = {
    "thumb_exists": True,
    "mimetype": "application/octet-stream",
    "thumb_url": "",
    "modified": "",
    "bytes": 0,
}
####################################################################
# Root handler for the api.
@app.route("/", methods=["GET"])
def index_view():
    """Landing endpoint identifying this custom provider."""
    return "Filepicker-Reddit custom provider v.0.1"
####################################################################
# Handler for the contents (folder listing) view in the Filepicker
# dialog. Lists imgur-hosted images from the subreddit's hot posts.
@app.route("/contents/", methods=["GET"])
def folder_view():
    """Return a Filepicker "contents" JSON document for DEFAULT_SUBREDDIT.

    Scans up to ITEM_LIMIT hot submissions, keeps those hosted on imgur,
    and describes each one as a Filepicker content item.
    """
    subreddit = r_client.get_subreddit(DEFAULT_SUBREDDIT)
    resp = contents_response.copy()
    file_list = []
    for submission in subreddit.get_hot(limit=ITEM_LIMIT):
        rresult = re.findall(r_regexp, submission.url, re.IGNORECASE)
        # BUG FIX: guard the empty-match case -- indexing rresult[0]
        # unconditionally raised IndexError for urls the regex rejects.
        if not rresult or rresult[0] not in ("imgur.com", "i.imgur.com"):
            continue
        filename, url = imgur_parse(submission.url, rresult[0])
        if not (filename and url):
            continue
        item = content_item.copy()
        item["path"] = submission.id
        item["filename"] = submission.title
        item["metadata"] = content_metadata.copy()
        # guess_type returns (None, None) for extension-less urls; keep
        # the template's "application/octet-stream" default in that case
        # instead of emitting a null mimetype.
        mimetype = mime.guess_type(url)[0]
        if mimetype:
            item["metadata"]["mimetype"] = mimetype
        item["metadata"]["thumb_url"] = submission.thumbnail
        item["metadata"]["modified"] = submission.created_utc
        item["metadata"]["bytes"] = get_contents_size(url)
        file_list.append(item)
    resp["contents"] = file_list
    return Response(json.dumps(resp), mimetype="application/json"), 200
####################################################################
# Handler for downloading a full version of the image from imgur.
# REQUIRES valid reddit submission id.
@app.route("/contents/<id>/download/")
def download_view(id):
    """Resolve a submission id to a direct image URL and its byte size."""
    try:
        submission = r_client.get_submission(submission_id=id)
        rresult = re.findall(r_regexp, submission.url, re.IGNORECASE)
        url = None
        # Guard rresult[0]: the host regex may not match at all.
        if rresult and rresult[0] in ("imgur.com", "i.imgur.com"):
            _, url = imgur_parse(submission.url, rresult[0])
        if url:
            payload = {"URL": url, "size": get_contents_size(url)}
            return Response(json.dumps(payload), mimetype="application/json"), 200
        return Response(json.dumps({"error": "Not found"})), 404
    except praw.errors.NotFound:
        return Response(json.dumps({"error": "Not found"})), 404
    except Exception as e:
        # BUG FIX: the original serialized {"error", e.message} -- a SET
        # literal, which json.dumps cannot encode -- and e.message is
        # Python-2-only. Use a proper dict with str(e).
        return Response(json.dumps({"error": str(e)})), 500
####################################################################
# Handler for retrieving the thumbnail for a file.
# REQUIRES valid reddit id.
@app.route("/contents/<id>/thumb/")
def thumbnail_view(id):
    """Return the submission's thumbnail URL and the full image's size."""
    try:
        submission = r_client.get_submission(submission_id=id)
        payload = {
            "URL": submission.thumbnail,
            "bytes": get_contents_size(submission.url),
        }
        return Response(json.dumps(payload), mimetype="application/json"), 200
    except praw.errors.NotFound:
        return Response(json.dumps({"error": "Not found"})), 404
    except Exception as e:
        # BUG FIX: the original serialized {"error", e.message} -- a SET
        # literal, which json.dumps cannot encode -- and e.message is
        # Python-2-only. Use a proper dict with str(e).
        return Response(json.dumps({"error": str(e)})), 500
####################################################################
# Handler for the metadata view in the Filepicker dialog.
# REQUIRES valid reddit id.
@app.route("/contents/<id>/metadata/")
def metadata_view(id):
    """Return Filepicker metadata (thumb, mtime, mimetype, size) for an id."""
    try:
        submission = r_client.get_submission(submission_id=id)
        rresult = re.findall(r_regexp, submission.url, re.IGNORECASE)
        url = None
        # Guard rresult[0]: the host regex may not match at all.
        if rresult and rresult[0] in ("imgur.com", "i.imgur.com"):
            _, url = imgur_parse(submission.url, rresult[0])
        if url:
            resp = content_metadata.copy()
            resp["thumb_url"] = submission.thumbnail
            resp["modified"] = submission.created_utc
            # Keep the template's default mimetype when guess_type cannot
            # determine one (it returns None for extension-less urls).
            mimetype = mime.guess_type(url)[0]
            if mimetype:
                resp["mimetype"] = mimetype
            resp["bytes"] = get_contents_size(url)
            return Response(json.dumps(resp), mimetype="application/json"), 200
        return Response(json.dumps({"error": "Not found"})), 404
    except praw.errors.NotFound:
        return Response(json.dumps({"error": "Not found"})), 404
    except Exception as e:
        # BUG FIX: the original serialized {"error", e.message} -- a SET
        # literal, which json.dumps cannot encode -- and e.message is
        # Python-2-only. Use a proper dict with str(e).
        return Response(json.dumps({"error": str(e)})), 500
####################################################################
# Parse imgur urls retrieved from reddit links.
# REQUIRES valid imgur url.
# Thanks to Cameron Gagnon (https://github.com/cameron-gagnon)
def imgur_parse(url, regex):
    """Resolve an imgur url to ``(image_name, direct_url)``.

    ``regex`` is the host extracted from the url ("imgur.com" or
    "i.imgur.com"). Returns ``(False, False)`` for gifs (which don't
    download/display through the dialog) and for unresolvable urls.
    """
    # Only keep the first image of a comma-separated list of urls.
    url = url.split(",")[0]
    # Gifs don't download/display; ".gif" also matches ".gifv".
    if ".gif" in url:
        return False, False
    # Direct i.imgur.com image link.
    if regex == "i.imgur.com":
        # BUG FIX: the original checked ``len(url.split(".")) == 0``,
        # which is never true (split always yields >= 1 element), so the
        # intended ".jpg" fallback was dead code. Append it when the
        # last path segment carries no extension.
        if "." not in url.rsplit("/", 1)[-1]:
            url += ".jpg"
        return imgur_image_format(url), url
    # imgur.com/gallery/<id> link: rebuild as a direct i.imgur.com url.
    if url.find('/gallery/') != -1:
        image_name = imgur_image_format(url)
        return image_name, "https://i.imgur.com/" + image_name
    # /a/ means an album in imgur standards: fetch the page and take the
    # first image's href (class=image with a child <a>).
    if url.find('/a/') != -1:
        resp = requests.get(url, headers={'User-Agent': r_user_agent})
        # Explicit parser avoids bs4's "no parser specified" warning and
        # keeps parsing consistent across environments.
        soup = BeautifulSoup(resp.text, "html.parser")
        try:
            href = soup.select('.image a')[0].get('href')
            full_url = "https:" + href
            return imgur_image_format(full_url), full_url
        except (IndexError, TypeError):
            # No matching element (IndexError) or missing href (TypeError
            # from "https:" + None): treat the album as unresolvable.
            # (Narrowed from the original bare ``except:``.)
            return False, False
    # A regular imgur.com/<id> link without an image extension in the url.
    if regex == "imgur.com":
        image_name = imgur_image_format(url)
        return image_name, "https://i.imgur.com/" + image_name
    # If we get here the url format is not recognized.
    return False, False
####################################################################
# REQUIRES url in imgur formatting.
def imgur_image_format(url):
    """Return the image id: the last path segment without its extension.

    BUG FIX: the original slice ``url[-(len(url) - remove - 1):]``
    returned the *whole* url when it ended with '/', because the slice
    then starts at ``-0`` (i.e. index 0). ``rsplit`` yields an empty
    segment for that case instead, and also avoids the ValueError the
    original's ``rindex('/')`` raised for urls with no slash at all.
    """
    return url.rsplit("/", 1)[-1].split(".")[0]
####################################################################
# REQUIRES url.
# For both images and thumbnails the remote size is reported in the
# 'Content-Length' response header.
def get_contents_size(url):
    """Return the remote resource's size in bytes (0 when unreported)."""
    response = requests.head(url, allow_redirects=True)
    size = response.headers.get("Content-Length")
    return int(size) if size is not None else 0
if __name__ == "__main__":
    # Honor a platform-provided port (e.g. Heroku's $PORT), default 5000.
    app.run(
        host="0.0.0.0",
        port=int(os.environ.get("PORT", 5000)),
        debug=True,
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment