Skip to content

Instantly share code, notes, and snippets.

View Pop101's full-sized avatar

Leon Leibmann Pop101

  • University of Washington
  • Kirkland
  • 09:22 (UTC -07:00)
View GitHub Profile
@Pop101
Pop101 / pdf_extract.py
Created September 19, 2020 19:41
A command-line interface for extracting a PDF's usable text into JSON files, separated by chapter.
import sys
from utils import *
from pdfminer.high_level import extract_pages
from pdfminer.layout import LTTextContainer, LTChar, LTPage, LTTextLine
def avg_char_height(container:LTTextContainer):
    """Return the mean ``size`` of characters sampled from ``container``.

    Characters are obtained by calling ``n_sample`` with a depth of 2,
    ``required_types=[LTTextLine, LTChar]`` and ``max_samples=[4, 20]``.
    NOTE(review): ``n_sample`` comes from the project's ``utils`` module and
    is not visible here; presumably it samples text lines and then the
    characters inside them -- confirm against its definition.

    Returns ``None`` when no characters were sampled.
    """
    sampled = n_sample(container, 2, required_types=[LTTextLine, LTChar], max_samples=[4, 20])
    sizes = [glyph.size for glyph in sampled]
    # Guard against an empty sample so the division below cannot fail.
    if not sizes:
        return None
    return sum(sizes) / len(sizes)
@Pop101
Pop101 / flaskapp_with_api_key.py
Last active September 19, 2020 19:42
A Flask starting point for creating a RESTful API.
from functools import wraps
from flask import Flask, request, abort
from waitress import serve
app = Flask(__name__)
APPKEY = 'EXAMPLE_KEY'
def require_appkey(view_function):
@wraps(view_function)
def decorated_function(*args, **kwargs):
if request.args.get('key') and request.args.get('key') == APPKEY:
@Pop101
Pop101 / ripreddit.py
Last active September 19, 2020 19:43
A command-line interface to get a list of Reddit image URLs from selected subreddits for image analysis.
# Inspired by https://github.com/simonwillcock/RipReddit/
import json
import requests
# The main cmd
def get_items(subreddit, sort='hot',count:int=1000):
""" Returns a list of items from the given subreddit, sorted by hot, new, controversial, or top. """
url = 'http://www.reddit.com/r/{}/{}.json?limit={}'.format(subreddit, sort,count)
header = { 'User-Agent' : 'Amazing script' }
try: