I do all four of the following often:
- Write lengthy academic prose.
- Write code.
- Analyze data and do other math-y things.
# USAGE: | |
# | |
# To add code to the end of every <head> tag (like a css link, a font link, etc.) to quick-format an entire website: | |
# 1. Start in the top-level-directory of the site. Put this file there. | |
# 2. Add your formatting for the <head> tag to the formatme variable. | |
# EXAMPLE: mine was '<link href="https://fonts.googleapis.com/css?family=Halant:300" rel="stylesheet" type="text/css"><link rel="stylesheet" href="http://paul-gowder.com/conlawII/prettify.css">' | |
# be sure to either escape quotes or use single quotes to demarcate the string and double-quotes in the html/vice versa | |
# 3. Run this script. | |
# 4. Bam. Every html page in the top-level directory and all its subdirectories now has the formatting you want. |
# assumes documents are provided in the form of a list of (docid, doctext) tuples named thedocslist. docid = int/string/float; doctext = string | |
import nltk | |
import string | |
from collections import Counter | |
# get rid of punctuation, numbers; make all lowercase. no stemming. | |
counterslist = [] | |
for onedocument in thedocslist: |
import argparse | |
# this first bit is to enable multiline help text. apparently this is a known problem with argparse. | |
# Solution jacked from http://stackoverflow.com/questions/3853722/python-argparse-how-to-insert-newline-in-the-help-text | |
import textwrap as _textwrap | |
class MultilineFormatter(argparse.HelpFormatter): | |
def _fill_text(self, text, width, indent): | |
text = self._whitespace_matcher.sub(' ', text).strip() | |
paragraphs = text.split('|n ') |
# The point of this script is that pandoc commandline syntax is painful and hard to remember. | |
# I really only produce html, pdf, and docx. And I only ever use the defaults. Ergo, a script | |
# (subsequently to be put in $PATH with path to python added to top to be runnable trivially) to | |
# make it simple. | |
# | |
# usage: python pgmd.py INPUTFILE FORMAT[html/pdf/word] | |
# that's it. easy. | |
# | |
# there are a handful of other options (output file, overwrite output file, append scripts and | |
# css and such to html headers), details are in the commandline help via -h flag |
# OBSOLETE. | |
# GO HERE INSTEAD: https://github.com/paultopia/spideyscrape | |
# very basic scraper-spider for those html books where there's a table of contents page that links to a | |
# bunch of sub-pages with actual content. (Like the documentation for a bunch of libraries.) | |
# WARNING: has no validation, assumes pages contain relative links and are all on the same site. | |
# (this is an easy tweak but I don't have time today) | |
# also assumes all content is vanilla html or at least can be accessed through vanilla html. | |
# | |
# pass ToC page through raw_input. This script scrapes every unique page linked from ToC and |
# EDIT: this has now been upgraded to a full-fledged repo and is accepting PRs. This gist is no longer being updated. | |
# go here: https://github.com/paultopia/spideyscrape | |
# This is a very basic scraper-spider for those html books where there's a table of contents page that links to a | |
# bunch of sub-pages with actual content (like the documentation for a bunch of libraries). | |
# | |
# Dependencies: Beautiful Soup 4 on Python 2.7. | |
# | |
# It assumes all content is vanilla html or at least can be accessed through vanilla html. | |
# |
import sys | |
import spideyscrape | |
import console | |
import os | |
# Use the first command-line argument as the start URL for the crawl; when no
# argument was supplied, prompt for one interactively instead.
args = sys.argv[1:]
if args:
    start = args[0]
else:
    start = raw_input('URL to crawl: ')

# Scrape starting from that URL, hand the result to spideyscrape.savePage,
# and pass the value it returns (used here as a filename) to console.open_in.
html = spideyscrape.scrape(start)
filename = spideyscrape.savePage(html)
console.open_in(filename)
# I think I've discovered a bit of Python code even more dangerous than https://github.com/ajalt/fuckitpy | |
class string(str):
    """A ``str`` subclass whose instances execute their own text when called.

    This is deliberately dangerous demonstration code: calling an instance
    runs arbitrary Python source and hides any failure. Never feed it
    untrusted input.

    NOTE: the class name is the same as the standard-library ``string``
    module, so it shadows that module wherever both are in scope.
    """

    def __call__(self):
        """Run this string's text as Python source and return ``None``.

        Any ``Exception`` raised while compiling or running the text is
        swallowed on purpose.
        """
        try:
            # The call form of exec parses on both Python 2 and Python 3;
            # the bare ``exec self`` statement is a SyntaxError on Python 3.
            exec(self)
        except Exception:
            # Silent failure is the intended behavior of this demo.
            pass


# ``print("EVIL")`` prints the same text on Python 2 and Python 3.
evil = string('print("EVIL")')
# NEVER DO THIS EXCEPT AS A PRANK ON YOUR WORST ENEMY | |
class foo(str):
    """A ``str`` subclass whose instances execute their own text when called.

    Deliberately dangerous prank code: calling an instance runs arbitrary
    Python source and hides any failure. Never feed it untrusted input.
    """

    def __call__(self):
        """Run this string's text as Python source and return ``None``.

        Any ``Exception`` raised while compiling or running the text is
        swallowed on purpose.
        """
        try:
            # The call form of exec parses on both Python 2 and Python 3;
            # the bare ``exec self`` statement is a SyntaxError on Python 3.
            exec(self)
        except Exception:
            # Silent failure is the intended behavior of this prank.
            pass


# Intentionally rebinds the module-level name ``str`` to the callable
# subclass, so later ``str(...)`` calls in this module build ``foo`` objects.
str = foo