Paul Gowder paultopia

## addcss.py
# USAGE:
#
# To add code to the end of every <head> tag (like a css link, a font link, etc.) to quick-format an entire website:
# 1.  Start in the top-level-directory of the site.  Put this file there.
# 2. Add your formatting for the <head> tag to the formatme variable.
#           EXAMPLE: mine was '<link href="https://fonts.googleapis.com/css?family=Halant:300" rel="stylesheet" type="text/css"><link rel="stylesheet" href="http://paul-gowder.com/conlawII/prettify.css">'
#           be sure to either escape quotes or use single quotes to demarcate the string and double-quotes in the html/vice versa
# 3.  Run this script.
# 4. Bam.  Every html page in the top-level directory and all its subdirectories now has the formatting you want.

## wordcount.py
# assumes documents are provided in the form of a list of (docid, doctext) tuples named thedocslist. docid = int/string/float; doctext = string

import nltk
import string
from collections import Counter

# get rid of punctuation, numbers; make all lowercase.  no stemming.

counterslist = []
for onedocument in thedocslist:

## toolchain.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                paultopia
                / toolchain.md
            
            
              Last active
              November 30, 2015 20:16
            
              
                The toolchain of a techy-ish political theorist/lawyer
              
          
    My toolchain (on OSX)

I do all four of the following often:


Write lengthy academic prose.


Write code.


Analyze data and do other math-y things.


## makeslide.py
import argparse

# this first bit is to enable multiline help text.  apparently this is a known problem with argparse.
# Solution jacked from http://stackoverflow.com/questions/3853722/python-argparse-how-to-insert-newline-in-the-help-text

import textwrap as _textwrap
class MultilineFormatter(argparse.HelpFormatter):
    def _fill_text(self, text, width, indent):
        text = self._whitespace_matcher.sub(' ', text).strip()
        paragraphs = text.split('|n ')

## pgmd.py
# The point of this script is that pandoc commandline syntax is painful and hard to remember.
# I really only produce html, pdf, and docx.  And I only ever use the defaults.  Ergo, a script
# (subsequently to be put in $PATH with path to python added to top to be runnable trivially) to
# make it simple.
#
# usage: python pgmd.py INPUTFILE FORMAT[html/pdf/word]
# that's it.  easy.
#
# there are a handful of other options (output file, overwrite output file, append scripts and
# css and such to html headers), details are in the commandline help via -h flag

## quickscrape.py
# OBSOLETE.
# GO HERE INSTEAD: https://github.com/paultopia/spideyscrape

# very basic scraper-spider for those html books where there's a table of contents page that links to a
# bunch of sub-pages with actual content.  (Like the documentation for a bunch of libraries.)
# WARNING: has no validation, assumes pages contain relative links and are all on the same site.
# (this is an easy tweak but I don't have time today)
# also assumes all content is vanilla html or at least can be accessed through vanilla html.
#
# pass ToC page through raw_input.  This script scrapes every unique page linked from ToC and

## val_quickscrape.py
# EDIT: this has now been upgraded to a full-fledged repo and is accepting PRs.  This gist is no longer updating.
# go here: https://github.com/paultopia/spideyscrape

# This is a very basic scraper-spider for those html books where there's a table of contents page that links to a
# bunch of sub-pages with actual content (like the documentation for a bunch of libraries).
#
# Dependencies: Beautiful soup 4 on Python 2.7.
#
# It assumes all content is vanilla html or at least can be accessed through vanilla html.
#

## scrapewrap.py
import sys
import spideyscrape
import console
import os

# Everything after the script name; empty when no arguments were passed.
args = sys.argv[1:]

# Prefer the first command-line argument as the start URL; fall back to an
# interactive prompt when none was supplied.
if args:
    start = args[0]
else:
    start = raw_input('URL to crawl: ')

# Hand the URL to spideyscrape.scrape, pass its result to
# spideyscrape.savePage, then give the returned filename to console.open_in.
html = spideyscrape.scrape(start)
filename = spideyscrape.savePage(html)
console.open_in(filename)

## worst_python_ever.py
# I think I've discovered a bit of Python code even more dangerous than https://github.com/ajalt/fuckitpy

class string(str):
  """A str subclass whose instances execute their own text when called.

  WARNING: deliberately dangerous demonstration code -- do not use it in real
  programs.  Calling an instance runs its contents through ``exec`` and
  discards any ``Exception`` that results, so failures are invisible.  The
  class name also shadows the standard-library ``string`` module wherever
  this class is in scope.
  """

  def __call__(self):
    """Execute this string's text as Python source; always returns None.

    Anything derived from ``Exception`` (including a ``SyntaxError`` in the
    text itself) is swallowed.  Exceptions outside that hierarchy, such as
    ``KeyboardInterrupt`` or ``SystemExit``, still propagate.
    """
    try:
      # Function-call form parses on both Python 2 and Python 3; the
      # statement form (``exec self``) is a SyntaxError on Python 3.
      exec(self)
    except Exception:
      # Intentional silent swallow: hiding every error is the point of the
      # demonstration, so it is kept rather than logged or re-raised.
      pass


# Parenthesized print works as a statement on Python 2 and as a function call
# on Python 3, so the demo prints on either interpreter instead of failing
# silently inside the swallow-all handler.
evil = string('print("EVIL")')

## never_do_this.py
# NEVER DO THIS EXCEPT AS A PRANK ON YOUR WORST ENEMY

class foo(str):
  """A str subclass whose instances execute their own text when called.

  WARNING: prank/demonstration code only.  Calling an instance runs its
  contents through ``exec`` and discards any ``Exception`` that results.
  """

  def __call__(self):
    """Execute this string's text as Python source; always returns None.

    Anything derived from ``Exception`` (including a ``SyntaxError`` in the
    text itself) is swallowed.  Exceptions outside that hierarchy, such as
    ``KeyboardInterrupt`` or ``SystemExit``, still propagate.
    """
    try:
      # Function-call form parses on both Python 2 and Python 3; the
      # statement form (``exec self``) is a SyntaxError on Python 3.
      exec(self)
    except Exception:
      # Intentional silent swallow -- this is the prank; kept as-is.
      pass


# Rebinds the name ``str`` in this module to foo: every later ``str(...)`` call
# made through this module's namespace builds a callable, self-executing foo
# instance instead of a plain string.  String literals are unaffected.
str = foo
	# USAGE:
	#
	# To add code to the end of every <head> tag (like a css link, a font link, etc.) to quick-format an entire website:
	# 1. Start in the top-level-directory of the site. Put this file there.
	# 2. Add your formatting for the <head> tag to the formatme variable.
	# EXAMPLE: mine was '<link href="https://fonts.googleapis.com/css?family=Halant:300" rel="stylesheet" type="text/css"><link rel="stylesheet" href="http://paul-gowder.com/conlawII/prettify.css">'
	# be sure to either escape quotes or use single quotes to demarcate the string and double-quotes in the html/vice versa
	# 3. Run this script.
	# 4. Bam. Every html page in the top-level directory and all its subdirectories now has the formatting you want.
	# assumes documents are provided in the form of a list of (docid, doctext) tuples named thedocslist. docid = int/string/float; doctext = string

	import nltk
	import string
	from collections import Counter

	# get rid of punctuation, numbers; make all lowercase. no stemming.

	counterslist = []
	for onedocument in thedocslist:
	import argparse

	# this first bit is to enable multiline help text. apparently this is a known problem with argparse.
	# Solution jacked from http://stackoverflow.com/questions/3853722/python-argparse-how-to-insert-newline-in-the-help-text

	import textwrap as _textwrap
	class MultilineFormatter(argparse.HelpFormatter):
	def _fill_text(self, text, width, indent):
	text = self._whitespace_matcher.sub(' ', text).strip()
	paragraphs = text.split('\|n ')
	# The point of this script is that pandoc commandline syntax is painful and hard to remember.
	# I really only produce html, pdf, and docx. And I only ever use the defaults. Ergo, a script
	# (subsequently to be put in $PATH with path to python added to top to be runnable trivially) to
	# make it simple.
	#
	# usage: python pgmd.py INPUTFILE FORMAT[html/pdf/word]
	# that's it. easy.
	#
	# there are a handful of other options (output file, overwrite output file, append scripts and
	# css and such to html headers), details are in the commandline help via -h flag
	# OBSOLETE.
	# GO HERE INSTEAD: https://github.com/paultopia/spideyscrape

	# very basic scraper-spider for those html books where there's a table of contents page that links to a
	# bunch of sub-pages with actual content. (Like the documentation for a bunch of libraries.)
	# WARNING: has no validation, assumes pages contain relative links and are all on the same site.
	# (this is an easy tweak but I don't have time today)
	# also assumes all content is vanilla html or at least can be accessed through vanilla html.
	#
	# pass ToC page through raw_input. This script scrapes every unique page linked from ToC and
	# EDIT: this has now been upgraded to a full-fledged repo and is accepting PRs. This gist is no longer updating.
	# go here: https://github.com/paultopia/spideyscrape

	# This is a very basic scraper-spider for those html books where there's a table of contents page that links to a
	# bunch of sub-pages with actual content (like the documentation for a bunch of libraries).
	#
	# Dependencies: Beautiful soup 4 on Python 2.7.
	#
	# It assumes all content is vanilla html or at least can be accessed through vanilla html.
	#
	import sys
	import spideyscrape
	import console
	import os

	# Everything after the script name; empty when no arguments were passed.
	args = sys.argv[1:]
	# Prompt interactively only when no command-line argument was supplied;
	# otherwise the first argument is the start URL.
	start = raw_input('URL to crawl: ') if not args else args[0]
	# Hand the URL to spideyscrape.scrape, pass its result to
	# spideyscrape.savePage, then give the returned filename to console.open_in.
	html = spideyscrape.scrape(start)
	filename = spideyscrape.savePage(html)
	console.open_in(filename)
	# I think I've discovered a bit of Python code even more dangerous than https://github.com/ajalt/fuckitpy

	class string(str):
	  """A str subclass whose instances execute their own text when called.

	  WARNING: deliberately dangerous demonstration code -- do not use it in
	  real programs.  Calling an instance runs its contents through ``exec``
	  and discards any ``Exception`` that results, so failures are invisible.
	  The class name also shadows the standard-library ``string`` module
	  wherever this class is in scope.
	  """

	  def __call__(self):
	    """Execute this string's text as Python source; always returns None.

	    Anything derived from ``Exception`` (including a ``SyntaxError`` in the
	    text itself) is swallowed.  Exceptions outside that hierarchy, such as
	    ``KeyboardInterrupt`` or ``SystemExit``, still propagate.
	    """
	    try:
	      # Function-call form parses on both Python 2 and Python 3; the
	      # statement form (``exec self``) is a SyntaxError on Python 3.
	      exec(self)
	    except Exception:
	      # Intentional silent swallow: hiding every error is the point of
	      # the demonstration, so it is kept rather than logged or re-raised.
	      pass

	# Parenthesized print works as a statement on Python 2 and as a function
	# call on Python 3, so the demo prints on either interpreter instead of
	# failing silently inside the swallow-all handler.
	evil = string('print("EVIL")')
	# NEVER DO THIS EXCEPT AS A PRANK ON YOUR WORST ENEMY

	class foo(str):
	  """A str subclass whose instances execute their own text when called.

	  WARNING: prank/demonstration code only.  Calling an instance runs its
	  contents through ``exec`` and discards any ``Exception`` that results.
	  """

	  def __call__(self):
	    """Execute this string's text as Python source; always returns None.

	    Anything derived from ``Exception`` (including a ``SyntaxError`` in the
	    text itself) is swallowed.  Exceptions outside that hierarchy, such as
	    ``KeyboardInterrupt`` or ``SystemExit``, still propagate.
	    """
	    try:
	      # Function-call form parses on both Python 2 and Python 3; the
	      # statement form (``exec self``) is a SyntaxError on Python 3.
	      exec(self)
	    except Exception:
	      # Intentional silent swallow -- this is the prank; kept as-is.
	      pass

	# Rebinds the name ``str`` in this module to foo: every later ``str(...)``
	# call made through this module's namespace builds a callable,
	# self-executing foo instance.  String literals are unaffected.
	str = foo