Chris Black infotroph

## gist:28bd34eabcaee9e600b8
> options(digits.secs=NULL)
> a = as.POSIXct("2014-12-04 09:18:27")
> b = as.POSIXct("2014-12-04 09:18:27.12345")
> a
[1] "2014-12-04 09:18:27 CST"
> b
[1] "2014-12-04 09:18:27 CST"
> options(digits.secs=6)
> a
[1] "2014-12-04 09:18:27 CST"

## histfailure.r
# Attach ggplot2 (the code that uses it is not shown in this excerpt).
library(ggplot2)

# Fix the RNG seed so that any random draws are reproducible between runs.
set.seed(12345678)

# Report the R version, platform and attached packages for this session;
# the author's own output is pasted in the comments that follow.
sessionInfo()
# Loading required package: methods
# R version 3.2.0 Patched (2015-05-13 r68364)
# Platform: x86_64-apple-darwin10.8.0 (64-bit)
# Running under: OS X 10.8.5 (Mountain Lion)

## read.longline.R
# My input files have short header lines, then CSV data, then short footer lines.
# I'm currently trimming the short lines with an external call to sed,
# but I want a pure-R solution for portability.

# This version works nicely on small examples but gets very slow on large files,
# because append() grows the list, triggering a memory reallocation, for every line.
# Suggestions for speed improvement requested.

read.longline = function(file){
	f = file(file, "r")

## readtime.r
# Context: I have untidy CSVs that need some junk lines filtered out before they're even grid-shaped.
# I currently do the filtering with an external sed call,
# but wanted something that would work on any OS.

# In https://gist.github.com/infotroph/dd0faa5fd24bb78b4ff6
# I asked how to do the filtering from within R,
# and settled on readLines -> filter -> send filtered lines back to read.csv.

# This script doesn't filter anything,
# it just tests different ways of passing lines back into read.csv afterwards:

## gist:2f53db2f610730abe27a

# Have a set of Make rules that produce some outputs I usually want to keep,
# and some cruft I only want when debugging.
# Want cruft removed at the end of every successful build,
# and outputs AND cruft removed on $(make clean).

# This version appears to do all these things, but I welcome more feedback if something looks wrong.

OUTPUTS = \
	# bunch of compiled end products here

## pandoc-word-sectionbreak.hs
#!/usr/bin/env runhaskell

{-
Pandoc filter to replace horizontal rules with hard section breaks when output is in Word format.

Credits: This is a very lightly adapted version of a `\newpage` filter
previously described on pandoc-discuss:
	https://groups.google.com/forum/#!topic/pandoc-discuss/FzLrhk0vVbU


## one of many possible approaches to user counts
# Toy user table: one row per user, with the date the user became active
# and the date the user left (NA = no end date recorded).
dat = data.frame(
	userID=c("one", "two", "three", "four", "five"),
	start_date=as.Date(c("2015-09-01", "2015-09-02", "2015-09-02", "2015-09-03", "2015-09-03")),
	end_date=as.Date(c("2015-09-02", NA, "2015-09-03", NA, "2015-09-03")))

n_start = 100 # number of active users on day zero

# The ten consecutive days 2015-08-26 through 2015-09-04.
days = as.Date("2015-08-25")+1:10

# Number of users whose start_date falls on each day in `days`.
# vapply() pins the result to an integer vector (sapply() would silently
# return list() for empty input), and sum(..., na.rm=TRUE) counts the TRUE
# matches directly while ignoring NA dates, exactly as length(which()) did.
n_new = vapply(
	days,
	function(x) sum(dat$start_date == x, na.rm=TRUE),
	integer(1))

## bad_scope.py
#!/usr/bin/env python3

lst = [{'one':1,'two':2,'three':3}, {'one':100,'two':200,'three':300}]

def wrapper(x, fun):
    """Call ``fun`` with ``x`` as its only argument and return the result."""
    result = fun(x)
    return result

def this_works():
    def local_inner(d):
        return d[key]

## plotshape.r
# The problem: Plotting from R to PNG requires that you specify x and y
# dimensions, which therefore also fixes the aspect ratio of
# the whole image. In most of my plots, I want a fixed *panel* aspect ratio,
# but the overall dimensions of the full *plot* still depend on the dimensions
# of other plot elements: axes, legends, titles, etc.
# In a facetted ggplot, this gets even trickier: "OK, three panels, each
# with aspect ratio of 1.5, that adds up to... wait, will every panel
# have its own y-axis, or just the leftmost one?"

# ggplot apparently computes absolute dimensions for everything EXCEPT

## unmathsub.py
#!/usr/bin/env python3

'''
Pandoc filter to convert inline math subscripts to text subscripts.
Written for a very specific problem:
	Bibtex entries with "CO_{2}" are rendered by the Pandoc parser as
	[Str "CO",Math InlineMath "_{2}"],
	which is then rendered in OOXML as an inline equation that looks like
		"CO   2", with the 2 subscripted but an empty equation field between the subscript and the previous letters.
This filter solves this problem by replacing
	> options(digits.secs=NULL)
	> a = as.POSIXct("2014-12-04 09:18:27")
	> b = as.POSIXct("2014-12-04 09:18:27.12345")
	> a
	[1] "2014-12-04 09:18:27 CST"
	> b
	[1] "2014-12-04 09:18:27 CST"
	> options(digits.secs=6)
	> a
	[1] "2014-12-04 09:18:27 CST"
	library(ggplot2)

	set.seed(12345678)

	sessionInfo()
	# Loading required package: methods
	# R version 3.2.0 Patched (2015-05-13 r68364)
	# Platform: x86_64-apple-darwin10.8.0 (64-bit)
	# Running under: OS X 10.8.5 (Mountain Lion)
	# My input files have short header lines, then CSV data, then short footer lines.
	# I'm currently trimming the short lines with an external call to sed,
	# but I want a pure-R solution for portability.

	# This version works nicely on small examples but gets very slow on large files,
	# because append() grows the list, triggering a memory reallocation, for every line.
	# Suggestions for speed improvement requested.

	read.longline = function(file){
	f = file(file, "r")
	# Context: I have untidy CSVs that need some junk lines filtered out before they're even grid-shaped.
	# I currently do the filtering with an external sed call,
	# but wanted something that would work on any OS.

	# In https://gist.github.com/infotroph/dd0faa5fd24bb78b4ff6
	# I asked how to do the filtering from within R,
	# and settled on readLines -> filter -> send filtered lines back to read.csv.

	# This script doesn't filter anything,
	# it just tests different ways of passing lines back into read.csv afterwards:

	# Have a set of Make rules that produce some outputs I usually want to keep,
	# and some cruft I only want when debugging.
	# Want cruft removed at the end of every successful build,
	# and outputs AND cruft removed on $(make clean).

	# This version appears to do all these things, but I welcome more feedback if something looks wrong.

	OUTPUTS = \
	# bunch of compiled end products here
	#!/usr/bin/env runhaskell

	{-
	Pandoc filter to replace horizontal rules with hard section breaks when output is in Word format.

	Credits: This is a very lightly adapted version of a `\newpage` filter
	previously described on pandoc-discuss:
	https://groups.google.com/forum/#!topic/pandoc-discuss/FzLrhk0vVbU
	dat = data.frame(
	userID=c("one", "two", "three", "four", "five"),
	start_date=as.Date(c("2015-09-01", "2015-09-02", "2015-09-02", "2015-09-03", "2015-09-03")),
	end_date=as.Date(c("2015-09-02", NA, "2015-09-03", NA, "2015-09-03")))

	n_start = 100 # number of active users on day zero

	days = as.Date("2015-08-25")+1:10

	n_new = sapply(days, function(x)length(which(dat$start_date == x)))
	#!/usr/bin/env python3

	lst = [{'one':1,'two':2,'three':3}, {'one':100,'two':200,'three':300}]

	def wrapper(x, fun):
	return fun(x)

	def this_works():
	def local_inner(d):
	return d[key]
	# The problem: Plotting from R to PNG requires that you specify x and y
	# dimensions, which therefore also fixes the aspect ratio of
	# the whole image. In most of my plots, I want a fixed panel aspect ratio,
	# but the overall dimensions of the full plot still depend on the dimensions
	# of other plot elements: axes, legends, titles, etc.
	# In a facetted ggplot, this gets even trickier: "OK, three panels, each
	# with aspect ratio of 1.5, that adds up to... wait, will every panel
	# have its own y-axis, or just the leftmost one?"

	# ggplot apparently computes absolute dimensions for everything EXCEPT
	#!/usr/bin/env python3

	'''
	Pandoc filter to convert inline math subscripts to text subscripts.
	Written for a very specific problem:
	Bibtex entries with "CO_{2}" are rendered by the Pandoc parser as
	[Str "CO",Math InlineMath "_{2}"],
	which is then rendered in OOXML as an inline equation that looks like
	"CO 2", with the 2 subscripted but an empty equation field between the subscript and the previous letters.
	This filter solves this problem by replacing