Jeremy B. Merrill jeremybmerrill

## compstat.rb
require 'tabula'
require 'fileutils'


folder_name = "compstat"
output_folder_name = "compstat_csvs"

#########################################################################
#########################################################################
FileUtils.mkdir_p(output_folder_name + "/")

## gist:1d058424aca5ebe2eb3d
#
# on mac, replace TABGOESHERE with a tab by typing Ctrl-V then the Tab key
#
mysql -u USERNAME --database=dbname --host=HOST --batch -e "select * from tablename" |
sed 's/TABGOESHERE/","/g'| sed 's/^/"/g' | sed 's/$/"/g' | sed 's/\n//g' > destination.csv


## atlantacrime2012until2017.small.csv
offense_id,occur_date,UC2 Literal,neighborhood,npu
110171050,01/14/2012,LARCENY-NON VEHICLE,Sweet Auburn,M
110181057,08/22/2011,LARCENY-NON VEHICLE,Glenrose Heights,Z
112032439,07/22/2011,AUTO THEFT,Downtown,M
112152334,08/03/2011,AUTO THEFT,Perkerson,X
113491709,12/07/2011,LARCENY-FROM VEHICLE,Hills Park,D
120010023,01/01/2012,AGG ASSAULT,The Villages at Carver,Y
120010069,12/31/2011,LARCENY-FROM VEHICLE,Old Fourth Ward,M
120010072,12/31/2011,LARCENY-FROM VEHICLE,English Avenue,L
120010086,01/01/2012,LARCENY-FROM VEHICLE,Morningside/Lenox Park,F

## demo.html
<!DOCTYPE html>
<html>
<head>
  <title>How To Cause Trouble With Events' isTrusted Attribute</title>
  <meta charset="UTF-8">
  <script
        src="http://code.jquery.com/jquery-2.2.4.min.js"
        integrity="sha256-BbhdlvQf/xTY9gja0Dq3HiwQF8LaCRTXxZKRutelT44="
        crossorigin="anonymous"></script>
</head>

## airplanes.sql
create table flight_segments as
SELECT hexid,start_time,end_time,callsign,point,
   -- take a substring if the length remaining in the segment is greater than 5280 feet (1609.34 m)
   -- otherwise take the remainder
   ST_LineSubstring(geom, 1609.34*n/length,
   CASE
     WHEN 1609.34*(n+1) < length THEN 1609.34*(n+1)/length
     ELSE 1
   END) as geom
FROM

## code_tos.rb
require 'sinatra'
require 'csv'

$csv_read_path = "my_thing.uncoded.csv"
$csv_write_path = "my_thing.coded.csv"

$data = CSV.read($csv_read_path, {:headers => true})

def write_csv!
  CSV.open($csv_write_path, 'wb') do |csv|

## edc.rb
require 'tabula'
require 'fileutils'


folder_name = "EDC"
output_folder_name = "EDCcsvs"

#########################################################################
#########################################################################
FileUtils.mkdir_p(output_folder_name + "/")

## tabula_basic.rb
require 'tabula'

pdf_file_path = "czechmaybe.pdf"
outfilename = "czechmaybe.csv"

out = open(outfilename, 'w')

extractor = Tabula::Extraction::ObjectExtractor.new(pdf_file_path, [5] ) #:all ) # 1..2643
extractor.extract.each do |pdf_page|
  pdf_page.spreadsheets.each do |spreadsheet|

## gender.rb
require 'csv'
require 'set'

class Gender
  def initialize(options={})
    countries = Set.new([:us, :uk])

    @threshold = options[:threshold] || 0.99

    @names_counts = {}

## count_scraper.rb
require 'upton'
require 'date'
require 'guess'

GLOBAL_VERBOSE = true

# - any lowercased pronoun is okay
# - capitalized pronouns are okay unless they're in a book title, which is a series of capitalized words;
#   that is, capitalized pronouns are okay if there are zero alphabetic characters between them and a sentence-final punct
FEMALE_REGEXES = [/ she[\.,\s!?\' ]/, / her[\.,\s!?\' ]/,
	require 'tabula'
	require 'fileutils'


	folder_name = "compstat"
	output_folder_name = "compstat_csvs"

	#########################################################################
	#########################################################################
	FileUtils.mkdir_p(output_folder_name + "/")
	#
	# on mac, replace TABGOESHERE with a tab by typing Ctrl-V then the Tab key
	#
	mysql -u USERNAME --database=dbname --host=HOST --batch -e "select * from tablename" |
	sed 's/TABGOESHERE/","/g'| sed 's/^/"/g' | sed 's/$/"/g' | sed 's/\n//g' > destination.csv
	offense_id,occur_date,UC2 Literal,neighborhood,npu
	110171050,01/14/2012,LARCENY-NON VEHICLE,Sweet Auburn,M
	110181057,08/22/2011,LARCENY-NON VEHICLE,Glenrose Heights,Z
	112032439,07/22/2011,AUTO THEFT,Downtown,M
	112152334,08/03/2011,AUTO THEFT,Perkerson,X
	113491709,12/07/2011,LARCENY-FROM VEHICLE,Hills Park,D
	120010023,01/01/2012,AGG ASSAULT,The Villages at Carver,Y
	120010069,12/31/2011,LARCENY-FROM VEHICLE,Old Fourth Ward,M
	120010072,12/31/2011,LARCENY-FROM VEHICLE,English Avenue,L
	120010086,01/01/2012,LARCENY-FROM VEHICLE,Morningside/Lenox Park,F
	<!DOCTYPE html>
	<html>
	<head>
	<title>How To Cause Trouble With Events' isTrusted Attribute</title>
	<meta charset="UTF-8">
	<script
	src="http://code.jquery.com/jquery-2.2.4.min.js"
	integrity="sha256-BbhdlvQf/xTY9gja0Dq3HiwQF8LaCRTXxZKRutelT44="
	crossorigin="anonymous"></script>
	</head>
	create table flight_segments as
	SELECT hexid,start_time,end_time,callsign,point,
	-- take a substring if the length remaining in the segment is greater than 5280 feet (1609.34 m)
	-- otherwise take the remainder
	ST_LineSubstring(geom, 1609.34*n/length,
	CASE
	WHEN 1609.34*(n+1) < length THEN 1609.34*(n+1)/length
	ELSE 1
	END) as geom
	FROM
	require 'sinatra'
	require 'csv'

	$csv_read_path = "my_thing.uncoded.csv"
	$csv_write_path = "my_thing.coded.csv"

	$data = CSV.read($csv_read_path, {:headers => true})

	def write_csv!
	CSV.open($csv_write_path, 'wb') do |csv|
	require 'tabula'
	require 'fileutils'


	folder_name = "EDC"
	output_folder_name = "EDCcsvs"

	#########################################################################
	#########################################################################
	FileUtils.mkdir_p(output_folder_name + "/")
	require 'tabula'

	pdf_file_path = "czechmaybe.pdf"
	outfilename = "czechmaybe.csv"

	out = open(outfilename, 'w')

	extractor = Tabula::Extraction::ObjectExtractor.new(pdf_file_path, [5] ) #:all ) # 1..2643
	extractor.extract.each do |pdf_page|
	pdf_page.spreadsheets.each do |spreadsheet|
	require 'csv'
	require 'set'

	class Gender
	def initialize(options={})
	countries = Set.new([:us, :uk])

	@threshold = options[:threshold] || 0.99

	@names_counts = {}
	require 'upton'
	require 'date'
	require 'guess'

	GLOBAL_VERBOSE = true

	# - any lowercased pronoun is okay
	# - capitalized pronouns are okay unless they're in a book title, which is a series of capitalized words;
	# that is, capitalized pronouns are okay if there are zero alphabetic characters between them and a sentence-final punct
	FEMALE_REGEXES = [/ she[\.,\s!?\' ]/, / her[\.,\s!?\' ]/,