Skip to content

Instantly share code, notes, and snippets.

View spara's full-sized avatar

Sophia Parafina spara

View GitHub Profile
@spara
spara / clean_data.sh
Created February 3, 2012 14:42
shell foo for data munging
#
# Look at the first 5 lines of a file
#
$ head -n 2 campus_dropout.csv
"CAMP_SPEN","CAMP_SPER","CAMP_SPED","CAMP_OVRN","CAMP_OVRR","CAMP_OVRD","CAMP_MIGN","CAMP_MIGR","CAMP_MIGD","CAMP_LEPN","CAMP_LEPR","CAMP_LEPD","CAMP_IMMN","CAMP_IMMR","CAMP_IMMD","CAMP_GFTN","CAMP_GFTR","CAMP_GFTD","CAMP_ECNN","CAMP_ECNR","CAMP_ECND","CAMP_CTEN","CAMP_CTER","CAMP_CTED","CAMP_TTLN","CAMP_TTLR","CAMP_TTLD","CAMP_BEN","CAMP_BER","CAMP_BED","CAMP_ATRN","CAMP_ATRR","CAMP_ATRD","CAMP_MUN","CAMP_MUR","CAMP_MUD","CAMP_PIN","CAMP_PIR","CAMP_PID","CAMP_WHN","CAMP_WHR","CAMP_WHD","CAMP_NAN","CAMP_NAR","CAMP_NAD","CAMP_HSN","CAMP_HSR","CAMP_HSD","CAMP_ASN","CAMP_ASR","CAMP_ASD","CAMP_AAN","CAMP_AAR","CAMP_AAD","CAMP_FEMN","CAMP_FEMR","CAMP_FEMD","CAMP_MALN","CAMP_MALR","CAMP_MALD","CAMP_ALLN","CAMP_ALLR","CAMP_ALLD","DISTRICT","Gradespan","campus","cntyname","regnname","campname","distname","COUNTY","REGION"
"0","0.0","28","-","3.7","<100",".",".",".",".",".",".",".",".",".","0","0.0","24","-","1.5","<100","0","0.0","45","-","1.4"
@spara
spara / example1
Created February 6, 2012 16:13
examine file
#
# Look at the first 2 lines of a file
#
$ head -n 2 campus_dropout.csv
"CAMP_SPEN","CAMP_SPER","CAMP_SPED","CAMP_OVRN","CAMP_OVRR","CAMP_OVRD","CAMP_MIGN","CAMP_MIGR","CAMP_MIGD","CAMP_LEPN","CAMP_LEPR","CAMP_LEPD","CAMP_IMMN","CAMP_IMMR","CAMP_IMMD","CAMP_GFTN","CAMP_GFTR","CAMP_GFTD","CAMP_ECNN","CAMP_ECNR","CAMP_ECND","CAMP_CTEN","CAMP_CTER","CAMP_CTED","CAMP_TTLN","CAMP_TTLR","CAMP_TTLD","CAMP_BEN","CAMP_BER","CAMP_BED","CAMP_ATRN","CAMP_ATRR","CAMP_ATRD","CAMP_MUN","CAMP_MUR","CAMP_MUD","CAMP_PIN","CAMP_PIR","CAMP_PID","CAMP_WHN","CAMP_WHR","CAMP_WHD","CAMP_NAN","CAMP_NAR","CAMP_NAD","CAMP_HSN","CAMP_HSR","CAMP_HSD","CAMP_ASN","CAMP_ASR","CAMP_ASD","CAMP_AAN","CAMP_AAR","CAMP_AAD","CAMP_FEMN","CAMP_FEMR","CAMP_FEMD","CAMP_MALN","CAMP_MALR","CAMP_MALD","CAMP_ALLN","CAMP_ALLR","CAMP_ALLD","DISTRICT","Gradespan","campus","cntyname","regnname","campname","distname","COUNTY","REGION"
"0","0.0","28","-","3.7","<100",".",".",".",".",".",".",".",".",".","0","0.0","24","-","1.5","<100","0","0.0","45","-","1.4"
@spara
spara / eample2_gis_win
Created February 6, 2012 17:12
sed and gist to create sql
#
# create a sql script to create a table in postgres
# First step is extract the header
#
$ head -n 1 campus_dropout.csv > campus_header
#
# look at header
#
$ cat campus_header
@spara
spara / example2_gis_on_win
Created February 6, 2012 16:44
Using sed
#
# replace the "." values with nulls using sed and write to
# another file
#
sed 's/\"\.\"//g' campus_dropout.csv > campus_dropout_clean.csv
#
# check the output of the new file
#
$ head -n 2 campus_dropout_clean.csv
@spara
spara / example4_gis_on_win
Created February 6, 2012 17:27
adding create table and closing paren
#
# Finish the script by adding a create table statement
# using your favorite editor or on the command line.
# Note that this version of sed does not have inplace
# editing, so a tmp file is used
#
$ sed '1s/^/CREATE TABLE campust_dropout_raw( /' campus_dropout.sql > tmp
$ sed '$s/$/ varchar);/' tmp > campus_dropout.sql
@spara
spara / example5_gis_on_win
Created February 6, 2012 18:15
make sql script for campus dropout rate
# Make a sql script to create a table with only the dropout rate
# column using sed
#
$ tr -s ',' '\n' < campus_header | sed -n '2~3p' > campus_dropout_rate.sql
# grab the first n items in a file
$ wc -l campus_dropout_rate.sql
$ head -n 21 campus_dropout_rate.sql > tmp
# add the numeric type definition
@spara
spara / example6_gis_on_win
Created February 6, 2012 19:00
pgsql copy command
COPY campus_dropout_raw FROM 'c:\path\to\campus_dropout_clean.csv' CSV;
@spara
spara / campus_dropout_rate.sql
Created February 6, 2012 19:52
campus dropout rate sql
CREATE TABLE CAMPUS_DROPOUT_RATE(
"CAMP_SPER" numeric,
"CAMP_OVRR" numeric,
"CAMP_MIGR" numeric,
"CAMP_LEPR" numeric,
"CAMP_IMMR" numeric,
"CAMP_GFTR" numeric,
"CAMP_ECNR" numeric,
"CAMP_CTER" numeric,
"CAMP_TTLR" numeric,
@spara
spara / example7_gis_on_win
Created February 6, 2012 19:46
insert data
INSERT INTO campus_dropout_rate(
"CAMP_SPER",
"CAMP_OVRR",
"CAMP_MIGR",
"CAMP_LEPR",
"CAMP_IMMR",
"CAMP_GFTR",
"CAMP_ECNR",
"CAMP_CTER",
"CAMP_TTLR",
<!DOCTYPE html>
<html>
<head>
<meta charset='utf-8' />
<title>Tornadoes in the U.S. - 2010</title>
<script src='wax/ext/modestmaps.min.js' type='text/javascript'></script>
<script src='wax/dist/wax.mm.js' type='text/javascript'></script>
<link href='wax/theme/controls.css' rel='stylesheet' type='text/css' />
<style type='text/css'>