Last active
October 13, 2015 15:09
-
-
Save nikita-barsukov/8a2e085492631d256d53 to your computer and use it in GitHub Desktop.
Code snippets used for creating dataset used in grading paper for UDacity's 'Exploratory data analysis' course.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE TABLE "public"."crime" ( | |
"id" serial, | |
"crime" character varying, | |
"lat" numeric, | |
"lon" numeric, | |
"reported_at" timestamp, | |
"type" character varying, | |
"the_geom" geometry, | |
PRIMARY KEY ("id") | |
); | |
CREATE TABLE IF NOT EXISTS "public"."population" ( | |
"id" serial, | |
"geoid" character varying, | |
"geoid2" character varying, | |
"name" text, | |
"population" numeric, | |
"margin_of_error" numeric, | |
PRIMARY KEY ("id") | |
); | |
CREATE TABLE "public"."income" ( | |
"id" serial, | |
"geoid" character varying, | |
"geoid2" character varying, | |
"name" text, | |
"median_income" numeric, | |
"margin_of_error" numeric, | |
PRIMARY KEY ("id") | |
); | |
CREATE TABLE "public"."unemployment" ( | |
"id" serial, | |
"geoid" character varying, | |
"geoid2" character varying, | |
"name" text, | |
"total_labor_force" numeric, | |
"in_labor_force" numeric, | |
"unemployed" numeric, | |
"not_in_labor_force" numeric, | |
PRIMARY KEY ("id") | |
); | |
CREATE TABLE "public"."education" ( | |
"id" serial, | |
"geoid" character varying, | |
"geoid2" character varying, | |
"name" text, | |
"total" numeric, | |
"college_or_higher" numeric, | |
PRIMARY KEY ("id") | |
); | |
copy crime (crime,lat,lon,reported_at,type) from '/Users/nikita/Documents/crime_factors_USA/crime_reports.csv' DELIMITER ',' CSV header | |
copy population (geoid,geoid2,name,population,margin_of_error) from '/Users/nikita/Documents/crime_factors_USA/raw/population/ACS_13_5YR_B01003_with_ann.csv' DELIMITER ',' CSV header | |
copy income (geoid,geoid2,name,median_income,margin_of_error) from '/Users/nikita/Documents/crime_factors_USA/raw/percapita_income/ACS_13_5YR_B19301_with_ann.csv' DELIMITER ',' CSV header | |
copy unemployment (geoid,geoid2,name,total_labor_force,in_labor_force,unemployed, not_in_labor_force) from '/Users/nikita/Documents/crime_factors_USA/raw/employment_status/employment_status_2013.csv' DELIMITER ';' CSV header | |
copy education (geoid,geoid2,name,total,college_or_higher) from '/Users/nikita/Documents/crime_factors_USA/raw/edu_attainment/edu_attainment_2013.csv' DELIMITER ';' CSV header | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WITH tbl AS ( | |
SELECT | |
geoid, coalesce(other, 0) as other, coalesce(personal, 0) as personal, coalesce(property, 0) as property | |
FROM | |
crosstab ( | |
'select geoid, | |
ct.type as ty, | |
count(*)::integer from crime_block_data as ct | |
group by geoid, | |
ct.type order by 1,2' ) | |
AS ct ( | |
"geoid" CHARACTER VARYING, | |
"other" INTEGER, | |
"personal" INTEGER, | |
"property" INTEGER ) ) | |
SELECT | |
tbl.geoid, | |
pop.name, | |
tbl.other, | |
tbl.personal, | |
tbl.property, | |
100000 * tbl.other/pop.population AS crime_other, | |
100000 * tbl.personal/pop.population AS crime_personal, | |
100000 * tbl.property/pop.population AS crime_property, | |
100000 * (tbl.property + tbl.personal + tbl.other)/pop.population AS crime_all, | |
pop.population AS population, | |
pop.population/(st_area(block_group.geom::geography) / 1000000) AS density, | |
income.median_income, | |
unemployment.unemployed / unemployment.total_labor_force AS unemployed, | |
education.high_school_or_higher AS education | |
FROM | |
tbl | |
JOIN population pop ON tbl.geoid = pop.geoid2 | |
JOIN income ON tbl.geoid = income.geoid2 | |
JOIN unemployment ON tbl.geoid = unemployment.geoid2 | |
JOIN block_group on tbl.geoid = block_group.geoid | |
JOIN education ON tbl.geoid = education.geoid2; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment