Skip to content

Instantly share code, notes, and snippets.

@talos
Forked from clhenrick/script.sql
Last active August 29, 2015 14:16
Show Gist options
  • Save talos/68f13261643f70d87630 to your computer and use it in GitHub Desktop.
Save talos/68f13261643f70d87630 to your computer and use it in GitHub Desktop.
-- PostgreSQL
-- combine data for all years into one table
-- to find distinct addresses & BBL numbers
create table dhcr_all (
zip integer,
bldgno1 text,
street_name1 text,
street_suffix1 text,
bldgno2 text,
street_name2 text,
street_suffix2 text,
bldgno3 text,
street_name3 text,
street_suffix3 text,
status1 text,
status2 text,
status3 text,
block text,
lot integer,
boro_code integer
);
insert into dhcr_all (
zip,
bldgno1,
street_name1,
street_suffix1,
bldgno2,
street_name2,
street_suffix2,
bldgno3,
street_name3,
street_suffix3,
status1,
status2,
status3,
block,
lot,
boro_code
)
SELECT * FROM dhcr2009tmp;
insert into dhcr_all (
zip,
bldgno1,
street_name1,
street_suffix1,
bldgno2,
street_name2,
street_suffix2,
bldgno3,
street_name3,
street_suffix3,
status1,
status2,
status3,
block,
lot,
boro_code
)
SELECT * FROM dhcr2011;
insert into dhcr_all (
zip,
bldgno1,
street_name1,
street_suffix1,
bldgno2,
street_name2,
street_suffix2,
bldgno3,
street_name3,
street_suffix3,
status1,
status2,
status3,
block,
lot,
boro_code
)
SELECT * FROM dhcr2012;
-- 2013 doesn't have a bldg03, street3, stsufx3 columns
insert into dhcr_all (
zip,
bldgno1,
street_name1,
street_suffix1,
bldgno2,
street_name2,
street_suffix2,
status1,
status2,
status3,
block,
lot,
boro_code
)
SELECT
"ZIP",
"BLDGNO1",
"STREET1",
"STSUFX1",
"BLDGNO2",
"STREET2",
"STSUFX2",
"STATUS1",
"STATUS2",
"STATUS3",
"BLOCK",
"LOT",
"BORO_CODE"
FROM dhcr2013;
-- should return about 170,000 rows
select count(*) from dhcr_all;
-- split bldgno1 column into two separate numbers for geocoding with NYC Geoclient API
alter table dhcr_all add column bldgno1a text;
alter table dhcr_all add column bldgno1b text;
update dhcr_all set bldgno1a = split_part(bldgno1, 'TO', 1);
update dhcr_all set bldgno1b = split_part(bldgno1, 'TO', 2;
-- find all distinct addresses
select bldgno1, bldgno1a, bldgno1b, street_name1, street_suffix1, boro_code, zip
from dhcr_all
group by bldgno1, bldgno1a, bldgno1b, street_name1, street_suffix1, boro_code, zip;
-- returns a measley 4,755 rows, DHCR's data is obviously not complete!
select count(*) from (
select bldgno1, bldgno1a, bldgno1b, street_name1, street_suffix1, boro_code, zip
from dhcr_all
group by bldgno1, bldgno1a, bldgno1b, street_name1, street_suffix1, boro_code, zip
) as distinct_addresses;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment