Created
November 11, 2018 19:29
-
-
Save erikgregorywebb/81a18da6282283e5e9ab8c57950baf99 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# call libraries | |
library(dplyr) | |
library(readxl) | |
# set working directory | |
rm(list = ls()) | |
setwd("~/Documents/Python/scarsdale") | |
# import the data | |
properties <- read.csv("properties-raw.csv") | |
# select descriptive variable names | |
properties <- properties %>% | |
select(PropertyID = prnt_key, | |
PropertyClassCode = pc_2017, | |
LegalAddrNo = st_nbr, | |
LegalAddr = st_name, | |
Owner = owner, | |
TaxRollYr = roll_yr, | |
SalePrice = sale_pri, | |
SaleDate = sale_dt, | |
TotalAssessedVal = avT_2017, | |
LandAssessedVal = avL_2017, | |
SchoolCode = sch_cd, | |
ZoningCode = zone_cd, | |
NeighborhoodCode = nbhd_cd, | |
NghbrhdTypeCode = nbhd_typ, | |
NghbrhdRatingCode = nbhd_rat, | |
BuildingStyleCode = bldg_sty, | |
NumberOfStories = nbr_stor, | |
SqFootLivingArea = sfla, | |
FirstStoryArea = first_st, | |
FinishedBasementArea = fnshd_ba, | |
FinishedRecRoom = rec_room, | |
OverallGradeCode = grade, | |
ConditionCode = ov_cond, | |
ActualYearBuilt = yr_blt, | |
EffectYearBuilt = ef_yr_bl, | |
HeatTypeCode = heat_typ, | |
FuelTypeCode = fuel_typ, | |
ExtWallMaterialCode = ext_wall, | |
NumberOfBedrooms = nbr_bed, | |
NumberOfKitchens = nbr_kit, | |
NumberOfFireplaces = nbr_fire, | |
BasementTypeCode = bsmnt_ty, | |
CentralAir = cent_air, | |
PoolType = pool_typ, | |
DesirabilityCode = site_des, | |
UtilityTypeCode = utilitie, | |
WaterSupplyCode = water_su, | |
SewerTypeCode = sewer_ty, | |
RoadTypeCode = road_typ, | |
Frontage = front_ft, | |
Acreage = tot_acre, | |
WaterfrontTypeCode = water_ty, | |
TotalSites = tot_site, | |
OwnershipCode = owner_cd, | |
ActiveFlag = active, | |
NumberOfBathrooms = n_f_bths, | |
NumHalfBathrooms = n_h_bths, | |
GradeAdjustment = gradjpct, | |
RemodelYear = yr_remod, | |
KitchenQualityCode = kitcqual, | |
BathroomQualityCode = bathqual, | |
InteriorCondition = int_cond, | |
ExteriorCondition = ext_cond, | |
Depth = depth, | |
RollSection = roll_sec, | |
ImproveSquareFeet = attgarsf, | |
NumberOfRooms = nbrrooms, | |
PhysicalQtyIncrease = physincr, | |
PhysicalQtyDecrease = physdecr, | |
SecondStoryArea = sec_st, | |
ThirdStoryArea = addl_sto, | |
HalfStoryArea = half, | |
ThreeQuarterStoryAre = quarter, | |
UnfinishedHalfStory = uhalf, | |
Unfinished3_4Story = uquart, | |
FinishedAreaOverGara = fin_over, | |
FinishedAtticArea = fin_att, | |
UnfinishedRoom = unfin_ro, | |
UnfinishedOverGarage = unfin_ov, | |
BasementGarCapacity = bsmt_gar, | |
TrafficCode = traffic) | |
# clean the data | |
properties <- properties %>% | |
filter(PropertyClassCode == 210) %>% # filter out non-residential properties | |
mutate(Address = paste(LegalAddrNo, LegalAddr, sep = " ")) # form full address | |
# export | |
write.csv(properties, "properties-clean.csv", row.names = F) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment