Instantly share code, notes, and snippets.

Embed
What would you like to do?
# call libraries
library(dplyr)
library(readxl)
# set working directory
rm(list = ls())
setwd("~/Documents/Python/scarsdale")
# import the data
properties <- read.csv("properties-raw.csv")
# select descriptive variable names
properties <- properties %>%
select(PropertyID = prnt_key,
PropertyClassCode = pc_2017,
LegalAddrNo = st_nbr,
LegalAddr = st_name,
Owner = owner,
TaxRollYr = roll_yr,
SalePrice = sale_pri,
SaleDate = sale_dt,
TotalAssessedVal = avT_2017,
LandAssessedVal = avL_2017,
SchoolCode = sch_cd,
ZoningCode = zone_cd,
NeighborhoodCode = nbhd_cd,
NghbrhdTypeCode = nbhd_typ,
NghbrhdRatingCode = nbhd_rat,
BuildingStyleCode = bldg_sty,
NumberOfStories = nbr_stor,
SqFootLivingArea = sfla,
FirstStoryArea = first_st,
FinishedBasementArea = fnshd_ba,
FinishedRecRoom = rec_room,
OverallGradeCode = grade,
ConditionCode = ov_cond,
ActualYearBuilt = yr_blt,
EffectYearBuilt = ef_yr_bl,
HeatTypeCode = heat_typ,
FuelTypeCode = fuel_typ,
ExtWallMaterialCode = ext_wall,
NumberOfBedrooms = nbr_bed,
NumberOfKitchens = nbr_kit,
NumberOfFireplaces = nbr_fire,
BasementTypeCode = bsmnt_ty,
CentralAir = cent_air,
PoolType = pool_typ,
DesirabilityCode = site_des,
UtilityTypeCode = utilitie,
WaterSupplyCode = water_su,
SewerTypeCode = sewer_ty,
RoadTypeCode = road_typ,
Frontage = front_ft,
Acreage = tot_acre,
WaterfrontTypeCode = water_ty,
TotalSites = tot_site,
OwnershipCode = owner_cd,
ActiveFlag = active,
NumberOfBathrooms = n_f_bths,
NumHalfBathrooms = n_h_bths,
GradeAdjustment = gradjpct,
RemodelYear = yr_remod,
KitchenQualityCode = kitcqual,
BathroomQualityCode = bathqual,
InteriorCondition = int_cond,
ExteriorCondition = ext_cond,
Depth = depth,
RollSection = roll_sec,
ImproveSquareFeet = attgarsf,
NumberOfRooms = nbrrooms,
PhysicalQtyIncrease = physincr,
PhysicalQtyDecrease = physdecr,
SecondStoryArea = sec_st,
ThirdStoryArea = addl_sto,
HalfStoryArea = half,
ThreeQuarterStoryAre = quarter,
UnfinishedHalfStory = uhalf,
Unfinished3_4Story = uquart,
FinishedAreaOverGara = fin_over,
FinishedAtticArea = fin_att,
UnfinishedRoom = unfin_ro,
UnfinishedOverGarage = unfin_ov,
BasementGarCapacity = bsmt_gar,
TrafficCode = traffic)
# clean the data
properties <- properties %>%
filter(PropertyClassCode == 210) %>% # filter out non-residential properties
mutate(Address = paste(LegalAddrNo, LegalAddr, sep = " ")) # form full address
# export
write.csv(properties, "properties-clean.csv", row.names = F)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment