Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
#introduction to R lecture notes - Furman Center for Real Estate and Urban Policy
#three reasons to use R:
#it's free
#it's open source, with a package system
#it's a programming language built for statistics
#vectors: create the integer sequence 1 through 5, then inspect it.
#typing a bare name at the console prints the object.
x <- 1:5
x
length(x)
class(x)
is.numeric(x)
#store the result of the type check: y is a single logical value (TRUE),
#so it is not numeric but it is logical.
y <- is.numeric(x)
is.numeric(y)
is.logical(y)
#data frames: a three-row table with one column of names and one numeric column.
w <- data.frame( name_of_person=c("betty","fred","sammy") , high_fiving_ability=c(50,50,100) )
w
class(w)
#a data frame as a whole is not numeric, even though one of its columns is.
is.numeric(w)
#indexing is [row , column]; leaving one side blank selects everything on that side.
#w[2,] is the second row (still a data frame); w[,2] is the second column (a plain vector).
w[2,]
w[,2]
#two more ways to pull out that same second column: by $ and by column name.
w$high_fiving_ability
w[,"high_fiving_ability"]
#a single cell: row 3, column 2.
w[3,2]
#NOTE(review): w has only two columns, so asking for column 3 raises
#"undefined columns selected". presumably a deliberate demonstration of the
#row/column order - confirm, because it halts the script when run via source().
w[2,3]
#a single column is numeric; a single row is a one-row data frame, so it is not.
is.numeric(w[,2])
is.numeric(w[2,])
#dimensions and labels. for a data frame names() and colnames() return the same thing.
nrow(w)
ncol(w)
names(w)
rownames(w)
colnames(w)
#names() returns a character vector, so it can be indexed like any other vector.
names(w)[1]
names(w)[2]
length(w[,2])
#keep rows 2 and 3 as a new data frame...
z <- w[2:3,]
z
#...then reuse the name: z now holds the row count of w instead of the subset.
z <- nrow(w)
#write the console command history to a file.
#NOTE(review): this is an absolute windows path under one user's documents
#folder - change it before running on another machine.
savehistory("C:\\Users\\AnthonyD\\Documents\\example 01.Rhistory")
#make a data table m with 5 rows and 3 columns. the three columns should be a person's name, sex (0 for male, 1 for female), and the person's height in inches
#then take their average height
#then isolate the data table into another data table - n - of only the females
#--together we're going to tack on the person's height in centimeters
#use the transform function
#and use m[,"cm"] <- m[,"inches"] * 2.54
#download tax class 1 & 2/3/4 data from http://www.nyc.gov/html/dof/html/property/property_val_valuation.shtml
#load the tax class file from the working directory into a data frame.
#this rebinds x, replacing whatever x held before.
x <- read.csv("TC.csv")
#number of records for each value of BORO.
table( x$BORO )
class(x)
#first few rows, to check the columns read in as expected.
head(x)
#keep only the records whose EASE field is not an empty string.
y <- subset( x , EASE != "" )
#mean of CUR_FV_T within each BORO. mean() is called without na.rm,
#so any group containing a missing value comes back as NA.
tapply( x$CUR_FV_T , x$BORO , mean )
summary( x$CUR_FV_T )
#restrict to records with CUR_FV_T under 2,000,000 and GR_SQFT under 10,000
#before plotting (presumably to keep extreme values from dominating - confirm).
z <- subset( x , CUR_FV_T < 2000000 & GR_SQFT < 10000 )
#scatter plot with CUR_FV_T on the horizontal axis and GR_SQFT on the vertical axis.
plot( z$CUR_FV_T , z$GR_SQFT )
#one box of CUR_FV_T per BORO value.
boxplot( z$CUR_FV_T ~ z$BORO )
#count properties by tax class.
#TXCL_1 is the first character of each TXCL code.
x <- transform(x, TXCL_1 = substr(TXCL, 1, 1))

#sanity check: each full TXCL code should fall under exactly one TXCL_1 value.
table(x$TXCL, x$TXCL_1)

#print the number of properties in each of tax classes 1 through 4.
#rows where the comparison is NA are not counted.
for (j in seq_len(4)) {
  print(sum(x$TXCL_1 == j, na.rm = TRUE))
}

#same counts, this time looping over whichever TXCL_1 values actually occur
#and printing each value ahead of its count.
for (j in unique(x$TXCL_1)) {
  print(j)
  print(sum(x$TXCL_1 == j, na.rm = TRUE))
}
#create a new table: average YRB for each 100,000-wide band of CUR_FV_T,
#covering 0 up to (but not including) 2,000,000.
#  value_increment    - the band's exclusive upper bound (100000, 200000, ...)
#  average_year_built - mean of YRB over records in that band with YRB > 1800
#records with YRB of 1800 or earlier are left out (presumably placeholder or
#missing years - confirm against the data dictionary).
#
#the table is built in one step. starting from data.frame( a = NULL , b = NULL )
#silently gives a data frame with no columns at all, and adding rows to it one
#at a time copies the whole table on every pass.
band_width <- 100000
band_upper <- seq_len(20) * band_width
date_built <- data.frame(
  value_increment = band_upper,
  average_year_built = vapply(
    band_upper,
    function(upper) {
      #which() drops rows where the comparison is NA, the same rows subset() would drop.
      in_band <- which(
        x$CUR_FV_T >= upper - band_width & x$CUR_FV_T < upper & x$YRB > 1800
      )
      #a band with no matching records yields NaN.
      mean(x$YRB[in_band])
    },
    numeric(1)
  )
)
#glm: model CUR_FV_T on BORO, GR_SQFT and TXCL.
#BORO and TXCL are wrapped in factor() so they enter the model as categorical
#terms rather than as numbers. no family is given, so glm() uses its default
#(gaussian).
#the data frame is passed through data= instead of attach(x): attach() puts a
#copy of the columns on the search path, where workspace objects of the same
#name mask them and where they do not follow later changes to x.
glm( CUR_FV_T ~ factor(BORO) + GR_SQFT + factor(TXCL) , data = x )
#download rolling sales data from http://www.nyc.gov/html/dof/html/property/property_val_sales.shtml
#merge on other data sets
library(gdata)
#read the queens rolling sales workbook, skipping the first 4 lines of the sheet
#(presumably a title banner above the column headers - confirm against the file).
queens <- read.xls("rollingsales_queens.xls", skip = 4)
#rename the first column to BORO so it matches the key column name used in x.
names(queens)[1] <- "BORO"
#join the tax class data onto the sales data by BORO, BLOCK and LOT.
#all.y = TRUE keeps every sales row; sales with no match in x get NA in the
#columns that come from x. TRUE is spelled out because T is an ordinary
#variable that can be reassigned.
TC_queens <- merge(x, queens, by = c("BORO", "BLOCK", "LOT"), all.y = TRUE)
#compare row counts: the merged table has at least as many rows as queens, and
#more if some BORO/BLOCK/LOT key matches several rows of x.
nrow(queens)
nrow(TC_queens)
#sql
library(sqldf)
#the queries below name the data frame x as their table.
#a: BORO/BLOCK/LOT combinations with BORO equal to 4 that appear on more than
#one row of x, together with the number of rows for each combination.
a <- sqldf("select BORO , BLOCK, LOT , count(*) as count from x where BORO==4 group by BORO, BLOCK, LOT having count>1")
#one row per distinct BORO value present in x.
unique_boroughs <- sqldf("select distinct BORO from x")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment