Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
#Introduction to R lecture notes - Furman Center for Real Estate and Urban Policy
#three reasons to use R-
#it's free
#it's open source- package system
#it's a programming language for statistics.
#vector basics: build a small vector and inspect it.
#each bare expression below is meant to be run at the console, where R prints its value.
#the colon operator builds the integer sequence 1, 2, 3, 4, 5
x <- 1:5
#typing a name by itself prints the object
x
#number of elements in the vector
length(x)
#storage class of the vector
class(x)
#is.numeric() is TRUE for integer as well as double vectors
is.numeric(x)
#the result of a test can itself be stored; y now holds a single logical value
y <- is.numeric(x)
#a logical value is not numeric...
is.numeric(y)
#...it is logical
is.logical(y)
#data frame basics: build a small table and practice indexing it.
#one row per person, one column per attribute
w <- data.frame(
  name_of_person = c("betty", "fred", "sammy"),
  high_fiving_ability = c(50, 50, 100)
)
#print the whole table
w
#a data frame has its own class and is not itself numeric, even when some columns are
class(w)
is.numeric(w)
#indexing is [row, column]; leaving one side blank keeps everything on that side
w[2, ]
w[, 2]
#the same column again, this time selected with $ and by name
w$high_fiving_ability
w[, "high_fiving_ability"]
#row 3, column 2 is a single cell
w[3, 2]
#row 2, column 3 does not exist because w has only two columns, so this is an error.
#try() still shows the error message but lets the rest of the script keep running
try(w[2, 3])
#a single column comes back as a numeric vector; a single row is still a data frame
is.numeric(w[, 2])
is.numeric(w[2, ])
#dimensions and labels
nrow(w)
ncol(w)
names(w)
rownames(w)
colnames(w)
names(w)[1]
names(w)[2]
length(w[, 2])
#rows 2 and 3 as a new data frame
z <- w[2:3, ]
z
#assigning again replaces the old z: it now holds the row count of w
z <- nrow(w)
#save this session's command history to a file.
#savehistory() only works from an interactive console, so skip it when the
#script is run in batch mode instead of stopping with an error.
#NOTE(review): the destination is a machine-specific Windows path; change it
#to a folder that exists on your own computer.
if (interactive()) {
  savehistory("C:\\Users\\AnthonyD\\Documents\\example 01.Rhistory")
}
#make a data table m with 5 rows and 3 columns. the three columns should be a person's name, sex (0 for male, 1 for female), and person's height in inches
#then take their average height
#then isolate the data table into another data table - n - of only the females
#--together we're going to tack on the person's height in centimeters
#use the transform function
#and use m[,"cm"] <- m[,"inches"] * 2.54
#download tax class 1 & 2/3/4 data from http://www.nyc.gov/html/dof/html/property/property_val_valuation.shtml
#read the downloaded file into a data frame (this replaces the earlier x)
#NOTE(review): expects TC.csv to be in the current working directory
x <- read.csv("TC.csv")
#number of records for each value of BORO
table( x$BORO )
#confirm that read.csv returned a data frame
class(x)
#first few rows, to see the column names and typical values
head(x)
#keep only the records whose EASE field is not an empty string
y <- subset( x , EASE != "" )
#mean of CUR_FV_T within each BORO group
tapply( x$CUR_FV_T , x$BORO , mean )
#minimum, quartiles, mean and maximum of CUR_FV_T
summary( x$CUR_FV_T )
#restrict to records with CUR_FV_T under 2,000,000 and GR_SQFT under 10,000 (this replaces the earlier z)
z <- subset( x , CUR_FV_T < 2000000 & GR_SQFT < 10000 )
#scatterplot with CUR_FV_T on the horizontal axis and GR_SQFT on the vertical axis
plot( z$CUR_FV_T , z$GR_SQFT )
#one box of CUR_FV_T values per BORO group
boxplot( z$CUR_FV_T ~ z$BORO )
#recode tax class: store the first character of TXCL in a new column, TXCL_1
x <- transform( x , TXCL_1 = substr( TXCL , 1 , 1 ) )
#check tax class recoding worked properly:
#cross-tabulate the original codes (rows) against the recoded first character (columns)
table( x$TXCL , x$TXCL_1 )
#tally how many properties fall in each tax class, 1 through 4, printing one count per class
for ( j in seq_len(4) ){
  #adding up the TRUE matches counts the rows; na.rm leaves out records with no tax class
  print( sum( x$TXCL_1 == j , na.rm = TRUE ) )
}
#same tally, but take the list of tax classes from the data and label each count
for ( j in unique(x$TXCL_1) ){
  #the class being counted
  print( j )
  #adding up the TRUE matches counts the rows; na.rm leaves out records with no tax class
  print( sum( x$TXCL_1 == j , na.rm = TRUE ) )
}
#for loop to create new table:
#average YRB for each 100,000-wide band of CUR_FV_T, from 0 up to 2,000,000.
#the table is created at its final size before the loop. data.frame() drops
#arguments that are NULL, so naming columns with NULL values would give an empty
#table with no columns, and adding one row per pass copies the table every time.
n_bins <- 20
bin_width <- 100000
date_built <- data.frame(
  #upper edge of each band
  value_increment = seq_len(n_bins) * bin_width ,
  #filled in by the loop below
  average_year_built = NA_real_
)
for ( i in seq_len(n_bins) ){
  #records in band i: lower edge included, upper edge excluded; YRB of 1800 or earlier is left out
  z <- subset( x , CUR_FV_T >= (i-1)*bin_width & CUR_FV_T < i*bin_width & YRB > 1800 )
  #a band with no records gives NaN
  date_built[i,"average_year_built"] <- mean(z$YRB)
}
#glm
#model CUR_FV_T on borough, GR_SQFT and tax class, treating BORO and TXCL as categories.
#the data frame is handed to glm() through data= rather than attach(), so the column
#names are looked up in x itself and cannot be hidden by other objects with the same name
glm( CUR_FV_T ~ factor(BORO) + GR_SQFT + factor(TXCL) , data = x )
#download rolling sales data from http://www.nyc.gov/html/dof/html/property/property_val_sales.shtml
#merge on other data sets
library(gdata)
#read the sales spreadsheet, skipping the 4 lines above the header row
#NOTE(review): expects rollingsales_queens.xls to be in the current working directory
queens <- read.xls( "rollingsales_queens.xls" , skip = 4 )
#rename the first column so it matches the BORO column in x
names(queens)[1] <- "BORO"
#join on BORO, BLOCK and LOT; all.y = TRUE keeps every row of queens even when x has no match.
#TRUE is spelled out because T is an ordinary variable that can be overwritten
TC_queens <- merge( x , queens , by = c("BORO","BLOCK","LOT") , all.y = TRUE )
#compare row counts before and after the merge
nrow(queens)
nrow(TC_queens)
#sql
library(sqldf)
#sqldf runs the SQL statement against the data frame named in the from clause.
#BORO/BLOCK/LOT combinations that appear more than once in x, limited to BORO 4,
#with the number of times each one appears
#NOTE(review): BORO 4 is presumably Queens - confirm against the data
a <- sqldf("select BORO , BLOCK, LOT , count(*) as count from x where BORO==4 group by BORO, BLOCK, LOT having count>1")
#one row for each distinct BORO value found in x
unique_boroughs <- sqldf("select distinct BORO from x")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.