# ajdamico/furman intro to r lecture.R

## furman intro to r lecture.R
#three reasons to use R-

#it's free

#it's open source- package system

#it's a programming language for statistics.

# an integer vector holding the values one through five
x <- seq_len(5)
# typing an object's name on its own prints the object
x
# number of elements in the vector
length(x)
# storage class of the vector
class(x)
# integer vectors count as numeric
is.numeric(x)
# keep the TRUE/FALSE answer itself in a new object
y <- is.numeric(x)
# y holds a logical value, so it is not numeric ...
is.numeric(y)
# ... but it is logical
is.logical(y)

# build a small data frame: one row per person, one column per attribute
w <- data.frame(
  name_of_person = c("betty", "fred", "sammy"),
  high_fiving_ability = c(50, 50, 100)
)

w
class(w)
# a data frame as a whole is not numeric, even when one of its columns is
is.numeric(w)

# second row, every column
w[2, ]

# three equivalent ways to pull the second column out as a vector
w[, 2]
w$high_fiving_ability
w[, "high_fiving_ability"]

# a single cell: row 3, column 2
w[3, 2]

# w only has two columns, so asking for column 3 raises
# "undefined columns selected". try() still shows that error message
# but lets the rest of the script keep running when it is source()'d.
try(w[2, 3])

# a single column is a numeric vector; a single row is still a data frame
is.numeric(w[, 2])
is.numeric(w[2, ])

# dimensions and labels
nrow(w)
ncol(w)
names(w)
rownames(w)
colnames(w)
names(w)[1]
names(w)[2]
length(w[, 2])

# keep rows two and three in a new data frame
z <- w[2:3, ]
z

# objects can be overwritten: z now holds the row count of w instead
z <- nrow(w)

# write the commands entered so far in this session to a history file
savehistory(file = "C:\\Users\\AnthonyD\\Documents\\example 01.Rhistory")

#make a 3 x 5 data table in m.  3 columns and 5 rows.  the three columns should be a person's name, sex (0 for male, 1 for female), and person's height in inches

#then take their average height

#then isolate the data table into another data table - n - of only the females

#--together we're going to tack on the person's height in centimeters

#use the transform function
#and use m[,"cm"] <- m[,"inches"] * 2.54


#download tax class 1 & 2/3/4 data from http://www.nyc.gov/html/dof/html/property/property_val_valuation.shtml
# load the downloaded file; TC.csv must sit in the current working directory
x <- read.csv(file = "TC.csv")

# number of records for each BORO value
table(x$BORO)
class(x)
# first six records
head(x, n = 6L)

# keep only the records whose EASE field is not blank
y <- subset(x, subset = EASE != "")

# mean of CUR_FV_T within each BORO, then its overall distribution
tapply(X = x$CUR_FV_T, INDEX = x$BORO, FUN = mean)
summary(x$CUR_FV_T)

# restrict to CUR_FV_T below two million and GR_SQFT below ten thousand
# before plotting
z <- subset(x, subset = CUR_FV_T < 2e6 & GR_SQFT < 1e4)
plot(x = z$CUR_FV_T, y = z$GR_SQFT)
boxplot(z$CUR_FV_T ~ z$BORO)


# recode the tax class: TXCL_1 keeps only the first character of TXCL
x <- transform(x, TXCL_1 = substr(TXCL, start = 1, stop = 1))

# cross-tabulate original against recoded values to check the recoding
table(x$TXCL, x$TXCL_1)

# print the number of properties in each tax class, 1 through 4
for (tax_class in 1:4) {
  in_class <- subset(x, TXCL_1 == tax_class)
  print(nrow(in_class))
}

# same counts, but looping over whichever classes actually occur in the data
# and printing each class label before its count
for (tax_class in unique(x$TXCL_1)) {
  print(tax_class)
  in_class <- subset(x, TXCL_1 == tax_class)
  print(nrow(in_class))
}

# for loop to create a new table: the average YRB within each
# 100,000-wide band of CUR_FV_T, from 0 up to 2,000,000.
#
# the table is allocated at full size up front. data.frame(a = NULL, b = NULL)
# drops the NULL columns and returns an empty frame, which then has to be
# grown (and copied) on every pass through the loop.
n_bins <- 20
bin_width <- 100000
date_built <- data.frame(
  value_increment = seq_len(n_bins) * bin_width,
  average_year_built = rep(NA_real_, n_bins)
)
for (i in seq_len(n_bins)) {
  # records in the band [ (i-1)*width , i*width ), ignoring YRB of 1800 or earlier
  z <- subset(
    x,
    CUR_FV_T >= (i - 1) * bin_width & CUR_FV_T < i * bin_width & YRB > 1800
  )
  # an empty band yields NaN, exactly as mean() of a zero-length vector does
  date_built[i, "average_year_built"] <- mean(z$YRB)
}

# glm
# model CUR_FV_T on BORO (as a factor), GR_SQFT and TXCL (as a factor).
# the columns are supplied through `data = x` rather than attach(x): attach()
# leaves a copy of every column on the search path, where it can mask or be
# masked by other objects and goes stale once x is modified.
glm(CUR_FV_T ~ factor(BORO) + GR_SQFT + factor(TXCL), data = x)

#download rolling sales data from http://www.nyc.gov/html/dof/html/property/property_val_sales.shtml
#merge on other data sets
library(gdata)
# read the spreadsheet; skip = 4 is forwarded to the underlying table reader
queens <- read.xls("rollingsales_queens.xls", skip = 4)

# rename the first column so its name matches the BORO key in x
names(queens)[1] <- "BORO"
# all.y = TRUE keeps every row of queens, matched in x or not.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
TC_queens <- merge(x, queens, by = c("BORO", "BLOCK", "LOT"), all.y = TRUE)

# compare the row counts before and after the merge
nrow(queens)
nrow(TC_queens)

# sql: run queries directly against the data frame x
library(sqldf)

# BORO / BLOCK / LOT combinations that appear more than once among BORO 4 records
duplicate_lots_query <- "select BORO , BLOCK, LOT , count(*) as count from x where BORO==4 group by BORO, BLOCK, LOT having count>1"
a <- sqldf(duplicate_lots_query)

# the distinct BORO values present in x
distinct_boro_query <- "select distinct BORO from x"
unique_boroughs <- sqldf(distinct_boro_query)
	#three reasons to use R-

	#it's free

	#it's open source- package system

	#it's a programming language for statistics.

	# an integer vector holding the values one through five
	x <- seq_len(5)
	# typing an object's name on its own prints the object
	x
	# number of elements in the vector
	length(x)
	# storage class of the vector
	class(x)
	# integer vectors count as numeric
	is.numeric(x)
	# keep the TRUE/FALSE answer itself in a new object
	y <- is.numeric(x)
	# y holds a logical value, so it is not numeric ...
	is.numeric(y)
	# ... but it is logical
	is.logical(y)

	# build a small data frame: one row per person, one column per attribute
	w <- data.frame(
		name_of_person = c("betty", "fred", "sammy"),
		high_fiving_ability = c(50, 50, 100)
	)

	w
	class(w)
	# a data frame as a whole is not numeric, even when one of its columns is
	is.numeric(w)

	# second row, every column
	w[2, ]

	# three equivalent ways to pull the second column out as a vector
	w[, 2]
	w$high_fiving_ability
	w[, "high_fiving_ability"]

	# a single cell: row 3, column 2
	w[3, 2]

	# w only has two columns, so asking for column 3 raises
	# "undefined columns selected". try() still shows that error message
	# but lets the rest of the script keep running when it is source()'d.
	try(w[2, 3])

	# a single column is a numeric vector; a single row is still a data frame
	is.numeric(w[, 2])
	is.numeric(w[2, ])

	# dimensions and labels
	nrow(w)
	ncol(w)
	names(w)
	rownames(w)
	colnames(w)
	names(w)[1]
	names(w)[2]
	length(w[, 2])

	# keep rows two and three in a new data frame
	z <- w[2:3, ]
	z

	# objects can be overwritten: z now holds the row count of w instead
	z <- nrow(w)

	# write the commands entered so far in this session to a history file
	savehistory(file = "C:\\Users\\AnthonyD\\Documents\\example 01.Rhistory")

	#make a 3 x 5 data table in m. 3 columns and 5 rows. the three columns should be a person's name, sex (0 for male, 1 for female), and person's height in inches

	#then take their average height

	#then isolate the data table into another data table - n - of only the females

	#--together we're going to tack on the person's height in centimeters

	#use the transform function
	#and use m[,"cm"] <- m[,"inches"] * 2.54


	#download tax class 1 & 2/3/4 data from http://www.nyc.gov/html/dof/html/property/property_val_valuation.shtml
	# load the downloaded file; TC.csv must sit in the current working directory
	x <- read.csv(file = "TC.csv")

	# number of records for each BORO value
	table(x$BORO)
	class(x)
	# first six records
	head(x, n = 6L)

	# keep only the records whose EASE field is not blank
	y <- subset(x, subset = EASE != "")

	# mean of CUR_FV_T within each BORO, then its overall distribution
	tapply(X = x$CUR_FV_T, INDEX = x$BORO, FUN = mean)
	summary(x$CUR_FV_T)

	# restrict to CUR_FV_T below two million and GR_SQFT below ten thousand
	# before plotting
	z <- subset(x, subset = CUR_FV_T < 2e6 & GR_SQFT < 1e4)
	plot(x = z$CUR_FV_T, y = z$GR_SQFT)
	boxplot(z$CUR_FV_T ~ z$BORO)


	# recode the tax class: TXCL_1 keeps only the first character of TXCL
	x <- transform(x, TXCL_1 = substr(TXCL, start = 1, stop = 1))

	# cross-tabulate original against recoded values to check the recoding
	table(x$TXCL, x$TXCL_1)

	# print the number of properties in each tax class, 1 through 4
	for (tax_class in 1:4) {
		in_class <- subset(x, TXCL_1 == tax_class)
		print(nrow(in_class))
	}

	# same counts, but looping over whichever classes actually occur in the data
	# and printing each class label before its count
	for (tax_class in unique(x$TXCL_1)) {
		print(tax_class)
		in_class <- subset(x, TXCL_1 == tax_class)
		print(nrow(in_class))
	}

	# for loop to create a new table: the average YRB within each
	# 100,000-wide band of CUR_FV_T, from 0 up to 2,000,000.
	#
	# the band limits need explicit multiplication signs: "(i-1)100000" and
	# "i100000" do not parse as R code.
	#
	# the table is allocated at full size up front. data.frame(a = NULL, b = NULL)
	# drops the NULL columns and returns an empty frame, which then has to be
	# grown (and copied) on every pass through the loop.
	n_bins <- 20
	bin_width <- 100000
	date_built <- data.frame(
		value_increment = seq_len(n_bins) * bin_width,
		average_year_built = rep(NA_real_, n_bins)
	)
	for (i in seq_len(n_bins)) {
		# records in the band [ (i-1)*width , i*width ), ignoring YRB of 1800 or earlier
		z <- subset(
			x,
			CUR_FV_T >= (i - 1) * bin_width & CUR_FV_T < i * bin_width & YRB > 1800
		)
		# an empty band yields NaN, exactly as mean() of a zero-length vector does
		date_built[i, "average_year_built"] <- mean(z$YRB)
	}

	# glm
	# model CUR_FV_T on BORO (as a factor), GR_SQFT and TXCL (as a factor).
	# the columns are supplied through `data = x` rather than attach(x): attach()
	# leaves a copy of every column on the search path, where it can mask or be
	# masked by other objects and goes stale once x is modified.
	glm(CUR_FV_T ~ factor(BORO) + GR_SQFT + factor(TXCL), data = x)

	#download rolling sales data from http://www.nyc.gov/html/dof/html/property/property_val_sales.shtml
	#merge on other data sets
	library(gdata)
	# read the spreadsheet; skip = 4 is forwarded to the underlying table reader
	queens <- read.xls("rollingsales_queens.xls", skip = 4)

	# rename the first column so its name matches the BORO key in x
	names(queens)[1] <- "BORO"
	# all.y = TRUE keeps every row of queens, matched in x or not.
	# TRUE is spelled out because T is an ordinary variable that can be reassigned.
	TC_queens <- merge(x, queens, by = c("BORO", "BLOCK", "LOT"), all.y = TRUE)

	# compare the row counts before and after the merge
	nrow(queens)
	nrow(TC_queens)

	# sql: run queries directly against the data frame x
	library(sqldf)

	# BORO / BLOCK / LOT combinations that appear more than once among BORO 4 records
	duplicate_lots_query <- "select BORO , BLOCK, LOT , count(*) as count from x where BORO==4 group by BORO, BLOCK, LOT having count>1"
	a <- sqldf(duplicate_lots_query)

	# the distinct BORO values present in x
	distinct_boro_query <- "select distinct BORO from x"
	unique_boroughs <- sqldf(distinct_boro_query)