Skip to content

Instantly share code, notes, and snippets.

@flare9x
Last active October 29, 2019 15:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save flare9x/5022df0f1740201bcaebbbbf6c74104f to your computer and use it in GitHub Desktop.
Save flare9x/5022df0f1740201bcaebbbbf6c74104f to your computer and use it in GitHub Desktop.
Extract prospect list from gosection8.com (copy and paste as excel .csv) sep=","
# gosection8.com Prospects
# Andrew Bannerman - 8/13/2019
# 10/29/2019 - added histogram of voucher holder per month approval
library(stringr)
# Read files
data_dir <- "C:/Users/Andrew.Bannerman/Desktop/Real Estate/Real Estate Portfolio/Galveston section8 prospects/p"
data_list = list()
i=1
for (i in 1:5) {
temp = read.csv(paste0(data_dir,i,".csv"),stringsAsFactors = FALSE, header=F)
str(temp)
col_nums = length(colnames(temp))
if (col_nums == 2) {
temp_df = data.frame(paste(temp$V1,temp$V2),stringsAsFactors = FALSE)
names = "one"
colnames(temp_df) = names
} else if (col_nums == 3) {
temp_df = data.frame(paste(temp$V1,temp$V2),stringsAsFactors = FALSE)
names = "one"
colnames(temp_df) = names
} else if (col_nums == 1) {
temp_df = data.frame(temp,stringsAsFactors = FALSE)
names = "one"
colnames(temp_df) = names
}
data_list[[i]] = temp_df
cat("\n this is iteration",i)
}
# make one data frame
all_data = as.data.frame(do.call(rbind,data_list))
all_data = data.frame(all_data[!apply(all_data == " ", 1, all),])
city_index = seq(4,nrow(all_data),10)
voucher_price_index = seq(6,nrow(all_data),10)
bedrooms_index = seq(5,nrow(all_data),10)
# Extract City Names
city_extract = list()
i=1
for(i in 1:length(city_index)) {
city_extract[[i]] = paste(all_data[city_index[i],1])
}
city = do.call(rbind,city_extract)
# Extract voucher prices
voucher_extract = list()
i=1
for(i in 1:length(voucher_price_index)) {
temp = str_extract_all(all_data[voucher_price_index[i],1], "\\d+")[[1]]
voucher_extract[[i]] = as.numeric(paste(temp, collapse=""))
}
voucher = do.call(rbind,voucher_extract)
# Extract bedrooms
bedroom_extract = list()
j=1
for(j in 1:length(bedrooms_index)) {
temp = paste(all_data[bedrooms_index[j],1])
temp = str_extract_all(temp, "\\d+")[[1]]
temp = as.numeric(paste(temp, collapse=""))
if (nchar(temp) == 2) {
strings = substring(temp, 1:2, 1:2)
bedroom_extract[[j]] = as.numeric(strings[1]) # pick lower bedroom limit
} else if (nchar(temp) == 1) {
strings = substring(temp, 1:1, 1:1)
bedroom_extract[[j]] = as.numeric(strings[1])
}
# strings = substring(temp, 1:1, 1:1)
#bedroom_extract[[i]] = paste(all_data[bedrooms_index[i],1])
}
bedrooms = do.call(rbind,bedroom_extract)
# all data
all_df = data.frame(city,voucher,bedrooms)
names = c("city","voucher_amount","bedrooms")
colnames(all_df) = names
# Average voucher price per bedroom
bedrooms = 1:6
average_voucher_per_bdrm = list()
min_rent_per_bdrm = list()
max_rent_per_bdrm = list()
sd_rent_per_bdrm = list()
for (i in 1:6) {
i=1
average_voucher_per_bdrm[[i]] = mean(all_df[all_df$bedrooms == i, "voucher_amount"], na.rm = TRUE)
min_rent_per_bdrm[[i]] = min(all_df[all_df$bedrooms == i, "voucher_amount"], na.rm = TRUE)
max_rent_per_bdrm[[i]] = max(all_df[all_df$bedrooms == i, "voucher_amount"], na.rm = TRUE)
sd_rent_per_bdrm[[i]] = sd(all_df[all_df$bedrooms == i, "voucher_amount"], na.rm = TRUE)
}
# Histograms of data
bed_1_hist_df = all_df[all_df$bedrooms == 1, "voucher_amount"]
bed_2_hist_df = all_df[all_df$bedrooms == 2, "voucher_amount"]
bed_3_hist_df = all_df[all_df$bedrooms == 3, "voucher_amount"]
bed_4_hist_df = all_df[all_df$bedrooms == 4, "voucher_amount"]
bed_5_hist_df = all_df[all_df$bedrooms == 5, "voucher_amount"]
bed_6_hist_df = all_df[all_df$bedrooms == 6, "voucher_amount"]
no_obs = length(bed_3_hist_df)
h <- hist(bed_3_hist_df,breaks=25)
hist(bed_3_hist_df,
main="Voucher Amount - 3 bedroom Histogram - Total Observations = 87",
xlab="Section 8 Approved Rent Per Month",
#xlim=c(min(bed_3_hist_df),max(bed_3_hist_df)),
col="red",
freq=T,
labels = TRUE,
ylim=c(0, 20),
xaxt='n',
breaks=25
)
axis(side=1, at=seq(min(bed_3_hist_df),max(bed_3_hist_df), 50), labels=seq(min(bed_3_hist_df),max(bed_3_hist_df), 50))
histogram_df = data.frame(h$counts,h$mids)
# make df
bedroom_mean_price_mo = as.data.frame(round(do.call(rbind,average_voucher_per_bdrm),digits=2))
bedroom_min_price_mo = as.data.frame(round(do.call(rbind,min_rent_per_bdrm),digits=2))
bedroom_max_price_mo = as.data.frame(round(do.call(rbind,max_rent_per_bdrm),digits=2))
sd_price_mo = as.data.frame(round(do.call(rbind,sd_rent_per_bdrm),digits=2))
bedroom_row_names = c("1 Bedroom","2 Bedroom","3 Bedroom","4 Bedroom","5 Bedroom","6 Bedroom")
all_stats = data.frame(bedroom_row_names,bedroom_mean_price_mo, sd_price_mo, bedroom_min_price_mo, bedroom_max_price_mo)
names = c("Bedroom Number","Mean Voucher Per Month","Standard Deviation","Min Price","Max Price")
colnames(all_stats) = names
# Plot
library(gridExtra)
grid.table(all_stats)
graphics.off()
# Count total number bedrooms voucher holder approved for
bedrooms = 1:6
total_approved_bedroom = list()
for (i in 1:6) {
total_approved_bedroom[[i]] = sum(all_df$bedrooms == i)
}
# make df
total_approved = as.data.frame(do.call(rbind,total_approved_bedroom))
# Plots
city_name = "Galveston"
barplot(bedroom_mean_price_mo$V1, names.arg = bedrooms,col = "grey", xlab="Bedroom Number", ylab="Rent Per Month", yaxt = "n", main=paste("All gosection 8 prospects ",city_name,", TX - Per Bedroom Number - Average Voucher Price Per Month"), cex.main = 1)
axis(2, at=seq(0,max(bedroom_mean_price_mo$V1, na.rm=T),100))
barplot(total_approved$V1, names.arg = bedrooms,col = "grey", xlab="Total Units - Bedroom Size", ylab="Total Stock", yaxt = "n", main=paste("All gosection 8 prospects ",city_name,", TX - Total Approved Voucher Per Bedroom Size"), cex.main = 1)
axis(2, at=seq(0,max(total_approved$V1, na.rm=T),1))
# Export csv
write.csv(all_df,"C:/Users/Andrew.Bannerman/Desktop/Real Estate/Real Estate Portfolio/Galveston section8 prospects/out.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment