This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def parse_main_page(self, response):
    """Yield one detail-page request for every listing on a search page.

    Reads the ``data-pid`` attribute of each element whose ``class``
    attribute is exactly ``row`` and builds the posting URL from it.

    Args:
        response: Response object for a search-results page; only its
            ``xpath`` method is used here.

    Yields:
        Request: one request per posting id, with
            ``self.parse_detail_page`` as its callback.
    """
    post_ids = response.xpath('//*[@class="row"]/@data-pid').extract()
    for post_id in post_ids:
        # ``extract()`` already returns strings, so no ``str()`` cast is
        # needed; the loop name also no longer shadows the builtin ``id``.
        link = 'https://newyork.craigslist.org/stn/cto/{}.html'.format(post_id)
        yield Request(link, callback=self.parse_detail_page)
def parse_detail_page(self, response): | |
price = response.xpath('//*[@class = "price"]/text()').extract()[0] | |
title = response.xpath('//*[@ id = "titletextonly"]/text()').extract()[0] | |
post_time=response.xpath('//*[@id = "pagecontainer"]/section/section/div[2]/p[2]/time/text()').extract()[0] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scrapy import Spider, Request | |
from scrapy.selector import Selector | |
from demo.items import DemoItem | |
class DemoSpider(Spider): | |
name = 'demo' | |
allowed_urls = ['https://newyork.craigslist.org'] | |
start_urls = ['https://newyork.craigslist.org/search/stn/cto'] | |
def parse(self, response): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ---------------------------------------------------------------------------
# Summarise the full table (more than 22 million rows): group by Year,
# US.Postal.code and Borrower.Gender, then store the number of rows in each
# group as Borrower.Gender.count.
# ---------------------------------------------------------------------------
home_all_year_borr_male_female_only_count <-
  home_all_year_borr_male_female_only %>%
  group_by(Year, US.Postal.code, Borrower.Gender) %>%
  summarise(Borrower.Gender.count = n())

dim(home_all_year_borr_male_female_only_count)
# 540 4 -- as expected: 5 years * 54 postal codes * 2 genders
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ---------------------------------------------------------------------------
# Finally, bind the rows of the yearly tables (2014 down to 2010) together.
# ---------------------------------------------------------------------------
home_all_year_male_female <- rbind(
  home_2014_male_female,
  home_2013_male_female,
  home_2012_male_female,
  home_2011_male_female,
  home_2010_male_female
)

dim(home_all_year_male_female)
# 22298905
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ---------------------------------------------------------------------------
# Checking missing data
# ---------------------------------------------------------------------------
# Count the rows whose US postal (state) code, column V3, is 00.
sum(home_FNM_2014$V3 == 0)
# The 2014 Fannie Mae file has 171 rows whose state code is 00.

# Record the positions of those rows so they can be deleted.
fnm_rows_0 <- which(home_FNM_2014$V3 == 0)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ---------------------------------------------------------------------------
# Reading the tables
# ---------------------------------------------------------------------------

# 2014 Fannie Mae file (Enterprise Code 1); the file has no header row.
home_FNM_2014 <- read.table(
  "~/Documents/shiny_project/home_all_year/2014_SFCensusTractFNM2014/fnma_sf2014c_loans.txt",
  sep = "",
  header = FALSE
)
dim(home_FNM_2014)
# 1899729 39

# 2014 Freddie Mac file (Enterprise Code 2); same layout options as above.
home_FRE_2014 <- read.table(
  "~/Documents/shiny_project/home_all_year/2014_SFCensusTractFRE2014/fhlmc_sf2014c_loans.txt",
  sep = "",
  header = FALSE
)
dim(home_FRE_2014)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Calculating the monthly default rate from April 2005 to September 2005.
## A row counts towards a month when that month's PAY_* value is greater
## than 2; the count is divided by total_rows and formatted with percent().
default_rate <- vector(length = 6)
names(default_rate) <- c("April 2005", "May 2005", "June 2005",
                         "July 2005", "August 2005", "September 2005")

# Column holding each month's value, in the same order as the names above
# (PAY_6 feeds April ... PAY_0 feeds September; there is no PAY_1 column
# referenced anywhere in this script).
pay_columns <- c("PAY_6", "PAY_5", "PAY_4", "PAY_3", "PAY_2", "PAY_0")

for (i in seq_along(pay_columns)) {
  # na.rm = TRUE matches filter(), which drops rows where the test is NA.
  n_over <- sum(credi_tbl_temp[[pay_columns[i]]] > 2, na.rm = TRUE)
  default_rate[i] <- percent(n_over / total_rows)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert the numeric marital-status codes to a labelled factor
# (1 = married, 2 = single, 3 = others).
married_vec <- c(1, 2, 3)
married_desc <- c("married", "single", "others")
# NOTE(review): the factor is built from credit_tbl but stored in
# credi_tbl_temp -- confirm that mixing the two tables is intended.
credi_tbl_temp$MARRIAGE <- factor(
  x = credit_tbl$MARRIAGE,
  levels = married_vec,
  labels = married_desc
)

# Marital-status bar chart; position = "fill" draws each bar as proportions.
m <- ggplot(data = credi_tbl_temp, aes(x = default.payment.next.month)) +
  geom_bar(aes(fill = MARRIAGE), position = "fill") +
  ggtitle("Marital Status of Default Vs. Non Default") +
  xlab("")
m
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Violin plot of AGE for each value of default.payment.next.month.
p <- ggplot(data = credi_tbl_temp,
            aes(x = default.payment.next.month, y = AGE)) +
  geom_violin(aes(fill = default.payment.next.month)) +
  ggtitle("Age Profile of Default Payment Vs.Non Default") +
  xlab("")
p

# Density plot layered on the existing plot object `g` (defined elsewhere in
# the script), coloured by default.payment.next.month.
# The y-axis label previously read "Denisty"; the spelling is corrected here.
g4 <- g +
  geom_density(aes(color = default.payment.next.month)) +
  ggtitle("Density Vs. Age Profile") +
  ylab("Density")
g4
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Creating a contingency table for sex.
# (This heading previously lacked the leading '#', which made it a parse error.)
#filter(credi_tbl_temp,default.payment.next.month==1) %>% group_by(SEX) %>% summarise(count(SEX))
# Cross-tabulate two columns rather than passing the whole data frame to
# table(). NOTE(review): default.payment.next.month is assumed to be the
# intended row variable, based on the commented-out line above -- confirm.
table(credi_tbl_temp$default.payment.next.month, credi_tbl_temp$SEX)
NewerOlder