Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save timelyportfolio/5008182 to your computer and use it in GitHub Desktop.
Save timelyportfolio/5008182 to your computer and use it in GitHub Desktop.
require(latticeExtra)
require(Hmisc)
require(reshape2)
require(xts)
loadfrench <- function(zipfile, txtfile, skip, nrows) {
#my.url will be the location of the zip file with the data
my.url=paste("http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/",zipfile,".zip",sep="")
#this will be the temp file set up for the zip file
my.tempfile<-paste(tempdir(),"\\frenchzip.zip",sep="")
#my.usefile is the name of the txt file with the data
my.usefile<-paste(tempdir(),"\\",txtfile,".txt",sep="")
download.file(my.url, my.tempfile, method="auto",
quiet = FALSE, mode = "wb",cacheOK = TRUE)
unzip(my.tempfile,exdir=tempdir(),junkpath=TRUE)
#read space delimited text file extracted from zip
french <- read.table(file=my.usefile,
header = FALSE, sep = "", fill=TRUE, #add fill = true to handle bad data
as.is = FALSE ,
skip = skip, nrows=nrows)
#get dates ready for xts index
datestoformat <- french[,1]
datestoformat <- paste(substr(datestoformat,1,4),
"12","31",sep="-")
#get xts for analysis
#unfortunately the last percentile in 1942 is not separated by a space so we will delete last two columns
french_xts <- as.xts(french[,1:(NCOL(french)-2)],
order.by=as.Date(datestoformat))
#delete missing data which is denoted by -0.9999
french_xts[which(french_xts < -0.99,arr.ind=TRUE)[,1],
unique(which(french_xts < -0.99,arr.ind=TRUE)[,2])] <- 0
#divide by 100 to get percent
french_xts <- french_xts/100
return(french_xts)
}
filenames <- c("BE-ME_Breakpoints")
BE_ME = loadfrench(zipfile=filenames[1],txtfile=filenames[1],skip=3,nrows=87)
#first column is year which we can remove
#columns 2 and 3 are counts for positive and negative which we will remove
BE_ME = BE_ME[,4:NCOL(BE_ME)]
colnames(BE_ME) <- paste(5*0:(NCOL(BE_ME)-1),"pctile",sep="")
#kind of normalize data by dividing each percentile by the percentile mean
BE_ME.adj <- BE_ME/matrix(rep(apply(BE_ME,MARGIN=2,FUN=mean),times=NROW(BE_ME)),
ncol=NCOL(BE_ME),byrow=TRUE)-1
BE_ME.adj.df <- as.data.frame(cbind(as.numeric(format(as.Date(index(BE_ME.adj)),"%Y")),coredata(BE_ME.adj)))
BE_ME.adj.melt <- melt(BE_ME.adj.df,id.vars=1)
#add column for the decade so we can use in plots
BE_ME.adj.melt[,4] <- paste(substr(BE_ME.adj.melt[,1],1,3),"0",sep="")
colnames(BE_ME.adj.melt) <- c("Year","Pctile","value","Decade")
#good way to get decent colors
#stole from http://timotheepoisot.fr/2013/02/17/stacked-barcharts/
pal = colorRampPalette(brewer.pal(5,'Paired'))(20)
p1<-Ecdf(~value|Decade,groups=Year%%10,col=pal[seq(1,20,by=2)],data=BE_ME.adj.melt, #data=BE_ME.adj.melt[which(BE_ME.adj.melt[,"Year"] %% 2 == 0),],
label.curves=TRUE,
layout=c(1,10),
strip=FALSE,strip.left=strip.custom(bg="grey70"),
scales=list(x=list(tck=c(1,0)),y=list(alternating=0,tck=c(0,0))),
ylab=NULL,
xlab="BE_ME/percentile mean",
main=" ") +
layer(panel.abline(v=0, col="grey50"))
p2<-
dotplot(factor(Year)~value|Decade,col=pal[seq(1,20,by=2)],data=BE_ME.adj.melt, #data=BE_ME.adj.melt[which(BE_ME.adj.melt[,"Year"] %% 2 == 0),],
pch=19,
cex=0.6,
strip=FALSE,strip.left=strip.custom(bg="grey70"),
scales=list(x=list(tck=c(1,0)),y=list(relation="free",draw=FALSE)),
layout=c(1,10),
xlab="BE_ME/percentile mean",
main="Kenneth French BE_ME Percentile Breakpoints") +
layer(panel.abline(v=0, col="grey50")) #+
#layer(panel.abline(v=0.25, col="darkolivegreen4")) +
#layer(panel.abline(v=-0.25, col="indianred4"))
#side by side
print(p2,position=c(0,0.015,0.5,1),more=TRUE)
print(p1,position=c(0.45,0.015,1,1))
grid.text("Dot Plot by Year by Decade",x=unit(0.1,"npc"),y=unit(0.96,"npc"),hjust=0)
grid.text("Cumulative Density by Year by Decade",x=unit(0.55,"npc"),y=unit(0.96,"npc"),hjust=0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment