Skip to content

Instantly share code, notes, and snippets.

@andrie
Created April 27, 2016 15:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save andrie/7fd7372d77d1a96ef6129b4c2c7b172c to your computer and use it in GitHub Desktop.
Save andrie/7fd7372d77d1a96ef6129b4c2c7b172c to your computer and use it in GitHub Desktop.
Segmented regression model of CRAN packages
library(segmented)
library(Ecdat)
library(ggplot2)
data("CRANpackages")
str(CRANpackages)
CRANpackages$Version <- as.character(CRANpackages$Version)
CRANpackages <- rbind(CRANpackages,
data.frame(Version = "3.2",
Date = as.Date("2016-04-27"),
Packages = 8329,
Source = "Andrie de Vries"))
# Helper function to compute the inverse of log10(x)
exp10 <- function(x)10^x
# Limit model data to distance from 200m to marathon
# Fit linear model
pkg_data <- within(CRANpackages, {
Date <- as.numeric(Date)
log_Packages <- log(Packages)
})
ggplot(data = CRANpackages, aes(x=Date, y = Packages)) +
geom_smooth(col = "blue") +
geom_point(col = "red") +
theme_minimal(16) +
ggtitle("CRAN packages per release") +
xlab(NULL) +
ylab(NULL)
lfit <- lm(log_Packages ~ Date, data = pkg_data)
summary(lfit)
# Fit segmented model with single break point
sfit_1 <- segmented(lfit, seg.Z = ~ Date)
sfit_2 <- segmented(lfit, seg.Z = ~ Date,
psi = list(Date = as.Date(c("2007-01-01", "2011-01-01")))
)
# Helper function for plotting
plot_segmented <- function(sfit, title){
bpoints <- as.Date1970(sfit$psi[, 2])
newx <- c(as.Date("2002-01-01"), bpoints, as.Date("2016-04-01"))
newy <- exp(predict(sfit, data.frame(Date = as.numeric(newx))))
newy
exp(newy)
ggplot(data = CRANpackages, aes(x=Date, y = Packages)) +
geom_path(data = data.frame(Date = newx, Packages = newy), col = "blue", size = 1) +
geom_point(col = "red") +
geom_vline(data = data.frame(Date = bpoints),
aes(xintercept = as.numeric(Date)),
colour = "grey80",
size = 2
) +
scale_y_log10(lim = c(100, 10000)) +
theme_minimal(16) +
ggtitle(title) +
xlab(NULL) +
ylab(NULL)
}
plot_segmented(sfit_1, title = "CRAN packages - segmented model with 1 break point")
plot_segmented(sfit_2, title = "CRAN packages - segmented model with 2 break points")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment