# krisselden/llogis.r

## llogis.r
#!/usr/bin/env Rscript

# real load samples
# Two sets of 50 observed load measurements (units are not stated here).
a <- c(
  1075, 1043, 964, 965, 1067, 1111, 929, 1016, 983, 998,
  995, 994, 995, 1064, 1025, 1078, 987, 998, 992, 1094,
  965, 993, 1022, 964, 1002, 926, 1408, 1083, 944, 988,
  960, 979, 939, 936, 990, 947, 1037, 1034, 1057, 1095,
  1015, 941, 1054, 1050, 1192, 973, 1321, 1051, 974, 906
)
b <- c(
  1023, 1175, 1230, 1055, 1073, 1118, 1030, 1093, 1095, 1040,
  1166, 1257, 1151, 1047, 1217, 1247, 1256, 1057, 1152, 1074,
  1016, 1040, 1176, 1135, 1003, 1178, 1136, 1402, 1215, 1176,
  1107, 1202, 1080, 1068, 1133, 1091, 1041, 1143, 1140, 1156,
  1062, 1076, 973, 1048, 1183, 1165, 1246, 1225, 1174, 1063
)

# the 3 param log-logistic is a good distribution for simulating typical
# performance samples from page load
# Density of the three-parameter (shifted) log-logistic distribution.
#
# x:        numeric vector of points at which to evaluate the density
# location: shift applied to x; the density is zero below it
# scale:    scale parameter (assumed > 0; not validated)
# shape:    shape parameter (assumed > 0; not validated)
#
# Returns a numeric vector of densities.
dllogis <- function(x, location = 0, scale = 1, shape = 2) {
  z <- (x - location) / scale
  # Clamp before exponentiating: a negative base raised to a fractional
  # power is NaN, and with an integer shape it gives a bogus (possibly
  # negative) density.
  z_pos <- pmax(z, 0)
  dens <- ((shape / scale) * z_pos^(shape - 1)) / (1 + z_pos^shape)^2
  # No probability mass lies below the location parameter. rep_len keeps
  # the mask aligned when a vector parameter made `dens` longer than `z`.
  below <- rep_len(z < 0, length(dens))
  dens[which(below)] <- 0
  dens
}

# Cumulative distribution function of the three-parameter (shifted)
# log-logistic distribution.
#
# q:        numeric vector of quantiles
# location: shift applied to q; the CDF is zero at or below it
# scale:    scale parameter (assumed > 0; not validated)
# shape:    shape parameter (assumed > 0; not validated)
#
# Returns a numeric vector of probabilities in [0, 1].
pllogis <- function(q, location = 0, scale = 1, shape = 2) {
  # Standardised distance above the lower bound. Values at or below
  # `location` are clamped to 0 so they get probability 0 instead of NaN
  # (fractional shape) or a spurious positive value (even integer shape).
  z <- pmax((q - location) / scale, 0)
  # Algebraically equal to z^shape / (1 + z^shape), but it does not turn
  # into Inf / Inf for very large z, so the upper tail correctly reaches 1.
  1 / (1 + z^(-shape))
}

# Quantile function of the three-parameter (shifted) log-logistic
# distribution.
#
# p:        numeric vector of probabilities
# location: shift added to the result
# scale:    scale parameter
# shape:    shape parameter
#
# Returns the quantiles corresponding to `p`.
qllogis <- function(p, location = 0, scale = 1, shape = 2) {
  # Odds of falling below the quantile
  odds <- p / (1 - p)
  location + scale * odds^(1 / shape)
}

# Draw `n` random values from the three-parameter log-logistic
# distribution by pushing uniform draws through the quantile function.
#
# n:        number of values to draw
# location: location parameter forwarded to qllogis
# scale:    scale parameter forwarded to qllogis
# shape:    shape parameter forwarded to qllogis
rllogis <- function(n, location = 0, scale = 1, shape = 2) {
  uniform_draws <- runif(n)
  qllogis(uniform_draws, location = location, scale = scale, shape = shape)
}

# estimate starting args from samples
# Derive starting parameter values for a log-logistic fit at a given
# location.
#
# x:        numeric vector of samples
# location: lower bound of the distribution; must be strictly below every
#           value in x
#
# Returns a list with elements `location`, `scale` and `shape`.
# Raises an error if any sample is at or below `location`, because the log
# of a non-positive shifted value would silently turn the estimates into
# NaN or 0.
start_args_for_loc <- function(x, location) {
  if (any(x <= location, na.rm = TRUE)) {
    stop(
      "`location` must be strictly less than every value in `x`",
      call. = FALSE
    )
  }
  # Shift and log once; both estimates are moments of this vector.
  log_shifted <- log(x - location)
  list(
    location = location,
    # exp of the mean log, i.e. the geometric mean of the shifted samples
    scale = exp(mean(log_shifted)),
    # sqrt(3) / pi converts a standard deviation into the scale of a
    # logistic distribution; the shape is the reciprocal of that scale
    shape = 1 / (sqrt(3) / pi * sd(log_shifted))
  )
}
# Starting values for a fit, with the location placed at 95% of the
# smallest sample.
get_start_args <- function(x) {
  initial_location <- min(x) * 0.95
  start_args_for_loc(x, initial_location)
}

library(fitdistrplus)

# Fit the three-parameter log-logistic distribution to `x` by maximum
# goodness-of-fit estimation, using the left-tail Anderson-Darling
# distance ("ADL").
get_fit <- function(x) {
  initial_values <- get_start_args(x)
  fitdist(
    data = x,
    distr = "llogis",
    method = "mge",
    start = initial_values,
    gof = "ADL"
  )
}

# Simulate a sample from a fitted distribution.
#
# f: fit object providing `distname`, `n` and the named `estimate` vector
#
# Returns `f$n` draws from the matching r<distname> generator, called with
# the estimated parameters as named arguments.
theoretical_samples <- function(f) {
  # e.g. distname "llogis" resolves to rllogis
  generator <- match.fun(paste0("r", f$distname))
  generator_args <- c(list(n = f$n), as.list(f$estimate))
  do.call(generator, generator_args)
}

# Fit the log-logistic distribution to each sample set and report the
# fits.
fit_a <- get_fit(a)
fit_b <- get_fit(b)

summary(fit_a)
summary(fit_b)

# Wilcoxon rank-sum tests: each real sample against a simulated draw from
# its own fit, the two real samples against each other, and the two
# simulated draws against each other.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
wilcox.test(a, theoretical_samples(fit_a), conf.int = TRUE)
wilcox.test(b, theoretical_samples(fit_b), conf.int = TRUE)
wilcox.test(a, b, conf.int = TRUE)
wilcox.test(
  theoretical_samples(fit_a),
  theoretical_samples(fit_b),
  conf.int = TRUE
)

# Diagnostic plots for the fit of `a`: density, CDF and Q-Q (both on
# log-log axes), and P-P.
denscomp(fit_a, demp = TRUE)
cdfcomp(fit_a, xlogscale = TRUE, ylogscale = TRUE)
qqcomp(fit_a, xlogscale = TRUE, ylogscale = TRUE)
ppcomp(fit_a)

# The same diagnostics for the fit of `b`.
denscomp(fit_b, demp = TRUE)
cdfcomp(fit_b, xlogscale = TRUE, ylogscale = TRUE)
qqcomp(fit_b, xlogscale = TRUE, ylogscale = TRUE)
ppcomp(fit_b)
	#!/usr/bin/env Rscript

	# real load samples
	# Two sets of 50 observed load measurements (units are not stated here).
	a <- c(
	  1075, 1043, 964, 965, 1067, 1111, 929, 1016, 983, 998,
	  995, 994, 995, 1064, 1025, 1078, 987, 998, 992, 1094,
	  965, 993, 1022, 964, 1002, 926, 1408, 1083, 944, 988,
	  960, 979, 939, 936, 990, 947, 1037, 1034, 1057, 1095,
	  1015, 941, 1054, 1050, 1192, 973, 1321, 1051, 974, 906
	)
	b <- c(
	  1023, 1175, 1230, 1055, 1073, 1118, 1030, 1093, 1095, 1040,
	  1166, 1257, 1151, 1047, 1217, 1247, 1256, 1057, 1152, 1074,
	  1016, 1040, 1176, 1135, 1003, 1178, 1136, 1402, 1215, 1176,
	  1107, 1202, 1080, 1068, 1133, 1091, 1041, 1143, 1140, 1156,
	  1062, 1076, 973, 1048, 1183, 1165, 1246, 1225, 1174, 1063
	)

	# the 3 param log-logistic is a good distribution for simulating typical
	# performance samples from page load
	# Density of the three-parameter (shifted) log-logistic distribution.
	#
	# x:        numeric vector of points at which to evaluate the density
	# location: shift applied to x; the density is zero below it
	# scale:    scale parameter (assumed > 0; not validated)
	# shape:    shape parameter (assumed > 0; not validated)
	#
	# Returns a numeric vector of densities.
	dllogis <- function(x, location = 0, scale = 1, shape = 2) {
	  z <- (x - location) / scale
	  # Clamp before exponentiating: a negative base raised to a fractional
	  # power is NaN, and with an integer shape it gives a bogus (possibly
	  # negative) density.
	  z_pos <- pmax(z, 0)
	  dens <- ((shape / scale) * z_pos^(shape - 1)) / (1 + z_pos^shape)^2
	  # No probability mass lies below the location parameter. rep_len keeps
	  # the mask aligned when a vector parameter made `dens` longer than `z`.
	  below <- rep_len(z < 0, length(dens))
	  dens[which(below)] <- 0
	  dens
	}

	# Cumulative distribution function of the three-parameter (shifted)
	# log-logistic distribution.
	#
	# q:        numeric vector of quantiles
	# location: shift applied to q; the CDF is zero at or below it
	# scale:    scale parameter (assumed > 0; not validated)
	# shape:    shape parameter (assumed > 0; not validated)
	#
	# Returns a numeric vector of probabilities in [0, 1].
	pllogis <- function(q, location = 0, scale = 1, shape = 2) {
	  # Standardised distance above the lower bound. Values at or below
	  # `location` are clamped to 0 so they get probability 0 instead of NaN
	  # (fractional shape) or a spurious positive value (even integer shape).
	  z <- pmax((q - location) / scale, 0)
	  # Algebraically equal to z^shape / (1 + z^shape), but it does not turn
	  # into Inf / Inf for very large z, so the upper tail correctly reaches 1.
	  1 / (1 + z^(-shape))
	}

	# Quantile function of the three-parameter (shifted) log-logistic
	# distribution.
	#
	# p:        numeric vector of probabilities
	# location: shift added to the result
	# scale:    scale parameter
	# shape:    shape parameter
	#
	# Returns the quantiles corresponding to `p`.
	qllogis <- function(p, location = 0, scale = 1, shape = 2) {
	  # Odds of falling below the quantile
	  odds <- p / (1 - p)
	  location + scale * odds^(1 / shape)
	}

	# Draw `n` random values from the three-parameter log-logistic
	# distribution by pushing uniform draws through the quantile function.
	#
	# n:        number of values to draw
	# location: location parameter forwarded to qllogis
	# scale:    scale parameter forwarded to qllogis
	# shape:    shape parameter forwarded to qllogis
	rllogis <- function(n, location = 0, scale = 1, shape = 2) {
	  uniform_draws <- runif(n)
	  qllogis(uniform_draws, location = location, scale = scale, shape = shape)
	}

	# estimate starting args from samples
	# Derive starting parameter values for a log-logistic fit at a given
	# location.
	#
	# x:        numeric vector of samples
	# location: lower bound of the distribution; must be strictly below every
	#           value in x
	#
	# Returns a list with elements `location`, `scale` and `shape`.
	# Raises an error if any sample is at or below `location`, because the log
	# of a non-positive shifted value would silently turn the estimates into
	# NaN or 0.
	start_args_for_loc <- function(x, location) {
	  if (any(x <= location, na.rm = TRUE)) {
	    stop(
	      "`location` must be strictly less than every value in `x`",
	      call. = FALSE
	    )
	  }
	  # Shift and log once; both estimates are moments of this vector.
	  log_shifted <- log(x - location)
	  list(
	    location = location,
	    # exp of the mean log, i.e. the geometric mean of the shifted samples
	    scale = exp(mean(log_shifted)),
	    # sqrt(3) / pi converts a standard deviation into the scale of a
	    # logistic distribution; the shape is the reciprocal of that scale
	    shape = 1 / (sqrt(3) / pi * sd(log_shifted))
	  )
	}
	# Starting values for a fit, with the location placed at 95% of the
	# smallest sample.
	get_start_args <- function(x) {
	  initial_location <- min(x) * 0.95
	  start_args_for_loc(x, initial_location)
	}

	library(fitdistrplus)

	# Fit the three-parameter log-logistic distribution to `x` by maximum
	# goodness-of-fit estimation, using the left-tail Anderson-Darling
	# distance ("ADL").
	get_fit <- function(x) {
	  initial_values <- get_start_args(x)
	  fitdist(
	    data = x,
	    distr = "llogis",
	    method = "mge",
	    start = initial_values,
	    gof = "ADL"
	  )
	}

	# Simulate a sample from a fitted distribution.
	#
	# f: fit object providing `distname`, `n` and the named `estimate` vector
	#
	# Returns `f$n` draws from the matching r<distname> generator, called with
	# the estimated parameters as named arguments.
	theoretical_samples <- function(f) {
	  # e.g. distname "llogis" resolves to rllogis
	  generator <- match.fun(paste0("r", f$distname))
	  generator_args <- c(list(n = f$n), as.list(f$estimate))
	  do.call(generator, generator_args)
	}

	# Fit the log-logistic distribution to each sample set and report the
	# fits.
	fit_a <- get_fit(a)
	fit_b <- get_fit(b)

	summary(fit_a)
	summary(fit_b)

	# Wilcoxon rank-sum tests: each real sample against a simulated draw from
	# its own fit, the two real samples against each other, and the two
	# simulated draws against each other.
	# TRUE is spelled out because `T` is an ordinary, reassignable variable.
	wilcox.test(a, theoretical_samples(fit_a), conf.int = TRUE)
	wilcox.test(b, theoretical_samples(fit_b), conf.int = TRUE)
	wilcox.test(a, b, conf.int = TRUE)
	wilcox.test(
	  theoretical_samples(fit_a),
	  theoretical_samples(fit_b),
	  conf.int = TRUE
	)

	# Diagnostic plots for the fit of `a`: density, CDF and Q-Q (both on
	# log-log axes), and P-P.
	denscomp(fit_a, demp = TRUE)
	cdfcomp(fit_a, xlogscale = TRUE, ylogscale = TRUE)
	qqcomp(fit_a, xlogscale = TRUE, ylogscale = TRUE)
	ppcomp(fit_a)

	# The same diagnostics for the fit of `b`.
	denscomp(fit_b, demp = TRUE)
	cdfcomp(fit_b, xlogscale = TRUE, ylogscale = TRUE)
	qqcomp(fit_b, xlogscale = TRUE, ylogscale = TRUE)
	ppcomp(fit_b)