-
-
Save artemklevtsov/6b39f44fa2745cf970770cc0b9445693 to your computer and use it in GitHub Desktop.
#' @title Floor Dates | |
#' @param x A vector of date. | |
#' @param unit A character string specifying a time unit. | |
#' @param start.on.monday Should the week start on Mondays or Sundays? | |
#' @return An object of class "Date". | |
floor_date <- function(x, unit = c("day", "week", "month", "quarter", "year"), start.on.monday = TRUE) { | |
stopifnot(is(x, "Date")) | |
unit <- match.arg(unit) | |
if (unit == "day") { | |
return(x) | |
} | |
if (unit == "week") { | |
l <- ((unclass(ll) - 3L) %/% 7L) * 7L + 4L | |
if (start.on.monday) { | |
return(.Date(7 * ((unclass(x) - 4L) %/% 7) + 4L)) | |
} else { | |
return(.Date(7 * ((unclass(x) - 3L) %/% 7) + 3L)) | |
} | |
} else { | |
l <- as.POSIXlt(x) | |
l <- switch( | |
unit, | |
month = l$mday, | |
quarter = l$mon %/% 3L, | |
year = l$year | |
) | |
} | |
return(x - l + 1L) | |
} |
It provides a different results:
> head(.Date(7 * (unclass(ll) %/% 7)), 20)
[1] "1970-01-01" "1970-01-01" "1970-01-01" "1970-01-01" "1970-01-01" "1970-01-01" "1970-01-01" "1970-01-08"
[9] "1970-01-08" "1970-01-08" "1970-01-08" "1970-01-08" "1970-01-08" "1970-01-08" "1970-01-15" "1970-01-15"
[17] "1970-01-15" "1970-01-15" "1970-01-15" "1970-01-15"
> head(floor_date(ll, "week"), 20)
[1] "1969-12-29" "1969-12-29" "1969-12-29" "1969-12-29" "1970-01-05" "1970-01-05" "1970-01-05" "1970-01-05"
[9] "1970-01-05" "1970-01-05" "1970-01-05" "1970-01-12" "1970-01-12" "1970-01-12" "1970-01-12" "1970-01-12"
[17] "1970-01-12" "1970-01-12" "1970-01-19" "1970-01-19"
sorry I was being imprecise... since Jan 1 1970 is I think a Wednesday you'd need to include an offset after unclass
That's right, but:
> head((unclass(ll) - 3L) %/% 7, 20)
[1] -1 -1 -1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2
> head(unclass(ll) - as.POSIXlt(ll)$wday, 20)
[1] -4 -4 -4 3 3 3 3 3 3 3 10 10 10 10 10 10 10 17 17 17
How can we get it?
Found:
> head(unclass(ll) - as.POSIXlt(ll)$wday, 20)
[1] -4 -4 -4 3 3 3 3 3 3 3 10 10 10 10 10 10 10 17 17 17
> head(((unclass(ll) - 3L) %/% 7L) * 7L + 3L, 20)
[1] -4 -4 -4 3 3 3 3 3 3 3 10 10 10 10 10 10 10 17 17 17
yea depends on trunc to monday or sunday, I see
all.equal(
lubridate::floor_date(ll, "week", 1),
.Date(7*((unclass(ll) - 4L) %/% 7) + 4L)
)
all.equal(
lubridate::floor_date(ll, "week"),
.Date(7*((unclass(ll) - 3L) %/% 7) + 3L)
)
There should be a way to get that number and make your floor_date
about to match lubridate::floor_date
completely 👍
I see this on the benchmark:
library(lubridate)
st <- as.Date("1970-01-01")
en <- as.Date("2020-03-15")
ll <- seq.Date(st, en, by = "day")
base_floor_date <- function(x, unit = c("day", "week", "month", "quarter", "year"), start.on.monday = TRUE) {
stopifnot(is(x, "Date"))
unit <- match.arg(unit)
if (unit == "day") {
return(x)
}
if (unit == 'week') {
offset = 3L + start.on.monday
return(.Date(7L*((unclass(x) - offset) %/% 7L) + offset))
}
l <- as.POSIXlt(x)
l <- switch(
unit,
week = l$wday,
month = l$mday,
quarter = l$mon %/% 3L,
year = l$year
)
if (unit == "week" && start.on.monday) {
l <- replace(l, l == 0L, 7L)
}
return(x - l + 1L)
}
bench::mark(
floor_date(ll, "week", 1),
as.Date(cut.Date(ll, "week")),
base_floor_date(ll, "week")
)
# A tibble: 3 x 13
expression min median `itr/sec` mem_alloc `gc/sec` n_itr
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int>
1 floor_date(ll, "week", 1) 4.64ms 5.12ms 181. 2.45MB 9.17 79
2 as.Date(cut.Date(ll, "week")) 63.35ms 67.93ms 12.3 5.52MB 2.04 6
3 base_floor_date(ll, "week") 281.35µs 325.04µs 2867. 280.7KB 6.60 1304
# … with 6 more variables: n_gc <dbl>, total_time <bch:tm>, result <list>,
# memory <list>, time <list>, gc <list>
So, now it's perfect.
> bench::mark(
+ lubridate::floor_date(ll, "week", 1),
+ as.Date(cut.Date(ll, "week")),
+ floor_date(ll, "week", TRUE),
+ floor_date_(ll, "week", TRUE)
+ )
# A tibble: 4 x 13
expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time result
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm> <list>
1 lubridate::floor_date(ll, "week", 1) 4.51ms 4.65ms 213. NA 10.8 99 5 464ms <date…
2 as.Date(cut.Date(ll, "week")) 46.58ms 47.04ms 21.3 NA 2.13 10 1 470ms <date…
3 floor_date(ll, "week", TRUE) 1.27ms 1.3ms 756. NA 21.7 349 10 462ms <date…
4 floor_date_(ll, "week", TRUE) 772.06µs 783.96µs 1195. NA 4.53 528 2 442ms <date…
# … with 3 more variables: memory <list>, time <list>, gc <list>
> bench::mark(
+ lubridate::floor_date(ll, "week", 7),
+ floor_date(ll, "week", FALSE),
+ floor_date_(ll, "week", FALSE)
+ )
# A tibble: 3 x 13
expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time result
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm> <list>
1 lubridate::floor_date(ll, "week", 7) 4.5ms 4.9ms 201. NA 10.9 92 5 457ms <date…
2 floor_date(ll, "week", FALSE) 1.24ms 1.29ms 717. NA 17.1 336 8 469ms <date…
3 floor_date_(ll, "week", FALSE) 771.52µs 787.64µs 1168. NA 6.87 510 3 437ms <date…
# … with 3 more variables: memory <list>, time <list>, gc <list>
@MichaelChirico thank you for the notes. I hope we will look something like that in data.table
.
🚀 i wonder lubridate
would accept a PR... I don't see mem_alloc
on your benchmark, but on mine it's 5x faster & 10x less memory... sacrifice is slight readability issue on internal code? can be solved with comments ideally
Strange to me there's no trunc.Date('week')
method in base
. Anyway, we might consider supporting that in trunc.IDate
if you could please file PR 🙏
lubridate
wants stringr
...
Anyway, we might consider supporting that in trunc.IDate if you could please file PR pray
Good idea. I'll do it.
for weeks, can't we just do .Date(7*(unclass(x) %/% 7))?