Time Series
#use "Utils.ml";; | |
#use "TimeSeriesWithRecords.ml";; | |
#use "TimeSeriesWithModules.ml";; | |
let test_data = | |
[| | |
{date = {year = 2019; month = Jan; day = 1}; time = None; value = Some 1.0}; | |
{date = {year = 2019; month = Jan; day = 2}; time = None; value = Some 2.0}; | |
{date = {year = 2019; month = Jan; day = 4}; time = None; value = None}; | |
{date = {year = 2019; month = Jan; day = 6}; time = None; value = Some 3.0}; | |
{date = {year = 2019; month = Jan; day = 8}; time = None; value = Some 3.0} | |
|] | |
(* Record Test *) | |
let ts = build_time_series test_data (Days 1) | |
(* Module Test *) | |
module FloatBuilder (D : TsData with type t = float) : (TimeSeries with type t = float) = | |
struct | |
type t = D.t | |
let data = | |
let data_copy = Array.copy D.data in | |
sort_series data_copy; | |
data_copy | |
let sorted = true | |
let missing_points = | |
is_missing_points { | |
data = data; | |
sorted = true; | |
missing_points = true; | |
interval = D.interval | |
} | |
let interval = D.interval | |
end | |
module TestTsData = | |
struct | |
type t = float | |
let data = test_data | |
let interval = Days 1 | |
end | |
(* data type is abstract *) | |
module Test = Builder (TestTsData) | |
(* data type is concrete *) | |
module FloatTest = FloatBuilder (TestTsData) | |
(* Which is faster? *) | |
let rec call_n_times ?(count = 0) n f = | |
if count < n | |
then (f (); call_n_times ~count:(count + 1) n f) | |
else () | |
let timer n f = | |
let start_time = Unix.gettimeofday () in | |
call_n_times n f; | |
let end_time = Unix.gettimeofday () in | |
end_time -. start_time | |
let build_ts_with_record () = | |
let ts = build_time_series test_data (Days 1) in | |
if ts.sorted | |
then () | |
else () | |
let build_ts_with_module () = | |
let ts = (module FloatBuilder (TestTsData) : TimeSeries) in | |
let module TS = (val ts : TimeSeries) in | |
if TS.sorted | |
then () | |
else () | |
let benchmark n = timer n build_ts_with_record /. timer n build_ts_with_module | |
(* Future TODOs : Which is cheaper, fill missing data, drop None values *) |
module type TimeSeries = | |
sig | |
type t | |
val data : t data_point array | |
val sorted : bool | |
val missing_points : bool | |
val interval : interval | |
end | |
module type TsData = | |
sig | |
type t | |
val data : t data_point array | |
val interval : interval | |
end | |
module Builder (D : TsData) : TimeSeries = | |
struct | |
type t = D.t | |
let data = | |
let data_copy = Array.copy D.data in | |
sort_series data_copy; | |
data_copy | |
let sorted = true | |
let missing_points = | |
is_missing_points { | |
data = data; | |
sorted = true; | |
missing_points = true; | |
interval = D.interval | |
} | |
let interval = D.interval | |
end |
type 'a time_series = { | |
data : 'a data_point array; | |
sorted : bool; | |
missing_points : bool; | |
interval : interval | |
} | |
let mkdate (d : date) = { | |
Unix.tm_sec = 0; | |
tm_min = 0; | |
tm_hour = 0; | |
tm_mday = d.day; | |
tm_mon = int_of_month d.month; | |
tm_year = d.year; | |
tm_wday = 0; | |
tm_yday = 0; | |
tm_isdst = true | |
} | |
|> Unix.mktime | |
let make_next_date interval current_date = | |
let open Unix in | |
let next_dt = | |
localtime ( | |
(fst @@ mkdate current_date) +. | |
float_of_int (seconds_of_interval interval) | |
) | |
in | |
{ | |
year = next_dt.tm_year; | |
month = month_of_int next_dt.tm_mon; | |
day = next_dt.tm_mday | |
} | |
(* NOTE: This function assumes that data is always sorted *) | |
let is_missing_points time_series = | |
(* TODO: Raise an exception here if the data is not already sorted! *) | |
let rec run prior_date remaining_points = | |
match Array.length remaining_points with | |
| 0 -> false | |
| _ -> | |
let next_date = (Array.get remaining_points 0).date in | |
if next_date <> make_next_date time_series.interval prior_date | |
then true | |
else run next_date (Array.sub remaining_points 1 (Array.length remaining_points - 1)) | |
in | |
run | |
(Array.get time_series.data 0).date | |
(Array.sub time_series.data 1 (Array.length time_series.data - 1)) | |
let build_time_series data interval = | |
let data_copy = Array.copy data in | |
sort_series data_copy; | |
let missing_points = | |
is_missing_points { | |
data = data; | |
sorted = true; | |
missing_points = true; | |
interval = interval | |
} | |
in | |
{ | |
data = data_copy; | |
sorted = true; | |
missing_points = missing_points; | |
interval = interval | |
} |
exception InvalidMonth of int | |
exception SortError of string | |
exception CustomException of string | |
type month = | |
| Jan | |
| Feb | |
| Mar | |
| Apr | |
| May | |
| Jun | |
| Jul | |
| Aug | |
| Sep | |
| Oct | |
| Nov | |
| Dec | |
type interval = | |
| Seconds of int | |
| Minutes of int | |
| Hours of int | |
| Days of int | |
type date = { | |
year : int; | |
month : month; | |
day : int; | |
} | |
type time = { | |
hour : int; | |
min : int; | |
sec : int; | |
} | |
type 'a data_point = { | |
date : date; | |
time : time option; | |
value : 'a option | |
} | |
let month_of_int = function | |
| 0 -> Jan | |
| 1 -> Feb | |
| 2 -> Mar | |
| 3 -> Apr | |
| 4 -> May | |
| 5 -> Jun | |
| 6 -> Jul | |
| 7 -> Aug | |
| 8 -> Sep | |
| 9 -> Oct | |
| 10 -> Nov | |
| 11 -> Dec | |
| _ as m -> raise (InvalidMonth m) | |
let int_of_month = function | |
| Jan -> 0 | |
| Feb -> 1 | |
| Mar -> 2 | |
| Apr -> 3 | |
| May -> 4 | |
| Jun -> 5 | |
| Jul -> 6 | |
| Aug -> 7 | |
| Sep -> 8 | |
| Oct -> 9 | |
| Nov -> 10 | |
| Dec -> 11 | |
let string_of_month = function | |
| Jan -> "Jan" | |
| Feb -> "Feb" | |
| Mar -> "Mar" | |
| Apr -> "Apr" | |
| May -> "May" | |
| Jun -> "Jun" | |
| Jul -> "Jul" | |
| Aug -> "Aug" | |
| Sep -> "Sep" | |
| Oct -> "Oct" | |
| Nov -> "Nov" | |
| Dec -> "Dec" | |
let string_of_date d = | |
(string_of_int d.year) ^ "-" ^ | |
(string_of_month d.month) ^ "-" ^ | |
(string_of_int d.day) | |
let short_string_of_date d = | |
(string_of_month d.month) ^ "/" ^ | |
(string_of_int d.day) | |
let seconds_of_interval = function | |
| Seconds s -> s | |
| Minutes m -> 60 * m | |
| Hours h -> 3_600 * h | |
| Days d -> 86_400 * d | |
let time_compare a b = | |
match a.time, b.time with | |
| None, _ | |
| _, None -> raise (CustomException "Cannot compare Some time and None") | |
| None, None -> 0 | |
| Some x, Some y -> | |
if x.hour < y.hour | |
then -1 | |
else ( | |
if x.hour > y.hour | |
then 1 | |
else ( | |
if x.min < y.min | |
then -1 | |
else ( | |
if x.min > y.min | |
then 1 | |
else ( | |
if x.sec < y.sec | |
then -1 | |
else ( | |
if x.sec > y.sec | |
then 1 | |
else 0 | |
) | |
) | |
) | |
) | |
) | |
let date_compare x y = | |
if x.date.year < y.date.year | |
then -1 | |
else ( | |
if x.date.year > y.date.year | |
then 1 | |
else ( | |
if x.date.month < y.date.month | |
then -1 | |
else ( | |
if x.date.month > y.date.month | |
then 1 | |
else ( | |
if x.date.day < y.date.day | |
then -1 | |
else ( | |
if x.date.day > y.date.day | |
then 1 | |
else time_compare x y | |
) | |
) | |
) | |
) | |
) | |
let sort_series data = | |
Array.fast_sort (fun x y -> date_compare x y) data |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment