Skip to content

Instantly share code, notes, and snippets.

@ThomasBrittain
Last active December 12, 2019 01:31
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ThomasBrittain/c930febaa60e3cd286420b71cd94bb76 to your computer and use it in GitHub Desktop.
Save ThomasBrittain/c930febaa60e3cd286420b71cd94bb76 to your computer and use it in GitHub Desktop.
Time Series
#use "Utils.ml";;
#use "TimeSeriesWithRecords.ml";;
#use "TimeSeriesWithModules.ml";;
let test_data =
[|
{date = {year = 2019; month = Jan; day = 1}; time = None; value = Some 1.0};
{date = {year = 2019; month = Jan; day = 2}; time = None; value = Some 2.0};
{date = {year = 2019; month = Jan; day = 4}; time = None; value = None};
{date = {year = 2019; month = Jan; day = 6}; time = None; value = Some 3.0};
{date = {year = 2019; month = Jan; day = 8}; time = None; value = Some 3.0}
|]
(* Record Test *)
let ts = build_time_series test_data (Days 1)
(* Module Test *)
module FloatBuilder (D : TsData with type t = float) : (TimeSeries with type t = float) =
struct
type t = D.t
let data =
let data_copy = Array.copy D.data in
sort_series data_copy;
data_copy
let sorted = true
let missing_points =
is_missing_points {
data = data;
sorted = true;
missing_points = true;
interval = D.interval
}
let interval = D.interval
end
module TestTsData =
struct
type t = float
let data = test_data
let interval = Days 1
end
(* data type is abstract *)
module Test = Builder (TestTsData)
(* data type is concrete *)
module FloatTest = FloatBuilder (TestTsData)
(* Which is faster? *)
let rec call_n_times ?(count = 0) n f =
if count < n
then (f (); call_n_times ~count:(count + 1) n f)
else ()
let timer n f =
let start_time = Unix.gettimeofday () in
call_n_times n f;
let end_time = Unix.gettimeofday () in
end_time -. start_time
let build_ts_with_record () =
let ts = build_time_series test_data (Days 1) in
if ts.sorted
then ()
else ()
let build_ts_with_module () =
let ts = (module FloatBuilder (TestTsData) : TimeSeries) in
let module TS = (val ts : TimeSeries) in
if TS.sorted
then ()
else ()
let benchmark n = timer n build_ts_with_record /. timer n build_ts_with_module
(* Future TODOs : Which is cheaper, fill missing data, drop None values *)
module type TimeSeries =
sig
type t
val data : t data_point array
val sorted : bool
val missing_points : bool
val interval : interval
end
module type TsData =
sig
type t
val data : t data_point array
val interval : interval
end
module Builder (D : TsData) : TimeSeries =
struct
type t = D.t
let data =
let data_copy = Array.copy D.data in
sort_series data_copy;
data_copy
let sorted = true
let missing_points =
is_missing_points {
data = data;
sorted = true;
missing_points = true;
interval = D.interval
}
let interval = D.interval
end
type 'a time_series = {
data : 'a data_point array;
sorted : bool;
missing_points : bool;
interval : interval
}
let mkdate (d : date) = {
Unix.tm_sec = 0;
tm_min = 0;
tm_hour = 0;
tm_mday = d.day;
tm_mon = int_of_month d.month;
tm_year = d.year;
tm_wday = 0;
tm_yday = 0;
tm_isdst = true
}
|> Unix.mktime
let make_next_date interval current_date =
let open Unix in
let next_dt =
localtime (
(fst @@ mkdate current_date) +.
float_of_int (seconds_of_interval interval)
)
in
{
year = next_dt.tm_year;
month = month_of_int next_dt.tm_mon;
day = next_dt.tm_mday
}
(* NOTE: This function assumes that data is always sorted *)
let is_missing_points time_series =
(* TODO: Raise an exception here if the data is not already sorted! *)
let rec run prior_date remaining_points =
match Array.length remaining_points with
| 0 -> false
| _ ->
let next_date = (Array.get remaining_points 0).date in
if next_date <> make_next_date time_series.interval prior_date
then true
else run next_date (Array.sub remaining_points 1 (Array.length remaining_points - 1))
in
run
(Array.get time_series.data 0).date
(Array.sub time_series.data 1 (Array.length time_series.data - 1))
let build_time_series data interval =
let data_copy = Array.copy data in
sort_series data_copy;
let missing_points =
is_missing_points {
data = data;
sorted = true;
missing_points = true;
interval = interval
}
in
{
data = data_copy;
sorted = true;
missing_points = missing_points;
interval = interval
}
exception InvalidMonth of int
exception SortError of string
exception CustomException of string
type month =
| Jan
| Feb
| Mar
| Apr
| May
| Jun
| Jul
| Aug
| Sep
| Oct
| Nov
| Dec
type interval =
| Seconds of int
| Minutes of int
| Hours of int
| Days of int
type date = {
year : int;
month : month;
day : int;
}
type time = {
hour : int;
min : int;
sec : int;
}
type 'a data_point = {
date : date;
time : time option;
value : 'a option
}
let month_of_int = function
| 0 -> Jan
| 1 -> Feb
| 2 -> Mar
| 3 -> Apr
| 4 -> May
| 5 -> Jun
| 6 -> Jul
| 7 -> Aug
| 8 -> Sep
| 9 -> Oct
| 10 -> Nov
| 11 -> Dec
| _ as m -> raise (InvalidMonth m)
let int_of_month = function
| Jan -> 0
| Feb -> 1
| Mar -> 2
| Apr -> 3
| May -> 4
| Jun -> 5
| Jul -> 6
| Aug -> 7
| Sep -> 8
| Oct -> 9
| Nov -> 10
| Dec -> 11
let string_of_month = function
| Jan -> "Jan"
| Feb -> "Feb"
| Mar -> "Mar"
| Apr -> "Apr"
| May -> "May"
| Jun -> "Jun"
| Jul -> "Jul"
| Aug -> "Aug"
| Sep -> "Sep"
| Oct -> "Oct"
| Nov -> "Nov"
| Dec -> "Dec"
let string_of_date d =
(string_of_int d.year) ^ "-" ^
(string_of_month d.month) ^ "-" ^
(string_of_int d.day)
let short_string_of_date d =
(string_of_month d.month) ^ "/" ^
(string_of_int d.day)
let seconds_of_interval = function
| Seconds s -> s
| Minutes m -> 60 * m
| Hours h -> 3_600 * h
| Days d -> 86_400 * d
let time_compare a b =
match a.time, b.time with
| None, _
| _, None -> raise (CustomException "Cannot compare Some time and None")
| None, None -> 0
| Some x, Some y ->
if x.hour < y.hour
then -1
else (
if x.hour > y.hour
then 1
else (
if x.min < y.min
then -1
else (
if x.min > y.min
then 1
else (
if x.sec < y.sec
then -1
else (
if x.sec > y.sec
then 1
else 0
)
)
)
)
)
let date_compare x y =
if x.date.year < y.date.year
then -1
else (
if x.date.year > y.date.year
then 1
else (
if x.date.month < y.date.month
then -1
else (
if x.date.month > y.date.month
then 1
else (
if x.date.day < y.date.day
then -1
else (
if x.date.day > y.date.day
then 1
else time_compare x y
)
)
)
)
)
let sort_series data =
Array.fast_sort (fun x y -> date_compare x y) data
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment