Last active
December 8, 2020 17:06
-
-
Save jrevels/3e55d10c8d1bcea9c1c95d87c3cad899 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using UUIDs, Dates, Arrow, MsgPack | |
# MsgPack (de)serialization hooks for the two stdlib types stored in the tables below.
# NOTE(review): this script owns neither the MsgPack functions nor `Nanosecond`/`UUID`,
# so these methods are type piracy; tolerable in a throwaway benchmark, not in a package.

# `Nanosecond` is packed as a MsgPack integer.
MsgPack.msgpack_type(::Type{Nanosecond}) = MsgPack.IntegerType()
MsgPack.from_msgpack(::Type{Nanosecond}, x::Integer) = Nanosecond(x)
# `Dates.value` is the public accessor for the wrapped count (same result as `x.value`).
MsgPack.to_msgpack(::MsgPack.IntegerType, x::Nanosecond) = Dates.value(x)

# `UUID` is packed as the string produced by `string(uuid)`.
MsgPack.msgpack_type(::Type{UUID}) = MsgPack.StringType()
# Accept any `AbstractString`; plain `String` input keeps working as before.
MsgPack.from_msgpack(::Type{UUID}, x::AbstractString) = UUID(x)
MsgPack.to_msgpack(::MsgPack.StringType, x::UUID) = string(x)
"""
    Signal

Immutable record describing one signal; used as the payload type in the serialization
benchmarks of this script. The file does not document field semantics, so the notes
below restate only what the declarations and fixtures show.
"""
struct Signal
    file_uri::String
    # `nothing` when there is no metadata (as in the "eeg" fixture), else string pairs.
    file_metadata::Union{Nothing,Dict{String,String}}
    channel_names::Vector{String}
    start_nanosecond::Nanosecond
    stop_nanosecond::Nanosecond
    sample_unit::String
    sample_resolution_in_unit::Float64
    sample_offset_in_unit::Float64
    sample_type::String
    sample_rate::Float64
end
"""
    Annotation

Immutable record pairing a string `value` with a start and stop `Nanosecond`.
"""
struct Annotation
    value::String
    start_nanosecond::Nanosecond
    stop_nanosecond::Nanosecond
end
"""
    Recording

Immutable container holding a recording's signals, keyed by a string name, together
with a vector of its annotations.
"""
struct Recording
    signals::Dict{String,Signal}
    annotations::Vector{Annotation}
end
# Register each struct with Arrow, mapping the type to itself.
# NOTE(review): `registertype!` presumably lets Arrow round-trip these structs as their
# Julia types; confirm it exists in the Arrow.jl version you run this against.
Arrow.ArrowTypes.registertype!(Annotation, Annotation)
Arrow.ArrowTypes.registertype!(Signal, Signal)
Arrow.ArrowTypes.registertype!(Recording, Recording)

# Declare the same three structs as MsgPack `StructType`s.
MsgPack.msgpack_type(::Type{Recording}) = MsgPack.StructType()
MsgPack.msgpack_type(::Type{Signal}) = MsgPack.StructType()
MsgPack.msgpack_type(::Type{Annotation}) = MsgPack.StructType()
# Fixture: one recording with two signals ("eeg", "ecg") and two annotations. It is
# repeated many times below to build the benchmark tables.
r = Recording(
    Dict(
        "eeg" => Signal(
            "file://joe/dave/eeg.lpcm.zst",
            nothing,               # no file metadata: exercises the `Nothing` branch
            string.('a':'z'),      # the 26 channel names "a" through "z"
            Nanosecond(0),
            Nanosecond(100000),
            "microvolt",
            1.0,
            1.0,
            "float64",
            256.0,
        ),
        "ecg" => Signal(
            "file://joe/dave/ecg.lpcm.zst",
            Dict("a" => "absidufbaid", "b" => "adjfhaudi"),
            ["a", "b", "c", "d"],
            Nanosecond(0),
            Nanosecond(100000),
            "microvolt",
            0.134,
            1.9875,
            "int32",
            128.0,
        ),
    ),
    [
        Annotation("1", Nanosecond(0), Nanosecond(10)),
        Annotation("aksdfhakjsdhfadf", Nanosecond(0), Nanosecond(10)),
    ],
)
# Build the benchmark table: `n` fresh UUIDs next to `n` references to the fixture `r`,
# then serialize it once with Arrow and once with MsgPack into in-memory buffers.
n = 10000
nt = (uuids=[uuid4() for _ in 1:n], recordings=[r for _ in 1:n])

arrow_io = IOBuffer()
Arrow.write(arrow_io, nt)

msgpack_io = IOBuffer()
MsgPack.pack(msgpack_io, nt)
"""
    read_arrow_fully_strict(io)

Open `io` as an `Arrow.Table` and `collect` its first two columns, returning them as the
named tuple `(uuids=..., recordings=...)`. Columns are selected by position, not name.
"""
function read_arrow_fully_strict(io)
    table = Arrow.Table(io)
    return (uuids=collect(table[1]), recordings=collect(table[2]))
end
"""
    read_msgpack_fully_strict(io)

Unpack `io` with MsgPack directly into a `(uuids, recordings)` named tuple of
`Vector{UUID}` and `Vector{Recording}`, passing `strict=(Recording,)` to `unpack`.
"""
function read_msgpack_fully_strict(io)
    target = NamedTuple{(:uuids, :recordings),Tuple{Vector{UUID},Vector{Recording}}}
    return MsgPack.unpack(io, target; strict=(Recording,))
end
# Timing runs. Every run rewinds its buffer first so the read starts at the beginning.
# The figures in the comments are the ones noted by the original author.

# MsgPack, untyped unpack: 0.136834 seconds (1.20 M allocations: 72.175 MiB)
@time begin
    seekstart(msgpack_io)
    MsgPack.unpack(msgpack_io)
end;

# MsgPack, typed/strict unpack: 0.200281 seconds (3.16 M allocations: 102.082 MiB)
@time begin
    seekstart(msgpack_io)
    read_msgpack_fully_strict(msgpack_io)
end;

# Arrow, table construction only: 0.001726 seconds (2.17 k allocations: 4.844 MiB)
@time begin
    seekstart(arrow_io)
    Arrow.Table(arrow_io)
end;

# Arrow, both columns collected: 0.096876 seconds (1.79 M allocations: 124.082 MiB)
@time begin
    seekstart(arrow_io)
    read_arrow_fully_strict(arrow_io)
end;
############################################################################################# | |
############################################################################################# | |
############################################################################################# | |
"""
    SignalSym

Variant of `Signal` in which `channel_names`, `sample_unit` and `sample_type` are stored
as `Symbol`s instead of `String`s; all other fields have the same types.
"""
struct SignalSym
    file_uri::String
    # `nothing` when there is no metadata, otherwise string key/value pairs.
    file_metadata::Union{Nothing,Dict{String,String}}
    channel_names::Vector{Symbol}
    start_nanosecond::Nanosecond
    stop_nanosecond::Nanosecond
    sample_unit::Symbol
    sample_resolution_in_unit::Float64
    sample_offset_in_unit::Float64
    sample_type::Symbol
    sample_rate::Float64
end
"""
    AnnotationSym

Annotation record used by the `Symbol` variant of the benchmark: a string `value` with a
start and stop `Nanosecond`.
"""
struct AnnotationSym
    value::String
    start_nanosecond::Nanosecond
    stop_nanosecond::Nanosecond
end
"""
    RecordingSym

Container for the `Symbol` variant: signals keyed by `Symbol`, plus a vector of
annotations.
"""
struct RecordingSym
    signals::Dict{Symbol,SignalSym}
    annotations::Vector{AnnotationSym}
end
# Register each `*Sym` struct with Arrow, mapping the type to itself.
# NOTE(review): confirm `registertype!` exists in the Arrow.jl version in use.
Arrow.ArrowTypes.registertype!(AnnotationSym, AnnotationSym)
Arrow.ArrowTypes.registertype!(SignalSym, SignalSym)
Arrow.ArrowTypes.registertype!(RecordingSym, RecordingSym)

# Declare the same three structs as MsgPack `StructType`s.
MsgPack.msgpack_type(::Type{RecordingSym}) = MsgPack.StructType()
MsgPack.msgpack_type(::Type{SignalSym}) = MsgPack.StructType()
MsgPack.msgpack_type(::Type{AnnotationSym}) = MsgPack.StructType()
# Fixture for the `Symbol` variant: same contents as the `String` fixture, with names,
# units and sample types given as `Symbol`s.
r_sym = RecordingSym(
    Dict(
        :eeg => SignalSym(
            "file://joe/dave/eeg.lpcm.zst",
            nothing,               # no file metadata: exercises the `Nothing` branch
            Symbol.('a':'z'),      # the 26 channel names :a through :z
            Nanosecond(0),
            Nanosecond(100000),
            :microvolt,
            1.0,
            1.0,
            :float64,
            256.0,
        ),
        :ecg => SignalSym(
            "file://joe/dave/ecg.lpcm.zst",
            Dict("a" => "absidufbaid", "b" => "adjfhaudi"),
            [:a, :b, :c, :d],
            Nanosecond(0),
            Nanosecond(100000),
            :microvolt,
            0.134,
            1.9875,
            :int32,
            128.0,
        ),
    ),
    [
        AnnotationSym("1", Nanosecond(0), Nanosecond(10)),
        AnnotationSym("aksdfhakjsdhfadf", Nanosecond(0), Nanosecond(10)),
    ],
)
# Build the `Symbol`-variant table: `n` fresh UUIDs next to `n` references to `r_sym`,
# then serialize it once with Arrow and once with MsgPack into in-memory buffers.
n = 10000
nt_sym = (uuids=[uuid4() for _ in 1:n], recordings=[r_sym for _ in 1:n])

arrow_io_sym = IOBuffer()
Arrow.write(arrow_io_sym, nt_sym)

msgpack_io_sym = IOBuffer()
MsgPack.pack(msgpack_io_sym, nt_sym)
"""
    read_arrow_fully_strict_sym(io)

Open `io` as an `Arrow.Table` and `collect` its first two columns, returning them as the
named tuple `(uuids=..., recordings=...)`. Columns are selected by position, not name.
"""
function read_arrow_fully_strict_sym(io)
    table = Arrow.Table(io)
    return (uuids=collect(table[1]), recordings=collect(table[2]))
end
"""
    read_msgpack_fully_strict_sym(io)

Unpack `io` with MsgPack directly into a `(uuids, recordings)` named tuple of
`Vector{UUID}` and `Vector{RecordingSym}`, passing `strict=(RecordingSym,)` to `unpack`.
"""
function read_msgpack_fully_strict_sym(io)
    target = NamedTuple{(:uuids, :recordings),Tuple{Vector{UUID},Vector{RecordingSym}}}
    return MsgPack.unpack(io, target; strict=(RecordingSym,))
end
# Timing runs for the `Symbol` variant. Every run rewinds its buffer first; the figures
# in the comments are the ones noted by the original author.

# MsgPack, untyped unpack: 0.141025 seconds (1.20 M allocations: 72.175 MiB)
@time begin
    seekstart(msgpack_io_sym)
    MsgPack.unpack(msgpack_io_sym)
end;

# MsgPack, typed/strict unpack: 0.222684 seconds (3.16 M allocations: 102.082 MiB)
@time begin
    seekstart(msgpack_io_sym)
    read_msgpack_fully_strict_sym(msgpack_io_sym)
end;

# Arrow, table construction only: 0.001674 seconds (2.35 k allocations: 4.861 MiB)
@time begin
    seekstart(arrow_io_sym)
    Arrow.Table(arrow_io_sym)
end;

# Arrow, both columns collected: 0.169594 seconds (1.79 M allocations: 124.099 MiB)
@time begin
    seekstart(arrow_io_sym)
    read_arrow_fully_strict_sym(arrow_io_sym)
end;
############################################################################################# | |
############################################################################################# | |
############################################################################################# | |
using UUIDs, Dates, Arrow, Tables | |
"""
    Signal

Immutable record describing one signal. The field list is identical to the `Signal`
declared at the top of this file.
"""
struct Signal
    file_uri::String
    # `nothing` when there is no metadata, otherwise string key/value pairs.
    file_metadata::Union{Nothing,Dict{String,String}}
    channel_names::Vector{String}
    start_nanosecond::Nanosecond
    stop_nanosecond::Nanosecond
    sample_unit::String
    sample_resolution_in_unit::Float64
    sample_offset_in_unit::Float64
    sample_type::String
    sample_rate::Float64
end
"""
    Annotation

Immutable record pairing a string `value` with a start and stop `Nanosecond`.
"""
struct Annotation
    value::String
    start_nanosecond::Nanosecond
    stop_nanosecond::Nanosecond
end
"""
    Recording

Container holding signals keyed by string name and annotations keyed by `UUID`.
"""
# NOTE(review): the first section of this file declares `Recording` with
# `annotations::Vector{Annotation}`. On Julia versions that reject struct redefinition,
# this section must be run in a fresh session.
struct Recording
    signals::Dict{String,Signal}
    annotations::Dict{UUID,Annotation}
end
# Register each struct with Arrow, mapping the type to itself.
# NOTE(review): confirm `registertype!` exists in the Arrow.jl version in use.
Arrow.ArrowTypes.registertype!(Annotation, Annotation)
Arrow.ArrowTypes.registertype!(Signal, Signal)
Arrow.ArrowTypes.registertype!(Recording, Recording)
# Fixture: one recording with two signals ("eeg", "ecg") and two annotations, each
# annotation keyed by a freshly generated UUID.
r = Recording(
    Dict(
        "eeg" => Signal(
            "file://joe/dave/eeg.lpcm.zst",
            nothing,               # no file metadata: exercises the `Nothing` branch
            string.('a':'z'),      # the 26 channel names "a" through "z"
            Nanosecond(0),
            Nanosecond(100000),
            "microvolt",
            1.0,
            1.0,
            "float64",
            256.0,
        ),
        "ecg" => Signal(
            "file://joe/dave/ecg.lpcm.zst",
            Dict("a" => "absidufbaid", "b" => "adjfhaudi"),
            ["a", "b", "c", "d"],
            Nanosecond(0),
            Nanosecond(100000),
            "microvolt",
            0.134,
            1.9875,
            "int32",
            128.0,
        ),
    ),
    Dict(
        uuid4() => Annotation("1", Nanosecond(0), Nanosecond(10)),
        uuid4() => Annotation("aksdfhakjsdhfadf", Nanosecond(0), Nanosecond(10)),
    ),
)
# Column table with one row per recording: a fresh UUID plus references to the fixture's
# `signals` and `annotations` dictionaries.
n = 10000
tbl = (
    uuids=[uuid4() for _ in 1:n],
    signals=[r.signals for _ in 1:n],
    annotations=[r.annotations for _ in 1:n],
)
"""
    write_table(tbl)

Write `tbl` to a fresh in-memory buffer with `Arrow.write`, rewind the buffer, and
return the `Arrow.Table` read back from it.
"""
function write_table(tbl)
    io = IOBuffer()
    Arrow.write(io, tbl)
    seekstart(io)  # rewind so the table is read from the first byte written
    return Arrow.Table(io)
end
"""
    index_rows(tbl)

Return a `Dict` that maps each entry of `tbl.uuids` to the matching row of
`Tables.rows(tbl)`.

Throws an `ErrorException` when `tbl.uuids` contains duplicates, because later rows
would otherwise silently overwrite earlier ones in the dictionary.
"""
function index_rows(tbl)
    # The original raised `error("")`; keep the exception type but say what went wrong.
    allunique(tbl.uuids) ||
        error("`tbl.uuids` contains duplicate entries; cannot index rows by UUID")
    return Dict(zip(tbl.uuids, Tables.rows(tbl)))
end
# tbl = write_table(tbl);
# @time map(collect, tbl); # 0.127786 seconds (2.08 M allocations: 147.139 MiB)
# @time index_rows(tbl); # 0.001919 seconds (10.05 k allocations: 1.489 MiB)
############################################################################################# | |
############################################################################################# | |
############################################################################################# | |
using UUIDs, Dates, Arrow, Tables | |
"""
    Signal

Immutable record describing one signal. The field list is identical to the `Signal`
declared in the earlier sections of this file.
"""
struct Signal
    file_uri::String
    # `nothing` when there is no metadata, otherwise string key/value pairs.
    file_metadata::Union{Nothing,Dict{String,String}}
    channel_names::Vector{String}
    start_nanosecond::Nanosecond
    stop_nanosecond::Nanosecond
    sample_unit::String
    sample_resolution_in_unit::Float64
    sample_offset_in_unit::Float64
    sample_type::String
    sample_rate::Float64
end
"""
    Annotation

Immutable record pairing a string `value` with a start and stop `Nanosecond`.
"""
struct Annotation
    value::String
    start_nanosecond::Nanosecond
    stop_nanosecond::Nanosecond
end
"""
    Recording(_row)
    Recording(_signals, _annotations)

Mutable recording whose fields may be left undefined at construction.

- `Recording(_row)` stores only the table row; `_signals` and `_annotations` start
  undefined.
- `Recording(_signals, _annotations)` stores both dictionaries; `_row` stays undefined.

Both constructors begin with `new()`, so any field they do not assign remains unset.
"""
# NOTE(review): earlier sections of this file declare `Recording` with a different
# layout. On Julia versions that reject struct redefinition, run this section in a
# fresh session.
mutable struct Recording
    _row::Tables.ColumnsRow{Tables.CopiedColumns{Arrow.Table}}
    _signals::Dict{String,Signal}
    _annotations::Dict{UUID,Annotation}

    function Recording(_row)
        recording = new()
        recording._row = _row
        return recording
    end

    function Recording(_signals, _annotations)
        recording = new()
        recording._signals = _signals
        recording._annotations = _annotations
        return recording
    end
end
"""
    getproperty(recording::Recording, field::Symbol)

Expose `recording.signals` and `recording.annotations` as lazily filled views of the
private `_signals` / `_annotations` fields.

On first access, if the private field is still undefined, it is assigned from the
same-named property of `recording._row`; later accesses return the stored value. Any
other `field` is forwarded to `getfield` unchanged.
"""
function Base.getproperty(recording::Recording, field::Symbol)
    if field === :signals
        # `recording._row` re-enters this method with `:_row` and falls to `getfield`.
        isdefined(recording, :_signals) ||
            (recording._signals = recording._row.signals)
        return getfield(recording, :_signals)
    elseif field === :annotations
        isdefined(recording, :_annotations) ||
            (recording._annotations = recording._row.annotations)
        return getfield(recording, :_annotations)
    end
    return getfield(recording, field)
end
# Register the two payload structs with Arrow, mapping each type to itself. `Recording`
# is not registered in this section.
# NOTE(review): confirm `registertype!` exists in the Arrow.jl version in use.
Arrow.ArrowTypes.registertype!(Annotation, Annotation)
Arrow.ArrowTypes.registertype!(Signal, Signal)
# Fixture: the two signals ("eeg", "ecg") repeated in every row of the table below.
signals = Dict(
    "eeg" => Signal(
        "file://joe/dave/eeg.lpcm.zst",
        nothing,               # no file metadata: exercises the `Nothing` branch
        string.('a':'z'),      # the 26 channel names "a" through "z"
        Nanosecond(0),
        Nanosecond(100000),
        "microvolt",
        1.0,
        1.0,
        "float64",
        256.0,
    ),
    "ecg" => Signal(
        "file://joe/dave/ecg.lpcm.zst",
        Dict("a" => "absidufbaid", "b" => "adjfhaudi"),
        ["a", "b", "c", "d"],
        Nanosecond(0),
        Nanosecond(100000),
        "microvolt",
        0.134,
        1.9875,
        "int32",
        128.0,
    ),
)
# Fixture: two annotations, each keyed by a freshly generated UUID.
annotations = Dict(
    uuid4() => Annotation("1", Nanosecond(0), Nanosecond(10)),
    uuid4() => Annotation("aksdfhakjsdhfadf", Nanosecond(0), Nanosecond(10)),
)
"""
    write_table(tbl)

Write `tbl` to a fresh in-memory buffer with `Arrow.write`, rewind the buffer, and
return the `Arrow.Table` read back from it.
"""
function write_table(tbl)
    io = IOBuffer()
    Arrow.write(io, tbl)
    seekstart(io)  # rewind so the table is read from the first byte written
    return Arrow.Table(io)
end
# Round-trip an `n`-row column table through Arrow: a fresh UUID per row plus references
# to the `signals` and `annotations` fixtures.
n = 10000
tbl = write_table((
    uuids=[uuid4() for _ in 1:n],
    signals=[signals for _ in 1:n],
    annotations=[annotations for _ in 1:n],
))
"""
    read_recordings(tbl)

Return a `Dict{UUID,Recording}` that maps each entry of `tbl.uuids` to a `Recording`
wrapping the matching row of `Tables.rows(tbl)`. Only the row is stored at this point;
see the `Base.getproperty` method for `Recording` for how fields are filled on access.

Throws an `ErrorException` when `tbl.uuids` contains duplicates, because later rows
would otherwise silently overwrite earlier ones in the dictionary.
"""
function read_recordings(tbl)
    # The original raised `error("")`; keep the exception type but say what went wrong.
    allunique(tbl.uuids) ||
        error("`tbl.uuids` contains duplicate entries; cannot key recordings by UUID")
    recordings = (Recording(row) for row in Tables.rows(tbl))
    return Dict{UUID,Recording}(zip(tbl.uuids, recordings))
end
# Timing runs for the lazy `Recording`. Statement order matters: the first access to a
# property fills the private field from the row, and the second access reads the stored
# value. The figures in the comments are the ones noted by the original author.
@time recordings = read_recordings(tbl) # 0.001716 seconds (10.05 k allocations: 1.642 MiB)
example = first(values(recordings))
@time example.signals # first access: 0.000058 seconds (150 allocations: 9.688 KiB)
@time example.signals # second access: 0.000004 seconds
@time example.annotations # first access: 0.000045 seconds (63 allocations: 5.516 KiB)
@time example.annotations # second access: 0.000005 seconds
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment