Skip to content

Instantly share code, notes, and snippets.

@jrevels
Last active December 8, 2020 17:06
Show Gist options
  • Save jrevels/3e55d10c8d1bcea9c1c95d87c3cad899 to your computer and use it in GitHub Desktop.
Save jrevels/3e55d10c8d1bcea9c1c95d87c3cad899 to your computer and use it in GitHub Desktop.
using UUIDs, Dates, Arrow, MsgPack
# MsgPack encodings for two standard-library types:
#   * `Nanosecond` is packed as an integer (its underlying value);
#   * `UUID` is packed as its string form.
# NOTE(review): these methods extend MsgPack functions for types owned by
# Dates/UUIDs (type piracy) — tolerable in a standalone script, avoid in a package.
function MsgPack.msgpack_type(::Type{Nanosecond})
    return MsgPack.IntegerType()
end
function MsgPack.from_msgpack(::Type{Nanosecond}, x::Integer)
    return Nanosecond(x)
end
function MsgPack.to_msgpack(::MsgPack.IntegerType, x::Nanosecond)
    return Dates.value(x)
end
function MsgPack.msgpack_type(::Type{UUID})
    return MsgPack.StringType()
end
function MsgPack.from_msgpack(::Type{UUID}, x::String)
    return UUID(x)
end
function MsgPack.to_msgpack(::MsgPack.StringType, x::UUID)
    return string(x)
end
# Immutable record with ten fields describing one signal. `file_metadata` is
# the only optional field: it holds either `nothing` or a String => String dict.
struct Signal
    file_uri::String
    file_metadata::Union{Nothing,Dict{String,String}}
    channel_names::Vector{String}

    start_nanosecond::Nanosecond
    stop_nanosecond::Nanosecond

    sample_unit::String
    sample_resolution_in_unit::Float64
    sample_offset_in_unit::Float64
    sample_type::String
    sample_rate::Float64
end
# Immutable record pairing a string `value` with a start/stop `Nanosecond` span.
struct Annotation
    value::String

    start_nanosecond::Nanosecond
    stop_nanosecond::Nanosecond
end
# Immutable container: signals keyed by a string name, plus a vector of
# annotations.
struct Recording
    signals::Dict{String,Signal}

    annotations::Vector{Annotation}
end
# Call Arrow's `registertype!` for each record type, pairing the type with itself.
for T in (Annotation, Signal, Recording)
    Arrow.ArrowTypes.registertype!(T, T)
end

# Tell MsgPack to treat each record type as a `StructType`.
function MsgPack.msgpack_type(::Type{Recording})
    return MsgPack.StructType()
end
function MsgPack.msgpack_type(::Type{Signal})
    return MsgPack.StructType()
end
function MsgPack.msgpack_type(::Type{Annotation})
    return MsgPack.StructType()
end
# Sample recording reused for every row of the benchmark table below: two
# signals ("eeg" without file metadata, "ecg" with a small metadata dict) and
# two annotations.
r = Recording(
    Dict{String,Signal}(
        "eeg" => Signal(
            "file://joe/dave/eeg.lpcm.zst",
            nothing,
            [
                "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
                "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
            ],
            Nanosecond(0),
            Nanosecond(100000),
            "microvolt",
            1.0,
            1.0,
            "float64",
            256.0,
        ),
        "ecg" => Signal(
            "file://joe/dave/ecg.lpcm.zst",
            Dict{String,String}("a" => "absidufbaid", "b" => "adjfhaudi"),
            ["a", "b", "c", "d"],
            Nanosecond(0),
            Nanosecond(100000),
            "microvolt",
            0.134,
            1.9875,
            "int32",
            128.0,
        ),
    ),
    Annotation[
        Annotation("1", Nanosecond(0), Nanosecond(10)),
        Annotation("aksdfhakjsdhfadf", Nanosecond(0), Nanosecond(10)),
    ],
)
# Benchmark table: 10000 fresh UUIDs alongside 10000 references to the same
# sample recording `r`.
nt = let rowcount = 10000
    (uuids=[uuid4() for _ in 1:rowcount], recordings=fill(r, rowcount))
end

# Serialize the table once per format into its own in-memory buffer.
arrow_io = IOBuffer()
Arrow.write(arrow_io, nt)

msgpack_io = IOBuffer()
MsgPack.pack(msgpack_io, nt)
# Load an Arrow table from `io` and materialize its first two columns with
# `collect`, returning them as a `(uuids, recordings)` named tuple.
function read_arrow_fully_strict(io)
    table = Arrow.Table(io)
    uuid_column = collect(table[1])
    recording_column = collect(table[2])
    return (uuids=uuid_column, recordings=recording_column)
end
# Unpack MsgPack data from `io` directly into a typed
# `(uuids::Vector{UUID}, recordings::Vector{Recording})` named tuple, passing
# `Recording` in the `strict` keyword.
function read_msgpack_fully_strict(io)
    target = NamedTuple{(:uuids, :recordings),Tuple{Vector{UUID},Vector{Recording}}}
    return MsgPack.unpack(io, target; strict=(Recording,))
end
# Each measurement rewinds its buffer before reading. The figures in the
# comments are the timings recorded in the original script.

# MsgPack, untyped unpack:
# 0.136834 seconds (1.20 M allocations: 72.175 MiB)
@time begin
    seekstart(msgpack_io)
    MsgPack.unpack(msgpack_io)
end;

# MsgPack, typed/strict unpack:
# 0.200281 seconds (3.16 M allocations: 102.082 MiB)
@time begin
    seekstart(msgpack_io)
    read_msgpack_fully_strict(msgpack_io)
end;

# Arrow, table construction only:
# 0.001726 seconds (2.17 k allocations: 4.844 MiB)
@time begin
    seekstart(arrow_io)
    Arrow.Table(arrow_io)
end;

# Arrow, table construction plus `collect` of both columns:
# 0.096876 seconds (1.79 M allocations: 124.082 MiB)
@time begin
    seekstart(arrow_io)
    read_arrow_fully_strict(arrow_io)
end;
#############################################################################################
#############################################################################################
#############################################################################################
# Variant of the signal record in which `channel_names`, `sample_unit` and
# `sample_type` are stored as `Symbol`s instead of `String`s.
struct SignalSym
    file_uri::String
    file_metadata::Union{Nothing,Dict{String,String}}
    channel_names::Vector{Symbol}

    start_nanosecond::Nanosecond
    stop_nanosecond::Nanosecond

    sample_unit::Symbol
    sample_resolution_in_unit::Float64
    sample_offset_in_unit::Float64
    sample_type::Symbol
    sample_rate::Float64
end
# Annotation record for the Symbol-based variant: a string `value` with a
# start/stop `Nanosecond` span.
struct AnnotationSym
    value::String

    start_nanosecond::Nanosecond
    stop_nanosecond::Nanosecond
end
# Immutable container: signals keyed by a `Symbol` name, plus a vector of
# annotations.
struct RecordingSym
    signals::Dict{Symbol,SignalSym}

    annotations::Vector{AnnotationSym}
end
# Call Arrow's `registertype!` for each Symbol-based record type, pairing the
# type with itself.
for T in (AnnotationSym, SignalSym, RecordingSym)
    Arrow.ArrowTypes.registertype!(T, T)
end

# Tell MsgPack to treat each Symbol-based record type as a `StructType`.
function MsgPack.msgpack_type(::Type{RecordingSym})
    return MsgPack.StructType()
end
function MsgPack.msgpack_type(::Type{SignalSym})
    return MsgPack.StructType()
end
function MsgPack.msgpack_type(::Type{AnnotationSym})
    return MsgPack.StructType()
end
# Symbol-based counterpart of the sample recording: two signals (`:eeg` without
# file metadata, `:ecg` with a small metadata dict) and two annotations.
r_sym = RecordingSym(
    Dict{Symbol,SignalSym}(
        :eeg => SignalSym(
            "file://joe/dave/eeg.lpcm.zst",
            nothing,
            [
                :a, :b, :c, :d, :e, :f, :g, :h, :i, :j, :k, :l, :m,
                :n, :o, :p, :q, :r, :s, :t, :u, :v, :w, :x, :y, :z,
            ],
            Nanosecond(0),
            Nanosecond(100000),
            :microvolt,
            1.0,
            1.0,
            :float64,
            256.0,
        ),
        :ecg => SignalSym(
            "file://joe/dave/ecg.lpcm.zst",
            Dict{String,String}("a" => "absidufbaid", "b" => "adjfhaudi"),
            [:a, :b, :c, :d],
            Nanosecond(0),
            Nanosecond(100000),
            :microvolt,
            0.134,
            1.9875,
            :int32,
            128.0,
        ),
    ),
    AnnotationSym[
        AnnotationSym("1", Nanosecond(0), Nanosecond(10)),
        AnnotationSym("aksdfhakjsdhfadf", Nanosecond(0), Nanosecond(10)),
    ],
)
# Benchmark table for the Symbol-based variant: 10000 fresh UUIDs alongside
# 10000 references to the same sample recording `r_sym`.
nt_sym = let rowcount = 10000
    (uuids=[uuid4() for _ in 1:rowcount], recordings=fill(r_sym, rowcount))
end

# Serialize the table once per format into its own in-memory buffer.
arrow_io_sym = IOBuffer()
Arrow.write(arrow_io_sym, nt_sym)

msgpack_io_sym = IOBuffer()
MsgPack.pack(msgpack_io_sym, nt_sym)
# Load an Arrow table from `io` and materialize its first two columns with
# `collect`, returning them as a `(uuids, recordings)` named tuple.
function read_arrow_fully_strict_sym(io)
    table = Arrow.Table(io)
    uuid_column = collect(table[1])
    recording_column = collect(table[2])
    return (uuids=uuid_column, recordings=recording_column)
end
# Unpack MsgPack data from `io` directly into a typed
# `(uuids::Vector{UUID}, recordings::Vector{RecordingSym})` named tuple,
# passing `RecordingSym` in the `strict` keyword.
function read_msgpack_fully_strict_sym(io)
    target = NamedTuple{(:uuids, :recordings),Tuple{Vector{UUID},Vector{RecordingSym}}}
    return MsgPack.unpack(io, target; strict=(RecordingSym,))
end
# Each measurement rewinds its buffer before reading. The figures in the
# comments are the timings recorded in the original script.

# MsgPack, untyped unpack:
# 0.141025 seconds (1.20 M allocations: 72.175 MiB)
@time begin
    seekstart(msgpack_io_sym)
    MsgPack.unpack(msgpack_io_sym)
end;

# MsgPack, typed/strict unpack:
# 0.222684 seconds (3.16 M allocations: 102.082 MiB)
@time begin
    seekstart(msgpack_io_sym)
    read_msgpack_fully_strict_sym(msgpack_io_sym)
end;

# Arrow, table construction only:
# 0.001674 seconds (2.35 k allocations: 4.861 MiB)
@time begin
    seekstart(arrow_io_sym)
    Arrow.Table(arrow_io_sym)
end;

# Arrow, table construction plus `collect` of both columns:
# 0.169594 seconds (1.79 M allocations: 124.099 MiB)
@time begin
    seekstart(arrow_io_sym)
    read_arrow_fully_strict_sym(arrow_io_sym)
end;
#############################################################################################
#############################################################################################
#############################################################################################
using UUIDs, Dates, Arrow, Tables
# Immutable record with ten fields describing one signal. `file_metadata` is
# the only optional field: it holds either `nothing` or a String => String dict.
struct Signal
    file_uri::String
    file_metadata::Union{Nothing,Dict{String,String}}
    channel_names::Vector{String}

    start_nanosecond::Nanosecond
    stop_nanosecond::Nanosecond

    sample_unit::String
    sample_resolution_in_unit::Float64
    sample_offset_in_unit::Float64
    sample_type::String
    sample_rate::Float64
end
# Immutable record pairing a string `value` with a start/stop `Nanosecond` span.
struct Annotation
    value::String

    start_nanosecond::Nanosecond
    stop_nanosecond::Nanosecond
end
# Immutable container: signals keyed by a string name, and annotations keyed
# by `UUID`.
struct Recording
    signals::Dict{String,Signal}

    annotations::Dict{UUID,Annotation}
end
# Call Arrow's `registertype!` for each record type, pairing the type with itself.
for T in (Annotation, Signal, Recording)
    Arrow.ArrowTypes.registertype!(T, T)
end
# Sample recording whose `signals` and `annotations` dicts are reused for every
# row of the table built below. Annotation keys are freshly generated UUIDs.
r = Recording(
    Dict{String,Signal}(
        "eeg" => Signal(
            "file://joe/dave/eeg.lpcm.zst",
            nothing,
            [
                "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
                "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
            ],
            Nanosecond(0),
            Nanosecond(100000),
            "microvolt",
            1.0,
            1.0,
            "float64",
            256.0,
        ),
        "ecg" => Signal(
            "file://joe/dave/ecg.lpcm.zst",
            Dict{String,String}("a" => "absidufbaid", "b" => "adjfhaudi"),
            ["a", "b", "c", "d"],
            Nanosecond(0),
            Nanosecond(100000),
            "microvolt",
            0.134,
            1.9875,
            "int32",
            128.0,
        ),
    ),
    Dict{UUID,Annotation}(
        uuid4() => Annotation("1", Nanosecond(0), Nanosecond(10)),
        uuid4() => Annotation("aksdfhakjsdhfadf", Nanosecond(0), Nanosecond(10)),
    ),
)
# Number of rows in the column table.
n = 10000

# Column table: one fresh UUID per row; every row shares the same `signals`
# and `annotations` dicts taken from the sample recording `r`.
tbl = (
    uuids=[uuid4() for _ in 1:n],
    signals=fill(r.signals, n),
    annotations=fill(r.annotations, n),
)
# Round-trip `tbl` through Arrow: write it to an in-memory buffer, rewind the
# buffer, and return the `Arrow.Table` read back from it.
function write_table(tbl)
    buffer = IOBuffer()
    Arrow.write(buffer, tbl)
    seekstart(buffer)
    return Arrow.Table(buffer)
end
# Build a `Dict` mapping each entry of `tbl.uuids` to the row of `tbl` at the
# same position (rows are produced by `Tables.rows`).
#
# Throws an `ErrorException` when `tbl.uuids` contains duplicates: a repeated
# key would otherwise silently overwrite an earlier row in the resulting `Dict`.
function index_rows(tbl)
    uuids = tbl.uuids
    allunique(uuids) ||
        error("cannot index rows: `tbl.uuids` contains duplicate entries")
    return Dict(zip(uuids, Tables.rows(tbl)))
end
# tbl = write_table(tbl);
# @time map(collect, tbl); # 0.127786 seconds (2.08 M allocations: 147.139 MiB)
# @time index_rows(tbl); # 0.001919 seconds (10.05 k allocations: 1.489 MiB)
#############################################################################################
#############################################################################################
#############################################################################################
using UUIDs, Dates, Arrow, Tables
# Immutable record with ten fields describing one signal. `file_metadata` is
# the only optional field: it holds either `nothing` or a String => String dict.
struct Signal
    file_uri::String
    file_metadata::Union{Nothing,Dict{String,String}}
    channel_names::Vector{String}

    start_nanosecond::Nanosecond
    stop_nanosecond::Nanosecond

    sample_unit::String
    sample_resolution_in_unit::Float64
    sample_offset_in_unit::Float64
    sample_type::String
    sample_rate::Float64
end
# Immutable record pairing a string `value` with a start/stop `Nanosecond` span.
struct Annotation
    value::String

    start_nanosecond::Nanosecond
    stop_nanosecond::Nanosecond
end
# Mutable recording whose fields may be left unassigned at construction time.
# Both inner constructors start from `new()` and assign only the fields they
# were given, so either `_row` or the `_signals`/`_annotations` pair is set.
mutable struct Recording
    _row::Tables.ColumnsRow{Tables.CopiedColumns{Arrow.Table}}
    _signals::Dict{String,Signal}
    _annotations::Dict{UUID,Annotation}

    # Wrap a table row; `_signals` and `_annotations` stay unassigned.
    function Recording(_row)
        obj = new()
        obj._row = _row
        return obj
    end

    # Build directly from already-available values; `_row` stays unassigned.
    function Recording(_signals, _annotations)
        obj = new()
        obj._signals = _signals
        obj._annotations = _annotations
        return obj
    end
end
# Property access for `Recording`.
#
# `:signals` and `:annotations` are virtual properties backed by the
# `_signals` / `_annotations` fields. If the backing field is still unassigned
# it is first filled from the matching property of `_row`, then returned; later
# accesses reuse the stored value. Any other name is forwarded to `getfield`.
function Base.getproperty(recording::Recording, field::Symbol)
    if field === :signals
        if !isdefined(recording, :_signals)
            # `getfield` on `_row` is what `recording._row` resolves to here.
            recording._signals = getfield(recording, :_row).signals
        end
        return getfield(recording, :_signals)
    end
    if field === :annotations
        if !isdefined(recording, :_annotations)
            recording._annotations = getfield(recording, :_row).annotations
        end
        return getfield(recording, :_annotations)
    end
    return getfield(recording, field)
end
# Call Arrow's `registertype!` for the two record types stored in table
# columns, pairing each type with itself.
for T in (Annotation, Signal)
    Arrow.ArrowTypes.registertype!(T, T)
end
# Sample `signals` dict shared by every row of the table built below.
signals = Dict{String,Signal}(
    "eeg" => Signal(
        "file://joe/dave/eeg.lpcm.zst",
        nothing,
        [
            "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
            "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
        ],
        Nanosecond(0),
        Nanosecond(100000),
        "microvolt",
        1.0,
        1.0,
        "float64",
        256.0,
    ),
    "ecg" => Signal(
        "file://joe/dave/ecg.lpcm.zst",
        Dict{String,String}("a" => "absidufbaid", "b" => "adjfhaudi"),
        ["a", "b", "c", "d"],
        Nanosecond(0),
        Nanosecond(100000),
        "microvolt",
        0.134,
        1.9875,
        "int32",
        128.0,
    ),
)

# Sample `annotations` dict (keys are freshly generated UUIDs), also shared by
# every row.
annotations = Dict{UUID,Annotation}(
    uuid4() => Annotation("1", Nanosecond(0), Nanosecond(10)),
    uuid4() => Annotation("aksdfhakjsdhfadf", Nanosecond(0), Nanosecond(10)),
)
# Round-trip `tbl` through Arrow: write it to an in-memory buffer, rewind the
# buffer, and return the `Arrow.Table` read back from it.
function write_table(tbl)
    buffer = IOBuffer()
    Arrow.write(buffer, tbl)
    seekstart(buffer)
    return Arrow.Table(buffer)
end
# Number of rows in the table.
n = 10000

# Arrow table read back from an in-memory round trip: one fresh UUID per row,
# with every row sharing the same sample `signals` and `annotations` dicts.
tbl = write_table((
    uuids=[uuid4() for _ in 1:n],
    signals=fill(signals, n),
    annotations=fill(annotations, n),
))
# Build a `Dict{UUID,Recording}` mapping each entry of `tbl.uuids` to a
# `Recording` that wraps the row of `tbl` at the same position. Rows are only
# wrapped here; nothing is read from them in this function.
#
# Throws an `ErrorException` when `tbl.uuids` contains duplicates: a repeated
# key would otherwise silently overwrite an earlier recording in the `Dict`.
function read_recordings(tbl)
    uuids = tbl.uuids
    allunique(uuids) ||
        error("cannot read recordings: `tbl.uuids` contains duplicate entries")
    recordings = (Recording(row) for row in Tables.rows(tbl))
    return Dict{UUID,Recording}(zip(uuids, recordings))
end
# The figures in the comments are the timings recorded in the original script.

# Wrapping every row in a `Recording`:
# 0.001716 seconds (10.05 k allocations: 1.642 MiB)
@time recordings = read_recordings(tbl)

# Take one recording and time each property twice in a row.
example = first(recordings).second

# First access:  0.000058 seconds (150 allocations: 9.688 KiB)
@time example.signals
# Second access: 0.000004 seconds
@time example.signals

# First access:  0.000045 seconds (63 allocations: 5.516 KiB)
@time example.annotations
# Second access: 0.000005 seconds
@time example.annotations
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment