Created
June 2, 2020 21:05
-
-
Save kolia/f2a3fe892498a555c0f0bd03d8e8dbc6 to your computer and use it in GitHub Desktop.
Figuring out whether tuh_events annotation timescale makes sense
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://www.isip.piconepress.com/projects/tuh_eeg/downloads/tuh_eeg_events/
# downloaded to /home/ubuntu/data/.
# Activate the project environment that lives next to this script, make sure
# the third-party packages it needs are installed, and print the resulting
# package status (`st`) before loading anything.
using Pkg
Pkg.activate(@__DIR__)
pkg"""
add EDF
add Onda
st
"""
using EDF, Onda, Dates, UUIDs
"""
    get_labels(dir; nanoseconds_per_tick=10000)

Collect the distinct annotations found in every `.lab` file directly inside `dir`.

Each non-blank line is split on whitespace and read as `start stop event`: the
first two fields are parsed as integers and the third is used as the annotation
value. Duplicate lines (across all files) are dropped, keeping first-seen order.

# Keywords
- `nanoseconds_per_tick`: factor that converts the integer start/stop fields to
  nanoseconds. The default `10000` is the factor this script has always used;
  NOTE(review): whether it matches the time unit of the `.lab` files is exactly
  what this script is trying to establish — confirm before relying on it.

# Returns
A vector with one `Annotation(event, start, stop)` per distinct label line; empty
when `dir` has no `.lab` files or they contain only blank lines.
"""
function get_labels(dir; nanoseconds_per_tick::Integer=10000)
    label_files = [joinpath(dir, f) for f in readdir(dir) if endswith(f, ".lab")]
    # Blank lines (e.g. a trailing newline at the end of a file) split into zero
    # fields and would make the `fields[3]` lookup below throw, so skip them.
    labels = [
        split(line) for f in label_files for line in readlines(f) if
        !isempty(strip(line))
    ]
    unique!(labels)
    return map(labels) do fields
        event = fields[3]
        start, finish = Nanosecond.(parse.(Int, fields[1:2]) .* nanoseconds_per_tick)
        Annotation(event, start, finish)
    end
end
# Root of the training split; each sub-directory is treated as one patient.
root = "/home/ubuntu/data/tuh_events_raw/edf/train/"
# Keep directories only: a stray regular file under `root` would make the
# `readdir` call inside `get_labels` throw.
patientdirs = [joinpath(root, d) for d in readdir(root) if isdir(joinpath(root, d))]
# Stop time, in seconds, of the last annotation listed for each patient
# directory, or `missing` when the directory yields no annotations at all.
stops = map(patientdirs) do pd
    labels = get_labels(pd)
    isempty(labels) ? missing : last(labels).stop_nanosecond.value / 1e9
end
"""
    duration(dir)

Return the duration of the first `.edf` file found while walking `dir`, computed
as `record_count * seconds_per_record` from the file's header.

Only the first `.edf` file encountered is inspected. Returns `missing` when no
`.edf` file exists under `dir`, or when that first file cannot be read (a
warning with the path and the error is logged in that case).
"""
function duration(dir)
    for (root, _, files) in walkdir(dir)
        for file in files
            endswith(file, ".edf") || continue
            path = joinpath(root, file)
            try
                edf = EDF.read(path)
                return edf.header.record_count * edf.header.seconds_per_record
            catch err
                # Stay best-effort for unreadable files, but never swallow a
                # user interrupt and never fail silently.
                err isa InterruptException && rethrow()
                @warn "Could not read EDF file" path exception = err
                return missing
            end
        end
    end
    # No `.edf` file anywhere under `dir`: report it the same way as an
    # unreadable file so callers only ever see a number or `missing`.
    return missing
end
# One entry per patient directory, in the same order as `patientdirs`.
durations = duration.(patientdirs)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment