Last active
June 9, 2017 14:55
-
-
Save remomueller/c853734a2f85937b69b373c46760fcdd to your computer and use it in GitHub Desktop.
XML Integrity Checking
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# frozen_string_literal: true

# https://gist.github.com/remomueller/c853734a2f85937b69b373c46760fcdd
# gem install xml-simple colorize --no-document
# ruby xml_testing.rb
# XML Integrity Checking
#
# I think checking through all XML files for the following would be a good
# place to start on a list of checks:
#   1. No scored events at all (<ScoredEvent> area essentially empty, save for
#      staging data)
#   2. Arousal <3 seconds in duration (<ScoredEvent> with "Arousal" in <Name>
#      and <Duration> less than 3 seconds)
#   3. Respiratory event <10 seconds in duration (<ScoredEvent> with "Apnea",
#      "Hypopnea", or "Unsure" in <Name> and <Duration> less than 10 seconds)
#
# This could work somewhat like "edfize": it checks each individual file for
# any failures and prints a warning when it encounters an issue. For #2 and #3
# it would also be nice to have the start time of the flagged event(s) in the
# output, to know where to look when inspecting the XML manually.
require "rubygems" | |
require "colorize" | |
require "json" | |
require "xmlsimple" | |
# Returns the first entry stored under +key+ in +hash+, optionally converted.
#
# The value under +key+ is read at index 0, i.e. it is expected to be a list
# of child values (presumably the arrays XmlSimple produces for child
# elements -- confirm against the parser options in use).
#
# @param hash [Hash] element hash to read from
# @param key [String] child element name to look up
# @param convert [Symbol, nil] name of a method to call on the value
#   (e.g. +:to_f+); +nil+ returns the value unchanged
# @return [Object, nil] the (converted) first entry, or +nil+ when the key is
#   absent, the list is empty, or the first entry is +nil+ or empty
def extract(hash, key, convert: nil)
  entries = hash[key]
  first = entries && entries[0]

  # A missing or empty first entry is reported as "no value" rather than
  # raising NoMethodError on nil.empty?.
  return nil if first.nil?
  return nil if first.respond_to?(:empty?) && first.empty?

  convert ? first.public_send(convert) : first
end
# Case-insensitively checks whether +word+ begins with the literal text
# +search+.
#
# +search+ is escaped before being placed in the pattern, so characters such
# as "." or "(" are matched literally, and the match is anchored to the very
# start of the string (\A) rather than to the start of any line.
#
# @param search [#to_s] literal prefix to look for
# @param word [String, nil] text to test; +nil+ never matches
# @return [Boolean]
def starts_with?(search, word)
  !(/\A#{Regexp.escape(search.to_s)}/i =~ word).nil?
end
# Case-insensitively checks whether the literal text +search+ occurs anywhere
# in +word+.
#
# Despite the name this is a plain substring test; no word boundaries are
# required. +search+ is escaped so regex metacharacters are matched literally.
#
# @param search [#to_s] literal text to look for
# @param word [String, nil] text to test; +nil+ never matches
# @return [Boolean]
def contains_word?(search, word)
  !(/#{Regexp.escape(search.to_s)}/i =~ word).nil?
end
# Main script: scans every XML file below the current directory and reports
# each file that
#   * has no scored events,
#   * has events whose name starts with "Arousal" lasting under 3 seconds, or
#   * has events whose name contains "Apnea", "Hypopnea" or "Unsure" lasting
#     under 10 seconds.
# A file that cannot be read/parsed is reported as failed instead of aborting
# the whole run.
good_count = 0
bad_count = 0

# FNM_CASEFOLD makes the glob case-insensitive (e.g. also matches ".XML").
xmls = Dir.glob("**/*.xml", File::FNM_CASEFOLD)
xml_count = xmls.size

xmls.each_with_index do |xml_path, index|
  # "\r" returns to the start of the line so the progress counter is
  # overwritten in place.
  print "\r#{index + 1} of #{xml_count} (#{(index + 1) * 100 / xml_count}%)"

  begin
    xml = XmlSimple.xml_in(xml_path)
  rescue StandardError => e
    # Keep checking the remaining files; one broken file should not stop the
    # batch.
    bad_count += 1
    puts "\n#{xml_path}"
    puts "Error: Unable to parse XML (#{e.class})"
    puts "\n"
    next
  end

  # Walk ScoredEvents[0]["ScoredEvent"], tolerating any missing level.
  container = xml["ScoredEvents"] && xml["ScoredEvents"][0]
  raw_events = (container && container["ScoredEvent"]) || []

  scored_events = raw_events.map do |hash|
    {
      name: extract(hash, "Name"),
      lowest_spo2: extract(hash, "LowestSpO2", convert: :to_f),
      desaturation: extract(hash, "Desaturation", convert: :to_f),
      start: extract(hash, "Start", convert: :to_f),
      duration: extract(hash, "Duration", convert: :to_f),
      input: extract(hash, "Input")
    }
  end

  # Events without a duration cannot be compared against a threshold, so they
  # are skipped here rather than raising on nil < Number.
  arousal_events = scored_events.select do |event|
    duration = event[:duration]
    starts_with?("Arousal", event[:name]) && !duration.nil? && duration < 3
  end

  respiratory_events = scored_events.select do |event|
    duration = event[:duration]
    respiratory = %w[Apnea Hypopnea Unsure].any? { |term| contains_word?(term, event[:name]) }
    respiratory && !duration.nil? && duration < 10
  end

  failed = scored_events.empty? || !arousal_events.empty? || !respiratory_events.empty?
  unless failed
    good_count += 1
    next
  end

  bad_count += 1
  puts "\n#{xml_path}"
  puts "Empty: No Scored Events" if scored_events.empty?
  unless arousal_events.empty?
    puts "Detected: Arousal Events <3 seconds"
    puts " Start: #{arousal_events.map { |event| event[:start] }.join(', ')}"
  end
  unless respiratory_events.empty?
    puts "Detected: Respiratory Events <10 seconds"
    puts " Start: #{respiratory_events.map { |event| event[:start] }.join(', ')}"
  end
  puts "\n"
end

# Summary: counts are coloured only when non-zero; "XML" is pluralised unless
# the count is exactly 1.
good_label = good_count.to_s.colorize(good_count.positive? ? :green : nil)
bad_label = bad_count.to_s.colorize(bad_count.positive? ? :red : nil)
puts "\n#{good_label} XML#{good_count == 1 ? '' : 's'} passed"
puts "#{bad_label} XML#{bad_count == 1 ? '' : 's'} failed"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment