Skip to content

Instantly share code, notes, and snippets.

@vasile
Created October 5, 2015 20:58
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vasile/64340d8233673ef96b00 to your computer and use it in GitHub Desktop.
Save vasile/64340d8233673ef96b00 to your computer and use it in GitHub Desktop.
data.gov.ro infofer parser
require 'nokogiri'
require 'sqlite3'
require 'FileUtils'
# Formats a number of seconds as a zero-padded "HH:MM" string.
#
# Leftover seconds are discarded, and hours are not wrapped at 24, so
# 90_000 seconds renders as "25:00".
#
# Adapted from http://stackoverflow.com/a/9916691
#
# @param total_seconds [Integer] number of seconds to format
# @return [String] the value rendered as "HH:MM"
def seconds_to_hhmm(total_seconds)
  whole_minutes = total_seconds / 60
  hours, minutes = whole_minutes.divmod(60)
  format("%02d:%02d", hours, minutes)
end
# Lookup table from numeric operator code (as a string) to a short agency slug.
# NOTE(review): the keys presumably match the `Operator` attribute of <Tren>
# elements - confirm against the feed. The table is not referenced by the
# code visible in this file.
agency_map_id = {
  '236037'  => 'interregional',
  '227098'  => 'regiotrans',
  '6100826' => 'sntfc',
  '236025'  => 'softrans',
  '228389'  => 'transferoviar'
}
# Converts the ElementTrasa segments of one train into its ordered stop list.
#
# Every segment contributes one stop for its origin station (CodStaOrigine).
# The arrival time of a stop is the OraS of the previous segment, the
# departure time is the OraP of the current one. The first stop has an empty
# arrival and the stop built from the last segment has an empty departure.
#
# Distances are summed in the feed's raw unit and converted once per stop, so
# segments shorter than one kilometre are no longer rounded down to zero
# before being added up.
# NOTE(review): the `Km` attribute is assumed to be in metres (hence the
# division by 1000) - confirm against the dataset documentation.
#
# NOTE(review): the destination of the final segment (CodStaDest) is never
# emitted as a stop. Confirm the feed closes every route with a terminating
# element; otherwise the last station of each trip is missing.
#
# @param row_stations [#length, #each_with_index] segments of one train, each
#   responding to `attr(name)`
# @param data_stops [Hash] station registry keyed by station id; stations seen
#   for the first time are added to it in place
# @return [Array<Hash>] one hash per stop with the keys "stop_id", "arr",
#   "dep" and "km_cumulated"
def build_trip_stops(row_stations, data_stops)
  last_index = row_stations.length - 1
  distance_cumulated = 0
  previous_arrival = nil

  row_stations.each_with_index.map do |row_station, k|
    station_id = row_station.attr('CodStaOrigine')
    # Register each station only once; the first name seen wins.
    data_stops[station_id] ||= {
      "id" => station_id,
      "name" => row_station.attr('DenStaOrigine'),
    }

    trip_stop = {
      "stop_id" => station_id,
      "arr" => k.zero? ? "" : seconds_to_hhmm(previous_arrival),
      "dep" => k == last_index ? "" : seconds_to_hhmm(row_station.attr('OraP').to_i),
      "km_cumulated" => distance_cumulated / 1000,
    }

    # Carried over to the next iteration, where it becomes the arrival time
    # and the distance already travelled.
    previous_arrival = row_station.attr('OraS').to_i
    distance_cumulated += row_station.attr('Km').to_i
    trip_stop
  end
end

# Station registry (station id => {"id", "name"}) and the list of parsed trips.
data_stops = {}
data_trips = []

# Parse every timetable XML file found below the current working directory.
Dir["#{Dir.pwd}/data.gov.ro/2014-2015/*.xml"].each do |file_in|
  # Announce the file before parsing so a failure can be traced back to it.
  puts "Parsing #{file_in}"
  doc = Nokogiri::XML(File.read(file_in))

  doc.xpath('/XmlIf/XmlMts/Mt/Trenuri/Tren').each do |row_train|
    row_stations = row_train.xpath('Trase/Trasa/ElementTrasa')
    data_trips.push({
      "type" => row_train.attr('CategorieTren'),
      "no" => row_train.attr('Numar'),
      "stops" => build_trip_stops(row_stations, data_stops),
      "agency_id" => row_train.attr('Operator'),
    })
  end
end

# Dump the parsed trips for inspection.
p data_trips
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment