Skip to content

Instantly share code, notes, and snippets.

@ptrv
Created March 15, 2012 00:44
Show Gist options
  • Save ptrv/2040767 to your computer and use it in GitHub Desktop.
Save ptrv/2040767 to your computer and use it in GitHub Desktop.
import vienna opendata trees GML file
#!/usr/bin/env ruby
# Get all tree entries from vienna's open data
# service and put them into a sqlite3 database.
#
# Peter Vasil
# Date: 2012-01-15
require 'rubygems'
# require 'rexml/document'
# include REXML
require 'nokogiri'
require 'sqlite3'
require 'progressbar'
$properties_names = [
"BAUMNUMMER",
"GEBIET",
"STRASSE",
"ART",
"PFLANZJAHR",
"STAMMUMFANG",
"KRONENDURCHMESSER",
"BAUMHOEHE"
]
$create_db_str = "
CREATE TABLE trees (
treeid INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
idstr TEXT NOT NULL,
lon REAL NOT NULL,
lat REAL NOT NULL,
#{$properties_names[0]} INTEGER,
#{$properties_names[1]} TEXT,
#{$properties_names[2]} TEXT,
#{$properties_names[3]} TEXT,
#{$properties_names[4]} INTEGER,
#{$properties_names[5]} REAL,
#{$properties_names[6]} REAL,
#{$properties_names[7]} REAL
);"
class Tree
attr_accessor :idstr, :lon, :lat, :properties
def initialize idstr, lon, lat, properties
@idstr = idstr
@lon = lon
@lat = lat
@properties = properties
end
def to_s
result = "Tree: "
result << "#{idstr}, #{lat}, #{lon}\n"
properties.each do |key, val|
result << "#{key} => #{val}\n"
end
result
end
end
# # DOM parser
# def parse_trees_xml(xml_file)
# gml_file = File.new(xml_file)
# doc = Document.new(gml_file)
# gml_file.close
#
# result = Array.new()
# root = doc.root
# num_elems = root.elements["count(//ogdwien:BAUMOGD)"]
# puts num_elems
# pbar = ProgressBar.new("Parse", num_elems)
# root.each_element("//ogdwien:BAUMOGD") do |elem|
# id = elem.attributes["gml:id"]
# geo = elem.elements[
# "./ogdwien:SHAPE/gml:Point/gml:pos"
# ].text.split(' ', 2)
# lon = geo[0].to_f
# lat = geo[1].to_f
# properties = {}
# $properties_names.each do |name|
# #puts name
# xpath_val = "./ogdwien:#{name}"
# properties[name] = elem.elements[xpath_val].text
# end
# result.push(Tree.new(id, lon, lat, properties))
# pbar.inc
# end
# pbar.finish
# result
# end
# Stream parser
def parse_trees_xml(xml_file)
reader = Nokogiri::XML::Reader(File.open(xml_file))
result = Array.new
count = 0
reader.each do |node|
if node.name == "ogdwien:BAUMOGD" && node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT
count += 1
end
end
# puts count
reader = Nokogiri::XML::Reader(File.open(xml_file))
pbar = ProgressBar.new("Parse", count)
id = ""
lon = 0.0
lat = 0.0
properties = {}
start_element = Nokogiri::XML::Reader::TYPE_ELEMENT
end_element = Nokogiri::XML::Reader::TYPE_END_ELEMENT
reader.each do |node|
if node.name == "ogdwien:BAUMOGD" && node.node_type == start_element
# puts "BAUMOGD"
id = node.attribute("gml:id")
pbar.inc
elsif node.name == "gml:pos" && node.node_type == start_element
# puts "pos"
geo = node.inner_xml.split(' ', 2)
lon = geo[0]
lat = geo[1]
elsif node.name == "ogdwien:#{$properties_names[0]}" && node.node_type == start_element
properties[$properties_names[0]] = node.inner_xml.to_i
elsif node.name == "ogdwien:#{$properties_names[1]}" && node.node_type == start_element
properties[$properties_names[1]] = node.inner_xml
elsif node.name == "ogdwien:#{$properties_names[2]}" && node.node_type == start_element
properties[$properties_names[2]] = node.inner_xml
elsif node.name == "ogdwien:#{$properties_names[3]}" && node.node_type == start_element
properties[$properties_names[3]] = node.inner_xml
elsif node.name == "ogdwien:#{$properties_names[4]}" && node.node_type == start_element
properties[$properties_names[4]] = node.inner_xml
elsif node.name == "ogdwien:#{$properties_names[5]}" && node.node_type == start_element
properties[$properties_names[5]] = node.inner_xml
elsif node.name == "ogdwien:#{$properties_names[6]}" && node.node_type == start_element
properties[$properties_names[6]] = node.inner_xml
elsif node.name == "ogdwien:#{$properties_names[7]}" && node.node_type == start_element
properties[$properties_names[7]] = node.inner_xml
elsif node.name == "ogdwien:BAUMOGD" && node.node_type == end_element
# puts "/BAUMOGD"
result.push(Tree.new(id,lon,lat,properties))
id = ""
lon = 0.0
lat = 0.0
properties = {}
end
end
pbar.finish
result
end
def prepare_db(db)
result = db.execute(
"SELECT CASE WHEN tbl_name = 'trees'
THEN 1 ELSE 0 END FROM sqlite_master
WHERE tbl_name = 'trees' AND type = 'table'"
)
if result.size == 0
unless result[0] == 1
db.execute($create_db_str)
end
end
end
def insert_trees(db, trees, num_trees)
actual_num_tree = 0
pbar = ProgressBar.new("Inserting", num_trees)
db.transaction
trees.each do |elem|
# actual_num_tree += 1
db.execute(
"INSERT INTO trees (idstr, lon, lat,
#{$properties_names[0]},
#{$properties_names[1]},
#{$properties_names[2]},
#{$properties_names[3]},
#{$properties_names[4]},
#{$properties_names[5]},
#{$properties_names[6]},
#{$properties_names[7]})
VALUES (:idstr, :lon, :lat,
:p1, :p2, :p3, :p4, :p5,:p6, :p7, :p8)",
"idstr" => elem.idstr,
"lon" => elem.lon,
"lat" => elem.lat,
"p1" => elem.properties[$properties_names[0]],
"p2" => elem.properties[$properties_names[1]],
"p3" => elem.properties[$properties_names[2]],
"p4" => elem.properties[$properties_names[3]],
"p5" => elem.properties[$properties_names[4]],
"p6" => elem.properties[$properties_names[5]],
"p7" => elem.properties[$properties_names[6]],
"p8" => elem.properties[$properties_names[7]]
)
# pbar.set(actual_num_tree);
pbar.inc
# puts "Iserted tree #{actual_num_tree} of #{num_trees}"
end
db.commit
db.close
pbar.finish
end
###########################################################
# program
###########################################################
# check if number of arguments is ok
unless ARGV.length == 2
puts "Not the right number of arguments."
puts "Usage: ruby trees2sqlite.rb gml.xml trees.sqlite.\n"
puts "GML file link: http://data.wien.gv.at/daten/wfs?service=WFS&request=GetFeature&version=1.1.0&typeName=ogdwien:BAUMOGD&srsName=EPSG:4326"
exit
end
import_file = ARGV[0]
database_path = ARGV[1]
###########################################################
# parse xml
###########################################################
trees = parse_trees_xml(import_file)
num_trees = trees.size
puts "Parsed #{num_trees} trees"
#trees.each do |elem|
#puts elem.to_s
#end
###########################################################
# insert trees into db
###########################################################
db = SQLite3::Database.new(database_path)
prepare_db(db)
insert_trees(db, trees, num_trees)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment