Skip to content

Instantly share code, notes, and snippets.

@mthh
Last active June 6, 2017 15:52
Show Gist options
  • Save mthh/98d32b1d10a68e933782481d802ba9a7 to your computer and use it in GitHub Desktop.
Save mthh/98d32b1d10a68e933782481d802ba9a7 to your computer and use it in GitHub Desktop.
extern crate osmpbfreader;
extern crate geojson;
extern crate serde_json;
extern crate regex;
#[macro_use] extern crate lazy_static;
use osmpbfreader::{OsmPbfReader, OsmObj, Tags};
use geojson::*;
use std::io::Write;
use std::collections::HashMap;
use regex::Regex;
/// Converts a free-form `start_date` tag value into a single comparable year.
///
/// The patterns are tried in this order and the first match wins:
///
/// 1. `YYYY`, `~YYYY`, `YYYYs`, `before YYYY` — the year itself, `+5` for a
///    bare decade (`1850s`), `-20` for `before`.
/// 2. `YYYY<sep>YYYY` — the midpoint of the two years.
/// 3. `YYYY-MM` — the year.
/// 4. `Cnn` with an optional `early`/`mid`/`late` qualifier — `nn * 100` minus
///    80, 50 or 20 respectively (50 when there is no qualifier).
/// 5. `YYYY-MM-DD` — the year.
/// 6. `YYYY-MM-DD<sep>YYYY-MM-DD` — the midpoint of the two years.
/// 7. `D/MM/YYYY` or `D MM YYYY` (trailing text allowed) — the year.
/// 8. `MM/YYYY` — the year.
///
/// Returns `None`, after printing a diagnostic on stdout, when no pattern
/// applies or when a captured number cannot be parsed.
fn to_comparable(start_date: &str) -> Option<i64> {
    lazy_static! {
        static ref RE1: Regex = Regex::new(r"^(~?|before ?)(\d{4})(s?)$").unwrap();
        // NOTE(review): the dots in the RE2/RE6 separators are unescaped, so they
        // accept any two or three characters, not only a literal `..`/`...`.
        // Left untouched so that every value accepted today keeps parsing.
        static ref RE2: Regex = Regex::new(r"^(\d{4})(...|..|-)(\d{4})$").unwrap();
        static ref RE3: Regex = Regex::new(r"^(\d{4})-\d{2}$").unwrap();
        static ref RE4: Regex = Regex::new(r"^(late ?|mid ?|early ?|\D?)C(\d{2})$").unwrap();
        static ref RE5: Regex = Regex::new(r"^(\d{4})-\d{2}-\d{2}$").unwrap();
        static ref RE6: Regex = Regex::new(r"^(\d{4})-\d{2}-\d{2}(...|..|-)(\d{4})-\d{2}-\d{2}$").unwrap();
        static ref RE7: Regex = Regex::new(r"^\d{1,2}(?:/| )\d{2}(?:/| )(\d{4})").unwrap();
        static ref RE8: Regex = Regex::new(r"^\d{2}/(\d{4})$").unwrap();
    }

    // `\d` is Unicode-aware in the regex crate, so a captured group is not
    // guaranteed to hold ASCII digits: a failed parse must yield `None`
    // instead of panicking on data coming straight from the input file.
    let year = |digits: &str| digits.parse::<i64>().ok();
    let midpoint = |first: Option<i64>, last: Option<i64>| match (first, last) {
        (Some(d1), Some(d2)) => Some((d2 + d1) / 2),
        _ => None,
    };

    let parsed = if let Some(caps) = RE1.captures(start_date) {
        let prefix = &caps[1];
        // The pattern makes the space after "before" optional, so test the
        // word itself rather than the word followed by a space.
        let offset = if prefix.starts_with("before") {
            -20
        } else if prefix.is_empty() && !caps[3].is_empty() {
            // Bare decade such as "1850s": use the middle of the decade.
            5
        } else {
            0
        };
        year(&caps[2]).map(|y| y + offset)
    } else if let Some(caps) = RE2.captures(start_date) {
        midpoint(year(&caps[1]), year(&caps[3]))
    } else if let Some(caps) = RE3.captures(start_date) {
        year(&caps[1])
    } else if let Some(caps) = RE4.captures(start_date) {
        let qualifier = &caps[1];
        // Years to subtract from `nn * 100`; the trailing space of each
        // qualifier is optional in the pattern, hence `starts_with`.
        let offset = if qualifier.starts_with("late") {
            Some(20)
        } else if qualifier.starts_with("mid") {
            Some(50)
        } else if qualifier.starts_with("early") {
            Some(80)
        } else if qualifier.is_empty() {
            Some(50)
        } else {
            // Single unrecognised character in front of the `C`.
            println!("{:?}", qualifier);
            None
        };
        match (year(&caps[2]), offset) {
            (Some(century), Some(back)) => Some(century * 100 - back),
            _ => None,
        }
    } else if let Some(caps) = RE5.captures(start_date) {
        year(&caps[1])
    } else if let Some(caps) = RE6.captures(start_date) {
        midpoint(year(&caps[1]), year(&caps[3]))
    } else if let Some(caps) = RE7.captures(start_date) {
        year(&caps[1])
    } else if let Some(caps) = RE8.captures(start_date) {
        year(&caps[1])
    } else {
        None
    };

    if parsed.is_none() {
        println!("Not parsed date format: {:?}", start_date);
    }
    parsed
}
/// Builds a GeoJSON point feature from `node` and appends it to `features_pts`.
///
/// The properties hold the node id under `"id"` followed by every tag of the
/// node; a `start_date` tag additionally produces a `comparable_start_date`
/// entry computed by `to_comparable` (JSON `null` when it returns `None`).
fn add_pt_feature(features_pts: &mut Vec<Feature>, node: &osmpbfreader::objects::Node) {
    let mut properties = serde_json::Map::new();
    properties.insert("id".to_string(), serde_json::to_value(node.id.0).unwrap());
    for (key, value) in node.tags.iter() {
        if key == "start_date" {
            let comparable = serde_json::to_value(to_comparable(value)).unwrap();
            properties.insert("comparable_start_date".to_string(), comparable);
        }
        let raw = serde_json::to_value(value).unwrap();
        properties.insert(key.as_str().to_string(), raw);
    }

    // Position is written as [lon, lat].
    let position = vec![node.lon(), node.lat()];
    let feature = Feature {
        bbox: None,
        geometry: Some(Geometry::new(Value::Point(position))),
        id: None,
        foreign_members: None,
        properties: Some(properties),
    };
    features_pts.push(feature);
}
/// Builds a GeoJSON polygon feature from `rings` and appends it to
/// `features_polygons`.
///
/// The properties hold `id` under `"id"` followed by every entry of `tags`; a
/// `start_date` tag additionally produces a `comparable_start_date` entry
/// computed by `to_comparable` (JSON `null` when it returns `None`).
fn add_polygon_feature(features_polygons: &mut Vec<Feature>, id: i64, tags: &Tags, rings: Vec<Vec<Vec<f64>>>) {
    let mut properties = serde_json::Map::new();
    properties.insert("id".to_string(), serde_json::to_value(id).unwrap());
    for (key, value) in tags.iter() {
        if key == "start_date" {
            let comparable = serde_json::to_value(to_comparable(value)).unwrap();
            properties.insert("comparable_start_date".to_string(), comparable);
        }
        let raw = serde_json::to_value(value).unwrap();
        properties.insert(key.as_str().to_string(), raw);
    }

    let feature = Feature {
        bbox: None,
        geometry: Some(Geometry::new(Value::Polygon(rings))),
        id: None,
        foreign_members: None,
        properties: Some(properties),
    };
    features_polygons.push(feature);
}
fn extract1(filename: &std::ffi::OsString){
let r = std::fs::File::open(&std::path::Path::new(filename)).unwrap();
let mut pbf = OsmPbfReader::new(r);
let mut map_nodes = HashMap::new();
let mut map_ways = HashMap::new();
let mut features_pts = Vec::new();
let mut features_polygons = Vec::new();
let objs = pbf.get_objs_and_deps(|obj| {
if obj.is_way() || obj.is_node() {
let tags = obj.tags();
tags.contains_key("building") && tags.contains_key("start_date")
} else if obj.is_relation() {
let tags = obj.tags();
tags.contains_key("building") && tags.contains_key("start_date") && tags.contains("type", "multipolygon")
} else {
false
}
}).unwrap();
for (_, obj) in &objs {
match *obj {
OsmObj::Node(ref node) => {
map_nodes.insert(node.id.0, (node.lon(), node.lat()));
if node.tags.contains_key("building") && node.tags.contains_key("start_date") {
add_pt_feature(&mut features_pts, &node);
}
},
OsmObj::Way(ref way) => {
if !way.tags.contains_key("building") && !way.tags.contains_key("start_date") {
let mut ids: Vec<i64> = Vec::new();
for id in &way.nodes {
ids.push(id.0);
}
map_ways.insert(way.id.0, ids);
}
},
_ => continue
}
}
for (_, obj) in &objs {
match *obj {
OsmObj::Way(ref way) => {
if way.tags.contains_key("building") && way.tags.contains_key("start_date") {
let mut vec_pts = Vec::new();
for osm_id in &way.nodes {
let node = map_nodes.get(&osm_id.0).unwrap();
vec_pts.push(vec![node.0, node.1]);
}
add_polygon_feature(&mut features_polygons, way.id.0, &way.tags, vec![vec_pts]);
}
},
OsmObj::Relation(ref relation) => {
if relation.tags.contains_key("building") && relation.tags.contains_key("start_date"){
let mut rings = Vec::with_capacity(relation.refs.len());
let mut interiors = Vec::new();
for member in &relation.refs {
if member.role == "outer" {
if !member.member.is_way(){
continue;
}
let mut exterior = Vec::new();
let way_id = member.member.way().unwrap();
let way = map_ways.get(&way_id.0);
if way.is_some(){
for osm_id in way.unwrap() {
let node = map_nodes.get(&osm_id).unwrap();
exterior.push(vec![node.0, node.1]);
}
}
rings.push(exterior);
for ring in interiors {
rings.push(ring);
}
interiors = Vec::new();
add_polygon_feature(&mut features_polygons, relation.id.0, &relation.tags, rings);
rings = Vec::with_capacity(relation.refs.len());
} else if member.role == "inner" {
if !member.member.is_way(){
continue;
}
let way_id = member.member.way().unwrap();
let way = map_ways.get(&way_id.0);
if way.is_some() {
let mut int_ring = Vec::new();
for osm_id in way.unwrap() {
let node = map_nodes.get(&osm_id).unwrap();
int_ring.push(vec![node.0, node.1]);
}
interiors.push(int_ring);
}
} else {
println!("Unknow role : \"{}\"", member.role);
}
}
add_polygon_feature(&mut features_polygons, relation.id.0, &relation.tags, rings);
}
}, _ => continue
}
}
let feature_collection_pts = FeatureCollection {
bbox: None,
features: features_pts,
foreign_members: None,
};
let feature_collection_polygons = FeatureCollection {
bbox: None,
features: features_polygons,
foreign_members: None,
};
let serialized_pts = GeoJson::from(feature_collection_pts).to_string();
let mut file_pts = std::fs::File::create("points.geojson").unwrap();
file_pts.write(serialized_pts.as_bytes()).unwrap();
let serialized_polygons = GeoJson::from(feature_collection_polygons).to_string();
let mut file_polygons = std::fs::File::create("polygons.geojson").unwrap();
file_polygons.write(serialized_polygons.as_bytes()).unwrap();
}
/// Entry point: expects exactly one command-line argument, the path of the
/// file handed to `extract1`; prints a usage line for any other argument count.
fn main() {
    let args: Vec<_> = std::env::args_os().collect();
    if args.len() == 2 {
        extract1(&args[1]);
    } else {
        println!("Usage: {:?} filename", args[0]);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment