Skip to content

Instantly share code, notes, and snippets.

@kenji4569
Last active August 10, 2021 13:04
Show Gist options
  • Save kenji4569/7e11c73894ba925f2d6d43727f442d3e to your computer and use it in GitHub Desktop.
Save kenji4569/7e11c73894ba925f2d6d43727f442d3e to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "72391778",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--- node tags ---\n",
"[(\"name\", 337484), (\"source\", 213180), (\"highway\", 185851), (\"amenity\", 163303), (\"name:en\", 103501), (\"name:ja\", 74679), (\"shop\", 60935), (\"operator\", 56152), (\"public_transport\", 52231), (\"natural\", 46208), (\"power\", 42221), (\"note\", 41456), (\"bus\", 39772), (\"source_ref\", 38462), (\"crossing\", 32619), (\"railway\", 32216), (\"place\", 31430), (\"brand\", 31110), (\"barrier\", 29033), (\"note:ja\", 28777), (\"brand:wikidata\", 25806), (\"cuisine\", 23996), (\"ref\", 23961), (\"brand:en\", 23766), (\"brand:ja\", 23382), (\"brand:wikipedia\", 22984), (\"name:ja_rm\", 22925), (\"opening_hours\", 20177), (\"tourism\", 19656), (\"name:ja-Hira\", 19612), (\"level\", 18624), (\"website\", 17065), (\"KSJ2:ADS\", 16182), (\"KSJ2:PubFacAdmin\", 16181), (\"phone\", 16007), (\"branch\", 15789), (\"addr:postcode\", 13339), (\"wheelchair\", 12812), (\"name:ja_kana\", 11452), (\"name:ko\", 11162), (\"information\", 11027), (\"traffic_signals\", 10694), (\"addr:housenumber\", 10547), (\"wikidata\", 9829), (\"emergency\", 9720), (\"addr:city\", 9617), (\"KSJ2:AdminArea\", 9382), (\"religion\", 8930), (\"wikipedia\", 8319), (\"addr:quarter\", 8253), (\"addr:province\", 8224), (\"office\", 8200), (\"healthcare\", 8131), (\"historic\", 8114), (\"vending\", 7285), (\"KSJ2:curve_id\", 7115), (\"noexit\", 7084), (\"addr:full\", 6618), (\"shelter\", 6554), (\"takeaway\", 6445), (\"addr:neighbourhood\", 6300), (\"addr:block_number\", 5966), (\"social_facility\", 5770), (\"bicycle\", 5666), (\"KSJ2:filename\", 5598), (\"leisure\", 5397), (\"man_made\", 5299), (\"denotation\", 5235), (\"leaf_type\", 5200), (\"entrance\", 5126), (\"official_name:en\", 4921), (\"bench\", 4791), (\"direction\", 4766), (\"created_by\", 4757), (\"name:ja-Latn\", 4697), (\"internet_access\", 4686), (\"description\", 4273), (\"material\", 4232), (\"access\", 4229), (\"backrest\", 3797), (\"train\", 3789), (\"leaf_cycle\", 3668), (\"atm\", 3503), (\"name:es\", 3463), (\"covered\", 3462), (\"local_ref\", 3321), (\"network\", 3245), (\"foot\", 3120), (\"fee\", 3115)]\n",
"\n",
"--- way tags ---\n",
"[(\"building\", 3212646), (\"source\", 2575111), (\"highway\", 1822317), (\"yh:WIDTH\", 321061), (\"name\", 273654), (\"source_ref\", 193750), (\"surface\", 155667), (\"landuse\", 150326), (\"yh:TYPE\", 135061), (\"yh:STRUCTURE\", 135060), (\"yh:TOTYUMONO\", 124827), (\"yh:WIDTH_RANK\", 124806), (\"oneway\", 119129), (\"layer\", 119014), (\"service\", 117376), (\"amenity\", 116859), (\"name:en\", 95188), (\"building:levels\", 94666), (\"name:ja\", 88999), (\"lanes\", 86388), (\"footway\", 82361), (\"natural\", 78738), (\"note\", 70762), (\"ref\", 70659), (\"bridge\", 67807), (\"waterway\", 56311), (\"leisure\", 50984), (\"maxspeed\", 50347), (\"parking\", 40504), (\"access\", 37432), (\"source:ja\", 36462), (\"note:ja\", 34493), (\"operator\", 33290), (\"railway\", 31563), (\"bicycle\", 29245), (\"foot\", 28947), (\"crossing\", 28847), (\"tunnel\", 28527), (\"voltage\", 27168), (\"KSJ2:curve_id\", 26409), (\"gauge\", 25709), (\"frequency\", 25062), (\"electrified\", 24819), (\"name:es\", 24614), (\"est_width\", 23944), (\"name:ja_rm\", 23739), (\"barrier\", 23070), (\"addr:block_number\", 20069), (\"addr:quarter\", 19876), (\"addr:neighbourhood\", 18604), (\"operator:en\", 17472), (\"tracktype\", 17152), (\"usage\", 17136), (\"official_name\", 16523), (\"operator:ja\", 15861), (\"area\", 14267), (\"addr:city\", 14074), (\"addr:housenumber\", 13701), (\"name:ja-Latn\", 13189), (\"smoothness\", 13027), (\"addr:province\", 12833), (\"toll\", 12281), (\"name:ja-Hira\", 12037), (\"addr:postcode\", 11413), (\"shop\", 11368), (\"water\", 11019), (\"motorcar\", 10743), (\"motorcycle\", 10657), (\"name:ko\", 10571), (\"roof:shape\", 10538), (\"sport\", 9846), (\"noname\", 9804), (\"wikidata\", 9778), (\"level\", 9602), (\"source:geometry\", 9383), (\"name:ja_kana\", 9380), (\"golf\", 9228), (\"man_made\", 9188), (\"admin_level\", 8907), (\"power\", 8155), (\"KSJ2:DFD\", 8109), (\"KSJ2:WSC\", 8109), (\"KSJ2:RIN\", 8109), (\"KSJ2:RIC\", 8109), (\"KSJ2:river_id\", 8109), (\"KSJ2:LOC\", 8109), (\"KSJ2:COP_label\", 8109), (\"KSJ2:filename\", 8089), (\"building:colour\", 7792), (\"operator:ja_rm\", 7700), (\"wikipedia\", 7680), (\"height\", 7632), (\"fee\", 7614), (\"incline\", 7497), (\"boundary\", 7330), (\"nat_name\", 7300), (\"roof:colour\", 7016), (\"width\", 6878), (\"brand\", 6787), (\"colour\", 6748), (\"nat_name:en\", 6701), (\"religion\", 6385), (\"lit\", 6383), (\"route\", 6227), (\"building:part\", 6025), (\"sidewalk\", 5953), (\"name:de\", 5943), (\"name:ru\", 5928), (\"brand:wikidata\", 5451), (\"passenger_lines\", 5231), (\"brand:en\", 5100), (\"brand:ja\", 4956), (\"brand:wikipedia\", 4942), (\"alt_name\", 4324), (\"tactile_paving\", 4319), (\"motor_vehicle\", 4277), (\"website\", 4217), (\"opening_hours\", 4198), (\"branch\", 4144), (\"railway:traffic_mode\", 4110), (\"covered\", 3985), (\"bridge:name\", 3871), (\"nat_name:ja\", 3735), (\"building:material\", 3711), (\"indoor\", 3638), (\"segregated\", 3566), (\"embankment\", 3492), (\"capacity\", 3368), (\"public_transport\", 3232), (\"phone\", 3216), (\"addr:suburb\", 3213), (\"aeroway\", 3132), (\"oneway:bicycle\", 3028)]\n",
"\n",
"--- relation tags ---\n",
"[(\"type\", 27449), (\"name\", 14859), (\"operator\", 7686), (\"ref\", 7533), (\"route\", 7126), (\"public_transport:version\", 4878), (\"network\", 4584), (\"name:en\", 4204), (\"natural\", 3512), (\"name:ja\", 3437), (\"source\", 3434), (\"restriction\", 3310), (\"building\", 3206), (\"from\", 3010)]\n",
"\n"
]
}
],
"source": [
"extern crate osmpbfreader;\n",
"use std::collections::HashMap;\n",
"\n",
"let filename = \"./kanto-latest.osm.pbf\";\n",
"let path = std::path::Path::new(filename);\n",
"let r = std::fs::File::open(&path).unwrap();\n",
"let mut pbf = osmpbfreader::OsmPbfReader::new(r);\n",
"\n",
"type TagMap = HashMap::<String, Vec<String>>;\n",
"let mut node_tags = TagMap::new();\n",
"let mut way_tags = TagMap::new();\n",
"let mut relation_tags = TagMap::new();\n",
"\n",
"for obj in pbf.par_iter().map(Result::unwrap) {\n",
" match obj {\n",
" osmpbfreader::OsmObj::Node(node) => {\n",
" for (k, v) in node.tags.iter() {\n",
" (*node_tags.entry(k.to_string()).or_insert(vec![])).push(v.to_string());\n",
" }\n",
" }\n",
" osmpbfreader::OsmObj::Way(way) => {\n",
" for (k, v) in way.tags.iter() {\n",
" (*way_tags.entry(k.to_string()).or_insert(vec![])).push(v.to_string());\n",
" }\n",
" }\n",
" osmpbfreader::OsmObj::Relation(rel) => {\n",
" for (k, v) in rel.tags.iter() {\n",
" (*relation_tags.entry(k.to_string()).or_insert(vec![])).push(v.to_string());\n",
" }\n",
" }\n",
" }\n",
"};\n",
"\n",
"fn select_tags(tags: &TagMap, max_items: usize) -> Vec::<(String, usize)> {\n",
" let mut filtered_tags = tags.iter().filter_map(|(k, v)| {\n",
" if v.len() > max_items { Some((k.clone(), v.len())) } else { None }\n",
" }).collect::<Vec::<(String, usize)>>();\n",
" filtered_tags.sort_by(|(_k1, v1), (_k2, v2)| v2.cmp(v1));\n",
" filtered_tags\n",
"}\n",
"\n",
"let max_items = 3000;\n",
"\n",
"println!(\"--- node tags ---\");\n",
"println!(\"{:?}\", select_tags(&node_tags, max_items));\n",
"println!(\"\");\n",
"\n",
"println!(\"--- way tags ---\");\n",
"println!(\"{:?}\", select_tags(&way_tags, max_items));\n",
"println!(\"\");\n",
"\n",
"println!(\"--- relation tags ---\");\n",
"println!(\"{:?}\", select_tags(&relation_tags, max_items));\n",
"println!(\"\");"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "17d29cd6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--- values for highway node ---\n",
"[(\"traffic_signals\", 58610), (\"crossing\", 56701), (\"bus_stop\", 53173), (\"street_lamp\", 6934), (\"stop\", 5880), (\"turning_circle\", 1487), (\"motorway_junction\", 1241), (\"elevator\", 734), (\"traffic_mirror\", 446)]\n",
"\n",
"--- values for highway way ---\n",
"[(\"residential\", 597695), (\"unclassified\", 438707), (\"service\", 211148), (\"footway\", 178498), (\"track\", 107295), (\"path\", 80424), (\"tertiary\", 79179), (\"steps\", 33224), (\"primary\", 19633), (\"trunk\", 17413), (\"secondary\", 16548), (\"pedestrian\", 11553), (\"motorway\", 6665), (\"motorway_link\", 5952), (\"living_street\", 5215), (\"cycleway\", 5151), (\"trunk_link\", 2254), (\"primary_link\", 1307), (\"construction\", 1267), (\"tertiary_link\", 906), (\"secondary_link\", 581), (\"road\", 528), (\"raceway\", 246), (\"services\", 223), (\"corridor\", 157), (\"elevator\", 144), (\"platform\", 110)]\n",
"\n"
]
}
],
"source": [
"fn select_values(values: &Vec<String>, max_values: usize) -> Vec::<(String, usize)> {\n",
" let mut count_by_value = HashMap::<String, usize>::new();\n",
" for v in values.iter() {\n",
" (*count_by_value.entry(v.to_string()).or_insert(0)) += 1;\n",
" };\n",
" \n",
" let mut filtered_count_by_value = count_by_value.iter().filter_map(|(k, v)| {\n",
" if *v > max_values { Some((k.clone(), *v)) } else { None }\n",
" }).collect::<Vec::<(String, usize)>>();\n",
" filtered_count_by_value.sort_by(|(_k1, v1), (_k2, v2)| v2.cmp(v1));\n",
" filtered_count_by_value\n",
"}\n",
"\n",
"let max_values = 100;\n",
"\n",
"println!(\"--- values for highway node ---\");\n",
"println!(\"{:?}\", select_values(&node_tags[\"highway\"], max_values));\n",
"println!(\"\");\n",
"\n",
"println!(\"--- values for highway way ---\");\n",
"println!(\"{:?}\", select_values(&way_tags[\"highway\"], max_values));\n",
"println!(\"\");"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Rust",
"language": "rust",
"name": "rust"
},
"language_info": {
"codemirror_mode": "rust",
"file_extension": ".rs",
"mimetype": "text/rust",
"name": "Rust",
"pygment_lexer": "rust",
"version": ""
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment