Skip to content

Instantly share code, notes, and snippets.

@tmcw
Created November 5, 2013 16:26
Show Gist options
  • Save tmcw/7321691 to your computer and use it in GitHub Desktop.
Save tmcw/7321691 to your computer and use it in GitHub Desktop.

name-extract

A libosmium-based utility for extracting names from OSM: it prints the `name` tag of every way that also has a `highway` tag, one name per line.

build

make

run

./name_count mexico.pbf
// Reads collisions.json as newline-delimited JSON and prints the combined
// length of every record's `terms` array.
var fs = require('fs');
var split = require('split');

var total = 0;

// Adds the number of terms in one JSON line to the running total.
function addTermCount(line) {
    // Empty chunks carry no record and would make JSON.parse throw.
    if (!line) {
        return;
    }
    var record = JSON.parse(line);
    total += record.terms.length;
}

// Prints the final total once the whole file has been read.
function printTotal() {
    console.log(total);
}

fs.createReadStream('collisions.json')
    .pipe(split())
    .on('data', addTermCount)
    .on('end', printTotal);
// Converts text into a token ID.
// This is a 28 bit FNV1a with room for 4 bits of room for bonus data.
// This bonus data is currently used by degenerate token mappings to specify
// the character distance of degenerates from original tokens.
// FNV-1a hash.
// For 32-bit: offset = 2166136261, prime = 16777619.
module.exports = function fnv1a(str) {
var hash = 0x811C9DC5;
if (str.length) for (var i = 0; i < str.length; i++) {
hash = hash ^ str.charCodeAt(i);
// 2**24 + 2**8 + 0x93 = 16777619
hash += (hash << 24) + (hash << 8) + (hash << 7) + (hash << 4) + (hash << 1);
}
return hash >>> 0;
};
#------------------------------------------------------------------------------
#
# Makefile for Osmium examples
#
#------------------------------------------------------------------------------
#
# You can set several environment variables before running make if you don't
# like the defaults:
#
# CXX - Your C++ compiler.
# CPLUS_INCLUDE_PATH - Include file search path.
# CXXFLAGS - Extra compiler flags.
# LDFLAGS - Extra linker flags.
#
#------------------------------------------------------------------------------
# Base compiler flags: optimized C++11 build with the large-file macros
# defined and ../include added to the header search path.
CXXFLAGS += -O3 -std=c++11 -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -I../include
#CXXFLAGS += -g

# On Darwin, select libc++ for both compiling and linking.
OS := $(shell uname -s)
ifeq ($(OS),Darwin)
CXXFLAGS += -stdlib=libc++
LDFLAGS += -stdlib=libc++
endif

# Per-library compiler flags, queried from each library's *-config tool.
CXXFLAGS_GEOS    := $(shell geos-config --cflags)
CXXFLAGS_LIBXML2 := $(shell xml2-config --cflags)
CXXFLAGS_OGR     := $(shell gdal-config --cflags)

# Warning flags passed to every compile.
CXXFLAGS_WARNINGS := -Wall -Wextra -pedantic -Wredundant-decls \
                     -Wdisabled-optimization -Wctor-dtor-privacy \
                     -Wnon-virtual-dtor -Woverloaded-virtual -Wsign-promo \
                     -Wold-style-cast

# Per-library linker flags.
LIB_EXPAT := -lexpat
LIB_PBF   := -pthread -lz -lprotobuf-lite -losmpbf
LIB_GD    := -lgd -lz -lm
LIB_GEOS  := $(shell geos-config --libs)
LIB_OGR   := $(shell gdal-config --libs)
LIB_SHAPE := -lshp $(LIB_GEOS)
LIB_XML2  := $(shell xml2-config --libs)

# this links libboost_program_options statically, because for some reason
# it always wants the exact version which might not be available on a
# different host
LIB_PRGOPT := -Wl,-Bstatic -lboost_program_options -Wl,-Bdynamic
# Programs built by the default target. The list is kept on a single line:
# a trailing backslash after the last entry would splice the following
# ".PHONY" line into this variable and leave the phony targets undeclared.
PROGRAMS := name_count

# "all" and "clean" are commands, not files to be produced.
.PHONY: all clean

all: $(PROGRAMS)

# Build name_count from its single source file. The recipe line must begin
# with a tab character.
name_count: name_count.cpp
	$(CXX) $(CXXFLAGS) $(CXXFLAGS_WARNINGS) $(CXXFLAGS_LIBXML2) -o $@ $< $(LDFLAGS) $(LIB_EXPAT) $(LIB_PBF) $(LIB_XML2)
# osmium_debug: osmium_debug.cpp
# $(CXX) $(CXXFLAGS) $(CXXFLAGS_WARNINGS) -o $@ $< $(LDFLAGS) $(LIB_EXPAT) $(LIB_PBF)
#
# osmium_index: osmium_index.cpp
# $(CXX) $(CXXFLAGS) $(CXXFLAGS_WARNINGS) -o $@ $< $(LDFLAGS) $(LIB_PRGOPT)
#
# osmium_read: osmium_read.cpp
# $(CXX) $(CXXFLAGS) $(CXXFLAGS_WARNINGS) -o $@ $< $(LDFLAGS) $(LIB_EXPAT) $(LIB_PBF)
#
# osmium_find_bbox: osmium_find_bbox.cpp
# $(CXX) $(CXXFLAGS) $(CXXFLAGS_WARNINGS) -o $@ $< $(LDFLAGS) $(LIB_EXPAT) $(LIB_PBF)
#
# osmium_mpdump: osmium_mpdump.cpp
# $(CXX) $(CXXFLAGS) $(CXXFLAGS_WARNINGS) $(CXXFLAGS_GEOS) -o $@ $< $(LDFLAGS) $(LIB_EXPAT) $(LIB_PBF) $(LIB_GEOS)
#
# osmium_progress: osmium_progress.cpp
# $(CXX) $(CXXFLAGS) $(CXXFLAGS_WARNINGS) -o $@ $< $(LDFLAGS) $(LIB_EXPAT) $(LIB_PBF)
#
# osmium_range_from_history: osmium_range_from_history.cpp
# $(CXX) $(CXXFLAGS) $(CXXFLAGS_WARNINGS) $(CXXFLAGS_LIBXML2) -o $@ $< $(LDFLAGS) $(LIB_EXPAT) $(LIB_PBF) $(LIB_XML2)
#
# osmium_relation_members: osmium_relation_members.cpp
# $(CXX) $(CXXFLAGS) $(CXXFLAGS_WARNINGS) -o $@ $< $(LDFLAGS) $(LIB_EXPAT) $(LIB_PBF)
#
# osmium_sizeof: osmium_sizeof.cpp
# $(CXX) $(CXXFLAGS) $(CXXFLAGS_WARNINGS) -o $@ $< $(LDFLAGS) $(LIB_EXPAT) $(LIB_PBF)
#
# osmium_serdebug: osmium_serdebug.cpp
# $(CXX) $(CXXFLAGS) $(CXXFLAGS_WARNINGS) -o $@ $< $(LDFLAGS)
#
# osmium_serdump: osmium_serdump.cpp
# $(CXX) $(CXXFLAGS) $(CXXFLAGS_WARNINGS) -o $@ $< $(LDFLAGS) $(LIB_EXPAT) $(LIB_PBF)
#
# osmium_serget: osmium_serget.cpp
# $(CXX) $(CXXFLAGS) $(CXXFLAGS_WARNINGS) -o $@ $< $(LDFLAGS)
#
# osmium_store_and_debug: osmium_store_and_debug.cpp
# $(CXX) $(CXXFLAGS) $(CXXFLAGS_WARNINGS) -o $@ $< $(LDFLAGS) $(LIB_EXPAT) $(LIB_PBF)
#
# osmium_time: osmium_time.cpp
# $(CXX) $(CXXFLAGS) $(CXXFLAGS_WARNINGS) -o $@ $< $(LDFLAGS) $(LIB_EXPAT) $(LIB_PBF)
#
# osmium_toogr: osmium_toogr.cpp
# $(CXX) $(CXXFLAGS) $(CXXFLAGS_WARNINGS) $(CXXFLAGS_OGR) -o $@ $< $(LDFLAGS) $(LIB_EXPAT) $(LIB_PBF) $(LIB_OGR)
#
# osmium_toogr2: osmium_toogr2.cpp
# $(CXX) $(CXXFLAGS) $(CXXFLAGS_WARNINGS) $(CXXFLAGS_OGR) $(CXXFLAGS_GEOS) -o $@ $< $(LDFLAGS) $(LIB_EXPAT) $(LIB_PBF) $(LIB_OGR) $(LIB_GEOS)
#
# osmium_to_postgis: osmium_to_postgis.cpp
# $(CXX) $(CXXFLAGS) $(CXXFLAGS_WARNINGS) $(CXXFLAGS_OGR) -o $@ $< $(LDFLAGS) $(LIB_EXPAT) $(LIB_PBF) $(LIB_OGR)
#
# osmium_toshape: osmium_toshape.cpp
# $(CXX) $(CXXFLAGS) $(CXXFLAGS_WARNINGS) $(CXXFLAGS_GEOS) -o $@ $< $(LDFLAGS) $(LIB_EXPAT) $(LIB_PBF) $(LIB_SHAPE)
#
# nodedensity: nodedensity.cpp
# $(CXX) $(CXXFLAGS) $(CXXFLAGS_WARNINGS) -o $@ $< $(LDFLAGS) $(LIB_EXPAT) $(LIB_PBF) $(LIB_GD)
# Remove object files, core dumps and every program listed in PROGRAMS.
# $(RM) is make's built-in variable, which defaults to "rm -f".
clean:
	$(RM) *.o core $(PROGRAMS)
#include <iostream>
#include <getopt.h>
#include <osmium/io/any_input.hpp>
#include <osmium/io/any_output.hpp>
#include <sstream>
// Prints the command-line usage summary to standard output.
// NOTE(review): the text promises a "frequency table", but the handler in
// this file prints one name per line without counts -- confirm the wording.
void print_help() {
    // End the message with a newline so that whatever is printed next (for
    // example the shell prompt) does not end up on the same line.
    std::cout << "name_count INFILE\n\n"
              << "Outputs a name frequency table.\n";
}
// Handler that writes the "name" tag of every way that also carries a
// "highway" tag to standard output, one name per line.
class MyCountHandler : public osmium::handler::Handler<MyCountHandler> {
public:
    // The field name argument is accepted but not used: the tag keys are
    // hard-coded in way(). The parameter is left unnamed so that it does not
    // trigger an unused-parameter warning.
    MyCountHandler(const std::string& /* fieldname */) {
    }

    // Prints the way's name when both a "highway" and a "name" tag are
    // present. Presumably invoked once per way when the handler is applied
    // to a reader -- confirm against the libosmium version in use.
    void way(const osmium::Way& way) {
        // A null pointer is treated as "tag not present".
        const char* highway = way.tags().get_value_by_key("highway");
        const char* name = way.tags().get_value_by_key("name");
        if (highway && name) {
            // Stream the C string directly and end the line with '\n'
            // rather than std::endl, which would flush stdout after every
            // single name.
            std::cout << name << '\n';
        }
    }
};
// Entry point: reads the OSM file named by the single command-line argument
// and runs MyCountHandler over it. With any other number of arguments the
// file name "-" is handed to the reader instead (presumably standard input;
// confirm against the libosmium version in use).
int main(int argc, char* argv[]) {
    // getopt() is never called here, so optind still has its initial value
    // and argv[optind] is the first command-line argument.
    int remaining_args = argc - optind;

    std::string input_filename = (remaining_args == 1) ? argv[optind] : "-";

    osmium::io::Reader reader(input_filename);
    // The header itself is not inspected; open() is called before apply().
    osmium::io::Header header = reader.open();

    MyCountHandler count_handler("name");
    reader.apply(count_handler);

    google::protobuf::ShutdownProtobufLibrary();
    return 0;
}
{
"name": "name-extract",
"version": "0.0.0",
"description": "A libosmium-based utility for extracting all names from OSM.",
"main": "fnv.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"repository": {
"type": "git",
"url": "git://github.com/osmlab/name-count.git"
},
"author": "",
"license": "BSD-2-Clause",
"bugs": {
"url": "https://github.com/osmlab/name-count/issues"
},
"dependencies": {
"split": "~0.2.10"
}
}
// Finds hash collisions among the names listed one per line in 'all_names_u'
// and writes them to 'collisions.json' as newline-delimited JSON, one record
// per colliding hash: {"hash": "<hash>", "terms": ["<name>", ...]}.
var fs = require('fs'),
    split = require('split'),
    fnv = require('./fnv');

// firstSeen maps a hash to the first name that produced it. Only that first
// name is needed to seed a collision record, so later names are not kept
// here. collisions maps a hash to every name that shares it.
var firstSeen = {},
    collisions = {};
var out = fs.createWriteStream('collisions.json');

fs.createReadStream('all_names_u')
    .pipe(split())
    .on('data', function(d) {
        // Skip empty chunks (such as the one following a trailing newline)
        // so that they are not hashed and recorded as a name.
        if (!d) {
            return;
        }
        var hash = fnv(d);
        if (firstSeen[hash] === undefined) {
            firstSeen[hash] = d;
            return;
        }
        // Second or later name for this hash: start the record with the
        // first name seen, then append the current one.
        if (collisions[hash] === undefined) {
            collisions[hash] = [firstSeen[hash]];
        }
        collisions[hash].push(d);
    })
    .on('end', function() {
        for (var hash in collisions) {
            out.write(JSON.stringify({
                hash: hash,
                terms: collisions[hash]
            }) + '\n');
        }
        // Signal that no more records will be written so the stream can
        // flush and close the file.
        out.end();
    });
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment