Created
March 5, 2012 08:36
-
-
Save emj/1977484 to your computer and use it in GitHub Desktop.
Openstreetmap changeset statistics of source tags.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# wget http://planet.openstreetmap.org/changesets-120229.osm.bz2
# tail -c 1000000 changesets-120229.osm.bz2 >tail.bz2
# bzip2recover tail.bz2
# bzcat rec000* |perl id+source.pl >all_sources.txt
use strict;
use warnings;

# Reads a changeset dump line by line from the files named on the command
# line (or from STDIN) and prints "<changeset id>: <source value>" to STDOUT
# for every line carrying a source tag. Progress and a final summary are
# written to STDERR.

my $id         = 0;    # id captured from the most recent <changeset ...> line
my $source     = 0;    # number of source tags seen so far
my $changesets = 0;    # number of <changeset ...> lines seen so far

# Writes "<changesets>/<source tags>: <source tags per changeset>" to STDERR.
# The ratio is reported as 0 while no changeset has been counted, so empty
# input no longer dies with "Illegal division by zero".
my $report = sub {
    my $ratio = $changesets ? $source / $changesets : 0;
    print STDERR "$changesets/$source: " . $ratio . "\n";
};

while ( my $line = <> ) {
    if ( $line =~ /<changeset.* id="([^"]*)"/ ) {
        $id = $1;
        $changesets++;

        # Progress every 10000 changesets. The check lives here, not at the
        # end of the loop body, so the same progress line is not repeated for
        # each following tag line while the counter is unchanged.
        $report->() if $changesets % 10000 == 1;
    }
    if ( $line =~ /tag k="source" v="([^"]*)/ ) {
        $source++;

        # A source tag is attributed to the changeset id seen most recently.
        print "$id: $1\n";
    }
}

print STDERR "DONE\n";
$report->();
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# "latest" here refers to changesets-120229.osm.bz2.
# wget http://planet.openstreetmap.org/changesets-latest.osm.bz2
# tail -c 1000000 changesets-latest.osm.bz2 >tail.bz2
# bzip2recover tail.bz2
# bzcat rec000* |perl id+source.pl >all_sources.txt; rm rec000*
# bzcat changesets-latest.osm.bz2 |perl id+source.pl >all_sources_2.txt
use strict;
use warnings;

# Reads a changeset dump line by line from the files named on the command
# line (or from STDIN) and prints "<changeset id>: <source value>: <num_changes>"
# to STDOUT for every line carrying a source tag. Progress and a final
# summary are written to STDERR by stats().

# Package variables rather than lexicals: stats() reads these running totals.
our $id             = 0;    # id from the most recent <changeset ...> line
our $num_changes    = 0;    # num_changes of that same changeset
our $changesets     = 0;    # number of <changeset ...> lines seen
our $total_changes  = 0;    # sum of num_changes over all changesets
our $source         = 0;    # number of source tags seen
our $source_changes = 0;    # sum of num_changes, added once per source tag

while ( my $line = <> ) {
    if ( $line =~ /<changeset.* id="([^"]*)/ ) {
        $id = $1;

        # num_changes is matched on its own so the attribute order on the
        # line does not matter; a changeset without it counts as 0 changes
        # instead of being skipped (which would leave the previous
        # changeset's id and num_changes in effect for later source tags).
        $num_changes = $line =~ /<changeset.* num_changes="([^"]*)/ ? $1 : 0;
        $total_changes += $num_changes;
        $changesets++;

        # Progress every 10000 changesets. Checked only when the counter
        # moves, so the line is not repeated for each following tag line.
        stats() if $changesets % 10000 == 1;
    }
    if ( $line =~ /tag k="source" v="([^"]*)/ ) {
        $source++;
        print "$id: $1: $num_changes\n";
        $source_changes += $num_changes;
    }
}

print STDERR "DONE\n";

# Parentheses are required: stats is defined further down, and a bare
# "stats;" before the definition is parsed as a string constant, not a call.
stats();
# Writes the running totals to STDERR: source tags per changeset, and the
# share of changes that belong to changesets with a source tag.
# Reads the package variables $source, $changesets, $source_changes and
# $total_changes; takes no arguments.
# A ratio whose denominator is still 0 is reported as 0 instead of dying
# with "Illegal division by zero" (e.g. when the first changeset has
# num_changes="0", or when no changeset was read at all).
sub stats {
    our ( $source, $changesets, $source_changes, $total_changes );

    my $changeset_ratio = $changesets    ? $source / $changesets            : 0;
    my $change_ratio    = $total_changes ? $source_changes / $total_changes : 0;

    print STDERR "Changets: $source/$changesets: " . $changeset_ratio . " Number of changes: $source_changes/$total_changes:" . $change_ratio . "\n";
    return;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment