Skip to content

Instantly share code, notes, and snippets.

@remi-dupre
Last active July 15, 2020 11:47
Show Gist options
  • Save remi-dupre/94761202426a8fe46656a5d61c56ef5b to your computer and use it in GitHub Desktop.
Save remi-dupre/94761202426a8fe46656a5d61c56ef5b to your computer and use it in GitHub Desktop.
from collections import Counter
def load_pg_request(path):
    """Load a ``|``-separated three-column text dump into a dictionary.

    Every line of the file at *path* is split on ``|`` and each resulting
    field is stripped of surrounding whitespace.  The three fields are read
    as ``osm_id``, ``key`` and ``subclass``.

    Args:
        path: Path of the text file to read.

    Returns:
        A dict mapping each ``osm_id`` (as a string) to its
        ``(key, subclass)`` tuple.  When an id appears on several lines the
        last occurrence wins.

    Raises:
        ValueError: If a line does not split into exactly three fields.
    """
    # The context manager closes the file as soon as the dict is built;
    # a bare ``open(path)`` would leave the handle to the garbage collector.
    with open(path) as lines:
        return {
            osm_id: (key, subclass)
            for osm_id, key, subclass in (
                map(str.strip, line.split("|")) for line in lines
            )
        }
# Classification of every object before and after the change, keyed by id.
before = load_pg_request("classes_before_pr.txt")
after = load_pg_request("classes_after_pr.txt")

# Count each distinct "old -> new" transition among the ids present in both
# dumps whose (key, subclass) pair differs.
stats = Counter()
for osm_id in before.keys() & after.keys():
    old_class, new_class = before[osm_id], after[osm_id]
    if old_class != new_class:
        stats[f"{old_class} -> {new_class}"] += 1

# Report the 50 most frequent transitions, most frequent first.
ranked = sorted(stats.items(), key=lambda item: item[1], reverse=True)
for transition, count in ranked[:50]:
    print(f" - {count:>4}x {transition}")
import json
# Per-kind tag value lists used by the report loop that follows.
#
# Each top-level key names a tag kind and doubles as the stem of the
# "<kind>.json" file the report reads.  For every kind:
#   * "before_whitelist": values whose counts are summed as "previously";
#   * "after_blacklist" (optional): values left out of the "new total".
CFG = {
    "amenity": {
        "before_whitelist": {
            "arts_centre", "bank", "bar", "bbq", "bicycle_parking", "bicycle_rental",
            "biergarten", "bus_station", "cafe", "cinema", "clinic", "college",
            "community_centre", "courthouse", "dentist", "doctors", "embassy", "fast_food",
            "ferry_terminal", "fire_station", "food_court", "fuel", "grave_yard", "hospital",
            "ice_cream", "kindergarten", "library", "marketplace", "motorcycle_parking",
            "nightclub", "nursing_home", "parking", "pharmacy", "place_of_worship", "police",
            "post_box", "post_office", "prison", "pub", "public_building", "recycling",
            "restaurant", "school", "shelter", "taxi", "telephone", "theatre", "toilets",
            "townhall", "university", "veterinary", "waste_basket",
        },
        "after_blacklist": {
            "bench", "parking_space", "drinking_water", "vending_machine", "water_point",
            "parking_entrance",
        },
    },
    "craft": {
        "before_whitelist": {
            "carpenter", "shoemaker", "winery", "tailor", "photographer", "electrician",
            "metal_construction", "brewery", "plumber", "sawmill", "electronics_repair",
            "caterer", "hvac", "confectionery", "window_construction", "dressmaker",
            "handicraft", "gardener", "stonemason", "painter", "glaziery", "beekeeper",
            "key_cutter", "blacksmith", "roofer", "upholsterer", "pottery", "builder",
            "jeweller", "joiner", "photographic_laboratory", "locksmith", "distillery",
            "tiler", "clockmaker", "watchmaker", "agricultural_engines", "signmaker",
            "optician", "tinsmith", "sculptor", "grinding_mill", "scaffolder", "boatbuilder",
            "floorer", "bookbinder", "bakery", "plasterer", "printer", "carpet_layer",
            "saddler", "cabinet_maker", "welder", "print_shop", "insulation",
            "dental_technician", "chimney_sweeper", "sun_protection", "parquet_layer",
            "printmaker", "car_repair", "sailmaker", "basket_maker", "musical_instrument",
            "toolmaker", "atelier", "engraver", "goldsmith", "oil_mill", "turner", "luthier",
            "organ_builder", "leather", "rigger", "car_painter", "embroiderer", "sewing",
            "restoration", "stand_builder", "paver", "sweep", "door_construction",
            "glassblower", "information_electronics", "water_well_drilling",
            "piano_tuner", "carpet_cleaner", "cooper", "mint", "lacquerer", "paperhanger",
            "bag_repair",
        }
    },
    "leisure": {
        "before_whitelist": {
            "dog_park", "escape_game", "garden", "golf_course", "ice_rink", "hackerspace",
            "marina", "miniature_golf", "park", "pitch", "playground", "sports_centre",
            "stadium", "swimming_area", "water_park",
        },
        "after_blacklist": {"picnic_table", "track", "common", "swimming_pool"},
    },
    "shop": {
        "before_whitelist": {
            "accessories", "alcohol", "antiques", "art", "bag", "bakery", "beauty", "bed",
            "beverages", "bicycle", "books", "boutique", "butcher", "camera", "car",
            "car_repair", "carpet", "charity", "chemist", "chocolate", "clothes", "coffee",
            "computer", "confectionery", "convenience", "copyshop", "cosmetics", "deli",
            "delicatessen", "department_store", "doityourself", "dry_cleaning",
            "electronics", "erotic", "fabric", "farm", "florist", "frozen_food",
            "furniture", "garden_centre", "general", "gift", "greengrocer", "hairdresser",
            "hardware", "hearing_aids", "hifi", "ice_cream", "interior_decoration",
            "jewelry", "kiosk", "lamps", "laundry", "mall", "massage", "mobile_phone",
            "motorcycle", "music", "musical_instrument", "newsagent", "optician",
            "outdoor", "perfume", "perfumery", "pet", "photo", "second_hand", "shoes",
            "sports", "stationery", "supermarket", "tailor", "tattoo", "ticket", "tobacco",
            "toys", "travel_agency", "video", "video_games", "watches", "weapons",
            "wholesale", "wine",
        }
    },
    "sport": {
        "before_whitelist": {
            "american_football", "archery", "athletics", "australian_football",
            "badminton", "baseball", "basketball", "beachvolleyball", "billiards", "bmx",
            "boules", "bowls", "boxing", "canadian_football", "canoe", "chess", "climbing",
            "climbing_adventure", "cricket", "cricket_nets", "croquet", "curling",
            "cycling", "disc_golf", "diving", "dog_racing", "equestrian",
            # NOTE(review): "fatsal" looks like a misspelling of "futsal"; it is
            # kept verbatim because this list may mirror an earlier mapping that
            # had the same spelling -- confirm before correcting.
            "fatsal",
            "field_hockey", "free_flying", "gaelic_games", "golf", "gymnastics",
            "handball", "hockey", "horse_racing", "horseshoes", "ice_hockey", "ice_stock",
            "judo", "karting", "korfball", "long_jump", "model_aerodrome", "motocross",
            "motor", "multi", "netball", "orienteering", "paddle_tennis", "paintball",
            "paragliding", "pelota", "racquet", "rc_car", "rowing", "rugby",
            "rugby_league", "rugby_union", "running", "sailing", "scuba_diving",
            "shooting", "shooting_range", "skateboard", "skating", "skiing", "soccer",
            "surfing", "swimming", "table_soccer", "table_tennis", "team_handball",
            "tennis", "toboggan", "volleyball", "water_ski", "yoga",
        }
    },
    "tourism": {
        "before_whitelist": {
            "alpine_hut", "aquarium", "artwork", "attraction", "bed_and_breakfast",
            "camp_site", "caravan_site", "chalet", "gallery", "guest_house", "hostel",
            "hotel", "information", "motel", "museum", "picnic_site", "theme_park",
            "viewpoint", "zoo",
        }
    },
}
def _share(part, whole):
    """Return *part* as a percentage of *whole*, or 0.0 when *whole* is zero."""
    return 100 * part / whole if whole else 0.0


# For every configured kind, read "<kind>.json" and print how much of the
# data was already covered by the whitelist and which values are new.
for kind in CFG:
    # Close the file deterministically instead of leaking the handle; JSON
    # text is read as UTF-8 regardless of the platform's default encoding.
    with open(kind + ".json", encoding="utf-8") as source:
        data = json.load(source)["data"]

    before_whitelist = CFG[kind].get("before_whitelist", set())
    # "yes" and "no" are always excluded, on top of the configured blacklist.
    after_blacklist = CFG[kind].get("after_blacklist", set()).union({"yes", "no"})

    total_count = sum(x["count"] for x in data if x["value"] not in after_blacklist)
    before_count = sum(x["count"] for x in data if x["value"] in before_whitelist)
    # Values that are neither whitelisted before nor blacklisted after.
    new_cats = {x["value"] for x in data} - before_whitelist - after_blacklist

    print("\n####", kind, "\n")
    print(f" - new total: {total_count}")
    # _share() avoids a ZeroDivisionError when nothing is left after the
    # blacklist (or the data list is empty).
    print(f" - previously: {before_count} ({_share(before_count, total_count):.0f}%)")
    print(" - top 10 new imports:")

    top_new = sorted(
        ((x["count"], x["value"]) for x in data if x["value"] in new_cats),
        reverse=True,
    )[:10]
    for count, val in top_new:
        print(f" * {val}: {count} ({_share(count, total_count):.2f}%)")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment