remi-dupre/class_changes.py

## class_changes.py
from collections import Counter


def load_pg_request(path):
    """Parse a pipe-separated dump into ``{osm_id: (key, subclass)}``.

    Each non-blank line of the file at *path* must hold exactly three
    ``|``-separated fields; surrounding whitespace is stripped from every
    field and all values stay strings. When an ``osm_id`` occurs more than
    once, the last occurrence wins.

    Raises:
        ValueError: if a non-blank line does not split into three fields.
        OSError: if *path* cannot be opened.
    """
    rows = {}
    # The context manager closes the handle even when a malformed line raises.
    with open(path) as dump:
        for line in dump:
            if not line.strip():
                continue  # tolerate blank lines such as a trailing newline
            osm_id, key, subclass = (field.strip() for field in line.split("|"))
            rows[osm_id] = (key, subclass)
    return rows


# Load both snapshots; each maps osm_id -> (key, subclass).
before = load_pg_request("classes_before_pr.txt")
after = load_pg_request("classes_after_pr.txt")

# Tally every "old -> new" transition for ids present in both snapshots
# whose class actually changed.
stats = Counter()
for osm_id in before.keys() & after.keys():
    old_class, new_class = before[osm_id], after[osm_id]
    if old_class != new_class:
        stats[f"{old_class} -> {new_class}"] += 1

# Print the 50 most frequent transitions, most common first.
for transition, count in stats.most_common(50):
    print(f" - {count:>4}x {transition}")

## stats.py
import json

# Per-kind configuration consumed by the report loop further down this file.
#   "before_whitelist": values that the loop counts as "previously" imported.
#   "after_blacklist":  values that the loop leaves out of the new total
#                       (optional; the loop treats a missing entry as empty).
# The top-level keys double as the stem of the "<kind>.json" file that the
# loop reads. Presumably these are OpenStreetMap tag keys/values — confirm.
CFG = {
    "amenity": {
        "before_whitelist": {
            "arts_centre", "bank", "bar", "bbq", "bicycle_parking", "bicycle_rental",
            "biergarten", "bus_station", "cafe", "cinema", "clinic", "college",
            "community_centre", "courthouse", "dentist", "doctors", "embassy",
            "fast_food", "ferry_terminal", "fire_station", "food_court", "fuel",
            "grave_yard", "hospital", "ice_cream", "kindergarten", "library",
            "marketplace", "motorcycle_parking", "nightclub", "nursing_home",
            "parking", "pharmacy", "place_of_worship", "police", "post_box",
            "post_office", "prison", "pub", "public_building", "recycling",
            "restaurant", "school", "shelter", "taxi", "telephone", "theatre",
            "toilets", "townhall", "university", "veterinary", "waste_basket",
        },
        "after_blacklist": {
            "bench", "parking_space", "drinking_water", "vending_machine",
            "water_point", "parking_entrance",
        },
    },
    "craft": {
        "before_whitelist": {
            "carpenter", "shoemaker", "winery", "tailor", "photographer",
            "electrician", "metal_construction", "brewery", "plumber", "sawmill",
            "electronics_repair", "caterer", "hvac", "confectionery",
            "window_construction", "dressmaker", "handicraft", "gardener",
            "stonemason", "painter", "glaziery", "beekeeper", "key_cutter",
            "blacksmith", "roofer", "upholsterer", "pottery", "builder",
            "jeweller", "joiner", "photographic_laboratory", "locksmith",
            "distillery", "tiler", "clockmaker", "watchmaker",
            "agricultural_engines", "signmaker", "optician", "tinsmith",
            "sculptor", "grinding_mill", "scaffolder", "boatbuilder", "floorer",
            "bookbinder", "bakery", "plasterer", "printer", "carpet_layer",
            "saddler", "cabinet_maker", "welder", "print_shop", "insulation",
            "dental_technician", "chimney_sweeper", "sun_protection",
            "parquet_layer", "printmaker", "car_repair", "sailmaker",
            "basket_maker", "musical_instrument", "toolmaker", "atelier",
            "engraver", "goldsmith", "oil_mill", "turner", "luthier",
            "organ_builder", "leather", "rigger", "car_painter", "embroiderer",
            "sewing", "restoration", "stand_builder", "paver", "sweep",
            "door_construction", "glassblower", "information_electronics",
            "water_well_drilling", "piano_tuner", "carpet_cleaner", "cooper",
            "mint", "lacquerer", "paperhanger", "bag_repair",
        }
    },
    "leisure": {
        "before_whitelist": {
            "dog_park", "escape_game", "garden", "golf_course", "ice_rink",
            "hackerspace", "marina", "miniature_golf", "park", "pitch",
            "playground", "sports_centre", "stadium", "swimming_area", "water_park",
        },
        "after_blacklist": {"picnic_table", "track", "common", "swimming_pool"},
    },
    "shop": {
        "before_whitelist": {
            "accessories", "alcohol", "antiques", "art", "bag", "bakery", "beauty",
            "bed", "beverages", "bicycle", "books", "boutique", "butcher", "camera",
            "car", "car_repair", "carpet", "charity", "chemist", "chocolate",
            "clothes", "coffee", "computer", "confectionery", "convenience",
            "copyshop", "cosmetics", "deli", "delicatessen", "department_store",
            "doityourself", "dry_cleaning", "electronics", "erotic", "fabric",
            "farm", "florist", "frozen_food", "furniture", "garden_centre",
            "general", "gift", "greengrocer", "hairdresser", "hardware",
            "hearing_aids", "hifi", "ice_cream", "interior_decoration", "jewelry",
            "kiosk", "lamps", "laundry", "mall", "massage", "mobile_phone",
            "motorcycle", "music", "musical_instrument", "newsagent", "optician",
            "outdoor", "perfume", "perfumery", "pet", "photo", "second_hand",
            "shoes", "sports", "stationery", "supermarket", "tailor", "tattoo",
            "ticket", "tobacco", "toys", "travel_agency", "video", "video_games",
            "watches", "weapons", "wholesale", "wine",
        }
    },
    "sport": {
        "before_whitelist": {
            "american_football", "archery", "athletics", "australian_football",
            "badminton", "baseball", "basketball", "beachvolleyball", "billiards",
            "bmx", "boules", "bowls", "boxing", "canadian_football", "canoe",
            "chess", "climbing", "climbing_adventure", "cricket", "cricket_nets",
            "croquet", "curling", "cycling", "disc_golf", "diving", "dog_racing",
            # NOTE(review): "fatsal" looks like a misspelling of "futsal"; left
            # untouched because it may mirror the previous list — confirm.
            "equestrian", "fatsal", "field_hockey", "free_flying", "gaelic_games",
            "golf", "gymnastics", "handball", "hockey", "horse_racing",
            "horseshoes", "ice_hockey", "ice_stock", "judo", "karting",
            "korfball", "long_jump", "model_aerodrome", "motocross", "motor",
            "multi", "netball", "orienteering", "paddle_tennis", "paintball",
            "paragliding", "pelota", "racquet", "rc_car", "rowing", "rugby",
            "rugby_league", "rugby_union", "running", "sailing", "scuba_diving",
            "shooting", "shooting_range", "skateboard", "skating", "skiing",
            "soccer", "surfing", "swimming", "table_soccer", "table_tennis",
            "team_handball", "tennis", "toboggan", "volleyball", "water_ski",
            "yoga",
        }
    },
    "tourism": {
        "before_whitelist": {
            "alpine_hut", "aquarium", "artwork", "attraction", "bed_and_breakfast",
            "camp_site", "caravan_site", "chalet", "gallery", "guest_house",
            "hostel", "hotel", "information", "motel", "museum", "picnic_site",
            "theme_park", "viewpoint", "zoo",
        }
    },
}

def _percent(part, whole):
    """Return *part* as a percentage of *whole*, or 0.0 when *whole* is zero."""
    return 100 * part / whole if whole else 0.0


# For every kind in CFG, read "<kind>.json" and report how much of the new
# total was already covered by the old whitelist, plus the ten largest values
# that are newly included.
for kind, kind_cfg in CFG.items():
    # Close the dump as soon as it is parsed instead of leaking the handle.
    with open(kind + ".json") as dump:
        data = json.load(dump)["data"]  # entries are indexed by "value" and "count"

    before_whitelist = kind_cfg.get("before_whitelist", set())
    # "yes" and "no" are always excluded, on top of the per-kind blacklist.
    after_blacklist = kind_cfg.get("after_blacklist", set()) | {"yes", "no"}

    total_count = sum(x["count"] for x in data if x["value"] not in after_blacklist)
    before_count = sum(x["count"] for x in data if x["value"] in before_whitelist)
    new_cats = {x["value"] for x in data} - before_whitelist - after_blacklist

    print("\n####", kind, "\n")
    print(f" - new total: {total_count}")
    # _percent guards against an empty or fully blacklisted dump (total == 0).
    print(f" - previously: {before_count} ({_percent(before_count, total_count):.0f}%)")
    print(" - top 10 new imports:")

    # Highest count first; equal counts fall back to descending value.
    top_new = sorted(
        ((x["count"], x["value"]) for x in data if x["value"] in new_cats),
        reverse=True,
    )[:10]
    for count, val in top_new:
        print(f"   * {val}: {count} ({_percent(count, total_count):.2f}%)")
	from collections import Counter


	def load_pg_request(path):
		"""Parse a pipe-separated dump into ``{osm_id: (key, subclass)}``.

		Each non-blank line of the file at *path* must hold exactly three
		``|``-separated fields; surrounding whitespace is stripped from every
		field and all values stay strings. When an ``osm_id`` occurs more than
		once, the last occurrence wins.

		Raises:
			ValueError: if a non-blank line does not split into three fields.
			OSError: if *path* cannot be opened.
		"""
		rows = {}
		# The context manager closes the handle even when a malformed line raises.
		with open(path) as dump:
			for line in dump:
				if not line.strip():
					continue  # tolerate blank lines such as a trailing newline
				# Split on a bare "|"; an escaped "\|" would look for a literal
				# backslash followed by a pipe and never match the fields.
				osm_id, key, subclass = (field.strip() for field in line.split("|"))
				rows[osm_id] = (key, subclass)
		return rows


	# Load both snapshots; each maps osm_id -> (key, subclass).
	before = load_pg_request("classes_before_pr.txt")
	after = load_pg_request("classes_after_pr.txt")

	# Tally every "old -> new" transition for ids present in both snapshots
	# whose class actually changed.
	stats = Counter(
		f"{before[x]} -> {after[x]}"
		for x in before.keys() & after.keys()
		if before[x] != after[x]
	)

	# Print the 50 most frequent transitions, most common first.
	for x, count in stats.most_common(50):
		print(f" - {count:>4}x {x}")
	import json

	# Per-kind configuration consumed by the report loop further down this file.
	#   "before_whitelist": values that the loop counts as "previously" imported.
	#   "after_blacklist":  values that the loop leaves out of the new total
	#                       (optional; the loop treats a missing entry as empty).
	# The top-level keys double as the stem of the "<kind>.json" file that the
	# loop reads. Presumably these are OpenStreetMap tag keys/values — confirm.
	CFG = {
		"amenity": {
			"before_whitelist": {
				"arts_centre", "bank", "bar", "bbq", "bicycle_parking", "bicycle_rental",
				"biergarten", "bus_station", "cafe", "cinema", "clinic", "college",
				"community_centre", "courthouse", "dentist", "doctors", "embassy",
				"fast_food", "ferry_terminal", "fire_station", "food_court", "fuel",
				"grave_yard", "hospital", "ice_cream", "kindergarten", "library",
				"marketplace", "motorcycle_parking", "nightclub", "nursing_home",
				"parking", "pharmacy", "place_of_worship", "police", "post_box",
				"post_office", "prison", "pub", "public_building", "recycling",
				"restaurant", "school", "shelter", "taxi", "telephone", "theatre",
				"toilets", "townhall", "university", "veterinary", "waste_basket",
			},
			"after_blacklist": {
				"bench", "parking_space", "drinking_water", "vending_machine",
				"water_point", "parking_entrance",
			},
		},
		"craft": {
			"before_whitelist": {
				"carpenter", "shoemaker", "winery", "tailor", "photographer",
				"electrician", "metal_construction", "brewery", "plumber", "sawmill",
				"electronics_repair", "caterer", "hvac", "confectionery",
				"window_construction", "dressmaker", "handicraft", "gardener",
				"stonemason", "painter", "glaziery", "beekeeper", "key_cutter",
				"blacksmith", "roofer", "upholsterer", "pottery", "builder",
				"jeweller", "joiner", "photographic_laboratory", "locksmith",
				"distillery", "tiler", "clockmaker", "watchmaker",
				"agricultural_engines", "signmaker", "optician", "tinsmith",
				"sculptor", "grinding_mill", "scaffolder", "boatbuilder", "floorer",
				"bookbinder", "bakery", "plasterer", "printer", "carpet_layer",
				"saddler", "cabinet_maker", "welder", "print_shop", "insulation",
				"dental_technician", "chimney_sweeper", "sun_protection",
				"parquet_layer", "printmaker", "car_repair", "sailmaker",
				"basket_maker", "musical_instrument", "toolmaker", "atelier",
				"engraver", "goldsmith", "oil_mill", "turner", "luthier",
				"organ_builder", "leather", "rigger", "car_painter", "embroiderer",
				"sewing", "restoration", "stand_builder", "paver", "sweep",
				"door_construction", "glassblower", "information_electronics",
				"water_well_drilling", "piano_tuner", "carpet_cleaner", "cooper",
				"mint", "lacquerer", "paperhanger", "bag_repair",
			}
		},
		"leisure": {
			"before_whitelist": {
				"dog_park", "escape_game", "garden", "golf_course", "ice_rink",
				"hackerspace", "marina", "miniature_golf", "park", "pitch",
				"playground", "sports_centre", "stadium", "swimming_area", "water_park",
			},
			"after_blacklist": {"picnic_table", "track", "common", "swimming_pool"},
		},
		"shop": {
			"before_whitelist": {
				"accessories", "alcohol", "antiques", "art", "bag", "bakery", "beauty",
				"bed", "beverages", "bicycle", "books", "boutique", "butcher", "camera",
				"car", "car_repair", "carpet", "charity", "chemist", "chocolate",
				"clothes", "coffee", "computer", "confectionery", "convenience",
				"copyshop", "cosmetics", "deli", "delicatessen", "department_store",
				"doityourself", "dry_cleaning", "electronics", "erotic", "fabric",
				"farm", "florist", "frozen_food", "furniture", "garden_centre",
				"general", "gift", "greengrocer", "hairdresser", "hardware",
				"hearing_aids", "hifi", "ice_cream", "interior_decoration", "jewelry",
				"kiosk", "lamps", "laundry", "mall", "massage", "mobile_phone",
				"motorcycle", "music", "musical_instrument", "newsagent", "optician",
				"outdoor", "perfume", "perfumery", "pet", "photo", "second_hand",
				"shoes", "sports", "stationery", "supermarket", "tailor", "tattoo",
				"ticket", "tobacco", "toys", "travel_agency", "video", "video_games",
				"watches", "weapons", "wholesale", "wine",
			}
		},
		"sport": {
			"before_whitelist": {
				"american_football", "archery", "athletics", "australian_football",
				"badminton", "baseball", "basketball", "beachvolleyball", "billiards",
				"bmx", "boules", "bowls", "boxing", "canadian_football", "canoe",
				"chess", "climbing", "climbing_adventure", "cricket", "cricket_nets",
				"croquet", "curling", "cycling", "disc_golf", "diving", "dog_racing",
				# NOTE(review): "fatsal" looks like a misspelling of "futsal"; left
				# untouched because it may mirror the previous list — confirm.
				"equestrian", "fatsal", "field_hockey", "free_flying", "gaelic_games",
				"golf", "gymnastics", "handball", "hockey", "horse_racing",
				"horseshoes", "ice_hockey", "ice_stock", "judo", "karting",
				"korfball", "long_jump", "model_aerodrome", "motocross", "motor",
				"multi", "netball", "orienteering", "paddle_tennis", "paintball",
				"paragliding", "pelota", "racquet", "rc_car", "rowing", "rugby",
				"rugby_league", "rugby_union", "running", "sailing", "scuba_diving",
				"shooting", "shooting_range", "skateboard", "skating", "skiing",
				"soccer", "surfing", "swimming", "table_soccer", "table_tennis",
				"team_handball", "tennis", "toboggan", "volleyball", "water_ski",
				"yoga",
			}
		},
		"tourism": {
			"before_whitelist": {
				"alpine_hut", "aquarium", "artwork", "attraction", "bed_and_breakfast",
				"camp_site", "caravan_site", "chalet", "gallery", "guest_house",
				"hostel", "hotel", "information", "motel", "museum", "picnic_site",
				"theme_park", "viewpoint", "zoo",
			}
		},
	}

	def _percent(part, whole):
		"""Return *part* as a percentage of *whole*, or 0.0 when *whole* is zero."""
		return 100 * part / whole if whole else 0.0


	# For every kind in CFG, read "<kind>.json" and report how much of the new
	# total was already covered by the old whitelist, plus the ten largest values
	# that are newly included.
	for kind, kind_cfg in CFG.items():
		# Close the dump as soon as it is parsed instead of leaking the handle.
		with open(kind + ".json") as dump:
			data = json.load(dump)["data"]  # entries are indexed by "value" and "count"

		before_whitelist = kind_cfg.get("before_whitelist", set())
		# "yes" and "no" are always excluded, on top of the per-kind blacklist.
		after_blacklist = kind_cfg.get("after_blacklist", set()) | {"yes", "no"}

		total_count = sum(x["count"] for x in data if x["value"] not in after_blacklist)
		before_count = sum(x["count"] for x in data if x["value"] in before_whitelist)
		new_cats = {x["value"] for x in data} - before_whitelist - after_blacklist

		print("\n####", kind, "\n")
		print(f" - new total: {total_count}")
		# _percent guards against an empty or fully blacklisted dump (total == 0).
		print(f" - previously: {before_count} ({_percent(before_count, total_count):.0f}%)")
		print(" - top 10 new imports:")

		# Highest count first; equal counts fall back to descending value.
		top_new = sorted(
			((x["count"], x["value"]) for x in data if x["value"] in new_cats),
			reverse=True,
		)[:10]
		for count, val in top_new:
			# Three leading spaces nest the bullet under the " - " item above.
			print(f"   * {val}: {count} ({_percent(count, total_count):.2f}%)")