Skip to content

Instantly share code, notes, and snippets.

@JettScythe
Created March 13, 2023 20:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JettScythe/5959b5b159386cfb330cf62b4cbda842 to your computer and use it in GitHub Desktop.
Save JettScythe/5959b5b159386cfb330cf62b4cbda842 to your computer and use it in GitHub Desktop.
from phonenumbers import geocoder, carrier, number_type, parse, is_valid_number
import argparse
import json
import csv
from phone_gen import PhoneNumber
from rapidfuzz import process, fuzz
# Display labels for the integer phone-number-type codes; the script indexes
# this table with the value returned by ``number_type()``.
type_map = {
    0: "Fixed Line",
    1: "Mobile",
    2: "Fixed Line or Mobile",
    3: "Toll Free",
    4: "Premium Rate",
    5: "Shared Cost",
    6: "VOIP",
    7: "Personal Number",
    8: "Pager",
    9: "Universal Access Number / Company Number",
    10: "Voicemail",
    99: "Unknown",
}
# Command-line interface: two required positionals, parsed at import time so
# that ``args`` is available to the rest of the script.
parser = argparse.ArgumentParser(
    description="Generate phone numbers for a country and save them to a CSV file.",
)
parser.add_argument(
    "country_code",
    type=str,
    help="The country code for the phone numbers to be generated. eg: US, CA, IN",
)
parser.add_argument(
    "num_of_phones",
    type=int,
    help="The amount of numbers you would like to generate",
)
args = parser.parse_args()
# Load the two lookup tables the script needs before any number is processed.
# Both files are opened in binary mode: ``json.load`` then detects the
# UTF-8/16/32 encoding itself instead of depending on the platform's default
# text encoding, which can mis-decode non-ASCII names.

# pincode_map is looked up by city name and yields that city's pincode.
with open('pincodes.json', 'rb') as pincode_file:
    pincode_map = json.load(pincode_file)

# pincode_district_map is iterated as a sequence of records that are read via
# their "Pincode" and "Districtname" entries.
with open('pincode_district_map.json', 'rb') as pincode_districts_file:
    pincode_district_map = json.load(pincode_districts_file)
def create_final_dict(exact_matches):
    """Flatten matched rows into a single lookup dictionary.

    For every row, the lower-cased ``Level`` value becomes a key mapped to the
    row's ``Name``.  When the row's ``TRU`` value is ``"Rural"``, ``"Urban"``
    or ``"Total"``, the row's ``TOT_P`` value is additionally stored under
    ``"<Name>_rural_pop"``, ``"<Name>_urban_pop"`` or ``"<Name>_total_pop"``.
    Rows with any other ``TRU`` value contribute only the level entry.

    Later rows overwrite earlier rows that produce the same key.  Because the
    population keys are built from the name alone, two rows that share a name
    but differ in level write to the same population keys.

    :param exact_matches: iterable of mappings with ``Level``, ``Name``,
        ``TRU`` and ``TOT_P`` entries.
    :return: the flattened dictionary (empty when there are no rows).
    """
    suffix_by_tru = {
        "Rural": "rural_pop",
        "Urban": "urban_pop",
        "Total": "total_pop",
    }
    flattened = {}
    for record in exact_matches:
        place = record["Name"]
        flattened[record["Level"].lower()] = place
        suffix = suffix_by_tru.get(record["TRU"])
        if suffix is not None:
            flattened[f"{place}_{suffix}"] = record["TOT_P"]
    return flattened
def get_needed_maps_data(assumed_district: str, search_term: str) -> dict:
    """Collect the ``merged.csv`` rows describing a place inside a district.

    The file is read from the current working directory and must provide at
    least ``Level`` and ``Name`` columns.  Rows are scanned in file order.  A
    row whose ``Level`` is ``"DISTRICT"`` opens a section; the section is
    treated as the target district when its name equals *assumed_district*
    (case-insensitive) and ends at the next ``DISTRICT`` row with a different
    name.
    NOTE(review): this assumes every row belonging to a district follows that
    district's ``DISTRICT`` row in the file -- confirm against the data.

    Inside the target district three kinds of rows are kept:

    * ``DISTRICT`` rows for the district itself, including the first one;
    * non-district rows whose name equals *search_term* (case-insensitive);
    * ``SUB-DISTRICT`` rows that carry the district's own name.

    :param assumed_district: district name to locate; compared
        case-insensitively.
    :param search_term: place name to look for inside that district; compared
        case-insensitively.
    :return: the result of ``create_final_dict`` applied to the kept rows, in
        the order place matches, district rows, sub-district rows.
    :raises OSError: if ``merged.csv`` cannot be opened.
    :raises KeyError: if a row lacks the ``Level`` or ``Name`` column.
    """
    district_key = assumed_district.lower()
    place_key = search_term.lower()

    exact_city_matches = []
    exact_subdistrict_matches = []
    exact_district_matches = []

    # newline="" is what the csv module asks for so that quoted fields with
    # embedded line breaks are parsed correctly.
    with open("merged.csv", newline="") as maps_data:
        inside_district = False
        for row in csv.DictReader(maps_data):
            name = row["Name"].lower()
            level = row["Level"]

            if level == "DISTRICT":
                # Every DISTRICT row either continues the target district
                # (repeated rows with the same name) or closes it.
                inside_district = name == district_key
                if inside_district:
                    exact_district_matches.append(row)
                continue

            if not inside_district:
                # Same-named places in other districts must not be picked up.
                continue

            if name == place_key:
                exact_city_matches.append(row)
            elif name == district_key and level == "SUB-DISTRICT":
                exact_subdistrict_matches.append(row)

    merged_list = exact_city_matches + exact_district_matches + exact_subdistrict_matches
    return create_final_dict(merged_list)
def generate_mobile_numbers():
    """Generate ``args.num_of_phones`` distinct valid numbers for ``args.country_code``.

    Candidates come from ``phone_gen.PhoneNumber`` and are kept only when
    ``is_valid_number`` accepts the parsed value.  Despite the function name
    no filtering by number type happens here, so any valid type can appear.

    The loop only ends once enough distinct numbers have been collected, so it
    does not terminate if the generator cannot supply that many.

    :return: a set of phone-number strings; empty when the requested amount
        is zero or negative.
    """
    # The generator depends only on the country code, so build it once rather
    # than once per candidate.
    number_source = PhoneNumber(args.country_code)
    wanted = args.num_of_phones

    unique_phone_numbers = set()
    while len(unique_phone_numbers) < wanted:
        candidate = number_source.get_number()
        if is_valid_number(parse(candidate)):
            unique_phone_numbers.add(candidate)
    return unique_phone_numbers
def _split_city_region(description):
    """Split a geocoder description into ``(city, region)``.

    ``"City, Region"`` gives ``("City", "Region")``.  A description without a
    comma gives ``("", description)``.  Only the first comma separates the two
    parts, so additional commas or a missing space after the comma cannot
    break the split.
    """
    city, separator, region = description.partition(",")
    if not separator:
        return "", description
    return city.strip(), region.strip()


def _district_for_pincode(pincode, district_by_pincode):
    """Return the lower-cased district name for *pincode*, or ``None``.

    ``None`` is returned when the pincode cannot be converted to an integer or
    when no record is stored for it.
    """
    try:
        record = district_by_pincode.get(int(pincode))
    except (TypeError, ValueError):
        return None
    if record is None:
        return None
    return record["Districtname"].lower()


# Index the pincode records once instead of scanning the whole list for every
# phone number.  For a repeated pincode the last record wins, which is also
# what a full scan that keeps overwriting its result ends up with.
_DISTRICT_BY_PINCODE = {record.get("Pincode"): record for record in pincode_district_map}

# Figures written into the three District population columns when the geocoder
# description is exactly "India".
# NOTE(review): presumably country-wide total / rural / urban counts -- confirm.
_INDIA_FIGURES = [1416459205, 909384771, 498179071]

# newline='' is what the csv module asks for on output files; without it some
# platforms emit a blank line after every row.
with open(f'phone_numbers_{args.country_code}_{args.num_of_phones}.csv',
          mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(
        ['Phone Number', 'Type', 'Parsed Carrier', 'Parsed City',
         'Parsed Region', 'Pincode', 'District From Pincode', 'Sub-District From Pincode',
         'Village From Pincode', 'Village Total Population', 'Village Rural Population', 'Village Urban Population',
         'Sub-District Total Population', 'Sub-District Rural Population', 'Sub-District Urban Population',
         'District Total Population', 'District Rural Population', 'District Urban Population']
    )
    for phone_num in generate_mobile_numbers():
        parsed_phone_number = parse(phone_num)
        description = geocoder.description_for_number(parsed_phone_number, "en")
        parsed_city, parsed_region = _split_city_region(description)

        # Reset per number so that a number without a city, pincode or
        # district never reuses the previous number's data.
        pincode = ""
        mapped_data_row = {}

        if parsed_city:
            lookup_city = parsed_city
            if lookup_city not in pincode_map:
                # Unknown spelling: fall back to the closest known city name.
                best_match = process.extractOne(parsed_city, pincode_map.keys(), scorer=fuzz.WRatio)
                lookup_city = best_match[0] if best_match else None
            if lookup_city is not None:
                pincode = pincode_map[lookup_city]
                district_name = _district_for_pincode(pincode, _DISTRICT_BY_PINCODE)
                if district_name is not None:
                    mapped_data_row = get_needed_maps_data(district_name, lookup_city)

        parsed_carrier = carrier.name_for_number(parsed_phone_number, "en", region=args.country_code)
        phone_number_type = type_map.get(number_type(parsed_phone_number), "Unknown")

        row = [phone_num, phone_number_type, parsed_carrier, parsed_city, parsed_region, pincode]
        if parsed_region == "India":
            # Only the country is known: leave the place and the village /
            # sub-district population columns blank.
            row += [""] * 9 + _INDIA_FIGURES
        else:
            district = mapped_data_row.get("district")
            subdistrict = mapped_data_row.get("sub-district")
            village = mapped_data_row.get("village")
            row += [district, subdistrict, village]
            # Column order: village, sub-district, district -- each as
            # total, rural, urban.
            # NOTE(review): values are looked up by place name, so a district
            # and a sub-district that share a name read the same entries.
            for place in (village, subdistrict, district):
                for kind in ("total", "rural", "urban"):
                    row.append(mapped_data_row.get(f"{place}_{kind}_pop"))
        writer.writerow(row)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment