Skip to content

Instantly share code, notes, and snippets.

@vladimirgamalyan
Last active June 5, 2017 09:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save vladimirgamalyan/f30264f0fbda5d6d9171 to your computer and use it in GitHub Desktop.
Save vladimirgamalyan/f30264f0fbda5d6d9171 to your computer and use it in GitHub Desktop.
Grab MCC/MNC codes from Wikipedia and save them as JSON
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
import json
import re
def add_operator(mcc, mnc, brand, operator, status, country, country_code, db):
    """Record one operator in ``db`` under its MCC and MNC.

    ``db`` is a nested mapping ``{mcc: {mnc: record}}`` and is mutated in
    place.  An existing record for the same MCC/MNC pair is overwritten.

    Args:
        mcc: Mobile country code; must be exactly three digits.
        mnc: Mobile network code; must be two or three digits.
        brand: Brand name, stored under the ``'brand'`` key.
        operator: Operator name, stored under the ``'operator'`` key.
        status: Accepted for call compatibility; it is not stored.
        country: Country name, stored under the ``'country'`` key.
        country_code: Stored under ``'countryCode'``; may be empty and may
            only contain uppercase ``A-Z``, ``/`` and ``-``.
        db: Dictionary that receives the record.

    Raises:
        ValueError: If ``mcc``, ``mnc`` or ``country_code`` is malformed.
    """
    # Patterns are anchored with \Z instead of $: '$' also matches just
    # before a trailing newline, which would let values such as '250\n'
    # through and turn them into dictionary keys.  Explicit raises are used
    # instead of assert so the checks survive ``python -O``.
    if not re.match(r'\d{3}\Z', mcc):
        raise ValueError('invalid MCC: %r' % (mcc,))
    if not re.match(r'\d{2,3}\Z', mnc):
        raise ValueError('invalid MNC: %r' % (mnc,))
    if not re.match(r'[A-Z/-]*\Z', country_code):
        raise ValueError('invalid country code: %r' % (country_code,))

    db.setdefault(mcc, {})[mnc] = {
        'brand': brand,
        'operator': operator,
        'country': country,
        'countryCode': country_code,
    }
def scan_table(table, country, country_code, db):
    """Collect the operators listed in one MCC/MNC table into ``db``.

    The first ``<tr>`` of ``table`` is treated as the header and must start
    with the columns MCC, MNC, Brand, Operator and Status.  Every following
    row with a usable MCC and MNC is passed to ``add_operator``.

    Args:
        table: Parsed table element exposing ``find_all`` (rows are expected
            to expose ``find_all`` and cells a ``text`` attribute).
        country: Country name attached to every operator in the table.
        country_code: Country code attached to every operator in the table.
        db: Dictionary that ``add_operator`` fills in.

    Raises:
        ValueError: If the table has no rows or its header does not match.
    """
    expected_header = [u'MCC', u'MNC', u'Brand', u'Operator', u'Status']
    marker = '[citation needed]'

    rows = table.find_all('tr')
    header_cells = rows.pop(0).find_all('th') if rows else []
    # Cell text may carry surrounding whitespace/newlines from the markup,
    # so compare the stripped text.
    header = [cell.text.strip() for cell in header_cells[:5]]
    if header != expected_header:
        raise ValueError('unexpected table header: %r' % (header,))

    for row in rows:
        cells = row.find_all('td')
        if len(cells) < 5:
            # Not a full data row (e.g. a nested header or a spanning
            # cell); there is nothing reliable to extract from it.
            continue

        mcc = cells[0].text.strip()
        mnc = cells[1].text.strip()
        brand = cells[2].text.replace(marker, '').strip()
        operator = cells[3].text.replace(marker, '').strip()
        # Parenthesised remarks are dropped from the status column.
        status = re.sub(r'\([^)]*\)', '', cells[4].text.replace(marker, '')).strip()

        if not mcc or not mnc or '?' in mnc:
            continue
        if '-' in mnc:
            # TODO: mnc range
            continue
        add_operator(mcc, mnc, brand, operator, status, country, country_code, db)
def contains_headline(tag):
    """Tell whether ``tag`` has a descendant with class ``mw-headline``.

    Used as a filter callback: returns ``True`` when ``tag.find`` locates
    such an element and ``False`` when it returns ``None``.
    """
    headline = tag.find(class_='mw-headline')
    if headline is None:
        return False
    return True
def main():
    """Scrape the Wikipedia MCC/MNC tables and write them to ``mccmnc.json``.

    Every table whose header contains an ``MCC`` cell is scanned.  The
    nearest preceding sibling containing an ``mw-headline`` element supplies
    the title, which is split on ``' - '`` into a country name and an
    optional country code.

    Raises:
        requests.RequestException: If the page cannot be downloaded.
        ValueError: If a table has no headline or the headline has an
            unexpected shape.
    """
    db = {}

    # Without a timeout the request can block forever; raise_for_status
    # keeps an HTTP error page from being parsed as if it were the article.
    response = requests.get('https://en.wikipedia.org/wiki/Mobile_country_code', timeout=30)
    response.raise_for_status()

    # NOTE(review): 'xml' selects an XML parser although the page is HTML --
    # confirm whether 'html.parser' or 'lxml' was intended.
    soup = BeautifulSoup(response.text, 'xml')

    for th in soup.find_all('th', text='MCC'):
        table = th.find_parent('table')
        heading = table.find_previous_sibling(contains_headline)
        if heading is None:
            raise ValueError('no headline found before an MCC table')

        title_parts = heading.find(class_='mw-headline').find_all(text=True)
        tab_title = ''.join(title_parts).split(' - ')
        if len(tab_title) not in (1, 2):
            raise ValueError('unexpected table title: %r' % (tab_title,))

        country = tab_title.pop(0)
        # Empty string when the title carries no country code.
        country_code = ''.join(tab_title)
        scan_table(table, country, country_code, db)

    with open('mccmnc.json', 'w') as f:
        json.dump(db, f, indent=4, sort_keys=True)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment