Skip to content

Instantly share code, notes, and snippets.

@bound2
Created August 1, 2017 08:01
Show Gist options
  • Save bound2/813ee50072188c347c21c9e783ce839a to your computer and use it in GitHub Desktop.
Save bound2/813ee50072188c347c21c9e783ce839a to your computer and use it in GitHub Desktop.
Extract mobile country codes (MCCs), grouped by country ISO code, from the mcc-mnc.com web page.
from bs4 import BeautifulSoup
import urllib2
from collections import defaultdict
MCC_MNC_URL = "http://mcc-mnc.com"


def iter_mcc_country_pairs(rows):
    """Yield ``(mcc, country_iso_code)`` for every usable table row.

    The MCC is read from the first ``td`` cell of a row and the country ISO
    code from the third one; both values are stripped of surrounding
    whitespace.

    Rows with fewer than three ``td`` cells are skipped, as are rows where
    either cell's ``.string`` is ``None``, so one malformed row does not
    abort the whole run.

    Args:
        rows: Iterable of parsed ``tr`` elements.
    """
    for row in rows:
        columns = row.findAll("td")
        if len(columns) < 3:
            continue
        mcc = columns[0].string
        country_iso_code = columns[2].string
        if mcc is None or country_iso_code is None:
            continue
        yield mcc.strip(), country_iso_code.strip()


def group_mccs_by_country(pairs):
    """Group mobile country codes by country ISO code.

    Args:
        pairs: Iterable of ``(mcc, country_iso_code)`` string tuples.

    Returns:
        A dict mapping each country ISO code to the set of distinct MCCs
        seen for it.
    """
    grouped = defaultdict(set)
    for mcc, country_iso_code in pairs:
        grouped[country_iso_code].add(mcc)
    return dict(grouped)


def format_put_statement(country_iso_code, mccs):
    """Return the ``.put(...)`` line for one country.

    Args:
        country_iso_code: Country ISO code; it is upper-cased in the output.
        mccs: Iterable of MCC strings for that country.

    Returns:
        A string of the form ``.put("EE", ImmutableList.of(248))`` with the
        MCCs sorted and comma-separated.
    """
    # Sorting keeps the generated line stable from run to run; set iteration
    # order alone is arbitrary.
    return '.put("%s", ImmutableList.of(%s))' % (
        country_iso_code.upper(), ", ".join(sorted(mccs)))


def main(url=MCC_MNC_URL):
    """Download the MCC table and print one ``.put(...)`` line per country.

    Args:
        url: Page to fetch; its first ``tbody`` element is parsed.

    Raises:
        RuntimeError: If the page contains no ``tbody`` element.
    """
    from contextlib import closing

    # The urllib2 response object is not a context manager on Python 2, so
    # closing() is used to make sure the connection is released.
    with closing(urllib2.urlopen(url)) as page:
        html = page.read()

    soup = BeautifulSoup(html, "html.parser")
    table_body = soup.find("tbody")
    if table_body is None:
        raise RuntimeError("no <tbody> element found at %s" % url)

    data = group_mccs_by_country(
        iter_mcc_country_pairs(table_body.findAll("tr")))
    for country_iso_code in sorted(data):
        print(format_put_statement(country_iso_code, data[country_iso_code]))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment