Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
IANA Language Tag Parser
"""
Parses the language tags provided by IANA_
For a definition of the tag types, see
http://www.w3.org/International/questions/qa-choosing-language-tags
IANA_: http://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
"""
from collections import OrderedDict
from pprint import pprint
from urllib.request import urlopen
# Address of the IANA language subtag registry that this script downloads.
URL = (
    "http://www.iana.org/assignments/language-subtag-registry/"
    "language-subtag-registry"
)
def string_lines(iterator):
    """Yield logical lines from *iterator*, joining folded continuations.

    Each item is decoded as UTF-8 when it is ``bytes``/``bytearray``
    (``str`` items are used as-is) and its trailing line ending is removed.
    A line starting with a space continues the previous logical line: its
    first character is dropped and the remainder is appended to that line.
    Empty logical lines are not yielded.

    Args:
        iterator: iterable of raw lines (``bytes`` or ``str``).

    Yields:
        str: one logical line at a time, without line terminators.
    """
    prev_line = None
    for line in iterator:
        if isinstance(line, (bytes, bytearray)):
            line = line.decode(encoding='UTF-8')
        # Strip both LF and CRLF endings so no stray '\r' leaks into values.
        line = line.rstrip('\r\n')
        if line.startswith(' '):
            # A continuation with nothing before it starts a new logical
            # line instead of failing on ``None += str``.
            prev_line = (prev_line or '') + line[1:]
            continue
        if prev_line:
            yield prev_line
        prev_line = line
    # Flush the last pending logical line.
    if prev_line:
        yield prev_line
def locale_chunks(iterator):
    """Group logical lines into records delimited by ``%%`` lines.

    Every non-delimiter line is split at the first ``': '`` into a
    ``[key, value]`` pair; lines without that separator are skipped.

    Args:
        iterator: iterable of ``str`` lines.

    Yields:
        list: the ``[key, value]`` pairs of one record, in input order.
        A record is emitted at each ``%%`` line, and once more at the end
        of input if any pairs are still pending.
    """
    data = []
    for line in iterator:
        if line == '%%':
            yield data
            data = []
            continue
        # Split only at the first separator so values that themselves
        # contain ': ' are kept whole.
        key, separator, value = line.partition(': ')
        if separator:
            data.append([key, value])
    # The input need not end with '%%'; do not lose the trailing record.
    if data:
        yield data
def locale_dict(iterator):
    """Turn each sequence of (key, value) pairs into a dictionary.

    Args:
        iterator: iterable whose items are iterables of two-element pairs.

    Yields:
        dict: one mapping per item; when a key repeats within an item,
        the last value wins.
    """
    for pairs in iterator:
        record = {}
        for field, content in pairs:
            record[field] = content
        yield record
def language_filter(iterator):
    """Select language-like records and reduce them to (code, description).

    Records whose ``'Type'`` is ``'language'`` or ``'extlang'`` contribute
    their ``'Subtag'``; records of type ``'redundant'`` contribute their
    ``'Tag'``. All other records are ignored.

    Args:
        iterator: iterable of record dictionaries.

    Yields:
        tuple: ``(code, description)`` for every selected record.

    Raises:
        KeyError: if a selected record lacks the code field or
            ``'Description'``.
    """
    for record in iterator:
        kind = record.get('Type')
        if kind in ('language', 'extlang'):
            code_field = 'Subtag'
        elif kind == 'redundant':
            code_field = 'Tag'
        else:
            continue
        yield record[code_field], record['Description']
# Download the registry and run it through the parsing pipeline:
# raw lines -> logical lines -> records -> dicts -> (code, description).
# The response is used as a context manager so the connection is closed
# once the lazy pipeline has been fully consumed by sorted().
with urlopen(URL) as response:
    language_iterator = language_filter(locale_dict(locale_chunks(
        string_lines(response))))
    # ``key`` belongs to sorted(); passing it to OrderedDict() would add a
    # spurious 'key' entry (mapping to the lambda) to LANGUAGES.
    LANGUAGES = OrderedDict(sorted(language_iterator,
                                   key=lambda item: item[0]))
pprint(LANGUAGES)
@Bouke

This comment has been minimized.

Copy link
Owner Author

@Bouke Bouke commented Nov 11, 2013

Parses the language tags provided by IANA. For a definition of the tag types, see W3's explanation.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.