Skip to content

Instantly share code, notes, and snippets.

@acdha
Created June 12, 2015 22:32
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save acdha/49a610089c2798db6fe2 to your computer and use it in GitHub Desktop.
Save acdha/49a610089c2798db6fe2 to your computer and use it in GitHub Desktop.
Ways to get the name of a Unicode block for a character in Python
#!/usr/bin/env PYTHONIOENCODING=utf-8 python
# encoding: utf-8
from __future__ import absolute_import, print_function, unicode_literals
import os
import re
import requests
def get_block_for_codepoint(cp):
    """Look up which Unicode block contains the numeric codepoint ``cp``.

    Walks the module-level ``UNICODE_BLOCKS`` sequence of
    ``(start, end, block_name)`` tuples in order and returns the name of the
    first range that includes ``cp`` (both bounds inclusive). When no range
    matches, the string ``'No_Block'`` is returned instead.
    """
    matching_names = (
        name for first, last, name in UNICODE_BLOCKS if first <= cp <= last
    )
    # next() with a default yields the first hit or the fallback name.
    return next(matching_names, 'No_Block')
def load_unicode_blocks_from_file(f):
    """Parse Unicode ``Blocks.txt``-style data from an open file object.

    Args:
        f: A readable file-like object. If ``f.read()`` returns bytes they
            are decoded as UTF-8; if it already returns text (a file opened
            in text mode, ``io.StringIO``) the text is used as-is.

    Returns:
        A list of ``(start, end, block_name)`` tuples in file order, where
        ``start`` and ``end`` are the integer codepoint bounds parsed from
        hexadecimal ``START..END; Name`` entries. Entries named
        ``'No_Block'`` are omitted.
    """
    file_contents = f.read()
    # Decode only when we actually received bytes: on Python 3 a text-mode
    # stream returns str, which has no .decode() method.
    if isinstance(file_contents, bytes):
        file_contents = file_contents.decode('utf-8')

    entries = re.findall(r'([0-9A-F]+)\.\.([0-9A-F]+);\ (\S.*\S)', file_contents)
    # The pattern is not anchored to the start of a line, so a range that
    # appears inside a comment (such as a "# @missing: ...; No_Block" default
    # line) is matched too; drop that placeholder instead of storing it.
    return [
        (int(start, 16), int(end, 16), block_name)
        for start, end, block_name in entries
        if block_name != 'No_Block'
    ]
def load_unicode_blocks(block_filename):
    """Load the Unicode block table, downloading ``Blocks.txt`` if needed.

    If ``block_filename`` does not exist, the Unicode Character Database
    ``Blocks.txt`` file is fetched from unicode.org and saved to that path
    first. The file is then opened in binary mode and parsed with
    ``load_unicode_blocks_from_file``.

    Args:
        block_filename: Path of the local copy of ``Blocks.txt`` to read
            (and to create when it is missing).

    Returns:
        The list of ``(start, end, block_name)`` tuples produced by
        ``load_unicode_blocks_from_file``.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server responds with an error status.
    """
    if not os.path.exists(block_filename):
        print('Unicode block file %s does not exist. Downloading…' % block_filename)
        # Fetch over HTTPS and bound the wait so a stalled connection cannot
        # hang the caller indefinitely.
        r = requests.get('https://www.unicode.org/Public/UNIDATA/Blocks.txt', timeout=30)
        # Check the status before creating the file so an error response
        # never leaves a bogus cache file behind.
        r.raise_for_status()
        with open(block_filename, 'wb') as f:
            # The body is already fully downloaded (no stream=True), so write
            # it in one call; iter_content() defaults to 1-byte chunks.
            f.write(r.content)
    with open(block_filename, 'rb') as f:
        return load_unicode_blocks_from_file(f)
# Build the block table once, when this module is first executed/imported.
# NOTE: this is a module-level side effect: it reads 'UNIDATA-Blocks.txt'
# (a relative path) and, if that file is missing, downloads it first.
UNICODE_BLOCKS = load_unicode_blocks('UNIDATA-Blocks.txt')
import icu
# Astoundingly, PyICU has no documented way to get a Unicode block name.
# There are two ways to get the offset into the UCodeBlock enum
# – icu.Char.ublock_getCode and icu.Char.getIntPropertyValue(…, icu.UProperty.BLOCK) –
# so we'll build a lookup table to turn that into a human-readable string.
#
# The table maps each attribute value of icu.UBlockCode to its attribute
# name, keeping only names for which str.isupper() is true (presumably the
# enum constants; lowercase attributes such as dunders are filtered out).
# NOTE(review): this rebinds UNICODE_BLOCKS to a dict. If it lives in the same
# module as get_block_for_codepoint(), that function's tuple-unpacking loop
# over UNICODE_BLOCKS will no longer work — confirm these are separate files.
UNICODE_BLOCKS = {getattr(icu.UBlockCode, i): i for i in icu.UBlockCode.__dict__ if i.isupper()}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment