Created
July 15, 2012 14:41
-
-
Save ycopin/3117239 to your computer and use it in GitHub Desktop.
Search through unicode character descriptions.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# Time-stamp: <2012-07-15 16:39 ycopin@lyopc469> | |
# Copyright: This document has been placed in the public domain. | |
""" | |
Search through unicode character descriptions, e.g.:: | |
$ searchUnicode.py letter lamda | |
Λ GREEK CAPITAL LETTER LAMDA Λ 0x39b | |
λ GREEK SMALL LETTER LAMDA λ 0x3bb | |
ᴧ GREEK LETTER SMALL CAPITAL LAMDA ᴧ 0x1d27 | |
$ searchUnicode.py "letter lamda" | |
Λ GREEK CAPITAL LETTER LAMDA Λ 0x39b | |
λ GREEK SMALL LETTER LAMDA λ 0x3bb | |
""" | |
__author__ = "Yannick Copin <yannick.copin@laposte.net>" | |
import sys | |
import re | |
import unicodedata | |
maxpage = 831 # It becomes really exotic after... | |
regexp = re.compile('.*'.join(sys.argv[1:])) | |
for page in xrange(0, maxpage*16+1, 16): | |
for cell in xrange(0, 16): | |
x = page + cell | |
try: # Valid Unicode char | |
name = unicodedata.name(unichr(x)) | |
except ValueError: # Invalid Unicode char | |
continue | |
search = regexp.search(name.lower()) | |
if search is not None: | |
print unichr(x), name, "&#%d" % x, "0x%0x" % x |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment