Skip to content

Instantly share code, notes, and snippets.

@CoDEmanX
Created November 15, 2018 15:57
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save CoDEmanX/f88ab41a3d71c1e69925bac7cb6cb953 to your computer and use it in GitHub Desktop.
Save CoDEmanX/f88ab41a3d71c1e69925bac7cb6cb953 to your computer and use it in GitHub Desktop.
Allowed filename characters (and how Windows Explorer sorts them)
import errno
import unicodedata
allowed = []
forbidden = []
folder = 'output/'  # needs to be created beforehand
ext = '.txt'

# Probe every character code 0-255 by trying to create a file in ``folder``
# whose name is just that character followed by ``ext``.
for i in range(256):
    letter = chr(i)
    try:
        with open(folder + letter + ext, 'w'):
            pass
    except IOError as e:
        # Only errno 22 (EINVAL) is treated as "character not allowed in a
        # file name"; any other I/O failure is a real problem, so re-raise
        # it with its original traceback.
        if e.errno != errno.EINVAL:
            raise
        forbidden.append(letter)
    except (TypeError, ValueError):
        # open() itself rejects an embedded NUL character before touching
        # the file system: TypeError on Python 2, ValueError on Python 3.
        forbidden.append(letter)
    else:
        allowed.append(letter)
# Fallback names for characters that letter_name() cannot resolve through
# the Unicode database. Keys are single characters, values are mixed-case
# descriptions (letter_name() upper-cases them); an empty string marks a
# code that has no name in this table.
ascii_names = {
    # Control characters (0-31)
    '\x00': 'Null char',
    '\x01': 'Start of Heading',
    '\x02': 'Start of Text',
    '\x03': 'End of Text',
    '\x04': 'End of Transmission',
    '\x05': 'Enquiry',
    '\x06': 'Acknowledgement',
    '\x07': 'Bell',
    '\x08': 'Back Space',
    '\x09': 'Horizontal Tab',
    '\x0A': 'Line Feed',
    '\x0B': 'Vertical Tab',
    '\x0C': 'Form Feed',
    '\x0D': 'Carriage Return',
    '\x0E': 'Shift Out / X-On',
    '\x0F': 'Shift In / X-Off',
    '\x10': 'Data Line Escape',
    '\x11': 'Device Control 1 (oft. XON)',
    '\x12': 'Device Control 2',
    '\x13': 'Device Control 3 (oft. XOFF)',
    '\x14': 'Device Control 4',
    '\x15': 'Negative Acknowledgement',
    '\x16': 'Synchronous Idle',
    '\x17': 'End of Transmit Block',
    '\x18': 'Cancel',
    '\x19': 'End of Medium',
    '\x1A': 'Substitute',
    '\x1B': 'Escape',
    '\x1C': 'File Separator',
    '\x1D': 'Group Separator',
    '\x1E': 'Record Separator',
    '\x1F': 'Unit Separator',
    # Printable characters (32-126) are not listed: they have Unicode
    # names. 127 (DEL) is a control character and needs an entry.
    '\x7F': 'Delete',
    # Extended codes (128-255), named after their Windows-1252 glyphs
    # (160-255 coincide with Latin-1). Empty names are codes that
    # Windows-1252 leaves undefined.
    '\x80': 'Euro sign',
    '\x81': '',
    '\x82': 'Single low-9 quotation mark',
    '\x83': 'Latin small letter f with hook',
    '\x84': 'Double low-9 quotation mark',
    '\x85': 'Horizontal ellipsis',
    '\x86': 'Dagger',
    '\x87': 'Double dagger',
    '\x88': 'Modifier letter circumflex accent',
    '\x89': 'Per mille sign',
    '\x8A': 'Latin capital letter S with caron',
    '\x8B': 'Single left-pointing angle quotation mark',
    '\x8C': 'Latin capital ligature OE',
    '\x8D': '',
    '\x8E': 'Latin capital letter Z with caron',
    '\x8F': '',
    '\x90': '',
    '\x91': 'Left single quotation mark',
    '\x92': 'Right single quotation mark',
    '\x93': 'Left double quotation mark',
    '\x94': 'Right double quotation mark',
    '\x95': 'Bullet',
    '\x96': 'En dash',
    '\x97': 'Em dash',
    '\x98': 'Small tilde',
    '\x99': 'Trade mark sign',
    '\x9A': 'Latin small letter S with caron',
    '\x9B': 'Single right-pointing angle quotation mark',
    '\x9C': 'Latin small ligature oe',
    '\x9D': '',
    '\x9E': 'Latin small letter z with caron',
    '\x9F': 'Latin capital letter Y with diaeresis',
    '\xA0': 'Non-breaking space',
    '\xA1': 'Inverted exclamation mark',
    '\xA2': 'Cent sign',
    '\xA3': 'Pound sign',
    '\xA4': 'Currency sign',
    '\xA5': 'Yen sign',
    '\xA6': 'Pipe, Broken vertical bar',
    '\xA7': 'Section sign',
    '\xA8': 'Spacing diaeresis - umlaut',
    '\xA9': 'Copyright sign',
    '\xAA': 'Feminine ordinal indicator',
    '\xAB': 'Left double angle quotes',
    '\xAC': 'Not sign',
    '\xAD': 'Soft hyphen',
    '\xAE': 'Registered trade mark sign',
    '\xAF': 'Spacing macron - overline',
    '\xB0': 'Degree sign',
    '\xB1': 'Plus-or-minus sign',
    '\xB2': 'Superscript two - squared',
    '\xB3': 'Superscript three - cubed',
    '\xB4': 'Acute accent - spacing acute',
    '\xB5': 'Micro sign',
    '\xB6': 'Pilcrow sign - paragraph sign',
    '\xB7': 'Middle dot - Georgian comma',
    '\xB8': 'Spacing cedilla',
    '\xB9': 'Superscript one',
    '\xBA': 'Masculine ordinal indicator',
    '\xBB': 'Right double angle quotes',
    '\xBC': 'Fraction one quarter',
    '\xBD': 'Fraction one half',
    '\xBE': 'Fraction three quarters',
    '\xBF': 'Inverted question mark',
    '\xC0': 'Latin capital letter A with grave',
    '\xC1': 'Latin capital letter A with acute',
    '\xC2': 'Latin capital letter A with circumflex',
    '\xC3': 'Latin capital letter A with tilde',
    '\xC4': 'Latin capital letter A with diaeresis',
    '\xC5': 'Latin capital letter A with ring above',
    '\xC6': 'Latin capital letter AE',
    '\xC7': 'Latin capital letter C with cedilla',
    '\xC8': 'Latin capital letter E with grave',
    '\xC9': 'Latin capital letter E with acute',
    '\xCA': 'Latin capital letter E with circumflex',
    '\xCB': 'Latin capital letter E with diaeresis',
    '\xCC': 'Latin capital letter I with grave',
    '\xCD': 'Latin capital letter I with acute',
    '\xCE': 'Latin capital letter I with circumflex',
    '\xCF': 'Latin capital letter I with diaeresis',
    '\xD0': 'Latin capital letter ETH',
    '\xD1': 'Latin capital letter N with tilde',
    '\xD2': 'Latin capital letter O with grave',
    '\xD3': 'Latin capital letter O with acute',
    '\xD4': 'Latin capital letter O with circumflex',
    '\xD5': 'Latin capital letter O with tilde',
    '\xD6': 'Latin capital letter O with diaeresis',
    '\xD7': 'Multiplication sign',
    '\xD8': 'Latin capital letter O with slash',
    '\xD9': 'Latin capital letter U with grave',
    '\xDA': 'Latin capital letter U with acute',
    '\xDB': 'Latin capital letter U with circumflex',
    '\xDC': 'Latin capital letter U with diaeresis',
    '\xDD': 'Latin capital letter Y with acute',
    '\xDE': 'Latin capital letter THORN',
    '\xDF': 'Latin small letter sharp s',
    '\xE0': 'Latin small letter a with grave',
    '\xE1': 'Latin small letter a with acute',
    '\xE2': 'Latin small letter a with circumflex',
    '\xE3': 'Latin small letter a with tilde',
    '\xE4': 'Latin small letter a with diaeresis',
    '\xE5': 'Latin small letter a with ring above',
    '\xE6': 'Latin small letter ae',
    '\xE7': 'Latin small letter c with cedilla',
    '\xE8': 'Latin small letter e with grave',
    '\xE9': 'Latin small letter e with acute',
    '\xEA': 'Latin small letter e with circumflex',
    '\xEB': 'Latin small letter e with diaeresis',
    '\xEC': 'Latin small letter i with grave',
    '\xED': 'Latin small letter i with acute',
    '\xEE': 'Latin small letter i with circumflex',
    '\xEF': 'Latin small letter i with diaeresis',
    '\xF0': 'Latin small letter eth',
    '\xF1': 'Latin small letter n with tilde',
    '\xF2': 'Latin small letter o with grave',
    '\xF3': 'Latin small letter o with acute',
    '\xF4': 'Latin small letter o with circumflex',
    '\xF5': 'Latin small letter o with tilde',
    '\xF6': 'Latin small letter o with diaeresis',
    '\xF7': 'Division sign',
    '\xF8': 'Latin small letter o with slash',
    '\xF9': 'Latin small letter u with grave',
    '\xFA': 'Latin small letter u with acute',
    '\xFB': 'Latin small letter u with circumflex',
    '\xFC': 'Latin small letter u with diaeresis',
    '\xFD': 'Latin small letter y with acute',
    '\xFE': 'Latin small letter thorn',
    '\xFF': 'Latin small letter y with diaeresis',
}
def letter_name(char):
    """Return a descriptive name for a single character.

    The Unicode database is consulted first. Characters it cannot name
    fall back to the ``ascii_names`` table, upper-cased to match the
    Unicode naming style.

    Args:
        char: A one-character string.

    Returns:
        The character's name, or an empty string when neither the Unicode
        database nor ``ascii_names`` has an entry for it.
    """
    try:
        to_text = unicode  # Python 2: promote the byte string to unicode
    except NameError:
        to_text = str  # Python 3: ``unicode`` no longer exists
    try:
        return unicodedata.name(to_text(char))
    except ValueError:
        # unicodedata.name() raises ValueError for unnamed code points; on
        # Python 2 a failed unicode() conversion raises UnicodeDecodeError,
        # which is a ValueError subclass as well.
        return ascii_names.get(char, '').upper()
def letter_repr(char):
    """Return ``repr(char)`` with its first and last character removed.

    For a plain string this drops the surrounding quotes, leaving the
    escaped form of the character.
    """
    quoted = repr(char)
    return quoted[1:-1]
def char_info(char_array):
    """Yield one formatted report line per character in *char_array*.

    Each line contains the character's escaped form, its decimal code,
    its two-digit hexadecimal code and its name, ending with a newline.
    """
    row_template = '{:<4} {:>3} {:02X} {}\n'
    for char in char_array:
        code = ord(char)
        yield row_template.format(letter_repr(char), code, code, letter_name(char))
# Write the report: the allowed characters first, then the forbidden ones.
# The handle is not called ``file`` so the Python 2 builtin stays unshadowed.
with open('output.txt', 'w') as report:
    report.write('**Allowed:**\n\n')
    report.writelines(char_info(allowed))
    report.write('\n**Forbidden:**\n\n')
    report.writelines(char_info(forbidden))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment