Skip to content

Instantly share code, notes, and snippets.

@minacle
Created December 30, 2018 11:13
Show Gist options
  • Save minacle/951c125483983070f90e82e850b20d19 to your computer and use it in GitHub Desktop.
Save minacle/951c125483983070f90e82e850b20d19 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
#
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
# For more information, please refer to <http://unlicense.org>
from __future__ import with_statement, print_function, unicode_literals
import sys
# Text buffer class: prefer the ``io`` implementation and fall back to the
# legacy Python 2 modules only when ``io`` cannot provide it.
try:
    from io import StringIO
except ImportError:
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO

# Python 2 compatibility aliases.  The names are probed directly instead of
# searching ``dir(__builtins__)``: outside of ``__main__`` CPython may expose
# ``__builtins__`` as a plain dict, in which case the ``dir()`` lookup never
# finds ``xrange``/``unichr`` and the aliases are silently skipped.
try:
    range = xrange  # Python 2: use the lazy range type
except NameError:  # Python 3: ``xrange`` does not exist, ``range`` is lazy
    pass
try:
    chr = unichr  # Python 2: ``chr`` must return text for any code point
except NameError:  # Python 3: ``unichr`` does not exist, ``chr`` returns text
    pass
# Characters that are never reported: newline, tab and U+FEFF (the byte-order
# mark, which the plain "utf-8" codec hands back as an ordinary character).
IGNORED_CHARS = ("\n", "\t", "\ufeff")

# Character budget of one wrapped line, counting the comma after each token.
LINE_WIDTH = 100

# Number of characters requested from the file per read() call.
CHUNK_SIZE = 16384


def read_chars(path, chunk_size=CHUNK_SIZE):
    """Return the set of distinct characters found in a UTF-8 text file.

    The file is read ``chunk_size`` characters at a time so large inputs are
    never held in memory as a whole.  ``io.open`` is used because it accepts
    ``encoding`` on both Python 2 and Python 3, and the ``with`` block closes
    the file even when decoding fails part-way through.
    """
    import io

    seen = set()
    with io.open(path, encoding="utf-8") as handle:
        while True:
            chunk = handle.read(chunk_size)
            if not chunk:
                break
            seen.update(chunk)
    return seen


def group_codepoints(codepoints):
    """Collapse code points into sorted, inclusive ``(first, last)`` runs.

    Duplicates are ignored and the input does not need to be sorted.  ``None``
    (not 0) marks "no run open yet", so code point 0 is grouped like any other
    value instead of being dropped.
    """
    runs = []
    first = last = None
    for codepoint in sorted(set(codepoints)):
        if first is not None and codepoint == last + 1:
            last = codepoint  # contiguous: extend the open run
            continue
        if first is not None:
            runs.append((first, last))
        first = last = codepoint
    if first is not None:
        runs.append((first, last))
    return runs


def format_run(first, last):
    """Render a run as ``"N"`` for one code point, ``"N-M"`` for several."""
    if first == last:
        return "%d" % first
    return "%d-%d" % (first, last)


def wrap_tokens(tokens, width=LINE_WIDTH):
    """Pack tokens into comma-separated lines within a ``width`` budget.

    Every token is charged ``len(token) + 1`` (the token plus its comma); a
    new line is started when the charge no longer fits.  A line that is
    followed by another keeps its trailing comma, the final line does not.
    An empty token sequence yields a single empty line.
    """
    rows = [[]]
    remaining = width
    for token in tokens:
        cost = len(token) + 1
        remaining -= cost
        # Never break before the first token of a line: an over-long token
        # simply gets a line of its own.
        if remaining < 0 and rows[-1]:
            rows.append([])
            remaining = width - cost
        rows[-1].append(token)
    lines = [",".join(row) + "," for row in rows[:-1]]
    lines.append(",".join(rows[-1]))
    return lines


def config_lines(chars):
    """Return the report lines for an iterable of characters.

    The first line is the ``# selected chars`` header; each following line is
    ``chars=`` plus one wrapped row of code point runs.  Characters listed in
    ``IGNORED_CHARS`` are skipped whether or not they are present.
    """
    wanted = set(chars).difference(IGNORED_CHARS)
    runs = group_codepoints(ord(char) for char in wanted)
    tokens = [format_run(first, last) for first, last in runs]
    lines = ["# selected chars"]
    lines.extend("chars=%s" % row for row in wrap_tokens(tokens))
    return lines


def main(argv=None):
    """Print the code point report for the file named in ``argv[1]``.

    ``argv`` defaults to ``sys.argv``.  Returns the process exit status:
    1 when no file argument was given (a usage line goes to stderr),
    0 after the report has been printed.
    """
    args = sys.argv if argv is None else argv
    if len(args) <= 1:
        program = args[0] if args else "chars"
        print("usage: %s FILE" % program, file=sys.stderr)
        return 1
    for line in config_lines(read_chars(args[1])):
        print(line)
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment