Skip to content

Instantly share code, notes, and snippets.

@ficapy
Created September 10, 2015 03:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ficapy/775f662c953c8ace27c8 to your computer and use it in GitHub Desktop.
Save ficapy/775f662c953c8ace27c8 to your computer and use it in GitHub Desktop.
编码检测
# NOTE(review): `total` is not referenced by any code visible in this file —
# TODO confirm whether it is still needed.
total = 2292627
import codecs
import mmap
import sys
import time
from encodings.aliases import aliases
# Every distinct codec name that Python has an alias for.  Sorted so that the
# scan order (and therefore the order of equally scored results) is the same
# on every run; iterating a bare set of strings varies with hash randomisation.
all_encoding = sorted(set(aliases.values()))
def character(contain_char, encoding='GB2312', path='test.txt'):
    """Score how plausibly the file at *path* is encoded with *encoding*.

    The score is meant to be ranked across many candidate codecs: the lower
    the value, the better the candidate fits the file.

    Args:
        contain_char: Text known to occur in the file.  It is encoded with
            *encoding* and searched for in the raw bytes of the file.
        encoding: Name of the candidate codec.
        path: File to inspect.  Defaults to ``'test.txt'``.

    Returns:
        int: ``6000`` when the encoded probe text does not occur in the
        file; ``7000`` when the candidate cannot be evaluated at all (for
        example an unknown or non-text codec, probe text that cannot be
        encoded, or a missing or empty file); otherwise the number of
        ``UnicodeDecodeError``s raised while reading the file line by line,
        capped at ``5000``.
    """
    not_found_score = 6000
    unusable_score = 7000
    max_decode_errors = 5000

    try:
        needle = bytes(contain_char, encoding)
        # Read-only open + read-only mapping: scanning must not require
        # write permission on the file.
        with codecs.open(path, 'r', encoding) as f:
            # The context manager releases the mapping as soon as the raw
            # byte search is done.
            with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as raw:
                found = raw.rfind(needle) != -1
            if not found:
                return not_found_score

            error = 0
            while True:
                try:
                    for _ in f:
                        pass
                except UnicodeDecodeError:
                    # Count the failure and resume iterating the same
                    # reader until it is exhausted or the cap is reached.
                    error += 1
                    if error >= max_decode_errors:
                        return error
                else:
                    # Reader exhausted without a further decode error.
                    break
            return error
    except Exception:
        # Deliberately broad: any failure just disqualifies this candidate.
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return unusable_score
# Score every known codec with character(), drawing a one-line progress bar
# on stdout while the scan runs.
ret = []
codec_count = len(all_encoding)
for done, codec in enumerate(all_encoding, start=1):
    ret.append((codec, character('刘健', codec)))
    # Pause kept from the original script, presumably so the progress bar
    # stays readable — NOTE(review): confirm it is still wanted; it adds
    # about 0.1 s per codec.
    time.sleep(0.1)
    progress = done / codec_count * 100
    # '\r' returns to the start of the line so the bar redraws in place.
    sys.stdout.write('\r{}{:.2f}%'.format('=' * int(progress), progress))
    sys.stdout.flush()
sys.stdout.write('\n')

# Print (codec, score) pairs in ascending score order: fewest decode errors
# first, with the 6000 / 7000 sentinel scores sorting last.
for entry in sorted(ret, key=lambda item: item[1]):
    print(entry)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment