Skip to content

Instantly share code, notes, and snippets.

@wynemo
Created March 23, 2012 04:32
Show Gist options
  • Save wynemo/2166836 to your computer and use it in GitHub Desktop.
Save wynemo/2166836 to your computer and use it in GitHub Desktop.
Check for files that contain Chinese text but are not encoded in UTF-8
#coding:utf-8
# File-name suffixes (lower case) of the files whose encoding is checked.
suffix_list = [
    '.cpp', '.c', '.h', '.hpp',      # C / C++ sources and headers
    '.txt',                          # plain text
    '.html', '.htm', '.xml',         # markup
    '.py',                           # Python sources
]
def FoundInvalidFile(input_folder, suffixes=None):
    """Report files under *input_folder* whose content is not valid UTF-8.

    The folder is walked recursively.  Every file whose name ends
    (case-insensitively) with one of the wanted suffixes is read as raw
    bytes and decoded as UTF-8; a leading UTF-8 byte-order mark is
    skipped first.  For each file that fails to decode, one line of the
    form ``<path> <decode error>`` is printed, with backslashes in the
    path shown as forward slashes.

    Args:
        input_folder: Directory to scan (relative or absolute).
        suffixes: Optional iterable of file-name suffixes to check.
            When omitted, the module-level ``suffix_list`` is used.

    Returns:
        A list of the reported paths, in the order they were printed.

    Errors raised while listing a directory or opening/reading a file
    are not caught and propagate to the caller.
    """
    import os

    if suffixes is None:
        suffixes = suffix_list
    # str.endswith accepts a tuple, so one call tests every suffix.
    wanted = tuple(suffix.lower() for suffix in suffixes)
    utf8_bom = b'\xef\xbb\xbf'
    invalid = []

    def check(path):
        # The with-block guarantees the handle is closed for every file.
        with open(path, 'rb') as handle:
            data = handle.read()
        if data.startswith(utf8_bom):
            data = data[len(utf8_bom):]
        try:
            data.decode('utf-8')
        except UnicodeDecodeError as err:
            # Only the displayed path is normalised; the real path was
            # used for open() so names containing '\\' still work on POSIX.
            shown = path.replace('\\', '/')
            print('%s %s' % (shown, err))
            invalid.append(shown)

    def walk(folder):
        for name in os.listdir(folder):
            path = os.path.join(folder, name)
            if os.path.isdir(path):
                walk(path)
            elif path.lower().endswith(wanted):
                check(path)

    walk(os.path.abspath(input_folder))
    return invalid
if __name__ == "__main__":
    # Scan the directory named as the first command-line argument, or the
    # current directory when no argument is given.
    import sys
    FoundInvalidFile(sys.argv[1] if len(sys.argv) > 1 else '.')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment