wynemo/check_utf8.py

## check_utf8.py
#coding:utf-8

# Lower-case file-name suffixes (leading dot included) of the files whose
# contents are checked for UTF-8 validity.
suffix_list = [
    '.cpp', '.c', '.h', '.hpp',
    '.txt',
    '.html', '.htm', '.xml',
    '.py',
]

def FoundInvalidFile(input_folder, suffixes=None):
    """Print every matching file under ``input_folder`` that is not valid UTF-8.

    The tree rooted at ``input_folder`` is walked recursively (directories
    reached through symbolic links are entered as well, because
    ``os.path.isdir`` follows links).  Every file whose lower-cased name ends
    with one of the wanted suffixes is read as raw bytes and decoded as
    UTF-8.  For each file that fails to decode, one line is printed: the
    absolute path with backslashes replaced by ``/``, a space, and the
    decoder's error message.

    Args:
        input_folder: Directory to scan.
        suffixes: Optional iterable of file-name suffixes to check.  When
            omitted or ``None``, the module-level ``suffix_list`` is used.

    Returns:
        None; results are only written to standard output.

    Raises:
        OSError, IOError: If a directory cannot be listed or a file cannot
            be opened or read.  Only decoding failures are caught.
    """
    import os

    if suffixes is None:
        suffixes = suffix_list
    # str.endswith accepts a tuple, so all suffixes are tested in one call.
    wanted = tuple(suffixes)

    def walk(folder):
        folder = os.path.abspath(folder)
        for name in os.listdir(folder):
            path = os.path.join(folder, name)
            if os.path.isdir(path):
                walk(path)
                continue
            if not name.lower().endswith(wanted):
                continue
            # Open the real path; the slash-normalised form is for display
            # only (on POSIX a backslash is a legal file-name character).
            with open(path, 'rb') as handle:
                data = handle.read()
            try:
                # A leading UTF-8 BOM (EF BB BF) is itself valid UTF-8, so it
                # needs no special-casing.  Decoding the whole buffer also
                # keeps reported byte offsets relative to the file start.
                data.decode('utf-8')
            except UnicodeDecodeError as err:
                print('%s %s' % (path.replace('\\', '/'), err))

    walk(input_folder)

if __name__ == "__main__":
    import sys

    # Scan the directory named on the command line; with no argument,
    # fall back to the current working directory as before.
    FoundInvalidFile(sys.argv[1] if len(sys.argv) > 1 else '.')
	#coding:utf-8

	# Lower-case file-name suffixes (leading dot included) of the files whose
	# contents are checked for UTF-8 validity.
	suffix_list = [
		'.cpp', '.c', '.h', '.hpp',
		'.txt',
		'.html', '.htm', '.xml',
		'.py',
	]

	def FoundInvalidFile(input_folder, suffixes=None):
		"""Print every matching file under ``input_folder`` that is not valid UTF-8.

		The tree rooted at ``input_folder`` is walked recursively (directories
		reached through symbolic links are entered as well, because
		``os.path.isdir`` follows links).  Every file whose lower-cased name
		ends with one of the wanted suffixes is read as raw bytes and decoded
		as UTF-8.  For each file that fails to decode, one line is printed:
		the absolute path with backslashes replaced by ``/``, a space, and
		the decoder's error message.

		Args:
			input_folder: Directory to scan.
			suffixes: Optional iterable of file-name suffixes to check.  When
				omitted or ``None``, the module-level ``suffix_list`` is used.

		Returns:
			None; results are only written to standard output.

		Raises:
			OSError, IOError: If a directory cannot be listed or a file
				cannot be opened or read.  Only decoding failures are caught.
		"""
		import os

		if suffixes is None:
			suffixes = suffix_list
		# str.endswith accepts a tuple, so all suffixes are tested in one call.
		wanted = tuple(suffixes)

		def walk(folder):
			folder = os.path.abspath(folder)
			for name in os.listdir(folder):
				path = os.path.join(folder, name)
				if os.path.isdir(path):
					walk(path)
					continue
				if not name.lower().endswith(wanted):
					continue
				# Open the real path; the slash-normalised form is for
				# display only (on POSIX a backslash is a legal file-name
				# character).
				with open(path, 'rb') as handle:
					data = handle.read()
				try:
					# A leading UTF-8 BOM (EF BB BF) is itself valid UTF-8,
					# so it needs no special-casing.  Decoding the whole
					# buffer also keeps reported byte offsets relative to
					# the file start.
					data.decode('utf-8')
				except UnicodeDecodeError as err:
					print('%s %s' % (path.replace('\\', '/'), err))

		walk(input_folder)

	if __name__ == "__main__":
		import sys

		# Scan the directory named on the command line; with no argument,
		# fall back to the current working directory as before.
		FoundInvalidFile(sys.argv[1] if len(sys.argv) > 1 else '.')