Skip to content

Instantly share code, notes, and snippets.

@btspoony
Last active May 6, 2019 13:36
Show Gist options
  • Save btspoony/a06f2e188d697efb4a3e8a544b7b4d39 to your computer and use it in GitHub Desktop.
Save btspoony/a06f2e188d697efb4a3e8a544b7b4d39 to your computer and use it in GitHub Desktop.
筛选文件中的中文字符
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
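Extract the strings that contain Chinese characters from input files.

Each file named after ``-f`` (default: ./data.sql, resolved relative to this
script's directory) is scanned line by line for single-quoted values that
follow a comma. Values containing Chinese characters are written to
``<file>.csv`` under the header ``from,to`` with the ``to`` column left empty.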
"""
import sys
import os
import re
import csv
# Directory that contains this script; input file names are joined onto it.
BASE_PATH = os.path.split(os.path.abspath(__file__))[0]
# File processed when the command line does not supply -f.
DEFAULT_FILE = "./data.sql"
def haschinese(name):
    """
    Report whether a string contains at least one Chinese character.

    A character counts as Chinese when it lies in the CJK Unified
    Ideographs range U+4E00 through U+9FFF.

    :param name: text to inspect; byte strings are decoded as UTF-8 first,
        text (unicode) strings are used as they are.
    :return: True if any character falls in that range, otherwise False.
    :raises UnicodeDecodeError: if a byte string is not valid UTF-8.
    """
    # Only byte strings need decoding. Calling .decode() on text fails on
    # Python 3 and forces an implicit ASCII encode on Python 2.
    if isinstance(name, bytes):
        name = name.decode('utf-8')
    return any(u'\u4e00' <= char <= u'\u9fff' for char in name)
def help_args():
    """
    Print the usage hint.

    The message is Chinese for "Please enter the name of the file to
    convert".
    """
    # Parenthesised form: still a print statement with one operand on
    # Python 2, and a function call on Python 3, so it parses on both.
    print("请输入要转换的文件名")
def arg_check(key):
    """
    Tell whether *key* is an option flag this script understands.

    Only ``-f`` is recognised.
    """
    known_flags = ["-f"]
    return key in known_flags
def arg_value(argv, key):
    """
    Return the values that follow the flag *key* in *argv*.

    Collection starts right after the first occurrence of *key* and stops
    at the next recognised flag or at the end of *argv*.

    :param argv: list of command-line tokens.
    :param key: flag whose values are wanted; must be present in *argv*.
    :return: list of the tokens collected, possibly empty.
    :raises ValueError: if *key* does not occur in *argv*.
    """
    start = argv.index(key) + 1
    collected = []
    for token in argv[start:]:
        if arg_check(token):
            break
        collected.append(token)
    return collected
def arg_set(argv, args):
    """
    Copy option values from the command line into *args*.

    When ``-f`` appears in *argv*, ``args["file"]`` is replaced by the
    values that follow it; otherwise *args* is left untouched.
    """
    if "-f" not in argv:
        return
    file_names = arg_value(argv, "-f")
    args["file"] = file_names
def arg_parse():
    """
    Build the options dictionary from ``sys.argv``.

    :return: None when ``-h`` is present; otherwise a dict whose ``"file"``
        entry lists the input files, starting from ``[DEFAULT_FILE]`` and
        overridden by whatever ``arg_set`` finds on the command line.
    """
    cli = sys.argv
    wants_help = "-h" in cli
    if wants_help:
        return None
    options = {"file": [DEFAULT_FILE]}
    arg_set(cli, options)
    return options
def find_in_line(line, reg, libs):
    """
    Collect the matches of *reg* in *line* that contain Chinese text.

    A match that starts with ``[`` or ``{`` is searched again for
    double-quoted substrings whose quotes are preceded by a backslash; any
    other match is added to *libs* when ``haschinese`` accepts it.

    :param line: text to search.
    :param reg: regular expression handed to ``re.findall``.
    :param libs: set that receives the accepted matches (mutated in place).
    :return: None.
    """
    # An empty findall result just means the loop body never runs.
    for match in re.findall(reg, line, flags=re.U):
        looks_nested = re.match(r"^[\[\{].*", match, flags=re.U)
        if looks_nested:
            find_in_line(match, r"\\\"([^\\]+)\\\"", libs)
            continue
        if haschinese(match):
            libs.add(match)
def main():
    """
    Run the extraction for every requested input file.

    For each file name from ``arg_parse`` (resolved against ``BASE_PATH``),
    every line is scanned for single-quoted values that follow a comma, and
    the values accepted by ``find_in_line`` are written to ``<file>.csv``
    as rows of ``<value>,`` under the header ``from,to``.

    :return: None; when ``arg_parse`` yields nothing (help requested) the
        usage hint is printed instead.
    """
    if sys.version_info[0] == 2:
        # Python 2 only: keep the original default-encoding override.
        # ``reload`` is not a builtin on Python 3, so it must not run there.
        reload(sys)  # noqa: F821 pylint: disable=undefined-variable
        sys.setdefaultencoding('utf-8')
    args = arg_parse()
    if not args:
        return help_args()
    for name in args["file"]:
        file_path = os.path.normpath(os.path.join(BASE_PATH, name))
        libs = set()
        # ``with`` closes the file even if scanning raises; iterating the
        # handle streams lines instead of loading the whole file.
        with open(file_path, "r") as file_opened:
            for line in file_opened:
                find_in_line(line, r",'([^']+)'", libs)
        # Every Chinese string that was found.
        with open(file_path + '.csv', 'w') as file_write:
            # csv.writer quotes values containing ',' or '"', which plain
            # string concatenation would have turned into broken rows.
            # lineterminator="\n" keeps the line endings written before.
            writer = csv.writer(file_write, lineterminator="\n")
            writer.writerow(["from", "to"])
            # Sorted so repeated runs produce the same file.
            for word in sorted(libs):
                writer.writerow([word, ""])
    return None
# Script entry point: run the extraction only when executed directly.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment