Skip to content

Instantly share code, notes, and snippets.

@yui0
Last active May 11, 2022 04:35
Show Gist options
  • Save yui0/87ad993c98af6c85ca5783c115252b0d to your computer and use it in GitHub Desktop.
Save yui0/87ad993c98af6c85ca5783c115252b0d to your computer and use it in GitHub Desktop.
# Print the lines of a text file that are written in a given language.
#
# Each line is run through the CLD2 language detector (pycld2).  Lines whose
# top detected language code equals the requested one are printed after their
# struck-through Markdown spans (``**~~...~~**`` and ``~~...~~``) are removed.
import argparse
import re

# Non-greedy matches so that several struck-through spans on one line are
# removed individually instead of swallowing the text between them.
_BOLD_STRIKE_RE = re.compile(r'\*\*\~\~.*?\~\~\*\*')  # [**~~ ... ~~**]
_STRIKE_RE = re.compile(r'\~\~.*?\~\~')  # [~~ ... ~~]


def remove_struck_text(line):
    """Return *line* with struck-through Markdown spans removed.

    Bold struck-through spans (``**~~...~~**``) are removed first so their
    surrounding ``**`` markers do not get left behind, then plain
    ``~~...~~`` spans are removed.
    """
    return _STRIKE_RE.sub('', _BOLD_STRIKE_RE.sub('', line))


def extract_lines(lines, lang, detect):
    """Yield the cleaned lines of *lines* whose detected language is *lang*.

    Args:
        lines: iterable of text lines (line endings are left untouched).
        lang: language code to keep, compared for equality with the value
            returned by *detect*.
        detect: callable taking a string and returning a language code.

    Yields:
        Each matching line with struck-through spans removed.  Lines that
        are empty, or consist of a lone ``*``, after cleaning are skipped.
    """
    for line in lines:
        # Detection runs on a lower-cased copy with '#' removed; the
        # original line is what gets cleaned and emitted.
        if detect(line.lower().replace('#', '')) != lang:
            continue
        cleaned = remove_struck_text(line)
        if cleaned.strip() not in ('', '*'):
            yield cleaned


def main(argv=None):
    """Parse the command line and print the matching lines of the file.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description='指定された言語の文を抽出します。')
    parser.add_argument('name', help='file name')
    parser.add_argument('-l', '--lang', help='select language [ja/en/pt]', default='en')
    parser.add_argument('-e', '--encoding', default=None,
                        help='input file encoding (default: platform default)')
    args = parser.parse_args(argv)

    # Imported after argument parsing so that usage errors and --help are
    # reported before the third-party detector is loaded.
    import pycld2 as cld2

    def detect(text):
        # cld2.detect returns (isReliable, textBytesFound, details); the
        # second field of the first details entry is compared with --lang.
        _is_reliable, _bytes_found, details = cld2.detect(text)
        return details[0][1]

    # The context manager closes the file even if detection raises.
    with open(args.name, 'r', encoding=args.encoding) as f:
        for kept in extract_lines(f, args.lang, detect):
            # print() appends its own newline after the line's own ending;
            # kept as-is so the output format does not change.
            print(kept)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment