Last active
June 11, 2017 16:50
-
-
Save zestone/8e6c395cb6e24ba488359cf82c4b419a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import re | |
import codecs | |
def analyze_command(cmd, str, narg):
    r"""Split *str* around the first use of the LaTeX command *cmd*.

    At most *narg* brace-delimited arguments are parsed, and only when they
    follow the command with nothing in between (``\cmd{arg1}{arg2}...``).
    Nested braces inside an argument are kept verbatim.  Everything after the
    last parsed argument is returned untouched as the suffix, so unbalanced
    braces or further ``{...}`` groups later in the text do not matter.

    An occurrence of *cmd* that is only the beginning of a longer control
    word (``\a`` inside ``\alpha``) is skipped and the search continues.

    :param cmd: command name including the backslash, e.g. ``'\command'``
    :param str: text to search,
        e.g. ``'hoge' + cmd + '{arg1}{arg2}...{argN}' + 'foo'``
    :param narg: number of arguments the command takes (N)
    :return: ``('hoge', [arg1, arg2, ..., argN], 'foo')``.  ``None`` when the
        command does not occur, when ``narg > 0`` and the command is not
        directly followed by ``{``, or when an argument is never closed.
        If fewer than *narg* arguments are present a warning is printed and
        the shorter argument list is returned.
    """
    text = str  # local alias: the parameter name hides the builtin ``str``

    def is_ascii_letter(ch):
        # ``ch`` is one character or '' (end of text); '' is never a letter.
        return 'a' <= ch <= 'z' or 'A' <= ch <= 'Z'

    # Locate the first occurrence that is a complete control word.
    name_is_word = is_ascii_letter(cmd[-1:])
    cmd_head = text.find(cmd)
    while (cmd_head != -1 and name_is_word
           and is_ascii_letter(text[cmd_head + len(cmd):cmd_head + len(cmd) + 1])):
        cmd_head = text.find(cmd, cmd_head + 1)
    if cmd_head == -1:  # command not found
        return None

    pos = cmd_head + len(cmd)
    if narg > 0 and text[pos:pos + 1] != '{':
        # The command expects arguments but no brace group starts here.
        print('Warning: Invalid syntax.')
        return None

    args = []
    # Consume one brace group per iteration; stop at the first character
    # that does not open a group or once narg groups have been read.
    while len(args) < narg and text[pos:pos + 1] == '{':
        depth = 0
        for i in range(pos, len(text)):
            if text[i] == '{':
                depth += 1
            elif text[i] == '}':
                depth -= 1
                if depth == 0:  # end of this argument
                    break
        else:
            # Ran off the end of the text with the group still open.
            print('Warning: Invalid syntax.')
            return None
        args.append(text[pos + 1:i])
        pos = i + 1

    if len(args) < narg:
        print('Warning: Too few arguments.')
    return text[:cmd_head], args, text[pos:]
def _expand_once(line, name, arg_num, body, bare_use):
    r"""Apply one expansion step of a single command to *line*.

    Return the rewritten line, or ``None`` when the line is left unchanged.
    """
    if arg_num == 0:
        # A parameterless command has no brace group to parse: substitute the
        # body textually.  A function is used as replacement so backslashes
        # in the body are not interpreted as regex escapes.
        expanded = bare_use.sub(lambda _match: body, line)
    else:
        parsed = analyze_command(name, line, arg_num)
        if parsed is None:
            return None
        prefix, args, suffix = parsed
        if len(args) < arg_num:
            # Too few arguments were found; leave the text alone instead of
            # failing on a missing list element.
            return None

        def fill(match):
            index = int(match.group(1))
            # Placeholders beyond the declared count are kept verbatim.
            return args[index - 1] if index <= arg_num else match.group(0)

        # Single pass over the body, so '#k' text that arrives inside an
        # argument is never substituted a second time.
        expanded = prefix + re.sub(r'#([1-9])', fill, body) + suffix
    return None if expanded == line else expanded


def textract_newcommand(tx_ref, tx_tgt):
    r"""Expand the ``\newcommand`` macros defined in *tx_ref* inside *tx_tgt*.

    A line of *tx_ref* is a definition when, after stripping surrounding
    whitespace, it consists solely of ``\newcommand{name}[N]{body}`` or
    ``\newcommand{name}{body}`` (zero parameters).  Other lines, including
    definitions with an optional default argument, are ignored.

    Every line of *tx_tgt* is rewritten repeatedly until a full pass changes
    nothing, so expansions that introduce other defined commands are expanded
    as well.  Parameterless commands are replaced wherever they occur as a
    complete control word, except directly after ``command{`` (the name being
    defined by a ``\newcommand``-style line in the target); whitespace after
    the command is left as it is.

    NOTE: a recursive definition whose expansion keeps changing the line
    makes the loop run forever.

    :param tx_ref: iterable of lines containing the definitions
    :param tx_tgt: list of lines to expand; modified in place
    :return: *tx_tgt*
    """
    definition = re.compile(
        r'\\newcommand\{([^{}\[\]]+)\}(?:\[\s*([0-9]+)\s*\])?\{(.*)\}')

    # Build the command table.
    cmd_list = []
    for ref_line in tx_ref:
        found = definition.fullmatch(ref_line.strip())
        if found is None:
            continue
        cmd_name, arg_num, process = found.groups()
        # A name ending in a letter must not be followed by another letter,
        # otherwise it is only the prefix of a longer command.
        guard = r'(?![A-Za-z])' if re.search(r'[A-Za-z]\Z', cmd_name) else ''
        bare_use = re.compile(r'(?<!command\{)' + re.escape(cmd_name) + guard)
        cmd_list.append((cmd_name, int(arg_num or 0), process, bare_use))

    # Replace commands until nothing changes any more.
    while True:
        flg_replace = False
        for li, line in enumerate(tx_tgt):
            for cmd_name, arg_num, process, bare_use in cmd_list:
                expanded = _expand_once(line, cmd_name, arg_num, process, bare_use)
                if expanded is not None:
                    # Keep working on the updated text so that a second
                    # command on the same line does not undo this expansion.
                    line = expanded
                    flg_replace = True
            tx_tgt[li] = line
        if not flg_replace:
            break
    return tx_tgt
if __name__ == '__main__':
    # Command line: <script> SRC REF DST [CODEC]
    #   SRC   file whose commands are expanded
    #   REF   file containing the \newcommand definitions
    #   DST   output file
    #   CODEC text encoding used for all three files (default: utf-8)
    text_codec = 'utf-8'
    argv = sys.argv
    argc = len(argv)
    # sys.exit is used rather than the bare exit(), which is only injected
    # by the site module and is not guaranteed to exist.
    if argc < 4:
        sys.exit('Few arguments.')
    if argc > 5:
        sys.exit('Too many arguments.')
    fp_src, fp_ref, fp_enc = argv[1:4]
    if argc == 5:
        text_codec = argv[4]

    with codecs.open(fp_ref, 'r', text_codec) as f:
        tx_ref = f.readlines()
    with codecs.open(fp_src, 'r', text_codec) as f:
        tx_src = f.readlines()

    tx_dst = textract_newcommand(tx_ref=tx_ref, tx_tgt=tx_src)

    with codecs.open(fp_enc, 'w', text_codec) as f:
        f.writelines(tx_dst)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment