Skip to content

Instantly share code, notes, and snippets.

@zestone
Last active June 11, 2017 16:50
Show Gist options
  • Save zestone/8e6c395cb6e24ba488359cf82c4b419a to your computer and use it in GitHub Desktop.
Save zestone/8e6c395cb6e24ba488359cf82c4b419a to your computer and use it in GitHub Desktop.
import sys
import re
import codecs
def _read_brace_args(text, start, max_args):
    """Read up to ``max_args`` consecutive ``{...}`` groups beginning at ``start``.

    Groups must be directly adjacent (``{a}{b}``); nested braces inside a
    group are kept verbatim.  Backslash-escaped braces are not treated
    specially.

    :param text: string being scanned
    :param start: index at which the first ``{`` is expected
    :param max_args: maximum number of groups to consume
    :return: ``(args, end)`` where ``end`` is the index just past the last
             consumed group, or ``None`` if a group is never closed
    """
    args = []
    pos = start
    while len(args) < max_args and text[pos:pos + 1] == '{':
        depth = 0
        for i in range(pos, len(text)):
            ch = text[i]
            if ch == '{':
                depth += 1
            elif ch == '}':
                depth -= 1
                if depth == 0:  # matching brace of this argument
                    break
        else:
            # Ran off the end of the string with the group still open.
            return None
        args.append(text[pos + 1:i])
        pos = i + 1
    return args, pos


def analyze_command(cmd, str, narg):
    r"""Split ``str`` around the first usable invocation of ``cmd``.

    :param cmd: command name, e.g. '\command1'
    :param str: 'hoge' + cmd + '{arg1}{arg2}...{argN}' + 'foo'
                (the name shadows the builtin; it is kept so existing
                callers are unaffected)
    :param narg: N, the number of arguments the command takes
    :return: ('hoge', [arg1, arg2, ..., argN], 'foo'), or None when ``cmd``
             does not occur, is never directly followed by an argument
             group although ``narg`` > 0, or has an unterminated argument

    At most ``narg`` groups are consumed; anything after them, including
    further ``{...}`` groups and closing braces of an enclosing group, is
    returned untouched in the suffix.  If at least one but fewer than
    ``narg`` groups are present, a warning is printed and the partial
    argument list is returned.
    """
    text = str
    step = max(len(cmd), 1)  # always advance, even for an empty cmd
    rejected = False
    head = text.find(cmd)
    while head != -1:
        body_start = head + len(cmd)
        next_char = text[body_start:body_start + 1]
        # '\foo' inside '\foobar' is a different, longer command name.
        if not (cmd[-1:].isalpha() and next_char.isalpha()):
            parsed = _read_brace_args(text, body_start, narg)
            if parsed is None:
                print('Warning: Invalid syntax.')
                return None
            args, end = parsed
            if args or narg <= 0:
                if len(args) < narg:
                    print('Warning: Too few arguments.')
                return text[:head], args, text[end:]
            # Arguments were required but none follow this occurrence;
            # try the next one.
            rejected = True
        head = text.find(cmd, head + step)
    if rejected:
        print('Warning: Invalid syntax.')
    return None
def _fill_placeholders(process, args, arg_num):
    """Replace ``#1`` .. ``#<arg_num>`` in ``process`` with the matching argument.

    All placeholders are substituted in a single pass, so a ``#k`` that
    appears inside an argument's own text is not substituted again.
    """
    def pick(match):
        index = int(match.group(1))
        if index <= arg_num:
            return args[index - 1]
        return match.group(0)  # placeholder beyond the declared count

    return re.sub(r'#([1-9])', pick, process)


def textract_newcommand(tx_ref, tx_tgt):
    r"""Expand the ``\newcommand`` macros defined in ``tx_ref`` inside ``tx_tgt``.

    Recognised definitions, one per line (surrounding whitespace ignored):

    * ``\newcommand{name}[count]{body}``
    * ``\newcommand{name}{body}`` -- treated as taking one argument, as in
      the original implementation, so it is expanded where it is written
      as ``name{}``.

    Definitions with a non-numeric count are skipped with a warning.

    :param tx_ref: iterable of lines containing the definitions
    :param tx_tgt: list of lines to expand; it is modified in place
    :return: ``tx_tgt``

    NOTE: expansion repeats until no command applies any more, so a command
    whose body invokes itself never terminates.
    """
    with_count = re.compile(
        r'\\newcommand\{([^{}\[\]]+)\}\[([^{}\[\]]+)\]\{(.*)\}')
    without_count = re.compile(r'\\newcommand\{([^{}\[\]]+)\}\{(.*)\}')

    # Build the command table.
    cmd_list = []
    for raw in tx_ref:
        # strip() also removes '\r', so CRLF files are handled.
        line = raw.strip()
        match = with_count.fullmatch(line)
        if match:
            cmd_name, count_text, process = match.groups()
            try:
                arg_num = int(count_text)
            except ValueError:
                print('Warning: Invalid argument count.')
                continue
        else:
            match = without_count.fullmatch(line)
            if not match:
                continue
            cmd_name, process = match.groups()
            arg_num = 1
        cmd_list.append(
            {'name': cmd_name, 'arg_num': arg_num, 'process': process})

    # Replace commands until a full pass changes nothing.
    while True:
        flg_replace = False
        for li, line in enumerate(tx_tgt):
            for cmd in cmd_list:
                arg_num = cmd['arg_num']
                line_anal = analyze_command(cmd['name'], line, arg_num)
                if line_anal is None:
                    continue
                prefix, args, suffix = line_anal
                if len(args) < arg_num:
                    # Not every placeholder can be filled; leave the text.
                    continue
                # Keep ``line`` current so the next command in this pass
                # sees (and does not overwrite) this replacement.
                line = prefix + _fill_placeholders(
                    cmd['process'], args, arg_num) + suffix
                tx_tgt[li] = line
                flg_replace = True
        if not flg_replace:
            break
    return tx_tgt
def parse_cli_args(argv):
    """Split the command line into the script's four settings.

    :param argv: full argument vector, program name first
    :return: (fp_src, fp_ref, fp_enc, text_codec); ``text_codec`` is
             'utf-8' unless a fourth argument is given
    :raises SystemExit: if fewer than three or more than four arguments
                        follow the program name
    """
    argc = len(argv)
    # sys.exit rather than the site-provided exit(), which is not
    # guaranteed to exist (e.g. under ``python -S``).
    if argc < 4:
        sys.exit('Few arguments.')
    if argc > 5:
        sys.exit('Too many arguments.')
    fp_src, fp_ref, fp_enc = argv[1:4]
    text_codec = argv[4] if argc == 5 else 'utf-8'
    return fp_src, fp_ref, fp_enc, text_codec


if __name__ == '__main__':
    # Usage: script SRC REF DST [CODEC]
    fp_src, fp_ref, fp_enc, text_codec = parse_cli_args(sys.argv)
    # REF supplies the command definitions, SRC the text to expand.
    with codecs.open(fp_ref, 'r', text_codec) as f:
        tx_ref = f.readlines()
    with codecs.open(fp_src, 'r', text_codec) as f:
        tx_src = f.readlines()
    tx_dst = textract_newcommand(tx_ref=tx_ref, tx_tgt=tx_src)
    with codecs.open(fp_enc, 'w', text_codec) as f:
        f.writelines(tx_dst)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment