# zestone/texpand.py

## texpand.py
import sys
import re
import codecs

# Matches one single-line definition (surrounding whitespace already stripped):
#   \newcommand{<name>}{<body>}             -> macro without arguments
#   \newcommand{<name>}[<count>]{<body>}    -> macro with <count> arguments
_NEWCOMMAND_RE = re.compile(
    r'\\newcommand\{([^{}\[\]]+)\}(?:\[\s*(\d+)\s*\])?\{(.*)\}')

# A parameter reference (#1 .. #9) inside a macro body.
_PARAM_RE = re.compile(r'#([1-9])')


def _is_ascii_letter(ch):
    """Return True if *ch* is exactly one ASCII letter (False for '')."""
    return 'a' <= ch <= 'z' or 'A' <= ch <= 'Z'


def _matching_brace(text, open_idx):
    """Return the index of the '}' closing the '{' at *open_idx*, or -1.

    A backslash escapes the following character, so ``\\{`` and ``\\}`` do
    not take part in the nesting count.
    """
    depth = 0
    i = open_idx
    while i < len(text):
        ch = text[i]
        if ch == '\\':
            i += 2  # skip the escaped character as well
            continue
        if ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                return i
        i += 1
    return -1


def _substitute_params(body, args):
    """Replace every ``#n`` in *body* with ``args[n-1]`` in a single pass.

    A single pass keeps text that came from one argument from being
    rewritten again when a later parameter is substituted.  References
    beyond ``len(args)`` are left unchanged.
    """
    def _pick(match):
        idx = int(match.group(1)) - 1
        return args[idx] if idx < len(args) else match.group(0)

    return _PARAM_RE.sub(_pick, body)


def analyze_command(cmd, str, narg):
    r"""Split a line around the first use of a command and its arguments.

    Occurrences of *cmd* that are only the beginning of a longer command
    name (``\foo`` inside ``\foobar``) are skipped.  At most *narg* brace
    groups directly following the command are consumed; anything after them
    (including further brace groups) is returned untouched in the suffix.

    :param cmd: command name including the backslash, e.g. ``'\command1'``
    :param str: the line to scan, e.g.
        ``'hoge' + cmd + '{arg1}{arg2}...{argN}' + 'foo'``
    :param narg: number of arguments the command takes (0 is allowed)
    :return: ``('hoge', [arg1, ..., argN], 'foo')``, or ``None`` when the
        command is not used in the line or its use cannot be parsed.  When
        some but fewer than *narg* arguments are present a warning is
        printed and the shorter argument list is returned.
    """
    text = str  # the parameter name shadows the builtin; use a local alias

    # Find the first occurrence that is not a prefix of a longer name.
    search_from = 0
    while True:
        cmd_head = text.find(cmd, search_from)
        if cmd_head == -1:  # command not found
            return None
        cmd_end = cmd_head + len(cmd)
        if (_is_ascii_letter(cmd[-1:])
                and _is_ascii_letter(text[cmd_end:cmd_end + 1])):
            search_from = cmd_end
            continue
        break

    # Collect up to narg brace groups that follow each other directly.
    args = []
    pos = cmd_end
    while len(args) < narg and text[pos:pos + 1] == '{':
        close = _matching_brace(text, pos)
        if close == -1:  # unbalanced braces on this line
            print('Warning: Invalid syntax.')
            return None
        args.append(text[pos + 1:close])
        pos = close + 1

    if narg > 0 and not args:  # arguments expected but no '{' follows
        print('Warning: Invalid syntax.')
        return None

    if len(args) < narg:
        print('Warning: Too few arguments.')

    return text[:cmd_head], args, text[pos:]


def textract_newcommand(tx_ref, tx_tgt):
    r"""Expand the ``\newcommand`` macros defined in *tx_ref* inside *tx_tgt*.

    Only definitions written on a single line in one of the forms
    ``\newcommand{name}{body}`` or ``\newcommand{name}[count]{body}`` are
    recognised.  Each target line is rewritten until no defined command can
    be expanded in it any more; lines that are themselves such definitions
    are left untouched.  Uses with fewer arguments than declared are skipped.

    Note: a macro whose expansion contains a use of itself never reaches a
    fixed point, so the expansion loop does not terminate for such input.

    :param tx_ref: iterable of lines containing the definitions
    :param tx_tgt: list of lines to expand; modified in place
    :return: *tx_tgt* (the same list object) with the macros expanded
    """
    # Build the command table: (name, number of arguments, body).
    commands = []
    for raw in tx_ref:
        found = _NEWCOMMAND_RE.fullmatch(raw.strip())
        if found is None:
            continue
        name, count, body = found.groups()
        commands.append((name, int(count) if count is not None else 0, body))

    # Replace the commands, one line at a time.
    for li, line in enumerate(tx_tgt):
        if _NEWCOMMAND_RE.fullmatch(line.strip()):
            continue  # do not rewrite a definition line

        replaced = True
        while replaced:
            replaced = False
            for name, arg_num, body in commands:
                parsed = analyze_command(name, line, arg_num)
                if parsed is None:
                    continue
                prefix, args, suffix = parsed
                if len(args) < arg_num:  # malformed use, leave as is
                    continue
                line = prefix + _substitute_params(body, args) + suffix
                replaced = True
        tx_tgt[li] = line

    return tx_tgt


if __name__ == '__main__':
    # Command line: <script> SRC REF DST [ENCODING]
    #   SRC      file whose macros are expanded
    #   REF      file containing the \newcommand definitions
    #   DST      file the expanded text is written to
    #   ENCODING text encoding used for all three files (default: utf-8)
    text_codec = 'utf-8'

    argv = sys.argv
    argc = len(argv)
    # sys.exit() rather than the site-provided exit(): the latter is an
    # interactive-shell helper and is not available when site is not loaded.
    if argc < 4:
        sys.exit('Few arguments.')
    if argc > 5:
        sys.exit('Too many arguments.')
    if argc == 4:
        _, fp_src, fp_ref, fp_enc = argv
    else:
        _, fp_src, fp_ref, fp_enc, text_codec = argv

    with codecs.open(fp_ref, 'r', text_codec) as f:
        tx_ref = f.readlines()

    with codecs.open(fp_src, 'r', text_codec) as f:
        tx_src = f.readlines()

    tx_dst = textract_newcommand(tx_ref=tx_ref, tx_tgt=tx_src)

    with codecs.open(fp_enc, 'w', text_codec) as f:
        f.writelines(tx_dst)
	import sys
	import re
	import codecs

	# Matches one single-line definition (surrounding whitespace already stripped):
	#   \newcommand{<name>}{<body>}             -> macro without arguments
	#   \newcommand{<name>}[<count>]{<body>}    -> macro with <count> arguments
	_NEWCOMMAND_RE = re.compile(
	    r'\\newcommand\{([^{}\[\]]+)\}(?:\[\s*(\d+)\s*\])?\{(.*)\}')

	# A parameter reference (#1 .. #9) inside a macro body.
	_PARAM_RE = re.compile(r'#([1-9])')


	def _is_ascii_letter(ch):
	    """Return True if *ch* is exactly one ASCII letter (False for '')."""
	    return 'a' <= ch <= 'z' or 'A' <= ch <= 'Z'


	def _matching_brace(text, open_idx):
	    """Return the index of the '}' closing the '{' at *open_idx*, or -1.

	    A backslash escapes the following character, so ``\\{`` and ``\\}`` do
	    not take part in the nesting count.
	    """
	    depth = 0
	    i = open_idx
	    while i < len(text):
	        ch = text[i]
	        if ch == '\\':
	            i += 2  # skip the escaped character as well
	            continue
	        if ch == '{':
	            depth += 1
	        elif ch == '}':
	            depth -= 1
	            if depth == 0:
	                return i
	        i += 1
	    return -1


	def _substitute_params(body, args):
	    """Replace every ``#n`` in *body* with ``args[n-1]`` in a single pass.

	    A single pass keeps text that came from one argument from being
	    rewritten again when a later parameter is substituted.  References
	    beyond ``len(args)`` are left unchanged.
	    """
	    def _pick(match):
	        idx = int(match.group(1)) - 1
	        return args[idx] if idx < len(args) else match.group(0)

	    return _PARAM_RE.sub(_pick, body)


	def analyze_command(cmd, str, narg):
	    r"""Split a line around the first use of a command and its arguments.

	    Occurrences of *cmd* that are only the beginning of a longer command
	    name (``\foo`` inside ``\foobar``) are skipped.  At most *narg* brace
	    groups directly following the command are consumed; anything after them
	    (including further brace groups) is returned untouched in the suffix.

	    :param cmd: command name including the backslash, e.g. ``'\command1'``
	    :param str: the line to scan, e.g.
	        ``'hoge' + cmd + '{arg1}{arg2}...{argN}' + 'foo'``
	    :param narg: number of arguments the command takes (0 is allowed)
	    :return: ``('hoge', [arg1, ..., argN], 'foo')``, or ``None`` when the
	        command is not used in the line or its use cannot be parsed.  When
	        some but fewer than *narg* arguments are present a warning is
	        printed and the shorter argument list is returned.
	    """
	    text = str  # the parameter name shadows the builtin; use a local alias

	    # Find the first occurrence that is not a prefix of a longer name.
	    search_from = 0
	    while True:
	        cmd_head = text.find(cmd, search_from)
	        if cmd_head == -1:  # command not found
	            return None
	        cmd_end = cmd_head + len(cmd)
	        if (_is_ascii_letter(cmd[-1:])
	                and _is_ascii_letter(text[cmd_end:cmd_end + 1])):
	            search_from = cmd_end
	            continue
	        break

	    # Collect up to narg brace groups that follow each other directly.
	    args = []
	    pos = cmd_end
	    while len(args) < narg and text[pos:pos + 1] == '{':
	        close = _matching_brace(text, pos)
	        if close == -1:  # unbalanced braces on this line
	            print('Warning: Invalid syntax.')
	            return None
	        args.append(text[pos + 1:close])
	        pos = close + 1

	    if narg > 0 and not args:  # arguments expected but no '{' follows
	        print('Warning: Invalid syntax.')
	        return None

	    if len(args) < narg:
	        print('Warning: Too few arguments.')

	    return text[:cmd_head], args, text[pos:]


	def textract_newcommand(tx_ref, tx_tgt):
	    r"""Expand the ``\newcommand`` macros defined in *tx_ref* inside *tx_tgt*.

	    Only definitions written on a single line in one of the forms
	    ``\newcommand{name}{body}`` or ``\newcommand{name}[count]{body}`` are
	    recognised.  Each target line is rewritten until no defined command can
	    be expanded in it any more; lines that are themselves such definitions
	    are left untouched.  Uses with fewer arguments than declared are skipped.

	    Note: a macro whose expansion contains a use of itself never reaches a
	    fixed point, so the expansion loop does not terminate for such input.

	    :param tx_ref: iterable of lines containing the definitions
	    :param tx_tgt: list of lines to expand; modified in place
	    :return: *tx_tgt* (the same list object) with the macros expanded
	    """
	    # Build the command table: (name, number of arguments, body).
	    commands = []
	    for raw in tx_ref:
	        found = _NEWCOMMAND_RE.fullmatch(raw.strip())
	        if found is None:
	            continue
	        name, count, body = found.groups()
	        commands.append((name, int(count) if count is not None else 0, body))

	    # Replace the commands, one line at a time.
	    for li, line in enumerate(tx_tgt):
	        if _NEWCOMMAND_RE.fullmatch(line.strip()):
	            continue  # do not rewrite a definition line

	        replaced = True
	        while replaced:
	            replaced = False
	            for name, arg_num, body in commands:
	                parsed = analyze_command(name, line, arg_num)
	                if parsed is None:
	                    continue
	                prefix, args, suffix = parsed
	                if len(args) < arg_num:  # malformed use, leave as is
	                    continue
	                line = prefix + _substitute_params(body, args) + suffix
	                replaced = True
	        tx_tgt[li] = line

	    return tx_tgt



	if __name__ == '__main__':
	    # Command line: <script> SRC REF DST [ENCODING]
	    #   SRC      file whose macros are expanded
	    #   REF      file containing the \newcommand definitions
	    #   DST      file the expanded text is written to
	    #   ENCODING text encoding used for all three files (default: utf-8)
	    text_codec = 'utf-8'

	    argv = sys.argv
	    argc = len(argv)
	    # sys.exit() rather than the site-provided exit(): the latter is an
	    # interactive-shell helper and is not available when site is not loaded.
	    if argc < 4:
	        sys.exit('Few arguments.')
	    if argc > 5:
	        sys.exit('Too many arguments.')
	    if argc == 4:
	        _, fp_src, fp_ref, fp_enc = argv
	    else:
	        _, fp_src, fp_ref, fp_enc, text_codec = argv

	    with codecs.open(fp_ref, 'r', text_codec) as f:
	        tx_ref = f.readlines()

	    with codecs.open(fp_src, 'r', text_codec) as f:
	        tx_src = f.readlines()

	    tx_dst = textract_newcommand(tx_ref=tx_ref, tx_tgt=tx_src)

	    with codecs.open(fp_enc, 'w', text_codec) as f:
	        f.writelines(tx_dst)