# dnng/unicode_code_point_fix.py

## unicode_code_point_fix.py
from re import compile, findall, split

# Matches an escaped Latin-1 code point written either as 'u00e9' or with a
# leading backslash. Exactly two hexadecimal digits are required after 'u00',
# so look-alikes such as 'u00zz' are left untouched instead of making
# int(..., 16) raise ValueError. Compiled once rather than once per line.
CODE_POINT_PATTERN = compile(r'\\?u00([0-9a-fA-F]{2})')


def fix_code_points(line):
    """Return ``line`` with every escaped code point replaced by its character.

    A code point is the text ``u00XX`` (``XX`` being two hexadecimal digits),
    optionally preceded by a single backslash. Each occurrence is replaced by
    ``chr(0xXX)``; all other text is returned unchanged.

    Args:
        line: Text to convert (typically one line of the input file).

    Returns:
        The converted text.
    """
    return CODE_POINT_PATTERN.sub(
        lambda match: chr(int(match.group(1), 16)), line
    )


def main():
    """Copy 'original_file' to 'fixed_file', converting escaped code points.

    Both files are opened with the platform default text encoding, exactly
    as the script always did. The input is opened first, so a missing
    'original_file' does not leave an empty 'fixed_file' behind.
    """
    with open('original_file', 'r') as of, open('fixed_file', 'w') as ff:
        ff.writelines(fix_code_points(line) for line in of)


# Run the conversion only when executed as a script, so that importing this
# module (e.g. to reuse fix_code_points) does not touch any files.
if __name__ == '__main__':
    main()
	from re import compile, findall, split

	with open('original_file', 'r') as of:
	with open('fixed_file', 'w') as ff:
	for line in of:
	# looks for substrings that are either 'u00fo' or '\u00fo'
	pattern = compile(r'\\?u00..')
	code_points = findall(pattern, line)
	pieces = split(pattern, line)
	res = ''
	for idx, cp in enumerate(code_points):
	res += pieces[idx] + chr(int(cp[1::] if cp[0] == 'u' else cp[2::], 16))
	else:
	res += pieces[-1]
	ff.write(res)