Skip to content

Instantly share code, notes, and snippets.

@chickenmatt5
Last active April 18, 2020 16:33
Show Gist options
  • Save chickenmatt5/be6ab2dc48a579c57ba22f9def877bcb to your computer and use it in GitHub Desktop.
Save chickenmatt5/be6ab2dc48a579c57ba22f9def877bcb to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
from os import walk, path
from lxml import etree
from csv import writer
from html import unescape
## Get all files
eng_path = 'path\\to\\acnh1.1msgen'
jpn_path = 'path\\to\\acnh1.1msgjp'


def collect_files(root):
    """Return the path of every file found under ``root``, sorted.

    ``root`` is walked recursively.  ``os.walk`` reports directory entries in
    arbitrary order, and the rest of this script pairs the English and
    Japanese lists by position, so the result is sorted to give both trees
    the same deterministic ordering.

    A ``root`` that does not exist yields an empty list (``os.walk`` ignores
    the listing error by default).
    """
    return sorted(
        path.join(folder, file_name)
        for folder, _, file_names in walk(root)
        for file_name in file_names
    )


eng_files = collect_files(eng_path)
jpn_files = collect_files(jpn_path)
## Set up CSV writing
OUTPUT_CSV = 'output.csv'
CSV_HEADER = ["File", "name", "Version", "English", "Japanese"]
# How far around the expected index to look for a file with the same name.
SEARCH_WINDOW = 50


def find_matching_index(eng_paths, target_name, center, window=SEARCH_WINDOW):
    """Return the index in ``eng_paths`` nearest ``center`` named ``target_name``.

    Offsets from ``-window`` up to ``window - 1`` are tried in order of
    increasing distance from ``center``, so an exact positional match wins
    and the search stops at the first hit.  Indices that fall outside the
    list are skipped; a negative index is never allowed to wrap around to
    the end of the list.

    Returns ``None`` when no path inside the window has that base name.
    """
    for offset in sorted(range(-window, window), key=abs):
        index = center + offset
        if 0 <= index < len(eng_paths) and path.basename(eng_paths[index]) == target_name:
            return index
    return None


def load_root(xml_path, parser):
    """Parse the file at ``xml_path`` and return its root element.

    The file is read as UTF-8 and its first line is dropped before parsing
    (presumably the XML declaration, which lxml refuses on already-decoded
    text -- TODO confirm against the input files).
    """
    with open(xml_path, "r", encoding='utf-8') as handle:
        lines = handle.readlines()
    return etree.fromstringlist(lines[1:], parser=parser)


def child_text(entry, index):
    """Return the HTML-unescaped text of ``entry[index]``.

    An empty string is returned when that child does not exist or has no
    text, so a short or empty entry produces a blank CSV cell.
    """
    if index >= len(entry):
        return ''
    text = entry[index].text
    return unescape(text) if text is not None else ''


def export_pairs(eng_paths, jpn_paths, output_path=OUTPUT_CSV):
    """Write English/Japanese text pairs from matching files to a CSV file.

    Each Japanese file is paired with the English file of the same base name
    found at, or near, the same list position; Japanese files without such a
    counterpart are skipped.  Only positions that exist in both lists are
    considered.  For every entry two rows are written, "Original" (first
    child of the entry) and "Edited" (second child).

    Args:
        eng_paths: paths of the English files.
        jpn_paths: paths of the Japanese files.
        output_path: destination CSV file; overwritten if it exists.
    """
    parser = etree.XMLParser(recover=True)
    # The with-block guarantees the CSV is flushed and closed even if a file
    # fails to parse part-way through.
    with open(output_path, 'w+', newline='', encoding='utf-8') as output_file:
        csv_out = writer(output_file)
        csv_out.writerow(CSV_HEADER)
        for file_num, jpn_file in enumerate(jpn_paths[:len(eng_paths)]):
            # In case files don't match up, search around the file_num index for a match
            eng_index = find_matching_index(eng_paths, path.basename(jpn_file), file_num)
            if eng_index is None:
                continue
            eng_filename = path.basename(eng_paths[eng_index])
            ## Read each file and parse it from XML formatting
            eng_root = load_root(eng_paths[eng_index], parser)
            jpn_root = load_root(jpn_file, parser)
            ## Extract desired data from each entry
            # The entries are the children of the root's third child.  They are
            # paired by position; zip stops at the shorter file instead of
            # raising IndexError when the two files differ in length.
            for eng_entry, jpn_entry in zip(eng_root[2], jpn_root[2]):
                name = eng_entry.attrib['name']
                ## Write data to CSV file
                csv_out.writerow([eng_filename, name, "Original",
                                  child_text(eng_entry, 0), child_text(jpn_entry, 0)])
                csv_out.writerow([eng_filename, name, "Edited",
                                  child_text(eng_entry, 1), child_text(jpn_entry, 1)])


if __name__ == "__main__":
    export_pairs(eng_files, jpn_files)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment