Last active
April 18, 2020 16:33
-
-
Save chickenmatt5/be6ab2dc48a579c57ba22f9def877bcb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from os import walk, path | |
from lxml import etree | |
from csv import writer | |
from html import unescape | |
## Input folders, one per language. Both are walked recursively and their
## files are paired up by file name.
eng_path = 'path\\to\\acnh1.1msgen'
jpn_path = 'path\\to\\acnh1.1msgjp'

## Offsets tried around the expected position when the two file lists are
## not aligned index-for-index.
SEARCH_WINDOW = range(-50, 50)


def list_files(root):
    """Return the path of every file found under *root*, recursively.

    A missing folder simply produces an empty list, because ``os.walk``
    yields nothing for it.
    """
    return [
        path.join(folder, name)
        for folder, _, names in walk(root)
        for name in names
    ]


def find_matching_index(filenames, target, start, offsets=SEARCH_WINDOW):
    """Locate *target* in *filenames*, looking at and around *start*.

    Args:
        filenames: List of file names to search.
        target: File name to look for.
        start: Index where the match is expected.
        offsets: Offsets (relative to *start*) tried, in order, when the
            entry at *start* is not the match.

    Returns:
        The index of the first match, or ``None`` when nothing in the
        window matches.
    """
    size = len(filenames)
    if 0 <= start < size and filenames[start] == target:
        return start
    for offset in offsets:
        index = start + offset
        # The lower bound matters: a negative index would silently wrap
        # around to the end of the list and could pair unrelated files.
        if 0 <= index < size and filenames[index] == target:
            return index  # stop at the first hit instead of drifting on
    return None


def child_text(entry, index):
    """Return the HTML-unescaped text of ``entry[index]``.

    Returns an empty string when that child does not exist or has no text.
    """
    if index >= len(entry):
        return ''
    text = entry[index].text
    return unescape(text) if text is not None else ''


def parse_message_file(file_path, parser):
    """Read *file_path* as UTF-8 and parse it with *parser*.

    The first line of the file is not passed to the parser.
    NOTE(review): presumably that line is the XML declaration, which lxml
    rejects on already-decoded text -- confirm against the input files.
    """
    with open(file_path, "r", encoding='utf-8') as handle:
        lines = handle.readlines()
    return etree.fromstringlist(lines[1:], parser=parser)


def main():
    """Write ``output.csv`` with the English and Japanese text side by side.

    Each entry produces two rows, "Original" (first child of the entry) and
    "Edited" (second child). A Japanese file with no same-named English file
    within the search window is skipped.
    """
    eng_files = list_files(eng_path)
    jpn_files = list_files(jpn_path)
    # basename instead of splitting on a backslash, so that name matching
    # also works where the path separator is not a backslash.
    eng_names = [path.basename(eng_file) for eng_file in eng_files]
    parser = etree.XMLParser(recover=True)

    ## Set up CSV writing; the with-block guarantees the file is flushed
    ## and closed even if a file fails to parse half-way through.
    with open('output.csv', 'w+', newline='', encoding='utf-8') as output_file:
        csv_writer = writer(output_file)
        csv_writer.writerow(["File", "name", "Version", "English", "Japanese"])

        # Only positions that exist in both lists are considered.
        for file_num, jpn_file in enumerate(jpn_files[:len(eng_files)]):
            eng_index = find_matching_index(
                eng_names, path.basename(jpn_file), file_num)
            if eng_index is None:
                continue
            eng_filename = eng_names[eng_index]

            ## Read each file and parse it from XML formatting
            eng_root = parse_message_file(eng_files[eng_index], parser)
            jpn_root = parse_message_file(jpn_file, parser)

            ## Extract desired data from each entry. Entries are read from
            ## the third child of the root and paired by position.
            eng_entries = eng_root[2]
            jpn_entries = jpn_root[2]
            for position, eng_entry in enumerate(eng_entries):
                name = eng_entry.attrib['name']
                # A missing Japanese entry yields empty text, not a crash.
                if position < len(jpn_entries):
                    jpn_entry = jpn_entries[position]
                else:
                    jpn_entry = ()

                ## Write data to CSV file
                csv_writer.writerow([
                    eng_filename, name, "Original",
                    child_text(eng_entry, 0), child_text(jpn_entry, 0),
                ])
                csv_writer.writerow([
                    eng_filename, name, "Edited",
                    child_text(eng_entry, 1), child_text(jpn_entry, 1),
                ])


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment