Skip to content

Instantly share code, notes, and snippets.

@AlexPernot
Last active June 25, 2022 21:31
Show Gist options
  • Save AlexPernot/245b20ec36bef895d9fac48ad36767f3 to your computer and use it in GitHub Desktop.
Save AlexPernot/245b20ec36bef895d9fac48ad36767f3 to your computer and use it in GitHub Desktop.
A friend of mine wanted to extract all the dialogue from the original Monkey Island into CSVs by character and language. Here's a JS script to do it; it was a fun experiment in reverse-engineering a '90s game localization file and working with Node.js Buffers. Usage: put the "speech.info" file next to this script and simply run it with `node index.js`…
/**
* Reads the Monkey Island speech.info file and extracts the data into several CSVs.
*/
const fs = require("fs");
// Buffer scanning constants describing the fixed-width layout read from speech.info.
// You probably don't want to change these.
const lineStartOffset = 16; // Non-printable bytes skipped before each group of lines; maybe a header of some sort, unused here.
const lineLength = 256; // Bytes read for each localized line.
const speakerLength = 32; // Bytes read for each sound file ID.
const encoding = "latin1"; // Used both to decode the buffer and to write the CSV files.

// Field separator of the generated CSVs.
// Switch it to ";" if you have a French Windows install and want Excel support. Thanks Excel.
const csvSeparator = ",";

// Sound file IDs in file order, e.g. "GUY_32_alley_1_1". The first three letters are the actor ID.
const fileIds = [];

// Dialogue lines in file order, one array per language.
const lines = { en: [], fr: [], it: [], de: [], es: [] };

// Language codes, in the order their lines are read from the file.
const locales = Object.keys(lines);

// Lines regrouped by actor ID; filled once the whole file has been read.
const csvData = {};
// Removes NUL (0x00) bytes, turns 0x01 bytes into spaces, then strips trailing whitespace.
const cleanString = str => {
  const withoutControlBytes = str.replace(/[\x00\x01]/g, byte => (byte === "\x01" ? " " : ""));
  return withoutControlBytes.trimEnd();
};
/**
 * Formats a string as a single CSV field.
 * The field is wrapped in double quotes (with embedded double quotes doubled)
 * whenever it contains the separator, a double quote, or a line break;
 * otherwise it is returned untouched.
 * @param {String} str The raw field value
 * @param {String} [separator] The field separator in use; defaults to the file-level csvSeparator
 * @return {String} The field, ready to be joined with the separator
 */
const escapeCsvString = (str, separator = csvSeparator) => {
  // A quote inside an unquoted field is malformed CSV, so quotes force wrapping too.
  const needsQuoting = str.includes(separator) || /["\r\n]/.test(str);
  return needsQuoting ? `"${str.replace(/"/g, '""')}"` : str;
};
/**
 * Reads one fixed-width text field per locale from the buffer and appends
 * each cleaned string to the matching array of the "lines" object.
 * @param {Buffer} buf The input buffer
 * @param {Number} startOffset The position in the buffer of the first locale's field
 * @return {Number} The position in the buffer just past the last field read
 */
const readNextLines = (buf, startOffset) => {
  locales.forEach((locale, index) => {
    // Fields sit back to back, in the same order as the locales.
    const fieldStart = startOffset + index * lineLength;
    const rawText = buf.toString(encoding, fieldStart, fieldStart + lineLength);
    lines[locale].push(cleanString(rawText));
  });
  return startOffset + locales.length * lineLength;
};
/**
 * Reads one fixed-width sound file ID from the buffer and appends the
 * cleaned string to the "fileIds" array.
 * @param {Buffer} buf The input buffer
 * @param {Number} startOffset The position in the buffer where the ID field starts
 * @return {Number} The position in the buffer just past the ID field
 */
const readNextSpeaker = (buf, startOffset) => {
  const endOffset = startOffset + speakerLength;
  const rawId = buf.toString(encoding, startOffset, endOffset);
  fileIds.push(cleanString(rawId));
  return endOffset;
};
/**
 * Writes one CSV file per actor found in "csvData" into the "csv" directory
 * (created if missing). Each file is named after the actor ID and holds a
 * header row followed by one row per sound file: the file ID, then the line
 * in each locale.
 */
const writeCsv = () => {
  // The first label contains spaces and is kept pre-quoted, exactly as it was in the original header.
  const headerLabels = ['"Sound file ID"', "English", "Français", "Italiano", "Deutsche", "Español"];
  fs.mkdirSync("csv", {recursive: true});
  // For each actor, we write a csv
  for (const [actorId, actorData] of Object.entries(csvData)) {
    const writeStream = fs.createWriteStream(`./csv/${actorId}.csv`, {encoding});
    // The header is joined with the configured separator so it always matches the data rows.
    writeStream.write(`${headerLabels.join(csvSeparator)}\n`);
    actorData.fileId.forEach((fileId, i) => {
      const cells = [fileId, ...locales.map(locale => actorData[locale][i])];
      // An explicit arrow keeps map's index argument from being passed on to escapeCsvString.
      const row = cells.map(cell => escapeCsvString(cell)).join(csvSeparator);
      writeStream.write(`${row}\n`);
    });
    // end() flushes the pending writes before the file is closed.
    writeStream.end();
  }
};
// Entry point: reads "speech.info", decodes every record, groups the lines by actor
// and writes the CSV files.
fs.readFile("speech.info", (err, input) => {
  // Without this guard a missing or unreadable file crashes on "input.length" with an unhelpful TypeError.
  if (err) {
    console.error(`Could not read "speech.info": ${err.message}`);
    process.exitCode = 1;
    return;
  }
  // We read the file and build the "lines" and "fileIds" collections.
  // Each iteration consumes: the skipped leading bytes, one line per locale, then the sound file ID.
  let position = 0;
  while (position < input.length) {
    position += lineStartOffset;
    position = readNextLines(input, position);
    position = readNextSpeaker(input, position);
  }
  // We make a new data structure to sort the lines by actor
  for (const [i, fileId] of fileIds.entries()) {
    // Some lines don't have an actor (empty file ID); they are grouped under "___".
    const actorId = fileId.slice(0, 3) || "___";
    // If we have a new actor ID, we build a new sub-object
    if (!Object.prototype.hasOwnProperty.call(csvData, actorId)) {
      csvData[actorId] = {
        "fileId": [],
        "en": [],
        "fr": [],
        "it": [],
        "de": [],
        "es": []
      };
    }
    csvData[actorId].fileId.push(fileId);
    // The i-th entry of every locale array belongs to the i-th file ID.
    for (const locale of locales) {
      csvData[actorId][locale].push(lines[locale][i]);
    }
  }
  // We build the CSVs
  writeCsv();
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment