// Source: GitHub gist 8bf05af7c825cd623c6eebb634483675 by @CalmBit, created December 2, 2017 04:32.
#include <fstream>
#include <sstream>
#include <iostream>
#include <vector>
#include <string>
#include <map>
// Lookup table mapping one input byte (used as the index, 0-255) to the
// wide-character code point written to the output.
// The values match IBM code page 437: indices 0x20-0x7E are the identical
// ASCII characters and every other index is the decimal Unicode code point of
// the corresponding CP437 glyph. Indices 0x00 and 0xFF are both stored as a
// plain space.
const wchar_t CODEPAGE[256] =
{
// 0x00-0x0F: symbol glyphs (faces, card suits, music notes, ...)
' ', 9786, 9787, 9829, 9830, 9827, 9824, 8226, 9688, 9675, 9689, 9794, 9792, 9834, 9835, 9788,
// 0x10-0x1F: arrows and other symbols
9658, 9668, 8597, 8252, 182, 167, 9644, 8616, 8593, 8595, 8594, 8592, 8735, 8596, 9650, 9660,
// 0x20-0x7E: printable ASCII, unchanged
' ', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',','-', '.', '/',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?',
'@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']','^', '_',
'`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
// 0x7F is the "house" glyph (U+2302) rather than a control character
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', 8962,
// 0x80-0xAF: accented Latin letters, currency signs and punctuation
199, 252, 233, 226, 228, 224, 229, 231, 234, 235, 232, 239, 238, 236, 196, 197,
201, 230, 198, 244, 246, 242, 251, 249, 255, 214, 220, 162, 163, 165, 8359, 402,
225, 237, 243, 250, 241, 209, 170, 186, 191, 8976, 172, 189, 188, 161, 171, 187,
// 0xB0-0xDF: shading, box-drawing and block elements
9617, 9618, 9619, 9474, 9508, 9569, 9570, 9558, 9557, 9571, 9553, 9559, 9565, 9564, 9563, 9488,
9492, 9524, 9516, 9500, 9472, 9532, 9566, 9567, 9562, 9556, 9577, 9574, 9568, 9552, 9580, 9575,
9576, 9572, 9573, 9561, 9560, 9554, 9555, 9579, 9578, 9496, 9484, 9608, 9604, 9612, 9616, 9600,
// 0xE0-0xFF: Greek letters and mathematical symbols; 0xFF is stored as a space
945, 223, 915, 960, 931, 963, 181, 964, 934, 920, 937, 948, 8734, 966, 949, 8745,
8801, 177, 8805, 8804, 8992, 8993, 247, 8776, 176, 8729, 183, 8730, 8319, 178, 9632, ' '
};
// Decodes an unsigned 16-bit little-endian value from the first two bytes of
// arr (arr[0] is the low byte, arr[1] the high byte).
// Returns the value as an int in the range 0..65535.
int getShortFromCharArray(char arr[]) {
    // Go through unsigned char so bytes >= 0x80 are not sign-extended.
    const int lowByte = static_cast<unsigned char>(arr[0]);
    const int highByte = static_cast<unsigned char>(arr[1]);
    return (highByte << 8) | lowByte;
}
// Decodes a 32-bit little-endian value from the first four bytes of arr
// (arr[0] is the least significant byte, arr[3] the most significant).
// Returns the assembled bit pattern converted to int, so inputs whose top bit
// is set come back negative on the usual two's-complement targets.
int getIntgrFromCharArray(char arr[]) {
    // Assemble in an unsigned type: shifting a promoted (signed) int left by 24
    // would push a byte >= 0x80 into the sign bit, which is not portable.
    unsigned int value = 0;
    for (int index = 3; index >= 0; --index) {
        // The unsigned char cast prevents sign extension of bytes >= 0x80.
        value = (value << 8) | static_cast<unsigned char>(arr[index]);
    }
    return static_cast<int>(value);
}
namespace {

// One list of decoded strings as stored in the data file.
using StringList = std::vector<std::wstring>;

// Byte offset at which parsing of the data file starts.
const std::streamoff DATA_START_OFFSET = 0xE8;

// Parsing stops once the read position reaches this offset.
// NOTE(review): an earlier comment quoted 0x13D0D as the "final testing
// offset" while the code compared against 0x130D0D; the code's value is kept
// here -- confirm which one is intended.
const std::streamoff DATA_END_OFFSET = 0x130D0D;

// Reads four bytes and decodes them with getIntgrFromCharArray.
// Returns false (leaving value untouched) if fewer than four bytes were left.
bool readInt(std::istream& in, int& value) {
    char raw[4] = {};
    if (!in.read(raw, sizeof raw)) {
        return false;
    }
    value = getIntgrFromCharArray(raw);
    return true;
}

// Reads one string: a two-byte length followed by that many bytes, each of
// which is translated to a wide character through CODEPAGE.
// Returns false if the stream ends before the whole string was read.
bool readString(std::istream& in, std::wstring& text) {
    char sizeRaw[2] = {};
    if (!in.read(sizeRaw, sizeof sizeRaw)) {
        return false;
    }
    const int size = getShortFromCharArray(sizeRaw);

    // std::string owns the temporary byte buffer, so nothing leaks on any path.
    std::string bytes(static_cast<std::string::size_type>(size), '\0');
    if (size > 0 && !in.read(&bytes[0], size)) {
        return false;
    }

    text.clear();
    text.reserve(bytes.size());
    for (const char byte : bytes) {
        text += CODEPAGE[static_cast<unsigned char>(byte)];
    }
    return true;
}

// Parses every group of string lists between DATA_START_OFFSET and
// DATA_END_OFFSET and appends the lists to `lists`.
// Layout as read by this code: a 4-byte list count, then per list a 4-byte
// string count followed by that many length-prefixed strings.
// Returns false if the data ended in the middle of a group; lists that were
// completely read before that point are kept.
bool readStringLists(std::istream& in, std::vector<StringList>& lists) {
    in.seekg(DATA_START_OFFSET);
    while (in) {
        // tellg() reports -1 on a failed stream, so test for that explicitly
        // instead of letting it compare as "before the end offset".
        const std::streamoff position = in.tellg();
        if (position < 0 || position >= DATA_END_OFFSET) {
            break;
        }

        int numLists = 0;
        if (!readInt(in, numLists)) {
            // Zero bytes available at a group boundary is a clean end of file.
            return in.gcount() == 0;
        }

        for (int listIndex = 0; listIndex < numLists; ++listIndex) {
            int listSize = 0;
            if (!readInt(in, listSize)) {
                return false;
            }
            if (listSize == 0) {
                // An empty list is followed by four bytes that are skipped.
                // NOTE(review): their meaning is not visible in this file.
                in.seekg(4, std::ios_base::cur);
                continue;
            }

            StringList strings;
            for (int stringIndex = 0; stringIndex < listSize; ++stringIndex) {
                std::wstring text;
                if (!readString(in, text)) {
                    return false;
                }
                strings.push_back(text);
            }
            lists.push_back(strings);
        }
    }
    return true;
}

// Writes one list to "<title>_<index>.txt", one string per line, where the
// title is the list's first string. Returns false if the file could not be
// created or written.
bool writeStringList(const StringList& list, int index) {
    // Each wide character of the title is truncated to a single char, so
    // titles containing non-ASCII characters yield arbitrary filename bytes.
    std::string narrowTitle;
    for (const wchar_t wide : list.front()) {
        narrowTitle += static_cast<char>(wide);
    }

    std::ostringstream filename;
    filename << narrowTitle << "_" << index << ".txt";

    std::wofstream outFile{filename.str()};
    for (const std::wstring& line : list) {
        outFile << line << L'\n';
    }
    outFile.flush();
    if (!outFile) {
        std::cerr << "Error writing file " << filename.str() << std::endl;
        return false;
    }
    return true;
}

} // namespace

// Entry point: extracts the string lists from the data file named by argv[1]
// and writes each list to its own text file in the current directory.
// Returns 0 on success (and after printing usage), -1 if the input file cannot
// be opened, and 1 if at least one output file could not be written.
int main(int argc, char* argv[]) {
    // Adopt the environment's locale so the wide streams used below can encode
    // the non-ASCII code points produced by CODEPAGE.
    std::locale::global(std::locale(""));

    if (argc < 2) {
        std::cout << "Usage: " << argv[0] << " <world.dat>" << std::endl;
        return 0;
    }

    std::ifstream datFile{argv[1], std::ifstream::in | std::ifstream::binary};
    if (!datFile.good()) {
        std::cerr << "Error loading file " << argv[1] << std::endl;
        return -1;
    }

    std::vector<StringList> bufferList;
    if (!readStringLists(datFile, bufferList)) {
        std::cerr << "Warning: " << argv[1]
                  << " ended in the middle of a record; keeping the lists read so far"
                  << std::endl;
    }
    datFile.close();

    // Echo the very first string, if there is one.
    if (!bufferList.empty() && !bufferList.front().empty()) {
        std::wcout << bufferList.front().front() << std::endl;
    }

    // Number of lists already written per title; used as the filename suffix
    // so lists that share a title do not overwrite each other.
    std::map<std::wstring, int> categories;
    bool allWritten = true;
    for (const StringList& list : bufferList) {
        if (list.empty()) {
            continue; // no title string to name the file after
        }
        // operator[] value-initialises the counter to 0 for a new title.
        const int index = categories[list.front()]++;
        if (!writeStringList(list, index)) {
            allWritten = false;
        }
    }

    // Use the wide stream here as well: stdout may already be wide-oriented
    // from the std::wcout output above, and byte output on such a stream is
    // not reliable.
    std::wcout << L"Finished processing raw files." << std::endl;
    return allWritten ? 0 : 1;
}
// (End of gist; the GitHub comment-section footer was not part of the source.)