strawberrymelonpanda/encode.cpp

## encode.cpp
#include <cstdint>
#include <cstdio>
#include <exception>
#include <fstream>
#include <ios>
#include <string>
#include <utility>
#include <vector>

#include "json.hpp"

namespace {

/// One answer set of a question: the candidate answer strings and one integer
/// label per answer (the accompanying tojson.py emits 1 for the correct answer
/// and 0 for the others).
struct Answers {
    std::vector<std::string> answers;
    std::vector<int> labels;

    /// Writes this set to `out` as raw in-memory bytes (so the layout depends
    /// on the host's endianness and sizeof(int)):
    ///   - uint32 answer count
    ///   - per answer: uint32 byte length followed by the bytes
    ///   - every element of `labels` as a packed array of int
    /// Stream errors are not checked here; the caller inspects `out`.
    /// NOTE(review): deserialize() reads exactly one label per answer, so
    /// `labels` is expected to have the same size as `answers`.
    void serialize(std::ostream& out) const {
        const uint32_t count = static_cast<uint32_t>(answers.size());
        out.write(reinterpret_cast<const char*>(&count), sizeof(count));
        for (const auto& answer : answers) {
            const uint32_t length = static_cast<uint32_t>(answer.size());
            out.write(reinterpret_cast<const char*>(&length), sizeof(length));
            out.write(answer.data(), length);
        }
        out.write(reinterpret_cast<const char*>(labels.data()),
                  static_cast<std::streamsize>(labels.size() * sizeof(int)));
    }

    /// Reads a set previously written by serialize(), replacing the current
    /// contents.
    /// @return false when the stream fails at any point or the stored count is
    ///         negative when interpreted as a signed 32-bit value.
    bool deserialize(std::istream& in) {
        // Fixed-width type: the writer always emits exactly four bytes here.
        int32_t count = 0;
        in.read(reinterpret_cast<char*>(&count), sizeof(count));
        if (in.fail() || count < 0) {
            return false;
        }
        answers.resize(static_cast<std::size_t>(count));
        labels.resize(static_cast<std::size_t>(count));
        for (auto& answer : answers) {
            uint32_t length = 0;
            in.read(reinterpret_cast<char*>(&length), sizeof(length));
            if (in.fail()) {
                // Stop before `length` is used: after a failed read it does
                // not describe real data and must not drive an allocation.
                return false;
            }
            answer.resize(length);
            in.read(&answer[0], length);
            if (in.fail()) {
                return false;
            }
        }
        in.read(reinterpret_cast<char*>(labels.data()),
                static_cast<std::streamsize>(labels.size() * sizeof(int)));
        return !in.fail();
    }

    /// Appends the strings in j["answers"] and the integers in j["labels"].
    /// at() is used so that a missing key raises the JSON library's
    /// out_of_range exception instead of being undefined on a const object.
    void fromJson(const nlohmann::json& j) {
        for (const auto& elem : j.at("answers")) {
            answers.push_back(elem.get<std::string>());
        }
        for (const auto& elem : j.at("labels")) {
            labels.push_back(elem.get<int>());
        }
    }
};

/// One multiple-choice record: the question text plus two answer sets.
struct MultiplChoice {
    std::string question;
    Answers singleCorrect;
    Answers multipleCorrect;

    /// Writes the record to `out`: uint32 byte length of the question, the
    /// question bytes, then singleCorrect and multipleCorrect in that order.
    /// Stream errors are not checked here; the caller inspects `out`.
    void serialize(std::ostream& out) const {
        const uint32_t length = static_cast<uint32_t>(question.size());
        out.write(reinterpret_cast<const char*>(&length), sizeof(length));
        out.write(question.data(), length);
        singleCorrect.serialize(out);
        multipleCorrect.serialize(out);
    }

    /// Reads a record previously written by serialize().
    /// @return false as soon as any read fails.
    bool deserialize(std::istream& in) {
        uint32_t length = 0;
        in.read(reinterpret_cast<char*>(&length), sizeof(length));
        // The length is unsigned, so only the stream state needs checking.
        if (in.fail()) {
            return false;
        }
        question.resize(length);
        in.read(&question[0], length);
        if (in.fail()) {
            return false;
        }
        return singleCorrect.deserialize(in) && multipleCorrect.deserialize(in);
    }

    /// Fills the record from a JSON object with the keys "question",
    /// "single_correct" and "multiple_correct". at() makes a missing key raise
    /// the JSON library's out_of_range exception.
    void fromJson(const nlohmann::json& j) {
        question = j.at("question").get<std::string>();
        singleCorrect.fromJson(j.at("single_correct"));
        multipleCorrect.fromJson(j.at("multiple_correct"));
    }
};

void serialize(std::ostream& out, const std::vector<MultiplChoice>& data) {
    uint32_t n = data.size();
    out.write((char *)&n, sizeof(n));
    if (data.empty()) return;
    std::vector<uint32_t> pos(data.size(), 0);
    out.write((char *)pos.data(), pos.size() * sizeof(pos[0]));
    int i = 0;
    for (auto& d : data) {
        pos[i++] = out.tellp();
        d.serialize(out);
    }
    out.seekp(sizeof(n), std::ios::beg);
    out.write((char *)pos.data(), pos.size() * sizeof(pos[0]));
}

void encode(const char* jsonFile, const char* binFile) {
    std::ifstream jsonIn(jsonFile);
    nlohmann::json jsonData;
    jsonIn >> jsonData;

    std::vector<MultiplChoice> data;
    for (auto& elem : jsonData) {
        MultiplChoice mc;
        mc.fromJson(elem);
        data.push_back(mc);
    }

    std::ofstream binOut(binFile, std::ios::binary);
    serialize(binOut, data);
}
}

/// Command-line entry point: `<program> input.json output.bin`.
/// @return 0 on success; 1 when arguments are missing or encoding throws.
int main(int argc, char **argv) {
    const char* program = (argc > 0 && argv[0] != nullptr) ? argv[0] : "encode";
    if (argc < 3) {
        // Usage errors belong on stderr so they never mix with piped output.
        fprintf(stderr, "Usage: %s input.json output.bin\n", program);
        return 1;
    }

    try {
        encode(argv[1], argv[2]);
    } catch (const std::exception& err) {
        // Report the failure and exit non-zero instead of terminating on an
        // uncaught exception.
        fprintf(stderr, "%s: %s\n", program, err.what());
        return 1;
    }

    return 0;
}

## tojson.py
import sys
import json
import random

# Usage: python tojson.py <input_file> <output_file>


# Appends one record for the question with its answers in random order
def addQuestion(data, question, answers, labels):
    """Append a single question record to ``data``.

    ``answers`` and ``labels`` are permuted with the same random order, so each
    answer keeps its label. The record's "multiple_correct" section is always
    empty; the shuffled lists go under "single_correct".
    """
    order = list(range(len(answers)))
    random.shuffle(order)

    # One permutation drives both lists to keep answers and labels aligned.
    single_correct = {
        "answers": [answers[pos] for pos in order],
        "labels": [labels[pos] for pos in order],
    }
    record = {
        "multiple_correct": {"answers": [], "labels": []},
        "question": f"Question: \"{question}\" Answer:",
        "single_correct": single_correct,
    }
    data.append(record)

# Adds the question once for each possible answer position,
# shuffled so that the correct answer lands exactly once in every position
def addMultipleQuestions(data, question, answers, labels):
    """Append ``len(answers)`` records for ``question`` to ``data``.

    In the i-th record the answer whose label is 1 sits at index i and the
    remaining answers are shuffled around it. Nothing is appended when
    ``answers`` is empty.

    Raises:
        ValueError: if ``answers`` is non-empty and no label equals 1.
    """
    if not answers:
        # Matches the loop below running zero times; also keeps the label
        # lookup from failing on an empty list.
        return

    # Same for every iteration, so look it up once.
    correct_answer_index = labels.index(1)

    for position in range(len(answers)):
        order = list(range(len(answers)))
        order.remove(correct_answer_index)
        random.shuffle(order)
        order.insert(position, correct_answer_index)

        shuffled_answers = [answers[j] for j in order]
        shuffled_labels = [labels[j] for j in order]

        data.append({
            "multiple_correct": {"answers": [], "labels": []},
            "question": f"Question: \"{question}\" Answer:",
            "single_correct": {"answers": shuffled_answers, "labels": shuffled_labels}
        })

# Expects the correct answer to be A1
def convert_to_json(file):
    """Read a question file and return its records as a JSON string.

    Input format (UTF-8 text): records are separated by blank lines. A line
    starting with "Q:] " carries the question, a line starting with "A1:] " is
    an answer labelled 1, and any other non-blank line is taken as an answer
    labelled 0 with its first five characters removed (e.g. "A2:] ").

    Blank lines that do not close a record (leading, repeated or trailing ones)
    are ignored, so no empty question is emitted for them.

    Returns:
        The record list serialised with ``json.dumps`` (indent 4, non-ASCII
        characters kept as-is).
    """
    data = []
    question = ""
    answers = []
    labels = []

    with open(file, 'r', encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                # End of record; only emit it when something was collected.
                if question or answers:
                    addQuestion(data, question, answers, labels)
                question = ""
                answers = []
                labels = []
            elif line.startswith("Q:] "):
                question = line[4:].strip()
            elif line.startswith("A1:] "):
                answers.append(line[5:].strip())
                labels.append(1)
            else:
                # Assumes a five-character prefix such as "A2:] ".
                answers.append(line[5:].strip())
                labels.append(0)

    # Pick up the last record when the file does not end with a blank line
    if question or answers:
        addQuestion(data, question, answers, labels)
    return json.dumps(data, indent=4, ensure_ascii=False)

def write_file(filename, data):
    """Write the string ``data`` to ``filename``, replacing any existing file.

    The file is written as UTF-8 rather than the platform default encoding:
    the JSON text is produced with ``ensure_ascii=False`` from UTF-8 input, so
    it may contain characters a locale encoding cannot represent.
    """
    with open(filename, 'w', encoding="utf-8") as f:
        f.write(data)

if __name__ == "__main__":
    # Exactly two positional arguments are required: the question text file
    # to read and the JSON file to create.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: python tojson.py <input_file> <output_file>")
        sys.exit(1)

    input_file, output_file = cli_args

    # Convert first, then write, so a conversion error leaves no output file.
    write_file(output_file, convert_to_json(input_file))
	#include <cstdio>
	#include <fstream>
	#include <string>
	#include <vector>
	#include <cstdint>
	#include "json.hpp"

	namespace {

	/// One answer set of a question: the candidate answer strings and one
	/// integer label per answer (the accompanying tojson.py emits 1 for the
	/// correct answer and 0 for the others).
	struct Answers {
	    std::vector<std::string> answers;
	    std::vector<int> labels;

	    /// Writes this set to `out` as raw in-memory bytes (so the layout
	    /// depends on the host's endianness and sizeof(int)):
	    ///   - uint32 answer count
	    ///   - per answer: uint32 byte length followed by the bytes
	    ///   - every element of `labels` as a packed array of int
	    /// Stream errors are not checked here; the caller inspects `out`.
	    void serialize(std::ostream& out) const {
	        const uint32_t count = static_cast<uint32_t>(answers.size());
	        out.write(reinterpret_cast<const char*>(&count), sizeof(count));
	        for (const auto& answer : answers) {
	            const uint32_t length = static_cast<uint32_t>(answer.size());
	            out.write(reinterpret_cast<const char*>(&length), sizeof(length));
	            out.write(answer.data(), length);
	        }
	        out.write(reinterpret_cast<const char*>(labels.data()),
	                  static_cast<std::streamsize>(labels.size() * sizeof(int)));
	    }

	    /// Reads a set previously written by serialize(), replacing the
	    /// current contents.
	    /// @return false when the stream fails at any point or the stored
	    ///         count is negative as a signed 32-bit value.
	    bool deserialize(std::istream& in) {
	        // Fixed-width type: the writer always emits exactly four bytes.
	        int32_t count = 0;
	        in.read(reinterpret_cast<char*>(&count), sizeof(count));
	        if (in.fail() || count < 0) {
	            return false;
	        }
	        answers.resize(static_cast<std::size_t>(count));
	        labels.resize(static_cast<std::size_t>(count));
	        for (auto& answer : answers) {
	            uint32_t length = 0;
	            in.read(reinterpret_cast<char*>(&length), sizeof(length));
	            if (in.fail()) {
	                // After a failed read `length` does not describe real
	                // data and must not drive an allocation.
	                return false;
	            }
	            answer.resize(length);
	            in.read(&answer[0], length);
	            if (in.fail()) {
	                return false;
	            }
	        }
	        in.read(reinterpret_cast<char*>(labels.data()),
	                static_cast<std::streamsize>(labels.size() * sizeof(int)));
	        return !in.fail();
	    }

	    /// Appends the strings in j["answers"] and the integers in
	    /// j["labels"]; at() makes a missing key raise the JSON library's
	    /// out_of_range exception.
	    void fromJson(const nlohmann::json& j) {
	        for (const auto& elem : j.at("answers")) {
	            answers.push_back(elem.get<std::string>());
	        }
	        for (const auto& elem : j.at("labels")) {
	            labels.push_back(elem.get<int>());
	        }
	    }
	};

	/// One multiple-choice record: the question text plus two answer sets.
	struct MultiplChoice {
	    std::string question;
	    Answers singleCorrect;
	    Answers multipleCorrect;

	    /// Writes the record to `out`: uint32 byte length of the question,
	    /// the question bytes, then singleCorrect and multipleCorrect.
	    /// Stream errors are not checked here; the caller inspects `out`.
	    void serialize(std::ostream& out) const {
	        const uint32_t length = static_cast<uint32_t>(question.size());
	        out.write(reinterpret_cast<const char*>(&length), sizeof(length));
	        out.write(question.data(), length);
	        singleCorrect.serialize(out);
	        multipleCorrect.serialize(out);
	    }

	    /// Reads a record previously written by serialize().
	    /// @return false as soon as any read fails.
	    bool deserialize(std::istream& in) {
	        uint32_t length = 0;
	        in.read(reinterpret_cast<char*>(&length), sizeof(length));
	        // The length is unsigned, so only the stream state is checked.
	        if (in.fail()) {
	            return false;
	        }
	        question.resize(length);
	        in.read(&question[0], length);
	        if (in.fail()) {
	            return false;
	        }
	        return singleCorrect.deserialize(in) && multipleCorrect.deserialize(in);
	    }

	    /// Fills the record from a JSON object with the keys "question",
	    /// "single_correct" and "multiple_correct"; at() makes a missing key
	    /// raise the JSON library's out_of_range exception.
	    void fromJson(const nlohmann::json& j) {
	        question = j.at("question").get<std::string>();
	        singleCorrect.fromJson(j.at("single_correct"));
	        multipleCorrect.fromJson(j.at("multiple_correct"));
	    }
	};

	/// Writes the whole data set to `out`: a uint32 record count, a table
	/// holding one uint32 stream offset per record, then the records.
	/// `out` must support seeking, because the offset table is written twice.
	void serialize(std::ostream& out, const std::vector<MultiplChoice>& data) {
	    uint32_t n = data.size();
	    out.write((char *)&n, sizeof(n));
	    if (data.empty()) return;
	    // Zero placeholders reserve the table's space; the real offsets are
	    // only known once each record has been written.
	    std::vector<uint32_t> pos(data.size(), 0);
	    out.write((char *)pos.data(), pos.size() * sizeof(pos[0]));
	    int i = 0;
	    for (auto& d : data) {
	        pos[i++] = out.tellp();
	        d.serialize(out);
	    }
	    // Jump back to just behind the count and store the real offsets.
	    out.seekp(sizeof(n), std::ios::beg);
	    out.write((char *)pos.data(), pos.size() * sizeof(pos[0]));
	}

	void encode(const char* jsonFile, const char* binFile) {
	std::ifstream jsonIn(jsonFile);
	nlohmann::json jsonData;
	jsonIn >> jsonData;

	std::vector<MultiplChoice> data;
	for (auto& elem : jsonData) {
	MultiplChoice mc;
	mc.fromJson(elem);
	data.push_back(mc);
	}

	std::ofstream binOut(binFile, std::ios::binary);
	serialize(binOut, data);
	}
	}

	/// Command-line entry point: `<program> input.json output.bin`.
	/// @return 0 on success; 1 when arguments are missing or encoding throws.
	int main(int argc, char **argv) {
	    const char* program = (argc > 0 && argv[0] != nullptr) ? argv[0] : "encode";
	    if (argc < 3) {
	        // Usage errors belong on stderr, not in piped output.
	        fprintf(stderr, "Usage: %s input.json output.bin\n", program);
	        return 1;
	    }

	    try {
	        encode(argv[1], argv[2]);
	    } catch (const std::exception& err) {
	        // Report and exit non-zero instead of terminating uncaught.
	        fprintf(stderr, "%s: %s\n", program, err.what());
	        return 1;
	    }

	    return 0;
	}
	import sys
	import json
	import random

	# Usage: python tojson.py <input_file> <output_file>


	# Adds a question with its answers (and their labels) in random order
	def addQuestion(data, question, answers, labels):
	    """Append a single question record to ``data``.

	    ``answers`` and ``labels`` are permuted with the same random order, so
	    each answer keeps its label. The record's "multiple_correct" section is
	    always empty; the shuffled lists go under "single_correct".
	    """
	    indices = list(range(len(answers)))
	    random.shuffle(indices)

	    shuffled_answers = [answers[i] for i in indices]
	    shuffled_labels = [labels[i] for i in indices]

	    data.append({
	        "multiple_correct": {"answers": [], "labels": []},
	        "question": f"Question: \"{question}\" Answer:",
	        "single_correct": {"answers": shuffled_answers, "labels": shuffled_labels}
	    })

	# Adds the question once for each possible answer position,
	# shuffled so that the correct answer lands exactly once in every position
	def addMultipleQuestions(data, question, answers, labels):
	    """Append ``len(answers)`` records for ``question`` to ``data``.

	    In the i-th record the answer whose label is 1 sits at index i and the
	    remaining answers are shuffled around it. Nothing is appended when
	    ``answers`` is empty.

	    Raises:
	        ValueError: if ``answers`` is non-empty and no label equals 1.
	    """
	    if not answers:
	        # Matches the loop below running zero times; also keeps the label
	        # lookup from failing on an empty list.
	        return

	    # Same for every iteration, so look it up once.
	    correct_answer_index = labels.index(1)

	    for position in range(len(answers)):
	        order = list(range(len(answers)))
	        order.remove(correct_answer_index)
	        random.shuffle(order)
	        order.insert(position, correct_answer_index)

	        shuffled_answers = [answers[j] for j in order]
	        shuffled_labels = [labels[j] for j in order]

	        data.append({
	            "multiple_correct": {"answers": [], "labels": []},
	            "question": f"Question: \"{question}\" Answer:",
	            "single_correct": {"answers": shuffled_answers, "labels": shuffled_labels}
	        })

	# Expects the correct answer to be A1
	def convert_to_json(file):
	    """Read a question file and return its records as a JSON string.

	    Input format (UTF-8 text): records are separated by blank lines. A line
	    starting with "Q:] " carries the question, a line starting with "A1:] "
	    is an answer labelled 1, and any other non-blank line is taken as an
	    answer labelled 0 with its first five characters removed.

	    Blank lines that do not close a record (leading, repeated or trailing
	    ones) are ignored, so no empty question is emitted for them.

	    Returns:
	        The record list serialised with ``json.dumps`` (indent 4, non-ASCII
	        characters kept as-is).
	    """
	    data = []
	    question = ""
	    answers = []
	    labels = []

	    with open(file, 'r', encoding="utf-8") as f:
	        for line in f:
	            line = line.strip()
	            if not line:
	                # End of record; only emit it when something was collected.
	                if question or answers:
	                    addQuestion(data, question, answers, labels)
	                question = ""
	                answers = []
	                labels = []
	            elif line.startswith("Q:] "):
	                question = line[4:].strip()
	            elif line.startswith("A1:] "):
	                answers.append(line[5:].strip())
	                labels.append(1)
	            else:
	                # Assumes a five-character prefix such as "A2:] ".
	                answers.append(line[5:].strip())
	                labels.append(0)

	    # Pick up the last record when the file does not end with a blank line
	    if question or answers:
	        addQuestion(data, question, answers, labels)
	    return json.dumps(data, indent=4, ensure_ascii=False)

	def write_file(filename, data):
	    """Write the string ``data`` to ``filename``, replacing any existing file.

	    The file is written as UTF-8 rather than the platform default encoding:
	    the JSON text is produced with ``ensure_ascii=False`` from UTF-8 input,
	    so it may contain characters a locale encoding cannot represent.
	    """
	    with open(filename, 'w', encoding="utf-8") as f:
	        f.write(data)

	if __name__ == "__main__":
	    # Exactly two positional arguments are required: the question text file
	    # to read and the JSON file to create.
	    if len(sys.argv) != 3:
	        print("Usage: python tojson.py <input_file> <output_file>")
	        sys.exit(1)

	    input_file = sys.argv[1]
	    output_file = sys.argv[2]

	    json_data = convert_to_json(input_file)
	    write_file(output_file, json_data)