bathtime/Latin Dictionary

## Latin Dictionary
// This program reads an XML dictionary file and prints a formatted result in
// terminal.
//
// NOTE: The required XML dictionary (about 76 MB) will be downloaded to this
//       machine if it is not found!
//
//
//              *******************************************
//              *                                         *
//              *      A  VERY  SPECIAL  THANK  YOU !     *
//              *                                         *
//              *******************************************
//
//
//  Perseus Digital Library:  http://www.perseus.tufts.edu/hopper/
//    - Created the XML dictionary file.
//
//  Perseus Github: https://github.com/PerseusDL
//    - For supplying the XML dictionary file and sed commands that made
//      adding the proper characters so much easier.
//
//  The Latin Forum: http://latindiscussion.com/forum/
//    - For putting up with all my questions pertaining to Latin (and even
//      helping with some script commands!)
//
//  Dream In Code: http://www.dreamincode.net/forums/index
//    - For all their contributions about how to improve this program.
//
//  Unix and Linux Forums: https://www.unix.com/
//    - For being so helpful when I was just beginning programming.
//
// Geeks for Geeks: https://www.geeksforgeeks.org/b-tree-set-1-insert-2/
//    - The B-Tree code which sped up indexing enormously.
//
//
// The goals of this project:
//
//	1. < 300 lines of code
//	2. Fast & efficient execution.
//	3. Simple & elegant coding
//
//		"Do one thing,
//		 and do it well."
//
//		—Linux Credo
//
// Compile with:
// $ g++ -O3 -Wall la.cpp -o la
//
// Run with:
// $ la amo tam sic
//
// (where 'amo', 'tam', and 'sic' are the words to be searched)
//
// Edit with:
// $ nano -\$cglmS la.cpp
//
// Benchmark testing:
// run='la ad'; l=1000; i=0;time while [ $i -le $l ];do $run;i=$(($i+1));done
//
// My github (feel free to suggest, edit, fork, contribute!):
// https://gist.github.com/bathtime/5581898da581d4b0f3bd9d0a1794ec15
//

#include<cstdio>
#include<cstdlib>
#include<fstream>
#include<iostream>
#include<string>

using namespace std;


// One node of the B-tree that maps a dictionary key to a file offset.
class BTreeNode
{
	// Parallel arrays: offsets[i] is the value stored alongside keys[i].
	std::string *keys;
	int *offsets;

	int t;          // minimum degree; the arrays are sized for 2*t - 1 keys
	BTreeNode **C;  // child pointers, 2*t slots
	int n;          // number of keys currently in use
	bool leaf;      // true when this node has no children

public:
	// Creates an empty node of minimum degree _t; _leaf marks it as a leaf.
	BTreeNode(int _t, bool _leaf);

	// Inserts key k (with offset off) somewhere in the subtree rooted at
	// this node. The caller must make sure this node is not full.
	void insertNonFull(std::string k, int off);

	// Splits y, the full child stored at index i of C[].
	void splitChild(int i, BTreeNode *y);

	// Prints every key/offset pair of this subtree in key order.
	void traverse();

	// Looks up k in this subtree. On a hit the stored offset is written to
	// `offset` and the node holding the key is returned; otherwise NULL.
	BTreeNode *search(std::string k, int &offset);

	// BTree works directly on the private members above.
	friend class BTree;
};

// A BTree
class BTree
{
	BTreeNode *root; // Pointer to root node
	int t; // Minimum degree
public:
	// Constructor (Initializes tree as empty)
	BTree(int _t)
	{ root = NULL; t = _t; }

	// function to traverse the tree
	void traverse()
	{ if (root != NULL) root->traverse(); }

	// function to search a key in this tree
        BTreeNode* search(string k, int &offset)
        { return (root == NULL)? NULL : root->search(k, offset); }

	// The main function that inserts a new key in this B-Tree
	void insert(string k, int off);
};

// Builds an empty node of minimum degree t1; leaf1 tells whether it is a leaf.
BTreeNode::BTreeNode(int t1, bool leaf1)
{
	// Record the node's shape first; the array sizes below depend on t.
	leaf = leaf1;
	t = t1;
	n = 0;	// a fresh node holds no keys yet

	// Room for the maximum of 2t-1 keys (each with its offset) and the
	// 2t child pointers that go with them.
	const int maxKeys = 2 * t - 1;
	keys	= new string[maxKeys];
	offsets = new int[maxKeys];
	C	= new BTreeNode *[maxKeys + 1];
}

// Prints every key/offset pair of the subtree rooted at this node, in key order.
void BTreeNode::traverse()
{
	// A node with n keys has n+1 children: child idx comes before key idx,
	// and the last child (idx == n) comes after the final key.
	for (int idx = 0; idx <= n; ++idx)
	{
		if (!leaf)
			C[idx]->traverse();

		if (idx < n)
			std::cout << "Key name: " << keys[idx] << "        Key offset: " << offsets[idx] << std::endl;
	}
}

// Searches the subtree rooted at this node for key k.
//
// k       key to look for
// offset  receives the offset stored with k; left untouched on a miss
//
// Returns the node that holds k, or NULL when k is not in this subtree.
BTreeNode *BTreeNode::search(string k, int &offset)
{
	// Skip every key that sorts before k.
	int i = 0;
	while (i < n && k > keys[i])
		i++;

	// Only keys[0..n-1] are in use. When k is greater than all of them
	// i == n, and in a full node keys[n] lies past the end of the array,
	// so the index has to be checked before the comparison.
	if (i < n && keys[i] == k)
	{
		offset = offsets[i];
		return this;
	}

	// Nothing below a leaf to look at.
	if (leaf)
		return NULL;

	// k can only be in the child between keys[i-1] and keys[i].
	return C[i]->search(k, offset);
}

// Inserts key k with its offset off, growing the tree by one level when
// the root is already full.
void BTree::insert(string k, int off)
{
	// Empty tree: the first key becomes a one-key leaf root.
	if (!root)
	{
		root = new BTreeNode(t, true);
		root->keys[0]	 = k;
		root->offsets[0] = off;
		root->n = 1;
		return;
	}

	// Root still has room: the regular insertion handles everything.
	if (root->n != 2 * t - 1)
	{
		root->insertNonFull(k, off);
		return;
	}

	// Root is full: put a new root above it and split the old one, which
	// moves one key up into the new root.
	BTreeNode *grown = new BTreeNode(t, false);
	grown->C[0] = root;
	grown->splitChild(0, root);

	// The new root has exactly two children; the key that moved up decides
	// which of them receives k.
	BTreeNode *target = (grown->keys[0] < k) ? grown->C[1] : grown->C[0];
	target->insertNonFull(k, off);

	root = grown;
}

// Inserts key k (with offset off) into the subtree rooted at this node.
// The node itself must not be full when this is called.
void BTreeNode::insertNonFull(string k, int off)
{
	// Scans from the rightmost key towards the left.
	int pos = n - 1;

	if (!leaf)
	{
		// Find the last key that is not greater than k; the child right
		// after it is the one that must take the new key.
		while (pos >= 0 && keys[pos] > k)
			--pos;
		int child = pos + 1;

		if (C[child]->n == 2 * t - 1)
		{
			// A full child is split first. Its middle key moves up to
			// keys[child], and k belongs to the right half when it
			// sorts after that key.
			splitChild(child, C[child]);
			if (keys[child] < k)
				++child;
		}

		C[child]->insertNonFull(k, off);
		return;
	}

	// Leaf: shift every greater key (and its offset) one slot to the
	// right, then drop the new pair into the gap.
	for (; pos >= 0 && keys[pos] > k; --pos)
	{
		keys[pos + 1]	 = keys[pos];
		offsets[pos + 1] = offsets[pos];
	}
	keys[pos + 1]	 = k;
	offsets[pos + 1] = off;
	++n;
}

// A utility function to split the child y of this node
// Note that y must be full when this function is called
void BTreeNode::splitChild(int i, BTreeNode *y)
{
	// Create a new node which is going to store (t-1) keys
	// of y
	BTreeNode *z = new BTreeNode(y->t, y->leaf);
	z->n = t - 1;

	// Copy the last (t-1) keys of y to z
	for (int j = 0; j < t - 1; j++)
	{
		z->keys[j]	= y->keys[j + t];
		z->offsets[j]	= y->offsets[j + t];
	}

	// Copy the last t children of y to z
	if (y->leaf == false)
	{
		for (int j = 0; j < t; j++)
			z->C[j] = y->C[j + t];
	}

	// Reduce the number of keys in y
	y->n = t - 1;

	// Since this node is going to have a new child,
	// create space of new child
	for (int j = n; j >= i + 1; j--)
		C[j + 1] = C[j];

	// Link the new child to this node
	C[i + 1] = z;

	// A key of y will move to this node. Find location of
	// new key and move all greater keys one space ahead
	for (int j = n-1; j >= i; j--)
	{
		keys[j + 1]	= keys[j];
		offsets[j + 1]	= offsets[j];
	}
	// Copy the middle key of y to this node
	keys[i]		= y->keys[t - 1];
	offsets[i]	= y->offsets[t - 1];

	// Increment count of keys in this node
	n = n + 1;
}

// Replaces every span of trueText that starts with strBegin and runs through
// the next occurrence of strEnd (both markers included) with strReplace.
// With an empty strEnd the span is just strBegin itself.
//
// trueText    text that is edited in place
// strBegin    marker that opens a span
// strEnd      marker that closes a span; searched for after strBegin
// strReplace  text substituted for each whole span
//
// Always returns true. Scanning stops at the first strBegin that has no
// strEnd after it. Two empty markers match nothing.
bool findAndReplaceAllBetween(std::string &trueText, const std::string &strBegin, const std::string &strEnd, const std::string &strReplace)
{
	// Two empty markers describe a zero-length span at every position;
	// there is nothing meaningful to replace and no way to make progress.
	if (strBegin.empty() && strEnd.empty())
		return true;

	// Replacements longer than both markers together are applied in a
	// single left-to-right pass.
	const bool grows = strReplace.length() > strBegin.length() + strEnd.length();
	std::size_t searchFrom = 0;

	while (true)
	{
		const std::size_t beginFound = trueText.find(strBegin, searchFrom);
		if (beginFound == std::string::npos)
			break;

		const std::size_t endFound = trueText.find(strEnd, beginFound + strBegin.length());
		if (endFound == std::string::npos)
			break;

		const std::size_t spanLength = endFound + strEnd.length() - beginFound;
		trueText.replace(beginFound, spanLength, strReplace);

		if (!grows && strReplace.length() < spanLength)
		{
			// The text just got shorter, so rescanning from the start
			// always terminates. It lets cascades collapse fully, e.g.
			// a run of blanks shrinking to a single blank.
			searchFrom = 0;
		}
		else
		{
			// Continue right after the inserted text. The replacement is
			// never rescanned, so the loop cannot spin on its own output,
			// and a match at the very start of the text is not skipped.
			searchFrom = beginFound + strReplace.length();
		}
	}

	return true;
}

// Writes the lookup index for the rendered dictionary.
//
// Every line of XMLFile that contains key="NAME" produces one index line
// "name,OFFSET": name is NAME with A-Z folded to lower case, OFFSET is the
// stream position right after that line (where the entry's text starts).
//
// XMLFile         rendered dictionary, read from its current position after
//                 the stream state has been cleared
// offsetFileName  index file to create (an existing file is overwritten)
//
// Returns true when the index file could be opened and written.
bool createIndexFile (std::fstream &XMLFile, const std::string offsetFileName)
{

	system("beep");	// kept from the original: runs the external `beep` command

	std::ofstream offsetFile(offsetFileName);
	if (!offsetFile)
		return false;

	XMLFile.clear();

	const std::string key = "key=\"";
	std::string line;

	// Testing getline itself keeps a stale line from being processed a
	// second time once the end of the file is reached.
	while (getline(XMLFile, line))
	{
		const std::size_t keyPos = line.find(key);
		if (keyPos == std::string::npos)
			continue;

		// The name sits between key=" and the next double quote.
		const std::size_t nameBegin = keyPos + key.length();
		const std::size_t nameEnd   = line.find('"', nameBegin);
		if (nameEnd == std::string::npos)
			continue;

		std::string name = line.substr(nameBegin, nameEnd - nameBegin);
		for (char &c : name)
			if (c >= 'A' && c <= 'Z')
				c += 'a' - 'A';

		offsetFile << name << "," << XMLFile.tellg() << "\n";
	}

	return static_cast<bool>(offsetFile);
}

// Turns one line of the raw XML dictionary into terminal-ready text.
//
// Every row of the table below is handed, in table order, to
// findAndReplaceAllBetween(sText, Start, End, NewText). The order matters:
// known tags are turned into colour escape sequences first, then the
// catch-all "<" ... ">" row strips whatever tags are left, then character
// entities are turned into literal characters, and finally some spacing
// around punctuation is tidied up.
//
// sText  line to convert; it is modified in place
//
// Returns a copy of the converted line.
std::string prepareXML(std::string &sText)
{
	// One replacement rule: the span from Start through the next End is
	// replaced by NewText (an empty End means "replace Start itself").
	struct Replacement {
		const char * Start;
		const char * End;
		const char * NewText;
	};

	// Linux escape sequences for colour. Windows has this feature too. :)
	const Replacement replacements[] = {
	{ "\r",	 "",   "" },				// Remove DOS formatting

	// Entry framing: the "[key" header line and the "[end key]" marker that
	// getXMLfileKey later looks for.
	{ "<entryFree",    "key",  "\n\033[1;36m[key"                 },
	{ "\"><orth",	   "",	   "\"]\033[0m<orth"                  },
	{ "</entryFree>",  "",     "\n\n\033[1;31m[end key]\033[0m\n" },

	{ "<orth",      ">",  "\n\n\033[1;33m" },	// The word and its roots
	{ "</orth",     ">",  "\033[0m"        },
	{ "<itype>",	"",   "\033[1;33m"  },		// Roots
	{ "</itype>",   "",   "\033[0m"     },
	{ "<pos",       ">",  "\033[1;36m"  },		// Grammatical term
	{ "</pos",      ">",  "\033[0m"     },
	{ "<gen",       ">",  "\033[0;36m"  },		// Gender
	{ "</gen",      ">",  ".\033[0m"    },
	{ "<etym",      ">",  "\033[0;33m[" },		// Etymology
	{ "</etym",     ">",  "]\033[0m"    },
	{ "<tr opt=",   ">",  "\033[0;35m"  },		// Translation
	{ "<tr>",       "",   "\033[0;35m"  },		// "
	{ "</tr>",      "",   "\033[0m"     },
	{ "<usg opt=",  ">",  "\033[0;33m"  },		// How used
	{ "</usg",      ">",  "\033[0m"     },
	{ "<foreign",   ">",  "\033[1;32m"  },		// Language other than Latin/English
	{ "</foreign",  ">",  "\033[0m"     },
	{ "<hi",	">",  "\033[1;32m"  },		// Grammatical term
	{ "</hi>",	"",   "\033[0m"     },
	{ "<quote",     ">",  "\033[1;35m"  },		// Latin quote
	{ "</quote>",	"",   "\033[0m"     },
	{ "<author>",	"",   "\033[1;31m"  },		// Author of quote
	{ "</author>",  "",   "\033[0m"     },
	{ "<bibl",      ">",  "\033[0;32m"  },		// Book reference paragraph/pages
	{ "</bibl>",    "",   "\033[0m"     },

	{ "<",     ">",	""  },				// Break down all remaining tags

	// Character entities -> literal characters.
	{ "&Agrave;",  "",   "À" }, { "&Egrave;",  "",   "È" }, { "&Igrave;",  "",   "Ì" },
	{ "&Ograve;",  "",   "Ò" }, { "&Ugrave;",  "",   "Ù" }, { "&Ygrave;",  "",   "Ỳ" },
	{ "&agrave;",  "",   "à" }, { "&egrave;",  "",   "è" }, { "&igrave;",  "",   "ì" },
	{ "&ograve;",  "",   "ò" }, { "&ugrave;",  "",   "ù" }, { "&ygrave;",  "",   "ỳ" },
	{ "&Aacute;",  "",   "Á" }, { "&Eacute;",  "",   "É" }, { "&Iacute;",  "",   "Í" },
	{ "&Oacute;",  "",   "Ó" }, { "&Uacute;",  "",   "Ú" }, { "&Yacute;",  "",   "Ý" },
	{ "&aacute;",  "",   "á" }, { "&eacute;",  "",   "é" }, { "&iacute;",  "",   "í" },
	{ "&oacute;",  "",   "ó" }, { "&uacute;",  "",   "ú" }, { "&yacute;",  "",   "ý" },
	{ "&cacute;",  "",   "ć" }, { "&gacute;",  "",   "ǵ" }, { "&sacute;",  "",   "ś" },
	{ "&imacracute;","", "í" }, { "&imacrbreve;","", "ĭ" }, { "&uml;",     "",   "ü" },
	{ "&Abreve;",  "",   "A" }, { "&Ebreve;",  "",   "E" }, { "&Ibreve;",  "",   "I" },
	{ "&Obreve;",  "",   "Ŏ" }, { "&Ubreve;",  "",   "Ŭ" }, { "&Ybreve;",  "",   "Y" },
	{ "&abreve;",  "",   "ă" }, { "&ebreve;",  "",   "ĕ" }, { "&ibreve;",  "",   "ĭ" },
	{ "&obreve;",  "",   "ŏ" }, { "&ubreve;",  "",   "ŭ" }, { "&ybreve;",  "",   "y" },
	{ "&Amacr;",   "",   "Ā" }, { "&Emacr;",  "",   "Ē"  }, { "&Imacr;",  "",   "Ī" },
	{ "&Omacr;",   "",   "Ō" }, { "&Umacr;",  "",   "Ū"  }, { "&Ymacr;",  "",   "Ȳ" },
	{ "&amacr;",   "",   "ā" }, { "&emacr;",  "",   "ē"  }, { "&imacr;",  "",   "ī" },
	{ "&omacr;",   "",   "ō" }, { "&umacr;",  "",   "ū"  }, { "&ymacr;",  "",   "ȳ" },
	{ "&Acirc;",   "",   "Â" }, { "&Ecirc;",  "",   "Ê"  }, { "&Icirc;",  "",   "Î" },
	{ "&Ocirc;",   "",   "Ô" }, { "&Ucirc;",  "",   "Û"  }, { "&Ycirc;",  "",   "Ŷ" },
	{ "&acirc;",   "",   "â" }, { "&ecirc;",  "",   "ê"  }, { "&icirc;",  "",   "î" },
	{ "&ocirc;",   "",   "ô" }, { "&ucirc;",  "",   "û"  }, { "&ycirc;",  "",   "ŷ" },
	{ "&Aring;",   "",   "Å" }, { "&Ering;",  "",   "E̊"  }, { "&Oring;",  "",   "O̊" },
	{ "&Uring;",   "",   "Ů" }, { "&Yring;",  "",   "Y̊"  },	{ "Le&acute;","","Lérin"},
	{ "&aring;",   "",   "å" }, { "&ering;",  "",   "e̊"  }, { "&oring;",  "",   "o̊" },
	{ "&uring;",   "",   "ů" }, { "&yring;",  "",   "ẙ"  }, { "&Auml;",   "",   "Ä" },
	{ "&Euml;",    "",   "Ë" }, { "&Iuml;",  "",   "Ï"   },	{ "c&racute;","",  "cr" },
	{ "&Ouml;",    "",   "Ö" }, { "&Uuml;",  "",   "Ü"   }, { "&Yuml;",   "",   "Ÿ" },
	{ "&auml;",    "",   "ä" }, { "&euml;",  "",   "ë"   }, { "&iuml;",   "",   "ï" },
	{ "&ouml;",    "",   "ö" }, { "&uuml;",  "",   "ü"   }, { "&yuml;",   "",   "ÿ" },
	{ "&Dagger;",  "",   "‡" }, { "&dagger;",  "",   "†" }, { "&sect;",   "",   "§" },
	{ "&ast;",     "",   "*" }, { "&OElig;",  "",   "Oe" }, { "&oelig;",  "",  "oe" },
	{ "&AElig;",   "",   "Ae"}, { "&aelig;",  "",   "ae" }, { "&rdquo;",  "",   "˝" },
	{ "&ldquo;",   "",   " ̏" }, { "&rsquo;",  "",   "´"  }, { "&lsquo;",  "",   "`" },
	{ "&lpar;",    "",   "(" }, { "&rpar;",  "",   ")"   }, { "&adot;",   "",   "ạ" },
	{ "&larr;",    "",   "←" }, { "&macr;",  "",   "¯"   }, { "&breve;",  "",   "˘" },
	{ "&ccedil;",  "",   "ç" }, { "&etilde;",  "",   "ẽ" }, { "&ntilde;", "",   "ñ" },
	{ "&pound;",   "",   "£" }, { "&dollar;",  "",   "$" }, { "&brevemacr;","",  "" },
	{ "&dibreve;", "",   ""  }, { "&cbreve;",  "",   "č" }, { "&lacute;", "",   "l" },
	{ "&qacute;",  "",   "q" }, { "&macracute;",  "",   "¯" },
	{ "&mdash;",   "",   "\n\n • " },

	// Spacing clean-up around punctuation, plus removal of n="..." attributes.
	{ "  ",  "", " " }, { " .",  "", "." }, { " ,",  "", "," }, { " )",  "", ")" },
	{ " :",  "", ":" }, { " ;",  "", ";" }, { " ’",  "", "’" }, { "§ ",  "", "§" },
	{ "‘ ",  "", "‘" }, { "( ",  "", "(" }, { "... ","", "..."},{ "\" n=\"", "\"", "\"" }
	};

	// Apply the rules strictly in table order.
	for (auto replace : replacements)
		findAndReplaceAllBetween(sText, replace.Start, replace.End, replace.NewText);

	return sText;
}

// Prints one dictionary entry: every line of XMLDictFile from byte offset
// offsetTmp up to, but not including, the first line containing "[end key]"
// (or up to the end of the file when no such line follows).
void getXMLfileKey (std::fstream &XMLDictFile, int &offsetTmp)
{
	// Drop any EOF/fail state left by earlier reads so the seek takes effect.
	XMLDictFile.clear();
	XMLDictFile.seekg(offsetTmp, std::ios::beg);

	for (std::string text; getline(XMLDictFile, text); )
	{
		if (text.find("[end key]") != std::string::npos)
			break;

		std::cout << text + '\n';
	}
}

// Downloads the raw XML dictionary and renders it into the colourised text
// file the rest of the program reads.
//
// origXMLfileName  raw dictionary file; the wget command below saves the
//                  download under the last component of its URL, so this
//                  name has to match it
// newXMLfileName   rendered file to create
// offsetFileName   index file; removed because it would no longer match the
//                  newly rendered dictionary
//
// Returns true only when a rendered file containing at least one entry was
// written. On failure no rendered file is left behind, so the next run
// tries the download again.
bool setupXMLDict(const std::string &origXMLfileName, const std::string &newXMLfileName, const std::string &offsetFileName)
{

	std::cout << "\nDictionary not found. Downloading...\n\n";

	FILE * f = popen("wget https://github.com/PerseusDL/lexica/raw/master/CTS_XML_TEI/perseus/pdllex/lat/ls/lat.ls.perseus-eng1.xml; rm -f latDictIndex.txt", "r");

	if (f == 0)
	{
		std::cerr << "\n\nCould not download file. Exiting.\n\n";

		return false;
	}

	// Pass on whatever the command writes to its standard output.
	char buf[1024];

	while(fgets(buf, sizeof buf, f))
		std::cout << buf;

	pclose(f);

	// The shell command removes the default index name; also honour the
	// name the caller actually passed in.
	std::remove(offsetFileName.c_str());

	// popen succeeding only means the shell started. Check that the raw
	// file really exists before creating any output.
	std::ifstream XMLDictFile(origXMLfileName);

	if (!XMLDictFile)
	{
		std::cerr << "\n\nCould not download file. Exiting.\n\n";

		return false;
	}

	std::cout << "Rendering XML file. Please wait...\n\n";

	std::ofstream XMLDictSave(newXMLfileName);

	if (!XMLDictSave)
	{
		std::cerr << "\n\nCould not create " << newXMLfileName << ". Exiting.\n\n";

		return false;
	}

	const std::string keyStr = "key=\"";		// Catch every single key
	std::string line;
	bool inEntries = false;				// set once the first key line is seen

	// Everything before the first key line is skipped; from that line on
	// every line is rendered and saved.
	while (getline (XMLDictFile,line))
	{
		if (!inEntries && line.find(keyStr) == std::string::npos)
			continue;

		inEntries = true;
		XMLDictSave << prepareXML(line) + '\n';
	}

	XMLDictSave.close();

	if (!inEntries)
	{
		// Nothing usable was downloaded (for instance an error page).
		// Remove the empty result so it is not mistaken for a dictionary.
		std::remove(newXMLfileName.c_str());
		std::cerr << "\n\nDownloaded file contains no dictionary entries. Exiting.\n\n";

		return false;
	}

	return true;
}

// Reads the index file ("key,offset" per line) into the B-tree.
//
// offsetFile  index stream; its state is cleared before reading
// offvals     tree that receives every key/offset pair
//
// Lines whose offset is not a plain non-negative number of at most nine
// digits are skipped. Returns false (after printing a message) when not a
// single pair could be loaded.
bool loadXMLoffsets(std::fstream &offsetFile, BTree& offvals)
{
	offsetFile.clear();

	std::string keyName;
	std::string keyVal;
	bool loadedAny = false;	// a flag cannot wrap around like a 16-bit counter

	// Key up to the comma, offset up to the end of the line.
	while (getline(offsetFile, keyName, ',') && getline(offsetFile, keyVal, '\n'))
	{
		// Checked up front so stoi can neither reject the text nor overflow.
		const bool wellFormed = !keyVal.empty() && keyVal.length() <= 9 &&
			keyVal.find_first_not_of("0123456789") == std::string::npos;

		if (!wellFormed)
			continue;

		offvals.insert(keyName, stoi(keyVal));
		loadedAny = true;
	}

	if (!loadedAny)
	{
		std::cerr << "\nCould not load offsets. Exiting.\n" << std::endl;
		return false;
	}

	return true;
}

// Looks up every command-line argument in the dictionary and prints its
// entry. The rendered dictionary and the index are created first when they
// are missing. Returns 1 when the dictionary or the index cannot be set up.
int main(int argc, char* argv[])
{

	BTree t(10);

	const std::string origXMLfileName       = "lat.ls.perseus-eng1.xml";
	const std::string XMLfileName           = "lat.ls.perseus-eng2.xml";
	const std::string offsetFileName        = "latDictIndex.txt";

	fstream XMLDictFile(XMLfileName);

	if (!XMLDictFile)
	{
		if (!setupXMLDict(origXMLfileName, XMLfileName, offsetFileName))
			return 1;

		// Only open here: open() on a stream that is already open fails.
		XMLDictFile.open(XMLfileName);

		if (!XMLDictFile)
		{
			std::cerr << "\nCould not open " << XMLfileName << ". Exiting.\n" << std::endl;
			return 1;
		}
	}

	// The index is opened after the dictionary set-up because that step
	// deletes an index that no longer matches.
	fstream offsetFile(offsetFileName);

	if (!offsetFile)
	{
		createIndexFile(XMLDictFile, offsetFileName);
		offsetFile.open(offsetFileName);
	}

	if (!loadXMLoffsets(offsetFile, t))
		return 1;

	for(int keyNum = 1; keyNum < argc; keyNum++)
	{
		// Index keys are stored with A-Z folded to lower case.
		std::string word = argv[keyNum];

		for (char &c : word)
			if (c >= 'A' && c <= 'Z')
				c += 'a' - 'A';

		int offset = 0;

		// Without this check a miss would print whatever entry the
		// previous offset pointed at.
		if (t.search(word, offset) == NULL)
		{
			std::cerr << "\n" << argv[keyNum] << ": not found in dictionary.\n";
			continue;
		}

		getXMLfileKey(XMLDictFile, offset);
	}

//	t.traverse();

	XMLDictFile.close();
	offsetFile.close();

	return 0;
}
	// This program reads an XML dictionary file and prints a formatted result in
	// terminal.
	//
	// NOTE: The required XML dictionary (about 76 MB) will be downloaded to this
	// machine if it is not found!
	//
	//
	// *******************************************
	// * *
	// * A VERY SPECIAL THANK YOU ! *
	// * *
	// *******************************************
	//
	//
	// Perseus Digital Library: http://www.perseus.tufts.edu/hopper/
	// - Created the XML dictionary file.
	//
	// Perseus Github: https://github.com/PerseusDL
	// - For supplying the XML dictionary file and sed commands that made
	// adding the proper characters so much easier.
	//
	// The Latin Forum: http://latindiscussion.com/forum/
	// - For putting up with all my questions pertaining to Latin (and even
	// helping with some script commands!)
	//
	// Dream In Code: http://www.dreamincode.net/forums/index
	// - For all their contributions about how to improve this program.
	//
	// Unix and Linux Forums: https://www.unix.com/
	// - For being so helpful when I was just beginning programming.
	//
	// Geeks for Geeks: https://www.geeksforgeeks.org/b-tree-set-1-insert-2/
	// - The B-Tree code which sped up indexing enormously.
	//
	//
	// The goals of this project:
	//
	// 1. < 300 lines of code
	// 2. Fast & efficient execution.
	// 3. Simple & elegant coding
	//
	// "Do one thing,
	// and do it well."
	//
	// —Linux Credo
	//
	// Compile with:
	// $ g++ -O3 -Wall la.cpp -o la
	//
	// Run with:
	// $ la amo tam sic
	//
	// (where 'amo', 'tam', and 'sic' are the words to be searched)
	//
	// Edit with:
	// $ nano -\$cglmS la.cpp
	//
	// Benchmark testing:
	// run='la ad'; l=1000; i=0;time while [ $i -le $l ];do $run;i=$(($i+1));done
	//
	// My github (feel free to suggest, edit, fork, contribute!):
	// https://gist.github.com/bathtime/5581898da581d4b0f3bd9d0a1794ec15
	//

	#include<iostream>
	#include<fstream>
	#include<string>

	using namespace std;


	// One node of the B-tree that maps a dictionary key to a file offset.
	class BTreeNode
	{
		// Parallel arrays: offsets[i] is the value stored alongside keys[i].
		std::string *keys;
		int *offsets;

		int t;          // minimum degree; the arrays are sized for 2*t - 1 keys
		BTreeNode **C;  // child pointers, 2*t slots
		int n;          // number of keys currently in use
		bool leaf;      // true when this node has no children

	public:
		// Creates an empty node of minimum degree _t; _leaf marks it as a leaf.
		BTreeNode(int _t, bool _leaf);

		// Inserts key k (with offset off) somewhere in the subtree rooted at
		// this node. The caller must make sure this node is not full.
		void insertNonFull(std::string k, int off);

		// Splits y, the full child stored at index i of C[].
		void splitChild(int i, BTreeNode *y);

		// Prints every key/offset pair of this subtree in key order.
		void traverse();

		// Looks up k in this subtree. On a hit the stored offset is written to
		// `offset` and the node holding the key is returned; otherwise NULL.
		BTreeNode *search(std::string k, int &offset);

		// BTree works directly on the private members above.
		friend class BTree;
	};

	// A BTree
	class BTree
	{
	BTreeNode *root; // Pointer to root node
	int t; // Minimum degree
	public:
	// Constructor (Initializes tree as empty)
	BTree(int _t)
	{ root = NULL; t = _t; }

	// function to traverse the tree
	void traverse()
	{ if (root != NULL) root->traverse(); }

	// function to search a key in this tree
	BTreeNode* search(string k, int &offset)
	{ return (root == NULL)? NULL : root->search(k, offset); }

	// The main function that inserts a new key in this B-Tree
	void insert(string k, int off);
	};

	// Builds an empty node of minimum degree t1; leaf1 tells whether it is a leaf.
	BTreeNode::BTreeNode(int t1, bool leaf1)
	{
		// Copy the given minimum degree and leaf property
		t = t1;
		leaf = leaf1;

		// Room for the maximum of 2t-1 keys (each with its offset) and the
		// 2t child pointers that go with them. The child array holds
		// pointers, matching the member's type BTreeNode **.
		keys = new string[2 * t - 1];
		offsets = new int[2 * t - 1];
		C = new BTreeNode *[2 * t];

		// A fresh node holds no keys yet
		n = 0;
	}

	// Prints every key/offset pair of the subtree rooted at this node, in key order.
	void BTreeNode::traverse()
	{
		// A node with n keys has n+1 children: child idx comes before key idx,
		// and the last child (idx == n) comes after the final key.
		for (int idx = 0; idx <= n; ++idx)
		{
			if (!leaf)
				C[idx]->traverse();

			if (idx < n)
				std::cout << "Key name: " << keys[idx] << " Key offset: " << offsets[idx] << std::endl;
		}
	}

	// Searches the subtree rooted at this node for key k.
	//
	// k       key to look for
	// offset  receives the offset stored with k; left untouched on a miss
	//
	// Returns the node that holds k, or NULL when k is not in this subtree.
	BTreeNode *BTreeNode::search(string k, int &offset)
	{
		// Skip every key that sorts before k.
		int i = 0;
		while (i < n && k > keys[i])
			i++;

		// Only keys[0..n-1] are in use. When k is greater than all of them
		// i == n, and in a full node keys[n] lies past the end of the array,
		// so the index has to be checked before the comparison.
		if (i < n && keys[i] == k)
		{
			offset = offsets[i];
			return this;
		}

		// Nothing below a leaf to look at.
		if (leaf)
			return NULL;

		// k can only be in the child between keys[i-1] and keys[i].
		return C[i]->search(k, offset);
	}

	// Inserts key k with its offset off, growing the tree by one level when
	// the root is already full.
	void BTree::insert(string k, int off)
	{
		// Empty tree: the first key becomes a one-key leaf root.
		if (!root)
		{
			root = new BTreeNode(t, true);
			root->keys[0] = k;
			root->offsets[0] = off;
			root->n = 1;
			return;
		}

		// Root still has room: the regular insertion handles everything.
		if (root->n != 2 * t - 1)
		{
			root->insertNonFull(k, off);
			return;
		}

		// Root is full: put a new root above it and split the old one, which
		// moves one key up into the new root.
		BTreeNode *grown = new BTreeNode(t, false);
		grown->C[0] = root;
		grown->splitChild(0, root);

		// The new root has exactly two children; the key that moved up decides
		// which of them receives k.
		BTreeNode *target = (grown->keys[0] < k) ? grown->C[1] : grown->C[0];
		target->insertNonFull(k, off);

		root = grown;
	}

	// Inserts key k (with offset off) into the subtree rooted at this node.
	// The node itself must not be full when this is called.
	void BTreeNode::insertNonFull(string k, int off)
	{
		// Scans from the rightmost key towards the left.
		int pos = n - 1;

		if (!leaf)
		{
			// Find the last key that is not greater than k; the child right
			// after it is the one that must take the new key.
			while (pos >= 0 && keys[pos] > k)
				--pos;
			int child = pos + 1;

			if (C[child]->n == 2 * t - 1)
			{
				// A full child is split first. Its middle key moves up to
				// keys[child], and k belongs to the right half when it
				// sorts after that key.
				splitChild(child, C[child]);
				if (keys[child] < k)
					++child;
			}

			C[child]->insertNonFull(k, off);
			return;
		}

		// Leaf: shift every greater key (and its offset) one slot to the
		// right, then drop the new pair into the gap.
		for (; pos >= 0 && keys[pos] > k; --pos)
		{
			keys[pos + 1] = keys[pos];
			offsets[pos + 1] = offsets[pos];
		}
		keys[pos + 1] = k;
		offsets[pos + 1] = off;
		++n;
	}

	// A utility function to split the child y of this node
	// Note that y must be full when this function is called
	void BTreeNode::splitChild(int i, BTreeNode *y)
	{
	// Create a new node which is going to store (t-1) keys
	// of y
	BTreeNode *z = new BTreeNode(y->t, y->leaf);
	z->n = t - 1;

	// Copy the last (t-1) keys of y to z
	for (int j = 0; j < t - 1; j++)
	{
	z->keys[j] = y->keys[j + t];
	z->offsets[j] = y->offsets[j + t];
	}

	// Copy the last t children of y to z
	if (y->leaf == false)
	{
	for (int j = 0; j < t; j++)
	z->C[j] = y->C[j + t];
	}

	// Reduce the number of keys in y
	y->n = t - 1;

	// Since this node is going to have a new child,
	// create space of new child
	for (int j = n; j >= i + 1; j--)
	C[j + 1] = C[j];

	// Link the new child to this node
	C[i + 1] = z;

	// A key of y will move to this node. Find location of
	// new key and move all greater keys one space ahead
	for (int j = n-1; j >= i; j--)
	{
	keys[j + 1] = keys[j];
	offsets[j + 1] = offsets[j];
	}
	// Copy the middle key of y to this node
	keys[i] = y->keys[t - 1];
	offsets[i] = y->offsets[t - 1];

	// Increment count of keys in this node
	n = n + 1;
	}

	// Replaces every span of trueText that starts with strBegin and runs through
	// the next occurrence of strEnd (both markers included) with strReplace.
	// With an empty strEnd the span is just strBegin itself.
	//
	// trueText    text that is edited in place
	// strBegin    marker that opens a span
	// strEnd      marker that closes a span; searched for after strBegin
	// strReplace  text substituted for each whole span
	//
	// Always returns true. Scanning stops at the first strBegin that has no
	// strEnd after it. Two empty markers match nothing.
	bool findAndReplaceAllBetween(std::string &trueText, const std::string &strBegin, const std::string &strEnd, const std::string &strReplace)
	{
		// Two empty markers describe a zero-length span at every position;
		// there is nothing meaningful to replace and no way to make progress.
		if (strBegin.empty() && strEnd.empty())
			return true;

		// Replacements longer than both markers together are applied in a
		// single left-to-right pass.
		const bool grows = strReplace.length() > strBegin.length() + strEnd.length();
		std::size_t searchFrom = 0;

		while (true)
		{
			const std::size_t beginFound = trueText.find(strBegin, searchFrom);
			if (beginFound == std::string::npos)
				break;

			const std::size_t endFound = trueText.find(strEnd, beginFound + strBegin.length());
			if (endFound == std::string::npos)
				break;

			const std::size_t spanLength = endFound + strEnd.length() - beginFound;
			trueText.replace(beginFound, spanLength, strReplace);

			if (!grows && strReplace.length() < spanLength)
			{
				// The text just got shorter, so rescanning from the start
				// always terminates. It lets cascades collapse fully, e.g.
				// a run of blanks shrinking to a single blank.
				searchFrom = 0;
			}
			else
			{
				// Continue right after the inserted text. The replacement is
				// never rescanned, so the loop cannot spin on its own output,
				// and a match at the very start of the text is not skipped.
				searchFrom = beginFound + strReplace.length();
			}
		}

		return true;
	}

	// Writes the lookup index for the rendered dictionary.
	//
	// Every line of XMLFile that contains key="NAME" produces one index line
	// "name,OFFSET": name is NAME with A-Z folded to lower case, OFFSET is the
	// stream position right after that line (where the entry's text starts).
	//
	// XMLFile         rendered dictionary, read from its current position after
	//                 the stream state has been cleared
	// offsetFileName  index file to create (an existing file is overwritten)
	//
	// Returns true when the index file could be opened and written.
	bool createIndexFile (std::fstream &XMLFile, const std::string offsetFileName)
	{

		system("beep");	// kept from the original: runs the external `beep` command

		std::ofstream offsetFile(offsetFileName);
		if (!offsetFile)
			return false;

		XMLFile.clear();

		const std::string key = "key=\"";
		std::string line;

		// Testing getline itself keeps a stale line from being processed a
		// second time once the end of the file is reached.
		while (getline(XMLFile, line))
		{
			const std::size_t keyPos = line.find(key);
			if (keyPos == std::string::npos)
				continue;

			// The name sits between key=" and the next double quote.
			const std::size_t nameBegin = keyPos + key.length();
			const std::size_t nameEnd   = line.find('"', nameBegin);
			if (nameEnd == std::string::npos)
				continue;

			std::string name = line.substr(nameBegin, nameEnd - nameBegin);
			for (char &c : name)
				if (c >= 'A' && c <= 'Z')
					c += 'a' - 'A';

			offsetFile << name << "," << XMLFile.tellg() << "\n";
		}

		return static_cast<bool>(offsetFile);
	}

	// Turns one line of the raw XML dictionary into terminal-ready text.
	//
	// Every row of the table below is handed, in table order, to
	// findAndReplaceAllBetween(sText, Start, End, NewText). The order matters:
	// known tags are turned into colour escape sequences first, then the
	// catch-all "<" ... ">" row strips whatever tags are left, then character
	// entities are turned into literal characters, and finally some spacing
	// around punctuation is tidied up.
	//
	// The search strings of the entity rows are spelled as entity names
	// ("&Agrave;", "&lpar;", ...) and the blank-collapsing row searches for two
	// blanks. A row whose search text equals its replacement would change
	// nothing and would keep being matched over and over.
	//
	// sText  line to convert; it is modified in place
	//
	// Returns a copy of the converted line.
	std::string prepareXML(std::string &sText)
	{
		// One replacement rule: the span from Start through the next End is
		// replaced by NewText (an empty End means "replace Start itself").
		struct Replacement {
			const char * Start;
			const char * End;
			const char * NewText;
		};

		// Linux escape sequences for colour. Windows has this feature too. :)
		// The table never changes, so it is built only once.
		static const Replacement replacements[] = {
		{ "\r", "", "" }, // Remove DOS formatting

		// Entry framing: the "[key" header line and the "[end key]" marker that
		// getXMLfileKey later looks for.
		{ "<entryFree", "key", "\n\033[1;36m[key" },
		{ "\"><orth", "", "\"]\033[0m<orth" },
		{ "</entryFree>", "", "\n\n\033[1;31m[end key]\033[0m\n" },

		{ "<orth", ">", "\n\n\033[1;33m" }, // The word and its roots
		{ "</orth", ">", "\033[0m" },
		{ "<itype>", "", "\033[1;33m" }, // Roots
		{ "</itype>", "", "\033[0m" },
		{ "<pos", ">", "\033[1;36m" }, // Grammatical term
		{ "</pos", ">", "\033[0m" },
		{ "<gen", ">", "\033[0;36m" }, // Gender
		{ "</gen", ">", ".\033[0m" },
		{ "<etym", ">", "\033[0;33m[" }, // Etymology
		{ "</etym", ">", "]\033[0m" },
		{ "<tr opt=", ">", "\033[0;35m" }, // Translation
		{ "<tr>", "", "\033[0;35m" }, // "
		{ "</tr>", "", "\033[0m" },
		{ "<usg opt=", ">", "\033[0;33m" }, // How used
		{ "</usg", ">", "\033[0m" },
		{ "<foreign", ">", "\033[1;32m" }, // Language other than Latin/English
		{ "</foreign", ">", "\033[0m" },
		{ "<hi", ">", "\033[1;32m" }, // Grammatical term
		{ "</hi>", "", "\033[0m" },
		{ "<quote", ">", "\033[1;35m" }, // Latin quote
		{ "</quote>", "", "\033[0m" },
		{ "<author>", "", "\033[1;31m" }, // Author of quote
		{ "</author>", "", "\033[0m" },
		{ "<bibl", ">", "\033[0;32m" }, // Book reference paragraph/pages
		{ "</bibl>", "", "\033[0m" },

		{ "<", ">", "" }, // Break down all remaining tags

		// Character entities -> literal characters.
		{ "&Agrave;",  "",   "À" }, { "&Egrave;",  "",   "È" }, { "&Igrave;",  "",   "Ì" },
		{ "&Ograve;",  "",   "Ò" }, { "&Ugrave;",  "",   "Ù" }, { "&Ygrave;",  "",   "Ỳ" },
		{ "&agrave;",  "",   "à" }, { "&egrave;",  "",   "è" }, { "&igrave;",  "",   "ì" },
		{ "&ograve;",  "",   "ò" }, { "&ugrave;",  "",   "ù" }, { "&ygrave;",  "",   "ỳ" },
		{ "&Aacute;",  "",   "Á" }, { "&Eacute;",  "",   "É" }, { "&Iacute;",  "",   "Í" },
		{ "&Oacute;",  "",   "Ó" }, { "&Uacute;",  "",   "Ú" }, { "&Yacute;",  "",   "Ý" },
		{ "&aacute;",  "",   "á" }, { "&eacute;",  "",   "é" }, { "&iacute;",  "",   "í" },
		{ "&oacute;",  "",   "ó" }, { "&uacute;",  "",   "ú" }, { "&yacute;",  "",   "ý" },
		{ "&cacute;",  "",   "ć" }, { "&gacute;",  "",   "ǵ" }, { "&sacute;",  "",   "ś" },
		{ "&imacracute;","", "í" }, { "&imacrbreve;","", "ĭ" }, { "&uml;",     "",   "ü" },
		{ "&Abreve;",  "",   "A" }, { "&Ebreve;",  "",   "E" }, { "&Ibreve;",  "",   "I" },
		{ "&Obreve;",  "",   "Ŏ" }, { "&Ubreve;",  "",   "Ŭ" }, { "&Ybreve;",  "",   "Y" },
		{ "&abreve;",  "",   "ă" }, { "&ebreve;",  "",   "ĕ" }, { "&ibreve;",  "",   "ĭ" },
		{ "&obreve;",  "",   "ŏ" }, { "&ubreve;",  "",   "ŭ" }, { "&ybreve;",  "",   "y" },
		{ "&Amacr;",   "",   "Ā" }, { "&Emacr;",  "",   "Ē"  }, { "&Imacr;",  "",   "Ī" },
		{ "&Omacr;",   "",   "Ō" }, { "&Umacr;",  "",   "Ū"  }, { "&Ymacr;",  "",   "Ȳ" },
		{ "&amacr;",   "",   "ā" }, { "&emacr;",  "",   "ē"  }, { "&imacr;",  "",   "ī" },
		{ "&omacr;",   "",   "ō" }, { "&umacr;",  "",   "ū"  }, { "&ymacr;",  "",   "ȳ" },
		{ "&Acirc;",   "",   "Â" }, { "&Ecirc;",  "",   "Ê"  }, { "&Icirc;",  "",   "Î" },
		{ "&Ocirc;",   "",   "Ô" }, { "&Ucirc;",  "",   "Û"  }, { "&Ycirc;",  "",   "Ŷ" },
		{ "&acirc;",   "",   "â" }, { "&ecirc;",  "",   "ê"  }, { "&icirc;",  "",   "î" },
		{ "&ocirc;",   "",   "ô" }, { "&ucirc;",  "",   "û"  }, { "&ycirc;",  "",   "ŷ" },
		{ "&Aring;",   "",   "Å" }, { "&Ering;",  "",   "E̊"  }, { "&Oring;",  "",   "O̊" },
		{ "&Uring;",   "",   "Ů" }, { "&Yring;",  "",   "Y̊"  }, { "Le&acute;","","Lérin"},
		{ "&aring;",   "",   "å" }, { "&ering;",  "",   "e̊"  }, { "&oring;",  "",   "o̊" },
		{ "&uring;",   "",   "ů" }, { "&yring;",  "",   "ẙ"  }, { "&Auml;",   "",   "Ä" },
		{ "&Euml;",    "",   "Ë" }, { "&Iuml;",  "",   "Ï"   }, { "c&racute;","",  "cr" },
		{ "&Ouml;",    "",   "Ö" }, { "&Uuml;",  "",   "Ü"   }, { "&Yuml;",   "",   "Ÿ" },
		{ "&auml;",    "",   "ä" }, { "&euml;",  "",   "ë"   }, { "&iuml;",   "",   "ï" },
		{ "&ouml;",    "",   "ö" }, { "&uuml;",  "",   "ü"   }, { "&yuml;",   "",   "ÿ" },
		{ "&Dagger;",  "",   "‡" }, { "&dagger;",  "",   "†" }, { "&sect;",   "",   "§" },
		{ "&ast;",     "",   "*" }, { "&OElig;",  "",   "Oe" }, { "&oelig;",  "",  "oe" },
		{ "&AElig;",   "",   "Ae"}, { "&aelig;",  "",   "ae" }, { "&rdquo;",  "",   "˝" },
		{ "&ldquo;",   "",   " ̏" }, { "&rsquo;",  "",   "´"  }, { "&lsquo;",  "",   "`" },
		{ "&lpar;",    "",   "(" }, { "&rpar;",  "",   ")"   }, { "&adot;",   "",   "ạ" },
		{ "&larr;",    "",   "←" }, { "&macr;",  "",   "¯"   }, { "&breve;",  "",   "˘" },
		{ "&ccedil;",  "",   "ç" }, { "&etilde;",  "",   "ẽ" }, { "&ntilde;", "",   "ñ" },
		{ "&pound;",   "",   "£" }, { "&dollar;",  "",   "$" }, { "&brevemacr;","",  "" },
		{ "&dibreve;", "",   ""  }, { "&cbreve;",  "",   "č" }, { "&lacute;", "",   "l" },
		{ "&qacute;",  "",   "q" }, { "&macracute;",  "",   "¯" },
		{ "&mdash;",   "",   "\n\n • " },

		// Spacing clean-up around punctuation, plus removal of n="..." attributes.
		{ "  ",  "", " " }, { " .",  "", "." }, { " ,",  "", "," }, { " )",  "", ")" },
		{ " :",  "", ":" }, { " ;",  "", ";" }, { " ’",  "", "’" }, { "§ ",  "", "§" },
		{ "‘ ",  "", "‘" }, { "( ",  "", "(" }, { "... ","", "..."},{ "\" n=\"", "\"", "\"" }
		};

		// Apply the rules strictly in table order.
		for (const auto &replace : replacements)
			findAndReplaceAllBetween(sText, replace.Start, replace.End, replace.NewText);

		return sText;
	}

	// Prints one dictionary entry: every line of XMLDictFile from byte offset
	// offsetTmp up to, but not including, the first line containing "[end key]"
	// (or up to the end of the file when no such line follows).
	void getXMLfileKey (std::fstream &XMLDictFile, int &offsetTmp)
	{
		// Drop any EOF/fail state left by earlier reads so the seek takes effect.
		XMLDictFile.clear();
		XMLDictFile.seekg(offsetTmp, std::ios::beg);

		for (std::string text; getline(XMLDictFile, text); )
		{
			if (text.find("[end key]") != std::string::npos)
				break;

			std::cout << text + '\n';
		}
	}

	// Downloads the raw XML dictionary and renders it into the colourised text
	// file the rest of the program reads.
	//
	// origXMLfileName  raw dictionary file; the wget command below saves the
	//                  download under the last component of its URL, so this
	//                  name has to match it
	// newXMLfileName   rendered file to create
	// offsetFileName   index file; removed because it would no longer match the
	//                  newly rendered dictionary
	//
	// Returns true only when a rendered file containing at least one entry was
	// written. On failure no rendered file is left behind, so the next run
	// tries the download again.
	bool setupXMLDict(const std::string &origXMLfileName, const std::string &newXMLfileName, const std::string &offsetFileName)
	{

		std::cout << "\nDictionary not found. Downloading...\n\n";

		FILE * f = popen("wget https://github.com/PerseusDL/lexica/raw/master/CTS_XML_TEI/perseus/pdllex/lat/ls/lat.ls.perseus-eng1.xml; rm -f latDictIndex.txt", "r");

		if (f == 0)
		{
			std::cerr << "\n\nCould not download file. Exiting.\n\n";

			return false;
		}

		// Pass on whatever the command writes to its standard output.
		char buf[1024];

		while(fgets(buf, sizeof buf, f))
			std::cout << buf;

		pclose(f);

		// The shell command removes the default index name; also honour the
		// name the caller actually passed in.
		std::remove(offsetFileName.c_str());

		// popen succeeding only means the shell started. Check that the raw
		// file really exists before creating any output.
		std::ifstream XMLDictFile(origXMLfileName);

		if (!XMLDictFile)
		{
			std::cerr << "\n\nCould not download file. Exiting.\n\n";

			return false;
		}

		std::cout << "Rendering XML file. Please wait...\n\n";

		std::ofstream XMLDictSave(newXMLfileName);

		if (!XMLDictSave)
		{
			std::cerr << "\n\nCould not create " << newXMLfileName << ". Exiting.\n\n";

			return false;
		}

		const std::string keyStr = "key=\""; // Catch every single key
		std::string line;
		bool inEntries = false; // set once the first key line is seen

		// Everything before the first key line is skipped; from that line on
		// every line is rendered and saved.
		while (getline (XMLDictFile,line))
		{
			if (!inEntries && line.find(keyStr) == std::string::npos)
				continue;

			inEntries = true;
			XMLDictSave << prepareXML(line) + '\n';
		}

		XMLDictSave.close();

		if (!inEntries)
		{
			// Nothing usable was downloaded (for instance an error page).
			// Remove the empty result so it is not mistaken for a dictionary.
			std::remove(newXMLfileName.c_str());
			std::cerr << "\n\nDownloaded file contains no dictionary entries. Exiting.\n\n";

			return false;
		}

		return true;
	}

	// Reads the index file ("key,offset" per line) into the B-tree.
	//
	// offsetFile  index stream; its state is cleared before reading
	// offvals     tree that receives every key/offset pair
	//
	// Lines whose offset is not a plain non-negative number of at most nine
	// digits are skipped. Returns false (after printing a message) when not a
	// single pair could be loaded.
	bool loadXMLoffsets(std::fstream &offsetFile, BTree& offvals)
	{
		offsetFile.clear();

		std::string keyName;
		std::string keyVal;
		bool loadedAny = false;	// a flag cannot wrap around like a 16-bit counter

		// Key up to the comma, offset up to the end of the line.
		while (getline(offsetFile, keyName, ',') && getline(offsetFile, keyVal, '\n'))
		{
			// Checked up front so stoi can neither reject the text nor overflow.
			const bool wellFormed = !keyVal.empty() && keyVal.length() <= 9 &&
				keyVal.find_first_not_of("0123456789") == std::string::npos;

			if (!wellFormed)
				continue;

			offvals.insert(keyName, stoi(keyVal));
			loadedAny = true;
		}

		if (!loadedAny)
		{
			std::cerr << "\nCould not load offsets. Exiting.\n" << std::endl;
			return false;
		}

		return true;
	}

	// Looks up every command-line argument in the dictionary and prints its
	// entry. The rendered dictionary and the index are created first when they
	// are missing. Returns 1 when the dictionary or the index cannot be set up.
	int main(int argc, char* argv[])
	{

		BTree t(10);

		const std::string origXMLfileName = "lat.ls.perseus-eng1.xml";
		const std::string XMLfileName = "lat.ls.perseus-eng2.xml";
		const std::string offsetFileName = "latDictIndex.txt";

		fstream XMLDictFile(XMLfileName);

		if (!XMLDictFile)
		{
			if (!setupXMLDict(origXMLfileName, XMLfileName, offsetFileName))
				return 1;

			// Only open here: open() on a stream that is already open fails.
			XMLDictFile.open(XMLfileName);

			if (!XMLDictFile)
			{
				std::cerr << "\nCould not open " << XMLfileName << ". Exiting.\n" << std::endl;
				return 1;
			}
		}

		// The index is opened after the dictionary set-up because that step
		// deletes an index that no longer matches.
		fstream offsetFile(offsetFileName);

		if (!offsetFile)
		{
			createIndexFile(XMLDictFile, offsetFileName);
			offsetFile.open(offsetFileName);
		}

		if (!loadXMLoffsets(offsetFile, t))
			return 1;

		for(int keyNum = 1; keyNum < argc; keyNum++)
		{
			// Index keys are stored with A-Z folded to lower case.
			std::string word = argv[keyNum];

			for (char &c : word)
				if (c >= 'A' && c <= 'Z')
					c += 'a' - 'A';

			int offset = 0;

			// Without this check a miss would print whatever entry the
			// previous offset pointed at.
			if (t.search(word, offset) == NULL)
			{
				std::cerr << "\n" << argv[keyNum] << ": not found in dictionary.\n";
				continue;
			}

			getXMLfileKey(XMLDictFile, offset);
		}

		// t.traverse();

		XMLDictFile.close();
		offsetFile.close();

		return 0;
	}