Skip to content

Instantly share code, notes, and snippets.

@parttimenerd
Created January 1, 2014 14:24
Show Gist options
  • Save parttimenerd/8208406 to your computer and use it in GitHub Desktop.
Save parttimenerd/8208406 to your computer and use it in GitHub Desktop.
A text query based engine to sort a list of elements. Eventually used inside a Qt based project.
#ifndef BERRYENGINE_H
#define BERRYENGINE_H
#include <vector>
#include <QString>
#include <QMap>
#include <QHash>
#include <QSet>
#include <QRegExp>
#include <map>
#include <set>
#include <iostream>
#include <functional>
#include <algorithm>
#include <utility>
#include "elementgroup.h"
template<typename Element>
class BerryEngine
{
public:
BerryEngine(){
this->filterFuncs["raw"] = [](QString str, const Element &elem){ return elem == str; };
this->filterFuncs["line"] = [](QString str, const Element &elem){ return elem == str; };
this->filterFuncs["file"] = [](QString str, const Element &elem){ return elem == str; };
this->filterFuncs["description"] = [](QString str, const Element &elem){ return elem == str; };
this->filterFuncs["id"] = [](QString str, const Element &elem){ return elem == str; };
this->filterPoolFuncs["line"] = [](const Element &elem){ return "line"; };
this->filterPoolFuncs["file"] = [](const Element elem){ return "file"; };
this->filterPoolFuncs["description"] = [](const Element &elem){ return "description"; };
this->filterPoolFuncs["id"] = [](const Element &elem){ return "id"; };
}
/**
* @brief Adds a new element and updates the string pools.
* @param element new element
*/
void addNewElement(Element element){
this->elements.append(element);
auto it = this->filterPoolFuncs.constBegin();
while (it != this->filterPoolFuncs.constEnd()) {
this->filterPool[it.key()].insert(it.value()(element));
++it;
}
auto it2 = this->filterCSPoolFuncs.constBegin();
while (it2 != this->filterCSPoolFuncs.constEnd()) {
this->filterCSPool[it2.key()].unite(it2.value()(element));
++it;
}
}
/**
* @brief Returns a number of suggestions for a given query.
* E.g. query = "#sourt" => "#sort"
* @param query given query
* @param number maximum number of suggestions
* @return suggestions for the given query
*/
QStringList getSuggestions(QString _query, size_t number){
QString query(_query);
if (!query.startsWith("#")){
query = "#raw " + query;
}
QStringList cmdStrings = query.split("#");
QStringList _cmdStrings = _query.split("#");
QString lastCmdString;
if (cmdStrings.empty()){
lastCmdString = "";
} else {
lastCmdString = cmdStrings[cmdStrings.size() - 1];
}
QStringList suggs = this->getSuggestionsForCmdQuery(lastCmdString, number);
for (int i = 0; i < suggs.size(); i++){
if (_cmdStrings.empty()){
suggs[i] = suggs[i].right(suggs[i].size() - 1);
} else {
_cmdStrings[_cmdStrings.size() - 1] = suggs[i];
suggs[i] = _cmdStrings.join(" #");
}
}
return suggs;
}
QList<ElementGroup<Element> > query(QString query){
if (!query.startsWith("#")){
query = "#raw " + query;
}
QList<Element> elemList;
QStringList cmdStrings = query.split("#", QString::SkipEmptyParts);
elemList = this->executeFilters(this->elements, cmdStrings);
elemList = this->executeFilterCS(elemList, cmdStrings);
elemList = this->executeSortCmds(elemList, cmdStrings);
auto groups = this->executeGroupCmds(elemList, cmdStrings);
return groups;
}
/**
* @brief Reexecutes the last query.
* If no query had been executed before, a blank string is used.
*
* @return query result.
*/
QList<ElementGroup<Element>> reexecuteLastQuery(){
return this->query(this->lastQuery);
}
/**
* @brief Sets the elements inherited by this engine.
*
* @param newElements new elements, now inherited by this engine
*/
void setElements(const QList<Element> &newElements){
foreach (Element elem, newElements){
this->addNewElement(elem);
}
}
private:
QList<Element> elements;
QString lastQuery = "";
QMap<QString, std::function<bool(QString, const Element)> > filterFuncs;
QMap<QString, std::function<QString(const Element)> > filterPoolFuncs;
QHash<QString, QSet<QString> > filterPool;
QMap<QString, std::function<bool(QStringList, const Element)> > filterCSFuncs;
QMap<QString, std::function<QSet<QString>(const Element)> > filterCSPoolFuncs;
QHash<QString, QSet<QString> > filterCSPool;
QMap<QString, std::function<int(Element, const Element)>> sortFuncs;
QMap<QString, std::function<QString(const Element)>> groupFuncs;
QList<Element> executeFilters(const QList<Element> &elements, const QStringList &cmdStrings){
QList<Element> resList;
foreach (QString cmdString, cmdStrings){
QStringList arr = cmdString.split(" ", QString::SkipEmptyParts);
QStringList cmd = arr.takeFirst();
QStringList argument = arr.join(" ");
if (!arr.empty() && this->isFilterCmd(cmd)){
foreach (Element element, elements){
if (this->filterFuncs[cmd](argument, element) && !resList.contains(element)){
resList.append(element);
}
}
}
}
return resList;
}
QList<Element> executeFilterCS(const QList<Element> &elements, const QStringList &cmdStrings){
QList<Element> resList;
foreach (QString cmdString, cmdStrings){
QStringList arr = cmdString.split(" ", QString::SkipEmptyParts);
QString cmd = arr.takeFirst();
QString argument = arr.join("");
if (!arr.empty() && this->isFilterCSCmd(cmd)){
QStringList args = argument.split(",", QString::SkipEmptyParts);
foreach (Element element, elements){
if (this->filterCSFuncs[cmd](args, element) && !resList.contains(element)){
resList.append(element);
}
}
}
}
return resList;
}
QList<Element> executeSortCmds(const QList<Element> &elements, const QStringList &cmdStrings){
QList<std::pair<QString, bool> > sortCmds;
foreach (QString cmdString, cmdStrings){
QStringList arr = cmdString.split(" ", QString::SkipEmptyParts);
if (arr.size() < 2)
continue;
QString cmd = arr.takeFirst();
if (arr[0] == "by")
arr.removeFirst();
arr = arr.join(" ").split(",", QString::SkipEmptyParts);
foreach (QString cmdPart, arr){
cmdPart = cmdPart.trimmed();
QStringList cmdPartList = cmdPart.split(" ");
if (cmdPartList.empty())
continue;
cmdPart = cmdPartList[0];
bool asc = true;
if (cmdPartList.size() >= 2){
asc = cmdPartList[1] == "asc";
}
if (this->isSortCmd(cmdPart))
sortCmds.append(std::make_pair(cmdPart, asc));
}
}
QList<Element> resList(elements);
foreach (auto sortCmd, sortCmds){
if (sortCmd.second){
qStableSort(resList.begin(), resList.end(), this->sortFuncs[sortCmd.first]);
} else {
auto sortFunc = this->sortFuncs[sortCmd.first];
qStableSort(resList.begin(), resList.end(), [&](const Element &elem1, const Element &elem2){
return !sortFunc(elem1, elem2);
});
}
}
return resList;
}
QList<ElementGroup<Element> > executeGroupCmds(const QList<Element> &elements, const QStringList &cmdStrings){
QStringList groupCmds;
foreach (QString cmdString, cmdStrings){
QStringList arr = cmdString.split(" ", QString::SkipEmptyParts);
if (arr.size() < 2)
continue;
QString cmd = arr.takeFirst();
if (arr[0] == "by")
arr.removeFirst();
arr = arr.join("").split(",", QString::SkipEmptyParts);
foreach (QString cmdPart, arr){
QStringList cmdPartList = cmdPart.split(" ");
if (cmdPartList.empty())
continue;
groupCmds.append(cmdPartList[0]);
}
}
int id = 0;
QHash<QStringList, std::pair<QList<Element>, int> > groupHash;
foreach (Element element, elements){
QStringList groupTitles;
if (!groupHash.contains(groupTitles)){
groupHash[groupTitles] = std::make_pair(QList<Element>(), id);
id++;
}
groupHash[groupTitles].first.append(element);
}
QList<ElementGroup<Element> > groupList;
ElementGroup<Element> dummy(*(new QStringList()), *(new QList<Element>()));
for (int i = 0; i < groupHash.size(); i++)
groupList.append(dummy);
auto it = groupHash.constBegin();
while (it != groupHash.constEnd()) {
groupList[it.value().second] = *(new ElementGroup<Element>(it.key(), it.value().first));
it++;
}
return groupList;
}
QStringList getSuggestionsForCmdQuery(QString cmdQuery, size_t number){
QStringList tokens = cmdQuery.split(" ");
QStringList suggs;
if (tokens.empty())
return suggs;
bool hasByString = tokens.size() >= 2 && tokens[1] == "by";
QString cmd = tokens[0];
if (isSortCmd(cmd) || isGroupCmd(cmd)){
int frontCut = std::min(1 + (hasByString ? 1 : 0), tokens.size());
tokens = tokens.mid(frontCut, tokens.size());
QStringList args = tokens.join(" ").replace("\\s+", "\\s").split(",", QString::SkipEmptyParts);
args.removeDuplicates();
if (isSortCmd(cmd)){
suggs = getSuggestionsForSortCmd(args);
} else {
suggs = getSuggestionsForGroupCmd(args);
}
} else if (isFilterCmd(cmd) || isFilterCSCmd(cmd)){
tokens = tokens.mid(0, tokens.size());
QString rejoined = tokens.join(" ").replace("\\s+", "\\s");
if (isFilterCmd(cmd)){
suggs = getSuggestionsForFilterCmd(cmd, rejoined);
} else {
QStringList args = rejoined.split(",", QString::SkipEmptyParts);
suggs = getSuggestionsForFilterCSCmd(cmd, args);
}
} else {
suggs = getSuggestionsForCmd(cmd);
}
return suggs.mid(0, number);
}
QStringList getSuggestionsForSortCmd(QStringList args){
QString last;
if (args.empty()){
last = "";
} else {
last = args[args.size() - 1];
}
QStringList pool(this->groupFuncs.keys());
QStringList list;
QStringList arr = last.split(" ");
if (pool.contains(arr[0])){
list.append("asc");
list.append("desc");
if (arr.size() > 1){
list = this->sortStringsByStringEquality(list, arr[1]);
}
for (int i = 0; i < list.size(); i++)
list[i] = arr[i] + " " + list[i];
} else {
list = this->sortStringsByStringEquality(pool, last);
}
for (int i = 0; i < list.size(); i++){
if (args.empty()){
list[i] = "sort by " + list[i];
} else {
args[args.size() - 1] = list[i];
list[i] = "sort by " + args.join(", ");
}
}
return list;
}
QStringList getSuggestionsForGroupCmd(QStringList args){
QString last;
if (args.empty()){
last = "";
} else {
last = args[args.size() - 1];
}
QStringList pool(this->groupFuncs.keys());
QStringList list = this->sortStringsByStringEquality(pool, last);
for (int i = 0; i < list.size(); i++){
if (args.empty()){
list[i] = "group by " + list[i];
} else {
args[args.size() - 1] = list[i];
list[i] = "group by " + args.join(", ");
}
}
return list;
}
QStringList getSuggestionsForFilterCmd(QString cmd, QString argument){
QStringList pool(this->filterPool[cmd].toList());
return this->sortStringsByStringEquality(pool, argument);
}
QStringList getSuggestionsForFilterCSCmd(QString cmd, QStringList args){
QString last;
if (args.empty()){
last = "";
} else {
last = args[args.size() - 1];
}
QStringList pool(this->filterCSPool[cmd].toList());
QStringList list = this->sortStringsByStringEquality(pool, last);
for (int i = 0; i < list.size(); i++){
if (args.empty()){
list[i] = cmd + " " + list[i];
} else {
args[args.size() - 1] = list[i];
list[i] = cmd + " " + args.join(", ");
}
}
return list;
}
QStringList getSuggestionsForCmd(QString cmd){
QStringList suppCmds = getSupportedCommands();
return sortStringsByStringEquality(suppCmds, cmd);
}
/**
* @brief Returns a list of supported commands.
* E.g. "#sort by", "#group by", "#[filter name]"
* @return a string list of commands
*/
QStringList getSupportedCommands(){
QStringList list;
{
auto it = this->filterFuncs.constBegin();
while (it != this->filterFuncs.constEnd()) {
list.append(it.key());
++it;
}
}
{
auto it = this->filterCSFuncs.constBegin();
while (it != this->filterCSFuncs.constEnd()) {
list.append(it.key());
++it;
}
}
{
auto it = this->groupFuncs.constBegin();
while (it != this->groupFuncs.constEnd()) {
list.append(it.key() + " by");
++it;
}
}
{
auto it = this->sortFuncs.constBegin();
while (it != this->sortFuncs.constEnd()) {
list.append(it.key() + " by ");
++it;
}
}
return list;
}
/**
* @brief It sorts the strings by their edit distance to the compareWith string in ascending order.
* @param strings strings to sort
* @param compareWith compare them with this string
* @return the sorted list
*/
QStringList sortStringsByStringEquality(QStringList strings, QString compareWith){
QMap<int, QString> weightedStrings;
for(auto it = strings.begin(); it != strings.end(); it++) {
int strEqu = this->stringEquality(compareWith, *it);
weightedStrings[strEqu] = *it;
}
QStringList retList;
foreach (QString string, weightedStrings){
retList.append(string + "[" + QString::number(this->stringEquality(string, compareWith)) + "]");
}
return retList;
}
bool isSortCmd(QString cmd){
return sortFuncs.count(cmd) > 0;
}
bool isGroupCmd(QString cmd){
return groupFuncs.count(cmd) > 0;
}
bool isFilterCmd(QString cmd){
return filterFuncs.count(cmd) > 0;
}
bool isFilterCSCmd(QString cmd){
return filterCSFuncs.count(cmd) > 0;
}
/**
* @brief Calculates the equality of two strings.
* If both strings are only single words, a combination of the levenshtein edit distance and a phonetic matching algorithm is used.
* If not only the first is used.
* Attention: using a phonetic algorithm is much slower, than the simple levenshtein.
* @param str1 first string
* @param str2 second string
* @return equality of both strings, 0 means both string are equal, the greater the number, the more unequal are both strings.
*/
int stringEquality(QString str1, QString str2){
if (this->isSingleWord(str1) && this->isSingleWord(str2)){
return phoneticEquality(str1, str2);
}
return editDistance(str1, str2);
}
/**
* @brief Implementation of the levenshtein distance, a metric for the edit distance between to strings.
* Original source: http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#C.2B.2B
* @param str1 first string
* @param str2 second string
* @return edit distance
*/
int editDistance(QString str1, QString str2){
const int len1 = str1.size(), len2 = str2.size();
std::vector<int> col(len2+1), prevCol(len2+1);
for (int i = 0; i < prevCol.size(); i++)
prevCol[i] = i;
for (int i = 0; i < len1; i++) {
col[0] = i+1;
for (int j = 0; j < len2; j++)
col[j+1] = std::min( std::min( 1 + col[j], 1 + prevCol[1 + j]), prevCol[j] + (str1[i] == str2[j] ? 0 : 1) );
col.swap(prevCol);
}
return prevCol[len2];
}
/**
* @brief Implementation of a phonetic algorithm to compare two words.
* It generates the NYSIIS for both words and returns the levenshtein edit distance between them.
* Attention: using a phonetic algorithm is much slower, than the simple levenshtein.
* @param word1 first word
* @param word2 second word
* @return equality of both words, 0 means both words are equal, the greater the number, the more unequal are both words.
*/
int phoneticEquality(QString word1, QString word2){
return editDistance(nysiisForWord(word1), nysiisForWord(word2));
}
/**
* @brief Examines the NYSIIS of the given word.
* The NYSIIS is the New York State Identification and Intelligence System Phonetic Code,
* http://en.wikipedia.org/wiki/NYSIIS.
* @param word given word
* @return NYSIIS of the given word
*/
QString nysiisForWord(QString word){
if (word.size() == 0){
return "";
}
QString code;
word = word.toUpper();
word.replace(QRegExp("^MAC"), "MCC");
word.replace(QRegExp("^KN"), "NN");
word.replace(QRegExp("^K"), "C");
word.replace(QRegExp("^P(H|F)"), "FF");
word.replace(QRegExp("^SCH"), "SSS");
word.replace(QRegExp("(EE|IE)$"), "Y");
word.replace(QRegExp("(DT|RT|RD|NT|ND)$"), "D");
code.append(word[0]);
word = word.right(word.size() - 1);
static QHash<QString, QRegExp> regexps; //good idea to make it static?
regexps["AF"] = QRegExp("^EV");
regexps["A"] = QRegExp("^(A|E|I|O|U)");
regexps["G"] = QRegExp("^Q");
regexps["S"] = QRegExp("^Z");
regexps["N"] = QRegExp("^(M|KN)");
regexps["C"] = QRegExp("^K");
regexps["SSS"] = QRegExp("^SCH");
regexps["FF"] = QRegExp("^PH");
while (word.size() > 0){
this->replaceRegExpsInText(regexps, word);
if (!(word.startsWith("H") && (!this->isVowel(code[code.size() - 1])
|| (word.size() >= 2 && !this->isVowel(word[1]))))
&& !(word.startsWith("W") && this->isVowel(code[code.size() - 1]))){
if (word[0] != code[code.size() - 1]){
code.append(word[0]);
}
}
word = word.right(word.size() - 1);
}
if (code.endsWith("S")){
code = code.left(code.size() - 1);
}
if (code.endsWith("AY")){
code.replace(QRegExp("AY$"), "Y");
} else if (code.endsWith("A")){
code = code.left(code.size() - 1);
}
code = removeRepeatedCharacters(code);
return code;
}
QString removeRepeatedCharacters(const QString &str){
QString res;
int i = 0;
while (i < str.size() - 1){
if (str[i] != str[i + 1]){
res.append(str[i]);
}
i++;
}
res.append(str[str.size() - 1]);
return res;
}
void replaceRegExpsInText(QHash<QString, QRegExp> &regexps, QString &text){
auto it = regexps.constBegin();
while (it != regexps.constEnd()) {
text.replace(it.value(), it.key());
++it;
}
}
bool isVowel(const QChar &someChar){
return someChar == 'a' || someChar == 'e' || someChar == 'i' || someChar == 'o' || someChar == 'u'
|| someChar == 'ä' || someChar == 'ö' || someChar == 'ü';
}
bool isSingleWord(QString &str){
QRegExp regexp("[A-Za-zäöü]+");
return regexp.exactMatch(str);
}
};
#endif // BERRYENGINE_H
#ifndef ELEMENTGROUP_H
#define ELEMENTGROUP_H
#include <functional>
#include <QList>
#include <QStringList>
#include <QString>
#include <stdint.h>
template<class Element>
class ElementGroup
{
public:
ElementGroup(QStringList _titles, const QList<Element> _elements){
this->elements = _elements;
this->titles = _titles;
}
bool contains(Element element){
return this->elements.contains(element);
}
const std::vector<Element> getElements(){
return this->elements;
}
size_t size(){
return this->elements.size();
}
const QStringList getTitles(){
return this->titles;
}
Element get(size_t index){
return this->elements[index];
}
private:
QStringList titles;
QList<Element> elements;
};
#endif // ELEMENTGROUP_H
#include "mainwindow.h"
#include <QApplication>
#include <qstring.h>
#include <iostream>
#include <berryengine.h>
int main(int argc, char *argv[])
{
QApplication a(argc, argv);
MainWindow w;
w.show();
BerryEngine<QString> engine;
engine.addNewElement("3");
auto path = engine.getSuggestions("#sort by asc", 3);
for (auto i = path.begin(); i != path.end(); ++i)
std::cout << (*i).toStdString() << std::endl;
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment