Skip to content

Instantly share code, notes, and snippets.

@modeco80
Last active August 30, 2019 19:58
Show Gist options
  • Save modeco80/d9f6350fe6c18f5807235d299d2920d9 to your computer and use it in GitHub Desktop.
Save modeco80/d9f6350fe6c18f5807235d299d2920d9 to your computer and use it in GitHub Desktop.
Simple STL-only HTML entity unescaper
/// Decodes a small, fixed set of HTML entities in `input`, in place.
///
/// Recognised entities (matched case-sensitively, terminating ';' required):
///   &nbsp; -> ' '    &amp;  -> '&'    &lt;   -> '<'    &gt; -> '>'
///   &quot; -> '"'    &#x27; -> '\''   &#x2F; -> '/'
///
/// Every other '&' (a bare ampersand, an unknown entity, or an entity with no
/// terminating ';') is copied through unchanged and does not stop later
/// entities from being decoded.
///
/// The text is decoded in one left-to-right pass, so the output of a
/// replacement is never decoded a second time: "&amp;lt;" becomes "&lt;",
/// not "<".
///
/// @param input String to decode; overwritten with the decoded text.
inline void unescape(std::string& input) {
    struct Entity {
        const char* pattern;  // Entity text following the '&', including the ';'.
        char replacement;     // Single character the entity decodes to.
    };
    // Note: &nbsp; deliberately decodes to a plain ASCII space here.
    static const Entity kEntities[] = {
        {"nbsp;", ' '},
        {"amp;", '&'},
        {"lt;", '<'},
        {"gt;", '>'},
        {"quot;", '"'},
        {"#x27;", '\''},
        {"#x2F;", '/'},
    };

    std::string::size_type amp = input.find('&');
    if (amp == std::string::npos) {
        return;  // Nothing to decode; avoid building a copy.
    }

    std::string decoded;
    decoded.reserve(input.size());  // Decoding never makes the text longer.

    std::string::size_type copied = 0;  // Start of the text not yet emitted.
    while (amp != std::string::npos) {
        // Emit the literal text preceding this '&'.
        decoded.append(input, copied, amp - copied);

        // Default: treat the '&' as a literal character.
        char emitted = '&';
        std::string::size_type consumed = 1;
        for (const Entity& entity : kEntities) {
            const std::string::size_type length =
                std::char_traits<char>::length(entity.pattern);
            // compare() clamps the substring to the end of `input`, so a
            // truncated entity at the end of the string simply fails to match.
            if (input.compare(amp + 1, length, entity.pattern) == 0) {
                emitted = entity.replacement;
                consumed += length;
                break;
            }
        }

        decoded.push_back(emitted);
        copied = amp + consumed;
        // Resume after the consumed text so replacements are not re-scanned.
        amp = input.find('&', copied);
    }

    decoded.append(input, copied, std::string::npos);
    input.swap(decoded);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment