Skip to content

Instantly share code, notes, and snippets.

@namazso
Created October 15, 2021 03:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save namazso/0b417ea1dc8a587a3b720b7fbfb8be44 to your computer and use it in GitHub Desktop.
Save namazso/0b417ea1dc8a587a3b720b7fbfb8be44 to your computer and use it in GitHub Desktop.
IRC formatting parser
#include <cstdio>
#include <string>
#include <utility>
#include "irc_format.h"
// Character-at-a-time state machine that turns an IRC-formatted byte stream
// into a list of (character, format) pairs. Feed it with process(); the
// result accumulates in `parsed`.
struct IRCParser
{
// Parser states. The order of the enumerators matters: process() moves to
// the "next" state with ++state, so each state must be followed by the one
// that handles the next expected input character.
enum State : int
{
// Plain text / single-byte formatting toggles.
Normal,
// After 0x03: up to two decimal digits of foreground, an optional comma,
// then up to two decimal digits of background.
Color1_1,
Color1_2,
ColorComma,
Color2_1,
Color2_2,
ColorFinish,
// After 0x04: six hex digits of foreground, an optional comma, then six
// hex digits of background.
ColorHex1_1,
ColorHex1_2,
ColorHex1_3,
ColorHex1_4,
ColorHex1_5,
ColorHex1_6,
ColorHexComma,
ColorHex2_1,
ColorHex2_2,
ColorHex2_3,
ColorHex2_4,
ColorHex2_5,
ColorHex2_6,
ColorHexFinish
};
// Output: every emitted character together with the format active for it.
std::vector<std::pair<char, IRCFormat>> parsed;
// Current State, stored as int so it can be incremented.
int state = Normal;
// Format that will be attached to the next emitted character.
IRCFormat current_format{};
// Accumulators for the colour digits currently being read; both are reset
// to 0 when a 0x03 or 0x04 introducer is seen in normal().
uint32_t color_fg{};
uint32_t color_bg{};
// Appends decimal digit `c` to `v`; returns false (v untouched) if `c` is not a digit.
static bool push_dec(uint32_t& v, char c);
// Appends hex digit `c` to `v`; returns false (v untouched) if `c` is not a hex digit.
static bool push_hex(uint32_t& v, char c);
// Applies the palette-index accumulators to current_format; if neither flag
// is set, both colours are reset to unset.
void color_finish(bool fg_set, bool bg_set);
// Same as color_finish, but interprets the accumulators as RRGGBB values.
void colorhex_finish(bool fg_set, bool bg_set);
// Handles one character while in the Normal state.
void normal(char c);
// Entry point: consumes one character according to the current state.
void process(char c);
};
/// Packs three 8-bit channels into a single 32-bit value with red in the
/// lowest byte, green in the next byte and blue in bits 16..23.
constexpr static uint32_t rgb(uint8_t r, uint8_t g, uint8_t b)
{
    const uint32_t red = r;
    const uint32_t green = g;
    const uint32_t blue = b;
    return (blue << 16) | (green << 8) | red;
}
/// Converts one ASCII hexadecimal digit (either case) to its value 0..15.
/// Returns 0xFF for any character that is not a hex digit.
constexpr static uint8_t unhex(char c)
{
    const uint8_t ch = static_cast<uint8_t>(c);
    if (ch >= uint8_t('0') && ch <= uint8_t('9'))
        return static_cast<uint8_t>(ch - uint8_t('0'));
    if (ch >= uint8_t('a') && ch <= uint8_t('f'))
        return static_cast<uint8_t>(ch - uint8_t('a') + 0xa);
    if (ch >= uint8_t('A') && ch <= uint8_t('F'))
        return static_cast<uint8_t>(ch - uint8_t('A') + 0xA);
    return 0xFF;
}
// Palette lookup table, indexed by the decimal colour number that follows a
// 0x03 colour code (IRCParser::color_finish indexes it with the parsed
// number). Values are packed with rgb(). The table has exactly 100 entries,
// so every two-digit decimal index 0..99 is in range.
constexpr static uint32_t k_irc_colors[] =
{
// Indices 0..15.
rgb(255, 255, 255),
rgb(0, 0, 0),
rgb(0, 0, 127),
rgb(0, 147, 0),
rgb(255, 0, 0),
rgb(127, 0, 0),
rgb(156, 0, 156),
rgb(252, 127, 0),
rgb(255, 255, 0),
rgb(0, 252, 0),
rgb(0, 147, 147),
rgb(0, 255, 255),
rgb(0, 0, 252),
rgb(255, 0, 255),
rgb(127, 127, 127),
rgb(210, 210, 210),
// Indices 16..98 (extended palette).
rgb(0x47, 0x00, 0x00),
rgb(0x47, 0x21, 0x00),
rgb(0x47, 0x47, 0x00),
rgb(0x32, 0x47, 0x00),
rgb(0x00, 0x47, 0x00),
rgb(0x00, 0x47, 0x2c),
rgb(0x00, 0x47, 0x47),
rgb(0x00, 0x27, 0x47),
rgb(0x00, 0x00, 0x47),
rgb(0x2e, 0x00, 0x47),
rgb(0x47, 0x00, 0x47),
rgb(0x47, 0x00, 0x2a),
rgb(0x74, 0x00, 0x00),
rgb(0x74, 0x3a, 0x00),
rgb(0x74, 0x74, 0x00),
rgb(0x51, 0x74, 0x00),
rgb(0x00, 0x74, 0x00),
rgb(0x00, 0x74, 0x49),
rgb(0x00, 0x74, 0x74),
rgb(0x00, 0x40, 0x74),
rgb(0x00, 0x00, 0x74),
rgb(0x4b, 0x00, 0x74),
rgb(0x74, 0x00, 0x74),
rgb(0x74, 0x00, 0x45),
rgb(0xb5, 0x00, 0x00),
rgb(0xb5, 0x63, 0x00),
rgb(0xb5, 0xb5, 0x00),
rgb(0x7d, 0xb5, 0x00),
rgb(0x00, 0xb5, 0x00),
rgb(0x00, 0xb5, 0x71),
rgb(0x00, 0xb5, 0xb5),
rgb(0x00, 0x63, 0xb5),
rgb(0x00, 0x00, 0xb5),
rgb(0x75, 0x00, 0xb5),
rgb(0xb5, 0x00, 0xb5),
rgb(0xb5, 0x00, 0x6b),
rgb(0xff, 0x00, 0x00),
rgb(0xff, 0x8c, 0x00),
rgb(0xff, 0xff, 0x00),
rgb(0xb2, 0xff, 0x00),
rgb(0x00, 0xff, 0x00),
rgb(0x00, 0xff, 0xa0),
rgb(0x00, 0xff, 0xff),
rgb(0x00, 0x8c, 0xff),
rgb(0x00, 0x00, 0xff),
rgb(0xa5, 0x00, 0xff),
rgb(0xff, 0x00, 0xff),
rgb(0xff, 0x00, 0x98),
rgb(0xff, 0x59, 0x59),
rgb(0xff, 0xb4, 0x59),
rgb(0xff, 0xff, 0x71),
rgb(0xcf, 0xff, 0x60),
rgb(0x6f, 0xff, 0x6f),
rgb(0x65, 0xff, 0xc9),
rgb(0x6d, 0xff, 0xff),
rgb(0x59, 0xb4, 0xff),
rgb(0x59, 0x59, 0xff),
rgb(0xc4, 0x59, 0xff),
rgb(0xff, 0x66, 0xff),
rgb(0xff, 0x59, 0xbc),
rgb(0xff, 0x9c, 0x9c),
rgb(0xff, 0xd3, 0x9c),
rgb(0xff, 0xff, 0x9c),
rgb(0xe2, 0xff, 0x9c),
rgb(0x9c, 0xff, 0x9c),
rgb(0x9c, 0xff, 0xdb),
rgb(0x9c, 0xff, 0xff),
rgb(0x9c, 0xd3, 0xff),
rgb(0x9c, 0x9c, 0xff),
rgb(0xdc, 0x9c, 0xff),
rgb(0xff, 0x9c, 0xff),
rgb(0xff, 0x94, 0xd3),
rgb(0x00, 0x00, 0x00),
rgb(0x13, 0x13, 0x13),
rgb(0x28, 0x28, 0x28),
rgb(0x36, 0x36, 0x36),
rgb(0x4d, 0x4d, 0x4d),
rgb(0x65, 0x65, 0x65),
rgb(0x81, 0x81, 0x81),
rgb(0x9f, 0x9f, 0x9f),
rgb(0xbc, 0xbc, 0xbc),
rgb(0xe2, 0xe2, 0xe2),
rgb(0xff, 0xff, 0xff),
// Index 99: maps to "no colour set" rather than an RGB value.
IRCFormat::color_unset
};
/// Appends the decimal digit `c` to the accumulator `v` (v = v * 10 + digit).
/// @return true if `c` was '0'..'9' and was consumed; false otherwise, in
///         which case `v` is left untouched.
bool IRCParser::push_dec(uint32_t& v, char c)
{
    const bool is_digit = (c >= '0' && c <= '9');
    if (is_digit)
        v = v * 10 + static_cast<uint32_t>(c - '0');
    return is_digit;
}
/// Appends the hexadecimal digit `c` to the accumulator `v` (shifts `v` left
/// by one nibble and ORs the digit in).
/// @return true if `c` was a hex digit and was consumed; false otherwise, in
///         which case `v` is left untouched.
bool IRCParser::push_hex(uint32_t& v, char c)
{
    const auto nibble = unhex(c);
    const bool is_hex = (nibble != 0xFF); // 0xFF is unhex's "not a hex digit" marker
    if (is_hex)
        v = (v << 4) | nibble;
    return is_hex;
}
void IRCParser::color_finish(bool fg_set, bool bg_set)
{
if (fg_set)
current_format.fg_color = k_irc_colors[color_fg];
if (bg_set)
current_format.bg_color = k_irc_colors[color_bg];
if (!fg_set && !bg_set)
{
current_format.fg_color = IRCFormat::color_unset;
current_format.bg_color = IRCFormat::color_unset;
}
}
/// Applies the hex colours accumulated in color_fg / color_bg to
/// current_format. The accumulators hold the digits in RRGGBB order and are
/// repacked through rgb().
///
/// @param fg_set  color_fg holds a foreground colour to apply.
/// @param bg_set  color_bg holds a background colour to apply.
///
/// When neither flag is set both colours are reset to
/// IRCFormat::color_unset.
void IRCParser::colorhex_finish(bool fg_set, bool bg_set)
{
    const auto repack = [](uint32_t rrggbb)
    {
        const auto red = static_cast<uint8_t>(rrggbb >> 16);
        const auto green = static_cast<uint8_t>(rrggbb >> 8);
        const auto blue = static_cast<uint8_t>(rrggbb);
        return rgb(red, green, blue);
    };

    if (!fg_set && !bg_set)
    {
        current_format.fg_color = IRCFormat::color_unset;
        current_format.bg_color = IRCFormat::color_unset;
        return;
    }
    if (fg_set)
        current_format.fg_color = repack(color_fg);
    if (bg_set)
        current_format.bg_color = repack(color_bg);
}
void IRCParser::normal(char c)
{
switch (c)
{
case 0x02:
current_format.bold ^= 1;
break;
case 0x1D:
current_format.italic ^= 1;
break;
case 0x1F:
current_format.underline ^= 1;
break;
case 0x1E:
current_format.strikethrough ^= 1;
break;
case 0x11:
current_format.monospace ^= 1;
break;
case 0x16:
std::swap(current_format.fg_color, current_format.bg_color);
break;
case 0x0F:
current_format = {};
break;
case 0x03:
state = Color1_1;
color_fg = 0;
color_bg = 0;
break;
case 0x04:
state = ColorHex1_1;
color_fg = 0;
color_bg = 0;
break;
case 0x00:
break; // end of string
default:
if (c >= 1 && c < 32)
{
break; // swallow unknown control character
}
if (current_format.fg_color == current_format.bg_color && current_format.fg_color != IRCFormat::color_unset)
{
auto copy_format = current_format;
copy_format.fg_color = IRCFormat::color_unset;
copy_format.bg_color = IRCFormat::color_unset;
copy_format.spoiler = true;
parsed.emplace_back(c, copy_format);
}
else
{
parsed.emplace_back(c, current_format);
}
break;
}
}
/// Consumes one input character according to the current parser state.
///
/// In a colour state, a character that does not fit the expected syntax ends
/// the colour sequence: whatever was collected so far is applied, the state
/// goes back to Normal, and the same character is processed again as normal
/// input. States advance with ++state, so this relies on the declaration
/// order of IRCParser::State.
///
/// Pass 0 after the last character so that a colour sequence at the very end
/// of the input is finished.
void IRCParser::process(char c)
{
    switch (state)
    {
    case Normal:
        normal(c);
        break;

    // ---- 0x03 palette colour: [fg digit][fg digit][,][bg digit][bg digit] ----
    case Color1_1:
        if (!push_dec(color_fg, c))
        {
            // No digits at all: a bare colour code resets both colours.
            color_finish(false, false);
            state = Normal;
            return process(c);
        }
        ++state;
        break;
    case Color1_2:
        if (c == ',')
        {
            // Single-digit foreground; let ColorComma consume the comma.
            ++state;
            return process(c);
        }
        if (!push_dec(color_fg, c))
        {
            color_finish(true, false);
            state = Normal;
            return process(c);
        }
        ++state;
        break;
    case ColorComma:
        if (c != ',')
        {
            color_finish(true, false);
            state = Normal;
            return process(c);
        }
        ++state;
        break;
    case Color2_1:
        if (!push_dec(color_bg, c))
        {
            color_finish(true, false);
            state = Normal;
            process(','); // add back the comma we ate
            return process(c);
        }
        ++state;
        break;
    case Color2_2:
        if (!push_dec(color_bg, c))
        {
            color_finish(true, true);
            state = Normal;
            return process(c);
        }
        ++state;
        break;
    case ColorFinish:
        color_finish(true, true);
        state = Normal;
        return process(c);

    // ---- 0x04 hex colour: [6 hex digits fg][,][6 hex digits bg] ----
    case ColorHex1_1:
        if (!push_hex(color_fg, c))
        {
            colorhex_finish(false, false);
            state = Normal;
            return process(c);
        }
        ++state;
        break;
    case ColorHex1_2:
    case ColorHex1_3:
    case ColorHex1_4:
    case ColorHex1_5:
    case ColorHex1_6:
        if (!push_hex(color_fg, c))
        {
            // some invalid garbage, just reset color
            colorhex_finish(false, false);
            state = Normal;
            return process(c);
        }
        ++state;
        break;
    case ColorHexComma:
        if (c != ',')
        {
            colorhex_finish(true, false);
            state = Normal;
            return process(c);
        }
        ++state;
        break;
    case ColorHex2_1:
        if (!push_hex(color_bg, c))
        {
            // color_fg holds a 24-bit RRGGBB value here, not a palette index,
            // so it must go through colorhex_finish; color_finish would index
            // the palette table with it and read far out of bounds.
            colorhex_finish(true, false);
            state = Normal;
            process(','); // add back the comma we ate
            return process(c);
        }
        ++state;
        break;
    case ColorHex2_2:
    case ColorHex2_3:
    case ColorHex2_4:
    case ColorHex2_5:
    case ColorHex2_6:
        if (!push_hex(color_bg, c))
        {
            // invalid stuff, just use the first color
            colorhex_finish(true, false);
            state = Normal;
            return process(c);
        }
        ++state;
        break;
    case ColorHexFinish:
        colorhex_finish(true, true);
        state = Normal;
        return process(c);
    }
}
std::vector<std::pair<char, IRCFormat>> IRCFormat::parse_message(const char* str)
{
IRCParser p;
while (const auto c = *str++)
p.process(c);
p.process(0);
return p.parsed;
}
/// Renders a parsed message as HTML.
///
/// Consecutive characters that share a format are wrapped in one run of tags:
/// whenever the format changes, the tags of the previous format are closed
/// and the tags of the new one are opened.
///
/// @param msg  Characters with their formats, e.g. from parse_message().
/// @return HTML text; the message characters themselves are HTML-escaped.
std::string IRCFormat::to_html(const std::vector<std::pair<char, IRCFormat>>& msg)
{
    IRCFormat last{};
    std::string html = last.html_open();
    for (const auto& ch : msg)
    {
        if (ch.second != last)
        {
            html += last.html_close();
            last = ch.second;
            html += last.html_open();
        }
        // The text comes from the network: escape markup-significant
        // characters so message content cannot inject tags or entities.
        switch (ch.first)
        {
        case '&':
            html += "&amp;";
            break;
        case '<':
            html += "&lt;";
            break;
        case '>':
            html += "&gt;";
            break;
        case '"':
            html += "&quot;";
            break;
        default:
            html += ch.first;
            break;
        }
    }
    html += last.html_close();
    return html;
}
/// Formats a packed colour (red in the low byte, then green, then blue) as
/// six uppercase hex digits in RRGGBB order, without a leading '#'.
static std::string rgb_to_html(uint32_t c)
{
    char color[7]; // 6 hex digits + terminating NUL
    // std::snprintf is portable, unlike the MSVC-only sprintf_s array overload.
    // The casts make the arguments match %X (unsigned int) on every platform.
    std::snprintf(color, sizeof(color), "%02X%02X%02X",
                  static_cast<unsigned>(c & 0xFF),
                  static_cast<unsigned>((c >> 8) & 0xFF),
                  static_cast<unsigned>((c >> 16) & 0xFF));
    return color;
}
/// Builds the opening tags for this format. The order here is the exact
/// reverse of the closing order in html_close(), so the tags nest properly.
std::string IRCFormat::html_open() const
{
    std::string tags;
    if (bold)
        tags += "<b>";
    if (italic)
        tags += "<i>";
    if (underline)
        tags += "<u>";
    if (strikethrough)
        tags += "<strike>";
    if (monospace)
        tags += "<code>";
    if (spoiler)
        tags += "<span data-mx-spoiler>";
    if (fg_color != color_unset)
    {
        tags += "<span data-mx-color=\"#";
        tags += rgb_to_html(fg_color);
        tags += "\">";
    }
    if (bg_color != color_unset)
    {
        tags += "<span data-mx-bg-color=\"#";
        tags += rgb_to_html(bg_color);
        tags += "\">";
    }
    return tags;
}
/// Builds the closing tags for this format, innermost first: the exact
/// reverse of the order in which html_open() emits the opening tags.
std::string IRCFormat::html_close() const
{
    std::string tags;
    if (bg_color != color_unset)
        tags += "</span>";
    if (fg_color != color_unset)
        tags += "</span>";
    if (spoiler)
        tags += "</span>";
    if (monospace)
        tags += "</code>";
    if (strikethrough)
        tags += "</strike>";
    if (underline)
        tags += "</u>";
    if (italic)
        tags += "</i>";
    if (bold)
        tags += "</b>";
    return tags;
}
#pragma once
#include <cstdint>
#include <string>
#include <sstream>
#include <vector>
/// Text formatting state attached to one character of an IRC message, plus
/// the static entry points for parsing a message and rendering it as HTML.
struct IRCFormat
{
    bool bold{};
    bool italic{};
    bool underline{};
    bool strikethrough{};
    bool monospace{};
    bool spoiler{};

    /// Packed colours (red in the low byte, then green, then blue), or
    /// color_unset when no colour applies.
    uint32_t fg_color = color_unset;
    uint32_t bg_color = color_unset;

    /// Opening HTML tags for this format.
    std::string html_open() const;
    /// Closing HTML tags for this format, in reverse order of html_open().
    std::string html_close() const;

    /// Marker for "no colour"; the high byte is non-zero, so it cannot
    /// collide with any 24-bit packed colour.
    constexpr static auto color_unset = 0xFF000000;

    /// Two formats are equal when every flag and both colours match.
    bool operator==(const IRCFormat& rhs) const
    {
        return bold == rhs.bold
            && italic == rhs.italic
            && underline == rhs.underline
            && strikethrough == rhs.strikethrough
            && monospace == rhs.monospace
            && spoiler == rhs.spoiler
            && fg_color == rhs.fg_color
            // bg_color must take part in the comparison, otherwise a
            // background-only change is never seen as a format change.
            && bg_color == rhs.bg_color;
    }
    bool operator!=(const IRCFormat& rhs) const { return !(*this == rhs); }

    /// Parses a NUL-terminated IRC message into (character, format) pairs.
    static std::vector<std::pair<char, IRCFormat>> parse_message(const char* str);
    /// Renders (character, format) pairs as HTML.
    static std::string to_html(const std::vector<std::pair<char, IRCFormat>>& msg);
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment