Skip to content

Instantly share code, notes, and snippets.

@tomykaira
Last active March 2, 2024 07:46
  • Star 78 You must be signed in to star a gist
  • Fork 20 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save tomykaira/f0fd86b6c73063283afe550bc5d77594 to your computer and use it in GitHub Desktop.
C++ single header base64 decode/encoder for C++ 11 and above.
#ifndef _MACARON_BASE64_H_
#define _MACARON_BASE64_H_
/**
* The MIT License (MIT)
* Copyright (c) 2016-2024 tomykaira
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <cstdint>
#include <string>
namespace macaron {
/**
 * Minimal Base64 encoder/decoder using the standard alphabet
 * ("A-Z", "a-z", "0-9", '+', '/') with '=' padding.
 *
 * std::string is used as a plain byte container on both sides, so embedded
 * NUL bytes and bytes >= 0x80 are handled.
 */
class Base64 {
public:
  /**
   * Encodes arbitrary bytes as padded Base64 text.
   *
   * @param data Bytes to encode; may be empty.
   * @return The encoded text. Its length is always a multiple of 4; an empty
   *         input yields an empty string.
   */
  static std::string Encode(const std::string &data) {
    static constexpr char kEncodingTable[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "abcdefghijklmnopqrstuvwxyz"
        "0123456789+/";

    const std::size_t in_len = data.size();
    std::string ret;
    ret.reserve(4 * ((in_len + 2) / 3));

    std::size_t i = 0;
    // Complete 3-byte groups. Written as `i + 2 < in_len` so that inputs
    // shorter than 2 bytes cannot make an unsigned subtraction wrap around.
    for (; i + 2 < in_len; i += 3) {
      const uint32_t triple = (ByteAt(data, i) << 16) |
                              (ByteAt(data, i + 1) << 8) |
                              ByteAt(data, i + 2);
      ret.push_back(kEncodingTable[(triple >> 18) & 0x3F]);
      ret.push_back(kEncodingTable[(triple >> 12) & 0x3F]);
      ret.push_back(kEncodingTable[(triple >> 6) & 0x3F]);
      ret.push_back(kEncodingTable[triple & 0x3F]);
    }

    // Trailing 1 or 2 bytes: emit the partial group and pad it to 4 chars.
    const std::size_t remaining = in_len - i;
    if (remaining == 1) {
      const uint32_t a = ByteAt(data, i);
      ret.push_back(kEncodingTable[a >> 2]);
      ret.push_back(kEncodingTable[(a & 0x3) << 4]);
      ret.append("==");
    } else if (remaining == 2) {
      const uint32_t a = ByteAt(data, i);
      const uint32_t b = ByteAt(data, i + 1);
      ret.push_back(kEncodingTable[a >> 2]);
      ret.push_back(kEncodingTable[((a & 0x3) << 4) | (b >> 4)]);
      ret.push_back(kEncodingTable[(b & 0xF) << 2]);
      ret.push_back('=');
    }
    return ret;
  }

  /**
   * Decodes padded Base64 text.
   *
   * @param input Text to decode. Its length must be a multiple of 4, it may
   *              only contain characters of the Base64 alphabet, and '='
   *              padding may only appear as the last one or two characters.
   * @param out   Receives the decoded bytes on success. It is left untouched
   *              when an error is returned.
   * @return An empty string on success, otherwise a human-readable error
   *         message.
   */
  static std::string Decode(const std::string &input, std::string &out) {
    // Maps a byte value to its 6-bit value; 64 marks "not in the alphabet"
    // (this includes '=', which is handled separately as padding).
    static constexpr unsigned char kDecodingTable[] = {
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 62, 64, 64, 64, 63, 52, 53, 54, 55, 56, 57,
        58, 59, 60, 61, 64, 64, 64, 64, 64, 64, 64, 0,  1,  2,  3,  4,  5,  6,
        7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
        25, 64, 64, 64, 64, 64, 64, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
        37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64};
    static_assert(sizeof(kDecodingTable) == 256,
                  "decoding table must cover every unsigned char value");
    constexpr uint32_t kInvalid = 64;

    const std::size_t in_len = input.size();
    if (in_len % 4 != 0)
      return "Input data size is not a multiple of 4";

    // in_len is a multiple of 4, so a non-empty input has at least 4 chars
    // and input[in_len - 2] is in range.
    std::size_t padding = 0;
    if (in_len != 0 && input[in_len - 1] == '=')
      padding = (input[in_len - 2] == '=') ? 2 : 1;
    const std::size_t data_len = in_len - padding;

    // Decode into a local buffer so `out` stays untouched on error and so
    // that passing the same string as `input` and `out` is safe.
    std::string decoded;
    decoded.reserve(in_len / 4 * 3 - padding);

    for (std::size_t i = 0; i < in_len; i += 4) {
      uint32_t triple = 0;
      for (std::size_t k = 0; k < 4; ++k) {
        uint32_t sextet = 0;  // trailing padding contributes zero bits
        if (i + k < data_len) {
          // Go through unsigned char: a plain (possibly signed) char would
          // index the table out of bounds for bytes >= 0x80.
          sextet = kDecodingTable[static_cast<unsigned char>(input[i + k])];
          if (sextet == kInvalid)
            return "Input contains a character that is not valid Base64";
        }
        triple = (triple << 6) | sextet;
      }
      // Only the final group can be shortened by padding.
      const std::size_t produced = (i + 4 == in_len) ? 3 - padding : 3;
      for (std::size_t k = 0; k < produced; ++k)
        decoded.push_back(static_cast<char>((triple >> (16 - 8 * k)) & 0xFF));
    }

    out.swap(decoded);
    return "";
  }

private:
  /** Returns the byte at `index` as an unsigned value in [0, 255]. */
  static uint32_t ByteAt(const std::string &bytes, std::size_t index) {
    return static_cast<unsigned char>(bytes[index]);
  }
};
} // namespace macaron
#endif /* _MACARON_BASE64_H_ */
#include <cstdint>
#include <iostream>
#include <vector>
#include "Base64.h"
int test(const std::string &data) {
std::string out;
auto b64 = macaron::Base64::Encode(data);
auto error = macaron::Base64::Decode(b64, out);
if (!error.empty()) {
std::cout << "Error: " << error << std::endl;
return 1;
}
if (data == out) {
std::cout << "OK: " << out << std::endl;
} else {
std::cout << "Wrong: " << data << ", " << b64 << ", " << out << std::endl;
}
return 0;
}
/**
 * Runs the round-trip check on a few inputs whose lengths cover every
 * padding case (0, 1 and 2 trailing bytes) as well as the empty string.
 *
 * @return 0 when every check succeeds, 1 when at least one fails.
 */
int main() {
  const char *const kSamples[] = {"hello", "", "1", "22", "333", "4444"};
  int failures = 0;
  for (const char *sample : kSamples) {
    // Keep going after a failure so every sample gets reported.
    if (test(sample) != 0)
      ++failures;
  }
  return failures == 0 ? 0 : 1;
}
@vietnguyen09
Copy link

Thanks, this Encoded version works like a charm with another platform decode 💯

@aaangeletakis
Copy link

Python 3 version

def encode(data):
    """Encode a bytes-like object as padded Base64 text.

    Args:
        data: A sequence whose items are integers in range(256), such as
            ``bytes`` or ``bytearray``. May be empty.

    Returns:
        The Base64 encoding of ``data`` as a ``str``. Its length is always a
        multiple of 4; empty input gives an empty string.
    """
    alphabet = (
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "abcdefghijklmnopqrstuvwxyz"
        "0123456789+/"
    )
    in_len = len(data)
    # Number of bytes that belong to complete 3-byte groups.
    full_len = in_len - in_len % 3

    ret = []
    for i in range(0, full_len, 3):
        triple = (data[i] << 16) | (data[i + 1] << 8) | data[i + 2]
        ret.append(alphabet[(triple >> 18) & 0x3F])
        ret.append(alphabet[(triple >> 12) & 0x3F])
        ret.append(alphabet[(triple >> 6) & 0x3F])
        ret.append(alphabet[triple & 0x3F])

    # One or two leftover bytes form a partial group padded with '='.
    remaining = in_len - full_len
    if remaining == 1:
        a = data[full_len]
        ret.append(alphabet[a >> 2])
        ret.append(alphabet[(a & 0x3) << 4])
        ret.append("==")
    elif remaining == 2:
        a = data[full_len]
        b = data[full_len + 1]
        ret.append(alphabet[a >> 2])
        ret.append(alphabet[((a & 0x3) << 4) | (b >> 4)])
        ret.append(alphabet[(b & 0xF) << 2])
        ret.append("=")

    return "".join(ret)

@noloader
Copy link

I believe this is incorrect:

char *p = const_cast<char*>(ret.c_str());

I believe the proper way to get the non-const pointer is to take the address of the first element:

char *p = &ret[0];

@makwiatkowski
Copy link

I believe this is incorrect:

char *p = const_cast<char*>(ret.c_str());

I believe the proper way to get the non-const pointer is to take the address of the first element:

char *p = &ret[0];

Both ways are correct, however original version clearly states to remove constness (and is recommended). Taking address of first element is C-way

@noloader
Copy link

Both ways are correct, however original version clearly states to remove constness (and is recommended).

Writing to the character array returned by c_str() is undefined behavior. Whomever told you it is OK was wrong.

@0x3f00
Copy link

0x3f00 commented May 28, 2021

Here is a version with the crash on short/empty strings fixed:
https://gist.github.com/0x3f00/90edbec0c04616d0b8c21586762bf1ac

@Paril
Copy link

Paril commented Oct 24, 2021

Both ways are correct, however original version clearly states to remove constness (and is recommended).

Writing to the character array returned by c_str() is undefined behavior. Whomever told you it is OK was wrong.

@noloader that was only a thing in C++98. C++11 allows you to write to the value returned by c_str()/data(), as it is guaranteed to be the backing storage of the pointer; in C++98 however it was up to implementations as to whether this returned a backing array or something else entirely.

@noloader
Copy link

noloader commented Oct 24, 2021

@Paril,

In C++11, data and c_str member functions are still const. You need C++17 for the non-const versions. See https://en.cppreference.com/w/cpp/string/basic_string/data and https://en.cppreference.com/w/cpp/string/basic_string/c_str.

If the intention was C++17 and above, it would be noted in the release notes or source code. There may even be a guard like __cplusplus >= 201703L to make it self-documenting. But even release notes and annotations do not help compiling on some platforms, like some ARM platforms still shipping with GCC 4.9 or compiling with most versions of Visual Studio.

If you want the non-const pointer, you have to take the address of the first element.

@Paril
Copy link

Paril commented Oct 25, 2021

@Paril,

In C++11, data and c_str member functions are still const. You need C++17 for the non-const versions. See https://en.cppreference.com/w/cpp/string/basic_string/data and https://en.cppreference.com/w/cpp/string/basic_string/c_str.

If the intention was C++17 and above, it would be noted in the release notes or source code. There may even be a guard like __cplusplus >= 201703L to make it self-documenting. But even release notes and annotations do not help compiling on some platforms, like some ARM platforms still shipping with GCC 4.9 or compiling with most versions of Visual Studio.

If you want the non-const pointer, you have to take the address of the first element.

Yes, they are const, but you are technically allowed to modify them now. The commenter above was saying that it's illegal according to the standard, which was only true in C++98.

@noloader
Copy link

@Paril,

Please put an end to this once and for all. Please cite the portion of the standard that says you get a writeable pointer from data or string prior to C++17.

@Paril
Copy link

Paril commented Oct 25, 2021

@noloader Sure. See section 21.4.7 of the C++11 standard (http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2013/n3690.pdf) - it states that the return value of data() and c_str() are equivalent to &operator[](i), which is the exact same access that you're saying you have to do to get a "non-const pointer". Thus, you are technically allowed to write to this pointer since they are equivalent. There is a guarantee that they are the same exact thing. Just because the return value of data() or c_str() are const doesn't mean you can't reinterpret-cast them.

This wasn't the case in the C++98 standard, which was a bit vague on exactly what pointer c_str() and data() would return. Both say you can't alter the return value, but, if it's guaranteed to be the same as &[](i) then it's a moot clause now.

EDIT: I'll concede that it's not legal to modify the pointer given to you by data() and c_str() in C++11, but given that c_str() + i is guaranteed to be equivalent to &::operator[](i), I can safely say that it really doesn't matter. If you wanted portable code even for C++98 then yes, you're absolutely right. My main point was just that it's not UB, it's well-defined that you'll be modifying the same data since C++11.

@noloader
Copy link

noloader commented Oct 25, 2021

@Paril,

And from the document you just cited under 21.4.7.1 basic_string accessors for data and c_str (p. 659):

Requires: The program shall not alter any of the values stored in the character array

@matheusgomes28
Copy link

matheusgomes28 commented Nov 28, 2021

Just a note to people who are thinking of copying and pasting this into their projects: don't! There are several issues with the example here, and I've unfortunately seen people copying this into their productions systems before.

If you are looking for a safer modern Cpp Base64 alternative, I've made a post on how to write a good C++ Base64 encoder. At the very least you will learn how base64 encoding works.

@skullchap
Copy link

@ravstrix
Copy link

static std::string Encode(const std::string data)

Did you forget a '&'?

@theicfire
Copy link

+1 @ravstrix , otherwise this seems like a copy

@zdj1414
Copy link

zdj1414 commented Aug 12, 2022

When the input data length is 1, it crashes.

/**
 * Encodes arbitrary bytes as padded Base64 text.
 *
 * @param data Bytes to encode; may be empty or shorter than one 3-byte group.
 * @return The encoded text, whose length is always a multiple of 4.
 */
static std::string Encode(const std::string &data) {
  static constexpr char sEncodingTable[] = {
      'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
      'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
      'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
      'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
      'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
      'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
      'w', 'x', 'y', 'z', '0', '1', '2', '3',
      '4', '5', '6', '7', '8', '9', '+', '/'
  };

  const size_t in_len = data.size();
  const size_t out_len = 4 * ((in_len + 2) / 3);
  std::string ret(out_len, '\0');

  // Read the input as unsigned bytes so shifts never see a negative value.
  const unsigned char *src =
      reinterpret_cast<const unsigned char *>(data.data());
  // Take the writable pointer from operator[] rather than casting away the
  // const of c_str().
  char *p = &ret[0];

  size_t i = 0;
  // `i + 2 < in_len` avoids the unsigned wrap-around that `in_len - 2`
  // would cause for inputs shorter than 2 bytes.
  for (; i + 2 < in_len; i += 3) {
    *p++ = sEncodingTable[src[i] >> 2];
    *p++ = sEncodingTable[((src[i] & 0x3) << 4) | (src[i + 1] >> 4)];
    *p++ = sEncodingTable[((src[i + 1] & 0xF) << 2) | (src[i + 2] >> 6)];
    *p++ = sEncodingTable[src[i + 2] & 0x3F];
  }

  if (i < in_len) {
    *p++ = sEncodingTable[src[i] >> 2];
    if (i + 1 == in_len) {
      // One trailing byte: two data characters, two padding characters.
      *p++ = sEncodingTable[(src[i] & 0x3) << 4];
      *p++ = '=';
    } else {
      // Two trailing bytes: three data characters, one padding character.
      *p++ = sEncodingTable[((src[i] & 0x3) << 4) | (src[i + 1] >> 4)];
      *p++ = sEncodingTable[(src[i + 1] & 0xF) << 2];
    }
    *p++ = '=';
  }

  return ret;
}

@29654761
Copy link

29654761 commented Mar 2, 2023

I fixed this:

// Fragment of an Encode() body: writes four output characters for every
// complete group of three input bytes. `ret`, `in_len`, `data` and
// `sEncodingTable` are declared outside this fragment.
size_t i=0; // initialised here so i has a defined value even when the loop below is skipped
char* p = const_cast<char*>(ret.c_str());
if (in_len >= 2) { // guard for the `in_len - 2` below; presumably in_len is an unsigned size_t, so the subtraction would wrap for shorter inputs (confirm against the full function)
for (i = 0; i < in_len - 2; i += 3) {
// Upper six bits of the first byte.
*p++ = sEncodingTable[(data[i] >> 2) & 0x3F];
// Lower two bits of the first byte followed by the upper four of the second.
*p++ = sEncodingTable[((data[i] & 0x3) << 4) | ((int)(data[i + 1] & 0xF0) >> 4)];
// Lower four bits of the second byte followed by the upper two of the third.
*p++ = sEncodingTable[((data[i + 1] & 0xF) << 2) | ((int)(data[i + 2] & 0xC0) >> 6)];
// Lower six bits of the third byte.
*p++ = sEncodingTable[data[i + 2] & 0x3F];
}
}

@joneeky
Copy link

joneeky commented Jan 6, 2024

I think the Encode method should be like this:
static std::string Encode(const char *data, size_t in_len )

Because converting a char* to std::string may lose data: the conversion stops at the first '\0'.
With this signature, I can use the function to encode image data.

@tomykaira
Copy link
Author

Everyone, thank you very much for the discussions.
I fixed the bugs that were pointed out, as well as the version-dependent behavior.

This is just a small example for Base64 encoding/decoding. Feel free to adapt it to your needs, like handling binary data 😄

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment