-
-
Save tomykaira/f0fd86b6c73063283afe550bc5d77594 to your computer and use it in GitHub Desktop.
#ifndef _MACARON_BASE64_H_ | |
#define _MACARON_BASE64_H_ | |
/** | |
* The MIT License (MIT) | |
* Copyright (c) 2016-2024 tomykaira | |
* | |
* Permission is hereby granted, free of charge, to any person obtaining | |
* a copy of this software and associated documentation files (the | |
* "Software"), to deal in the Software without restriction, including | |
* without limitation the rights to use, copy, modify, merge, publish, | |
* distribute, sublicense, and/or sell copies of the Software, and to | |
* permit persons to whom the Software is furnished to do so, subject to | |
* the following conditions: | |
* | |
* The above copyright notice and this permission notice shall be | |
* included in all copies or substantial portions of the Software. | |
* | |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE | |
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION | |
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION | |
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
*/ | |
#include <cstdint> | |
#include <string> | |
namespace macaron { | |
/**
 * Header-only Base64 encoder/decoder using the standard alphabet
 * (A-Z, a-z, 0-9, '+', '/') with '=' padding.
 */
class Base64 {
 public:
  /**
   * Encodes the bytes of `data` as Base64 text.
   *
   * Every byte of `data` is treated as an unsigned 8-bit value, so embedded
   * '\0' bytes and bytes >= 0x80 are encoded like any other byte.
   *
   * @param data Bytes to encode.
   * @return Encoded text of length 4 * ceil(data.size() / 3), '='-padded.
   */
  static std::string Encode(const std::string &data) {
    static constexpr char sEncodingTable[] = {
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
        'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'};

    const size_t in_len = data.size();
    const size_t out_len = 4 * ((in_len + 2) / 3);
    std::string ret(out_len, '\0');

    // Write through operator[] on the non-const string instead of casting
    // away the constness of c_str(), which the standard does not allow.
    size_t pos = 0;
    size_t i = 0;

    // Full 3-byte groups -> 4 output characters each. `i + 2 < in_len` cannot
    // underflow, unlike `i < in_len - 2` for short inputs.
    for (; i + 2 < in_len; i += 3) {
      const uint32_t b0 = static_cast<unsigned char>(data[i]);
      const uint32_t b1 = static_cast<unsigned char>(data[i + 1]);
      const uint32_t b2 = static_cast<unsigned char>(data[i + 2]);
      ret[pos++] = sEncodingTable[b0 >> 2];
      ret[pos++] = sEncodingTable[((b0 & 0x03) << 4) | (b1 >> 4)];
      ret[pos++] = sEncodingTable[((b1 & 0x0F) << 2) | (b2 >> 6)];
      ret[pos++] = sEncodingTable[b2 & 0x3F];
    }

    // Trailing 1 or 2 bytes: emit the partial group and pad to 4 characters.
    if (i < in_len) {
      const uint32_t b0 = static_cast<unsigned char>(data[i]);
      ret[pos++] = sEncodingTable[b0 >> 2];
      if (i + 1 == in_len) {
        ret[pos++] = sEncodingTable[(b0 & 0x03) << 4];
        ret[pos++] = '=';
      } else {
        const uint32_t b1 = static_cast<unsigned char>(data[i + 1]);
        ret[pos++] = sEncodingTable[((b0 & 0x03) << 4) | (b1 >> 4)];
        ret[pos++] = sEncodingTable[(b1 & 0x0F) << 2];
      }
      ret[pos++] = '=';
    }
    return ret;
  }

  /**
   * Decodes Base64 text into raw bytes.
   *
   * The input length must be a multiple of 4. Up to two '=' characters are
   * accepted as padding at the very end of the input; every other character
   * must belong to the Base64 alphabet.
   *
   * @param input Base64 text to decode.
   * @param out   Receives the decoded bytes on success. It is left untouched
   *              when an error is returned, and may be the same object as
   *              `input`.
   * @return An empty string on success, otherwise a human-readable error
   *         message.
   */
  static std::string Decode(const std::string &input, std::string &out) {
    // Maps a byte value to its 6-bit value; 64 marks bytes that are not part
    // of the alphabet (including '=').
    static constexpr unsigned char kDecodingTable[] = {
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 62, 64, 64, 64, 63, 52, 53, 54, 55, 56, 57,
        58, 59, 60, 61, 64, 64, 64, 64, 64, 64, 64, 0,  1,  2,  3,  4,  5,  6,
        7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
        25, 64, 64, 64, 64, 64, 64, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
        37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64};
    static constexpr uint32_t kInvalid = 64;

    const size_t in_len = input.size();
    if (in_len % 4 != 0)
      return "Input data size is not a multiple of 4";

    // Count trailing padding: "x===" style input is rejected below because
    // only the last two positions may hold '='.
    size_t pad = 0;
    if (in_len != 0 && input[in_len - 1] == '=') {
      pad = 1;
      if (input[in_len - 2] == '=')
        pad = 2;
    }
    const size_t data_len = in_len - pad;  // characters that carry payload
    const size_t out_len = in_len / 4 * 3 - pad;

    // Decode into a local buffer so that `out` stays untouched on error and
    // so that passing the same string as `input` and `out` is safe.
    std::string decoded;
    decoded.reserve(out_len);

    for (size_t i = 0; i < in_len; i += 4) {
      uint32_t triple = 0;
      for (size_t k = 0; k < 4; ++k) {
        uint32_t sextet = 0;  // padding positions contribute zero bits
        if (i + k < data_len) {
          // Index with unsigned char: a plain (possibly signed) char >= 0x80
          // would otherwise produce a negative, out-of-bounds index.
          sextet = kDecodingTable[static_cast<unsigned char>(input[i + k])];
          if (sextet == kInvalid)
            return "Input data contains an invalid character";
        }
        triple = (triple << 6) | sextet;
      }
      // The last group yields fewer than 3 bytes when padding is present.
      for (int shift = 16; shift >= 0 && decoded.size() < out_len; shift -= 8)
        decoded.push_back(static_cast<char>((triple >> shift) & 0xFF));
    }

    out.swap(decoded);
    return "";
  }
};
} // namespace macaron | |
#endif /* _MACARON_BASE64_H_ */ |
#include <cstdint> | |
#include <iostream> | |
#include <vector> | |
#include "Base64.h" | |
int test(const std::string &data) { | |
std::string out; | |
auto b64 = macaron::Base64::Encode(data); | |
auto error = macaron::Base64::Decode(b64, out); | |
if (!error.empty()) { | |
std::cout << "Error: " << error << std::endl; | |
return 1; | |
} | |
if (data == out) { | |
std::cout << "OK: " << out << std::endl; | |
} else { | |
std::cout << "Wrong: " << data << ", " << b64 << ", " << out << std::endl; | |
} | |
return 0; | |
} | |
/**
 * Runs the round-trip check on a few inputs covering every padding case
 * (lengths 0 through 5).
 *
 * @return 0 when every check passes, 1 when at least one check fails.
 */
int main() {
  static const char *const kSamples[] = {"hello", "",    "1",
                                         "22",    "333", "4444"};
  int failures = 0;
  for (const char *sample : kSamples)
    failures += test(sample);
  // Propagate failures through the exit status instead of always returning 0.
  return failures == 0 ? 0 : 1;
}
@noloader Sure. See section 21.4.7 of the C++11 standard (http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2013/n3690.pdf) - it states that the return values of data() and c_str() are equivalent to `&operator[](i)`, which is the exact same access that you're saying you have to do to get a "non-const pointer". Thus, you are technically allowed to write to this pointer since they are equivalent. There is a guarantee that they are the same exact thing. Just because the return value of data() or c_str() is const doesn't mean you can't reinterpret-cast it.
This wasn't the case in the C++98 standard, which was a bit vague on exactly what pointer c_str() and data() would return. Both say you can't alter the return value, but if it's guaranteed to be the same as `&operator[](i)`, then it's a moot clause now.
EDIT: I'll concede that it's not legal to modify the pointer given to you by data() and c_str() in C++11, but given that `c_str() + i` is guaranteed to be equivalent to `&operator[](i)`, I can safely say that it really doesn't matter. If you wanted portable code even for C++98 then yes, you're absolutely right. My main point was just that it's not UB: it's well-defined that you'll be modifying the same data since C++11.
And from the document you just cited, under 21.4.7.1 "basic_string accessors" for `data` and `c_str` (p. 659):
Requires: The program shall not alter any of the values stored in the character array
Just a note to people who are thinking of copying and pasting this into their projects: don't! There are several issues with the example here, and I've unfortunately seen people copying this into their production systems before.
If you are looking for a safer modern Cpp Base64 alternative, I've made a post on how to write a good C++ Base64 encoder. At the very least you will learn how base64 encoding works.
Thank you @tomykaira
C version: https://github.com/skullchap/b64/
static std::string Encode(const std::string data)
Did you forget a '&'?
+1 @ravstrix , otherwise this seems like a copy
When the input data length is 1, it will crash.
/**
 * Encodes the bytes of `data` as Base64 text (alphabet A-Z, a-z, 0-9, '+',
 * '/', padded with '=').
 *
 * @param data Bytes to encode; taken by const reference to avoid a copy.
 * @return Encoded text of length 4 * ceil(data.size() / 3).
 */
static std::string Encode(const std::string &data) {
    static constexpr char sEncodingTable[] = {
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
        'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
        'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
        'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
        'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
        'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
        'w', 'x', 'y', 'z', '0', '1', '2', '3',
        '4', '5', '6', '7', '8', '9', '+', '/'
    };

    const size_t in_len = data.size();
    std::string ret;
    ret.reserve(4 * ((in_len + 2) / 3));

    // Appending to the string avoids writing through a const_cast of c_str().
    size_t i = 0;
    // `i + 2 < in_len` is safe for in_len < 2, where `in_len - 2` would wrap.
    for (; i + 2 < in_len; i += 3) {
        const unsigned b0 = static_cast<unsigned char>(data[i]);
        const unsigned b1 = static_cast<unsigned char>(data[i + 1]);
        const unsigned b2 = static_cast<unsigned char>(data[i + 2]);
        ret.push_back(sEncodingTable[b0 >> 2]);
        ret.push_back(sEncodingTable[((b0 & 0x3) << 4) | (b1 >> 4)]);
        ret.push_back(sEncodingTable[((b1 & 0xF) << 2) | (b2 >> 6)]);
        ret.push_back(sEncodingTable[b2 & 0x3F]);
    }

    // One or two leftover bytes: emit the partial group, then pad with '='.
    if (i < in_len) {
        const unsigned b0 = static_cast<unsigned char>(data[i]);
        ret.push_back(sEncodingTable[b0 >> 2]);
        if (i + 1 == in_len) {
            ret.push_back(sEncodingTable[(b0 & 0x3) << 4]);
            ret.push_back('=');
        }
        else {
            const unsigned b1 = static_cast<unsigned char>(data[i + 1]);
            ret.push_back(sEncodingTable[((b0 & 0x3) << 4) | (b1 >> 4)]);
            ret.push_back(sEncodingTable[(b1 & 0xF) << 2]);
        }
        ret.push_back('=');
    }

    return ret;
}
I fixed this:
size_t i=0; // init i=0
char* p = const_cast<char*>(ret.c_str());
if (in_len >= 2) { // skip when in_len less than 2
for (i = 0; i < in_len - 2; i += 3) {
*p++ = sEncodingTable[(data[i] >> 2) & 0x3F];
*p++ = sEncodingTable[((data[i] & 0x3) << 4) | ((int)(data[i + 1] & 0xF0) >> 4)];
*p++ = sEncodingTable[((data[i + 1] & 0xF) << 2) | ((int)(data[i + 2] & 0xC0) >> 6)];
*p++ = sEncodingTable[data[i + 2] & 0x3F];
}
}
I think the Encode method should be like this:
static std::string Encode(const char *data, size_t in_len )
Because converting a char* to std::string may lose some data: the conversion stops at the first '\0'.
Yes, then I can use the function to encode image data.
Everyone, thank you very much for the discussions.
I fixed pointed bugs and version dependent behavior.
This is just a small example for Base64 encoding/decoding. Feel free to adapt it to your needs, like handling binary data 😄
omg this gist is still alive
omg this gist is still alive
Yes indeed. I am surprised as well.
Currently I am copy-pasting some code from one project (which was established in 2021) to another; it uses the header-only Base64 implementation from here, and only now did I realize that this piece of code still has issues left.
So does anyone know of any better alternatives for a header-only Base64 implementation?
To @tomykaira:
Thanks a lot for your effort, your code helped us greatly. But according to the discussion above, from my perspective, I would personally recommend using std::vector for the input parameter of 'Encode' instead of std::string, and also for the output of 'Decode', since the raw data is usually not human-readable text and so is not necessarily a 'string'.
What's more, personally I don't think returning an error message as a string value is good practice. I would prefer using exceptions in modern C++, or leveraging std::expected (C++23), or, the old-school way, using a predefined integer error code, since you don't have too many error types here but can still add more later.
What's more, about the frequently discussed 'const_cast' thing: what about using std::string::iterator instead? So this line
char *p = const_cast<char *>(ret.c_str());
Will become
std::string::iterator p = ret.begin();
Pretty neat, isn't it? And better add a check for `p == ret.end()` within the loop for safety.
@Paril,
Please put an end to this once and for all. Please cite the portion of the standard that says you get a writeable pointer from `data` or `string` prior to C++17.