Created
May 17, 2018 12:08
-
-
Save maksverver/2b225637186d64878d3e635ef0a4fd18 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <assert.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <sqlite3.h> | |
/*
 * Inserts one row into tab(col) with `text` bound as a UTF-8 string.
 *
 * db:   open database connection; the table `tab` must already exist.
 * text: NUL-terminated string; it is bound without copying, so it must stay
 *       valid until this function returns (the statement is finalized here).
 *
 * Any SQLite failure trips an assert, matching the rest of this test program.
 */
void insert_utf8(sqlite3 *db, const char *text) {
  sqlite3_stmt *stmt = NULL;
  /* sqlite3_prepare_v2 is the recommended replacement for the legacy
   * sqlite3_prepare: sqlite3_step then reports specific error codes. */
  int err = sqlite3_prepare_v2(db, "INSERT INTO tab(col) VALUES (?)", -1, &stmt, NULL);
  assert(err == SQLITE_OK);
  /* Length -1: read up to the first NUL terminator.
   * SQLITE_STATIC: SQLite neither copies nor frees the buffer. */
  err = sqlite3_bind_text(stmt, 1, text, -1, SQLITE_STATIC);
  assert(err == SQLITE_OK);
  err = sqlite3_step(stmt);
  assert(err == SQLITE_DONE);
  sqlite3_finalize(stmt);
}
/*
 * Inserts one row into tab(col) with `text` bound as a UTF-16 string.
 *
 * db:   open database connection; the table `tab` must already exist.
 * text: raw bytes of a UTF-16 string ending in a two-byte zero terminator.
 *       sqlite3_bind_text16 interprets the bytes in the host's native byte
 *       order. The buffer is bound without copying, so it must stay valid
 *       until this function returns (the statement is finalized here).
 *
 * Any SQLite failure trips an assert, matching the rest of this test program.
 */
void insert_utf16(sqlite3 *db, const char *text) {
  sqlite3_stmt *stmt = NULL;
  /* sqlite3_prepare_v2 is the recommended replacement for the legacy
   * sqlite3_prepare: sqlite3_step then reports specific error codes. */
  int err = sqlite3_prepare_v2(db, "INSERT INTO tab(col) VALUES (?)", -1, &stmt, NULL);
  assert(err == SQLITE_OK);
  /* Length -1: read up to the first UTF-16 zero terminator.
   * SQLITE_STATIC: SQLite neither copies nor frees the buffer. */
  err = sqlite3_bind_text16(stmt, 1, text, -1, SQLITE_STATIC);
  assert(err == SQLITE_OK);
  err = sqlite3_step(stmt);
  assert(err == SQLITE_DONE);
  sqlite3_finalize(stmt);
}
int main() { | |
sqlite3 *db = NULL; | |
int err = sqlite3_open("test.db", &db); | |
assert(err == SQLITE_OK); | |
char *errmsg = NULL; | |
sqlite3_exec(db, "CREATE TABLE IF NOT EXISTS tab(col TEXT)", NULL, NULL, &errmsg); | |
if (errmsg) { | |
puts(errmsg); | |
return 1; | |
} | |
insert_utf8(db, "foo # bar"); | |
// ASCII-only but encoded as UTF-16 works fine. | |
insert_utf16(db, "f\0o\0o\0 \0$\0 \0b\0a\0r\0\0"); | |
// Valid UTF-16 encoding gets translated to valid UTF-8 encoding. | |
insert_utf16(db, "f\0o\0o\0 \0\x3d\xd8\xa9\xdc \0b\0a\0r\0\0"); | |
// \xd83d is a UTF-16 high surrogate character. | |
// Ends up inserting "foo \xf0\x9f\x90\xa0bar" (UTF-8). | |
// On disk: UTF-8 f09f90a0 U0001f420 (space character gets corrupted) | |
insert_utf16(db, "f\0o\0o\0 \0\x3d\xd8 \0b\0a\0r\0\0"); | |
// \xdca9 is a UTF-16 low surrogate character. | |
// On disk: UTF-8 f0ba90a0 U0003a420 (space character gets corrupted) | |
insert_utf16(db, "f\0o\0o\0 \0\xa9\xdc \0b\0a\0r\0\0"); | |
// \xd83d is a UTF-16 high surrogate character. | |
// It gets converted to UTF-8 on its own (without consuming the following | |
// character). | |
// On disk: UTF-8 eda0bd U000d83d (invalid UTF-8) | |
insert_utf16(db, "f\0o\0o\0 \0\x3d\xd8\0"); | |
sqlite3_close(db); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment