Skip to content

Instantly share code, notes, and snippets.

@maksverver
Created May 17, 2018 12:08
Show Gist options
  • Save maksverver/2b225637186d64878d3e635ef0a4fd18 to your computer and use it in GitHub Desktop.
Save maksverver/2b225637186d64878d3e635ef0a4fd18 to your computer and use it in GitHub Desktop.
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sqlite3.h>
/*
 * Inserts one row into tab(col), binding `text` as a UTF-8 string.
 *
 * db:   open database connection that already contains table `tab`.
 * text: NUL-terminated string; its length is determined by SQLite (the
 *       length argument is -1). It is bound with SQLITE_STATIC, i.e. not
 *       copied, which is safe because the statement is finalized before
 *       this function returns.
 *
 * Any SQLite failure is fatal: the error is printed to stderr and the
 * process aborts. Explicit checks are used instead of assert() so that
 * errors are still detected when the program is built with NDEBUG.
 */
void insert_utf8(sqlite3 *db, const char *text) {
  sqlite3_stmt *stmt = NULL;

  /* The _v2 interface makes sqlite3_step() report specific error codes. */
  int err = sqlite3_prepare_v2(db, "INSERT INTO tab(col) VALUES (?)", -1, &stmt, NULL);
  if (err != SQLITE_OK) {
    fprintf(stderr, "insert_utf8: prepare failed (%d): %s\n", err, sqlite3_errmsg(db));
    abort();
  }

  err = sqlite3_bind_text(stmt, 1, text, -1, SQLITE_STATIC);
  if (err != SQLITE_OK) {
    fprintf(stderr, "insert_utf8: bind failed (%d): %s\n", err, sqlite3_errmsg(db));
    abort();
  }

  /* An INSERT produces no rows, so a successful step yields SQLITE_DONE. */
  err = sqlite3_step(stmt);
  if (err != SQLITE_DONE) {
    fprintf(stderr, "insert_utf8: step failed (%d): %s\n", err, sqlite3_errmsg(db));
    abort();
  }

  sqlite3_finalize(stmt);
}
/*
 * Inserts one row into tab(col), binding `text` as a UTF-16 string.
 *
 * db:   open database connection that already contains table `tab`.
 * text: raw bytes of a UTF-16 string. Because the length argument is -1,
 *       the string must be terminated by a 16-bit zero code unit (two zero
 *       bytes). sqlite3_bind_text16() interprets the data in the host's
 *       native byte order. The buffer is bound with SQLITE_STATIC, i.e. not
 *       copied, which is safe because the statement is finalized before
 *       this function returns.
 *
 * Any SQLite failure is fatal: the error is printed to stderr and the
 * process aborts. Explicit checks are used instead of assert() so that
 * errors are still detected when the program is built with NDEBUG.
 */
void insert_utf16(sqlite3 *db, const char *text) {
  sqlite3_stmt *stmt = NULL;

  /* The _v2 interface makes sqlite3_step() report specific error codes. */
  int err = sqlite3_prepare_v2(db, "INSERT INTO tab(col) VALUES (?)", -1, &stmt, NULL);
  if (err != SQLITE_OK) {
    fprintf(stderr, "insert_utf16: prepare failed (%d): %s\n", err, sqlite3_errmsg(db));
    abort();
  }

  err = sqlite3_bind_text16(stmt, 1, text, -1, SQLITE_STATIC);
  if (err != SQLITE_OK) {
    fprintf(stderr, "insert_utf16: bind failed (%d): %s\n", err, sqlite3_errmsg(db));
    abort();
  }

  /* An INSERT produces no rows, so a successful step yields SQLITE_DONE. */
  err = sqlite3_step(stmt);
  if (err != SQLITE_DONE) {
    fprintf(stderr, "insert_utf16: step failed (%d): %s\n", err, sqlite3_errmsg(db));
    abort();
  }

  sqlite3_finalize(stmt);
}
/*
 * Demonstrates how SQLite stores UTF-16 text that contains unpaired
 * surrogates when it is bound with sqlite3_bind_text16().
 *
 * Opens "test.db", makes sure table tab(col TEXT) exists and inserts a
 * series of test strings. The UTF-16 inputs are written as little-endian
 * byte strings; since sqlite3_bind_text16() uses the host's native byte
 * order, the results described below assume a little-endian machine.
 *
 * Returns 0 on success and 1 if the database cannot be opened, set up or
 * closed (the error message is printed to stderr).
 */
int main(void) {
  sqlite3 *db = NULL;
  int err = sqlite3_open("test.db", &db);
  if (err != SQLITE_OK) {
    /* A handle is normally returned even on failure and must be closed. */
    fprintf(stderr, "%s\n", db ? sqlite3_errmsg(db) : "out of memory");
    sqlite3_close(db);
    return 1;
  }

  char *errmsg = NULL;
  err = sqlite3_exec(db, "CREATE TABLE IF NOT EXISTS tab(col TEXT)", NULL, NULL, &errmsg);
  if (err != SQLITE_OK) {
    /* errmsg may be NULL if SQLite could not allocate the message. */
    fprintf(stderr, "%s\n", errmsg ? errmsg : sqlite3_errmsg(db));
    sqlite3_free(errmsg);  /* The message is allocated by SQLite. */
    sqlite3_close(db);
    return 1;
  }

  insert_utf8(db, "foo # bar");

  // ASCII-only text encoded as UTF-16 works fine.
  insert_utf16(db, "f\0o\0o\0 \0$\0 \0b\0a\0r\0\0");

  // A valid UTF-16 surrogate pair (0xD83D 0xDCA9) gets translated to valid
  // UTF-8.
  insert_utf16(db, "f\0o\0o\0 \0\x3d\xd8\xa9\xdc \0b\0a\0r\0\0");

  // 0xD83D is a UTF-16 high surrogate, here followed by a space instead of a
  // low surrogate.
  // Ends up inserting "foo \xf0\x9f\x90\xa0bar" (UTF-8).
  // On disk: UTF-8 f09f90a0 = U+1F420, i.e. the following space is consumed
  // and corrupted. This is consistent with the space (0x0020) being treated
  // as the low half of a pair:
  //   0x10000 + ((0xD83D - 0xD800) << 10) + (0x0020 & 0x3FF) = 0x1F420.
  insert_utf16(db, "f\0o\0o\0 \0\x3d\xd8 \0b\0a\0r\0\0");

  // 0xDCA9 is a UTF-16 low surrogate with no preceding high surrogate.
  // On disk: UTF-8 f0ba90a0 = U+3A420 (the following space gets corrupted
  // here as well).
  insert_utf16(db, "f\0o\0o\0 \0\xa9\xdc \0b\0a\0r\0\0");

  // 0xD83D is a UTF-16 high surrogate, here at the very end of the string.
  // It gets converted to UTF-8 on its own (without consuming the following
  // character, which is the terminator).
  // On disk: UTF-8 eda0bd = U+D83D (invalid UTF-8).
  insert_utf16(db, "f\0o\0o\0 \0\x3d\xd8\0");

  err = sqlite3_close(db);
  if (err != SQLITE_OK) {
    fprintf(stderr, "%s\n", sqlite3_errmsg(db));
    return 1;
  }
  return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment