Last active
July 1, 2021 11:39
-
-
Save tylov/37b6a21fa0062e49f6c7c3a15c2967e6 to your computer and use it in GitHub Desktop.
SSO String in C99 using all (typical 23) bytes available for the short string optimization (24th is the null terminator)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* MIT License
 *
 * Copyright (c) 2021 Tyge Løvset, NORCE, www.norceresearch.no
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
/* A string type with short string optimization (SSO) in C99. The short form
 * makes optimal use of the string object: with the typical 24-byte
 * representation, up to 23 characters are stored inline (the 24th byte holds
 * the null terminator).
 */
#ifndef STRING_INCLUDED | |
#define STRING_INCLUDED | |
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Integer type used for sizes and capacities. The typedef is skipped when a
 * macro named string_size_t is already defined by the includer. */
#ifndef string_size_t
typedef size_t string_size_t;
#endif

/* Layout-independent snapshot of a string: buffer pointer, length, capacity. */
typedef struct {
    char* data;
    string_size_t size;
    string_size_t cap;
} string_rep_t;

/* Array-of-const-char type; used by string_lit to take sizeof a literal. */
typedef const char string_literal_t[];

/* The string object. Both layouts occupy the same storage:
 *   lon - heap buffer pointer, length, and the capacity stored in encoded
 *         form (ncap);
 *   sso - inline character array as large as string_rep_t.
 */
typedef union {
    struct {
        char* data;
        string_size_t size;
        string_size_t ncap;
    } lon;
    struct {
        char data[sizeof(string_rep_t)];
    } sso;
} string;
/**************************** PRIVATE API **********************************/ | |
enum { SSO_CAP = offsetof(string, lon.ncap) + sizeof((string){0}.lon.ncap) - 1 }; | |
/* True when the tag byte (index SSO_CAP) has its top bit set, i.e. the string
 * uses the heap (lon) layout. */
#define string_is_long(s)         ((bool)((s)->sso.data[SSO_CAP] & 0x80))

/* Dispatches `memb` (an accessor suffix plus its argument list, such as
 * size(self)) to the string_l_ or string_s_ variant for the current layout. */
#define string_select_(s, memb)   (string_is_long(s) ? string_l_##memb : string_s_##memb)

/* Short-layout accessors. The tag byte stores SSO_CAP minus the length, so a
 * string of exactly SSO_CAP characters has a tag of 0, which is also its
 * null terminator. */
#define string_s_cap(s)           SSO_CAP
#define string_s_size(s)          (SSO_CAP - (s)->sso.data[SSO_CAP])
#define string_s_set_size(s, len) ((s)->sso.data[SSO_CAP] = SSO_CAP - (len), (s)->sso.data[len] = 0)
#define string_s_data(s)          ((s)->sso.data)
#define string_s_end(s)           ((s)->sso.data + string_s_size(s))
/* The long layout stores the capacity bit-complemented in lon.ncap, so that
 * for capacities below half the range of the size type the final byte of the
 * string object has its top bit set (the flag tested by string_is_long).
 * On big-endian targets the final byte is the least significant byte of ncap,
 * so the value is additionally rotated by one byte to bring the most
 * significant byte of the capacity into that position. */
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* Rotate x left by b bytes (0 < b < sizeof(x)). The expansion is wrapped in
 * parentheses so that a preceding unary operator such as ~ applies to the
 * whole rotated value rather than to the first operand only. */
#define byte_rotl_(x, b) ((x) << (b)*8 | (x) >> (sizeof(x) - (b))*8)
#define string_l_cap(s) (~byte_rotl_((s)->lon.ncap, sizeof((s)->lon.ncap) - 1))
#define string_l_set_cap(s, cap) ((s)->lon.ncap = ~byte_rotl_(cap, 1))
#else
#define string_l_cap(s) (~(s)->lon.ncap)
#define string_l_set_cap(s, cap) ((s)->lon.ncap = ~(cap))
#endif
/* Long-layout accessors. string_l_set_size stores the new length and writes
 * the null terminator at that index of the heap buffer in one expression. */
#define string_l_size(s)          ((s)->lon.size)
#define string_l_set_size(s, len) ((s)->lon.data[(s)->lon.size = (len)] = 0)
#define string_l_data(s)          ((s)->lon.data)
#define string_l_end(s)           ((s)->lon.data + string_l_size(s))
/* Releases the heap buffer; the string object itself is left as is. */
#define string_l_del(s)           free((s)->lon.data)
/* Sets the length of *self to len and writes the null terminator, using the
 * accessor that matches the current (short or long) layout. Capacity is not
 * checked here. */
static inline void string_set_size_(string* self, string_size_t len) {
    if (string_is_long(self)) {
        string_l_set_size(self, len);
    } else {
        string_s_set_size(self, len);
    }
}
/* Returns a layout-independent view (data pointer, size, capacity) of *self.
 * For a short string the data pointer refers to the inline array inside
 * *self, so the view is only meaningful while *self stays in place. */
static inline string_rep_t string_rep_(string* self) {
    string_rep_t view;
    if (string_is_long(self)) {
        view.data = self->lon.data;
        view.size = string_l_size(self);
        view.cap = string_l_cap(self);
    } else {
        view.data = self->sso.data;
        view.size = string_s_size(self);
        view.cap = string_s_cap(self);
    }
    return view;
}
/* Initializes *self (whose prior contents are ignored) with length len and
 * room for cap characters, and returns the writable buffer. The terminator is
 * placed at index len; the first len characters are left for the caller to
 * fill in. Capacities up to SSO_CAP use the inline layout, larger ones
 * allocate cap + 1 bytes.
 * NOTE(review): the malloc result is used without a NULL check. */
static inline char* string_init_(string* self, string_size_t len, string_size_t cap) {
    if (cap <= SSO_CAP) {
        string_s_set_size(self, len);
        return self->sso.data;
    }
    self->lon.data = malloc(cap + 1);
    string_l_set_size(self, len);
    string_l_set_cap(self, cap);
    return self->lon.data;
}
static inline char* string_reserve(string* self, string_size_t cap);

/* Moves the tail of *self that starts at index pos1 so that it starts at
 * index pos2 instead, growing or shrinking the string by (pos2 - pos1).
 * When the new length exceeds the capacity, the buffer is first enlarged to
 * roughly 1.6 times the old length plus the growth. Does nothing when both
 * positions are equal. Positions are not validated here. */
static inline void string_internal_move_(string* self, size_t pos1, size_t pos2) {
    if (pos1 != pos2) {
        string_rep_t cur = string_rep_(self);
        const size_t grown = cur.size + pos2 - pos1;
        if (grown > cur.cap) {
            cur.data = string_reserve(self, (cur.size*13 >> 3) + pos2 - pos1);
        }
        memmove(cur.data + pos2, cur.data + pos1, cur.size - pos1);
        string_set_size_(self, grown);
    }
}
/**************************** PUBLIC API **********************************/ | |
/* An empty short string: every byte zero except the tag byte, which holds
 * SSO_CAP (that is, SSO_CAP minus a length of 0). */
#define string_null     (string){.sso.data[SSO_CAP] = SSO_CAP}

/* Builds a string from a string literal; the length is taken from the size
 * of the literal, so no strlen call is made. */
#define string_lit(lit) string_from_n(lit, sizeof((string_literal_t){lit}) - 1)

/* "Not found" position: all bits of string_size_t set except the top one. */
#define string_npos     (~(string_size_t)0 >> 1)
static inline string string_init(void) { | |
return string_null; | |
} | |
/* Creates a string holding a copy of the first n bytes at str. */
static inline string string_from_n(const char* str, string_size_t n) {
    string out;
    char* dst = string_init_(&out, n, n);
    memcpy(dst, str, n);
    return out;
}
static inline string string_from(const char* str) { | |
return string_from_n(str, strlen(str)); | |
} | |
/* Creates a string of length size in which every character equals value. */
static inline string string_with_size(string_size_t size, char value) {
    string out;
    char* dst = string_init_(&out, size, size);
    memset(dst, value, size);
    return out;
}
/* Creates an empty string with room for cap characters. A cap of SSO_CAP or
 * less yields the inline layout. */
static inline string string_with_capacity(string_size_t cap) {
    string out;
    (void)string_init_(&out, 0, cap);
    return out;
}
/* Returns an independent copy of s. The copy's capacity is derived from the
 * length of s, not from its capacity. */
static inline string string_clone(string s) {
    const string_rep_t src = string_rep_(&s);
    string copy = string_from_n(src.data, src.size);
    return copy;
}
/* Frees the heap buffer of a long string; a short string owns no heap memory
 * and is left alone. *self is not reset afterwards. */
static inline void string_del(string* self) {
    if (!string_is_long(self)) {
        return;
    }
    string_l_del(self);
}
/* Truncates *self to length 0. The layout and capacity are kept. */
static inline void string_clear(string* self) {
    const string_size_t empty_len = 0;
    string_set_size_(self, empty_len);
}
/* Reduces the capacity of *self to its length.
 *  - A long string whose contents still exceed SSO_CAP has its heap buffer
 *    reallocated to size + 1 bytes. If the reallocation fails the string is
 *    left exactly as it was (shrinking is best-effort).
 *  - A long string whose contents fit inline is converted to the short
 *    layout and its heap buffer is freed.
 *  - A string that is already exactly full, or already short, is unchanged.
 */
static inline void string_shrink_to_fit(string* self) {
    string_rep_t rep = string_rep_(self);
    if (rep.size == rep.cap)
        return;
    if (rep.size > SSO_CAP) {
        /* Keep the old pointer until realloc has succeeded; assigning the
         * result directly would lose the buffer on failure. */
        char* data = realloc(rep.data, rep.size + 1);
        if (data == NULL)
            return;
        self->lon.data = data;
        string_l_set_cap(self, rep.size);
    } else if (rep.cap > SSO_CAP) {
        /* Copying overwrites lon.data inside *self; rep.data still holds the
         * heap pointer needed for the free below. */
        memcpy(self->sso.data, rep.data, rep.size + 1);
        string_s_set_size(self, rep.size);
        free(rep.data);
    }
}
/* Ensures *self can hold at least cap characters (plus the terminator) and
 * returns the, possibly relocated, character buffer. The capacity is never
 * reduced, and the contents and length are preserved.
 *
 * Returns NULL if memory could not be allocated; in that case *self is left
 * unchanged and remains valid.
 * NOTE(review): the callers visible in this file use the returned pointer
 * without checking it for NULL. */
static inline char* string_reserve(string* self, string_size_t cap) {
    if (string_is_long(self)) {
        if (cap > string_l_cap(self)) {
            /* Do not overwrite lon.data until realloc has succeeded. */
            char* data = realloc(self->lon.data, cap + 1);
            if (data == NULL)
                return NULL;
            self->lon.data = data;
            string_l_set_cap(self, cap);
        }
        return self->lon.data;
    }
    /* from short to long: */
    if (cap > string_s_cap(self)) {
        char* data = malloc(cap + 1);
        if (data == NULL)
            return NULL;
        /* Read the inline length and characters before the lon fields,
         * which share storage with them, are written. */
        string_size_t len = string_s_size(self);
        memcpy(data, self->sso.data, len);
        self->lon.data = data;
        string_l_set_size(self, len);
        string_l_set_cap(self, cap);
        return data;
    }
    return self->sso.data;
}
/* Sets the length of *self to size. When growing, the new characters are
 * filled with value, and the buffer is enlarged first if size exceeds the
 * capacity. When shrinking, the string is simply truncated. */
static inline void string_resize(string* self, string_size_t size, char value) {
    string_rep_t cur = string_rep_(self);
    const bool growing = size > cur.size;
    if (growing && size > cur.cap) {
        cur.data = string_reserve(self, size);
    }
    if (growing) {
        memset(cur.data + cur.size, value, size - cur.size);
    }
    string_set_size_(self, size);
}
/* Returns the writable character buffer of *self (heap buffer for a long
 * string, inline array for a short one). */
static inline char* string_data(string* self) {
    if (string_is_long(self)) {
        return self->lon.data;
    }
    return self->sso.data;
}
static inline const char* string_str(const string* self) { | |
return string_select_(self, data(self)); | |
} | |
/* Returns true when s has length 0. */
static inline bool string_empty(string s) {
    const string_size_t len = string_is_long(&s) ? string_l_size(&s)
                                                 : (string_size_t)string_s_size(&s);
    return len == 0;
}
/* Returns the number of characters in s, not counting the terminator. */
static inline string_size_t string_size(string s) {
    if (string_is_long(&s)) {
        return string_l_size(&s);
    }
    return string_s_size(&s);
}
/* Returns the number of characters in s; computes the same value as
 * string_size. */
static inline string_size_t string_length(string s) {
    if (string_is_long(&s)) {
        return string_l_size(&s);
    }
    return string_s_size(&s);
}
/* Returns how many characters s can hold without reallocating: SSO_CAP for a
 * short string, the decoded stored capacity for a long one. */
static inline string_size_t string_capacity(string s) {
    if (string_is_long(&s)) {
        return string_l_cap(&s);
    }
    return string_s_cap(&s);
}
/* Returns true when s1 and the C string str compare equal with strcmp
 * (comparison stops at the first null character). */
static inline bool string_equals(string s1, const char* str) {
    const char* lhs = string_str(&s1);
    return strcmp(lhs, str) == 0;
}
/* Returns true when s1 and s2 compare equal with strcmp (comparison stops at
 * the first null character). */
static inline bool string_equals_s(string s1, string s2) {
    const char* lhs = string_str(&s1);
    const char* rhs = string_str(&s2);
    return strcmp(lhs, rhs) == 0;
}
/* Three-way comparison of *s1 and *s2 with strcmp semantics: negative, zero
 * or positive. Takes pointers to the strings rather than copies. */
static inline int string_compare_ref(const string* s1, const string* s2) {
    const char* lhs = string_str(s1);
    const char* rhs = string_str(s2);
    return strcmp(lhs, rhs);
}
/* Returns the index of the first occurrence of the C string needle in s, or
 * string_npos when there is none. */
static inline string_size_t string_find(string s, const char* needle) {
    const char* hay = string_str(&s);
    const char* hit = strstr(hay, needle);
    if (hit == NULL) {
        return string_npos;
    }
    return (string_size_t)(hit - hay);
}
static inline bool string_find_s(string s, string needle) { | |
return string_find(s, string_str(&needle)); | |
} | |
/* Returns true when the C string needle occurs anywhere in s. */
static inline bool string_contains(string s, const char* needle) {
    const char* hit = strstr(string_str(&s), needle);
    return hit != NULL;
}
/* Returns true when needle occurs anywhere in s. */
static inline bool string_contains_s(string s, string needle) {
    const char* hay = string_str(&s);
    const char* pat = string_str(&needle);
    return strstr(hay, pat) != NULL;
}
/* Returns true when s begins with the C string sub. An empty sub matches
 * every string. */
static inline bool string_starts_with(string s, const char* sub) {
    const char* cur = string_str(&s);
    for (; *sub != '\0'; ++cur, ++sub) {
        /* A mismatch (including reaching the end of s first) means that sub
         * is not a prefix. */
        if (*cur != *sub) {
            return false;
        }
    }
    return true;
}
/* Returns true when s begins with the contents of sub. */
static inline bool string_starts_with_s(string s, string sub) {
    const char* prefix = string_str(&sub);
    return string_starts_with(s, prefix);
}
/* Returns true when s ends with the C string sub. An empty sub matches every
 * string. */
static inline bool string_ends_with(string s, const char* sub) {
    const string_rep_t view = string_rep_(&s);
    const string_size_t sub_len = strlen(sub);
    if (sub_len > view.size) {
        return false;
    }
    return memcmp(view.data + (view.size - sub_len), sub, sub_len) == 0;
}
/* Returns true when s ends with the contents of sub. */
static inline bool string_ends_with_s(string s, string sub) {
    const char* suffix = string_str(&sub);
    return string_ends_with(s, suffix);
}
/* Replaces the contents of *self with a copy of the first n bytes at str.
 *
 * If n fits in the current capacity the existing buffer is reused; str may
 * then overlap the current contents (memmove is used). Otherwise a new heap
 * buffer of n + 1 bytes is allocated, filled, and installed in *self, and the
 * previous heap buffer (if any) is freed.
 *
 * If the allocation fails, *self is left unchanged. */
static inline void string_assign_n(string* self, const char* str, string_size_t n) {
    string_rep_t rep = string_rep_(self);
    if (n > rep.cap) {
        char* data = malloc(n + 1);
        if (data == NULL)
            return;
        /* Copy before touching *self: writing the lon fields of a short
         * string, or freeing the old buffer, would invalidate a source that
         * points into the current contents. */
        memcpy(data, str, n);
        if (string_is_long(self))
            string_l_del(self);
        /* Install the new buffer; without this store the terminator below
         * would be written through a stale pointer. */
        self->lon.data = data;
        string_l_set_size(self, n);
        string_l_set_cap(self, n);
        return;
    }
    memmove(rep.data, str, n);
    string_set_size_(self, n);
}
/* Replaces the contents of *self with a copy of the C string str. */
static inline void string_assign(string* self, const char* str) {
    const string_size_t len = strlen(str);
    string_assign_n(self, str, len);
}
/* Replaces the contents of *self with a copy of the contents of s. */
static inline void string_copy(string* self, string s) {
    const string_rep_t src = string_rep_(&s);
    string_assign_n(self, src.data, src.size);
}
/* Appends the first n bytes at str to *self. When the result does not fit,
 * the buffer is enlarged to roughly 1.6 times the old length plus n. */
static inline void string_append_n(string* self, const char* str, string_size_t n) {
    string_rep_t cur = string_rep_(self);
    const string_size_t total = cur.size + n;
    if (total > cur.cap) {
        /* Self-append support: remember where str lies relative to the old
         * buffer so that it can be re-based if the buffer moves. */
        const string_size_t src_off = (string_size_t)(str - cur.data);
        cur.data = string_reserve(self, (cur.size*13 >> 3) + n);
        if (src_off <= cur.size) {
            str = cur.data + src_off;
        }
    }
    memcpy(cur.data + cur.size, str, n);
    string_set_size_(self, total);
}
/* Appends the C string str to *self. */
static inline void string_append(string* self, const char* str) {
    const string_size_t len = strlen(str);
    string_append_n(self, str, len);
}
/* Appends the contents of s to *self. */
static inline void string_append_s(string* self, string s) {
    const string_rep_t src = string_rep_(&s);
    string_append_n(self, src.data, src.size);
}
/* Replaces the len characters of *self starting at index pos with the first
 * n bytes at str, growing or shrinking the string as needed.
 *
 * Out-of-range arguments are handled the same way string_erase_n clamps its
 * count: a pos past the end makes the call a no-op, and a len that reaches
 * past the end is truncated to the end of the string. Without this, the
 * length of the tail to move would wrap around.
 * str must not point into *self. */
static inline void string_replace_n(string* self, size_t pos, size_t len, const char* str, size_t n) {
    string_rep_t rep = string_rep_(self);
    if (pos > rep.size)
        return;
    if (len > rep.size - pos)
        len = rep.size - pos;
    /* Open (or close) the gap, then fill it. The buffer is fetched again
     * because the move may have reallocated it. */
    string_internal_move_(self, pos + len, pos + n);
    memcpy(&string_data(self)[pos], str, n);
}
/* Replaces len characters of *self starting at pos with the C string str. */
static inline void string_replace(string* self, size_t pos, size_t len, const char* str) {
    const size_t str_len = strlen(str);
    string_replace_n(self, pos, len, str, str_len);
}
/* Replaces len characters of *self starting at pos with the contents of s. */
static inline void string_replace_s(string* self, size_t pos, size_t len, string s) {
    const string_rep_t src = string_rep_(&s);
    string_replace_n(self, pos, len, src.data, src.size);
}
/* Inserts the first n bytes at str into *self before index pos. Implemented
 * as a replacement of zero characters. */
static inline void string_insert_n(string* self, size_t pos, const char* str, size_t n) {
    const size_t replaced = 0;
    string_replace_n(self, pos, replaced, str, n);
}
/* Inserts the C string str into *self before index pos. */
static inline void string_insert(string* self, size_t pos, const char* str) {
    const size_t str_len = strlen(str);
    string_replace_n(self, pos, 0, str, str_len);
}
/* Inserts the contents of s into *self before index pos. */
static inline void string_insert_s(string* self, size_t pos, string s) {
    const string_rep_t src = string_rep_(&s);
    string_replace_n(self, pos, 0, src.data, src.size);
}
/* Removes up to n characters from *self starting at index pos. A count that
 * reaches past the end is truncated to the end of the string. A pos past the
 * end makes the call a no-op; without that check the clamp below would
 * compute size - pos with wrap-around and the move length would be huge. */
static inline void string_erase_n(string* self, size_t pos, size_t n) {
    string_rep_t rep = string_rep_(self);
    if (pos > rep.size)
        return;
    if (n > rep.size - pos)
        n = rep.size - pos;
    /* Slide the tail down over the erased range, then shorten the string. */
    memmove(&rep.data[pos], &rep.data[pos + n], rep.size - (pos + n));
    string_set_size_(self, rep.size - n);
}
/* Reads characters from fp into *self, replacing its previous contents,
 * until delim or end of input is reached. The delimiter is consumed but not
 * stored. Returns false, leaving *self untouched, when the very first read
 * yields EOF; returns true otherwise. */
static inline bool string_getdelim(string *self, int delim, FILE *fp) {
    int ch = fgetc(fp);
    if (ch == EOF) {
        return false;
    }
    string_rep_t buf = string_rep_(self);
    string_size_t count = 0;
    while (ch != delim && ch != EOF) {
        if (count == buf.cap) {
            /* Record the length first so that the characters read so far are
             * the ones carried over when the buffer grows. */
            string_set_size_(self, count);
            buf.cap = (buf.cap*13 >> 3) + 16;
            buf.data = string_reserve(self, buf.cap);
        }
        buf.data[count++] = (char) ch;
        ch = fgetc(fp);
    }
    string_set_size_(self, count);
    return true;
}
/* Reads one line from fp into *self, using '\n' as the delimiter. See
 * string_getdelim for the return value. */
static inline bool string_getline(string *self, FILE *fp) {
    const int newline = '\n';
    return string_getdelim(self, newline, fp);
}
#endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment