Skip to content

Instantly share code, notes, and snippets.

@tylov
Last active July 1, 2021 11:39
Show Gist options
  • Save tylov/37b6a21fa0062e49f6c7c3a15c2967e6 to your computer and use it in GitHub Desktop.
Save tylov/37b6a21fa0062e49f6c7c3a15c2967e6 to your computer and use it in GitHub Desktop.
SSO String in C99 using all (typical 23) bytes available for the short string optimization (24th is the null terminator)
/* MIT License
*
* Copyright (c) 2021 Tyge Løvset, NORCE, www.norceresearch.no
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/* A string type with short string optimization (SSO) in C99. Short strings are
* stored inside the string object itself, using every byte except the last one
* for characters (23 characters in a 24-byte object on typical 64-bit targets).
*/
#ifndef STRING_INCLUDED
#define STRING_INCLUDED
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Integer type used for lengths and capacities. The typedef is skipped when a
 * macro named string_size_t is already defined. */
#ifndef string_size_t
typedef size_t string_size_t;
#endif

/* Unpacked view of a string: buffer pointer, length and capacity. */
typedef struct {
    char* data;
    string_size_t size;
    string_size_t cap;
} string_rep_t;

/* Unsized array of const char; string_lit() uses it to measure a literal. */
typedef const char string_literal_t[];

/* The string object itself. Both members occupy the same storage:
 *   lon - heap form: buffer pointer, length, and an encoded capacity (ncap).
 *   sso - in-place form: a character array as large as string_rep_t. */
typedef union {
    struct {
        char* data;
        string_size_t size;
        string_size_t ncap;
    } lon;
    struct {
        char data[sizeof(string_rep_t)];
    } sso;
} string;
/**************************** PRIVATE API **********************************/
/* Index of the last byte of the string object (offset of lon.ncap plus its
 * size, minus one). That byte holds the short-form bookkeeping, and the same
 * value is reported as the short-form capacity. */
enum { SSO_CAP = offsetof(string, lon.ncap) + sizeof((string){0}.lon.ncap) - 1 };
/* True when bit 7 (value 128) of the last byte is set; that marks the long form. */
#define string_is_long(s) (bool)((s)->sso.data[SSO_CAP] & 128)
/* Dispatch helper: memb (name plus argument list) is pasted onto string_l_ for
 * long strings or string_s_ for short ones, e.g. string_select_(s, size(s))
 * evaluates string_l_size(s) or string_s_size(s). */
#define string_select_(s, memb) (string_is_long(s) ? string_l_##memb : string_s_##memb)
/* ---- short (in-place) form ---- */
/* Capacity of the short form is the constant SSO_CAP. */
#define string_s_cap(s) SSO_CAP
/* The last byte stores SSO_CAP minus the length, so length = SSO_CAP - last byte. */
#define string_s_size(s) (SSO_CAP - (s)->sso.data[SSO_CAP])
/* Store SSO_CAP - len in the last byte, then write the terminator at data[len].
 * When len == SSO_CAP both writes hit the same byte and both store 0. */
#define string_s_set_size(s, len) ((s)->sso.data[SSO_CAP] = SSO_CAP - (len), (s)->sso.data[len] = 0)
#define string_s_data(s) (s)->sso.data
/* Pointer to the terminator position of a short string. */
#define string_s_end(s) ((s)->sso.data + string_s_size(s))
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* Rotate x left by b whole bytes. */
#define byte_rotl_(x, b) ((x) << (b)*8 | (x) >> (sizeof(x) - (b))*8)
/* Big-endian: the capacity is stored complemented and rotated left by one
 * byte; reading rotates by the remaining sizeof-1 bytes and complements again.
 * NOTE(review): presumably the rotation places the complemented high-order
 * byte in the last byte of the object, where string_is_long() looks - confirm
 * on a big-endian target. */
#define string_l_cap(s) (~byte_rotl_((s)->lon.ncap, sizeof((s)->lon.ncap) - 1))
#define string_l_set_cap(s, cap) ((s)->lon.ncap = ~byte_rotl_(cap, 1))
#else
/* Otherwise the capacity is stored as its bitwise complement. */
#define string_l_cap(s) (~(s)->lon.ncap)
#define string_l_set_cap(s, cap) ((s)->lon.ncap = ~(cap))
#endif
/* ---- long (heap) form ---- */
#define string_l_size(s) ((s)->lon.size)
/* Store len in lon.size and write the terminator at data[len]. */
#define string_l_set_size(s, len) ((s)->lon.data[(s)->lon.size = (len)] = 0)
#define string_l_data(s) (s)->lon.data
/* Pointer to the terminator position of a long string. */
#define string_l_end(s) ((s)->lon.data + string_l_size(s))
/* Release the heap buffer of a long string. */
#define string_l_del(s) free((s)->lon.data)
/* Set the length of *self to len and write the terminating '\0' at data[len],
 * using the long or short representation as appropriate. Does not allocate;
 * no check is made here that len fits the current capacity. */
static inline void string_set_size_(string* self, string_size_t len) {
string_select_(self, set_size(self, len));
}
/* Return an unpacked {data, size, cap} snapshot of *self.
 * The snapshot is a copy: changing it does not change *self, and its data
 * pointer refers to self's own storage (heap buffer or in-place array). */
static inline string_rep_t string_rep_(string* self) {
    if (string_is_long(self)) {
        string_rep_t heap_view = {self->lon.data, string_l_size(self), string_l_cap(self)};
        return heap_view;
    }
    string_rep_t inplace_view = {self->sso.data, string_s_size(self), string_s_cap(self)};
    return inplace_view;
}
/* Initialize the (uninitialized) object *self with length len and room for
 * cap characters, and return a pointer to its character storage so the caller
 * can fill in the first len bytes. The terminator at data[len] is written here.
 *
 * cap > SSO_CAP selects the heap form; otherwise the in-place form is used.
 * len is expected not to exceed cap (not checked).
 *
 * Returns NULL if the heap buffer cannot be allocated (or cap + 1 would wrap
 * around). In that case *self is set to a valid empty short string, so it can
 * still be passed to string_del() safely. */
static inline char* string_init_(string* self, string_size_t len, string_size_t cap) {
    if (cap > SSO_CAP) {
        /* cap + 1 > cap is false only when cap + 1 wraps to 0. */
        char* data = (cap + 1 > cap) ? malloc(cap + 1) : NULL;
        if (data == NULL) {
            string_s_set_size(self, 0);
            return NULL;
        }
        self->lon.data = data;
        string_l_set_size(self, len);
        string_l_set_cap(self, cap);
        return data;
    }
    string_s_set_size(self, len);
    return self->sso.data;
}
static inline char* string_reserve(string* self, string_size_t cap);

/* Shift the tail of *self that starts at offset pos1 so that it starts at
 * offset pos2, and adjust the length by (pos2 - pos1). When the new length
 * exceeds the current capacity, the buffer is grown first via string_reserve()
 * with some head-room (13/8 of the old length plus the shift).
 * pos1 is assumed to be at most the current length (not checked). */
static inline void string_internal_move_(string* self, size_t pos1, size_t pos2) {
    if (pos1 != pos2) {
        string_rep_t cur = string_rep_(self);
        const size_t result_len = cur.size + pos2 - pos1;
        if (result_len > cur.cap) {
            cur.data = string_reserve(self, (cur.size*13 >> 3) + pos2 - pos1);
        }
        /* memmove: source and destination ranges may overlap. */
        memmove(cur.data + pos2, cur.data + pos1, cur.size - pos1);
        string_set_size_(self, result_len);
    }
}
/**************************** PUBLIC API **********************************/
/* An empty string value: every byte zero except the last, which holds SSO_CAP
 * (short form, length SSO_CAP - SSO_CAP == 0). */
#define string_null (string){.sso.data[SSO_CAP] = SSO_CAP}
/* Build a string from a string literal. The length is sizeof of a const char[]
 * compound literal initialized from lit, minus one for the terminator. */
#define string_lit(lit) string_from_n(lit, sizeof((string_literal_t){lit}) - 1)
/* "Not found" position returned by string_find(): all bits of string_size_t
 * set, shifted right by one. */
#define string_npos (~(string_size_t)0 >> 1)
/* Return an empty string (the string_null value). No memory is allocated. */
static inline string string_init(void) {
    string empty = string_null;
    return empty;
}
/* Create a string holding a copy of the first n bytes at str.
 * Length and capacity are both initialized to n. */
static inline string string_from_n(const char* str, string_size_t n) {
    string result;
    char* dst = string_init_(&result, n, n);
    memcpy(dst, str, n);
    return result;
}
static inline string string_from(const char* str) {
return string_from_n(str, strlen(str));
}
/* Create a string of length size in which every character equals value. */
static inline string string_with_size(string_size_t size, char value) {
    string result;
    char* dst = string_init_(&result, size, size);
    memset(dst, value, size);
    return result;
}
/* Create an empty string (length 0) initialized with room for cap characters. */
static inline string string_with_capacity(string_size_t cap) {
    string result;
    (void) string_init_(&result, 0, cap);
    return result;
}
/* Return a new, independent string with the same contents as s.
 * The copy is built with string_from_n() from s's data and length. */
static inline string string_clone(string s) {
    string_rep_t src = string_rep_(&s);
    string copy = string_from_n(src.data, src.size);
    return copy;
}
/* Release the heap buffer owned by *self, if it is in the long form.
 * Short strings own no heap memory, so nothing is done for them.
 * *self itself is not modified. */
static inline void string_del(string* self) {
    if (!string_is_long(self)) {
        return;
    }
    string_l_del(self);
}
/* Make *self empty by setting its length to 0 (terminator written at data[0]).
 * The current representation and its capacity are kept; nothing is freed. */
static inline void string_clear(string* self) {
string_set_size_(self, 0);
}
/* Reduce the storage of *self to what its current length needs.
 *  - length == capacity: nothing to do.
 *  - long string whose length still exceeds SSO_CAP: the heap buffer is
 *    reallocated to length + 1 bytes. If realloc fails, the string is left
 *    exactly as it was (the old buffer stays valid and owned by *self).
 *  - long string whose length fits in SSO_CAP: the characters are copied into
 *    the in-place buffer and the heap buffer is freed.
 *  - short string: nothing to do. */
static inline void string_shrink_to_fit(string* self) {
    string_rep_t rep = string_rep_(self);
    if (rep.size == rep.cap)
        return;
    if (rep.size > SSO_CAP) {
        /* Never assign realloc's result straight to the owning pointer:
         * on failure it returns NULL and the original block is still live. */
        char* shrunk = realloc(self->lon.data, string_l_size(self) + 1);
        if (shrunk == NULL)
            return;
        self->lon.data = shrunk;
        string_l_set_cap(self, string_l_size(self));
    } else if (rep.cap > SSO_CAP) {
        /* rep.data is a saved copy of the heap pointer, so overwriting the
         * union through sso.data is safe. +1 copies the terminator too. */
        memcpy(self->sso.data, rep.data, rep.size + 1);
        string_s_set_size(self, rep.size);
        free(rep.data);
    }
}
/* Ensure *self has room for at least cap characters (plus terminator) and
 * return a pointer to its character storage. Contents and length are kept.
 * The capacity never shrinks: if cap does not exceed the current capacity the
 * string is left as is.
 *
 * Returns NULL if more memory is needed but cannot be obtained (or cap + 1
 * would wrap around); in that case *self is left completely unchanged. */
static inline char* string_reserve(string* self, string_size_t cap) {
    if (string_is_long(self)) {
        if (cap > string_l_cap(self)) {
            /* Keep the old pointer until realloc has succeeded. */
            char* grown = (cap + 1 > cap) ? realloc(self->lon.data, cap + 1) : NULL;
            if (grown == NULL)
                return NULL;
            self->lon.data = grown;
            string_l_set_cap(self, cap);
        }
        return self->lon.data;
    }
    /* from short to long: */
    if (cap > string_s_cap(self)) {
        char* data = (cap + 1 > cap) ? malloc(cap + 1) : NULL;
        if (data == NULL)
            return NULL;
        /* Read the length and copy the characters before the union is
         * overwritten by the long-form fields. */
        string_size_t len = string_s_size(self);
        memcpy(data, self->sso.data, len);
        self->lon.data = data;
        string_l_set_size(self, len);
        string_l_set_cap(self, cap);
        return data;
    }
    return self->sso.data;
}
/* Change the length of *self to size. When growing, the new positions are
 * filled with value (the buffer is enlarged via string_reserve() if size
 * exceeds the capacity). When shrinking, only the length changes. */
static inline void string_resize(string* self, string_size_t size, char value) {
    string_rep_t cur = string_rep_(self);
    if (size > cur.size) {
        const string_size_t added = size - cur.size;
        if (size > cur.cap) {
            cur.data = string_reserve(self, size);
        }
        memset(cur.data + cur.size, value, added);
    }
    string_set_size_(self, size);
}
/* Return a writable pointer to the characters of *self: the heap buffer for a
 * long string, the in-place array for a short one. */
static inline char* string_data(string* self) {
return string_select_(self, data(self));
}
/* Return a read-only pointer to the characters of *self: the heap buffer for a
 * long string, the in-place array for a short one. The pointer refers to
 * storage owned by (or inside) *self. */
static inline const char* string_str(const string* self) {
return string_select_(self, data(self));
}
/* Return true when s has length 0. */
static inline bool string_empty(string s) {
    const string_size_t len = string_select_(&s, size(&s));
    return len == 0;
}
/* Return the number of characters in s (terminator not counted). */
static inline string_size_t string_size(string s) {
    const string_size_t len = string_select_(&s, size(&s));
    return len;
}
/* Return the number of characters in s; same result as string_size(). */
static inline string_size_t string_length(string s) {
    const string_size_t len = string_select_(&s, size(&s));
    return len;
}
/* Return the number of characters s can hold without growing:
 * SSO_CAP for a short string, the decoded stored capacity for a long one. */
static inline string_size_t string_capacity(string s) {
    const string_size_t room = string_select_(&s, cap(&s));
    return room;
}
/* Return true when s1 and the C string str compare equal with strcmp().
 * The comparison therefore stops at the first '\0' on either side. */
static inline bool string_equals(string s1, const char* str) {
    const char* lhs = string_str(&s1);
    const int order = strcmp(lhs, str);
    return order == 0;
}
/* Return true when s1 and s2 compare equal with strcmp().
 * The comparison therefore stops at the first '\0' on either side. */
static inline bool string_equals_s(string s1, string s2) {
    const char* lhs = string_str(&s1);
    const char* rhs = string_str(&s2);
    return strcmp(lhs, rhs) == 0;
}
/* Three-way comparison of *s1 and *s2 using strcmp(): negative, zero or
 * positive as *s1 sorts before, equal to, or after *s2. */
static inline int string_compare_ref(const string* s1, const string* s2) {
    const char* lhs = string_str(s1);
    const char* rhs = string_str(s2);
    return strcmp(lhs, rhs);
}
/* Return the offset of the first occurrence of the C string needle in s
 * (searched with strstr()), or string_npos when there is none. */
static inline string_size_t string_find(string s, const char* needle) {
    const char* haystack = string_str(&s);
    const char* hit = strstr(haystack, needle);
    if (hit == NULL) {
        return string_npos;
    }
    return (string_size_t)(hit - haystack);
}
static inline bool string_find_s(string s, string needle) {
return string_find(s, string_str(&needle));
}
/* Return true when the C string needle occurs somewhere in s (strstr()). */
static inline bool string_contains(string s, const char* needle) {
    const char* hit = strstr(string_str(&s), needle);
    return hit != NULL;
}
/* Return true when needle occurs somewhere in s (strstr()). */
static inline bool string_contains_s(string s, string needle) {
    const char* haystack = string_str(&s);
    const char* pattern = string_str(&needle);
    return strstr(haystack, pattern) != NULL;
}
/* Return true when s begins with the C string sub. An empty sub always
 * matches. The scan stops at the first mismatch, which includes reaching the
 * end of s before the end of sub. */
static inline bool string_starts_with(string s, const char* sub) {
    const char* text = string_str(&s);
    for (size_t i = 0; sub[i] != '\0'; ++i) {
        if (text[i] != sub[i]) {
            return false;
        }
    }
    return true;
}
/* Return true when s begins with sub; forwards to string_starts_with() with
 * sub's character data. */
static inline bool string_starts_with_s(string s, string sub) {
return string_starts_with(s, string_str(&sub));
}
/* Return true when s ends with the C string sub. A sub longer than s never
 * matches; an empty sub always matches. */
static inline bool string_ends_with(string s, const char* sub) {
    string_rep_t hay = string_rep_(&s);
    const string_size_t sub_len = strlen(sub);
    if (sub_len > hay.size) {
        return false;
    }
    const char* tail = hay.data + (hay.size - sub_len);
    return memcmp(tail, sub, sub_len) == 0;
}
/* Return true when s ends with sub; forwards to string_ends_with() with sub's
 * character data. */
static inline bool string_ends_with_s(string s, string sub) {
return string_ends_with(s, string_str(&sub));
}
/* Replace the contents of *self with the first n bytes at str.
 *
 * If n exceeds the current capacity a buffer of n + 1 bytes is obtained
 * (realloc of the existing heap buffer, or a fresh allocation when *self is
 * in the short form) and *self switches to / stays in the long form with
 * capacity n. If that allocation fails (or n + 1 would wrap around), *self is
 * left unchanged.
 *
 * memmove is used for the copy, so str may point into self's own buffer when
 * no growth is needed. */
static inline void string_assign_n(string* self, const char* str, string_size_t n) {
    string_rep_t rep = string_rep_(self);
    if (n > rep.cap) {
        char* grown = (n + 1 > n)
                    ? realloc(string_is_long(self) ? rep.data : NULL, n + 1)
                    : NULL;
        if (grown == NULL)
            return;
        rep.data = grown;
        /* The object itself must point at the new buffer too; rep is only a
         * local snapshot. Without this the terminator written by
         * string_set_size_() would go through a stale pointer. */
        self->lon.data = grown;
        string_l_set_cap(self, n);
    }
    memmove(rep.data, str, n);
    string_set_size_(self, n);
}
/* Replace the contents of *self with a copy of the NUL-terminated C string
 * str; forwards to string_assign_n() with strlen(str). */
static inline void string_assign(string* self, const char* str) {
string_assign_n(self, str, strlen(str));
}
/* Replace the contents of *self with a copy of the contents of s
 * (via string_assign_n() with s's data and length). */
static inline void string_copy(string* self, string s) {
    string_rep_t src = string_rep_(&s);
    const char* from = src.data;
    string_assign_n(self, from, src.size);
}
/* Append the first n bytes at str to *self.
 * When the result does not fit the current capacity the buffer is grown via
 * string_reserve() with head-room (13/8 of the old length plus n). If str
 * pointed into self's own buffer, it is re-based onto the new buffer so that
 * appending a string to itself works. */
static inline void string_append_n(string* self, const char* str, string_size_t n) {
    string_rep_t cur = string_rep_(self);
    const string_size_t joined_len = cur.size + n;
    if (joined_len > cur.cap) {
        /* Offset of str relative to the old buffer; only meaningful (and only
         * used) when it falls within the old contents. */
        const string_size_t src_off = (string_size_t)(str - cur.data);
        cur.data = string_reserve(self, (cur.size*13 >> 3) + n);
        if (src_off <= cur.size) {
            str = cur.data + src_off;
        }
    }
    memcpy(cur.data + cur.size, str, n);
    string_set_size_(self, joined_len);
}
/* Append the NUL-terminated C string str to *self; forwards to
 * string_append_n() with strlen(str). */
static inline void string_append(string* self, const char* str) {
string_append_n(self, str, strlen(str));
}
/* Append the contents of s to *self (via string_append_n()). */
static inline void string_append_s(string* self, string s) {
    string_rep_t src = string_rep_(&s);
    const char* from = src.data;
    string_append_n(self, from, src.size);
}
/* Replace the len characters of *self starting at offset pos with the first
 * n bytes at str. The tail after the replaced range is shifted (growing the
 * buffer if needed) and the new bytes are then copied into place.
 * pos + len is assumed to lie within the current contents (not checked). */
static inline void string_replace_n(string* self, size_t pos, size_t len, const char* str, size_t n) {
    const size_t old_tail = pos + len;
    const size_t new_tail = pos + n;
    string_internal_move_(self, old_tail, new_tail);
    /* Fetch the data pointer only now: the move may have reallocated. */
    char* dst = string_data(self) + pos;
    memcpy(dst, str, n);
}
/* Replace the len characters of *self starting at offset pos with the
 * NUL-terminated C string str; forwards to string_replace_n(). */
static inline void string_replace(string* self, size_t pos, size_t len, const char* str) {
string_replace_n(self, pos, len, str, strlen(str));
}
/* Replace the len characters of *self starting at offset pos with the
 * contents of s (via string_replace_n()). */
static inline void string_replace_s(string* self, size_t pos, size_t len, string s) {
    string_rep_t src = string_rep_(&s);
    const char* from = src.data;
    string_replace_n(self, pos, len, from, src.size);
}
/* Insert the first n bytes at str into *self at offset pos; implemented as a
 * replacement of zero characters via string_replace_n(). */
static inline void string_insert_n(string* self, size_t pos, const char* str, size_t n) {
string_replace_n(self, pos, 0, str, n);
}
/* Insert the NUL-terminated C string str into *self at offset pos;
 * implemented as a replacement of zero characters via string_replace_n(). */
static inline void string_insert(string* self, size_t pos, const char* str) {
string_replace_n(self, pos, 0, str, strlen(str));
}
/* Insert the contents of s into *self at offset pos; implemented as a
 * replacement of zero characters via string_replace_n(). */
static inline void string_insert_s(string* self, size_t pos, string s) {
    string_rep_t src = string_rep_(&s);
    const char* from = src.data;
    string_replace_n(self, pos, 0, from, src.size);
}
/* Remove up to n characters from *self starting at offset pos.
 * n is clamped so the erased range never extends past the end of the string.
 * If pos is at or beyond the end there is nothing to erase and *self is left
 * unchanged (previously pos > length made the unsigned subtraction wrap
 * around, defeating the clamp and moving memory out of bounds). */
static inline void string_erase_n(string* self, size_t pos, size_t n) {
    string_rep_t rep = string_rep_(self);
    if (pos >= rep.size)
        return;
    const size_t avail = rep.size - pos; /* characters from pos to the end */
    if (n > avail)
        n = avail;
    /* Slide the surviving tail down over the erased range. */
    memmove(&rep.data[pos], &rep.data[pos + n], avail - n);
    string_set_size_(self, rep.size - n);
}
/* Read characters from fp into *self, replacing its previous contents, until
 * delim or end of input is reached. The delimiter is consumed but not stored.
 *
 * Returns false (leaving *self untouched) when the very first read yields EOF;
 * otherwise returns true, including for an empty record.
 * The buffer is grown via string_reserve() whenever it fills up (new capacity
 * = 13/8 of the old one plus 16). */
static inline bool string_getdelim(string *self, int delim, FILE *fp) {
    int ch = fgetc(fp);
    if (ch == EOF) {
        return false;
    }
    string_rep_t buf = string_rep_(self);
    string_size_t count = 0;
    while (ch != delim && ch != EOF) {
        if (count == buf.cap) {
            /* Record the length first so that string_reserve() carries the
             * characters read so far over to the larger buffer. */
            string_set_size_(self, count);
            buf.cap = (buf.cap*13 >> 3) + 16;
            buf.data = string_reserve(self, buf.cap);
        }
        buf.data[count] = (char) ch;
        ++count;
        ch = fgetc(fp);
    }
    string_set_size_(self, count);
    return true;
}
/* Read one line from fp into *self; forwards to string_getdelim() with '\n'
 * as the delimiter and returns its result. */
static inline bool string_getline(string *self, FILE *fp) {
return string_getdelim(self, '\n', fp);
}
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment