Skip to content

Instantly share code, notes, and snippets.

@imaami
Last active March 4, 2024 19:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save imaami/6b3a991ebd9a0b7df5fc6c9d356b0804 to your computer and use it in GitHub Desktop.
Save imaami/6b3a991ebd9a0b7df5fc6c9d356b0804 to your computer and use it in GitHub Desktop.
A dumb string thing.
/* SPDX-License-Identifier: GPL-2.0-or-later */
/** @file dstr.c
*
* @author Juuso Alasuutari
*/
#include <errno.h>
#include "dstr.h"
/**
 * @brief Sanity-check the string/length pair given to an assigning function.
 *
 * A zero `*len` asks this function to measure @p src itself; a nonzero
 * `*len` is taken on trust as the caller's own measurement. On success the
 * accepted length is written back through @p len.
 *
 * @param[in]     src  String pointer supplied by the caller.
 * @param[in,out] len  Address of the length value; dereferenced
 *                     unconditionally once @p src is known to be non-NULL.
 *
 * @return 0 when the input is acceptable, otherwise an errno-style code.
 *
 * @retval EFAULT        @p src is NULL.
 * @retval EINVAL        @p src is the empty string.
 * @retval ENAMETOOLONG  The length plus its null terminator cannot be
 *                       expressed as `unsigned int`.
 */
static force_inline int
dstr_validate_input (char const *src,
                     size_t     *len)
{
    /* The string pointer comes straight from the caller; never trust it. */
    if (!src)
        return EFAULT;

    size_t measured = *len;

    if (measured == 0u) {
        /* No length given: measure the string ourselves. A result of
         * zero means the string is empty, which is rejected.
         */
        measured = __builtin_strlen(src);
        if (measured == 0u)
            return EINVAL;
    } else if (*src == '\0') {
        /* The caller measured the string; only rule out the empty one. */
        return EINVAL;
    }

    /* One `unsigned int` slot must remain for the null terminator. */
    if (measured >= UINT_MAX)
        return ENAMETOOLONG;

    *len = measured;
    return 0;
}
/**
 * @brief Replace the value of a @ref dstr with a copy of a string.
 *
 * Strings shorter than the inline array are stored in the array (any heap
 * block owned by @p dest is released first). Longer strings reuse the heap
 * block already owned by @p dest when it is large enough, and otherwise
 * (re)allocate a block of exactly `len + 1` bytes.
 *
 * At most @p len bytes are read from @p src, and reading stops early at a
 * null terminator, so @p src may be a slice of a longer buffer. The bytes
 * of the destination beyond the copied characters are zero-filled up to the
 * end of the inline array or up to the end of the previous heap string.
 *
 * @note @p src is presumably expected not to point into storage held by
 *       @p dest; the inline and realloc paths overwrite or release that
 *       storage before copying. TODO confirm against callers.
 *
 * @param[in,out] dest Object to modify. Left untouched on failure.
 * @param[in]     src  New value.
 * @param[in]     len  Length of @p src, or 0 to have it measured.
 * @param[out]    err  Receives an errno value on failure, otherwise not
 *                     accessed. Must not be NULL.
 *
 * @return `true` on success, `false` on invalid input or allocation
 *         failure.
 */
bool
dstr_set (dstr       *dest,
          char const *src,
          size_t      len,
          int        *err)
{
    int e = dstr_validate_input(src, &len);
    if (e) {
        *err = e;
        return false;
    }

    /* Validation guarantees 0 < len < UINT_MAX, so this cannot truncate. */
    unsigned int const len_ = (unsigned int)len;

    /* Heap capacity owned by dest; 0 for inline arrays and for views. */
    unsigned int size = dstr_is_pointer(dest) ? dest->size : 0u;
    char *ptr;

    if (len_ < sizeof dest->arr) {
        /* Small string: drop any owned heap block, store inline. */
        if (size)
            free(dest->ptr);
        ptr = &dest->arr[0];
        size = sizeof dest->arr - 1u;
    } else if (len_ < size) {
        /* The owned block is big enough. Remember how far the old string
         * reached so that its stale tail gets zeroed below.
         */
        ptr = dest->ptr;
        size = dest->len > len_ ? dest->len : len_;
    } else {
        /* Grow the owned block, or allocate a fresh one when dest holds
         * an inline array or a non-owning view (size == 0).
         */
        ptr = realloc(size ? dest->ptr : NULL, (size_t)len_ + 1u);
        if (!ptr) {
            /* ISO C does not promise that realloc() sets errno. */
            *err = errno ? errno : ENOMEM;
            return false;
        }
        dest->ptr = ptr;
        dest->size = len_ + 1u;
        size = len_;
    }

    /* Copy no more than the requested length. Bounding the copy by the
     * destination capacity instead would read past src[len] and store
     * characters that dest->len does not account for.
     */
    __builtin_strncpy(ptr, src, len_);

    /* Terminate, and clear everything up to and including index `size`. */
    __builtin_memset(ptr + len_, 0, (size_t)(size - len_) + 1u);

    dest->len = len_;
    return true;
}
/**
 * @brief Hand the contents of @p src over to @p dest, then reset @p src.
 *
 * Nothing happens when both arguments refer to the same object. Heap memory
 * owned by @p dest is released before it is overwritten. An inline string
 * is copied character by character; any other representation is moved by
 * copying the pointer and size fields.
 *
 * @param dest Receiver of the string.
 * @param src  Donor; reinitialized with @ref dstr_init() afterwards.
 */
void
dstr_move (dstr *dest,
           dstr *src)
{
    if (dest != src) {
        /* Let go of whatever dest holds before it is overwritten. */
        if (dstr_owns_memory(dest))
            free(dest->ptr);

        if (!dstr_is_array(src)) {
            /* Heap string, view, or empty: the raw fields are enough. */
            dest->size = src->size;
            dest->ptr = src->ptr;
        } else {
            /* Inline string: copy it and force termination. */
            size_t const last = sizeof dest->arr - 1u;
            __builtin_strncpy(dest->arr, src->arr, last);
            dest->arr[last] = '\0';
        }

        dest->len = src->len;
        dstr_init(src);
    }
}
/* SPDX-License-Identifier: GPL-2.0-or-later */
/** @file dstr.h
*
* @author Juuso Alasuutari
*/
#ifndef DSTR_H_
#define DSTR_H_
#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
/** @brief `inline` for compilers hard of hearing.
 *
 * Expands to `inline` plus the GNU `always_inline` function attribute, so
 * it is only usable with compilers that understand `__attribute__`.
 */
#define force_inline inline __attribute__((always_inline))
/** @brief Check if a value is a char array.
 *
 * Forms a null pointer to the type of @p x and lets `_Generic` match it
 * against "pointer to array of `sizeof(x)` `char`" and its `const`
 * counterpart. Evaluates to 1 on a match and to 0 for every other type
 * (including plain `char *`, which never has pointer-to-array type).
 *
 * @param x Expression to classify; used only as an operand of
 *          `__typeof__` and `sizeof`.
 */
#define is_char_array(x) _Generic((__typeof__(x) *){0}, \
char (*)[sizeof(x)]: 1, char const (*)[sizeof(x)]: 1, default: 0)
/** @brief Check if a value is a string literal.
 *
 * Evaluates to `__builtin_constant_p(x)` when @p x is a char array (see
 * `is_char_array()`), and to 0 otherwise.
 *
 * NOTE(review): this relies on `__builtin_constant_p` reporting string
 * literals as constant and named `char` arrays as non-constant; that is
 * compiler- and possibly optimization-dependent — confirm for the
 * toolchains in use.
 */
#define is_string_literal(x) __builtin_choose_expr( \
is_char_array(x), __builtin_constant_p(x), 0)
/** @brief Get the length of a string value.
 *
 * For a char array the result is `sizeof(x) - 1u`, i.e. the array is taken
 * to be completely filled by the string and its terminator; an earlier
 * embedded null character is not detected. For anything else the result is
 * `__builtin_strlen(x)`.
 */
#define string_length(x) __builtin_choose_expr( \
is_char_array(x), sizeof(x) - 1u, __builtin_strlen(x))
/** @brief Evaluates to the string literal passed as the first parameter if
 * it would fit in the array specified by the second parameter, but
 * otherwise evaluates to the empty string.
 *
 * "Fits" means `sizeof(lit) <= sizeof(what)`; `sizeof(lit)` counts the
 * literal's null terminator.
 *
 * This macro is useful for removing warnings resulting from non-taken code
 * paths being checked for correctness, a common occurrence with constructs
 * such as `_Generic()` and `__builtin_choose_expr()`.
 *
 * @param lit  The string literal which becomes the result of this macro if
 *             it would fit in the array specified by `what`. If `lit` is
 *             not recognized by `is_string_literal()`, the result is `""`.
 * @param what The array, or type of array, to use as type information when
 *             determining the maximum size of `lit`.
 */
#define safe_string_literal(lit, what) \
__builtin_choose_expr( \
is_string_literal(lit) && sizeof(lit) <= sizeof(what), \
lit, "")
/**
 * @brief dstr stands for dumb string.
 *
 * The `len` field selects how the leading union is interpreted (see
 * `dstr_is_array()`, `dstr_is_pointer()` and `dstr_owns_memory()`):
 *
 * - `len == 0`: the string is empty.
 * - `0 < len < sizeof arr`: the characters live inline in `arr`.
 * - `len >= sizeof arr`: the union holds a pointer. A nonzero `size` marks
 *   a heap block owned by this object (`ptr`); a zero `size` marks a
 *   borrowed, non-owning view (`view`).
 *
 * `arr` is declared with exactly the combined size of the pointer and the
 * `size` field, and the pointer/size pair is packed so that it is not
 * padded beyond that.
 */
typedef struct dstr
{
_Alignas(char *) union
{
struct __attribute__((packed))
{
union {
char *ptr; //!< Heap string pointer; used by dstr_get() when `size` is nonzero.
char const *view; //!< Borrowed string view; used by dstr_get() when `size` is zero.
};
/** @brief Total heap allocation size; zero when no heap memory is owned. */
unsigned int size;
};
/** @brief Inline string array overlaying the pointer and size fields. */
char arr[sizeof (char *) + sizeof (unsigned int)];
};
/** @brief String length excluding the null terminator. */
unsigned int len;
} dstr;
/**
 * @brief Compound literal @ref dstr view initialized with a string literal.
 *
 * When the literal, terminator included, fits in the inline array, the
 * characters are copied into `arr` by the initializer. Otherwise the result
 * points at the literal itself with `size == 0`, i.e. it does not own it.
 * In both cases `len` is `sizeof (x) - 1u`.
 *
 * `safe_string_literal()` keeps the non-selected inline initializer valid
 * when the literal is too long for `arr`.
 *
 * @param x A string literal.
 */
#define make_dstr_view_from_literal(x) \
__builtin_choose_expr( \
sizeof (x) <= sizeof ((dstr *)0)->arr, \
(dstr){.arr = {safe_string_literal(x, ((dstr *)0)->arr)}, \
.len = sizeof (x) - 1u}, \
(dstr){.view = x, .size = 0u, .len = sizeof (x) - 1u})
/**
 * @brief Compound literal @ref dstr view initialized with a `char` array.
 *
 * An array no larger than the inline storage is copied with
 * `make_dstr_from_small_string()`; a larger one is referenced through
 * `view` with `size == 0`, so the result does not own it and is only valid
 * while the array is.
 *
 * The length is taken to be `sizeof (x) - 1u`, i.e. the array is assumed to
 * be exactly filled by the string and its terminator.
 *
 * @param x A `char` array (not a pointer).
 */
#define make_dstr_view_from_array(x) \
__builtin_choose_expr( \
sizeof (x) <= sizeof ((dstr *)0)->arr, \
make_dstr_from_small_string(x, sizeof (x) - 1u), \
(dstr){.view = x, .size = 0u, .len = sizeof (x) - 1u})
/**
 * @brief Initialize a @ref dstr view.
 *
 * Dispatches at compile time on the kind of @p x:
 * - string literal: `make_dstr_view_from_literal()`;
 * - other `char` array: `make_dstr_view_from_array()`;
 * - anything else (e.g. `char *`): `make_dstr_view_from_decay()` with the
 *   length obtained from `__builtin_strlen(x)`.
 *
 * None of these paths allocates; long strings are referenced, not copied.
 */
#define make_dstr_view(x) \
__builtin_choose_expr( \
is_string_literal(x), \
make_dstr_view_from_literal(x), \
__builtin_choose_expr( \
is_char_array(x), \
make_dstr_view_from_array(x), \
make_dstr_view_from_decay(x, __builtin_strlen(x))))
/**
 * @brief Initialize a @ref dstr from a `char` array.
 *
 * An array no larger than the inline storage is copied with
 * `make_dstr_from_small_string()`. A larger one is duplicated on the heap
 * with `__builtin_strndup()`, and the result records `sizeof (x)` as the
 * allocation size and `sizeof (x) - 1u` as the length.
 *
 * NOTE(review): the return value of `__builtin_strndup()` is not checked.
 * If the allocation fails the result has a NULL `ptr` together with a
 * nonzero `len` and `size`.
 *
 * @param x A `char` array (not a pointer).
 */
#define make_dstr_from_array(x) \
__builtin_choose_expr( \
sizeof (x) <= sizeof ((dstr *)0)->arr, \
make_dstr_from_small_string(x, sizeof (x) - 1u), \
(dstr){.ptr = __builtin_strndup((x), sizeof (x) - 1u), \
.size = sizeof (x), \
.len = sizeof (x) - 1u})
/**
 * @brief Initialize a @ref dstr.
 *
 * Dispatches at compile time on the kind of @p x:
 * - string literal: `make_dstr_view_from_literal()`, so a long literal is
 *   referenced rather than copied;
 * - other `char` array: `make_dstr_from_array()`, which copies the string
 *   (inline or onto the heap);
 * - anything else (e.g. `char *`): `make_dstr_from_decay()` with the length
 *   obtained from `__builtin_strlen(x)`.
 */
#define make_dstr(x) \
__builtin_choose_expr( \
is_string_literal(x), \
make_dstr_view_from_literal(x), \
__builtin_choose_expr( \
is_char_array(x), \
make_dstr_from_array(x), \
make_dstr_from_decay(x, __builtin_strlen(x))))
/**
 * @brief Build an inline (array-backed) @ref dstr from a short string.
 *
 * Exactly @p len bytes are copied from @p src into a zero-filled inline
 * array. No bounds check is made here.
 *
 * @note @p len must be smaller than the size of the inline array so that
 *       the copy fits and a terminating zero byte remains.
 *
 * @param[in] src String pointer.
 * @param[in] len String length.
 * @return A @ref dstr by value.
 */
__attribute__((pure))
static force_inline dstr
make_dstr_from_small_string (char const *const src,
                             size_t            len)
{
    /* Start from all-zero storage so the string is always terminated. */
    dstr result = {.arr = {0}};

    result.len = (unsigned)len;
    __builtin_memcpy(result.arr, src, len);

    return result;
}
/**
 * @brief Create a @ref dstr view by value from a `char *` and string length.
 *
 * @param[in] src String pointer.
 * @param[in] len String length.
 *
 * @return An empty @ref dstr when @p len is zero or too large for an
 *         `unsigned int` length plus terminator; an inline copy when the
 *         string fits in the inline array; otherwise a non-owning view of
 *         @p src.
 */
__attribute__((pure))
static force_inline dstr
make_dstr_view_from_decay (char const *const src,
                           size_t            len)
{
    /* Nothing to represent, or not representable at all. */
    if (len == 0u || len > UINT_MAX - 1u)
        return (dstr){0};

    /* Short strings are always stored inline, even for views. */
    if (len < sizeof ((dstr *)0)->arr)
        return make_dstr_from_small_string(src, len);

    /* A zero size marks the pointer as borrowed. */
    return (dstr){.view = src, .size = 0u, .len = (unsigned)len};
}
/**
 * @brief Create a @ref dstr by value from a `char *` and string length.
 *
 * Short strings are stored inline. Longer strings are copied into a heap
 * block of exactly `len + 1` bytes, which the returned object owns. At most
 * @p len bytes are read from @p src.
 *
 * This function is deliberately not marked `pure`: it may allocate, and
 * merging two calls into one would make two objects share one heap block.
 *
 * @param[in] src String pointer.
 * @param[in] len String length.
 *
 * @return The new @ref dstr, or an empty @ref dstr when @p len is zero,
 *         when @p len is too large for an `unsigned int` length plus
 *         terminator, or when the allocation fails.
 */
static force_inline dstr
make_dstr_from_decay (char const *const src,
                      size_t            len)
{
    if (!len || len > UINT_MAX - 1u)
        return (dstr){0};

    if (len < sizeof ((dstr *)0)->arr)
        return make_dstr_from_small_string(src, len);

    /* Allocate the full recorded size, so that the `size` field is
     * truthful even if src turns out to be shorter than len.
     */
    char *copy = malloc(len + 1u);
    if (!copy)
        return (dstr){0};

    __builtin_strncpy(copy, src, len);
    copy[len] = '\0';

    return (dstr){.ptr = copy,
                  .size = (unsigned)len + 1u,
                  .len = (unsigned)len};
}
/**
 * @brief Reset a @ref dstr to the empty state.
 *
 * The pointer, allocation size and length are all cleared. Nothing is
 * freed; use @ref dstr_fini() for an object that may own memory.
 *
 * @param[out] s Object to initialize.
 */
static force_inline void
dstr_init (dstr *s)
{
    *s = (dstr){.ptr = NULL, .size = 0u, .len = 0u};
}
/**
 * @brief Get the value held by a @ref dstr as a pointer to `const char`.
 *
 * If the @ref dstr contains a pointer – either a heap-allocated string or
 * a view – then the value returned is the pointer itself. If the @ref dstr
 * contains a small string, then the value returned is the address of the
 * first character of the string.
 *
 * If the @ref dstr is empty, then the value returned is a pointer to a
 * null-terminated empty string. (At the implementation level, this is
 * simply the same as the value returned for a small string, with the
 * exception that the first character is the null terminator.)
 *
 * @warning The returned value is only valid until the next call to a
 *          function that mutates the @ref dstr.
 *
 * @note This is a macro and @p x appears in its expansion more than once;
 *       pass an expression without side effects.
 *
 * @param[in] x Pointer to the @ref dstr.
 * @return The value held by `x` as `const char *`.
 */
#define dstr_get(x) ( \
!dstr_is_pointer(x) \
? (char const *)(x)->arr \
: !(x)->size ? (x)->view \
: (char const *)(x)->ptr)
/**
 * @brief Tell whether a @ref dstr holds no characters.
 *
 * @param[in] s Object to check.
 *
 * @return Whether or not @p s is empty.
 * @retval true  The recorded length of @p s is zero.
 * @retval false The recorded length of @p s is nonzero.
 */
__attribute__((pure))
static force_inline bool
dstr_is_empty (dstr const *s)
{
    return s->len == 0u;
}
/**
 * @brief Tell whether a @ref dstr stores its characters inline.
 *
 * @param[in] s Object to check.
 *
 * @return Whether or not @p s contains an array.
 * @retval true  The length is nonzero and smaller than the inline array;
 *               implies `@ref !dstr_is_empty() && @ref !dstr_is_pointer()`.
 * @retval false @p s is empty or contains a pointer.
 */
__attribute__((pure))
static force_inline bool
dstr_is_array (dstr const *s)
{
    unsigned int const n = s->len;

    return n != 0u && n < sizeof s->arr;
}
/**
 * @brief Tell whether a @ref dstr refers to its characters via a pointer.
 *
 * @note A pointer does not imply ownership of the pointed-to memory; use
 *       @ref dstr_owns_memory() to find that out.
 *
 * @param[in] s Object to check.
 *
 * @return Whether or not @p s contains a pointer.
 * @retval true  The length is too large for the inline array; implies
 *               @ref !dstr_is_empty().
 * @retval false The length would fit in the inline array.
 */
__attribute__((pure))
static force_inline bool
dstr_is_pointer (dstr const *s)
{
    return !(s->len < sizeof s->arr);
}
/**
 * @brief Tell whether a @ref dstr is responsible for a heap allocation.
 *
 * @param[in] s Object to check.
 *
 * @return Whether or not @p s owns allocated heap memory.
 * @retval true  @p s contains a pointer and a nonzero allocation size;
 *               implies @ref dstr_is_pointer().
 * @retval false @p s is empty, inline, or a non-owning view.
 */
__attribute__((pure))
static force_inline bool
dstr_owns_memory (dstr const *s)
{
    /* The size field is only meaningful for the pointer representation. */
    if (!dstr_is_pointer(s))
        return false;

    return s->size != 0u;
}
/**
 * @brief Release a @ref dstr and leave it empty.
 *
 * Heap memory is freed only when the object owns it; inline strings and
 * views are simply forgotten.
 *
 * @param[in] s Object to uninitialize.
 */
static force_inline void
dstr_fini (dstr *s)
{
    /* free(NULL) is a no-op, which covers the non-owning cases. */
    free(dstr_owns_memory(s) ? s->ptr : NULL);
    dstr_init(s);
}
/**
 * @brief Test a @ref dstr and a `char const *` string for equality.
 *
 * The lengths are compared first; the bytes are compared only when the
 * lengths match and are nonzero.
 *
 * @param[in] s   Object to compare.
 * @param[in] str String to compare with.
 * @param[in] len Length of @p str.
 *
 * @return Whether or not the strings are equal.
 */
__attribute__((pure))
static force_inline bool
dstr_eq (dstr const *s,
         char const *str,
         size_t      len)
{
    if (len != s->len)
        return false;

    /* Two empty strings are equal; str is not touched in that case. */
    if (len == 0u)
        return true;

    return __builtin_memcmp(dstr_get(s), str, len) == 0;
}
/**
 * @brief Set the value of a @ref dstr, discarding its old value (if any).
 *
 * The string is copied: short strings go into the inline array, longer ones
 * into heap memory owned by @p dest (an existing allocation is reused when
 * it is large enough).
 *
 * @param[in,out] dest Object to modify. Left unmodified on failure.
 * @param[in] src New value. Must be a non-empty string.
 * @param[in] len Length of @p src. Pass 0 to have the length measured
 *                with `strlen()`.
 * @param[out] err Where to save an errno value on failure,
 *                 otherwise not accessed. Mandatory.
 *
 * On failure `*err` is `EFAULT` when @p src is NULL, `EINVAL` when @p src
 * is the empty string, `ENAMETOOLONG` when the length plus terminator does
 * not fit in `unsigned int`, or the error code of a failed memory
 * allocation.
 *
 * @return Whether or not the operation succeeds.
 */
extern bool
dstr_set (dstr *dest,
char const *src,
size_t len,
int *err);
/**
 * @brief Transfer the raw contents of one @ref dstr to another.
 *
 * @ref dstr_move() does nothing if @p src and @p dest point to the same
 * address.
 *
 * If @p src owns heap memory, its ownership is transferred. Memory owned by
 * @p dest is freed before the move operation. If @p src is a non-owning
 * view, @p dest becomes a view of the same string.
 *
 * If @p dest is not empty, but @p src is empty, @ref dstr_move() is the
 * equivalent of calling @ref dstr_fini() on @p dest.
 *
 * After the transfer @p src is reset with @ref dstr_init(). Calling
 * @ref dstr_get() on @p src afterwards is therefore guaranteed to return an
 * empty string unless @p src and @p dest point to the same address.
 *
 * @ref dstr_move() is guaranteed to never allocate new memory, but it may
 * free an existing allocation. Thus any pointers returned by @ref dstr_get()
 * on @p src or @p dest become invalid when @ref dstr_move() is called.
 *
 * @param dest Where to move the string.
 * @param src Source of the moved string.
 */
extern void
dstr_move (dstr *dest,
dstr *src);
#endif /* DSTR_H_ */
/* SPDX-License-Identifier: GPL-2.0-or-later */
/** @file test_dstr.c
*
* @author Juuso Alasuutari
*/
#include <stdio.h>
#include "dstr.h"
/* Map a boolean to a display string: 0 selects "-" and 1 selects "yes".
 * Both strings live in one literal, "-\0yes"; the argument is doubled to
 * index either offset 0 or offset 2. Only 0 and 1 are valid arguments.
 */
#define Y(b) &"-\0yes"[(b) << 1U]
/* Produce the string "f(x)". The extra level of indirection lets the
 * arguments be macro-expanded before STRIFY_ stringifies them.
 */
#define STRIFY(f,x) STRIFY_(f, x)
#define STRIFY_(f,x) #f "(" #x ")"
/* X-macro callbacks for TEST_SET. All three take the same argument list
 * (variable name, constructor macro, constructor argument) even when they
 * ignore part of it.
 *
 * construct: define `var` as the dstr produced by `ctor(val)`.
 */
#define construct(var, ctor, val) dstr var = ctor(val);
/* discard: release `var` with dstr_fini(). */
#define discard(var, ctor, val) dstr_fini(&var);
/* describe: print one table row for `s` (the constructor expression, the
 * string value, and the array / pointer / ownership predicates), followed
 * by a hexdump of the object's raw bytes. The hexdump line is formatted in
 * a temporary 72-byte buffer.
 */
#define describe(s,f,v) \
do { \
printf(" %-31s\t %-15s\t%-11s %-11s %-11s \t", \
STRIFY(f, v), dstr_get(&s), Y(dstr_is_array(&s)), \
Y(dstr_is_pointer(&s)), Y(dstr_owns_memory(&s))); \
hd((unsigned char const *)&s, sizeof s, (char[72]){""}); \
} while (0);
/* Hexdump `size` bytes starting at `data` to stdout, 16 bytes per line,
 * using `buf` as scratch space for one formatted line. Defined below.
 */
static void
hd (unsigned char const *data,
size_t size,
char *buf);
/* X-macro list of test cases. Each entry names a variable, the constructor
 * macro to exercise, and the constructor's argument. Every constructor is
 * tried with a short and a long string, each given once as a string literal
 * (SMALL_STRING / LARGE_STRING) and once as a named `char` array
 * (sup_world / how_do_you_do, defined in main()).
 */
#define TEST_SET(X) \
X(a, make_dstr_view, SMALL_STRING) \
X(b, make_dstr_view, sup_world) \
X(c, make_dstr_view, LARGE_STRING) \
X(d, make_dstr_view, how_do_you_do) \
X(e, make_dstr, SMALL_STRING) \
X(f, make_dstr, sup_world) \
X(g, make_dstr, LARGE_STRING) \
X(h, make_dstr, how_do_you_do)
/* 9 characters; presumably meant to fit the inline array, which holds
 * sizeof (char *) + sizeof (unsigned int) bytes (12 with 8-byte pointers).
 */
#define SMALL_STRING "Sup world"
/* 13 characters; presumably meant to exceed the inline array. */
#define LARGE_STRING "How do you do"
/* Construct every test case, print a table describing each resulting dstr
 * together with a hexdump of its raw bytes, then release all of them.
 */
int
main (void)
{
    /* Named arrays referenced by TEST_SET alongside the bare literals. */
    char const sup_world[] = SMALL_STRING;
    char const how_do_you_do[] = LARGE_STRING;

    TEST_SET(construct)

    /* Column titles. */
    printf(" %-31s\t %-15s\t%-11s %-11s %-11s \t %s\n",
           "Constructor", "String value",
           "Is array", "Is ptr", "Owns mem", "Hexdump");

    /* Underline for the titles. */
    printf(" %-31s\t %-15s\t%-11s %-11s %-11s \t %s\n",
           "-----------", "------------",
           "--------", "------", "--------", "-------");

    TEST_SET(describe)
    TEST_SET(discard)

    return 0;
}
/* Append the ASCII column of a hexdump line: a space, a '|', one character
 * per input byte (the byte itself when it is in the range 0x20..0x7e,
 * otherwise '.'), and a closing '|'.
 *
 * pp: in/out write cursor; advanced past everything written (sz + 3 bytes).
 * bs: bytes to render.
 * sz: number of bytes in bs.
 */
static inline void
ascdump (unsigned char      **pp,
         unsigned char const *bs,
         size_t               sz)
{
    unsigned char *out = *pp;

    *out++ = ' ';
    *out++ = '|';

    for (; sz; --sz, ++bs) {
        unsigned char const c = *bs;
        int const printable = c >= 0x20u && c <= 0x7eu;

        *out++ = printable ? c : (unsigned char)'.';
    }

    *out++ = '|';
    *pp = out;
}
/* Append one byte to a hexdump line as a space followed by two lowercase
 * hexadecimal digits.
 *
 * pp: in/out write cursor; advanced by the three bytes written.
 * ch: byte to render.
 */
static inline void
hexchar (unsigned char **pp,
         unsigned char   ch)
{
    static const unsigned char digits[16] = "0123456789abcdef";
    unsigned char *out = *pp;

    out[0] = ' ';
    out[1] = digits[ch >> 4u];
    out[2] = digits[ch & 15u];

    *pp = out + 3;
}
/* Write a hexdump of `size` bytes at `data` to stdout.
 *
 * Each output line shows up to 16 bytes as two groups of eight hex pairs,
 * followed by the same bytes as ASCII between '|' characters. A final
 * partial line is padded with spaces so that the ASCII column lines up.
 *
 * data: bytes to dump.
 * size: number of bytes to dump.
 * buf:  scratch space for one formatted line; a full line needs 71 bytes
 *       including the terminating null character.
 */
static void
hd (unsigned char const *data,
    size_t               size,
    char                *buf)
{
    unsigned char *const line = (unsigned char *)&buf[0];
    size_t const full_rows = size >> 4u;
    size_t const tail = size & 15u;

    /* Complete 16-byte rows. */
    for (size_t row = 0; row < full_rows; ++row) {
        unsigned char *out = line;

        for (size_t col = 0; col < 16u; ++col) {
            hexchar(&out, data[col]);
            /* Extra space after each group of eight. */
            if ((col & 7u) == 7u)
                *out++ = (unsigned char)' ';
        }

        ascdump(&out, data, 16u);
        *out++ = (unsigned char)'\n';
        *out = (unsigned char)'\0';
        fputs(buf, stdout);

        data += 16;
    }

    /* Trailing partial row, if any. */
    if (tail) {
        unsigned char *out = line;

        for (size_t col = 0; col < 16u; ++col) {
            if (col < tail) {
                hexchar(&out, data[col]);
            } else {
                /* Blank cell of the same width as a hex pair. */
                *out++ = (unsigned char)' ';
                *out++ = (unsigned char)' ';
                *out++ = (unsigned char)' ';
            }
            if ((col & 7u) == 7u)
                *out++ = (unsigned char)' ';
        }

        ascdump(&out, data, tail);
        *out++ = (unsigned char)'\n';
        *out = (unsigned char)'\0';
        fputs(buf, stdout);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment