Skip to content

Instantly share code, notes, and snippets.

@kingsimba
Last active February 9, 2021 14:04
Show Gist options
  • Save kingsimba/0eeb7c5c900652186ec056dfd6c06a86 to your computer and use it in GitHub Desktop.
Save kingsimba/0eeb7c5c900652186ec056dfd6c06a86 to your computer and use it in GitHub Desktop.
Try to make std::string more efficient
#include "string_slice.h"
#include "gf_string.h"
#include "jansson/jansson.h"
#include "pch_gis_runtime.h"
#include "tr_stdlib.h"
/**
 * Decode the next code point from the remaining bytes and advance past it.
 *
 * @param cOut             receives the decoded code point; written only when
 *                         the call succeeds.
 * @param consumedBytesOut optional (may be NULL); receives how many bytes the
 *                         decoded code point occupied.
 * @return true when a code point was produced, false when utf8_iterate() made
 *         no progress (nothing left) or reported a failure.
 */
bool StringCharIter::Next(wchar32* cOut, int* consumedBytesOut) {
    int32_t c = 0;
    const char* newStr = utf8_iterate(str_, length_, &c);
    // An unchanged pointer means there was nothing left to decode.
    // NOTE(review): Jansson's utf8_iterate() returns NULL on malformed UTF-8 --
    // confirm that is the implementation linked here. Without the NULL check
    // the pointer subtraction below would produce garbage and str_ would be
    // overwritten with NULL.
    if (newStr == NULL || newStr == str_) {
        return false;
    }
    int consumedBytes = (int)(newStr - str_);
    length_ -= consumedBytes;
    str_ = newStr;
    *cOut = c;
    if (consumedBytesOut != NULL) *consumedBytesOut = consumedBytes;
    return true;
}
//////////////////////////////////////////////////////////////////////////
/**
 * Find the first byte equal to `c` in the half-open range [s, sEnd).
 *
 * @return pointer to the matching byte, or sEnd when there is none
 *         (never NULL).
 */
static const char* _strchr(const char* s, const char* sEnd, char c) {
    for (; s != sEnd; s++) {
        if (*s == c) return s;
    }
    return sEnd;
}

/**
 * Compare the first `s2len` bytes of `s2` against the bytes starting at `s1`,
 * never reading at or beyond `s1End`.
 *
 * Both inputs are length-delimited, so a NUL byte is compared like any other
 * byte instead of ending the comparison.
 *
 * @return 0 when all `s2len` bytes match; the difference of the first
 *         mismatching bytes (as unsigned char) otherwise; a negative value
 *         when [s1, s1End) runs out before `s2len` bytes were compared.
 */
static int _strncmp(const char* s1, const char* s1End, const char* s2, size_t s2len) {
    for (size_t i = 0; i < s2len; i++, s1++, s2++) {
        // Haystack exhausted mid-needle: this is a mismatch, not a match.
        if (s1 == s1End) return -1;
        const unsigned char u1 = (unsigned char)*s1;
        const unsigned char u2 = (unsigned char)*s2;
        if (u1 != u2) return u1 - u2;
    }
    return 0;
}

/**
 * Find the first occurrence of the `s2len`-byte needle `s2` inside [s1, s1End).
 *
 * @return pointer to the start of the match, or s1End when the needle does not
 *         occur. An empty needle is reported as "not found" (s1End) so that
 *         callers which advance by the needle length always make progress.
 */
static const char* _strstr(const char* s1, const char* s1End, const char* s2, size_t s2len) {
    // Nothing to search for, an inverted range, or a needle longer than the
    // haystack: no match is possible (and *s2 must not be read when s2len == 0).
    if (s2len == 0 || s1 > s1End || (size_t)(s1End - s1) < s2len) return s1End;

    // A match cannot start later than this position.
    const char* lastStart = s1End - s2len;
    for (const char* p = _strchr(s1, s1End, *s2); p <= lastStart; p = _strchr(p + 1, s1End, *s2)) {
        if (_strncmp(p, s1End, s2, s2len) == 0) return p;
    }
    return s1End;
}
/**
 * Produce the next piece of the string, i.e. the bytes between the current
 * position and the next occurrence of the separator (or the end of the string).
 *
 * Once the position reaches the end no further piece is produced, so an empty
 * input yields nothing and a trailing separator does not yield a trailing
 * empty piece.
 *
 * @param cOut     receives the piece; written only when true is returned.
 * @param rangeOut optional (may be NULL); receives the piece's byte offset
 *                 relative to the start of the original string and its length.
 * @return true when a piece was produced, false when the input is exhausted.
 */
bool StringSubsliceIter::Next(StringSlice* cOut, Range* rangeOut) {
    if (str_ == str_end_) return false;

    // An empty separator can never be "found": searching for it would read
    // through sep_ (possibly NULL) and a zero-length match would never advance.
    const char* newStr = str_end_;
    if (sep_length_ > 0) {
        newStr = _strstr(str_, str_end_, sep_, sep_length_);
        // A hit with fewer than sep_length_ bytes left cannot be a complete
        // separator; treat it as "not found" so str_ never passes str_end_.
        if (newStr != str_end_ && (str_end_ - newStr) < sep_length_) {
            newStr = str_end_;
        }
    }

    *cOut = StringSlice(str_, (int)(newStr - str_));
    if (rangeOut != NULL) *rangeOut = Range_make((int)(str_ - str_start_), cOut->Length());

    // Skip over the separator when one was found; otherwise stop at the end.
    if (newStr != str_end_) {
        str_ = newStr + sep_length_;
    } else {
        str_ = newStr;
    }
    return true;
}
/**
 * Collect every piece produced by iterBySpliting(sep) into a vector.
 *
 * @param sep separator to split on.
 * @return the pieces in the order the iterator yields them.
 */
std::vector<StringSlice> StringSlice::Split(StringSlice sep) {
    std::vector<StringSlice> pieces;
    StringSubsliceIter splitter = iterBySpliting(sep);
    for (StringSlice piece; splitter.Next(&piece);) {
        pieces.push_back(piece);
    }
    return pieces;
}
//////////////////////////////////////////////////////////////////////////
/**
 * Search for a Unicode code point, starting at byte offset `start`.
 *
 * @param start byte offset (relative to the beginning of this slice) at which
 *              decoding begins.
 * @param code  code point to look for.
 * @return the byte offset and byte length of the first occurrence, or
 *         invalidRange when `start` is outside [0, Length()] or the code point
 *         is not found.
 */
Range StringSlice::FindFrom(int start, wchar32 code) {
    // An out-of-range start would build a sub-slice with a negative length,
    // which the character iterator hands on as a byte count.
    if (start < 0 || start > length_) return invalidRange;

    auto iter = this->Subslice(start, length_ - start).Iter();
    wchar32 c;
    int consumedBytes;
    int totalBytes = 0;  // bytes decoded so far, relative to `start`
    while (iter.Next(&c, &consumedBytes)) {
        if (c == code) {
            return Range_make(start + totalBytes, consumedBytes);
        }
        totalBytes += consumedBytes;
    }
    return invalidRange;
}
/**
 * Search backwards for a single byte.
 *
 * @param c byte to look for.
 * @return index of the last byte equal to `c`, or -1 when there is none.
 */
int StringSlice::RFind(char c) {
    int idx = length_;
    while (idx-- > 0) {
        if (str_[idx] == c) return idx;
    }
    return -1;
}
/**
 * Search for `needle`, starting at byte offset `start`.
 *
 * @param start  byte offset (relative to the beginning of this slice) at which
 *               the search begins.
 * @param needle bytes to look for.
 * @return the byte offset of the first occurrence together with the needle's
 *         length, or invalidRange when `start` is outside [0, Length()], the
 *         needle is empty, or the needle does not occur.
 */
Range StringSlice::FindSliceFrom(int start, StringSlice needle) {
    // Reject inputs that would make the search read outside this slice or
    // dereference an empty needle's (possibly NULL) buffer.
    if (start < 0 || start > length_ || needle.length_ <= 0) {
        return invalidRange;
    }
    const char* strEnd = str_ + length_;
    const char* newStr = _strstr(str_ + start, strEnd, needle.str_, needle.length_);
    // A hit with fewer than needle.length_ bytes left cannot be a full match.
    if (newStr == strEnd || (strEnd - newStr) < needle.length_) {
        return invalidRange;
    }
    return Range_make((int)(newStr - str_), needle.length_);
}
/**
 * Build a GfString from this slice by passing its buffer and length to
 * GfString::AllocWithBytes().
 */
sp<GfString> StringSlice::ToString() {
    sp<GfString> owned = GfString::AllocWithBytes(str_, length_);
    return owned;
}
#pragma once
#include "gf_object.h"
#include "tr_stdlib.h"
class GfString;
class StringSlice;
/**
 * Walks a string one character (Unicode code point) at a time.
 *
 * ```
 * auto iter = StringSlice(u8"hello world").Iter();
 * wchar32 c;
 * while (iter.Next(&c)) {
 *     use(c);
 * }
 * ```
 */
class StringCharIter {
public:
    // Starts at the first byte of `slice` and covers slice.Length() bytes.
    StringCharIter(StringSlice& slice);

    // Fetches the next code point into *cOut; when consumedBytesOut is not
    // null it also receives the number of bytes that code point occupied.
    // Returns false once nothing more can be read.
    bool Next(wchar32* cOut, int* consumedBytesOut);

    // Same as above for callers that do not need the byte count.
    bool Next(wchar32* cOut) { return Next(cOut, nullptr); }

private:
    const char* str_;  // next byte to decode
    int length_;       // bytes remaining from str_
};
/**
 * Splits a string on a separator string, one piece per call.
 *
 * For example, splitting "hello---world" on "---" yields "hello" and "world".
 *
 * ```
 * auto iter = StringSlice("hello---world").iterBySpliting("---");
 * StringSlice slice;
 * while (iter.Next(&slice)) {
 *     use(slice);
 * }
 * ```
 */
class StringSubsliceIter {
public:
    // Iterates over `slice`, cutting it at every occurrence of `sep`.
    StringSubsliceIter(StringSlice& slice, StringSlice& sep);

    // Stores the next piece in *slice_out; when rangeOut is not null it also
    // receives the piece's offset and length within the original string.
    // Returns false once the input is exhausted.
    bool Next(StringSlice* slice_out, Range* rangeOut);

    // Same as above for callers that do not need the range.
    bool Next(StringSlice* slice_out) { return Next(slice_out, nullptr); }

private:
    const char* str_start_;  // first byte of the string being split
    const char* str_end_;    // one past its last byte
    const char* str_;        // where the next piece begins
    const char* sep_;        // separator bytes
    int sep_length_;         // separator length in bytes
};
/**
 * A pointer + byte-length pair describing a run of UTF-8 encoded bytes.
 *
 * The slice stores only the pointer it is given; it does not copy the bytes,
 * so the underlying buffer must stay alive for as long as the slice is used.
 * The bytes are not required to be NUL-terminated.
 */
class StringSlice {
public:
    // An empty slice (NULL buffer, zero length).
    forceinline StringSlice() {
        str_ = NULL;
        length_ = 0;
    }
    // Wraps a NUL-terminated C string; a NULL pointer produces an empty slice
    // instead of being passed to strlen().
    forceinline StringSlice(const char* str) {
        str_ = const_cast<char*>(str);
        length_ = (str != NULL) ? (int)strlen(str) : 0;
    }
    // Wraps `len` bytes starting at `str`.
    forceinline StringSlice(const char* str, int len) {
        str_ = const_cast<char*>(str);
        length_ = len;
    }
    // Re-points an existing slice at `len` bytes starting at `str`.
    forceinline void Init(const char* str, int len) {
        str_ = const_cast<char*>(str);
        length_ = len;
    }
    //////////////////////////////////////////////////////////////////////////
    // Accessors
    forceinline const char* Buffer() const { return str_; }
    forceinline int Length() const { return length_; }
    forceinline StringCharIter Iter() { return StringCharIter(*this); }
    forceinline StringSubsliceIter iterBySpliting(StringSlice sep) {
        return StringSubsliceIter(*this, sep);
    }
    //////////////////////////////////////////////////////////////////////////
    // Conversions
    // Copy to a C string. `max_len` is the capacity of `str` including the
    // terminating NUL. When the slice does not fit, `str` is set to "" (or is
    // left untouched when it has no capacity at all).
    forceinline void ToCString(char* str, size_t max_len) const {
        if (max_len == 0) return;  // not even room for the terminator
        if (max_len < (size_t)length_ + 1) {
            str[0] = 0;
            return;
        }
        // memcpy must not be handed a NULL source, even for zero bytes.
        if (length_ > 0) memcpy(str, str_, length_);
        str[length_] = 0;
    }
    // Create a standalone GfString
    sp<GfString> ToString();
    //////////////////////////////////////////////////////////////////////////
    // Search
    // Find subslice
    forceinline Range FindSlice(StringSlice needle) { return FindSliceFrom(0, needle); }
    Range FindSliceFrom(int start, StringSlice needle);
    // Find Unicode character
    forceinline Range Find(wchar32 code) { return FindFrom(0, code); }
    Range FindFrom(int start, wchar32 code);
    // Find a single byte and return its byte index (-1 when absent).
    // Only meaningful for ASCII characters: a non-ASCII `c` would be compared
    // against individual bytes of multi-byte UTF-8 sequences.
    forceinline int Find(char c) { return FindFrom(0, c); }
    int FindFrom(int start, char c);
    int RFind(char c);
    //////////////////////////////////////////////////////////////////////////
    // Actions
    // Returns the `length` bytes beginning at byte offset `start`; the
    // arguments are not range-checked.
    forceinline StringSlice Subslice(int start, int length) const {
        return StringSlice(str_ + start, length);
    }
    std::vector<StringSlice> Split(StringSlice sep);
    //////////////////////////////////////////////////////////////////////////
    // Equals
    // Byte-wise comparison with another slice; a NULL `r` is never equal.
    forceinline bool Equals(StringSlice* r) const {
        if (r == NULL || length_ != r->length_) return false;
        // Two empty slices are equal without touching their (possibly NULL) buffers.
        return length_ <= 0 || memcmp(str_, r->str_, length_) == 0;
    }
    // Byte-wise comparison with a NUL-terminated C string; a NULL `r` is
    // treated like the empty string.
    forceinline bool Equals(const char* r) const {
        if (r == NULL) return length_ == 0;
        if ((size_t)length_ != strlen(r)) return false;
        return length_ == 0 || memcmp(str_, r, length_) == 0;
    }

protected:
    char* str_;
    int length_;
};
// Position the iterator on the first byte of `slice`, with all of its bytes
// still to be read.
inline StringCharIter::StringCharIter(StringSlice& slice)
    : str_(slice.Buffer()), length_(slice.Length()) {}
// Remember the bounds of `slice` and the separator bytes; iteration begins at
// the first byte of `slice`.
inline StringSubsliceIter::StringSubsliceIter(StringSlice& slice, StringSlice& sep)
    : str_start_(slice.Buffer()),
      str_end_(slice.Buffer() + slice.Length()),
      str_(slice.Buffer()),
      sep_(sep.Buffer()),
      sep_length_(sep.Length()) {}
inline int StringSlice::FindFrom(int start, char c) {
for (const char* p = str_ + start; *p; p++) {
if (*p == c) return (int)(p - str_);
}
return -1;
}
// Shorthand for a shared (reference-counted) pointer to T.
template <typename T> using sptr = std::shared_ptr<T>;

// A vector whose elements are shared pointers to T.
template <typename T> using svector = std::vector<std::shared_ptr<T>>;

// The code below refers to make_shared, string, vector, ... without std::.
using namespace std;
class MyString;

/**
 * Common base of the demo object types: anything that can describe itself as
 * a MyString.
 */
class MyObject {
public:
    // Objects are handled through pointers to this base, so destruction
    // through a MyObject pointer must reach the derived destructor.
    virtual ~MyObject() = default;

    // Returns a string representation of the object.
    virtual sptr<MyString> ToString() = 0;
};
/**
 * A string object that owns its bytes (in a std::string) and exposes them
 * through the StringSlice interface.
 *
 * Instances are meant to be created with the Alloc* factories so that they are
 * owned by a shared pointer.
 *
 * NOTE(review): the implicitly generated copy operations copy the StringSlice
 * base as-is, leaving the copy's slice pointing into the source object's
 * buffer. Copying a MyString by value is therefore unsafe; confirm no caller
 * does so.
 */
class MyString : public MyObject,
                 public StringSlice,
                 public std::enable_shared_from_this<MyString> {
public:
    // Creates a MyString holding a copy of the NUL-terminated string `str`.
    static sptr<MyString> AllocWithCString(const char* str) { return std::make_shared<MyString>(str); }
    // Creates a MyString holding a copy of the bytes described by `str`.
    static sptr<MyString> AllocWithSlice(StringSlice str) {
        return std::make_shared<MyString>(str.Buffer(), str.Length());
    }
    // Returns a shared pointer to this object that shares ownership with the
    // pointer it was allocated into. Wrapping `this` in a brand-new shared
    // pointer would create a second, independent owner and delete the object
    // twice. Only valid for objects that are owned by a shared pointer (as
    // produced by the Alloc* factories).
    virtual sptr<MyString> ToString() override { return shared_from_this(); }

    MyString(const char* str) : storage_(str) { BindSlice(); }
    MyString(const char* str, int len) : storage_(str, len) { BindSlice(); }

    // NUL-terminated view of the owned bytes.
    const char* CStr() { return storage_.c_str(); }

private:
    // Points the StringSlice base at the bytes owned by storage_.
    void BindSlice() { this->Init(storage_.data(), (int)storage_.size()); }

    // Owned copy of the text. Deliberately not named str_, which is the name
    // of the raw pointer inherited from StringSlice.
    std::string storage_;
};
template <typename T>
class MyArray : public MyObject {
public:
static sptr<MyArray<T>> Alloc() { return make_shared<MyArray<T>>(); }
void AddObject(sptr<T> obj) { this->array_.push_back(obj); }
sptr<T> ObjectAtIndex(int i) { return this->array_[i]; }
virtual sptr<MyString> ToString() { return MyString::AllocWithCString("This is an array"); }
private:
vector<sptr<T>> array_;
};
/**
 * Thin handle around a shared MyArray<T> that adds `[]` element access while
 * still forwarding `->` to the array itself.
 */
template <typename T>
class MyArrayRef {
public:
    // Intentionally implicit, so a sptr<MyArray<T>> can initialise a handle directly.
    MyArrayRef(sptr<MyArray<T>> arr) : array_(arr) {}

    // Element access, forwarded to MyArray::ObjectAtIndex.
    sptr<T> operator[](size_t i) { return array_->ObjectAtIndex(static_cast<int>(i)); }

    // Gives access to the wrapped array's own members.
    sptr<MyArray<T>> operator->() { return array_; }

private:
    sptr<MyArray<T>> array_;
};
// Exercises pointer conversions between MyString and MyObject, appending to a
// MyArray through MyArrayRef, and splitting a string into words.
TEST_F(ArrayTest, vector) {
    sptr<MyString> text = MyString::AllocWithCString("hello world");

    // Round trip: derived -> base -> derived.
    sptr<MyObject> asBase = text;
    text = static_pointer_cast<MyString>(asBase);

    MyArrayRef<MyString> items = MyArray<MyString>::Alloc();
    // Slots 0 and 1 both hold the full string.
    items->AddObject(text);
    items->AddObject(text);

    // Slots 2.. hold one freshly allocated string per word.
    for (auto word : text->Split(" ")) {
        items->AddObject(MyString::AllocWithSlice(word));
    }

    EXPECT_STREQ(items[2]->CStr(), "hello");
    EXPECT_STREQ(items[3]->CStr(), "world");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment