|
diff --git a/encoding.h b/encoding.h |
|
index 4c1d2665ec647d385b6327260ec599553ae7ab7f..0e2a3dad6a4c52adce3b605e92d8daad58148c68 100644 |
|
--- a/encoding.h |
|
+++ b/encoding.h |
|
@@ -59,6 +59,8 @@ typedef struct { |
|
long length_in_bytes; |
|
char *bytes; |
|
str_flag_t flags; |
|
+ bool is_shared; |
|
+ bool is_embed; |
|
} rb_str_t; |
|
|
|
#define RSTR(x) ((rb_str_t *)x) |
|
diff --git a/include/ruby/intern.h b/include/ruby/intern.h |
|
index 6be2c6602fea1ca7a8ce056776114aee926e8018..217f53d5381d295c2375f446f5881e74a1b84519 100644 |
|
--- a/include/ruby/intern.h |
|
+++ b/include/ruby/intern.h |
|
@@ -491,6 +491,7 @@ VALUE rb_str_format(int, const VALUE *, VALUE); |
|
VALUE rb_str_new(const char*, long); |
|
VALUE rb_str_new_cstr(const char*); |
|
VALUE rb_str_new2(const char*); |
|
+VALUE rb_str_new_shared(VALUE); /* defined in string.c, where it forwards to rb_str_new3() */ |
|
VALUE rb_str_new3(VALUE); |
|
VALUE rb_str_new_frozen(VALUE); |
|
VALUE rb_str_new4(VALUE); |
|
diff --git a/string.c b/string.c |
|
index bb14e07ee163eacb6f907fdd78f408a48a49d946..a723dd86f68f5689de9ee3e9116feb0a349ddb73 100644 |
|
--- a/string.c |
|
+++ b/string.c |
|
@@ -42,6 +42,11 @@ VALUE rb_fs; |
|
|
|
static SEL selMATCH; |
|
|
|
+#define SET_SHARED(s, flag) { ((rb_str_t *)(s))->is_shared = (flag); } /* record whether another string uses s's byte buffer; kept as a brace block (not do/while) because a call site omits the trailing semicolon */ |

+#define SET_EMBED(s, flag) { ((rb_str_t *)(s))->is_embed = (flag); } /* record whether s holds its own private byte buffer */ |

+#define STR_SHARED_P(s) (((rb_str_t *)(s))->is_shared) /* true once s was handed to str_shared() as the source */ |

+#define STR_EMBED_P(s) (((rb_str_t *)(s))->is_embed) /* true when s's buffer is private to s */ |
|
+ |
|
// rb_str_t primitives. |
|
|
|
static void |
|
@@ -228,6 +233,8 @@ str_alloc(VALUE klass) |
|
str->capacity_in_bytes = 0; |
|
str->length_in_bytes = 0; |
|
str->bytes = NULL; |
|
+ str->is_shared = false; |
|
+ str->is_embed = false; |
|
str_reset_flags(str); |
|
|
|
return str; |
|
@@ -249,6 +256,44 @@ str_new_like(VALUE obj) |
|
|
|
static void str_resize_bytes(rb_str_t *self, long new_capacity); |
|
static void str_concat_bytes(rb_str_t *self, const char *bytes, long len); |
|
+static void str_replace_with_cfstring(rb_str_t *self, CFStringRef source); |
|
+ |
|
+static inline void |

+str_shared(rb_str_t *self, rb_str_t *source) /* Make self use source's byte buffer directly (no copy) and mirror its length, capacity, encoding and flags. */ |

+{ |

+ GC_WB(&self->bytes, source->bytes); /* pointer store goes through GC_WB; presumably a GC write barrier - confirm */ |

+ |

+ self->length_in_bytes = source->length_in_bytes; |

+ self->capacity_in_bytes = source->capacity_in_bytes; |

+ self->encoding = source->encoding; |

+ if (!source->flags) { /* flags are zero: call str_update_flags() first, presumably to compute them - confirm */ |

+ str_update_flags(source); |

+ } |

+ self->flags = source->flags; |

+ |

+ SET_SHARED(source, true); /* source's buffer now has a second user */ |

+ SET_EMBED(self, false); /* self does not own the buffer, so str_shared_to_embed() will copy it before a write */ |

+} |
|
+ |
|
+static inline void |

+str_shared_to_embed(rb_str_t *self) /* Give self a private, NUL-terminated copy of its bytes before it is modified. */ |

+{ |

+    if (!STR_SHARED_P(self) && STR_EMBED_P(self)) { |

+        return; /* self already owns an unshared buffer */ |

+    } |

+ |

+    const char *bytes = self->bytes; /* previous buffer; NULL for a string fresh from str_alloc() */ |

+    SET_SHARED(self, false); |

+    SET_EMBED(self, true); |

+    if (self->capacity_in_bytes == self->length_in_bytes) { |

+        self->capacity_in_bytes++; /* leave room for the terminator written below */ |

+    } |

+    GC_WB(&self->bytes, xmalloc(self->capacity_in_bytes)); |

+    if (bytes != NULL && self->length_in_bytes > 0) { /* memcpy() from a NULL source is undefined even for 0 bytes */ |

+        memcpy(self->bytes, bytes, self->length_in_bytes); |

+    } |

+    self->bytes[self->length_in_bytes] = '\0'; |

+} |
|
|
|
static void |
|
str_replace_with_bytes(rb_str_t *self, const char *bytes, long len, |
|
@@ -257,6 +302,8 @@ str_replace_with_bytes(rb_str_t *self, const char *bytes, long len, |
|
assert(len >= 0); |
|
assert(enc != NULL); |
|
|
|
+ SET_SHARED(self, false) |
|
+ SET_EMBED(self, true); |
|
str_reset_flags(self); |
|
self->encoding = enc; |
|
if (len > 0) { |
|
@@ -280,8 +327,13 @@ str_replace_with_string(rb_str_t *self, rb_str_t *source) |
|
if (self == source) { |
|
return; |
|
} |
|
- str_replace_with_bytes(self, source->bytes, source->length_in_bytes, |
|
- source->encoding); |
|
+ if (source->length_in_bytes > 0) { |
|
+ str_shared(self, source); |
|
+ } |
|
+ else { |
|
+ str_replace_with_bytes(self, source->bytes, source->length_in_bytes, |
|
+ source->encoding); |
|
+ } |
|
if (!source->flags) { |
|
str_update_flags(source); |
|
} |
|
@@ -291,6 +343,7 @@ str_replace_with_string(rb_str_t *self, rb_str_t *source) |
|
static void |
|
str_append_uchar32(rb_str_t *self, UChar32 c) |
|
{ |
|
+ str_shared_to_embed(self); |
|
str_reset_flags(self); |
|
if ((c <= 127) && self->encoding->ascii_compatible) { |
|
str_resize_bytes(self, self->length_in_bytes + 1); |
|
@@ -327,6 +380,7 @@ str_replace_with_uchars(rb_str_t *self, const UChar *chars, long len) |
|
{ |
|
assert(len >= 0); |
|
|
|
+ str_shared_to_embed(self); |
|
str_reset_flags(self); |
|
self->length_in_bytes = 0; |
|
self->encoding = rb_encodings[ENCODING_UTF8]; |
|
@@ -606,6 +660,8 @@ str_new_copy_of_part(rb_str_t *self, long offset_in_bytes, |
|
// then a part of that string is also ASCII only |
|
str_set_ascii_only(str, true); |
|
} |
|
+ SET_SHARED(self, false); |
|
+ SET_EMBED(self, true); |
|
return str; |
|
} |
|
|
|
@@ -783,7 +839,8 @@ str_resize_bytes(rb_str_t *self, long new_capacity) |
|
rb_raise(rb_eArgError, "negative string size (or size too big)"); |
|
} |
|
if (self->capacity_in_bytes < new_capacity) { |
|
- size_t capacity = new_capacity * 1.2; |
|
+ str_shared_to_embed(self); |
|
+ size_t capacity = new_capacity * 1.2 + 1; |
|
if (capacity > 0){ |
|
new_capacity = capacity; |
|
} |
|
@@ -804,8 +861,7 @@ static void |
|
str_ensure_null_terminator(rb_str_t *self) |
|
{ |
|
if (self->length_in_bytes > 0 |
|
- && (self->capacity_in_bytes == self->length_in_bytes |
|
- || self->bytes[self->length_in_bytes] != '\0')) { |
|
+ && self->bytes[self->length_in_bytes] != '\0') { |
|
str_resize_bytes(self, self->length_in_bytes + 1); |
|
self->bytes[self->length_in_bytes] = '\0'; |
|
} |
|
@@ -817,6 +873,7 @@ str_splice(rb_str_t *self, long pos, long len, rb_str_t *str) |
|
// self[pos..pos+len] = str |
|
assert(pos >= 0 && len >= 0); |
|
|
|
+ str_shared_to_embed(self); |
|
if (str != NULL) { |
|
str_must_have_compatible_encoding(self, str); |
|
} |
|
@@ -911,6 +968,7 @@ str_concat_bytes(rb_str_t *self, const char *bytes, long len) |
|
|
|
const long new_length_in_bytes = self->length_in_bytes + len; |
|
|
|
+ str_shared_to_embed(self); |
|
str_resize_bytes(self, new_length_in_bytes); |
|
memcpy(self->bytes + self->length_in_bytes, bytes, len); |
|
self->length_in_bytes = new_length_in_bytes; |
|
@@ -922,6 +980,7 @@ str_concat_uchars(rb_str_t *self, const UChar *chars, long len) |
|
if (len == 0) { |
|
return; |
|
} |
|
+ str_shared_to_embed(self); |
|
str_reset_flags(self); |
|
if (IS_UTF8_ENC(self->encoding)) { |
|
long new_length_in_bytes = self->length_in_bytes; |
|
@@ -1499,6 +1558,10 @@ rstr_substr_with_cache(VALUE str, long beg, long len, |
|
len = n - beg; |
|
} |
|
|
|
+ if (beg == 0 && n == len) { |
|
+ return rb_str_new_shared(str); |
|
+ } |
|
+ |
|
rb_str_t *substr = str_get_characters(RSTR(str), beg, beg + len - 1, cache); |
|
OBJ_INFECT(substr, str); |
|
return substr == NULL ? Qnil : (VALUE)substr; |
|
@@ -1971,6 +2034,7 @@ rstr_setbyte(VALUE self, SEL sel, VALUE idx, VALUE value) |
|
if (index < 0) { |
|
index += RSTR(self)->length_in_bytes; |
|
} |
|
+ str_shared_to_embed(RSTR(self)); |
|
str_reset_flags(RSTR(self)); |
|
RSTR(self)->bytes[index] = byte; |
|
return value; |
|
@@ -3980,7 +4044,7 @@ rstr_sub_bang(VALUE str, SEL sel, int argc, VALUE *argv) |
|
assert(count > 0); |
|
|
|
if (block_given || !NIL_P(hash)) { |
|
- if (block_given) { |
|
+ if (block_given) { |
|
rb_match_busy(match); |
|
const unsigned long hash = rb_str_hash(str); |
|
repl = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match))); |
|
@@ -4286,6 +4350,7 @@ rstr_change_case(VALUE str, change_case_callback_t callback) |
|
char new_c = callback(c, i == 0); |
|
if (new_c != c) { |
|
changed = true; |
|
+ str_shared_to_embed(RSTR(str)); |
|
RSTR(str)->bytes[i] = new_c; |
|
} |
|
} |
|
@@ -4302,6 +4367,7 @@ rstr_change_case(VALUE str, change_case_callback_t callback) |
|
char new_c = callback(c, start_index == 0); |
|
if (new_c != c) { |
|
changed = true; |
|
+ str_shared_to_embed(RSTR(str)); |
|
memset(&RSTR(str)->bytes[start_index], 0, char_len); |
|
if (RSTR(str)->encoding->little_endian) { |
|
RSTR(str)->bytes[start_index] = new_c; |
|
@@ -5259,6 +5325,8 @@ rstr_reverse_bang(VALUE str, SEL sel) |
|
|
|
RSTR(str)->capacity_in_bytes = RSTR(str)->length_in_bytes; |
|
GC_WB(&RSTR(str)->bytes, new_bytes); |
|
+ SET_SHARED(str, false); |
|
+ SET_EMBED(str, true); |
|
|
|
// we modify it directly so the information stored |
|
// in the facultative flags might be outdated |
|
@@ -6371,10 +6439,17 @@ rb_str_new2(const char *cstr) |
|
VALUE |

rb_str_new3(VALUE source) |

{ |

- rb_str_t *str = str_alloc(rb_obj_class(source)); |

- str_replace(str, source); |

- OBJ_INFECT(str, source); |

- return (VALUE)str; |

+ rb_str_t *str1 = str_alloc(rb_obj_class(source)); /* new, empty string of the class reported for source */ |

+ rb_str_t *str2 = str_need_string(source); /* presumably coerces source to an rb_str_t - confirm */ |

+ str_shared(str1, str2); /* alias str2's bytes instead of copying them */ |

+ OBJ_INFECT(str1, str2); /* NOTE(review): taint now comes from str2, not source - confirm they agree when str_need_string() converts */ |

+ return (VALUE)str1; |

+} |
|
+ |
|
+VALUE |

+rb_str_new_shared(VALUE source) /* Public alias: returns whatever rb_str_new3(source) returns. */ |

+{ |

+ return rb_str_new3(source); |

} |
|
|
|
VALUE |