Skip to content

Instantly share code, notes, and snippets.

@funny-falcon
Created June 24, 2012 06:26
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save funny-falcon/2981959 to your computer and use it in GitHub Desktop.
Save funny-falcon/2981959 to your computer and use it in GitHub Desktop.
Test patch for increasing "array as queue" speed
diff --git a/array.c b/array.c
index 64647c3..76fac8a 100644
--- a/array.c
+++ b/array.c
@@ -255,15 +255,24 @@ rb_ary_modify(VALUE ary)
rb_ary_modify_check(ary);
if (ARY_SHARED_P(ary)) {
long len = RARRAY_LEN(ary);
+ VALUE shared = ARY_SHARED(ary);
if (len <= RARRAY_EMBED_LEN_MAX) {
VALUE *ptr = ARY_HEAP_PTR(ary);
- VALUE shared = ARY_SHARED(ary);
FL_UNSET_SHARED(ary);
FL_SET_EMBED(ary);
MEMCPY(ARY_EMBED_PTR(ary), ptr, VALUE, len);
rb_ary_decrement_share(shared);
ARY_SET_EMBED_LEN(ary, len);
}
+ else if (ARY_SHARED_NUM(shared) == 1 && len > RARRAY_LEN(shared)/4) {
+ long shift = RARRAY_PTR(ary) - RARRAY_PTR(shared);
+ ARY_SET_PTR(ary, RARRAY_PTR(shared));
+ ARY_SET_CAPA(ary, RARRAY_LEN(shared));
+ MEMMOVE(RARRAY_PTR(ary), RARRAY_PTR(ary)+shift, VALUE, len);
+ FL_UNSET_SHARED(ary);
+ FL_SET_EMBED(shared);
+ rb_ary_decrement_share(shared);
+ }
else {
VALUE *ptr = ALLOC_N(VALUE, len);
MEMCPY(ptr, RARRAY_PTR(ary), VALUE, len);
@@ -430,8 +439,9 @@ ary_make_shared(VALUE ary)
OBJSETUP(shared, 0, T_ARRAY);
FL_UNSET_EMBED(shared);
- ARY_SET_LEN((VALUE)shared, RARRAY_LEN(ary));
+ ARY_SET_LEN((VALUE)shared, ARY_CAPA(ary));
ARY_SET_PTR((VALUE)shared, RARRAY_PTR(ary));
+ rb_mem_clear(RARRAY_PTR(shared) + RARRAY_LEN(ary), ARY_CAPA(ary) - RARRAY_LEN(ary));
FL_SET_SHARED_ROOT(shared);
ARY_SET_SHARED_NUM((VALUE)shared, 1);
FL_SET_SHARED(ary);
@@ -706,6 +716,24 @@ ary_take_first_or_last(int argc, VALUE *argv, VALUE ary, enum ary_take_pos_flags
static VALUE rb_ary_push_1(VALUE ary, VALUE item);
+static int
+rb_ary_push_to_shared(VALUE ary, VALUE item)
+{
+ long len = RARRAY_LEN(ary);
+ if (len > RARRAY_EMBED_LEN_MAX) {
+ VALUE shared = ARY_SHARED(ary);
+ if (ARY_SHARED_NUM(shared) == 1 &&
+ RARRAY_PTR(shared) + RARRAY_LEN(shared) >
+ RARRAY_PTR(ary) + len) {
+ rb_ary_modify_check(ary);
+ RARRAY_PTR(ary)[len] = item;
+ ARY_SET_LEN(ary, len + 1);
+ return 1;
+ }
+ }
+ return 0;
+}
+
/*
* call-seq:
* ary << obj -> ary
@@ -722,6 +750,10 @@ static VALUE rb_ary_push_1(VALUE ary, VALUE item);
VALUE
rb_ary_push(VALUE ary, VALUE item)
{
+ if (ARY_SHARED_P(ary) && rb_ary_push_to_shared(ary, item)) {
+ return ary;
+ }
+
rb_ary_modify(ary);
return rb_ary_push_1(ary, item);
}
@@ -730,8 +762,9 @@ static VALUE
rb_ary_push_1(VALUE ary, VALUE item)
{
long idx = RARRAY_LEN(ary);
+ long capa = ARY_CAPA(ary);
- if (idx >= ARY_CAPA(ary)) {
+ if (idx >= (capa > ARY_DEFAULT_SIZE ? capa - 3 : capa)) {
ary_double_capa(ary, idx);
}
RARRAY_PTR(ary)[idx] = item;
@@ -739,6 +772,41 @@ rb_ary_push_1(VALUE ary, VALUE item)
return ary;
}
+static int
+rb_ary_cat_to_shared(VALUE ary, long oldlen, long len)
+{
+ if (oldlen > RARRAY_EMBED_LEN_MAX) {
+ VALUE shared = ARY_SHARED(ary);
+ if (ARY_SHARED_NUM(shared) == 1 &&
+ RARRAY_PTR(shared) + RARRAY_LEN(shared) >
+ RARRAY_PTR(ary) + oldlen + len) {
+ rb_ary_modify_check(ary);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+VALUE
+rb_ary_cat(VALUE ary, const VALUE *ptr, long len)
+{
+ long oldlen = RARRAY_LEN(ary), capa;
+
+ if (ARY_SHARED_P(ary) && rb_ary_cat_to_shared(ary, oldlen, len)) {
+ goto copy;
+ }
+ rb_ary_modify(ary);
+ capa = ARY_CAPA(ary);
+ if (oldlen + len >= (capa > ARY_DEFAULT_SIZE ? capa - 3 : capa)) {
+ ary_double_capa(ary, oldlen + len);
+ }
+
+copy:
+ MEMCPY(RARRAY_PTR(ary) + oldlen, ptr, VALUE, len);
+ ARY_SET_LEN(ary, oldlen + len);
+ return ary;
+}
+
/*
* call-seq:
* ary.push(obj, ... ) -> ary
@@ -755,28 +823,33 @@ rb_ary_push_1(VALUE ary, VALUE item)
static VALUE
rb_ary_push_m(int argc, VALUE *argv, VALUE ary)
{
- rb_ary_modify(ary);
- while (argc--) {
- rb_ary_push_1(ary, *argv++);
- }
- return ary;
+ return rb_ary_cat(ary, argv, argc);
}
VALUE
rb_ary_pop(VALUE ary)
{
long n;
+ VALUE top;
+
rb_ary_modify_check(ary);
if (RARRAY_LEN(ary) == 0) return Qnil;
+
+ n = RARRAY_LEN(ary) - 1;
+ top = RARRAY_PTR(ary)[n];
+ ARY_SET_LEN(ary, n);
+
if (ARY_OWNS_HEAP_P(ary) &&
RARRAY_LEN(ary) * 3 < ARY_CAPA(ary) &&
ARY_CAPA(ary) > ARY_DEFAULT_SIZE)
{
ary_resize_capa(ary, RARRAY_LEN(ary) * 2);
}
- n = RARRAY_LEN(ary)-1;
- ARY_SET_LEN(ary, n);
- return RARRAY_PTR(ary)[n];
+ else if (ARY_SHARED_P(ary) && ARY_SHARED_NUM(ARY_SHARED(ary)) == 1) {
+ RARRAY_PTR(ary)[n] = Qnil;
+ }
+
+ return top;
}
/*
@@ -887,6 +960,36 @@ rb_ary_shift_m(int argc, VALUE *argv, VALUE ary)
return result;
}
+static int
+rb_ary_unshift_to_shared(int argc, VALUE *argv, VALUE ary)
+{
+ long len = RARRAY_LEN(ary);
+ VALUE shared = ARY_SHARED(ary);
+ long shared_capa = RARRAY_LEN(shared);
+ if (ARY_SHARED_NUM(shared) == 1 && shared_capa > len + argc) {
+
+ VALUE *head = RARRAY_PTR(ary);
+ VALUE *sharedp = RARRAY_PTR(shared);
+
+ rb_ary_modify_check(ary);
+ /* make a room for unshifted values */
+ if (head - sharedp < argc) {
+ long room = shared_capa - len - argc;
+ room -= room / 4;
+ MEMMOVE(sharedp + argc + room, head, VALUE, len);
+ if (head - sharedp < room) {
+ rb_mem_clear(head, room - (head - sharedp));
+ }
+ head = sharedp + argc + room;
+ }
+ MEMCPY(head - argc, argv, VALUE, argc);
+ ARY_SET_PTR(ary, head - argc);
+ ARY_SET_LEN(ary, len + argc);
+ return 1;
+ }
+ return 0;
+}
+
/*
* call-seq:
* ary.unshift(obj, ...) -> ary
@@ -902,18 +1005,43 @@ rb_ary_shift_m(int argc, VALUE *argv, VALUE ary)
static VALUE
rb_ary_unshift_m(int argc, VALUE *argv, VALUE ary)
{
- long len;
+ long len = RARRAY_LEN(ary), capa;
+
+ if (argc == 0) {
+ rb_ary_modify_check(ary);
+ return ary;
+ }
+
+ if (ARY_SHARED_P(ary) && rb_ary_unshift_to_shared(argc, argv, ary)) {
+ return ary;
+ }
rb_ary_modify(ary);
- if (argc == 0) return ary;
- if (ARY_CAPA(ary) <= (len = RARRAY_LEN(ary)) + argc) {
+ capa = ARY_CAPA(ary);
+ if ((capa > ARY_DEFAULT_SIZE ? capa - 3 : capa) <= len + argc) {
ary_double_capa(ary, len + argc);
}
- /* sliding items */
- MEMMOVE(RARRAY_PTR(ary) + argc, RARRAY_PTR(ary), VALUE, len);
- MEMCPY(RARRAY_PTR(ary), argv, VALUE, argc);
- ARY_INCREASE_LEN(ary, argc);
+ /* use shared array for big "queues" */
+ if (len + argc > ARY_DEFAULT_SIZE * 4) {
+ /* make a room for unshifted items */
+ long room, capa = ARY_CAPA(ary);
+ VALUE *head = RARRAY_PTR(ary);
+ room = capa - len - argc;
+ room -= room / 4;
+ ary_make_shared(ary);
+ MEMMOVE(head + argc + room, head, VALUE, len);
+ MEMCPY(head + room, argv, VALUE, argc);
+ rb_mem_clear(head, room);
+ ARY_SET_LEN(ary, len + argc);
+ ARY_SET_PTR(ary, head + room);
+ }
+ else {
+ /* sliding items */
+ MEMMOVE(RARRAY_PTR(ary) + argc, RARRAY_PTR(ary), VALUE, len);
+ MEMCPY(RARRAY_PTR(ary), argv, VALUE, argc);
+ ARY_SET_LEN(ary, len + argc);
+ }
return ary;
}
@@ -2083,12 +2211,13 @@ rb_ary_sort_bang(VALUE ary)
if (RARRAY_LEN(ary) > 1) {
VALUE tmp = ary_make_substitution(ary); /* only ary refers tmp */
struct ary_sort_data data;
+ long len = RARRAY_LEN(ary);
RBASIC(tmp)->klass = 0;
data.ary = tmp;
data.opt_methods = 0;
data.opt_inited = 0;
- ruby_qsort(RARRAY_PTR(tmp), RARRAY_LEN(tmp), sizeof(VALUE),
+ ruby_qsort(RARRAY_PTR(tmp), len, sizeof(VALUE),
rb_block_given_p()?sort_1:sort_2, &data);
if (ARY_EMBED_P(tmp)) {
@@ -2105,7 +2234,7 @@ rb_ary_sort_bang(VALUE ary)
if (ARY_HEAP_PTR(ary) == ARY_HEAP_PTR(tmp)) {
assert(!ARY_EMBED_P(ary));
FL_UNSET_SHARED(ary);
- ARY_SET_CAPA(ary, ARY_CAPA(tmp));
+ ARY_SET_CAPA(ary, RARRAY_LEN(tmp));
}
else {
assert(!ARY_SHARED_P(tmp));
@@ -2120,8 +2249,8 @@ rb_ary_sort_bang(VALUE ary)
xfree(ARY_HEAP_PTR(ary));
}
ARY_SET_PTR(ary, RARRAY_PTR(tmp));
- ARY_SET_HEAP_LEN(ary, RARRAY_LEN(tmp));
- ARY_SET_CAPA(ary, ARY_CAPA(tmp));
+ ARY_SET_HEAP_LEN(ary, len);
+ ARY_SET_CAPA(ary, RARRAY_LEN(tmp));
}
/* tmp was lost ownership for the ptr */
FL_UNSET(tmp, FL_FREEZE);
@@ -3003,7 +3132,15 @@ rb_ary_concat(VALUE x, VALUE y)
rb_ary_modify_check(x);
y = to_ary(y);
if (RARRAY_LEN(y) > 0) {
- rb_ary_splice(x, RARRAY_LEN(x), 0, y);
+ /* rb_ary_splice make much more work than rb_ary_cat,
+ * so that, I think one more condition is justified */
+ if (x != y) {
+ rb_ary_cat(x, RARRAY_PTR(y), RARRAY_LEN(y));
+ RB_GC_GUARD(y);
+ }
+ else {
+ rb_ary_splice(x, RARRAY_LEN(x), 0, y);
+ }
}
return x;
}
require 'benchmark'
N = (ARGV[0] || 1000000).to_i
def push_shift(que, n)
n.times{|i| que.push(i)}
n.times{|i| que.shift}
end
def test_push_shift(size, count)
ar = (1..size).to_a
(N/count).times{|_| push_shift(ar, count) }
end
def unshift_pop(que, n)
n.times{|i| que.unshift(i)}
n.times{|i| que.pop}
end
def test_unshift_pop(size, count)
ar = (1..size).to_a
(N/count).times{|_| unshift_pop(ar, count) }
end
def test_case(func, size, count)
lambda{ send(:"test_#{func}", size, count) }
end
Benchmark.bmbm(6) do |x|
cases = [1, 10, 50, 100, 200, 500, 1000].product([1, 2, 5, 10])
for size, count in cases
x.report("push_shift #{size} #{count}", &test_case(:push_shift, size, count))
x.report("unshift_pop #{size} #{count}", &test_case(:unshift_pop, size, count))
end
end
user system total real
push_shift 1 1 0.650000 0.000000 0.650000 ( 0.652491)
unshift_pop 1 1 0.660000 0.000000 0.660000 ( 0.655564)
push_shift 1 2 0.500000 0.000000 0.500000 ( 0.499913)
unshift_pop 1 2 0.590000 0.000000 0.590000 ( 0.589515)
push_shift 1 5 0.400000 0.000000 0.400000 ( 0.397676)
unshift_pop 1 5 0.400000 0.000000 0.400000 ( 0.403689)
push_shift 1 10 0.360000 0.000000 0.360000 ( 0.359447)
unshift_pop 1 10 0.370000 0.000000 0.370000 ( 0.367177)
push_shift 10 1 0.660000 0.000000 0.660000 ( 0.660654)
unshift_pop 10 1 0.670000 0.000000 0.670000 ( 0.668498)
push_shift 10 2 0.500000 0.000000 0.500000 ( 0.503704)
unshift_pop 10 2 0.510000 0.000000 0.510000 ( 0.506029)
push_shift 10 5 0.400000 0.000000 0.400000 ( 0.397156)
unshift_pop 10 5 0.400000 0.000000 0.400000 ( 0.400085)
push_shift 10 10 0.330000 0.000000 0.330000 ( 0.336753)
unshift_pop 10 10 0.400000 0.000000 0.400000 ( 0.401316)
push_shift 50 1 0.650000 0.000000 0.650000 ( 0.657677)
unshift_pop 50 1 0.690000 0.000000 0.690000 ( 0.687709)
push_shift 50 2 0.490000 0.000000 0.490000 ( 0.484648)
unshift_pop 50 2 0.540000 0.000000 0.540000 ( 0.543795)
push_shift 50 5 0.380000 0.000000 0.380000 ( 0.379978)
unshift_pop 50 5 0.420000 0.000000 0.420000 ( 0.424817)
push_shift 50 10 0.350000 0.000000 0.350000 ( 0.343696)
unshift_pop 50 10 0.390000 0.000000 0.390000 ( 0.385852)
push_shift 100 1 0.650000 0.000000 0.650000 ( 0.652744)
unshift_pop 100 1 0.660000 0.000000 0.660000 ( 0.662414)
push_shift 100 2 0.480000 0.000000 0.480000 ( 0.477300)
unshift_pop 100 2 0.490000 0.000000 0.490000 ( 0.493553)
push_shift 100 5 0.370000 0.000000 0.370000 ( 0.373769)
unshift_pop 100 5 0.380000 0.000000 0.380000 ( 0.373796)
push_shift 100 10 0.340000 0.000000 0.340000 ( 0.340117)
unshift_pop 100 10 0.330000 0.000000 0.330000 ( 0.338587)
push_shift 200 1 0.650000 0.000000 0.650000 ( 0.652331)
unshift_pop 200 1 0.660000 0.000000 0.660000 ( 0.661295)
push_shift 200 2 0.480000 0.000000 0.480000 ( 0.475984)
unshift_pop 200 2 0.480000 0.000000 0.480000 ( 0.488238)
push_shift 200 5 0.380000 0.000000 0.380000 ( 0.371957)
unshift_pop 200 5 0.370000 0.000000 0.370000 ( 0.373429)
push_shift 200 10 0.330000 0.000000 0.330000 ( 0.332422)
unshift_pop 200 10 0.340000 0.000000 0.340000 ( 0.334362)
push_shift 500 1 0.650000 0.000000 0.650000 ( 0.652034)
unshift_pop 500 1 0.780000 0.000000 0.780000 ( 0.781155)
push_shift 500 2 0.470000 0.000000 0.470000 ( 0.471095)
unshift_pop 500 2 0.500000 0.000000 0.500000 ( 0.496558)
push_shift 500 5 0.390000 0.000000 0.390000 ( 0.389123)
unshift_pop 500 5 0.370000 0.000000 0.370000 ( 0.374500)
push_shift 500 10 0.340000 0.000000 0.340000 ( 0.331834)
unshift_pop 500 10 0.330000 0.000000 0.330000 ( 0.334508)
push_shift 1000 1 0.650000 0.000000 0.650000 ( 0.651575)
unshift_pop 1000 1 0.660000 0.000000 0.660000 ( 0.661359)
push_shift 1000 2 0.480000 0.000000 0.480000 ( 0.474535)
unshift_pop 1000 2 0.480000 0.000000 0.480000 ( 0.478616)
push_shift 1000 5 0.390000 0.000000 0.390000 ( 0.394620)
unshift_pop 1000 5 0.380000 0.000000 0.380000 ( 0.376721)
push_shift 1000 10 0.330000 0.000000 0.330000 ( 0.328130)
unshift_pop 1000 10 0.330000 0.000000 0.330000 ( 0.330137)
user system total real
push_shift 1 1 0.680000 0.000000 0.680000 ( 0.676353)
unshift_pop 1 1 0.700000 0.000000 0.700000 ( 0.703192)
push_shift 1 2 0.500000 0.000000 0.500000 ( 0.498007)
unshift_pop 1 2 0.530000 0.000000 0.530000 ( 0.529290)
push_shift 1 5 0.400000 0.000000 0.400000 ( 0.397688)
unshift_pop 1 5 0.410000 0.000000 0.410000 ( 0.407695)
push_shift 1 10 0.360000 0.000000 0.360000 ( 0.360972)
unshift_pop 1 10 0.370000 0.000000 0.370000 ( 0.372272)
push_shift 10 1 0.670000 0.000000 0.670000 ( 0.666040)
unshift_pop 10 1 0.710000 0.000000 0.710000 ( 0.713317)
push_shift 10 2 0.500000 0.000000 0.500000 ( 0.502478)
unshift_pop 10 2 0.530000 0.000000 0.530000 ( 0.523753)
push_shift 10 5 0.380000 0.000000 0.380000 ( 0.389965)
unshift_pop 10 5 0.400000 0.000000 0.400000 ( 0.406787)
push_shift 10 10 0.360000 0.000000 0.360000 ( 0.355443)
unshift_pop 10 10 0.370000 0.000000 0.370000 ( 0.368613)
push_shift 50 1 1.160000 0.000000 1.160000 ( 1.157077)
unshift_pop 50 1 0.740000 0.000000 0.740000 ( 0.741113)
push_shift 50 2 0.720000 0.000000 0.720000 ( 0.722402)
unshift_pop 50 2 0.550000 0.000000 0.550000 ( 0.552752)
push_shift 50 5 0.470000 0.000000 0.470000 ( 0.468821)
unshift_pop 50 5 0.420000 0.000000 0.420000 ( 0.422017)
push_shift 50 10 0.380000 0.000000 0.380000 ( 0.378803)
unshift_pop 50 10 0.380000 0.000000 0.380000 ( 0.375715)
push_shift 100 1 1.260000 0.000000 1.260000 ( 1.262165)
unshift_pop 100 1 0.750000 0.000000 0.750000 ( 0.753296)
push_shift 100 2 0.770000 0.000000 0.770000 ( 0.772351)
unshift_pop 100 2 0.570000 0.000000 0.570000 ( 0.565448)
push_shift 100 5 0.490000 0.000000 0.490000 ( 0.494461)
unshift_pop 100 5 0.440000 0.000000 0.440000 ( 0.435878)
push_shift 100 10 0.390000 0.000000 0.390000 ( 0.390234)
unshift_pop 100 10 0.390000 0.000000 0.390000 ( 0.393196)
push_shift 200 1 1.490000 0.000000 1.490000 ( 1.483872)
unshift_pop 200 1 0.800000 0.000000 0.800000 ( 0.798493)
push_shift 200 2 0.870000 0.000000 0.870000 ( 0.869386)
unshift_pop 200 2 0.590000 0.000000 0.590000 ( 0.595064)
push_shift 200 5 0.540000 0.000000 0.540000 ( 0.540635)
unshift_pop 200 5 0.470000 0.000000 0.470000 ( 0.463894)
push_shift 200 10 0.420000 0.000000 0.420000 ( 0.420032)
unshift_pop 200 10 0.440000 0.000000 0.440000 ( 0.438353)
push_shift 500 1 2.110000 0.000000 2.110000 ( 2.117800)
unshift_pop 500 1 0.900000 0.000000 0.900000 ( 0.900010)
push_shift 500 2 1.180000 0.000000 1.180000 ( 1.181661)
unshift_pop 500 2 0.700000 0.000000 0.700000 ( 0.694545)
push_shift 500 5 0.660000 0.000000 0.660000 ( 0.664302)
unshift_pop 500 5 0.560000 0.000000 0.560000 ( 0.563241)
push_shift 500 10 0.480000 0.000000 0.480000 ( 0.482299)
unshift_pop 500 10 0.510000 0.000000 0.510000 ( 0.515048)
push_shift 1000 1 2.980000 0.000000 2.980000 ( 2.984535)
unshift_pop 1000 1 1.050000 0.000000 1.050000 ( 1.049828)
push_shift 1000 2 1.640000 0.000000 1.640000 ( 1.646758)
unshift_pop 1000 2 0.840000 0.000000 0.840000 ( 0.835186)
push_shift 1000 5 0.830000 0.000000 0.830000 ( 0.834429)
unshift_pop 1000 5 0.700000 0.000000 0.700000 ( 0.704193)
push_shift 1000 10 0.560000 0.000000 0.560000 ( 0.564654)
unshift_pop 1000 10 0.660000 0.000000 0.660000 ( 0.656317)
@mpapis
Copy link

mpapis commented Jul 2, 2012

I wanted to add it to RVM, can it be used for all 1.9.3 patchlevels or it's only for -p194 ?

@funny-falcon
Copy link
Author

@mpapis , I had no test it against previous patch levels. I could do it and provide fixed patches, if you wish.
I may merge it into falcon.patch, if you want.

@mpapis
Copy link

mpapis commented Jul 2, 2012

we do not want to promote older patch levels - if it was not tested on older patchlevels do not waste your time on it,

but if that could be merged to the falcon patch for -p194 - that would be awesome - and thank you for the great work!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment