public
Last active

Patch against ruby-1.9.3-p0 to futher improve load.c

  • Download Gist
load.c.patch
Diff
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255
diff --git a/load.c b/load.c
index 0ff4b60..019ccb9 100644
--- a/load.c
+++ b/load.c
@@ -18,6 +18,7 @@ VALUE ruby_dln_librefs;
#define IS_DLEXT(e) (strcmp((e), DLEXT) == 0)
#endif
+static int sorted_loaded_features = 1;
static const char *const loadable_ext[] = {
".rb", DLEXT,
@@ -129,12 +130,16 @@ loaded_feature_path_i(st_data_t v, st_data_t b, st_data_t f)
return ST_STOP;
}
+static int rb_feature_first_equal_or_greater(VALUE, const char *, long);
+static int rb_stop_search_feature(VALUE, const char *, long);
+static int feature_basename_length(const char *, long);
+
static int
rb_feature_p(const char *feature, const char *ext, int rb, int expanded, const char **fn)
{
VALUE v, features, p, load_path = 0;
const char *f, *e;
- long i, len, elen, n;
+ long i, len, elen, n, flen;
st_table *loading_tbl;
st_data_t data;
int type;
@@ -151,8 +156,10 @@ rb_feature_p(const char *feature, const char *ext, int rb, int expanded, const c
type = 0;
}
features = get_loaded_features();
- for (i = 0; i < RARRAY_LEN(features); ++i) {
+ i = rb_feature_first_equal_or_greater(features, feature, len);
+ for (; i < RARRAY_LEN(features); ++i) {
v = RARRAY_PTR(features)[i];
+ if (rb_stop_search_feature(v, feature, len)) break;
f = StringValuePtr(v);
if ((n = RSTRING_LEN(v)) < len) continue;
if (strncmp(f, feature, len) != 0) {
@@ -176,14 +183,14 @@ rb_feature_p(const char *feature, const char *ext, int rb, int expanded, const c
}
}
loading_tbl = get_loading_table();
- if (loading_tbl) {
+ if (loading_tbl && loading_tbl->num_entries > 0) {
f = 0;
if (!expanded) {
struct loaded_feature_searching fs;
fs.name = feature;
fs.len = len;
fs.type = type;
- fs.load_path = load_path ? load_path : rb_get_load_path();
+ fs.load_path = load_path ? load_path : rb_get_expanded_load_path();
fs.result = 0;
st_foreach(loading_tbl, loaded_feature_path_i, (st_data_t)&fs);
if ((f = fs.result) != 0) {
@@ -251,6 +258,174 @@ rb_feature_provided(const char *feature, const char **loading)
return FALSE;
}
+static int
+feature_basename_length(const char *feature, long flen)
+{
+ if (sorted_loaded_features) {
+ const char *ext;
+ for (ext = feature + (flen - 1); ext >= feature; ext--) {
+ if (*ext == '.') return ext - feature;
+ if (*ext == '/') return flen;
+ }
+ return flen;
+ } else {
+ return 0;
+ }
+}
+
+static int
+compare_feature_name(const char *left, long llen, const char *right, long rlen)
+{
+ int diff = 0;
+ while (llen-- && rlen--) {
+ diff = left[llen] - right[rlen];
+ if (diff) break;
+ if (left[llen] == '/') break;
+ }
+ return diff;
+}
+
+static int
+rb_compare_feature_name(VALUE loaded, const char *feature, long flen)
+{
+ const char *loaded_name = StringValuePtr(loaded);
+ long loaded_len = feature_basename_length(loaded_name, RSTRING_LEN(loaded));
+ return compare_feature_name(loaded_name, loaded_len, feature, flen);
+}
+
+/* used to find when equal features run out */
+static int
+rb_stop_search_feature(VALUE loaded, const char *feature, long flen)
+{
+ if (sorted_loaded_features)
+ return rb_compare_feature_name(loaded, feature, flen) > 0;
+ else
+ return FALSE;
+}
+
+/* returns first position to search feature from */
+static int
+rb_feature_first_equal_or_greater(VALUE features, const char *feature, long flen)
+{
+ if (sorted_loaded_features) {
+ int before = 0, first = RARRAY_LEN(features);
+ VALUE *values = RARRAY_PTR(features);
+ if (first == 0)
+ return 0;
+ if (rb_compare_feature_name(values[0], feature, flen) >= 0)
+ return 0;
+
+ while (first - before > 1) {
+ int mid = (first + before) / 2;
+ int cmp = rb_compare_feature_name(values[mid], feature, flen);
+ if (cmp >= 0)
+ first = mid;
+ else
+ before = mid;
+ }
+ return first;
+ } else {
+ return 0;
+ }
+}
+
+/* returns position to insert new feature in */
+static int
+rb_feature_first_greater(VALUE features, const char *feature, long flen)
+{
+ if (sorted_loaded_features) {
+ int before = 0, first = RARRAY_LEN(features);
+ VALUE *values = RARRAY_PTR(features);
+ if (first == 0)
+ return 0;
+ if (rb_compare_feature_name(values[0], feature, flen) > 0)
+ return 0;
+ if (rb_compare_feature_name(values[first-1], feature, flen) <= 0)
+ return first;
+
+ while (first - before > 1) {
+ int mid = (first + before) / 2;
+ int cmp = rb_compare_feature_name(values[mid], feature, flen);
+ if (cmp > 0)
+ first = mid;
+ else
+ before = mid;
+ }
+ return first;
+ } else {
+ return RARRAY_LEN(features);
+ }
+}
+
+
+static VALUE
+rb_push_feature_1(VALUE features, VALUE feature)
+{
+ const char *fname = StringValuePtr(feature);
+ long flen = feature_basename_length(fname, RSTRING_LEN(feature));
+ int i = rb_feature_first_greater(features, fname, flen);
+ rb_ary_push(features, feature);
+ if ( i < RARRAY_LEN(features) - 1 ) {
+ MEMMOVE(RARRAY_PTR(features) + i + 1, RARRAY_PTR(features) + i,
+ VALUE, RARRAY_LEN(features) - i - 1);
+ RARRAY_PTR(features)[i] = feature;
+ }
+ return features;
+}
+
+static VALUE
+rb_push_feature_m(int argc, VALUE *argv, VALUE features)
+{
+ while (argc--) {
+ rb_push_feature_1(features, *argv++);
+ }
+ return features;
+}
+
+static VALUE
+rb_concat_features(VALUE features, VALUE add)
+{
+ add = rb_convert_type(add, T_ARRAY, "Array", "to_ary");
+ if (RARRAY_LEN(add)) {
+ rb_push_feature_m(RARRAY_LEN(add), RARRAY_PTR(add), features);
+ }
+ return features;
+}
+static const char *load_features_undefined_methods[] = {
+ "[]=", "reverse!", "rotate!", "sort!", "sort_by!",
+ "collect!", "map!", "shuffle!", "fill", "insert",
+ NULL
+};
+
+static VALUE
+rb_loaded_features_init(void)
+{
+ char *sorted_flag;
+ const char **name;
+ VALUE loaded_features = rb_ary_new();
+ VALUE loaded_features_c = rb_singleton_class(loaded_features);
+
+ sorted_flag = getenv("RUBY_LOADED_FEATURES_SORTED");
+ if (sorted_flag != NULL) {
+ int sorted_set = atoi(sorted_flag);
+ if (RTEST(ruby_verbose))
+ fprintf(stderr, "sorted_loaded_features=%d (%d)\n", sorted_set, sorted_loaded_features);
+ sorted_loaded_features = sorted_set;
+ }
+
+ for(name = load_features_undefined_methods; *name; name++) {
+ rb_undef_method(loaded_features_c, *name);
+ }
+
+ if (sorted_loaded_features) {
+ rb_define_method(loaded_features_c, "<<", rb_push_feature_1, 1);
+ rb_define_method(loaded_features_c, "push", rb_push_feature_m, -1);
+ rb_define_method(loaded_features_c, "concat", rb_concat_features, 1);
+ rb_define_method(loaded_features_c, "unshift", rb_push_feature_m, -1);
+ }
+ return loaded_features;
+}
+
static void
rb_provide_feature(VALUE feature)
{
@@ -258,7 +433,10 @@ rb_provide_feature(VALUE feature)
rb_raise(rb_eRuntimeError,
"$LOADED_FEATURES is frozen; cannot append feature");
}
- rb_ary_push(get_loaded_features(), feature);
+ if (sorted_loaded_features)
+ rb_push_feature_1(get_loaded_features(), feature);
+ else
+ rb_ary_push(get_loaded_features(), feature);
}
void
@@ -776,7 +954,7 @@ Init_load()
rb_define_virtual_variable("$\"", get_loaded_features, 0);
rb_define_virtual_variable("$LOADED_FEATURES", get_loaded_features, 0);
- vm->loaded_features = rb_ary_new();
+ vm->loaded_features = rb_loaded_features_init();
rb_define_global_function("load", rb_f_load, -1);
rb_define_global_function("require", rb_f_require, 1);

It improves loading of not too complex rails application from 6.4 seconds to 5.6 seconds.
Pull request https://github.com/ruby/ruby/pull/51

what do you mean "not too complex rails applications"?

can you explain in your words what this improves?

can you maybe perjaps comment your code?

"not too complex" means: ~20 models, ~15 controllers, not big bag of gems.

Currently require loops throw all loaded features to discover: is requested feature is already loaded?
There were other way cause loaded_features were unsorted.
With this patch loaded_features id keep sorted by filename without extension ('/usr/lib/ruby/asdf.rb'=>'asdf'), so that, checking could use binary search.

Currently it sorts by reversed filename ('/usr/lib/ruby/asdf.rb' => 'fdsa'). It simplifies code for me a bit, but I think is not necessary.

I understand that binary search can lead to a performance improvement, as it changes an O(n) operation into a O(log(n)) operation.
What is unclear to me: will it always return the same result as the original implementation?
Also, I don't quite understand the interaction of require and changing the loadpath dynamically in ruby.
What's the result of changing the loadpath in such a way that

require "a"
would load a different file when it is called a second time?

Patch preserves load_path checking, it only allows to skip fast all loaded_features with different basename (filename without directory path and extension), but still checks those, whose basename is equal (that is why I only add lines to rb_feature_p without any deletion). So that, there will be no any mistake in second require "a"

Patch applies cleanly, builds on my Win7 32bit using TDM-GCC 4.6.1 at ruby 1.9.3dev (2011-10-11 revision 33457) [i386-mingw32] and passes all make test-all TESTS='openssl fiddle psych' && make test

:)

Will try to get you some more benchmarking data using some custom stuff from awhile ago:

https://github.com/jonforums/measurements/blob/master/workloads/core_require_empty.rb
https://github.com/jonforums/measurements/blob/master/workloads/core_require_nested.rb

I see I was clearing out $LOADED_FEATURES in these tests and I've got to remember why I did that other than trying to force a reload via require to measure a full require code path

https://github.com/jonforums/measurements/blob/master/lib/inquisitor.rb#L78-103

Frankly, I think those are irrelevant to your patch and I need to create another benchmark. Agree?

Frankly, I think those are irrelevant to your patch and I need to create another benchmark. Agree?

@jonforums, you may if you want :)

By the way: I only overloaded #<< and #push methods, and #push overloaded as getting strictly one parameter (as #<<), cause I hadn't meet other usage of it. Should #push be overloaded correctly? Should some other methods be overloaded? I see you use #concat, but just to replace with previous content of $LOADED_FEATURES - this way it's safe. Should #concat be overloaded to be safe in any usage? What about #[]=? It seems that this methods are not used anywhere.

@skaes, I check the case, second time other file is loaded.

Considering ./tmp/1.rb and ./tmp2/1.rb exist, following prints true twice:

pwd = `pwd`.chomp
$LOAD_PATH.unshift pwd + '/tmp'
require '1'
puts $LOADED_FEATURES.include?(pwd + '/tmp/1.rb')
$LOAD_PATH.shift   # without it neither raw ruby 1.9.2 neither patched version will not try to /tmp2/1.rb
$LOAD_PATH.unshift pwd + '/tmp2'
require '1'
puts $LOADED_FEATURES.include?(pwd + '/tmp2/1.rb')

@jonforums, I'm glad to see such improvement :) it is impressive even for me :)

fyi, just did a quick google code search and found a few usages of #concat

http://www.google.com/codesearch#search/&q=%5C$:%5C.concat%20lang:%5Eruby$&type=cs

@jonforums, $: is $LOAD_PATH . $LOADED_FEATURES is $"
It seems, that $LOADED_FEATURES.concat used only by jruby in a same safe way as you - replacing with previous content in testing load.

I measured the effect on a rails application using 'rails runner 1'.
Got only a tiny improvement: down to 1.63 seconds from 1.67 seconds.
Doesn't seem to justify the extra complexity introduced by the patch.

Decided not to integrate this into my patchsets, but made it available on a branch: https://github.com/skaes/rvm-patchsets/tree/keep-loaded-features-sorted

@skaes Could you paste output of rails runner 'puts $LOADED_FEATURES.size' ? My application prints about 1900 and improvement is about 16% over stock ruby 1.9.3.

@skaes that is why improvement is so small in your case. Could you test patch with larger application?
@eregon made a good test https://gist.github.com/1278881 which shows that improvement grows quick with number of required files.

Don't have a bigger application right now. And probably won't have one for a
while. We're in the process of converting a big one to rails 3, but we're
not there yet. And first we need to move data centers :(

On Tue, Oct 25, 2011 at 10:35 AM, Sokolov Yura <
reply@reply.github.com>wrote:

@skaes that is why improvement is so small in your case. Could you test
patch with larger application?
@eregon made a good test https://gist.github.com/1278881 which shows that
improvement grows quick with number of required files.

Reply to this email directly or view it on GitHub:
https://gist.github.com/1272991

@skaes sorry, my application's $LOADED_FEATURES.size is nere 900 files (892 in development and 982 in production envirionmetn), not 1900, and still patch's improvement is about 17-19%: from 5.6 seconds down to 4.7 seconds in development and 6.27 down to 5.15 seconds in production environment (AthlonX2 4000+ running on 2.1Ghz)

Is there a version of the patch for 1.8.7? I have a much larger 1.8.7 app.

@skaes I made same patch for 1.8.7 but it gains no improvement cause ruby 1.8.7 do much less work on each iteration of checking file loop.

now it is configurable with RUBY_LOADED_FEATURES_SORTED environment variable: RUBY_LOADED_FEATURES_SORTED=0 turns behaviour to similar with unpatched ruby.

getting corrupt patch when trying git apply on ruby_1_9_3

C:\Users\Jon\Documents\RubyDev\ruby-git>git log -3 --oneline
8f29ff7 * 2011-10-30
2c1d1fb Bump version
14c3e60 Skip too heavy test.

C:\Users\Jon\Documents\RubyDev\ruby-git>git apply --check --verbose load.c.patch
fatal: corrupt patch at line 248

@jonforums issue were in a newline character only. fixed.

I have a report from one man (@saks) about it's application startup time with and without patch:

$ time ./bin/rails runner 'puts'

1.9.3-p0 without patch:
real 0m22.683s
user 0m21.417s
sys 0m0.956s

with patch:
real 0m17.806s
user 0m16.633s
sys 0m0.892s

thanks. looks fine.

git diff --check can't wait to meet you ;)

C:\Users\Jon\Documents\RubyDev\ruby-git>git apply --verbose load.c.patch
load.c.patch:92: trailing whitespace.
/* used to find when equal features run out */
Checking patch load.c...
Applied patch load.c cleanly.
warning: 1 line adds whitespace errors.

Aaaa ok, another trailing space... fixed

$ git checkout load.c
$ wget https://gist.github.com/raw/1272991/02bea3fef4ac3ff0eac1e8571225399842205acc/load.c.patch
--2011-11-03 09:41:13--  https://gist.github.com/raw/1272991/02bea3fef4ac3ff0eac1e8571225399842205acc/load.c.patch
Resolving gist.github.com... 207.97.227.243
Connecting to gist.github.com|207.97.227.243|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.github.com/gist/1272991/02bea3fef4ac3ff0eac1e8571225399842205acc/load.c.patch [following]
--2011-11-03 09:41:14--  https://raw.github.com/gist/1272991/02bea3fef4ac3ff0eac1e8571225399842205acc/load.c.patch
Resolving raw.github.com... 207.97.227.243
Connecting to raw.github.com|207.97.227.243|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 7159 (7,0K) [text/plain]
Saving to: `load.c.patch'

100%[===================================================================================================================>] 7 159       --.-K/s   in 0s      

2011-11-03 09:41:15 (173 MB/s) - `load.c.patch' saved [7159/7159]

$ git apply --verbose load.c.patch
Checking patch load.c...
Applied patch load.c cleanly.

@skaes

I measured the effect on a rails application using 'rails runner 1'.
Got only a tiny improvement: down to 1.63 seconds from 1.67 seconds.
Doesn't seem to justify the extra complexity introduced by the patch.
I just retest patch against 1.9.2-p260 on clear rails application (rails new test_load) I've got following results (ubuntu 11.04 AthlonX2 on 2.1GHz):
Before 06-keep-loaded-features-sorted.patch

$ time rails runner 'puts $LOADED_FEATURES.size'
579

real    0m3.077s
user    0m2.660s
sys 0m0.420s

After patch

$ time rails runner 'puts $LOADED_FEATURES.size'
579

real    0m2.834s
user    0m2.436s
sys 0m0.400s

It seems that difference should be at least 8% on such small application. How did you get only 2.5% percent difference?

install it under rvm:

rvm install ruby-1.9.3-p0 --patch load.c.patch -n patched

Fixed bug, which prevents gem update --system.
Actually, I think this is hidden bug in stock ruby. I filled pull request https://github.com/ruby/ruby/pull/63 and ticket in redmine http://redmine.ruby-lang.org/issues/5727 .

Cumulative patch: this and "cached expanded load path" https://gist.github.com/1484985

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.