Skip to content

Instantly share code, notes, and snippets.

@funny-falcon
Created December 15, 2011 08:47
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save funny-falcon/1480404 to your computer and use it in GitHub Desktop.
Cache expanded_load_patch (patch against ruby-1.9.3-p0)
From 9088356975d13a9a7fbe6832dfd56a8b2ff7b21d Mon Sep 17 00:00:00 2001
From: Sokolov Yura <funny.falcon@gmail.com>
Date: Wed, 18 Jan 2012 14:01:59 +0400
Subject: [PATCH] cached load_path patch
This patch add caching of expanded $LOAD_PATH elements.
It invalidates cache on current directory change and
filesystem encoding change.
---
internal.h | 2 +
load.c | 273 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
ruby.c | 8 +-
vm.c | 1 +
vm_core.h | 1 +
5 files changed, 262 insertions(+), 23 deletions(-)
diff --git a/internal.h b/internal.h
index 172e7f4..11e4d30 100644
--- a/internal.h
+++ b/internal.h
@@ -108,6 +108,8 @@ VALUE rb_iseq_clone(VALUE iseqval, VALUE newcbase);
/* load.c */
VALUE rb_get_load_path(void);
+void rb_reset_expanded_cache();
+void rb_load_path_ary_push(VALUE path);
/* math.c */
VALUE rb_math_atan2(VALUE, VALUE);
diff --git a/load.c b/load.c
index 0ff4b60..da3f7d4 100644
--- a/load.c
+++ b/load.c
@@ -4,6 +4,7 @@
#include "ruby/ruby.h"
#include "ruby/util.h"
+#include "ruby/encoding.h"
#include "internal.h"
#include "dln.h"
#include "eval_intern.h"
@@ -27,28 +28,44 @@ static const char *const loadable_ext[] = {
0
};
-VALUE
-rb_get_load_path(void)
-{
- VALUE load_path = GET_VM()->load_path;
- return load_path;
-}
+static VALUE rb_checked_expanded_cache(int*);
+static void rb_set_expanded_cache(VALUE, int);
+static VALUE rb_expand_load_paths(int, VALUE*, int*);
+static int cached_expanded_load_path = 1;
VALUE
rb_get_expanded_load_path(void)
{
- VALUE load_path = rb_get_load_path();
- VALUE ary;
- long i;
+ VALUE expanded = rb_checked_expanded_cache(NULL);
- ary = rb_ary_new2(RARRAY_LEN(load_path));
- for (i = 0; i < RARRAY_LEN(load_path); ++i) {
- VALUE path = rb_file_expand_path(RARRAY_PTR(load_path)[i], Qnil);
- rb_str_freeze(path);
- rb_ary_push(ary, path);
+ if ( !RTEST(expanded) ) {
+ VALUE load_path = GET_VM()->load_path;
+ int has_relative = 0;
+
+ if (!load_path) return 0;
+
+ expanded = rb_expand_load_paths(
+ RARRAY_LEN(load_path), RARRAY_PTR(load_path),
+ &has_relative);
+ RB_GC_GUARD(load_path);
+
+ if (cached_expanded_load_path) {
+ rb_set_expanded_cache(expanded, has_relative);
+ }
+ } else {
+ expanded = rb_ary_dup(expanded);
}
- rb_obj_freeze(ary);
- return ary;
+ return expanded;
+}
+
+VALUE
+rb_get_load_path(void)
+{
+ VALUE load_path =
+ cached_expanded_load_path ?
+ rb_get_expanded_load_path():
+ GET_VM()->load_path;
+ return load_path;
}
static VALUE
@@ -183,7 +200,7 @@ rb_feature_p(const char *feature, const char *ext, int rb, int expanded, const c
fs.name = feature;
fs.len = len;
fs.type = type;
- fs.load_path = load_path ? load_path : rb_get_load_path();
+ fs.load_path = load_path ? load_path : rb_get_expanded_load_path();
fs.result = 0;
st_foreach(loading_tbl, loaded_feature_path_i, (st_data_t)&fs);
if ((f = fs.result) != 0) {
@@ -760,6 +777,226 @@ rb_f_autoload_p(VALUE obj, VALUE sym)
return rb_mod_autoload_p(klass, sym);
}
+/* $LOAD_PATH methods which invalidates cache */
+static const char *load_path_reset_cache_methods[] = {
+ "[]=", "collect!", "compact!", "delete",
+ "delete_if", "fill", "flatten!", "insert", "keep_if",
+ "map!", "reject!", "replace", "select!", "shuffle!",
+ "sort!", "sort_by!", "uniq!", NULL
+};
+
+/* $LOAD_PATH methods which sends also to cache */
+static const char *load_path_apply_to_cache_methods[] = {
+ "clear", "delete_at", "pop", "reverse!", "rotate!",
+ "shift", "slice!", NULL
+};
+
+/* $LOAD_PATH methods which sends to cache whith expanded arguments */
+static const char *load_path_apply_expanded_methods[] = {
+ "<<", "push", "unshift", NULL
+};
+
+void
+rb_reset_expanded_cache()
+{
+ GET_VM()->load_path_expanded_cache = 0;
+}
+
+static VALUE
+rb_load_path_expanded_cache()
+{
+ VALUE cache = GET_VM()->load_path_expanded_cache;
+ VALUE expanded = Qnil;
+ if (RTEST(cache)) {
+ expanded = RARRAY_PTR(cache)[2];
+ }
+ return expanded;
+}
+
+/* Return cache only if we still in the same working directory
+ * and filesystem_encoding didn't change
+ * Invalidate cache otherwise
+ */
+static VALUE
+rb_checked_expanded_cache(int *has_relative)
+{
+ VALUE cache = GET_VM()->load_path_expanded_cache;
+ VALUE expanded = Qnil;
+ if (RTEST(cache)) {
+ VALUE curwd = RARRAY_PTR(cache)[0];
+ VALUE encindex = RARRAY_PTR(cache)[1];
+ int cache_valid = rb_filesystem_encindex() == FIX2INT(encindex);
+
+ if ( cache_valid ) {
+ cache_valid = curwd == Qtrue;
+ if (has_relative) {
+ *has_relative = cache_valid;
+ }
+ if (!cache_valid ) {
+ char *cwd = my_getcwd();
+ cache_valid = !strcmp(RSTRING_PTR(curwd), cwd);
+ xfree(cwd);
+ }
+ }
+
+ if ( !cache_valid ) {
+ rb_reset_expanded_cache();
+ } else {
+ expanded = RARRAY_PTR(cache)[2];
+ }
+ }
+ RB_GC_GUARD(cache);
+ return expanded;
+}
+
+static void
+rb_set_expanded_cache(VALUE expanded, int has_relative)
+{
+ VALUE cache = rb_ary_new2(3);
+
+ if (has_relative) {
+ char *cwd = my_getcwd();
+ rb_ary_push(cache, rb_str_new_cstr(cwd));
+ xfree(cwd);
+ } else {
+ rb_ary_push(cache, Qtrue);
+ }
+
+ rb_ary_push(cache, INT2FIX(rb_filesystem_encindex()));
+ rb_ary_push(cache, rb_ary_dup(expanded));
+ GET_VM()->load_path_expanded_cache = cache;
+}
+
+static VALUE
+rb_expand_load_paths(int pathc, VALUE* paths, int *has_relative)
+{
+ int i;
+ const char *p;
+ VALUE path, expanded = rb_ary_new2(pathc);
+
+ for(i = 0; i < pathc; i++) {
+ path = rb_get_path(paths[i]);
+ p = RSTRING_PTR(path);
+ *has_relative = *has_relative || !rb_is_absolute_path(p);
+ path = rb_file_expand_path(path, Qnil);
+ rb_str_freeze(path);
+ rb_ary_push(expanded, path);
+ }
+
+ return expanded;
+}
+
+/* Invalidating $LOAD_PATH methods implementation */
+static VALUE
+rb_load_path_reset_cache_method(int argc, VALUE *argv, VALUE self)
+{
+ rb_reset_expanded_cache();
+ return rb_call_super(argc, argv);
+}
+
+/* Proxying $LOAD_PATH methods implementation */
+static VALUE
+rb_load_path_apply_to_cache_method(int argc, VALUE *argv, VALUE self)
+{
+ VALUE load_path_expanded = rb_load_path_expanded_cache();
+ if (RTEST(load_path_expanded)) {
+ ID func = rb_frame_this_func();
+ rb_funcall2(load_path_expanded, func, argc, argv);
+ }
+ return rb_call_super(argc, argv);
+}
+
+/* Proxying with expansion $LOAD_PATH methods implementation */
+static VALUE
+rb_load_path_apply_expanded_method(int argc, VALUE *argv, VALUE self)
+{
+ int old_has_relative = 0;
+ /* We call methods on cache only if we still in the same working directory */
+ VALUE load_path_expanded = rb_checked_expanded_cache(&old_has_relative);
+ if (RTEST(load_path_expanded)) {
+ int has_relative = 0;
+ ID func = rb_frame_this_func();
+ VALUE expanded = rb_expand_load_paths(argc, argv, &has_relative);
+
+ rb_funcall2(load_path_expanded, func, argc, RARRAY_PTR(expanded));
+
+ if (!old_has_relative && has_relative) {
+ rb_set_expanded_cache(load_path_expanded, has_relative);
+ }
+ RB_GC_GUARD(expanded);
+ }
+ return rb_call_super(argc, argv);
+}
+/* $LOAD_PATH.concat(ary) - special, we call push(*ary) instead
+ * cause I'm lazy a bit and wish not to rewrite method above second time :)
+ */
+static VALUE
+rb_load_path_concat(VALUE self, VALUE ary)
+{
+ ID push;
+ CONST_ID(push, "push");
+ RB_GC_GUARD(ary);
+ return rb_funcall2(self, push, RARRAY_LEN(ary), RARRAY_PTR(ary));
+}
+
+void
+rb_load_path_ary_push(VALUE path)
+{
+ int old_has_relative = 0;
+ VALUE load_path_expanded = rb_checked_expanded_cache(&old_has_relative);
+ if (RTEST(load_path_expanded)) {
+ int has_relative = 0;
+ VALUE expanded = rb_expand_load_paths(1, &path, &has_relative);
+
+ rb_ary_push(load_path_expanded, RARRAY_PTR(expanded)[0]);
+
+ if (!old_has_relative && has_relative) {
+ rb_set_expanded_cache(load_path_expanded, has_relative);
+ }
+ RB_GC_GUARD(expanded);
+ }
+
+ rb_ary_push(GET_VM()->load_path, path);
+}
+
+static VALUE
+rb_load_path_init(void)
+{
+ const char **name;
+ VALUE load_path = rb_ary_new();
+ char *cached_flag;
+
+ cached_flag = getenv("RUBY_CACHED_LOAD_PATH");
+ if (cached_flag != NULL) {
+ cached_expanded_load_path = atoi(cached_flag);
+ }
+
+ /* Do all the magick if user did not disable it
+ * with RUBY_CACHED_LOAD_PATH=0 environment variable
+ */
+ if (cached_expanded_load_path) {
+ VALUE load_path_c = rb_singleton_class(load_path);
+
+ for(name = load_path_reset_cache_methods; *name; name++ ) {
+ rb_define_method(load_path_c, *name, rb_load_path_reset_cache_method, -1);
+ }
+
+ for(name = load_path_apply_to_cache_methods; *name; name++ ) {
+ rb_define_method(load_path_c, *name, rb_load_path_apply_to_cache_method, -1);
+ }
+
+ for(name = load_path_apply_expanded_methods; *name; name++ ) {
+ rb_define_method(load_path_c, *name, rb_load_path_apply_expanded_method, -1);
+ }
+
+ rb_define_method(load_path_c, "concat", rb_load_path_concat, 1);
+ }
+
+ rb_reset_expanded_cache();
+
+ return load_path;
+}
+
void
Init_load()
{
@@ -772,7 +1009,7 @@ Init_load()
rb_define_hooked_variable(var_load_path, (VALUE*)vm, load_path_getter, rb_gvar_readonly_setter);
rb_alias_variable(rb_intern("$-I"), id_load_path);
rb_alias_variable(rb_intern("$LOAD_PATH"), id_load_path);
- vm->load_path = rb_ary_new();
+ vm->load_path = rb_load_path_init();
rb_define_virtual_variable("$\"", get_loaded_features, 0);
rb_define_virtual_variable("$LOADED_FEATURES", get_loaded_features, 0);
diff --git a/ruby.c b/ruby.c
index b53784f..0897400 100644
--- a/ruby.c
+++ b/ruby.c
@@ -209,7 +209,6 @@ push_include(const char *path, VALUE (*filter)(VALUE))
{
const char sep = PATH_SEP_CHAR;
const char *p, *s;
- VALUE load_path = GET_VM()->load_path;
p = path;
while (*p) {
@@ -217,7 +216,7 @@ push_include(const char *path, VALUE (*filter)(VALUE))
p++;
if (!*p) break;
for (s = p; *s && *s != sep; s = CharNext(s));
- rb_ary_push(load_path, (*filter)(rubylib_mangled_path(p, s - p)));
+ rb_load_path_ary_push((*filter)(rubylib_mangled_path(p, s - p)));
p = s;
}
}
@@ -338,7 +337,6 @@ ruby_init_loadpath(void)
void
ruby_init_loadpath_safe(int safe_level)
{
- VALUE load_path;
ID id_initial_load_path_mark;
extern const char ruby_initial_load_paths[];
const char *paths = ruby_initial_load_paths;
@@ -438,7 +436,6 @@ ruby_init_loadpath_safe(int safe_level)
#define RUBY_RELATIVE(path, len) rubylib_mangled_path((path), (len))
#define PREFIX_PATH() RUBY_RELATIVE(exec_prefix, sizeof(exec_prefix)-1)
#endif
- load_path = GET_VM()->load_path;
if (safe_level == 0) {
#ifdef MANGLED_PATH
@@ -452,7 +449,7 @@ ruby_init_loadpath_safe(int safe_level)
size_t len = strlen(paths);
VALUE path = RUBY_RELATIVE(paths, len);
rb_ivar_set(path, id_initial_load_path_mark, path);
- rb_ary_push(load_path, path);
+ rb_load_path_ary_push(path);
paths += len + 1;
}
@@ -1349,6 +1346,7 @@ process_options(int argc, char **argv, struct cmdline_options *opt)
for (i = 0; i < RARRAY_LEN(load_path); ++i) {
rb_enc_associate(RARRAY_PTR(load_path)[i], lenc);
}
+ rb_reset_expanded_cache();
}
if (!(opt->disable & DISABLE_BIT(gems))) {
#if defined DISABLE_RUBYGEMS && DISABLE_RUBYGEMS
diff --git a/vm.c b/vm.c
index 2d7e15c..d1fe744 100644
--- a/vm.c
+++ b/vm.c
@@ -1575,6 +1575,7 @@ rb_vm_mark(void *ptr)
RUBY_MARK_UNLESS_NULL(vm->thgroup_default);
RUBY_MARK_UNLESS_NULL(vm->mark_object_ary);
RUBY_MARK_UNLESS_NULL(vm->load_path);
+ RUBY_MARK_UNLESS_NULL(vm->load_path_expanded_cache);
RUBY_MARK_UNLESS_NULL(vm->loaded_features);
RUBY_MARK_UNLESS_NULL(vm->top_self);
RUBY_MARK_UNLESS_NULL(vm->coverages);
diff --git a/vm_core.h b/vm_core.h
index 0dda1c4..f4dc58a 100644
--- a/vm_core.h
+++ b/vm_core.h
@@ -298,6 +298,7 @@ typedef struct rb_vm_struct {
/* load */
VALUE top_self;
VALUE load_path;
+ VALUE load_path_expanded_cache;
VALUE loaded_features;
struct st_table *loading_table;
--
1.7.4.1
@funny-falcon
Copy link
Author

Timing:

With patch turned off

$ time RUBY_CACHED_LOAD_PATH=0 rails runner 'puts $:.size, $".size'
66
933

real    0m7.330s
user    0m6.916s
sys 0m0.380s

$ time RUBY_CACHED_LOAD_PATH=0 RAILS_ENV=production rails runner 'puts $:.size, $".size'
66
1026

real    0m8.040s
user    0m7.464s
sys 0m0.428s

With patch turned on:

$ time rails runner 'puts $:.size, $".size'
66
933

real    0m5.367s
user    0m4.980s
sys 0m0.364s

$ time RAILS_ENV=production rails runner 'puts $:.size, $".size'
66
1026

real    0m5.859s
user    0m5.272s
sys 0m0.448s

@funny-falcon
Copy link
Author

install it under rvm:

rvm install ruby-1.9.3-p0 --patch cache_expanded_load_path.patch -n patched

@til
Copy link

til commented Dec 16, 2011

Wow, great speedup! Timings from a slighly larger rails project:

Without the patch:

 $ rbenv local 1.9.3-p0
 $ time bin/rails runner 'puts $:.size, $".size'
164
1709

real    0m13.232s
user    0m12.396s
sys     0m0.737s
 $ time RAILS_ENV=production bin/rails runner 'puts $:.size, $".size'
162
1456

real    0m11.588s
user    0m10.876s
sys     0m0.623s

With patch enabled:

 $ rbenv local 1.9.3-p0-cache-expanded-load-path
 $ time bin/rails runner 'puts $:.size, $".size'
164
1709

real    0m7.128s
user    0m6.316s
sys     0m0.717s
 $ time RAILS_ENV=production bin/rails runner 'puts $:.size, $".size'
162
1456

real    0m6.055s
user    0m5.330s
sys     0m0.627s

@funny-falcon
Copy link
Author

Cumulative patch: this and "sorted loaded features" https://gist.github.com/1484985

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment