funny-falcon/00-description.md

## 00-description.md

      
    Raw
  

              00-description.md
            
          
    This are parts of falcon patch for ruby-1.9.3-p327


01-backport-speedup-require.diff - backport of Greg Price's patch for speedup require.
For a long time my patch were famous cause of require speedup. ruby-core prefers slightly simpler patches from Greg Price,
and I wish not to compete with ruby-core, so that I just backport accepted changes.


02-st_opt.diff - speedup of Hash
This patch contains:

backport of accepted patch which compacts small hashes
ultrapacking for hashes of size 1
slab allocation for hash items


03-sparse_array.diff - speedup of method/constant/instance variable lookup
This patch introduce new hash structure instead of st_table for storing Constants, Methods and instance variable offsets.
This structure is more compact and suffers less from collisions that st_table.


04-ary_queue.diff - make Array really suitable as a queue
While there were a step in this direction in a ruby, this patch pushed it till the end.
Now push/shift pattern do not suffers from big queue size and high interleaving of push/shift.


05-backport-gc.diff - backport COW-friendly GC
Well, I update this patch so people could decide, worth it something or not.
I were disappointed with it cause it doesn't save much memory with big application and tuned garbage collector.


06-st_opt_sparse_array.diff - premerged 02 and 03


falcon.diff is a merge of 01, 04 and 06
falcon-gc.diff is a merge of falcon.diff and 05

  
## 01-backport-speedup-require.diff
diff --git a/array.c b/array.c
index e427cb3..cc15660 100644
--- a/array.c
+++ b/array.c
@@ -295,6 +295,33 @@ rb_ary_frozen_p(VALUE ary)
     return Qfalse;
 }

+/* This can be used to take a snapshot of an array (with
+   e.g. rb_ary_replace) and check later whether the array has been
+   modified from the snapshot.  The snapshot is cheap, though if
+   something does modify the array it will pay the cost of copying
+   it. */
+VALUE
+rb_ary_dup_of_p(VALUE ary1, VALUE ary2)
+{
+    VALUE *p1, *p2;
+    long len = RARRAY_LEN(ary1);
+
+    if (len != RARRAY_LEN(ary2)) return Qfalse;
+
+    p1 = RARRAY_PTR(ary1);
+    p2 = RARRAY_PTR(ary2);
+
+    if (ARY_EMBED_P(ary1) && ARY_EMBED_P(ary2)) {
+        for (; len; len--, p1++, p2++) {
+            if (*p1 != *p2) return Qfalse;
+        }
+        return Qtrue;
+    }
+
+    if (p1 == p2) return Qtrue;
+    return Qfalse;
+}
+
 static VALUE
 ary_alloc(VALUE klass)
 {
diff --git a/file.c b/file.c
index c1db6d7..3f465e5 100644
--- a/file.c
+++ b/file.c
@@ -148,40 +148,60 @@ file_path_convert(VALUE name)
     return name;
 }

-static VALUE
-rb_get_path_check(VALUE obj, int level)
+static rb_encoding *
+check_path_encoding(VALUE str)
+{
+    rb_encoding *enc = rb_enc_get(str);
+    if (!rb_enc_asciicompat(enc)) {
+	rb_raise(rb_eEncCompatError, "path name must be ASCII-compatible (%s): %s",
+		 rb_enc_name(enc), RSTRING_PTR(rb_str_inspect(str)));
+    }
+    return enc;
+}
+
+VALUE
+rb_get_path_check_to_string(VALUE obj, int level)
 {
     VALUE tmp;
     ID to_path;
-    rb_encoding *enc;

     if (insecure_obj_p(obj, level)) {
 	rb_insecure_operation();
     }

+    if (RB_TYPE_P(obj, T_STRING)) {
+	return obj;
+    }
     CONST_ID(to_path, "to_path");
     tmp = rb_check_funcall(obj, to_path, 0, 0);
     if (tmp == Qundef) {
 	tmp = obj;
     }
     StringValue(tmp);
+    return tmp;
+}

+VALUE
+rb_get_path_check_convert(VALUE obj, VALUE tmp, int level)
+{
     tmp = file_path_convert(tmp);
     if (obj != tmp && insecure_obj_p(tmp, level)) {
 	rb_insecure_operation();
     }
-    enc = rb_enc_get(tmp);
-    if (!rb_enc_asciicompat(enc)) {
-	tmp = rb_str_inspect(tmp);
-	rb_raise(rb_eEncCompatError, "path name must be ASCII-compatible (%s): %s",
-		 rb_enc_name(enc), RSTRING_PTR(tmp));
-    }

+    check_path_encoding(tmp);
     StringValueCStr(tmp);

     return rb_str_new4(tmp);
 }

+static VALUE
+rb_get_path_check(VALUE obj, int level)
+{
+    VALUE tmp = rb_get_path_check_to_string(obj, level);
+    return rb_get_path_check_convert(obj, tmp, level);
+}
+
 VALUE
 rb_get_path_no_checksafe(VALUE obj)
 {
@@ -3249,7 +3269,6 @@ rb_file_expand_path(VALUE fname, VALUE dname)
 VALUE
 rb_file_expand_path_fast(VALUE fname, VALUE dname)
 {
-    check_expand_path_args(fname, dname);
     return rb_file_expand_path_internal(fname, dname, 0, 0, EXPAND_PATH_BUFFER());
 }

@@ -5237,7 +5256,7 @@ rb_find_file_ext_safe(VALUE *filep, const char *const *ext, int safe_level)
 	rb_raise(rb_eSecurityError, "loading from non-absolute path %s", f);
     }

-    RB_GC_GUARD(load_path) = rb_get_load_path();
+    RB_GC_GUARD(load_path) = rb_get_expanded_load_path();
     if (!load_path) return 0;

     fname = rb_str_dup(*filep);
@@ -5302,7 +5321,7 @@ rb_find_file_safe(VALUE path, int safe_level)
 	rb_raise(rb_eSecurityError, "loading from non-absolute path %s", f);
     }

-    RB_GC_GUARD(load_path) = rb_get_load_path();
+    RB_GC_GUARD(load_path) = rb_get_expanded_load_path();
     if (load_path) {
 	long i;

diff --git a/hash.c b/hash.c
index fbd8237..8423558 100644
--- a/hash.c
+++ b/hash.c
@@ -1087,7 +1087,7 @@ clear_i(VALUE key, VALUE value, VALUE dummy)
  *
  */

-static VALUE
+VALUE
 rb_hash_clear(VALUE hash)
 {
     rb_hash_modify_check(hash);
diff --git a/include/ruby/intern.h b/include/ruby/intern.h
index 927b536..0781de8 100644
--- a/include/ruby/intern.h
+++ b/include/ruby/intern.h
@@ -56,6 +56,7 @@ VALUE rb_ary_tmp_new(long);
 void rb_ary_free(VALUE);
 void rb_ary_modify(VALUE);
 VALUE rb_ary_freeze(VALUE);
+VALUE rb_ary_dup_of_p(VALUE, VALUE);
 VALUE rb_ary_aref(int, VALUE*, VALUE);
 VALUE rb_ary_subseq(VALUE, long, long);
 void rb_ary_store(VALUE, long, VALUE);
@@ -440,6 +441,7 @@ VALUE rb_hash_lookup(VALUE, VALUE);
 VALUE rb_hash_lookup2(VALUE, VALUE, VALUE);
 VALUE rb_hash_fetch(VALUE, VALUE);
 VALUE rb_hash_aset(VALUE, VALUE, VALUE);
+VALUE rb_hash_clear(VALUE);
 VALUE rb_hash_delete_if(VALUE);
 VALUE rb_hash_delete(VALUE,VALUE);
 typedef VALUE rb_hash_update_func(VALUE newkey, VALUE oldkey, VALUE value);
diff --git a/internal.h b/internal.h
index 59c9284..f5af903 100644
--- a/internal.h
+++ b/internal.h
@@ -94,6 +94,8 @@ VALUE rb_home_dir(const char *user, VALUE result);
 VALUE rb_realpath_internal(VALUE basedir, VALUE path, int strict);
 VALUE rb_file_expand_path_fast(VALUE, VALUE);
 VALUE rb_file_expand_path_internal(VALUE, VALUE, int, int, VALUE);
+VALUE rb_get_path_check_to_string(VALUE, int);
+VALUE rb_get_path_check_convert(VALUE, VALUE, int);
 void Init_File(void);

 #ifdef _WIN32
@@ -119,6 +121,7 @@ VALUE rb_iseq_clone(VALUE iseqval, VALUE newcbase);

 /* load.c */
 VALUE rb_get_load_path(void);
+VALUE rb_get_expanded_load_path(void);

 /* math.c */
 VALUE rb_math_atan2(VALUE, VALUE);
diff --git a/load.c b/load.c
index 163ec4c..68caebc 100644
--- a/load.c
+++ b/load.c
@@ -34,21 +34,120 @@ rb_get_load_path(void)
     return load_path;
 }

-VALUE
-rb_get_expanded_load_path(void)
+enum expand_type {
+    EXPAND_ALL,
+    EXPAND_RELATIVE,
+    EXPAND_HOME,
+    EXPAND_NON_CACHE
+};
+
+/* Construct expanded load path and store it to cache.
+   We rebuild load path partially if the cache is invalid.
+   We don't cache non string object and expand it every time. We ensure that
+   string objects in $LOAD_PATH are frozen.
+ */
+static void
+rb_construct_expanded_load_path(int type, int *has_relative, int *has_non_cache)
 {
-    VALUE load_path = rb_get_load_path();
+    rb_vm_t *vm = GET_VM();
+    VALUE load_path = vm->load_path;
+    VALUE expanded_load_path = vm->expanded_load_path;
     VALUE ary;
     long i;
+    int level = rb_safe_level();

     ary = rb_ary_new2(RARRAY_LEN(load_path));
     for (i = 0; i < RARRAY_LEN(load_path); ++i) {
-	VALUE path = rb_file_expand_path_fast(RARRAY_PTR(load_path)[i], Qnil);
-	rb_str_freeze(path);
-	rb_ary_push(ary, path);
+	VALUE path, as_str, expanded_path;
+	int is_string, non_cache;
+	char *as_cstr;
+	as_str = path = RARRAY_PTR(load_path)[i];
+	is_string = RB_TYPE_P(path, T_STRING) ? 1 : 0;
+	non_cache = !is_string ? 1 : 0;
+	as_str = rb_get_path_check_to_string(path, level);
+	as_cstr = RSTRING_PTR(as_str);
+
+	if (!non_cache) {
+	    if ((type == EXPAND_RELATIVE &&
+		    rb_is_absolute_path(as_cstr)) ||
+		(type == EXPAND_HOME &&
+		    (!as_cstr[0] || as_cstr[0] != '~')) ||
+		(type == EXPAND_NON_CACHE)) {
+		    /* Use cached expanded path. */
+		    rb_ary_push(ary, RARRAY_PTR(expanded_load_path)[i]);
+		    continue;
+	    }
+	}
+	if (!*has_relative && !rb_is_absolute_path(as_cstr))
+	    *has_relative = 1;
+	if (!*has_non_cache && non_cache)
+	    *has_non_cache = 1;
+	/* Freeze only string object. We expand other objects every time. */
+	if (is_string)
+	    rb_str_freeze(path);
+	as_str = rb_get_path_check_convert(path, as_str, level);
+	expanded_path = rb_file_expand_path_fast(as_str, Qnil);
+	rb_str_freeze(expanded_path);
+	rb_ary_push(ary, expanded_path);
     }
     rb_obj_freeze(ary);
-    return ary;
+    vm->expanded_load_path = ary;
+    rb_ary_replace(vm->load_path_snapshot, vm->load_path);
+}
+
+static VALUE
+load_path_getcwd(void)
+{
+    char *cwd = my_getcwd();
+    VALUE cwd_str = rb_filesystem_str_new_cstr(cwd);
+    xfree(cwd);
+    return cwd_str;
+}
+
+VALUE
+rb_get_expanded_load_path(void)
+{
+    rb_vm_t *vm = GET_VM();
+    const VALUE non_cache = Qtrue;
+
+    if (!rb_ary_dup_of_p(vm->load_path_snapshot, vm->load_path)) {
+	/* The load path was modified. Rebuild the expanded load path. */
+	int has_relative = 0, has_non_cache = 0;
+	rb_construct_expanded_load_path(EXPAND_ALL, &has_relative, &has_non_cache);
+	if (has_relative) {
+	    vm->load_path_check_cache = load_path_getcwd();
+	}
+	else if (has_non_cache) {
+	    /* Non string object. */
+	    vm->load_path_check_cache = non_cache;
+	}
+	else {
+	    vm->load_path_check_cache = 0;
+	}
+    }
+    else if (vm->load_path_check_cache == non_cache) {
+	int has_relative = 1, has_non_cache = 1;
+	/* Expand only non-cacheable objects. */
+	rb_construct_expanded_load_path(EXPAND_NON_CACHE,
+					&has_relative, &has_non_cache);
+    }
+    else if (vm->load_path_check_cache) {
+	int has_relative = 1, has_non_cache = 1;
+	VALUE cwd = load_path_getcwd();
+	if (!rb_str_equal(vm->load_path_check_cache, cwd)) {
+	    /* Current working directory or filesystem encoding was changed.
+	       Expand relative load path and non-cacheable objects again. */
+	    vm->load_path_check_cache = cwd;
+	    rb_construct_expanded_load_path(EXPAND_RELATIVE,
+					    &has_relative, &has_non_cache);
+	}
+	else {
+	    /* Expand only tilde (User HOME) and non-cacheable objects. */
+	    rb_construct_expanded_load_path(EXPAND_HOME,
+					    &has_relative, &has_non_cache);
+	}
+    }
+    return vm->expanded_load_path;
 }

 static VALUE
@@ -63,12 +162,121 @@ get_loaded_features(void)
     return GET_VM()->loaded_features;
 }

+static void
+reset_loaded_features_snapshot(void)
+{
+    rb_vm_t *vm = GET_VM();
+    rb_ary_replace(vm->loaded_features_snapshot, vm->loaded_features);
+}
+
+static VALUE
+get_loaded_features_index_raw(void)
+{
+    return GET_VM()->loaded_features_index;
+}
+
 static st_table *
 get_loading_table(void)
 {
     return GET_VM()->loading_table;
 }

+static void
+features_index_add_single(VALUE short_feature, VALUE offset)
+{
+    VALUE features_index, this_feature_index;
+    features_index = get_loaded_features_index_raw();
+    if ((this_feature_index = rb_hash_lookup(features_index, short_feature)) == Qnil) {
+	this_feature_index = rb_ary_new();
+	rb_hash_aset(features_index, short_feature, this_feature_index);
+    }
+    rb_ary_push(this_feature_index, offset);
+}
+
+/* Add to the loaded-features index all the required entries for
+   `feature`, located at `offset` in $LOADED_FEATURES.  We add an
+   index entry at each string `short_feature` for which
+     feature == "#{prefix}#{short_feature}#{e}"
+   where `e` is empty or matches %r{^\.[^./]*$}, and `prefix` is empty
+   or ends in '/'.  This maintains the invariant that `rb_feature_p()`
+   relies on for its fast lookup.
+*/
+static void
+features_index_add(VALUE feature, VALUE offset)
+{
+    VALUE short_feature;
+    const char *feature_str, *feature_end, *ext, *p;
+
+    feature_str = StringValuePtr(feature);
+    feature_end = feature_str + RSTRING_LEN(feature);
+
+    for (ext = feature_end; ext > feature_str; ext--)
+      if (*ext == '.' || *ext == '/')
+	break;
+    if (*ext != '.')
+      ext = NULL;
+    /* Now `ext` points to the only string matching %r{^\.[^./]*$} that is
+       at the end of `feature`, or is NULL if there is no such string. */
+
+    p = ext ? ext : feature_end;
+    while (1) {
+	p--;
+	while (p >= feature_str && *p != '/')
+	    p--;
+	if (p < feature_str)
+	    break;
+	/* Now *p == '/'.  We reach this point for every '/' in `feature`. */
+	short_feature = rb_str_substr(feature, p + 1 - feature_str, feature_end - p - 1);
+	features_index_add_single(short_feature, offset);
+	if (ext) {
+	    short_feature = rb_str_substr(feature, p + 1 - feature_str, ext - p - 1);
+	    features_index_add_single(short_feature, offset);
+	}
+    }
+    features_index_add_single(feature, offset);
+    if (ext) {
+	short_feature = rb_str_substr(feature, 0, ext - feature_str);
+	features_index_add_single(short_feature, offset);
+    }
+}
+
+static VALUE
+get_loaded_features_index(void)
+{
+    VALUE features;
+    int i;
+    rb_vm_t *vm = GET_VM();
+
+    if (!rb_ary_dup_of_p(vm->loaded_features_snapshot, vm->loaded_features)) {
+	/* The sharing was broken; something (other than us in rb_provide_feature())
+	   modified loaded_features.  Rebuild the index. */
+	rb_hash_clear(vm->loaded_features_index);
+	features = vm->loaded_features;
+	for (i = 0; i < RARRAY_LEN(features); i++) {
+	    VALUE entry, as_str;
+	    as_str = entry = rb_ary_entry(features, i);
+	    StringValue(as_str);
+	    if (as_str != entry)
+		rb_ary_store(features, i, as_str);
+	    rb_str_freeze(as_str);
+	    features_index_add(as_str, INT2FIX(i));
+	}
+	reset_loaded_features_snapshot();
+    }
+    return vm->loaded_features_index;
+}
+
+/* This searches `load_path` for a value such that
+     name == "#{load_path[i]}/#{feature}"
+   if `feature` is a suffix of `name`, or otherwise
+     name == "#{load_path[i]}/#{feature}#{ext}"
+   for an acceptable string `ext`.  It returns
+   `load_path[i].to_str` if found, else 0.
+
+   If type is 's', then `ext` is acceptable only if IS_DLEXT(ext);
+   if 'r', then only if IS_RBEXT(ext); otherwise `ext` may be absent
+   or have any value matching `%r{^\.[^./]*$}`.
+*/
 static VALUE
 loaded_feature_path(const char *name, long vlen, const char *feature, long len,
 		    int type, VALUE load_path)
@@ -88,23 +296,22 @@ loaded_feature_path(const char *name, long vlen, const char *feature, long len,
 	    return 0;
 	plen = e - name - len - 1;
     }
+    if (type == 's' && !IS_DLEXT(&name[plen+len+1])
+     || type == 'r' && !IS_RBEXT(&name[plen+len+1])
+     || name[plen] != '/') {
+       return 0;
+    }
+    /* Now name == "#{prefix}/#{feature}#{ext}" where ext is acceptable
+       (possibly empty) and prefix is some string of length plen. */
+
     for (i = 0; i < RARRAY_LEN(load_path); ++i) {
 	VALUE p = RARRAY_PTR(load_path)[i];
 	const char *s = StringValuePtr(p);
 	long n = RSTRING_LEN(p);

-	if (n != plen ) continue;
-	if (n && (strncmp(name, s, n) || name[n] != '/')) continue;
-	switch (type) {
-	  case 's':
-	    if (IS_DLEXT(&name[n+len+1])) return p;
-	    break;
-	  case 'r':
-	    if (IS_RBEXT(&name[n+len+1])) return p;
-	    break;
-	  default:
-	    return p;
-	}
+	if (n != plen) continue;
+	if (n && strncmp(name, s, n)) continue;
+	return p;
     }
     return 0;
 }
@@ -132,7 +339,7 @@ loaded_feature_path_i(st_data_t v, st_data_t b, st_data_t f)
 static int
 rb_feature_p(const char *feature, const char *ext, int rb, int expanded, const char **fn)
 {
-    VALUE v, features, p, load_path = 0;
+    VALUE features, features_index, feature_val, this_feature_index, v, p, load_path = 0;
     const char *f, *e;
     long i, len, elen, n;
     st_table *loading_tbl;
@@ -151,8 +358,39 @@ rb_feature_p(const char *feature, const char *ext, int rb, int expanded, const c
 	type = 0;
     }
     features = get_loaded_features();
-    for (i = 0; i < RARRAY_LEN(features); ++i) {
-	v = RARRAY_PTR(features)[i];
+    features_index = get_loaded_features_index();
+
+    feature_val = rb_str_new(feature, len);
+    this_feature_index = rb_hash_lookup(features_index, feature_val);
+    /* We search `features` for an entry such that either
+         "#{features[i]}" == "#{load_path[j]}/#{feature}#{e}"
+       for some j, or
+         "#{features[i]}" == "#{feature}#{e}"
+       Here `e` is an "allowed" extension -- either empty or one
+       of the extensions accepted by IS_RBEXT, IS_SOEXT, or
+       IS_DLEXT.  Further, if `ext && rb` then `IS_RBEXT(e)`,
+       and if `ext && !rb` then `IS_SOEXT(e) || IS_DLEXT(e)`.
+
+       If `expanded`, then only the latter form (without load_path[j])
+       is accepted.  Otherwise either form is accepted, *unless* `ext`
+       is false and an otherwise-matching entry of the first form is
+       preceded by an entry of the form
+         "#{features[i2]}" == "#{load_path[j2]}/#{feature}#{e2}"
+       where `e2` matches %r{^\.[^./]*$} but is not an allowed extension.
+       After a "distractor" entry of this form, only entries of the
+       form "#{feature}#{e}" are accepted.
+
+       In `rb_provide_feature()` and `get_loaded_features_index()` we
+       maintain an invariant that the array `this_feature_index` will
+       point to every entry in `features` which has the form
+         "#{prefix}#{feature}#{e}"
+       where `e` is empty or matches %r{^\.[^./]*$}, and `prefix` is empty
+       or ends in '/'.  This includes both match forms above, as well
+       as any distractors, so we may ignore all other entries in `features`.
+     */
+    for (i = 0; this_feature_index != Qnil && i < RARRAY_LEN(this_feature_index); i++) {
+	long index = FIX2LONG(rb_ary_entry(this_feature_index, i));
+	v = RARRAY_PTR(features)[index];
 	f = StringValuePtr(v);
 	if ((n = RSTRING_LEN(v)) < len) continue;
 	if (strncmp(f, feature, len) != 0) {
@@ -175,6 +413,7 @@ rb_feature_p(const char *feature, const char *ext, int rb, int expanded, const c
 	    return 'r';
 	}
     }
+
     loading_tbl = get_loading_table();
     if (loading_tbl) {
 	f = 0;
@@ -183,7 +422,7 @@ rb_feature_p(const char *feature, const char *ext, int rb, int expanded, const c
 	    fs.name = feature;
 	    fs.len = len;
 	    fs.type = type;
-	    fs.load_path = load_path ? load_path : rb_get_load_path();
+	    fs.load_path = load_path ? load_path : rb_get_expanded_load_path();
 	    fs.result = 0;
 	    st_foreach(loading_tbl, loaded_feature_path_i, (st_data_t)&fs);
 	    if ((f = fs.result) != 0) {
@@ -233,7 +472,7 @@ rb_feature_provided(const char *feature, const char **loading)

     if (*feature == '.' &&
 	(feature[1] == '/' || strncmp(feature+1, "./", 2) == 0)) {
-	fullpath = rb_file_expand_path_fast(rb_str_new2(feature), Qnil);
+	fullpath = rb_file_expand_path_fast(rb_get_path(rb_str_new2(feature)), Qnil);
 	feature = RSTRING_PTR(fullpath);
     }
     if (ext && !strchr(ext, '/')) {
@@ -254,11 +493,18 @@ rb_feature_provided(const char *feature, const char **loading)
 static void
 rb_provide_feature(VALUE feature)
 {
-    if (OBJ_FROZEN(get_loaded_features())) {
+    VALUE features;
+
+    features = get_loaded_features();
+    if (OBJ_FROZEN(features)) {
 	rb_raise(rb_eRuntimeError,
 		 "$LOADED_FEATURES is frozen; cannot append feature");
     }
-    rb_ary_push(get_loaded_features(), feature);
+    rb_str_freeze(feature);
+
+    rb_ary_push(features, feature);
+    features_index_add(feature, INT2FIX(RARRAY_LEN(features)-1));
+    reset_loaded_features_snapshot();
 }

 void
@@ -774,10 +1020,15 @@ Init_load()
     rb_alias_variable(rb_intern("$-I"), id_load_path);
     rb_alias_variable(rb_intern("$LOAD_PATH"), id_load_path);
     vm->load_path = rb_ary_new();
+    vm->expanded_load_path = rb_ary_new();
+    vm->load_path_snapshot = rb_ary_new();
+    vm->load_path_check_cache = 0;

     rb_define_virtual_variable("$\"", get_loaded_features, 0);
     rb_define_virtual_variable("$LOADED_FEATURES", get_loaded_features, 0);
     vm->loaded_features = rb_ary_new();
+    vm->loaded_features_snapshot = rb_ary_new();
+    vm->loaded_features_index = rb_hash_new();

     rb_define_global_function("load", rb_f_load, -1);
     rb_define_global_function("require", rb_f_require, 1);
diff --git a/ruby.c b/ruby.c
index 3ddd96c..7ffc78e 100644
--- a/ruby.c
+++ b/ruby.c
@@ -1366,7 +1366,8 @@ process_options(int argc, char **argv, struct cmdline_options *opt)
 	long i;
 	VALUE load_path = GET_VM()->load_path;
 	for (i = 0; i < RARRAY_LEN(load_path); ++i) {
-	    rb_enc_associate(RARRAY_PTR(load_path)[i], lenc);
+	    RARRAY_PTR(load_path)[i] =
+		rb_enc_associate(rb_str_dup(RARRAY_PTR(load_path)[i]), lenc);
 	}
     }
     if (!(opt->disable & DISABLE_BIT(gems))) {
diff --git a/test/ruby/test_require.rb b/test/ruby/test_require.rb
index 58a9ee2..ec75096 100644
--- a/test/ruby/test_require.rb
+++ b/test/ruby/test_require.rb
@@ -356,4 +356,114 @@ class TestRequire < Test::Unit::TestCase
     $:.replace(loadpath)
     $".replace(features)
   end
+
+  def test_require_changed_current_dir
+    bug7158 = '[ruby-core:47970]'
+    Dir.mktmpdir {|tmp|
+      Dir.chdir(tmp) {
+        Dir.mkdir("a")
+        Dir.mkdir("b")
+        open(File.join("a", "foo.rb"), "w") {}
+        open(File.join("b", "bar.rb"), "w") {|f|
+          f.puts "p :ok"
+        }
+        assert_in_out_err([], <<-INPUT, %w(:ok), [], bug7158)
+          $: << "."
+          Dir.chdir("a")
+          require "foo"
+          Dir.chdir("../b")
+          p :ng unless require "bar"
+          Dir.chdir("..")
+          p :ng if require "b/bar"
+        INPUT
+      }
+    }
+  end
+
+  def test_require_not_modified_load_path
+    bug7158 = '[ruby-core:47970]'
+    Dir.mktmpdir {|tmp|
+      Dir.chdir(tmp) {
+        open("foo.rb", "w") {}
+        assert_in_out_err([], <<-INPUT, %w(:ok), [], bug7158)
+          a = Object.new
+          def a.to_str
+            "#{tmp}"
+          end
+          $: << a
+          require "foo"
+          last_path = $:.pop
+          p :ok if last_path == a && last_path.class == Object
+        INPUT
+      }
+    }
+  end
+
+  def test_require_changed_home
+    bug7158 = '[ruby-core:47970]'
+    Dir.mktmpdir {|tmp|
+      Dir.chdir(tmp) {
+        open("foo.rb", "w") {}
+        Dir.mkdir("a")
+        open(File.join("a", "bar.rb"), "w") {}
+        assert_in_out_err([], <<-INPUT, %w(:ok), [], bug7158)
+          $: << '~'
+          ENV['HOME'] = "#{tmp}"
+          require "foo"
+          ENV['HOME'] = "#{tmp}/a"
+          p :ok if require "bar"
+        INPUT
+      }
+    }
+  end
+
+  def test_require_to_path_redefined_in_load_path
+    bug7158 = '[ruby-core:47970]'
+    Dir.mktmpdir {|tmp|
+      Dir.chdir(tmp) {
+        open("foo.rb", "w") {}
+        assert_in_out_err(["RUBYOPT"=>nil], <<-INPUT, %w(:ok), [], bug7158)
+          a = Object.new
+          def a.to_path
+            "bar"
+          end
+          $: << a
+          begin
+            require "foo"
+            p :ng
+          rescue LoadError
+          end
+          def a.to_path
+            "#{tmp}"
+          end
+          p :ok if require "foo"
+        INPUT
+      }
+    }
+  end
+
+  def test_require_to_str_redefined_in_load_path
+    bug7158 = '[ruby-core:47970]'
+    Dir.mktmpdir {|tmp|
+      Dir.chdir(tmp) {
+        open("foo.rb", "w") {}
+        assert_in_out_err(["RUBYOPT"=>nil], <<-INPUT, %w(:ok), [], bug7158)
+          a = Object.new
+          def a.to_str
+            "foo"
+          end
+          $: << a
+          begin
+            require "foo"
+            p :ng
+          rescue LoadError
+          end
+          def a.to_str
+            "#{tmp}"
+          end
+          p :ok if require "foo"
+        INPUT
+      }
+    }
+  end
 end
diff --git a/vm.c b/vm.c
index 63141ba..c187bb6 100644
--- a/vm.c
+++ b/vm.c
@@ -1578,7 +1578,12 @@ rb_vm_mark(void *ptr)
 	RUBY_MARK_UNLESS_NULL(vm->thgroup_default);
 	RUBY_MARK_UNLESS_NULL(vm->mark_object_ary);
 	RUBY_MARK_UNLESS_NULL(vm->load_path);
+	RUBY_MARK_UNLESS_NULL(vm->load_path_snapshot);
+	RUBY_MARK_UNLESS_NULL(vm->load_path_check_cache);
+	RUBY_MARK_UNLESS_NULL(vm->expanded_load_path);
 	RUBY_MARK_UNLESS_NULL(vm->loaded_features);
+	RUBY_MARK_UNLESS_NULL(vm->loaded_features_snapshot);
+	RUBY_MARK_UNLESS_NULL(vm->loaded_features_index);
 	RUBY_MARK_UNLESS_NULL(vm->top_self);
 	RUBY_MARK_UNLESS_NULL(vm->coverages);
 	rb_gc_mark_locations(vm->special_exceptions, vm->special_exceptions + ruby_special_error_count);
diff --git a/vm_core.h b/vm_core.h
index 60146f0..7b25806 100644
--- a/vm_core.h
+++ b/vm_core.h
@@ -298,7 +298,12 @@ typedef struct rb_vm_struct {
     /* load */
     VALUE top_self;
     VALUE load_path;
+    VALUE load_path_snapshot;
+    VALUE load_path_check_cache;
+    VALUE expanded_load_path;
     VALUE loaded_features;
+    VALUE loaded_features_snapshot;
+    VALUE loaded_features_index;
     struct st_table *loading_table;

     /* signal */

## 02-st_opt.diff
diff --git a/common.mk b/common.mk
index c9ef641..56e52b6 100644
--- a/common.mk
+++ b/common.mk
@@ -638,7 +638,8 @@ file.$(OBJEXT): {$(VPATH)}file.c $(RUBY_H_INCLUDES) {$(VPATH)}io.h \
 gc.$(OBJEXT): {$(VPATH)}gc.c $(RUBY_H_INCLUDES) {$(VPATH)}re.h \
   {$(VPATH)}regex.h $(ENCODING_H_INCLUDES) $(VM_CORE_H_INCLUDES) \
   {$(VPATH)}gc.h {$(VPATH)}io.h {$(VPATH)}eval_intern.h {$(VPATH)}util.h \
-  {$(VPATH)}debug.h {$(VPATH)}internal.h {$(VPATH)}constant.h
+  {$(VPATH)}debug.h {$(VPATH)}internal.h {$(VPATH)}constant.h \
+  {$(VPATH)}pool_alloc.inc.h {$(VPATH)}pool_alloc.h
 hash.$(OBJEXT): {$(VPATH)}hash.c $(RUBY_H_INCLUDES) {$(VPATH)}util.h \
   $(ENCODING_H_INCLUDES)
 inits.$(OBJEXT): {$(VPATH)}inits.c $(RUBY_H_INCLUDES) \
@@ -702,7 +703,7 @@ signal.$(OBJEXT): {$(VPATH)}signal.c $(RUBY_H_INCLUDES) \
   $(VM_CORE_H_INCLUDES) {$(VPATH)}debug.h
 sprintf.$(OBJEXT): {$(VPATH)}sprintf.c $(RUBY_H_INCLUDES) {$(VPATH)}re.h \
   {$(VPATH)}regex.h {$(VPATH)}vsnprintf.c $(ENCODING_H_INCLUDES)
-st.$(OBJEXT): {$(VPATH)}st.c $(RUBY_H_INCLUDES)
+st.$(OBJEXT): {$(VPATH)}st.c $(RUBY_H_INCLUDES) {$(VPATH)}pool_alloc.h
 strftime.$(OBJEXT): {$(VPATH)}strftime.c $(RUBY_H_INCLUDES) \
   {$(VPATH)}timev.h
 string.$(OBJEXT): {$(VPATH)}string.c $(RUBY_H_INCLUDES) {$(VPATH)}re.h \
diff --git a/configure.in b/configure.in
index b006a01..a3ccad7 100644
--- a/configure.in
+++ b/configure.in
@@ -1324,6 +1324,29 @@ main() {
 CFLAGS="$save_CFLAGS"])
 AC_DEFINE_UNQUOTED(GC_MARK_STACKFRAME_WORD, $rb_cv_gc_mark_stackframe_word)

+AS_CASE(["$target_os"],
+[openbsd*], [
+  AC_CACHE_CHECK(for heap align log on openbsd, rb_cv_page_size_log,
+    [rb_cv_page_size_log=no
+     for page_log in 12 13; do
+       AC_TRY_RUN([
+#include <math.h>
+#include <unistd.h>
+
+int
+main() {
+  if ((int)log2((double)sysconf(_SC_PAGESIZE)) != $page_log) return 1;
+  return 0;
+}
+       ],
+       rb_cv_page_size_log="$page_log"; break)
+     done])
+  if test $rb_cv_page_size_log != no; then
+    AC_DEFINE_UNQUOTED(HEAP_ALIGN_LOG, $rb_cv_page_size_log)
+  else
+    AC_DEFINE_UNQUOTED(HEAP_ALIGN_LOG, 12)
+  fi
+])

 dnl Checks for library functions.
 AC_TYPE_GETGROUPS
@@ -1424,7 +1447,8 @@ AC_CHECK_FUNCS(fmod killpg wait4 waitpid fork spawnv syscall __syscall chroot ge
 	      setsid telldir seekdir fchmod cosh sinh tanh log2 round\
 	      setuid setgid daemon select_large_fdset setenv unsetenv\
               mktime timegm gmtime_r clock_gettime gettimeofday poll ppoll\
-              pread sendfile shutdown sigaltstack dl_iterate_phdr)
+              pread sendfile shutdown sigaltstack dl_iterate_phdr\
+              dup3 pipe2 posix_memalign memalign)

 AC_CACHE_CHECK(for unsetenv returns a value, rb_cv_unsetenv_return_value,
   [AC_TRY_COMPILE([
diff --git a/ext/-test-/st/numhash/numhash.c b/ext/-test-/st/numhash/numhash.c
index e186cd4..53d9e1b 100644
--- a/ext/-test-/st/numhash/numhash.c
+++ b/ext/-test-/st/numhash/numhash.c
@@ -54,7 +54,7 @@ numhash_i(st_data_t key, st_data_t value, st_data_t arg, int error)
 static VALUE
 numhash_each(VALUE self)
 {
-    return st_foreach((st_table *)DATA_PTR(self), numhash_i, self) ? Qtrue : Qfalse;
+    return st_foreach_check((st_table *)DATA_PTR(self), numhash_i, self, 0) ? Qtrue : Qfalse;
 }

 void
diff --git a/gc.c b/gc.c
index e65d0ec..10ae77c 100644
--- a/gc.c
+++ b/gc.c
@@ -20,10 +20,12 @@
 #include "vm_core.h"
 #include "internal.h"
 #include "gc.h"
+#include "pool_alloc.h"
 #include "constant.h"
 #include <stdio.h>
 #include <setjmp.h>
 #include <sys/types.h>
+#include <assert.h>

 #ifdef HAVE_SYS_TIME_H
 #include <sys/time.h>
@@ -35,7 +37,12 @@

 #if defined _WIN32 || defined __CYGWIN__
 #include <windows.h>
+#elif defined(HAVE_POSIX_MEMALIGN)
+#elif defined(HAVE_MEMALIGN)
+#include <malloc.h>
 #endif
+static void aligned_free(void *);
+static void *aligned_malloc(size_t alignment, size_t size);

 #ifdef HAVE_VALGRIND_MEMCHECK_H
 # include <valgrind/memcheck.h>
@@ -321,6 +328,24 @@ struct gc_list {

 #define CALC_EXACT_MALLOC_SIZE 0

+#ifdef POOL_ALLOC_API
+/* POOL ALLOC API */
+#define POOL_ALLOC_PART 1
+#include "pool_alloc.inc.h"
+#undef POOL_ALLOC_PART
+
+typedef struct pool_layout_t pool_layout_t;
+struct pool_layout_t {
+    pool_header
+      p6,  /* st_table && st_table_entry */
+      p11;  /* st_table.bins init size */
+} pool_layout = {
+    INIT_POOL(void*[6]),
+    INIT_POOL(void*[11])
+};
+static void pool_finalize_header(pool_header *header);
+#endif
+
 typedef struct rb_objspace {
     struct {
 	size_t limit;
@@ -330,6 +355,9 @@ typedef struct rb_objspace {
 	size_t allocations;
 #endif
     } malloc_params;
+#ifdef POOL_ALLOC_API
+    pool_layout_t *pool_headers;
+#endif
     struct {
 	size_t increment;
 	struct heaps_slot *ptr;
@@ -377,7 +405,11 @@ typedef struct rb_objspace {
 #define ruby_initial_gc_stress	initial_params.gc_stress
 int *ruby_initial_gc_stress_ptr = &ruby_initial_gc_stress;
 #else
+#  ifdef POOL_ALLOC_API
+static rb_objspace_t rb_objspace = {{GC_MALLOC_LIMIT}, &pool_layout, {HEAP_MIN_SLOTS}};
+#  else
 static rb_objspace_t rb_objspace = {{GC_MALLOC_LIMIT}, {HEAP_MIN_SLOTS}};
+#  endif
 int *ruby_initial_gc_stress_ptr = &rb_objspace.gc_stress;
 #endif
 #define malloc_limit		objspace->malloc_params.limit
@@ -413,6 +445,10 @@ rb_objspace_alloc(void)
     memset(objspace, 0, sizeof(*objspace));
     malloc_limit = initial_malloc_limit;
     ruby_gc_stress = ruby_initial_gc_stress;
+#ifdef POOL_ALLOC_API
+    objspace->pool_headers = (pool_layout_t*) malloc(sizeof(pool_layout));
+    memcpy(objspace->pool_headers, &pool_layout, sizeof(pool_layout));
+#endif

     return objspace;
 }
@@ -491,6 +527,13 @@ rb_objspace_free(rb_objspace_t *objspace)
 	heaps_used = 0;
 	heaps = 0;
     }
+#ifdef POOL_ALLOC_API
+    if (objspace->pool_headers) {
+        pool_finalize_header(&objspace->pool_headers->p6);
+        pool_finalize_header(&objspace->pool_headers->p11);
+        free(objspace->pool_headers);
+    }
+#endif
     free(objspace);
 }
 #endif
@@ -894,6 +937,27 @@ ruby_xfree(void *x)
 	vm_xfree(&rb_objspace, x);
 }

+#ifdef POOL_ALLOC_API
+/* POOL ALLOC API */
+#define POOL_ALLOC_PART 2
+#include "pool_alloc.inc.h"
+#undef POOL_ALLOC_PART
+
+void
+ruby_xpool_free(void *ptr)
+{
+    pool_free_entry((void**)ptr);
+}
+
+#define CONCRET_POOL_MALLOC(pnts) \
+void * ruby_xpool_malloc_##pnts##p () { \
+    return pool_alloc_entry(&rb_objspace.pool_headers->p##pnts ); \
+}
+CONCRET_POOL_MALLOC(6)
+CONCRET_POOL_MALLOC(11)
+#undef CONCRET_POOL_MALLOC
+
+#endif

 /*
  *  call-seq:
@@ -1008,6 +1072,55 @@ allocate_sorted_heaps(rb_objspace_t *objspace, size_t next_heaps_length)
     heaps_length = next_heaps_length;
 }

+static void *
+aligned_malloc(size_t alignment, size_t size)
+{
+    void *res;
+
+#if defined __MINGW32__
+    res = __mingw_aligned_malloc(size, alignment);
+#elif defined _WIN32 && !defined __CYGWIN__
+    res = _aligned_malloc(size, alignment);
+#elif defined(HAVE_POSIX_MEMALIGN)
+    if (posix_memalign(&res, alignment, size) == 0) {
+        return res;
+    }
+    else {
+        return NULL;
+    }
+#elif defined(HAVE_MEMALIGN)
+    res = memalign(alignment, size);
+#else
+    char* aligned;
+    res = malloc(alignment + size + sizeof(void*));
+    aligned = (char*)res + alignment + sizeof(void*);
+    aligned -= ((VALUE)aligned & (alignment - 1));
+    ((void**)aligned)[-1] = res;
+    res = (void*)aligned;
+#endif
+
+#if defined(_DEBUG) || defined(GC_DEBUG)
+    /* alignment must be a power of 2 */
+    assert((alignment - 1) & alignment == 0);
+    assert(alignment % sizeof(void*) == 0);
+#endif
+    return res;
+}
+
+static void
+aligned_free(void *ptr)
+{
+#if defined __MINGW32__
+    __mingw_aligned_free(ptr);
+#elif defined _WIN32 && !defined __CYGWIN__
+    _aligned_free(ptr);
+#elif defined(HAVE_MEMALIGN) || defined(HAVE_POSIX_MEMALIGN)
+    free(ptr);
+#else
+    free(((void**)ptr)[-1]);
+#endif
+}
+
 static void
 assign_heap_slot(rb_objspace_t *objspace)
 {
diff --git a/hash.c b/hash.c
index fbd8237..32917c3 100644
--- a/hash.c
+++ b/hash.c
@@ -44,7 +44,7 @@ rb_any_cmp(VALUE a, VALUE b)
     if (FIXNUM_P(a) && FIXNUM_P(b)) {
 	return a != b;
     }
-    if (TYPE(a) == T_STRING && RBASIC(a)->klass == rb_cString &&
+    if (RB_TYPE_P(a, T_STRING) && RBASIC(a)->klass == rb_cString &&
 	TYPE(b) == T_STRING && RBASIC(b)->klass == rb_cString) {
 	return rb_str_hash_cmp(a, b);
     }
@@ -80,20 +80,14 @@ rb_any_hash(VALUE a)
     VALUE hval;
     st_index_t hnum;

-    switch (TYPE(a)) {
-      case T_FIXNUM:
-      case T_SYMBOL:
-      case T_NIL:
-      case T_FALSE:
-      case T_TRUE:
-	hnum = rb_hash_end(rb_hash_start((unsigned int)a));
-	break;
-
-      case T_STRING:
+    if (SPECIAL_CONST_P(a)) {
+	if (a == Qundef) return 0;
+	hnum = rb_hash_end(rb_hash_start((st_index_t)a));
+    }
+    else if (BUILTIN_TYPE(a) == T_STRING) {
 	hnum = rb_str_hash(a);
-	break;
-
-      default:
+    }
+    else {
         hval = rb_hash(a);
 	hnum = FIX2LONG(hval);
     }
@@ -106,10 +100,8 @@ static const struct st_hash_type objhash = {
     rb_any_hash,
 };

-static const struct st_hash_type identhash = {
-    st_numcmp,
-    st_numhash,
-};
+extern const struct st_hash_type st_hashtype_num;
+#define identhash st_hashtype_num

 typedef int st_foreach_func(st_data_t, st_data_t, st_data_t);

@@ -124,7 +116,6 @@ foreach_safe_i(st_data_t key, st_data_t value, struct foreach_safe_arg *arg)
 {
     int status;

-    if (key == Qundef) return ST_CONTINUE;
     status = (*arg->func)(key, value, arg->arg);
     if (status == ST_CONTINUE) {
 	return ST_CHECK;
@@ -140,7 +131,7 @@ st_foreach_safe(st_table *table, int (*func)(ANYARGS), st_data_t a)
     arg.tbl = table;
     arg.func = (st_foreach_func *)func;
     arg.arg = a;
-    if (st_foreach(table, foreach_safe_i, (st_data_t)&arg)) {
+    if (st_foreach_check(table, foreach_safe_i, (st_data_t)&arg, 0)) {
 	rb_raise(rb_eRuntimeError, "hash modified during iteration");
     }
 }
@@ -154,21 +145,21 @@ struct hash_foreach_arg {
 };

 static int
-hash_foreach_iter(st_data_t key, st_data_t value, struct hash_foreach_arg *arg)
+hash_foreach_iter(st_data_t key, st_data_t value, st_data_t argp)
 {
+    struct hash_foreach_arg *arg = (struct hash_foreach_arg *)argp;
     int status;
     st_table *tbl;

     tbl = RHASH(arg->hash)->ntbl;
-    if ((VALUE)key == Qundef) return ST_CONTINUE;
     status = (*arg->func)((VALUE)key, (VALUE)value, arg->arg);
     if (RHASH(arg->hash)->ntbl != tbl) {
 	rb_raise(rb_eRuntimeError, "rehash occurred during iteration");
     }
     switch (status) {
       case ST_DELETE:
-	st_delete_safe(tbl, &key, 0, Qundef);
 	FL_SET(arg->hash, HASH_DELETED);
+	return ST_DELETE;
       case ST_CONTINUE:
 	break;
       case ST_STOP:
@@ -184,7 +175,7 @@ hash_foreach_ensure(VALUE hash)

     if (RHASH(hash)->iter_lev == 0) {
 	if (FL_TEST(hash, HASH_DELETED)) {
-	    st_cleanup_safe(RHASH(hash)->ntbl, Qundef);
+	    st_cleanup_safe(RHASH(hash)->ntbl, (st_data_t)Qundef);
 	    FL_UNSET(hash, HASH_DELETED);
 	}
     }
@@ -192,9 +183,10 @@ hash_foreach_ensure(VALUE hash)
 }

 static VALUE
-hash_foreach_call(struct hash_foreach_arg *arg)
+hash_foreach_call(VALUE arg)
 {
-    if (st_foreach(RHASH(arg->hash)->ntbl, hash_foreach_iter, (st_data_t)arg)) {
+    VALUE hash = ((struct hash_foreach_arg *)arg)->hash;
+    if (st_foreach_check(RHASH(hash)->ntbl, hash_foreach_iter, (st_data_t)arg, (st_data_t)Qundef)) {
 	rb_raise(rb_eRuntimeError, "hash modified during iteration");
     }
     return Qnil;
@@ -447,7 +439,7 @@ rb_hash_rehash_i(VALUE key, VALUE value, VALUE arg)
 {
     st_table *tbl = (st_table *)arg;

-    if (key != Qundef) st_insert(tbl, key, value);
+    st_insert(tbl, (st_data_t)key, (st_data_t)value);
     return ST_CONTINUE;
 }

@@ -490,6 +482,20 @@ rb_hash_rehash(VALUE hash)
     return hash;
 }

+static VALUE
+hash_default_value(VALUE hash, VALUE key)
+{
+    if (rb_method_basic_definition_p(CLASS_OF(hash), id_default)) {
+	VALUE ifnone = RHASH_IFNONE(hash);
+	if (!FL_TEST(hash, HASH_PROC_DEFAULT)) return ifnone;
+	if (key == Qundef) return Qnil;
+	return rb_funcall(ifnone, id_yield, 2, hash, key);
+    }
+    else {
+	return rb_funcall(hash, id_default, 1, key);
+    }
+}
+
 /*
  *  call-seq:
  *     hsh[key]    ->  value
@@ -510,13 +516,7 @@ rb_hash_aref(VALUE hash, VALUE key)
     st_data_t val;

     if (!RHASH(hash)->ntbl || !st_lookup(RHASH(hash)->ntbl, key, &val)) {
-	if (!FL_TEST(hash, HASH_PROC_DEFAULT) &&
-	    rb_method_basic_definition_p(CLASS_OF(hash), id_default)) {
-	    return RHASH_IFNONE(hash);
-	}
-	else {
-	    return rb_funcall(hash, id_default, 1, key);
-	}
+	return hash_default_value(hash, key);
     }
     return (VALUE)val;
 }
@@ -659,7 +659,7 @@ rb_hash_default(int argc, VALUE *argv, VALUE hash)
 static VALUE
 rb_hash_set_default(VALUE hash, VALUE ifnone)
 {
-    rb_hash_modify(hash);
+    rb_hash_modify_check(hash);
     RHASH_IFNONE(hash) = ifnone;
     FL_UNSET(hash, HASH_PROC_DEFAULT);
     return ifnone;
@@ -707,7 +707,7 @@ rb_hash_set_default_proc(VALUE hash, VALUE proc)
 {
     VALUE b;

-    rb_hash_modify(hash);
+    rb_hash_modify_check(hash);
     b = rb_check_convert_type(proc, T_DATA, "Proc", "to_proc");
     if (NIL_P(b) || !rb_obj_is_proc(b)) {
 	rb_raise(rb_eTypeError,
@@ -776,7 +776,7 @@ rb_hash_delete_key(VALUE hash, VALUE key)
     if (!RHASH(hash)->ntbl)
         return Qundef;
     if (RHASH(hash)->iter_lev > 0) {
-	if (st_delete_safe(RHASH(hash)->ntbl, &ktmp, &val, Qundef)) {
+	if (st_delete_safe(RHASH(hash)->ntbl, &ktmp, &val, (st_data_t)Qundef)) {
 	    FL_SET(hash, HASH_DELETED);
 	    return (VALUE)val;
 	}
@@ -809,7 +809,7 @@ rb_hash_delete(VALUE hash, VALUE key)
 {
     VALUE val;

-    rb_hash_modify(hash);
+    rb_hash_modify_check(hash);
     val = rb_hash_delete_key(hash, key);
     if (val != Qundef) return val;
     if (rb_block_given_p()) {
@@ -828,7 +828,6 @@ shift_i(VALUE key, VALUE value, VALUE arg)
 {
     struct shift_var *var = (struct shift_var *)arg;

-    if (key == Qundef) return ST_CONTINUE;
     if (var->key != Qundef) return ST_STOP;
     var->key = key;
     var->val = value;
@@ -840,7 +839,6 @@ shift_i_safe(VALUE key, VALUE value, VALUE arg)
 {
     struct shift_var *var = (struct shift_var *)arg;

-    if (key == Qundef) return ST_CONTINUE;
     var->key = key;
     var->val = value;
     return ST_STOP;
@@ -864,29 +862,25 @@ rb_hash_shift(VALUE hash)
 {
     struct shift_var var;

-    rb_hash_modify(hash);
-    var.key = Qundef;
-    rb_hash_foreach(hash, RHASH(hash)->iter_lev > 0 ? shift_i_safe : shift_i,
-		    (VALUE)&var);
-
-    if (var.key != Qundef) {
-	if (RHASH(hash)->iter_lev > 0) {
-	    rb_hash_delete_key(hash, var.key);
+    rb_hash_modify_check(hash);
+    if (RHASH(hash)->ntbl) {
+	var.key = Qundef;
+	rb_hash_foreach(hash, RHASH(hash)->iter_lev > 0 ? shift_i_safe : shift_i,
+			(VALUE)&var);
+
+	if (var.key != Qundef) {
+	    if (RHASH(hash)->iter_lev > 0) {
+		rb_hash_delete_key(hash, var.key);
+	    }
+	    return rb_assoc_new(var.key, var.val);
 	}
-	return rb_assoc_new(var.key, var.val);
-    }
-    else if (FL_TEST(hash, HASH_PROC_DEFAULT)) {
-	return rb_funcall(RHASH_IFNONE(hash), id_yield, 2, hash, Qnil);
-    }
-    else {
-	return RHASH_IFNONE(hash);
     }
+    return hash_default_value(hash, Qnil);
 }

 static int
 delete_if_i(VALUE key, VALUE value, VALUE hash)
 {
-    if (key == Qundef) return ST_CONTINUE;
     if (RTEST(rb_yield_values(2, key, value))) {
 	rb_hash_delete_key(hash, key);
     }
@@ -912,8 +906,9 @@ VALUE
 rb_hash_delete_if(VALUE hash)
 {
     RETURN_ENUMERATOR(hash, 0, 0);
-    rb_hash_modify(hash);
-    rb_hash_foreach(hash, delete_if_i, hash);
+    rb_hash_modify_check(hash);
+    if (RHASH(hash)->ntbl)
+	rb_hash_foreach(hash, delete_if_i, hash);
     return hash;
 }

@@ -984,7 +979,6 @@ rb_hash_values_at(int argc, VALUE *argv, VALUE hash)
 static int
 select_i(VALUE key, VALUE value, VALUE result)
 {
-    if (key == Qundef) return ST_CONTINUE;
     if (RTEST(rb_yield_values(2, key, value)))
 	rb_hash_aset(result, key, value);
     return ST_CONTINUE;
@@ -1018,7 +1012,6 @@ rb_hash_select(VALUE hash)
 static int
 keep_if_i(VALUE key, VALUE value, VALUE hash)
 {
-    if (key == Qundef) return ST_CONTINUE;
     if (!RTEST(rb_yield_values(2, key, value))) {
 	return ST_DELETE;
     }
@@ -1040,7 +1033,7 @@ rb_hash_select_bang(VALUE hash)
     st_index_t n;

     RETURN_ENUMERATOR(hash, 0, 0);
-    rb_hash_modify(hash);
+    rb_hash_modify_check(hash);
     if (!RHASH(hash)->ntbl)
         return Qnil;
     n = RHASH(hash)->ntbl->num_entries;
@@ -1065,8 +1058,9 @@ VALUE
 rb_hash_keep_if(VALUE hash)
 {
     RETURN_ENUMERATOR(hash, 0, 0);
-    rb_hash_modify(hash);
-    rb_hash_foreach(hash, keep_if_i, hash);
+    rb_hash_modify_check(hash);
+    if (RHASH(hash)->ntbl)
+	rb_hash_foreach(hash, keep_if_i, hash);
     return hash;
 }

@@ -1144,9 +1138,7 @@ rb_hash_aset(VALUE hash, VALUE key, VALUE val)
 static int
 replace_i(VALUE key, VALUE val, VALUE hash)
 {
-    if (key != Qundef) {
-	rb_hash_aset(hash, key, val);
-    }
+    rb_hash_aset(hash, key, val);

     return ST_CONTINUE;
 }
@@ -1227,7 +1219,6 @@ rb_hash_empty_p(VALUE hash)
 static int
 each_value_i(VALUE key, VALUE value)
 {
-    if (key == Qundef) return ST_CONTINUE;
     rb_yield(value);
     return ST_CONTINUE;
 }
@@ -1262,7 +1253,6 @@ rb_hash_each_value(VALUE hash)
 static int
 each_key_i(VALUE key, VALUE value)
 {
-    if (key == Qundef) return ST_CONTINUE;
     rb_yield(key);
     return ST_CONTINUE;
 }
@@ -1296,7 +1286,6 @@ rb_hash_each_key(VALUE hash)
 static int
 each_pair_i(VALUE key, VALUE value)
 {
-    if (key == Qundef) return ST_CONTINUE;
     rb_yield(rb_assoc_new(key, value));
     return ST_CONTINUE;
 }
@@ -1334,7 +1323,6 @@ rb_hash_each_pair(VALUE hash)
 static int
 to_a_i(VALUE key, VALUE value, VALUE ary)
 {
-    if (key == Qundef) return ST_CONTINUE;
     rb_ary_push(ary, rb_assoc_new(key, value));
     return ST_CONTINUE;
 }
@@ -1367,7 +1355,6 @@ inspect_i(VALUE key, VALUE value, VALUE str)
 {
     VALUE str2;

-    if (key == Qundef) return ST_CONTINUE;
     str2 = rb_inspect(key);
     if (RSTRING_LEN(str) > 1) {
 	rb_str_cat2(str, ", ");
@@ -1434,7 +1421,6 @@ rb_hash_to_hash(VALUE hash)
 static int
 keys_i(VALUE key, VALUE value, VALUE ary)
 {
-    if (key == Qundef) return ST_CONTINUE;
     rb_ary_push(ary, key);
     return ST_CONTINUE;
 }
@@ -1465,7 +1451,6 @@ rb_hash_keys(VALUE hash)
 static int
 values_i(VALUE key, VALUE value, VALUE ary)
 {
-    if (key == Qundef) return ST_CONTINUE;
     rb_ary_push(ary, value);
     return ST_CONTINUE;
 }
@@ -1524,7 +1509,6 @@ rb_hash_search_value(VALUE key, VALUE value, VALUE arg)
 {
     VALUE *data = (VALUE *)arg;

-    if (key == Qundef) return ST_CONTINUE;
     if (rb_equal(value, data[1])) {
 	data[0] = Qtrue;
 	return ST_STOP;
@@ -1568,7 +1552,6 @@ eql_i(VALUE key, VALUE val1, VALUE arg)
     struct equal_data *data = (struct equal_data *)arg;
     st_data_t val2;

-    if (key == Qundef) return ST_CONTINUE;
     if (!st_lookup(data->tbl, key, &val2)) {
 	data->result = Qfalse;
 	return ST_STOP;
@@ -1599,7 +1582,7 @@ hash_equal(VALUE hash1, VALUE hash2, int eql)
     struct equal_data data;

     if (hash1 == hash2) return Qtrue;
-    if (TYPE(hash2) != T_HASH) {
+    if (!RB_TYPE_P(hash2, T_HASH)) {
 	if (!rb_respond_to(hash2, rb_intern("to_hash"))) {
 	    return Qfalse;
 	}
@@ -1670,7 +1653,6 @@ hash_i(VALUE key, VALUE val, VALUE arg)
     st_index_t *hval = (st_index_t *)arg;
     st_index_t hdata[2];

-    if (key == Qundef) return ST_CONTINUE;
     hdata[0] = rb_hash(key);
     hdata[1] = rb_hash(val);
     *hval ^= st_hash(hdata, sizeof(hdata), 0);
@@ -1711,7 +1693,6 @@ rb_hash_hash(VALUE hash)
 static int
 rb_hash_invert_i(VALUE key, VALUE value, VALUE hash)
 {
-    if (key == Qundef) return ST_CONTINUE;
     rb_hash_aset(hash, value, key);
     return ST_CONTINUE;
 }
@@ -1740,7 +1721,6 @@ rb_hash_invert(VALUE hash)
 static int
 rb_hash_update_i(VALUE key, VALUE value, VALUE hash)
 {
-    if (key == Qundef) return ST_CONTINUE;
     hash_update(hash, key);
     st_insert(RHASH(hash)->ntbl, key, value);
     return ST_CONTINUE;
@@ -1749,7 +1729,6 @@ rb_hash_update_i(VALUE key, VALUE value, VALUE hash)
 static int
 rb_hash_update_block_i(VALUE key, VALUE value, VALUE hash)
 {
-    if (key == Qundef) return ST_CONTINUE;
     if (rb_hash_has_key(hash, key)) {
 	value = rb_yield_values(3, key, rb_hash_aref(hash, key), value);
     }
@@ -1806,7 +1785,6 @@ rb_hash_update_func_i(VALUE key, VALUE value, VALUE arg0)
     struct update_arg *arg = (struct update_arg *)arg0;
     VALUE hash = arg->hash;

-    if (key == Qundef) return ST_CONTINUE;
     if (rb_hash_has_key(hash, key)) {
 	value = (*arg->func)(key, rb_hash_aref(hash, key), value);
     }
@@ -1863,7 +1841,6 @@ assoc_i(VALUE key, VALUE val, VALUE arg)
 {
     VALUE *args = (VALUE *)arg;

-    if (key == Qundef) return ST_CONTINUE;
     if (RTEST(rb_equal(args[0], key))) {
 	args[1] = rb_assoc_new(key, val);
 	return ST_STOP;
@@ -1901,7 +1878,6 @@ rassoc_i(VALUE key, VALUE val, VALUE arg)
 {
     VALUE *args = (VALUE *)arg;

-    if (key == Qundef) return ST_CONTINUE;
     if (RTEST(rb_equal(args[0], val))) {
 	args[1] = rb_assoc_new(key, val);
 	return ST_STOP;
@@ -2198,7 +2174,7 @@ rb_env_path_tainted(void)
 }

 #if defined(_WIN32) || (defined(HAVE_SETENV) && defined(HAVE_UNSETENV))
-#elif defined __sun__
+#elif defined __sun
 static int
 in_origenv(const char *str)
 {
@@ -2286,7 +2262,7 @@ ruby_setenv(const char *name, const char *value)
 	    rb_sys_fail("unsetenv");
 #endif
     }
-#elif defined __sun__
+#elif defined __sun
     size_t len;
     char **env_ptr, *str;
     if (strchr(name, '=')) {
@@ -3084,11 +3060,9 @@ env_invert(void)
 static int
 env_replace_i(VALUE key, VALUE val, VALUE keys)
 {
-    if (key != Qundef) {
-	env_aset(Qnil, key, val);
-	if (rb_ary_includes(keys, key)) {
-	    rb_ary_delete(keys, key);
-	}
+    env_aset(Qnil, key, val);
+    if (rb_ary_includes(keys, key)) {
+	rb_ary_delete(keys, key);
     }
     return ST_CONTINUE;
 }
@@ -3120,12 +3094,10 @@ env_replace(VALUE env, VALUE hash)
 static int
 env_update_i(VALUE key, VALUE val)
 {
-    if (key != Qundef) {
-	if (rb_block_given_p()) {
-	    val = rb_yield_values(3, key, rb_f_getenv(Qnil, key), val);
-	}
-	env_aset(Qnil, key, val);
+    if (rb_block_given_p()) {
+	val = rb_yield_values(3, key, rb_f_getenv(Qnil, key), val);
     }
+    env_aset(Qnil, key, val);
     return ST_CONTINUE;
 }

@@ -3150,15 +3122,116 @@ env_update(VALUE env, VALUE hash)
 }

 /*
- *  A <code>Hash</code> is a collection of key-value pairs. It is
- *  similar to an <code>Array</code>, except that indexing is done via
- *  arbitrary keys of any object type, not an integer index. Hashes enumerate
- *  their values in the order that the corresponding keys were inserted.
+ *  A Hash is a dictionary-like collection of unique keys and their values.
+ *  Also called associative arrays, they are similar to Arrays, but where an
+ *  Array uses integers as its index, a Hash allows you to use any object
+ *  type.
+ *
+ *  Hashes enumerate their values in the order that the corresponding keys
+ *  were inserted.
+ *
+ *  A Hash can be easily created by using its implicit form:
+ *
+ *    grades = { "Jane Doe" => 10, "Jim Doe" => 6 }
+ *
+ *  Hashes allow an alternate syntax form when your keys are always symbols.
+ *  Instead of
+ *
+ *    options = { :font_size => 10, :font_family => "Arial" }
+ *
+ *  You could write it as:
+ *
+ *    options = { font_size: 10, font_family: "Arial" }
+ *
+ *  Each named key is a symbol you can access in hash:
+ *
+ *    options[:font_size]  # => 10
+ *
+ *  A Hash can also be created through its ::new method:
+ *
+ *    grades = Hash.new
+ *    grades["Dorothy Doe"] = 9
  *
  *  Hashes have a <em>default value</em> that is returned when accessing
- *  keys that do not exist in the hash. By default, that value is
- *  <code>nil</code>.
+ *  keys that do not exist in the hash. If no default is set +nil+ is used.
+ *  You can set the default value by sending it as an argument to Hash.new:
+ *
+ *    grades = Hash.new(0)
+ *
+ *  Or by using the #default= method:
+ *
+ *    grades = {"Timmy Doe" => 8}
+ *    grades.default = 0
+ *
+ *  Accessing a value in a Hash requires using its key:
+ *
+ *    puts grades["Jane Doe"] # => 10
+ *
+ *  === Common Uses
+ *
+ *  Hashes are an easy way to represent data structures, such as
+ *
+ *    books         = {}
+ *    books[:matz]  = "The Ruby Language"
+ *    books[:black] = "The Well-Grounded Rubyist"
+ *
+ *  Hashes are also commonly used as a way to have named parameters in
+ *  functions. Note that no brackets are used below. If a hash is the last
+ *  argument on a method call, no braces are needed, thus creating a really
+ *  clean interface:
+ *
+ *    Person.create(name: "John Doe", age: 27)
+ *
+ *    def self.create(params)
+ *      @name = params[:name]
+ *      @age  = params[:age]
+ *    end
+ *
+ *  === Hash Keys
+ *
+ *  Two objects refer to the same hash key when their <code>hash</code> value
+ *  is identical and the two objects are <code>eql?</code> to each other.
+ *
+ *  A user-defined class may be used as a hash key if the <code>hash</code>
+ *  and <code>eql?</code> methods are overridden to provide meaningful
+ *  behavior.  By default, separate instances refer to separate hash keys.
+ *
+ *  A typical implementation of <code>hash</code> is based on the
+ *  object's data while <code>eql?</code> is usually aliased to the overridden
+ *  <code>==</code> method:
+ *
+ *    class Book
+ *      attr_reader :author, :title
+ *
+ *      def initialize(author, title)
+ *        @author = author
+ *        @title = title
+ *      end
+ *
+ *      def ==(other)
+ *        self.class === other and
+ *          other.author == @author and
+ *          other.title == @title
+ *      end
+ *
+ *      alias eql? ==
+ *
+ *      def hash
+ *        @author.hash ^ @title.hash # XOR
+ *      end
+ *    end
+ *
+ *    book1 = Book.new 'matz', 'Ruby in a Nutshell'
+ *    book2 = Book.new 'matz', 'Ruby in a Nutshell'
+ *
+ *    reviews = {}
+ *
+ *    reviews[book1] = 'Great reference!'
+ *    reviews[book2] = 'Nice and compact!'
+ *
+ *    reviews.length #=> 1
  *
+ *  See also Object#hash and Object#eql?
  */

 void
diff --git a/include/ruby/st.h b/include/ruby/st.h
index 50f2a75..119dfde 100644
--- a/include/ruby/st.h
+++ b/include/ruby/st.h
@@ -36,7 +36,7 @@ typedef unsigned long st_data_t;
 #elif SIZEOF_LONG_LONG == SIZEOF_VOIDP
 typedef unsigned LONG_LONG st_data_t;
 #else
-# error ---->> st.c requires sizeof(void*) == sizeof(long) to be compiled. <<----
+# error ---->> st.c requires sizeof(void*) == sizeof(long) or sizeof(LONG_LONG) to be compiled. <<----
 #endif
 #define ST_DATA_T_DEFINED

@@ -74,6 +74,11 @@ struct st_hash_type {

 #define ST_INDEX_BITS (sizeof(st_index_t) * CHAR_BIT)

+typedef struct st_packed_entry {
+    st_index_t hash;
+    st_data_t key, val;
+} st_packed_entry;
+
 struct st_table {
     const struct st_hash_type *type;
     st_index_t num_bins;
@@ -91,8 +96,17 @@ struct st_table {
     __extension__
 #endif
     st_index_t num_entries : ST_INDEX_BITS - 1;
-    struct st_table_entry **bins;
-    struct st_table_entry *head, *tail;
+    union {
+	struct {
+	    struct st_table_entry **bins;
+	    struct st_table_entry *head, *tail;
+	} big;
+	struct {
+	    struct st_packed_entry *entries;
+	    st_index_t real_entries;
+	} packed;
+	st_packed_entry upacked;
+    } as;
 };

 #define st_is_member(table,key) st_lookup((table),(key),(st_data_t *)0)
@@ -114,6 +128,7 @@ int st_insert2(st_table *, st_data_t, st_data_t, st_data_t (*)(st_data_t));
 int st_lookup(st_table *, st_data_t, st_data_t *);
 int st_get_key(st_table *, st_data_t, st_data_t *);
 int st_foreach(st_table *, int (*)(ANYARGS), st_data_t);
+int st_foreach_check(st_table *, int (*)(ANYARGS), st_data_t, st_data_t);
 int st_reverse_foreach(st_table *, int (*)(ANYARGS), st_data_t);
 void st_add_direct(st_table *, st_data_t, st_data_t);
 void st_free_table(st_table *);
diff --git a/pool_alloc.h b/pool_alloc.h
new file mode 100644
index 0000000..957708e
--- /dev/null
+++ b/pool_alloc.h
@@ -0,0 +1,11 @@
+#ifndef POOL_ALLOC_H
+#define POOL_ALLOC_H
+
+#define POOL_ALLOC_API
+#ifdef POOL_ALLOC_API
+void  ruby_xpool_free(void *ptr);
+void *ruby_xpool_malloc_6p();
+void *ruby_xpool_malloc_11p();
+#endif
+
+#endif
diff --git a/pool_alloc.inc.h b/pool_alloc.inc.h
new file mode 100644
index 0000000..a7879ab
--- /dev/null
+++ b/pool_alloc.inc.h
@@ -0,0 +1,156 @@
+/*
+ * this is generic pool allocator
+ * you should define following macroses:
+ * ITEM_NAME - unique identifier, which allows to hold functions in a namespace
+ * ITEM_TYPEDEF(name) - passed to typedef to localize item type
+ * free_entry - desired name of function for free entry
+ * alloc_entry - defired name of function for allocate entry
+ */
+
+#if POOL_ALLOC_PART == 1
+#ifdef HEAP_ALIGN_LOG
+#define DEFAULT_POOL_SIZE (1 << HEAP_ALIGN_LOG)
+#else
+#define DEFAULT_POOL_SIZE (sizeof(void*) * 2048)
+#endif
+typedef unsigned int pool_holder_counter;
+
+typedef struct pool_entry_list pool_entry_list;
+typedef struct pool_holder pool_holder;
+
+typedef struct pool_header {
+    pool_holder         *first;
+    pool_holder         *_black_magick;
+    pool_holder_counter  size; // size of entry in sizeof(void*) items
+    pool_holder_counter  total; // size of entry in sizeof(void*) items
+} pool_header;
+
+struct pool_holder {
+    pool_holder_counter free, total;
+    pool_header  *header;
+    void               *freep;
+    pool_holder        *fore, *back;
+    void *data[1];
+};
+#define POOL_DATA_SIZE(pool_size) (((pool_size) - sizeof(void*) * 6 - offsetof(pool_holder, data)) / sizeof(void*))
+#define POOL_ENTRY_SIZE(item_type) ((sizeof(item_type) - 1) / sizeof(void*) + 1)
+#define POOL_HOLDER_COUNT(pool_size, item_type) (POOL_DATA_SIZE(pool_size)/POOL_ENTRY_SIZE(item_type))
+#define INIT_POOL(item_type) {NULL, NULL, POOL_ENTRY_SIZE(item_type), POOL_HOLDER_COUNT(DEFAULT_POOL_SIZE, item_type)}
+
+#elif POOL_ALLOC_PART == 2
+static pool_holder *
+pool_holder_alloc(pool_header *header)
+{
+    pool_holder *holder;
+    pool_holder_counter i, size, count;
+    register void **ptr;
+
+    size_t sz = offsetof(pool_holder, data) +
+	    header->size * header->total * sizeof(void*);
+#define objspace (&rb_objspace)
+    vm_malloc_prepare(objspace, DEFAULT_POOL_SIZE);
+    if (header->first != NULL) return header->first;
+    TRY_WITH_GC(holder = (pool_holder*) aligned_malloc(DEFAULT_POOL_SIZE, sz));
+    malloc_increase += DEFAULT_POOL_SIZE;
+#if CALC_EXACT_MALLOC_SIZE
+    objspace->malloc_params.allocated_size += DEFAULT_POOL_SIZE;
+    objspace->malloc_params.allocations++;
+#endif
+#undef objspace
+
+    size = header->size;
+    count = header->total;
+    holder->free = count;
+    holder->total = count;
+    holder->header = header;
+    holder->fore = NULL;
+    holder->back = NULL;
+    holder->freep = &holder->data;
+    ptr = holder->data;
+    for(i = count - 1; i; i-- ) {
+	ptr = *ptr = ptr + size;
+    }
+    *ptr = NULL;
+    header->first = holder;
+    return holder;
+}
+
+static inline void
+pool_holder_unchaing(pool_header *header, pool_holder *holder)
+{
+    register pool_holder *fore = holder->fore, *back = holder->back;
+    holder->fore = NULL;
+    holder->back = NULL;
+    if (fore != NULL)  fore->back     = back;
+    else               header->_black_magick = back;
+    if (back != NULL)  back->fore     = fore;
+    else               header->first = fore;
+}
+
+static inline pool_holder *
+entry_holder(void **entry)
+{
+    return (pool_holder*)(((uintptr_t)entry) & ~(DEFAULT_POOL_SIZE - 1));
+}
+
+static inline void
+pool_free_entry(void **entry)
+{
+    pool_holder *holder = entry_holder(entry);
+    pool_header *header = holder->header;
+
+    if (holder->free++ == 0) {
+	register pool_holder *first = header->first;
+	if (first == NULL) {
+	    header->first = holder;
+	} else {
+	    holder->back = first;
+	    holder->fore = first->fore;
+	    first->fore = holder;
+	    if (holder->fore)
+		holder->fore->back = holder;
+	    else
+		header->_black_magick = holder;
+	}
+    } else if (holder->free == holder->total && header->first != holder ) {
+	pool_holder_unchaing(header, holder);
+	aligned_free(holder);
+#if CALC_EXACT_MALLOC_SIZE
+	rb_objspace.malloc_params.allocated_size -= DEFAULT_POOL_SIZE;
+	rb_objspace.malloc_params.allocations--;
+#endif
+	return;
+    }
+
+    *entry = holder->freep;
+    holder->freep = entry;
+}
+
+static inline void*
+pool_alloc_entry(pool_header *header)
+{
+    pool_holder *holder = header->first;
+    void **result;
+    if (holder == NULL) {
+	holder = pool_holder_alloc(header);
+    }
+
+    result = holder->freep;
+    holder->freep = *result;
+
+    if (--holder->free == 0) {
+	pool_holder_unchaing(header, holder);
+    }
+
+    return result;
+}
+
+static void
+pool_finalize_header(pool_header *header)
+{
+    if (header->first) {
+        aligned_free(header->first);
+        header->first = NULL;
+    }
+}
+#endif
diff --git a/st.c b/st.c
index fda5784..20ec427 100644
--- a/st.c
+++ b/st.c
@@ -7,6 +7,7 @@
 #include "st.h"
 #else
 #include "ruby/ruby.h"
+#include "pool_alloc.h"
 #endif

 #include <stdio.h>
@@ -25,8 +26,17 @@ struct st_table_entry {
     st_table_entry *fore, *back;
 };

-#define ST_DEFAULT_MAX_DENSITY 5
+#define STATIC_ASSERT(name, expr) typedef int static_assert_##name##_check[(expr) ? 1 : -1];
+
+#define ST_DEFAULT_MAX_DENSITY 2
 #define ST_DEFAULT_INIT_TABLE_SIZE 11
+#define ST_DEFAULT_SECOND_TABLE_SIZE 19
+#define ST_DEFAULT_PACKED_TABLE_SIZE 19
+#define PACKED_UNIT (int)(sizeof(st_packed_entry) / sizeof(st_table_entry*))
+#define MAX_PACKED_HASH (int)(ST_DEFAULT_PACKED_TABLE_SIZE * sizeof(st_table_entry*) / sizeof(st_packed_entry))
+
+STATIC_ASSERT(st_packed_entry, sizeof(st_packed_entry) == sizeof(st_table_entry*[PACKED_UNIT]))
+STATIC_ASSERT(st_packed_bins, sizeof(st_packed_entry[MAX_PACKED_HASH]) <= sizeof(st_table_entry*[ST_DEFAULT_PACKED_TABLE_SIZE]))

     /*
      * DEFAULT_MAX_DENSITY is the default for the largest we allow the
@@ -38,7 +48,8 @@ struct st_table_entry {
      *
      */

-static const struct st_hash_type type_numhash = {
+#define type_numhash st_hashtype_num
+const struct st_hash_type st_hashtype_num = {
     st_numcmp,
     st_numhash,
 };
@@ -61,20 +72,128 @@ static void rehash(st_table *);
 #ifdef RUBY
 #define malloc xmalloc
 #define calloc xcalloc
+#define realloc xrealloc
 #define free(x) xfree(x)
 #endif

 #define numberof(array) (int)(sizeof(array) / sizeof((array)[0]))

-#define alloc(type) (type*)malloc((size_t)sizeof(type))
-#define Calloc(n,s) (char*)calloc((n),(s))
-
 #define EQUAL(table,x,y) ((x)==(y) || (*(table)->type->compare)((x),(y)) == 0)

-/* remove cast to unsigned int in the future */
-#define do_hash(key,table) (unsigned int)(st_index_t)(*(table)->type->hash)((key))
+#define do_hash(key,table) (st_index_t)(*(table)->type->hash)((key))
 #define do_hash_bin(key,table) (do_hash((key), (table))%(table)->num_bins)

+/* preparation for possible allocation improvements */
+#ifdef POOL_ALLOC_API
+#define st_alloc_entry() (st_table_entry *)ruby_xpool_malloc_6p()
+#define st_free_entry(entry) ruby_xpool_free(entry)
+#define st_alloc_table() (st_table *)ruby_xpool_malloc_6p()
+#define st_dealloc_table(table) ruby_xpool_free(table)
+static inline st_table_entry **
+st_alloc_bins(st_index_t size)
+{
+    st_table_entry **result;
+    if (size == 11) {
+        result = (st_table_entry **) ruby_xpool_malloc_11p();
+        memset(result, 0, 11 * sizeof(st_table_entry *));
+    }
+    else
+        result = (st_table_entry **) ruby_xcalloc(size, sizeof(st_table_entry*));
+    return result;
+}
+static inline void
+st_free_bins(st_table_entry **bins, st_index_t size)
+{
+    if (size == 11)
+	ruby_xpool_free(bins);
+    else
+	ruby_xfree(bins);
+}
+static inline st_table_entry**
+st_realloc_bins(st_table_entry **bins, st_index_t newsize, st_index_t oldsize)
+{
+    st_table_entry **new_bins = st_alloc_bins(newsize);
+    st_free_bins(bins, oldsize);
+    return new_bins;
+}
+#else
+#define st_alloc_entry() (st_table_entry *)malloc(sizeof(st_table_entry))
+#define st_free_entry(entry) free(entry)
+#define st_alloc_table() (st_table *)malloc(sizeof(st_table))
+#define st_dealloc_table(table) free(table)
+#define st_alloc_bins(size) (st_table_entry **)calloc(size, sizeof(st_table_entry *))
+#define st_free_bins(bins, size) free(bins)
+static inline st_table_entry**
+st_realloc_bins(st_table_entry **bins, st_index_t newsize, st_index_t oldsize)
+{
+    bins = (st_table_entry **)realloc(bins, newsize * sizeof(st_table_entry *));
+    MEMZERO(bins, st_table_entry*, newsize);
+    return bins;
+}
+#endif
+
+/* Shortage */
+#define bins as.big.bins
+#define head as.big.head
+#define tail as.big.tail
+#define real_entries as.packed.real_entries
+
+/* preparation for possible packing improvements */
+#define PACKED_BINS(table) ((table)->as.packed.entries)
+#define PACKED_ENT(table, i) PACKED_BINS(table)[i]
+#define PKEY(table, i) PACKED_ENT((table), (i)).key
+#define PVAL(table, i) PACKED_ENT((table), (i)).val
+#define PHASH(table, i) PACKED_ENT((table), (i)).hash
+#define PKEY_SET(table, i, v) (PKEY((table), (i)) = (v))
+#define PVAL_SET(table, i, v) (PVAL((table), (i)) = (v))
+#define PHASH_SET(table, i, v) (PHASH((table), (i)) = (v))
+
+/* this function depends much on packed layout, so that it placed here */
+static inline void
+remove_packed_entry(st_table *table, st_index_t i)
+{
+    table->real_entries--;
+    table->num_entries--;
+    if (i < table->real_entries) {
+	MEMMOVE(&PACKED_ENT(table, i), &PACKED_ENT(table, i+1),
+		st_packed_entry, table->real_entries - i);
+    }
+}
+
+static inline void
+remove_safe_packed_entry(st_table *table, st_index_t i, st_data_t never)
+{
+    table->num_entries--;
+    PKEY_SET(table, i, never);
+    PVAL_SET(table, i, never);
+    PHASH_SET(table, i, 0);
+}
+
+/* ultrapacking */
+#define real_upacked num_bins
+#define MAX_UPACKED_HASH 1
+#define ULTRAPACKED(table) ((table)->real_upacked <= 1)
+#define UPACKED_ENT(table) ((table)->as.upacked)
+#define UPKEY(table) UPACKED_ENT(table).key
+#define UPVAL(table) UPACKED_ENT(table).val
+#define UPHASH(table) UPACKED_ENT(table).hash
+#define UPKEY_SET(table, v) (UPACKED_ENT(table).key = (v))
+#define UPVAL_SET(table, v) (UPACKED_ENT(table).val = (v))
+#define UPHASH_SET(table, v) (UPACKED_ENT(table).hash = (v))
+static inline void
+remove_upacked_entry(st_table *table)
+{
+    table->real_upacked = table->num_entries = 0;
+}
+
+static inline void
+remove_safe_upacked_entry(st_table *table, st_data_t never)
+{
+    table->num_entries = 0;
+    UPKEY_SET(table, never);
+    UPVAL_SET(table, never);
+    UPHASH_SET(table, 0);
+}
 /*
  * MINSIZE is the minimum size of a dictionary.
  */
@@ -85,8 +204,8 @@ static void rehash(st_table *);
 Table of prime numbers 2^n+a, 2<=n<=30.
 */
 static const unsigned int primes[] = {
-	8 + 3,
-	16 + 3,
+	ST_DEFAULT_INIT_TABLE_SIZE,
+	ST_DEFAULT_SECOND_TABLE_SIZE,
 	32 + 5,
 	64 + 3,
 	128 + 3,
@@ -161,8 +280,6 @@ stat_col(void)
 }
 #endif

-#define MAX_PACKED_NUMHASH (ST_DEFAULT_INIT_TABLE_SIZE/2)
-
 st_table*
 st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
 {
@@ -181,14 +298,19 @@ st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
     }
 #endif

-    size = new_size(size);	/* round up to prime number */

-    tbl = alloc(st_table);
+    tbl = st_alloc_table();
     tbl->type = type;
     tbl->num_entries = 0;
-    tbl->entries_packed = type == &type_numhash && size/2 <= MAX_PACKED_NUMHASH;
+    tbl->entries_packed = size <= MAX_PACKED_HASH;
+    if (tbl->entries_packed) {
+	size = size <= MAX_UPACKED_HASH ? 0 : ST_DEFAULT_PACKED_TABLE_SIZE;
+    }
+    else {
+	size = new_size(size);	/* round up to prime number */
+    }
     tbl->num_bins = size;
-    tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*));
+    tbl->bins = size ? st_alloc_bins(size) : 0;
     tbl->head = 0;
     tbl->tail = 0;

@@ -243,17 +365,23 @@ st_clear(st_table *table)
     register st_table_entry *ptr, *next;
     st_index_t i;

+    if (ULTRAPACKED(table)) {
+	remove_upacked_entry(table);
+	return;
+    }
+
     if (table->entries_packed) {
         table->num_entries = 0;
+        table->real_entries = 0;
         return;
     }

-    for(i = 0; i < table->num_bins; i++) {
+    for (i = 0; i < table->num_bins; i++) {
 	ptr = table->bins[i];
 	table->bins[i] = 0;
 	while (ptr != 0) {
 	    next = ptr->next;
-	    free(ptr);
+	    st_free_entry(ptr);
 	    ptr = next;
 	}
     }
@@ -266,14 +394,19 @@ void
 st_free_table(st_table *table)
 {
     st_clear(table);
-    free(table->bins);
-    free(table);
+    if (!ULTRAPACKED(table)) {
+	st_free_bins(table->bins, table->num_bins);
+    }
+    st_dealloc_table(table);
 }

 size_t
 st_memsize(const st_table *table)
 {
-    if (table->entries_packed) {
+    if (ULTRAPACKED(table)) {
+	return sizeof(st_table);
+    }
+    else if (table->entries_packed) {
 	return table->num_bins * sizeof (void *) + sizeof(st_table);
     }
     else {
@@ -306,46 +439,77 @@ count_collision(const struct st_hash_type *type)
 #define FOUND_ENTRY
 #endif

-#define FIND_ENTRY(table, ptr, hash_val, bin_pos) do {\
-    (bin_pos) = (hash_val)%(table)->num_bins;\
-    (ptr) = (table)->bins[(bin_pos)];\
-    FOUND_ENTRY;\
-    if (PTR_NOT_EQUAL((table), (ptr), (hash_val), key)) {\
-	COLLISION;\
-	while (PTR_NOT_EQUAL((table), (ptr)->next, (hash_val), key)) {\
-	    (ptr) = (ptr)->next;\
-	}\
-	(ptr) = (ptr)->next;\
-    }\
-} while (0)
+#define FIND_ENTRY(table, ptr, hash_val, bin_pos) \
+    ((ptr) = find_entry((table), key, (hash_val), ((bin_pos) = (hash_val)%(table)->num_bins)))
+
+static st_table_entry *
+find_entry(st_table *table, st_data_t key, st_index_t hash_val, st_index_t bin_pos)
+{
+    register st_table_entry *ptr = table->bins[bin_pos];
+    FOUND_ENTRY;
+    if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {
+	COLLISION;
+	while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {
+	    ptr = ptr->next;
+	}
+	ptr = ptr->next;
+    }
+    return ptr;
+}
+
+static inline st_index_t
+find_packed_index(st_table *table, st_index_t hash_val, st_data_t key)
+{
+    st_index_t i = 0;
+    while (i < table->real_entries &&
+	   (PHASH(table, i) != hash_val || !EQUAL(table, key, PKEY(table, i)))) {
+	i++;
+    }
+    return i;
+}
+
+static inline int
+check_upacked(st_table *table, st_index_t hash_val, st_data_t key)
+{
+    return table->num_entries &&
+	UPHASH(table) == hash_val &&
+	EQUAL(table, key, UPKEY(table));
+}

 #define collision_check 0

 int
 st_lookup(st_table *table, register st_data_t key, st_data_t *value)
 {
-    st_index_t hash_val, bin_pos;
+    st_index_t hash_val;
     register st_table_entry *ptr;

+    hash_val = do_hash(key, table);
+
+    if (ULTRAPACKED(table)) {
+	if (check_upacked(table, hash_val, key)) {
+	    if (value != 0) *value = UPVAL(table);
+	    return 1;
+	}
+	return 0;
+    }
+
     if (table->entries_packed) {
-        st_index_t i;
-        for (i = 0; i < table->num_entries; i++) {
-            if ((st_data_t)table->bins[i*2] == key) {
-                if (value !=0) *value = (st_data_t)table->bins[i*2+1];
-                return 1;
-            }
-        }
+	st_index_t i = find_packed_index(table, hash_val, key);
+	if (i < table->real_entries) {
+	    if (value != 0) *value = PVAL(table, i);
+	    return 1;
+	}
         return 0;
     }

-    hash_val = do_hash(key, table);
-    FIND_ENTRY(table, ptr, hash_val, bin_pos);
+    ptr = find_entry(table, key, hash_val, hash_val % table->num_bins);

     if (ptr == 0) {
 	return 0;
     }
     else {
-	if (value != 0)  *value = ptr->record;
+	if (value != 0) *value = ptr->record;
 	return 1;
     }
 }
@@ -353,22 +517,29 @@ st_lookup(st_table *table, register st_data_t key, st_data_t *value)
 int
 st_get_key(st_table *table, register st_data_t key, st_data_t *result)
 {
-    st_index_t hash_val, bin_pos;
+    st_index_t hash_val;
     register st_table_entry *ptr;

+    hash_val = do_hash(key, table);
+
+    if (ULTRAPACKED(table)) {
+	if (check_upacked(table, hash_val, key)) {
+	    if (result != 0) *result = UPKEY(table);
+	    return 1;
+	}
+	return 0;
+    }
+
     if (table->entries_packed) {
-        st_index_t i;
-        for (i = 0; i < table->num_entries; i++) {
-            if ((st_data_t)table->bins[i*2] == key) {
-                if (result !=0) *result = (st_data_t)table->bins[i*2];
-                return 1;
-            }
-        }
+	st_index_t i = find_packed_index(table, hash_val, key);
+	if (i < table->real_entries) {
+	    if (result != 0) *result = PKEY(table, i);
+	    return 1;
+	}
         return 0;
     }

-    hash_val = do_hash(key, table);
-    FIND_ENTRY(table, ptr, hash_val, bin_pos);
+    ptr = find_entry(table, key, hash_val, hash_val % table->num_bins);

     if (ptr == 0) {
 	return 0;
@@ -382,85 +553,151 @@ st_get_key(st_table *table, register st_data_t key, st_data_t *result)
 #undef collision_check
 #define collision_check 1

-#define MORE_PACKABLE_P(table) \
-    ((st_index_t)((table)->num_entries+1) * 2 <= (table)->num_bins && \
-     (table)->num_entries+1 <= MAX_PACKED_NUMHASH)
-
-#define ADD_DIRECT(table, key, value, hash_val, bin_pos)\
-do {\
-    st_table_entry *entry;\
-    if ((table)->num_entries > ST_DEFAULT_MAX_DENSITY * (table)->num_bins) {\
-	rehash(table);\
-        (bin_pos) = (hash_val) % (table)->num_bins;\
-    }\
-    \
-    entry = alloc(st_table_entry);\
-    \
-    entry->hash = (hash_val);\
-    entry->key = (key);\
-    entry->record = (value);\
-    entry->next = (table)->bins[(bin_pos)];\
-    if ((table)->head != 0) {\
-	entry->fore = 0;\
-	(entry->back = (table)->tail)->fore = entry;\
-	(table)->tail = entry;\
-    }\
-    else {\
-	(table)->head = (table)->tail = entry;\
-	entry->fore = entry->back = 0;\
-    }\
-    (table)->bins[(bin_pos)] = entry;\
-    (table)->num_entries++;\
-} while (0)
+static inline st_table_entry *
+new_entry(st_table * table, st_data_t key, st_data_t value,
+	st_index_t hash_val, register st_index_t bin_pos)
+{
+    register st_table_entry *entry = st_alloc_entry();
+
+    entry->next = table->bins[bin_pos];
+    table->bins[bin_pos] = entry;
+    entry->hash = hash_val;
+    entry->key = key;
+    entry->record = value;
+
+    return entry;
+}
+
+static inline void
+add_direct(st_table *table, st_data_t key, st_data_t value,
+	   st_index_t hash_val, register st_index_t bin_pos)
+{
+    register st_table_entry *entry;
+    if (table->num_entries > ST_DEFAULT_MAX_DENSITY * table->num_bins) {
+	rehash(table);
+        bin_pos = hash_val % table->num_bins;
+    }
+
+    entry = new_entry(table, key, value, hash_val, bin_pos);
+
+    if (table->head != 0) {
+	entry->fore = 0;
+	(entry->back = table->tail)->fore = entry;
+	table->tail = entry;
+    }
+    else {
+	table->head = table->tail = entry;
+	entry->fore = entry->back = 0;
+    }
+    table->num_entries++;
+}

 static void
 unpack_entries(register st_table *table)
 {
     st_index_t i;
-    struct st_table_entry *packed_bins[MAX_PACKED_NUMHASH*2];
+    st_packed_entry packed_bins[MAX_PACKED_HASH];
+    register st_table_entry *entry, *preventry = 0, **chain;
     st_table tmp_table = *table;

-    memcpy(packed_bins, table->bins, sizeof(struct st_table_entry *) * table->num_entries*2);
-    table->bins = packed_bins;
+    MEMCPY(packed_bins, PACKED_BINS(table), st_packed_entry, MAX_PACKED_HASH);
+    table->as.packed.entries = packed_bins;
     tmp_table.entries_packed = 0;
-    tmp_table.num_entries = 0;
-    memset(tmp_table.bins, 0, sizeof(struct st_table_entry *) * tmp_table.num_bins);
-    for (i = 0; i < table->num_entries; i++) {
-        st_insert(&tmp_table, (st_data_t)packed_bins[i*2], (st_data_t)packed_bins[i*2+1]);
-    }
+#if ST_DEFAULT_INIT_TABLE_SIZE == ST_DEFAULT_PACKED_TABLE_SIZE
+    MEMZERO(tmp_table.bins, st_table_entry*, tmp_table.num_bins);
+#else
+    tmp_table.bins = st_realloc_bins(tmp_table.bins, ST_DEFAULT_INIT_TABLE_SIZE, tmp_table.num_bins);
+    tmp_table.num_bins = ST_DEFAULT_INIT_TABLE_SIZE;
+#endif
+    i = 0;
+    chain = &tmp_table.head;
+    do {
+	st_data_t key = packed_bins[i].key;
+	st_data_t val = packed_bins[i].val;
+	st_index_t hash = packed_bins[i].hash;
+	entry = new_entry(&tmp_table, key, val, hash,
+			  hash % ST_DEFAULT_INIT_TABLE_SIZE);
+	*chain = entry;
+	entry->back = preventry;
+	preventry = entry;
+	chain = &entry->fore;
+    } while (++i < MAX_PACKED_HASH);
+    *chain = NULL;
+    tmp_table.tail = entry;
     *table = tmp_table;
 }

+static void
+add_packed_direct(st_table *table, st_data_t key, st_data_t value, st_index_t hash_val)
+{
+    if (table->real_entries < MAX_PACKED_HASH) {
+	st_index_t i = table->real_entries++;
+	PKEY_SET(table, i, key);
+	PVAL_SET(table, i, value);
+	PHASH_SET(table, i, hash_val);
+	table->num_entries++;
+    }
+    else {
+	unpack_entries(table);
+	add_direct(table, key, value, hash_val, hash_val % table->num_bins);
+    }
+}
+
+static void
+add_upacked_direct(register st_table *table, register st_data_t key, st_data_t value, st_index_t hash_val)
+{
+    if (table->real_upacked) {
+	st_packed_entry *entries = (st_packed_entry *) st_alloc_bins(ST_DEFAULT_PACKED_TABLE_SIZE);
+	entries[0] = UPACKED_ENT(table);
+	entries[1].hash = hash_val;
+	entries[1].key = key;
+	entries[1].val = value;
+	table->num_bins = ST_DEFAULT_PACKED_TABLE_SIZE;
+	table->real_entries = 2;
+	table->num_entries++;
+	table->as.packed.entries = entries;
+    }
+    else {
+	table->real_upacked = 1;
+	table->num_entries = 1;
+	UPHASH_SET(table, hash_val);
+	UPKEY_SET(table, key);
+	UPVAL_SET(table, value);
+    }
+}
+
 int
 st_insert(register st_table *table, register st_data_t key, st_data_t value)
 {
-    st_index_t hash_val, bin_pos;
+    st_index_t hash_val;
+    register st_index_t bin_pos;
     register st_table_entry *ptr;

+    hash_val = do_hash(key, table);
+
+    if (ULTRAPACKED(table)) {
+	if (check_upacked(table, hash_val, key)) {
+	    UPVAL_SET(table, value);
+	    return 1;
+	}
+	add_upacked_direct(table, key, value, hash_val);
+	return 0;
+    }
+
     if (table->entries_packed) {
-        st_index_t i;
-        for (i = 0; i < table->num_entries; i++) {
-            if ((st_data_t)table->bins[i*2] == key) {
-                table->bins[i*2+1] = (struct st_table_entry*)value;
-                return 1;
-            }
-        }
-        if (MORE_PACKABLE_P(table)) {
-            i = table->num_entries++;
-            table->bins[i*2] = (struct st_table_entry*)key;
-            table->bins[i*2+1] = (struct st_table_entry*)value;
-            return 0;
-        }
-        else {
-            unpack_entries(table);
+	st_index_t i = find_packed_index(table, hash_val, key);
+	if (i < table->real_entries) {
+	    PVAL_SET(table, i, value);
+	    return 1;
         }
+	add_packed_direct(table, key, value, hash_val);
+	return 0;
     }

-    hash_val = do_hash(key, table);
     FIND_ENTRY(table, ptr, hash_val, bin_pos);

     if (ptr == 0) {
-	ADD_DIRECT(table, key, value, hash_val, bin_pos);
+	add_direct(table, key, value, hash_val, bin_pos);
 	return 0;
     }
     else {
@@ -473,34 +710,38 @@ int
 st_insert2(register st_table *table, register st_data_t key, st_data_t value,
 	   st_data_t (*func)(st_data_t))
 {
-    st_index_t hash_val, bin_pos;
+    st_index_t hash_val;
+    register st_index_t bin_pos;
     register st_table_entry *ptr;

+    hash_val = do_hash(key, table);
+
+    if (ULTRAPACKED(table)) {
+	if (check_upacked(table, hash_val, key)) {
+	    UPVAL_SET(table, value);
+	    return 1;
+	}
+	key = (*func)(key);
+	add_upacked_direct(table, key, value, hash_val);
+	return 0;
+    }
+
     if (table->entries_packed) {
-        st_index_t i;
-        for (i = 0; i < table->num_entries; i++) {
-            if ((st_data_t)table->bins[i*2] == key) {
-                table->bins[i*2+1] = (struct st_table_entry*)value;
-                return 1;
-            }
-        }
-        if (MORE_PACKABLE_P(table)) {
-            i = table->num_entries++;
-            table->bins[i*2] = (struct st_table_entry*)key;
-            table->bins[i*2+1] = (struct st_table_entry*)value;
-            return 0;
-        }
-        else {
-            unpack_entries(table);
-        }
+	st_index_t i = find_packed_index(table, hash_val, key);
+	if (i < table->real_entries) {
+	    PVAL_SET(table, i, value);
+	    return 1;
+	}
+	key = (*func)(key);
+	add_packed_direct(table, key, value, hash_val);
+	return 0;
     }

-    hash_val = do_hash(key, table);
     FIND_ENTRY(table, ptr, hash_val, bin_pos);

     if (ptr == 0) {
 	key = (*func)(key);
-	ADD_DIRECT(table, key, value, hash_val, bin_pos);
+	add_direct(table, key, value, hash_val, bin_pos);
 	return 0;
     }
     else {
@@ -512,36 +753,30 @@ st_insert2(register st_table *table, register st_data_t key, st_data_t value,
 void
 st_add_direct(st_table *table, st_data_t key, st_data_t value)
 {
-    st_index_t hash_val, bin_pos;
+    st_index_t hash_val;
+
+    hash_val = do_hash(key, table);
+    if (ULTRAPACKED(table)) {
+	add_upacked_direct(table, key, value, hash_val);
+	return;
+    }

     if (table->entries_packed) {
-        int i;
-        if (MORE_PACKABLE_P(table)) {
-            i = table->num_entries++;
-            table->bins[i*2] = (struct st_table_entry*)key;
-            table->bins[i*2+1] = (struct st_table_entry*)value;
-            return;
-        }
-        else {
-            unpack_entries(table);
-        }
+	add_packed_direct(table, key, value, hash_val);
+	return;
     }

-    hash_val = do_hash(key, table);
-    bin_pos = hash_val % table->num_bins;
-    ADD_DIRECT(table, key, value, hash_val, bin_pos);
+    add_direct(table, key, value, hash_val, hash_val % table->num_bins);
 }

 static void
 rehash(register st_table *table)
 {
     register st_table_entry *ptr, **new_bins;
-    st_index_t i, new_num_bins, hash_val;
+    st_index_t new_num_bins, hash_val;

     new_num_bins = new_size(table->num_bins+1);
-    new_bins = (st_table_entry**)
-	xrealloc(table->bins, new_num_bins * sizeof(st_table_entry*));
-    for (i = 0; i < new_num_bins; ++i) new_bins[i] = 0;
+    new_bins = st_realloc_bins(table->bins, new_num_bins, table->num_bins);
     table->num_bins = new_num_bins;
     table->bins = new_bins;

@@ -558,34 +793,37 @@ st_table*
 st_copy(st_table *old_table)
 {
     st_table *new_table;
-    st_table_entry *ptr, *entry, *prev, **tail;
+    st_table_entry *ptr, *entry, *prev, **tailp;
     st_index_t num_bins = old_table->num_bins;
     st_index_t hash_val;

-    new_table = alloc(st_table);
+    new_table = st_alloc_table();
     if (new_table == 0) {
 	return 0;
     }

     *new_table = *old_table;
-    new_table->bins = (st_table_entry**)
-	Calloc((unsigned)num_bins, sizeof(st_table_entry*));
+    if (ULTRAPACKED(old_table)) {
+	return new_table;
+    }
+
+    new_table->bins = st_alloc_bins(num_bins);

     if (new_table->bins == 0) {
-	free(new_table);
+	st_dealloc_table(new_table);
 	return 0;
     }

     if (old_table->entries_packed) {
-        memcpy(new_table->bins, old_table->bins, sizeof(struct st_table_entry *) * old_table->num_bins);
+        MEMCPY(new_table->bins, old_table->bins, st_table_entry*, old_table->num_bins);
         return new_table;
     }

     if ((ptr = old_table->head) != 0) {
 	prev = 0;
-	tail = &new_table->head;
+	tailp = &new_table->head;
 	do {
-	    entry = alloc(st_table_entry);
+	    entry = st_alloc_entry();
 	    if (entry == 0) {
 		st_free_table(new_table);
 		return 0;
@@ -595,8 +833,8 @@ st_copy(st_table *old_table)
 	    entry->next = new_table->bins[hash_val];
 	    new_table->bins[hash_val] = entry;
 	    entry->back = prev;
-	    *tail = prev = entry;
-	    tail = &entry->fore;
+	    *tailp = prev = entry;
+	    tailp = &entry->fore;
 	} while ((ptr = ptr->fore) != 0);
 	new_table->tail = prev;
     }
@@ -604,21 +842,22 @@ st_copy(st_table *old_table)
     return new_table;
 }

-#define REMOVE_ENTRY(table, ptr) do					\
-    {									\
-	if ((ptr)->fore == 0 && (ptr)->back == 0) {			\
-	    (table)->head = 0;						\
-	    (table)->tail = 0;						\
-	}								\
-	else {								\
-	    st_table_entry *fore = (ptr)->fore, *back = (ptr)->back;	\
-	    if (fore) fore->back = back;				\
-	    if (back) back->fore = fore;				\
-	    if ((ptr) == (table)->head) (table)->head = fore;		\
-	    if ((ptr) == (table)->tail) (table)->tail = back;		\
-	}								\
-	(table)->num_entries--;						\
-    } while (0)
+static inline void
+remove_entry(st_table *table, st_table_entry *ptr)
+{
+    if (ptr->fore == 0 && ptr->back == 0) {
+	table->head = 0;
+	table->tail = 0;
+    }
+    else {
+	st_table_entry *fore = ptr->fore, *back = ptr->back;
+	if (fore) fore->back = back;
+	if (back) back->fore = fore;
+	if (ptr == table->head) table->head = fore;
+	if (ptr == table->tail) table->tail = back;
+    }
+    table->num_entries--;
+}

 int
 st_delete(register st_table *table, register st_data_t *key, st_data_t *value)
@@ -627,30 +866,38 @@ st_delete(register st_table *table, register st_data_t *key, st_data_t *value)
     st_table_entry **prev;
     register st_table_entry *ptr;

+    hash_val = do_hash(*key, table);
+
+    if (ULTRAPACKED(table)) {
+	if (check_upacked(table, hash_val, *key)) {
+	    if (value != 0) *value = UPVAL(table);
+	    *key = UPKEY(table);
+	    remove_upacked_entry(table);
+	    return 1;
+	}
+	return 0;
+    }
+
     if (table->entries_packed) {
-        st_index_t i;
-        for (i = 0; i < table->num_entries; i++) {
-            if ((st_data_t)table->bins[i*2] == *key) {
-                if (value != 0) *value = (st_data_t)table->bins[i*2+1];
-                table->num_entries--;
-                memmove(&table->bins[i*2], &table->bins[(i+1)*2],
-                        sizeof(struct st_table_entry*) * 2*(table->num_entries-i));
-                return 1;
-            }
+	st_index_t i = find_packed_index(table, hash_val, *key);
+	if (i < table->real_entries) {
+	    if (value != 0) *value = PVAL(table, i);
+	    *key = PKEY(table, i);
+	    remove_packed_entry(table, i);
+	    return 1;
         }
         if (value != 0) *value = 0;
         return 0;
     }

-    hash_val = do_hash_bin(*key, table);
-
-    for (prev = &table->bins[hash_val]; (ptr = *prev) != 0; prev = &ptr->next) {
+    prev = &table->bins[hash_val % table->num_bins];
+    for (;(ptr = *prev) != 0; prev = &ptr->next) {
 	if (EQUAL(table, *key, ptr->key)) {
 	    *prev = ptr->next;
-	    REMOVE_ENTRY(table, ptr);
+	    remove_entry(table, ptr);
 	    if (value != 0) *value = ptr->record;
 	    *key = ptr->key;
-	    free(ptr);
+	    st_free_entry(ptr);
 	    return 1;
 	}
     }
@@ -665,25 +912,36 @@ st_delete_safe(register st_table *table, register st_data_t *key, st_data_t *val
     st_index_t hash_val;
     register st_table_entry *ptr;

+    hash_val = do_hash(*key, table);
+
+    if (ULTRAPACKED(table)) {
+	if (check_upacked(table, hash_val, *key)) {
+	    if (value != 0) *value = UPVAL(table);
+	    *key = UPKEY(table);
+	    remove_safe_upacked_entry(table, never);
+	    return 1;
+	}
+	if (value != 0) *value = 0;
+	return 0;
+    }
+
     if (table->entries_packed) {
-	st_index_t i;
-	for (i = 0; i < table->num_entries; i++) {
-	    if ((st_data_t)table->bins[i*2] == *key) {
-		if (value != 0) *value = (st_data_t)table->bins[i*2+1];
-		table->bins[i*2] = (void *)never;
-		return 1;
-	    }
+	st_index_t i = find_packed_index(table, hash_val, *key);
+	if (i < table->real_entries) {
+	    if (value != 0) *value = PVAL(table, i);
+	    *key = PKEY(table, i);
+	    remove_safe_packed_entry(table, i, never);
+	    return 1;
 	}
 	if (value != 0) *value = 0;
 	return 0;
     }

-    hash_val = do_hash_bin(*key, table);
-    ptr = table->bins[hash_val];
+    ptr = table->bins[hash_val % table->num_bins];

     for (; ptr != 0; ptr = ptr->next) {
 	if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) {
-	    REMOVE_ENTRY(table, ptr);
+	    remove_entry(table, ptr);
 	    *key = ptr->key;
 	    if (value != 0) *value = ptr->record;
 	    ptr->key = ptr->record = never;
@@ -701,17 +959,23 @@ st_cleanup_safe(st_table *table, st_data_t never)
     st_table_entry *ptr, **last, *tmp;
     st_index_t i;

+    if (ULTRAPACKED(table)) {
+	table->real_upacked = table->num_entries;
+	return;
+    }
+
     if (table->entries_packed) {
 	st_index_t i = 0, j = 0;
-	while ((st_data_t)table->bins[i*2] != never) {
-	    if (i++ == table->num_entries) return;
+	while (PKEY(table, i) != never) {
+	    if (i++ == table->real_entries) return;
 	}
-	for (j = i; ++i < table->num_entries;) {
-	    if ((st_data_t)table->bins[i*2] == never) continue;
-	    table->bins[j*2] = table->bins[i*2];
-	    table->bins[j*2+1] = table->bins[i*2+1];
+	for (j = i; ++i < table->real_entries;) {
+	    if (PKEY(table, i) == never) continue;
+	    PACKED_ENT(table, j) = PACKED_ENT(table, i);
 	    j++;
 	}
+	table->real_entries = j;
+	/* table->num_entries really should be equal j at this moment, but let set it anyway */
 	table->num_entries = j;
 	return;
     }
@@ -722,7 +986,7 @@ st_cleanup_safe(st_table *table, st_data_t never)
 	    if (ptr->key == never) {
 		tmp = ptr;
 		*last = ptr = ptr->next;
-		free(tmp);
+		st_free_entry(tmp);
 	    }
 	    else {
 		ptr = *(last = &ptr->next);
@@ -732,50 +996,78 @@ st_cleanup_safe(st_table *table, st_data_t never)
 }

 int
-st_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg)
+st_foreach_check(st_table *table, int (*func)(ANYARGS), st_data_t arg, st_data_t never)
 {
     st_table_entry *ptr, **last, *tmp;
     enum st_retval retval;
-    st_index_t i;
+    st_data_t key, val;
+    st_index_t hash, i = 0;
+
+    if (table->num_entries == 0) {
+	return 0;
+    }
+
+    if (ULTRAPACKED(table)) {
+	key = UPKEY(table);
+	val = UPVAL(table);
+	hash = UPHASH(table);
+	if (key == never) return 0;
+	retval = (*func)(key, val, arg);
+	if (!ULTRAPACKED(table)) {
+	    goto packed;
+	}
+	switch(retval) {
+	  case ST_CHECK:
+	    if (UPHASH(table) == 0 && UPKEY(table) == never)
+		break;
+	    if (check_upacked(table, hash, key))
+		break;
+	    goto deleted;
+	  case ST_DELETE:
+	    remove_safe_upacked_entry(table, never);
+	  case ST_CONTINUE:
+	  case ST_STOP:
+	    break;
+	}
+	return 0;
+    }

     if (table->entries_packed) {
-        for (i = 0; i < table->num_entries; i++) {
-            st_index_t j;
-            st_data_t key, val;
-            key = (st_data_t)table->bins[i*2];
-            val = (st_data_t)table->bins[i*2+1];
-            retval = (*func)(key, val, arg);
+	for (i = 0; i < table->real_entries; i++) {
+	    key = PKEY(table, i);
+	    val = PVAL(table, i);
+	    hash = PHASH(table, i);
+	    if (key == never) continue;
+	    retval = (*func)(key, val, arg);
+	  packed:
 	    if (!table->entries_packed) {
-		FIND_ENTRY(table, ptr, key, i);
+		FIND_ENTRY(table, ptr, hash, i);
 		if (retval == ST_CHECK) {
 		    if (!ptr) goto deleted;
 		    goto unpacked_continue;
 		}
 		goto unpacked;
 	    }
-            switch (retval) {
+	    switch (retval) {
 	      case ST_CHECK:	/* check if hash is modified during iteration */
-                for (j = 0; j < table->num_entries; j++) {
-                    if ((st_data_t)table->bins[j*2] == key)
-                        break;
-                }
-                if (j == table->num_entries) {
+		if (PHASH(table, i) == 0 && PKEY(table, i) == never) {
+		    break;
+		}
+		i = find_packed_index(table, hash, key);
+		if (i == table->real_entries) {
 		    goto deleted;
-                }
+		}
 		/* fall through */
 	      case ST_CONTINUE:
 		break;
 	      case ST_STOP:
 		return 0;
 	      case ST_DELETE:
-                table->num_entries--;
-                memmove(&table->bins[i*2], &table->bins[(i+1)*2],
-                        sizeof(struct st_table_entry*) * 2*(table->num_entries-i));
-                i--;
-                break;
-            }
-        }
-        return 0;
+		remove_safe_packed_entry(table, i, never);
+		break;
+	    }
+	}
+	return 0;
     }
     else {
 	ptr = table->head;
@@ -783,6 +1075,8 @@ st_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg)

     if (ptr != 0) {
 	do {
+	    if (ptr->key == never)
+		goto unpacked_continue;
 	    i = ptr->hash % table->num_bins;
 	    retval = (*func)(ptr->key, ptr->record, arg);
 	  unpacked:
@@ -808,10 +1102,100 @@ st_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg)
 		for (; (tmp = *last) != 0; last = &tmp->next) {
 		    if (ptr == tmp) {
 			tmp = ptr->fore;
+			remove_entry(table, ptr);
+			ptr->key = ptr->record = never;
+			ptr->hash = 0;
+			ptr = tmp;
+			break;
+		    }
+		}
+	    }
+	} while (ptr && table->head);
+    }
+    return 0;
+}
+
+int
+st_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg)
+{
+    st_table_entry *ptr, **last, *tmp;
+    enum st_retval retval;
+    st_data_t key, val;
+    st_index_t hash, i = 0;
+
+    if (table->num_entries == 0) {
+	return 0;
+    }
+
+    if (ULTRAPACKED(table)) {
+	key = UPKEY(table);
+	val = UPVAL(table);
+	hash = UPHASH(table);
+	retval = (*func)(key, val, arg);
+	if (!ULTRAPACKED(table)) {
+	    goto packed;
+	}
+	switch (retval) {
+	  case ST_DELETE:
+	    remove_upacked_entry(table);
+	  case ST_CONTINUE:
+	  case ST_CHECK:
+	  case ST_STOP:
+	    break;
+	}
+	return 0;
+    }
+
+    if (table->entries_packed) {
+	for (i = 0; i < table->real_entries; i++) {
+	    key = PKEY(table, i);
+	    val = PVAL(table, i);
+	    hash = PHASH(table, i);
+	    retval = (*func)(key, val, arg);
+	  packed:
+	    if (!table->entries_packed) {
+		FIND_ENTRY(table, ptr, hash, i);
+		if (!ptr) return 0;
+		goto unpacked;
+	    }
+	    switch (retval) {
+	      case ST_CONTINUE:
+		break;
+	      case ST_CHECK:
+	      case ST_STOP:
+		return 0;
+	      case ST_DELETE:
+		remove_packed_entry(table, i);
+		i--;
+		break;
+	    }
+	}
+	return 0;
+    }
+    else {
+	ptr = table->head;
+    }
+
+    if (ptr != 0) {
+	do {
+	    i = ptr->hash % table->num_bins;
+	    retval = (*func)(ptr->key, ptr->record, arg);
+	  unpacked:
+	    switch (retval) {
+	      case ST_CONTINUE:
+		ptr = ptr->fore;
+		break;
+	      case ST_CHECK:
+	      case ST_STOP:
+		return 0;
+	      case ST_DELETE:
+		last = &table->bins[ptr->hash % table->num_bins];
+		for (; (tmp = *last) != 0; last = &tmp->next) {
+		    if (ptr == tmp) {
+			tmp = ptr->fore;
 			*last = ptr->next;
-			REMOVE_ENTRY(table, ptr);
-			free(ptr);
-			if (ptr == tmp) return 0;
+			remove_entry(table, ptr);
+			st_free_entry(ptr);
 			ptr = tmp;
 			break;
 		    }
@@ -834,13 +1218,13 @@ st_reverse_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg)
         for (i = table->num_entries-1; 0 <= i; i--) {
             int j;
             st_data_t key, val;
-            key = (st_data_t)table->bins[i*2];
-            val = (st_data_t)table->bins[i*2+1];
+            key = PKEY(table, i);
+            val = PVAL(table, i);
             retval = (*func)(key, val, arg);
             switch (retval) {
 	      case ST_CHECK:	/* check if hash is modified during iteration */
                 for (j = 0; j < table->num_entries; j++) {
-                    if ((st_data_t)table->bins[j*2] == key)
+                    if (PKEY(table, j) == key)
                         break;
                 }
                 if (j == table->num_entries) {
@@ -854,9 +1238,7 @@ st_reverse_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg)
 	      case ST_STOP:
 		return 0;
 	      case ST_DELETE:
-                table->num_entries--;
-                memmove(&table->bins[i*2], &table->bins[(i+1)*2],
-                        sizeof(struct st_table_entry*) * 2*(table->num_entries-i));
+		remove_packed_entry(table, i);
                 break;
             }
         }
@@ -889,8 +1271,8 @@ st_reverse_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg)
 		    if (ptr == tmp) {
 			tmp = ptr->back;
 			*last = ptr->next;
-			REMOVE_ENTRY(table, ptr);
-			free(ptr);
+			remove_entry(table, ptr);
+			st_free_entry(ptr);
 			ptr = tmp;
 			break;
 		    }

## 03-sparse_array.diff
diff --git a/class.c b/class.c
index 450c334..6e97cb0 100644
--- a/class.c
+++ b/class.c
@@ -31,7 +31,7 @@
 #include "internal.h"
 #include <ctype.h>

-extern st_table *rb_class_tbl;
+extern sa_table rb_class_tbl;
 static ID id_attached;

 /**
@@ -50,14 +50,24 @@ static VALUE
 class_alloc(VALUE flags, VALUE klass)
 {
     rb_classext_t *ext = ALLOC(rb_classext_t);
+    rb_class_cache_t *cache = ALLOC(rb_class_cache_t);
     NEWOBJ(obj, struct RClass);
     OBJSETUP(obj, klass, flags);
     obj->ptr = ext;
-    RCLASS_IV_TBL(obj) = 0;
-    RCLASS_CONST_TBL(obj) = 0;
-    RCLASS_M_TBL(obj) = 0;
-    RCLASS_SUPER(obj) = 0;
-    RCLASS_IV_INDEX_TBL(obj) = 0;
+    obj->cache = cache;
+    MEMZERO(ext, struct rb_classext_struct, 1);
+    MEMZERO(cache, struct rb_class_cache_struct, 1);
+    return (VALUE)obj;
+}
+
+static VALUE
+iclass_alloc()
+{
+    rb_class_cache_t *cache = ALLOC(rb_class_cache_t);
+    NEWOBJ(obj, struct RClass);
+    OBJSETUP(obj, rb_cClass, T_ICLASS);
+    obj->cache = cache;
+    MEMZERO(cache, struct rb_class_cache_struct, 1);
     return (VALUE)obj;
 }

@@ -77,7 +87,6 @@ rb_class_boot(VALUE super)
     VALUE klass = class_alloc(T_CLASS, rb_cClass);

     RCLASS_SUPER(klass) = super;
-    RCLASS_M_TBL(klass) = st_init_numtable();

     OBJ_INFECT(klass, super);
     return (VALUE)klass;
@@ -120,85 +129,59 @@ rb_class_new(VALUE super)
     return rb_class_boot(super);
 }

-struct clone_method_data {
-    st_table *tbl;
-    VALUE klass;
-};
-
 VALUE rb_iseq_clone(VALUE iseqval, VALUE newcbase);

-static int
-clone_method(ID mid, const rb_method_entry_t *me, struct clone_method_data *data)
+static void
+clone_method(ID mid, const rb_method_entry_t *me, VALUE klass)
 {
     VALUE newiseqval;
     if (me->def && me->def->type == VM_METHOD_TYPE_ISEQ) {
 	rb_iseq_t *iseq;
-	newiseqval = rb_iseq_clone(me->def->body.iseq->self, data->klass);
+	newiseqval = rb_iseq_clone(me->def->body.iseq->self, klass);
 	GetISeqPtr(newiseqval, iseq);
-	rb_add_method(data->klass, mid, VM_METHOD_TYPE_ISEQ, iseq, me->flag);
+	rb_add_method(klass, mid, VM_METHOD_TYPE_ISEQ, iseq, me->flag);
 	RB_GC_GUARD(newiseqval);
     }
     else {
-	rb_method_entry_set(data->klass, mid, me, me->flag);
+	rb_method_entry_set(klass, mid, me, me->flag);
     }
-    return ST_CONTINUE;
 }

-static int
-clone_const(ID key, const rb_const_entry_t *ce, st_table *tbl)
+static void
+clone_const(sa_index_t key, st_data_t ce, sa_table *tbl)
 {
     rb_const_entry_t *nce = ALLOC(rb_const_entry_t);
-    *nce = *ce;
-    st_insert(tbl, key, (st_data_t)nce);
-    return ST_CONTINUE;
-}
-
-static int
-clone_const_i(st_data_t key, st_data_t value, st_data_t data)
-{
-    return clone_const((ID)key, (const rb_const_entry_t *)value, (st_table *)data);
+    *nce = *(const rb_const_entry_t*)ce;
+    sa_insert(tbl, (sa_index_t)key, (st_data_t)nce);
 }

 /* :nodoc: */
 VALUE
 rb_mod_init_copy(VALUE clone, VALUE orig)
 {
+    ID id;
     rb_obj_init_copy(clone, orig);
     if (!FL_TEST(CLASS_OF(clone), FL_SINGLETON)) {
 	RBASIC(clone)->klass = rb_singleton_class_clone(orig);
 	rb_singleton_class_attached(RBASIC(clone)->klass, (VALUE)clone);
     }
     RCLASS_SUPER(clone) = RCLASS_SUPER(orig);
-    if (RCLASS_IV_TBL(orig)) {
-	st_data_t id;

-	if (RCLASS_IV_TBL(clone)) {
-	    st_free_table(RCLASS_IV_TBL(clone));
-	}
-	RCLASS_IV_TBL(clone) = st_copy(RCLASS_IV_TBL(orig));
-	CONST_ID(id, "__classpath__");
-	st_delete(RCLASS_IV_TBL(clone), &id, 0);
-	CONST_ID(id, "__classid__");
-	st_delete(RCLASS_IV_TBL(clone), &id, 0);
-    }
-    if (RCLASS_CONST_TBL(orig)) {
-	if (RCLASS_CONST_TBL(clone)) {
-	    rb_free_const_table(RCLASS_CONST_TBL(clone));
-	}
-	RCLASS_CONST_TBL(clone) = st_init_numtable();
-	st_foreach(RCLASS_CONST_TBL(orig), clone_const_i, (st_data_t)RCLASS_CONST_TBL(clone));
-    }
-    if (RCLASS_M_TBL(orig)) {
-	struct clone_method_data data;
+    sa_copy_to(RCLASS_IV_TBL(orig), RCLASS_IV_TBL(clone));
+    CONST_ID(id, "__classpath__");
+    sa_delete(RCLASS_IV_TBL(clone), (sa_index_t)id, 0);
+    CONST_ID(id, "__classid__");
+    sa_delete(RCLASS_IV_TBL(clone), (sa_index_t)id, 0);

-	if (RCLASS_M_TBL(clone)) {
-	    rb_free_m_table(RCLASS_M_TBL(clone));
-	}
-	data.tbl = RCLASS_M_TBL(clone) = st_init_numtable();
-	data.klass = clone;
-	st_foreach(RCLASS_M_TBL(orig), clone_method,
-		   (st_data_t)&data);
-    }
+    sa_clear(RCLASS_CONST_TBL(clone));
+    SA_FOREACH_START(RCLASS_CONST_TBL(orig));
+    clone_const(entry->key, value, RCLASS_CONST_TBL(clone));
+    SA_FOREACH_END();
+
+    rb_free_m_table(RCLASS_M_TBL(clone));
+    SA_FOREACH_START(RCLASS_M_TBL(orig));
+    clone_method(entry->key, (const rb_method_entry_t *)value, clone);
+    SA_FOREACH_END();

     return clone;
 }
@@ -227,7 +210,6 @@ rb_singleton_class_clone(VALUE obj)
     if (!FL_TEST(klass, FL_SINGLETON))
 	return klass;
     else {
-	struct clone_method_data data;
 	/* copy singleton(unnamed) class */
 	VALUE clone = class_alloc((RBASIC(klass)->flags & ~(FL_MARK)), 0);

@@ -239,18 +221,16 @@ rb_singleton_class_clone(VALUE obj)
 	}

 	RCLASS_SUPER(clone) = RCLASS_SUPER(klass);
-	if (RCLASS_IV_TBL(klass)) {
-	    RCLASS_IV_TBL(clone) = st_copy(RCLASS_IV_TBL(klass));
-	}
-	if (RCLASS_CONST_TBL(klass)) {
-	    RCLASS_CONST_TBL(clone) = st_init_numtable();
-	    st_foreach(RCLASS_CONST_TBL(klass), clone_const_i, (st_data_t)RCLASS_CONST_TBL(clone));
-	}
-	RCLASS_M_TBL(clone) = st_init_numtable();
-	data.tbl = RCLASS_M_TBL(clone);
-	data.klass = (VALUE)clone;
-	st_foreach(RCLASS_M_TBL(klass), clone_method,
-		   (st_data_t)&data);
+        sa_copy_to(RCLASS_IV_TBL(klass), RCLASS_IV_TBL(clone));
+
+        SA_FOREACH_START(RCLASS_CONST_TBL(klass));
+        clone_const(entry->key, value, RCLASS_CONST_TBL(clone));
+        SA_FOREACH_END();
+
+        SA_FOREACH_START(RCLASS_M_TBL(klass));
+        clone_method(entry->key, (const rb_method_entry_t*)value, clone);
+        SA_FOREACH_END();
+
 	rb_singleton_class_attached(RBASIC(clone)->klass, (VALUE)clone);
 	FL_SET(clone, FL_SINGLETON);
 	return (VALUE)clone;
@@ -265,10 +245,7 @@ void
 rb_singleton_class_attached(VALUE klass, VALUE obj)
 {
     if (FL_TEST(klass, FL_SINGLETON)) {
-	if (!RCLASS_IV_TBL(klass)) {
-	    RCLASS_IV_TBL(klass) = st_init_numtable();
-	}
-	st_insert(RCLASS_IV_TBL(klass), id_attached, obj);
+	sa_insert(RCLASS_IV_TBL(klass), (sa_index_t)id_attached, obj);
     }
 }

@@ -355,12 +332,12 @@ make_singleton_class(VALUE obj)
 static VALUE
 boot_defclass(const char *name, VALUE super)
 {
-    extern st_table *rb_class_tbl;
+    extern sa_table rb_class_tbl;
     VALUE obj = rb_class_boot(super);
     ID id = rb_intern(name);

     rb_name_class(obj, id);
-    st_add_direct(rb_class_tbl, id, obj);
+    sa_insert(&rb_class_tbl, (sa_index_t)id, obj);
     rb_const_set((rb_cObject ? rb_cObject : obj), id, obj);
     return obj;
 }
@@ -484,7 +461,7 @@ rb_define_class(const char *name, VALUE super)
 	rb_warn("no super class for `%s', Object assumed", name);
     }
     klass = rb_define_class_id(id, super);
-    st_add_direct(rb_class_tbl, id, klass);
+    sa_insert(&rb_class_tbl, (sa_index_t)id, klass);
     rb_name_class(klass, id);
     rb_const_set(rb_cObject, id, klass);
     rb_class_inherited(super, klass);
@@ -565,8 +542,6 @@ rb_module_new(void)
 {
     VALUE mdl = class_alloc(T_MODULE, rb_cModule);

-    RCLASS_M_TBL(mdl) = st_init_numtable();
-
     return (VALUE)mdl;
 }

@@ -595,7 +570,7 @@ rb_define_module(const char *name)
 	rb_raise(rb_eTypeError, "%s is not a module", rb_obj_classname(module));
     }
     module = rb_define_module_id(id);
-    st_add_direct(rb_class_tbl, id, module);
+    sa_insert(&rb_class_tbl, (sa_index_t)id, module);
     rb_const_set(rb_cObject, id, module);

     return module;
@@ -630,27 +605,15 @@ rb_define_module_id_under(VALUE outer, ID id)
 static VALUE
 include_class_new(VALUE module, VALUE super)
 {
-    VALUE klass = class_alloc(T_ICLASS, rb_cClass);
+    VALUE klass;

     if (BUILTIN_TYPE(module) == T_ICLASS) {
 	module = RBASIC(module)->klass;
     }
-    if (!RCLASS_IV_TBL(module)) {
-	RCLASS_IV_TBL(module) = st_init_numtable();
-    }
-    if (!RCLASS_CONST_TBL(module)) {
-	RCLASS_CONST_TBL(module) = st_init_numtable();
-    }
-    RCLASS_IV_TBL(klass) = RCLASS_IV_TBL(module);
-    RCLASS_CONST_TBL(klass) = RCLASS_CONST_TBL(module);
-    RCLASS_M_TBL(klass) = RCLASS_M_TBL(module);
+    klass = iclass_alloc();
+    RBASIC(klass)->klass = module;
+    RCLASS_EXT(klass) = RCLASS_EXT(module);
     RCLASS_SUPER(klass) = super;
-    if (TYPE(module) == T_ICLASS) {
-	RBASIC(klass)->klass = RBASIC(module)->klass;
-    }
-    else {
-	RBASIC(klass)->klass = module;
-    }
     OBJ_INFECT(klass, module);
     OBJ_INFECT(klass, super);

@@ -677,13 +640,13 @@ rb_include_module(VALUE klass, VALUE module)
     while (module) {
 	int superclass_seen = FALSE;

-	if (RCLASS_M_TBL(klass) == RCLASS_M_TBL(module))
+	if (RCLASS_EXT(klass) == RCLASS_EXT(module))
 	    rb_raise(rb_eArgError, "cyclic include detected");
 	/* ignore if the module included already in superclasses */
 	for (p = RCLASS_SUPER(klass); p; p = RCLASS_SUPER(p)) {
 	    switch (BUILTIN_TYPE(p)) {
 	      case T_ICLASS:
-		if (RCLASS_M_TBL(p) == RCLASS_M_TBL(module)) {
+		if (RCLASS_EXT(p) == RCLASS_EXT(module)) {
 		    if (!superclass_seen) {
 			c = p;  /* move insertion point */
 		    }
@@ -696,7 +659,7 @@ rb_include_module(VALUE klass, VALUE module)
 	    }
 	}
 	c = RCLASS_SUPER(c) = include_class_new(module, RCLASS_SUPER(c));
-	if (RMODULE_M_TBL(module) && RMODULE_M_TBL(module)->num_entries)
+	if (RMODULE_M_TBL(module)->num_entries)
 	    changed = 1;
       skip:
 	module = RCLASS_SUPER(module);
@@ -827,58 +790,58 @@ ins_methods_push(ID name, long type, VALUE ary, long visi)
 }

 static int
-ins_methods_i(st_data_t name, st_data_t type, st_data_t ary)
+ins_methods_i(sa_index_t name, st_data_t type, st_data_t ary)
 {
     return ins_methods_push((ID)name, (long)type, (VALUE)ary, -1); /* everything but private */
 }

 static int
-ins_methods_prot_i(st_data_t name, st_data_t type, st_data_t ary)
+ins_methods_prot_i(sa_index_t name, st_data_t type, st_data_t ary)
 {
     return ins_methods_push((ID)name, (long)type, (VALUE)ary, NOEX_PROTECTED);
 }

 static int
-ins_methods_priv_i(st_data_t name, st_data_t type, st_data_t ary)
+ins_methods_priv_i(sa_index_t name, st_data_t type, st_data_t ary)
 {
     return ins_methods_push((ID)name, (long)type, (VALUE)ary, NOEX_PRIVATE);
 }

 static int
-ins_methods_pub_i(st_data_t name, st_data_t type, st_data_t ary)
+ins_methods_pub_i(sa_index_t name, st_data_t type, st_data_t ary)
 {
     return ins_methods_push((ID)name, (long)type, (VALUE)ary, NOEX_PUBLIC);
 }

 static int
-method_entry_i(st_data_t key, st_data_t value, st_data_t data)
+method_entry_i(sa_index_t key, st_data_t value, st_data_t data)
 {
     const rb_method_entry_t *me = (const rb_method_entry_t *)value;
-    st_table *list = (st_table *)data;
+    sa_table *list = (sa_table *)data;
     long type;

     if ((ID)key == ID_ALLOCATOR) {
 	return ST_CONTINUE;
     }

-    if (!st_lookup(list, key, 0)) {
+    if (!sa_lookup(list, key, 0)) {
 	if (UNDEFINED_METHOD_ENTRY_P(me)) {
 	    type = -1; /* none */
 	}
 	else {
 	    type = VISI(me->flag);
 	}
-	st_add_direct(list, key, type);
+	sa_insert(list, key, type);
     }
     return ST_CONTINUE;
 }

 static VALUE
-class_instance_method_list(int argc, VALUE *argv, VALUE mod, int obj, int (*func) (st_data_t, st_data_t, st_data_t))
+class_instance_method_list(int argc, VALUE *argv, VALUE mod, int obj, int (*func) (sa_index_t, st_data_t, st_data_t))
 {
     VALUE ary;
     int recur;
-    st_table *list;
+    sa_table list = SA_EMPTY_TABLE;

     if (argc == 0) {
 	recur = TRUE;
@@ -889,16 +852,15 @@ class_instance_method_list(int argc, VALUE *argv, VALUE mod, int obj, int (*func
 	recur = RTEST(r);
     }

-    list = st_init_numtable();
     for (; mod; mod = RCLASS_SUPER(mod)) {
-	st_foreach(RCLASS_M_TBL(mod), method_entry_i, (st_data_t)list);
+	sa_foreach(RCLASS_M_TBL(mod), method_entry_i, (st_data_t)&list);
 	if (BUILTIN_TYPE(mod) == T_ICLASS) continue;
 	if (obj && FL_TEST(mod, FL_SINGLETON)) continue;
 	if (!recur) break;
     }
     ary = rb_ary_new();
-    st_foreach(list, func, ary);
-    st_free_table(list);
+    sa_foreach(&list, func, ary);
+    sa_clear(&list);

     return ary;
 }
@@ -1112,7 +1074,7 @@ VALUE
 rb_obj_singleton_methods(int argc, VALUE *argv, VALUE obj)
 {
     VALUE recur, ary, klass;
-    st_table *list;
+    sa_table list = SA_EMPTY_TABLE;

     if (argc == 0) {
 	recur = Qtrue;
@@ -1121,20 +1083,19 @@ rb_obj_singleton_methods(int argc, VALUE *argv, VALUE obj)
 	rb_scan_args(argc, argv, "01", &recur);
     }
     klass = CLASS_OF(obj);
-    list = st_init_numtable();
     if (klass && FL_TEST(klass, FL_SINGLETON)) {
-	st_foreach(RCLASS_M_TBL(klass), method_entry_i, (st_data_t)list);
+	sa_foreach(RCLASS_M_TBL(klass), method_entry_i, (st_data_t)&list);
 	klass = RCLASS_SUPER(klass);
     }
     if (RTEST(recur)) {
 	while (klass && (FL_TEST(klass, FL_SINGLETON) || TYPE(klass) == T_ICLASS)) {
-	    st_foreach(RCLASS_M_TBL(klass), method_entry_i, (st_data_t)list);
+	    sa_foreach(RCLASS_M_TBL(klass), method_entry_i, (st_data_t)&list);
 	    klass = RCLASS_SUPER(klass);
 	}
     }
     ary = rb_ary_new();
-    st_foreach(list, ins_methods_i, ary);
-    st_free_table(list);
+    sa_foreach(&list, ins_methods_i, ary);
+    sa_clear(&list);

     return ary;
 }
diff --git a/common.mk b/common.mk
index c9ef641..eab553e 100644
--- a/common.mk
+++ b/common.mk
@@ -79,6 +79,7 @@ COMMONOBJS    = array.$(OBJEXT) \
 		safe.$(OBJEXT) \
 		signal.$(OBJEXT) \
 		sprintf.$(OBJEXT) \
+		sp_ar.$(OBJEXT) \
 		st.$(OBJEXT) \
 		strftime.$(OBJEXT) \
 		string.$(OBJEXT) \
@@ -702,6 +703,7 @@ signal.$(OBJEXT): {$(VPATH)}signal.c $(RUBY_H_INCLUDES) \
   $(VM_CORE_H_INCLUDES) {$(VPATH)}debug.h
 sprintf.$(OBJEXT): {$(VPATH)}sprintf.c $(RUBY_H_INCLUDES) {$(VPATH)}re.h \
   {$(VPATH)}regex.h {$(VPATH)}vsnprintf.c $(ENCODING_H_INCLUDES)
+sp_ar.$(OBJEXT): {$(VPATH)}sp_ar.c $(RUBY_H_INCLUDES)
 st.$(OBJEXT): {$(VPATH)}st.c $(RUBY_H_INCLUDES)
 strftime.$(OBJEXT): {$(VPATH)}strftime.c $(RUBY_H_INCLUDES) \
   {$(VPATH)}timev.h
diff --git a/constant.h b/constant.h
index 8232910..9106847 100644
--- a/constant.h
+++ b/constant.h
@@ -23,7 +23,7 @@ typedef struct rb_const_entry_struct {

 VALUE rb_mod_private_constant(int argc, VALUE *argv, VALUE obj);
 VALUE rb_mod_public_constant(int argc, VALUE *argv, VALUE obj);
-void rb_free_const_table(st_table *tbl);
+void rb_free_const_table(sa_table *tbl);
 VALUE rb_public_const_get(VALUE klass, ID id);
 VALUE rb_public_const_get_at(VALUE klass, ID id);
 VALUE rb_public_const_get_from(VALUE klass, ID id);
diff --git a/ext/objspace/objspace.c b/ext/objspace/objspace.c
index 65e0d84..891e058 100644
--- a/ext/objspace/objspace.c
+++ b/ext/objspace/objspace.c
@@ -60,20 +60,10 @@ memsize_of(VALUE obj)
 	break;
       case T_MODULE:
       case T_CLASS:
-	size += st_memsize(RCLASS_M_TBL(obj));
-	if (RCLASS_IV_TBL(obj)) {
-	    size += st_memsize(RCLASS_IV_TBL(obj));
-	}
-	if (RCLASS_IV_INDEX_TBL(obj)) {
-	    size += st_memsize(RCLASS_IV_INDEX_TBL(obj));
-	}
-	if (RCLASS(obj)->ptr->iv_tbl) {
-	    size += st_memsize(RCLASS(obj)->ptr->iv_tbl);
-	}
-	if (RCLASS(obj)->ptr->const_tbl) {
-	    size += st_memsize(RCLASS(obj)->ptr->const_tbl);
-	}
-	size += sizeof(rb_classext_t);
+	size += sa_memsize(RCLASS_M_TBL(obj));
+        size += sa_memsize(RCLASS_IV_TBL(obj));
+        size += sa_memsize(RCLASS_IV_INDEX_TBL(obj));
+        size += sa_memsize(RCLASS_CONST_TBL(obj));
 	break;
       case T_STRING:
 	size += rb_str_memsize(obj);
diff --git a/gc.c b/gc.c
index 064f9a8..8924617 100644
--- a/gc.c
+++ b/gc.c
@@ -513,7 +513,7 @@ rb_objspace_free(rb_objspace_t *objspace)

 #define HEAP_OBJ_LIMIT (unsigned int)(HEAP_SIZE / sizeof(struct RVALUE))

-extern st_table *rb_class_tbl;
+extern sa_table rb_class_tbl;

 int ruby_disable_gc_stress = 0;

@@ -1466,6 +1466,15 @@ mark_tbl(rb_objspace_t *objspace, st_table *tbl, int lev)
     st_foreach(tbl, mark_entry, (st_data_t)&arg);
 }

+static void
+mark_sa_tbl(rb_objspace_t *objspace, sa_table *tbl, int lev)
+{
+    if (!tbl) return;
+    SA_FOREACH_START(tbl);
+    gc_mark(objspace, (VALUE)value, lev);
+    SA_FOREACH_END();
+}
+
 static int
 mark_key(VALUE key, VALUE value, st_data_t data)
 {
@@ -1544,74 +1553,52 @@ rb_mark_method_entry(const rb_method_entry_t *me)
     mark_method_entry(&rb_objspace, me, 0);
 }

-static int
-mark_method_entry_i(ID key, const rb_method_entry_t *me, st_data_t data)
-{
-    struct mark_tbl_arg *arg = (void*)data;
-    mark_method_entry(arg->objspace, me, arg->lev);
-    return ST_CONTINUE;
-}
-
 static void
-mark_m_tbl(rb_objspace_t *objspace, st_table *tbl, int lev)
+mark_m_tbl(rb_objspace_t *objspace, sa_table *tbl, int lev)
 {
-    struct mark_tbl_arg arg;
-    if (!tbl) return;
-    arg.objspace = objspace;
-    arg.lev = lev;
-    st_foreach(tbl, mark_method_entry_i, (st_data_t)&arg);
-}
-
-static int
-free_method_entry_i(ID key, rb_method_entry_t *me, st_data_t data)
-{
-    rb_free_method_entry(me);
-    return ST_CONTINUE;
+    SA_FOREACH_START(tbl);
+    mark_method_entry(objspace, (const rb_method_entry_t*)value, lev);
+    SA_FOREACH_END();
 }

 void
-rb_free_m_table(st_table *tbl)
+rb_free_m_table(sa_table *tbl)
 {
-    st_foreach(tbl, free_method_entry_i, 0);
-    st_free_table(tbl);
-}
-
-static int
-mark_const_entry_i(ID key, const rb_const_entry_t *ce, st_data_t data)
-{
-    struct mark_tbl_arg *arg = (void*)data;
-    gc_mark(arg->objspace, ce->value, arg->lev);
-    return ST_CONTINUE;
+    SA_FOREACH_START(tbl);
+    if (!((rb_method_entry_t*)value)->mark) {
+	rb_free_method_entry((rb_method_entry_t*)value);
+    }
+    SA_FOREACH_END();
+    sa_clear(tbl);
 }

 static void
-mark_const_tbl(rb_objspace_t *objspace, st_table *tbl, int lev)
+mark_const_tbl(rb_objspace_t *objspace, sa_table *tbl, int lev)
 {
-    struct mark_tbl_arg arg;
-    if (!tbl) return;
-    arg.objspace = objspace;
-    arg.lev = lev;
-    st_foreach(tbl, mark_const_entry_i, (st_data_t)&arg);
+    SA_FOREACH_START(tbl);
+    gc_mark(objspace, ((const rb_const_entry_t*)value)->value, lev);
+    SA_FOREACH_END();
 }

-static int
-free_const_entry_i(ID key, rb_const_entry_t *ce, st_data_t data)
+void
+rb_free_const_table(sa_table *tbl)
 {
-    xfree(ce);
-    return ST_CONTINUE;
+    SA_FOREACH_START(tbl);
+    xfree((rb_const_entry_t*)value);
+    SA_FOREACH_END();
+    sa_clear(tbl);
 }

 void
-rb_free_const_table(st_table *tbl)
+rb_mark_tbl(st_table *tbl)
 {
-    st_foreach(tbl, free_const_entry_i, 0);
-    st_free_table(tbl);
+    mark_tbl(&rb_objspace, tbl, 0);
 }

 void
-rb_mark_tbl(st_table *tbl)
+rb_mark_sa_tbl(sa_table *tbl)
 {
-    mark_tbl(&rb_objspace, tbl, 0);
+    mark_sa_tbl(&rb_objspace, tbl, 0);
 }

 void
@@ -1819,7 +1806,7 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr, int lev)
       case T_CLASS:
       case T_MODULE:
 	mark_m_tbl(objspace, RCLASS_M_TBL(obj), lev);
-	mark_tbl(objspace, RCLASS_IV_TBL(obj), lev);
+	mark_sa_tbl(objspace, RCLASS_IV_TBL(obj), lev);
 	mark_const_tbl(objspace, RCLASS_CONST_TBL(obj), lev);
 	ptr = RCLASS_SUPER(obj);
 	goto again;
@@ -2286,15 +2273,11 @@ obj_free(rb_objspace_t *objspace, VALUE obj)
       case T_CLASS:
 	rb_clear_cache_by_class((VALUE)obj);
 	rb_free_m_table(RCLASS_M_TBL(obj));
-	if (RCLASS_IV_TBL(obj)) {
-	    st_free_table(RCLASS_IV_TBL(obj));
-	}
-	if (RCLASS_CONST_TBL(obj)) {
-	    rb_free_const_table(RCLASS_CONST_TBL(obj));
-	}
-	if (RCLASS_IV_INDEX_TBL(obj)) {
-	    st_free_table(RCLASS_IV_INDEX_TBL(obj));
-	}
+        sa_clear(RCLASS_IV_TBL(obj));
+        rb_free_const_table(RCLASS_CONST_TBL(obj));
+        sa_clear(RCLASS_IV_INDEX_TBL(obj));
+        sa_clear(&RCLASS(obj)->cache->m_cache_tbl);
+        xfree(RCLASS(obj)->cache);
         xfree(RANY(obj)->as.klass.ptr);
 	break;
       case T_STRING:
@@ -2346,8 +2329,9 @@ obj_free(rb_objspace_t *objspace, VALUE obj)
       case T_COMPLEX:
 	break;
       case T_ICLASS:
+        sa_clear(&RCLASS(obj)->cache->m_cache_tbl);
+        xfree(RCLASS(obj)->cache);
 	/* iClass shares table with the module */
-	xfree(RANY(obj)->as.klass.ptr);
 	break;

       case T_FLOAT:
@@ -2458,7 +2442,7 @@ gc_marks(rb_objspace_t *objspace)
     rb_mark_end_proc();
     rb_gc_mark_global_tbl();

-    mark_tbl(objspace, rb_class_tbl, 0);
+    mark_sa_tbl(objspace, &rb_class_tbl, 0);

     /* mark generic instance variables for special constants */
     rb_mark_generic_ivar_tbl();
diff --git a/include/ruby/intern.h b/include/ruby/intern.h
index 876271f..7e4be31 100644
--- a/include/ruby/intern.h
+++ b/include/ruby/intern.h
@@ -413,6 +413,7 @@ size_t ruby_stack_length(VALUE**);
 int rb_during_gc(void);
 void rb_gc_mark_locations(VALUE*, VALUE*);
 void rb_mark_tbl(struct st_table*);
+void rb_mark_sa_tbl(sa_table*);
 void rb_mark_set(struct st_table*);
 void rb_mark_hash(struct st_table*);
 void rb_gc_mark_maybe(VALUE);
@@ -843,7 +844,7 @@ VALUE rb_f_trace_var(int, VALUE*);
 VALUE rb_f_untrace_var(int, VALUE*);
 VALUE rb_f_global_variables(void);
 void rb_alias_variable(ID, ID);
-struct st_table* rb_generic_ivar_table(VALUE);
+sa_table* rb_generic_ivar_table(VALUE);
 void rb_copy_generic_ivar(VALUE,VALUE);
 void rb_mark_generic_ivar(VALUE);
 void rb_mark_generic_ivar_tbl(void);
diff --git a/include/ruby/ruby.h b/include/ruby/ruby.h
index 3b72f8c..3fbb42d 100644
--- a/include/ruby/ruby.h
+++ b/include/ruby/ruby.h
@@ -605,7 +605,7 @@ struct RObject {
 	struct {
 	    long numiv;
 	    VALUE *ivptr;
-            struct st_table *iv_index_tbl; /* shortcut for RCLASS_IV_INDEX_TBL(rb_obj_class(obj)) */
+            struct sa_table *iv_index_tbl; /* shortcut for RCLASS_IV_INDEX_TBL(rb_obj_class(obj)) */
 	} heap;
 	VALUE ary[ROBJECT_EMBED_LEN_MAX];
     } as;
@@ -626,12 +626,13 @@ struct RObject {

 /** @internal */
 typedef struct rb_classext_struct rb_classext_t;
+typedef struct rb_class_cache_struct rb_class_cache_t;

 struct RClass {
     struct RBasic basic;
+    VALUE super;
     rb_classext_t *ptr;
-    struct st_table *m_tbl;
-    struct st_table *iv_index_tbl;
+    rb_class_cache_t *cache;
 };
 #define RCLASS_SUPER(c) rb_class_get_superclass(c)
 #define RMODULE_IV_TBL(m) RCLASS_IV_TBL(m)
diff --git a/include/ruby/st.h b/include/ruby/st.h
index 50f2a75..4e8741f 100644
--- a/include/ruby/st.h
+++ b/include/ruby/st.h
@@ -136,6 +136,51 @@ st_index_t st_hash_start(st_index_t h);
 #pragma GCC visibility pop
 #endif

+typedef unsigned int sa_index_t;
+#define SA_STOP     ST_STOP
+#define SA_CONTINUE ST_CONTINUE
+
+#define SA_EMPTY   0
+
+typedef struct sa_entry {
+    sa_index_t next;
+    sa_index_t key;
+    st_data_t value;
+} sa_entry;
+
+typedef struct sa_table {
+    sa_index_t num_bins;
+    sa_index_t num_entries;
+    sa_index_t free_pos;
+    sa_entry *entries;
+} sa_table;
+
+#define SA_EMPTY_TABLE {0, 0, 0, 0};
+void sa_init_table(sa_table *, sa_index_t);
+sa_table *sa_new_table();
+int  sa_insert(sa_table *, sa_index_t, st_data_t);
+int  sa_lookup(sa_table *, sa_index_t, st_data_t *);
+int  sa_delete(sa_table *, sa_index_t, st_data_t *);
+void sa_clear(sa_table *);
+void sa_clear_no_free(sa_table *);
+void sa_free_table(sa_table *);
+int  sa_foreach(sa_table *, int (*)(ANYARGS), st_data_t);
+size_t sa_memsize(const sa_table *);
+sa_table *sa_copy(sa_table*);
+void sa_copy_to(sa_table*, sa_table*);
+typedef int (*sa_iter_func)(sa_index_t key, st_data_t val, st_data_t arg);
+
+#define SA_FOREACH_START_I(table, entry) do { \
+    sa_table *T##entry = (table); \
+    sa_index_t K##entry; \
+    for(K##entry = 0; K##entry < T##entry->num_bins; K##entry++) { \
+	sa_entry *entry = T##entry->entries + K##entry; \
+	if (entry->next != SA_EMPTY) { \
+	    st_data_t value = entry->value
+#define SA_FOREACH_END() } } } while(0)
+
+#define SA_FOREACH_START(table) SA_FOREACH_START_I(table, entry)
+
 #if defined(__cplusplus)
 #if 0
 { /* satisfy cc-mode */
diff --git a/internal.h b/internal.h
index 59c9284..95686b7 100644
--- a/internal.h
+++ b/internal.h
@@ -24,18 +24,24 @@ struct rb_deprecated_classext_struct {
 };

 struct rb_classext_struct {
-    VALUE super;
-    struct st_table *iv_tbl;
-    struct st_table *const_tbl;
+    sa_table m_tbl;
+    sa_table iv_tbl;
+    sa_table const_tbl;
+    sa_table iv_index_tbl;
+};
+
+struct rb_class_cache_struct {
+    VALUE method_cache_version;
+    sa_table m_cache_tbl;
 };

 #undef RCLASS_SUPER
 #define RCLASS_EXT(c) (RCLASS(c)->ptr)
-#define RCLASS_SUPER(c) (RCLASS_EXT(c)->super)
-#define RCLASS_IV_TBL(c) (RCLASS_EXT(c)->iv_tbl)
-#define RCLASS_CONST_TBL(c) (RCLASS_EXT(c)->const_tbl)
-#define RCLASS_M_TBL(c) (RCLASS(c)->m_tbl)
-#define RCLASS_IV_INDEX_TBL(c) (RCLASS(c)->iv_index_tbl)
+#define RCLASS_SUPER(c) (RCLASS(c)->super)
+#define RCLASS_IV_TBL(c) (&RCLASS_EXT(c)->iv_tbl)
+#define RCLASS_CONST_TBL(c) (&RCLASS_EXT(c)->const_tbl)
+#define RCLASS_M_TBL(c) (&RCLASS_EXT(c)->m_tbl)
+#define RCLASS_IV_INDEX_TBL(c) (&RCLASS_EXT(c)->iv_index_tbl)

 struct vtm; /* defined by timev.h */

diff --git a/marshal.c b/marshal.c
index 113908e..0920cd9 100644
--- a/marshal.c
+++ b/marshal.c
@@ -506,7 +506,7 @@ w_uclass(VALUE obj, VALUE super, struct dump_arg *arg)
 }

 static int
-w_obj_each(ID id, VALUE value, struct dump_call_arg *arg)
+w_obj_each(sa_index_t id, VALUE value, struct dump_call_arg *arg)
 {
     if (id == rb_id_encoding()) return ST_CONTINUE;
     if (id == rb_intern("E")) return ST_CONTINUE;
@@ -553,13 +553,13 @@ w_encoding(VALUE obj, long num, struct dump_call_arg *arg)
 }

 static void
-w_ivar(VALUE obj, st_table *tbl, struct dump_call_arg *arg)
+w_ivar(VALUE obj, sa_table *tbl, struct dump_call_arg *arg)
 {
     long num = tbl ? tbl->num_entries : 0;

     w_encoding(obj, num, arg);
     if (tbl) {
-	st_foreach_safe(tbl, w_obj_each, (st_data_t)arg);
+	sa_foreach(tbl, w_obj_each, (st_data_t)arg);
     }
 }

@@ -586,7 +586,7 @@ static void
 w_object(VALUE obj, struct dump_arg *arg, int limit)
 {
     struct dump_call_arg c_arg;
-    st_table *ivtbl = 0;
+    sa_table *ivtbl = 0;
     st_data_t num;
     int hasiv = 0;
 #define has_ivars(obj, ivtbl) (((ivtbl) = rb_generic_ivar_table(obj)) != 0 || \
@@ -651,7 +651,7 @@ w_object(VALUE obj, struct dump_arg *arg, int limit)
 	}
 	if (rb_respond_to(obj, s_dump)) {
 	    VALUE v;
-            st_table *ivtbl2 = 0;
+            sa_table *ivtbl2 = 0;
             int hasiv2;

 	    v = rb_funcall(obj, s_dump, 1, INT2NUM(limit));
diff --git a/method.h b/method.h
index 896e309..3369698 100644
--- a/method.h
+++ b/method.h
@@ -100,6 +100,6 @@ int rb_method_entry_eq(const rb_method_entry_t *m1, const rb_method_entry_t *m2)
 void rb_mark_method_entry(const rb_method_entry_t *me);
 void rb_free_method_entry(rb_method_entry_t *me);
 void rb_sweep_method_entry(void *vm);
-void rb_free_m_table(st_table *tbl);
+void rb_free_m_table(sa_table *tbl);

 #endif /* METHOD_H */
diff --git a/object.c b/object.c
index 30db480..6c62ed9 100644
--- a/object.c
+++ b/object.c
@@ -229,17 +229,8 @@ init_copy(VALUE dest, VALUE obj)
         break;
       case T_CLASS:
       case T_MODULE:
-	if (RCLASS_IV_TBL(dest)) {
-	    st_free_table(RCLASS_IV_TBL(dest));
-	    RCLASS_IV_TBL(dest) = 0;
-	}
-	if (RCLASS_CONST_TBL(dest)) {
-	    rb_free_const_table(RCLASS_CONST_TBL(dest));
-	    RCLASS_CONST_TBL(dest) = 0;
-	}
-	if (RCLASS_IV_TBL(obj)) {
-	    RCLASS_IV_TBL(dest) = st_copy(RCLASS_IV_TBL(obj));
-	}
+        rb_free_const_table(RCLASS_CONST_TBL(dest));
+        sa_copy_to(RCLASS_IV_TBL(obj), RCLASS_IV_TBL(dest));
         break;
     }
 }
@@ -530,7 +521,7 @@ rb_obj_is_kind_of(VALUE obj, VALUE c)
     }

     while (cl) {
-	if (cl == c || RCLASS_M_TBL(cl) == RCLASS_M_TBL(c))
+	if (cl == c || RCLASS_EXT(cl) == RCLASS_EXT(c))
 	    return Qtrue;
 	cl = RCLASS_SUPER(cl);
     }
@@ -1355,13 +1346,13 @@ rb_class_inherited_p(VALUE mod, VALUE arg)
 	rb_raise(rb_eTypeError, "compared with non class/module");
     }
     while (mod) {
-	if (RCLASS_M_TBL(mod) == RCLASS_M_TBL(arg))
+	if (RCLASS_EXT(mod) == RCLASS_EXT(arg))
 	    return Qtrue;
 	mod = RCLASS_SUPER(mod);
     }
     /* not mod < arg; check if mod > arg */
     while (arg) {
-	if (RCLASS_M_TBL(arg) == RCLASS_M_TBL(start))
+	if (RCLASS_EXT(arg) == RCLASS_EXT(start))
 	    return Qfalse;
 	arg = RCLASS_SUPER(arg);
     }
diff --git a/sp_ar.c b/sp_ar.c
new file mode 100644
index 0000000..2ed69bf
--- /dev/null
+++ b/sp_ar.c
@@ -0,0 +1,374 @@
+/*
+ * sparse array lib
+ * inspired by Lua table
+ * written by Sokolov Yura aka funny_falcon
+ */
+#ifdef NOT_RUBY
+#include "regint.h"
+#include "st.h"
+#else
+#include "ruby/ruby.h"
+#endif
+
+#include <stdio.h>
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#include <string.h>
+
+#ifdef RUBY
+#define malloc xmalloc
+#define calloc xcalloc
+#define realloc xrealloc
+#define free   xfree
+#endif
+
+#define sa_table_alloc()          (sa_table*)malloc(sizeof(sa_table))
+#define sa_table_xalloc()         (sa_table*)calloc(1, sizeof(sa_table))
+#define sa_table_dealloc(table)   free(table)
+#define sa_entry_alloc(n)         (sa_entry*)calloc((n), sizeof(sa_entry))
+#define sa_entry_dealloc(entries) free(entries)
+
+#define SA_LAST   1
+#define SA_OFFSET 2
+
+#define SA_MIN_SIZE 4
+
+void
+sa_init_table(register sa_table *table, sa_index_t num_bins)
+{
+    if (num_bins) {
+        table->num_entries = 0;
+        table->entries = sa_entry_alloc(num_bins);
+        table->num_bins = num_bins;
+        table->free_pos = num_bins;
+    }
+    else {
+        memset(table, 0, sizeof(sa_table));
+    }
+}
+
+sa_table*
+sa_new_table()
+{
+    sa_table* table = sa_table_alloc();
+    sa_init_table(table, 0);
+    return table;
+}
+
+static inline sa_index_t
+calc_pos(register sa_table* table, sa_index_t key)
+{
+    /* this formula is empirical */
+    /* it has no good avalance, but works well in our case */
+    key ^= key >> 16;
+    key *= 0x445229;
+    return (key + (key >> 16)) % table->num_bins;
+}
+
+static void
+fix_empty(register sa_table* table)
+{
+    while(--table->free_pos &&
+            table->entries[table->free_pos-1].next != SA_EMPTY);
+}
+
+#define FLOOR_TO_4 ((~((sa_index_t)0)) << 2)
+static sa_index_t
+find_empty(register sa_table* table, register sa_index_t pos)
+{
+    sa_index_t new_pos = table->free_pos-1;
+    sa_entry *entry;
+    pos &= FLOOR_TO_4;
+    entry = table->entries+pos;
+
+    if (entry->next == SA_EMPTY) { new_pos = pos; goto check; }
+    pos++; entry++;
+    if (entry->next == SA_EMPTY) { new_pos = pos; goto check; }
+    pos++; entry++;
+    if (entry->next == SA_EMPTY) { new_pos = pos; goto check; }
+    pos++; entry++;
+    if (entry->next == SA_EMPTY) { new_pos = pos; goto check; }
+
+check:
+    if (new_pos+1 == table->free_pos) fix_empty(table);
+    return new_pos;
+}
+
+static void resize(register sa_table* table);
+static int insert_into_chain(register sa_table*, register sa_index_t, st_data_t, sa_index_t pos);
+static int insert_into_main(register sa_table*, sa_index_t, st_data_t, sa_index_t pos, sa_index_t prev_pos);
+
+int
+sa_insert(register sa_table* table, register sa_index_t key, st_data_t value)
+{
+    sa_index_t pos, main_pos;
+    register sa_entry *entry;
+
+    if (table->num_bins == 0) {
+        sa_init_table(table, SA_MIN_SIZE);
+    }
+
+    pos = calc_pos(table, key);
+    entry = table->entries + pos;
+
+    if (entry->next == SA_EMPTY) {
+        entry->next = SA_LAST;
+        entry->key = key;
+        entry->value = value;
+        table->num_entries++;
+        if (pos+1 == table->free_pos) fix_empty(table);
+        return 0;
+    }
+
+    if (entry->key == key) {
+        entry->value = value;
+        return 1;
+    }
+
+    if (table->num_entries + (table->num_entries >> 2) > table->num_bins) {
+        resize(table);
+        return sa_insert(table, key, value);
+    }
+
+    main_pos = calc_pos(table, entry->key);
+    if (main_pos == pos) {
+        return insert_into_chain(table, key, value, pos);
+    }
+    else {
+        if (!table->free_pos) {
+            resize(table);
+            return sa_insert(table, key, value);
+        }
+        return insert_into_main(table, key, value, pos, main_pos);
+    }
+}
+
+static int
+insert_into_chain(register sa_table* table, register sa_index_t key, st_data_t value, sa_index_t pos)
+{
+    sa_entry *entry = table->entries + pos, *new_entry;
+    sa_index_t new_pos;
+
+    while (entry->next != SA_LAST) {
+        pos = entry->next - SA_OFFSET;
+        entry = table->entries + pos;
+        if (entry->key == key) {
+            entry->value = value;
+            return 1;
+        }
+    }
+
+    if (!table->free_pos) {
+        resize(table);
+        return sa_insert(table, key, value);
+    }
+
+    new_pos = find_empty(table, pos);
+    new_entry = table->entries + new_pos;
+    entry->next = new_pos + SA_OFFSET;
+
+    new_entry->next = SA_LAST;
+    new_entry->key = key;
+    new_entry->value = value;
+    table->num_entries++;
+    return 0;
+}
+
+static int
+insert_into_main(register sa_table* table, sa_index_t key, st_data_t value, sa_index_t pos, sa_index_t prev_pos)
+{
+    sa_entry *entry = table->entries + pos;
+    sa_index_t new_pos = find_empty(table, pos);
+    sa_entry *new_entry = table->entries + new_pos;
+    sa_index_t npos;
+
+    *new_entry = *entry;
+
+    while((npos = table->entries[prev_pos].next - SA_OFFSET) != pos) {
+        prev_pos = npos;
+    }
+    table->entries[prev_pos].next = new_pos + SA_OFFSET;
+
+    entry->next = SA_LAST;
+    entry->key = key;
+    entry->value = value;
+    table->num_entries++;
+    return 0;
+}
+
+static sa_index_t
+new_size(sa_index_t num_entries)
+{
+    sa_index_t msb = num_entries;
+    msb |= msb >> 1;
+    msb |= msb >> 2;
+    msb |= msb >> 4;
+    msb |= msb >> 8;
+    msb |= msb >> 16;
+    msb = ((msb >> 4) + 1) << 3;
+    return (num_entries & (msb | (msb >> 1))) + (msb >> 1);
+}
+
+static void
+resize(register sa_table *table)
+{
+    sa_table tmp_table;
+    sa_entry *entry;
+    sa_index_t i;
+
+    if (table->num_entries == 0) {
+        sa_entry_dealloc(table->entries);
+        memset(table, 0, sizeof(sa_table));
+        return;
+    }
+
+    sa_init_table(&tmp_table, new_size(table->num_entries + (table->num_entries >> 2)));
+    entry = table->entries;
+
+    for(i = 0; i < table->num_bins; i++, entry++) {
+        if (entry->next != SA_EMPTY) {
+            sa_insert(&tmp_table, entry->key, entry->value);
+        }
+    }
+    sa_entry_dealloc(table->entries);
+    *table = tmp_table;
+}
+
+int
+sa_lookup(register sa_table *table, register sa_index_t key, st_data_t *value)
+{
+    register sa_entry *entry;
+
+    if (table->num_entries == 0) return 0;
+
+    entry = table->entries + calc_pos(table, key);
+    if (entry->next == SA_EMPTY) return 0;
+
+    if (entry->key == key) goto found;
+    if (entry->next == SA_LAST) return 0;
+
+    entry = table->entries + (entry->next - SA_OFFSET);
+    if (entry->key == key) goto found;
+
+    while(entry->next != SA_LAST) {
+        entry = table->entries + (entry->next - SA_OFFSET);
+        if (entry->key == key) goto found;
+    }
+    return 0;
+found:
+    if (value) *value = entry->value;
+    return 1;
+}
+
+void
+sa_clear(sa_table *table)
+{
+    sa_entry_dealloc(table->entries);
+    memset(table, 0, sizeof(sa_table));
+}
+
+void
+sa_clear_no_free(sa_table *table)
+{
+    memset(table->entries, 0, sizeof(sa_entry) * table->num_bins);
+    table->num_entries = 0;
+    table->free_pos = table->num_bins;
+}
+
+void
+sa_free_table(sa_table *table)
+{
+    sa_entry_dealloc(table->entries);
+    sa_table_dealloc(table);
+}
+
+int
+sa_delete(sa_table *table, sa_index_t key, st_data_t *value)
+{
+    sa_index_t pos, prev_pos = ~0;
+    sa_entry *entry;
+
+    if (table->num_entries == 0) goto not_found;
+
+    pos = calc_pos(table, key);
+    entry = table->entries + pos;
+
+    if (entry->next == SA_EMPTY) goto not_found;
+
+    do {
+        if (entry->key == key) {
+            if (value) *value = entry->value;
+            if (entry->next != SA_LAST) {
+                sa_index_t npos = entry->next - SA_OFFSET;
+                *entry = table->entries[npos];
+                memset(table->entries + npos, 0, sizeof(sa_entry));
+            }
+            else {
+                memset(table->entries + pos, 0, sizeof(sa_entry));
+                if (~prev_pos) {
+                    table->entries[prev_pos].next = SA_LAST;
+                }
+            }
+            table->num_entries--;
+            if (table->num_entries < table->num_bins / 4) {
+                resize(table);
+            }
+            return 1;
+        }
+        if (entry->next == SA_LAST) break;
+        prev_pos = pos;
+        pos = entry->next - SA_OFFSET;
+        entry = table->entries + pos;
+    } while(1);
+
+not_found:
+    if (value) *value = 0;
+    return 0;
+}
+
+int
+sa_foreach(register sa_table *table, int (*func)(), st_data_t arg)
+{
+    sa_index_t i;
+    if (table->num_bins == 0) {
+        return 0;
+    }
+    for(i = 0; i < table->num_bins ; i++) {
+	if (table->entries[i].next != SA_EMPTY) {
+	    sa_index_t key = table->entries[i].key;
+	    st_data_t val = table->entries[i].value;
+	    if ((*func)(key, val, arg) == SA_STOP) break;
+	}
+    }
+    return 0;
+}
+
+size_t
+sa_memsize(const sa_table *table)
+{
+    return sizeof(sa_table) + table->num_bins * sizeof(sa_entry);
+}
+
+sa_table*
+sa_copy(sa_table *table)
+{
+    sa_table *new_table = sa_table_alloc();
+    *new_table = *table;
+    if (table->num_bins) {
+        new_table->entries = sa_entry_alloc(table->num_bins);
+        memcpy(new_table->entries, table->entries, table->num_bins*sizeof(sa_entry));
+    }
+    return new_table;
+}
+
+void
+sa_copy_to(sa_table *from, sa_table *to)
+{
+    sa_entry_dealloc(to->entries);
+    *to = *from;
+    if (to->num_bins) {
+	to->entries = sa_entry_alloc(to->num_bins);
+	memcpy(to->entries, from->entries, from->num_bins*sizeof(sa_entry));
+    }
+}
diff --git a/test/ruby/test_marshal.rb b/test/ruby/test_marshal.rb
index 7ec6959..f5774cd 100644
--- a/test/ruby/test_marshal.rb
+++ b/test/ruby/test_marshal.rb
@@ -312,7 +312,7 @@ class TestMarshal < Test::Unit::TestCase
         assert_equal(a.instance_variable_get(i), b.instance_variable_get(i), bug1932)
       end
     end
-    a.__send__(a.methods(true).grep(/=\z/)[0], a)
+    a.__send__(a.methods(true).grep(/\Ar.+=\z/)[0], a)
     assert_nothing_raised(bug1932) do
       b = Marshal.load(Marshal.dump(a))
       assert_equal(ClassISO8859_1, b.class, bug1932)
diff --git a/test/ruby/test_method.rb b/test/ruby/test_method.rb
index 2cc0de5..b1bf33e 100644
--- a/test/ruby/test_method.rb
+++ b/test/ruby/test_method.rb
@@ -406,7 +406,7 @@ class TestMethod < Test::Unit::TestCase
     obj = a.new
     assert_equal([:a], obj.public_methods(false), bug)
     obj.extend(m)
-    assert_equal([:m1, :a], obj.public_methods(false), bug)
+    assert_equal([:a, :m1], obj.public_methods(false).sort, bug)
   end

   def test_visibility
diff --git a/test/ruby/test_module.rb b/test/ruby/test_module.rb
index 6176f48..a67d845 100644
--- a/test/ruby/test_module.rb
+++ b/test/ruby/test_module.rb
@@ -870,7 +870,7 @@ class TestModule < Test::Unit::TestCase
       (class << self ; self ; end).class_eval do
         define_method :method_added do |sym|
           memo << sym
-          memo << mod.instance_methods(false)
+          memo << mod.instance_methods(false).sort
           memo << (mod.instance_method(sym) rescue nil)
         end
       end
@@ -887,10 +887,10 @@ class TestModule < Test::Unit::TestCase
     assert_equal [:f, :g], memo.shift
     assert_equal mod.instance_method(:f), memo.shift
     assert_equal :a, memo.shift
-    assert_equal [:f, :g, :a], memo.shift
+    assert_equal [:a, :f, :g], memo.shift
     assert_equal mod.instance_method(:a), memo.shift
     assert_equal :a=, memo.shift
-    assert_equal [:f, :g, :a, :a=], memo.shift
+    assert_equal [:a, :a=, :f, :g], memo.shift
     assert_equal mod.instance_method(:a=), memo.shift
   end

diff --git a/time.c b/time.c
index a47ce23..64fb1d9 100644
--- a/time.c
+++ b/time.c
@@ -4712,16 +4712,14 @@ time_mload(VALUE time, VALUE str)
     long nsec;
     VALUE submicro, nano_num, nano_den, offset;
     wideval_t timew;
-    st_data_t data;

     time_modify(time);

 #define get_attr(attr, iffound) \
     attr = rb_attr_get(str, id_##attr); \
     if (!NIL_P(attr)) { \
-	data = id_##attr; \
 	iffound; \
-        st_delete(rb_generic_ivar_table(str), &data, 0); \
+        sa_delete(rb_generic_ivar_table(str), (sa_index_t)id_##attr, 0); \
     }

     get_attr(nano_num, {});
diff --git a/variable.c b/variable.c
index ba47d26..9c7ed5e 100644
--- a/variable.c
+++ b/variable.c
@@ -19,15 +19,15 @@
 #include "constant.h"
 #include "internal.h"

-st_table *rb_global_tbl;
-st_table *rb_class_tbl;
+sa_table rb_global_tbl;
+sa_table rb_class_tbl;
 static ID autoload, classpath, tmp_classpath, classid;

 void
 Init_var_tables(void)
 {
-    rb_global_tbl = st_init_numtable();
-    rb_class_tbl = st_init_numtable();
+    sa_init_table(&rb_global_tbl, 0);
+    sa_init_table(&rb_class_tbl, 0);
     CONST_ID(autoload, "__autoload__");
     CONST_ID(classpath, "__classpath__");
     CONST_ID(tmp_classpath, "__tmp_classpath__");
@@ -43,7 +43,7 @@ struct fc_result {
 };

 static VALUE
-fc_path(struct fc_result *fc, ID name)
+fc_path(struct fc_result *fc, sa_index_t name)
 {
     VALUE path, tmp;

@@ -51,8 +51,7 @@ fc_path(struct fc_result *fc, ID name)
     while (fc) {
 	st_data_t n;
 	if (fc->track == rb_cObject) break;
-	if (RCLASS_IV_TBL(fc->track) &&
-	    st_lookup(RCLASS_IV_TBL(fc->track), (st_data_t)classpath, &n)) {
+	if (sa_lookup(RCLASS_IV_TBL(fc->track), (sa_index_t)classpath, &n)) {
 	    tmp = rb_str_dup((VALUE)n);
 	    rb_str_cat2(tmp, "::");
 	    rb_str_append(tmp, path);
@@ -70,7 +69,7 @@ fc_path(struct fc_result *fc, ID name)
 }

 static int
-fc_i(ID key, rb_const_entry_t *ce, struct fc_result *res)
+fc_i(sa_index_t key, rb_const_entry_t *ce, struct fc_result *res)
 {
     VALUE value = ce->value;
     if (!rb_is_const_id(key)) return ST_CONTINUE;
@@ -98,7 +97,7 @@ fc_i(ID key, rb_const_entry_t *ce, struct fc_result *res)
 	    arg.klass = res->klass;
 	    arg.track = value;
 	    arg.prev = res;
-	    st_foreach(RCLASS_CONST_TBL(value), fc_i, (st_data_t)&arg);
+	    sa_foreach(RCLASS_CONST_TBL(value), fc_i, (st_data_t)&arg);
 	    if (arg.path) {
 		res->path = arg.path;
 		return ST_STOP;
@@ -123,18 +122,14 @@ find_class_path(VALUE klass)
     arg.track = rb_cObject;
     arg.prev = 0;
     if (RCLASS_CONST_TBL(rb_cObject)) {
-	st_foreach_safe(RCLASS_CONST_TBL(rb_cObject), fc_i, (st_data_t)&arg);
+	sa_foreach(RCLASS_CONST_TBL(rb_cObject), fc_i, (st_data_t)&arg);
     }
     if (arg.path == 0) {
-	st_foreach_safe(rb_class_tbl, fc_i, (st_data_t)&arg);
+	sa_foreach(&rb_class_tbl, fc_i, (st_data_t)&arg);
     }
     if (arg.path) {
-	st_data_t tmp = tmp_classpath;
-	if (!RCLASS_IV_TBL(klass)) {
-	    RCLASS_IV_TBL(klass) = st_init_numtable();
-	}
-	st_insert(RCLASS_IV_TBL(klass), (st_data_t)classpath, arg.path);
-	st_delete(RCLASS_IV_TBL(klass), &tmp, 0);
+	sa_insert(RCLASS_IV_TBL(klass), (sa_index_t)classpath, arg.path);
+	sa_delete(RCLASS_IV_TBL(klass), (sa_index_t)tmp_classpath, 0);
 	return arg.path;
     }
     return Qnil;
@@ -147,16 +142,15 @@ classname(VALUE klass)
     st_data_t n;

     if (!klass) klass = rb_cObject;
-    if (RCLASS_IV_TBL(klass)) {
-	if (!st_lookup(RCLASS_IV_TBL(klass), (st_data_t)classpath, &n)) {
-	    if (!st_lookup(RCLASS_IV_TBL(klass), (st_data_t)classid, &n)) {
+    if (RCLASS_IV_TBL(klass)->num_entries) {
+	if (!sa_lookup(RCLASS_IV_TBL(klass), (sa_index_t)classpath, &n)) {
+	    if (!sa_lookup(RCLASS_IV_TBL(klass), (sa_index_t)classid, &n)) {
 		return find_class_path(klass);
 	    }
 	    path = rb_str_dup(rb_id2str(SYM2ID((VALUE)n)));
 	    OBJ_FREEZE(path);
-	    st_insert(RCLASS_IV_TBL(klass), (st_data_t)classpath, (st_data_t)path);
-	    n = classid;
-	    st_delete(RCLASS_IV_TBL(klass), &n, 0);
+	    sa_insert(RCLASS_IV_TBL(klass), (sa_index_t)classpath, (st_data_t)path);
+	    sa_delete(RCLASS_IV_TBL(klass), (sa_index_t)classid, 0);
 	}
 	else {
 	    path = (VALUE)n;
@@ -192,8 +186,7 @@ rb_class_path(VALUE klass)
     st_data_t n = (st_data_t)path;

     if (!NIL_P(path)) return path;
-    if (RCLASS_IV_TBL(klass) && st_lookup(RCLASS_IV_TBL(klass),
-					  (st_data_t)tmp_classpath, &n)) {
+    if (sa_lookup(RCLASS_IV_TBL(klass), (sa_index_t)tmp_classpath, &n)) {
 	return (VALUE)n;
     }
     else {
@@ -364,7 +357,7 @@ rb_global_entry(ID id)
     struct global_entry *entry;
     st_data_t data;

-    if (!st_lookup(rb_global_tbl, (st_data_t)id, &data)) {
+    if (!sa_lookup(&rb_global_tbl, (sa_index_t)id, &data)) {
 	struct global_variable *var;
 	entry = ALLOC(struct global_entry);
 	var = ALLOC(struct global_variable);
@@ -378,7 +371,7 @@ rb_global_entry(ID id)

 	var->block_trace = 0;
 	var->trace = 0;
-	st_add_direct(rb_global_tbl, id, (st_data_t)entry);
+	sa_insert(&rb_global_tbl, (sa_index_t)id, (st_data_t)entry);
     }
     else {
 	entry = (struct global_entry *)data;
@@ -454,8 +447,8 @@ readonly_setter(VALUE val, ID id, void *data, struct global_variable *gvar)
     rb_name_error(id, "%s is a read-only variable", rb_id2name(id));
 }

-static int
-mark_global_entry(ID key, struct global_entry *entry)
+static void
+mark_global_entry(struct global_entry *entry)
 {
     struct trace_var *trace;
     struct global_variable *var = entry->var;
@@ -466,14 +459,14 @@ mark_global_entry(ID key, struct global_entry *entry)
 	if (trace->data) rb_gc_mark_maybe(trace->data);
 	trace = trace->next;
     }
-    return ST_CONTINUE;
 }

 void
 rb_gc_mark_global_tbl(void)
 {
-    if (rb_global_tbl)
-        st_foreach_safe(rb_global_tbl, mark_global_entry, 0);
+    SA_FOREACH_START(&rb_global_tbl);
+    mark_global_entry((struct global_entry*) value);
+    SA_FOREACH_END();
 }

 static ID
@@ -635,7 +628,7 @@ rb_f_untrace_var(int argc, VALUE *argv)
     rb_secure(4);
     rb_scan_args(argc, argv, "11", &var, &cmd);
     id = rb_to_id(var);
-    if (!st_lookup(rb_global_tbl, (st_data_t)id, &data)) {
+    if (!sa_lookup(&rb_global_tbl, (sa_index_t)id, &data)) {
 	rb_name_error(id, "undefined global variable %s", rb_id2name(id));
     }

@@ -742,13 +735,6 @@ rb_gvar_defined(struct global_entry *entry)
     return Qtrue;
 }

-static int
-gvar_i(ID key, struct global_entry *entry, VALUE ary)
-{
-    rb_ary_push(ary, ID2SYM(key));
-    return ST_CONTINUE;
-}
-
 /*
  *  call-seq:
  *     global_variables    -> array
@@ -765,7 +751,9 @@ rb_f_global_variables(void)
     char buf[2];
     int i;

-    st_foreach_safe(rb_global_tbl, gvar_i, ary);
+    SA_FOREACH_START(&rb_global_tbl);
+    rb_ary_push(ary, ID2SYM(entry->key));
+    SA_FOREACH_END();
     buf[0] = '$';
     for (i = 1; i <= 9; ++i) {
 	buf[1] = (char)(i + '0');
@@ -784,10 +772,10 @@ rb_alias_variable(ID name1, ID name2)
 	rb_raise(rb_eSecurityError, "Insecure: can't alias global variable");

     entry2 = rb_global_entry(name2);
-    if (!st_lookup(rb_global_tbl, (st_data_t)name1, &data1)) {
+    if (!sa_lookup(&rb_global_tbl, (sa_index_t)name1, &data1)) {
 	entry1 = ALLOC(struct global_entry);
 	entry1->id = name1;
-	st_add_direct(rb_global_tbl, name1, (st_data_t)entry1);
+	sa_insert(&rb_global_tbl, (sa_index_t)name1, (st_data_t)entry1);
     }
     else if ((entry1 = (struct global_entry *)data1)->var != entry2->var) {
 	struct global_variable *var = entry1->var;
@@ -815,7 +803,7 @@ rb_alias_variable(ID name1, ID name2)
 static int special_generic_ivar = 0;
 static st_table *generic_iv_tbl;

-st_table*
+sa_table*
 rb_generic_ivar_table(VALUE obj)
 {
     st_data_t tbl;
@@ -823,7 +811,7 @@ rb_generic_ivar_table(VALUE obj)
     if (!FL_TEST(obj, FL_EXIVAR)) return 0;
     if (!generic_iv_tbl) return 0;
     if (!st_lookup(generic_iv_tbl, (st_data_t)obj, &tbl)) return 0;
-    return (st_table *)tbl;
+    return (sa_table *)tbl;
 }

 static VALUE
@@ -833,7 +821,7 @@ generic_ivar_get(VALUE obj, ID id, int warn)

     if (generic_iv_tbl) {
 	if (st_lookup(generic_iv_tbl, (st_data_t)obj, &tbl)) {
-	    if (st_lookup((st_table *)tbl, (st_data_t)id, &val)) {
+	    if (sa_lookup((sa_table *)tbl, (sa_index_t)id, &val)) {
 		return (VALUE)val;
 	    }
 	}
@@ -847,7 +835,6 @@ generic_ivar_get(VALUE obj, ID id, int warn)
 static void
 generic_ivar_set(VALUE obj, ID id, VALUE val)
 {
-    st_table *tbl;
     st_data_t data;

     if (rb_special_const_p(obj)) {
@@ -859,24 +846,22 @@ generic_ivar_set(VALUE obj, ID id, VALUE val)
     }
     if (!st_lookup(generic_iv_tbl, (st_data_t)obj, &data)) {
 	FL_SET(obj, FL_EXIVAR);
-	tbl = st_init_numtable();
-	st_add_direct(generic_iv_tbl, (st_data_t)obj, (st_data_t)tbl);
-	st_add_direct(tbl, (st_data_t)id, (st_data_t)val);
-	return;
+	data = (st_data_t)sa_new_table();
+	st_add_direct(generic_iv_tbl, (st_data_t)obj, data);
     }
-    st_insert((st_table *)data, (st_data_t)id, (st_data_t)val);
+    sa_insert((sa_table *)data, (sa_index_t)id, (st_data_t)val);
 }

 static VALUE
 generic_ivar_defined(VALUE obj, ID id)
 {
-    st_table *tbl;
+    sa_table *tbl;
     st_data_t data;

     if (!generic_iv_tbl) return Qfalse;
     if (!st_lookup(generic_iv_tbl, (st_data_t)obj, &data)) return Qfalse;
-    tbl = (st_table *)data;
-    if (st_lookup(tbl, (st_data_t)id, &data)) {
+    tbl = (sa_table *)data;
+    if (sa_lookup(tbl, (sa_index_t)id, &data)) {
 	return Qtrue;
     }
     return Qfalse;
@@ -885,18 +870,18 @@ generic_ivar_defined(VALUE obj, ID id)
 static int
 generic_ivar_remove(VALUE obj, ID id, st_data_t *valp)
 {
-    st_table *tbl;
+    sa_table *tbl;
     st_data_t data, key = (st_data_t)id;
     int status;

     if (!generic_iv_tbl) return 0;
     if (!st_lookup(generic_iv_tbl, (st_data_t)obj, &data)) return 0;
-    tbl = (st_table *)data;
-    status = st_delete(tbl, &key, valp);
+    tbl = (sa_table *)data;
+    status = sa_delete(tbl, (sa_index_t)id, valp);
     if (tbl->num_entries == 0) {
 	key = (st_data_t)obj;
 	st_delete(generic_iv_tbl, &key, &data);
-	st_free_table((st_table *)data);
+	sa_free_table(tbl);
     }
     return status;
 }
@@ -908,22 +893,17 @@ rb_mark_generic_ivar(VALUE obj)

     if (!generic_iv_tbl) return;
     if (st_lookup(generic_iv_tbl, (st_data_t)obj, &tbl)) {
-	rb_mark_tbl((st_table *)tbl);
+	rb_mark_sa_tbl((sa_table *)tbl);
     }
 }

 static int
-givar_mark_i(ID key, VALUE value)
-{
-    rb_gc_mark(value);
-    return ST_CONTINUE;
-}
-
-static int
-givar_i(VALUE obj, st_table *tbl)
+givar_i(VALUE obj, sa_table *tbl)
 {
     if (rb_special_const_p(obj)) {
-	st_foreach_safe(tbl, givar_mark_i, 0);
+        SA_FOREACH_START(tbl);
+        rb_gc_mark((VALUE)value);
+        SA_FOREACH_END();
     }
     return ST_CONTINUE;
 }
@@ -943,7 +923,7 @@ rb_free_generic_ivar(VALUE obj)

     if (!generic_iv_tbl) return;
     if (st_delete(generic_iv_tbl, &key, &tbl))
-	st_free_table((st_table *)tbl);
+	sa_free_table((sa_table *)tbl);
 }

 RUBY_FUNC_EXPORTED size_t
@@ -951,7 +931,7 @@ rb_generic_ivar_memsize(VALUE obj)
 {
     st_data_t tbl;
     if (st_lookup(generic_iv_tbl, (st_data_t)obj, &tbl))
-	return st_memsize((st_table *)tbl);
+	return sa_memsize((sa_table *)tbl);
     return 0;
 }

@@ -970,17 +950,17 @@ rb_copy_generic_ivar(VALUE clone, VALUE obj)
         return;
     }
     if (st_lookup(generic_iv_tbl, (st_data_t)obj, &data)) {
-	st_table *tbl = (st_table *)data;
+	sa_table *tbl = (sa_table *)data;

         if (tbl->num_entries == 0)
             goto clear;

 	if (st_lookup(generic_iv_tbl, (st_data_t)clone, &data)) {
-	    st_free_table((st_table *)data);
-	    st_insert(generic_iv_tbl, (st_data_t)clone, (st_data_t)st_copy(tbl));
+	    sa_free_table((sa_table *)data);
+	    st_insert(generic_iv_tbl, (st_data_t)clone, (st_data_t)sa_copy(tbl));
 	}
 	else {
-	    st_add_direct(generic_iv_tbl, (st_data_t)clone, (st_data_t)st_copy(tbl));
+	    st_add_direct(generic_iv_tbl, (st_data_t)clone, (st_data_t)sa_copy(tbl));
 	    FL_SET(clone, FL_EXIVAR);
 	}
     }
@@ -990,7 +970,7 @@ static VALUE
 ivar_get(VALUE obj, ID id, int warn)
 {
     VALUE val, *ptr;
-    struct st_table *iv_index_tbl;
+    sa_table *iv_index_tbl;
     long len;
     st_data_t index;

@@ -1000,7 +980,7 @@ ivar_get(VALUE obj, ID id, int warn)
         ptr = ROBJECT_IVPTR(obj);
         iv_index_tbl = ROBJECT_IV_INDEX_TBL(obj);
         if (!iv_index_tbl) break;
-        if (!st_lookup(iv_index_tbl, (st_data_t)id, &index)) break;
+        if (!sa_lookup(iv_index_tbl, (sa_index_t)id, &index)) break;
         if (len <= (long)index) break;
         val = ptr[index];
         if (val != Qundef)
@@ -1008,7 +988,7 @@ ivar_get(VALUE obj, ID id, int warn)
 	break;
       case T_CLASS:
       case T_MODULE:
-	if (RCLASS_IV_TBL(obj) && st_lookup(RCLASS_IV_TBL(obj), (st_data_t)id, &index))
+	if (sa_lookup(RCLASS_IV_TBL(obj), (sa_index_t)id, &index))
 	    return (VALUE)index;
 	break;
       default:
@@ -1037,7 +1017,7 @@ rb_attr_get(VALUE obj, ID id)
 VALUE
 rb_ivar_set(VALUE obj, ID id, VALUE val)
 {
-    struct st_table *iv_index_tbl;
+    struct sa_table *iv_index_tbl;
     st_data_t index;
     long i, len;
     int ivar_extended;
@@ -1051,14 +1031,11 @@ rb_ivar_set(VALUE obj, ID id, VALUE val)
         if (!iv_index_tbl) {
             VALUE klass = rb_obj_class(obj);
             iv_index_tbl = RCLASS_IV_INDEX_TBL(klass);
-            if (!iv_index_tbl) {
-                iv_index_tbl = RCLASS_IV_INDEX_TBL(klass) = st_init_numtable();
-            }
         }
         ivar_extended = 0;
-        if (!st_lookup(iv_index_tbl, (st_data_t)id, &index)) {
+        if (!sa_lookup(iv_index_tbl, (sa_index_t)id, &index)) {
             index = iv_index_tbl->num_entries;
-            st_add_direct(iv_index_tbl, (st_data_t)id, index);
+            sa_insert(iv_index_tbl, (sa_index_t)id, index);
             ivar_extended = 1;
         }
         len = ROBJECT_NUMIV(obj);
@@ -1098,8 +1075,7 @@ rb_ivar_set(VALUE obj, ID id, VALUE val)
 	break;
       case T_CLASS:
       case T_MODULE:
-	if (!RCLASS_IV_TBL(obj)) RCLASS_IV_TBL(obj) = st_init_numtable();
-	st_insert(RCLASS_IV_TBL(obj), (st_data_t)id, val);
+	sa_insert(RCLASS_IV_TBL(obj), (sa_index_t)id, val);
         break;
       default:
 	generic_ivar_set(obj, id, val);
@@ -1112,13 +1088,13 @@ VALUE
 rb_ivar_defined(VALUE obj, ID id)
 {
     VALUE val;
-    struct st_table *iv_index_tbl;
+    struct sa_table *iv_index_tbl;
     st_data_t index;
     switch (TYPE(obj)) {
       case T_OBJECT:
         iv_index_tbl = ROBJECT_IV_INDEX_TBL(obj);
         if (!iv_index_tbl) break;
-        if (!st_lookup(iv_index_tbl, (st_data_t)id, &index)) break;
+        if (!sa_lookup(iv_index_tbl, (sa_index_t)id, &index)) break;
         if (ROBJECT_NUMIV(obj) <= (long)index) break;
         val = ROBJECT_IVPTR(obj)[index];
         if (val != Qundef)
@@ -1126,7 +1102,7 @@ rb_ivar_defined(VALUE obj, ID id)
 	break;
       case T_CLASS:
       case T_MODULE:
-	if (RCLASS_IV_TBL(obj) && st_lookup(RCLASS_IV_TBL(obj), (st_data_t)id, 0))
+	if (sa_lookup(RCLASS_IV_TBL(obj), (sa_index_t)id, 0))
 	    return Qtrue;
 	break;
       default:
@@ -1137,40 +1113,26 @@ rb_ivar_defined(VALUE obj, ID id)
     return Qfalse;
 }

-struct obj_ivar_tag {
-    VALUE obj;
-    int (*func)(ID key, VALUE val, st_data_t arg);
-    st_data_t arg;
-};
-
-static int
-obj_ivar_i(st_data_t key, st_data_t index, st_data_t arg)
-{
-    struct obj_ivar_tag *data = (struct obj_ivar_tag *)arg;
-    if ((long)index < ROBJECT_NUMIV(data->obj)) {
-        VALUE val = ROBJECT_IVPTR(data->obj)[(long)index];
-        if (val != Qundef) {
-            return (data->func)((ID)key, val, data->arg);
-        }
-    }
-    return ST_CONTINUE;
-}
-
 static void
 obj_ivar_each(VALUE obj, int (*func)(ANYARGS), st_data_t arg)
 {
-    st_table *tbl;
-    struct obj_ivar_tag data;
+    sa_table *tbl;
+    long numiv = ROBJECT_NUMIV(obj);
+    VALUE *vars = ROBJECT_IVPTR(obj);

     tbl = ROBJECT_IV_INDEX_TBL(obj);
-    if (!tbl)
+    if (!tbl || !numiv)
         return;

-    data.obj = obj;
-    data.func = (int (*)(ID key, VALUE val, st_data_t arg))func;
-    data.arg = arg;
-
-    st_foreach_safe(tbl, obj_ivar_i, (st_data_t)&data);
+    SA_FOREACH_START(tbl);
+    if ((long)value < numiv) {
+	VALUE val = vars[value];
+	if (val != Qundef) {
+	    if (((sa_iter_func)func)(entry->key, val, arg) == SA_STOP)
+		break;
+	}
+    }
+    SA_FOREACH_END();
 }

 void
@@ -1182,9 +1144,7 @@ rb_ivar_foreach(VALUE obj, int (*func)(ANYARGS), st_data_t arg)
 	break;
       case T_CLASS:
       case T_MODULE:
-	if (RCLASS_IV_TBL(obj)) {
-	    st_foreach_safe(RCLASS_IV_TBL(obj), func, arg);
-	}
+        sa_foreach(RCLASS_IV_TBL(obj), func, arg);
 	break;
       default:
 	if (!generic_iv_tbl) break;
@@ -1192,7 +1152,7 @@ rb_ivar_foreach(VALUE obj, int (*func)(ANYARGS), st_data_t arg)
 	    st_data_t tbl;

 	    if (st_lookup(generic_iv_tbl, (st_data_t)obj, &tbl)) {
-		st_foreach_safe((st_table *)tbl, func, arg);
+		sa_foreach((sa_table *)tbl, func, arg);
 	    }
 	}
 	break;
@@ -1202,11 +1162,11 @@ rb_ivar_foreach(VALUE obj, int (*func)(ANYARGS), st_data_t arg)
 st_index_t
 rb_ivar_count(VALUE obj)
 {
-    st_table *tbl;
+    sa_table *tbl;
     switch (TYPE(obj)) {
       case T_OBJECT:
 	if ((tbl = ROBJECT_IV_INDEX_TBL(obj)) != 0) {
-	    st_index_t i, count, num = tbl->num_entries;
+	    sa_index_t i, count, num = tbl->num_entries;
 	    const VALUE *const ivptr = ROBJECT_IVPTR(obj);
 	    for (i = count = 0; i < num; ++i) {
 		if (ivptr[i] != Qundef) {
@@ -1228,7 +1188,7 @@ rb_ivar_count(VALUE obj)
 	    st_data_t data;

 	    if (st_lookup(generic_iv_tbl, (st_data_t)obj, &data) &&
-		(tbl = (st_table *)data) != 0) {
+		(tbl = (sa_table *)data) != 0) {
 		return tbl->num_entries;
 	    }
 	}
@@ -1238,7 +1198,7 @@ rb_ivar_count(VALUE obj)
 }

 static int
-ivar_i(ID key, VALUE val, VALUE ary)
+ivar_i(sa_index_t key, VALUE val, VALUE ary)
 {
     if (rb_is_instance_id(key)) {
 	rb_ary_push(ary, ID2SYM(key));
@@ -1301,7 +1261,7 @@ rb_obj_remove_instance_variable(VALUE obj, VALUE name)
     VALUE val = Qnil;
     const ID id = rb_to_id(name);
     st_data_t n, v;
-    struct st_table *iv_index_tbl;
+    struct sa_table *iv_index_tbl;
     st_data_t index;

     if (!OBJ_UNTRUSTED(obj) && rb_safe_level() >= 4)
@@ -1315,7 +1275,7 @@ rb_obj_remove_instance_variable(VALUE obj, VALUE name)
       case T_OBJECT:
         iv_index_tbl = ROBJECT_IV_INDEX_TBL(obj);
         if (!iv_index_tbl) break;
-        if (!st_lookup(iv_index_tbl, (st_data_t)id, &index)) break;
+        if (!sa_lookup(iv_index_tbl, (sa_index_t)id, &index)) break;
         if (ROBJECT_NUMIV(obj) <= (long)index) break;
         val = ROBJECT_IVPTR(obj)[index];
         if (val != Qundef) {
@@ -1326,14 +1286,14 @@ rb_obj_remove_instance_variable(VALUE obj, VALUE name)
       case T_CLASS:
       case T_MODULE:
 	n = id;
-	if (RCLASS_IV_TBL(obj) && st_delete(RCLASS_IV_TBL(obj), &n, &v)) {
+	if (sa_delete(RCLASS_IV_TBL(obj), (sa_index_t)id, &v)) {
 	    return (VALUE)v;
 	}
 	break;
       default:
 	if (FL_TEST(obj, FL_EXIVAR) || rb_special_const_p(obj)) {
 	    v = val;
-	    if (generic_ivar_remove(obj, (st_data_t)id, &v)) {
+	    if (generic_ivar_remove(obj, id, &v)) {
 		return (VALUE)v;
 	    }
 	}
@@ -1410,20 +1370,20 @@ rb_mod_const_missing(VALUE klass, VALUE name)
 static void
 autoload_mark(void *ptr)
 {
-    rb_mark_tbl((st_table *)ptr);
+    rb_mark_sa_tbl((sa_table *)ptr);
 }

 static void
 autoload_free(void *ptr)
 {
-    st_free_table((st_table *)ptr);
+    sa_free_table((sa_table *)ptr);
 }

 static size_t
 autoload_memsize(const void *ptr)
 {
-    const st_table *tbl = ptr;
-    return st_memsize(tbl);
+    const sa_table *tbl = ptr;
+    return sa_memsize(tbl);
 }

 static const rb_data_type_t autoload_data_type = {
@@ -1432,14 +1392,14 @@ static const rb_data_type_t autoload_data_type = {
 };

 #define check_autoload_table(av) \
-    (struct st_table *)rb_check_typeddata((av), &autoload_data_type)
+    (struct sa_table *)rb_check_typeddata((av), &autoload_data_type)

 void
 rb_autoload(VALUE mod, ID id, const char *file)
 {
     st_data_t av;
     VALUE fn;
-    struct st_table *tbl;
+    struct sa_table *tbl;

     if (!rb_is_const_id(id)) {
 	rb_raise(rb_eNameError, "autoload must be constant name: %s", rb_id2name(id));
@@ -1448,43 +1408,41 @@ rb_autoload(VALUE mod, ID id, const char *file)
 	rb_raise(rb_eArgError, "empty file name");
     }

-    if ((tbl = RCLASS_CONST_TBL(mod)) && st_lookup(tbl, (st_data_t)id, &av) && ((rb_const_entry_t*)av)->value != Qundef)
+    if (sa_lookup(RCLASS_CONST_TBL(mod), (sa_index_t)id, &av) && ((rb_const_entry_t*)av)->value != Qundef)
 	return;

     rb_const_set(mod, id, Qundef);
     tbl = RCLASS_IV_TBL(mod);
-    if (tbl && st_lookup(tbl, (st_data_t)autoload, &av)) {
+    if (sa_lookup(tbl, (sa_index_t)autoload, &av)) {
 	tbl = check_autoload_table((VALUE)av);
     }
     else {
-	if (!tbl) tbl = RCLASS_IV_TBL(mod) = st_init_numtable();
 	av = (st_data_t)TypedData_Wrap_Struct(0, &autoload_data_type, 0);
-	st_add_direct(tbl, (st_data_t)autoload, av);
-	DATA_PTR(av) = tbl = st_init_numtable();
+	sa_insert(tbl, (sa_index_t)autoload, av);
+	DATA_PTR(av) = tbl = sa_new_table();
     }
     fn = rb_str_new2(file);
     FL_UNSET(fn, FL_TAINT);
     OBJ_FREEZE(fn);
-    st_insert(tbl, (st_data_t)id, (st_data_t)rb_node_newnode(NODE_MEMO, fn, rb_safe_level(), 0));
+    sa_insert(tbl, (sa_index_t)id, (st_data_t)rb_node_newnode(NODE_MEMO, fn, rb_safe_level(), 0));
 }

 static NODE*
 autoload_delete(VALUE mod, ID id)
 {
-    st_data_t val, load = 0, n = id;
+    st_data_t val, load = 0;
     rb_const_entry_t *ce;

-    st_delete(RCLASS_CONST_TBL(mod), &n, &val);
+    sa_delete(RCLASS_CONST_TBL(mod), (sa_index_t)id, &val);
     ce = (rb_const_entry_t*)val;
     if (ce) xfree(ce);
-    if (st_lookup(RCLASS_IV_TBL(mod), (st_data_t)autoload, &val)) {
-	struct st_table *tbl = check_autoload_table((VALUE)val);
+    if (sa_lookup(RCLASS_IV_TBL(mod), (sa_index_t)autoload, &val)) {
+	struct sa_table *tbl = check_autoload_table((VALUE)val);

-	st_delete(tbl, &n, &load);
+	sa_delete(tbl, (sa_index_t)id, &load);

 	if (tbl->num_entries == 0) {
-	    n = autoload;
-	    st_delete(RCLASS_IV_TBL(mod), &n, &val);
+	    sa_delete(RCLASS_IV_TBL(mod), (sa_index_t)autoload, &val);
 	}
     }

@@ -1509,14 +1467,14 @@ static NODE *
 autoload_node(VALUE mod, ID id, const char **loadingpath)
 {
     VALUE file;
-    struct st_table *tbl;
+    struct sa_table *tbl;
     st_data_t val;
     NODE *load;
     const char *loading;
     int safe;

-    if (!st_lookup(RCLASS_IV_TBL(mod), autoload, &val) ||
-	!(tbl = check_autoload_table((VALUE)val)) || !st_lookup(tbl, (st_data_t)id, &val)) {
+    if (!sa_lookup(RCLASS_IV_TBL(mod), (sa_index_t)autoload, &val) ||
+	!(tbl = check_autoload_table((VALUE)val)) || !sa_lookup(tbl, (sa_index_t)id, &val)) {
 	return 0;
     }
     load = (NODE *)val;
@@ -1541,10 +1499,10 @@ autoload_node(VALUE mod, ID id, const char **loadingpath)
 static int
 autoload_node_id(VALUE mod, ID id)
 {
-    struct st_table *tbl = RCLASS_CONST_TBL(mod);
+    struct sa_table *tbl = RCLASS_CONST_TBL(mod);
     st_data_t val;

-    if (!tbl || !st_lookup(tbl, (st_data_t)id, &val) || ((rb_const_entry_t*)val)->value != Qundef) {
+    if (!tbl || !sa_lookup(tbl, (sa_index_t)id, &val) || ((rb_const_entry_t*)val)->value != Qundef) {
 	return 0;
     }
     return 1;
@@ -1593,7 +1551,7 @@ rb_const_get_0(VALUE klass, ID id, int exclude, int recurse, int visibility)
     while (RTEST(tmp)) {
 	VALUE am = 0;
 	st_data_t data;
-	while (RCLASS_CONST_TBL(tmp) && st_lookup(RCLASS_CONST_TBL(tmp), (st_data_t)id, &data)) {
+	while (sa_lookup(RCLASS_CONST_TBL(tmp), (sa_index_t)id, &data)) {
 	    rb_const_entry_t *ce = (rb_const_entry_t *)data;
 	    if (visibility && ce->flag == CONST_PRIVATE) {
 		rb_name_error(id, "private constant %s::%s referenced", rb_class2name(klass), rb_id2name(id));
@@ -1686,12 +1644,12 @@ VALUE
 rb_const_remove(VALUE mod, ID id)
 {
     VALUE val;
-    st_data_t v, n = id;
+    st_data_t v;

     if (!OBJ_UNTRUSTED(mod) && rb_safe_level() >= 4)
 	rb_raise(rb_eSecurityError, "Insecure: can't remove constant");
     rb_check_frozen(mod);
-    if (!RCLASS_CONST_TBL(mod) || !st_delete(RCLASS_CONST_TBL(mod), &n, &v)) {
+    if (!sa_delete(RCLASS_CONST_TBL(mod), (sa_index_t)id, &v)) {
 	if (rb_const_defined_at(mod, id)) {
 	    rb_name_error(id, "cannot remove %s::%s",
 			  rb_class2name(mod), rb_id2name(id));
@@ -1711,27 +1669,20 @@ rb_const_remove(VALUE mod, ID id)
     return val;
 }

-static int
-sv_i(ID key, rb_const_entry_t *ce, st_table *tbl)
-{
-    if (rb_is_const_id(key)) {
-	if (!st_lookup(tbl, (st_data_t)key, 0)) {
-	    st_insert(tbl, (st_data_t)key, (st_data_t)ce);
-	}
-    }
-    return ST_CONTINUE;
-}
-
 void*
 rb_mod_const_at(VALUE mod, void *data)
 {
-    st_table *tbl = data;
+    sa_table *tbl = data;
     if (!tbl) {
-	tbl = st_init_numtable();
+	tbl = sa_new_table();
     }
-    if (RCLASS_CONST_TBL(mod)) {
-	st_foreach_safe(RCLASS_CONST_TBL(mod), sv_i, (st_data_t)tbl);
+    SA_FOREACH_START(RCLASS_CONST_TBL(mod));
+    if (rb_is_const_id(entry->key)) {
+	if (!sa_lookup(tbl, entry->key, 0)) {
+	    sa_insert(tbl, entry->key, value);
+	}
     }
+    SA_FOREACH_END();
     return tbl;
 }

@@ -1748,25 +1699,21 @@ rb_mod_const_of(VALUE mod, void *data)
     return data;
 }

-static int
-list_i(st_data_t key, st_data_t value, VALUE ary)
-{
-    ID sym = (ID)key;
-    rb_const_entry_t *ce = (rb_const_entry_t *)value;
-    if (ce->flag != CONST_PRIVATE) rb_ary_push(ary, ID2SYM(sym));
-    return ST_CONTINUE;
-}
-
 VALUE
 rb_const_list(void *data)
 {
-    st_table *tbl = data;
+    sa_table *tbl = data;
     VALUE ary;

     if (!tbl) return rb_ary_new2(0);
     ary = rb_ary_new2(tbl->num_entries);
-    st_foreach_safe(tbl, list_i, ary);
-    st_free_table(tbl);
+
+    SA_FOREACH_START(tbl);
+    rb_const_entry_t *ce = (rb_const_entry_t *)value;
+    if (ce->flag != CONST_PRIVATE) rb_ary_push(ary, ID2SYM(entry->key));
+    SA_FOREACH_END();
+
+    sa_free_table(tbl);

     return ary;
 }
@@ -1790,7 +1737,7 @@ VALUE
 rb_mod_constants(int argc, VALUE *argv, VALUE mod)
 {
     VALUE inherit;
-    st_table *tbl;
+    sa_table *tbl;

     if (argc == 0) {
 	inherit = Qtrue;
@@ -1817,7 +1764,7 @@ rb_const_defined_0(VALUE klass, ID id, int exclude, int recurse, int visibility)
     tmp = klass;
   retry:
     while (tmp) {
-	if (RCLASS_CONST_TBL(tmp) && st_lookup(RCLASS_CONST_TBL(tmp), (st_data_t)id, &value)) {
+	if (sa_lookup(RCLASS_CONST_TBL(tmp), (sa_index_t)id, &value)) {
 	    rb_const_entry_t *ce = (rb_const_entry_t *)value;
 	    if (visibility && ce->flag == CONST_PRIVATE) {
 		return (int)Qfalse;
@@ -1885,6 +1832,7 @@ void
 rb_const_set(VALUE klass, ID id, VALUE val)
 {
     rb_const_entry_t *ce;
+    st_data_t value;
     VALUE visibility = CONST_PUBLIC;

     if (NIL_P(klass)) {
@@ -1893,21 +1841,14 @@ rb_const_set(VALUE klass, ID id, VALUE val)
     }

     check_before_mod_set(klass, id, val, "constant");
-    if (!RCLASS_CONST_TBL(klass)) {
-	RCLASS_CONST_TBL(klass) = st_init_numtable();
-    }
-    else {
-	st_data_t value;
-
-	if (st_lookup(RCLASS_CONST_TBL(klass), (st_data_t)id, &value)) {
-	    rb_const_entry_t *ce = (rb_const_entry_t*)value;
-	    if (ce->value == Qundef)
-		autoload_delete(klass, id);
-	    else {
-		visibility = ce->flag;
-		rb_warn("already initialized constant %s", rb_id2name(id));
-	    }
-	}
+    if (sa_lookup(RCLASS_CONST_TBL(klass), (sa_index_t)id, &value)) {
+        rb_const_entry_t *ce = (rb_const_entry_t*)value;
+        if (ce->value == Qundef)
+            autoload_delete(klass, id);
+        else {
+            visibility = ce->flag;
+            rb_warn("already initialized constant %s", rb_id2name(id));
+        }
     }

     rb_vm_change_state();
@@ -1916,7 +1857,7 @@ rb_const_set(VALUE klass, ID id, VALUE val)
     ce->flag = (rb_const_flag_t)visibility;
     ce->value = val;

-    st_insert(RCLASS_CONST_TBL(klass), (st_data_t)id, (st_data_t)ce);
+    sa_insert(RCLASS_CONST_TBL(klass), (sa_index_t)id, (st_data_t)ce);
 }

 void
@@ -1958,8 +1899,7 @@ set_const_visibility(VALUE mod, int argc, VALUE *argv, rb_const_flag_t flag)
     for (i = 0; i < argc; i++) {
 	VALUE val = argv[i];
 	id = rb_to_id(val);
-	if (RCLASS_CONST_TBL(mod) &&
-	    st_lookup(RCLASS_CONST_TBL(mod), (st_data_t)id, &v)) {
+	if (sa_lookup(RCLASS_CONST_TBL(mod), (sa_index_t)id, &v)) {
 	    ((rb_const_entry_t*)v)->flag = flag;
 	}
 	else {
@@ -2008,7 +1948,7 @@ original_module(VALUE c)
 }

 #define CVAR_LOOKUP(v,r) do {\
-    if (RCLASS_IV_TBL(klass) && st_lookup(RCLASS_IV_TBL(klass),(st_data_t)id,(v))) {\
+    if (sa_lookup(RCLASS_IV_TBL(klass),(sa_index_t)id,(v))) {\
 	r;\
     }\
     if (FL_TEST(klass, FL_SINGLETON) ) {\
@@ -2027,7 +1967,7 @@ original_module(VALUE c)
 	klass = RCLASS_SUPER(klass);\
     }\
     while (klass) {\
-	if (RCLASS_IV_TBL(klass) && st_lookup(RCLASS_IV_TBL(klass),(st_data_t)id,(v))) {\
+	if (sa_lookup(RCLASS_IV_TBL(klass),(sa_index_t)id,(v))) {\
 	    r;\
 	}\
 	klass = RCLASS_SUPER(klass);\
@@ -2043,15 +1983,13 @@ rb_cvar_set(VALUE klass, ID id, VALUE val)
     CVAR_LOOKUP(0, {if (!front) front = klass; target = klass;});
     if (target) {
 	if (front && target != front) {
-	    st_data_t did = id;
-
 	    if (RTEST(ruby_verbose)) {
 		rb_warning("class variable %s of %s is overtaken by %s",
 			   rb_id2name(id), rb_class2name(original_module(front)),
 			   rb_class2name(original_module(target)));
 	    }
 	    if (BUILTIN_TYPE(front) == T_CLASS) {
-		st_delete(RCLASS_IV_TBL(front),&did,0);
+		sa_delete(RCLASS_IV_TBL(front),(sa_index_t)id,0);
 	    }
 	}
     }
@@ -2060,11 +1998,7 @@ rb_cvar_set(VALUE klass, ID id, VALUE val)
     }

     check_before_mod_set(target, id, val, "class variable");
-    if (!RCLASS_IV_TBL(target)) {
-	RCLASS_IV_TBL(target) = st_init_numtable();
-    }
-
-    st_insert(RCLASS_IV_TBL(target), (st_data_t)id, (st_data_t)val);
+    sa_insert(RCLASS_IV_TBL(target), (sa_index_t)id, (st_data_t)val);
 }

 VALUE
@@ -2080,15 +2014,13 @@ rb_cvar_get(VALUE klass, ID id)
 		      rb_id2name(id), rb_class2name(tmp));
     }
     if (front && target != front) {
-	st_data_t did = id;
-
 	if (RTEST(ruby_verbose)) {
 	    rb_warning("class variable %s of %s is overtaken by %s",
 		       rb_id2name(id), rb_class2name(original_module(front)),
 		       rb_class2name(original_module(target)));
 	}
 	if (BUILTIN_TYPE(front) == T_CLASS) {
-	    st_delete(RCLASS_IV_TBL(front),&did,0);
+	    sa_delete(RCLASS_IV_TBL(front),(sa_index_t)id,0);
 	}
     }
     return (VALUE)value;
@@ -2134,7 +2066,7 @@ rb_define_class_variable(VALUE klass, const char *name, VALUE val)
 }

 static int
-cv_i(ID key, VALUE value, VALUE ary)
+cv_i(sa_index_t key, VALUE value, VALUE ary)
 {
     if (rb_is_class_id(key)) {
 	VALUE kval = ID2SYM(key);
@@ -2167,7 +2099,7 @@ rb_mod_class_variables(VALUE obj)
     VALUE ary = rb_ary_new();

     if (RCLASS_IV_TBL(obj)) {
-	st_foreach_safe(RCLASS_IV_TBL(obj), cv_i, ary);
+	sa_foreach(RCLASS_IV_TBL(obj), cv_i, ary);
     }
     return ary;
 }
@@ -2196,7 +2128,7 @@ VALUE
 rb_mod_remove_cvar(VALUE mod, VALUE name)
 {
     const ID id = rb_to_id(name);
-    st_data_t val, n = id;
+    st_data_t val;

     if (!rb_is_class_id(id)) {
 	rb_name_error(id, "wrong class variable name %s", rb_id2name(id));
@@ -2204,7 +2136,7 @@ rb_mod_remove_cvar(VALUE mod, VALUE name)
     if (!OBJ_UNTRUSTED(mod) && rb_safe_level() >= 4)
 	rb_raise(rb_eSecurityError, "Insecure: can't remove class variable");
     rb_check_frozen(mod);
-    if (RCLASS_IV_TBL(mod) && st_delete(RCLASS_IV_TBL(mod), &n, &val)) {
+    if (RCLASS_IV_TBL(mod) && sa_delete(RCLASS_IV_TBL(mod), (sa_index_t)id, &val)) {
 	return (VALUE)val;
     }
     if (rb_cvar_defined(mod, id)) {
diff --git a/vm.c b/vm.c
index dd026cb..e6421c6 100644
--- a/vm.c
+++ b/vm.c
@@ -1042,7 +1042,7 @@ static void
 add_opt_method(VALUE klass, ID mid, VALUE bop)
 {
     rb_method_entry_t *me;
-    if (st_lookup(RCLASS_M_TBL(klass), mid, (void *)&me) && me->def &&
+    if (sa_lookup(RCLASS_M_TBL(klass), (sa_index_t)mid, (void *)&me) && me->def &&
 	me->def->type == VM_METHOD_TYPE_CFUNC) {
 	st_insert(vm_opt_method_table, (st_data_t)me, (st_data_t)bop);
     }
diff --git a/vm_insnhelper.c b/vm_insnhelper.c
index 8e7fe94..1367cec 100644
--- a/vm_insnhelper.c
+++ b/vm_insnhelper.c
@@ -1187,7 +1187,7 @@ vm_get_ev_const(rb_thread_t *th, const rb_iseq_t *iseq,
 		st_data_t data;
 	      search_continue:
 		if (RCLASS_CONST_TBL(klass) &&
-		    st_lookup(RCLASS_CONST_TBL(klass), id, &data)) {
+		    sa_lookup(RCLASS_CONST_TBL(klass), (sa_index_t)id, &data)) {
 		    val = ((rb_const_entry_t*)data)->value;
 		    if (val == Qundef) {
 			if (am == klass) break;
@@ -1297,10 +1297,10 @@ vm_getivar(VALUE obj, ID id, IC ic)
 	    st_data_t index;
 	    long len = ROBJECT_NUMIV(obj);
 	    VALUE *ptr = ROBJECT_IVPTR(obj);
-	    struct st_table *iv_index_tbl = ROBJECT_IV_INDEX_TBL(obj);
+	    struct sa_table *iv_index_tbl = ROBJECT_IV_INDEX_TBL(obj);

 	    if (iv_index_tbl) {
-		if (st_lookup(iv_index_tbl, id, &index)) {
+		if (sa_lookup(iv_index_tbl, (sa_index_t)id, &index)) {
 		    if ((long)index < len) {
 			val = ptr[index];
 		    }
@@ -1350,9 +1350,9 @@ vm_setivar(VALUE obj, ID id, VALUE val, IC ic)
 	    }
 	}
 	else {
-	    struct st_table *iv_index_tbl = ROBJECT_IV_INDEX_TBL(obj);
+	    struct sa_table *iv_index_tbl = ROBJECT_IV_INDEX_TBL(obj);

-	    if (iv_index_tbl && st_lookup(iv_index_tbl, (st_data_t)id, &index)) {
+	    if (iv_index_tbl && sa_lookup(iv_index_tbl, (sa_index_t)id, &index)) {
 		ic->ic_class = klass;
 		ic->ic_value.index = index;
 		ic->ic_vmstat = GET_VM_STATE_VERSION();
diff --git a/vm_method.c b/vm_method.c
index 6e33603..6567440 100644
--- a/vm_method.c
+++ b/vm_method.c
@@ -2,38 +2,33 @@
  * This file is included by vm.c
  */

-#define CACHE_SIZE 0x800
-#define CACHE_MASK 0x7ff
-#define EXPR1(c,m) ((((c)>>3)^(m))&CACHE_MASK)
-
 static void rb_vm_check_redefinition_opt_method(const rb_method_entry_t *me);

 static ID object_id, respond_to_missing;
 static ID removed, singleton_removed, undefined, singleton_undefined;
 static ID added, singleton_added, attached;

-struct cache_entry {		/* method hash table. */
-    VALUE filled_version;        /* filled state version */
-    ID mid;			/* method's id */
-    VALUE klass;		/* receiver's class */
-    rb_method_entry_t *me;
-};
-
-static struct cache_entry cache[CACHE_SIZE];
 #define ruby_running (GET_VM()->running)
 /* int ruby_running = 0; */

+static int
+methods_cache_clear_callback(void *vstart, void *vend, size_t stride, void *data)
+{
+    VALUE v = (VALUE)vstart;
+    for (; v != (VALUE)vend; v += stride) {
+	switch(BUILTIN_TYPE(v)) {
+	    case T_CLASS:
+	    case T_MODULE:
+	    case T_ICLASS:
+		RCLASS(v)->cache->method_cache_version = 0;
+	}
+    }
+    return 0;
+}
 static void
 vm_clear_global_method_cache(void)
 {
-    struct cache_entry *ent, *end;
-
-    ent = cache;
-    end = ent + CACHE_SIZE;
-    while (ent < end) {
-	ent->filled_version = 0;
-	ent++;
-    }
+    rb_objspace_each_objects(methods_cache_clear_callback, NULL);
 }

 void
@@ -162,7 +157,7 @@ rb_method_entry_make(VALUE klass, ID mid, rb_method_type_t type,
 		     rb_method_definition_t *def, rb_method_flag_t noex)
 {
     rb_method_entry_t *me;
-    st_table *mtbl;
+    sa_table *mtbl;
     st_data_t data;

     if (NIL_P(klass)) {
@@ -190,7 +185,7 @@ rb_method_entry_make(VALUE klass, ID mid, rb_method_type_t type,
     mtbl = RCLASS_M_TBL(klass);

     /* check re-definition */
-    if (st_lookup(mtbl, mid, &data)) {
+    if (sa_lookup(mtbl, (sa_index_t)mid, &data)) {
 	rb_method_entry_t *old_me = (rb_method_entry_t *)data;
 	rb_method_definition_t *old_def = old_me->def;

@@ -248,7 +243,7 @@ rb_method_entry_make(VALUE klass, ID mid, rb_method_type_t type,
 	}
     }

-    st_insert(mtbl, mid, (st_data_t) me);
+    sa_insert(mtbl, (sa_index_t)mid, (st_data_t) me);

     return me;
 }
@@ -371,7 +366,7 @@ search_method(VALUE klass, ID id)
 	return 0;
     }

-    while (!st_lookup(RCLASS_M_TBL(klass), id, &body)) {
+    while (!sa_lookup(RCLASS_M_TBL(klass), (sa_index_t)id, &body)) {
 	klass = RCLASS_SUPER(klass);
 	if (!klass) {
 	    return 0;
@@ -393,20 +388,10 @@ rb_method_entry_get_without_cache(VALUE klass, ID id)
     rb_method_entry_t *me = search_method(klass, id);

     if (ruby_running) {
-	struct cache_entry *ent;
-	ent = cache + EXPR1(klass, id);
-	ent->filled_version = GET_VM_STATE_VERSION();
-	ent->klass = klass;
-
 	if (UNDEFINED_METHOD_ENTRY_P(me)) {
-	    ent->mid = id;
-	    ent->me = 0;
-	    me = 0;
-	}
-	else {
-	    ent->mid = id;
-	    ent->me = me;
+            me = 0;
 	}
+        sa_insert(&RCLASS(klass)->cache->m_cache_tbl, (sa_index_t)id, (st_data_t)me);
     }

     return me;
@@ -415,21 +400,23 @@ rb_method_entry_get_without_cache(VALUE klass, ID id)
 rb_method_entry_t *
 rb_method_entry(VALUE klass, ID id)
 {
-    struct cache_entry *ent;
+    rb_class_cache_t *cache = RCLASS(klass)->cache;
+    rb_method_entry_t *me;

-    ent = cache + EXPR1(klass, id);
-    if (ent->filled_version == GET_VM_STATE_VERSION() &&
-	ent->mid == id && ent->klass == klass) {
-	return ent->me;
+    if (cache->method_cache_version != GET_VM_STATE_VERSION()) {
+        sa_clear_no_free(&cache->m_cache_tbl);
+        cache->method_cache_version = GET_VM_STATE_VERSION();
+    }
+    else if (sa_lookup(&cache->m_cache_tbl, (sa_index_t)id, (st_data_t*)&me)) {
+        return me;
     }
-
     return rb_method_entry_get_without_cache(klass, id);
 }

 static void
 remove_method(VALUE klass, ID mid)
 {
-    st_data_t key, data;
+    st_data_t data;
     rb_method_entry_t *me = 0;

     if (klass == rb_cObject) {
@@ -443,14 +430,13 @@ remove_method(VALUE klass, ID mid)
 	rb_warn("removing `%s' may cause serious problems", rb_id2name(mid));
     }

-    if (!st_lookup(RCLASS_M_TBL(klass), mid, &data) ||
+    if (!sa_lookup(RCLASS_M_TBL(klass), (sa_index_t)mid, &data) ||
 	!(me = (rb_method_entry_t *)data) ||
 	(!me->def || me->def->type == VM_METHOD_TYPE_UNDEF)) {
 	rb_name_error(mid, "method `%s' not defined in %s",
 		      rb_id2name(mid), rb_class2name(klass));
     }
-    key = (st_data_t)mid;
-    st_delete(RCLASS_M_TBL(klass), &key, &data);
+    sa_delete(RCLASS_M_TBL(klass), (sa_index_t)mid, &data);

     rb_vm_check_redefinition_opt_method(me);
     rb_clear_cache_for_undef(klass, mid);

## 04-ary_queue.diff
diff --git a/array.c b/array.c
index e427cb3..0efa9b1 100644
--- a/array.c
+++ b/array.c
@@ -255,15 +255,24 @@ rb_ary_modify(VALUE ary)
     rb_ary_modify_check(ary);
     if (ARY_SHARED_P(ary)) {
         long len = RARRAY_LEN(ary);
+	VALUE shared = ARY_SHARED(ary);
         if (len <= RARRAY_EMBED_LEN_MAX) {
             VALUE *ptr = ARY_HEAP_PTR(ary);
-            VALUE shared = ARY_SHARED(ary);
             FL_UNSET_SHARED(ary);
             FL_SET_EMBED(ary);
             MEMCPY(ARY_EMBED_PTR(ary), ptr, VALUE, len);
             rb_ary_decrement_share(shared);
             ARY_SET_EMBED_LEN(ary, len);
         }
+	else if (ARY_SHARED_NUM(shared) == 1 && len > RARRAY_LEN(shared)>>1) {
+	    long shift = RARRAY_PTR(ary) - RARRAY_PTR(shared);
+	    ARY_SET_PTR(ary, RARRAY_PTR(shared));
+	    ARY_SET_CAPA(ary, RARRAY_LEN(shared));
+	    MEMMOVE(RARRAY_PTR(ary), RARRAY_PTR(ary)+shift, VALUE, len);
+	    FL_UNSET_SHARED(ary);
+            FL_SET_EMBED(shared);
+	    rb_ary_decrement_share(shared);
+	}
         else {
             VALUE *ptr = ALLOC_N(VALUE, len);
             MEMCPY(ptr, RARRAY_PTR(ary), VALUE, len);
@@ -274,6 +283,38 @@ rb_ary_modify(VALUE ary)
     }
 }

+static void
+ary_ensure_room_for_push(VALUE ary, long add_len)
+{
+    long new_len = RARRAY_LEN(ary) + add_len;
+    long capa;
+
+    if (ARY_SHARED_P(ary)) {
+        if (new_len > RARRAY_EMBED_LEN_MAX) {
+            VALUE shared = ARY_SHARED(ary);
+            if (ARY_SHARED_NUM(shared) == 1) {
+		if (RARRAY_PTR(ary) - RARRAY_PTR(shared) + new_len <= RARRAY_LEN(shared)) {
+		    rb_ary_modify_check(ary);
+		}
+		else {
+		    /* if array is shared, than it is likely it participate in push/shift pattern */
+		    rb_ary_modify(ary);
+		    capa = ARY_CAPA(ary);
+		    if (new_len > capa - (capa >> 6)) {
+			ary_double_capa(ary, new_len);
+		    }
+		}
+		return;
+            }
+        }
+    }
+    rb_ary_modify(ary);
+    capa = ARY_CAPA(ary);
+    if (new_len > capa) {
+	ary_double_capa(ary, new_len);
+    }
+}
+
 VALUE
 rb_ary_freeze(VALUE ary)
 {
@@ -430,8 +471,9 @@ ary_make_shared(VALUE ary)
 	OBJSETUP(shared, 0, T_ARRAY);
         FL_UNSET_EMBED(shared);

-        ARY_SET_LEN((VALUE)shared, RARRAY_LEN(ary));
+        ARY_SET_LEN((VALUE)shared, ARY_CAPA(ary));
         ARY_SET_PTR((VALUE)shared, RARRAY_PTR(ary));
+	rb_mem_clear(RARRAY_PTR(shared) + RARRAY_LEN(ary), ARY_CAPA(ary) - RARRAY_LEN(ary));
 	FL_SET_SHARED_ROOT(shared);
 	ARY_SET_SHARED_NUM((VALUE)shared, 1);
 	FL_SET_SHARED(ary);
@@ -721,8 +763,6 @@ ary_take_first_or_last(int argc, VALUE *argv, VALUE ary, enum ary_take_pos_flags
     return ary_make_partial(ary, rb_cArray, offset, n);
 }

-static VALUE rb_ary_push_1(VALUE ary, VALUE item);
-
 /*
  *  call-seq:
  *     ary << obj            -> ary
@@ -739,8 +779,12 @@ static VALUE rb_ary_push_1(VALUE ary, VALUE item);
 VALUE
 rb_ary_push(VALUE ary, VALUE item)
 {
-    rb_ary_modify(ary);
-    return rb_ary_push_1(ary, item);
+    long idx = RARRAY_LEN(ary);
+
+    ary_ensure_room_for_push(ary, 1);
+    RARRAY_PTR(ary)[idx] = item;
+    ARY_SET_LEN(ary, idx + 1);
+    return ary;
 }

 static VALUE
@@ -756,6 +800,18 @@ rb_ary_push_1(VALUE ary, VALUE item)
     return ary;
 }

+static VALUE
+rb_ary_cat(VALUE ary, const VALUE *ptr, long len)
+{
+    long oldlen = RARRAY_LEN(ary);
+
+    ary_ensure_room_for_push(ary, len);
+copy:
+    MEMCPY(RARRAY_PTR(ary) + oldlen, ptr, VALUE, len);
+    ARY_SET_LEN(ary, oldlen + len);
+    return ary;
+}
+
 /*
  *  call-seq:
  *     ary.push(obj, ... )   -> ary
@@ -772,11 +828,7 @@ rb_ary_push_1(VALUE ary, VALUE item)
 static VALUE
 rb_ary_push_m(int argc, VALUE *argv, VALUE ary)
 {
-    rb_ary_modify(ary);
-    while (argc--) {
-	rb_ary_push_1(ary, *argv++);
-    }
-    return ary;
+    return rb_ary_cat(ary, argv, argc);
 }

 VALUE
@@ -904,6 +956,55 @@ rb_ary_shift_m(int argc, VALUE *argv, VALUE ary)
     return result;
 }

+static void
+ary_ensure_room_for_unshift(VALUE ary, int argc)
+{
+    long len = RARRAY_LEN(ary);
+    long new_len = len + argc;
+    long capa;
+    VALUE *head, *sharedp;
+
+    if (ARY_SHARED_P(ary)) {
+	VALUE shared = ARY_SHARED(ary);
+	capa = RARRAY_LEN(shared);
+	if (ARY_SHARED_NUM(shared) == 1 && capa > new_len) {
+	    head = RARRAY_PTR(ary);
+	    sharedp = RARRAY_PTR(shared);
+	    goto makeroom_if_need;
+	}
+    }
+
+    rb_ary_modify(ary);
+    capa = ARY_CAPA(ary);
+    if (capa - (capa >> 6) <= new_len) {
+	ary_double_capa(ary, new_len);
+    }
+
+    /* use shared array for big "queues" */
+    if (new_len > ARY_DEFAULT_SIZE * 4) {
+	/* make a room for unshifted items */
+	capa = ARY_CAPA(ary);
+	ary_make_shared(ary);
+
+	head = sharedp = RARRAY_PTR(ary);
+	goto makeroom;
+makeroom_if_need:
+	if (head - sharedp < argc) {
+	    long room;
+makeroom:
+	    room = capa - new_len;
+	    room -= room >> 4;
+	    MEMMOVE(sharedp + argc + room, head, VALUE, len);
+	    head = sharedp + argc + room;
+	}
+	ARY_SET_PTR(ary, head - argc);
+    }
+    else {
+	/* sliding items */
+	MEMMOVE(RARRAY_PTR(ary) + argc, RARRAY_PTR(ary), VALUE, len);
+    }
+}
+
 /*
  *  call-seq:
  *     ary.unshift(obj, ...)  -> ary
@@ -919,19 +1020,16 @@ rb_ary_shift_m(int argc, VALUE *argv, VALUE ary)
 static VALUE
 rb_ary_unshift_m(int argc, VALUE *argv, VALUE ary)
 {
-    long len;
+    long len = RARRAY_LEN(ary);

-    rb_ary_modify(ary);
-    if (argc == 0) return ary;
-    if (ARY_CAPA(ary) <= (len = RARRAY_LEN(ary)) + argc) {
-	ary_double_capa(ary, len + argc);
+    if (argc == 0) {
+	rb_ary_modify_check(ary);
+	return ary;
     }

-    /* sliding items */
-    MEMMOVE(RARRAY_PTR(ary) + argc, RARRAY_PTR(ary), VALUE, len);
+    ary_ensure_room_for_unshift(ary, argc);
     MEMCPY(RARRAY_PTR(ary), argv, VALUE, argc);
-    ARY_INCREASE_LEN(ary, argc);
-
+    ARY_SET_LEN(ary, len + argc);
     return ary;
 }

@@ -1293,15 +1391,12 @@ rb_ary_splice(VALUE ary, long beg, long len, VALUE rpl)
 	rpl = rb_ary_to_ary(rpl);
 	rlen = RARRAY_LEN(rpl);
     }
-    rb_ary_modify(ary);
     if (beg >= RARRAY_LEN(ary)) {
 	if (beg > ARY_MAX_SIZE - rlen) {
 	    rb_raise(rb_eIndexError, "index %ld too big", beg);
 	}
+	ary_ensure_room_for_push(ary, rlen-len); /* len is 0 or negative */
 	len = beg + rlen;
-	if (len >= ARY_CAPA(ary)) {
-	    ary_double_capa(ary, len);
-	}
 	rb_mem_clear(RARRAY_PTR(ary) + RARRAY_LEN(ary), beg - RARRAY_LEN(ary));
 	if (rlen > 0) {
 	    MEMCPY(RARRAY_PTR(ary) + beg, RARRAY_PTR(rpl), VALUE, rlen);
@@ -1311,6 +1406,7 @@ rb_ary_splice(VALUE ary, long beg, long len, VALUE rpl)
     else {
 	long alen;

+	rb_ary_modify(ary);
 	alen = RARRAY_LEN(ary) + rlen - len;
 	if (alen >= ARY_CAPA(ary)) {
 	    ary_double_capa(ary, alen);
@@ -2100,12 +2196,13 @@ rb_ary_sort_bang(VALUE ary)
     if (RARRAY_LEN(ary) > 1) {
 	VALUE tmp = ary_make_substitution(ary); /* only ary refers tmp */
 	struct ary_sort_data data;
+	long len = RARRAY_LEN(ary);

 	RBASIC(tmp)->klass = 0;
 	data.ary = tmp;
 	data.opt_methods = 0;
 	data.opt_inited = 0;
-	ruby_qsort(RARRAY_PTR(tmp), RARRAY_LEN(tmp), sizeof(VALUE),
+	ruby_qsort(RARRAY_PTR(tmp), len, sizeof(VALUE),
 		   rb_block_given_p()?sort_1:sort_2, &data);

         if (ARY_EMBED_P(tmp)) {
@@ -2122,7 +2219,7 @@ rb_ary_sort_bang(VALUE ary)
             if (ARY_HEAP_PTR(ary) == ARY_HEAP_PTR(tmp)) {
                 assert(!ARY_EMBED_P(ary));
                 FL_UNSET_SHARED(ary);
-                ARY_SET_CAPA(ary, ARY_CAPA(tmp));
+                ARY_SET_CAPA(ary, RARRAY_LEN(tmp));
             }
             else {
                 assert(!ARY_SHARED_P(tmp));
@@ -2137,8 +2234,8 @@ rb_ary_sort_bang(VALUE ary)
                     xfree(ARY_HEAP_PTR(ary));
                 }
                 ARY_SET_PTR(ary, RARRAY_PTR(tmp));
-                ARY_SET_HEAP_LEN(ary, RARRAY_LEN(tmp));
-                ARY_SET_CAPA(ary, ARY_CAPA(tmp));
+                ARY_SET_HEAP_LEN(ary, len);
+                ARY_SET_CAPA(ary, RARRAY_LEN(tmp));
             }
             /* tmp was lost ownership for the ptr */
             FL_UNSET(tmp, FL_FREEZE);
diff --git a/test/ruby/test_array.rb b/test/ruby/test_array.rb
index 28d3e41..bcad2c0 100644
--- a/test/ruby/test_array.rb
+++ b/test/ruby/test_array.rb
@@ -414,6 +414,18 @@ class TestArray < Test::Unit::TestCase
     a = @cls[1, 2, 3]
     a[-1, 0] = a
     assert_equal([1, 2, 1, 2, 3, 3], a)
+
+    a = @cls[]
+    a[5,0] = [5]
+    assert_equal([nil, nil, nil, nil, nil, 5], a)
+
+    a = @cls[1]
+    a[1,0] = [2]
+    assert_equal([1, 2], a)
+
+    a = @cls[1]
+    a[1,1] = [2]
+    assert_equal([1, 2], a)
   end

   def test_assoc

## 05-backport-gc.diff
diff --git a/configure.in b/configure.in
index b006a01..a3ccad7 100644
--- a/configure.in
+++ b/configure.in
@@ -1324,6 +1324,29 @@ main() {
 CFLAGS="$save_CFLAGS"])
 AC_DEFINE_UNQUOTED(GC_MARK_STACKFRAME_WORD, $rb_cv_gc_mark_stackframe_word)

+AS_CASE(["$target_os"],
+[openbsd*], [
+  AC_CACHE_CHECK(for heap align log on openbsd, rb_cv_page_size_log,
+    [rb_cv_page_size_log=no
+     for page_log in 12 13; do
+       AC_TRY_RUN([
+#include <math.h>
+#include <unistd.h>
+
+int
+main() {
+  if ((int)log2((double)sysconf(_SC_PAGESIZE)) != $page_log) return 1;
+  return 0;
+}
+       ],
+       rb_cv_page_size_log="$page_log"; break)
+     done])
+  if test $rb_cv_page_size_log != no; then
+    AC_DEFINE_UNQUOTED(HEAP_ALIGN_LOG, $rb_cv_page_size_log)
+  else
+    AC_DEFINE_UNQUOTED(HEAP_ALIGN_LOG, 12)
+  fi
+])

 dnl Checks for library functions.
 AC_TYPE_GETGROUPS
@@ -1424,7 +1447,8 @@ AC_CHECK_FUNCS(fmod killpg wait4 waitpid fork spawnv syscall __syscall chroot ge
 	      setsid telldir seekdir fchmod cosh sinh tanh log2 round\
 	      setuid setgid daemon select_large_fdset setenv unsetenv\
               mktime timegm gmtime_r clock_gettime gettimeofday poll ppoll\
-              pread sendfile shutdown sigaltstack dl_iterate_phdr)
+              pread sendfile shutdown sigaltstack dl_iterate_phdr\
+              dup3 pipe2 posix_memalign memalign)

 AC_CACHE_CHECK(for unsetenv returns a value, rb_cv_unsetenv_return_value,
   [AC_TRY_COMPILE([
diff --git a/gc.c b/gc.c
index e65d0ec..30ed36b 100644
--- a/gc.c
+++ b/gc.c
@@ -24,6 +24,7 @@
 #include <stdio.h>
 #include <setjmp.h>
 #include <sys/types.h>
+#include <assert.h>

 #ifdef HAVE_SYS_TIME_H
 #include <sys/time.h>
@@ -35,7 +36,12 @@

 #if defined _WIN32 || defined __CYGWIN__
 #include <windows.h>
+#elif defined(HAVE_POSIX_MEMALIGN)
+#elif defined(HAVE_MEMALIGN)
+#include <malloc.h>
 #endif
+static void aligned_free(void *);
+static void *aligned_malloc(size_t alignment, size_t size);

 #ifdef HAVE_VALGRIND_MEMCHECK_H
 # include <valgrind/memcheck.h>
@@ -84,10 +90,12 @@ typedef struct {
     unsigned int initial_malloc_limit;
     unsigned int initial_heap_min_slots;
     unsigned int initial_free_min;
+#if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     int gc_stress;
+#endif
 } ruby_gc_params_t;

-ruby_gc_params_t initial_params = {
+static ruby_gc_params_t initial_params = {
     GC_MALLOC_LIMIT,
     HEAP_MIN_SLOTS,
     FREE_MIN,
@@ -103,7 +111,10 @@ ruby_gc_params_t initial_params = {
 int ruby_gc_debug_indent = 0;

 /* for GC profile */
+#ifndef GC_PROFILE_MORE_DETAIL
 #define GC_PROFILE_MORE_DETAIL 0
+#endif
+
 typedef struct gc_profile_record {
     double gc_time;
     double gc_mark_time;
@@ -301,17 +312,20 @@ typedef struct RVALUE {
 #endif

 struct heaps_slot {
-    void *membase;
-    RVALUE *slot;
-    size_t limit;
+    struct heaps_header *membase;
+    RVALUE *freelist;
     struct heaps_slot *next;
     struct heaps_slot *prev;
+    struct heaps_slot *free_next;
+    uintptr_t bits[1];
 };

-struct sorted_heaps_slot {
+struct heaps_header {
+    struct heaps_slot *base;
+    uintptr_t *bits;
     RVALUE *start;
     RVALUE *end;
-    struct heaps_slot *slot;
+    size_t limit;
 };

 struct gc_list {
@@ -319,7 +333,9 @@ struct gc_list {
     struct gc_list *next;
 };

+#ifndef CALC_EXACT_MALLOC_SIZE
 #define CALC_EXACT_MALLOC_SIZE 0
+#endif

 typedef struct rb_objspace {
     struct {
@@ -334,12 +350,13 @@ typedef struct rb_objspace {
 	size_t increment;
 	struct heaps_slot *ptr;
 	struct heaps_slot *sweep_slots;
-	struct sorted_heaps_slot *sorted;
+	struct heaps_slot *free_slots;
+	struct heaps_header **sorted;
 	size_t length;
 	size_t used;
-	RVALUE *freelist;
+        struct heaps_slot *reserve_slots;
 	RVALUE *range[2];
-	RVALUE *freed;
+	struct heaps_header *freed;
 	size_t live_num;
 	size_t free_num;
 	size_t free_min;
@@ -350,6 +367,7 @@ typedef struct rb_objspace {
 	int dont_gc;
 	int dont_lazy_sweep;
 	int during_gc;
+	rb_atomic_t finalizing;
     } flags;
     struct {
 	st_table *table;
@@ -385,13 +403,13 @@ int *ruby_initial_gc_stress_ptr = &rb_objspace.gc_stress;
 #define heaps			objspace->heap.ptr
 #define heaps_length		objspace->heap.length
 #define heaps_used		objspace->heap.used
-#define freelist		objspace->heap.freelist
 #define lomem			objspace->heap.range[0]
 #define himem			objspace->heap.range[1]
 #define heaps_inc		objspace->heap.increment
 #define heaps_freed		objspace->heap.freed
 #define dont_gc 		objspace->flags.dont_gc
 #define during_gc		objspace->flags.during_gc
+#define finalizing		objspace->flags.finalizing
 #define finalizer_table 	objspace->final.table
 #define deferred_final_list	objspace->final.deferred
 #define mark_stack		objspace->markstack.buffer
@@ -403,7 +421,16 @@ int *ruby_initial_gc_stress_ptr = &rb_objspace.gc_stress;
 #define initial_heap_min_slots	initial_params.initial_heap_min_slots
 #define initial_free_min	initial_params.initial_free_min

+#define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0)
+
+#define nonspecial_obj_id(obj) (VALUE)((SIGNED_VALUE)(obj)|FIXNUM_FLAG)
+
+#define HEAP_HEADER(p) ((struct heaps_header *)(p))
+
 static void rb_objspace_call_finalizer(rb_objspace_t *objspace);
+static VALUE define_final0(VALUE obj, VALUE block);
+VALUE rb_define_final(VALUE obj, VALUE block);
+VALUE rb_undefine_final(VALUE obj);

 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
 rb_objspace_t *
@@ -478,14 +505,21 @@ rb_objspace_free(rb_objspace_t *objspace)
 	struct gc_list *list, *next;
 	for (list = global_List; list; list = next) {
 	    next = list->next;
-	    free(list);
+	    xfree(list);
 	}
     }
+    if (objspace->heap.reserve_slots) {
+        struct heaps_slot *list, *next;
+        for (list = objspace->heap.reserve_slots; list; list = next) {
+            next = list->free_next;
+            free(list);
+        }
+    }
     if (objspace->heap.sorted) {
 	size_t i;
 	for (i = 0; i < heaps_used; ++i) {
-	    free(objspace->heap.sorted[i].slot->membase);
-	    free(objspace->heap.sorted[i].slot);
+            free(objspace->heap.sorted[i]->base);
+	    aligned_free(objspace->heap.sorted[i]);
 	}
 	free(objspace->heap.sorted);
 	heaps_used = 0;
@@ -495,23 +529,29 @@ rb_objspace_free(rb_objspace_t *objspace)
 }
 #endif

-/* tiny heap size */
-/* 32KB */
-/*#define HEAP_SIZE 0x8000 */
-/* 128KB */
-/*#define HEAP_SIZE 0x20000 */
-/* 64KB */
-/*#define HEAP_SIZE 0x10000 */
-/* 16KB */
-#define HEAP_SIZE 0x4000
-/* 8KB */
-/*#define HEAP_SIZE 0x2000 */
-/* 4KB */
-/*#define HEAP_SIZE 0x1000 */
-/* 2KB */
-/*#define HEAP_SIZE 0x800 */
-
-#define HEAP_OBJ_LIMIT (unsigned int)(HEAP_SIZE / sizeof(struct RVALUE))
+#ifndef HEAP_ALIGN_LOG
+/* default tiny heap size: 16KB */
+#define HEAP_ALIGN_LOG 14
+#endif
+#define HEAP_ALIGN (1UL << HEAP_ALIGN_LOG)
+#define HEAP_ALIGN_MASK (~(~0UL << HEAP_ALIGN_LOG))
+#define REQUIRED_SIZE_BY_MALLOC (sizeof(size_t) * 5)
+#define HEAP_SIZE (HEAP_ALIGN - REQUIRED_SIZE_BY_MALLOC)
+#define CEILDIV(i, mod) (((i) + (mod) - 1)/(mod))
+
+#define HEAP_OBJ_LIMIT (unsigned int)((HEAP_SIZE - sizeof(struct heaps_header))/sizeof(struct RVALUE))
+#define HEAP_BITMAP_LIMIT CEILDIV(CEILDIV(HEAP_SIZE, sizeof(struct RVALUE)), sizeof(uintptr_t)*8)
+#define HEAP_SLOT_SIZE (sizeof(struct heaps_slot) + (HEAP_BITMAP_LIMIT-1) * sizeof(uintptr_t))
+
+#define GET_HEAP_HEADER(x) (HEAP_HEADER(((uintptr_t)x) & ~(HEAP_ALIGN_MASK)))
+#define GET_HEAP_SLOT(x) (GET_HEAP_HEADER(x)->base)
+#define GET_HEAP_BITMAP(x) (GET_HEAP_HEADER(x)->bits)
+#define NUM_IN_SLOT(p) (((uintptr_t)p & HEAP_ALIGN_MASK)/sizeof(RVALUE))
+#define BITMAP_INDEX(p) (NUM_IN_SLOT(p) / (sizeof(uintptr_t) * 8))
+#define BITMAP_OFFSET(p) (NUM_IN_SLOT(p) & ((sizeof(uintptr_t) * 8)-1))
+#define MARKED_IN_BITMAP(bits, p) (bits[BITMAP_INDEX(p)] & ((uintptr_t)1 << BITMAP_OFFSET(p)))
+#define MARK_IN_BITMAP(bits, p) (bits[BITMAP_INDEX(p)] = bits[BITMAP_INDEX(p)] | ((uintptr_t)1 << BITMAP_OFFSET(p)))
+#define CLEAR_IN_BITMAP(bits, p) (bits[BITMAP_INDEX(p)] &= ~((uintptr_t)1 << BITMAP_OFFSET(p)))

 extern st_table *rb_class_tbl;

@@ -823,8 +863,10 @@ vm_xfree(rb_objspace_t *objspace, void *ptr)
     size_t size;
     ptr = ((size_t *)ptr) - 1;
     size = ((size_t*)ptr)[0];
-    objspace->malloc_params.allocated_size -= size;
-    objspace->malloc_params.allocations--;
+    if (size) {
+	objspace->malloc_params.allocated_size -= size;
+	objspace->malloc_params.allocations--;
+    }
 #endif

     free(ptr);
@@ -984,70 +1026,140 @@ rb_gc_unregister_address(VALUE *addr)
     }
 }

-
 static void
 allocate_sorted_heaps(rb_objspace_t *objspace, size_t next_heaps_length)
 {
-    struct sorted_heaps_slot *p;
-    size_t size;
+    struct heaps_header **p;
+    struct heaps_slot *slot;
+    size_t size, add, i;

-    size = next_heaps_length*sizeof(struct sorted_heaps_slot);
+    size = next_heaps_length*sizeof(struct heaps_header *);
+    add = next_heaps_length - heaps_used;

     if (heaps_used > 0) {
-	p = (struct sorted_heaps_slot *)realloc(objspace->heap.sorted, size);
+	p = (struct heaps_header **)realloc(objspace->heap.sorted, size);
 	if (p) objspace->heap.sorted = p;
     }
     else {
-	p = objspace->heap.sorted = (struct sorted_heaps_slot *)malloc(size);
+	p = objspace->heap.sorted = (struct heaps_header **)malloc(size);
     }

     if (p == 0) {
 	during_gc = 0;
 	rb_memerror();
     }
-    heaps_length = next_heaps_length;
+
+    for (i = 0; i < add; i++) {
+        slot = (struct heaps_slot *)malloc(HEAP_SLOT_SIZE);
+        if (slot == 0) {
+            during_gc = 0;
+            rb_memerror();
+            return;
+        }
+        slot->free_next = objspace->heap.reserve_slots;
+        objspace->heap.reserve_slots = slot;
+    }
+}
+
+static void *
+aligned_malloc(size_t alignment, size_t size)
+{
+    void *res;
+
+#if defined __MINGW32__
+    res = __mingw_aligned_malloc(size, alignment);
+#elif defined _WIN32 && !defined __CYGWIN__
+    res = _aligned_malloc(size, alignment);
+#elif defined(HAVE_POSIX_MEMALIGN)
+    if (posix_memalign(&res, alignment, size) == 0) {
+        return res;
+    }
+    else {
+        return NULL;
+    }
+#elif defined(HAVE_MEMALIGN)
+    res = memalign(alignment, size);
+#else
+    char* aligned;
+    res = malloc(alignment + size + sizeof(void*));
+    aligned = (char*)res + alignment + sizeof(void*);
+    aligned -= ((VALUE)aligned & (alignment - 1));
+    ((void**)aligned)[-1] = res;
+    res = (void*)aligned;
+#endif
+
+#if defined(_DEBUG) || defined(GC_DEBUG)
+    /* alignment must be a power of 2 */
+    assert((alignment - 1) & alignment == 0);
+    assert(alignment % sizeof(void*) == 0);
+#endif
+    return res;
+}
+
+static void
+aligned_free(void *ptr)
+{
+#if defined __MINGW32__
+    __mingw_aligned_free(ptr);
+#elif defined _WIN32 && !defined __CYGWIN__
+    _aligned_free(ptr);
+#elif defined(HAVE_MEMALIGN) || defined(HAVE_POSIX_MEMALIGN)
+    free(ptr);
+#else
+    free(((void**)ptr)[-1]);
+#endif
+}
+
+static void
+link_free_heap_slot(rb_objspace_t *objspace, struct heaps_slot *slot)
+{
+    slot->free_next = objspace->heap.free_slots;
+    objspace->heap.free_slots = slot;
+}
+
+static void
+unlink_free_heap_slot(rb_objspace_t *objspace, struct heaps_slot *slot)
+{
+    objspace->heap.free_slots = slot->free_next;
+    slot->free_next = NULL;
 }

 static void
 assign_heap_slot(rb_objspace_t *objspace)
 {
-    RVALUE *p, *pend, *membase;
+    RVALUE *p, *pend;
+    struct heaps_header *membase;
     struct heaps_slot *slot;
     size_t hi, lo, mid;
     size_t objs;

     objs = HEAP_OBJ_LIMIT;
-    p = (RVALUE*)malloc(HEAP_SIZE);
-    if (p == 0) {
-	during_gc = 0;
-	rb_memerror();
-    }
-    slot = (struct heaps_slot *)malloc(sizeof(struct heaps_slot));
-    if (slot == 0) {
-	xfree(p);
+    membase = (struct heaps_header*)aligned_malloc(HEAP_ALIGN, HEAP_SIZE);
+    if (membase == 0) {
 	during_gc = 0;
 	rb_memerror();
     }
+    assert(objspace->heap.reserve_slots != NULL);
+    slot = objspace->heap.reserve_slots;
+    objspace->heap.reserve_slots = slot->free_next;
     MEMZERO((void*)slot, struct heaps_slot, 1);

     slot->next = heaps;
     if (heaps) heaps->prev = slot;
     heaps = slot;

-    membase = p;
+    p = (RVALUE*)((VALUE)membase + sizeof(struct heaps_header));
     if ((VALUE)p % sizeof(RVALUE) != 0) {
-	p = (RVALUE*)((VALUE)p + sizeof(RVALUE) - ((VALUE)p % sizeof(RVALUE)));
-	if ((HEAP_SIZE - HEAP_OBJ_LIMIT * sizeof(RVALUE)) < (size_t)((char*)p - (char*)membase)) {
-	    objs--;
-	}
+       p = (RVALUE*)((VALUE)p + sizeof(RVALUE) - ((VALUE)p % sizeof(RVALUE)));
+       objs = (HEAP_SIZE - (size_t)((VALUE)p - (VALUE)membase))/sizeof(RVALUE);
     }

     lo = 0;
     hi = heaps_used;
     while (lo < hi) {
-	register RVALUE *mid_membase;
+	register struct heaps_header *mid_membase;
 	mid = (lo + hi) / 2;
-	mid_membase = objspace->heap.sorted[mid].slot->membase;
+        mid_membase = objspace->heap.sorted[mid];
 	if (mid_membase < membase) {
 	    lo = mid + 1;
 	}
@@ -1059,14 +1171,16 @@ assign_heap_slot(rb_objspace_t *objspace)
 	}
     }
     if (hi < heaps_used) {
-	MEMMOVE(&objspace->heap.sorted[hi+1], &objspace->heap.sorted[hi], struct sorted_heaps_slot, heaps_used - hi);
-    }
-    objspace->heap.sorted[hi].slot = slot;
-    objspace->heap.sorted[hi].start = p;
-    objspace->heap.sorted[hi].end = (p + objs);
+	MEMMOVE(&objspace->heap.sorted[hi+1], &objspace->heap.sorted[hi], struct heaps_header*, heaps_used - hi);
+    }
+    objspace->heap.sorted[hi] = membase;
+    membase->start = p;
+    membase->end = (p + objs);
+    membase->base = heaps;
+    membase->bits = heaps->bits;
+    membase->limit = objs;
     heaps->membase = membase;
-    heaps->slot = p;
-    heaps->limit = objs;
+    memset(heaps->bits, 0, HEAP_BITMAP_LIMIT * sizeof(uintptr_t));
     objspace->heap.free_num += objs;
     pend = p + objs;
     if (lomem == 0 || lomem > p) lomem = p;
@@ -1075,19 +1189,24 @@ assign_heap_slot(rb_objspace_t *objspace)

     while (p < pend) {
 	p->as.free.flags = 0;
-	p->as.free.next = freelist;
-	freelist = p;
+	p->as.free.next = heaps->freelist;
+	heaps->freelist = p;
 	p++;
     }
+    link_free_heap_slot(objspace, heaps);
 }

 static void
 add_heap_slots(rb_objspace_t *objspace, size_t add)
 {
     size_t i;
+    size_t next_heaps_length;
+
+    next_heaps_length = heaps_used + add;

-    if ((heaps_used + add) > heaps_length) {
-        allocate_sorted_heaps(objspace, heaps_used + add);
+    if (next_heaps_length > heaps_length) {
+        allocate_sorted_heaps(objspace, next_heaps_length);
+        heaps_length = next_heaps_length;
     }

     for (i = 0; i < add; i++) {
@@ -1137,6 +1256,7 @@ set_heaps_increment(rb_objspace_t *objspace)

     if (next_heaps_length > heaps_length) {
 	allocate_sorted_heaps(objspace, next_heaps_length);
+        heaps_length = next_heaps_length;
     }
 }

@@ -1159,6 +1279,7 @@ rb_during_gc(void)
 }

 #define RANY(o) ((RVALUE*)(o))
+#define has_free_object (objspace->heap.free_slots && objspace->heap.free_slots->freelist)

 VALUE
 rb_newobj(void)
@@ -1179,15 +1300,18 @@ rb_newobj(void)
 	}
     }

-    if (UNLIKELY(!freelist)) {
+    if (UNLIKELY(!has_free_object)) {
 	if (!gc_lazy_sweep(objspace)) {
 	    during_gc = 0;
 	    rb_memerror();
 	}
     }

-    obj = (VALUE)freelist;
-    freelist = freelist->as.free.next;
+    obj = (VALUE)objspace->heap.free_slots->freelist;
+    objspace->heap.free_slots->freelist = RANY(obj)->as.free.next;
+    if (objspace->heap.free_slots->freelist == NULL) {
+        unlink_free_heap_slot(objspace, objspace->heap.free_slots);
+    }

     MEMZERO((void*)obj, RVALUE, 1);
 #ifdef GC_DEBUG
@@ -1356,10 +1480,10 @@ gc_mark_all(rb_objspace_t *objspace)

     init_mark_stack(objspace);
     for (i = 0; i < heaps_used; i++) {
-	p = objspace->heap.sorted[i].start; pend = objspace->heap.sorted[i].end;
+	p = objspace->heap.sorted[i]->start; pend = objspace->heap.sorted[i]->end;
 	while (p < pend) {
-	    if ((p->as.basic.flags & FL_MARK) &&
-		(p->as.basic.flags != FL_MARK)) {
+	    if (MARKED_IN_BITMAP(GET_HEAP_BITMAP(p), p) &&
+		p->as.basic.flags) {
 		gc_mark_children(objspace, (VALUE)p, 0);
 	    }
 	    p++;
@@ -1387,26 +1511,27 @@ static inline int
 is_pointer_to_heap(rb_objspace_t *objspace, void *ptr)
 {
     register RVALUE *p = RANY(ptr);
-    register struct sorted_heaps_slot *heap;
+    register struct heaps_header *heap;
     register size_t hi, lo, mid;

     if (p < lomem || p > himem) return FALSE;
     if ((VALUE)p % sizeof(RVALUE) != 0) return FALSE;
+    heap = GET_HEAP_HEADER(p);

     /* check if p looks like a pointer using bsearch*/
     lo = 0;
     hi = heaps_used;
     while (lo < hi) {
 	mid = (lo + hi) / 2;
-	heap = &objspace->heap.sorted[mid];
-	if (heap->start <= p) {
-	    if (p < heap->end)
-		return TRUE;
-	    lo = mid + 1;
-	}
-	else {
-	    hi = mid;
-	}
+        if (heap > objspace->heap.sorted[mid]) {
+            lo = mid + 1;
+        }
+        else if (heap < objspace->heap.sorted[mid]) {
+            hi = mid;
+        }
+        else {
+            return (p >= heap->start && p < heap->end) ? TRUE : FALSE;
+        }
     }
     return FALSE;
 }
@@ -1449,10 +1574,10 @@ struct mark_tbl_arg {
 };

 static int
-mark_entry(ID key, VALUE value, st_data_t data)
+mark_entry(st_data_t key, st_data_t value, st_data_t data)
 {
     struct mark_tbl_arg *arg = (void*)data;
-    gc_mark(arg->objspace, value, arg->lev);
+    gc_mark(arg->objspace, (VALUE)value, arg->lev);
     return ST_CONTINUE;
 }

@@ -1467,10 +1592,10 @@ mark_tbl(rb_objspace_t *objspace, st_table *tbl, int lev)
 }

 static int
-mark_key(VALUE key, VALUE value, st_data_t data)
+mark_key(st_data_t key, st_data_t value, st_data_t data)
 {
     struct mark_tbl_arg *arg = (void*)data;
-    gc_mark(arg->objspace, key, arg->lev);
+    gc_mark(arg->objspace, (VALUE)key, arg->lev);
     return ST_CONTINUE;
 }

@@ -1491,11 +1616,11 @@ rb_mark_set(st_table *tbl)
 }

 static int
-mark_keyvalue(VALUE key, VALUE value, st_data_t data)
+mark_keyvalue(st_data_t key, st_data_t value, st_data_t data)
 {
     struct mark_tbl_arg *arg = (void*)data;
-    gc_mark(arg->objspace, key, arg->lev);
-    gc_mark(arg->objspace, value, arg->lev);
+    gc_mark(arg->objspace, (VALUE)key, arg->lev);
+    gc_mark(arg->objspace, (VALUE)value, arg->lev);
     return ST_CONTINUE;
 }

@@ -1565,7 +1690,9 @@ mark_m_tbl(rb_objspace_t *objspace, st_table *tbl, int lev)
 static int
 free_method_entry_i(ID key, rb_method_entry_t *me, st_data_t data)
 {
-    rb_free_method_entry(me);
+    if (!me->mark) {
+	rb_free_method_entry(me);
+    }
     return ST_CONTINUE;
 }

@@ -1622,6 +1749,16 @@ rb_gc_mark_maybe(VALUE obj)
     }
 }

+static int
+gc_mark_ptr(rb_objspace_t *objspace, VALUE ptr)
+{
+    register uintptr_t *bits = GET_HEAP_BITMAP(ptr);
+    if (MARKED_IN_BITMAP(bits, ptr)) return 0;
+    MARK_IN_BITMAP(bits, ptr);
+    objspace->heap.live_num++;
+    return 1;
+}
+
 static void
 gc_mark(rb_objspace_t *objspace, VALUE ptr, int lev)
 {
@@ -1630,9 +1767,7 @@ gc_mark(rb_objspace_t *objspace, VALUE ptr, int lev)
     obj = RANY(ptr);
     if (rb_special_const_p(ptr)) return; /* special const not marked */
     if (obj->as.basic.flags == 0) return;       /* free cell */
-    if (obj->as.basic.flags & FL_MARK) return;  /* already marked */
-    obj->as.basic.flags |= FL_MARK;
-    objspace->heap.live_num++;
+    if (!gc_mark_ptr(objspace, ptr)) return;	/* already marked */

     if (lev > GC_LEVEL_MAX || (lev == 0 && stack_check(STACKFRAME_FOR_GC_MARK))) {
 	if (!mark_stack_overflow) {
@@ -1659,6 +1794,7 @@ static void
 gc_mark_children(rb_objspace_t *objspace, VALUE ptr, int lev)
 {
     register RVALUE *obj = RANY(ptr);
+    register uintptr_t *bits;

     goto marking;		/* skip */

@@ -1666,8 +1802,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr, int lev)
     obj = RANY(ptr);
     if (rb_special_const_p(ptr)) return; /* special const not marked */
     if (obj->as.basic.flags == 0) return;       /* free cell */
-    if (obj->as.basic.flags & FL_MARK) return;  /* already marked */
-    obj->as.basic.flags |= FL_MARK;
+    bits = GET_HEAP_BITMAP(ptr);
+    if (MARKED_IN_BITMAP(bits, ptr)) return;  /* already marked */
+    MARK_IN_BITMAP(bits, ptr);
     objspace->heap.live_num++;

   marking:
@@ -1819,6 +1956,7 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr, int lev)
       case T_CLASS:
       case T_MODULE:
 	mark_m_tbl(objspace, RCLASS_M_TBL(obj), lev);
+	if (!RCLASS_EXT(obj)) break;
 	mark_tbl(objspace, RCLASS_IV_TBL(obj), lev);
 	mark_const_tbl(objspace, RCLASS_CONST_TBL(obj), lev);
 	ptr = RCLASS_SUPER(obj);
@@ -1929,13 +2067,18 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr, int lev)

 static int obj_free(rb_objspace_t *, VALUE);

-static inline void
-add_freelist(rb_objspace_t *objspace, RVALUE *p)
+static inline struct heaps_slot *
+add_slot_local_freelist(rb_objspace_t *objspace, RVALUE *p)
 {
+    struct heaps_slot *slot;
+
     VALGRIND_MAKE_MEM_UNDEFINED((void*)p, sizeof(RVALUE));
     p->as.free.flags = 0;
-    p->as.free.next = freelist;
-    freelist = p;
+    slot = GET_HEAP_SLOT(p);
+    p->as.free.next = slot->freelist;
+    slot->freelist = p;
+
+    return slot;
 }

 static void
@@ -1945,17 +2088,13 @@ finalize_list(rb_objspace_t *objspace, RVALUE *p)
 	RVALUE *tmp = p->as.free.next;
 	run_final(objspace, (VALUE)p);
 	if (!FL_TEST(p, FL_SINGLETON)) { /* not freeing page */
-            if (objspace->heap.sweep_slots) {
-                p->as.free.flags = 0;
-            }
-            else {
+            add_slot_local_freelist(objspace, p);
+            if (!is_lazy_sweeping(objspace)) {
                 GC_PROF_DEC_LIVE_NUM;
-                add_freelist(objspace, p);
             }
 	}
 	else {
-	    struct heaps_slot *slot = (struct heaps_slot *)(VALUE)RDATA(p)->dmark;
-	    slot->limit--;
+            GET_HEAP_HEADER(p)->limit--;
 	}
 	p = tmp;
     }
@@ -1976,22 +2115,23 @@ unlink_heap_slot(rb_objspace_t *objspace, struct heaps_slot *slot)
     slot->next = NULL;
 }

-
 static void
 free_unused_heaps(rb_objspace_t *objspace)
 {
     size_t i, j;
-    RVALUE *last = 0;
+    struct heaps_header *last = 0;

     for (i = j = 1; j < heaps_used; i++) {
-	if (objspace->heap.sorted[i].slot->limit == 0) {
+	if (objspace->heap.sorted[i]->limit == 0) {
+            struct heaps_slot* h = objspace->heap.sorted[i]->base;
+            h->free_next = objspace->heap.reserve_slots;
+            objspace->heap.reserve_slots = h;
 	    if (!last) {
-		last = objspace->heap.sorted[i].slot->membase;
+                last = objspace->heap.sorted[i];
 	    }
 	    else {
-		free(objspace->heap.sorted[i].slot->membase);
+		aligned_free(objspace->heap.sorted[i]);
 	    }
-            free(objspace->heap.sorted[i].slot);
 	    heaps_used--;
 	}
 	else {
@@ -2003,70 +2143,84 @@ free_unused_heaps(rb_objspace_t *objspace)
     }
     if (last) {
 	if (last < heaps_freed) {
-	    free(heaps_freed);
+	    aligned_free(heaps_freed);
 	    heaps_freed = last;
 	}
 	else {
-	    free(last);
+	    aligned_free(last);
 	}
     }
 }

 static void
+gc_clear_slot_bits(struct heaps_slot *slot)
+{
+    memset(slot->bits, 0, HEAP_BITMAP_LIMIT * sizeof(uintptr_t));
+}
+
+static void
 slot_sweep(rb_objspace_t *objspace, struct heaps_slot *sweep_slot)
 {
     size_t free_num = 0, final_num = 0;
     RVALUE *p, *pend;
-    RVALUE *free = freelist, *final = deferred_final_list;
+    RVALUE *final = deferred_final_list;
     int deferred;
+    uintptr_t *bits;

-    p = sweep_slot->slot; pend = p + sweep_slot->limit;
+    p = sweep_slot->membase->start; pend = sweep_slot->membase->end;
+    bits = sweep_slot->bits;
     while (p < pend) {
-        if (!(p->as.basic.flags & FL_MARK)) {
-            if (p->as.basic.flags &&
-                ((deferred = obj_free(objspace, (VALUE)p)) ||
-		 (FL_TEST(p, FL_FINALIZE)))) {
-                if (!deferred) {
-                    p->as.free.flags = T_ZOMBIE;
-                    RDATA(p)->dfree = 0;
+        if ((!(MARKED_IN_BITMAP(bits, p))) && BUILTIN_TYPE(p) != T_ZOMBIE) {
+            if (p->as.basic.flags) {
+                if ((deferred = obj_free(objspace, (VALUE)p)) ||
+                    (FL_TEST(p, FL_FINALIZE))) {
+                    if (!deferred) {
+                        p->as.free.flags = T_ZOMBIE;
+                        RDATA(p)->dfree = 0;
+                    }
+                    p->as.free.next = deferred_final_list;
+                    deferred_final_list = p;
+                    assert(BUILTIN_TYPE(p) == T_ZOMBIE);
+                    final_num++;
+                }
+                else {
+                    VALGRIND_MAKE_MEM_UNDEFINED((void*)p, sizeof(RVALUE));
+                    p->as.free.flags = 0;
+                    p->as.free.next = sweep_slot->freelist;
+                    sweep_slot->freelist = p;
+                    free_num++;
                 }
-                p->as.free.flags |= FL_MARK;
-                p->as.free.next = deferred_final_list;
-                deferred_final_list = p;
-                final_num++;
             }
             else {
-                add_freelist(objspace, p);
                 free_num++;
             }
         }
-        else if (BUILTIN_TYPE(p) == T_ZOMBIE) {
-            /* objects to be finalized */
-            /* do nothing remain marked */
-        }
-        else {
-            RBASIC(p)->flags &= ~FL_MARK;
-        }
         p++;
     }
-    if (final_num + free_num == sweep_slot->limit &&
+    gc_clear_slot_bits(sweep_slot);
+    if (final_num + free_num == sweep_slot->membase->limit &&
         objspace->heap.free_num > objspace->heap.do_heap_free) {
         RVALUE *pp;

         for (pp = deferred_final_list; pp != final; pp = pp->as.free.next) {
-	    RDATA(pp)->dmark = (void (*)(void *))(VALUE)sweep_slot;
+	    RDATA(pp)->dmark = 0;
             pp->as.free.flags |= FL_SINGLETON; /* freeing page mark */
         }
-        sweep_slot->limit = final_num;
-        freelist = free;	/* cancel this page from freelist */
+        sweep_slot->membase->limit = final_num;
         unlink_heap_slot(objspace, sweep_slot);
     }
     else {
+        if (free_num > 0) {
+            link_free_heap_slot(objspace, sweep_slot);
+        }
+        else {
+            sweep_slot->free_next = NULL;
+        }
         objspace->heap.free_num += free_num;
     }
     objspace->heap.final_num += final_num;

-    if (deferred_final_list) {
+    if (deferred_final_list && !finalizing) {
         rb_thread_t *th = GET_THREAD();
         if (th) {
             RUBY_VM_SET_FINALIZER_INTERRUPT(th);
@@ -2078,7 +2232,7 @@ static int
 ready_to_gc(rb_objspace_t *objspace)
 {
     if (dont_gc || during_gc) {
-	if (!freelist) {
+	if (!has_free_object) {
             if (!heaps_increment(objspace)) {
                 set_heaps_increment(objspace);
                 heaps_increment(objspace);
@@ -2092,7 +2246,6 @@ ready_to_gc(rb_objspace_t *objspace)
 static void
 before_gc_sweep(rb_objspace_t *objspace)
 {
-    freelist = 0;
     objspace->heap.do_heap_free = (size_t)((heaps_used * HEAP_OBJ_LIMIT) * 0.65);
     objspace->heap.free_min = (size_t)((heaps_used * HEAP_OBJ_LIMIT)  * 0.2);
     if (objspace->heap.free_min < initial_free_min) {
@@ -2101,6 +2254,7 @@ before_gc_sweep(rb_objspace_t *objspace)
     }
     objspace->heap.sweep_slots = heaps;
     objspace->heap.free_num = 0;
+    objspace->heap.free_slots = NULL;

     /* sweep unlinked method entries */
     if (GET_VM()->unlinked_method_entry_list) {
@@ -2137,7 +2291,7 @@ lazy_sweep(rb_objspace_t *objspace)
         next = objspace->heap.sweep_slots->next;
 	slot_sweep(objspace, objspace->heap.sweep_slots);
         objspace->heap.sweep_slots = next;
-        if (freelist) {
+        if (has_free_object) {
             during_gc = 0;
             return TRUE;
         }
@@ -2149,10 +2303,10 @@ static void
 rest_sweep(rb_objspace_t *objspace)
 {
     if (objspace->heap.sweep_slots) {
-       while (objspace->heap.sweep_slots) {
-           lazy_sweep(objspace);
-       }
-       after_gc_sweep(objspace);
+	while (objspace->heap.sweep_slots) {
+	    lazy_sweep(objspace);
+	}
+	after_gc_sweep(objspace);
     }
 }

@@ -2199,9 +2353,9 @@ gc_lazy_sweep(rb_objspace_t *objspace)
     }

     GC_PROF_SWEEP_TIMER_START;
-    if(!(res = lazy_sweep(objspace))) {
+    if (!(res = lazy_sweep(objspace))) {
         after_gc_sweep(objspace);
-        if(freelist) {
+        if (has_free_object) {
             res = TRUE;
             during_gc = 0;
         }
@@ -2234,12 +2388,17 @@ void
 rb_gc_force_recycle(VALUE p)
 {
     rb_objspace_t *objspace = &rb_objspace;
-    GC_PROF_DEC_LIVE_NUM;
-    if (RBASIC(p)->flags & FL_MARK) {
-        RANY(p)->as.free.flags = 0;
+    struct heaps_slot *slot;
+
+    if (MARKED_IN_BITMAP(GET_HEAP_BITMAP(p), p)) {
+        add_slot_local_freelist(objspace, (RVALUE *)p);
     }
     else {
-        add_freelist(objspace, (RVALUE *)p);
+        GC_PROF_DEC_LIVE_NUM;
+        slot = add_slot_local_freelist(objspace, (RVALUE *)p);
+        if (slot->free_next == NULL) {
+            link_free_heap_slot(objspace, slot);
+        }
     }
 }

@@ -2611,7 +2770,7 @@ static VALUE
 objspace_each_objects(VALUE arg)
 {
     size_t i;
-    RVALUE *membase = 0;
+    struct heaps_header *membase = 0;
     RVALUE *pstart, *pend;
     rb_objspace_t *objspace = &rb_objspace;
     struct each_obj_args *args = (struct each_obj_args *)arg;
@@ -2619,16 +2778,16 @@ objspace_each_objects(VALUE arg)

     i = 0;
     while (i < heaps_used) {
-	while (0 < i && (uintptr_t)membase < (uintptr_t)objspace->heap.sorted[i-1].slot->membase)
+	while (0 < i && membase < objspace->heap.sorted[i-1])
 	    i--;
-	while (i < heaps_used && (uintptr_t)objspace->heap.sorted[i].slot->membase <= (uintptr_t)membase)
+	while (i < heaps_used && objspace->heap.sorted[i] <= membase)
 	    i++;
 	if (heaps_used <= i)
 	  break;
-	membase = objspace->heap.sorted[i].slot->membase;
+	membase = objspace->heap.sorted[i];

-	pstart = objspace->heap.sorted[i].slot->slot;
-	pend = pstart + objspace->heap.sorted[i].slot->limit;
+	pstart = membase->start;
+	pend = membase->end;

 	for (; pstart != pend; pstart++) {
 	    if (pstart->as.basic.flags) {
@@ -2642,6 +2801,7 @@ objspace_each_objects(VALUE arg)
 	    }
 	}
     }
+    RB_GC_GUARD(v);

     return Qnil;
 }
@@ -2885,11 +3045,12 @@ run_single_final(VALUE arg)
 }

 static void
-run_finalizer(rb_objspace_t *objspace, VALUE objid, VALUE table)
+run_finalizer(rb_objspace_t *objspace, VALUE obj, VALUE table)
 {
     long i;
     int status;
     VALUE args[3];
+    VALUE objid = nonspecial_obj_id(obj);

     if (RARRAY_LEN(table) > 0) {
 	args[1] = rb_obj_freeze(rb_ary_new3(1, objid));
@@ -2913,13 +3074,11 @@ run_finalizer(rb_objspace_t *objspace, VALUE objid, VALUE table)
 static void
 run_final(rb_objspace_t *objspace, VALUE obj)
 {
-    VALUE objid;
     RUBY_DATA_FUNC free_func = 0;
     st_data_t key, table;

     objspace->heap.final_num--;

-    objid = rb_obj_id(obj);	/* make obj into id */
     RBASIC(obj)->klass = 0;

     if (RTYPEDDATA_P(obj)) {
@@ -2934,7 +3093,7 @@ run_final(rb_objspace_t *objspace, VALUE obj)

     key = (st_data_t)obj;
     if (st_delete(finalizer_table, &key, &table)) {
-	run_finalizer(objspace, objid, (VALUE)table);
+	run_finalizer(objspace, obj, (VALUE)table);
     }
 }

@@ -2952,16 +3111,20 @@ finalize_deferred(rb_objspace_t *objspace)
 void
 rb_gc_finalize_deferred(void)
 {
-    finalize_deferred(&rb_objspace);
+    rb_objspace_t *objspace = &rb_objspace;
+    if (ATOMIC_EXCHANGE(finalizing, 1)) return;
+    finalize_deferred(objspace);
+    ATOMIC_SET(finalizing, 0);
 }

 static int
 chain_finalized_object(st_data_t key, st_data_t val, st_data_t arg)
 {
     RVALUE *p = (RVALUE *)key, **final_list = (RVALUE **)arg;
-    if ((p->as.basic.flags & (FL_FINALIZE|FL_MARK)) == FL_FINALIZE) {
+    if ((p->as.basic.flags & FL_FINALIZE) == FL_FINALIZE &&
+        !MARKED_IN_BITMAP(GET_HEAP_BITMAP(p), p)) {
 	if (BUILTIN_TYPE(p) != T_ZOMBIE) {
-	    p->as.free.flags = FL_MARK | T_ZOMBIE; /* remain marked */
+	    p->as.free.flags = T_ZOMBIE;
 	    RDATA(p)->dfree = 0;
 	}
 	p->as.free.next = *final_list;
@@ -3004,6 +3167,8 @@ rb_objspace_call_finalizer(rb_objspace_t *objspace)
     /* run finalizers */
     rest_sweep(objspace);

+    if (ATOMIC_EXCHANGE(finalizing, 1)) return;
+
     do {
 	/* XXX: this loop will make no sense */
 	/* because mark will not be removed */
@@ -3018,8 +3183,9 @@ rb_objspace_call_finalizer(rb_objspace_t *objspace)
 	st_foreach(finalizer_table, force_chain_object, (st_data_t)&list);
 	while (list) {
 	    struct force_finalize_list *curr = list;
-	    run_finalizer(objspace, rb_obj_id(curr->obj), curr->table);
-	    st_delete(finalizer_table, (st_data_t*)&curr->obj, 0);
+	    st_data_t obj = (st_data_t)curr->obj;
+	    run_finalizer(objspace, curr->obj, curr->table);
+	    st_delete(finalizer_table, &obj, 0);
 	    list = curr->next;
 	    xfree(curr);
 	}
@@ -3030,7 +3196,7 @@ rb_objspace_call_finalizer(rb_objspace_t *objspace)

     /* run data object's finalizers */
     for (i = 0; i < heaps_used; i++) {
-	p = objspace->heap.sorted[i].start; pend = objspace->heap.sorted[i].end;
+	p = objspace->heap.sorted[i]->start; pend = objspace->heap.sorted[i]->end;
 	while (p < pend) {
 	    if (BUILTIN_TYPE(p) == T_DATA &&
 		DATA_PTR(p) && RANY(p)->as.data.dfree &&
@@ -3066,6 +3232,7 @@ rb_objspace_call_finalizer(rb_objspace_t *objspace)

     st_free_table(finalizer_table);
     finalizer_table = 0;
+    ATOMIC_SET(finalizing, 0);
 }

 void
@@ -3073,10 +3240,42 @@ rb_gc(void)
 {
     rb_objspace_t *objspace = &rb_objspace;
     garbage_collect(objspace);
-    finalize_deferred(objspace);
+    if (!finalizing) finalize_deferred(objspace);
     free_unused_heaps(objspace);
 }

+static inline int
+is_id_value(rb_objspace_t *objspace, VALUE ptr)
+{
+    if (!is_pointer_to_heap(objspace, (void *)ptr)) return FALSE;
+    if (BUILTIN_TYPE(ptr) > T_FIXNUM) return FALSE;
+    if (BUILTIN_TYPE(ptr) == T_ICLASS) return FALSE;
+    return TRUE;
+}
+
+static inline int
+is_dead_object(rb_objspace_t *objspace, VALUE ptr)
+{
+    struct heaps_slot *slot = objspace->heap.sweep_slots;
+    if (!is_lazy_sweeping(objspace) || MARKED_IN_BITMAP(GET_HEAP_BITMAP(ptr), ptr))
+	return FALSE;
+    while (slot) {
+	if ((VALUE)slot->membase->start <= ptr && ptr < (VALUE)(slot->membase->end))
+	    return TRUE;
+	slot = slot->next;
+    }
+    return FALSE;
+}
+
+static inline int
+is_live_object(rb_objspace_t *objspace, VALUE ptr)
+{
+    if (BUILTIN_TYPE(ptr) == 0) return FALSE;
+    if (RBASIC(ptr)->klass == 0) return FALSE;
+    if (is_dead_object(objspace, ptr)) return FALSE;
+    return TRUE;
+}
+
 /*
  *  call-seq:
  *     ObjectSpace._id2ref(object_id) -> an_object
@@ -3119,11 +3318,10 @@ id2ref(VALUE obj, VALUE objid)
 	return ID2SYM(symid);
     }

-    if (!is_pointer_to_heap(objspace, (void *)ptr) ||
-	BUILTIN_TYPE(ptr) > T_FIXNUM || BUILTIN_TYPE(ptr) == T_ICLASS) {
+    if (!is_id_value(objspace, ptr)) {
 	rb_raise(rb_eRangeError, "%p is not id value", p0);
     }
-    if (BUILTIN_TYPE(ptr) == 0 || RBASIC(ptr)->klass == 0) {
+    if (!is_live_object(objspace, ptr)) {
 	rb_raise(rb_eRangeError, "%p is recycled object", p0);
     }
     return (VALUE)ptr;
@@ -3193,7 +3391,7 @@ rb_obj_id(VALUE obj)
     if (SPECIAL_CONST_P(obj)) {
         return LONG2NUM((SIGNED_VALUE)obj);
     }
-    return (VALUE)((SIGNED_VALUE)obj|FIXNUM_FLAG);
+    return nonspecial_obj_id(obj);
 }

 static int
@@ -3236,7 +3434,7 @@ count_objects(int argc, VALUE *argv, VALUE os)
     VALUE hash;

     if (rb_scan_args(argc, argv, "01", &hash) == 1) {
-        if (TYPE(hash) != T_HASH)
+        if (!RB_TYPE_P(hash, T_HASH))
             rb_raise(rb_eTypeError, "non-hash given");
     }

@@ -3247,7 +3445,7 @@ count_objects(int argc, VALUE *argv, VALUE os)
     for (i = 0; i < heaps_used; i++) {
         RVALUE *p, *pend;

-        p = objspace->heap.sorted[i].start; pend = objspace->heap.sorted[i].end;
+        p = objspace->heap.sorted[i]->start; pend = objspace->heap.sorted[i]->end;
         for (;p < pend; p++) {
             if (p->as.basic.flags) {
                 counts[BUILTIN_TYPE(p)]++;
@@ -3256,7 +3454,7 @@ count_objects(int argc, VALUE *argv, VALUE os)
                 freed++;
             }
         }
-        total += objspace->heap.sorted[i].slot->limit;
+        total += objspace->heap.sorted[i]->limit;
     }

     if (hash == Qnil) {
@@ -3355,7 +3553,7 @@ gc_stat(int argc, VALUE *argv, VALUE self)
     VALUE hash;

     if (rb_scan_args(argc, argv, "01", &hash) == 1) {
-        if (TYPE(hash) != T_HASH)
+        if (!RB_TYPE_P(hash, T_HASH))
             rb_raise(rb_eTypeError, "non-hash given");
     }

@@ -3410,6 +3608,33 @@ gc_malloc_allocations(VALUE self)
 }
 #endif

+/*
+ *  call-seq:
+ *     GC::Profiler.raw_data -> [Hash, ...]
+ *
+ *  Returns an Array of individual raw profile data Hashes ordered
+ *  from earliest to latest by <tt>:GC_INVOKE_TIME</tt>.  For example:
+ *
+ *    [{:GC_TIME=>1.3000000000000858e-05,
+ *      :GC_INVOKE_TIME=>0.010634999999999999,
+ *      :HEAP_USE_SIZE=>289640,
+ *      :HEAP_TOTAL_SIZE=>588960,
+ *      :HEAP_TOTAL_OBJECTS=>14724,
+ *      :GC_IS_MARKED=>false},
+ *      ...
+ *    ]
+ *
+ *  The keys mean:
+ *
+ *  +:GC_TIME+:: Time taken for this run in milliseconds
+ *  +:GC_INVOKE_TIME+:: Time the GC was invoked since startup in seconds
+ *  +:HEAP_USE_SIZE+:: Bytes of heap used
+ *  +:HEAP_TOTAL_SIZE+:: Size of heap in bytes
+ *  +:HEAP_TOTAL_OBJECTS+:: Number of objects
+ *  +:GC_IS_MARKED+:: Is the GC in the mark phase
+ *
+ */
+
 static VALUE
 gc_profile_record_get(void)
 {
@@ -3602,6 +3827,7 @@ Init_GC(void)
     rb_mProfiler = rb_define_module_under(rb_mGC, "Profiler");
     rb_define_singleton_method(rb_mProfiler, "enabled?", gc_profile_enable_get, 0);
     rb_define_singleton_method(rb_mProfiler, "enable", gc_profile_enable, 0);
+    rb_define_singleton_method(rb_mProfiler, "raw_data", gc_profile_record_get, 0);
     rb_define_singleton_method(rb_mProfiler, "disable", gc_profile_disable, 0);
     rb_define_singleton_method(rb_mProfiler, "clear", gc_profile_clear, 0);
     rb_define_singleton_method(rb_mProfiler, "result", gc_profile_result, 0);

## 06-st_opt_sparse_array.diff
diff --git a/class.c b/class.c
index 450c334..6e97cb0 100644
--- a/class.c
+++ b/class.c
@@ -31,7 +31,7 @@
 #include "internal.h"
 #include <ctype.h>

-extern st_table *rb_class_tbl;
+extern sa_table rb_class_tbl;
 static ID id_attached;

 /**
@@ -50,14 +50,24 @@ static VALUE
 class_alloc(VALUE flags, VALUE klass)
 {
     rb_classext_t *ext = ALLOC(rb_classext_t);
+    rb_class_cache_t *cache = ALLOC(rb_class_cache_t);
     NEWOBJ(obj, struct RClass);
     OBJSETUP(obj, klass, flags);
     obj->ptr = ext;
-    RCLASS_IV_TBL(obj) = 0;
-    RCLASS_CONST_TBL(obj) = 0;
-    RCLASS_M_TBL(obj) = 0;
-    RCLASS_SUPER(obj) = 0;
-    RCLASS_IV_INDEX_TBL(obj) = 0;
+    obj->cache = cache;
+    MEMZERO(ext, struct rb_classext_struct, 1);
+    MEMZERO(cache, struct rb_class_cache_struct, 1);
+    return (VALUE)obj;
+}
+
+static VALUE
+iclass_alloc()
+{
+    rb_class_cache_t *cache = ALLOC(rb_class_cache_t);
+    NEWOBJ(obj, struct RClass);
+    OBJSETUP(obj, rb_cClass, T_ICLASS);
+    obj->cache = cache;
+    MEMZERO(cache, struct rb_class_cache_struct, 1);
     return (VALUE)obj;
 }

@@ -77,7 +87,6 @@ rb_class_boot(VALUE super)
     VALUE klass = class_alloc(T_CLASS, rb_cClass);

     RCLASS_SUPER(klass) = super;
-    RCLASS_M_TBL(klass) = st_init_numtable();

     OBJ_INFECT(klass, super);
     return (VALUE)klass;
@@ -120,85 +129,59 @@ rb_class_new(VALUE super)
     return rb_class_boot(super);
 }

-struct clone_method_data {
-    st_table *tbl;
-    VALUE klass;
-};
-
 VALUE rb_iseq_clone(VALUE iseqval, VALUE newcbase);

-static int
-clone_method(ID mid, const rb_method_entry_t *me, struct clone_method_data *data)
+static void
+clone_method(ID mid, const rb_method_entry_t *me, VALUE klass)
 {
     VALUE newiseqval;
     if (me->def && me->def->type == VM_METHOD_TYPE_ISEQ) {
 	rb_iseq_t *iseq;
-	newiseqval = rb_iseq_clone(me->def->body.iseq->self, data->klass);
+	newiseqval = rb_iseq_clone(me->def->body.iseq->self, klass);
 	GetISeqPtr(newiseqval, iseq);
-	rb_add_method(data->klass, mid, VM_METHOD_TYPE_ISEQ, iseq, me->flag);
+	rb_add_method(klass, mid, VM_METHOD_TYPE_ISEQ, iseq, me->flag);
 	RB_GC_GUARD(newiseqval);
     }
     else {
-	rb_method_entry_set(data->klass, mid, me, me->flag);
+	rb_method_entry_set(klass, mid, me, me->flag);
     }
-    return ST_CONTINUE;
 }

-static int
-clone_const(ID key, const rb_const_entry_t *ce, st_table *tbl)
+static void
+clone_const(sa_index_t key, st_data_t ce, sa_table *tbl)
 {
     rb_const_entry_t *nce = ALLOC(rb_const_entry_t);
-    *nce = *ce;
-    st_insert(tbl, key, (st_data_t)nce);
-    return ST_CONTINUE;
-}
-
-static int
-clone_const_i(st_data_t key, st_data_t value, st_data_t data)
-{
-    return clone_const((ID)key, (const rb_const_entry_t *)value, (st_table *)data);
+    *nce = *(const rb_const_entry_t*)ce;
+    sa_insert(tbl, (sa_index_t)key, (st_data_t)nce);
 }

 /* :nodoc: */
 VALUE
 rb_mod_init_copy(VALUE clone, VALUE orig)
 {
+    ID id;
     rb_obj_init_copy(clone, orig);
     if (!FL_TEST(CLASS_OF(clone), FL_SINGLETON)) {
 	RBASIC(clone)->klass = rb_singleton_class_clone(orig);
 	rb_singleton_class_attached(RBASIC(clone)->klass, (VALUE)clone);
     }
     RCLASS_SUPER(clone) = RCLASS_SUPER(orig);
-    if (RCLASS_IV_TBL(orig)) {
-	st_data_t id;

-	if (RCLASS_IV_TBL(clone)) {
-	    st_free_table(RCLASS_IV_TBL(clone));
-	}
-	RCLASS_IV_TBL(clone) = st_copy(RCLASS_IV_TBL(orig));
-	CONST_ID(id, "__classpath__");
-	st_delete(RCLASS_IV_TBL(clone), &id, 0);
-	CONST_ID(id, "__classid__");
-	st_delete(RCLASS_IV_TBL(clone), &id, 0);
-    }
-    if (RCLASS_CONST_TBL(orig)) {
-	if (RCLASS_CONST_TBL(clone)) {
-	    rb_free_const_table(RCLASS_CONST_TBL(clone));
-	}
-	RCLASS_CONST_TBL(clone) = st_init_numtable();
-	st_foreach(RCLASS_CONST_TBL(orig), clone_const_i, (st_data_t)RCLASS_CONST_TBL(clone));
-    }
-    if (RCLASS_M_TBL(orig)) {
-	struct clone_method_data data;
+    sa_copy_to(RCLASS_IV_TBL(orig), RCLASS_IV_TBL(clone));
+    CONST_ID(id, "__classpath__");
+    sa_delete(RCLASS_IV_TBL(clone), (sa_index_t)id, 0);
+    CONST_ID(id, "__classid__");
+    sa_delete(RCLASS_IV_TBL(clone), (sa_index_t)id, 0);

-	if (RCLASS_M_TBL(clone)) {
-	    rb_free_m_table(RCLASS_M_TBL(clone));
-	}
-	data.tbl = RCLASS_M_TBL(clone) = st_init_numtable();
-	data.klass = clone;
-	st_foreach(RCLASS_M_TBL(orig), clone_method,
-		   (st_data_t)&data);
-    }
+    sa_clear(RCLASS_CONST_TBL(clone));
+    SA_FOREACH_START(RCLASS_CONST_TBL(orig));
+    clone_const(entry->key, value, RCLASS_CONST_TBL(clone));
+    SA_FOREACH_END();
+
+    rb_free_m_table(RCLASS_M_TBL(clone));
+    SA_FOREACH_START(RCLASS_M_TBL(orig));
+    clone_method(entry->key, (const rb_method_entry_t *)value, clone);
+    SA_FOREACH_END();

     return clone;
 }
@@ -227,7 +210,6 @@ rb_singleton_class_clone(VALUE obj)
     if (!FL_TEST(klass, FL_SINGLETON))
 	return klass;
     else {
-	struct clone_method_data data;
 	/* copy singleton(unnamed) class */
 	VALUE clone = class_alloc((RBASIC(klass)->flags & ~(FL_MARK)), 0);

@@ -239,18 +221,16 @@ rb_singleton_class_clone(VALUE obj)
 	}

 	RCLASS_SUPER(clone) = RCLASS_SUPER(klass);
-	if (RCLASS_IV_TBL(klass)) {
-	    RCLASS_IV_TBL(clone) = st_copy(RCLASS_IV_TBL(klass));
-	}
-	if (RCLASS_CONST_TBL(klass)) {
-	    RCLASS_CONST_TBL(clone) = st_init_numtable();
-	    st_foreach(RCLASS_CONST_TBL(klass), clone_const_i, (st_data_t)RCLASS_CONST_TBL(clone));
-	}
-	RCLASS_M_TBL(clone) = st_init_numtable();
-	data.tbl = RCLASS_M_TBL(clone);
-	data.klass = (VALUE)clone;
-	st_foreach(RCLASS_M_TBL(klass), clone_method,
-		   (st_data_t)&data);
+        sa_copy_to(RCLASS_IV_TBL(klass), RCLASS_IV_TBL(clone));
+
+        SA_FOREACH_START(RCLASS_CONST_TBL(klass));
+        clone_const(entry->key, value, RCLASS_CONST_TBL(clone));
+        SA_FOREACH_END();
+
+        SA_FOREACH_START(RCLASS_M_TBL(klass));
+        clone_method(entry->key, (const rb_method_entry_t*)value, clone);
+        SA_FOREACH_END();
+
 	rb_singleton_class_attached(RBASIC(clone)->klass, (VALUE)clone);
 	FL_SET(clone, FL_SINGLETON);
 	return (VALUE)clone;
@@ -265,10 +245,7 @@ void
 rb_singleton_class_attached(VALUE klass, VALUE obj)
 {
     if (FL_TEST(klass, FL_SINGLETON)) {
-	if (!RCLASS_IV_TBL(klass)) {
-	    RCLASS_IV_TBL(klass) = st_init_numtable();
-	}
-	st_insert(RCLASS_IV_TBL(klass), id_attached, obj);
+	sa_insert(RCLASS_IV_TBL(klass), (sa_index_t)id_attached, obj);
     }
 }

@@ -355,12 +332,12 @@ make_singleton_class(VALUE obj)
 static VALUE
 boot_defclass(const char *name, VALUE super)
 {
-    extern st_table *rb_class_tbl;
+    extern sa_table rb_class_tbl;
     VALUE obj = rb_class_boot(super);
     ID id = rb_intern(name);

     rb_name_class(obj, id);
-    st_add_direct(rb_class_tbl, id, obj);
+    sa_insert(&rb_class_tbl, (sa_index_t)id, obj);
     rb_const_set((rb_cObject ? rb_cObject : obj), id, obj);
     return obj;
 }
@@ -484,7 +461,7 @@ rb_define_class(const char *name, VALUE super)
 	rb_warn("no super class for `%s', Object assumed", name);
     }
     klass = rb_define_class_id(id, super);
-    st_add_direct(rb_class_tbl, id, klass);
+    sa_insert(&rb_class_tbl, (sa_index_t)id, klass);
     rb_name_class(klass, id);
     rb_const_set(rb_cObject, id, klass);
     rb_class_inherited(super, klass);
@@ -565,8 +542,6 @@ rb_module_new(void)
 {
     VALUE mdl = class_alloc(T_MODULE, rb_cModule);

-    RCLASS_M_TBL(mdl) = st_init_numtable();
-
     return (VALUE)mdl;
 }

@@ -595,7 +570,7 @@ rb_define_module(const char *name)
 	rb_raise(rb_eTypeError, "%s is not a module", rb_obj_classname(module));
     }
     module = rb_define_module_id(id);
-    st_add_direct(rb_class_tbl, id, module);
+    sa_insert(&rb_class_tbl, (sa_index_t)id, module);
     rb_const_set(rb_cObject, id, module);

     return module;
@@ -630,27 +605,15 @@ rb_define_module_id_under(VALUE outer, ID id)
 static VALUE
 include_class_new(VALUE module, VALUE super)
 {
-    VALUE klass = class_alloc(T_ICLASS, rb_cClass);
+    VALUE klass;

     if (BUILTIN_TYPE(module) == T_ICLASS) {
 	module = RBASIC(module)->klass;
     }
-    if (!RCLASS_IV_TBL(module)) {
-	RCLASS_IV_TBL(module) = st_init_numtable();
-    }
-    if (!RCLASS_CONST_TBL(module)) {
-	RCLASS_CONST_TBL(module) = st_init_numtable();
-    }
-    RCLASS_IV_TBL(klass) = RCLASS_IV_TBL(module);
-    RCLASS_CONST_TBL(klass) = RCLASS_CONST_TBL(module);
-    RCLASS_M_TBL(klass) = RCLASS_M_TBL(module);
+    klass = iclass_alloc();
+    RBASIC(klass)->klass = module;
+    RCLASS_EXT(klass) = RCLASS_EXT(module);
     RCLASS_SUPER(klass) = super;
-    if (TYPE(module) == T_ICLASS) {
-	RBASIC(klass)->klass = RBASIC(module)->klass;
-    }
-    else {
-	RBASIC(klass)->klass = module;
-    }
     OBJ_INFECT(klass, module);
     OBJ_INFECT(klass, super);

@@ -677,13 +640,13 @@ rb_include_module(VALUE klass, VALUE module)
     while (module) {
 	int superclass_seen = FALSE;

-	if (RCLASS_M_TBL(klass) == RCLASS_M_TBL(module))
+	if (RCLASS_EXT(klass) == RCLASS_EXT(module))
 	    rb_raise(rb_eArgError, "cyclic include detected");
 	/* ignore if the module included already in superclasses */
 	for (p = RCLASS_SUPER(klass); p; p = RCLASS_SUPER(p)) {
 	    switch (BUILTIN_TYPE(p)) {
 	      case T_ICLASS:
-		if (RCLASS_M_TBL(p) == RCLASS_M_TBL(module)) {
+		if (RCLASS_EXT(p) == RCLASS_EXT(module)) {
 		    if (!superclass_seen) {
 			c = p;  /* move insertion point */
 		    }
@@ -696,7 +659,7 @@ rb_include_module(VALUE klass, VALUE module)
 	    }
 	}
 	c = RCLASS_SUPER(c) = include_class_new(module, RCLASS_SUPER(c));
-	if (RMODULE_M_TBL(module) && RMODULE_M_TBL(module)->num_entries)
+	if (RMODULE_M_TBL(module)->num_entries)
 	    changed = 1;
       skip:
 	module = RCLASS_SUPER(module);
@@ -827,58 +790,58 @@ ins_methods_push(ID name, long type, VALUE ary, long visi)
 }

 static int
-ins_methods_i(st_data_t name, st_data_t type, st_data_t ary)
+ins_methods_i(sa_index_t name, st_data_t type, st_data_t ary)
 {
     return ins_methods_push((ID)name, (long)type, (VALUE)ary, -1); /* everything but private */
 }

 static int
-ins_methods_prot_i(st_data_t name, st_data_t type, st_data_t ary)
+ins_methods_prot_i(sa_index_t name, st_data_t type, st_data_t ary)
 {
     return ins_methods_push((ID)name, (long)type, (VALUE)ary, NOEX_PROTECTED);
 }

 static int
-ins_methods_priv_i(st_data_t name, st_data_t type, st_data_t ary)
+ins_methods_priv_i(sa_index_t name, st_data_t type, st_data_t ary)
 {
     return ins_methods_push((ID)name, (long)type, (VALUE)ary, NOEX_PRIVATE);
 }

 static int
-ins_methods_pub_i(st_data_t name, st_data_t type, st_data_t ary)
+ins_methods_pub_i(sa_index_t name, st_data_t type, st_data_t ary)
 {
     return ins_methods_push((ID)name, (long)type, (VALUE)ary, NOEX_PUBLIC);
 }

 static int
-method_entry_i(st_data_t key, st_data_t value, st_data_t data)
+method_entry_i(sa_index_t key, st_data_t value, st_data_t data)
 {
     const rb_method_entry_t *me = (const rb_method_entry_t *)value;
-    st_table *list = (st_table *)data;
+    sa_table *list = (sa_table *)data;
     long type;

     if ((ID)key == ID_ALLOCATOR) {
 	return ST_CONTINUE;
     }

-    if (!st_lookup(list, key, 0)) {
+    if (!sa_lookup(list, key, 0)) {
 	if (UNDEFINED_METHOD_ENTRY_P(me)) {
 	    type = -1; /* none */
 	}
 	else {
 	    type = VISI(me->flag);
 	}
-	st_add_direct(list, key, type);
+	sa_insert(list, key, type);
     }
     return ST_CONTINUE;
 }

 static VALUE
-class_instance_method_list(int argc, VALUE *argv, VALUE mod, int obj, int (*func) (st_data_t, st_data_t, st_data_t))
+class_instance_method_list(int argc, VALUE *argv, VALUE mod, int obj, int (*func) (sa_index_t, st_data_t, st_data_t))
 {
     VALUE ary;
     int recur;
-    st_table *list;
+    sa_table list = SA_EMPTY_TABLE;

     if (argc == 0) {
 	recur = TRUE;
@@ -889,16 +852,15 @@ class_instance_method_list(int argc, VALUE *argv, VALUE mod, int obj, int (*func
 	recur = RTEST(r);
     }

-    list = st_init_numtable();
     for (; mod; mod = RCLASS_SUPER(mod)) {
-	st_foreach(RCLASS_M_TBL(mod), method_entry_i, (st_data_t)list);
+	sa_foreach(RCLASS_M_TBL(mod), method_entry_i, (st_data_t)&list);
 	if (BUILTIN_TYPE(mod) == T_ICLASS) continue;
 	if (obj && FL_TEST(mod, FL_SINGLETON)) continue;
 	if (!recur) break;
     }
     ary = rb_ary_new();
-    st_foreach(list, func, ary);
-    st_free_table(list);
+    sa_foreach(&list, func, ary);
+    sa_clear(&list);

     return ary;
 }
@@ -1112,7 +1074,7 @@ VALUE
 rb_obj_singleton_methods(int argc, VALUE *argv, VALUE obj)
 {
     VALUE recur, ary, klass;
-    st_table *list;
+    sa_table list = SA_EMPTY_TABLE;

     if (argc == 0) {
 	recur = Qtrue;
@@ -1121,20 +1083,19 @@ rb_obj_singleton_methods(int argc, VALUE *argv, VALUE obj)
 	rb_scan_args(argc, argv, "01", &recur);
     }
     klass = CLASS_OF(obj);
-    list = st_init_numtable();
     if (klass && FL_TEST(klass, FL_SINGLETON)) {
-	st_foreach(RCLASS_M_TBL(klass), method_entry_i, (st_data_t)list);
+	sa_foreach(RCLASS_M_TBL(klass), method_entry_i, (st_data_t)&list);
 	klass = RCLASS_SUPER(klass);
     }
     if (RTEST(recur)) {
 	while (klass && (FL_TEST(klass, FL_SINGLETON) || TYPE(klass) == T_ICLASS)) {
-	    st_foreach(RCLASS_M_TBL(klass), method_entry_i, (st_data_t)list);
+	    sa_foreach(RCLASS_M_TBL(klass), method_entry_i, (st_data_t)&list);
 	    klass = RCLASS_SUPER(klass);
 	}
     }
     ary = rb_ary_new();
-    st_foreach(list, ins_methods_i, ary);
-    st_free_table(list);
+    sa_foreach(&list, ins_methods_i, ary);
+    sa_clear(&list);

     return ary;
 }
diff --git a/common.mk b/common.mk
index c9ef641..3ccfa47 100644
--- a/common.mk
+++ b/common.mk
@@ -79,6 +79,7 @@ COMMONOBJS    = array.$(OBJEXT) \
 		safe.$(OBJEXT) \
 		signal.$(OBJEXT) \
 		sprintf.$(OBJEXT) \
+		sp_ar.$(OBJEXT) \
 		st.$(OBJEXT) \
 		strftime.$(OBJEXT) \
 		string.$(OBJEXT) \
@@ -638,7 +639,8 @@ file.$(OBJEXT): {$(VPATH)}file.c $(RUBY_H_INCLUDES) {$(VPATH)}io.h \
 gc.$(OBJEXT): {$(VPATH)}gc.c $(RUBY_H_INCLUDES) {$(VPATH)}re.h \
   {$(VPATH)}regex.h $(ENCODING_H_INCLUDES) $(VM_CORE_H_INCLUDES) \
   {$(VPATH)}gc.h {$(VPATH)}io.h {$(VPATH)}eval_intern.h {$(VPATH)}util.h \
-  {$(VPATH)}debug.h {$(VPATH)}internal.h {$(VPATH)}constant.h
+  {$(VPATH)}debug.h {$(VPATH)}internal.h {$(VPATH)}constant.h \
+  {$(VPATH)}pool_alloc.inc.h {$(VPATH)}pool_alloc.h
 hash.$(OBJEXT): {$(VPATH)}hash.c $(RUBY_H_INCLUDES) {$(VPATH)}util.h \
   $(ENCODING_H_INCLUDES)
 inits.$(OBJEXT): {$(VPATH)}inits.c $(RUBY_H_INCLUDES) \
@@ -702,7 +704,8 @@ signal.$(OBJEXT): {$(VPATH)}signal.c $(RUBY_H_INCLUDES) \
   $(VM_CORE_H_INCLUDES) {$(VPATH)}debug.h
 sprintf.$(OBJEXT): {$(VPATH)}sprintf.c $(RUBY_H_INCLUDES) {$(VPATH)}re.h \
   {$(VPATH)}regex.h {$(VPATH)}vsnprintf.c $(ENCODING_H_INCLUDES)
-st.$(OBJEXT): {$(VPATH)}st.c $(RUBY_H_INCLUDES)
+sp_ar.$(OBJEXT): {$(VPATH)}sp_ar.c $(RUBY_H_INCLUDES)
+st.$(OBJEXT): {$(VPATH)}st.c $(RUBY_H_INCLUDES) {$(VPATH)}pool_alloc.h
 strftime.$(OBJEXT): {$(VPATH)}strftime.c $(RUBY_H_INCLUDES) \
   {$(VPATH)}timev.h
 string.$(OBJEXT): {$(VPATH)}string.c $(RUBY_H_INCLUDES) {$(VPATH)}re.h \
diff --git a/configure.in b/configure.in
index b006a01..a3ccad7 100644
--- a/configure.in
+++ b/configure.in
@@ -1324,6 +1324,29 @@ main() {
 CFLAGS="$save_CFLAGS"])
 AC_DEFINE_UNQUOTED(GC_MARK_STACKFRAME_WORD, $rb_cv_gc_mark_stackframe_word)

+AS_CASE(["$target_os"],
+[openbsd*], [
+  AC_CACHE_CHECK(for heap align log on openbsd, rb_cv_page_size_log,
+    [rb_cv_page_size_log=no
+     for page_log in 12 13; do
+       AC_TRY_RUN([
+#include <math.h>
+#include <unistd.h>
+
+int
+main() {
+  if ((int)log2((double)sysconf(_SC_PAGESIZE)) != $page_log) return 1;
+  return 0;
+}
+       ],
+       rb_cv_page_size_log="$page_log"; break)
+     done])
+  if test $rb_cv_page_size_log != no; then
+    AC_DEFINE_UNQUOTED(HEAP_ALIGN_LOG, $rb_cv_page_size_log)
+  else
+    AC_DEFINE_UNQUOTED(HEAP_ALIGN_LOG, 12)
+  fi
+])

 dnl Checks for library functions.
 AC_TYPE_GETGROUPS
@@ -1424,7 +1447,8 @@ AC_CHECK_FUNCS(fmod killpg wait4 waitpid fork spawnv syscall __syscall chroot ge
 	      setsid telldir seekdir fchmod cosh sinh tanh log2 round\
 	      setuid setgid daemon select_large_fdset setenv unsetenv\
               mktime timegm gmtime_r clock_gettime gettimeofday poll ppoll\
-              pread sendfile shutdown sigaltstack dl_iterate_phdr)
+              pread sendfile shutdown sigaltstack dl_iterate_phdr\
+              dup3 pipe2 posix_memalign memalign)

 AC_CACHE_CHECK(for unsetenv returns a value, rb_cv_unsetenv_return_value,
   [AC_TRY_COMPILE([
diff --git a/constant.h b/constant.h
index 8232910..9106847 100644
--- a/constant.h
+++ b/constant.h
@@ -23,7 +23,7 @@ typedef struct rb_const_entry_struct {

 VALUE rb_mod_private_constant(int argc, VALUE *argv, VALUE obj);
 VALUE rb_mod_public_constant(int argc, VALUE *argv, VALUE obj);
-void rb_free_const_table(st_table *tbl);
+void rb_free_const_table(sa_table *tbl);
 VALUE rb_public_const_get(VALUE klass, ID id);
 VALUE rb_public_const_get_at(VALUE klass, ID id);
 VALUE rb_public_const_get_from(VALUE klass, ID id);
diff --git a/ext/-test-/st/numhash/numhash.c b/ext/-test-/st/numhash/numhash.c
index e186cd4..53d9e1b 100644
--- a/ext/-test-/st/numhash/numhash.c
+++ b/ext/-test-/st/numhash/numhash.c
@@ -54,7 +54,7 @@ numhash_i(st_data_t key, st_data_t value, st_data_t arg, int error)
 static VALUE
 numhash_each(VALUE self)
 {
-    return st_foreach((st_table *)DATA_PTR(self), numhash_i, self) ? Qtrue : Qfalse;
+    return st_foreach_check((st_table *)DATA_PTR(self), numhash_i, self, 0) ? Qtrue : Qfalse;
 }

 void
diff --git a/ext/objspace/objspace.c b/ext/objspace/objspace.c
index 65e0d84..891e058 100644
--- a/ext/objspace/objspace.c
+++ b/ext/objspace/objspace.c
@@ -60,20 +60,10 @@ memsize_of(VALUE obj)
 	break;
       case T_MODULE:
       case T_CLASS:
-	size += st_memsize(RCLASS_M_TBL(obj));
-	if (RCLASS_IV_TBL(obj)) {
-	    size += st_memsize(RCLASS_IV_TBL(obj));
-	}
-	if (RCLASS_IV_INDEX_TBL(obj)) {
-	    size += st_memsize(RCLASS_IV_INDEX_TBL(obj));
-	}
-	if (RCLASS(obj)->ptr->iv_tbl) {
-	    size += st_memsize(RCLASS(obj)->ptr->iv_tbl);
-	}
-	if (RCLASS(obj)->ptr->const_tbl) {
-	    size += st_memsize(RCLASS(obj)->ptr->const_tbl);
-	}
-	size += sizeof(rb_classext_t);
+	size += sa_memsize(RCLASS_M_TBL(obj));
+        size += sa_memsize(RCLASS_IV_TBL(obj));
+        size += sa_memsize(RCLASS_IV_INDEX_TBL(obj));
+        size += sa_memsize(RCLASS_CONST_TBL(obj));
 	break;
       case T_STRING:
 	size += rb_str_memsize(obj);
diff --git a/gc.c b/gc.c
index 064f9a8..cdea1e6 100644
--- a/gc.c
+++ b/gc.c
@@ -20,10 +20,12 @@
 #include "vm_core.h"
 #include "internal.h"
 #include "gc.h"
+#include "pool_alloc.h"
 #include "constant.h"
 #include <stdio.h>
 #include <setjmp.h>
 #include <sys/types.h>
+#include <assert.h>

 #ifdef HAVE_SYS_TIME_H
 #include <sys/time.h>
@@ -35,7 +37,12 @@

 #if defined _WIN32 || defined __CYGWIN__
 #include <windows.h>
+#elif defined(HAVE_POSIX_MEMALIGN)
+#elif defined(HAVE_MEMALIGN)
+#include <malloc.h>
 #endif
+static void aligned_free(void *);
+static void *aligned_malloc(size_t alignment, size_t size);

 #ifdef HAVE_VALGRIND_MEMCHECK_H
 # include <valgrind/memcheck.h>
@@ -321,6 +328,24 @@ struct gc_list {

 #define CALC_EXACT_MALLOC_SIZE 0

+#ifdef POOL_ALLOC_API
+/* POOL ALLOC API */
+#define POOL_ALLOC_PART 1
+#include "pool_alloc.inc.h"
+#undef POOL_ALLOC_PART
+
+typedef struct pool_layout_t pool_layout_t;
+struct pool_layout_t {
+    pool_header
+      p6,  /* st_table && st_table_entry */
+      p11;  /* st_table.bins init size */
+} pool_layout = {
+    INIT_POOL(void*[6]),
+    INIT_POOL(void*[11])
+};
+static void pool_finalize_header(pool_header *header);
+#endif
+
 typedef struct rb_objspace {
     struct {
 	size_t limit;
@@ -330,6 +355,9 @@ typedef struct rb_objspace {
 	size_t allocations;
 #endif
     } malloc_params;
+#ifdef POOL_ALLOC_API
+    pool_layout_t *pool_headers;
+#endif
     struct {
 	size_t increment;
 	struct heaps_slot *ptr;
@@ -377,7 +405,11 @@ typedef struct rb_objspace {
 #define ruby_initial_gc_stress	initial_params.gc_stress
 int *ruby_initial_gc_stress_ptr = &ruby_initial_gc_stress;
 #else
+#  ifdef POOL_ALLOC_API
+static rb_objspace_t rb_objspace = {{GC_MALLOC_LIMIT}, &pool_layout, {HEAP_MIN_SLOTS}};
+#  else
 static rb_objspace_t rb_objspace = {{GC_MALLOC_LIMIT}, {HEAP_MIN_SLOTS}};
+#  endif
 int *ruby_initial_gc_stress_ptr = &rb_objspace.gc_stress;
 #endif
 #define malloc_limit		objspace->malloc_params.limit
@@ -413,6 +445,10 @@ rb_objspace_alloc(void)
     memset(objspace, 0, sizeof(*objspace));
     malloc_limit = initial_malloc_limit;
     ruby_gc_stress = ruby_initial_gc_stress;
+#ifdef POOL_ALLOC_API
+    objspace->pool_headers = (pool_layout_t*) malloc(sizeof(pool_layout));
+    memcpy(objspace->pool_headers, &pool_layout, sizeof(pool_layout));
+#endif

     return objspace;
 }
@@ -491,6 +527,13 @@ rb_objspace_free(rb_objspace_t *objspace)
 	heaps_used = 0;
 	heaps = 0;
     }
+#ifdef POOL_ALLOC_API
+    if (objspace->pool_headers) {
+        pool_finalize_header(&objspace->pool_headers->p6);
+        pool_finalize_header(&objspace->pool_headers->p11);
+        free(objspace->pool_headers);
+    }
+#endif
     free(objspace);
 }
 #endif
@@ -513,7 +556,7 @@ rb_objspace_free(rb_objspace_t *objspace)

 #define HEAP_OBJ_LIMIT (unsigned int)(HEAP_SIZE / sizeof(struct RVALUE))

-extern st_table *rb_class_tbl;
+extern sa_table rb_class_tbl;

 int ruby_disable_gc_stress = 0;

@@ -894,6 +937,27 @@ ruby_xfree(void *x)
 	vm_xfree(&rb_objspace, x);
 }

+#ifdef POOL_ALLOC_API
+/* POOL ALLOC API */
+#define POOL_ALLOC_PART 2
+#include "pool_alloc.inc.h"
+#undef POOL_ALLOC_PART
+
+void
+ruby_xpool_free(void *ptr)
+{
+    pool_free_entry((void**)ptr);
+}
+
+#define CONCRET_POOL_MALLOC(pnts) \
+void * ruby_xpool_malloc_##pnts##p () { \
+    return pool_alloc_entry(&rb_objspace.pool_headers->p##pnts ); \
+}
+CONCRET_POOL_MALLOC(6)
+CONCRET_POOL_MALLOC(11)
+#undef CONCRET_POOL_MALLOC
+
+#endif

 /*
  *  call-seq:
@@ -1008,6 +1072,55 @@ allocate_sorted_heaps(rb_objspace_t *objspace, size_t next_heaps_length)
     heaps_length = next_heaps_length;
 }

+static void *
+aligned_malloc(size_t alignment, size_t size)
+{
+    void *res;
+
+#if defined __MINGW32__
+    res = __mingw_aligned_malloc(size, alignment);
+#elif defined _WIN32 && !defined __CYGWIN__
+    res = _aligned_malloc(size, alignment);
+#elif defined(HAVE_POSIX_MEMALIGN)
+    if (posix_memalign(&res, alignment, size) == 0) {
+        return res;
+    }
+    else {
+        return NULL;
+    }
+#elif defined(HAVE_MEMALIGN)
+    res = memalign(alignment, size);
+#else
+    char* aligned;
+    res = malloc(alignment + size + sizeof(void*));
+    aligned = (char*)res + alignment + sizeof(void*);
+    aligned -= ((VALUE)aligned & (alignment - 1));
+    ((void**)aligned)[-1] = res;
+    res = (void*)aligned;
+#endif
+
+#if defined(_DEBUG) || defined(GC_DEBUG)
+    /* alignment must be a power of 2 */
+    assert((alignment - 1) & alignment == 0);
+    assert(alignment % sizeof(void*) == 0);
+#endif
+    return res;
+}
+
+static void
+aligned_free(void *ptr)
+{
+#if defined __MINGW32__
+    __mingw_aligned_free(ptr);
+#elif defined _WIN32 && !defined __CYGWIN__
+    _aligned_free(ptr);
+#elif defined(HAVE_MEMALIGN) || defined(HAVE_POSIX_MEMALIGN)
+    free(ptr);
+#else
+    free(((void**)ptr)[-1]);
+#endif
+}
+
 static void
 assign_heap_slot(rb_objspace_t *objspace)
 {
@@ -1466,6 +1579,15 @@ mark_tbl(rb_objspace_t *objspace, st_table *tbl, int lev)
     st_foreach(tbl, mark_entry, (st_data_t)&arg);
 }

+static void
+mark_sa_tbl(rb_objspace_t *objspace, sa_table *tbl, int lev)
+{
+    if (!tbl) return;
+    SA_FOREACH_START(tbl);
+    gc_mark(objspace, (VALUE)value, lev);
+    SA_FOREACH_END();
+}
+
 static int
 mark_key(VALUE key, VALUE value, st_data_t data)
 {
@@ -1544,74 +1666,52 @@ rb_mark_method_entry(const rb_method_entry_t *me)
     mark_method_entry(&rb_objspace, me, 0);
 }

-static int
-mark_method_entry_i(ID key, const rb_method_entry_t *me, st_data_t data)
-{
-    struct mark_tbl_arg *arg = (void*)data;
-    mark_method_entry(arg->objspace, me, arg->lev);
-    return ST_CONTINUE;
-}
-
 static void
-mark_m_tbl(rb_objspace_t *objspace, st_table *tbl, int lev)
-{
-    struct mark_tbl_arg arg;
-    if (!tbl) return;
-    arg.objspace = objspace;
-    arg.lev = lev;
-    st_foreach(tbl, mark_method_entry_i, (st_data_t)&arg);
-}
-
-static int
-free_method_entry_i(ID key, rb_method_entry_t *me, st_data_t data)
+mark_m_tbl(rb_objspace_t *objspace, sa_table *tbl, int lev)
 {
-    rb_free_method_entry(me);
-    return ST_CONTINUE;
+    SA_FOREACH_START(tbl);
+    mark_method_entry(objspace, (const rb_method_entry_t*)value, lev);
+    SA_FOREACH_END();
 }

 void
-rb_free_m_table(st_table *tbl)
+rb_free_m_table(sa_table *tbl)
 {
-    st_foreach(tbl, free_method_entry_i, 0);
-    st_free_table(tbl);
-}
-
-static int
-mark_const_entry_i(ID key, const rb_const_entry_t *ce, st_data_t data)
-{
-    struct mark_tbl_arg *arg = (void*)data;
-    gc_mark(arg->objspace, ce->value, arg->lev);
-    return ST_CONTINUE;
+    SA_FOREACH_START(tbl);
+    if (!((rb_method_entry_t*)value)->mark) {
+	rb_free_method_entry((rb_method_entry_t*)value);
+    }
+    SA_FOREACH_END();
+    sa_clear(tbl);
 }

 static void
-mark_const_tbl(rb_objspace_t *objspace, st_table *tbl, int lev)
+mark_const_tbl(rb_objspace_t *objspace, sa_table *tbl, int lev)
 {
-    struct mark_tbl_arg arg;
-    if (!tbl) return;
-    arg.objspace = objspace;
-    arg.lev = lev;
-    st_foreach(tbl, mark_const_entry_i, (st_data_t)&arg);
+    SA_FOREACH_START(tbl);
+    gc_mark(objspace, ((const rb_const_entry_t*)value)->value, lev);
+    SA_FOREACH_END();
 }

-static int
-free_const_entry_i(ID key, rb_const_entry_t *ce, st_data_t data)
+void
+rb_free_const_table(sa_table *tbl)
 {
-    xfree(ce);
-    return ST_CONTINUE;
+    SA_FOREACH_START(tbl);
+    xfree((rb_const_entry_t*)value);
+    SA_FOREACH_END();
+    sa_clear(tbl);
 }

 void
-rb_free_const_table(st_table *tbl)
+rb_mark_tbl(st_table *tbl)
 {
-    st_foreach(tbl, free_const_entry_i, 0);
-    st_free_table(tbl);
+    mark_tbl(&rb_objspace, tbl, 0);
 }

 void
-rb_mark_tbl(st_table *tbl)
+rb_mark_sa_tbl(sa_table *tbl)
 {
-    mark_tbl(&rb_objspace, tbl, 0);
+    mark_sa_tbl(&rb_objspace, tbl, 0);
 }

 void
@@ -1819,7 +1919,7 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr, int lev)
       case T_CLASS:
       case T_MODULE:
 	mark_m_tbl(objspace, RCLASS_M_TBL(obj), lev);
-	mark_tbl(objspace, RCLASS_IV_TBL(obj), lev);
+	mark_sa_tbl(objspace, RCLASS_IV_TBL(obj), lev);
 	mark_const_tbl(objspace, RCLASS_CONST_TBL(obj), lev);
 	ptr = RCLASS_SUPER(obj);
 	goto again;
@@ -2286,15 +2386,11 @@ obj_free(rb_objspace_t *objspace, VALUE obj)
       case T_CLASS:
 	rb_clear_cache_by_class((VALUE)obj);
 	rb_free_m_table(RCLASS_M_TBL(obj));
-	if (RCLASS_IV_TBL(obj)) {
-	    st_free_table(RCLASS_IV_TBL(obj));
-	}
-	if (RCLASS_CONST_TBL(obj)) {
-	    rb_free_const_table(RCLASS_CONST_TBL(obj));
-	}
-	if (RCLASS_IV_INDEX_TBL(obj)) {
-	    st_free_table(RCLASS_IV_INDEX_TBL(obj));
-	}
+        sa_clear(RCLASS_IV_TBL(obj));
+        rb_free_const_table(RCLASS_CONST_TBL(obj));
+        sa_clear(RCLASS_IV_INDEX_TBL(obj));
+        sa_clear(&RCLASS(obj)->cache->m_cache_tbl);
+        xfree(RCLASS(obj)->cache);
         xfree(RANY(obj)->as.klass.ptr);
 	break;
       case T_STRING:
@@ -2346,8 +2442,9 @@ obj_free(rb_objspace_t *objspace, VALUE obj)
       case T_COMPLEX:
 	break;
       case T_ICLASS:
+        sa_clear(&RCLASS(obj)->cache->m_cache_tbl);
+        xfree(RCLASS(obj)->cache);
 	/* iClass shares table with the module */
-	xfree(RANY(obj)->as.klass.ptr);
 	break;

       case T_FLOAT:
@@ -2458,7 +2555,7 @@ gc_marks(rb_objspace_t *objspace)
     rb_mark_end_proc();
     rb_gc_mark_global_tbl();

-    mark_tbl(objspace, rb_class_tbl, 0);
+    mark_sa_tbl(objspace, &rb_class_tbl, 0);

     /* mark generic instance variables for special constants */
     rb_mark_generic_ivar_tbl();
diff --git a/hash.c b/hash.c
index 64c5d94..66023f5 100644
--- a/hash.c
+++ b/hash.c
@@ -44,7 +44,7 @@ rb_any_cmp(VALUE a, VALUE b)
     if (FIXNUM_P(a) && FIXNUM_P(b)) {
 	return a != b;
     }
-    if (TYPE(a) == T_STRING && RBASIC(a)->klass == rb_cString &&
+    if (RB_TYPE_P(a, T_STRING) && RBASIC(a)->klass == rb_cString &&
 	TYPE(b) == T_STRING && RBASIC(b)->klass == rb_cString) {
 	return rb_str_hash_cmp(a, b);
     }
@@ -80,20 +80,14 @@ rb_any_hash(VALUE a)
     VALUE hval;
     st_index_t hnum;

-    switch (TYPE(a)) {
-      case T_FIXNUM:
-      case T_SYMBOL:
-      case T_NIL:
-      case T_FALSE:
-      case T_TRUE:
-	hnum = rb_hash_end(rb_hash_start((unsigned int)a));
-	break;
-
-      case T_STRING:
+    if (SPECIAL_CONST_P(a)) {
+	if (a == Qundef) return 0;
+	hnum = rb_hash_end(rb_hash_start((st_index_t)a));
+    }
+    else if (BUILTIN_TYPE(a) == T_STRING) {
 	hnum = rb_str_hash(a);
-	break;
-
-      default:
+    }
+    else {
         hval = rb_hash(a);
 	hnum = FIX2LONG(hval);
     }
@@ -106,10 +100,8 @@ static const struct st_hash_type objhash = {
     rb_any_hash,
 };

-static const struct st_hash_type identhash = {
-    st_numcmp,
-    st_numhash,
-};
+extern const struct st_hash_type st_hashtype_num;
+#define identhash st_hashtype_num

 typedef int st_foreach_func(st_data_t, st_data_t, st_data_t);

@@ -124,7 +116,6 @@ foreach_safe_i(st_data_t key, st_data_t value, struct foreach_safe_arg *arg)
 {
     int status;

-    if (key == Qundef) return ST_CONTINUE;
     status = (*arg->func)(key, value, arg->arg);
     if (status == ST_CONTINUE) {
 	return ST_CHECK;
@@ -140,7 +131,7 @@ st_foreach_safe(st_table *table, int (*func)(ANYARGS), st_data_t a)
     arg.tbl = table;
     arg.func = (st_foreach_func *)func;
     arg.arg = a;
-    if (st_foreach(table, foreach_safe_i, (st_data_t)&arg)) {
+    if (st_foreach_check(table, foreach_safe_i, (st_data_t)&arg, 0)) {
 	rb_raise(rb_eRuntimeError, "hash modified during iteration");
     }
 }
@@ -154,21 +145,21 @@ struct hash_foreach_arg {
 };

 static int
-hash_foreach_iter(st_data_t key, st_data_t value, struct hash_foreach_arg *arg)
+hash_foreach_iter(st_data_t key, st_data_t value, st_data_t argp)
 {
+    struct hash_foreach_arg *arg = (struct hash_foreach_arg *)argp;
     int status;
     st_table *tbl;

     tbl = RHASH(arg->hash)->ntbl;
-    if ((VALUE)key == Qundef) return ST_CONTINUE;
     status = (*arg->func)((VALUE)key, (VALUE)value, arg->arg);
     if (RHASH(arg->hash)->ntbl != tbl) {
 	rb_raise(rb_eRuntimeError, "rehash occurred during iteration");
     }
     switch (status) {
       case ST_DELETE:
-	st_delete_safe(tbl, &key, 0, Qundef);
 	FL_SET(arg->hash, HASH_DELETED);
+	return ST_DELETE;
       case ST_CONTINUE:
 	break;
       case ST_STOP:
@@ -184,7 +175,7 @@ hash_foreach_ensure(VALUE hash)

     if (RHASH(hash)->iter_lev == 0) {
 	if (FL_TEST(hash, HASH_DELETED)) {
-	    st_cleanup_safe(RHASH(hash)->ntbl, Qundef);
+	    st_cleanup_safe(RHASH(hash)->ntbl, (st_data_t)Qundef);
 	    FL_UNSET(hash, HASH_DELETED);
 	}
     }
@@ -192,9 +183,10 @@ hash_foreach_ensure(VALUE hash)
 }

 static VALUE
-hash_foreach_call(struct hash_foreach_arg *arg)
+hash_foreach_call(VALUE arg)
 {
-    if (st_foreach(RHASH(arg->hash)->ntbl, hash_foreach_iter, (st_data_t)arg)) {
+    VALUE hash = ((struct hash_foreach_arg *)arg)->hash;
+    if (st_foreach_check(RHASH(hash)->ntbl, hash_foreach_iter, (st_data_t)arg, (st_data_t)Qundef)) {
 	rb_raise(rb_eRuntimeError, "hash modified during iteration");
     }
     return Qnil;
@@ -447,7 +439,7 @@ rb_hash_rehash_i(VALUE key, VALUE value, VALUE arg)
 {
     st_table *tbl = (st_table *)arg;

-    if (key != Qundef) st_insert(tbl, key, value);
+    st_insert(tbl, (st_data_t)key, (st_data_t)value);
     return ST_CONTINUE;
 }

@@ -490,6 +482,20 @@ rb_hash_rehash(VALUE hash)
     return hash;
 }

+static VALUE
+hash_default_value(VALUE hash, VALUE key)
+{
+    if (rb_method_basic_definition_p(CLASS_OF(hash), id_default)) {
+	VALUE ifnone = RHASH_IFNONE(hash);
+	if (!FL_TEST(hash, HASH_PROC_DEFAULT)) return ifnone;
+	if (key == Qundef) return Qnil;
+	return rb_funcall(ifnone, id_yield, 2, hash, key);
+    }
+    else {
+	return rb_funcall(hash, id_default, 1, key);
+    }
+}
+
 /*
  *  call-seq:
  *     hsh[key]    ->  value
@@ -510,13 +516,7 @@ rb_hash_aref(VALUE hash, VALUE key)
     st_data_t val;

     if (!RHASH(hash)->ntbl || !st_lookup(RHASH(hash)->ntbl, key, &val)) {
-	if (!FL_TEST(hash, HASH_PROC_DEFAULT) &&
-	    rb_method_basic_definition_p(CLASS_OF(hash), id_default)) {
-	    return RHASH_IFNONE(hash);
-	}
-	else {
-	    return rb_funcall(hash, id_default, 1, key);
-	}
+	return hash_default_value(hash, key);
     }
     return (VALUE)val;
 }
@@ -659,7 +659,7 @@ rb_hash_default(int argc, VALUE *argv, VALUE hash)
 static VALUE
 rb_hash_set_default(VALUE hash, VALUE ifnone)
 {
-    rb_hash_modify(hash);
+    rb_hash_modify_check(hash);
     RHASH_IFNONE(hash) = ifnone;
     FL_UNSET(hash, HASH_PROC_DEFAULT);
     return ifnone;
@@ -707,7 +707,7 @@ rb_hash_set_default_proc(VALUE hash, VALUE proc)
 {
     VALUE b;

-    rb_hash_modify(hash);
+    rb_hash_modify_check(hash);
     b = rb_check_convert_type(proc, T_DATA, "Proc", "to_proc");
     if (NIL_P(b) || !rb_obj_is_proc(b)) {
 	rb_raise(rb_eTypeError,
@@ -776,7 +776,7 @@ rb_hash_delete_key(VALUE hash, VALUE key)
     if (!RHASH(hash)->ntbl)
         return Qundef;
     if (RHASH(hash)->iter_lev > 0) {
-	if (st_delete_safe(RHASH(hash)->ntbl, &ktmp, &val, Qundef)) {
+	if (st_delete_safe(RHASH(hash)->ntbl, &ktmp, &val, (st_data_t)Qundef)) {
 	    FL_SET(hash, HASH_DELETED);
 	    return (VALUE)val;
 	}
@@ -809,7 +809,7 @@ rb_hash_delete(VALUE hash, VALUE key)
 {
     VALUE val;

-    rb_hash_modify(hash);
+    rb_hash_modify_check(hash);
     val = rb_hash_delete_key(hash, key);
     if (val != Qundef) return val;
     if (rb_block_given_p()) {
@@ -828,7 +828,6 @@ shift_i(VALUE key, VALUE value, VALUE arg)
 {
     struct shift_var *var = (struct shift_var *)arg;

-    if (key == Qundef) return ST_CONTINUE;
     if (var->key != Qundef) return ST_STOP;
     var->key = key;
     var->val = value;
@@ -840,7 +839,6 @@ shift_i_safe(VALUE key, VALUE value, VALUE arg)
 {
     struct shift_var *var = (struct shift_var *)arg;

-    if (key == Qundef) return ST_CONTINUE;
     var->key = key;
     var->val = value;
     return ST_STOP;
@@ -864,29 +862,25 @@ rb_hash_shift(VALUE hash)
 {
     struct shift_var var;

-    rb_hash_modify(hash);
-    var.key = Qundef;
-    rb_hash_foreach(hash, RHASH(hash)->iter_lev > 0 ? shift_i_safe : shift_i,
-		    (VALUE)&var);
-
-    if (var.key != Qundef) {
-	if (RHASH(hash)->iter_lev > 0) {
-	    rb_hash_delete_key(hash, var.key);
+    rb_hash_modify_check(hash);
+    if (RHASH(hash)->ntbl) {
+	var.key = Qundef;
+	rb_hash_foreach(hash, RHASH(hash)->iter_lev > 0 ? shift_i_safe : shift_i,
+			(VALUE)&var);
+
+	if (var.key != Qundef) {
+	    if (RHASH(hash)->iter_lev > 0) {
+		rb_hash_delete_key(hash, var.key);
+	    }
+	    return rb_assoc_new(var.key, var.val);
 	}
-	return rb_assoc_new(var.key, var.val);
-    }
-    else if (FL_TEST(hash, HASH_PROC_DEFAULT)) {
-	return rb_funcall(RHASH_IFNONE(hash), id_yield, 2, hash, Qnil);
-    }
-    else {
-	return RHASH_IFNONE(hash);
     }
+    return hash_default_value(hash, Qnil);
 }

 static int
 delete_if_i(VALUE key, VALUE value, VALUE hash)
 {
-    if (key == Qundef) return ST_CONTINUE;
     if (RTEST(rb_yield_values(2, key, value))) {
 	rb_hash_delete_key(hash, key);
     }
@@ -912,8 +906,9 @@ VALUE
 rb_hash_delete_if(VALUE hash)
 {
     RETURN_ENUMERATOR(hash, 0, 0);
-    rb_hash_modify(hash);
-    rb_hash_foreach(hash, delete_if_i, hash);
+    rb_hash_modify_check(hash);
+    if (RHASH(hash)->ntbl)
+	rb_hash_foreach(hash, delete_if_i, hash);
     return hash;
 }

@@ -984,7 +979,6 @@ rb_hash_values_at(int argc, VALUE *argv, VALUE hash)
 static int
 select_i(VALUE key, VALUE value, VALUE result)
 {
-    if (key == Qundef) return ST_CONTINUE;
     if (RTEST(rb_yield_values(2, key, value)))
 	rb_hash_aset(result, key, value);
     return ST_CONTINUE;
@@ -1018,7 +1012,6 @@ rb_hash_select(VALUE hash)
 static int
 keep_if_i(VALUE key, VALUE value, VALUE hash)
 {
-    if (key == Qundef) return ST_CONTINUE;
     if (!RTEST(rb_yield_values(2, key, value))) {
 	return ST_DELETE;
     }
@@ -1040,7 +1033,7 @@ rb_hash_select_bang(VALUE hash)
     st_index_t n;

     RETURN_ENUMERATOR(hash, 0, 0);
-    rb_hash_modify(hash);
+    rb_hash_modify_check(hash);
     if (!RHASH(hash)->ntbl)
         return Qnil;
     n = RHASH(hash)->ntbl->num_entries;
@@ -1065,8 +1058,9 @@ VALUE
 rb_hash_keep_if(VALUE hash)
 {
     RETURN_ENUMERATOR(hash, 0, 0);
-    rb_hash_modify(hash);
-    rb_hash_foreach(hash, keep_if_i, hash);
+    rb_hash_modify_check(hash);
+    if (RHASH(hash)->ntbl)
+	rb_hash_foreach(hash, keep_if_i, hash);
     return hash;
 }

@@ -1144,9 +1138,7 @@ rb_hash_aset(VALUE hash, VALUE key, VALUE val)
 static int
 replace_i(VALUE key, VALUE val, VALUE hash)
 {
-    if (key != Qundef) {
-	rb_hash_aset(hash, key, val);
-    }
+    rb_hash_aset(hash, key, val);

     return ST_CONTINUE;
 }
@@ -1227,7 +1219,6 @@ rb_hash_empty_p(VALUE hash)
 static int
 each_value_i(VALUE key, VALUE value)
 {
-    if (key == Qundef) return ST_CONTINUE;
     rb_yield(value);
     return ST_CONTINUE;
 }
@@ -1262,7 +1253,6 @@ rb_hash_each_value(VALUE hash)
 static int
 each_key_i(VALUE key, VALUE value)
 {
-    if (key == Qundef) return ST_CONTINUE;
     rb_yield(key);
     return ST_CONTINUE;
 }
@@ -1296,7 +1286,6 @@ rb_hash_each_key(VALUE hash)
 static int
 each_pair_i(VALUE key, VALUE value)
 {
-    if (key == Qundef) return ST_CONTINUE;
     rb_yield(rb_assoc_new(key, value));
     return ST_CONTINUE;
 }
@@ -1334,7 +1323,6 @@ rb_hash_each_pair(VALUE hash)
 static int
 to_a_i(VALUE key, VALUE value, VALUE ary)
 {
-    if (key == Qundef) return ST_CONTINUE;
     rb_ary_push(ary, rb_assoc_new(key, value));
     return ST_CONTINUE;
 }
@@ -1367,7 +1355,6 @@ inspect_i(VALUE key, VALUE value, VALUE str)
 {
     VALUE str2;

-    if (key == Qundef) return ST_CONTINUE;
     str2 = rb_inspect(key);
     if (RSTRING_LEN(str) > 1) {
 	rb_str_cat2(str, ", ");
@@ -1434,7 +1421,6 @@ rb_hash_to_hash(VALUE hash)
 static int
 keys_i(VALUE key, VALUE value, VALUE ary)
 {
-    if (key == Qundef) return ST_CONTINUE;
     rb_ary_push(ary, key);
     return ST_CONTINUE;
 }
@@ -1465,7 +1451,6 @@ rb_hash_keys(VALUE hash)
 static int
 values_i(VALUE key, VALUE value, VALUE ary)
 {
-    if (key == Qundef) return ST_CONTINUE;
     rb_ary_push(ary, value);
     return ST_CONTINUE;
 }
@@ -1524,7 +1509,6 @@ rb_hash_search_value(VALUE key, VALUE value, VALUE arg)
 {
     VALUE *data = (VALUE *)arg;

-    if (key == Qundef) return ST_CONTINUE;
     if (rb_equal(value, data[1])) {
 	data[0] = Qtrue;
 	return ST_STOP;
@@ -1568,7 +1552,6 @@ eql_i(VALUE key, VALUE val1, VALUE arg)
     struct equal_data *data = (struct equal_data *)arg;
     st_data_t val2;

-    if (key == Qundef) return ST_CONTINUE;
     if (!st_lookup(data->tbl, key, &val2)) {
 	data->result = Qfalse;
 	return ST_STOP;
@@ -1599,7 +1582,7 @@ hash_equal(VALUE hash1, VALUE hash2, int eql)
     struct equal_data data;

     if (hash1 == hash2) return Qtrue;
-    if (TYPE(hash2) != T_HASH) {
+    if (!RB_TYPE_P(hash2, T_HASH)) {
 	if (!rb_respond_to(hash2, rb_intern("to_hash"))) {
 	    return Qfalse;
 	}
@@ -1670,7 +1653,6 @@ hash_i(VALUE key, VALUE val, VALUE arg)
     st_index_t *hval = (st_index_t *)arg;
     st_index_t hdata[2];

-    if (key == Qundef) return ST_CONTINUE;
     hdata[0] = rb_hash(key);
     hdata[1] = rb_hash(val);
     *hval ^= st_hash(hdata, sizeof(hdata), 0);
@@ -1711,7 +1693,6 @@ rb_hash_hash(VALUE hash)
 static int
 rb_hash_invert_i(VALUE key, VALUE value, VALUE hash)
 {
-    if (key == Qundef) return ST_CONTINUE;
     rb_hash_aset(hash, value, key);
     return ST_CONTINUE;
 }
@@ -1740,7 +1721,6 @@ rb_hash_invert(VALUE hash)
 static int
 rb_hash_update_i(VALUE key, VALUE value, VALUE hash)
 {
-    if (key == Qundef) return ST_CONTINUE;
     hash_update(hash, key);
     st_insert(RHASH(hash)->ntbl, key, value);
     return ST_CONTINUE;
@@ -1749,7 +1729,6 @@ rb_hash_update_i(VALUE key, VALUE value, VALUE hash)
 static int
 rb_hash_update_block_i(VALUE key, VALUE value, VALUE hash)
 {
-    if (key == Qundef) return ST_CONTINUE;
     if (rb_hash_has_key(hash, key)) {
 	value = rb_yield_values(3, key, rb_hash_aref(hash, key), value);
     }
@@ -1806,7 +1785,6 @@ rb_hash_update_func_i(VALUE key, VALUE value, VALUE arg0)
     struct update_arg *arg = (struct update_arg *)arg0;
     VALUE hash = arg->hash;

-    if (key == Qundef) return ST_CONTINUE;
     if (rb_hash_has_key(hash, key)) {
 	value = (*arg->func)(key, rb_hash_aref(hash, key), value);
     }
@@ -1863,7 +1841,6 @@ assoc_i(VALUE key, VALUE val, VALUE arg)
 {
     VALUE *args = (VALUE *)arg;

-    if (key == Qundef) return ST_CONTINUE;
     if (RTEST(rb_equal(args[0], key))) {
 	args[1] = rb_assoc_new(key, val);
 	return ST_STOP;
@@ -1901,7 +1878,6 @@ rassoc_i(VALUE key, VALUE val, VALUE arg)
 {
     VALUE *args = (VALUE *)arg;

-    if (key == Qundef) return ST_CONTINUE;
     if (RTEST(rb_equal(args[0], val))) {
 	args[1] = rb_assoc_new(key, val);
 	return ST_STOP;
@@ -2198,7 +2174,7 @@ rb_env_path_tainted(void)
 }

 #if defined(_WIN32) || (defined(HAVE_SETENV) && defined(HAVE_UNSETENV))
-#elif defined __sun__
+#elif defined __sun
 static int
 in_origenv(const char *str)
 {
@@ -2286,7 +2262,7 @@ ruby_setenv(const char *name, const char *value)
 	    rb_sys_fail("unsetenv");
 #endif
     }
-#elif defined __sun__
+#elif defined __sun
     size_t len;
     char **env_ptr, *str;
     if (strchr(name, '=')) {
@@ -3084,11 +3060,9 @@ env_invert(void)
 static int
 env_replace_i(VALUE key, VALUE val, VALUE keys)
 {
-    if (key != Qundef) {
-	env_aset(Qnil, key, val);
-	if (rb_ary_includes(keys, key)) {
-	    rb_ary_delete(keys, key);
-	}
+    env_aset(Qnil, key, val);
+    if (rb_ary_includes(keys, key)) {
+	rb_ary_delete(keys, key);
     }
     return ST_CONTINUE;
 }
@@ -3120,12 +3094,10 @@ env_replace(VALUE env, VALUE hash)
 static int
 env_update_i(VALUE key, VALUE val)
 {
-    if (key != Qundef) {
-	if (rb_block_given_p()) {
-	    val = rb_yield_values(3, key, rb_f_getenv(Qnil, key), val);
-	}
-	env_aset(Qnil, key, val);
+    if (rb_block_given_p()) {
+	val = rb_yield_values(3, key, rb_f_getenv(Qnil, key), val);
     }
+    env_aset(Qnil, key, val);
     return ST_CONTINUE;
 }

@@ -3150,15 +3122,116 @@ env_update(VALUE env, VALUE hash)
 }

 /*
- *  A <code>Hash</code> is a collection of key-value pairs. It is
- *  similar to an <code>Array</code>, except that indexing is done via
- *  arbitrary keys of any object type, not an integer index. Hashes enumerate
- *  their values in the order that the corresponding keys were inserted.
+ *  A Hash is a dictionary-like collection of unique keys and their values.
+ *  Also called associative arrays, they are similar to Arrays, but where an
+ *  Array uses integers as its index, a Hash allows you to use any object
+ *  type.
+ *
+ *  Hashes enumerate their values in the order that the corresponding keys
+ *  were inserted.
+ *
+ *  A Hash can be easily created by using its implicit form:
+ *
+ *    grades = { "Jane Doe" => 10, "Jim Doe" => 6 }
+ *
+ *  Hashes allow an alternate syntax form when your keys are always symbols.
+ *  Instead of
+ *
+ *    options = { :font_size => 10, :font_family => "Arial" }
+ *
+ *  You could write it as:
+ *
+ *    options = { font_size: 10, font_family: "Arial" }
+ *
+ *  Each named key is a symbol you can access in hash:
+ *
+ *    options[:font_size]  # => 10
+ *
+ *  A Hash can also be created through its ::new method:
+ *
+ *    grades = Hash.new
+ *    grades["Dorothy Doe"] = 9
  *
  *  Hashes have a <em>default value</em> that is returned when accessing
- *  keys that do not exist in the hash. By default, that value is
- *  <code>nil</code>.
+ *  keys that do not exist in the hash. If no default is set +nil+ is used.
+ *  You can set the default value by sending it as an argument to Hash.new:
+ *
+ *    grades = Hash.new(0)
+ *
+ *  Or by using the #default= method:
+ *
+ *    grades = {"Timmy Doe" => 8}
+ *    grades.default = 0
+ *
+ *  Accessing a value in a Hash requires using its key:
+ *
+ *    puts grades["Jane Doe"] # => 10
+ *
+ *  === Common Uses
+ *
+ *  Hashes are an easy way to represent data structures, such as
+ *
+ *    books         = {}
+ *    books[:matz]  = "The Ruby Language"
+ *    books[:black] = "The Well-Grounded Rubyist"
+ *
+ *  Hashes are also commonly used as a way to have named parameters in
+ *  functions. Note that no brackets are used below. If a hash is the last
+ *  argument on a method call, no braces are needed, thus creating a really
+ *  clean interface:
+ *
+ *    Person.create(name: "John Doe", age: 27)
+ *
+ *    def self.create(params)
+ *      @name = params[:name]
+ *      @age  = params[:age]
+ *    end
+ *
+ *  === Hash Keys
+ *
+ *  Two objects refer to the same hash key when their <code>hash</code> value
+ *  is identical and the two objects are <code>eql?</code> to each other.
+ *
+ *  A user-defined class may be used as a hash key if the <code>hash</code>
+ *  and <code>eql?</code> methods are overridden to provide meaningful
+ *  behavior.  By default, separate instances refer to separate hash keys.
+ *
+ *  A typical implementation of <code>hash</code> is based on the
+ *  object's data while <code>eql?</code> is usually aliased to the overridden
+ *  <code>==</code> method:
+ *
+ *    class Book
+ *      attr_reader :author, :title
+ *
+ *      def initialize(author, title)
+ *        @author = author
+ *        @title = title
+ *      end
+ *
+ *      def ==(other)
+ *        self.class === other and
+ *          other.author == @author and
+ *          other.title == @title
+ *      end
+ *
+ *      alias eql? ==
+ *
+ *      def hash
+ *        @author.hash ^ @title.hash # XOR
+ *      end
+ *    end
+ *
+ *    book1 = Book.new 'matz', 'Ruby in a Nutshell'
+ *    book2 = Book.new 'matz', 'Ruby in a Nutshell'
+ *
+ *    reviews = {}
+ *
+ *    reviews[book1] = 'Great reference!'
+ *    reviews[book2] = 'Nice and compact!'
+ *
+ *    reviews.length #=> 1
  *
+ *  See also Object#hash and Object#eql?
  */

 void
diff --git a/include/ruby/intern.h b/include/ruby/intern.h
index 876271f..7e4be31 100644
--- a/include/ruby/intern.h
+++ b/include/ruby/intern.h
@@ -413,6 +413,7 @@ size_t ruby_stack_length(VALUE**);
 int rb_during_gc(void);
 void rb_gc_mark_locations(VALUE*, VALUE*);
 void rb_mark_tbl(struct st_table*);
+void rb_mark_sa_tbl(sa_table*);
 void rb_mark_set(struct st_table*);
 void rb_mark_hash(struct st_table*);
 void rb_gc_mark_maybe(VALUE);
@@ -843,7 +844,7 @@ VALUE rb_f_trace_var(int, VALUE*);
 VALUE rb_f_untrace_var(int, VALUE*);
 VALUE rb_f_global_variables(void);
 void rb_alias_variable(ID, ID);
-struct st_table* rb_generic_ivar_table(VALUE);
+sa_table* rb_generic_ivar_table(VALUE);
 void rb_copy_generic_ivar(VALUE,VALUE);
 void rb_mark_generic_ivar(VALUE);
 void rb_mark_generic_ivar_tbl(void);
diff --git a/include/ruby/ruby.h b/include/ruby/ruby.h
index 3b72f8c..3fbb42d 100644
--- a/include/ruby/ruby.h
+++ b/include/ruby/ruby.h
@@ -605,7 +605,7 @@ struct RObject {
 	struct {
 	    long numiv;
 	    VALUE *ivptr;
-            struct st_table *iv_index_tbl; /* shortcut for RCLASS_IV_INDEX_TBL(rb_obj_class(obj)) */
+            struct sa_table *iv_index_tbl; /* shortcut for RCLASS_IV_INDEX_TBL(rb_obj_class(obj)) */
 	} heap;
 	VALUE ary[ROBJECT_EMBED_LEN_MAX];
     } as;
@@ -626,12 +626,13 @@ struct RObject {

 /** @internal */
 typedef struct rb_classext_struct rb_classext_t;
+typedef struct rb_class_cache_struct rb_class_cache_t;

 struct RClass {
     struct RBasic basic;
+    VALUE super;
     rb_classext_t *ptr;
-    struct st_table *m_tbl;
-    struct st_table *iv_index_tbl;
+    rb_class_cache_t *cache;
 };
 #define RCLASS_SUPER(c) rb_class_get_superclass(c)
 #define RMODULE_IV_TBL(m) RCLASS_IV_TBL(m)
diff --git a/include/ruby/st.h b/include/ruby/st.h
index 50f2a75..aff94fc 100644
--- a/include/ruby/st.h
+++ b/include/ruby/st.h
@@ -36,7 +36,7 @@ typedef unsigned long st_data_t;
 #elif SIZEOF_LONG_LONG == SIZEOF_VOIDP
 typedef unsigned LONG_LONG st_data_t;
 #else
-# error ---->> st.c requires sizeof(void*) == sizeof(long) to be compiled. <<----
+# error ---->> st.c requires sizeof(void*) == sizeof(long) or sizeof(LONG_LONG) to be compiled. <<----
 #endif
 #define ST_DATA_T_DEFINED

@@ -74,6 +74,11 @@ struct st_hash_type {

 #define ST_INDEX_BITS (sizeof(st_index_t) * CHAR_BIT)

+typedef struct st_packed_entry {
+    st_index_t hash;
+    st_data_t key, val;
+} st_packed_entry;
+
 struct st_table {
     const struct st_hash_type *type;
     st_index_t num_bins;
@@ -91,8 +96,17 @@ struct st_table {
     __extension__
 #endif
     st_index_t num_entries : ST_INDEX_BITS - 1;
-    struct st_table_entry **bins;
-    struct st_table_entry *head, *tail;
+    union {
+	struct {
+	    struct st_table_entry **bins;
+	    struct st_table_entry *head, *tail;
+	} big;
+	struct {
+	    struct st_packed_entry *entries;
+	    st_index_t real_entries;
+	} packed;
+	st_packed_entry upacked;
+    } as;
 };

 #define st_is_member(table,key) st_lookup((table),(key),(st_data_t *)0)
@@ -114,6 +128,7 @@ int st_insert2(st_table *, st_data_t, st_data_t, st_data_t (*)(st_data_t));
 int st_lookup(st_table *, st_data_t, st_data_t *);
 int st_get_key(st_table *, st_data_t, st_data_t *);
 int st_foreach(st_table *, int (*)(ANYARGS), st_data_t);
+int st_foreach_check(st_table *, int (*)(ANYARGS), st_data_t, st_data_t);
 int st_reverse_foreach(st_table *, int (*)(ANYARGS), st_data_t);
 void st_add_direct(st_table *, st_data_t, st_data_t);
 void st_free_table(st_table *);
@@ -136,6 +151,51 @@ st_index_t st_hash_start(st_index_t h);
 #pragma GCC visibility pop
 #endif

+typedef unsigned int sa_index_t;
+#define SA_STOP     ST_STOP
+#define SA_CONTINUE ST_CONTINUE
+
+#define SA_EMPTY   0
+
+typedef struct sa_entry {
+    sa_index_t next;
+    sa_index_t key;
+    st_data_t value;
+} sa_entry;
+
+typedef struct sa_table {
+    sa_index_t num_bins;
+    sa_index_t num_entries;
+    sa_index_t free_pos;
+    sa_entry *entries;
+} sa_table;
+
+#define SA_EMPTY_TABLE {0, 0, 0, 0};
+void sa_init_table(sa_table *, sa_index_t);
+sa_table *sa_new_table();
+int  sa_insert(sa_table *, sa_index_t, st_data_t);
+int  sa_lookup(sa_table *, sa_index_t, st_data_t *);
+int  sa_delete(sa_table *, sa_index_t, st_data_t *);
+void sa_clear(sa_table *);
+void sa_clear_no_free(sa_table *);
+void sa_free_table(sa_table *);
+int  sa_foreach(sa_table *, int (*)(ANYARGS), st_data_t);
+size_t sa_memsize(const sa_table *);
+sa_table *sa_copy(sa_table*);
+void sa_copy_to(sa_table*, sa_table*);
+typedef int (*sa_iter_func)(sa_index_t key, st_data_t val, st_data_t arg);
+
+#define SA_FOREACH_START_I(table, entry) do { \
+    sa_table *T##entry = (table); \
+    sa_index_t K##entry; \
+    for(K##entry = 0; K##entry < T##entry->num_bins; K##entry++) { \
+	sa_entry *entry = T##entry->entries + K##entry; \
+	if (entry->next != SA_EMPTY) { \
+	    st_data_t value = entry->value
+#define SA_FOREACH_END() } } } while(0)
+
+#define SA_FOREACH_START(table) SA_FOREACH_START_I(table, entry)
+
 #if defined(__cplusplus)
 #if 0
 { /* satisfy cc-mode */
diff --git a/internal.h b/internal.h
index 59c9284..95686b7 100644
--- a/internal.h
+++ b/internal.h
@@ -24,18 +24,24 @@ struct rb_deprecated_classext_struct {
 };

 struct rb_classext_struct {
-    VALUE super;
-    struct st_table *iv_tbl;
-    struct st_table *const_tbl;
+    sa_table m_tbl;
+    sa_table iv_tbl;
+    sa_table const_tbl;
+    sa_table iv_index_tbl;
+};
+
+struct rb_class_cache_struct {
+    VALUE method_cache_version;
+    sa_table m_cache_tbl;
 };

 #undef RCLASS_SUPER
 #define RCLASS_EXT(c) (RCLASS(c)->ptr)
-#define RCLASS_SUPER(c) (RCLASS_EXT(c)->super)
-#define RCLASS_IV_TBL(c) (RCLASS_EXT(c)->iv_tbl)
-#define RCLASS_CONST_TBL(c) (RCLASS_EXT(c)->const_tbl)
-#define RCLASS_M_TBL(c) (RCLASS(c)->m_tbl)
-#define RCLASS_IV_INDEX_TBL(c) (RCLASS(c)->iv_index_tbl)
+#define RCLASS_SUPER(c) (RCLASS(c)->super)
+#define RCLASS_IV_TBL(c) (&RCLASS_EXT(c)->iv_tbl)
+#define RCLASS_CONST_TBL(c) (&RCLASS_EXT(c)->const_tbl)
+#define RCLASS_M_TBL(c) (&RCLASS_EXT(c)->m_tbl)
+#define RCLASS_IV_INDEX_TBL(c) (&RCLASS_EXT(c)->iv_index_tbl)

 struct vtm; /* defined by timev.h */

diff --git a/marshal.c b/marshal.c
index 113908e..0920cd9 100644
--- a/marshal.c
+++ b/marshal.c
@@ -506,7 +506,7 @@ w_uclass(VALUE obj, VALUE super, struct dump_arg *arg)
 }

 static int
-w_obj_each(ID id, VALUE value, struct dump_call_arg *arg)
+w_obj_each(sa_index_t id, VALUE value, struct dump_call_arg *arg)
 {
     if (id == rb_id_encoding()) return ST_CONTINUE;
     if (id == rb_intern("E")) return ST_CONTINUE;
@@ -553,13 +553,13 @@ w_encoding(VALUE obj, long num, struct dump_call_arg *arg)
 }

 static void
-w_ivar(VALUE obj, st_table *tbl, struct dump_call_arg *arg)
+w_ivar(VALUE obj, sa_table *tbl, struct dump_call_arg *arg)
 {
     long num = tbl ? tbl->num_entries : 0;

     w_encoding(obj, num, arg);
     if (tbl) {
-	st_foreach_safe(tbl, w_obj_each, (st_data_t)arg);
+	sa_foreach(tbl, w_obj_each, (st_data_t)arg);
     }
 }

@@ -586,7 +586,7 @@ static void
 w_object(VALUE obj, struct dump_arg *arg, int limit)
 {
     struct dump_call_arg c_arg;
-    st_table *ivtbl = 0;
+    sa_table *ivtbl = 0;
     st_data_t num;
     int hasiv = 0;
 #define has_ivars(obj, ivtbl) (((ivtbl) = rb_generic_ivar_table(obj)) != 0 || \
@@ -651,7 +651,7 @@ w_object(VALUE obj, struct dump_arg *arg, int limit)
 	}
 	if (rb_respond_to(obj, s_dump)) {
 	    VALUE v;
-            st_table *ivtbl2 = 0;
+            sa_table *ivtbl2 = 0;
             int hasiv2;

 	    v = rb_funcall(obj, s_dump, 1, INT2NUM(limit));
diff --git a/method.h b/method.h
index 896e309..3369698 100644
--- a/method.h
+++ b/method.h
@@ -100,6 +100,6 @@ int rb_method_entry_eq(const rb_method_entry_t *m1, const rb_method_entry_t *m2)
 void rb_mark_method_entry(const rb_method_entry_t *me);
 void rb_free_method_entry(rb_method_entry_t *me);
 void rb_sweep_method_entry(void *vm);
-void rb_free_m_table(st_table *tbl);
+void rb_free_m_table(sa_table *tbl);

 #endif /* METHOD_H */
diff --git a/object.c b/object.c
index 30db480..6c62ed9 100644
--- a/object.c
+++ b/object.c
@@ -229,17 +229,8 @@ init_copy(VALUE dest, VALUE obj)
         break;
       case T_CLASS:
       case T_MODULE:
-	if (RCLASS_IV_TBL(dest)) {
-	    st_free_table(RCLASS_IV_TBL(dest));
-	    RCLASS_IV_TBL(dest) = 0;
-	}
-	if (RCLASS_CONST_TBL(dest)) {
-	    rb_free_const_table(RCLASS_CONST_TBL(dest));
-	    RCLASS_CONST_TBL(dest) = 0;
-	}
-	if (RCLASS_IV_TBL(obj)) {
-	    RCLASS_IV_TBL(dest) = st_copy(RCLASS_IV_TBL(obj));
-	}
+        rb_free_const_table(RCLASS_CONST_TBL(dest));
+        sa_copy_to(RCLASS_IV_TBL(obj), RCLASS_IV_TBL(dest));
         break;
     }
 }
@@ -530,7 +521,7 @@ rb_obj_is_kind_of(VALUE obj, VALUE c)
     }

     while (cl) {
-	if (cl == c || RCLASS_M_TBL(cl) == RCLASS_M_TBL(c))
+	if (cl == c || RCLASS_EXT(cl) == RCLASS_EXT(c))
 	    return Qtrue;
 	cl = RCLASS_SUPER(cl);
     }
@@ -1355,13 +1346,13 @@ rb_class_inherited_p(VALUE mod, VALUE arg)
 	rb_raise(rb_eTypeError, "compared with non class/module");
     }
     while (mod) {
-	if (RCLASS_M_TBL(mod) == RCLASS_M_TBL(arg))
+	if (RCLASS_EXT(mod) == RCLASS_EXT(arg))
 	    return Qtrue;
 	mod = RCLASS_SUPER(mod);
     }
     /* not mod < arg; check if mod > arg */
     while (arg) {
-	if (RCLASS_M_TBL(arg) == RCLASS_M_TBL(start))
+	if (RCLASS_EXT(arg) == RCLASS_EXT(start))
 	    return Qfalse;
 	arg = RCLASS_SUPER(arg);
     }
diff --git a/pool_alloc.h b/pool_alloc.h
new file mode 100644
index 0000000..957708e
--- /dev/null
+++ b/pool_alloc.h
@@ -0,0 +1,11 @@
+#ifndef POOL_ALLOC_H
+#define POOL_ALLOC_H
+
+#define POOL_ALLOC_API
+#ifdef POOL_ALLOC_API
+void  ruby_xpool_free(void *ptr);
+void *ruby_xpool_malloc_6p();
+void *ruby_xpool_malloc_11p();
+#endif
+
+#endif
diff --git a/pool_alloc.inc.h b/pool_alloc.inc.h
new file mode 100644
index 0000000..a7879ab
--- /dev/null
+++ b/pool_alloc.inc.h
@@ -0,0 +1,156 @@
+/*
+ * this is generic pool allocator
+ * you should define following macroses:
+ * ITEM_NAME - unique identifier, which allows to hold functions in a namespace
+ * ITEM_TYPEDEF(name) - passed to typedef to localize item type
+ * free_entry - desired name of function for free entry
+ * alloc_entry - defired name of function for allocate entry
+ */
+
+#if POOL_ALLOC_PART == 1
+#ifdef HEAP_ALIGN_LOG
+#define DEFAULT_POOL_SIZE (1 << HEAP_ALIGN_LOG)
+#else
+#define DEFAULT_POOL_SIZE (sizeof(void*) * 2048)
+#endif
+typedef unsigned int pool_holder_counter;
+
+typedef struct pool_entry_list pool_entry_list;
+typedef struct pool_holder pool_holder;
+
+typedef struct pool_header {
+    pool_holder         *first;
+    pool_holder         *_black_magick;
+    pool_holder_counter  size; // size of entry in sizeof(void*) items
+    pool_holder_counter  total; // size of entry in sizeof(void*) items
+} pool_header;
+
+struct pool_holder {
+    pool_holder_counter free, total;
+    pool_header  *header;
+    void               *freep;
+    pool_holder        *fore, *back;
+    void *data[1];
+};
+#define POOL_DATA_SIZE(pool_size) (((pool_size) - sizeof(void*) * 6 - offsetof(pool_holder, data)) / sizeof(void*))
+#define POOL_ENTRY_SIZE(item_type) ((sizeof(item_type) - 1) / sizeof(void*) + 1)
+#define POOL_HOLDER_COUNT(pool_size, item_type) (POOL_DATA_SIZE(pool_size)/POOL_ENTRY_SIZE(item_type))
+#define INIT_POOL(item_type) {NULL, NULL, POOL_ENTRY_SIZE(item_type), POOL_HOLDER_COUNT(DEFAULT_POOL_SIZE, item_type)}
+
+#elif POOL_ALLOC_PART == 2
+static pool_holder *
+pool_holder_alloc(pool_header *header)
+{
+    pool_holder *holder;
+    pool_holder_counter i, size, count;
+    register void **ptr;
+
+    size_t sz = offsetof(pool_holder, data) +
+	    header->size * header->total * sizeof(void*);
+#define objspace (&rb_objspace)
+    vm_malloc_prepare(objspace, DEFAULT_POOL_SIZE);
+    if (header->first != NULL) return header->first;
+    TRY_WITH_GC(holder = (pool_holder*) aligned_malloc(DEFAULT_POOL_SIZE, sz));
+    malloc_increase += DEFAULT_POOL_SIZE;
+#if CALC_EXACT_MALLOC_SIZE
+    objspace->malloc_params.allocated_size += DEFAULT_POOL_SIZE;
+    objspace->malloc_params.allocations++;
+#endif
+#undef objspace
+
+    size = header->size;
+    count = header->total;
+    holder->free = count;
+    holder->total = count;
+    holder->header = header;
+    holder->fore = NULL;
+    holder->back = NULL;
+    holder->freep = &holder->data;
+    ptr = holder->data;
+    for(i = count - 1; i; i-- ) {
+	ptr = *ptr = ptr + size;
+    }
+    *ptr = NULL;
+    header->first = holder;
+    return holder;
+}
+
+static inline void
+pool_holder_unchaing(pool_header *header, pool_holder *holder)
+{
+    register pool_holder *fore = holder->fore, *back = holder->back;
+    holder->fore = NULL;
+    holder->back = NULL;
+    if (fore != NULL)  fore->back     = back;
+    else               header->_black_magick = back;
+    if (back != NULL)  back->fore     = fore;
+    else               header->first = fore;
+}
+
+static inline pool_holder *
+entry_holder(void **entry)
+{
+    return (pool_holder*)(((uintptr_t)entry) & ~(DEFAULT_POOL_SIZE - 1));
+}
+
+static inline void
+pool_free_entry(void **entry)
+{
+    pool_holder *holder = entry_holder(entry);
+    pool_header *header = holder->header;
+
+    if (holder->free++ == 0) {
+	register pool_holder *first = header->first;
+	if (first == NULL) {
+	    header->first = holder;
+	} else {
+	    holder->back = first;
+	    holder->fore = first->fore;
+	    first->fore = holder;
+	    if (holder->fore)
+		holder->fore->back = holder;
+	    else
+		header->_black_magick = holder;
+	}
+    } else if (holder->free == holder->total && header->first != holder ) {
+	pool_holder_unchaing(header, holder);
+	aligned_free(holder);
+#if CALC_EXACT_MALLOC_SIZE
+	rb_objspace.malloc_params.allocated_size -= DEFAULT_POOL_SIZE;
+	rb_objspace.malloc_params.allocations--;
+#endif
+	return;
+    }
+
+    *entry = holder->freep;
+    holder->freep = entry;
+}
+
+static inline void*
+pool_alloc_entry(pool_header *header)
+{
+    pool_holder *holder = header->first;
+    void **result;
+    if (holder == NULL) {
+	holder = pool_holder_alloc(header);
+    }
+
+    result = holder->freep;
+    holder->freep = *result;
+
+    if (--holder->free == 0) {
+	pool_holder_unchaing(header, holder);
+    }
+
+    return result;
+}
+
+static void
+pool_finalize_header(pool_header *header)
+{
+    if (header->first) {
+        aligned_free(header->first);
+        header->first = NULL;
+    }
+}
+#endif
diff --git a/sp_ar.c b/sp_ar.c
new file mode 100644
index 0000000..2ed69bf
--- /dev/null
+++ b/sp_ar.c
@@ -0,0 +1,374 @@
+/*
+ * sparse array lib
+ * inspired by Lua table
+ * written by Sokolov Yura aka funny_falcon
+ */
+#ifdef NOT_RUBY
+#include "regint.h"
+#include "st.h"
+#else
+#include "ruby/ruby.h"
+#endif
+
+#include <stdio.h>
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#include <string.h>
+
+#ifdef RUBY
+#define malloc xmalloc
+#define calloc xcalloc
+#define realloc xrealloc
+#define free   xfree
+#endif
+
+#define sa_table_alloc()          (sa_table*)malloc(sizeof(sa_table))
+#define sa_table_xalloc()         (sa_table*)calloc(1, sizeof(sa_table))
+#define sa_table_dealloc(table)   free(table)
+#define sa_entry_alloc(n)         (sa_entry*)calloc((n), sizeof(sa_entry))
+#define sa_entry_dealloc(entries) free(entries)
+
+#define SA_LAST   1
+#define SA_OFFSET 2
+
+#define SA_MIN_SIZE 4
+
+void
+sa_init_table(register sa_table *table, sa_index_t num_bins)
+{
+    if (num_bins) {
+        table->num_entries = 0;
+        table->entries = sa_entry_alloc(num_bins);
+        table->num_bins = num_bins;
+        table->free_pos = num_bins;
+    }
+    else {
+        memset(table, 0, sizeof(sa_table));
+    }
+}
+
+sa_table*
+sa_new_table()
+{
+    sa_table* table = sa_table_alloc();
+    sa_init_table(table, 0);
+    return table;
+}
+
+static inline sa_index_t
+calc_pos(register sa_table* table, sa_index_t key)
+{
+    /* this formula is empirical */
+    /* it has no good avalance, but works well in our case */
+    key ^= key >> 16;
+    key *= 0x445229;
+    return (key + (key >> 16)) % table->num_bins;
+}
+
+static void
+fix_empty(register sa_table* table)
+{
+    while(--table->free_pos &&
+            table->entries[table->free_pos-1].next != SA_EMPTY);
+}
+
+#define FLOOR_TO_4 ((~((sa_index_t)0)) << 2)
+static sa_index_t
+find_empty(register sa_table* table, register sa_index_t pos)
+{
+    sa_index_t new_pos = table->free_pos-1;
+    sa_entry *entry;
+    pos &= FLOOR_TO_4;
+    entry = table->entries+pos;
+
+    if (entry->next == SA_EMPTY) { new_pos = pos; goto check; }
+    pos++; entry++;
+    if (entry->next == SA_EMPTY) { new_pos = pos; goto check; }
+    pos++; entry++;
+    if (entry->next == SA_EMPTY) { new_pos = pos; goto check; }
+    pos++; entry++;
+    if (entry->next == SA_EMPTY) { new_pos = pos; goto check; }
+
+check:
+    if (new_pos+1 == table->free_pos) fix_empty(table);
+    return new_pos;
+}
+
+static void resize(register sa_table* table);
+static int insert_into_chain(register sa_table*, register sa_index_t, st_data_t, sa_index_t pos);
+static int insert_into_main(register sa_table*, sa_index_t, st_data_t, sa_index_t pos, sa_index_t prev_pos);
+
+int
+sa_insert(register sa_table* table, register sa_index_t key, st_data_t value)
+{
+    sa_index_t pos, main_pos;
+    register sa_entry *entry;
+
+    if (table->num_bins == 0) {
+        sa_init_table(table, SA_MIN_SIZE);
+    }
+
+    pos = calc_pos(table, key);
+    entry = table->entries + pos;
+
+    if (entry->next == SA_EMPTY) {
+        entry->next = SA_LAST;
+        entry->key = key;
+        entry->value = value;
+        table->num_entries++;
+        if (pos+1 == table->free_pos) fix_empty(table);
+        return 0;
+    }
+
+    if (entry->key == key) {
+        entry->value = value;
+        return 1;
+    }
+
+    if (table->num_entries + (table->num_entries >> 2) > table->num_bins) {
+        resize(table);
+        return sa_insert(table, key, value);
+    }
+
+    main_pos = calc_pos(table, entry->key);
+    if (main_pos == pos) {
+        return insert_into_chain(table, key, value, pos);
+    }
+    else {
+        if (!table->free_pos) {
+            resize(table);
+            return sa_insert(table, key, value);
+        }
+        return insert_into_main(table, key, value, pos, main_pos);
+    }
+}
+
+static int
+insert_into_chain(register sa_table* table, register sa_index_t key, st_data_t value, sa_index_t pos)
+{
+    sa_entry *entry = table->entries + pos, *new_entry;
+    sa_index_t new_pos;
+
+    while (entry->next != SA_LAST) {
+        pos = entry->next - SA_OFFSET;
+        entry = table->entries + pos;
+        if (entry->key == key) {
+            entry->value = value;
+            return 1;
+        }
+    }
+
+    if (!table->free_pos) {
+        resize(table);
+        return sa_insert(table, key, value);
+    }
+
+    new_pos = find_empty(table, pos);
+    new_entry = table->entries + new_pos;
+    entry->next = new_pos + SA_OFFSET;
+
+    new_entry->next = SA_LAST;
+    new_entry->key = key;
+    new_entry->value = value;
+    table->num_entries++;
+    return 0;
+}
+
+static int
+insert_into_main(register sa_table* table, sa_index_t key, st_data_t value, sa_index_t pos, sa_index_t prev_pos)
+{
+    sa_entry *entry = table->entries + pos;
+    sa_index_t new_pos = find_empty(table, pos);
+    sa_entry *new_entry = table->entries + new_pos;
+    sa_index_t npos;
+
+    *new_entry = *entry;
+
+    while((npos = table->entries[prev_pos].next - SA_OFFSET) != pos) {
+        prev_pos = npos;
+    }
+    table->entries[prev_pos].next = new_pos + SA_OFFSET;
+
+    entry->next = SA_LAST;
+    entry->key = key;
+    entry->value = value;
+    table->num_entries++;
+    return 0;
+}
+
+static sa_index_t
+new_size(sa_index_t num_entries)
+{
+    sa_index_t msb = num_entries;
+    msb |= msb >> 1;
+    msb |= msb >> 2;
+    msb |= msb >> 4;
+    msb |= msb >> 8;
+    msb |= msb >> 16;
+    msb = ((msb >> 4) + 1) << 3;
+    return (num_entries & (msb | (msb >> 1))) + (msb >> 1);
+}
+
+static void
+resize(register sa_table *table)
+{
+    sa_table tmp_table;
+    sa_entry *entry;
+    sa_index_t i;
+
+    if (table->num_entries == 0) {
+        sa_entry_dealloc(table->entries);
+        memset(table, 0, sizeof(sa_table));
+        return;
+    }
+
+    sa_init_table(&tmp_table, new_size(table->num_entries + (table->num_entries >> 2)));
+    entry = table->entries;
+
+    for(i = 0; i < table->num_bins; i++, entry++) {
+        if (entry->next != SA_EMPTY) {
+            sa_insert(&tmp_table, entry->key, entry->value);
+        }
+    }
+    sa_entry_dealloc(table->entries);
+    *table = tmp_table;
+}
+
+int
+sa_lookup(register sa_table *table, register sa_index_t key, st_data_t *value)
+{
+    register sa_entry *entry;
+
+    if (table->num_entries == 0) return 0;
+
+    entry = table->entries + calc_pos(table, key);
+    if (entry->next == SA_EMPTY) return 0;
+
+    if (entry->key == key) goto found;
+    if (entry->next == SA_LAST) return 0;
+
+    entry = table->entries + (entry->next - SA_OFFSET);
+    if (entry->key == key) goto found;
+
+    while(entry->next != SA_LAST) {
+        entry = table->entries + (entry->next - SA_OFFSET);
+        if (entry->key == key) goto found;
+    }
+    return 0;
+found:
+    if (value) *value = entry->value;
+    return 1;
+}
+
+void
+sa_clear(sa_table *table)
+{
+    sa_entry_dealloc(table->entries);
+    memset(table, 0, sizeof(sa_table));
+}
+
+void
+sa_clear_no_free(sa_table *table)
+{
+    memset(table->entries, 0, sizeof(sa_entry) * table->num_bins);
+    table->num_entries = 0;
+    table->free_pos = table->num_bins;
+}
+
+void
+sa_free_table(sa_table *table)
+{
+    sa_entry_dealloc(table->entries);
+    sa_table_dealloc(table);
+}
+
+int
+sa_delete(sa_table *table, sa_index_t key, st_data_t *value)
+{
+    sa_index_t pos, prev_pos = ~0;
+    sa_entry *entry;
+
+    if (table->num_entries == 0) goto not_found;
+
+    pos = calc_pos(table, key);
+    entry = table->entries + pos;
+
+    if (entry->next == SA_EMPTY) goto not_found;
+
+    do {
+        if (entry->key == key) {
+            if (value) *value = entry->value;
+            if (entry->next != SA_LAST) {
+                sa_index_t npos = entry->next - SA_OFFSET;
+                *entry = table->entries[npos];
+                memset(table->entries + npos, 0, sizeof(sa_entry));
+            }
+            else {
+                memset(table->entries + pos, 0, sizeof(sa_entry));
+                if (~prev_pos) {
+                    table->entries[prev_pos].next = SA_LAST;
+                }
+            }
+            table->num_entries--;
+            if (table->num_entries < table->num_bins / 4) {
+                resize(table);
+            }
+            return 1;
+        }
+        if (entry->next == SA_LAST) break;
+        prev_pos = pos;
+        pos = entry->next - SA_OFFSET;
+        entry = table->entries + pos;
+    } while(1);
+
+not_found:
+    if (value) *value = 0;
+    return 0;
+}
+
+int
+sa_foreach(register sa_table *table, int (*func)(), st_data_t arg)
+{
+    sa_index_t i;
+    if (table->num_bins == 0) {
+        return 0;
+    }
+    for(i = 0; i < table->num_bins ; i++) {
+	if (table->entries[i].next != SA_EMPTY) {
+	    sa_index_t key = table->entries[i].key;
+	    st_data_t val = table->entries[i].value;
+	    if ((*func)(key, val, arg) == SA_STOP) break;
+	}
+    }
+    return 0;
+}
+
+size_t
+sa_memsize(const sa_table *table)
+{
+    return sizeof(sa_table) + table->num_bins * sizeof(sa_entry);
+}
+
+sa_table*
+sa_copy(sa_table *table)
+{
+    sa_table *new_table = sa_table_alloc();
+    *new_table = *table;
+    if (table->num_bins) {
+        new_table->entries = sa_entry_alloc(table->num_bins);
+        memcpy(new_table->entries, table->entries, table->num_bins*sizeof(sa_entry));
+    }
+    return new_table;
+}
+
+void
+sa_copy_to(sa_table *from, sa_table *to)
+{
+    sa_entry_dealloc(to->entries);
+    *to = *from;
+    if (to->num_bins) {
+	to->entries = sa_entry_alloc(to->num_bins);
+	memcpy(to->entries, from->entries, from->num_bins*sizeof(sa_entry));
+    }
+}
diff --git a/st.c b/st.c
index fda5784..0194022 100644
--- a/st.c
+++ b/st.c
@@ -7,6 +7,7 @@
 #include "st.h"
 #else
 #include "ruby/ruby.h"
+#include "pool_alloc.h"
 #endif

 #include <stdio.h>
@@ -25,8 +26,17 @@ struct st_table_entry {
     st_table_entry *fore, *back;
 };

-#define ST_DEFAULT_MAX_DENSITY 5
+#define STATIC_ASSERT(name, expr) typedef int static_assert_##name##_check[(expr) ? 1 : -1];
+
+#define ST_DEFAULT_MAX_DENSITY 2
 #define ST_DEFAULT_INIT_TABLE_SIZE 11
+#define ST_DEFAULT_SECOND_TABLE_SIZE 19
+#define ST_DEFAULT_PACKED_TABLE_SIZE 19
+#define PACKED_UNIT (int)(sizeof(st_packed_entry) / sizeof(st_table_entry*))
+#define MAX_PACKED_HASH (int)(ST_DEFAULT_PACKED_TABLE_SIZE * sizeof(st_table_entry*) / sizeof(st_packed_entry))
+
+STATIC_ASSERT(st_packed_entry, sizeof(st_packed_entry) == sizeof(st_table_entry*[PACKED_UNIT]))
+STATIC_ASSERT(st_packed_bins, sizeof(st_packed_entry[MAX_PACKED_HASH]) <= sizeof(st_table_entry*[ST_DEFAULT_PACKED_TABLE_SIZE]))

     /*
      * DEFAULT_MAX_DENSITY is the default for the largest we allow the
@@ -38,7 +48,8 @@ struct st_table_entry {
      *
      */

-static const struct st_hash_type type_numhash = {
+#define type_numhash st_hashtype_num
+const struct st_hash_type st_hashtype_num = {
     st_numcmp,
     st_numhash,
 };
@@ -61,20 +72,128 @@ static void rehash(st_table *);
 #ifdef RUBY
 #define malloc xmalloc
 #define calloc xcalloc
+#define realloc xrealloc
 #define free(x) xfree(x)
 #endif

 #define numberof(array) (int)(sizeof(array) / sizeof((array)[0]))

-#define alloc(type) (type*)malloc((size_t)sizeof(type))
-#define Calloc(n,s) (char*)calloc((n),(s))
-
 #define EQUAL(table,x,y) ((x)==(y) || (*(table)->type->compare)((x),(y)) == 0)

-/* remove cast to unsigned int in the future */
-#define do_hash(key,table) (unsigned int)(st_index_t)(*(table)->type->hash)((key))
+#define do_hash(key,table) (st_index_t)(*(table)->type->hash)((key))
 #define do_hash_bin(key,table) (do_hash((key), (table))%(table)->num_bins)

+/* preparation for possible allocation improvements */
+#ifdef POOL_ALLOC_API
+#define st_alloc_entry() (st_table_entry *)ruby_xpool_malloc_6p()
+#define st_free_entry(entry) ruby_xpool_free(entry)
+#define st_alloc_table() (st_table *)ruby_xpool_malloc_6p()
+#define st_dealloc_table(table) ruby_xpool_free(table)
+static inline st_table_entry **
+st_alloc_bins(st_index_t size)
+{
+    st_table_entry **result;
+    if (size == 11) {
+        result = (st_table_entry **) ruby_xpool_malloc_11p();
+        memset(result, 0, 11 * sizeof(st_table_entry *));
+    }
+    else
+        result = (st_table_entry **) ruby_xcalloc(size, sizeof(st_table_entry*));
+    return result;
+}
+static inline void
+st_free_bins(st_table_entry **bins, st_index_t size)
+{
+    if (size == 11)
+	ruby_xpool_free(bins);
+    else
+	ruby_xfree(bins);
+}
+static inline st_table_entry**
+st_realloc_bins(st_table_entry **bins, st_index_t newsize, st_index_t oldsize)
+{
+    st_table_entry **new_bins = st_alloc_bins(newsize);
+    st_free_bins(bins, oldsize);
+    return new_bins;
+}
+#else
+#define st_alloc_entry() (st_table_entry *)malloc(sizeof(st_table_entry))
+#define st_free_entry(entry) free(entry)
+#define st_alloc_table() (st_table *)malloc(sizeof(st_table))
+#define st_dealloc_table(table) free(table)
+#define st_alloc_bins(size) (st_table_entry **)calloc(size, sizeof(st_table_entry *))
+#define st_free_bins(bins, size) free(bins)
+static inline st_table_entry**
+st_realloc_bins(st_table_entry **bins, st_index_t newsize, st_index_t oldsize)
+{
+    bins = (st_table_entry **)realloc(bins, newsize * sizeof(st_table_entry *));
+    MEMZERO(bins, st_table_entry*, newsize);
+    return bins;
+}
+#endif
+
+/* Shortage */
+#define bins as.big.bins
+#define head as.big.head
+#define tail as.big.tail
+#define real_entries as.packed.real_entries
+
+/* preparation for possible packing improvements */
+#define PACKED_BINS(table) ((table)->as.packed.entries)
+#define PACKED_ENT(table, i) PACKED_BINS(table)[i]
+#define PKEY(table, i) PACKED_ENT((table), (i)).key
+#define PVAL(table, i) PACKED_ENT((table), (i)).val
+#define PHASH(table, i) PACKED_ENT((table), (i)).hash
+#define PKEY_SET(table, i, v) (PKEY((table), (i)) = (v))
+#define PVAL_SET(table, i, v) (PVAL((table), (i)) = (v))
+#define PHASH_SET(table, i, v) (PHASH((table), (i)) = (v))
+
+/* this function depends much on packed layout, so that it placed here */
+static inline void
+remove_packed_entry(st_table *table, st_index_t i)
+{
+    table->real_entries--;
+    table->num_entries--;
+    if (i < table->real_entries) {
+	MEMMOVE(&PACKED_ENT(table, i), &PACKED_ENT(table, i+1),
+		st_packed_entry, table->real_entries - i);
+    }
+}
+
+static inline void
+remove_safe_packed_entry(st_table *table, st_index_t i, st_data_t never)
+{
+    table->num_entries--;
+    PKEY_SET(table, i, never);
+    PVAL_SET(table, i, never);
+    PHASH_SET(table, i, 0);
+}
+
+/* ultrapacking */
+#define real_upacked num_bins
+#define MAX_UPACKED_HASH 1
+#define ULTRAPACKED(table) ((table)->real_upacked <= 1)
+#define UPACKED_ENT(table) ((table)->as.upacked)
+#define UPKEY(table) UPACKED_ENT(table).key
+#define UPVAL(table) UPACKED_ENT(table).val
+#define UPHASH(table) UPACKED_ENT(table).hash
+#define UPKEY_SET(table, v) (UPACKED_ENT(table).key = (v))
+#define UPVAL_SET(table, v) (UPACKED_ENT(table).val = (v))
+#define UPHASH_SET(table, v) (UPACKED_ENT(table).hash = (v))
+static inline void
+remove_upacked_entry(st_table *table)
+{
+    table->real_upacked = table->num_entries = 0;
+}
+
+static inline void
+remove_safe_upacked_entry(st_table *table, st_data_t never)
+{
+    table->num_entries = 0;
+    UPKEY_SET(table, never);
+    UPVAL_SET(table, never);
+    UPHASH_SET(table, 0);
+}
 /*
  * MINSIZE is the minimum size of a dictionary.
  */
@@ -85,8 +204,8 @@ static void rehash(st_table *);
 Table of prime numbers 2^n+a, 2<=n<=30.
 */
 static const unsigned int primes[] = {
-	8 + 3,
-	16 + 3,
+	ST_DEFAULT_INIT_TABLE_SIZE,
+	ST_DEFAULT_SECOND_TABLE_SIZE,
 	32 + 5,
 	64 + 3,
 	128 + 3,
@@ -161,8 +280,6 @@ stat_col(void)
 }
 #endif

-#define MAX_PACKED_NUMHASH (ST_DEFAULT_INIT_TABLE_SIZE/2)
-
 st_table*
 st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
 {
@@ -181,14 +298,19 @@ st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
     }
 #endif

-    size = new_size(size);	/* round up to prime number */

-    tbl = alloc(st_table);
+    tbl = st_alloc_table();
     tbl->type = type;
     tbl->num_entries = 0;
-    tbl->entries_packed = type == &type_numhash && size/2 <= MAX_PACKED_NUMHASH;
+    tbl->entries_packed = size <= MAX_PACKED_HASH;
+    if (tbl->entries_packed) {
+	size = size <= MAX_UPACKED_HASH ? 0 : ST_DEFAULT_PACKED_TABLE_SIZE;
+    }
+    else {
+	size = new_size(size);	/* round up to prime number */
+    }
     tbl->num_bins = size;
-    tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*));
+    tbl->bins = size ? st_alloc_bins(size) : 0;
     tbl->head = 0;
     tbl->tail = 0;

@@ -243,17 +365,23 @@ st_clear(st_table *table)
     register st_table_entry *ptr, *next;
     st_index_t i;

+    if (ULTRAPACKED(table)) {
+	remove_upacked_entry(table);
+	return;
+    }
+
     if (table->entries_packed) {
         table->num_entries = 0;
+        table->real_entries = 0;
         return;
     }

-    for(i = 0; i < table->num_bins; i++) {
+    for (i = 0; i < table->num_bins; i++) {
 	ptr = table->bins[i];
 	table->bins[i] = 0;
 	while (ptr != 0) {
 	    next = ptr->next;
-	    free(ptr);
+	    st_free_entry(ptr);
 	    ptr = next;
 	}
     }
@@ -266,14 +394,19 @@ void
 st_free_table(st_table *table)
 {
     st_clear(table);
-    free(table->bins);
-    free(table);
+    if (!ULTRAPACKED(table)) {
+	st_free_bins(table->bins, table->num_bins);
+    }
+    st_dealloc_table(table);
 }

 size_t
 st_memsize(const st_table *table)
 {
-    if (table->entries_packed) {
+    if (ULTRAPACKED(table)) {
+	return sizeof(st_table);
+    }
+    else if (table->entries_packed) {
 	return table->num_bins * sizeof (void *) + sizeof(st_table);
     }
     else {
@@ -306,46 +439,77 @@ count_collision(const struct st_hash_type *type)
 #define FOUND_ENTRY
 #endif

-#define FIND_ENTRY(table, ptr, hash_val, bin_pos) do {\
-    (bin_pos) = (hash_val)%(table)->num_bins;\
-    (ptr) = (table)->bins[(bin_pos)];\
-    FOUND_ENTRY;\
-    if (PTR_NOT_EQUAL((table), (ptr), (hash_val), key)) {\
-	COLLISION;\
-	while (PTR_NOT_EQUAL((table), (ptr)->next, (hash_val), key)) {\
-	    (ptr) = (ptr)->next;\
-	}\
-	(ptr) = (ptr)->next;\
-    }\
-} while (0)
+#define FIND_ENTRY(table, ptr, hash_val, bin_pos) \
+    ((ptr) = find_entry((table), key, (hash_val), ((bin_pos) = (hash_val)%(table)->num_bins)))
+
+static st_table_entry *
+find_entry(st_table *table, st_data_t key, st_index_t hash_val, st_index_t bin_pos)
+{
+    register st_table_entry *ptr = table->bins[bin_pos];
+    FOUND_ENTRY;
+    if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {
+	COLLISION;
+	while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {
+	    ptr = ptr->next;
+	}
+	ptr = ptr->next;
+    }
+    return ptr;
+}
+
+static inline st_index_t
+find_packed_index(st_table *table, st_index_t hash_val, st_data_t key)
+{
+    st_index_t i = 0;
+    while (i < table->real_entries &&
+	   (PHASH(table, i) != hash_val || !EQUAL(table, key, PKEY(table, i)))) {
+	i++;
+    }
+    return i;
+}
+
+static inline int
+check_upacked(st_table *table, st_index_t hash_val, st_data_t key)
+{
+    return table->num_entries &&
+	UPHASH(table) == hash_val &&
+	EQUAL(table, key, UPKEY(table));
+}

 #define collision_check 0

 int
 st_lookup(st_table *table, register st_data_t key, st_data_t *value)
 {
-    st_index_t hash_val, bin_pos;
+    st_index_t hash_val;
     register st_table_entry *ptr;

+    hash_val = do_hash(key, table);
+
+    if (ULTRAPACKED(table)) {
+	if (check_upacked(table, hash_val, key)) {
+	    if (value != 0) *value = UPVAL(table);
+	    return 1;
+	}
+	return 0;
+    }
+
     if (table->entries_packed) {
-        st_index_t i;
-        for (i = 0; i < table->num_entries; i++) {
-            if ((st_data_t)table->bins[i*2] == key) {
-                if (value !=0) *value = (st_data_t)table->bins[i*2+1];
-                return 1;
-            }
-        }
+	st_index_t i = find_packed_index(table, hash_val, key);
+	if (i < table->real_entries) {
+	    if (value != 0) *value = PVAL(table, i);
+	    return 1;
+	}
         return 0;
     }

-    hash_val = do_hash(key, table);
-    FIND_ENTRY(table, ptr, hash_val, bin_pos);
+    ptr = find_entry(table, key, hash_val, hash_val % table->num_bins);

     if (ptr == 0) {
 	return 0;
     }
     else {
-	if (value != 0)  *value = ptr->record;
+	if (value != 0) *value = ptr->record;
 	return 1;
     }
 }
@@ -353,22 +517,29 @@ st_lookup(st_table *table, register st_data_t key, st_data_t *value)
 int
 st_get_key(st_table *table, register st_data_t key, st_data_t *result)
 {
-    st_index_t hash_val, bin_pos;
+    st_index_t hash_val;
     register st_table_entry *ptr;

+    hash_val = do_hash(key, table);
+
+    if (ULTRAPACKED(table)) {
+	if (check_upacked(table, hash_val, key)) {
+	    if (result != 0) *result = UPKEY(table);
+	    return 1;
+	}
+	return 0;
+    }
+
     if (table->entries_packed) {
-        st_index_t i;
-        for (i = 0; i < table->num_entries; i++) {
-            if ((st_data_t)table->bins[i*2] == key) {
-                if (result !=0) *result = (st_data_t)table->bins[i*2];
-                return 1;
-            }
-        }
+	st_index_t i = find_packed_index(table, hash_val, key);
+	if (i < table->real_entries) {
+	    if (result != 0) *result = PKEY(table, i);
+	    return 1;
+	}
         return 0;
     }

-    hash_val = do_hash(key, table);
-    FIND_ENTRY(table, ptr, hash_val, bin_pos);
+    ptr = find_entry(table, key, hash_val, hash_val % table->num_bins);

     if (ptr == 0) {
 	return 0;
@@ -382,85 +553,151 @@ st_get_key(st_table *table, register st_data_t key, st_data_t *result)
 #undef collision_check
 #define collision_check 1

-#define MORE_PACKABLE_P(table) \
-    ((st_index_t)((table)->num_entries+1) * 2 <= (table)->num_bins && \
-     (table)->num_entries+1 <= MAX_PACKED_NUMHASH)
-
-#define ADD_DIRECT(table, key, value, hash_val, bin_pos)\
-do {\
-    st_table_entry *entry;\
-    if ((table)->num_entries > ST_DEFAULT_MAX_DENSITY * (table)->num_bins) {\
-	rehash(table);\
-        (bin_pos) = (hash_val) % (table)->num_bins;\
-    }\
-    \
-    entry = alloc(st_table_entry);\
-    \
-    entry->hash = (hash_val);\
-    entry->key = (key);\
-    entry->record = (value);\
-    entry->next = (table)->bins[(bin_pos)];\
-    if ((table)->head != 0) {\
-	entry->fore = 0;\
-	(entry->back = (table)->tail)->fore = entry;\
-	(table)->tail = entry;\
-    }\
-    else {\
-	(table)->head = (table)->tail = entry;\
-	entry->fore = entry->back = 0;\
-    }\
-    (table)->bins[(bin_pos)] = entry;\
-    (table)->num_entries++;\
-} while (0)
+static inline st_table_entry *
+new_entry(st_table * table, st_data_t key, st_data_t value,
+	st_index_t hash_val, register st_index_t bin_pos)
+{
+    register st_table_entry *entry = st_alloc_entry();
+
+    entry->next = table->bins[bin_pos];
+    table->bins[bin_pos] = entry;
+    entry->hash = hash_val;
+    entry->key = key;
+    entry->record = value;
+
+    return entry;
+}
+
+static inline void
+add_direct(st_table *table, st_data_t key, st_data_t value,
+	   st_index_t hash_val, register st_index_t bin_pos)
+{
+    register st_table_entry *entry;
+    if (table->num_entries > ST_DEFAULT_MAX_DENSITY * table->num_bins) {
+	rehash(table);
+        bin_pos = hash_val % table->num_bins;
+    }
+
+    entry = new_entry(table, key, value, hash_val, bin_pos);
+
+    if (table->head != 0) {
+	entry->fore = 0;
+	(entry->back = table->tail)->fore = entry;
+	table->tail = entry;
+    }
+    else {
+	table->head = table->tail = entry;
+	entry->fore = entry->back = 0;
+    }
+    table->num_entries++;
+}

 static void
 unpack_entries(register st_table *table)
 {
     st_index_t i;
-    struct st_table_entry *packed_bins[MAX_PACKED_NUMHASH*2];
+    st_packed_entry packed_bins[MAX_PACKED_HASH];
+    register st_table_entry *entry, *preventry = 0, **chain;
     st_table tmp_table = *table;

-    memcpy(packed_bins, table->bins, sizeof(struct st_table_entry *) * table->num_entries*2);
-    table->bins = packed_bins;
+    MEMCPY(packed_bins, PACKED_BINS(table), st_packed_entry, MAX_PACKED_HASH);
+    table->as.packed.entries = packed_bins;
     tmp_table.entries_packed = 0;
-    tmp_table.num_entries = 0;
-    memset(tmp_table.bins, 0, sizeof(struct st_table_entry *) * tmp_table.num_bins);
-    for (i = 0; i < table->num_entries; i++) {
-        st_insert(&tmp_table, (st_data_t)packed_bins[i*2], (st_data_t)packed_bins[i*2+1]);
-    }
+#if ST_DEFAULT_INIT_TABLE_SIZE == ST_DEFAULT_PACKED_TABLE_SIZE
+    MEMZERO(tmp_table.bins, st_table_entry*, tmp_table.num_bins);
+#else
+    tmp_table.bins = st_realloc_bins(tmp_table.bins, ST_DEFAULT_INIT_TABLE_SIZE, tmp_table.num_bins);
+    tmp_table.num_bins = ST_DEFAULT_INIT_TABLE_SIZE;
+#endif
+    i = 0;
+    chain = &tmp_table.head;
+    do {
+	st_data_t key = packed_bins[i].key;
+	st_data_t val = packed_bins[i].val;
+	st_index_t hash = packed_bins[i].hash;
+	entry = new_entry(&tmp_table, key, val, hash,
+			  hash % ST_DEFAULT_INIT_TABLE_SIZE);
+	*chain = entry;
+	entry->back = preventry;
+	preventry = entry;
+	chain = &entry->fore;
+    } while (++i < MAX_PACKED_HASH);
+    *chain = NULL;
+    tmp_table.tail = entry;
     *table = tmp_table;
 }

+static void
+add_packed_direct(st_table *table, st_data_t key, st_data_t value, st_index_t hash_val)
+{
+    if (table->real_entries < MAX_PACKED_HASH) {
+	st_index_t i = table->real_entries++;
+	PKEY_SET(table, i, key);
+	PVAL_SET(table, i, value);
+	PHASH_SET(table, i, hash_val);
+	table->num_entries++;
+    }
+    else {
+	unpack_entries(table);
+	add_direct(table, key, value, hash_val, hash_val % table->num_bins);
+    }
+}
+
+static void
+add_upacked_direct(register st_table *table, register st_data_t key, st_data_t value, st_index_t hash_val)
+{
+    if (table->real_upacked) {
+	st_packed_entry *entries = (st_packed_entry *) st_alloc_bins(ST_DEFAULT_PACKED_TABLE_SIZE);
+	entries[0] = UPACKED_ENT(table);
+	entries[1].hash = hash_val;
+	entries[1].key = key;
+	entries[1].val = value;
+	table->num_bins = ST_DEFAULT_PACKED_TABLE_SIZE;
+	table->real_entries = 2;
+	table->num_entries++;
+	table->as.packed.entries = entries;
+    }
+    else {
+	table->real_upacked = 1;
+	table->num_entries = 1;
+	UPHASH_SET(table, hash_val);
+	UPKEY_SET(table, key);
+	UPVAL_SET(table, value);
+    }
+}
+
 int
 st_insert(register st_table *table, register st_data_t key, st_data_t value)
 {
-    st_index_t hash_val, bin_pos;
+    st_index_t hash_val;
+    register st_index_t bin_pos;
     register st_table_entry *ptr;

+    hash_val = do_hash(key, table);
+
+    if (ULTRAPACKED(table)) {
+	if (check_upacked(table, hash_val, key)) {
+	    UPVAL_SET(table, value);
+	    return 1;
+	}
+	add_upacked_direct(table, key, value, hash_val);
+	return 0;
+    }
+
     if (table->entries_packed) {
-        st_index_t i;
-        for (i = 0; i < table->num_entries; i++) {
-            if ((st_data_t)table->bins[i*2] == key) {
-                table->bins[i*2+1] = (struct st_table_entry*)value;
-                return 1;
-            }
-        }
-        if (MORE_PACKABLE_P(table)) {
-            i = table->num_entries++;
-            table->bins[i*2] = (struct st_table_entry*)key;
-            table->bins[i*2+1] = (struct st_table_entry*)value;
-            return 0;
-        }
-        else {
-            unpack_entries(table);
+	st_index_t i = find_packed_index(table, hash_val, key);
+	if (i < table->real_entries) {
+	    PVAL_SET(table, i, value);
+	    return 1;
         }
+	add_packed_direct(table, key, value, hash_val);
+	return 0;
     }

-    hash_val = do_hash(key, table);
     FIND_ENTRY(table, ptr, hash_val, bin_pos);

     if (ptr == 0) {
-	ADD_DIRECT(table, key, value, hash_val, bin_pos);
+	add_direct(table, key, value, hash_val, bin_pos);
 	return 0;
     }
     else {
@@ -473,34 +710,38 @@ int
 st_insert2(register st_table *table, register st_data_t key, st_data_t value,
 	   st_data_t (*func)(st_data_t))
 {
-    st_index_t hash_val, bin_pos;
+    st_index_t hash_val;
+    register st_index_t bin_pos;
     register st_table_entry *ptr;

+    hash_val = do_hash(key, table);
+
+    if (ULTRAPACKED(table)) {
+	if (check_upacked(table, hash_val, key)) {
+	    UPVAL_SET(table, value);
+	    return 1;
+	}
+	key = (*func)(key);
+	add_upacked_direct(table, key, value, hash_val);
+	return 0;
+    }
+
     if (table->entries_packed) {
-        st_index_t i;
-        for (i = 0; i < table->num_entries; i++) {
-            if ((st_data_t)table->bins[i*2] == key) {
-                table->bins[i*2+1] = (struct st_table_entry*)value;
-                return 1;
-            }
-        }
-        if (MORE_PACKABLE_P(table)) {
-            i = table->num_entries++;
-            table->bins[i*2] = (struct st_table_entry*)key;
-            table->bins[i*2+1] = (struct st_table_entry*)value;
-            return 0;
-        }
-        else {
-            unpack_entries(table);
-        }
+	st_index_t i = find_packed_index(table, hash_val, key);
+	if (i < table->real_entries) {
+	    PVAL_SET(table, i, value);
+	    return 1;
+	}
+	key = (*func)(key);
+	add_packed_direct(table, key, value, hash_val);
+	return 0;
     }

-    hash_val = do_hash(key, table);
     FIND_ENTRY(table, ptr, hash_val, bin_pos);

     if (ptr == 0) {
 	key = (*func)(key);
-	ADD_DIRECT(table, key, value, hash_val, bin_pos);
+	add_direct(table, key, value, hash_val, bin_pos);
 	return 0;
     }
     else {
@@ -512,36 +753,30 @@ st_insert2(register st_table *table, register st_data_t key, st_data_t value,
 void
 st_add_direct(st_table *table, st_data_t key, st_data_t value)
 {
-    st_index_t hash_val, bin_pos;
+    st_index_t hash_val;
+
+    hash_val = do_hash(key, table);
+    if (ULTRAPACKED(table)) {
+	add_upacked_direct(table, key, value, hash_val);
+	return;
+    }

     if (table->entries_packed) {
-        int i;
-        if (MORE_PACKABLE_P(table)) {
-            i = table->num_entries++;
-            table->bins[i*2] = (struct st_table_entry*)key;
-            table->bins[i*2+1] = (struct st_table_entry*)value;
-            return;
-        }
-        else {
-            unpack_entries(table);
-        }
+	add_packed_direct(table, key, value, hash_val);
+	return;
     }

-    hash_val = do_hash(key, table);
-    bin_pos = hash_val % table->num_bins;
-    ADD_DIRECT(table, key, value, hash_val, bin_pos);
+    add_direct(table, key, value, hash_val, hash_val % table->num_bins);
 }

 static void
 rehash(register st_table *table)
 {
     register st_table_entry *ptr, **new_bins;
-    st_index_t i, new_num_bins, hash_val;
+    st_index_t new_num_bins, hash_val;

     new_num_bins = new_size(table->num_bins+1);
-    new_bins = (st_table_entry**)
-	xrealloc(table->bins, new_num_bins * sizeof(st_table_entry*));
-    for (i = 0; i < new_num_bins; ++i) new_bins[i] = 0;
+    new_bins = st_realloc_bins(table->bins, new_num_bins, table->num_bins);
     table->num_bins = new_num_bins;
     table->bins = new_bins;

@@ -558,34 +793,37 @@ st_table*
 st_copy(st_table *old_table)
 {
     st_table *new_table;
-    st_table_entry *ptr, *entry, *prev, **tail;
+    st_table_entry *ptr, *entry, *prev, **tailp;
     st_index_t num_bins = old_table->num_bins;
     st_index_t hash_val;

-    new_table = alloc(st_table);
+    new_table = st_alloc_table();
     if (new_table == 0) {
 	return 0;
     }

     *new_table = *old_table;
-    new_table->bins = (st_table_entry**)
-	Calloc((unsigned)num_bins, sizeof(st_table_entry*));
+    if (ULTRAPACKED(old_table)) {
+	return new_table;
+    }
+
+    new_table->bins = st_alloc_bins(num_bins);

     if (new_table->bins == 0) {
-	free(new_table);
+	st_dealloc_table(new_table);
 	return 0;
     }

     if (old_table->entries_packed) {
-        memcpy(new_table->bins, old_table->bins, sizeof(struct st_table_entry *) * old_table->num_bins);
+        MEMCPY(new_table->bins, old_table->bins, st_table_entry*, old_table->num_bins);
         return new_table;
     }

     if ((ptr = old_table->head) != 0) {
 	prev = 0;
-	tail = &new_table->head;
+	tailp = &new_table->head;
 	do {
-	    entry = alloc(st_table_entry);
+	    entry = st_alloc_entry();
 	    if (entry == 0) {
 		st_free_table(new_table);
 		return 0;
@@ -595,8 +833,8 @@ st_copy(st_table *old_table)
 	    entry->next = new_table->bins[hash_val];
 	    new_table->bins[hash_val] = entry;
 	    entry->back = prev;
-	    *tail = prev = entry;
-	    tail = &entry->fore;
+	    *tailp = prev = entry;
+	    tailp = &entry->fore;
 	} while ((ptr = ptr->fore) != 0);
 	new_table->tail = prev;
     }
@@ -604,21 +842,22 @@ st_copy(st_table *old_table)
     return new_table;
 }

-#define REMOVE_ENTRY(table, ptr) do					\
-    {									\
-	if ((ptr)->fore == 0 && (ptr)->back == 0) {			\
-	    (table)->head = 0;						\
-	    (table)->tail = 0;						\
-	}								\
-	else {								\
-	    st_table_entry *fore = (ptr)->fore, *back = (ptr)->back;	\
-	    if (fore) fore->back = back;				\
-	    if (back) back->fore = fore;				\
-	    if ((ptr) == (table)->head) (table)->head = fore;		\
-	    if ((ptr) == (table)->tail) (table)->tail = back;		\
-	}								\
-	(table)->num_entries--;						\
-    } while (0)
+static inline void
+remove_entry(st_table *table, st_table_entry *ptr)
+{
+    if (ptr->fore == 0 && ptr->back == 0) {
+	table->head = 0;
+	table->tail = 0;
+    }
+    else {
+	st_table_entry *fore = ptr->fore, *back = ptr->back;
+	if (fore) fore->back = back;
+	if (back) back->fore = fore;
+	if (ptr == table->head) table->head = fore;
+	if (ptr == table->tail) table->tail = back;
+    }
+    table->num_entries--;
+}

 int
 st_delete(register st_table *table, register st_data_t *key, st_data_t *value)
@@ -627,30 +866,38 @@ st_delete(register st_table *table, register st_data_t *key, st_data_t *value)
     st_table_entry **prev;
     register st_table_entry *ptr;

+    hash_val = do_hash(*key, table);
+
+    if (ULTRAPACKED(table)) {
+	if (check_upacked(table, hash_val, *key)) {
+	    if (value != 0) *value = UPVAL(table);
+	    *key = UPKEY(table);
+	    remove_upacked_entry(table);
+	    return 1;
+	}
+	return 0;
+    }
+
     if (table->entries_packed) {
-        st_index_t i;
-        for (i = 0; i < table->num_entries; i++) {
-            if ((st_data_t)table->bins[i*2] == *key) {
-                if (value != 0) *value = (st_data_t)table->bins[i*2+1];
-                table->num_entries--;
-                memmove(&table->bins[i*2], &table->bins[(i+1)*2],
-                        sizeof(struct st_table_entry*) * 2*(table->num_entries-i));
-                return 1;
-            }
+	st_index_t i = find_packed_index(table, hash_val, *key);
+	if (i < table->real_entries) {
+	    if (value != 0) *value = PVAL(table, i);
+	    *key = PKEY(table, i);
+	    remove_packed_entry(table, i);
+	    return 1;
         }
         if (value != 0) *value = 0;
         return 0;
     }

-    hash_val = do_hash_bin(*key, table);
-
-    for (prev = &table->bins[hash_val]; (ptr = *prev) != 0; prev = &ptr->next) {
+    prev = &table->bins[hash_val % table->num_bins];
+    for (;(ptr = *prev) != 0; prev = &ptr->next) {
 	if (EQUAL(table, *key, ptr->key)) {
 	    *prev = ptr->next;
-	    REMOVE_ENTRY(table, ptr);
+	    remove_entry(table, ptr);
 	    if (value != 0) *value = ptr->record;
 	    *key = ptr->key;
-	    free(ptr);
+	    st_free_entry(ptr);
 	    return 1;
 	}
     }
@@ -665,25 +912,36 @@ st_delete_safe(register st_table *table, register st_data_t *key, st_data_t *val
     st_index_t hash_val;
     register st_table_entry *ptr;

+    hash_val = do_hash(*key, table);
+
+    if (ULTRAPACKED(table)) {
+	if (check_upacked(table, hash_val, *key)) {
+	    if (value != 0) *value = UPVAL(table);
+	    *key = UPKEY(table);
+	    remove_safe_upacked_entry(table, never);
+	    return 1;
+	}
+	if (value != 0) *value = 0;
+	return 0;
+    }
+
     if (table->entries_packed) {
-	st_index_t i;
-	for (i = 0; i < table->num_entries; i++) {
-	    if ((st_data_t)table->bins[i*2] == *key) {
-		if (value != 0) *value = (st_data_t)table->bins[i*2+1];
-		table->bins[i*2] = (void *)never;
-		return 1;
-	    }
+	st_index_t i = find_packed_index(table, hash_val, *key);
+	if (i < table->real_entries) {
+	    if (value != 0) *value = PVAL(table, i);
+	    *key = PKEY(table, i);
+	    remove_safe_packed_entry(table, i, never);
+	    return 1;
 	}
 	if (value != 0) *value = 0;
 	return 0;
     }

-    hash_val = do_hash_bin(*key, table);
-    ptr = table->bins[hash_val];
+    ptr = table->bins[hash_val % table->num_bins];

     for (; ptr != 0; ptr = ptr->next) {
 	if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) {
-	    REMOVE_ENTRY(table, ptr);
+	    remove_entry(table, ptr);
 	    *key = ptr->key;
 	    if (value != 0) *value = ptr->record;
 	    ptr->key = ptr->record = never;
@@ -701,17 +959,23 @@ st_cleanup_safe(st_table *table, st_data_t never)
     st_table_entry *ptr, **last, *tmp;
     st_index_t i;

+    if (ULTRAPACKED(table)) {
+	table->real_upacked = table->num_entries;
+	return;
+    }
+
     if (table->entries_packed) {
 	st_index_t i = 0, j = 0;
-	while ((st_data_t)table->bins[i*2] != never) {
-	    if (i++ == table->num_entries) return;
+	while (PKEY(table, i) != never) {
+	    if (i++ == table->real_entries) return;
 	}
-	for (j = i; ++i < table->num_entries;) {
-	    if ((st_data_t)table->bins[i*2] == never) continue;
-	    table->bins[j*2] = table->bins[i*2];
-	    table->bins[j*2+1] = table->bins[i*2+1];
+	for (j = i; ++i < table->real_entries;) {
+	    if (PKEY(table, i) == never) continue;
+	    PACKED_ENT(table, j) = PACKED_ENT(table, i);
 	    j++;
 	}
+	table->real_entries = j;
+	/* table->num_entries really should be equal j at this moment, but let set it anyway */
 	table->num_entries = j;
 	return;
     }
@@ -722,7 +986,7 @@ st_cleanup_safe(st_table *table, st_data_t never)
 	    if (ptr->key == never) {
 		tmp = ptr;
 		*last = ptr = ptr->next;
-		free(tmp);
+		st_free_entry(tmp);
 	    }
 	    else {
 		ptr = *(last = &ptr->next);
@@ -732,50 +996,78 @@ st_cleanup_safe(st_table *table, st_data_t never)
 }

 int
-st_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg)
+st_foreach_check(st_table *table, int (*func)(ANYARGS), st_data_t arg, st_data_t never)
 {
     st_table_entry *ptr, **last, *tmp;
     enum st_retval retval;
-    st_index_t i;
+    st_data_t key, val;
+    st_index_t hash, i = 0;
+
+    if (table->num_entries == 0) {
+	return 0;
+    }
+
+    if (ULTRAPACKED(table)) {
+	key = UPKEY(table);
+	val = UPVAL(table);
+	hash = UPHASH(table);
+	if (key == never) return 0;
+	retval = (*func)(key, val, arg);
+	if (!ULTRAPACKED(table)) {
+	    goto packed;
+	}
+	switch(retval) {
+	  case ST_CHECK:
+	    if (UPHASH(table) == 0 && UPKEY(table) == never)
+		break;
+	    if (check_upacked(table, hash, key))
+		break;
+	    goto deleted;
+	  case ST_DELETE:
+	    remove_safe_upacked_entry(table, never);
+	  case ST_CONTINUE:
+	  case ST_STOP:
+	    break;
+	}
+	return 0;
+    }

     if (table->entries_packed) {
-        for (i = 0; i < table->num_entries; i++) {
-            st_index_t j;
-            st_data_t key, val;
-            key = (st_data_t)table->bins[i*2];
-            val = (st_data_t)table->bins[i*2+1];
-            retval = (*func)(key, val, arg);
+	for (i = 0; i < table->real_entries; i++) {
+	    key = PKEY(table, i);
+	    val = PVAL(table, i);
+	    hash = PHASH(table, i);
+	    if (key == never) continue;
+	    retval = (*func)(key, val, arg);
+	  packed:
 	    if (!table->entries_packed) {
-		FIND_ENTRY(table, ptr, key, i);
+		FIND_ENTRY(table, ptr, hash, i);
 		if (retval == ST_CHECK) {
 		    if (!ptr) goto deleted;
 		    goto unpacked_continue;
 		}
 		goto unpacked;
 	    }
-            switch (retval) {
+	    switch (retval) {
 	      case ST_CHECK:	/* check if hash is modified during iteration */
-                for (j = 0; j < table->num_entries; j++) {
-                    if ((st_data_t)table->bins[j*2] == key)
-                        break;
-                }
-                if (j == table->num_entries) {
+		if (PHASH(table, i) == 0 && PKEY(table, i) == never) {
+		    break;
+		}
+		i = find_packed_index(table, hash, key);
+		if (i == table->real_entries) {
 		    goto deleted;
-                }
+		}
 		/* fall through */
 	      case ST_CONTINUE:
 		break;
 	      case ST_STOP:
 		return 0;
 	      case ST_DELETE:
-                table->num_entries--;
-                memmove(&table->bins[i*2], &table->bins[(i+1)*2],
-                        sizeof(struct st_table_entry*) * 2*(table->num_entries-i));
-                i--;
-                break;
-            }
-        }
-        return 0;
+		remove_safe_packed_entry(table, i, never);
+		break;
+	    }
+	}
+	return 0;
     }
     else {
 	ptr = table->head;
@@ -783,6 +1075,8 @@ st_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg)

     if (ptr != 0) {
 	do {
+	    if (ptr->key == never)
+		goto unpacked_continue;
 	    i = ptr->hash % table->num_bins;
 	    retval = (*func)(ptr->key, ptr->record, arg);
 	  unpacked:
@@ -808,10 +1102,100 @@ st_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg)
 		for (; (tmp = *last) != 0; last = &tmp->next) {
 		    if (ptr == tmp) {
 			tmp = ptr->fore;
+			remove_entry(table, ptr);
+			ptr->key = ptr->record = never;
+			ptr->hash = 0;
+			ptr = tmp;
+			break;
+		    }
+		}
+	    }
+	} while (ptr && table->head);
+    }
+    return 0;
+}
+
+int
+st_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg)
+{
+    st_table_entry *ptr, **last, *tmp;
+    enum st_retval retval;
+    st_data_t key, val;
+    st_index_t hash, i = 0;
+
+    if (table->num_entries == 0) {
+	return 0;
+    }
+
+    if (ULTRAPACKED(table)) {
+	key = UPKEY(table);
+	val = UPVAL(table);
+	hash = UPHASH(table);
+	retval = (*func)(key, val, arg);
+	if (!ULTRAPACKED(table)) {
+	    goto packed;
+	}
+	switch (retval) {
+	  case ST_DELETE:
+	    remove_upacked_entry(table);
+	  case ST_CONTINUE:
+	  case ST_CHECK:
+	  case ST_STOP:
+	    break;
+	}
+	return 0;
+    }
+
+    if (table->entries_packed) {
+	for (i = 0; i < table->real_entries; i++) {
+	    key = PKEY(table, i);
+	    val = PVAL(table, i);
+	    hash = PHASH(table, i);
+	    retval = (*func)(key, val, arg);
+	  packed:
+	    if (!table->entries_packed) {
+		FIND_ENTRY(table, ptr, hash, i);
+		if (!ptr) return 0;
+		goto unpacked;
+	    }
+	    switch (retval) {
+	      case ST_CONTINUE:
+		break;
+	      case ST_CHECK:
+	      case ST_STOP:
+		return 0;
+	      case ST_DELETE:
+		remove_packed_entry(table, i);
+		i--;
+		break;
+	    }
+	}
+	return 0;
+    }
+    else {
+	ptr = table->head;
+    }
+
+    if (ptr != 0) {
+	do {
+	    i = ptr->hash % table->num_bins;
+	    retval = (*func)(ptr->key, ptr->record, arg);
+	  unpacked:
+	    switch (retval) {
+	      case ST_CONTINUE:
+		ptr = ptr->fore;
+		break;
+	      case ST_CHECK:
+	      case ST_STOP:
+		return 0;
+	      case ST_DELETE:
+		last = &table->bins[ptr->hash % table->num_bins];
+		for (; (tmp = *last) != 0; last = &tmp->next) {
+		    if (ptr == tmp) {
+			tmp = ptr->fore;
 			*last = ptr->next;
-			REMOVE_ENTRY(table, ptr);
-			free(ptr);
-			if (ptr == tmp) return 0;
+			remove_entry(table, ptr);
+			st_free_entry(ptr);
 			ptr = tmp;
 			break;
 		    }
@@ -834,13 +1218,13 @@ st_reverse_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg)
         for (i = table->num_entries-1; 0 <= i; i--) {
             int j;
             st_data_t key, val;
-            key = (st_data_t)table->bins[i*2];
-            val = (st_data_t)table->bins[i*2+1];
+            key = PKEY(table, i);
+            val = PVAL(table, i);
             retval = (*func)(key, val, arg);
             switch (retval) {
 	      case ST_CHECK:	/* check if hash is modified during iteration */
                 for (j = 0; j < table->num_entries; j++) {
-                    if ((st_data_t)table->bins[j*2] == key)
+                    if (PKEY(table, j) == key)
                         break;
                 }
                 if (j == table->num_entries) {
@@ -854,9 +1238,7 @@ st_reverse_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg)
 	      case ST_STOP:
 		return 0;
 	      case ST_DELETE:
-                table->num_entries--;
-                memmove(&table->bins[i*2], &table->bins[(i+1)*2],
-                        sizeof(struct st_table_entry*) * 2*(table->num_entries-i));
+		remove_packed_entry(table, i);
                 break;
             }
         }
@@ -889,8 +1271,8 @@ st_reverse_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg)
 		    if (ptr == tmp) {
 			tmp = ptr->back;
 			*last = ptr->next;
-			REMOVE_ENTRY(table, ptr);
-			free(ptr);
+			remove_entry(table, ptr);
+			st_free_entry(ptr);
 			ptr = tmp;
 			break;
 		    }
diff --git a/test/ruby/test_marshal.rb b/test/ruby/test_marshal.rb
index 7ec6959..f5774cd 100644
--- a/test/ruby/test_marshal.rb
+++ b/test/ruby/test_marshal.rb
@@ -312,7 +312,7 @@ class TestMarshal < Test::Unit::TestCase
         assert_equal(a.instance_variable_get(i), b.instance_variable_get(i), bug1932)
       end
     end
-    a.__send__(a.methods(true).grep(/=\z/)[0], a)
+    a.__send__(a.methods(true).grep(/\Ar.+=\z/)[0], a)
     assert_nothing_raised(bug1932) do
       b = Marshal.load(Marshal.dump(a))
       assert_equal(ClassISO8859_1, b.class, bug1932)
diff --git a/test/ruby/test_method.rb b/test/ruby/test_method.rb
index 2cc0de5..b1bf33e 100644
--- a/test/ruby/test_method.rb
+++ b/test/ruby/test_method.rb
@@ -406,7 +406,7 @@ class TestMethod < Test::Unit::TestCase
     obj = a.new
     assert_equal([:a], obj.public_methods(false), bug)
     obj.extend(m)
-    assert_equal([:m1, :a], obj.public_methods(false), bug)
+    assert_equal([:a, :m1], obj.public_methods(false).sort, bug)
   end

   def test_visibility
diff --git a/test/ruby/test_module.rb b/test/ruby/test_module.rb
index 6176f48..a67d845 100644
--- a/test/ruby/test_module.rb
+++ b/test/ruby/test_module.rb
@@ -870,7 +870,7 @@ class TestModule < Test::Unit::TestCase
       (class << self ; self ; end).class_eval do
         define_method :method_added do |sym|
           memo << sym
-          memo << mod.instance_methods(false)
+          memo << mod.instance_methods(false).sort
           memo << (mod.instance_method(sym) rescue nil)
         end
       end
@@ -887,10 +887,10 @@ class TestModule < Test::Unit::TestCase
     assert_equal [:f, :g], memo.shift
     assert_equal mod.instance_method(:f), memo.shift
     assert_equal :a, memo.shift
-    assert_equal [:f, :g, :a], memo.shift
+    assert_equal [:a, :f, :g], memo.shift
     assert_equal mod.instance_method(:a), memo.shift
     assert_equal :a=, memo.shift
-    assert_equal [:f, :g, :a, :a=], memo.shift
+    assert_equal [:a, :a=, :f, :g], memo.shift
     assert_equal mod.instance_method(:a=), memo.shift
   end

diff --git a/time.c b/time.c
index a47ce23..64fb1d9 100644
--- a/time.c
+++ b/time.c
@@ -4712,16 +4712,14 @@ time_mload(VALUE time, VALUE str)
     long nsec;
     VALUE submicro, nano_num, nano_den, offset;
     wideval_t timew;
-    st_data_t data;

     time_modify(time);

 #define get_attr(attr, iffound) \
     attr = rb_attr_get(str, id_##attr); \
     if (!NIL_P(attr)) { \
-	data = id_##attr; \
 	iffound; \
-        st_delete(rb_generic_ivar_table(str), &data, 0); \
+        sa_delete(rb_generic_ivar_table(str), (sa_index_t)id_##attr, 0); \
     }

     get_attr(nano_num, {});
diff --git a/variable.c b/variable.c
index ba47d26..9c7ed5e 100644
--- a/variable.c
+++ b/variable.c
@@ -19,15 +19,15 @@
 #include "constant.h"
 #include "internal.h"

-st_table *rb_global_tbl;
-st_table *rb_class_tbl;
+sa_table rb_global_tbl;
+sa_table rb_class_tbl;
 static ID autoload, classpath, tmp_classpath, classid;

 void
 Init_var_tables(void)
 {
-    rb_global_tbl = st_init_numtable();
-    rb_class_tbl = st_init_numtable();
+    sa_init_table(&rb_global_tbl, 0);
+    sa_init_table(&rb_class_tbl, 0);
     CONST_ID(autoload, "__autoload__");
     CONST_ID(classpath, "__classpath__");
     CONST_ID(tmp_classpath, "__tmp_classpath__");
@@ -43,7 +43,7 @@ struct fc_result {
 };

 static VALUE
-fc_path(struct fc_result *fc, ID name)
+fc_path(struct fc_result *fc, sa_index_t name)
 {
     VALUE path, tmp;

@@ -51,8 +51,7 @@ fc_path(struct fc_result *fc, ID name)
     while (fc) {
 	st_data_t n;
 	if (fc->track == rb_cObject) break;
-	if (RCLASS_IV_TBL(fc->track) &&
-	    st_lookup(RCLASS_IV_TBL(fc->track), (st_data_t)classpath, &n)) {
+	if (sa_lookup(RCLASS_IV_TBL(fc->track), (sa_index_t)classpath, &n)) {
 	    tmp = rb_str_dup((VALUE)n);
 	    rb_str_cat2(tmp, "::");
 	    rb_str_append(tmp, path);
@@ -70,7 +69,7 @@ fc_path(struct fc_result *fc, ID name)
 }

 static int
-fc_i(ID key, rb_const_entry_t *ce, struct fc_result *res)
+fc_i(sa_index_t key, rb_const_entry_t *ce, struct fc_result *res)
 {
     VALUE value = ce->value;
     if (!rb_is_const_id(key)) return ST_CONTINUE;
@@ -98,7 +97,7 @@ fc_i(ID key, rb_const_entry_t *ce, struct fc_result *res)
 	    arg.klass = res->klass;
 	    arg.track = value;
 	    arg.prev = res;
-	    st_foreach(RCLASS_CONST_TBL(value), fc_i, (st_data_t)&arg);
+	    sa_foreach(RCLASS_CONST_TBL(value), fc_i, (st_data_t)&arg);
 	    if (arg.path) {
 		res->path = arg.path;
 		return ST_STOP;
@@ -123,18 +122,14 @@ find_class_path(VALUE klass)
     arg.track = rb_cObject;
     arg.prev = 0;
     if (RCLASS_CONST_TBL(rb_cObject)) {
-	st_foreach_safe(RCLASS_CONST_TBL(rb_cObject), fc_i, (st_data_t)&arg);
+	sa_foreach(RCLASS_CONST_TBL(rb_cObject), fc_i, (st_data_t)&arg);
     }
     if (arg.path == 0) {
-	st_foreach_safe(rb_class_tbl, fc_i, (st_data_t)&arg);
+	sa_foreach(&rb_class_tbl, fc_i, (st_data_t)&arg);
     }
     if (arg.path) {
-	st_data_t tmp = tmp_classpath;
-	if (!RCLASS_IV_TBL(klass)) {
-	    RCLASS_IV_TBL(klass) = st_init_numtable();
-	}
-	st_insert(RCLASS_IV_TBL(klass), (st_data_t)classpath, arg.path);
-	st_delete(RCLASS_IV_TBL(klass), &tmp, 0);
+	sa_insert(RCLASS_IV_TBL(klass), (sa_index_t)classpath, arg.path);
+	sa_delete(RCLASS_IV_TBL(klass), (sa_index_t)tmp_classpath, 0);
 	return arg.path;
     }
     return Qnil;
@@ -147,16 +142,15 @@ classname(VALUE klass)
     st_data_t n;

     if (!klass) klass = rb_cObject;
-    if (RCLASS_IV_TBL(klass)) {
-	if (!st_lookup(RCLASS_IV_TBL(klass), (st_data_t)classpath, &n)) {
-	    if (!st_lookup(RCLASS_IV_TBL(klass), (st_data_t)classid, &n)) {
+    if (RCLASS_IV_TBL(klass)->num_entries) {
+	if (!sa_lookup(RCLASS_IV_TBL(klass), (sa_index_t)classpath, &n)) {
+	    if (!sa_lookup(RCLASS_IV_TBL(klass), (sa_index_t)classid, &n)) {
 		return find_class_path(klass);
 	    }
 	    path = rb_str_dup(rb_id2str(SYM2ID((VALUE)n)));
 	    OBJ_FREEZE(path);
-	    st_insert(RCLASS_IV_TBL(klass), (st_data_t)classpath, (st_data_t)path);
-	    n = classid;
-	    st_delete(RCLASS_IV_TBL(klass), &n, 0);
+	    sa_insert(RCLASS_IV_TBL(klass), (sa_index_t)classpath, (st_data_t)path);
+	    sa_delete(RCLASS_IV_TBL(klass), (sa_index_t)classid, 0);
 	}
 	else {
 	    path = (VALUE)n;
@@ -192,8 +186,7 @@ rb_class_path(VALUE klass)
     st_data_t n = (st_data_t)path;

     if (!NIL_P(path)) return path;
-    if (RCLASS_IV_TBL(klass) && st_lookup(RCLASS_IV_TBL(klass),
-					  (st_data_t)tmp_classpath, &n)) {
+    if (sa_lookup(RCLASS_IV_TBL(klass), (sa_index_t)tmp_classpath, &n)) {
 	return (VALUE)n;
     }
     else {
@@ -364,7 +357,7 @@ rb_global_entry(ID id)
     struct global_entry *entry;
     st_data_t data;

-    if (!st_lookup(rb_global_tbl, (st_data_t)id, &data)) {
+    if (!sa_lookup(&rb_global_tbl, (sa_index_t)id, &data)) {
 	struct global_variable *var;
 	entry = ALLOC(struct global_entry);
 	var = ALLOC(struct global_variable);
@@ -378,7 +371,7 @@ rb_global_entry(ID id)

 	var->block_trace = 0;
 	var->trace = 0;
-	st_add_direct(rb_global_tbl, id, (st_data_t)entry);
+	sa_insert(&rb_global_tbl, (sa_index_t)id, (st_data_t)entry);
     }
     else {
 	entry = (struct global_entry *)data;
@@ -454,8 +447,8 @@ readonly_setter(VALUE val, ID id, void *data, struct global_variable *gvar)
     rb_name_error(id, "%s is a read-only variable", rb_id2name(id));
 }

-static int
-mark_global_entry(ID key, struct global_entry *entry)
+static void
+mark_global_entry(struct global_entry *entry)
 {
     struct trace_var *trace;
     struct global_variable *var = entry->var;
@@ -466,14 +459,14 @@ mark_global_entry(ID key, struct global_entry *entry)
 	if (trace->data) rb_gc_mark_maybe(trace->data);
 	trace = trace->next;
     }
-    return ST_CONTINUE;
 }

 void
 rb_gc_mark_global_tbl(void)
 {
-    if (rb_global_tbl)
-        st_foreach_safe(rb_global_tbl, mark_global_entry, 0);
+    SA_FOREACH_START(&rb_global_tbl);
+    mark_global_entry((struct global_entry*) value);
+    SA_FOREACH_END();
 }

 static ID
@@ -635,7 +628,7 @@ rb_f_untrace_var(int argc, VALUE *argv)
     rb_secure(4);
     rb_scan_args(argc, argv, "11", &var, &cmd);
     id = rb_to_id(var);
-    if (!st_lookup(rb_global_tbl, (st_data_t)id, &data)) {
+    if (!sa_lookup(&rb_global_tbl, (sa_index_t)id, &data)) {
 	rb_name_error(id, "undefined global variable %s", rb_id2name(id));
     }

@@ -742,13 +735,6 @@ rb_gvar_defined(struct global_entry *entry)
     return Qtrue;
 }

-static int
-gvar_i(ID key, struct global_entry *entry, VALUE ary)
-{
-    rb_ary_push(ary, ID2SYM(key));
-    return ST_CONTINUE;
-}
-
 /*
  *  call-seq:
  *     global_variables    -> array
@@ -765,7 +751,9 @@ rb_f_global_variables(void)
     char buf[2];
     int i;

-    st_foreach_safe(rb_global_tbl, gvar_i, ary);
+    SA_FOREACH_START(&rb_global_tbl);
+    rb_ary_push(ary, ID2SYM(entry->key));
+    SA_FOREACH_END();
     buf[0] = '$';
     for (i = 1; i <= 9; ++i) {
 	buf[1] = (char)(i + '0');
@@ -784,10 +772,10 @@ rb_alias_variable(ID name1, ID name2)
 	rb_raise(rb_eSecurityError, "Insecure: can't alias global variable");

     entry2 = rb_global_entry(name2);
-    if (!st_lookup(rb_global_tbl, (st_data_t)name1, &data1)) {
+    if (!sa_lookup(&rb_global_tbl, (sa_index_t)name1, &data1)) {
 	entry1 = ALLOC(struct global_entry);
 	entry1->id = name1;
-	st_add_direct(rb_global_tbl, name1, (st_data_t)entry1);
+	sa_insert(&rb_global_tbl, (sa_index_t)name1, (st_data_t)entry1);
     }
     else if ((entry1 = (struct global_entry *)data1)->var != entry2->var) {
 	struct global_variable *var = entry1->var;
@@ -815,7 +803,7 @@ rb_alias_variable(ID name1, ID name2)
 static int special_generic_ivar = 0;
 static st_table *generic_iv_tbl;

-st_table*
+sa_table*
 rb_generic_ivar_table(VALUE obj)
 {
     st_data_t tbl;
@@ -823,7 +811,7 @@ rb_generic_ivar_table(VALUE obj)
     if (!FL_TEST(obj, FL_EXIVAR)) return 0;
     if (!generic_iv_tbl) return 0;
     if (!st_lookup(generic_iv_tbl, (st_data_t)obj, &tbl)) return 0;
-    return (st_table *)tbl;
+    return (sa_table *)tbl;
 }

 static VALUE
@@ -833,7 +821,7 @@ generic_ivar_get(VALUE obj, ID id, int warn)

     if (generic_iv_tbl) {
 	if (st_lookup(generic_iv_tbl, (st_data_t)obj, &tbl)) {
-	    if (st_lookup((st_table *)tbl, (st_data_t)id, &val)) {
+	    if (sa_lookup((sa_table *)tbl, (sa_index_t)id, &val)) {
 		return (VALUE)val;
 	    }
 	}
@@ -847,7 +835,6 @@ generic_ivar_get(VALUE obj, ID id, int warn)
 static void
 generic_ivar_set(VALUE obj, ID id, VALUE val)
 {
-    st_table *tbl;
     st_data_t data;

     if (rb_special_const_p(obj)) {
@@ -859,24 +846,22 @@ generic_ivar_set(VALUE obj, ID id, VALUE val)
     }
     if (!st_lookup(generic_iv_tbl, (st_data_t)obj, &data)) {
 	FL_SET(obj, FL_EXIVAR);
-	tbl = st_init_numtable();
-	st_add_direct(generic_iv_tbl, (st_data_t)obj, (st_data_t)tbl);
-	st_add_direct(tbl, (st_data_t)id, (st_data_t)val);
-	return;
+	data = (st_data_t)sa_new_table();
+	st_add_direct(generic_iv_tbl, (st_data_t)obj, data);
     }
-    st_insert((st_table *)data, (st_data_t)id, (st_data_t)val);
+    sa_insert((sa_table *)data, (sa_index_t)id, (st_data_t)val);
 }

 static VALUE
 generic_ivar_defined(VALUE obj, ID id)
 {
-    st_table *tbl;
+    sa_table *tbl;
     st_data_t data;

     if (!generic_iv_tbl) return Qfalse;
     if (!st_lookup(generic_iv_tbl, (st_data_t)obj, &data)) return Qfalse;
-    tbl = (st_table *)data;
-    if (st_lookup(tbl, (st_data_t)id, &data)) {
+    tbl = (sa_table *)data;
+    if (sa_lookup(tbl, (sa_index_t)id, &data)) {
 	return Qtrue;
     }
     return Qfalse;
@@ -885,18 +870,18 @@ generic_ivar_defined(VALUE obj, ID id)
 static int
 generic_ivar_remove(VALUE obj, ID id, st_data_t *valp)
 {
-    st_table *tbl;
+    sa_table *tbl;
     st_data_t data, key = (st_data_t)id;
     int status;

     if (!generic_iv_tbl) return 0;
     if (!st_lookup(generic_iv_tbl, (st_data_t)obj, &data)) return 0;
-    tbl = (st_table *)data;
-    status = st_delete(tbl, &key, valp);
+    tbl = (sa_table *)data;
+    status = sa_delete(tbl, (sa_index_t)id, valp);
     if (tbl->num_entries == 0) {
 	key = (st_data_t)obj;
 	st_delete(generic_iv_tbl, &key, &data);
-	st_free_table((st_table *)data);
+	sa_free_table(tbl);
     }
     return status;
 }
@@ -908,22 +893,17 @@ rb_mark_generic_ivar(VALUE obj)

     if (!generic_iv_tbl) return;
     if (st_lookup(generic_iv_tbl, (st_data_t)obj, &tbl)) {
-	rb_mark_tbl((st_table *)tbl);
+	rb_mark_sa_tbl((sa_table *)tbl);
     }
 }

 static int
-givar_mark_i(ID key, VALUE value)
-{
-    rb_gc_mark(value);
-    return ST_CONTINUE;
-}
-
-static int
-givar_i(VALUE obj, st_table *tbl)
+givar_i(VALUE obj, sa_table *tbl)
 {
     if (rb_special_const_p(obj)) {
-	st_foreach_safe(tbl, givar_mark_i, 0);
+        SA_FOREACH_START(tbl);
+        rb_gc_mark((VALUE)value);
+        SA_FOREACH_END();
     }
     return ST_CONTINUE;
 }
@@ -943,7 +923,7 @@ rb_free_generic_ivar(VALUE obj)

     if (!generic_iv_tbl) return;
     if (st_delete(generic_iv_tbl, &key, &tbl))
-	st_free_table((st_table *)tbl);
+	sa_free_table((sa_table *)tbl);
 }

 RUBY_FUNC_EXPORTED size_t
@@ -951,7 +931,7 @@ rb_generic_ivar_memsize(VALUE obj)
 {
     st_data_t tbl;
     if (st_lookup(generic_iv_tbl, (st_data_t)obj, &tbl))
-	return st_memsize((st_table *)tbl);
+	return sa_memsize((sa_table *)tbl);
     return 0;
 }

@@ -970,17 +950,17 @@ rb_copy_generic_ivar(VALUE clone, VALUE obj)
         return;
     }
     if (st_lookup(generic_iv_tbl, (st_data_t)obj, &data)) {
-	st_table *tbl = (st_table *)data;
+	sa_table *tbl = (sa_table *)data;

         if (tbl->num_entries == 0)
             goto clear;

 	if (st_lookup(generic_iv_tbl, (st_data_t)clone, &data)) {
-	    st_free_table((st_table *)data);
-	    st_insert(generic_iv_tbl, (st_data_t)clone, (st_data_t)st_copy(tbl));
+	    sa_free_table((sa_table *)data);
+	    st_insert(generic_iv_tbl, (st_data_t)clone, (st_data_t)sa_copy(tbl));
 	}
 	else {
-	    st_add_direct(generic_iv_tbl, (st_data_t)clone, (st_data_t)st_copy(tbl));
+	    st_add_direct(generic_iv_tbl, (st_data_t)clone, (st_data_t)sa_copy(tbl));
 	    FL_SET(clone, FL_EXIVAR);
 	}
     }
@@ -990,7 +970,7 @@ static VALUE
 ivar_get(VALUE obj, ID id, int warn)
 {
     VALUE val, *ptr;
-    struct st_table *iv_index_tbl;
+    sa_table *iv_index_tbl;
     long len;
     st_data_t index;

@@ -1000,7 +980,7 @@ ivar_get(VALUE obj, ID id, int warn)
         ptr = ROBJECT_IVPTR(obj);
         iv_index_tbl = ROBJECT_IV_INDEX_TBL(obj);
         if (!iv_index_tbl) break;
-        if (!st_lookup(iv_index_tbl, (st_data_t)id, &index)) break;
+        if (!sa_lookup(iv_index_tbl, (sa_index_t)id, &index)) break;
         if (len <= (long)index) break;
         val = ptr[index];
         if (val != Qundef)
@@ -1008,7 +988,7 @@ ivar_get(VALUE obj, ID id, int warn)
 	break;
       case T_CLASS:
       case T_MODULE:
-	if (RCLASS_IV_TBL(obj) && st_lookup(RCLASS_IV_TBL(obj), (st_data_t)id, &index))
+	if (sa_lookup(RCLASS_IV_TBL(obj), (sa_index_t)id, &index))
 	    return (VALUE)index;
 	break;
       default:
@@ -1037,7 +1017,7 @@ rb_attr_get(VALUE obj, ID id)
 VALUE
 rb_ivar_set(VALUE obj, ID id, VALUE val)
 {
-    struct st_table *iv_index_tbl;
+    struct sa_table *iv_index_tbl;
     st_data_t index;
     long i, len;
     int ivar_extended;
@@ -1051,14 +1031,11 @@ rb_ivar_set(VALUE obj, ID id, VALUE val)
         if (!iv_index_tbl) {
             VALUE klass = rb_obj_class(obj);
             iv_index_tbl = RCLASS_IV_INDEX_TBL(klass);
-            if (!iv_index_tbl) {
-                iv_index_tbl = RCLASS_IV_INDEX_TBL(klass) = st_init_numtable();
-            }
         }
         ivar_extended = 0;
-        if (!st_lookup(iv_index_tbl, (st_data_t)id, &index)) {
+        if (!sa_lookup(iv_index_tbl, (sa_index_t)id, &index)) {
             index = iv_index_tbl->num_entries;
-            st_add_direct(iv_index_tbl, (st_data_t)id, index);
+            sa_insert(iv_index_tbl, (sa_index_t)id, index);
             ivar_extended = 1;
         }
         len = ROBJECT_NUMIV(obj);
@@ -1098,8 +1075,7 @@ rb_ivar_set(VALUE obj, ID id, VALUE val)
 	break;
       case T_CLASS:
       case T_MODULE:
-	if (!RCLASS_IV_TBL(obj)) RCLASS_IV_TBL(obj) = st_init_numtable();
-	st_insert(RCLASS_IV_TBL(obj), (st_data_t)id, val);
+	sa_insert(RCLASS_IV_TBL(obj), (sa_index_t)id, val);
         break;
       default:
 	generic_ivar_set(obj, id, val);
@@ -1112,13 +1088,13 @@ VALUE
 rb_ivar_defined(VALUE obj, ID id)
 {
     VALUE val;
-    struct st_table *iv_index_tbl;
+    struct sa_table *iv_index_tbl;
     st_data_t index;
     switch (TYPE(obj)) {
       case T_OBJECT:
         iv_index_tbl = ROBJECT_IV_INDEX_TBL(obj);
         if (!iv_index_tbl) break;
-        if (!st_lookup(iv_index_tbl, (st_data_t)id, &index)) break;
+        if (!sa_lookup(iv_index_tbl, (sa_index_t)id, &index)) break;
         if (ROBJECT_NUMIV(obj) <= (long)index) break;
         val = ROBJECT_IVPTR(obj)[index];
         if (val != Qundef)
@@ -1126,7 +1102,7 @@ rb_ivar_defined(VALUE obj, ID id)
 	break;
       case T_CLASS:
       case T_MODULE:
-	if (RCLASS_IV_TBL(obj) && st_lookup(RCLASS_IV_TBL(obj), (st_data_t)id, 0))
+	if (sa_lookup(RCLASS_IV_TBL(obj), (sa_index_t)id, 0))
 	    return Qtrue;
 	break;
       default:
@@ -1137,40 +1113,26 @@ rb_ivar_defined(VALUE obj, ID id)
     return Qfalse;
 }

-struct obj_ivar_tag {
-    VALUE obj;
-    int (*func)(ID key, VALUE val, st_data_t arg);
-    st_data_t arg;
-};
-
-static int
-obj_ivar_i(st_data_t key, st_data_t index, st_data_t arg)
-{
-    struct obj_ivar_tag *data = (struct obj_ivar_tag *)arg;
-    if ((long)index < ROBJECT_NUMIV(data->obj)) {
-        VALUE val = ROBJECT_IVPTR(data->obj)[(long)index];
-        if (val != Qundef) {
-            return (data->func)((ID)key, val, data->arg);
-        }
-    }
-    return ST_CONTINUE;
-}
-
 static void
 obj_ivar_each(VALUE obj, int (*func)(ANYARGS), st_data_t arg)
 {
-    st_table *tbl;
-    struct obj_ivar_tag data;
+    sa_table *tbl;
+    long numiv = ROBJECT_NUMIV(obj);
+    VALUE *vars = ROBJECT_IVPTR(obj);

     tbl = ROBJECT_IV_INDEX_TBL(obj);
-    if (!tbl)
+    if (!tbl || !numiv)
         return;

-    data.obj = obj;
-    data.func = (int (*)(ID key, VALUE val, st_data_t arg))func;
-    data.arg = arg;
-
-    st_foreach_safe(tbl, obj_ivar_i, (st_data_t)&data);
+    SA_FOREACH_START(tbl);
+    if ((long)value < numiv) {
+	VALUE val = vars[value];
+	if (val != Qundef) {
+	    if (((sa_iter_func)func)(entry->key, val, arg) == SA_STOP)
+		break;
+	}
+    }
+    SA_FOREACH_END();
 }

 void
@@ -1182,9 +1144,7 @@ rb_ivar_foreach(VALUE obj, int (*func)(ANYARGS), st_data_t arg)
 	break;
       case T_CLASS:
       case T_MODULE:
-	if (RCLASS_IV_TBL(obj)) {
-	    st_foreach_safe(RCLASS_IV_TBL(obj), func, arg);
-	}
+        sa_foreach(RCLASS_IV_TBL(obj), func, arg);
 	break;
       default:
 	if (!generic_iv_tbl) break;
@@ -1192,7 +1152,7 @@ rb_ivar_foreach(VALUE obj, int (*func)(ANYARGS), st_data_t arg)
 	    st_data_t tbl;

 	    if (st_lookup(generic_iv_tbl, (st_data_t)obj, &tbl)) {
-		st_foreach_safe((st_table *)tbl, func, arg);
+		sa_foreach((sa_table *)tbl, func, arg);
 	    }
 	}
 	break;
@@ -1202,11 +1162,11 @@ rb_ivar_foreach(VALUE obj, int (*func)(ANYARGS), st_data_t arg)
 st_index_t
 rb_ivar_count(VALUE obj)
 {
-    st_table *tbl;
+    sa_table *tbl;
     switch (TYPE(obj)) {
       case T_OBJECT:
 	if ((tbl = ROBJECT_IV_INDEX_TBL(obj)) != 0) {
-	    st_index_t i, count, num = tbl->num_entries;
+	    sa_index_t i, count, num = tbl->num_entries;
 	    const VALUE *const ivptr = ROBJECT_IVPTR(obj);
 	    for (i = count = 0; i < num; ++i) {
 		if (ivptr[i] != Qundef) {
@@ -1228,7 +1188,7 @@ rb_ivar_count(VALUE obj)
 	    st_data_t data;

 	    if (st_lookup(generic_iv_tbl, (st_data_t)obj, &data) &&
-		(tbl = (st_table *)data) != 0) {
+		(tbl = (sa_table *)data) != 0) {
 		return tbl->num_entries;
 	    }
 	}
@@ -1238,7 +1198,7 @@ rb_ivar_count(VALUE obj)
 }

 static int
-ivar_i(ID key, VALUE val, VALUE ary)
+ivar_i(sa_index_t key, VALUE val, VALUE ary)
 {
     if (rb_is_instance_id(key)) {
 	rb_ary_push(ary, ID2SYM(key));
@@ -1301,7 +1261,7 @@ rb_obj_remove_instance_variable(VALUE obj, VALUE name)
     VALUE val = Qnil;
     const ID id = rb_to_id(name);
     st_data_t n, v;
-    struct st_table *iv_index_tbl;
+    struct sa_table *iv_index_tbl;
     st_data_t index;

     if (!OBJ_UNTRUSTED(obj) && rb_safe_level() >= 4)
@@ -1315,7 +1275,7 @@ rb_obj_remove_instance_variable(VALUE obj, VALUE name)
       case T_OBJECT:
         iv_index_tbl = ROBJECT_IV_INDEX_TBL(obj);
         if (!iv_index_tbl) break;
-        if (!st_lookup(iv_index_tbl, (st_data_t)id, &index)) break;
+        if (!sa_lookup(iv_index_tbl, (sa_index_t)id, &index)) break;
         if (ROBJECT_NUMIV(obj) <= (long)index) break;
         val = ROBJECT_IVPTR(obj)[index];
         if (val != Qundef) {
@@ -1326,14 +1286,14 @@ rb_obj_remove_instance_variable(VALUE obj, VALUE name)
       case T_CLASS:
       case T_MODULE:
 	n = id;
-	if (RCLASS_IV_TBL(obj) && st_delete(RCLASS_IV_TBL(obj), &n, &v)) {
+	if (sa_delete(RCLASS_IV_TBL(obj), (sa_index_t)id, &v)) {
 	    return (VALUE)v;
 	}
 	break;
       default:
 	if (FL_TEST(obj, FL_EXIVAR) || rb_special_const_p(obj)) {
 	    v = val;
-	    if (generic_ivar_remove(obj, (st_data_t)id, &v)) {
+	    if (generic_ivar_remove(obj, id, &v)) {
 		return (VALUE)v;
 	    }
 	}
@@ -1410,20 +1370,20 @@ rb_mod_const_missing(VALUE klass, VALUE name)
 static void
 autoload_mark(void *ptr)
 {
-    rb_mark_tbl((st_table *)ptr);
+    rb_mark_sa_tbl((sa_table *)ptr);
 }

 static void
 autoload_free(void *ptr)
 {
-    st_free_table((st_table *)ptr);
+    sa_free_table((sa_table *)ptr);
 }

 static size_t
 autoload_memsize(const void *ptr)
 {
-    const st_table *tbl = ptr;
-    return st_memsize(tbl);
+    const sa_table *tbl = ptr;
+    return sa_memsize(tbl);
 }

 static const rb_data_type_t autoload_data_type = {
@@ -1432,14 +1392,14 @@ static const rb_data_type_t autoload_data_type = {
 };

 #define check_autoload_table(av) \
-    (struct st_table *)rb_check_typeddata((av), &autoload_data_type)
+    (struct sa_table *)rb_check_typeddata((av), &autoload_data_type)

 void
 rb_autoload(VALUE mod, ID id, const char *file)
 {
     st_data_t av;
     VALUE fn;
-    struct st_table *tbl;
+    struct sa_table *tbl;

     if (!rb_is_const_id(id)) {
 	rb_raise(rb_eNameError, "autoload must be constant name: %s", rb_id2name(id));
@@ -1448,43 +1408,41 @@ rb_autoload(VALUE mod, ID id, const char *file)
 	rb_raise(rb_eArgError, "empty file name");
     }

-    if ((tbl = RCLASS_CONST_TBL(mod)) && st_lookup(tbl, (st_data_t)id, &av) && ((rb_const_entry_t*)av)->value != Qundef)
+    if (sa_lookup(RCLASS_CONST_TBL(mod), (sa_index_t)id, &av) && ((rb_const_entry_t*)av)->value != Qundef)
 	return;

     rb_const_set(mod, id, Qundef);
     tbl = RCLASS_IV_TBL(mod);
-    if (tbl && st_lookup(tbl, (st_data_t)autoload, &av)) {
+    if (sa_lookup(tbl, (sa_index_t)autoload, &av)) {
 	tbl = check_autoload_table((VALUE)av);
     }
     else {
-	if (!tbl) tbl = RCLASS_IV_TBL(mod) = st_init_numtable();
 	av = (st_data_t)TypedData_Wrap_Struct(0, &autoload_data_type, 0);
-	st_add_direct(tbl, (st_data_t)autoload, av);
-	DATA_PTR(av) = tbl = st_init_numtable();
+	sa_insert(tbl, (sa_index_t)autoload, av);
+	DATA_PTR(av) = tbl = sa_new_table();
     }
     fn = rb_str_new2(file);
     FL_UNSET(fn, FL_TAINT);
     OBJ_FREEZE(fn);
-    st_insert(tbl, (st_data_t)id, (st_data_t)rb_node_newnode(NODE_MEMO, fn, rb_safe_level(), 0));
+    sa_insert(tbl, (sa_index_t)id, (st_data_t)rb_node_newnode(NODE_MEMO, fn, rb_safe_level(), 0));
 }

 static NODE*
 autoload_delete(VALUE mod, ID id)
 {
-    st_data_t val, load = 0, n = id;
+    st_data_t val, load = 0;
     rb_const_entry_t *ce;

-    st_delete(RCLASS_CONST_TBL(mod), &n, &val);
+    sa_delete(RCLASS_CONST_TBL(mod), (sa_index_t)id, &val);
     ce = (rb_const_entry_t*)val;
     if (ce) xfree(ce);
-    if (st_lookup(RCLASS_IV_TBL(mod), (st_data_t)autoload, &val)) {
-	struct st_table *tbl = check_autoload_table((VALUE)val);
+    if (sa_lookup(RCLASS_IV_TBL(mod), (sa_index_t)autoload, &val)) {
+	struct sa_table *tbl = check_autoload_table((VALUE)val);

-	st_delete(tbl, &n, &load);
+	sa_delete(tbl, (sa_index_t)id, &load);

 	if (tbl->num_entries == 0) {
-	    n = autoload;
-	    st_delete(RCLASS_IV_TBL(mod), &n, &val);
+	    sa_delete(RCLASS_IV_TBL(mod), (sa_index_t)autoload, &val);
 	}
     }

@@ -1509,14 +1467,14 @@ static NODE *
 autoload_node(VALUE mod, ID id, const char **loadingpath)
 {
     VALUE file;
-    struct st_table *tbl;
+    struct sa_table *tbl;
     st_data_t val;
     NODE *load;
     const char *loading;
     int safe;

-    if (!st_lookup(RCLASS_IV_TBL(mod), autoload, &val) ||
-	!(tbl = check_autoload_table((VALUE)val)) || !st_lookup(tbl, (st_data_t)id, &val)) {
+    if (!sa_lookup(RCLASS_IV_TBL(mod), (sa_index_t)autoload, &val) ||
+	!(tbl = check_autoload_table((VALUE)val)) || !sa_lookup(tbl, (sa_index_t)id, &val)) {
 	return 0;
     }
     load = (NODE *)val;
@@ -1541,10 +1499,10 @@ autoload_node(VALUE mod, ID id, const char **loadingpath)
 static int
 autoload_node_id(VALUE mod, ID id)
 {
-    struct st_table *tbl = RCLASS_CONST_TBL(mod);
+    struct sa_table *tbl = RCLASS_CONST_TBL(mod);
     st_data_t val;

-    if (!tbl || !st_lookup(tbl, (st_data_t)id, &val) || ((rb_const_entry_t*)val)->value != Qundef) {
+    if (!tbl || !sa_lookup(tbl, (sa_index_t)id, &val) || ((rb_const_entry_t*)val)->value != Qundef) {
 	return 0;
     }
     return 1;
@@ -1593,7 +1551,7 @@ rb_const_get_0(VALUE klass, ID id, int exclude, int recurse, int visibility)
     while (RTEST(tmp)) {
 	VALUE am = 0;
 	st_data_t data;
-	while (RCLASS_CONST_TBL(tmp) && st_lookup(RCLASS_CONST_TBL(tmp), (st_data_t)id, &data)) {
+	while (sa_lookup(RCLASS_CONST_TBL(tmp), (sa_index_t)id, &data)) {
 	    rb_const_entry_t *ce = (rb_const_entry_t *)data;
 	    if (visibility && ce->flag == CONST_PRIVATE) {
 		rb_name_error(id, "private constant %s::%s referenced", rb_class2name(klass), rb_id2name(id));
@@ -1686,12 +1644,12 @@ VALUE
 rb_const_remove(VALUE mod, ID id)
 {
     VALUE val;
-    st_data_t v, n = id;
+    st_data_t v;

     if (!OBJ_UNTRUSTED(mod) && rb_safe_level() >= 4)
 	rb_raise(rb_eSecurityError, "Insecure: can't remove constant");
     rb_check_frozen(mod);
-    if (!RCLASS_CONST_TBL(mod) || !st_delete(RCLASS_CONST_TBL(mod), &n, &v)) {
+    if (!sa_delete(RCLASS_CONST_TBL(mod), (sa_index_t)id, &v)) {
 	if (rb_const_defined_at(mod, id)) {
 	    rb_name_error(id, "cannot remove %s::%s",
 			  rb_class2name(mod), rb_id2name(id));
@@ -1711,27 +1669,20 @@ rb_const_remove(VALUE mod, ID id)
     return val;
 }

-static int
-sv_i(ID key, rb_const_entry_t *ce, st_table *tbl)
-{
-    if (rb_is_const_id(key)) {
-	if (!st_lookup(tbl, (st_data_t)key, 0)) {
-	    st_insert(tbl, (st_data_t)key, (st_data_t)ce);
-	}
-    }
-    return ST_CONTINUE;
-}
-
 void*
 rb_mod_const_at(VALUE mod, void *data)
 {
-    st_table *tbl = data;
+    sa_table *tbl = data;
     if (!tbl) {
-	tbl = st_init_numtable();
+	tbl = sa_new_table();
     }
-    if (RCLASS_CONST_TBL(mod)) {
-	st_foreach_safe(RCLASS_CONST_TBL(mod), sv_i, (st_data_t)tbl);
+    SA_FOREACH_START(RCLASS_CONST_TBL(mod));
+    if (rb_is_const_id(entry->key)) {
+	if (!sa_lookup(tbl, entry->key, 0)) {
+	    sa_insert(tbl, entry->key, value);
+	}
     }
+    SA_FOREACH_END();
     return tbl;
 }

@@ -1748,25 +1699,21 @@ rb_mod_const_of(VALUE mod, void *data)
     return data;
 }

-static int
-list_i(st_data_t key, st_data_t value, VALUE ary)
-{
-    ID sym = (ID)key;
-    rb_const_entry_t *ce = (rb_const_entry_t *)value;
-    if (ce->flag != CONST_PRIVATE) rb_ary_push(ary, ID2SYM(sym));
-    return ST_CONTINUE;
-}
-
 VALUE
 rb_const_list(void *data)
 {
-    st_table *tbl = data;
+    sa_table *tbl = data;
     VALUE ary;

     if (!tbl) return rb_ary_new2(0);
     ary = rb_ary_new2(tbl->num_entries);
-    st_foreach_safe(tbl, list_i, ary);
-    st_free_table(tbl);
+
+    SA_FOREACH_START(tbl);
+    rb_const_entry_t *ce = (rb_const_entry_t *)value;
+    if (ce->flag != CONST_PRIVATE) rb_ary_push(ary, ID2SYM(entry->key));
+    SA_FOREACH_END();
+
+    sa_free_table(tbl);

     return ary;
 }
@@ -1790,7 +1737,7 @@ VALUE
 rb_mod_constants(int argc, VALUE *argv, VALUE mod)
 {
     VALUE inherit;
-    st_table *tbl;
+    sa_table *tbl;

     if (argc == 0) {
 	inherit = Qtrue;
@@ -1817,7 +1764,7 @@ rb_const_defined_0(VALUE klass, ID id, int exclude, int recurse, int visibility)
     tmp = klass;
   retry:
     while (tmp) {
-	if (RCLASS_CONST_TBL(tmp) && st_lookup(RCLASS_CONST_TBL(tmp), (st_data_t)id, &value)) {
+	if (sa_lookup(RCLASS_CONST_TBL(tmp), (sa_index_t)id, &value)) {
 	    rb_const_entry_t *ce = (rb_const_entry_t *)value;
 	    if (visibility && ce->flag == CONST_PRIVATE) {
 		return (int)Qfalse;
@@ -1885,6 +1832,7 @@ void
 rb_const_set(VALUE klass, ID id, VALUE val)
 {
     rb_const_entry_t *ce;
+    st_data_t value;
     VALUE visibility = CONST_PUBLIC;

     if (NIL_P(klass)) {
@@ -1893,21 +1841,14 @@ rb_const_set(VALUE klass, ID id, VALUE val)
     }

     check_before_mod_set(klass, id, val, "constant");
-    if (!RCLASS_CONST_TBL(klass)) {
-	RCLASS_CONST_TBL(klass) = st_init_numtable();
-    }
-    else {
-	st_data_t value;
-
-	if (st_lookup(RCLASS_CONST_TBL(klass), (st_data_t)id, &value)) {
-	    rb_const_entry_t *ce = (rb_const_entry_t*)value;
-	    if (ce->value == Qundef)
-		autoload_delete(klass, id);
-	    else {
-		visibility = ce->flag;
-		rb_warn("already initialized constant %s", rb_id2name(id));
-	    }
-	}
+    if (sa_lookup(RCLASS_CONST_TBL(klass), (sa_index_t)id, &value)) {
+        rb_const_entry_t *ce = (rb_const_entry_t*)value;
+        if (ce->value == Qundef)
+            autoload_delete(klass, id);
+        else {
+            visibility = ce->flag;
+            rb_warn("already initialized constant %s", rb_id2name(id));
+        }
     }

     rb_vm_change_state();
@@ -1916,7 +1857,7 @@ rb_const_set(VALUE klass, ID id, VALUE val)
     ce->flag = (rb_const_flag_t)visibility;
     ce->value = val;

-    st_insert(RCLASS_CONST_TBL(klass), (st_data_t)id, (st_data_t)ce);
+    sa_insert(RCLASS_CONST_TBL(klass), (sa_index_t)id, (st_data_t)ce);
 }

 void
@@ -1958,8 +1899,7 @@ set_const_visibility(VALUE mod, int argc, VALUE *argv, rb_const_flag_t flag)
     for (i = 0; i < argc; i++) {
 	VALUE val = argv[i];
 	id = rb_to_id(val);
-	if (RCLASS_CONST_TBL(mod) &&
-	    st_lookup(RCLASS_CONST_TBL(mod), (st_data_t)id, &v)) {
+	if (sa_lookup(RCLASS_CONST_TBL(mod), (sa_index_t)id, &v)) {
 	    ((rb_const_entry_t*)v)->flag = flag;
 	}
 	else {
@@ -2008,7 +1948,7 @@ original_module(VALUE c)
 }

 #define CVAR_LOOKUP(v,r) do {\
-    if (RCLASS_IV_TBL(klass) && st_lookup(RCLASS_IV_TBL(klass),(st_data_t)id,(v))) {\
+    if (sa_lookup(RCLASS_IV_TBL(klass),(sa_index_t)id,(v))) {\
 	r;\
     }\
     if (FL_TEST(klass, FL_SINGLETON) ) {\
@@ -2027,7 +1967,7 @@ original_module(VALUE c)
 	klass = RCLASS_SUPER(klass);\
     }\
     while (klass) {\
-	if (RCLASS_IV_TBL(klass) && st_lookup(RCLASS_IV_TBL(klass),(st_data_t)id,(v))) {\
+	if (sa_lookup(RCLASS_IV_TBL(klass),(sa_index_t)id,(v))) {\
 	    r;\
 	}\
 	klass = RCLASS_SUPER(klass);\
@@ -2043,15 +1983,13 @@ rb_cvar_set(VALUE klass, ID id, VALUE val)
     CVAR_LOOKUP(0, {if (!front) front = klass; target = klass;});
     if (target) {
 	if (front && target != front) {
-	    st_data_t did = id;
-
 	    if (RTEST(ruby_verbose)) {
 		rb_warning("class variable %s of %s is overtaken by %s",
 			   rb_id2name(id), rb_class2name(original_module(front)),
 			   rb_class2name(original_module(target)));
 	    }
 	    if (BUILTIN_TYPE(front) == T_CLASS) {
-		st_delete(RCLASS_IV_TBL(front),&did,0);
+		sa_delete(RCLASS_IV_TBL(front),(sa_index_t)id,0);
 	    }
 	}
     }
@@ -2060,11 +1998,7 @@ rb_cvar_set(VALUE klass, ID id, VALUE val)
     }

     check_before_mod_set(target, id, val, "class variable");
-    if (!RCLASS_IV_TBL(target)) {
-	RCLASS_IV_TBL(target) = st_init_numtable();
-    }
-
-    st_insert(RCLASS_IV_TBL(target), (st_data_t)id, (st_data_t)val);
+    sa_insert(RCLASS_IV_TBL(target), (sa_index_t)id, (st_data_t)val);
 }

 VALUE
@@ -2080,15 +2014,13 @@ rb_cvar_get(VALUE klass, ID id)
 		      rb_id2name(id), rb_class2name(tmp));
     }
     if (front && target != front) {
-	st_data_t did = id;
-
 	if (RTEST(ruby_verbose)) {
 	    rb_warning("class variable %s of %s is overtaken by %s",
 		       rb_id2name(id), rb_class2name(original_module(front)),
 		       rb_class2name(original_module(target)));
 	}
 	if (BUILTIN_TYPE(front) == T_CLASS) {
-	    st_delete(RCLASS_IV_TBL(front),&did,0);
+	    sa_delete(RCLASS_IV_TBL(front),(sa_index_t)id,0);
 	}
     }
     return (VALUE)value;
@@ -2134,7 +2066,7 @@ rb_define_class_variable(VALUE klass, const char *name, VALUE val)
 }

 static int
-cv_i(ID key, VALUE value, VALUE ary)
+cv_i(sa_index_t key, VALUE value, VALUE ary)
 {
     if (rb_is_class_id(key)) {
 	VALUE kval = ID2SYM(key);
@@ -2167,7 +2099,7 @@ rb_mod_class_variables(VALUE obj)
     VALUE ary = rb_ary_new();

     if (RCLASS_IV_TBL(obj)) {
-	st_foreach_safe(RCLASS_IV_TBL(obj), cv_i, ary);
+	sa_foreach(RCLASS_IV_TBL(obj), cv_i, ary);
     }
     return ary;
 }
@@ -2196,7 +2128,7 @@ VALUE
 rb_mod_remove_cvar(VALUE mod, VALUE name)
 {
     const ID id = rb_to_id(name);
-    st_data_t val, n = id;
+    st_data_t val;

     if (!rb_is_class_id(id)) {
 	rb_name_error(id, "wrong class variable name %s", rb_id2name(id));
@@ -2204,7 +2136,7 @@ rb_mod_remove_cvar(VALUE mod, VALUE name)
     if (!OBJ_UNTRUSTED(mod) && rb_safe_level() >= 4)
 	rb_raise(rb_eSecurityError, "Insecure: can't remove class variable");
     rb_check_frozen(mod);
-    if (RCLASS_IV_TBL(mod) && st_delete(RCLASS_IV_TBL(mod), &n, &val)) {
+    if (RCLASS_IV_TBL(mod) && sa_delete(RCLASS_IV_TBL(mod), (sa_index_t)id, &val)) {
 	return (VALUE)val;
     }
     if (rb_cvar_defined(mod, id)) {
diff --git a/vm.c b/vm.c
index dd026cb..e6421c6 100644
--- a/vm.c
+++ b/vm.c
@@ -1042,7 +1042,7 @@ static void
 add_opt_method(VALUE klass, ID mid, VALUE bop)
 {
     rb_method_entry_t *me;
-    if (st_lookup(RCLASS_M_TBL(klass), mid, (void *)&me) && me->def &&
+    if (sa_lookup(RCLASS_M_TBL(klass), (sa_index_t)mid, (void *)&me) && me->def &&
 	me->def->type == VM_METHOD_TYPE_CFUNC) {
 	st_insert(vm_opt_method_table, (st_data_t)me, (st_data_t)bop);
     }
diff --git a/vm_insnhelper.c b/vm_insnhelper.c
index 8e7fe94..1367cec 100644
--- a/vm_insnhelper.c
+++ b/vm_insnhelper.c
@@ -1187,7 +1187,7 @@ vm_get_ev_const(rb_thread_t *th, const rb_iseq_t *iseq,
 		st_data_t data;
 	      search_continue:
 		if (RCLASS_CONST_TBL(klass) &&
-		    st_lookup(RCLASS_CONST_TBL(klass), id, &data)) {
+		    sa_lookup(RCLASS_CONST_TBL(klass), (sa_index_t)id, &data)) {
 		    val = ((rb_const_entry_t*)data)->value;
 		    if (val == Qundef) {
 			if (am == klass) break;
@@ -1297,10 +1297,10 @@ vm_getivar(VALUE obj, ID id, IC ic)
 	    st_data_t index;
 	    long len = ROBJECT_NUMIV(obj);
 	    VALUE *ptr = ROBJECT_IVPTR(obj);
-	    struct st_table *iv_index_tbl = ROBJECT_IV_INDEX_TBL(obj);
+	    struct sa_table *iv_index_tbl = ROBJECT_IV_INDEX_TBL(obj);

 	    if (iv_index_tbl) {
-		if (st_lookup(iv_index_tbl, id, &index)) {
+		if (sa_lookup(iv_index_tbl, (sa_index_t)id, &index)) {
 		    if ((long)index < len) {
 			val = ptr[index];
 		    }
@@ -1350,9 +1350,9 @@ vm_setivar(VALUE obj, ID id, VALUE val, IC ic)
 	    }
 	}
 	else {
-	    struct st_table *iv_index_tbl = ROBJECT_IV_INDEX_TBL(obj);
+	    struct sa_table *iv_index_tbl = ROBJECT_IV_INDEX_TBL(obj);

-	    if (iv_index_tbl && st_lookup(iv_index_tbl, (st_data_t)id, &index)) {
+	    if (iv_index_tbl && sa_lookup(iv_index_tbl, (sa_index_t)id, &index)) {
 		ic->ic_class = klass;
 		ic->ic_value.index = index;
 		ic->ic_vmstat = GET_VM_STATE_VERSION();
diff --git a/vm_method.c b/vm_method.c
index 6e33603..6567440 100644
--- a/vm_method.c
+++ b/vm_method.c
@@ -2,38 +2,33 @@
  * This file is included by vm.c
  */

-#define CACHE_SIZE 0x800
-#define CACHE_MASK 0x7ff
-#define EXPR1(c,m) ((((c)>>3)^(m))&CACHE_MASK)
-
 static void rb_vm_check_redefinition_opt_method(const rb_method_entry_t *me);

 static ID object_id, respond_to_missing;
 static ID removed, singleton_removed, undefined, singleton_undefined;
 static ID added, singleton_added, attached;

-struct cache_entry {		/* method hash table. */
-    VALUE filled_version;        /* filled state version */
-    ID mid;			/* method's id */
-    VALUE klass;		/* receiver's class */
-    rb_method_entry_t *me;
-};
-
-static struct cache_entry cache[CACHE_SIZE];
 #define ruby_running (GET_VM()->running)
 /* int ruby_running = 0; */

+static int
+methods_cache_clear_callback(void *vstart, void *vend, size_t stride, void *data)
+{
+    VALUE v = (VALUE)vstart;
+    for (; v != (VALUE)vend; v += stride) {
+	switch(BUILTIN_TYPE(v)) {
+	    case T_CLASS:
+	    case T_MODULE:
+	    case T_ICLASS:
+		RCLASS(v)->cache->method_cache_version = 0;
+	}
+    }
+    return 0;
+}
 static void
 vm_clear_global_method_cache(void)
 {
-    struct cache_entry *ent, *end;
-
-    ent = cache;
-    end = ent + CACHE_SIZE;
-    while (ent < end) {
-	ent->filled_version = 0;
-	ent++;
-    }
+    rb_objspace_each_objects(methods_cache_clear_callback, NULL);
 }

 void
@@ -162,7 +157,7 @@ rb_method_entry_make(VALUE klass, ID mid, rb_method_type_t type,
 		     rb_method_definition_t *def, rb_method_flag_t noex)
 {
     rb_method_entry_t *me;
-    st_table *mtbl;
+    sa_table *mtbl;
     st_data_t data;

     if (NIL_P(klass)) {
@@ -190,7 +185,7 @@ rb_method_entry_make(VALUE klass, ID mid, rb_method_type_t type,
     mtbl = RCLASS_M_TBL(klass);

     /* check re-definition */
-    if (st_lookup(mtbl, mid, &data)) {
+    if (sa_lookup(mtbl, (sa_index_t)mid, &data)) {
 	rb_method_entry_t *old_me = (rb_method_entry_t *)data;
 	rb_method_definition_t *old_def = old_me->def;

@@ -248,7 +243,7 @@ rb_method_entry_make(VALUE klass, ID mid, rb_method_type_t type,
 	}
     }

-    st_insert(mtbl, mid, (st_data_t) me);
+    sa_insert(mtbl, (sa_index_t)mid, (st_data_t) me);

     return me;
 }
@@ -371,7 +366,7 @@ search_method(VALUE klass, ID id)
 	return 0;
     }

-    while (!st_lookup(RCLASS_M_TBL(klass), id, &body)) {
+    while (!sa_lookup(RCLASS_M_TBL(klass), (sa_index_t)id, &body)) {
 	klass = RCLASS_SUPER(klass);
 	if (!klass) {
 	    return 0;
@@ -393,20 +388,10 @@ rb_method_entry_get_without_cache(VALUE klass, ID id)
     rb_method_entry_t *me = search_method(klass, id);

     if (ruby_running) {
-	struct cache_entry *ent;
-	ent = cache + EXPR1(klass, id);
-	ent->filled_version = GET_VM_STATE_VERSION();
-	ent->klass = klass;
-
 	if (UNDEFINED_METHOD_ENTRY_P(me)) {
-	    ent->mid = id;
-	    ent->me = 0;
-	    me = 0;
-	}
-	else {
-	    ent->mid = id;
-	    ent->me = me;
+            me = 0;
 	}
+        sa_insert(&RCLASS(klass)->cache->m_cache_tbl, (sa_index_t)id, (st_data_t)me);
     }

     return me;
@@ -415,21 +400,23 @@ rb_method_entry_get_without_cache(VALUE klass, ID id)
 rb_method_entry_t *
 rb_method_entry(VALUE klass, ID id)
 {
-    struct cache_entry *ent;
+    rb_class_cache_t *cache = RCLASS(klass)->cache;
+    rb_method_entry_t *me;

-    ent = cache + EXPR1(klass, id);
-    if (ent->filled_version == GET_VM_STATE_VERSION() &&
-	ent->mid == id && ent->klass == klass) {
-	return ent->me;
+    if (cache->method_cache_version != GET_VM_STATE_VERSION()) {
+        sa_clear_no_free(&cache->m_cache_tbl);
+        cache->method_cache_version = GET_VM_STATE_VERSION();
+    }
+    else if (sa_lookup(&cache->m_cache_tbl, (sa_index_t)id, (st_data_t*)&me)) {
+        return me;
     }
-
     return rb_method_entry_get_without_cache(klass, id);
 }

 static void
 remove_method(VALUE klass, ID mid)
 {
-    st_data_t key, data;
+    st_data_t data;
     rb_method_entry_t *me = 0;

     if (klass == rb_cObject) {
@@ -443,14 +430,13 @@ remove_method(VALUE klass, ID mid)
 	rb_warn("removing `%s' may cause serious problems", rb_id2name(mid));
     }

-    if (!st_lookup(RCLASS_M_TBL(klass), mid, &data) ||
+    if (!sa_lookup(RCLASS_M_TBL(klass), (sa_index_t)mid, &data) ||
 	!(me = (rb_method_entry_t *)data) ||
 	(!me->def || me->def->type == VM_METHOD_TYPE_UNDEF)) {
 	rb_name_error(mid, "method `%s' not defined in %s",
 		      rb_id2name(mid), rb_class2name(klass));
     }
-    key = (st_data_t)mid;
-    st_delete(RCLASS_M_TBL(klass), &key, &data);
+    sa_delete(RCLASS_M_TBL(klass), (sa_index_t)mid, &data);

     rb_vm_check_redefinition_opt_method(me);
     rb_clear_cache_for_undef(klass, mid);
	diff --git a/array.c b/array.c
	index e427cb3..0efa9b1 100644
	--- a/array.c
	+++ b/array.c
	@@ -255,15 +255,24 @@ rb_ary_modify(VALUE ary)
	rb_ary_modify_check(ary);
	if (ARY_SHARED_P(ary)) {
	long len = RARRAY_LEN(ary);
	+ VALUE shared = ARY_SHARED(ary);
	if (len <= RARRAY_EMBED_LEN_MAX) {
	VALUE *ptr = ARY_HEAP_PTR(ary);
	- VALUE shared = ARY_SHARED(ary);
	FL_UNSET_SHARED(ary);
	FL_SET_EMBED(ary);
	MEMCPY(ARY_EMBED_PTR(ary), ptr, VALUE, len);
	rb_ary_decrement_share(shared);
	ARY_SET_EMBED_LEN(ary, len);
	}
	+ else if (ARY_SHARED_NUM(shared) == 1 && len > RARRAY_LEN(shared)>>1) {
	+ long shift = RARRAY_PTR(ary) - RARRAY_PTR(shared);
	+ ARY_SET_PTR(ary, RARRAY_PTR(shared));
	+ ARY_SET_CAPA(ary, RARRAY_LEN(shared));
	+ MEMMOVE(RARRAY_PTR(ary), RARRAY_PTR(ary)+shift, VALUE, len);
	+ FL_UNSET_SHARED(ary);
	+ FL_SET_EMBED(shared);
	+ rb_ary_decrement_share(shared);
	+ }
	else {
	VALUE *ptr = ALLOC_N(VALUE, len);
	MEMCPY(ptr, RARRAY_PTR(ary), VALUE, len);
	@@ -274,6 +283,38 @@ rb_ary_modify(VALUE ary)
	}
	}

	+static void
	+ary_ensure_room_for_push(VALUE ary, long add_len)
	+{
	+ long new_len = RARRAY_LEN(ary) + add_len;
	+ long capa;
	+
	+ if (ARY_SHARED_P(ary)) {
	+ if (new_len > RARRAY_EMBED_LEN_MAX) {
	+ VALUE shared = ARY_SHARED(ary);
	+ if (ARY_SHARED_NUM(shared) == 1) {
	+ if (RARRAY_PTR(ary) - RARRAY_PTR(shared) + new_len <= RARRAY_LEN(shared)) {
	+ rb_ary_modify_check(ary);
	+ }
	+ else {
	+ /* if array is shared, than it is likely it participate in push/shift pattern */
	+ rb_ary_modify(ary);
	+ capa = ARY_CAPA(ary);
	+ if (new_len > capa - (capa >> 6)) {
	+ ary_double_capa(ary, new_len);
	+ }
	+ }
	+ return;
	+ }
	+ }
	+ }
	+ rb_ary_modify(ary);
	+ capa = ARY_CAPA(ary);
	+ if (new_len > capa) {
	+ ary_double_capa(ary, new_len);
	+ }
	+}
	+
	VALUE
	rb_ary_freeze(VALUE ary)
	{
	@@ -430,8 +471,9 @@ ary_make_shared(VALUE ary)
	OBJSETUP(shared, 0, T_ARRAY);
	FL_UNSET_EMBED(shared);

	- ARY_SET_LEN((VALUE)shared, RARRAY_LEN(ary));
	+ ARY_SET_LEN((VALUE)shared, ARY_CAPA(ary));
	ARY_SET_PTR((VALUE)shared, RARRAY_PTR(ary));
	+ rb_mem_clear(RARRAY_PTR(shared) + RARRAY_LEN(ary), ARY_CAPA(ary) - RARRAY_LEN(ary));
	FL_SET_SHARED_ROOT(shared);
	ARY_SET_SHARED_NUM((VALUE)shared, 1);
	FL_SET_SHARED(ary);
	@@ -721,8 +763,6 @@ ary_take_first_or_last(int argc, VALUE *argv, VALUE ary, enum ary_take_pos_flags
	return ary_make_partial(ary, rb_cArray, offset, n);
	}

	-static VALUE rb_ary_push_1(VALUE ary, VALUE item);
	-
	/*
	* call-seq:
	* ary << obj -> ary
	@@ -739,8 +779,12 @@ static VALUE rb_ary_push_1(VALUE ary, VALUE item);
	VALUE
	rb_ary_push(VALUE ary, VALUE item)
	{
	- rb_ary_modify(ary);
	- return rb_ary_push_1(ary, item);
	+ long idx = RARRAY_LEN(ary);
	+
	+ ary_ensure_room_for_push(ary, 1);
	+ RARRAY_PTR(ary)[idx] = item;
	+ ARY_SET_LEN(ary, idx + 1);
	+ return ary;
	}

	static VALUE
	@@ -756,6 +800,18 @@ rb_ary_push_1(VALUE ary, VALUE item)
	return ary;
	}

	+static VALUE
	+rb_ary_cat(VALUE ary, const VALUE *ptr, long len)
	+{
	+ long oldlen = RARRAY_LEN(ary);
	+
	+ ary_ensure_room_for_push(ary, len);
	+copy:
	+ MEMCPY(RARRAY_PTR(ary) + oldlen, ptr, VALUE, len);
	+ ARY_SET_LEN(ary, oldlen + len);
	+ return ary;
	+}
	+
	/*
	* call-seq:
	* ary.push(obj, ... ) -> ary
	@@ -772,11 +828,7 @@ rb_ary_push_1(VALUE ary, VALUE item)
	static VALUE
	rb_ary_push_m(int argc, VALUE *argv, VALUE ary)
	{
	- rb_ary_modify(ary);
	- while (argc--) {
	- rb_ary_push_1(ary, *argv++);
	- }
	- return ary;
	+ return rb_ary_cat(ary, argv, argc);
	}

	VALUE
	@@ -904,6 +956,55 @@ rb_ary_shift_m(int argc, VALUE *argv, VALUE ary)
	return result;
	}

	+static void
	+ary_ensure_room_for_unshift(VALUE ary, int argc)
	+{
	+ long len = RARRAY_LEN(ary);
	+ long new_len = len + argc;
	+ long capa;
	+ VALUE head, sharedp;
	+
	+ if (ARY_SHARED_P(ary)) {
	+ VALUE shared = ARY_SHARED(ary);
	+ capa = RARRAY_LEN(shared);
	+ if (ARY_SHARED_NUM(shared) == 1 && capa > new_len) {
	+ head = RARRAY_PTR(ary);
	+ sharedp = RARRAY_PTR(shared);
	+ goto makeroom_if_need;
	+ }
	+ }
	+
	+ rb_ary_modify(ary);
	+ capa = ARY_CAPA(ary);
	+ if (capa - (capa >> 6) <= new_len) {
	+ ary_double_capa(ary, new_len);
	+ }
	+
	+ /* use shared array for big "queues" */
	+ if (new_len > ARY_DEFAULT_SIZE * 4) {
	+ /* make a room for unshifted items */
	+ capa = ARY_CAPA(ary);
	+ ary_make_shared(ary);
	+
	+ head = sharedp = RARRAY_PTR(ary);
	+ goto makeroom;
	+makeroom_if_need:
	+ if (head - sharedp < argc) {
	+ long room;
	+makeroom:
	+ room = capa - new_len;
	+ room -= room >> 4;
	+ MEMMOVE(sharedp + argc + room, head, VALUE, len);
	+ head = sharedp + argc + room;
	+ }
	+ ARY_SET_PTR(ary, head - argc);
	+ }
	+ else {
	+ /* sliding items */
	+ MEMMOVE(RARRAY_PTR(ary) + argc, RARRAY_PTR(ary), VALUE, len);
	+ }
	+}
	+
	/*
	* call-seq:
	* ary.unshift(obj, ...) -> ary
	@@ -919,19 +1020,16 @@ rb_ary_shift_m(int argc, VALUE *argv, VALUE ary)
	static VALUE
	rb_ary_unshift_m(int argc, VALUE *argv, VALUE ary)
	{
	- long len;
	+ long len = RARRAY_LEN(ary);

	- rb_ary_modify(ary);
	- if (argc == 0) return ary;
	- if (ARY_CAPA(ary) <= (len = RARRAY_LEN(ary)) + argc) {
	- ary_double_capa(ary, len + argc);
	+ if (argc == 0) {
	+ rb_ary_modify_check(ary);
	+ return ary;
	}

	- /* sliding items */
	- MEMMOVE(RARRAY_PTR(ary) + argc, RARRAY_PTR(ary), VALUE, len);
	+ ary_ensure_room_for_unshift(ary, argc);
	MEMCPY(RARRAY_PTR(ary), argv, VALUE, argc);
	- ARY_INCREASE_LEN(ary, argc);
	-
	+ ARY_SET_LEN(ary, len + argc);
	return ary;
	}

	@@ -1293,15 +1391,12 @@ rb_ary_splice(VALUE ary, long beg, long len, VALUE rpl)
	rpl = rb_ary_to_ary(rpl);
	rlen = RARRAY_LEN(rpl);
	}
	- rb_ary_modify(ary);
	if (beg >= RARRAY_LEN(ary)) {
	if (beg > ARY_MAX_SIZE - rlen) {
	rb_raise(rb_eIndexError, "index %ld too big", beg);
	}
	+ ary_ensure_room_for_push(ary, rlen-len); /* len is 0 or negative */
	len = beg + rlen;
	- if (len >= ARY_CAPA(ary)) {
	- ary_double_capa(ary, len);
	- }
	rb_mem_clear(RARRAY_PTR(ary) + RARRAY_LEN(ary), beg - RARRAY_LEN(ary));
	if (rlen > 0) {
	MEMCPY(RARRAY_PTR(ary) + beg, RARRAY_PTR(rpl), VALUE, rlen);
	@@ -1311,6 +1406,7 @@ rb_ary_splice(VALUE ary, long beg, long len, VALUE rpl)
	else {
	long alen;

	+ rb_ary_modify(ary);
	alen = RARRAY_LEN(ary) + rlen - len;
	if (alen >= ARY_CAPA(ary)) {
	ary_double_capa(ary, alen);
	@@ -2100,12 +2196,13 @@ rb_ary_sort_bang(VALUE ary)
	if (RARRAY_LEN(ary) > 1) {
	VALUE tmp = ary_make_substitution(ary); /* only ary refers tmp */
	struct ary_sort_data data;
	+ long len = RARRAY_LEN(ary);

	RBASIC(tmp)->klass = 0;
	data.ary = tmp;
	data.opt_methods = 0;
	data.opt_inited = 0;
	- ruby_qsort(RARRAY_PTR(tmp), RARRAY_LEN(tmp), sizeof(VALUE),
	+ ruby_qsort(RARRAY_PTR(tmp), len, sizeof(VALUE),
	rb_block_given_p()?sort_1:sort_2, &data);

	if (ARY_EMBED_P(tmp)) {
	@@ -2122,7 +2219,7 @@ rb_ary_sort_bang(VALUE ary)
	if (ARY_HEAP_PTR(ary) == ARY_HEAP_PTR(tmp)) {
	assert(!ARY_EMBED_P(ary));
	FL_UNSET_SHARED(ary);
	- ARY_SET_CAPA(ary, ARY_CAPA(tmp));
	+ ARY_SET_CAPA(ary, RARRAY_LEN(tmp));
	}
	else {
	assert(!ARY_SHARED_P(tmp));
	@@ -2137,8 +2234,8 @@ rb_ary_sort_bang(VALUE ary)
	xfree(ARY_HEAP_PTR(ary));
	}
	ARY_SET_PTR(ary, RARRAY_PTR(tmp));
	- ARY_SET_HEAP_LEN(ary, RARRAY_LEN(tmp));
	- ARY_SET_CAPA(ary, ARY_CAPA(tmp));
	+ ARY_SET_HEAP_LEN(ary, len);
	+ ARY_SET_CAPA(ary, RARRAY_LEN(tmp));
	}
	/* tmp was lost ownership for the ptr */
	FL_UNSET(tmp, FL_FREEZE);
	diff --git a/test/ruby/test_array.rb b/test/ruby/test_array.rb
	index 28d3e41..bcad2c0 100644
	--- a/test/ruby/test_array.rb
	+++ b/test/ruby/test_array.rb
	@@ -414,6 +414,18 @@ class TestArray < Test::Unit::TestCase
	a = @cls[1, 2, 3]
	a[-1, 0] = a
	assert_equal([1, 2, 1, 2, 3, 3], a)
	+
	+ a = @cls[]
	+ a[5,0] = [5]
	+ assert_equal([nil, nil, nil, nil, nil, 5], a)
	+
	+ a = @cls[1]
	+ a[1,0] = [2]
	+ assert_equal([1, 2], a)
	+
	+ a = @cls[1]
	+ a[1,1] = [2]
	+ assert_equal([1, 2], a)
	end

	def test_assoc