Skip to content

Instantly share code, notes, and snippets.

@jeremy
Created December 15, 2008 23:37
Show Gist options
  • Save jeremy/36170 to your computer and use it in GitHub Desktop.
Save jeremy/36170 to your computer and use it in GitHub Desktop.
Index: intern.h
===================================================================
--- intern.h (revision 20354)
+++ intern.h (working copy)
@@ -269,6 +269,15 @@
VALUE rb_gc_enable _((void));
VALUE rb_gc_disable _((void));
VALUE rb_gc_start _((void));
+VALUE rb_gc_enable_stats _((void));
+VALUE rb_gc_disable_stats _((void));
+VALUE rb_gc_allocated_size _((void));
+unsigned long rb_os_live_objects _((void));
+#ifdef HAVE_LONG_LONG
+unsigned long long rb_os_allocated_objects _((void));
+#else
+unsigned long rb_os_allocated_objects _((void));
+#endif
/* hash.c */
void st_foreach_safe _((struct st_table *, int (*)(ANYARGS), unsigned long));
void rb_hash_foreach _((VALUE, int (*)(ANYARGS), VALUE));
Index: gc.c
===================================================================
--- gc.c (revision 20354)
+++ gc.c (working copy)
@@ -32,8 +32,16 @@
#define EXIT_FAILURE 1
#endif
+#ifdef _WIN32
+#include <string.h>
+#else
+#include <strings.h>
+#endif
+
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
+#elif defined(_WIN32)
+#include <time.h>
#endif
#ifdef HAVE_SYS_RESOURCE_H
@@ -52,7 +60,6 @@
#ifdef __CYGWIN__
int _setjmp(), _longjmp();
#endif
-
/* Make alloca work the best possible way. */
#ifdef __GNUC__
# ifndef atarist
@@ -90,6 +97,20 @@
int ruby_gc_stress = 0;
+static unsigned long live_objects = 0;
+unsigned long rb_os_live_objects()
+{ return live_objects; }
+
+#if defined(HAVE_LONG_LONG)
+static unsigned long long allocated_objects = 0;
+unsigned long long rb_os_allocated_objects()
+{ return allocated_objects; }
+#else
+static unsigned long allocated_objects = 0;
+unsigned long rb_os_allocated_objects()
+{ return allocated_objects; }
+#endif
+
NORETURN(void rb_exc_jump _((VALUE)));
void
@@ -144,6 +165,10 @@
return bool;
}
+long gc_allocated_size = 0;
+long gc_num_allocations = 0;
+static int gc_statistics = 0;
+
void *
ruby_xmalloc(size)
long size;
@@ -166,6 +191,10 @@
rb_memerror();
}
}
+ if (gc_statistics) {
+ gc_allocated_size += size;
+ gc_num_allocations += 1;
+ }
malloc_increase += size;
return mem;
@@ -217,8 +246,16 @@
RUBY_CRITICAL(free(x));
}
+#if HAVE_LONG_LONG
+#define GC_TIME_TYPE LONG_LONG
+#else
+#define GC_TIME_TYPE long
+#endif
+
extern int ruby_in_compile;
static int dont_gc;
+static GC_TIME_TYPE gc_time = 0;
+static int gc_collections = 0;
static int during_gc;
static int need_call_final = 0;
static st_table *finalizer_table = 0;
@@ -253,7 +290,7 @@
* Disables garbage collection, returning <code>true</code> if garbage
* collection was already disabled.
*
- * GC.disable #=> false
+ * GC.disable #=> false or true
* GC.disable #=> true
*
*/
@@ -267,6 +304,140 @@
return old;
}
+/*
+ * call-seq:
+ * GC.enable_stats => true or false
+ *
+ * Enables garbage collection statistics, returning <code>true</code> if garbage
+ * collection statistics was already enabled.
+ *
+ * GC.enable_stats #=> false or true
+ * GC.enable_stats #=> true
+ *
+ */
+
+VALUE
+rb_gc_enable_stats()
+{
+ int old = gc_statistics;
+ gc_statistics = Qtrue;
+ return old;
+}
+
+/*
+ * call-seq:
+ * GC.disable_stats => true or false
+ *
+ * Disables garbage collection statistics, returning <code>true</code> if garbage
+ * collection statistics was already disabled.
+ *
+ * GC.disable_stats #=> false or true
+ * GC.disable_stats #=> true
+ *
+ */
+
+VALUE
+rb_gc_disable_stats()
+{
+ int old = gc_statistics;
+ gc_statistics = Qfalse;
+ gc_allocated_size = 0;
+ gc_num_allocations = 0;
+ return old;
+}
+
+/*
+ * call-seq:
+ * GC.clear_stats => nil
+ *
+ * Clears garbage collection statistics, returning nil. This resets the number
+ * of collections (GC.collections) and the time used (GC.time) to 0.
+ *
+ * GC.clear_stats #=> nil
+ *
+ */
+
+VALUE
+rb_gc_clear_stats()
+{
+ gc_collections = 0;
+ gc_time = 0;
+ gc_allocated_size = 0;
+ gc_num_allocations = 0;
+ return Qnil;
+}
+
+/*
+ * call-seq:
+ * GC.allocated_size => Integer
+ *
+ * Returns the size of memory (in bytes) allocated since GC statistics collection
+ * was enabled.
+ *
+ * GC.allocated_size #=> 35
+ *
+ */
+VALUE
+rb_gc_allocated_size()
+{
+ return INT2NUM(gc_allocated_size);
+}
+
+/*
+ * call-seq:
+ * GC.num_allocations => Integer
+ *
+ * Returns the number of memory allocations since GC statistics collection
+ * was enabled.
+ *
+ * GC.num_allocations #=> 150
+ *
+ */
+VALUE
+rb_gc_num_allocations()
+{
+ return INT2NUM(gc_num_allocations);
+}
+
+/*
+ * call-seq:
+ * GC.collections => Integer
+ *
+ * Returns the number of garbage collections performed while GC statistics collection
+ * was enabled.
+ *
+ * GC.collections #=> 35
+ *
+ */
+
+VALUE
+rb_gc_collections()
+{
+ return INT2NUM(gc_collections);
+}
+
+/*
+ * call-seq:
+ * GC.time => Integer
+ *
+ * Returns the time spent during garbage collection while GC statistics collection
+ * was enabled (in micro seconds).
+ *
+ * GC.time #=> 20000
+ *
+ */
+
+VALUE
+rb_gc_time()
+{
+#if HAVE_LONG_LONG
+ return LL2NUM(gc_time);
+#else
+ return LONG2NUM(gc_time);
+#endif
+}
+
+
VALUE rb_mGC;
static struct gc_list {
@@ -358,7 +529,7 @@
static RVALUE *freelist = 0;
static RVALUE *deferred_final_list = 0;
-#define HEAPS_INCREMENT 10
+static int heaps_increment = 10;
static struct heaps_slot {
void *membase;
RVALUE *slot;
@@ -367,13 +538,165 @@
static int heaps_length = 0;
static int heaps_used = 0;
-#define HEAP_MIN_SLOTS 10000
-static int heap_slots = HEAP_MIN_SLOTS;
+static int heap_min_slots = 10000;
+static int heap_slots = 10000;
-#define FREE_MIN 4096
+static int heap_free_min = 4096;
+static int heap_slots_increment = 10000;
+static double heap_slots_growth_factor = 1.8;
+static long initial_malloc_limit = GC_MALLOC_LIMIT;
+
+static int verbose_gc_stats = Qfalse;
+
+static FILE* gc_data_file = NULL;
+
static RVALUE *himem, *lomem;
+static void set_gc_parameters()
+{
+ char *gc_stats_ptr, *min_slots_ptr, *free_min_ptr, *heap_slots_incr_ptr,
+ *heap_incr_ptr, *malloc_limit_ptr, *gc_heap_file_ptr, *heap_slots_growth_factor_ptr;
+
+ gc_data_file = stderr;
+
+ gc_stats_ptr = getenv("RUBY_GC_STATS");
+ if (gc_stats_ptr != NULL) {
+ int gc_stats_i = atoi(gc_stats_ptr);
+ if (gc_stats_i > 0) {
+ verbose_gc_stats = Qtrue;
+ }
+ }
+
+ gc_heap_file_ptr = getenv("RUBY_GC_DATA_FILE");
+ if (gc_heap_file_ptr != NULL) {
+ FILE* data_file = fopen(gc_heap_file_ptr, "w");
+ if (data_file != NULL) {
+ gc_data_file = data_file;
+ }
+ else {
+ fprintf(stderr,
+ "can't open gc log file %s for writing, using default\n", gc_heap_file_ptr);
+ }
+ }
+
+ min_slots_ptr = getenv("RUBY_HEAP_MIN_SLOTS");
+ if (min_slots_ptr != NULL) {
+ int min_slots_i = atoi(min_slots_ptr);
+ if (verbose_gc_stats) {
+ fprintf(gc_data_file, "RUBY_HEAP_MIN_SLOTS=%s\n", min_slots_ptr);
+ }
+ if (min_slots_i > 0) {
+ heap_slots = min_slots_i;
+ heap_min_slots = min_slots_i;
+ }
+ }
+
+ free_min_ptr = getenv("RUBY_HEAP_FREE_MIN");
+ if (free_min_ptr != NULL) {
+ int free_min_i = atoi(free_min_ptr);
+ if (verbose_gc_stats) {
+ fprintf(gc_data_file, "RUBY_HEAP_FREE_MIN=%s\n", free_min_ptr);
+ }
+ if (free_min_i > 0) {
+ heap_free_min = free_min_i;
+ }
+ }
+
+ heap_incr_ptr = getenv("RUBY_HEAP_INCREMENT");
+ if (heap_incr_ptr != NULL) {
+ int heap_incr_i = atoi(heap_incr_ptr);
+ if (verbose_gc_stats) {
+ fprintf(gc_data_file, "RUBY_HEAP_INCREMENT=%s\n", heap_incr_ptr);
+ }
+ if (heap_incr_i > 0) {
+ heaps_increment = heap_incr_i;
+ }
+ }
+
+ heap_slots_incr_ptr = getenv("RUBY_HEAP_SLOTS_INCREMENT");
+ if (heap_slots_incr_ptr != NULL) {
+ int heap_slots_incr_i = atoi(heap_slots_incr_ptr);
+ if (verbose_gc_stats) {
+ fprintf(gc_data_file, "RUBY_HEAP_SLOTS_INCREMENT=%s\n", heap_slots_incr_ptr);
+ }
+ if (heap_slots_incr_i > 0) {
+ heap_slots_increment = heap_slots_incr_i;
+ }
+ }
+
+ heap_slots_growth_factor_ptr = getenv("RUBY_HEAP_SLOTS_GROWTH_FACTOR");
+ if (heap_slots_growth_factor_ptr != NULL) {
+ double heap_slots_growth_factor_d = atoi(heap_slots_growth_factor_ptr);
+ if (verbose_gc_stats) {
+ fprintf(gc_data_file, "RUBY_HEAP_SLOTS_GROWTH_FACTOR=%s\n", heap_slots_growth_factor_ptr);
+ }
+ if (heap_slots_growth_factor_d > 0) {
+ heap_slots_growth_factor = heap_slots_growth_factor_d;
+ }
+ }
+
+ malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT");
+ if (malloc_limit_ptr != NULL) {
+ int malloc_limit_i = atol(malloc_limit_ptr);
+ if (verbose_gc_stats) {
+ fprintf(gc_data_file, "RUBY_GC_MALLOC_LIMIT=%s\n", malloc_limit_ptr);
+ }
+ if (malloc_limit_i > 0) {
+ initial_malloc_limit = malloc_limit_i;
+ }
+ }
+}
+
+/*
+ * call-seq:
+ * GC.dump => nil
+ *
+ * dumps information about the current GC data structures to the GC log file
+ *
+ * GC.dump #=> nil
+ *
+ */
+
+VALUE
+rb_gc_dump()
+{
+ int i;
+
+ for (i = 0; i < heaps_used; i++) {
+ int heap_size = heaps[i].limit;
+ fprintf(gc_data_file, "HEAP[%2d]: size=%7d\n", i, heap_size);
+ }
+
+ return Qnil;
+}
+
+/*
+ * call-seq:
+ * GC.log String => String
+ *
+ * Logs string to the GC data file and returns it.
+ *
+ * GC.log "manual GC call" #=> "manual GC call"
+ *
+ */
+
+VALUE
+rb_gc_log(self, original_str)
+ VALUE self, original_str;
+{
+ if (original_str == Qnil) {
+ fprintf(gc_data_file, "\n");
+ }
+ else {
+ VALUE str = StringValue(original_str);
+ char *p = RSTRING(str)->ptr;
+ fprintf(gc_data_file, "%s\n", p);
+ }
+ return original_str;
+}
+
+
static void
add_heap()
{
@@ -384,7 +707,7 @@
struct heaps_slot *p;
int length;
- heaps_length += HEAPS_INCREMENT;
+ heaps_length += heaps_increment;
length = heaps_length*sizeof(struct heaps_slot);
RUBY_CRITICAL(
if (heaps_used > 0) {
@@ -400,10 +723,10 @@
for (;;) {
RUBY_CRITICAL(p = (RVALUE*)malloc(sizeof(RVALUE)*(heap_slots+1)));
if (p == 0) {
- if (heap_slots == HEAP_MIN_SLOTS) {
+ if (heap_slots == heap_min_slots) {
rb_memerror();
}
- heap_slots = HEAP_MIN_SLOTS;
+ heap_slots = heap_min_slots;
continue;
}
heaps[heaps_used].membase = p;
@@ -419,8 +742,9 @@
if (lomem == 0 || lomem > p) lomem = p;
if (himem < pend) himem = pend;
heaps_used++;
- heap_slots *= 1.8;
- if (heap_slots <= 0) heap_slots = HEAP_MIN_SLOTS;
+ heap_slots += heap_slots_increment;
+ heap_slots_increment *= heap_slots_growth_factor;
+ if (heap_slots <= 0) heap_slots = heap_min_slots;
while (p < pend) {
p->as.free.flags = 0;
@@ -454,6 +778,8 @@
RANY(obj)->file = ruby_sourcefile;
RANY(obj)->line = ruby_sourceline;
#endif
+ live_objects++;
+ allocated_objects++;
return obj;
}
@@ -1115,6 +1441,39 @@
}
}
+static char* obj_type(int tp)
+{
+ switch (tp) {
+ case T_NIL : return "NIL";
+ case T_OBJECT : return "OBJECT";
+ case T_CLASS : return "CLASS";
+ case T_ICLASS : return "ICLASS";
+ case T_MODULE : return "MODULE";
+ case T_FLOAT : return "FLOAT";
+ case T_STRING : return "STRING";
+ case T_REGEXP : return "REGEXP";
+ case T_ARRAY : return "ARRAY";
+ case T_FIXNUM : return "FIXNUM";
+ case T_HASH : return "HASH";
+ case T_STRUCT : return "STRUCT";
+ case T_BIGNUM : return "BIGNUM";
+ case T_FILE : return "FILE";
+
+ case T_TRUE : return "TRUE";
+ case T_FALSE : return "FALSE";
+ case T_DATA : return "DATA";
+ case T_MATCH : return "MATCH";
+ case T_SYMBOL : return "SYMBOL";
+
+ case T_BLKTAG : return "BLKTAG";
+ case T_UNDEF : return "UNDEF";
+ case T_VARMAP : return "VARMAP";
+ case T_SCOPE : return "SCOPE";
+ case T_NODE : return "NODE";
+ default: return "____";
+ }
+}
+
static void
free_unused_heaps()
{
@@ -1144,14 +1503,24 @@
int i;
unsigned long live = 0;
unsigned long free_min = 0;
+ live_objects = 0;
+ unsigned long really_freed = 0;
+ int free_counts[256];
+ int live_counts[256];
+ int do_gc_stats = gc_statistics & verbose_gc_stats;
+
for (i = 0; i < heaps_used; i++) {
free_min += heaps[i].limit;
}
free_min = free_min * 0.2;
- if (free_min < FREE_MIN)
- free_min = FREE_MIN;
+ if (free_min < heap_free_min)
+ free_min = heap_free_min;
+ if (do_gc_stats) {
+ for (i = 0 ; i< 256; i++) { free_counts[i] = live_counts[i] = 0; }
+ }
+
if (ruby_in_compile && ruby_parser_stack_on_heap()) {
/* should not reclaim nodes during compilation
if yacc's semantic stack is not allocated on machine stack */
@@ -1188,12 +1557,20 @@
if (!deferred) {
p->as.free.flags = T_ZOMBIE;
RDATA(p)->dfree = 0;
+ } else if (do_gc_stats) {
+ really_freed++;
}
p->as.free.flags |= FL_MARK;
p->as.free.next = final_list;
final_list = p;
}
else {
+ if (do_gc_stats) {
+ int obt = p->as.basic.flags & T_MASK;
+ if (obt) {
+ free_counts[obt]++;
+ }
+ }
add_freelist(p);
}
n++;
@@ -1204,7 +1581,10 @@
}
else {
RBASIC(p)->flags &= ~FL_MARK;
- live++;
+ live_objects++;
+ if (do_gc_stats) {
+ live_counts[RANY((VALUE)p)->as.basic.flags & T_MASK]++;
+ }
}
p++;
}
@@ -1222,8 +1602,8 @@
}
}
if (malloc_increase > malloc_limit) {
- malloc_limit += (malloc_increase - malloc_limit) * (double)live / (live + freed);
- if (malloc_limit < GC_MALLOC_LIMIT) malloc_limit = GC_MALLOC_LIMIT;
+ malloc_limit += (malloc_increase - malloc_limit) * (double)live_objects / (live_objects + freed);
+ if (malloc_limit < initial_malloc_limit) malloc_limit = initial_malloc_limit;
}
malloc_increase = 0;
if (freed < free_min) {
@@ -1231,6 +1611,20 @@
}
during_gc = 0;
+ if (do_gc_stats) {
+ fprintf(gc_data_file, "objects processed: %.7d\n", live+freed);
+ fprintf(gc_data_file, "live objects : %.7d\n", live);
+ fprintf(gc_data_file, "freelist objects : %.7d\n", freed - really_freed);
+ fprintf(gc_data_file, "freed objects : %.7d\n", really_freed);
+ for(i=0; i<256; i++) {
+ if (free_counts[i]>0) {
+ fprintf(gc_data_file,
+ "kept %.7d / freed %.7d objects of type %s\n",
+ live_counts[i], free_counts[i], obj_type(i));
+ }
+ }
+ }
+
/* clear finalization list */
if (final_list) {
deferred_final_list = final_list;
@@ -1434,6 +1828,7 @@
struct gc_list *list;
struct FRAME * volatile frame; /* gcc 2.7.2.3 -O2 bug?? */
jmp_buf save_regs_gc_mark;
+ struct timeval gctv1, gctv2;
SET_STACK_END;
#ifdef HAVE_NATIVETHREAD
@@ -1450,6 +1845,14 @@
if (during_gc) return;
during_gc++;
+ if (gc_statistics) {
+ gc_collections++;
+ gettimeofday(&gctv1, NULL);
+ if (verbose_gc_stats) {
+ fprintf(gc_data_file, "Garbage collection started\n");
+ }
+ }
+
init_mark_stack();
gc_mark((VALUE)ruby_current_node, 0);
@@ -1525,6 +1928,17 @@
} while (!MARK_STACK_EMPTY);
gc_sweep();
+
+ if (gc_statistics) {
+ GC_TIME_TYPE musecs_used;
+ gettimeofday(&gctv2, NULL);
+ musecs_used = ((GC_TIME_TYPE)(gctv2.tv_sec - gctv1.tv_sec) * 1000000) + (gctv2.tv_usec - gctv1.tv_usec);
+ gc_time += musecs_used;
+
+ if (verbose_gc_stats) {
+ fprintf(gc_data_file, "GC time: %d msec\n", musecs_used / 1000);
+ }
+ }
}
void
@@ -1706,6 +2120,7 @@
if (!rb_gc_stack_start) {
Init_stack(0);
}
+ set_gc_parameters();
add_heap();
}
@@ -2139,6 +2554,35 @@
return (VALUE)((long)obj|FIXNUM_FLAG);
}
+/* call-seq:
+ * ObjectSpace.live_objects => number
+ *
+ * Returns the count of objects currently allocated in the system. This goes
+ * down after the garbage collector runs.
+ */
+static
+VALUE os_live_objects(VALUE self)
+{ return ULONG2NUM(live_objects); }
+
+/* call-seq:
+ * ObjectSpace.allocated_objects => number
+ *
+ * Returns the count of objects allocated since the Ruby interpreter has
+ * started. This number can only increase. To know how many objects are
+ * currently allocated, use ObjectSpace::live_objects
+ */
+static
+VALUE os_allocated_objects(VALUE self)
+{
+#if defined(HAVE_LONG_LONG)
+ return ULL2NUM(allocated_objects);
+#else
+ return ULONG2NUM(allocated_objects);
+#endif
+}
+
+
+
/*
* The <code>GC</code> module provides an interface to Ruby's mark and
* sweep garbage collection mechanism. Some of the underlying methods
@@ -2158,6 +2602,16 @@
rb_define_singleton_method(rb_mGC, "stress=", gc_stress_set, 1);
rb_define_method(rb_mGC, "garbage_collect", rb_gc_start, 0);
+ rb_define_singleton_method(rb_mGC, "enable_stats", rb_gc_enable_stats, 0);
+ rb_define_singleton_method(rb_mGC, "disable_stats", rb_gc_disable_stats, 0);
+ rb_define_singleton_method(rb_mGC, "clear_stats", rb_gc_clear_stats, 0);
+ rb_define_singleton_method(rb_mGC, "allocated_size", rb_gc_allocated_size, 0);
+ rb_define_singleton_method(rb_mGC, "num_allocations", rb_gc_num_allocations, 0);
+ rb_define_singleton_method(rb_mGC, "collections", rb_gc_collections, 0);
+ rb_define_singleton_method(rb_mGC, "time", rb_gc_time, 0);
+ rb_define_singleton_method(rb_mGC, "dump", rb_gc_dump, 0);
+ rb_define_singleton_method(rb_mGC, "log", rb_gc_log, 1);
+
rb_mObSpace = rb_define_module("ObjectSpace");
rb_define_module_function(rb_mObSpace, "each_object", os_each_obj, -1);
rb_define_module_function(rb_mObSpace, "garbage_collect", rb_gc_start, 0);
@@ -2165,6 +2619,8 @@
rb_define_module_function(rb_mObSpace, "remove_finalizer", rm_final, 1);
rb_define_module_function(rb_mObSpace, "finalizers", finals, 0);
rb_define_module_function(rb_mObSpace, "call_finalizer", call_final, 1);
+ rb_define_module_function(rb_mObSpace, "live_objects", os_live_objects, 0);
+ rb_define_module_function(rb_mObSpace, "allocated_objects", os_allocated_objects, 0);
rb_define_module_function(rb_mObSpace, "define_finalizer", define_final, -1);
rb_define_module_function(rb_mObSpace, "undefine_finalizer", undefine_final, 1);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment