Skip to content

Instantly share code, notes, and snippets.

@razielgn
Created July 22, 2013 10:52
Show Gist options
  • Save razielgn/6053008 to your computer and use it in GitHub Desktop.
Save razielgn/6053008 to your computer and use it in GitHub Desktop.
Broken attempt at adding symbol encodings to Rubinius.
diff --git a/vm/builtin/symbol.cpp b/vm/builtin/symbol.cpp
index 0af6a3f..0a1cf37 100644
--- a/vm/builtin/symbol.cpp
+++ b/vm/builtin/symbol.cpp
@@ -72,13 +72,11 @@ namespace rubinius {
}
Encoding* Symbol::encoding(STATE) {
- // TODO
- return Encoding::usascii_encoding(state);
+ return state->shared().symbols.encoding(state, this);
}
Encoding* Symbol::encoding(STATE, Encoding* enc) {
- // TODO
- return enc;
+ return state->shared().symbols.encoding(state, this, enc);
}
void Symbol::Info::show(STATE, Object* self, int level) {
diff --git a/vm/symboltable.cpp b/vm/symboltable.cpp
index 2386620..08e4566 100644
--- a/vm/symboltable.cpp
+++ b/vm/symboltable.cpp
@@ -50,11 +50,23 @@ namespace rubinius {
return kinds[sym->index()];
}
- size_t SymbolTable::add(std::string str) {
+ Encoding* SymbolTable::encoding(STATE, const Symbol*sym) {
+ return encodings[sym->index()];
+ }
+
+ Encoding* SymbolTable::encoding(STATE, const Symbol* sym, Encoding* enc) {
+ encodings[sym->index()] = enc;
+
+ return enc;
+ }
+
+ size_t SymbolTable::add(std::string str, Encoding *enc) {
bytes_used_ += (str.size() + sizeof(str));
strings.push_back(str);
kinds.push_back(detect_kind(str.data(), str.size()));
+ encodings.push_back(enc);
+
return strings.size() - 1;
}
@@ -64,7 +76,9 @@ namespace rubinius {
return NULL;
}
- return lookup(str, length, state->hash_seed());
+ Encoding* enc = Encoding::usascii_encoding(state);
+
+ return lookup(str, length, state->hash_seed(), enc);
}
struct SpecialOperator {
@@ -99,14 +113,16 @@ namespace rubinius {
}
Symbol* SymbolTable::lookup(SharedState* shared, const std::string& str) {
- return lookup(str.data(), str.size(), shared->hash_seed);
+ Encoding *enc = shared->globals.usascii_encoding.get();
+ return lookup(str.data(), str.size(), shared->hash_seed, enc);
}
Symbol* SymbolTable::lookup(STATE, const std::string& str) {
- return lookup(str.data(), str.size(), state->hash_seed());
+ Encoding* enc = Encoding::usascii_encoding(state);
+ return lookup(str.data(), str.size(), state->hash_seed(), enc);
}
- Symbol* SymbolTable::lookup(const char* str, size_t length, uint32_t seed) {
+ Symbol* SymbolTable::lookup(const char* str, size_t length, uint32_t seed, Encoding* enc) {
size_t sym;
if(const char* op = find_special(str, length)) {
@@ -122,17 +138,18 @@ namespace rubinius {
utilities::thread::SpinLock::LockGuard guard(lock_);
SymbolMap::iterator entry = symbols.find(hash);
if(entry == symbols.end()) {
- sym = add(std::string(str, length));
+ sym = add(std::string(str, length), enc);
SymbolIds v(1, sym);
symbols[hash] = v;
} else {
SymbolIds& v = entry->second;
for(SymbolIds::const_iterator i = v.begin(); i != v.end(); ++i) {
std::string& s = strings[*i];
+ Encoding* e = encodings[*i];
- if(!strncmp(s.data(), str, length)) return Symbol::from_index(*i);
+ if(!strncmp(s.data(), str, length) && enc == e) return Symbol::from_index(*i);
}
- sym = add(std::string(str, length));
+ sym = add(std::string(str, length), enc);
v.push_back(sym);
}
}
@@ -150,8 +167,14 @@ namespace rubinius {
// use byte_address() here.
const char* bytes = (const char*) str->byte_address();
size_t size = str->byte_size();
+ Encoding* enc = str->encoding(state);
if(LANGUAGE_18_ENABLED) {
+ if(size == 0) {
+ Exception::argument_error(state, "Cannot create a symbol from an empty string");
+ return NULL;
+ }
+
for(size_t i = 0; i < size; i++) {
if(bytes[i] == 0) {
Exception::argument_error(state,
@@ -161,7 +184,7 @@ namespace rubinius {
}
}
- return lookup(state, bytes, size);
+ return lookup(bytes, size, state->hash_seed(), enc);
}
String* SymbolTable::lookup_string(STATE, const Symbol* sym) {
@@ -175,8 +198,10 @@ namespace rubinius {
return NULL;
}
std::string& str = strings[sym_index];
+ Encoding* enc = encodings[sym_index];
+
String* s = String::create(state, str.data(), str.size());
- s->encoding(state, Encoding::usascii_encoding(state));
+ s->encoding(state, enc);
return s;
}
diff --git a/vm/symboltable.hpp b/vm/symboltable.hpp
index 16d41fa..aad76f3 100644
--- a/vm/symboltable.hpp
+++ b/vm/symboltable.hpp
@@ -31,6 +31,7 @@ namespace rubinius {
class String;
class Symbol;
class SharedState;
+ class Encoding;
typedef std::vector<std::string> SymbolStrings;
typedef std::vector<std::size_t> SymbolIds;
@@ -52,15 +53,17 @@ namespace rubinius {
};
typedef std::vector<Kind> SymbolKinds;
+ typedef std::vector<Encoding*> SymbolEncodings;
private:
SymbolMap symbols;
SymbolStrings strings;
SymbolKinds kinds;
+ SymbolEncodings encodings;
utilities::thread::SpinLock lock_;
size_t bytes_used_;
- Symbol* lookup(const char* str, size_t length, uint32_t seed);
+ Symbol* lookup(const char* str, size_t length, uint32_t seed, Encoding* enc);
public:
@@ -88,9 +91,12 @@ namespace rubinius {
Kind kind(STATE, const Symbol* sym) const;
+ Encoding* encoding(STATE, const Symbol* sym);
+ Encoding* encoding(STATE, const Symbol* sym, Encoding* enc);
+
size_t byte_size() const;
- size_t add(std::string str);
+ size_t add(std::string str, Encoding* enc);
Kind detect_kind(const char* str, size_t size) const;
};
};
@razielgn
Copy link
Author

At some point during boot, symbols that were added with one encoding are looked up with a different one.

Then it crashes like this:

Type Error detected:
  Tried to use non-reference value 0x1a as type Array (6)
/home/razielgn/rubinius/staging/bin/rbx(_ZN8rubinius11VMExceptionC2EPKcb+0x58) [0x5c5a18]
/home/razielgn/rubinius/staging/bin/rbx() [0x5c75a5]
/home/razielgn/rubinius/staging/bin/rbx() [0x5c5ceb]
/home/razielgn/rubinius/staging/bin/rbx(_ZN8rubinius9TypeError5raiseENS_11object_typeEPNS_6ObjectEPKc+0x37) [0x5c4db7]
/home/razielgn/rubinius/staging/bin/rbx() [0x79f2c5]
/home/razielgn/rubinius/staging/bin/rbx(_ZN8rubinius8Encoding13encoding_listEPNS_5StateE+0x5a) [0x79dfaa]
/home/razielgn/rubinius/staging/bin/rbx(_ZN8rubinius8Encoding6defineEPNS_5StateEPKcP18OnigEncodingTypeSTPNS_6ObjectE+0x69) [0x79b699]
/home/razielgn/rubinius/staging/bin/rbx(_ZN8rubinius8Encoding4initEPNS_5StateE+0x570) [0x799c60]
/home/razielgn/rubinius/staging/bin/rbx(_ZN8rubinius2VM26initialize_builtin_classesEPNS_5StateE+0x457) [0x71a067]
/home/razielgn/rubinius/staging/bin/rbx(_ZN8rubinius2VM18bootstrap_ontologyEPNS_5StateE+0x83) [0x71a123]
/home/razielgn/rubinius/staging/bin/rbx(_ZN8rubinius2VM18initialize_as_rootEv+0x11b) [0x77d9ab]
/home/razielgn/rubinius/staging/bin/rbx(_ZN8rubinius11Environment7boot_vmEv+0x21) [0x5bdf71]
/home/razielgn/rubinius/staging/bin/rbx(_ZN8rubinius11Environment19run_from_filesystemEv+0xda) [0x5bfd3a]
/home/razielgn/rubinius/staging/bin/rbx(main+0x244) [0x898754]
/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf5) [0x60ddea5]
/home/razielgn/rubinius/staging/bin/rbx() [0x58b2d9]
Ruby backtrace:
Abort!
/home/razielgn/rubinius/staging/bin/rbx(_ZN8rubinius5abortEv+0x3d) [0x5c519d]
/home/razielgn/rubinius/staging/bin/rbx() [0x77eb71]
/home/razielgn/rubinius/staging/bin/rbx(main+0x10da) [0x8995ea]
/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf5) [0x60ddea5]
/home/razielgn/rubinius/staging/bin/rbx() [0x58b2d9]

I would love some feedback, but I'm also totally open to hearing something like "It's totally wrong, stop trying to write C++ please." 😃

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment