Created
July 22, 2013 10:52
-
-
Save razielgn/6053008 to your computer and use it in GitHub Desktop.
Broken attempt at adding symbol encodings to Rubinius.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/vm/builtin/symbol.cpp b/vm/builtin/symbol.cpp | |
index 0af6a3f..0a1cf37 100644 | |
--- a/vm/builtin/symbol.cpp | |
+++ b/vm/builtin/symbol.cpp | |
@@ -72,13 +72,11 @@ namespace rubinius { | |
} | |
Encoding* Symbol::encoding(STATE) { | |
- // TODO | |
- return Encoding::usascii_encoding(state); | |
+ return state->shared().symbols.encoding(state, this); | |
} | |
Encoding* Symbol::encoding(STATE, Encoding* enc) { | |
- // TODO | |
- return enc; | |
+ return state->shared().symbols.encoding(state, this, enc); | |
} | |
void Symbol::Info::show(STATE, Object* self, int level) { | |
diff --git a/vm/symboltable.cpp b/vm/symboltable.cpp | |
index 2386620..08e4566 100644 | |
--- a/vm/symboltable.cpp | |
+++ b/vm/symboltable.cpp | |
@@ -50,11 +50,23 @@ namespace rubinius { | |
return kinds[sym->index()]; | |
} | |
- size_t SymbolTable::add(std::string str) { | |
+ Encoding* SymbolTable::encoding(STATE, const Symbol*sym) { | |
+ return encodings[sym->index()]; | |
+ } | |
+ | |
+ Encoding* SymbolTable::encoding(STATE, const Symbol* sym, Encoding* enc) { | |
+ encodings[sym->index()] = enc; | |
+ | |
+ return enc; | |
+ } | |
+ | |
+ size_t SymbolTable::add(std::string str, Encoding *enc) { | |
bytes_used_ += (str.size() + sizeof(str)); | |
strings.push_back(str); | |
kinds.push_back(detect_kind(str.data(), str.size())); | |
+ encodings.push_back(enc); | |
+ | |
return strings.size() - 1; | |
} | |
@@ -64,7 +76,9 @@ namespace rubinius { | |
return NULL; | |
} | |
- return lookup(str, length, state->hash_seed()); | |
+ Encoding* enc = Encoding::usascii_encoding(state); | |
+ | |
+ return lookup(str, length, state->hash_seed(), enc); | |
} | |
struct SpecialOperator { | |
@@ -99,14 +113,16 @@ namespace rubinius { | |
} | |
Symbol* SymbolTable::lookup(SharedState* shared, const std::string& str) { | |
- return lookup(str.data(), str.size(), shared->hash_seed); | |
+ Encoding *enc = shared->globals.usascii_encoding.get(); | |
+ return lookup(str.data(), str.size(), shared->hash_seed, enc); | |
} | |
Symbol* SymbolTable::lookup(STATE, const std::string& str) { | |
- return lookup(str.data(), str.size(), state->hash_seed()); | |
+ Encoding* enc = Encoding::usascii_encoding(state); | |
+ return lookup(str.data(), str.size(), state->hash_seed(), enc); | |
} | |
- Symbol* SymbolTable::lookup(const char* str, size_t length, uint32_t seed) { | |
+ Symbol* SymbolTable::lookup(const char* str, size_t length, uint32_t seed, Encoding* enc) { | |
size_t sym; | |
if(const char* op = find_special(str, length)) { | |
@@ -122,17 +138,18 @@ namespace rubinius { | |
utilities::thread::SpinLock::LockGuard guard(lock_); | |
SymbolMap::iterator entry = symbols.find(hash); | |
if(entry == symbols.end()) { | |
- sym = add(std::string(str, length)); | |
+ sym = add(std::string(str, length), enc); | |
SymbolIds v(1, sym); | |
symbols[hash] = v; | |
} else { | |
SymbolIds& v = entry->second; | |
for(SymbolIds::const_iterator i = v.begin(); i != v.end(); ++i) { | |
std::string& s = strings[*i]; | |
+ Encoding* e = encodings[*i]; | |
- if(!strncmp(s.data(), str, length)) return Symbol::from_index(*i); | |
+ if(!strncmp(s.data(), str, length) && enc == e) return Symbol::from_index(*i); | |
} | |
- sym = add(std::string(str, length)); | |
+ sym = add(std::string(str, length), enc); | |
v.push_back(sym); | |
} | |
} | |
@@ -150,8 +167,14 @@ namespace rubinius { | |
// use byte_address() here. | |
const char* bytes = (const char*) str->byte_address(); | |
size_t size = str->byte_size(); | |
+ Encoding* enc = str->encoding(state); | |
if(LANGUAGE_18_ENABLED) { | |
+ if(size == 0) { | |
+ Exception::argument_error(state, "Cannot create a symbol from an empty string"); | |
+ return NULL; | |
+ } | |
+ | |
for(size_t i = 0; i < size; i++) { | |
if(bytes[i] == 0) { | |
Exception::argument_error(state, | |
@@ -161,7 +184,7 @@ namespace rubinius { | |
} | |
} | |
- return lookup(state, bytes, size); | |
+ return lookup(bytes, size, state->hash_seed(), enc); | |
} | |
String* SymbolTable::lookup_string(STATE, const Symbol* sym) { | |
@@ -175,8 +198,10 @@ namespace rubinius { | |
return NULL; | |
} | |
std::string& str = strings[sym_index]; | |
+ Encoding* enc = encodings[sym_index]; | |
+ | |
String* s = String::create(state, str.data(), str.size()); | |
- s->encoding(state, Encoding::usascii_encoding(state)); | |
+ s->encoding(state, enc); | |
return s; | |
} | |
diff --git a/vm/symboltable.hpp b/vm/symboltable.hpp | |
index 16d41fa..aad76f3 100644 | |
--- a/vm/symboltable.hpp | |
+++ b/vm/symboltable.hpp | |
@@ -31,6 +31,7 @@ namespace rubinius { | |
class String; | |
class Symbol; | |
class SharedState; | |
+ class Encoding; | |
typedef std::vector<std::string> SymbolStrings; | |
typedef std::vector<std::size_t> SymbolIds; | |
@@ -52,15 +53,17 @@ namespace rubinius { | |
}; | |
typedef std::vector<Kind> SymbolKinds; | |
+ typedef std::vector<Encoding*> SymbolEncodings; | |
private: | |
SymbolMap symbols; | |
SymbolStrings strings; | |
SymbolKinds kinds; | |
+ SymbolEncodings encodings; | |
utilities::thread::SpinLock lock_; | |
size_t bytes_used_; | |
- Symbol* lookup(const char* str, size_t length, uint32_t seed); | |
+ Symbol* lookup(const char* str, size_t length, uint32_t seed, Encoding* enc); | |
public: | |
@@ -88,9 +91,12 @@ namespace rubinius { | |
Kind kind(STATE, const Symbol* sym) const; | |
+ Encoding* encoding(STATE, const Symbol* sym); | |
+ Encoding* encoding(STATE, const Symbol* sym, Encoding* enc); | |
+ | |
size_t byte_size() const; | |
- size_t add(std::string str); | |
+ size_t add(std::string str, Encoding* enc); | |
Kind detect_kind(const char* str, size_t size) const; | |
}; | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
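At some point during boot, symbols that were added with one encoding are looked up with a different one. Because the patched `lookup` requires the encoding to match as well as the bytes, that second lookup does not find the existing entry and adds a new symbol instead.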
Then it crashes like this:
I would love some feedback, but I would also totally accept something like "It's totally wrong, please stop trying to write C++." 😃