Skip to content

Instantly share code, notes, and snippets.

@MasterDuke17
Created January 27, 2017 01:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MasterDuke17/587545fd13e53cb66400f42751650bcd to your computer and use it in GitHub Desktop.
Save MasterDuke17/587545fd13e53cb66400f42751650bcd to your computer and use it in GitHub Desktop.
radix work in progress
diff --git a/src/core/coerce.c b/src/core/coerce.c
index df8cd8aa..e172541e 100644
--- a/src/core/coerce.c
+++ b/src/core/coerce.c
@@ -349,7 +349,99 @@ MVMint64 MVM_coerce_simple_intify(MVMThreadContext *tc, MVMObject *obj) {
/* concatenating with "" ensures that only literal strings are accepted as argument. */
#define STR_WITH_LEN(str) ("" str ""), (sizeof(str) - 1)
-MVMObject * MVM_radix(MVMThreadContext *tc, MVMint64 radix, MVMString *str, MVMint64 offset, MVMint64 flag) {
+/* Maps an ASCII character to its digit value: '0'..'9' -> 0..9 and
+ * 'a'..'z' / 'A'..'Z' -> 10..35. Returns -1 for anything else. */
+static MVMint64 radix3_ascii_digit(MVMint64 ch) {
+    if (ch >= '0' && ch <= '9') return ch - '0';
+    if (ch >= 'a' && ch <= 'z') return ch - 'a' + 10;
+    if (ch >= 'A' && ch <= 'Z') return ch - 'A' + 10;
+    return -1;
+}
+
+/* Experimental variant of MVM_radix with a fast path for pure-ASCII input.
+ *
+ * Parses digits of the given radix out of str, starting at grapheme index
+ * offset. A single '_' is skipped after each digit.
+ *
+ * flag bits, as used below:
+ *   0x01  negate the resulting value
+ *   0x02  accept one leading '+' or '-'
+ *   0x04  only commit value/base on a non-zero digit, so a run of zeros at
+ *         the end of the digits contributes to neither
+ *
+ * Returns a freshly allocated array (the current HLL's slurpy array type)
+ * holding three boxed integers:
+ *   value  the parsed number
+ *   base   radix raised to the number of digits that contributed to value
+ *   pos    the index just past the last digit consumed, or -1 if no digit
+ *          was consumed
+ *
+ * Throws an ad-hoc exception when radix is greater than 36. */
+MVMObject * MVM_radix3(MVMThreadContext *tc, MVMint64 radix, MVMString *str, MVMint64 offset, MVMint64 flag) {
+    MVMObject *result;
+    MVMint64 zvalue = 0;
+    MVMint64 zbase = 1;
+    MVMint64 chars = MVM_string_graphs(tc, str);
+    MVMint64 value = zvalue;
+    MVMint64 base = zbase;
+    MVMint64 pos = -1;
+    MVMuint16 neg = 0;
+    MVMint64 ch;
+    char *enc = NULL;
+
+    if (radix > 36) {
+        MVM_exception_throw_adhoc(tc, "Cannot convert radix of %"PRId64" (max 36)", radix);
+    }
+
+    /* Only encode when there is something left to parse. The replacement
+     * string is a single NUL, so (presumably; confirm against the encoder)
+     * every grapheme that is not ASCII shows up as an embedded '\0' and makes
+     * strlen() come up short. */
+    if (offset >= 0 && offset < chars) {
+        enc = MVM_string_ascii_encode_substr(tc, str, NULL, offset, -1, MVM_string_chr(tc, (MVMGrapheme32) 0), 0);
+    }
+
+    /* enc starts at `offset`, so its expected length is chars - offset and
+     * enc[0] corresponds to the grapheme at `offset`. */
+    if (enc && (MVMint64)strlen(enc) == chars - offset) {
+        /* Fast path: every remaining grapheme is a non-NUL ASCII byte, so we
+         * can scan the bytes directly. No Unicode lookups are needed because
+         * the only ASCII characters that are digits are '0'..'9'. */
+        const char *cur = enc;
+        const char *end = enc + (chars - offset);
+
+        if ((flag & 0x02) && (*cur == '+' || *cur == '-')) {
+            neg = (*cur == '-');
+            cur++;
+        }
+
+        while (cur < end) {
+            ch = radix3_ascii_digit((unsigned char)*cur);
+            if (ch < 0 || ch >= radix) break;
+            zvalue = zvalue * radix + ch;
+            zbase = zbase * radix;
+            cur++;
+            /* Translate the position within enc back to an index into str. */
+            pos = offset + (MVMint64)(cur - enc);
+            if (ch != 0 || !(flag & 0x04)) { value = zvalue; base = zbase; }
+            /* A single underscore may follow a digit; pos is not advanced. */
+            if (cur < end && *cur == '_') cur++;
+        }
+    }
+    else {
+        /* Slow path: walk the string grapheme by grapheme. */
+        ch = (offset < chars) ? MVM_string_get_grapheme_at_nocheck(tc, str, offset) : 0;
+        if ((flag & 0x02) && (ch == '+' || ch == '-')) {
+            neg = (ch == '-');
+            offset++;
+            ch = (offset < chars) ? MVM_string_get_grapheme_at_nocheck(tc, str, offset) : 0;
+        }
+
+        while (offset < chars) {
+            if (ch >= '0' && ch <= '9') ch = ch - '0'; /* fast-path for ASCII 0..9 */
+            else if (ch >= 'a' && ch <= 'z') ch = ch - 'a' + 10;
+            else if (ch >= 'A' && ch <= 'Z') ch = ch - 'A' + 10;
+            else if (ch >= 0xFF21 && ch <= 0xFF3A) ch = ch - 0xFF21 + 10; /* uppercase fullwidth */
+            else if (ch >= 0xFF41 && ch <= 0xFF5A) ch = ch - 0xFF41 + 10; /* lowercase fullwidth */
+            else if (ch > 0 && MVM_unicode_codepoint_has_property_value(tc, ch, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY,
+                    MVM_unicode_cname_to_property_value_code(tc, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY, STR_WITH_LEN("Nd")))) {
+                /* As of Unicode 6.0.0, we know that Nd category numerals are within
+                 * the range 0..9
+                 */
+
+                /* the string returned for NUMERIC_VALUE contains a floating point
+                 * value, so atoi will stop on the . in the string. This is fine
+                 * though, since we'd have to truncate the float regardless.
+                 */
+                ch = atoi(MVM_unicode_codepoint_get_property_cstr(tc, ch, MVM_UNICODE_PROPERTY_NUMERIC_VALUE));
+            }
+            else break;
+            if (ch >= radix) break;
+            zvalue = zvalue * radix + ch;
+            zbase = zbase * radix;
+            offset++; pos = offset;
+            if (ch != 0 || !(flag & 0x04)) { value=zvalue; base=zbase; }
+            if (offset >= chars) break;
+            ch = MVM_string_get_grapheme_at_nocheck(tc, str, offset);
+            if (ch != '_') continue;
+            offset++;
+            if (offset >= chars) break;
+            ch = MVM_string_get_grapheme_at_nocheck(tc, str, offset);
+        }
+    }
+
+    if (enc) {
+        MVM_free(enc);
+    }
+    if (neg || (flag & 0x01)) { value = -value; }
+
+    /* initialize the object */
+    result = MVM_repr_alloc_init(tc, MVM_hll_current(tc)->slurpy_array_type);
+    MVMROOT(tc, result, {
+        MVMObject *box_type = MVM_hll_current(tc)->int_box_type;
+        MVMROOT(tc, box_type, {
+            MVMObject *boxed = MVM_repr_box_int(tc, box_type, value);
+            MVM_repr_push_o(tc, result, boxed);
+            boxed = MVM_repr_box_int(tc, box_type, base);
+            MVM_repr_push_o(tc, result, boxed);
+            boxed = MVM_repr_box_int(tc, box_type, pos);
+            MVM_repr_push_o(tc, result, boxed);
+        });
+    });
+
+    return result;
+}
+
+MVMObject * MVM_radix2(MVMThreadContext *tc, MVMint64 radix, MVMString *str, MVMint64 offset, MVMint64 flag) {
MVMObject *result;
MVMint64 zvalue = 0;
MVMint64 zbase = 1;
@@ -371,6 +463,108 @@ MVMObject * MVM_radix(MVMThreadContext *tc, MVMint64 radix, MVMString *str, MVMi
ch = (offset < chars) ? MVM_string_get_grapheme_at_nocheck(tc, str, offset) : 0;
}
+ switch (str->body.storage_type) {
+ case MVM_STRING_GRAPHEME_32: {
+ while (offset < chars) {
+ if (ch >= '0' && ch <= '9') ch = ch - '0'; /* fast-path for ASCII 0..9 */
+ else if (ch >= 'a' && ch <= 'z') ch = ch - 'a' + 10;
+ else if (ch >= 'A' && ch <= 'Z') ch = ch - 'A' + 10;
+ else if (ch >= 0xFF21 && ch <= 0xFF3A) ch = ch - 0xFF21 + 10; /* uppercase fullwidth */
+ else if (ch >= 0xFF41 && ch <= 0xFF5A) ch = ch - 0xFF41 + 10; /* lowercase fullwidth */
+ else if (ch > 0 && MVM_unicode_codepoint_has_property_value(tc, ch, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY,
+ MVM_unicode_cname_to_property_value_code(tc, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY, STR_WITH_LEN("Nd")))) {
+ /* As of Unicode 6.0.0, we know that Nd category numerals are within
+ * the range 0..9
+ */
+
+ /* the string returned for NUMERIC_VALUE contains a floating point
+ * value, so atoi will stop on the . in the string. This is fine
+ * though, since we'd have to truncate the float regardless.
+ */
+ ch = atoi(MVM_unicode_codepoint_get_property_cstr(tc, ch, MVM_UNICODE_PROPERTY_NUMERIC_VALUE));
+ }
+ else break;
+ if (ch >= radix) break;
+ zvalue = zvalue * radix + ch;
+ zbase = zbase * radix;
+ offset++; pos = offset;
+ if (ch != 0 || !(flag & 0x04)) { value=zvalue; base=zbase; }
+ if (offset >= chars) break;
+ ch = str->body.storage.blob_32[offset];
+ if (ch != '_') continue;
+ offset++;
+ if (offset >= chars) break;
+ ch = str->body.storage.blob_32[offset];
+ }
+ break;
+ }
+ case MVM_STRING_GRAPHEME_8:
+ while (offset < chars) {
+ if (ch >= '0' && ch <= '9') ch = ch - '0'; /* fast-path for ASCII 0..9 */
+ else if (ch >= 'a' && ch <= 'z') ch = ch - 'a' + 10;
+ else if (ch >= 'A' && ch <= 'Z') ch = ch - 'A' + 10;
+ else if (ch >= 0xFF21 && ch <= 0xFF3A) ch = ch - 0xFF21 + 10; /* uppercase fullwidth */
+ else if (ch >= 0xFF41 && ch <= 0xFF5A) ch = ch - 0xFF41 + 10; /* lowercase fullwidth */
+ else if (ch > 0 && MVM_unicode_codepoint_has_property_value(tc, ch, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY,
+ MVM_unicode_cname_to_property_value_code(tc, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY, STR_WITH_LEN("Nd")))) {
+ /* As of Unicode 6.0.0, we know that Nd category numerals are within
+ * the range 0..9
+ */
+
+ /* the string returned for NUMERIC_VALUE contains a floating point
+ * value, so atoi will stop on the . in the string. This is fine
+ * though, since we'd have to truncate the float regardless.
+ */
+ ch = atoi(MVM_unicode_codepoint_get_property_cstr(tc, ch, MVM_UNICODE_PROPERTY_NUMERIC_VALUE));
+ }
+ else break;
+ if (ch >= radix) break;
+ zvalue = zvalue * radix + ch;
+ zbase = zbase * radix;
+ offset++; pos = offset;
+ if (ch != 0 || !(flag & 0x04)) { value=zvalue; base=zbase; }
+ if (offset >= chars) break;
+ ch = str->body.storage.blob_8[offset];
+ if (ch != '_') continue;
+ offset++;
+ if (offset >= chars) break;
+ ch = str->body.storage.blob_8[offset];
+ }
+ break;
+ case MVM_STRING_GRAPHEME_ASCII:
+ while (offset < chars) {
+ if (ch >= '0' && ch <= '9') ch = ch - '0'; /* fast-path for ASCII 0..9 */
+ else if (ch >= 'a' && ch <= 'z') ch = ch - 'a' + 10;
+ else if (ch >= 'A' && ch <= 'Z') ch = ch - 'A' + 10;
+ else if (ch >= 0xFF21 && ch <= 0xFF3A) ch = ch - 0xFF21 + 10; /* uppercase fullwidth */
+ else if (ch >= 0xFF41 && ch <= 0xFF5A) ch = ch - 0xFF41 + 10; /* lowercase fullwidth */
+ else if (ch > 0 && MVM_unicode_codepoint_has_property_value(tc, ch, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY,
+ MVM_unicode_cname_to_property_value_code(tc, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY, STR_WITH_LEN("Nd")))) {
+ /* As of Unicode 6.0.0, we know that Nd category numerals are within
+ * the range 0..9
+ */
+
+ /* the string returned for NUMERIC_VALUE contains a floating point
+ * value, so atoi will stop on the . in the string. This is fine
+ * though, since we'd have to truncate the float regardless.
+ */
+ ch = atoi(MVM_unicode_codepoint_get_property_cstr(tc, ch, MVM_UNICODE_PROPERTY_NUMERIC_VALUE));
+ }
+ else break;
+ if (ch >= radix) break;
+ zvalue = zvalue * radix + ch;
+ zbase = zbase * radix;
+ offset++; pos = offset;
+ if (ch != 0 || !(flag & 0x04)) { value=zvalue; base=zbase; }
+ if (offset >= chars) break;
+ ch = str->body.storage.blob_ascii[offset];
+ if (ch != '_') continue;
+ offset++;
+ if (offset >= chars) break;
+ ch = str->body.storage.blob_ascii[offset];
+ }
+ break;
+ case MVM_STRING_STRAND: {
while (offset < chars) {
if (ch >= '0' && ch <= '9') ch = ch - '0'; /* fast-path for ASCII 0..9 */
else if (ch >= 'a' && ch <= 'z') ch = ch - 'a' + 10;
@@ -401,6 +595,9 @@ MVMObject * MVM_radix(MVMThreadContext *tc, MVMint64 radix, MVMString *str, MVMi
offset++;
if (offset >= chars) break;
ch = MVM_string_get_grapheme_at_nocheck(tc, str, offset);
+ break;
+ }
+ }
}
if (neg || flag & 0x01) { value = -value; }
@@ -422,6 +619,78 @@ MVMObject * MVM_radix(MVMThreadContext *tc, MVMint64 radix, MVMString *str, MVMi
return result;
}
+/* Parses digits of the given radix out of str, beginning at grapheme index
+ * offset, and returns an array of three boxed integers: the parsed value,
+ * the radix raised to the number of contributing digits, and the index just
+ * past the last digit consumed (-1 when no digit was consumed).
+ *
+ * flag bits, as used below: 0x01 negates the value, 0x02 permits one leading
+ * '+' or '-', 0x04 commits value/base only on a non-zero digit.
+ *
+ * Throws an ad-hoc exception when radix is greater than 36. */
+MVMObject * MVM_radix(MVMThreadContext *tc, MVMint64 radix, MVMString *str, MVMint64 offset, MVMint64 flag) {
+    const MVMint64 graphs = MVM_string_graphs(tc, str);
+    MVMint64 running_value = 0;   /* accumulates every digit seen */
+    MVMint64 running_base = 1;
+    MVMint64 out_value = 0;       /* last committed value/base */
+    MVMint64 out_base = 1;
+    MVMint64 end_pos = -1;
+    MVMuint16 negative = 0;
+    MVMint64 cp = 0;
+    MVMint64 fields[3];
+    int i;
+    MVMObject *triple;
+
+    if (radix > 36) {
+        MVM_exception_throw_adhoc(tc, "Cannot convert radix of %"PRId64" (max 36)", radix);
+    }
+
+    if (offset < graphs) {
+        cp = MVM_string_get_grapheme_at_nocheck(tc, str, offset);
+    }
+
+    /* Optional sign, only when the caller asked for it. */
+    if ((flag & 0x02) && (cp == '+' || cp == '-')) {
+        negative = (cp == '-');
+        offset++;
+        cp = (offset < graphs) ? MVM_string_get_grapheme_at_nocheck(tc, str, offset) : 0;
+    }
+
+    while (offset < graphs) {
+        MVMint64 digit;
+
+        if (cp >= '0' && cp <= '9') {
+            digit = cp - '0'; /* plain ASCII digits need no lookup */
+        }
+        else if (cp >= 'a' && cp <= 'z') {
+            digit = cp - 'a' + 10;
+        }
+        else if (cp >= 'A' && cp <= 'Z') {
+            digit = cp - 'A' + 10;
+        }
+        else if (cp >= 0xFF21 && cp <= 0xFF3A) {
+            digit = cp - 0xFF21 + 10; /* fullwidth capital letters */
+        }
+        else if (cp >= 0xFF41 && cp <= 0xFF5A) {
+            digit = cp - 0xFF41 + 10; /* fullwidth small letters */
+        }
+        else if (cp > 0 && MVM_unicode_codepoint_has_property_value(tc, cp, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY,
+                MVM_unicode_cname_to_property_value_code(tc, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY, STR_WITH_LEN("Nd")))) {
+            /* Nd numerals carry a value in 0..9 (true as of Unicode 6.0.0).
+             * The NUMERIC_VALUE string is a floating point literal; atoi
+             * stops at the '.', which is the truncation we want anyway. */
+            digit = atoi(MVM_unicode_codepoint_get_property_cstr(tc, cp, MVM_UNICODE_PROPERTY_NUMERIC_VALUE));
+        }
+        else {
+            break;
+        }
+
+        if (digit >= radix) {
+            break;
+        }
+
+        running_value = running_value * radix + digit;
+        running_base = running_base * radix;
+        end_pos = ++offset;
+
+        if (digit != 0 || !(flag & 0x04)) {
+            out_value = running_value;
+            out_base = running_base;
+        }
+
+        if (offset >= graphs) {
+            break;
+        }
+        cp = MVM_string_get_grapheme_at_nocheck(tc, str, offset);
+
+        /* Step over one underscore between digits; end_pos stays put. */
+        if (cp == '_') {
+            if (++offset >= graphs) {
+                break;
+            }
+            cp = MVM_string_get_grapheme_at_nocheck(tc, str, offset);
+        }
+    }
+
+    if (negative || (flag & 0x01)) {
+        out_value = -out_value;
+    }
+
+    fields[0] = out_value;
+    fields[1] = out_base;
+    fields[2] = end_pos;
+
+    /* Build the [value, base, pos] result array. */
+    triple = MVM_repr_alloc_init(tc, MVM_hll_current(tc)->slurpy_array_type);
+    MVMROOT(tc, triple, {
+        MVMObject *int_type = MVM_hll_current(tc)->int_box_type;
+        MVMROOT(tc, int_type, {
+            for (i = 0; i < 3; i++) {
+                MVMObject *boxed = MVM_repr_box_int(tc, int_type, fields[i]);
+                MVM_repr_push_o(tc, triple, boxed);
+            }
+        });
+    });
+
+    return triple;
+}
void MVM_box_int(MVMThreadContext *tc, MVMint64 value, MVMObject *type,
MVMRegister * dst) {
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment