-
-
Save MasterDuke17/587545fd13e53cb66400f42751650bcd to your computer and use it in GitHub Desktop.
radix work in progress
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/src/core/coerce.c b/src/core/coerce.c | |
index df8cd8aa..e172541e 100644 | |
--- a/src/core/coerce.c | |
+++ b/src/core/coerce.c | |
@@ -349,7 +349,99 @@ MVMint64 MVM_coerce_simple_intify(MVMThreadContext *tc, MVMObject *obj) { | |
/* concatenating with "" ensures that only literal strings are accepted as argument. */ | |
#define STR_WITH_LEN(str) ("" str ""), (sizeof(str) - 1) | |
-MVMObject * MVM_radix(MVMThreadContext *tc, MVMint64 radix, MVMString *str, MVMint64 offset, MVMint64 flag) { | |
+/* Digit value of an ASCII byte for bases up to 36, or -1 when the byte is
+ * neither an ASCII digit nor an ASCII letter. */
+static MVMint64 radix3_ascii_digit(char c) {
+    if (c >= '0' && c <= '9') return c - '0';
+    if (c >= 'a' && c <= 'z') return c - 'a' + 10;
+    if (c >= 'A' && c <= 'Z') return c - 'A' + 10;
+    return -1;
+}
+
+/* Digit value of a grapheme for bases up to 36, or -1 when the grapheme is
+ * not a digit. Accepts ASCII digits and letters, fullwidth Latin letters and
+ * any codepoint whose general category is Nd. */
+static MVMint64 radix3_grapheme_digit(MVMThreadContext *tc, MVMint64 ch) {
+    if (ch >= '0' && ch <= '9') return ch - '0'; /* fast-path for ASCII 0..9 */
+    if (ch >= 'a' && ch <= 'z') return ch - 'a' + 10;
+    if (ch >= 'A' && ch <= 'Z') return ch - 'A' + 10;
+    if (ch >= 0xFF21 && ch <= 0xFF3A) return ch - 0xFF21 + 10; /* uppercase fullwidth */
+    if (ch >= 0xFF41 && ch <= 0xFF5A) return ch - 0xFF41 + 10; /* lowercase fullwidth */
+    if (ch > 0 && MVM_unicode_codepoint_has_property_value(tc, ch, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY,
+            MVM_unicode_cname_to_property_value_code(tc, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY, STR_WITH_LEN("Nd")))) {
+        /* As of Unicode 6.0.0, we know that Nd category numerals are within
+         * the range 0..9.
+         *
+         * The string returned for NUMERIC_VALUE contains a floating point
+         * value, so atoi will stop on the . in the string. This is fine
+         * though, since we'd have to truncate the float regardless. */
+        return atoi(MVM_unicode_codepoint_get_property_cstr(tc, ch, MVM_UNICODE_PROPERTY_NUMERIC_VALUE));
+    }
+    return -1;
+}
+
+/* Experimental variant of MVM_radix with a fast path for pure-ASCII input.
+ *
+ * Parses digits of base `radix` from `str`, starting at grapheme index
+ * `offset`. A single '_' between digits is skipped. Bits of `flag`:
+ *   0x01  negate the resulting value
+ *   0x02  accept one leading '+' or '-'
+ *   0x04  only commit value/base on a non-zero digit, so trailing zero
+ *         digits advance pos without changing value or base
+ *
+ * Returns an array (the current HLL's slurpy array type) holding three boxed
+ * integers: the parsed value, radix raised to the number of committed digits,
+ * and the index just past the last digit consumed (-1 if no digit was read).
+ *
+ * Throws an adhoc exception when radix is greater than 36.
+ *
+ * The earlier draft of the fast path called strtoll(). That was dropped
+ * because strtoll() skips leading whitespace, always accepts a sign, accepts
+ * a "0x" prefix in base 16, knows nothing about '_' separators or flag 0x04,
+ * and clamps on overflow - none of which matches the grapheme loop. It also
+ * never set pos/base, indexed the substring with the absolute offset, and
+ * re-parsed an already consumed sign, which negated the value twice. */
+MVMObject * MVM_radix3(MVMThreadContext *tc, MVMint64 radix, MVMString *str, MVMint64 offset, MVMint64 flag) {
+    MVMObject *result;
+    MVMint64 zvalue = 0;
+    MVMint64 zbase  = 1;
+    MVMint64 chars  = MVM_string_graphs(tc, str);
+    MVMint64 value  = zvalue;
+    MVMint64 base   = zbase;
+    MVMint64 pos    = -1;
+    MVMuint16 neg   = 0;
+    MVMint64 remaining;
+    MVMint64 digit;
+    char *enc;
+
+    if (radix > 36) {
+        MVM_exception_throw_adhoc(tc, "Cannot convert radix of %"PRId64" (max 36)", radix);
+    }
+
+    /* Encode everything from offset onwards as ASCII. The replacement string
+     * is a single NUL, so any grapheme that cannot be encoded shortens the
+     * C string and the strlen() comparison below detects it.
+     * NOTE(review): MVM_string_chr presumably allocates; confirm that str
+     * cannot be moved by the GC before it is used again. */
+    enc = MVM_string_ascii_encode_substr(tc, str, NULL, offset, -1, MVM_string_chr(tc, (MVMGrapheme32) 0), 0);
+
+    /* enc covers only the graphemes from offset to the end of the string, so
+     * compare against that count rather than the whole-string length. */
+    remaining = (offset < chars) ? chars - offset : 0;
+
+    if (strlen(enc) == (size_t)remaining) {
+        /* Fast path: every remaining grapheme is a non-NUL ASCII byte, so the
+         * buffer can be scanned directly. p always points at the byte for
+         * grapheme index offset. */
+        const char *p = enc;
+
+        if ((flag & 0x02) && (*p == '+' || *p == '-')) {
+            neg = (*p == '-');
+            p++;
+            offset++;
+        }
+
+        while (*p) {
+            digit = radix3_ascii_digit(*p);
+            if (digit < 0 || digit >= radix) break;
+            zvalue = zvalue * radix + digit;
+            zbase  = zbase * radix;
+            p++;
+            offset++;
+            pos = offset;
+            if (digit != 0 || !(flag & 0x04)) { value = zvalue; base = zbase; }
+            /* Step over one separator; pos stays behind it unless another
+             * digit follows. */
+            if (*p == '_') {
+                p++;
+                offset++;
+            }
+        }
+    }
+    else {
+        /* Slow path: walk the string grapheme by grapheme. */
+        MVMint64 ch = (offset < chars) ? MVM_string_get_grapheme_at_nocheck(tc, str, offset) : 0;
+
+        if ((flag & 0x02) && (ch == '+' || ch == '-')) {
+            neg = (ch == '-');
+            offset++;
+            ch = (offset < chars) ? MVM_string_get_grapheme_at_nocheck(tc, str, offset) : 0;
+        }
+
+        while (offset < chars) {
+            digit = radix3_grapheme_digit(tc, ch);
+            if (digit < 0 || digit >= radix) break;
+            zvalue = zvalue * radix + digit;
+            zbase  = zbase * radix;
+            offset++;
+            pos = offset;
+            if (digit != 0 || !(flag & 0x04)) { value = zvalue; base = zbase; }
+            if (offset >= chars) break;
+            ch = MVM_string_get_grapheme_at_nocheck(tc, str, offset);
+            if (ch != '_') continue;
+            offset++;
+            if (offset >= chars) break;
+            ch = MVM_string_get_grapheme_at_nocheck(tc, str, offset);
+        }
+    }
+
+    MVM_free(enc);
+
+    if (neg || (flag & 0x01)) { value = -value; }
+
+    /* Build the [value, base, pos] result; result and box_type are rooted
+     * because boxing allocates. */
+    result = MVM_repr_alloc_init(tc, MVM_hll_current(tc)->slurpy_array_type);
+    MVMROOT(tc, result, {
+        MVMObject *box_type = MVM_hll_current(tc)->int_box_type;
+        MVMROOT(tc, box_type, {
+            MVMObject *boxed = MVM_repr_box_int(tc, box_type, value);
+            MVM_repr_push_o(tc, result, boxed);
+            boxed = MVM_repr_box_int(tc, box_type, base);
+            MVM_repr_push_o(tc, result, boxed);
+            boxed = MVM_repr_box_int(tc, box_type, pos);
+            MVM_repr_push_o(tc, result, boxed);
+        });
+    });
+
+    return result;
+}
+ | |
+MVMObject * MVM_radix2(MVMThreadContext *tc, MVMint64 radix, MVMString *str, MVMint64 offset, MVMint64 flag) { | |
MVMObject *result; | |
MVMint64 zvalue = 0; | |
MVMint64 zbase = 1; | |
@@ -371,6 +463,108 @@ MVMObject * MVM_radix(MVMThreadContext *tc, MVMint64 radix, MVMString *str, MVMi | |
ch = (offset < chars) ? MVM_string_get_grapheme_at_nocheck(tc, str, offset) : 0; | |
} | |
+ switch (str->body.storage_type) { | |
+ case MVM_STRING_GRAPHEME_32: { | |
+ while (offset < chars) { | |
+ if (ch >= '0' && ch <= '9') ch = ch - '0'; /* fast-path for ASCII 0..9 */ | |
+ else if (ch >= 'a' && ch <= 'z') ch = ch - 'a' + 10; | |
+ else if (ch >= 'A' && ch <= 'Z') ch = ch - 'A' + 10; | |
+ else if (ch >= 0xFF21 && ch <= 0xFF3A) ch = ch - 0xFF21 + 10; /* uppercase fullwidth */ | |
+ else if (ch >= 0xFF41 && ch <= 0xFF5A) ch = ch - 0xFF41 + 10; /* lowercase fullwidth */ | |
+ else if (ch > 0 && MVM_unicode_codepoint_has_property_value(tc, ch, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY, | |
+ MVM_unicode_cname_to_property_value_code(tc, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY, STR_WITH_LEN("Nd")))) { | |
+ /* As of Unicode 6.0.0, we know that Nd category numerals are within | |
+ * the range 0..9 | |
+ */ | |
+ | |
+ /* the string returned for NUMERIC_VALUE contains a floating point | |
+ * value, so atoi will stop on the . in the string. This is fine | |
+ * though, since we'd have to truncate the float regardless. | |
+ */ | |
+ ch = atoi(MVM_unicode_codepoint_get_property_cstr(tc, ch, MVM_UNICODE_PROPERTY_NUMERIC_VALUE)); | |
+ } | |
+ else break; | |
+ if (ch >= radix) break; | |
+ zvalue = zvalue * radix + ch; | |
+ zbase = zbase * radix; | |
+ offset++; pos = offset; | |
+ if (ch != 0 || !(flag & 0x04)) { value=zvalue; base=zbase; } | |
+ if (offset >= chars) break; | |
+ ch = str->body.storage.blob_32[offset]; | |
+ if (ch != '_') continue; | |
+ offset++; | |
+ if (offset >= chars) break; | |
+ ch = str->body.storage.blob_32[offset]; | |
+ } | |
+ break; | |
+ } | |
+ case MVM_STRING_GRAPHEME_8: | |
+ while (offset < chars) { | |
+ if (ch >= '0' && ch <= '9') ch = ch - '0'; /* fast-path for ASCII 0..9 */ | |
+ else if (ch >= 'a' && ch <= 'z') ch = ch - 'a' + 10; | |
+ else if (ch >= 'A' && ch <= 'Z') ch = ch - 'A' + 10; | |
+ else if (ch >= 0xFF21 && ch <= 0xFF3A) ch = ch - 0xFF21 + 10; /* uppercase fullwidth */ | |
+ else if (ch >= 0xFF41 && ch <= 0xFF5A) ch = ch - 0xFF41 + 10; /* lowercase fullwidth */ | |
+ else if (ch > 0 && MVM_unicode_codepoint_has_property_value(tc, ch, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY, | |
+ MVM_unicode_cname_to_property_value_code(tc, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY, STR_WITH_LEN("Nd")))) { | |
+ /* As of Unicode 6.0.0, we know that Nd category numerals are within | |
+ * the range 0..9 | |
+ */ | |
+ | |
+ /* the string returned for NUMERIC_VALUE contains a floating point | |
+ * value, so atoi will stop on the . in the string. This is fine | |
+ * though, since we'd have to truncate the float regardless. | |
+ */ | |
+ ch = atoi(MVM_unicode_codepoint_get_property_cstr(tc, ch, MVM_UNICODE_PROPERTY_NUMERIC_VALUE)); | |
+ } | |
+ else break; | |
+ if (ch >= radix) break; | |
+ zvalue = zvalue * radix + ch; | |
+ zbase = zbase * radix; | |
+ offset++; pos = offset; | |
+ if (ch != 0 || !(flag & 0x04)) { value=zvalue; base=zbase; } | |
+ if (offset >= chars) break; | |
+ ch = str->body.storage.blob_8[offset]; | |
+ if (ch != '_') continue; | |
+ offset++; | |
+ if (offset >= chars) break; | |
+ ch = str->body.storage.blob_8[offset]; | |
+ } | |
+ break; | |
+ case MVM_STRING_GRAPHEME_ASCII: | |
+ while (offset < chars) { | |
+ if (ch >= '0' && ch <= '9') ch = ch - '0'; /* fast-path for ASCII 0..9 */ | |
+ else if (ch >= 'a' && ch <= 'z') ch = ch - 'a' + 10; | |
+ else if (ch >= 'A' && ch <= 'Z') ch = ch - 'A' + 10; | |
+ else if (ch >= 0xFF21 && ch <= 0xFF3A) ch = ch - 0xFF21 + 10; /* uppercase fullwidth */ | |
+ else if (ch >= 0xFF41 && ch <= 0xFF5A) ch = ch - 0xFF41 + 10; /* lowercase fullwidth */ | |
+ else if (ch > 0 && MVM_unicode_codepoint_has_property_value(tc, ch, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY, | |
+ MVM_unicode_cname_to_property_value_code(tc, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY, STR_WITH_LEN("Nd")))) { | |
+ /* As of Unicode 6.0.0, we know that Nd category numerals are within | |
+ * the range 0..9 | |
+ */ | |
+ | |
+ /* the string returned for NUMERIC_VALUE contains a floating point | |
+ * value, so atoi will stop on the . in the string. This is fine | |
+ * though, since we'd have to truncate the float regardless. | |
+ */ | |
+ ch = atoi(MVM_unicode_codepoint_get_property_cstr(tc, ch, MVM_UNICODE_PROPERTY_NUMERIC_VALUE)); | |
+ } | |
+ else break; | |
+ if (ch >= radix) break; | |
+ zvalue = zvalue * radix + ch; | |
+ zbase = zbase * radix; | |
+ offset++; pos = offset; | |
+ if (ch != 0 || !(flag & 0x04)) { value=zvalue; base=zbase; } | |
+ if (offset >= chars) break; | |
+ ch = str->body.storage.blob_ascii[offset]; | |
+ if (ch != '_') continue; | |
+ offset++; | |
+ if (offset >= chars) break; | |
+ ch = str->body.storage.blob_ascii[offset]; | |
+ } | |
+ break; | |
+ case MVM_STRING_STRAND: { | |
while (offset < chars) { | |
if (ch >= '0' && ch <= '9') ch = ch - '0'; /* fast-path for ASCII 0..9 */ | |
else if (ch >= 'a' && ch <= 'z') ch = ch - 'a' + 10; | |
@@ -401,6 +595,9 @@ MVMObject * MVM_radix(MVMThreadContext *tc, MVMint64 radix, MVMString *str, MVMi | |
offset++; | |
if (offset >= chars) break; | |
ch = MVM_string_get_grapheme_at_nocheck(tc, str, offset); | |
+ break; | |
+ } | |
+ } | |
} | |
if (neg || flag & 0x01) { value = -value; } | |
@@ -422,6 +619,78 @@ MVMObject * MVM_radix(MVMThreadContext *tc, MVMint64 radix, MVMString *str, MVMi | |
return result; | |
} | |
+/* Parses digits of base `radix` from `str`, starting at grapheme index
+ * `offset`, and returns an array of three boxed integers:
+ *   [0] the parsed value,
+ *   [1] radix raised to the number of committed digits,
+ *   [2] the index just past the last digit consumed, or -1 if none was.
+ *
+ * ASCII digits and letters, fullwidth Latin letters and codepoints of
+ * general category Nd are accepted as digits; a single '_' between digits is
+ * skipped. Bits of `flag`:
+ *   0x01  negate the resulting value
+ *   0x02  accept one leading '+' or '-'
+ *   0x04  only commit value/base on a non-zero digit
+ *
+ * Throws an adhoc exception when radix is greater than 36. */
+MVMObject * MVM_radix(MVMThreadContext *tc, MVMint64 radix, MVMString *str, MVMint64 offset, MVMint64 flag) {
+    const MVMint64 total = MVM_string_graphs(tc, str);
+    MVMint64 running_value = 0;  /* accumulates every digit seen */
+    MVMint64 running_base  = 1;
+    MVMint64 out_value     = 0;  /* last committed snapshot of the above */
+    MVMint64 out_base      = 1;
+    MVMint64 out_pos       = -1;
+    MVMuint16 negative     = 0;
+    MVMint64 g;
+    MVMObject *triple;
+
+    if (radix > 36) {
+        MVM_exception_throw_adhoc(tc, "Cannot convert radix of %"PRId64" (max 36)", radix);
+    }
+
+    /* Optional leading sign. */
+    g = (offset < total) ? MVM_string_get_grapheme_at_nocheck(tc, str, offset) : 0;
+    if ((flag & 0x02) && (g == '+' || g == '-')) {
+        negative = (g == '-');
+        offset++;
+        g = (offset < total) ? MVM_string_get_grapheme_at_nocheck(tc, str, offset) : 0;
+    }
+
+    while (offset < total) {
+        MVMint64 digit;
+
+        /* Classify the current grapheme; anything that is not a digit ends
+         * the scan. */
+        if (g >= '0' && g <= '9') {
+            digit = g - '0';
+        }
+        else if (g >= 'a' && g <= 'z') {
+            digit = g - 'a' + 10;
+        }
+        else if (g >= 'A' && g <= 'Z') {
+            digit = g - 'A' + 10;
+        }
+        else if (g >= 0xFF21 && g <= 0xFF3A) {
+            digit = g - 0xFF21 + 10; /* fullwidth uppercase */
+        }
+        else if (g >= 0xFF41 && g <= 0xFF5A) {
+            digit = g - 0xFF41 + 10; /* fullwidth lowercase */
+        }
+        else if (g > 0 && MVM_unicode_codepoint_has_property_value(tc, g, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY,
+                MVM_unicode_cname_to_property_value_code(tc, MVM_UNICODE_PROPERTY_GENERAL_CATEGORY, STR_WITH_LEN("Nd")))) {
+            /* Nd numerals lie in 0..9 (true as of Unicode 6.0.0). The
+             * NUMERIC_VALUE string is a float; atoi stops at the '.', which
+             * is the truncation we want anyway. */
+            digit = atoi(MVM_unicode_codepoint_get_property_cstr(tc, g, MVM_UNICODE_PROPERTY_NUMERIC_VALUE));
+        }
+        else {
+            break;
+        }
+
+        if (digit >= radix) {
+            break;
+        }
+
+        running_value = running_value * radix + digit;
+        running_base  = running_base * radix;
+        offset++;
+        out_pos = offset;
+        if (digit != 0 || !(flag & 0x04)) {
+            out_value = running_value;
+            out_base  = running_base;
+        }
+
+        if (offset >= total) {
+            break;
+        }
+        g = MVM_string_get_grapheme_at_nocheck(tc, str, offset);
+
+        /* Step over one '_' separator, if present. */
+        if (g == '_') {
+            offset++;
+            if (offset >= total) {
+                break;
+            }
+            g = MVM_string_get_grapheme_at_nocheck(tc, str, offset);
+        }
+    }
+
+    if (negative || (flag & 0x01)) {
+        out_value = -out_value;
+    }
+
+    /* Box value, base and pos, in that order, into a fresh array. The array
+     * and the box type are rooted while the boxes are being created. */
+    triple = MVM_repr_alloc_init(tc, MVM_hll_current(tc)->slurpy_array_type);
+    MVMROOT(tc, triple, {
+        MVMObject *int_type = MVM_hll_current(tc)->int_box_type;
+        MVMROOT(tc, int_type, {
+            MVMObject *item = MVM_repr_box_int(tc, int_type, out_value);
+            MVM_repr_push_o(tc, triple, item);
+            item = MVM_repr_box_int(tc, int_type, out_base);
+            MVM_repr_push_o(tc, triple, item);
+            item = MVM_repr_box_int(tc, int_type, out_pos);
+            MVM_repr_push_o(tc, triple, item);
+        });
+    });
+
+    return triple;
+}
void MVM_box_int(MVMThreadContext *tc, MVMint64 value, MVMObject *type, | |
MVMRegister * dst) { |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment