-
-
Save FROGGS/b39bb6ed35370d763c91 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/lib/MAST/Ops.nqp b/lib/MAST/Ops.nqp | |
index d69b93c..86dc8dc 100644 | |
--- a/lib/MAST/Ops.nqp | |
+++ b/lib/MAST/Ops.nqp | |
@@ -556,7 +556,8 @@ BEGIN { | |
1312, | |
1314, | |
1317, | |
- 1321); | |
+ 1321, | |
+ 1324); | |
MAST::Ops.WHO<@counts> := nqp::list_i(0, | |
2, | |
2, | |
@@ -1110,7 +1111,8 @@ BEGIN { | |
2, | |
3, | |
4, | |
- 3); | |
+ 3, | |
+ 4); | |
MAST::Ops.WHO<@values> := nqp::list_i(10, | |
8, | |
18, | |
@@ -2434,7 +2436,11 @@ BEGIN { | |
65, | |
66, | |
65, | |
- 65); | |
+ 65, | |
+ 34, | |
+ 57, | |
+ 33, | |
+ 57); | |
MAST::Ops.WHO<%codes> := nqp::hash('no_op', 0, | |
'const_i8', 1, | |
'const_i16', 2, | |
@@ -2988,7 +2994,8 @@ BEGIN { | |
'continuationclone', 550, | |
'continuationreset', 551, | |
'continuationcontrol', 552, | |
- 'continuationinvoke', 553); | |
+ 'continuationinvoke', 553, | |
+ 'uniisblock', 554); | |
MAST::Ops.WHO<@names> := nqp::list('no_op', | |
'const_i8', | |
'const_i16', | |
@@ -3542,5 +3549,6 @@ BEGIN { | |
'continuationclone', | |
'continuationreset', | |
'continuationcontrol', | |
- 'continuationinvoke'); | |
+ 'continuationinvoke', | |
+ 'uniisblock'); | |
} | |
diff --git a/src/core/interp.c b/src/core/interp.c | |
index ba15f72..9d068af 100644 | |
--- a/src/core/interp.c | |
+++ b/src/core/interp.c | |
@@ -3908,6 +3908,11 @@ void MVM_interp_run(MVMThreadContext *tc, void (*initial_invoke)(MVMThreadContex | |
MVM_exception_throw_adhoc(tc, "continuationinvoke expects an MVMContinuation"); | |
goto NEXT; | |
} | |
+ OP(uniisblock): | |
+ GET_REG(cur_op, 0).i64 = (MVMint64)MVM_unicode_is_in_block(tc, | |
+ GET_REG(cur_op, 2).s, GET_REG(cur_op, 4).i64, GET_REG(cur_op, 6).s); | |
+ cur_op += 8; | |
+ goto NEXT; | |
#if MVM_CGOTO | |
OP_CALL_EXTOP: { | |
/* Bounds checking? Never heard of that. */ | |
diff --git a/src/core/oplabels.h b/src/core/oplabels.h | |
index 6ee285a..7cdfa8f 100644 | |
--- a/src/core/oplabels.h | |
+++ b/src/core/oplabels.h | |
@@ -555,7 +555,7 @@ static const void * const LABELS[] = { | |
&&OP_continuationreset, | |
&&OP_continuationcontrol, | |
&&OP_continuationinvoke, | |
- NULL, | |
+ &&OP_uniisblock, | |
NULL, | |
NULL, | |
NULL, | |
diff --git a/src/core/oplist b/src/core/oplist | |
index 7a56cb7..dd9ea99 100644 | |
--- a/src/core/oplist | |
+++ b/src/core/oplist | |
@@ -583,3 +583,4 @@ continuationclone w(obj) r(obj) | |
continuationreset w(obj) r(obj) r(obj) | |
continuationcontrol w(obj) r(int64) r(obj) r(obj) | |
continuationinvoke w(obj) r(obj) r(obj) | |
+uniisblock w(int64) r(str) r(int64) r(str) | |
diff --git a/src/core/ops.c b/src/core/ops.c | |
index 914e253..8576cae 100644 | |
--- a/src/core/ops.c | |
+++ b/src/core/ops.c | |
@@ -3884,9 +3884,16 @@ static MVMOpInfo MVM_op_infos[] = { | |
3, | |
{ MVM_operand_write_reg | MVM_operand_obj, MVM_operand_read_reg | MVM_operand_obj, MVM_operand_read_reg | MVM_operand_obj } | |
}, | |
+ { | |
+ MVM_OP_uniisblock, | |
+ "uniisblock", | |
+ " ", | |
+ 4, | |
+ { MVM_operand_write_reg | MVM_operand_int64, MVM_operand_read_reg | MVM_operand_str, MVM_operand_read_reg | MVM_operand_int64, MVM_operand_read_reg | MVM_operand_str } | |
+ }, | |
}; | |
-static unsigned short MVM_op_counts = 554; | |
+static unsigned short MVM_op_counts = 555; | |
MVMOpInfo * MVM_op_get_op(unsigned short op) { | |
if (op >= MVM_op_counts) | |
diff --git a/src/core/ops.h b/src/core/ops.h | |
index eb7f169..c3651ba 100644 | |
--- a/src/core/ops.h | |
+++ b/src/core/ops.h | |
@@ -555,6 +555,7 @@ | |
#define MVM_OP_continuationreset 551 | |
#define MVM_OP_continuationcontrol 552 | |
#define MVM_OP_continuationinvoke 553 | |
+#define MVM_OP_uniisblock 554 | |
#define MVM_OP_EXT_BASE 1024 | |
#define MVM_OP_EXT_CU_LIMIT 1024 | |
diff --git a/src/strings/ops.c b/src/strings/ops.c | |
index 0d7b15a..bf3eb20 100644 | |
--- a/src/strings/ops.c | |
+++ b/src/strings/ops.c | |
@@ -1064,8 +1064,13 @@ MVMint64 MVM_string_offset_has_unicode_property_value(MVMThreadContext *tc, MVMS | |
if (offset < 0 || offset >= NUM_GRAPHS(s)) | |
return 0; | |
- return MVM_unicode_codepoint_has_property_value(tc, | |
- MVM_string_get_codepoint_at_nocheck(tc, s, offset), property_code, property_value_code); | |
+ MVMint64 cp = MVM_string_get_codepoint_at_nocheck(tc, s, offset); | |
+ MVMint64 r = MVM_unicode_codepoint_has_property_value(tc, | |
+ cp, property_code, property_value_code); | |
+ | |
+ //~ printf("MVM_string_offset_has_unicode_property_value(tc, cp=%d, pc=%d, pvc=%d) = r=%d\n", cp, property_code, property_value_code, r); | |
+ | |
+ return r; | |
} | |
/* internal function so hashes can easily compute hashes of hash keys */ | |
diff --git a/src/strings/unicode_ops.c b/src/strings/unicode_ops.c | |
index fef79a2..5e2354d 100644 | |
--- a/src/strings/unicode_ops.c | |
+++ b/src/strings/unicode_ops.c | |
@@ -13,8 +13,11 @@ MVMCodepoint32 MVM_unicode_lookup_by_name(MVMThreadContext *tc, MVMString *name) | |
} | |
MVMint64 MVM_unicode_codepoint_has_property_value(MVMThreadContext *tc, MVMCodepoint32 codepoint, MVMint64 property_code, MVMint64 property_value_code) { | |
- return (MVMint64)MVM_unicode_get_property_value(tc, | |
- codepoint, property_code) == property_value_code ? 1 : 0; | |
+ MVMint64 r = MVM_unicode_get_property_value(tc, codepoint, property_code); | |
+ /* Returns 1 when the codepoint's value for property_code equals property_value_code, else 0. FIXME(review): the (20, 2) -> 3 remap below is an unexplained workaround with magic numbers; confirm its purpose or drop it before merging. */ | |
+ if (property_code == 20 && property_value_code == 2) | |
+ property_value_code = 3; | |
+ return r == property_value_code ? 1 : 0; | |
} | |
MVMCodepoint32 MVM_unicode_get_case_change(MVMThreadContext *tc, MVMCodepoint32 codepoint, MVMint32 case_) { | |
@@ -35,13 +38,17 @@ static MVMUnicodeNameRegistry *property_codes_by_names_aliases; | |
void generate_property_codes_by_names_aliases(MVMThreadContext *tc) { | |
MVMuint32 num_names = num_unicode_property_keypairs; | |
+ // Fills property_codes_by_names_aliases with one heap-allocated registry entry per row of unicode_property_keypairs. | |
while (num_names--) { | |
MVMUnicodeNameRegistry *entry = malloc(sizeof(MVMUnicodeNameRegistry)); | |
entry->name = (char *)unicode_property_keypairs[num_names].name; | |
entry->codepoint = unicode_property_keypairs[num_names].value; | |
+ // The "codepoint" field is reused to carry the keypair's value (an index, not a real codepoint). | |
+ // Insert the entry keyed by its name; the key length is taken with strlen. | |
HASH_ADD_KEYPTR(hash_handle, property_codes_by_names_aliases, | |
entry->name, strlen(entry->name), entry); | |
} | |
+ // NOTE(review): the loop counts down, so rows are inserted from the last keypair to the first; malloc results are not checked for NULL. | |
} | |
MVMint32 MVM_unicode_name_to_property_code(MVMThreadContext *tc, MVMString *name) { | |
@@ -52,6 +59,7 @@ MVMint32 MVM_unicode_name_to_property_code(MVMThreadContext *tc, MVMString *name | |
generate_property_codes_by_names_aliases(tc); | |
} | |
HASH_FIND(hash_handle, property_codes_by_names_aliases, cname, strlen((const char *)cname), result); | |
+ //~ printf("MVM_unicode_name_to_property_code(tc, '%s') = r=%d\n", cname, result ? result->codepoint : 0); | |
free(cname); /* not really codepoint, really just an index */ | |
return result ? result->codepoint : 0; | |
} | |
@@ -108,6 +116,7 @@ MVMint32 MVM_unicode_name_to_property_value_code(MVMThreadContext *tc, MVMint64 | |
generate_unicode_property_values_hashes(tc); | |
} | |
HASH_FIND(hash_handle, unicode_property_values_hashes[property_code], cname, strlen((const char *)cname), result); | |
+ //~ printf("MVM_unicode_name_to_property_value_code(tc, pc=%d, '%s') = r=%d\n", property_code, cname, result ? result->codepoint : 0); | |
free(cname); /* not really codepoint, really just an index */ | |
return result ? result->codepoint : 0; | |
} | |
diff --git a/tools/ucd2c.pl b/tools/ucd2c.pl | |
index b72f9f0..ed75578 100644 | |
--- a/tools/ucd2c.pl | |
+++ b/tools/ucd2c.pl | |
@@ -102,6 +102,7 @@ sub main { | |
emit_names_hash_builder(); | |
emit_unicode_property_keypairs(); | |
emit_unicode_property_value_keypairs(); | |
+ emit_block_lookup(); | |
print "done!"; | |
write_file('src/strings/unicode_db.c', join_sections($db_sections)); | |
@@ -664,6 +665,39 @@ static MVMint32 MVM_unicode_get_property_value(MVMThreadContext *tc, MVMint32 co | |
$db_sections->{MVM_unicode_get_property_value} = $out; | |
$h_sections->{property_code_definitions} = $hout; | |
} | |
+sub emit_block_lookup { | |
+ # Emits MVM_unicode_is_in_block(tc, str, pos, block): 1 if the codepoint at pos lies in the Blocks.txt range whose name (spaces, '_' and '-' stripped; as written or all-lowercase) equals block, else 0. | |
+ my $hout = "MVMint32 MVM_unicode_is_in_block(MVMThreadContext *tc, MVMString *str, MVMint64 pos, MVMString *block);\n"; | |
+ my $out = "MVMint32 MVM_unicode_is_in_block(MVMThreadContext *tc, MVMString *str, MVMint64 pos, MVMString *block) { | |
+ MVMCodepoint32 ord = MVM_string_get_codepoint_at_nocheck(tc, str, pos); | |
+ MVMuint64 size; | |
+ char *bname = (char *)MVM_string_ascii_encode(tc, block, &size); | |
+ MVMint32 in_block = 0; | |
+"; | |
+ my $else = ''; | |
+ each_line('Blocks', sub { | |
+ $_ = shift; | |
+ my ($from, $to, $block_name) = /^(\w+)\.\.(\w+); (.+)/; # the ".." between the range bounds is literal, so escape it | |
+ if ($from && $to && $block_name) { | |
+ $block_name =~ s/[_\s-]//g; | |
+ my $alias_name = lc($block_name); | |
+ if (length($block_name) && length($alias_name)) { | |
+ $out .= " | |
+ $else if (ord >= 0x$from && ord <= 0x$to) { | |
+ in_block = strcmp(\"$block_name\", bname) == 0 || strcmp(\"$alias_name\", bname) == 0; | |
+ }"; | |
+ $else = 'else'; | |
+ } | |
+ } | |
+ }); | |
+ # Compare whole names (a prefix compare let "ArabicSupplement" match block "Arabic") and free the encoded name on every path; assumes the encoder returns a malloc'd, NUL-terminated buffer - TODO confirm. | |
+ $out .= " | |
+ free(bname); | |
+ return in_block; | |
+}"; | |
+ $db_sections->{block_lookup} = $out; | |
+ $h_sections->{block_lookup} = $hout; | |
+} | |
sub emit_names_hash_builder { | |
my $num_extents = scalar(@$extents); | |
my $out = " |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/src/vm/moar/QAST/QASTRegexCompilerMAST.nqp b/src/vm/moar/QAST/QASTRegexCompilerMAST.nqp | |
index 1168aca..5e21eb6 100644 | |
--- a/src/vm/moar/QAST/QASTRegexCompilerMAST.nqp | |
+++ b/src/vm/moar/QAST/QASTRegexCompilerMAST.nqp | |
@@ -954,18 +954,44 @@ class QAST::MASTRegexCompiler { | |
my $pname := fresh_s(); | |
my $pcode := fresh_i(); | |
my $pvcode := fresh_i(); | |
+ my $pprop := fresh_s(); | |
my $i0 := fresh_i(); | |
my $testop := $node.negate ?? 'if_i' !! 'unless_i'; | |
- [ | |
+ my $prefix := $*QASTCOMPILER.unique($*RXPREFIX ~ '_uniprop'); | |
+ my $hasvalcode := label($prefix ~ '_haselems'); | |
+ my $endblock := label($prefix ~ '_endblock'); | |
+ my $succeed := label($prefix ~ '_succeed'); | |
+ my @ins := [ | |
op('ge_i', $i0, %*REG<pos>, %*REG<eos>), | |
op('if_i', $i0, %*REG<fail>), | |
+ ]; | |
+ if ~$node[0] ~~ /^ [ In<[A..Z]> | in<[a..z]> ]/ { # "InArabic" is a lookup of Block Arabic | |
+ merge_ins(@ins, [ | |
+ op('const_s', $pname, sval(nqp::substr($node[0],2))), | |
+ op('uniisblock', $i0, %*REG<tgt>, %*REG<pos>, $pname), | |
+ op('if_i', $i0, $succeed), | |
+ | |
+ op('const_s', $pprop, sval('Block')), | |
+ op('const_s', $pname, sval(nqp::substr($node[0],2))), | |
+ op('unipropcode', $pcode, $pprop), | |
+ op('unless_i', $pcode, $endblock), | |
+ op('unipvalcode', $pvcode, $pcode, $pname), | |
+ op('if_i', $pvcode, $hasvalcode), | |
+ $endblock, | |
+ ]); | |
+ } | |
+ merge_ins(@ins, [ | |
op('const_s', $pname, sval($node[0])), | |
op('unipropcode', $pcode, $pname), | |
op('unipvalcode', $pvcode, $pcode, $pname), | |
+ #~ op($testop, $pvcode, %*REG<fail>), # XXX I am sure we should fail here | |
+ $hasvalcode, | |
op('hasuniprop', $i0, %*REG<tgt>, %*REG<pos>, $pcode, $pvcode), | |
+ $succeed, | |
op($testop, $i0, %*REG<fail>), | |
op('inc_i', %*REG<pos>) | |
- ]; | |
+ ]); | |
+ @ins | |
} | |
method ws($node) { self.subrule($node) } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment