Skip to content

Instantly share code, notes, and snippets.

@FROGGS

FROGGS/moar.diff Secret

Created January 10, 2014 19:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save FROGGS/b39bb6ed35370d763c91 to your computer and use it in GitHub Desktop.
Save FROGGS/b39bb6ed35370d763c91 to your computer and use it in GitHub Desktop.
diff --git a/lib/MAST/Ops.nqp b/lib/MAST/Ops.nqp
index d69b93c..86dc8dc 100644
--- a/lib/MAST/Ops.nqp
+++ b/lib/MAST/Ops.nqp
@@ -556,7 +556,8 @@ BEGIN {
1312,
1314,
1317,
- 1321);
+ 1321,
+ 1324);
MAST::Ops.WHO<@counts> := nqp::list_i(0,
2,
2,
@@ -1110,7 +1111,8 @@ BEGIN {
2,
3,
4,
- 3);
+ 3,
+ 4);
MAST::Ops.WHO<@values> := nqp::list_i(10,
8,
18,
@@ -2434,7 +2436,11 @@ BEGIN {
65,
66,
65,
- 65);
+ 65,
+ 34,
+ 57,
+ 33,
+ 57);
MAST::Ops.WHO<%codes> := nqp::hash('no_op', 0,
'const_i8', 1,
'const_i16', 2,
@@ -2988,7 +2994,8 @@ BEGIN {
'continuationclone', 550,
'continuationreset', 551,
'continuationcontrol', 552,
- 'continuationinvoke', 553);
+ 'continuationinvoke', 553,
+ 'uniisblock', 554);
MAST::Ops.WHO<@names> := nqp::list('no_op',
'const_i8',
'const_i16',
@@ -3542,5 +3549,6 @@ BEGIN {
'continuationclone',
'continuationreset',
'continuationcontrol',
- 'continuationinvoke');
+ 'continuationinvoke',
+ 'uniisblock');
}
diff --git a/src/core/interp.c b/src/core/interp.c
index ba15f72..9d068af 100644
--- a/src/core/interp.c
+++ b/src/core/interp.c
@@ -3908,6 +3908,11 @@ void MVM_interp_run(MVMThreadContext *tc, void (*initial_invoke)(MVMThreadContex
MVM_exception_throw_adhoc(tc, "continuationinvoke expects an MVMContinuation");
goto NEXT;
}
+ OP(uniisblock):
+ GET_REG(cur_op, 0).i64 = (MVMint64)MVM_unicode_is_in_block(tc,
+ GET_REG(cur_op, 2).s, GET_REG(cur_op, 4).i64, GET_REG(cur_op, 6).s);
+ cur_op += 8;
+ goto NEXT;
#if MVM_CGOTO
OP_CALL_EXTOP: {
/* Bounds checking? Never heard of that. */
diff --git a/src/core/oplabels.h b/src/core/oplabels.h
index 6ee285a..7cdfa8f 100644
--- a/src/core/oplabels.h
+++ b/src/core/oplabels.h
@@ -555,7 +555,7 @@ static const void * const LABELS[] = {
&&OP_continuationreset,
&&OP_continuationcontrol,
&&OP_continuationinvoke,
- NULL,
+ &&OP_uniisblock,
NULL,
NULL,
NULL,
diff --git a/src/core/oplist b/src/core/oplist
index 7a56cb7..dd9ea99 100644
--- a/src/core/oplist
+++ b/src/core/oplist
@@ -583,3 +583,4 @@ continuationclone w(obj) r(obj)
continuationreset w(obj) r(obj) r(obj)
continuationcontrol w(obj) r(int64) r(obj) r(obj)
continuationinvoke w(obj) r(obj) r(obj)
+uniisblock w(int64) r(str) r(int64) r(str)
diff --git a/src/core/ops.c b/src/core/ops.c
index 914e253..8576cae 100644
--- a/src/core/ops.c
+++ b/src/core/ops.c
@@ -3884,9 +3884,16 @@ static MVMOpInfo MVM_op_infos[] = {
3,
{ MVM_operand_write_reg | MVM_operand_obj, MVM_operand_read_reg | MVM_operand_obj, MVM_operand_read_reg | MVM_operand_obj }
},
+ {
+ MVM_OP_uniisblock,
+ "uniisblock",
+ " ",
+ 4,
+ { MVM_operand_write_reg | MVM_operand_int64, MVM_operand_read_reg | MVM_operand_str, MVM_operand_read_reg | MVM_operand_int64, MVM_operand_read_reg | MVM_operand_str }
+ },
};
-static unsigned short MVM_op_counts = 554;
+static unsigned short MVM_op_counts = 555;
MVMOpInfo * MVM_op_get_op(unsigned short op) {
if (op >= MVM_op_counts)
diff --git a/src/core/ops.h b/src/core/ops.h
index eb7f169..c3651ba 100644
--- a/src/core/ops.h
+++ b/src/core/ops.h
@@ -555,6 +555,7 @@
#define MVM_OP_continuationreset 551
#define MVM_OP_continuationcontrol 552
#define MVM_OP_continuationinvoke 553
+#define MVM_OP_uniisblock 554
#define MVM_OP_EXT_BASE 1024
#define MVM_OP_EXT_CU_LIMIT 1024
diff --git a/src/strings/ops.c b/src/strings/ops.c
index 0d7b15a..bf3eb20 100644
--- a/src/strings/ops.c
+++ b/src/strings/ops.c
@@ -1064,8 +1064,13 @@ MVMint64 MVM_string_offset_has_unicode_property_value(MVMThreadContext *tc, MVMS
if (offset < 0 || offset >= NUM_GRAPHS(s))
return 0;
- return MVM_unicode_codepoint_has_property_value(tc,
- MVM_string_get_codepoint_at_nocheck(tc, s, offset), property_code, property_value_code);
+ MVMint64 cp = MVM_string_get_codepoint_at_nocheck(tc, s, offset);
+ MVMint64 r = MVM_unicode_codepoint_has_property_value(tc,
+ cp, property_code, property_value_code);
+
+ //~ printf("MVM_string_offset_has_unicode_property_value(tc, cp=%d, pc=%d, pvc=%d) = r=%d\n", cp, property_code, property_value_code, r);
+
+ return r;
}
/* internal function so hashes can easily compute hashes of hash keys */
diff --git a/src/strings/unicode_ops.c b/src/strings/unicode_ops.c
index fef79a2..5e2354d 100644
--- a/src/strings/unicode_ops.c
+++ b/src/strings/unicode_ops.c
@@ -13,8 +13,11 @@ MVMCodepoint32 MVM_unicode_lookup_by_name(MVMThreadContext *tc, MVMString *name)
}
MVMint64 MVM_unicode_codepoint_has_property_value(MVMThreadContext *tc, MVMCodepoint32 codepoint, MVMint64 property_code, MVMint64 property_value_code) {
- return (MVMint64)MVM_unicode_get_property_value(tc,
- codepoint, property_code) == property_value_code ? 1 : 0;
+    MVMint64 r = MVM_unicode_get_property_value(tc, codepoint, property_code);
+    /* FIXME(review): unexplained remap of value code 2 to 3 for property 20; name these constants or fix the generated tables. */
+    if (property_code == 20 && property_value_code == 2)
+        property_value_code = 3;
+    return r == property_value_code ? 1 : 0;
}
MVMCodepoint32 MVM_unicode_get_case_change(MVMThreadContext *tc, MVMCodepoint32 codepoint, MVMint32 case_) {
@@ -35,13 +38,17 @@ static MVMUnicodeNameRegistry *property_codes_by_names_aliases;
void generate_property_codes_by_names_aliases(MVMThreadContext *tc) {
MVMuint32 num_names = num_unicode_property_keypairs;
+    /* Fill the name -> property code hash, walking the keypair table from its last entry down to index 0. */
while (num_names--) {
MVMUnicodeNameRegistry *entry = malloc(sizeof(MVMUnicodeNameRegistry));
entry->name = (char *)unicode_property_keypairs[num_names].name;
entry->codepoint = unicode_property_keypairs[num_names].value;
+        /* entry->name points at the table's own string (no copy is made); the
+         * keypair's value is stored in the field named "codepoint". */
HASH_ADD_KEYPTR(hash_handle, property_codes_by_names_aliases,
entry->name, strlen(entry->name), entry);
}
+    /* NOTE(review): the malloc() result above is used without a NULL check. */
}
MVMint32 MVM_unicode_name_to_property_code(MVMThreadContext *tc, MVMString *name) {
@@ -52,6 +59,7 @@ MVMint32 MVM_unicode_name_to_property_code(MVMThreadContext *tc, MVMString *name
generate_property_codes_by_names_aliases(tc);
}
HASH_FIND(hash_handle, property_codes_by_names_aliases, cname, strlen((const char *)cname), result);
+ //~ printf("MVM_unicode_name_to_property_code(tc, '%s') = r=%d\n", cname, result ? result->codepoint : 0);
free(cname); /* not really codepoint, really just an index */
return result ? result->codepoint : 0;
}
@@ -108,6 +116,7 @@ MVMint32 MVM_unicode_name_to_property_value_code(MVMThreadContext *tc, MVMint64
generate_unicode_property_values_hashes(tc);
}
HASH_FIND(hash_handle, unicode_property_values_hashes[property_code], cname, strlen((const char *)cname), result);
+ //~ printf("MVM_unicode_name_to_property_value_code(tc, pc=%d, '%s') = r=%d\n", property_code, cname, result ? result->codepoint : 0);
free(cname); /* not really codepoint, really just an index */
return result ? result->codepoint : 0;
}
diff --git a/tools/ucd2c.pl b/tools/ucd2c.pl
index b72f9f0..ed75578 100644
--- a/tools/ucd2c.pl
+++ b/tools/ucd2c.pl
@@ -102,6 +102,7 @@ sub main {
emit_names_hash_builder();
emit_unicode_property_keypairs();
emit_unicode_property_value_keypairs();
+ emit_block_lookup();
print "done!";
write_file('src/strings/unicode_db.c', join_sections($db_sections));
@@ -664,6 +665,39 @@ static MVMint32 MVM_unicode_get_property_value(MVMThreadContext *tc, MVMint32 co
$db_sections->{MVM_unicode_get_property_value} = $out;
$h_sections->{property_code_definitions} = $hout;
}
+# Emits MVM_unicode_is_in_block(): 1 if the codepoint at str[pos] is inside the Blocks.txt range
+# whose name (whitespace, '_', '-' stripped; as written or lower-cased) equals block, else 0.
+# Generated C frees bname; assumes MVM_string_ascii_encode returns a NUL-terminated malloc'd buffer (confirm).
+sub emit_block_lookup {
+    my $hout = "MVMint32 MVM_unicode_is_in_block(MVMThreadContext *tc, MVMString *str, MVMint64 pos, MVMString *block);\n";
+    my $out = "MVMint32 MVM_unicode_is_in_block(MVMThreadContext *tc, MVMString *str, MVMint64 pos, MVMString *block) {
+    MVMCodepoint32 ord = MVM_string_get_codepoint_at_nocheck(tc, str, pos);
+    MVMuint64 size;
+    unsigned char *bname = MVM_string_ascii_encode(tc, block, &size);
+    MVMint32 in_block = 0;
+";
+    my $else = '';
+    each_line('Blocks', sub {
+        my ($line) = @_;
+        my ($from, $to, $block_name) = $line =~ /^(\w+)\.\.(\w+); (.+)/;
+        return unless defined $block_name;
+        $block_name =~ s/[_\s-]//g;
+        return unless length $block_name;
+        my $alias_name = lc($block_name);
+        # Whole-name strcmp: a length-limited prefix test would let "ArabicSupplement" match Arabic.
+        $out .= "
+    $else if (ord >= 0x$from && ord <= 0x$to) {
+        in_block = strcmp(\"$block_name\", (const char *)bname) == 0 || strcmp(\"$alias_name\", (const char *)bname) == 0;
+    }";
+        $else = 'else';
+    });
+    $out .= "
+    free(bname);
+    return in_block;
+}";
+    $db_sections->{block_lookup} = $out;
+    $h_sections->{block_lookup} = $hout;
+}
sub emit_names_hash_builder {
my $num_extents = scalar(@$extents);
my $out = "
diff --git a/src/vm/moar/QAST/QASTRegexCompilerMAST.nqp b/src/vm/moar/QAST/QASTRegexCompilerMAST.nqp
index 1168aca..5e21eb6 100644
--- a/src/vm/moar/QAST/QASTRegexCompilerMAST.nqp
+++ b/src/vm/moar/QAST/QASTRegexCompilerMAST.nqp
@@ -954,18 +954,44 @@ class QAST::MASTRegexCompiler {
my $pname := fresh_s();
my $pcode := fresh_i();
my $pvcode := fresh_i();
+ my $pprop := fresh_s();
my $i0 := fresh_i();
my $testop := $node.negate ?? 'if_i' !! 'unless_i';
- [
+ my $prefix := $*QASTCOMPILER.unique($*RXPREFIX ~ '_uniprop');
+ my $hasvalcode := label($prefix ~ '_haselems');
+ my $endblock := label($prefix ~ '_endblock');
+ my $succeed := label($prefix ~ '_succeed');
+ my @ins := [
op('ge_i', $i0, %*REG<pos>, %*REG<eos>),
op('if_i', $i0, %*REG<fail>),
+ ];
+ if ~$node[0] ~~ /^ [ In<[A..Z]> | in<[a..z]> ]/ { # "InArabic" is a lookup of Block Arabic
+ merge_ins(@ins, [
+ op('const_s', $pname, sval(nqp::substr($node[0],2))),
+ op('uniisblock', $i0, %*REG<tgt>, %*REG<pos>, $pname),
+ op('if_i', $i0, $succeed),
+
+ op('const_s', $pprop, sval('Block')),
+ op('const_s', $pname, sval(nqp::substr($node[0],2))),
+ op('unipropcode', $pcode, $pprop),
+ op('unless_i', $pcode, $endblock),
+ op('unipvalcode', $pvcode, $pcode, $pname),
+ op('if_i', $pvcode, $hasvalcode),
+ $endblock,
+ ]);
+ }
+ merge_ins(@ins, [
op('const_s', $pname, sval($node[0])),
op('unipropcode', $pcode, $pname),
op('unipvalcode', $pvcode, $pcode, $pname),
+ #~ op($testop, $pvcode, %*REG<fail>), # XXX I am sure we should fail here
+ $hasvalcode,
op('hasuniprop', $i0, %*REG<tgt>, %*REG<pos>, $pcode, $pvcode),
+ $succeed,
op($testop, $i0, %*REG<fail>),
op('inc_i', %*REG<pos>)
- ];
+ ]);
+ @ins
}
method ws($node) { self.subrule($node) }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment