Skip to content

Instantly share code, notes, and snippets.

@samcv
Created February 6, 2017 12:16

Revisions

  1. samcv renamed this gist Feb 6, 2017. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  2. samcv created this gist Feb 6, 2017.
    134 changes: 134 additions & 0 deletions -
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,134 @@
    diff --git a/src/core/interp.c b/src/core/interp.c
    index efc1f853..752cde43 100644
    --- a/src/core/interp.c
    +++ b/src/core/interp.c
    @@ -1464,7 +1464,9 @@ void MVM_interp_run(MVMThreadContext *tc, void (*initial_invoke)(MVMThreadContex
    goto NEXT;
    OP(unicmp_s):
    GET_REG(cur_op, 0).i64 = MVM_unicode_string_compare(tc,
    - GET_REG(cur_op, 2).s, GET_REG(cur_op, 4).s);
    + GET_REG(cur_op, 2).s, GET_REG(cur_op, 4).s,
    + GET_REG(cur_op, 6).i64, GET_REG(cur_op, 8).i64,
    + GET_REG(cur_op, 10).i64);
    cur_op += 12;
    goto NEXT;
    OP(eqat_s):
    diff --git a/src/strings/unicode_ops.c b/src/strings/unicode_ops.c
    index 6d80b7b9..b4416c1b 100644
    --- a/src/strings/unicode_ops.c
    +++ b/src/strings/unicode_ops.c
    @@ -24,7 +24,7 @@ MVMint32 MVM_unicode_collation_tertiary (MVMThreadContext *tc, MVMint32 codepoin
    * then use the decomposed codepoint's weights. */
    MVMint64 MVM_unicode_string_compare
    (MVMThreadContext *tc, MVMString *a, MVMString *b,
    - MVMint32 collation_mode, MVMint32 lang_mode, MVMint32 country_mode) {
    + MVMint64 collation_mode, MVMint64 lang_mode, MVMint64 country_mode) {
    MVMStringIndex alen, blen;
    /* Iteration variables */
    MVMGraphemeIter a_gi, b_gi;
    @@ -48,7 +48,16 @@ MVMint64 MVM_unicode_string_compare
    /* Initialize a grapheme iterator */
    MVM_string_gi_init(tc, &a_gi, a);
    MVM_string_gi_init(tc, &b_gi, b);
    -
    + fprintf(stderr, "collation_mode[%lld] lang_mode[%lld] country_mode[%lld]\n", (long long)collation_mode, (long long)lang_mode, (long long)country_mode); /* modes are MVMint64; cast so each argument matches its %lld specifier */
    + if ( collation_mode & 1) {
    + fprintf(stderr, "primary\n");
    + }
    + if (collation_mode & 2) {
    + fprintf(stderr, "secondary\n");
    + }
    + if (collation_mode & 4) {
    + fprintf(stderr, "tertiary\n");
    + }
    /* Otherwise, need to iterate by grapheme */
    while (MVM_string_gi_has_more(tc, s_has_more_gi)) {
    ai = MVM_string_gi_get_grapheme(tc, &a_gi);
    @@ -70,15 +79,21 @@ MVMint64 MVM_unicode_string_compare

    /* result_a is the base character of the grapheme. */
    result_a = synth_a->base;
    - ai_coll_val += MVM_unicode_collation_primary(tc, result_a);
    - ai_coll_val += MVM_unicode_collation_secondary(tc, result_a);
    - ai_coll_val += MVM_unicode_collation_tertiary(tc, result_a);
    - while (a_ci.synth_codes) {
    - /* Take the current combiner as the result_a. */
    - result_a = a_ci.synth_codes[a_ci.visited_synth_codes];
    + if (collation_mode & 1)
    ai_coll_val += MVM_unicode_collation_primary(tc, result_a);
    + if (collation_mode & 2)
    ai_coll_val += MVM_unicode_collation_secondary(tc, result_a);
    + if (collation_mode & 4)
    ai_coll_val += MVM_unicode_collation_tertiary(tc, result_a);
    + while (a_ci.synth_codes) {
    + /* Take the current combiner as the result_a. */
    + result_a = a_ci.synth_codes[a_ci.visited_synth_codes];
    + if (collation_mode & 1)
    + ai_coll_val += MVM_unicode_collation_primary(tc, result_a);
    + if (collation_mode & 2)
    + ai_coll_val += MVM_unicode_collation_secondary(tc, result_a);
    + if (collation_mode & 4)
    + ai_coll_val += MVM_unicode_collation_tertiary(tc, result_a);
    /* If we've seen all of the synthetics, clear up so we'll take another
    * grapheme next time around. */
    a_ci.visited_synth_codes++;
    @@ -87,9 +102,12 @@ MVMint64 MVM_unicode_string_compare
    }
    }
    else {
    - ai_coll_val += MVM_unicode_collation_primary(tc, ai);
    - ai_coll_val += MVM_unicode_collation_secondary(tc, ai);
    - ai_coll_val += MVM_unicode_collation_tertiary(tc, ai);
    + if (collation_mode & 1)
    + ai_coll_val += MVM_unicode_collation_primary(tc, ai);
    + if (collation_mode & 2)
    + ai_coll_val += MVM_unicode_collation_secondary(tc, ai);
    + if (collation_mode & 4)
    + ai_coll_val += MVM_unicode_collation_tertiary(tc, ai);
    }
    if (bi < 0) {
    MVMCodepointIter b_ci;
    @@ -105,15 +123,21 @@ MVMint64 MVM_unicode_string_compare

    /* result_b is the base character of the grapheme. */
    result_b = synth_b->base;
    - bi_coll_val += MVM_unicode_collation_primary(tc, result_b);
    - bi_coll_val += MVM_unicode_collation_secondary(tc, result_b);
    - bi_coll_val += MVM_unicode_collation_tertiary(tc, result_b);
    - while (b_ci.synth_codes) {
    - /* Take the current combiner as the result_b. */
    - result_b = b_ci.synth_codes[b_ci.visited_synth_codes];
    + if (collation_mode & 1)
    bi_coll_val += MVM_unicode_collation_primary(tc, result_b);
    + if (collation_mode & 2)
    bi_coll_val += MVM_unicode_collation_secondary(tc, result_b);
    + if (collation_mode & 4)
    bi_coll_val += MVM_unicode_collation_tertiary(tc, result_b);
    + while (b_ci.synth_codes) {
    + /* Take the current combiner as the result_b. */
    + result_b = b_ci.synth_codes[b_ci.visited_synth_codes];
    + if (collation_mode & 1)
    + bi_coll_val += MVM_unicode_collation_primary(tc, result_b);
    + if (collation_mode & 2)
    + bi_coll_val += MVM_unicode_collation_secondary(tc, result_b);
    + if (collation_mode & 4)
    + bi_coll_val += MVM_unicode_collation_tertiary(tc, result_b);
    /* If we've seen all of the synthetics, clear up so we'll take another
    * grapheme next time around. */
    b_ci.visited_synth_codes++;
    @@ -122,9 +146,12 @@ MVMint64 MVM_unicode_string_compare
    }
    }
    else {
    - bi_coll_val += MVM_unicode_collation_primary(tc, bi);
    - bi_coll_val += MVM_unicode_collation_secondary(tc, bi);
    - bi_coll_val += MVM_unicode_collation_tertiary(tc, bi);
    + if (collation_mode & 1)
    + bi_coll_val += MVM_unicode_collation_primary(tc, bi);
    + if (collation_mode & 2)
    + bi_coll_val += MVM_unicode_collation_secondary(tc, bi);
    + if (collation_mode & 4)
    + bi_coll_val += MVM_unicode_collation_tertiary(tc, bi);
    }
    if ( (ai_coll_val != 0 && bi_coll_val != 0) && (ai_coll_val != bi_coll_val) ) {
    return ai_coll_val < bi_coll_val ? -3 : 3;