From f5c8ba76029337f1099eab3c789e025a5351cc2e Mon Sep 17 00:00:00 2001 From: Martin Stjernholm <mast@lysator.liu.se> Date: Wed, 2 Dec 1998 13:56:47 +0100 Subject: [PATCH] diff3 got better heuristics in ambiguous cases. Rev: lib/modules/Array.pmod:1.25 Rev: src/testsuite.in:1.136 --- lib/modules/Array.pmod | 147 +++++++++++++++++++++++++---------------- src/testsuite.in | 59 +++++++++++------ 2 files changed, 128 insertions(+), 78 deletions(-) diff --git a/lib/modules/Array.pmod b/lib/modules/Array.pmod index 7059f747e0..5f69de19f7 100644 --- a/lib/modules/Array.pmod +++ b/lib/modules/Array.pmod @@ -244,75 +244,108 @@ array(array(array)) diff3 (array a, array b, array c) array(int) seq_bc = diff_longest_sequence (b, c); array(int) seq_ca = diff_longest_sequence (c, a); - // A number bigger than any valid index servers as end of array marker. - int eoa = max (sizeof (a), sizeof (b), sizeof (c)); - - array(int) ab = allocate (sizeof (a) + 1, -1); - array(int) ac = allocate (sizeof (a) + 1, -1); - ab[sizeof (a)] = ac[sizeof (a)] = eoa; - array(int) bc = allocate (sizeof (b) + 1, -1); - array(int) ba = allocate (sizeof (b) + 1, -1); - bc[sizeof (b)] = ba[sizeof (b)] = eoa; - array(int) ca = allocate (sizeof (c) + 1, -1); - array(int) cb = allocate (sizeof (c) + 1, -1); - ca[sizeof (c)] = cb[sizeof (c)] = eoa; + array(int) aeq = allocate (sizeof (a) + 1); + array(int) beq = allocate (sizeof (b) + 1); + array(int) ceq = allocate (sizeof (c) + 1); + aeq[sizeof (a)] = beq[sizeof (b)] = ceq[sizeof (c)] = 7; for (int i = 0, j = 0; j < sizeof (seq_ab); i++) - if (a[i] == b[seq_ab[j]]) ab[i] = seq_ab[j], ba[seq_ab[j]] = i, j++; + if (a[i] == b[seq_ab[j]]) aeq[i] |= 2, beq[seq_ab[j]] |= 1, j++; for (int i = 0, j = 0; j < sizeof (seq_bc); i++) - if (b[i] == c[seq_bc[j]]) bc[i] = seq_bc[j], cb[seq_bc[j]] = i, j++; + if (b[i] == c[seq_bc[j]]) beq[i] |= 2, ceq[seq_bc[j]] |= 1, j++; for (int i = 0, j = 0; j < sizeof (seq_ca); i++) - if (c[i] == a[seq_ca[j]]) ca[i] = seq_ca[j], ac[seq_ca[j]] = i, j++; + if (c[i] == a[seq_ca[j]]) ceq[i] |= 2, aeq[seq_ca[j]] |= 1, j++; array(array) ares = ({}), bres = ({}), cres = ({}); int ai = 0, bi = 0, ci = 0; - int part = 8; // Chunk partition bitfield. - - while (min (ac[ai], ab[ai], ba[bi], bc[bi], cb[ci], ca[ci]) != eoa) { - int apart = (ac[ai] == -1 && 1) | (ab[ai] == -1 && 2); - int bpart = (ba[bi] == -1 && 2) | (bc[bi] == -1 && 4); - int cpart = (cb[ci] == -1 && 4) | (ca[ci] == -1 && 1); - int newpart = apart | bpart | cpart; - - //werror ("a %3d %3d %3d %3d\n", ai, ac[ai], ab[ai], apart); - //werror ("b %3d %3d %3d %3d\n", bi, ba[bi], bc[bi], bpart); - //werror ("c %3d %3d %3d %3d\n", ci, cb[ci], ca[ci], cpart); - //werror ("part %d %d\n", part, newpart); - - if ((apart ^ bpart ^ cpart) == 7 && !(apart & bpart & cpart) && - apart && bpart && cpart) { - // Solve cyclically interlocking equivalences by arbitrary - // breaking one of them. - if (ac[ai] != -1) ca[ac[ai]] = -1, ac[ai] = -1; - if (ab[ai] != -1) ba[ab[ai]] = -1, ab[ai] = -1; - apart = 3; - } + int prevodd = -2; - if ((part & newpart) == newpart) { - // If the previous block had the same equivalence partition or - // was a three-part conflict, we should tack any singleton - // equivalences we have onto it. - if (apart == 3) ares[-1] += ({a[ai++]}); - if (bpart == 6) bres[-1] += ({b[bi++]}); - if (cpart == 5) cres[-1] += ({c[ci++]}); - } + int i = 10; + while (i-- && !(aeq[ai] & beq[bi] & ceq[ci] & 4)) { + array empty = ({}), apart = empty, bpart = empty, cpart = empty; - if (newpart != part) { - // Start a new block if the equivalence partition doesn't match - // the previous block. - part = newpart; - ares += ({({})}), bres += ({({})}), cres += ({({})}); + if (aeq[ai] == 2 && beq[bi] == 1) { // a and b are equal. + do apart += ({a[ai++]}), bi++; while (aeq[ai] == 2 && beq[bi] == 1); + bpart = apart; + while (!ceq[ci]) cpart += ({c[ci++]}); + prevodd = 2; + } + else if (beq[bi] == 2 && ceq[ci] == 1) { // b and c are equal. + do bpart += ({b[bi++]}), ci++; while (beq[bi] == 2 && ceq[ci] == 1); + cpart = bpart; + while (!aeq[ai]) apart += ({a[ai++]}); + prevodd = 0; + } + else if (ceq[ci] == 2 && aeq[ai] == 1) { // c and a are equal. + do cpart += ({c[ci++]}), ai++; while (ceq[ci] == 2 && aeq[ai] == 1); + apart = cpart; + while (!beq[bi]) bpart += ({b[bi++]}); + prevodd = 1; } + else if (aeq[ai] & beq[bi] & ceq[ci] == 3) { // All are equal. + do apart += ({a[ai++]}), bi++, ci++; while (aeq[ai] & beq[bi] & ceq[ci] == 3); + cpart = bpart = apart; + prevodd = -1; + } + else { + // Haven't got any equivalences in this block. Avoid adjacent + // complementary blocks (e.g. ({({"foo"}),({}),({})}) next to + // ({({}),({"bar"}),({"bar"})})). Besides that, leave the + // odd-one-out sequence empty in a block where two are equal. + + if (aeq[ai] & beq[bi] & ceq[ci]) { + // Got cyclically interlocking equivalences. Have to break one + // of them. Prefer the shortest. + int which, newblock, mask, i, oi; + array(int) eq, oeq; + array arr; + for (i = 0;; i++) + if (aeq[ai] != aeq[ai + i]) { + which = 0, newblock = prevodd != 0 && (prevodd == -2 || sizeof (ares[-1])); + mask = aeq[ai] ^ 3, i = ai, eq = aeq, arr = a; + if (mask == 1) oi = bi, oeq = beq; else oi = ci, oeq = ceq; + break; + } + else if (beq[bi] != beq[bi + i]) { + which = 1, newblock = prevodd != 1 && (prevodd == -2 || sizeof (bres[-1])); + mask = beq[bi] ^ 3, i = bi, eq = beq, arr = b; + if (mask == 1) oi = ci, oeq = ceq; else oi = ai, oeq = aeq; + break; + } + else if (ceq[ci] != ceq[ci + i]) { + which = 2, newblock = prevodd != 2 && (prevodd == -2 || sizeof (cres[-1])); + mask = ceq[ci] ^ 3, i = ci, eq = ceq, arr = c; + if (mask == 1) oi = ai, oeq = aeq; else oi = bi, oeq = beq; + break; + } + if (newblock) + ares += ({empty}), bres += ({empty}), cres += ({empty}), prevodd = -1; + while (oeq[oi] != mask) oi++; + array part = ({}); + mask ^= 3; + do part += ({arr[i++]}), oeq[oi++] = 0; while (eq[i] == mask); + switch (which) { + case 0: ai = i; ares[-1] += part; break; + case 1: bi = i; bres[-1] += part; break; + case 2: ci = i; cres[-1] += part; break; + } + continue; + } - // Add any composite equivalences. Wait with the singletons (this - // may cause an extra iteration, but the necessary conditions to - // prevent that are tricky). - if (!part) ares[-1] = bres[-1] = cres[-1] += ({a[ai++]}), bi++, ci++; - else if (part == 3 && bpart && cpart) bres[-1] = cres[-1] += ({b[bi++]}), ci++; - else if (part == 6 && cpart && apart) cres[-1] = ares[-1] += ({c[ci++]}), ai++; - else if (part == 5 && apart && bpart) ares[-1] = bres[-1] += ({a[ai++]}), bi++; + else { + switch (prevodd) { + case 0: apart = ares[-1], ares[-1] = ({}); break; + case 1: bpart = bres[-1], bres[-1] = ({}); break; + case 2: cpart = cres[-1], cres[-1] = ({}); break; + } + prevodd = -1; + while (!aeq[ai]) apart += ({a[ai++]}); + while (!beq[bi]) bpart += ({b[bi++]}); + while (!ceq[ci]) cpart += ({c[ci++]}); + } + } - //werror ("%O\n", ({ares[-1], bres[-1], cres[-1]})); + ares += ({apart}), bres += ({bpart}), cres += ({cpart}); } return ({ares, bres, cres}); diff --git a/src/testsuite.in b/src/testsuite.in index af627daa0c..f2aae331f3 100644 --- a/src/testsuite.in +++ b/src/testsuite.in @@ -1,4 +1,4 @@ -stest_true([["$Id: testsuite.in,v 1.135 1998/12/01 15:23:29 mast Exp $"]]) +stest_true([["$Id: testsuite.in,v 1.136 1998/12/02 12:56:47 mast Exp $"]]) cond([[all_constants()->_verify_internals]], [[ test_do(_verify_internals()) @@ -2255,15 +2255,7 @@ test_equal(Array.diff("123.org"/"","http://123.org/"/""), ({ ({ ({}), "123.org"/"", ({}) }), ({ "http://"/"", "123.org"/"", ({ "/" }) }) })) -// - Array.diff3 (these tests do not work for a really optimal diff3 implementation) -test_equal(Array.diff3(({" "}),({"a"}),({"b"})), - ({({({" "})}), - ({({"a"})}), - ({({"b"})})})) -test_equal(Array.diff3(({}),({"a"}),({"b"})), - ({({({ })}), - ({({"a"})}), - ({({"b"})})})) +// - Array.diff3 (note that the results would be different for a really optimal diff3) test_equal(Array.diff3(({}),({"a","c"}),({"b","y"})), ({({({ })}), ({({"a","c"})}), @@ -2276,22 +2268,43 @@ test_equal(Array.diff3(({"a"}),({"a","c"}),({"b"})), ({({({"a"}),({ })}), ({({"a"}),({"c"})}), ({({ }),({"b"})})})) -test_equal(Array.diff3(({"a","c","d"}),({"a","d","d"}),({"a","b"})), - ({({({"a"}),({"c"}),({"d"}),({ })}), - ({({"a"}),({ }),({"d"}),({"d"})}), - ({({"a"}),({"b"}),({ }),({ })})})) test_equal(Array.diff3(({"a","d"}),({"a","d"}),({"a","b"})), ({({({"a"}),({"d"})}), ({({"a"}),({"d"})}), ({({"a"}),({"b"})})})) -test_equal(Array.diff3(({"a","b"}),({"b","c"}),({"c","a"})), - ({({({"a"}),({"b"}),({ }),({ })}), - ({({ }),({"b"}),({"c"}),({ })}), - ({({ }),({ }),({"c"}),({"a"})})})) test_equal(Array.diff3(({"a","b"}),({"b"}),({"c","b"})), ({({({"a"}),({"b"})}), ({({ }),({"b"})}), ({({"c"}),({"b"})})})) +// The following is not optimal. +test_equal(Array.diff3(({"a","b"}),({"b"}),({"b","b"})), + ({({({"a"}),({"b"}),({ })}), + ({({ }),({"b"}),({ })}), + ({({ }),({"b"}),({"b"})})})) +test_equal(Array.diff3(({"a","c","d"}),({"a","d","d"}),({"a","b"})), + ({({({"a"}),({"c"}),({"d"}),({ })}), + ({({"a"}),({ }),({"d"}),({"d"})}), + ({({"a"}),({"b"}),({ }),({ })})})) +test_equal(Array.diff3(({"a","b","x"}),({"y","b","c","x"}),({"y","c","a"})), + ({({({"a"}),({"b"}),({ }),({"x"})}), + ({({"y"}),({"b"}),({"c"}),({"x"})}), + ({({"y"}),({ }),({"c"}),({"a"})})})) +test_equal(Array.diff3(({"a","a","b","b"}),({"b","b","c","c"}),({"c","c","a","a"})), + ({({({"a","a"}),({"b","b"}),({ }),({ })}), + ({({ }),({"b","b"}),({"c","c"}),({ })}), + ({({ }),({ }),({"c","c"}),({"a","a"})})})) +test_equal(Array.diff3(({"a","a","b"}),({"b","c"}),({"c","a","a"})), + ({({({ }),({ }),({"a","a"}),({"b"})}), + ({({"b"}),({"c"}),({ }),({ })}), + ({({ }),({"c"}),({"a","a"}),({ })})})) +test_equal(Array.diff3(({"x","x","x","a"}),({"y","b"}),({"x","x","x","c"})), + ({({({"x","x","x"}),({"a" })}), + ({({ }),({"y","b"})}), + ({({"x","x","x"}),({"c" })})})) +test_equal(Array.diff3(({"a","x","x","x","a"}),({"b","y","y","b"}),({"c","x","x","x","c"})), + ({({({"a" }),({"x","x","x"}),({"a"})}), + ({({"b","y","y","b"}),({ }),({ })}), + ({({"c" }),({"x","x","x"}),({"c"})})})) test_equal(Array.diff3(({"a","b","c","d"}),({"x","x","x","d"}),({"a","y","y","y"})), ({({({"a"}),({"b","c" }),({"d"})}), ({({ }),({"x","x","x"}),({"d"})}), @@ -2301,9 +2314,13 @@ test_equal(Array.diff3(({"a","b","c","d"}),({"a","x","x","d"}),({"a","y","y","y" ({({"a"}),({"x","x" }),({"d"})}), ({({"a"}),({"y","y","y"}),({ })})})) test_equal(Array.diff3(({"a","b","c","d"}),({"x","x","x","b"}),({"a","y","y","y"})), - ({({({"a"}),({ }),({"b"}),({"c","d"})}), - ({({ }),({"x","x","x"}),({"b"}),({ })}), - ({({"a"}),({"y","y","y"}),({ }),({ })})})) + ({({({"a" }),({"b"}),({"c","d" })}), + ({({"x","x","x"}),({"b"}),({ })}), + ({({"a" }),({ }),({"y","y","y"})})})) +test_equal(Array.diff3(({"a","b","c","d"}),({"x","x","x","c"}),({"a","y","y","y"})), + ({({({"a"}),({"b" }),({"c"}),({"d"})}), + ({({ }),({"x","x","x"}),({"c"}),({ })}), + ({({"a"}),({"y","y","y"}),({ }),({ })})})) test_equal(Array.diff3(({"a","b","b","c","d"}),({"z","a","b","b","x"}),({"z","b","c","x"})), ({({({ }),({"a"}),({"b"}),({"b"}),({"c"}),({"d"})}), ({({"z"}),({"a"}),({"b"}),({"b"}),({ }),({"x"})}), -- GitLab