From bde0ef0256600dc1c52f960ecdf581d6b2cda261 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Grubbstr=C3=B6m=20=28Grubba=29?= <grubba@grubba.org> Date: Tue, 19 May 1998 20:30:56 +0200 Subject: [PATCH] Optimized the dynamic diff() a bit more, but it's still slow. Rev: src/builtin_functions.c:1.108 --- src/builtin_functions.c | 242 ++++++++++++++++++---------------------- 1 file changed, 106 insertions(+), 136 deletions(-) diff --git a/src/builtin_functions.c b/src/builtin_functions.c index 71f922440f..3fdf8d0be6 100644 --- a/src/builtin_functions.c +++ b/src/builtin_functions.c @@ -4,7 +4,7 @@ ||| See the files COPYING and DISCLAIMER for more information. \*/ #include "global.h" -RCSID("$Id: builtin_functions.c,v 1.107 1998/05/19 17:25:11 grubba Exp $"); +RCSID("$Id: builtin_functions.c,v 1.108 1998/05/19 18:30:56 grubba Exp $"); #include "interpret.h" #include "svalue.h" #include "pike_macros.h" @@ -2169,104 +2169,70 @@ static struct array *diff_longest_sequence(struct array *cmptbl, int blen) * This makes it faster than the G-M algorithm on binary data, * but slower on ascii data. */ -static struct array *diff_dyn_longest_sequence(struct array *a, - struct array *b) +static struct array *diff_dyn_longest_sequence(struct array *cmptbl, int blen) { struct array *res = NULL; struct diff_magic_link_head *table = NULL; struct diff_magic_link_pool *dml_pool = NULL; struct diff_magic_link *dml; - unsigned int sa = (unsigned int)a->size; - unsigned int sb = (unsigned int)b->size; - unsigned int ia; - unsigned int ib; + unsigned int sz = (unsigned int)cmptbl->size; + unsigned int i; unsigned int off1 = 0; - unsigned int off2; - unsigned int tmp; - - if (sa <= sb) { - off2 = sa+1; - table = calloc(sizeof(struct diff_magic_link_head)*2, off2); - if (!table) { - error("diff_dyn_longest_sequence(): Out of memory"); - } + unsigned int off2 = blen + 1; + + table = calloc(sizeof(struct diff_magic_link_head)*2, off2); + if (!table) { + error("diff_dyn_longest_sequence(): Out of memory"); + } + + /* FIXME: Assumes NULL is represented with all zeroes */ + /* NOTE: Scan strings backwards to get the same result as the G-M + * algorithm. + */ + for (i = sz; i--;) { + struct array *boff = cmptbl->item[i].u.array; + +#ifdef DIFF_DEBUG + fprintf(stderr, " i:%d\n", i); +#endif /* DIFF_DEBUG */ - /* FIXME: Assumes NULL is represented with all zeroes */ - /* NOTE: Scan strings backwards to get the same result as the G-M - * algorithm. - */ - for (ib = sb; ib--;) { - tmp = off1; + if (boff->size) { + unsigned int bi; + unsigned int base = blen; + unsigned int tmp = off1; off1 = off2; off2 = tmp; - for (ia = sa; ia--;) { - int res = is_eq(b->item + ib, a->item + ia); - if (table[off1 + ia].link) { - if (!--(table[off1 + ia].link->refs)) { - dml_delete(dml_pool, table[off1 + ia].link); - } - } - if (res) { - /* Equal */ - - table[off1 + ia].depth = table[off2 + ia + 1].depth + 1; - dml = (table[off1 + ia].link = dml_new(&dml_pool)); - if (!dml) { - dml_free_pools(dml_pool); - free(table); - error("diff_dyn_longest_sequence(): Out of memory"); - } - dml->refs = 1; - dml->prev = table[off2 + ia + 1].link; - if (dml->prev) { - dml->prev->refs++; - } - dml->x = ib; - } else { + for (bi = boff->size; bi--;) { + unsigned int ib = boff->item[bi].u.integer; + +#ifdef DIFF_DEBUG + fprintf(stderr, " Range [%d - %d] differ\n", base - 1, ib + 1); +#endif /* DIFF_DEBUG */ + while ((--base) > ib) { /* Differ */ + if (table[off1 + base].link) { + if (!--(table[off1 + base].link->refs)) { + dml_delete(dml_pool, table[off1 + base].link); + } + } /* FIXME: Should it be > or >= here to get the same result * as with the G-M algorithm? */ - if (table[off2 + ia].depth > table[off1 + ia + 1].depth) { - table[off1 + ia].depth = table[off2 + ia].depth; - dml = (table[off1 + ia].link = table[off2 + ia].link); + if (table[off2 + base].depth > table[off1 + base + 1].depth) { + table[off1 + base].depth = table[off2 + base].depth; + dml = (table[off1 + base].link = table[off2 + base].link); } else { - table[off1 + ia].depth = table[off1 + ia + 1].depth; - dml = (table[off1 + ia].link = table[off1 + ia + 1].link); + table[off1 + base].depth = table[off1 + base + 1].depth; + dml = (table[off1 + base].link = table[off1 + base + 1].link); } if (dml) { dml->refs++; } } - } - } - } else { - /* Do the mirror version */ - off2 = sb+1; - table = calloc(sizeof(struct diff_magic_link_head)*2, off2); - if (!table) { - error("diff_dyn_longest_sequence(): Out of memory"); - } - - /* FIXME: Assumes NULL is represented with all zeroes */ - /* NOTE: Scan strings backwards to get the same result as the G-M - * algorithm. - */ - for (ia = sa; ia--;) { - tmp = off1; - off1 = off2; - off2 = tmp; - -#ifdef DIFF_DEBUG - fprintf(stderr, " ia:%d\n", ia); -#endif /* DIFF_DEBUG */ - - for (ib = sb; ib--;) { - int res = is_eq(b->item + ib, a->item + ia); - + /* Equal */ #ifdef DIFF_DEBUG - fprintf(stderr, " ib:%d ", ib); + fprintf(stderr, " Equal\n"); #endif /* DIFF_DEBUG */ if (table[off1 + ib].link) { @@ -2274,57 +2240,56 @@ static struct array *diff_dyn_longest_sequence(struct array *a, dml_delete(dml_pool, table[off1 + ib].link); } } - if (res) { - /* Equal */ + table[off1 + ib].depth = table[off2 + ib + 1].depth + 1; + dml = (table[off1 + ib].link = dml_new(&dml_pool)); + if (!dml) { + dml_free_pools(dml_pool); + free(table); + error("diff_dyn_longest_sequence(): Out of memory"); + } + dml->refs = 1; + dml->prev = table[off2 + ib + 1].link; + if (dml->prev) { + dml->prev->refs++; + } + dml->x = ib; + } #ifdef DIFF_DEBUG - fprintf(stderr, "Equal\n"); + fprintf(stderr, " Range [0 - %d] differ\n", base-1); #endif /* DIFF_DEBUG */ - - table[off1 + ib].depth = table[off2 + ib + 1].depth + 1; - dml = (table[off1 + ib].link = dml_new(&dml_pool)); - if (!dml) { - dml_free_pools(dml_pool); - free(table); - error("diff_dyn_longest_sequence(): Out of memory"); + while (base--) { + /* Differ */ + if (table[off1 + base].link) { + if (!--(table[off1 + base].link->refs)) { + dml_delete(dml_pool, table[off1 + base].link); } - dml->refs = 1; - dml->prev = table[off2 + ib + 1].link; - if (dml->prev) { - dml->prev->refs++; - } - dml->x = ib; + } + /* FIXME: Should it be > or >= here to get the same result + * as with the G-M algorithm? + */ + if (table[off2 + base].depth > table[off1 + base + 1].depth) { + table[off1 + base].depth = table[off2 + base].depth; + dml = (table[off1 + base].link = table[off2 + base].link); } else { - /* Differ */ -#ifdef DIFF_DEBUG - fprintf(stderr, "Differ\n"); -#endif /* DIFF_DEBUG */ - /* FIXME: Should it be > or >= here to get the same result - * as with the G-M algorithm? - */ - if (table[off2 + ib].depth > table[off1 + ib + 1].depth) { - table[off1 + ib].depth = table[off2 + ib].depth; - dml = (table[off1 + ib].link = table[off2 + ib].link); - } else { - table[off1 + ib].depth = table[off1 + ib + 1].depth; - dml = (table[off1 + ib].link = table[off1 + ib + 1].link); - } - if (dml) { - dml->refs++; - } + table[off1 + base].depth = table[off1 + base + 1].depth; + dml = (table[off1 + base].link = table[off1 + base + 1].link); + } + if (dml) { + dml->refs++; } } } } /* Convert table into res */ - sa = table[off1].depth; + sz = table[off1].depth; dml = table[off1].link; free(table); #ifdef DIFF_DEBUG - fprintf(stderr, "Result array size:%d\n", sa); + fprintf(stderr, "Result array size:%d\n", sz); #endif /* DIFF_DEBUG */ - res = allocate_array(sa); + res = allocate_array(sz); if (!res) { if (dml_pool) { dml_free_pools(dml_pool); @@ -2332,24 +2297,24 @@ static struct array *diff_dyn_longest_sequence(struct array *a, error("diff_dyn_longest_sequence(): Out of memory"); } - ia = 0; + i = 0; while(dml) { #ifdef DEBUG - if (ia >= sa) { + if (i >= sz) { fatal("Consistency error in diff_dyn_longest_sequence()\n"); } #endif /* DEBUG */ #ifdef DIFF_DEBUG - fprintf(stderr, " %02d: %d\n", ia, dml->x); + fprintf(stderr, " %02d: %d\n", i, dml->x); #endif /* DIFF_DEBUG */ - res->item[ia].type = T_INT; - res->item[ia].subtype = 0; - res->item[ia].u.integer = dml->x; + res->item[i].type = T_INT; + res->item[i].subtype = 0; + res->item[i].u.integer = dml->x; dml = dml->prev; - ia++; + i++; } #ifdef DEBUG - if (ia != sa) { + if (i != sz) { fatal("Consistency error in diff_dyn_longest_sequence()\n"); } #endif /* DEBUG */ @@ -2443,27 +2408,27 @@ void f_diff(INT32 args) cmptbl = diff_compare_table(sp[-args].u.array, sp[1-args].u.array, &uniq); + push_array(cmptbl); +#ifdef ENABLE_DYN_DIFF if (uniq * 100 > sp[1-args].u.array->size) { +#endif /* ENABLE_DYN_DIFF */ #ifdef DIFF_DEBUG fprintf(stderr, "diff: Using G-M algorithm, u:%d, s:%d\n", uniq, sp[1-args].u.array->size); #endif /* DIFF_DEBUG */ - push_array(cmptbl); - seq=diff_longest_sequence(cmptbl, sp[1-1-args].u.array->size); - push_array(seq); - - diff=diff_build(sp[-2-args].u.array,sp[1-2-args].u.array,seq); + seq = diff_longest_sequence(cmptbl, sp[1-1-args].u.array->size); +#ifdef ENABLE_DYN_DIFF } else { #ifdef DIFF_DEBUG fprintf(stderr, "diff: Using dyn algorithm, u:%d, s:%d\n", uniq, sp[1-args].u.array->size); #endif /* DIFF_DEBUG */ - free_array(cmptbl); - seq = diff_dyn_longest_sequence(sp[-args].u.array, sp[1-args].u.array); - push_array(seq); - - diff = diff_build(sp[-1-args].u.array, sp[1-1-args].u.array, seq); - } + seq = diff_dyn_longest_sequence(cmptbl, sp[1-1-args].u.array->size); + } +#endif /* ENABLE_DYN_DIFF */ + push_array(seq); + + diff=diff_build(sp[-2-args].u.array,sp[1-2-args].u.array,seq); pop_n_elems(2+args); push_array(diff); @@ -2498,10 +2463,11 @@ void f_diff_longest_sequence(INT32 args) sp[1-args].type!=T_ARRAY) PIKE_ERROR("diff_longest_sequence", "Bad arguments.\n", sp, args); - cmptbl=diff_compare_table(sp[-args].u.array,sp[1-args].u.array, NULL); + cmptbl = diff_compare_table(sp[-args].u.array,sp[1-args].u.array, NULL); push_array(cmptbl); /* Note that the stack is one element off here. */ - seq=diff_longest_sequence(cmptbl, sp[1-1-args].u.array->size); + seq = diff_longest_sequence(cmptbl, sp[1-1-args].u.array->size); + pop_n_elems(args+1); push_array(seq); } @@ -2509,6 +2475,7 @@ void f_diff_longest_sequence(INT32 args) void f_diff_dyn_longest_sequence(INT32 args) { struct array *seq; + struct array *cmptbl; if (args<2) PIKE_ERROR("diff_dyn_longest_sequence", "Too few arguments.\n", @@ -2518,7 +2485,10 @@ void f_diff_dyn_longest_sequence(INT32 args) sp[1-args].type!=T_ARRAY) PIKE_ERROR("diff_dyn_longest_sequence", "Bad arguments.\n", sp, args); - seq = diff_dyn_longest_sequence(sp[-args].u.array, sp[1-args].u.array); + cmptbl=diff_compare_table(sp[-args].u.array,sp[1-args].u.array, NULL); + push_array(cmptbl); + /* Note that the stack is one element off here. */ + seq = diff_dyn_longest_sequence(cmptbl, sp[1-1-args].u.array->size); pop_n_elems(args); push_array(seq); -- GitLab