local-to-global.c 26.4 KB
Newer Older
inge's avatar
inge committed
1
2
3
/*
 * File: local_to_global.c
 *
4
 * Copyright 1996-1999 Inge Wallin, Per Cederqvist
inge's avatar
inge committed
5
6
7
 */


David Byers's avatar
David Byers committed
8
9
10
11
12
13


#ifdef HAVE_CONFIG_H
#  include <config.h>
#endif

14
15
16
17
18
19
20
#include <assert.h>
#ifdef HAVE_STDLIB_H
#  include <stdlib.h>
#endif
#ifndef NULL
#  include <stdio.h>
#endif
21
22
23
24
25
26

#include <sys/types.h>
#include <time.h>

#include "s-string.h"

inge's avatar
inge committed
27
#include "kom-types.h"
Per Cederqvist's avatar
Per Cederqvist committed
28
#include "local-to-global.h"
29
30
31
#include "log.h"
#include "ram-parse.h"
#include "ram-output.h"
32
33
34
35
36
#include "lyskomd.h"
#include "server/smalloc.h"

/* The test suite requires L2G_BLOCKSIZE to be 10, but that small
   blocks will result in far too much malloc-overhead.  A value of at
37
38
39
   least 100 is probably needed for reasonable performance.  The user
   can call l2g_set_block_size to set this to a reasonable value.  */
static int L2G_BLOCKSIZE = -1;
40
41
42
43
44
45
46
47
48
49


struct l2g_block_info {
    /* An index into key_block and value_block that indicates the
       first free spot.  This can also be thought of as the number of
       entries in key_block and value_block that are actually in use. */
    int first_free;

    /* Number of entries in the block that contain the value 0.  For
       purposes of calculating this value, data past the end of the
Per Cederqvist's avatar
Per Cederqvist committed
50
51
       block is counted as zeroes, so that this->first_free +
       this->zeroes is always at least L2G_BLOCKSIZE.  */
52
53
    int zeroes;

Per Cederqvist's avatar
Per Cederqvist committed
54
55
    /* First local text no in this block.  Note: this does not
       increase if the first entry of the block is removed. */
56
57
58
59
60
61
62
63
64
    Local_text_no start;

    /* NULL if this is a dense block, or a block of L2G_BLOCKSIZE
       Local_text_nos if this is a sparse block. */
    Local_text_no *key_block;

    /* A block of L2G_BLOCKSIZE Text_nos. */
    Text_no *value_block;
};
inge's avatar
inge committed
65
66


67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
static long nr_constructs = 0;
static long nr_l2gs = 0;
static long nr_l2gs_peak = 0;
static long nr_destructs = 0;
static long nr_clears = 0;
static long nr_copies = 0;
static long nr_joins = 0;
static long nr_joined_blocks = 0;
static long nr_blocks = 0;
static long nr_blocks_peak = 0;
static long nr_blocks_sparse = 0;
static long nr_blocks_sparse_peak = 0;
static long sparse_skip_cost = 0;
static long nr_sparse_compactions = 0;
static long nr_sparsifications = 0;



inge's avatar
inge committed
85
86
87
88
89
/* ================================================================ */
/* ====                     Static functions                   ==== */
/* ================================================================ */


90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
static inline int
is_dense(const struct l2g_block_info *binfo)
{
    return binfo->key_block == NULL;
}

static inline int
is_empty(const struct l2g_block_info *binfo)
{
    return binfo->zeroes == L2G_BLOCKSIZE;
}

static inline Local_text_no
key_value(const struct l2g_block_info *binfo,
	  int index)
{
    if (is_dense(binfo))
	return binfo->start + index;
    else
	return binfo->key_block[index];
}


static inline int
sparse_skip_deleted(const struct l2g_block_info *binfo,
		    int i)
{
    while (i < binfo->first_free && binfo->value_block[i] == 0)
118
119
    {
	++sparse_skip_cost;
120
	++i;
121
    }
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176

    return i;
}


/* Search for LNO in BINFO, and return the index for it.  BINFO must
   be a non-empty sparse block.  If LNO isn't present in the block
   this will return the index of next higher entry.  If LNO is larger
   than the last entry, the index past the largest entry will be
   returned.  This function never returns the index of a deleted entry. */
static inline int
sparse_locate_value(const struct l2g_block_info *binfo,
		    Local_text_no lno)
{
    int i;

    assert(!is_empty(binfo));
    assert(!is_dense(binfo));
    
    /* FIXME: implement a binary search instead. */
    for (i = 0; i < binfo->first_free && binfo->key_block[i] < lno; ++i)
	continue;

    /* Skip any lingering deleted entries. */
    i = sparse_skip_deleted(binfo, i);

    assert(i == binfo->first_free || binfo->key_block[i] >= lno);
    assert(i == binfo->first_free || binfo->value_block[i] != 0);

    return i;
}

static void
sparse_compact(struct l2g_block_info *binfo)
{
    int from, to;

    if (binfo->first_free + binfo->zeroes == L2G_BLOCKSIZE)
	return;

    assert(binfo->first_free + binfo->zeroes > L2G_BLOCKSIZE);

    for (from = to = 0; from < binfo->first_free; ++from)
	if (binfo->value_block[from] != 0)
	{
	    if (from != to)
	    {
		binfo->value_block[to] = binfo->value_block[from];
		binfo->key_block[to] = binfo->key_block[from];
	    }
	    ++to;
	}

    binfo->first_free = to;
    assert(binfo->first_free + binfo->zeroes == L2G_BLOCKSIZE);
177
    ++nr_sparse_compactions;
178
179
180
}


inge's avatar
inge committed
181
182
183
184
185
186
187
188
/*
 * Add a new block to the Local_to_global L2G.
 * Since we always add consecutively, we always start out by making 
 * a new block dense.
 *
 * Return a pointer to the newly created block info.
 */

189
static struct l2g_block_info *
inge's avatar
inge committed
190
191
add_block(Local_to_global *l2g)
{
192
193
194
195
    struct l2g_block_info  * binfo;
#ifndef NDEBUG
    int i;
#endif
inge's avatar
inge committed
196
197
198

    /* Realloc the block pointer array. */
    l2g->num_blocks++;
199
200
    l2g->blocks = srealloc(l2g->blocks,
			   l2g->num_blocks * sizeof(struct l2g_block_info));
inge's avatar
inge committed
201
202
203
204
205

    /* Create a new block info and fill it in. */
    binfo = &(l2g->blocks[l2g->num_blocks - 1]);
    binfo->first_free  = 0;
    binfo->zeroes      = L2G_BLOCKSIZE;
206
#ifndef NDEBUG
207
    binfo->start       = 0xdeadbeef;
208
#endif
inge's avatar
inge committed
209
    binfo->key_block   = NULL;
210
211
212
213
    binfo->value_block = smalloc(L2G_BLOCKSIZE * sizeof(Text_no));
    
#ifndef NDEBUG
    for (i = 0; i < L2G_BLOCKSIZE; ++i)
214
	binfo->value_block[i] = 0xdeadbeef;
215
#endif
216
217
218
    if (++nr_blocks > nr_blocks_peak)
	nr_blocks_peak = nr_blocks;
	
inge's avatar
inge committed
219
220
221
222
    return binfo;
}


223
224
225
226
227
228
229
/*
 * Delete a block from the Local_to_global L2G.  The pointer BINFO
 * points at the block, which is supposed to be empty.
 */

static void
delete_block(Local_to_global *l2g, 
230
	     struct l2g_block_info  *binfo)
231
{
232
233
    assert(is_empty(binfo));

234
235
236
237
    if (!is_dense(binfo))
	--nr_blocks_sparse;
    --nr_blocks;

238
239
    /* Remove the blocks from the Block Info */
    if (binfo->key_block != NULL)
240
241
	sfree(binfo->key_block);
    sfree(binfo->value_block);
242
243
244
245
246

    /* Compact the remaining blocks. */
    while (++binfo < l2g->blocks + l2g->num_blocks)
	*(binfo - 1) = *binfo;
    l2g->num_blocks--;
247
248
    l2g->blocks = srealloc(l2g->blocks,
			   l2g->num_blocks * sizeof(struct l2g_block_info));
249
250
251
}


inge's avatar
inge committed
252
253
254
255
/*
 * Make a sparse block from a dense one.
 */
static void
256
make_sparse(struct l2g_block_info *binfo)
inge's avatar
inge committed
257
258
259
260
{
    int  next;
    int  i;

261
    assert(is_dense(binfo));
262
263
264
    if (++nr_blocks_sparse > nr_blocks_sparse_peak)
	nr_blocks_sparse_peak = nr_blocks_sparse;
    ++nr_sparsifications;
265

inge's avatar
inge committed
266
    /* Allocate the room for the key block. */
267
268
269
    binfo->key_block = smalloc(L2G_BLOCKSIZE * sizeof(Local_text_no));
#ifndef NDEBUG
    for (i = 0; i < L2G_BLOCKSIZE; ++i)
David Byers's avatar
David Byers committed
270
	binfo->key_block[i] = (Local_text_no)0xdeadbeefUL;
271
#endif
inge's avatar
inge committed
272
273
274

    /* Compact the block. */
    next = 0;
275
276
277
    for (i = 0; i < binfo->first_free; ++i)
	if (binfo->value_block[i] != 0)
	{
inge's avatar
inge committed
278
279
280
281
282
283
284
285
286
287
288
289
290
291
	    binfo->key_block[next]   = binfo->start + i;
	    binfo->value_block[next] = binfo->value_block[i];
	    next++;
	}

    /* Set the rest of the fields. */
    binfo->first_free = next;
    binfo->zeroes = L2G_BLOCKSIZE - next;
}


/*
 * Find the block where the local text no LNO is and return a pointer
 * to it.  Returning a pointer does not mean that the text still exists,
292
293
294
 * so a search within the block must also be done later.  If LNO is
 * smaller than the smallest number in the structure a pointer to the
 * first block will be returned.
inge's avatar
inge committed
295
 * 
296
297
 * Return NULL if LNO is bigger than the highest number in the
 * structure, or if the structure is empty.
inge's avatar
inge committed
298
299
 */

David Byers's avatar
David Byers committed
300
static struct l2g_block_info *
301
find_block(const Local_to_global *l2g, Local_text_no lno)
inge's avatar
inge committed
302
{
David Byers's avatar
David Byers committed
303
    struct l2g_block_info  * binfo;
inge's avatar
inge committed
304

Per Cederqvist's avatar
Per Cederqvist committed
305
306
307
308
    /* If empty, the number can not be in here. */
    if (l2g->num_blocks == 0)
	return NULL;

inge's avatar
inge committed
309
310
311
312
    /* Let binfo point to the last block. */
    binfo = &(l2g->blocks[l2g->num_blocks - 1]);

    /* If lno is greater than the biggest local number, return NULL. */
313
314
315
    assert(binfo->first_free > 0);
    if (lno > key_value(binfo, binfo->first_free - 1))
	return NULL;
inge's avatar
inge committed
316
317
318

    /* Find the block where lno *could* be. */
    /* FIXME: Binary search? */
319
320
    while (binfo > l2g->blocks)
    {
inge's avatar
inge committed
321
322
323
324
325
	if (lno >= binfo->start)
	    return binfo;
	binfo--;
    }

Per Cederqvist's avatar
Per Cederqvist committed
326
    return binfo;
inge's avatar
inge committed
327
328
329
}


330
331
332
333
334
335
336
/* Find the first local text number higher than LNO and return it.
   Return 0 if LNO is higher than any local text number in the
   structure.  The block where the returned value was found is stored
   in BINFO_OUT, and the position within the block is stored in
   INDEX_OUT.  BINFO_OUT and INDEX_OUT are only filled in if the
   return value is non-zero.  */

inge's avatar
inge committed
337
static Local_text_no
338
339
find_block_index_key(const Local_to_global *l2g, Local_text_no lno,
		     const struct l2g_block_info **binfo_out,
inge's avatar
inge committed
340
341
		     int             *index_out)
{
342
    const struct l2g_block_info  * binfo;
inge's avatar
inge committed
343
344
345
346
347
348
349
    int               i;

    /* If the number is not to be found, return 0. */
    binfo = find_block(l2g, lno);
    if (binfo == NULL)
	return 0;

350
351
352
353
354
355
356
357
358
359
360
361
    /* If lno is lower than the first existing entry, return the first
       existing entry.  The search in the first block is performed the
       same way as when no entry was found after LNO in binfo. */
    if (lno >= binfo->start)
    {
	/* Check the found block.  See if there is another entry after
	   the entry for LNO.  If so, return it. */
	if (is_dense(binfo))
	{
	    for (i = lno - binfo->start + 1; i < binfo->first_free; ++i)
		if (binfo->value_block[i] != 0)
		{
inge's avatar
inge committed
362
363
364
365
366
		    *binfo_out = binfo;
		    *index_out = i;

		    return binfo->start + i;
		}
367
368
369
370
371
372
373
374
375
376
377
	}
	else
	{
	    i = sparse_locate_value(binfo, lno);
	    if (i < binfo->first_free && binfo->key_block[i] == lno)
		++i;
	    i = sparse_skip_deleted(binfo, i);
	    if (i < binfo->first_free)
	    {
		*binfo_out = binfo;
		*index_out = i;
inge's avatar
inge committed
378

379
		return binfo->key_block[i];
inge's avatar
inge committed
380
381
	    }
	}
Per Cederqvist's avatar
Per Cederqvist committed
382
	/*
Per Cederqvist's avatar
Per Cederqvist committed
383
384
	 * If the block didn't contain anything more, try the next
	 * block, if there is one.
Per Cederqvist's avatar
Per Cederqvist committed
385
	 */
inge's avatar
inge committed
386
387
388
389
	binfo++;
    }

    /*
Per Cederqvist's avatar
Per Cederqvist committed
390
     * We want the first existing entry in the block that binfo points
Per Cederqvist's avatar
Per Cederqvist committed
391
     * to.  However, binfo may point past the last block.
inge's avatar
inge committed
392
     */
Per Cederqvist's avatar
Per Cederqvist committed
393
394
395
396
397
    if (binfo >= l2g->blocks + l2g->num_blocks)
	return 0;

    for (i = 0; i < binfo->first_free; ++i)
	if (binfo->value_block[i] != 0)
398
	{
Per Cederqvist's avatar
Per Cederqvist committed
399
400
401
	    *binfo_out = binfo;
	    *index_out = i;
	    return key_value(binfo, i);
inge's avatar
inge committed
402
403
	}

Per Cederqvist's avatar
Per Cederqvist committed
404
    restart_kom("find_block_index_key found nothing\n");
inge's avatar
inge committed
405
406
}

407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
static void
join_range(Local_to_global *l2g,
	   struct l2g_block_info *first,
	   struct l2g_block_info *last)
{
    int next;
    int i;
    struct l2g_block_info *binfo;

    assert(first < last);
    assert(l2g->blocks <= first && first < l2g->blocks + l2g->num_blocks);
    assert(l2g->blocks <= last  && last  < l2g->blocks + l2g->num_blocks);

    if (is_dense(first))
	make_sparse(first);
    else
	sparse_compact(first);

425
    ++nr_joins;
426
427
428
    next = first->first_free;
    for (binfo = first + 1; binfo <= last; ++binfo)
    {
429
	++nr_joined_blocks;
430
431
432
433
434
435
436
437
	for (i = 0; i < binfo->first_free; ++i)
	    if (binfo->value_block[i] != 0)
	    {
		first->value_block[next] = binfo->value_block[i];
		first->key_block[next]  = key_value(binfo, i);
		next++;
	    }
	if (!is_dense(binfo))
438
439
	{
	    --nr_blocks_sparse;
440
	    sfree(binfo->key_block);
441
	}
442
443
444
445
446
	sfree(binfo->value_block);
    }

    while (binfo < l2g->blocks + l2g->num_blocks)
    {
447
	assert(binfo - (last - first) > first);
448
449
450
451
452
453
454
	*(binfo - (last - first)) = *binfo;
	++binfo;
    }

    assert(next <= L2G_BLOCKSIZE);
    first->first_free = next;
    first->zeroes = L2G_BLOCKSIZE - next;
455

456
    nr_blocks -= last - first;
457
458
459
    l2g->num_blocks -= last - first;
    l2g->blocks = srealloc(l2g->blocks,
			   l2g->num_blocks * sizeof(struct l2g_block_info));
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
}
    
    
static int
join_blocks(Local_to_global *l2g,
	    struct l2g_block_info *binfo)
{
    int zeroes;
    int gain;
    int best;
    int i;

    if (binfo->zeroes == 0)
	/* This block is full. */
	return 0;

    /* Note: on rare occasions we might be able to create one dense
       block from two (dense or sparse) blocks or one sparse block.
       We don't bother.  The gain would probably be small, and it
       would complicate the algorithm significantly.  */

    zeroes = L2G_BLOCKSIZE;
    gain = 0;
    best = 0;

    for (i = 0; zeroes > 0 && i <= binfo - l2g->blocks; ++i)
    {
	zeroes -= (L2G_BLOCKSIZE - (binfo-i)->zeroes);
	gain += (is_dense(binfo-i) ? 1 : 2);
	if (gain > 2 && zeroes >= 0)
	    best = i;
    }

    if (best > 0)
    {
	join_range(l2g, binfo - best, binfo);
	return 1;
    }

    zeroes = L2G_BLOCKSIZE;
    gain = 0;
    best = 0;
    for (i = 0; zeroes > 0 && i < l2g->num_blocks - (binfo - l2g->blocks); ++i)
    {
	zeroes -= (L2G_BLOCKSIZE - (binfo+i)->zeroes);
	gain += (is_dense(binfo+i) ? 1 : 2);
	if (gain > 2 && zeroes >= 0)
	    best = i;
    }

    if (best > 0)
    {
	join_range(l2g, binfo, binfo + best);
	return 1;
    }

    return 0;
}
inge's avatar
inge committed
518

inge's avatar
inge committed
519
520
521
522
523
/* ================================================================ */
/* ====                Outside accessible functions            ==== */
/* ================================================================ */


524
525
526
527
528
529
530
531
void
l2g_set_block_size(int sz)
{
    assert(L2G_BLOCKSIZE == -1);
    L2G_BLOCKSIZE = sz;
}


inge's avatar
inge committed
532
533
534
535
void
l2g_destruct(Local_to_global *l2g)
{
    l2g_clear(l2g);
536
537
538
539
540
541
542
    ++nr_destructs;
    --nr_l2gs;
#ifndef NDEBUG
    l2g->num_blocks = 0xdeadbeef;
    l2g->first_unused = 0xdeadbeef;
    l2g->blocks = (void*)0xdeadbeef;
#endif
inge's avatar
inge committed
543
544
545
546
547
548
}


void
l2g_init(Local_to_global *l2g)
{
549
550
551
    if (L2G_BLOCKSIZE == -1)
	L2G_BLOCKSIZE = 250;

inge's avatar
inge committed
552
553
    /* Initialize the main structure */
    l2g->num_blocks = 0;
554
    l2g->first_unused = 1;
inge's avatar
inge committed
555
    l2g->blocks = NULL;
556
557
558
    ++nr_constructs;
    if (++nr_l2gs > nr_l2gs_peak)
	nr_l2gs_peak = nr_l2gs;
inge's avatar
inge committed
559
560
561
562
563
564
}


void
l2g_clear(Local_to_global *l2g)
{
565
    struct l2g_block_info  * binfo;
inge's avatar
inge committed
566
567
568
    int               i;

    /* Free the block info structures. */
Per Cederqvist's avatar
Per Cederqvist committed
569
    binfo = l2g->blocks;
570
571
    for (i = 0; i < l2g->num_blocks; ++i)
    {
inge's avatar
inge committed
572
	if (binfo->key_block != NULL)
573
574
	{
	    --nr_blocks_sparse;
575
	    sfree(binfo->key_block);
576
	}
577
	sfree(binfo->value_block);
inge's avatar
inge committed
578
579
580

	binfo++;
    }
581
    nr_blocks -= l2g->num_blocks;
inge's avatar
inge committed
582

583
    /* Free the block pointers. */
Per Cederqvist's avatar
Per Cederqvist committed
584
    l2g->num_blocks = 0;
585
586
587
588
    l2g->first_unused = 1;
    if (l2g->blocks != NULL)
    {
	sfree(l2g->blocks);
589
590
	l2g->blocks = NULL;
    }
591
    ++nr_clears;
inge's avatar
inge committed
592
593
594
595
596
597
598
599
600
}


/*
 * Copy everything from one l2g structure (FROM) into another (TO).
 * TO has to be an initialized structure.
 */

void
601
l2g_copy(Local_to_global *dest, const Local_to_global *src)
inge's avatar
inge committed
602
{
603
604
    const struct l2g_block_info *binfo;
    int i;
inge's avatar
inge committed
605
606

    /* FIXME: More efficient code */
607
    l2g_clear(dest);
inge's avatar
inge committed
608

609
610
611
612
613
    for (binfo = src->blocks; binfo < src->blocks + src->num_blocks; ++binfo)
	for (i = 0; i < binfo->first_free; ++i)
	    l2g_append(dest, key_value(binfo, i), binfo->value_block[i]);
    assert(src->first_unused >= dest->first_unused);
    dest->first_unused = src->first_unused;
614
    ++nr_copies;
inge's avatar
inge committed
615
616
617
}


618
619
typedef Local_text_no Local_text_no_iter;

inge's avatar
inge committed
620
621
622
623
624
void
l2g_append(Local_to_global *l2g, 
	   Local_text_no    lno,
	   Text_no          tno)
{
625
    struct l2g_block_info  * binfo;
626
    Local_text_no_iter ix;
inge's avatar
inge committed
627

628
629
    if (lno < l2g->first_unused)
    {
David Byers's avatar
David Byers committed
630
	kom_log("l2g_append: won't add %lu<%lu> when first_unused=%lu\n",
631
632
633
634
635
636
637
	    (unsigned long)tno, (unsigned long)lno,
	    (unsigned long)l2g->first_unused);
	return;
    }

    l2g->first_unused = lno + 1;
	    
Per Cederqvist's avatar
Per Cederqvist committed
638
    /* Don't add anything if tno == 0. */
inge's avatar
inge committed
639
640
641
    if (tno == 0)
	return;

642
643
    if (l2g->num_blocks == 0)
    {
Per Cederqvist's avatar
Per Cederqvist committed
644
645
	/* If totally empty, add the first block and set binfo to it. */
	binfo = add_block(l2g);
inge's avatar
inge committed
646
	binfo->start = lno;
647
648
649
    }
    else
    {
Per Cederqvist's avatar
Per Cederqvist committed
650
651
652
	/* else let binfo point to the last block. */
	binfo = &(l2g->blocks[l2g->num_blocks - 1]);
    }
inge's avatar
inge committed
653

654
655
656
657
658
659
660
661
662
663
    /* Try to make room for the new data in the last block (pointed to
       by binfo).  If that fails, allocate a new block and let binfo
       point to it. */
    if (is_dense(binfo))
    {
	if (lno - binfo->start >= L2G_BLOCKSIZE)
	{
	    /* The last block is a dense block, and this text does not
	       fit in it.  Or is there anything we can do to make it fit?  */

Per Cederqvist's avatar
Per Cederqvist committed
664
	    if (binfo->zeroes >= L2G_BLOCKSIZE / 2)
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
		/* Arbitrary limit: make the block sparse if at least
		   half of it contains zeroes.  (Remeber that any
		   unused entries past binfo->first_free are counted
		   in binfo->zeroes). */
		make_sparse(binfo);
	    else
		/* There is no way to make room in the last block, so
		   allocate a new one.  (There *could* be ways to make
		   this entry fit into the last block: if the next to
		   last block is sparse and has room for an entry, we
		   might be able to slide the last block, and fit in
		   the new entry.  However, we never move data between
		   blocks.  That would be too complex and error-prone,
		   and the gain would be very small.) */
		binfo = add_block(l2g);
inge's avatar
inge committed
680
681
	}
    }
682
683
684
685
686
687
688
    else
    {
	/* The last block was sparse. */
	sparse_compact(binfo);
	if (binfo->first_free == L2G_BLOCKSIZE)
	    binfo = add_block(l2g);
    }
inge's avatar
inge committed
689

690
691
    /* Enter the new value into the last block. */
    if (binfo->first_free == 0)
inge's avatar
inge committed
692
693
	binfo->start = lno;

694
695
    if (is_dense(binfo))
    {
696
697
	for (ix = binfo->first_free; ix < lno - binfo->start; ++ix)
	    binfo->value_block[ix] = 0;
inge's avatar
inge committed
698
699
	binfo->value_block[lno - binfo->start] = tno;
	binfo->first_free = lno - binfo->start + 1;
700
701
702
    }
    else
    {
inge's avatar
inge committed
703
704
705
706
707
708
709
710
711
712
713
714
715
716
	binfo->key_block  [binfo->first_free] = lno;
	binfo->value_block[binfo->first_free] = tno;
	binfo->first_free++;
    }
    
    binfo->zeroes--;
}


/*
 * Delete the local text LNO from the structure L2G.
 */

void
717
718
l2g_delete(Local_to_global *l2g,
	   Local_text_no lno)
inge's avatar
inge committed
719
{
720
721
722
    struct l2g_block_info *binfo;
    int i;
    
inge's avatar
inge committed
723
    /* Find block where LNO might be and return if not there. */
David Byers's avatar
David Byers committed
724
    binfo = find_block(l2g, lno);
inge's avatar
inge committed
725
726
727
728
    if (binfo == NULL)
	return;

    /* Go through the block where it might be and delete LNO */
729
730
    if (is_dense(binfo))
    {
inge's avatar
inge committed
731
	if (binfo->start <= lno && lno < binfo->start + L2G_BLOCKSIZE)
732
733
734
	    if (binfo->value_block[lno - binfo->start] != 0)
	    {
		binfo->value_block[lno - binfo->start] = 0;
inge's avatar
inge committed
735
736
		binfo->zeroes++;
	    }
737
738
739
740
741
742
743
744
745
    }
    else
    {
	i = sparse_locate_value(binfo, lno);
	if (i < binfo->first_free && binfo->key_block[i] == lno)
	{
	    assert(binfo->value_block[i] != 0); /* See sparse_locate_value. */
	    binfo->value_block[i] = 0;
	    binfo->zeroes++;
inge's avatar
inge committed
746
747
	}
    }
748

749
750
751
    /* Delete the block if it became empty. */
    if (is_empty(binfo))
    {
752
	delete_block(l2g, binfo);
753
754
755
756
757
758
759
760
761
762
763
764
	return;
    }

    /* Check if we can save space by joining adjoining blocks. */
    if (!join_blocks(l2g, binfo))
	/* Could not join.  Compact this block if it is a sparse block
           with more than half of the entries zeroes.  */
	if (!is_dense(binfo)
	    && binfo->zeroes + binfo->first_free/2 > L2G_BLOCKSIZE)
	{
	    sparse_compact(binfo);
	}
inge's avatar
inge committed
765
766
767
768
}


Text_no
769
770
l2g_lookup(const Local_to_global *l2g,
	   Local_text_no lno)
inge's avatar
inge committed
771
{
772
    const struct l2g_block_info  * binfo;
inge's avatar
inge committed
773
774
775
776
777
778
    int               i;

    binfo = find_block(l2g, lno);
    if (binfo == NULL)
	return 0;

779
780
781
    if (is_dense(binfo))
    {
	if (binfo->start <= lno && lno < binfo->start + binfo->first_free)
inge's avatar
inge committed
782
	    return binfo->value_block[lno - binfo->start];
783
784
785
786
787
788
    }
    else
    {
	i = sparse_locate_value(binfo, lno);
	if (i < binfo->first_free && binfo->key_block[i] == lno)
	    return binfo->value_block[i];
inge's avatar
inge committed
789
790
791
792
793
794
795
    }

    return 0;
}


Local_text_no
796
797
l2g_next_key(const Local_to_global *l2g,
	     Local_text_no lno)
inge's avatar
inge committed
798
{
799
    const struct l2g_block_info  * binfo;
Per Cederqvist's avatar
Per Cederqvist committed
800
801
    int               i;

inge's avatar
inge committed
802
    return find_block_index_key(l2g, lno, &binfo, &i);
inge's avatar
inge committed
803
804
805
}


806
807
808
809
810
Local_text_no
l2g_first_appendable_key(const Local_to_global *l2g)
{
    return l2g->first_unused;
}
inge's avatar
inge committed
811

812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827

void
l2g_set_first_appendable_key(Local_to_global *l2g,
			     Local_text_no    key)
{
    if (key < l2g->first_unused)
    {
    	kom_log("l2g_append: won't decrease first_unused from %lu to %lu\n",
		(unsigned long)l2g->first_unused, (unsigned long)key);
	return;
    }

    l2g->first_unused = key;
}


inge's avatar
inge committed
828
void
829
830
l2g_delete_global_in_sorted(Local_to_global *l2g,
			    Text_no tno)
inge's avatar
inge committed
831
{
832
833
834
835
836
837
838
839
840
841
842
    /* This implementation performs a linear search through the list,
       but since we know that both the key and the data are sorted in
       ascending order we could do a binary search instead.
       FIXME: profile the code and se if it is worthwhile to implement
       something fancier.  */

    L2g_iterator iter;

    for (l2gi_searchall(&iter, l2g); !iter.search_ended; l2gi_next(&iter))
	if (iter.tno == tno)
	{
843
844
845
	    l2g_delete(l2g, iter.lno);
	    return;
	}
inge's avatar
inge committed
846
847
848
}


849
850
851
void
l2g_dump(FILE *file,
	 const Local_to_global *l2g)
inge's avatar
inge committed
852
{
853
854
    struct l2g_block_info  * binfo;
    int               i, j;
inge's avatar
inge committed
855

856
857
    fprintf(file, "Number of blocks: %d\n", l2g->num_blocks);
    fprintf(file, "First unused: %lu\n", l2g->first_unused);
inge's avatar
inge committed
858

859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
    binfo = l2g->blocks;
    for (i = 0; i < l2g->num_blocks; ++i) {
	fprintf(file, "%d: %d %d %d (%s) [", i,
		binfo->first_free,
		binfo->zeroes,
		(int) binfo->start,
		(is_dense(binfo) ? "dense" : "sparse"));

	if (is_dense(binfo)) {
	    for (j = 0; j < binfo->first_free; ++j) {
		fprintf(file, "%d ", (int) binfo->value_block[j]);
	    }
	} else {
	    for (j = 0; j < binfo->first_free; ++j) {
		fprintf(file, "%d:%d ", 
			(int) binfo->key_block[j],
			(int) binfo->value_block[j]);
	    }
	}
	fprintf(file, "]\n");

	binfo++;
    }
inge's avatar
inge committed
882
883
884
}


885
886
887
888
Success
l2g_read(FILE *fp, Local_to_global *l2g)
{
    int            c;
889
    Local_text_no  lno = 0;
890
    Text_no        tno;
891
892
893
    Local_text_no  first_unused;

    l2g_clear(l2g);
894
895
896

    /* Read past the start marker */
    fskipwhite(fp);
897
898
    if ( (c = getc(fp)) == EOF || c != '[')
    {
David Byers's avatar
David Byers committed
899
900
	kom_log("l2g_read() failed to find ``['' marker at pos %lu.\n",
                (unsigned long) ftell(fp));
901
	return FAILURE;
902
903
    }

904
905
    first_unused = fparse_long(fp);

906
    /* Read numbers until the EOL (end-of-l2g :-) ) marker is read. */
907
908
    while (1)
    {
909
	if ((c = getc(fp)) == EOF)
910
	{
David Byers's avatar
David Byers committed
911
	    kom_log("l2g_read(): unexpected EOF at pos %lu.\n",
912
913
914
		(unsigned long) ftell(fp));
	    return FAILURE;
	}
915

916
917
918
919
920
921
	switch (c)
	{
	case ' ':
	    lno = fparse_long(fp);
	    if (lno == 0)
	    {
David Byers's avatar
David Byers committed
922
		kom_log("l2g_read(): got local number 0 at pos %lu.\n",
923
924
925
926
927
928
929
		    (unsigned long) ftell(fp));
		return FAILURE;
	    }
	    break;
	case ',':
	    if (lno == 0)
	    {
David Byers's avatar
David Byers committed
930
		kom_log("l2g_read(): missing local number at pos %lu.\n",
931
932
933
934
935
936
937
938
		    (unsigned long)ftell(fp));
		return FAILURE;
	    }
	    ++lno;
	    /*FALLTHROUGH*/
	case ':':
	    if (lno == 0)
	    {
David Byers's avatar
David Byers committed
939
		kom_log("l2g_read(): missing local number at pos %lu.\n",
940
941
942
943
944
945
946
947
948
949
950
		    (unsigned long)ftell(fp));
		return FAILURE;
	    }
	    tno = fparse_long(fp);
	    l2g_append(l2g, lno, tno);
	    break;
	case ']':
	    assert(first_unused >= l2g->first_unused);
	    l2g->first_unused = first_unused;
	    return OK;
	default:
David Byers's avatar
David Byers committed
951
	    kom_log("l2g_read(): unexpected character ``%c'' at pos %lu.\n",
952
953
954
		c, (unsigned long) ftell(fp));
	    return FAILURE;
	}
955
    }
inge's avatar
inge committed
956
957
}

958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
/* Write the unsigned long value ``l'' to ``fp''. */
static void
put_ulong(unsigned long l,
	  FILE *fp)
{
    static char buf[sizeof(unsigned long) * 3 + 1];
    char       *cp;

    if (l < 10)
	putc("0123456789"[l], fp);
    else
    {
	cp = buf + sizeof(buf);
	while (l > 0)
	{
	    *--cp = (l % 10) + '0';
	    l /= 10;
	}
	fwrite(cp, buf + sizeof(buf) - cp, 1, fp);
    }
}
inge's avatar
inge committed
979
980

void
981
l2g_write(FILE *fp, const Local_to_global *l2g)
inge's avatar
inge committed
982
{
983
984
    const struct l2g_block_info *binfo;
    int ix;
985
986
987
    Local_text_no key;
    Text_no val;
    Local_text_no current = 0;
988
989

    putc('[', fp);
990
    put_ulong(l2g->first_unused, fp);
991

992
993
    for (binfo = l2g->blocks; binfo < l2g->blocks + l2g->num_blocks; ++binfo)
	for (ix = 0; ix < binfo->first_free; ++ix)
994
	    if ((val = binfo->value_block[ix]) != 0)
995
	    {
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
		key = key_value(binfo, ix);
		if (key > current + 3 || current == 0)
		{
		    putc(' ', fp);
		    put_ulong(key, fp);
		    putc(':', fp);
		    put_ulong(val, fp);
		    current = key;
		}
		else
		{
		    while (++current < key)
		    {
			putc(',', fp);
			putc('0', fp);
		    }
		    putc(',', fp);
		    put_ulong(val, fp);
		}
1015
	    }
1016
1017

    putc(']', fp);
inge's avatar
inge committed
1018
}
inge's avatar
inge committed
1019
1020
1021
1022
1023
1024
1025
1026


/* ================================================================ */
/* ===                     Iterator code                        === */
/* ================================================================ */


void
1027
1028
l2gi_searchall (L2g_iterator *l2gi,
		const Local_to_global *l2g)
inge's avatar
inge committed
1029
{
1030
    l2gi_searchsome(l2gi, l2g, 1, 0);
inge's avatar
inge committed
1031
1032
1033
1034
}


void
1035
1036
1037
l2gi_searchsome(L2g_iterator *l2gi,
		const Local_to_global *l2g, 
		Local_text_no begin, 
inge's avatar
inge committed
1038
1039
		Local_text_no end)
{
1040
    const struct l2g_block_info  * binfo;
inge's avatar
inge committed
1041
1042
    int               index;

1043
1044
    if (begin < 1)
    {
David Byers's avatar
David Byers committed
1045
	kom_log("l2gi_searchsome(%lu, %lu) called: min is 1\n",
1046
1047
1048
1049
	    (unsigned long)begin, (unsigned long)end);
	begin = 1;
    }

inge's avatar
inge committed
1050
    l2gi->l2g = l2g;
1051
    l2gi->beginval = begin;
inge's avatar
inge committed
1052
    l2gi->endval   = end;
1053
    l2gi->search_ended = 1;
inge's avatar
inge committed
1054

1055
    if (end != 0 && begin >= end)
inge's avatar
inge committed
1056
1057
	return;

1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
    if (find_block_index_key(l2g, begin-1, &binfo, &index) == 0)
	return;

    l2gi->binfo    = binfo;
    l2gi->arrindex = index;

    assert(binfo != NULL);
    
    l2gi->lno = key_value(binfo, index);
    l2gi->tno = binfo->value_block[index];
1068
1069
1070
1071
1072

    if (end == 0)
	l2gi->search_ended = 0;
    else
	l2gi->search_ended = l2gi->lno >= end;
inge's avatar
inge committed
1073
1074
1075
}


Per Cederqvist's avatar
Per Cederqvist committed
1076
1077
void
l2gi_next(L2g_iterator *l2gi)
inge's avatar
inge committed
1078
{
1079
1080
    const Local_to_global *l2g;
    const struct l2g_block_info *binfo;
inge's avatar
inge committed
1081
1082
1083
1084
1085
1086
1087
1088
    int                arrindex;

    l2g      = l2gi->l2g;
    arrindex = l2gi->arrindex + 1;
    for (binfo = l2gi->binfo;
	 binfo < l2g->blocks + l2g->num_blocks;
	 ++binfo, arrindex = 0)
    {
1089
1090
1091
1092
1093
1094
1095
	for (; arrindex < binfo->first_free; ++arrindex)
	{
	    if (l2gi->endval != 0
		&& key_value(binfo, arrindex) >= l2gi->endval)
	    {
		l2gi->search_ended = 1;
		return;
inge's avatar
inge committed
1096
1097
	    }

1098
1099
1100
1101
	    if (binfo->value_block[arrindex] != 0)
	    {
		l2gi->binfo    = binfo;
		l2gi->arrindex = arrindex;
inge's avatar
inge committed
1102

1103
1104
		l2gi->lno = key_value(binfo, arrindex);
		l2gi->tno = binfo->value_block[arrindex];
inge's avatar
inge committed
1105

1106
		return;
inge's avatar
inge committed
1107
1108
1109
1110
1111
1112
1113
	    }
	}
    }

    l2gi->search_ended = 1;
    return;
}
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128


Local_text_no
l2gi_begin(const L2g_iterator *l2gi)
{
    return l2gi->beginval;
}


Local_text_no
l2gi_end(const L2g_iterator *l2gi)
{
    return l2gi->endval;
}

1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151

/* ---------- */

extern void
dump_l2g_stats(FILE *file)
{
    fprintf(file, "---%s:\n\tExisting l2gs:    %ld\n",
	    __FILE__, nr_l2gs);
    fprintf(file, "\tExisting sparse blocks: %ld\n", nr_blocks_sparse);
    fprintf(file, "\tExisting blocks (total): %ld\n", nr_blocks);
    fprintf(file, "\tPeak l2gs: %ld\n", nr_l2gs_peak);
    fprintf(file, "\tPeak sparse blocks: %ld\n", nr_blocks_sparse_peak);
    fprintf(file, "\tPeak blocks (total): %ld\n", nr_blocks_peak);
    fprintf(file, "\tctor calls: %ld\n", nr_constructs);
    fprintf(file, "\tdtor calls: %ld\n", nr_destructs);
    fprintf(file, "\tclear calls: %ld\n", nr_clears);
    fprintf(file, "\tcopy calls: %ld\n", nr_copies);
    fprintf(file, "\tjoin operations: %ld\n", nr_joins);
    fprintf(file, "\tjoined blocks: %ld\n", nr_joined_blocks);
    fprintf(file, "\tsparse skip cost: %ld\n", sparse_skip_cost);
    fprintf(file, "\tsparse compactions: %ld\n", nr_sparse_compactions);
    fprintf(file, "\tsparsifications: %ld\n", nr_sparsifications);
}