Commit bdab7fa5 authored by Niels Möller's avatar Niels Möller
Browse files

*** empty log message ***

Rev: src/rsync/checksum.c:1.1
Rev: src/rsync/rsync.h:1.2
Rev: src/rsync/send.c:1.2
parent f6fca5f2
/* checksum.c
*
* Calculate rsync specific checksums.
*
* $Id$
*/
#include "rsync.h"
/* NOTE: We do calculations modulo 2^16. We don't need to care about
 * overflow, as calculation with unsigned C variables is arithmetic
 * modulo 2^n for some n >= 16. */
/* Constant added to every input octet when rsync_update_1 sums it. */
#define CHAR_OFFSET 0
/* Extend the checksum of a block prefix by LENGTH more octets.
 *
 * For input x_i, i = 0, ..., l-1, define (modulo 2^16)
 *
 *   a_k = \sum_{i=0}^{k} x_i,
 *   b_k = \sum_{i=0}^{k} (l-i) x_i
 *
 * Only the final value b_{l-1} is ever needed, and changing the
 * order of summation gives the identity
 *
 *   b_{l-1} = \sum_{i=0}^{l-1} (l-i) x_i = \sum_{i=0}^{l-1} a_i
 *
 * So instead of b_k we keep track of c_k = \sum_{i=0}^{k} a_i.
 *
 * AP and CP point to the running values of a and c. They are read on
 * entry and written back, reduced to 16 bits, on return. DATA holds
 * the LENGTH octets to add; CHAR_OFFSET is added to each of them.
 */
void
rsync_update_1(unsigned *ap, unsigned *cp,
               UINT32 length, UINT8 *data)
{
  unsigned running = *ap;
  unsigned total = *cp;
  const UINT8 *octet = data;
  UINT32 remaining;

  for (remaining = length; remaining > 0; remaining--)
    {
      running += *octet++ + CHAR_OFFSET;
      total += running;
    }

  /* Intermediate values may exceed 16 bits; only the low 16 matter. */
  *ap = running & 0xffff;
  *cp = total & 0xffff;
}
/* Slide the rolling checksum of a full block forward over new data,
 * looking for a hash hit.
 *
 * AP and BP point to the two 16-bit sums of the current window of
 * BLOCK_SIZE octets. START points to the first octet of that window
 * and END to the new data that follows it: at step i the octet
 * start[i] leaves the window and end[i] enters it. At most LENGTH
 * steps are taken. HASH is indexed with (a ^ b) & 0xffff, so it must
 * provide at least 0x10000 slots.
 *
 * Returns the first non-NULL hash slot encountered, in which case
 * *FOUND is the number of octets that were processed (including the
 * one that produced the hit). Returns NULL if LENGTH octets were
 * processed with no hit; *FOUND is then left untouched.
 *
 * In both cases *AP and *BP are updated to the sums of the window
 * that was reached, so the caller can compare them against the
 * returned node or continue the search from there.
 */
struct rsync_node *
rsync_search(unsigned *ap, unsigned *bp, unsigned block_size,
             UINT32 length, UINT8 *start, UINT8 *end,
             UINT32 *found, struct rsync_node **hash)
{
  unsigned a = *ap;
  unsigned b = *bp;
  struct rsync_node *n = NULL;
  UINT32 i;

  for (i = 0; i < length; i++)
    {
      /* CHAR_OFFSET cancels out of the update of a (one octet enters,
       * one leaves), but the octet leaving the window was weighted
       * block_size times in b, so its offset must be removed too. */
      a += end[i] - start[i];
      b += a - block_size * (start[i] + CHAR_OFFSET);

      n = hash[(a ^ b) & 0xffff];
      if (n)
        {
          /* i + 1 octets have been shifted in at this point. */
          *found = i + 1;
          break;
        }
    }

  /* Write both sums back on every path; the rolling state must not
   * be lost when no hit was found. */
  *ap = a & 0xffff;
  *bp = b & 0xffff;

  return n;
}
......@@ -43,20 +43,13 @@
#define RSYNC_SUM_LENGTH MD5_DIGESTSIZE
/* Constant used in checksum calculation */
#define RSYNC_CHAR_OFFSET 0
/* Size of block count, block size, tail */
#define RSYNC_HEADER_SIZE 12
/* Checksum record for one chunk of a file: where the chunk is, its
 * index, and its two checksums. */
struct rsync_sum_buf
{
OFF_T offset; /* offset in file of this chunk */
unsigned len; /* length of chunk of file */
unsigned i; /* index of this chunk */
UINT32 sum1; /* simple checksum */
char sum2[RSYNC_SUM_LENGTH]; /* checksum, RSYNC_SUM_LENGTH octets */
};
/* Size of weak sum, md5 sum */
#define RSYNC_ENTRY_SIZE 20
/* Initial checksum calculations (by the receiver) */
#define RSYNC_INTERNAL_BUF_SIZE 20
/* NOTE: Unlike zlib, we want to know the file size before we start.
* This could be relaxed, but requires some modifications to the
......@@ -80,7 +73,7 @@ struct rsync_generate_state
struct md5_ctx block_sum;
/* Internal state */
UINT8 buf[RSYNC_INTERNAL_BUF_SIZE];
UINT8 buf[RSYNC_ENTRY_SIZE];
UINT8 buf_length; /* Zero means no buffered data. */
UINT8 buf_pos;
......@@ -96,6 +89,8 @@ struct rsync_generate_state
#define RSYNC_BUF_ERROR 2
/* Invalid input */
#define RSYNC_INPUT_ERROR 3
/* Out of memory (can happen only for rsync_read_table and rsync_send_init) */
#define RSYNC_MEMORY 4
int rsync_generate(struct rsync_generate_state *state);
int rsync_generate_init(struct rsync_generate_state *state,
......@@ -125,8 +120,6 @@ typedef int (*rsync_lookup_read_t)(void *opaque,
UINT8 *dst, UINT32 length,
UINT32 index, UINT32 offset, UINT32 *done);
enum rsync_receive_mode;
struct rsync_receive_state
{
/* Public fields */
......@@ -156,4 +149,69 @@ struct rsync_receive_state
int rsync_receive(struct rsync_receive_state *state);
void rsync_receive_init(struct rsync_receive_state *state);
/* Sending files */
struct rsync_table;
struct rsync_node;
/* State for rsync_read_table, which parses a list of checksums: a
 * header of RSYNC_HEADER_SIZE octets (block count, block size,
 * remainder) followed by entries of RSYNC_ENTRY_SIZE octets each.
 *
 * Before the first call the caller must set TABLE to NULL and POS to
 * zero, and fill in the limits MAX_COUNT and MAX_BLOCK_SIZE, which
 * rsync_read_table checks the header against.
 */
struct rsync_read_table_state
{
  /* Public fields */

  /* NULL until the header has been parsed, then the table being
   * filled in. */
  struct rsync_table *table;

  UINT32 count; /* Block count */
  UINT32 block_size;
  UINT32 remainder;

  /* Limits, set by the caller. A header announcing a larger count or
   * block size is rejected with RSYNC_INPUT_ERROR. */
  UINT32 max_count;
  UINT32 max_block_size;

  /* Private state */

  /* Holds a header or an entry that was split between calls. */
  UINT8 buf[RSYNC_ENTRY_SIZE];
  /* Number of octets currently stored in buf. */
  unsigned pos;
};

/* Feed LENGTH octets of INPUT to the parser. The input may be split
 * arbitrarily between calls. */
int
rsync_read_table(struct rsync_read_table_state *state,
                 UINT32 length, UINT8 *input);
/* State for rsync_send_init / rsync_send.
 *
 * NOTE(review): this comment used to read "For reading the list of
 * checksums", which describes rsync_read_table_state rather than
 * this struct.
 *
 * NOTE(review): the functions in send.c refer to the checksum fields
 * as a_sum / b_sum, while they are declared here as sum_a / sum_b;
 * one side has to be renamed. */
struct rsync_send_state
{
/* Public fields */
/* Input still to be consumed, and how many octets of it there are. */
UINT8 *next_in;
UINT32 avail_in;
/* Output area. Not referenced by the visible part of rsync_send. */
UINT8 *next_out;
UINT32 avail_out;
/* Limits */
/* NOTE(review): rsync_read_table checks fields with these names on
 * its own state struct; nothing visible in the send code reads them
 * from here. Confirm which struct they belong to. */
UINT32 max_count;
UINT32 max_block_size;
/* Checksum table to search in; stored by rsync_send_init. */
struct rsync_table *table;
/* Internal state */
int state; /* One of the STATE_* values in send.c */
UINT32 buf_size; /* Allocated size of buf; twice the table's block size */
UINT8 *buf; /* Allocated by rsync_send_init */
UINT32 pos; /* Number of octets currently stored in buf */
/* Running checksum of the buffered data. */
unsigned sum_a;
unsigned sum_b;
};
/* Prepare STATE for sending against TABLE. Allocates the internal
 * buffer; returns RSYNC_MEMORY if that allocation fails. */
int rsync_send_init(struct rsync_send_state *state,
struct rsync_table *table);
/* Process input from STATE. NOTE(review): the definition visible in
 * send.c is incomplete and does not yet use FLUSH. */
int rsync_send(struct rsync_send_state *state, int flush);
/* Presumably releases what rsync_send_init allocated; the definition
 * is not visible here -- TODO confirm. */
void rsync_send_free(struct rsync_send_state *state);
/* Add LENGTH octets of DATA to the running sums *AP and *CP, which
 * are kept reduced to 16 bits (checksum.c). */
void
rsync_update_1(unsigned *ap, unsigned *cp,
UINT32 length, UINT8 *data);
/* Roll the block checksum *AP, *BP forward over up to LENGTH octets,
 * dropping octets from START and adding octets from END, and return
 * the first non-NULL entry of HASH that the sums select, or NULL if
 * there is none (checksum.c). */
struct rsync_node *
rsync_search(unsigned *ap, unsigned *bp, unsigned block_size,
UINT32 length, UINT8 *start, UINT8 *end,
UINT32 *found, struct rsync_node **hash);
#endif /* RSYNC_H_INCLUDED */
......@@ -8,3 +8,261 @@
#include <assert.h>
/* Number of slots in the hash table of a struct rsync_table.
 *
 * Nodes are filed under the tag sum_a ^ sum_b, where both sums are
 * 16-bit values, and rsync_search indexes the table with
 * (a ^ b) & 0xffff. The table therefore needs one slot for each of
 * the 2^16 possible tags; with fewer slots those accesses run past
 * the end of the array. */
#define HASH_SIZE 0x10000
/* One block of the remote file, as described by an entry of the
 * checksum list. */
struct rsync_node
{
/* Next node filed under the same hash slot. */
struct rsync_node *next;
/* Position of the block in the list; assigned by rsync_add_entry. */
UINT32 index;
/* Block length. Not set by rsync_add_entry; rsync_read_table fills it
 * in with the block size, or with the remainder for the final block. */
UINT32 length;
/* The two 16-bit halves of the weak checksum. */
unsigned sum_a;
unsigned sum_b;
/* MD5 digest of the block. */
UINT8 sum_md5[MD5_DIGESTSIZE];
};
/* All blocks of the remote file, with a hash table for finding
 * blocks by weak checksum. */
struct rsync_table
{
  /* Heads of the per-slot node chains. */
  struct rsync_node *hash[HASH_SIZE];
  /* Number of nodes there is room for in all_nodes. */
  UINT32 alloc_size;
  /* Number of nodes in use. */
  UINT32 size;
  /* Size of every block except possibly the last one. */
  UINT32 block_size;
  /* Really alloc_size elements; make_rsync_table allocates the
   * struct with room for them. Must stay the last member. */
  struct rsync_node all_nodes[1];
};

/* Allocate an empty table with room for COUNT nodes describing
 * blocks of BLOCK_SIZE octets. Returns NULL if COUNT is too large
 * for the size to be representable or if the allocation fails. The
 * table is a single malloc block. */
static struct rsync_table *
make_rsync_table(UINT32 count, UINT32 block_size)
{
  /* Size of everything up to the node array. */
  const size_t fixed = sizeof(struct rsync_table) - sizeof(struct rsync_node);
  struct rsync_table *table;
  unsigned i;

  /* COUNT comes from the peer; don't let the multiplication wrap. */
  if (count > ((size_t) -1 - fixed) / sizeof(struct rsync_node))
    return NULL;

  table = malloc(fixed + count * sizeof(struct rsync_node));
  if (!table)
    return NULL;

  for (i = 0; i < HASH_SIZE; i++)
    table->hash[i] = NULL;

  table->alloc_size = count;
  table->size = 0;
  table->block_size = block_size;

  return table;
}
/* Append one entry of the checksum list to TABLE and file it in the
 * hash table.
 *
 * INPUT points to the entry: 16-bit sum_a, 16-bit sum_b, then
 * MD5_DIGESTSIZE octets of MD5 digest. TABLE must have room for
 * another node (checked with assert).
 *
 * Returns the new node. Its length field is set to zero; the caller
 * is expected to fill in the real block length.
 */
struct rsync_node *
rsync_add_entry(struct rsync_table *table,
                UINT8 *input)
{
  struct rsync_node *node;
  unsigned h;

  assert(table->size < table->alloc_size);

  node = table->all_nodes + table->size;
  node->index = table->size++;
  /* The real length isn't known here; don't leave it indeterminate. */
  node->length = 0;

  node->sum_a = READ_UINT16(input);
  node->sum_b = READ_UINT16(input + 2);
  memcpy(node->sum_md5, input + 4, MD5_DIGESTSIZE);

  /* The tag is a 16-bit value; reduce it so that the index is valid
   * whatever the table size is. Lookups must reduce the tag the same
   * way to find the node again. */
  h = (node->sum_a ^ node->sum_b) % HASH_SIZE;

  /* Push on the front of the chain for this slot. */
  node->next = table->hash[h];
  table->hash[h] = node;

  return node;
}
/* Parse a list of checksums, building s->table.
 *
 * The list is a header of RSYNC_HEADER_SIZE octets (three 32-bit
 * numbers: block count, block size and remainder) followed by one
 * entry of RSYNC_ENTRY_SIZE octets per block. INPUT holds the next
 * LENGTH octets of the list; it may be split between calls at any
 * point, and partial headers or entries are kept in s->buf.
 *
 * Before the first call, s->table must be NULL, s->pos zero, and
 * s->max_count and s->max_block_size set to the caller's limits.
 *
 * Returns
 *   RSYNC_PROGRESS     if more input is needed,
 *   RSYNC_DONE         if the last entry was read and no input is left,
 *   RSYNC_INPUT_ERROR  if the header violates the limits, has a
 *                      remainder that is not smaller than the block
 *                      size, or input remains after the last entry,
 *   RSYNC_MEMORY       if the table could not be allocated.
 */
int
rsync_read_table(struct rsync_read_table_state *s,
                 UINT32 length, UINT8 *input)
{
  while (length)
    {
      struct rsync_node *node;

      if (!s->table)
        {
          /* Still collecting the header. */
          UINT32 left = RSYNC_HEADER_SIZE - s->pos;

          if (length < left)
            {
              memcpy(s->buf + s->pos, input, length);
              s->pos += length;
              return RSYNC_PROGRESS;
            }

          memcpy(s->buf + s->pos, input, left);
          input += left;
          length -= left;
          s->pos = 0;

          s->count = READ_UINT32(s->buf);
          s->block_size = READ_UINT32(s->buf + 4);
          s->remainder = READ_UINT32(s->buf + 8);

          if ( (s->count > s->max_count)
               || (s->block_size > s->max_block_size)
               || (s->remainder >= s->block_size))
            return RSYNC_INPUT_ERROR;

          s->table = make_rsync_table(s->count, s->block_size);
          if (!s->table)
            return RSYNC_MEMORY;

          if (!s->count)
            /* No entries follow, so the list is already complete. */
            return length ? RSYNC_INPUT_ERROR : RSYNC_DONE;

          /* Go on with whatever input follows the header in this
           * call, rather than dropping it. */
          continue;
        }

      if (s->table->size >= s->table->alloc_size)
        /* Data after the last entry, e.g. a call made after
         * RSYNC_DONE was returned. */
        return RSYNC_INPUT_ERROR;

      if (s->pos)
        {
          /* Complete an entry that was split between calls. */
          UINT32 left = RSYNC_ENTRY_SIZE - s->pos;

          if (length < left)
            {
              memcpy(s->buf + s->pos, input, length);
              s->pos += length;
              return RSYNC_PROGRESS;
            }

          memcpy(s->buf + s->pos, input, left);
          input += left;
          length -= left;
          s->pos = 0;

          node = rsync_add_entry(s->table, s->buf);
        }
      else if (length < RSYNC_ENTRY_SIZE)
        {
          /* New partial entry; save it for the next call. */
          memcpy(s->buf, input, length);
          s->pos = length;
          return RSYNC_PROGRESS;
        }
      else
        {
          /* Whole entry available; use it in place. */
          node = rsync_add_entry(s->table, input);
          length -= RSYNC_ENTRY_SIZE;
          input += RSYNC_ENTRY_SIZE;
        }

      node->length = s->block_size;

      if (s->table->size == s->table->alloc_size)
        {
          /* That was the last block, which may be a short one. */
          if (s->remainder)
            node->length = s->remainder;

          return length ? RSYNC_INPUT_ERROR : RSYNC_DONE;
        }
    }

  return RSYNC_PROGRESS;
}
/* Values for the state field of struct rsync_send_state.
 * rsync_send_init sets STATE_SEARCH, which is also the only state
 * the visible part of rsync_send handles; STATE_INITIAL and
 * STATE_LITERAL are not referenced in the visible code. */
#define STATE_INITIAL 0
#define STATE_SEARCH 1
#define STATE_LITERAL 2
int rsync_send_init(struct rsync_send_state *s,
struct rsync_table *table)
{
assert(table->block_size <= 0xffffffffU/2);
s->buf_size = table->block_size * 2;
s->table = table;
s->buf = malloc(s->buf_size);
if (!buf)
return RSYNC_MEMORY;
s->pos = 0;
s->a_sum = s->b_sum = 0;
s->state = STATE_SEARCH;
}
/* We first read s->buf_size octets into a buffer (we can improve this
 * if avail_in is large).
 *
 * When the buffer is full, we search for a match. If we find a match,
 * we output a literal and a match.
 *
 * If no match is found, we output a literal consisting of all but the
 * (block_size-1) last octets. */
/* NOTE(review): this function is an unfinished draft. The code below
 * only buffers the first block and probes for a candidate match; the
 * output described above is not written yet (see the "..."
 * placeholder). Things to resolve before it can compile:
 *
 *  - the parameter is named `state' but the body uses `s';
 *  - `avail_in' (used bare), `s->table_b', `self->table' and
 *    `node->b_sum' are not declared anywhere in the visible code;
 *    presumably s->avail_in, s->table->block_size, s->table and
 *    n->sum_b are meant;
 *  - the checksum fields are called sum_a / sum_b in both
 *    struct rsync_send_state and struct rsync_node, and the digest
 *    field of the node is sum_md5, not md5;
 *  - `struct UINT8 *digest[...]' is presumably meant to be an array
 *    of MD5_DIGESTSIZE octets;
 *  - the two md5_update calls pass data and length in different
 *    orders;
 *  - nothing returns, consumes input or changes state after a
 *    search, so the for (;;) loop has no exit on that path, and
 *    `flush' is unused.
 */
int rsync_send(struct rsync_send_state *state, int flush)
{
for (;;)
switch (s->state)
{
case STATE_SEARCH:
{
if (s->pos < s->table->block_size)
{
/* We have less than one block of data available */
UINT32 left = s->table_b - s->pos;
if (s->avail_in < left)
{
/* Copy some data and update sums */
rsync_update_1(&s->a_sum, &s->b_sum,
s->avail_in, s->next_in);
memcpy(s->buf + s->pos, s->next_in, s->avail_in);
s->pos += s->avail_in;
s->next_in += avail_in;
s->avail_in = 0;
return RSYNC_PROGRESS;
}
else
{
/* Enough input to complete the first block: take just what is
 * missing and fall through to the search. */
rsync_update_1(&s->a_sum, &s->b_sum,
left, s->next_in);
memcpy(s->buf + s->pos, s->next_in, left);
s->pos += left;
s->next_in += left;
s->avail_in -= left;
}
}
assert(s->pos >= s->table->block_size);
/* Search */
{
UINT32 found;
/* Roll the checksum of the last buffered block forward over the
 * pending input, for at most one block's worth of octets. */
struct rsync_node *n
= rsync_search(&s->a_sum, &s->b_sum, s->table->block_size,
MIN(s->table->block_size, avail_in),
s->buf + s->pos - s->table->block_size,
s->next_in, &found, self->table->hash);
/* A hash hit only says that the slot is in use; compare the weak
 * sums before spending time on MD5. NOTE(review): only the head of
 * the chain is examined here, not the nodes linked after it. */
if (n && (s->a_sum == n->a_sum) && (s->b_sum == node->b_sum))
{
/* Block consisting of
*
* buf[pos - block_size + found ... pos], next_in[0 ... found]
*
* may match.
*/
struct md5_ctx m;
struct UINT8 *digest[MD5_DIGESTSIZE];
/* Hash the candidate block: its tail end in buf, then the part
 * taken from the input. */
md5_init(&m);
md5_update(&m, s->buf + s->pos + found - s->table->block_size,
s->table->block_size - found);
md5_update(&m, found, s->next_in);
md5_final(&m);
md5_digest(&m, digest);
if (!memcmp(n->md5, digest, MD5_DIGESTSIZE))
{
/* A match! */
...
}
}
}
}
}
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment