Commit e34b6e2e authored by Niels Möller
Browse files

Abandoned the streamed continuation based parser for now. Wrote a

simpler parser instead, closer to Ron's implementation.

Rev: src/sexp_parser.c:1.3
Rev: src/sexp_parser.h:1.2
Rev: src/sexp_streamed_parser.c:1.1
parent 76ea2385
......@@ -23,732 +23,868 @@
#include "sexp_parser.h"
#include "read_scan.h"
#include "sexp.h"
#include "format.h"
#include "parse_macros.h"
#include "werror.h"
#include "xalloc.h"
#include <assert.h>
#include <string.h>
/* Automatically generated files. */
#include "sexp_table.h"
#include "digit_table.h"
#include <assert.h>
/* Counts the consecutive characters, starting at HERE and limited to the
 * LEFT characters that remain, whose entry in sexp_char_classes has at
 * least one bit in common with class. */
static UINT32 sexp_scan_class(struct simple_buffer *buffer, int class)
{
  UINT32 count = 0;

  while ( (count < LEFT)
          && (sexp_char_classes[HERE[count]] & class) )
    count++;

  return count;
}
/* Forward declarations */
struct parse_node;
static void do_mark_parse_node(struct parse_node *n,
void (*mark)(struct lsh_object *o));
static void do_free_parse_node(struct parse_node *n);
#include "sexp_parser.c.x"
/* CLASS:
(class
(name string_handler)
(vars
(handler method int "struct lsh_string *s")))
*/
/* Calls the handler method of the string_handler h, passing h itself and
 * the string s.  The argument h is expanded twice, so it must not have
 * side effects. */
#define HANDLE_STRING(h,s) ((h)->handler((h), (s)))
/* CLASS:
(class
(name parse)
(super scanner)
(vars
; How to parse the rest of the input stream
(next object scanner)))
*/
/* CLASS:
(class
(name parse_string)
(super parse)
(vars
(handler object string_handler)))
*/
/* CLASS:
(class
(name parse_sexp)
(super parse)
(vars
; What to do with this expression
(handler object sexp_handler)))
*/
/* CLASS:
(class
(name parse_literal_data)
(super parse_string)
(vars
(i . UINT32)
(data string)))
*/
/* NOTE(review): two definitions are interleaved in this span.  The lines
 * that use `closure`, `token`, `*s` or the LSH_* result codes go with the
 * do_parse_literal_data header; sexp_skip_space and sexp_skip_char declare
 * none of those names. */
static int do_parse_literal_data(struct scanner **s, int token)
/* Measures the run of CHAR_space characters starting at HERE with
 * sexp_scan_class and hands that count to ADVANCE. */
static void sexp_skip_space(struct simple_buffer *buffer)
{
CAST(parse_literal_data, closure, *s);
ADVANCE(sexp_scan_class(buffer, CHAR_space));
}
if (token < 0)
return LSH_FAIL | LSH_SYNTAX;
/* Fetch the next character of buffer with GET() and compare it with
 * expect.  Returns 1 when they are equal.  Returns 0, after reporting
 * through werror, when no input is left or when the character differs. */
static int sexp_skip_char(struct simple_buffer *buffer, UINT8 expect)
{
UINT8 c;
closure->data->data[closure->i++] = token;
if (closure->data->length == closure->i)
/* Nothing left to read: report EOF instead of calling GET(). */
if (!LEFT)
{
werror("sexp: Unexpected EOF when expecting character %x.\n",
expect);
return 0;
}
c = GET();
if (c != expect)
{
struct lsh_string *res = closure->data;
closure->data = NULL;
*s = closure->super.super.next;
return HANDLE_STRING(closure->super.handler, res);
werror("sexp: Expected char %x, got %x.\n", expect, c);
return 0;
}
return LSH_OK;
return 1;
}
/* NOTE(review): lines of make_parse_literal_data (the NEW(...) line, the
 * `closure->` assignments and `return &closure->...`) are interleaved here
 * with sexp_parse_token, which declares only `length` and `token`. */
static struct scanner *
make_parse_literal_data(UINT32 length,
struct string_handler *handler,
struct scanner *next)
/* Parse one or more characters into a simple string as a token.
 *
 * The caller must make sure that input is left and that the character at
 * HERE is in class CHAR_token_start; both conditions are asserted.
 * Returns a string built by ssh_format from the run of CHAR_token
 * characters at HERE, which is then passed to ADVANCE.  Returns NULL,
 * after a werror report, if that run is empty. */
static struct lsh_string *sexp_parse_token(struct simple_buffer *buffer)
{
NEW(parse_literal_data, closure);
closure->super.super.super.scan = do_parse_literal_data;
closure->super.super.next = next;
closure->super.handler = handler;
closure->i = 0;
closure->data = lsh_string_alloc(length);
UINT32 length;
struct lsh_string *token;
assert(LEFT);
assert(sexp_char_classes[*HERE] & CHAR_token_start);
length = sexp_scan_class(buffer, CHAR_token);
return &closure->super.super.super;
}
/* Checked separately because the start character was tested against
 * CHAR_token_start, while the scan uses CHAR_token. */
if (!length)
{
werror("sexp: Invalid token.\n");
return NULL;
}
/* FIXME: Arbitrary limit. */
#define SEXP_MAX_STRING 100000
token = ssh_format("%ls", length, HERE);
ADVANCE(length);
/* CLASS:
(class
(name parse_literal)
(super parse_string)
(vars
(got_length . int)
(length . UINT32)))
*/
return token;
}
/* NOTE(review): lines of do_parse_literal (those using `closure`, `token`,
 * `*s`, SEXP_MAX_STRING and the `fail` label) are interleaved here with
 * sexp_parse_decimal, which declares only `length` and `i`. */
static int do_parse_literal(struct scanner **s, int token)
/* Parse a decimal number from the run of CHAR_digit characters at HERE and
 * store it in *value.  The run must be non-empty (asserted).  Returns 1 on
 * success.  Returns 0, after a werror report, if the number has a leading
 * zero or more than 8 digits; in that case the digits are not passed to
 * ADVANCE. */
static int sexp_parse_decimal(struct simple_buffer *buffer, UINT32 *value)
{
CAST(parse_literal, closure, *s);
unsigned length = sexp_scan_class(buffer, CHAR_digit);
unsigned i;
if (token < 0) goto fail;
if (sexp_char_classes[token] & CHAR_digit)
assert(length);
/* A lone "0" is accepted; any longer number starting with '0' is not. */
if ((*HERE == '0') && (length != 1))
{
closure->length = closure->length * 10 + (token - '0');
if (closure->length > SEXP_MAX_STRING)
goto fail;
closure->got_length = 1;
return LSH_OK;
/* No leading zeros allowed */
werror("sexp: Unexpected leading zeroes\n");
return 0;
}
else if (closure->got_length && (token == ':'))
/* With at most 8 digits the value accumulated below cannot exceed
 * 99999999. */
if (length > 8)
{
*s = make_parse_literal_data(closure->length,
closure->super.handler,
closure->super.super.next);
return LSH_OK;
werror("sexp: Decimal number too long (%d digits, max is 8).\n",
length);
return 0;
}
for (i = 0, *value = 0; i<length; i++)
*value = *value * 10 + HERE[i] - '0';
fail:
*s = NULL;
return LSH_FAIL | LSH_SYNTAX;
ADVANCE(length);
return 1;
}
/* NOTE(review): lines of make_parse_literal (the NEW(...) line, the
 * `closure->` assignments and `return &closure->...`) are interleaved here
 * with sexp_parse_literal, which declares only `res`. */
static struct scanner *make_parse_literal(struct string_handler *handler,
struct scanner *next)
/* Reads a literal string of given length.
 *
 * Returns a string built by ssh_format from the length characters at HERE
 * and passes length to ADVANCE.  Returns NULL, after a werror report, if
 * fewer than length characters are left. */
static struct lsh_string *
sexp_parse_literal(struct simple_buffer *buffer, UINT32 length)
{
NEW(parse_literal, closure);
struct lsh_string *res;
if (LEFT < length)
{
werror("sexp: Unexpected EOF in literal.\n");
return NULL;
}
closure->super.super.super.scan = do_parse_literal;
closure->super.super.next = next;
closure->super.handler = handler;
closure->got_length = 0;
closure->length = 0;
res = ssh_format("%ls", length, HERE);
ADVANCE(length);
return &closure->super.super.super;
return res;
}
/* CLASS:
(class
(name return_string)
(super string_handler)
(vars
(c object sexp_handler)))
*/
/* Negative results of sexp_dequote.  Any other result is a character value.
 * The values are parenthesized so that the macros expand safely inside any
 * expression. */
#define QUOTE_END (-1)     /* The closing '"' was read */
#define QUOTE_INVALID (-2) /* End of input or a malformed escape sequence */
/* NOTE(review): the do_return_string header and body (the CAST and
 * HANDLE_SEXP lines) and the make_return_string header are interleaved
 * here with sexp_dequote, which declares only `c`. */
static int do_return_string(struct string_handler *h,
struct lsh_string *data)
/* Reads one character of a quoted string, decoding backslash escapes.
 *
 * Returns the character value, QUOTE_END when an unescaped '"' is read, or
 * QUOTE_INVALID when the input ends or an escape sequence is malformed.
 * A backslash followed by a line break (CR, LF, CR LF or LF CR) is dropped,
 * and the character after it is processed as if it came first. */
static int sexp_dequote(struct simple_buffer *buffer)
{
CAST(return_string, closure, h);
return HANDLE_SEXP(closure->c, make_sexp_string(NULL, data));
}
int c;
static struct string_handler *make_return_string(struct sexp_handler *c)
if (!LEFT)
return QUOTE_INVALID;
c = GET();
/* Re-entered with a fresh c after an escaped line break has been dropped. */
loop:
switch (c)
{
default:
return c;
case '"':
return QUOTE_END;
case '\\':
if (!LEFT)
return QUOTE_INVALID;
switch( (c = GET()) )
{
case '\\':
case '"':
case '\'':
return c;
case 'b':
return 0x8;
case 't':
return 0x9;
case 'n':
return 0xa;
case 'v':
return 0xb;
case 'f':
return 0xc;
case 'r':
return 0xd;
case '\r':
/* Ignore */
if (!LEFT)
return QUOTE_INVALID;
c = GET();
if (c == '\n')
{ /* Ignore this too */
if (!LEFT)
return QUOTE_INVALID;
c = GET();
}
goto loop;
case '\n':
/* Ignore */
if (!LEFT)
return QUOTE_INVALID;
c = GET();
if (c == '\r')
{ /* Ignore this too */
if (!LEFT)
return QUOTE_INVALID;
c = GET();
}
goto loop;
default:
/* Octal escape sequence */
{
int value;
unsigned i;
if (!(sexp_char_classes[c] & CHAR_octal))
{
werror("sexp: Invalid escape character in"
" quoted string: %x.\n", c);
return QUOTE_INVALID;
}
/* Exactly two more digits are read by the loop below. */
if (LEFT < 2)
{
werror("sexp: Unexpected eof in octal escape sequence.\n");
return QUOTE_INVALID;
}
value = c - '0';
for (i = 1; i<3; i++)
{
c = GET();
if (!(sexp_char_classes[c] & CHAR_octal))
{
werror("sexp: Invalid character %x in"
" octal escape sequence.\n", c);
return QUOTE_INVALID;
}
value = (value << 3) + (c - '0');
}
/* NOTE(review): three octal digits can encode values up to 0777 (511);
 * nothing here limits the result to 255.  Callers store it in
 * res->data[i], presumably a byte -- confirm whether values above 0377
 * should be rejected. */
return value;
}
}
}
}
/* Reads a quoted string of given length. Handles ordinary C escapes.
 * Assumes that the starting '"' has been skipped already.
 *
 * Exactly length characters are read through sexp_dequote; this function
 * itself does not read the closing '"'.  Returns the new string, or NULL
 * if sexp_dequote reports QUOTE_END (string too short) or QUOTE_INVALID
 * before length characters have been read.
 *
 * NOTE(review): the NEW(return_string, ...) line, the `closure->`
 * assignments and `return &closure->super;` below belong to
 * make_return_string, not to this function. */
static struct lsh_string *
sexp_parse_quoted_length(struct simple_buffer *buffer, UINT32 length)
{
NEW(return_string, closure);
closure->super.handler = do_return_string;
closure->c = c;
return &closure->super;
struct lsh_string *res;
UINT32 i;
res = lsh_string_alloc(length);
for (i = 0; i < length; i++)
{
int c = sexp_dequote(buffer);
/* Both QUOTE_END and QUOTE_INVALID are negative. */
if (c < 0)
{
if (c == QUOTE_END)
werror("sexp: Quoted string is too short.\n");
lsh_string_free(res);
return NULL;
}
res->data[i] = (unsigned) c;
}
return res;
}
/* MAKE_PARSE(name) declares do_parse_NAME, defines a constructor
 * make_parse_NAME that allocates a parse_sexp object with do_parse_NAME as
 * its scan method and with the given handler and next scanner, and finally
 * opens the definition of do_parse_NAME.  The macro invocation must
 * therefore be followed directly by the braced body of that function. */
#define MAKE_PARSE(name) \
static int do_parse_##name(struct scanner **s, int token); \
\
static struct scanner *make_parse_##name(struct sexp_handler *h, \
struct scanner *next) \
{ \
NEW(parse_sexp, closure); \
\
closure->super.super.scan = do_parse_##name; \
closure->super.next = next; \
closure->handler = h; \
\
return &closure->super.super; \
} \
\
static int do_parse_##name(struct scanner **s, int token)
/* CLASS:
(class
(name parse_skip)
(super parse_sexp)
(vars
(expect . int)
(value object sexp)))
*/
/* NOTE(review): lines of do_parse_skip (those using `closure`, `token`,
 * `*s`, HANDLE_SEXP and the LSH_* result codes) are interleaved here with
 * sexp_parse_quoted. */
static int do_parse_skip(struct scanner **s, int token)
/* Reads a quoted string of indefinite length.
 *
 * Works in two passes: memchr locates a '"' that is not directly preceded
 * by a backslash, which bounds the allocation; then characters are decoded
 * one at a time with sexp_dequote.  Returns the string, or NULL on
 * unexpected end of input or an invalid escape. */
static struct lsh_string *
sexp_parse_quoted(struct simple_buffer *buffer)
{
CAST(parse_skip, closure, *s);
struct lsh_string *res;
UINT32 length;
UINT32 i;
UINT8 *p;
/* NOTE(review): *HERE is read without a prior LEFT check.  Also, this
 * empty-string path returns without consuming the closing '"', whereas
 * both other successful exits consume it (through sexp_dequote or
 * sexp_skip_char) -- confirm that callers expect this. */
if (*HERE == '"')
return lsh_string_alloc(0);
/* FIXME: If the token doesn't match, perhaps we should install NULL
* instead? */
/* We want a reasonable upper bound on the string to allocate.
* Search for a double quote, not preceded by a backslash. */
if (token == closure->expect)
for (p = HERE; p < HERE + LEFT; )
{
p = memchr(p, '"', (HERE + LEFT) - p);
if (!p)
{
werror("sexp: Unexpected EOF in quoted string.\n");
return NULL;
}
/* p > HERE at this point, since a '"' at HERE was handled above.
 * NOTE(review): a closing quote that follows an escaped backslash
 * (\\") is also skipped by this test. */
if (p[-1] != '\\')
break;
p++;
}
length = p - HERE;
res = lsh_string_alloc(length);
for (i = 0; i<length; i++)
{
*s = closure->super.super.next;
return (closure->super.handler
? HANDLE_SEXP(closure->super.handler, closure->value)
: LSH_OK);
int c = sexp_dequote(buffer);
switch (c)
{
case QUOTE_INVALID:
lsh_string_free(res);
return NULL;
case QUOTE_END:
/* Escapes made the decoded string shorter than the raw bound. */
res->length = i;
return res;
default:
res->data[i] = (unsigned) c;
}
}
/* FIXME: More readable error message */
werror("Expected token %d, got %d\n", closure->expect, token);
/* We haven't seen the ending double quote yet. We must be looking at it now. */
if (!sexp_skip_char(buffer, '"'))
fatal("Internal error!\n");
*s = NULL;
return LSH_FAIL | LSH_SYNTAX;
return res;
}
/* NOTE(review): the make_parse_skip header, its NEW(...) line, its
 * `closure->` assignments and `return &closure->...` are interleaved here
 * with sexp_dehex, which declares only `i`, `value` and `c`. */
static struct scanner *make_parse_skip(int token,
struct sexp *value,
struct sexp_handler *handler,
struct scanner *next)
/* Reads two hex digits, skipping CHAR_space characters before each one,
 * and returns the value they encode.
 *
 * Each character is translated through the hex_digits table.  Returns
 * HEX_END if the table yields HEX_END for the first character.  Returns
 * HEX_INVALID if the input ends, if the table yields HEX_INVALID, or if it
 * yields HEX_END for the second character. */
static int sexp_dehex(struct simple_buffer *buffer)
{
NEW(parse_skip, closure);
unsigned i;
int value = 0;
closure->super.super.super.scan = do_parse_skip;
closure->super.super.next = next;
closure->super.handler = handler;
closure->expect = token;
closure->value = value;
for (i = 0; i<2; i++)
{
int c;
sexp_skip_space(buffer);
if (!LEFT)
{
werror("sexp: Unexpected EOF in hex string.\n");
return HEX_INVALID;
}
return &closure->super.super.super;
c = hex_digits[GET()];
switch (c)
{
case HEX_END:
/* An end marker is only acceptable before the first digit of a pair. */
if (!i)
return HEX_END;
/* Fall through */
case HEX_INVALID:
return HEX_INVALID;
default:
value = (value << 4) | c;
}
}
return value;
}
#if 0
MAKE_PARSE(simple_string)
/* Reads a hex string of given length, i.e. length bytes, each decoded from
 * a pair of hex digits by sexp_dehex.
 * Assumes that the starting '#' has been skipped already.
 *
 * Returns the new string, or NULL if sexp_dehex reports HEX_END (string
 * too short) or HEX_INVALID before length bytes have been read.
 *
 * NOTE(review): the CAST(parse_sexp, ...) line below does not belong to
 * this function, which has no `s` in scope. */
static struct lsh_string *
sexp_parse_hex_length(struct simple_buffer *buffer, UINT32 length)
{
CAST(parse_sexp, closure, *s);
struct lsh_string *res;
UINT32 i;
res = lsh_string_alloc(length);
for (i = 0; i < length; i++)
{
int c = sexp_dehex(buffer);
/* Treats every negative result as a failure; HEX_END gets its own
 * message. */
if (c < 0)
{
if (c == HEX_END)
werror("sexp: Hex string is too short.\n");
lsh_string_free(res);
return NULL;
}
res->data[i] = (unsigned) c;
}
return res;
}
/* Reads a hex string of indefinite length.
 *
 * The run of hex digits and spaces at HERE must be followed by '#'.  The
 * bytes are decoded with sexp_dehex into a string allocated from an upper
 * bound on the length.  Returns the string, or NULL if the terminator is
 * missing or the hex data is invalid.
 *
 * NOTE(review): the `switch(token)` line, the `case` labels and the
 * statements using `*s`, `closure` or the LSH_* result codes below do not
 * belong to this function, which declares none of those names. */
static struct lsh_string *
sexp_parse_hex(struct simple_buffer *buffer)
{
struct lsh_string *res;
UINT32 length = sexp_scan_class(buffer, CHAR_hex | CHAR_space);
/* Absolute position (it includes buffer->pos) of the expected '#'. */
UINT32 terminator = buffer->pos + length;
switch(token)
UINT32 i;
/* The first test keeps the second from reading past the end of the input.
 * NOTE(review): terminator is an absolute position, but elsewhere in this
 * file HERE is indexed relative to the current position (HERE[i] with
 * i < LEFT).  If HERE is buffer->data + buffer->pos, this looks
 * buffer->pos bytes too far and HERE[length] is what is meant -- confirm
 * against parse_macros.h. */
if ( (length == LEFT)
|| (HERE[terminator] != '#'))
{
case TOKEN_EOS:
fatal("Internal error!\n");
werror("sexp: Unexpected EOF in hex string.\n");
return NULL;
}
case '0':
/* This should be a single zero digit, as there mustn't be unnecessary
* leading zeros. */
*s = make_parse_skip(':', sexp_z(""),
closure->handler, closure->super.next);
return LSH_OK:
/* The number of digits, divided by two, rounded upwards,
* is an upper limit on the length. (length also counts the spaces
* in the run, which only makes the bound larger.) */
case '1': case '2': case '3':
case '4': case '5': case '6':
case '7': case '8': case '9':
*s = make_parse_literal(token - '0',
make_return_string(closure->handler),
closure->super.next);
return LSH_OK;
length = (length + 1) / 2;
default:
/* Syntax error */
return LSH_FAIL | LSH_SYNTAX;
res = lsh_string_alloc(length);
for (i = 0; i < length; i++)
{
int c = sexp_dehex(buffer);
switch (c)
{
case HEX_INVALID:
lsh_string_free(res);
return NULL;
case HEX_END:
/* Fewer bytes than the upper bound. */
res->length = i;
return res;
default:
res->data[i] = (unsigned) c;
}
}
/* All length bytes were decoded without meeting the terminator: only
 * spaces may remain before it.  Step past them and the '#'. */
assert(sexp_scan_class(buffer, CHAR_space) == (terminator - buffer->pos));
buffer->pos = terminator + 1;
return res;
}
#endif
#if 0
static int do_parse_advanced_string(struct scanner **s,
int token)
struct base64_state
{
CAST(parse_string, closure, *s);
/* Bits are shifted into the buffer from the right, 6 at a time */
unsigned buffer;