diff --git a/.gitattributes b/.gitattributes index fe014aa9d41e38a1b90f40891be5ccecf13e535b..9e4b8199526530d249a4c0bfdb05ef7bb1b8b0b5 100644 --- a/.gitattributes +++ b/.gitattributes @@ -10,6 +10,14 @@ testfont binary # Remove the corresponding line before committing # changes to these files. /lib/include/sql.pre.pike foreign_ident +/lib/modules/LR.pmod/Grammar_parser.pmod foreign_ident +/lib/modules/LR.pmod/item.pike foreign_ident +/lib/modules/LR.pmod/kernel.pike foreign_ident +/lib/modules/LR.pmod/lr.pike foreign_ident +/lib/modules/LR.pmod/parser.pike foreign_ident +/lib/modules/LR.pmod/priority.pike foreign_ident +/lib/modules/LR.pmod/rule.pike foreign_ident +/lib/modules/LR.pmod/scanner.pike foreign_ident /src/backend.c foreign_ident /src/builtin_functions.c foreign_ident /src/configure.in foreign_ident diff --git a/lib/modules/LR.pmod/Grammar_parser.pmod b/lib/modules/LR.pmod/Grammar_parser.pmod new file mode 100755 index 0000000000000000000000000000000000000000..1710448fc7b51c6fc1717a0cac0fd08a3a86d707 --- /dev/null +++ b/lib/modules/LR.pmod/Grammar_parser.pmod @@ -0,0 +1,355 @@ +#!/home/grubba/src/pike/build/sol2.5/pike + +/* + * $Id: Grammar_parser.pmod,v 1.1 1997/03/03 23:50:14 grubba Exp $ + * + * Generates a parser from a textual specification. 
+ *
+ * Henrik Grubbström 1996-12-06
+ */
+
+/*
+ * Includes
+ */
+
+#include <stdio.h>
+
+/*
+ * Defines
+ */
+
+/* #define DEBUG */
+
+/* Errors during parsing */
+/* Action for rule is missing from master object */
+#define ERROR_MISSING_ACTION 256
+/* Action for rule is not a function */
+#define ERROR_BAD_ACTION 512
+
+import LR;
+
+/* Parser for the grammar-specification language itself; its rules are
+ * installed and compiled in create().
+ */
+private object(parser) _parser = parser();
+
+/*
+ * Scanner
+ */
+
+/*
+ * Tokenizer for grammar specifications.
+ *
+ * Set str to the text and pos to 0, then call scan() repeatedly.
+ * scan() returns one of:
+ *   ""                        end of input
+ *   "%word"                   a directive, lower-cased (e.g. "%token")
+ *   "(" ")" "{" "}" ":" ";"   punctuation
+ *   ({ "string", contents })  a double-quoted string (escapes are skipped
+ *                             over but not translated)
+ *   ({ "int", value })        a decimal integer
+ *   ({ "identifier", text })  anything else
+ * Whitespace, comments and empty strings are skipped.
+ */
+class scan {
+  string str = "";
+  int pos;
+
+  array|string scan()
+  {
+    while (1) {
+      if (pos >= sizeof(str)) {
+        /* EOF */
+        return("");
+      } else {
+        int start=pos++;
+        int end;
+        switch (str[start]) {
+        case '%':
+          /* Token */
+          while ((pos < sizeof(str)) &&
+                 ((('A' <= str[pos]) && ('Z' >= str[pos])) ||
+                  (('a' <= str[pos]) && ('z' >= str[pos])))) {
+            pos++;
+          }
+          return (lower_case(str[start..pos-1]));
+        case '\"':
+          /* String */
+          while ((pos < sizeof(str)) &&
+                 (str[pos] != '\"')) {
+            if (str[pos] == '\\') {
+              /* Skip the escaped character too */
+              pos++;
+            }
+            pos++;
+          }
+          /* end is the index just past the contents: the closing quote,
+           * or the end of input when the string is unterminated (an
+           * escape may have stepped pos past the end, so clamp it).
+           */
+          if (pos < sizeof(str)) {
+            end = pos++;
+          } else {
+            end = pos = sizeof(str);
+          }
+          if (end > start+1) {
+            return ({ "string", str[start+1..end-1] });
+          }
+          /* Throw away empty strings (EOF) */
+          break;
+        case '/':
+          /* Comment */
+          if (pos >= sizeof(str)) {
+            /* A lone '/' as the very last character */
+            werror("Bad token \"/\" at end of input\n");
+            break;
+          }
+          if (str[pos] != '*') {
+            werror(sprintf("Bad token \"/%c\" in input\n", str[pos]));
+            break;
+          }
+          pos++;
+          /* Advance until pos-1..pos is the closing delimiter, or EOF */
+          while (1) {
+            if ((++pos >= sizeof(str)) ||
+                (str[pos-1 .. pos] == "*/")) {
+              pos++;
+              break;
+            }
+          }
+          break;
+        case '(':
+          return("(");
+        case ')':
+          return(")");
+        case '{':
+          return("{");
+        case '}':
+          return("}");
+        case ':':
+          return(":");
+        case ';':
+          return(";");
+        case '\n':
+        case '\r':
+        case ' ':
+        case '\t':
+          /* Whitespace */
+          break;
+        case '0'..'9':
+          /* int */
+          while ((pos < sizeof(str)) &&
+                 ('0' <= str[pos]) && ('9' >= str[pos])) {
+            pos++;
+          }
+          return (({ "int", (int)str[start .. pos-1] }));
+        default:
+          /* Identifier */
+          while ((pos < sizeof(str)) &&
+                 ((('A' <= str[pos]) && ('Z' >= str[pos])) ||
+                  (('a' <= str[pos]) && ('z' >= str[pos])) ||
+                  (('0' <= str[pos]) && ('9' >= str[pos])) ||
+                  ('_' == str[pos]) || (str[pos] >= 128))) {
+            pos++;
+          }
+          return (({ "identifier", str[start .. pos-1] }));
+        }
+      }
+    }
+  }
+}
+
+private object(scan) scanner = scan();
+
+/* Names of the nonterminals of the specification language, indexed by
+ * the nonterminal numbers used in create().
+ */
+private array(string) nonterminals = ({
+  "translation_unit",
+  "directives",
+  "directive",
+  "declaration",
+  "rule",
+  "symbols",
+  "terminals",
+  "symbol",
+  "action",
+  "nonterminal",
+  "terminal",
+  "priority",
+});
+
+/* Names of the nonterminals of the grammar being built; the index is
+ * the nonterminal number.
+ */
+private object(Stack.stack) id_stack = Stack.stack();
+
+/* Name -> number for the nonterminals of the grammar being built */
+private mapping(string:int) nonterminal_lookup = ([]);
+
+/* The grammar being built by make_parser() */
+private object(parser) g=parser();
+
+/* Object in which rule actions are looked up by name (may be 0) */
+private object master;
+
+/* Bitwise or of the ERROR_* codes raised by the latest make_parser() */
+int error;
+
+/* Returns the number of nonterminal id, allocating the next free number
+ * the first time the name is seen.
+ */
+private int add_nonterminal(string id)
+{
+  int nt = nonterminal_lookup[id];
+
+  if (!nt) {
+    nt = nonterminal_lookup[id] = id_stack->ptr;
+    id_stack->push(id);
+  }
+  return(nt);
+}
+
+/* Action for "%token" declarations; currently does nothing. */
+private void add_tokens(array(string) tokens)
+{
+  /* NOOP */
+#if 0
+  if (sizeof(tokens)) {
+    map(tokens, add_token);
+  }
+#endif /* 0 */
+}
+
+/* Action for "%left" declarations: marks every token as left
+ * associative with priority pri_val.
+ */
+private void set_left_tokens(string ignore, int pri_val, array(string) tokens)
+{
+  foreach (tokens, string token) {
+    g->set_associativity(token, -1);	/* Left associative */
+    g->set_priority(token, pri_val);
+  }
+}
+
+/* Symbol printer for _parser (the specification language) */
+private string internal_symbol_to_string(int|string symbol)
+{
+  if (intp(symbol)) {
+    return (nonterminals[symbol]);
+  } else {
+    return ("\"" + symbol + "\"");
+  }
+}
+
+/* Symbol printer for g (the grammar being built) */
+private string symbol_to_string(int|string symbol)
+{
+  if (intp(symbol)) {
+    if (symbol < id_stack->ptr) {
+      return (id_stack->arr[symbol]);
+    } else {
+      /* Only happens with the initial(automatic) rule */
+      return ("nonterminal"+symbol);
+    }
+  } else {
+    return ("\""+symbol+"\"");
+  }
+}
+
+/* Action for rule definitions: adds "nt : symbols" to g.
+ *
+ * action is the name of the action, or ";" when the rule has none.
+ * When a master object is set the name is resolved to a function in it;
+ * a missing or non-function entry is reported, recorded in error, and
+ * the rule is added with the unresolved name.
+ */
+private void add_rule(int nt, string colon, array(mixed) symbols, string action)
+{
+  if (action == ";") {
+    action = 0;
+  }
+  if ((action) && (master)) {
+    if (!master[action]) {
+      werror(sprintf("Warning: Missing action %s\n", action));
+
+      error |= ERROR_MISSING_ACTION;
+    } else if (!functionp(master[action])) {
+      werror(sprintf("Warning: \"%s\" is not a function in object\n",
+                     action));
+
+      error |= ERROR_BAD_ACTION;
+    } else {
+      g->add_rule(rule(nt, symbols, master[action]));
+      return;
+    }
+  }
+  g->add_rule(rule(nt, symbols, action));
+}
+
+/* Installs and compiles the rules of the specification language.
+ * The numbers are indices into nonterminals.
+ */
+void create()
+{
+  _parser->set_symbol_to_string(internal_symbol_to_string);
+
+  _parser->verbose = 0;
+
+  _parser->add_rule(rule(0, ({ 1, "" }), 0));		/* translation_unit */
+  _parser->add_rule(rule(1, ({ 2 }), 0));		/* directives */
+  _parser->add_rule(rule(1, ({ 1, 2 }), 0));		/* directives */
+  _parser->add_rule(rule(2, ({ 3 }), 0));		/* directive */
+  _parser->add_rule(rule(2, ({ 4 }), 0));		/* directive */
+  _parser->add_rule(rule(3, ({ "%token", 6, ";" }),
+                         add_tokens));			/* declaration */
+  _parser->add_rule(rule(3, ({ "%left", 11, 6, ";" }),
+                         set_left_tokens));		/* declaration */
+  _parser->add_rule(rule(4, ({ 9, ":", 5, ";" }),
+                         add_rule));			/* rule */
+  _parser->add_rule(rule(4, ({ 9, ":", 5, 8, ";" }),
+                         add_rule));			/* rule */
+  _parser->add_rule(rule(5, ({}),
+                         lambda () {
+                           return ({}); } ));		/* symbols */
+  _parser->add_rule(rule(5, ({ 5, 7 }),
+                         lambda (array x, mixed|void y) {
+                           if (y) { return (x + ({ y })); }
+                           else { return (x); }} ));	/* symbols */
+  _parser->add_rule(rule(6, ({ 10 }),
+                         lambda (string x) {
+                           return ({ x }); } ));	/* terminals */
+  _parser->add_rule(rule(6, ({ 6, 10 }),
+                         lambda (array(string) x, string y) {
+                           return (x + ({ y })); } ));	/* terminals */
+  _parser->add_rule(rule(7, ({ 9 }), 0 ));		/* symbol */
+  _parser->add_rule(rule(7, ({ 10 }), 0 ));		/* symbol */
+  _parser->add_rule(rule(8, ({ "{", "identifier", "}" }),
+                         lambda (mixed brace_l, string id, mixed brace_r) {
+                           return (id); } ));		/* action */
+  _parser->add_rule(rule(8, ({ "{", "string", "}" }),
+                         lambda (mixed brace_l, string str, mixed brace_r) {
+                           werror(sprintf("Warning: Converting string \"%s\" "
+                                          "to identifier\n", str));
+                           return(str); } ));		/* action */
+  _parser->add_rule(rule(9, ({ "identifier" }),
+                         add_nonterminal));		/* nonterminal */
+  _parser->add_rule(rule(10, ({ "string" }), 0));	/* terminal */
+  _parser->add_rule(rule(11, ({ "(", "int", ")" }),
+                         lambda (mixed paren_l, int val, mixed paren_r) {
+                           return (val);
+                         } ));				/* priority */
+  _parser->add_rule(rule(11, ({}), 0));			/* priority */
+
+  _parser->compile();
+}
+
+/* Builds a parser from the grammar specification in str.
+ *
+ * m, if given, is the object in which rule actions are looked up by name.
+ *
+ * Returns the parser, or 0 if the specification failed to parse, an
+ * action could not be resolved (see error), or g->compile() returned a
+ * false value.
+ */
+object(parser) make_parser(string str, object|void m)
+{
+  object(parser) res = 0;
+
+  master = m;
+  error = 0;	/* No errors yet */
+
+  g = parser();
+
+  scanner->str = str;
+  scanner->pos = 0;
+
+  g->set_symbol_to_string(symbol_to_string);
+
+  id_stack = Stack.stack();
+
+  nonterminal_lookup = ([]);
+
+#ifdef DEBUG
+  _parser->verbose = 1;
+  g->verbose = 1;
+#else
+  g->verbose = 0;
+#endif /* DEBUG */
+
+  /* Default rule -- Will never be reduced */
+  id_stack->push("Translation Unit");	/* Nonterminal #0 -- Start symbol */
+  g->add_rule(rule(0, ({ 1, "" }), 0));	/* Rule #0 -- Start rule */
+
+  _parser->parse(scanner->scan);
+
+  if ((!_parser->error) &&
+      (!error) &&
+      (g->compile())) {
+    res = g;
+  }
+
+  g = 0;	/* Don't keep any references */
+
+  return (res);
+}
+
+/*
+ * Syntax-checks and compiles the grammar files
+ */
+int main(int argc, string *argv)
+{
+  if (argc == 1) {
+    werror(sprintf("Usage:\n\t%s <files>\n", argv[0]));
+  } else {
+    int i;
+
+    for (i=1; i < argc; i++) {
+      object(FILE) f = FILE();
+      object(parser) g;
+
+      if (!f->open(argv[i], "r")) {
+        werror(sprintf("Failed to open \"%s\"\n", argv[i]));
+        continue;
+      }
+      werror(sprintf("Compiling \"%s\"...\n", argv[i]));
+      g = make_parser(f->read(0x7fffffff));
+      f->close();
+      /* make_parser() returns 0 on every kind of failure; the global
+       * error only covers unresolved actions.
+       */
+      if (!g) {
+        werror("Compilation failed\n");
+      } else {
+        werror("Compilation done\n");
+      }
+    }
+  }
+}
diff --git a/lib/modules/LR.pmod/item.pike b/lib/modules/LR.pmod/item.pike
new file mode 100644
index 0000000000000000000000000000000000000000..51f06d3f198cc0a9daeaf010d38089e21e924b78
--- /dev/null
+++ b/lib/modules/LR.pmod/item.pike
@@ -0,0 +1,28 @@
+/*
+ * $Id: item.pike,v
1.1 1997/03/03 23:50:15 grubba Exp $ + * + * An LR(0) item + * + * Henrik Grubbström 1996-11-27 + */ + +import LR; + +/* constant kernel = (program)"kernel"; */ +/* constant item = (program)"item"; */ + +/* The rule */ +object(rule) r; +/* How long into the rule the parsing has come */ +int offset; +/* The state we will get if we shift */ +object /* (kernel) */ next_state; +/* Item representing this one (used for shifts) */ +object /* (item) */ master_item = 0; +/* Look-ahead set for this item */ +multiset(string) direct_lookahead = (<>); +multiset(string) error_lookahead = (<>); +/* Relation to other items (used when compiling) */ +multiset(object /* (item) */ ) relation = (<>); +/* Depth counter (used when compiling) */ +int counter = 0; diff --git a/lib/modules/LR.pmod/kernel.pike b/lib/modules/LR.pmod/kernel.pike new file mode 100644 index 0000000000000000000000000000000000000000..f7ed03e2bbbb479d67b29bbf29fc490212eb5b74 --- /dev/null +++ b/lib/modules/LR.pmod/kernel.pike @@ -0,0 +1,77 @@ +/* + * $Id: kernel.pike,v 1.1 1997/03/03 23:50:16 grubba Exp $ + * + * Implements a LR(1) state; + * + * Henrik Grubbström 1996-11-25 + */ + +import LR; + +/* Used to check if a rule already has been added when doing closures */ +multiset(object(rule)) rules = (<>); + +/* Contains the items in this state */ +array(object(item)) items = ({}); + +/* Contains the items whose next symbol is this non-terminal */ +mapping(int : multiset(object(item))) symbol_items = ([]); + +/* The action table for this state + * + * object(kernel) SHIFT to this state on this symbol. + * object(rule) REDUCE according to this rule on this symbol. 
+ */
+mapping(int|string : object /* (kernel) */|object(rule)) action = ([]);
+
+/*
+ * Functions
+ */
+
+/* Appends item i to this state and, unless the item is at the end of
+ * its rule, indexes it in symbol_items under the symbol that follows
+ * its offset.
+ */
+void add_item(object(item) i)
+{
+  int|string symbol;
+
+  items += ({ i });
+  if (i->offset < sizeof(i->r->symbols)) {
+    symbol = i->r->symbols[i->offset];
+
+    if (symbol_items[symbol]) {
+      symbol_items[symbol][i] = 1;
+    } else {
+      symbol_items[symbol] = (< i >);
+    }
+  }
+}
+
+/* Returns 1 if state holds the same items as this state, else 0.
+ * Items are matched by identity first, then by (rule, offset).
+ */
+int equalp(object /* (kernel) */ state)
+{
+  /* Two states are the same if they contain the same items */
+  if (sizeof(state->items) != sizeof(items)) {
+    return(0);
+  }
+
+  /* Could probably make it test only kernel items */
+
+  foreach (state->items, object(item) i) {
+    if (search(items, i) == -1) {
+      int found = 0;
+
+      foreach (items, object(item) i2) {
+        /* Two items are the same if they have the same rule
+         * and the same offset;
+         */
+        if ((i->offset == i2->offset) &&
+            (i->r == i2->r)) {
+          found = 1;
+          break;	/* BUG in Pike 0.3 beta */
+        }
+      }
+      if (!found) {
+        return(0);
+      }
+    }
+  }
+  return(1);
+}
+
diff --git a/lib/modules/LR.pmod/lr.pike b/lib/modules/LR.pmod/lr.pike
new file mode 100755
index 0000000000000000000000000000000000000000..e786ba12073130836130b28c7a3617e2f9f4ab7d
--- /dev/null
+++ b/lib/modules/LR.pmod/lr.pike
@@ -0,0 +1,211 @@
+#!/home/grubba/src/pike/build/sol2.5/pike
+
+/*
+ * $Id: lr.pike,v 1.1 1997/03/03 23:50:17 grubba Exp $
+ *
+ * An LR(1) Parser in Pike
+ *
+ * Henrik Grubbström 1996-11-23
+ */
+
+import LR;
+
+/* The test grammar; filled in by create() */
+object(parser) g;
+
+/*
+ * Test actions
+ */
+
+/* E : E "+" T */
+int add_values(int x, mixed ignore, int y)
+{
+  werror(x+" + "+y+" = "+(x+y)+"\n");
+  return (x+y);
+}
+
+/* T : T "*" F */
+int mul_values(int x, mixed ignore, int y)
+{
+  werror(x+" * "+y+" = "+(x*y)+"\n");
+  return (x*y);
+}
+
+/* F : "(" E ")" -- passes the middle value through */
+int get_second_value(mixed ignored, int x, mixed ... ignored_also)
+{
+  return(x);
+}
+
+/* id : id value -- appends one decimal digit */
+int concat_values(int x, int y)
+{
+  return (x*10 + y);
+}
+
+/* value : "0" .. "9" */
+int make_value(string s)
+{
+  return((int)s);
+}
+
+/*
+ * Test grammar
+ */
+
+/* Printable names of the nonterminals, indexed by number */
+array(string) nonterminals = ({
+#if 0
+  "S", "A", "B",
+#else
+  "E'", "E", "T", "F", "id", "value",
+#endif
+});
+
+/* One rule per entry: the left-hand nonterminal followed by the
+ * right-hand symbols (ints are nonterminals, strings are tokens).
+ */
+array(array(string|int)) g_init = ({
+#if 0
+  ({ 0, 1, "" }),
+  ({ 1, 2, 1 }),
+  ({ 1 }),
+  ({ 2, "a", 2 }),
+  ({ 2, "b" }),
+#else
+  ({ 0, 1, "" }),
+  ({ 1, 1, "+", 2 }),
+  ({ 1, 2 }),
+  ({ 2, 2, "*", 3 }),
+  ({ 2, 3 }),
+  ({ 3, "(", 1, ")" }),
+  ({ 3, 4 }),
+  ({ 4, 5 }),
+  ({ 4, 4, 5 }),
+  ({ 5, "0" }),
+  ({ 5, "1" }),
+  ({ 5, "2" }),
+  ({ 5, "3" }),
+  ({ 5, "4" }),
+  ({ 5, "5" }),
+  ({ 5, "6" }),
+  ({ 5, "7" }),
+  ({ 5, "8" }),
+  ({ 5, "9" }),
+#endif
+});
+
+/* Action for the rule at the same index in g_init (0 for none) */
+array(int|function(mixed ...:mixed)) action_init = ({
+  0,
+  add_values,
+  0,
+  mul_values,
+  0,
+  get_second_value,
+  0,
+  0,
+  concat_values,
+  make_value,
+  make_value,
+  make_value,
+  make_value,
+  make_value,
+  make_value,
+  make_value,
+  make_value,
+  make_value,
+  make_value,
+});
+
+/*
+ * Test action
+ */
+string a_init(string ... args)
+{
+  if (sizeof(args)) {
+    /* Use the local printer: the parser's symbol_to_string is
+     * static private and cannot be reached through g.
+     */
+    werror(sprintf("Reducing %s => \"%s\"\n",
+                   map(args, symbol_to_string) * ", ",
+                   args * ""));
+    return (`+(@args));
+  } else {
+    /* Empty rule */
+    werror("Reducing /* empty */ => \"\"\n");
+    return("");
+  }
+}
+
+/* Symbol printer for the test grammar */
+string symbol_to_string(int|string symbol)
+{
+  if (intp(symbol)) {
+    if (symbol < sizeof(nonterminals)) {
+      return(nonterminals[symbol]);
+    } else {
+      return("nonterminal"+symbol);
+    }
+  } else {
+    return("\""+symbol+"\"");
+  }
+}
+
+/* Builds the test grammar in g from g_init and action_init */
+void create()
+{
+  g = parser();
+
+  /* Go through the public setter; the variable itself is private */
+  g->set_symbol_to_string(symbol_to_string);
+
+#if 0
+  foreach (g_init, array(string|int) i) {
+    g->add_rule(rule(i[0], i[1..], a_init));
+  }
+#else
+  foreach (indices(g_init), int i) {
+    g->add_rule(rule(g_init[i][0], g_init[i][1..], action_init[i]));
+  }
+#endif
+}
+
+/* Scanner that replays a fixed token list, ending with "" */
+class scan {
+  /*
+   * Test input
+   */
+
+  array(string) s_init = ({
+#if 0
+    "a", "a", "a", "b",
+    "a", "a", "a", "b",
+    "b",
+    "a", "a", "b", "a",
+#else
+    "1", "*", "(", "3", "+", "2", ")", "+", "2", "*", "3",
+#endif
+    "",
+  });
+
+  int s_pos = 0;
+
+  string scan()
+  {
+    return(s_init[s_pos++]);
+  }
+}
+
+object(scan) scanner = scan();
+
+/* Prints the grammar, compiles it and parses the test input */
+int main(int argc, string *argv)
+{
+  mixed result;
+
+  werror("Grammar:\n\n" + (string) g);
+
+#if efun(_memory_usage)
+  werror(sprintf("Memory usage:\n%O\n", _memory_usage()));
+#endif
+
+  werror("Compiling...\n");
+
+  g->verbose = 0;
+
+  g->compile();
+
+  werror("Compilation finished!\n");
+
+#if efun(_memory_usage)
+  werror(sprintf("Memory usage:\n%O\n", _memory_usage()));
+#endif
+
+  /* NOTE(review): Grammar_parser.pmod passes the scan function to
+   * parse() instead of setting a scanner member -- confirm which of
+   * the two conventions parse() expects.
+   */
+  g->scanner=scanner;
+
+  result = g->parse();
+
+  werror(sprintf("Result of parsing: \"%s\"\n", result + ""));
+}
diff --git a/lib/modules/LR.pmod/parser.pike b/lib/modules/LR.pmod/parser.pike
new file mode 100644
index 0000000000000000000000000000000000000000..d666df7f5fe98d7b72bd0f84dce360b7a013c7fa
--- /dev/null
+++ b/lib/modules/LR.pmod/parser.pike
@@ -0,0 +1,1306 @@
+/*
+ * $Id: parser.pike,v 1.1 1997/03/03 23:50:18 grubba Exp $
+ *
+ * A BNF-grammar in Pike.
+ * Compiles to a LALR(1) state-machine. + * + * Henrik Grubbström 1996-11-24 + */ + +/* + * Includes + */ + +#include <array.h> + +/* + * Defines + */ + +/* Errors during parsing */ +/* Unexpected EOF */ +#define ERROR_EOF 1 +/* Syntax error in input */ +#define ERROR_SYNTAX 2 +/* Shift-Reduce or Reduce-Reduce */ +#define ERROR_CONFLICTS 4 +/* Action is missing from action object */ +#define ERROR_MISSING_ACTION 8 +/* Action is not a function */ +#define ERROR_BAD_ACTION_TYPE 16 +/* Action invoked by name, but no object given */ +#define ERROR_NO_OBJECT 32 +/* Scanner not set */ +#define ERROR_NO_SCANNER 64 +/* Missing definition of nonterminal */ +#define ERROR_MISSING_DEFINITION 128 + +/* + * Classes + */ + +class state_queue { + /* + * This is a combined set and queue. + */ + + int head, tail; + array(object(LR.kernel)) arr=allocate(64); + + int|object(LR.kernel) memberp(object(LR.kernel) state) + { + int j; + + for (j = 0; j<tail; j++) { + if (state->equalp(arr[j])) { + return(j); + } + } + return(-1); + } + + object(LR.kernel) push_if_new(object(LR.kernel) state) + { + int index; + + if ((index = memberp(state)) >= 0) { + return (arr[index]); + } else { + if (tail == sizeof(arr)) { + arr += allocate(tail); + } + arr[tail++] = state; + + return(state); + } + } + + int|object(LR.kernel) next() + { + if (head == tail) { + return(0); + } else { + return(arr[head++]); + } + } +} + +import LR; + +/* The grammar itself */ +static private mapping(int|string : array(object(rule))) grammar = ([]); + +/* Priority table for terminal symbols */ +static private mapping(string : object(priority)) operator_priority = ([]); + +static private multiset(mixed) nullable = (< >); + +#if 0 +static private mapping(mixed : multiset(object(rule))) derives = ([]); + +/* Maps from symbol to which rules may start with that symbol */ +static private mapping(mixed : multiset(object(rule))) begins = ([]); +#endif /* 0 */ + + +/* Maps from symbol to which rules use that symbol + * (used for 
findling nullable symbols) + */ +static private mapping(int : multiset(object(rule))) used_by = ([]); + +static private object(kernel) start_state; + +/* Verbosity level + * 0 - none + * 1 - some + */ +int verbose=1; + +/* Error code */ +int error=0; + +/* Number of next rule (used only for conflict resolving) */ +static private int next_rule_number = 0; + +/* + * Functions + */ + +/* Here are some help functions */ + +/* Several cast to string functions */ + +static private string builtin_symbol_to_string(int|string symbol) +{ + if (intp(symbol)) { + return("nonterminal"+symbol); + } else { + return("\"" + symbol + "\""); + } +} + +static private function(int|string : string) symbol_to_string = builtin_symbol_to_string; + +string rule_to_string(object(rule) r) +{ + array(string) res = ({ symbol_to_string(r->nonterminal), ":\t" }); + + if (sizeof(r->symbols)) { + foreach (r->symbols, int|string symbol) { + res += ({ symbol_to_string(symbol), " " }); + } + } else { + res += ({ "/* empty */" }); + } + return(res * ""); +} + +string item_to_string(object(item) i) +{ + array(string) res = ({ symbol_to_string(i->r->nonterminal), ":\t" }); + + if (i->offset) { + foreach(i->r->symbols[0..i->offset-1], int|string symbol) { + res += ({ symbol_to_string(symbol), " " }); + } + } + res += ({ "· " }); + if (i->offset != sizeof(i->r->symbols)) { + foreach(i->r->symbols[i->offset..], int|string symbol) { + res += ({ symbol_to_string(symbol), " " }); + } + } + if (sizeof(indices(i->direct_lookahead))) { + res += ({ "\t{ ", + map(indices(i->direct_lookahead), symbol_to_string) * ", ", + " }" }); + } + return(res * ""); +} + +string state_to_string(object(kernel) state) +{ + return (map(state->items, item_to_string) * "\n"); +} + +string cast_to_string() +{ + array(string) res = ({}); + + foreach (indices(grammar), int nonterminal) { + res += ({ symbol_to_string(nonterminal) }); + foreach (grammar[nonterminal], object(rule) r) { + res += ({ "\t: " }); + if (sizeof(r->symbols)) { + 
foreach (r->symbols, int|string symbol) { + res += ({ symbol_to_string(symbol), " " }); + } + } else { + res += ({ "/* empty */" }); + } + res += ({ "\n" }); + } + res += ({ "\n" }); + } + return (res * ""); +} + +mixed cast(string type) +{ + if (type == "string") { + return(cast_to_string()); + } + throw ( ({ "Cast to "+type+" not supported\n", backtrace()[0..-2] }) ); +} + +/* Here come the functions that actually do some work */ + +void set_priority(string terminal, int pri_val) +{ + object(priority) pri; + + if (pri = operator_priority[terminal]) { + pri->value = pri_val; + } else { + operator_priority[terminal] = priority(pri_val, 0); + } +} + +void set_associativity(string terminal, int assoc) +{ + object(priority) pri; + + if (pri = operator_priority[terminal]) { + pri->assoc = assoc; + } else { + operator_priority[terminal] = priority(0, assoc); + } +} + +void set_symbol_to_string(function(int|string:string) s_to_s) +{ + if (s_to_s) { + symbol_to_string = s_to_s; + } else { + symbol_to_string = builtin_symbol_to_string; + } +} + +/* Add a rule to the grammar */ +void add_rule(object(rule) r) +{ + array(object(rule)) rules; + int|string symbol; + + /* DEBUG */ + if (verbose) { + werror("Adding rule: " + rule_to_string(r) + "\n"); + } + + /* !DEBUG */ + + r->number = next_rule_number++; + + /* First add the rule to the grammar */ + if (grammar[r->nonterminal]) { + grammar[r->nonterminal] += ({ r }); + } else { + grammar[r->nonterminal] = ({ r }); + } + + /* Then see if it is nullable */ + if (!r->has_tokens) { + object(Stack.stack) new_nullables = Stack.stack(); + + foreach (r->symbols, symbol) { + if (nullable[symbol]) { + r->num_nonnullables--; + } else { + if (used_by[symbol]) { + if (used_by[symbol][r]) { + /* Only count a symbol once */ + r->num_nonnullables--; + } else { + used_by[symbol][r] = 1; + } + } else { + used_by[symbol] = (< r >); + } + } + } + + if (!(r->num_nonnullables)) { + /* This rule was nullable */ + new_nullables->push(r->nonterminal); 
+ + while (new_nullables->ptr) { + symbol = new_nullables->pop(); + if (verbose) { + werror(sprintf("Nulling symbol %s\n", + symbol_to_string(symbol))); + } + nullable[symbol] = 1; + if (used_by[symbol]) { + foreach (indices(used_by[symbol]), object(rule) r2) { + if (!(--r2->num_nonnullables)) { + new_nullables->push(r2->nonterminal); + } + } + used_by[symbol] = 0; /* No more need for this info */ + } + } + } + } + + /* The info calculated from this point is not at the moment used + * by the compiler + */ +#if 0 + /* Now check for symbols that may begin this rule */ + foreach (r->symbols, symbol) { + if (!stringp(symbol)) { + multiset set = begins[symbol]; + + r->prefix_nonterminals |= (< symbol >); + + if (set) { + set[r] = 1; + } else { + begins[symbol] = (< r >); + } + + if (grammar[symbol]) { + foreach (grammar[symbol], object(rule) r2) { + r->prefix_nonterminals |= r2->prefix_nonterminals; + r->prefix_tokens |= r2->prefix_tokens; + + foreach (indices(r2->prefix_nonterminals), mixed s2) { + set = begins[s2]; + + if (set) { + set[r] = 1; + } else { + begins[s2] = (< r >); + } + } + } + } + if (!nullable[symbol]) { + break; + } + } else { + r->prefix_tokens[symbol] = 1; + break; + } + } + + /* Scan through the rules beginning with this rule's non-terminal */ + if (begins[r->nonterminal]) { + foreach (indices(begins[r->nonterminal]), object(rule) r2) { + r2->prefix_nonterminals |= r->prefix_nonterminals; + r2->prefix_tokens |= r->prefix_tokens; + + /* NOTE: Might want to move values(r->prefixes) out of the loop */ + foreach (values(r->prefix_nonterminals), symbol) { + multiset set = begins[symbol]; + + if (set) { + set[r2] = 1; + } else { + begins[symbol] = (< r2 >); + } + } + } + } +#endif /* 0 */ +} + +/* Here come the functions used by the compiler */ + +static private multiset(int|string) make_goto_set(object(kernel) state) +{ + multiset(int|string) set = (<>); + + foreach (state->items, object(item) i) { + if (i->offset != sizeof(i->r->symbols)) { + 
set[i->r->symbols[i->offset]] = 1; + } + } + + if (verbose) { + werror(sprintf("make_goto_set()=> (< %s >)\n", + map(indices(set), symbol_to_string) * ", ")); + } + + return (set); +} + +static private void make_closure(object(kernel) state, int nonterminal) +{ + if (grammar[nonterminal]) { + foreach (grammar[nonterminal], object(rule) r) { + if (!(state->rules[r])) { + object(item) new_item = item(); + + new_item->r = r; + new_item->offset = 0; + + state->rules[r] = 1; + + state->add_item(new_item); + + if (sizeof(r->symbols) && intp(r->symbols[0])) { + make_closure(state, r->symbols[0]); + } + } + } + } else { + werror(sprintf("Error: Definition missing for non-terminal %s\n", + symbol_to_string(nonterminal))); + error |= ERROR_MISSING_DEFINITION; + } +} + +static private object(kernel) first_state() +{ + object(kernel) state = kernel(); + + foreach (grammar[0], object(rule) r) { + if (!state->rules[r]) { + object(item) i = item(); + + i->r = r; + i->offset = 0; + + state->add_item(i); + state->rules[r] = 1; /* Since this is an item with offset 0 */ + + if ((sizeof(r->symbols)) && + (intp(r->symbols[0]))) { + make_closure(state, r->symbols[0]); + } + } + } + return(state); +} + +/* + * Contains all states used. + * + * In the queue-part are the states that remain to be compiled. 
+ */
+static private object(state_queue) s_q;
+
+/* Computes the state reached from state by shifting symbol.
+ *
+ * Every item of state whose next symbol is symbol is advanced one step
+ * and closed over. The resulting state is merged with an equal state
+ * already in s_q, if there is one, and recorded as next_state on the
+ * items it was generated from.
+ *
+ * Returns the (possibly pre-existing) state.
+ */
+static private object(kernel) do_goto(object(kernel) state, int|string symbol)
+{
+  object(kernel) new_state = kernel();
+  multiset(object(item)) items;
+  object(rule) r;
+  int offset;
+
+  if (verbose) {
+    werror(sprintf("Performing GOTO on <%s>\n", symbol_to_string(symbol)));
+  }
+
+  items = state->symbol_items[symbol];
+  if (items) {
+    foreach (indices(items), object(item) i) {
+      int|string lookahead;
+
+      object(item) new_item = item();
+
+      offset = i->offset;
+
+      new_item->offset = ++offset;
+      new_item->r = r = i->r;
+
+      new_state->add_item(new_item);
+
+      /* A nonterminal after the new position needs its rules too */
+      if ((offset != sizeof(r->symbols)) &&
+          intp(lookahead = r->symbols[offset])) {
+        make_closure(new_state, lookahead);
+      }
+    }
+  }
+
+  /* DEBUG */
+
+  if (verbose) {
+    werror(sprintf("GOTO on %s generated state:\n%s\n",
+                   symbol_to_string(symbol),
+                   state_to_string(new_state)));
+  }
+
+  /* !DEBUG */
+
+  new_state = s_q->push_if_new(new_state);
+
+  if (items) {
+    foreach (indices(items), object(item) i) {
+      i->next_state = new_state;
+    }
+  }
+
+  /* The function is declared to return the state */
+  return (new_state);
+}
+
+/* Stack of the items currently being traversed by traverse_items() */
+static private object(Stack.stack) item_stack;
+
+/* Propagates direct_lookahead along the relation of item i.
+ *
+ * counter holds the stack depth at which an item was entered, lowered
+ * to the smallest depth reachable through its relation. An item whose
+ * counter still equals its own depth afterwards is the root of a
+ * group of mutually reachable items: the group is popped, marked as
+ * finished (0x7fffffff) and given the root's lookahead set.
+ *
+ * conflict_func is called for every group with more than one member;
+ * its argument is true if no member has any error_lookahead.
+ */
+static private void traverse_items(object(item) i,
+                                   function(int:void) conflict_func)
+{
+  int depth;
+
+  item_stack->push(i);
+
+  i->counter = depth = item_stack->ptr;
+
+  foreach (indices(i->relation), object(item) i2) {
+    if (!i2->counter) {
+      /* Not visited yet */
+      traverse_items(i2, conflict_func);
+    }
+    if (i->counter > i2->counter) {
+      i->counter = i2->counter;
+    }
+
+    i->direct_lookahead |= i2->direct_lookahead;
+  }
+
+  /* item.pike declares counter only; it has no number or count */
+  if (i->counter == depth) {
+    int cyclic = 0;
+    int empty_cycle = 1;
+    object(item) i2;
+
+    while ((i2 = item_stack->pop()) != i) {
+
+      i2->counter = 0x7fffffff;
+
+      i2->direct_lookahead = i->direct_lookahead;
+
+      cyclic = 1;
+      empty_cycle &= !(sizeof(i2->error_lookahead));
+    }
+    i->counter = 0x7fffffff;
+
+    if (cyclic) {
+      if (verbose) {
+        werror(sprintf("Cyclic item\n%s\n",
+                       item_to_string(i)));
+      }
+      conflict_func(empty_cycle && !(sizeof(i->error_lookahead)));
+    }
+  }
+}
+ +static private void shift_conflict(int empty) +{ + /* Ignored */ +} + +static private void handle_shift_conflicts() +{ + item_stack = Stack.stack(); + + /* Initialize the counter */ + for (int index = 0; index < s_q->tail; index++) { + foreach (s_q->arr[index]->items, object(item) i) { + if ((i->offset != sizeof(i->r->symbols)) && + (intp(i->r->symbols[i->offset])) && + (!i->master_item)) { + /* Nonterminal master item */ + i->counter = 0; + } else { + i->counter = 0x7fffffff; + } + } + } + + for (int index = 0; index < s_q->tail; index++) { + foreach (s_q->arr[index]->items, object(item) i) { + if (!i->number) { + traverse_items(i, shift_conflict); + } + } + } +} + +static private void follow_conflict(int empty) +{ +} + +static private void handle_follow_conflicts() +{ + item_stack = Stack.stack(); + + /* Initialize the counter */ + for (int index = 0; index < s_q->tail; index++) { + foreach (s_q->arr[index]->items, object(item) i) { + if ((i->offset != sizeof(i->r->symbols)) && + (intp(i->r->symbols[i->offset])) && + (!i->master_item)) { + /* Nonterminal master item */ + i->counter = 0; + } else { + i->counter = 0x7fffffff; + } + } + } + + for (int index = 0; index < s_q->tail; index++) { + foreach (s_q->arr[index]->items, object(item) i) { + if (!i->number) { + traverse_items(i, shift_conflict); + } + } + } +} + +static private int go_through(object(kernel) state, object(rule) r, int offset, + object(item) current_item) +{ + int index; + object(item) i, master; + + for (index = 0; index < sizeof(state->items); index++) { + if ((state->items[index]->r == r) && + (state->items[index]->offset == offset)) { + /* Found the index for the current rule and offset */ + break; + } + } + + /* What to do if not found? 
*/ + if (index == sizeof(state->items)) { + werror(sprintf("go_through: item with offset %d in rule\n%s\n" + "not found in state\n%s\n", + offset, rule_to_string(r), state_to_string(state))); + werror(sprintf("Backtrace:\n%s\n", describe_backtrace(backtrace()))); + return(0); + } + + i = state->items[index]; + if (i->master_item) { + master = i->master_item; + } else { + master = i; + } + + if (i->offset < sizeof(i->r->symbols)) { + if (go_through(i->next_state, i->r, i->offset + 1, current_item)) { + /* Nullable */ + if ((master->offset < sizeof(master->r->symbols)) && + (intp(master->r->symbols[master->offset]))) { + /* Don't include ourselves */ + if (master != current_item) { + master->relation[current_item] = 1; + } + } + return(nullable[i->r->symbols[i->offset]]); + } else { + return (0); /* Not nullable */ + } + } else { + /* At end of rule */ + master->relation[current_item] = 1; + return (1); /* Always nullable */ + } +} + +static private int repair(object(kernel) state, multiset(int|string) conflicts) +{ + multiset(int|string) conflict_set = (<>); + + if (verbose) { + werror(sprintf("Repairing conflict in state:\n%s\n" + "Conflicts on (< %s >)\n", + state_to_string(state), + map(indices(conflicts), symbol_to_string) * ", ")); + } + + foreach (indices(conflicts), int|string symbol) { + /* Initialize some vars here */ + int reduce_count = 0; + int shift_count = 0; + int reduce_rest = 0; + int shift_rest = 0; + int only_operators = 1; + object(priority) shift_pri, reduce_pri, pri; + object(rule) min_rule = 0; + int conflict_free; + + /* Analyse the items */ + /* This loses if there are reduce-reduce conflicts, + * or shift-shift conflicts + */ + foreach (state->items, object(item) i) { + if (i->offset == sizeof(i->r->symbols)) { + if (i->direct_lookahead[symbol]) { + /* Reduction */ + reduce_count++; + /* ******************* BUGGY! 
********************* + if (i->r->priority) { + reduce_pri = i->r->pri; + } else { + */ + only_operators = 0; + /* + } + */ + if ((!min_rule) || (i->r->number < min_rule->number)) { + min_rule = i->r; + } + } + } else if (!intp(i->r->symbols[i->offset])) { + if (i->r->symbols[i->offset] == symbol) { + /* Shift */ + shift_count++; + + if (operator_priority[symbol]) { + shift_pri = operator_priority[symbol]; + } else { + only_operators = 0; + } + } + } + } + + if (only_operators) { + if (reduce_pri->value > shift_pri->value) { + pri = reduce_pri; + } else { + pri = shift_pri; + } + + foreach (state->items, object(item) i) { + if (i->offset == sizeof(i->r->symbols)) { + if (i->direct_lookahead[symbol]) { + /* *************************** BUGGY PRIORITY HANDLING ****** + if (i->r->pri->value < pri->value) { + if (verbose) { + werror(sprintf("Ignoring reduction of item\n%s\n" + "on lookahead %s (Priority %d < %d)\n", + item_to_string(i), + symbol_to_string(symbol), + i->r->pri->value, pri->value)); + } + i->direct_lookahead[symbol] = 0; + if (!sizeof(indices(i->direct_lookahead))) { + i->direct_lookahead = (<>); + } + } else */ if ((pri->assoc >= 0) && + (shift_pri->value == pri->value)) { + if (verbose) { + werror(sprintf("Ignoring reduction of item\n%s\n" + "on lookahead %s (Right associative)\n", + item_to_string(i), + symbol_to_string(symbol))); + } + i->direct_lookahead[symbol] = 0; + if (!sizeof(indices(i->direct_lookahead))) { + i->direct_lookahead = (<>); + } + } else { + if (verbose) { + werror(sprintf("Kept item\n%s\n" + "on lookahead %s\n", + item_to_string(i), + symbol_to_string(symbol))); + } + reduce_rest++; + } + } else if (i->r->symbols[i->offset] == symbol) { + /* Shift */ + if (shift_pri->value < pri->value) { + /************* BUGGY PRIORITY HANDLING *********** + if (verbose) { + werror(sprintf("Ignoring shift on item\n%s\n" + "on lookahead %s (Priority %d < %d)\n", + item_to_string(i), + symbol_to_string(symbol), + i->r->pri->value, pri->value)); + } 
+ */ + i->direct_lookahead = (<>); + } else if ((pri->assoc <= 0) && + (reduce_pri->value == pri->value)) { + if (verbose) { + werror(sprintf("Ignoring shift on item\n%s\n" + "on lookahead %s (Left associative)\n", + item_to_string(i), + symbol_to_string(symbol))); + } + i->direct_lookahead = (<>); + } else { + if (verbose) { + werror(sprintf("Kept item\n%s\n" + "on lookahead %s\n", + item_to_string(i), + symbol_to_string(symbol))); + } + shift_rest++; + } + } + } + } + } else { + /* Not only operators */ + if (shift_count) { + /* Prefer shifts */ + foreach (state->items, object(item) i) { + if (i->offset == sizeof(i->r->symbols)) { + /* Reduction */ + if (i->direct_lookahead[symbol]) { + if (verbose) { + werror(sprintf("Ignoring reduction on item\n%s\n" + "on lookahead %s (can shift)\n", + item_to_string(i), + symbol_to_string(symbol))); + } + i->direct_lookahead[symbol] = 0; + if (!sizeof(indices(i->direct_lookahead))) { + i->direct_lookahead = (<>); + } + } + } else { + /* Shift */ + if (i->r->symbols[i->offset] == symbol) { + if (verbose) { + werror(sprintf("Kept item\n%s\n" + "on lookahead (shift)%s\n", + item_to_string(i), + symbol_to_string(symbol))); + } + shift_rest++; + } + } + } + } else { + /* Select the first reduction */ + foreach (state->items, object(item) i) { + if (i->r == min_rule) { + if (verbose) { + werror(sprintf("Kept item\n%s\n" + "on lookahead %s (first rule)\n", + item_to_string(i), + symbol_to_string(symbol))); + } + reduce_rest++; + } else { + if (verbose) { + werror(sprintf("Ignoring reduction on item\n%s\n" + "on lookahead %s (not first rule)\n", + item_to_string(i), + symbol_to_string(symbol))); + } + i->direct_lookahead[symbol] = 0; + if (!sizeof(indices(i->direct_lookahead))) { + i->direct_lookahead = (<>); + } + } + } + } + } + + conflict_free = 0; + + if (reduce_rest > 1) { + if (shift_rest) { + werror(sprintf("Error: Shift-Reduce-Reduce conflict on lookahead %s\n", + symbol_to_string(symbol))); + } else { + 
werror(sprintf("Error: Reduce-Reduce conflict on lookahead %s\n", + symbol_to_string(symbol))); + } + } else if (reduce_rest) { + if (shift_rest) { + werror(sprintf("Error: Shift-Reduce conflict on lookahead %s\n", + symbol_to_string(symbol))); + } else { + /* REDUCE + * + * No other rule left -- conflict resolved! + */ + conflict_free = 1; + } + } else { + /* SHIFT + * + * All reductions removed -- conflict resolved! + */ + conflict_free = 1; + } + if (conflict_free) { + if (reduce_count > 1) { + if (shift_count) { + if (only_operators) { + werror(sprintf("Repaired Shift-Reduce-Reduce conflict on %s\n", + symbol_to_string(symbol))); + } else { + werror(sprintf("Warning: Repaired Shift-Reduce-Reduce conflict on %s\n", + symbol_to_string(symbol))); + } + } else { + if (only_operators) { + werror(sprintf("Repaired Reduce-Reduce conflict on %s\n", + symbol_to_string(symbol))); + } else { + werror(sprintf("Warning: Repaired Reduce-Reduce conflict on %s\n", + symbol_to_string(symbol))); + } + } + } else if (reduce_count) { + if (shift_count) { + if (only_operators) { + werror(sprintf("Repaired Shift-Reduce conflict on %s\n", + symbol_to_string(symbol))); + } else { + werror(sprintf("Warning: Repaired Shift-Reduce conflict on %s\n", + symbol_to_string(symbol))); + } + } else { + /* No conflict */ + werror(sprintf("No conflict on symbol %s (Plain REDUCE)\n", + symbol_to_string(symbol))); + } + } else { + /* No conflict */ + werror(sprintf("No conflict on symbol %s (SHIFT)\n", + symbol_to_string(symbol))); + } + + } else { + /* Still conflicts left on this symbol */ + conflict_set[symbol] = 1; + } + } + + if (sizeof(indices(conflict_set))) { + werror(sprintf("Still conflicts remaining in state\n%s\n" + "on symbols (< %s >)\n", + state_to_string(state), + map(indices(conflict_set), symbol_to_string) * ", ")); + return (ERROR_CONFLICTS); + } else { + if (verbose) { + werror("All conflicts removed!\n"); + } + return (0); + } +} +/* compile(): build the parse tables. Creates the states, computes the lookahead sets, lets repair() resolve conflicting states and finally fills in state->action for every state. Returns 0 on success, otherwise the OR of the error codes returned by repair(). */ +int compile() +{ + int error = 0; /* No error
yet; results from repair() are OR:ed into this */ + int state_no = 0; /* DEBUG INFO */ + object(kernel) state; + multiset(int|string) symbols, conflicts; + + s_q = state_queue(); + s_q->push_if_new(first_state()); + + /* First make LR(0) states */ + + while (state = s_q->next()) { + + if (verbose) { + werror(sprintf("Compiling state %d:\n%s", state_no++, + state_to_string(state) + "\n")); + } + + /* Probably better implemented as a stack */ + foreach (indices(make_goto_set(state)), int|string symbol) { + do_goto(state, symbol); + } + } + + /* Compute nullables */ + /* Done during add_rule */ + if (verbose) { + werror(sprintf("Nullable nonterminals: (< %s >)\n", + map(indices(nullable), symbol_to_string) * ", ")); + } + + /* Mark Transition and Reduction master items */ + for (int index = 0; index < s_q->tail; index++) { + mapping(int|string : object(item)) master_item =([]); + + foreach (s_q->arr[index]->items, object(item) i) { + if (i->offset < sizeof(i->r->symbols)) { + /* This is not a reduction item, which represent themselves */ + int|string symbol = i->r->symbols[i->offset]; + + if (!(i->master_item = master_item[symbol])) { + master_item[symbol] = i; /* First item seen for this symbol becomes the master */ + } + } + } + } + + /* Probably OK so far */ + + /* Calculate look-ahead sets (DR and relation) */ + for (int index = 0; index < s_q->tail; index++) { + foreach (s_q->arr[index]->items, object(item) i) { + if ((!i->master_item) && (i->offset != sizeof(i->r->symbols)) && + (intp(i->r->symbols[i->offset]))) { + /* This is a non-terminal master item */ + foreach (i->next_state->items, object(item) i2) { + int|string symbol; + + if (!i2->master_item) { + /* Master item */ + if (i2->offset != sizeof(i2->r->symbols)) { + if (intp(symbol = i2->r->symbols[i2->offset])) { + if (nullable[symbol]) { + /* Add the item to the look-ahead relation set */ + i->relation[i2] = 1; + } + } else { + /* Add the string to the direct look-ahead set (DR) */ + i->direct_lookahead[symbol] = 1; + } + } + } + } + } + } + } + + /* Handle SHIFT-conflicts */ +
handle_shift_conflicts(); + + /* Check the shift sets */ + /* (Is this needed?) + * Yes - initializes error_lookahead + */ + for (int index = 0; index < s_q->tail; index++) { + foreach (s_q->arr[index]->items, object(item) i) { + if ((!i->master_item) && + (i->offset != sizeof(i->r->symbols)) && + (intp(i->r->symbols[i->offset]))) { + i->error_lookahead = copy_value(i->direct_lookahead); + } + } + } + + /* Compute lookback-sets */ + for (int index = 0; index < s_q->tail; index++) { + foreach (s_q->arr[index]->items, object(item) transition) { + int|string symbol; + + if ((transition->offset != sizeof(transition->r->symbols)) && + (intp(symbol = transition->r->symbols[transition->offset])) && + (!transition->master_item)) { + /* NonTerminal and master item*/ + + /* Find items which can reduce to the nonterminal from above */ + foreach (s_q->arr[index]->items, object(item) i2) { + if ((!i2->offset) && + (i2->r->nonterminal == symbol)) { + if (sizeof(i2->r->symbols)) { + if (go_through(i2->next_state, i2->r, i2->offset+1, + transition)) { + /* Nullable */ + object(item) master = i2; + if (i2->master_item) { + master = i2->master_item; + } + /* Is this a nonterminal transition?
*/ + if ((master->offset != sizeof(master->r->symbols)) && + (intp(master->r->symbols[master->offset]))) { + /* Don't include ourselves */ + if (master != transition) { + master->relation[transition] = 1; + } + } + } + } else { + i2->relation[transition] = 1; /* Empty rule: i2 is itself the reduction item */ + } + } + } + } + } + } + + /* Handle follow-conflicts */ + handle_follow_conflicts(); + + /* Compute the lookahead (LA) */ + for (int index = 0; index < s_q->tail; index++) { + foreach (s_q->arr[index]->items, object(item) i) { + if (i->offset == sizeof(i->r->symbols)) { + /* Reduction item (always a master item) */ + + /* Calculate Look-ahead for all items in look-back set */ + + foreach (indices(i->relation), object(item) lookback) { + /* Add Follow(lookback) to the lookahead-set */ + + i->direct_lookahead |= lookback->direct_lookahead; + } + } + } + } + + /* Probably OK from this point onward */ + + /* Check for conflicts */ + for (int index = 0; index < s_q->tail; index++) { + object(kernel) state = s_q->arr[index]; + + conflicts = (<>); + symbols = (<>); + + foreach (state->items, object(item) i) { + if (i->offset == sizeof(i->r->symbols)) { + /* Reduction */ + conflicts |= i->direct_lookahead & symbols; + symbols |= i->direct_lookahead; + } else if (!i->master_item) { + string|int symbol; + + /* Only master items, since we get Shift-Shift conflicts otherwise */ + + if (!intp(symbol = i->r->symbols[i->offset])) { + /* Shift on terminal */ + if (symbols[symbol]) { + conflicts[symbol] = 1; + } else { + symbols[symbol] = 1; + } + } + } + } + if (sizeof(conflicts)) { + /* Repair conflicts; OR the result in so that a later state which repairs cleanly cannot clear an earlier failure */ + error |= repair(state, conflicts); + } else if (verbose) { + werror(sprintf("No conflicts in state:\n%s\n", + state_to_string(s_q->arr[index]))); + } + } + + /* Compile action tables */ + for (int index = 0; index < s_q->tail; index++) { + object(kernel) state = s_q->arr[index]; + + state->action = ([]); + + foreach (state->items, object(item) i) { + if (i->next_state) { + /* SHIFT */ + state->action[i->r->symbols[i->offset]]
= i->next_state; + } else { + foreach (indices(i->direct_lookahead), int|string symbol) { + state->action[symbol] = i->r; + } + } + } + } + start_state = s_q->arr[0]; + + return (error); +} +/* parse(): run the table-driven parser on the tokens delivered by scanner. scanner() must return either a token string or an array ({ token, value }); the token "" means EOF. action_object is only needed when rules name their action by string. Returns the value popped from the value stack on accept (or on a bad state at EOF with exactly one value left), and 0 when no scanner is given or the stack runs empty at EOF. Error codes are OR:ed into the variable error, which is declared outside this function. */ +mixed parse(function(void:string|array(string|mixed)) scanner, + void|object action_object) +{ + object(Stack.stack) value_stack = Stack.stack(); + object(Stack.stack) state_stack = Stack.stack(); + object(kernel) state = start_state; + + string input; + mixed value; + + error = 0; /* No parse error yet */ + + if (!scanner || !functionp(scanner)) { + werror("parser->parse(): scanner not set!\n"); + error = ERROR_NO_SCANNER; + return(0); + } + + value = scanner(); + + if (arrayp(value)) { + input = value[0]; + value = value[1]; + } else { + input = value; + } + + while (1) { + mixed a = state->action[input]; + + if (object_program(a) == rule) { + /* REDUCE */ + function (mixed ...:mixed) func = 0; + + if (verbose) { + werror(sprintf("Reducing according to rule\n%s\n", + rule_to_string(a))); + } + + if (a->action) { + if (functionp(a->action)) { + func = a->action; + } else if (stringp(a->action)) { + if (action_object) { + func = action_object[a->action]; + if (!func) { + werror(sprintf("Missing action \"%s\" in object\n", + a->action)); + error |= ERROR_MISSING_ACTION; + } else if (!functionp(func)) { + werror(sprintf("Bad type (%s) for action \"%s\" in object\n", + typeof(func), a->action)); + error |= ERROR_BAD_ACTION_TYPE; + func = 0; + } + } else { + werror(sprintf("Missing object for action \"%s\"\n", + a->action)); + error |= ERROR_NO_OBJECT; + } + } else { + werror(sprintf("Unsupported action type \"%s\" (%s)\n", + a->action, typeof(a->action))); + error |= ERROR_BAD_ACTION_TYPE; + } + } + if (func) { /* Call the resolved function; a->action itself may be just the name of it */ + if (sizeof(a->symbols)) { + value_stack->push(func(@value_stack->pop(sizeof(a->symbols)))); + } else { + value_stack->push(func()); + } + } else { /* No usable action: default to the value of the first symbol, or 0 for an empty rule */ + if (sizeof(a->symbols)) { + value_stack->push(value_stack->pop(sizeof(a->symbols))[0]); + } else { +
value_stack->push(0); + } + } + if (sizeof(a->symbols)) { + state = state_stack->pop(sizeof(a->symbols))[0]; + } + state_stack->push(state); + state = state->action[a->nonterminal]; /* Goto */ + } else if (a) { + /* SHIFT or ACCEPT */ + if (input == "") { + /* Only the final state is allowed to shift on ""(EOF) */ + /* ACCEPT */ + return(value_stack->pop()); + } + /* SHIFT */ + if (verbose) { + werror(sprintf("Shifting \"%s\", value \"%O\"\n", input, value)); + } + value_stack->push(value); + state_stack->push(state); + state = a; + + value = scanner(); + + if (arrayp(value)) { + input = value[0]; + value = value[1]; + } else { + input = value; + } + } else { + /* ERROR -- compare, do not assign: a string is always true, so "=" would treat every error as EOF */ + if (input == "") { + /* At end of file */ + error |= ERROR_EOF; + + if (value_stack->ptr != 1) { + if (value_stack->ptr) { + werror(sprintf("Error: Bad state at EOF -- Throwing \"%O\"\n", + value_stack->pop())); + state=state_stack->pop(); + } else { + werror(sprintf("Error: Empty stack at EOF!\n")); + return (0); + } + } else { + werror("Error: Bad state at EOF\n"); + return(value_stack->pop()); + } + } else { + error |= ERROR_SYNTAX; /* Skip the offending token and try again with the next one */ + + werror("Error: Bad input: \""+input+"\"(\""+value+"\")\n"); + + value = scanner(); + + if (arrayp(value)) { + input = value[0]; + value = value[1]; + } else { + input = value; + } + } + } + } +} diff --git a/lib/modules/LR.pmod/priority.pike b/lib/modules/LR.pmod/priority.pike new file mode 100644 index 0000000000000000000000000000000000000000..abe9e1d4e8aba09f1345e7476c4ebe3e0d764fb6 --- /dev/null +++ b/lib/modules/LR.pmod/priority.pike @@ -0,0 +1,24 @@ +/* + * $Id: priority.pike,v 1.1 1997/03/03 23:50:19 grubba Exp $ + * + * Rule priority specification + * + * Henrik Grubbström 1996-12-05 + */ + +/* Priority value */ +int value; + +/* Associativity + * + * -1 - left + * 0 - none + * +1 - right + */ +int assoc; + +void create(int p, int a) +{ + value = p; + assoc = a; +} diff --git a/lib/modules/LR.pmod/rule.pike b/lib/modules/LR.pmod/rule.pike new file mode
100644 index 0000000000000000000000000000000000000000..4952dca5d3571e21e7677492607f3d1ed347bb1c --- /dev/null +++ b/lib/modules/LR.pmod/rule.pike @@ -0,0 +1,60 @@ +/* + * $Id: rule.pike,v 1.1 1997/03/03 23:50:21 grubba Exp $ + * + * A BNF-rule. + * + * Henrik Grubbström 1996-11-24 + */ + +/* + * Object variables + */ + +/* Nonterminal this rule reduces to */ +int nonterminal; +/* The actual rule (its right-hand side): strings are tokens, ints are nonterminals */ +array(string|int) symbols; +/* Action to do when reducing this rule + * + * function - call this function + * string - call this function by name in the object given to the parser + */ +function|string action; + +/* Variables used when compiling */ + +/* This rule contains tokens (set by create() when any symbol is a string) */ +int has_tokens = 0; +/* This rule has this many nonnullable symbols at the moment */ +int num_nonnullables = 0; + +/* +multiset(int) prefix_nonterminals = (<>); +multiset(string) prefix_tokens = (<>); +*/ + +/* Number of this rule (used for conflict resolving) */ +int number = 0; + +/* + * Functions + */ +/* create(): store the nonterminal nt, the symbols r and the optional action a, and note whether r contains any token */ +void create(int nt, array(string|int) r, function|string|void a) +{ + mixed symbol; /* Loop variable for the scan below */ + + nonterminal = nt; + symbols = r; + action = a; + + foreach (r, symbol) { + if (stringp(symbol)) { + has_tokens = 1; + break; /* One token is enough */ + } + } + + num_nonnullables = sizeof(r); /* Every symbol counts as nonnullable to begin with */ +} + diff --git a/lib/modules/LR.pmod/scanner.pike b/lib/modules/LR.pmod/scanner.pike new file mode 100644 index 0000000000000000000000000000000000000000..730f8bada5d55d6b3dfccb1eb6773e5653f3c9f2 --- /dev/null +++ b/lib/modules/LR.pmod/scanner.pike @@ -0,0 +1,8 @@ +/* + * $Id: scanner.pike,v 1.1 1997/03/03 23:50:22 grubba Exp $ + * + * Scanner using reg-exps. + * + * Henrik Grubbström 1996-12-16 + */ +