Skip to content
Snippets Groups Projects
Commit 705daf5e authored by Henrik (Grubba) Grubbström's avatar Henrik (Grubba) Grubbström
Browse files

First version of LR(1) parser module.

Rev: lib/modules/LR.pmod/Grammar_parser.pmod:1.1
Rev: lib/modules/LR.pmod/item.pike:1.1
Rev: lib/modules/LR.pmod/kernel.pike:1.1
Rev: lib/modules/LR.pmod/lr.pike:1.1
Rev: lib/modules/LR.pmod/parser.pike:1.1
Rev: lib/modules/LR.pmod/priority.pike:1.1
Rev: lib/modules/LR.pmod/rule.pike:1.1
Rev: lib/modules/LR.pmod/scanner.pike:1.1
parent e412c804
No related branches found
No related tags found
No related merge requests found
......@@ -10,6 +10,14 @@ testfont binary
# Remove the corresponding line before committing
# changes to these files.
/lib/include/sql.pre.pike foreign_ident
/lib/modules/LR.pmod/Grammar_parser.pmod foreign_ident
/lib/modules/LR.pmod/item.pike foreign_ident
/lib/modules/LR.pmod/kernel.pike foreign_ident
/lib/modules/LR.pmod/lr.pike foreign_ident
/lib/modules/LR.pmod/parser.pike foreign_ident
/lib/modules/LR.pmod/priority.pike foreign_ident
/lib/modules/LR.pmod/rule.pike foreign_ident
/lib/modules/LR.pmod/scanner.pike foreign_ident
/src/backend.c foreign_ident
/src/builtin_functions.c foreign_ident
/src/configure.in foreign_ident
......
#!/home/grubba/src/pike/build/sol2.5/pike
/*
* $Id: Grammar_parser.pmod,v 1.1 1997/03/03 23:50:14 grubba Exp $
*
* Generates a parser from a textual specification.
*
* Henrik Grubbström 1996-12-06
*/
/*
* Includes
*/
#include <stdio.h>
/*
* Defines
*/
/* #define DEBUG */
/* Errors during parsing */
/* Action for rule is missing from master object */
#define ERROR_MISSING_ACTION 256
/* Action for rule is not a function */
#define ERROR_BAD_ACTION 512
import LR;
private object(parser) _parser = parser();
/*
* Scanner
*/
/*
 * Hand-written scanner for the textual grammar specification.
 *
 * Usage: assign the text to str, reset pos to 0 and call scan()
 * repeatedly until it returns "".
 */
class scan {
  /* Text being scanned */
  string str = "";
  /* Index of the next unread character in str */
  int pos;

  /*
   * Returns the next token from str:
   *
   *   ""                        end of input
   *   "%name"                   '%' followed by letters, lower-cased
   *   "(" ")" "{" "}" ":" ";"   punctuation
   *   ({ "string", text })      non-empty quoted string (quotes removed,
   *                             backslash escapes left as written)
   *   ({ "int", value })        run of decimal digits
   *   ({ "identifier", text })  any other character followed by letters,
   *                             digits, '_' or characters >= 128
   *
   * Whitespace and slash-star comments are skipped. Empty strings are
   * dropped since "" is the end-of-input token.
   */
  array|string scan()
  {
    while (1) {
      int start;
      int end;

      if (pos >= sizeof(str)) {
        /* EOF */
        return("");
      }
      start = pos++;
      switch (str[start]) {
      case '%':
        /* Token */
        while ((pos < sizeof(str)) &&
               ((('A' <= str[pos]) && ('Z' >= str[pos])) ||
                (('a' <= str[pos]) && ('z' >= str[pos])))) {
          pos++;
        }
        return (lower_case(str[start..pos-1]));
      case '\"':
        /* String */
        while ((pos < sizeof(str)) &&
               (str[pos] != '\"')) {
          if (str[pos] == '\\') {
            /* Skip the escaped character too */
            pos++;
          }
          pos++;
        }
        if (pos < sizeof(str)) {
          /* Terminated: the body ends just before the closing quote */
          end = pos;
          pos++;
        } else {
          /* Unterminated (or a trailing backslash overshot the end):
           * the body runs to the end of the input.
           */
          end = sizeof(str);
          pos = sizeof(str);
        }
        /* The original test compared the string itself with an integer,
         * which is always true; compare the positions instead.
         */
        if (end > start+1) {
          return ({ "string", str[start+1..end-1] });
        }
        /* Throw away empty strings (EOF) */
        break;
      case '/':
        /* Comment */
        if (pos >= sizeof(str)) {
          /* A lone '/' as the very last character; there is nothing
           * to index after it.
           */
          werror("Bad token \"/\" in input\n");
          break;
        }
        if (str[pos] != '*') {
          werror(sprintf("Bad token \"/%c\" in input\n", str[pos]));
          break;
        }
        pos++;
        /* Advance until the two characters ending at pos close the
         * comment, or the input runs out.
         */
        while (1) {
          if ((++pos >= sizeof(str)) ||
              (str[pos-1 .. pos] == "*/")) {
            pos++;
            break;
          }
        }
        break;
      case '(':
        return("(");
      case ')':
        return(")");
      case '{':
        return("{");
      case '}':
        return("}");
      case ':':
        return(":");
      case ';':
        return(";");
      case '\n':
      case '\r':
      case ' ':
      case '\t':
        /* Whitespace */
        break;
      case '0'..'9':
        /* int */
        while ((pos < sizeof(str)) &&
               ('0' <= str[pos]) && ('9' >= str[pos])) {
          pos++;
        }
        return (({ "int", (int)str[start .. pos-1] }));
      default:
        /* Identifier */
        while ((pos < sizeof(str)) &&
               ((('A' <= str[pos]) && ('Z' >= str[pos])) ||
                (('a' <= str[pos]) && ('z' >= str[pos])) ||
                (('0' <= str[pos]) && ('9' >= str[pos])) ||
                ('_' == str[pos]) || (str[pos] >= 128))) {
          pos++;
        }
        return (({ "identifier", str[start .. pos-1] }));
      }
    }
  }
}
/* Scanner instance that make_parser() feeds with the specification text */
private object(scan) scanner = scan();
/* Names of the nonterminals of the specification language itself.
 * internal_symbol_to_string() indexes this array with the nonterminal
 * number, so the order must match the numbers used in create().
 */
private array(string) nonterminals = ({
"translation_unit",
"directives",
"directive",
"declaration",
"rule",
"symbols",
"terminals",
"symbol",
"action",
"nonterminal",
"terminal",
"priority",
});
/* Names of the nonterminals of the grammar being read; the position on
 * the stack is the nonterminal's number (see add_nonterminal()).
 */
private object(Stack.stack) id_stack = Stack.stack();
/* Maps a nonterminal name to its number in id_stack */
private mapping(string:int) nonterminal_lookup = ([]);
/* The parser currently being built by make_parser() */
private object(parser) g=parser();
/* Object in which add_rule() looks up action functions by name */
private object master;
/* Bitwise OR of the ERROR_* flags raised while reading a specification */
int error;
/*
 * Returns the number of the nonterminal named id, allocating the next
 * free number (the current height of id_stack) the first time the name
 * is seen.
 */
private int add_nonterminal(string id)
{
  int number = nonterminal_lookup[id];

  if (number) {
    /* Already registered */
    return(number);
  }
  number = id_stack->ptr;
  nonterminal_lookup[id] = number;
  id_stack->push(id);
  return(number);
}
/*
 * Action registered in create() for the "%token" declaration rule.
 * Currently does nothing; the disabled code would map add_token over
 * the token list.
 *
 * NOTE(review): the rule has three symbols ("%token", terminals, ";").
 * The other actions in this file take one argument per symbol, so
 * `tokens` would presumably receive the "%token" keyword rather than
 * the list -- confirm before enabling the code below.
 */
private void add_tokens(array(string) tokens)
{
/* NOOP */
#if 0
if (sizeof(tokens)) {
map(tokens, add_token);
}
#endif /* 0 */
}
/*
 * Action for the "%left" declaration rule: marks every listed token as
 * left associative and gives it the priority pri_val in the parser
 * being built.
 *
 * ignore  - the "%left" keyword itself
 * pri_val - priority value from the optional "(int)" part
 * tokens  - the tokens being declared
 */
private void set_left_tokens(string ignore, int pri_val, array(string) tokens)
{
  int i;

  for (i = 0; i < sizeof(tokens); i++) {
    string tok = tokens[i];

    g->set_associativity(tok, -1); /* -1 == left associative */
    g->set_priority(tok, pri_val);
  }
}
/*
 * Symbol printer for the specification parser itself: integers are
 * nonterminal numbers and are looked up in `nonterminals`, strings are
 * tokens and are shown inside double quotes.
 */
private string internal_symbol_to_string(int|string symbol)
{
  if (!intp(symbol)) {
    return ("\"" + symbol + "\"");
  }
  return (nonterminals[symbol]);
}
/*
 * Symbol printer for the grammar being built: tokens (strings) are
 * quoted, nonterminal numbers are translated to the name stored in
 * id_stack. A number with no name yet is shown as "nonterminal<n>".
 */
private string symbol_to_string(int|string symbol)
{
  if (!intp(symbol)) {
    return ("\""+symbol+"\"");
  }
  if (symbol >= id_stack->ptr) {
    /* Only happens with the initial(automatic) rule */
    return ("nonterminal"+symbol);
  }
  return (id_stack->arr[symbol]);
}
/*
 * Action for the two "rule" productions: adds the rule nt -> symbols to
 * the parser being built.
 *
 * nt      - number of the nonterminal being defined
 * colon   - the ":" token (unused)
 * symbols - right hand side of the rule
 * action  - name of the action, or ";" when the rule was written
 *           without an action block (the terminating ";" then ends up
 *           in this argument)
 *
 * When a master object is set the action name is resolved to the
 * function of that name in it. If the name is missing or is not a
 * function a warning is printed, the matching ERROR_* bit is set in
 * `error`, and the rule is added with the plain name instead.
 */
private void add_rule(int nt, string colon, array(mixed) symbols, string action)
{
  mixed handler;

  if (action == ";") {
    action = 0;
  }
  handler = action;

  if (action && master) {
    mixed member = master[action];

    if (!member) {
      werror(sprintf("Warning: Missing action %s\n", action));
      error |= ERROR_MISSING_ACTION;
    } else if (!functionp(member)) {
      werror(sprintf("Warning: \"%s\" is not a function in object\n",
                     action));
      error |= ERROR_BAD_ACTION;
    } else {
      handler = member;
    }
  }
  g->add_rule(rule(nt, symbols, handler));
}
/*
 * Builds _parser, the parser that reads grammar specifications.
 *
 * Integers in the rules are nonterminal numbers (indices into the
 * `nonterminals` array, named in the trailing comment of each rule);
 * strings are tokens as produced by the scan class. The third argument
 * to rule() is the action for the rule, or 0 for none.
 */
void create()
{
_parser->set_symbol_to_string(internal_symbol_to_string);
_parser->verbose = 0;
/* Start rule; "" is the end-of-input token returned by the scanner */
_parser->add_rule(rule(0, ({ 1, "" }), 0)); /* translation_unit */
_parser->add_rule(rule(1, ({ 2 }), 0)); /* directives */
_parser->add_rule(rule(1, ({ 1, 2 }), 0)); /* directives */
_parser->add_rule(rule(2, ({ 3 }), 0)); /* directive */
_parser->add_rule(rule(2, ({ 4 }), 0)); /* directive */
_parser->add_rule(rule(3, ({ "%token", 6, ";" }),
add_tokens)); /* declaration */
_parser->add_rule(rule(3, ({ "%left", 11, 6, ";" }),
set_left_tokens)); /* declaration */
/* Rule without an action block ... */
_parser->add_rule(rule(4, ({ 9, ":", 5, ";" }),
add_rule)); /* rule */
/* ... and with one */
_parser->add_rule(rule(4, ({ 9, ":", 5, 8, ";" }),
add_rule)); /* rule */
/* The symbol list starts out empty and is extended one symbol at a time */
_parser->add_rule(rule(5, ({}),
lambda () {
return ({}); } )); /* symbols */
_parser->add_rule(rule(5, ({ 5, 7 }),
lambda (array x, mixed|void y) {
if (y) { return (x + ({ y })); }
else { return (x); }} )); /* symbols */
/* The terminal list holds at least one terminal */
_parser->add_rule(rule(6, ({ 10 }),
lambda (string x) {
return ({ x }); } )); /* terminals */
_parser->add_rule(rule(6, ({ 6, 10 }),
lambda (array(string) x, string y) {
return (x + ({ y })); } )); /* terminals */
_parser->add_rule(rule(7, ({ 9 }), 0 )); /* symbol */
_parser->add_rule(rule(7, ({ 10 }), 0 )); /* symbol */
/* An action block yields the name between the braces */
_parser->add_rule(rule(8, ({ "{", "identifier", "}" }),
lambda (mixed brace_l, string id, mixed brace_r) {
return (id); } )); /* action */
_parser->add_rule(rule(8, ({ "{", "string", "}" }),
lambda (mixed brace_l, string str, mixed brace_r) {
werror(sprintf("Warning: Converting string \"%s\" "
"to identifier\n", str));
return(str); } )); /* action */
/* An identifier is turned into its nonterminal number */
_parser->add_rule(rule(9, ({ "identifier" }),
add_nonterminal)); /* nonterminal */
_parser->add_rule(rule(10, ({ "string" }), 0)); /* terminal */
/* Optional "(int)" priority; the action returns the integer */
_parser->add_rule(rule(11, ({ "(", "int", ")" }),
lambda (mixed paren_l, int val, mixed paren_r) {
return (val);
} )); /* priority */
_parser->add_rule(rule(11, ({}), 0)); /* priority */
_parser->compile();
}
/*
 * Builds a parser from the textual grammar specification str.
 *
 * str - the specification text
 * m   - optional object in which action names are looked up
 *
 * Returns the compiled parser, or 0 if reading the specification
 * reported an error, an ERROR_* flag was raised in `error`, or
 * compiling the resulting grammar failed.
 */
object(parser) make_parser(string str, object|void m)
{
  object(parser) result = 0;

  /* Reset the per-run state */
  master = m;
  error = 0; /* No errors yet */
  nonterminal_lookup = ([]);
  id_stack = Stack.stack();
  id_stack->push("Translation Unit"); /* Nonterminal #0 -- Start symbol */

  /* Rewind the scanner onto the new text */
  scanner->str = str;
  scanner->pos = 0;

  /* Fresh parser to collect the rules in */
  g = parser();
  g->set_symbol_to_string(symbol_to_string);
#ifdef DEBUG
  _parser->verbose = 1;
  g->verbose = 1;
#else
  g->verbose = 0;
#endif /* DEBUG */

  /* Default rule -- Will never be reduced */
  g->add_rule(rule(0, ({ 1, "" }), 0)); /* Rule #0 -- Start rule */

  _parser->parse(scanner->scan);

  if (!_parser->error) {
    if (!error) {
      if (g->compile()) {
        result = g;
      }
    }
  }

  g = 0; /* Don't keep any references */
  return (result);
}
/*
* Syntax-checks and compiles the grammar files
*/
/*
 * Command line entry point: tries to build a parser from every file
 * named on the command line and reports the outcome on stderr.
 *
 * Returns 0 if every file compiled, 1 if no files were given or any
 * file could not be opened or compiled.
 */
int main(int argc, string *argv)
{
  int failed = 0;
  int i;

  if (argc == 1) {
    werror(sprintf("Usage:\n\t%s <files>\n", argv[0]));
    return(1);
  }

  for (i=1; i < argc; i++) {
    object(FILE) f = FILE();
    string spec;

    /* Reading from a file that failed to open would throw, so check. */
    if (!f->open(argv[i], "r")) {
      werror(sprintf("Failed to open \"%s\"\n", argv[i]));
      failed = 1;
      continue;
    }
    werror(sprintf("Compiling \"%s\"...\n", argv[i]));
    spec = f->read(0x7fffffff);
    f->close();

    /* make_parser() also returns 0 when the specification itself fails
     * to parse or the grammar fails to compile, neither of which sets
     * `error`; so check the result as well as the flag.
     */
    if (!make_parser(spec) || error) {
      werror("Compilation failed\n");
      failed = 1;
    } else {
      werror("Compilation done\n");
    }
  }
  return(failed);
}
/*
* $Id: item.pike,v 1.1 1997/03/03 23:50:15 grubba Exp $
*
* An LR(0) item
*
* Henrik Grubbström 1996-11-27
*/
import LR;
/* constant kernel = (program)"kernel"; */
/* constant item = (program)"item"; */
/*
 * State of one item: a rule together with a position in it, plus
 * bookkeeping used while the parser tables are built.
 */
/* The rule */
object(rule) r;
/* How long into the rule the parsing has come
 * (index into r->symbols of the next symbol to match)
 */
int offset;
/* The state we will get if we shift */
object /* (kernel) */ next_state;
/* Item representing this one (used for shifts) */
object /* (item) */ master_item = 0;
/* Look-ahead set for this item */
multiset(string) direct_lookahead = (<>);
/* NOTE(review): not described in this file; presumably a second
 * look-ahead set used for error handling -- confirm in parser.pike
 */
multiset(string) error_lookahead = (<>);
/* Relation to other items (used when compiling) */
multiset(object /* (item) */ ) relation = (<>);
/* Depth counter (used when compiling) */
int counter = 0;
/*
* $Id: kernel.pike,v 1.1 1997/03/03 23:50:16 grubba Exp $
*
* Implements a LR(1) state;
*
* Henrik Grubbström 1996-11-25
*/
import LR;
/* Used to check if a rule already has been added when doing closures */
multiset(object(rule)) rules = (<>);
/* Contains the items in this state, in the order they were added */
array(object(item)) items = ({});
/* Contains the items whose next symbol is this non-terminal
 *
 * NOTE(review): add_item() stores an entry for whatever symbol follows
 * the dot, so string (token) keys can presumably appear here too --
 * confirm whether the int-only key type is intended.
 */
mapping(int : multiset(object(item))) symbol_items = ([]);
/* The action table for this state
 *
 * object(kernel) SHIFT to this state on this symbol.
 * object(rule) REDUCE according to this rule on this symbol.
 */
mapping(int|string : object /* (kernel) */|object(rule)) action = ([]);
/*
* Functions
*/
/*
 * Adds the item i to this state. If the item is not yet at the end of
 * its rule it is also registered in symbol_items under the symbol that
 * comes next in the rule.
 */
void add_item(object(item) i)
{
  int|string next;
  multiset(object(item)) bucket;

  items += ({ i });

  if (i->offset >= sizeof(i->r->symbols)) {
    /* At the end of the rule; no next symbol to index on */
    return;
  }

  next = i->r->symbols[i->offset];
  bucket = symbol_items[next];
  if (bucket) {
    /* Multisets are shared by reference, so this updates the mapping */
    bucket[i] = 1;
  } else {
    symbol_items[next] = (< i >);
  }
}
/*
 * Returns 1 if `state` contains the same items as this state, else 0.
 *
 * The states must hold the same number of items, and every item in
 * `state` must either be present here as the same object, or have a
 * counterpart here with the same rule and the same offset.
 */
int equalp(object /* (kernel) */ state)
{
  if (sizeof(items) != sizeof(state->items)) {
    return(0);
  }

  /* Could probably make it test only kernel items */
  foreach (state->items, object(item) theirs) {
    int matched = 0;

    if (search(items, theirs) != -1) {
      /* The very same object is present here */
      continue;
    }

    foreach (items, object(item) ours) {
      if ((theirs->offset == ours->offset) &&
          (theirs->r == ours->r)) {
        matched = 1;
        break;
      }
    }
    if (!matched) {
      return(0);
    }
  }
  return(1);
}
#!/home/grubba/src/pike/build/sol2.5/pike
/*
* $Id: lr.pike,v 1.1 1997/03/03 23:50:17 grubba Exp $
*
* An LR(1) Parser in Pike
*
* Henrik Grubbström 1996-11-23
*/
import LR;
object(parser) g;
/*
* Test actions
*/
/*
 * Test action for "E + T": logs the addition on stderr and returns the
 * sum. The middle argument (the operator token) is ignored.
 */
int add_values(int x, mixed ignore, int y)
{
  int sum = x+y;

  werror(x+" + "+y+" = "+sum+"\n");
  return (sum);
}
/*
 * Test action for "T * F": logs the multiplication on stderr and
 * returns the product. The middle argument (the operator token) is
 * ignored.
 */
int mul_values(int x, mixed ignore, int y)
{
  int product = x*y;

  werror(x+" * "+y+" = "+product+"\n");
  return (product);
}
/*
 * Test action for "( E )": returns its second argument and ignores the
 * first as well as any arguments after the second.
 */
int get_second_value(mixed ignored, int x, mixed ... ignored_also)
{
  int second = x;

  return(second);
}
/*
 * Test action for "id value": appends the decimal digit y to the
 * number x, i.e. returns x*10 + y.
 */
int concat_values(int x, int y)
{
  int shifted = x*10;

  return (shifted + y);
}
/*
 * Test action for the digit tokens: converts the token text to an
 * integer with an (int) cast.
 */
int make_value(string s)
{
  int value = (int)s;

  return(value);
}
/*
* Test grammar
*/
/*
 * Printable names of the nonterminals of the test grammar, indexed by
 * nonterminal number (used by symbol_to_string()). The disabled branch
 * names the nonterminals of the alternative grammar in g_init.
 */
array(string) nonterminals = ({
#if 0
"S", "A", "B",
#else
"E'", "E", "T", "F", "id", "value",
#endif
});
/*
 * Rules of the test grammar. In each entry the first element is the
 * number of the nonterminal being defined and the remaining elements
 * are the right hand side (create() splits them that way). Integers
 * are nonterminal numbers, strings are tokens; "" ends the input.
 *
 * The active branch is an expression grammar over "+", "*",
 * parentheses and multi-digit numbers.
 */
array(array(string|int)) g_init = ({
#if 0
({ 0, 1, "" }),
({ 1, 2, 1 }),
({ 1 }),
({ 2, "a", 2 }),
({ 2, "b" }),
#else
({ 0, 1, "" }),
({ 1, 1, "+", 2 }),
({ 1, 2 }),
({ 2, 2, "*", 3 }),
({ 2, 3 }),
({ 3, "(", 1, ")" }),
({ 3, 4 }),
({ 4, 5 }),
({ 4, 4, 5 }),
({ 5, "0" }),
({ 5, "1" }),
({ 5, "2" }),
({ 5, "3" }),
({ 5, "4" }),
({ 5, "5" }),
({ 5, "6" }),
({ 5, "7" }),
({ 5, "8" }),
({ 5, "9" }),
#endif
});
/*
 * Actions for the rules in the active branch of g_init, in the same
 * order: create() pairs g_init[i] with action_init[i]. 0 means the
 * rule has no action. The two arrays must be kept the same length.
 */
array(int|function(mixed ...:mixed)) action_init = ({
0,
add_values,
0,
mul_values,
0,
get_second_value,
0,
0,
concat_values,
make_value,
make_value,
make_value,
make_value,
make_value,
make_value,
make_value,
make_value,
make_value,
make_value,
});
/*
* Test action
*/
/*
 * Generic test action: logs the reduction on stderr and returns the
 * concatenation of all its arguments. An empty rule yields "".
 */
string a_init(string ... args)
{
  if (!sizeof(args)) {
    /* Empty rule */
    werror("Reducing /* empty */ => \"\"\n");
    return("");
  }

  werror(sprintf("Reducing %s => \"%s\"\n",
                 map(args, g->symbol_to_string) * ", ",
                 args * ""));
  return (`+(@args));
}
/*
 * Symbol printer for the test grammar: tokens (strings) are shown in
 * double quotes, nonterminal numbers are looked up in `nonterminals`,
 * and numbers beyond that array are shown as "nonterminal<n>".
 */
string symbol_to_string(int|string symbol)
{
  if (!intp(symbol)) {
    return("\""+symbol+"\"");
  }
  if (symbol >= sizeof(nonterminals)) {
    return("nonterminal"+symbol);
  }
  return(nonterminals[symbol]);
}
/*
 * Creates the test parser, installs the symbol printer and adds every
 * rule from g_init together with the action at the same index in
 * action_init. The disabled branch would use a_init for every rule.
 */
void create()
{
  g = parser();
  g->symbol_to_string = symbol_to_string;
#if 0
  foreach (g_init, array(string|int) i) {
    g->add_rule(rule(i[0], i[1..], a_init));
  }
#else
  {
    int n;

    for (n = 0; n < sizeof(g_init); n++) {
      array(string|int) entry = g_init[n];

      /* entry[0] is the defined nonterminal, the rest is its body */
      g->add_rule(rule(entry[0], entry[1..], action_init[n]));
    }
  }
#endif
}
/*
 * Canned scanner for the test: hands out the tokens in s_init one at a
 * time.
 */
class scan {
  /*
   * Test input
   *
   * The final "" is the end-of-input token.
   */
  array(string) s_init = ({
#if 0
    "a", "a", "a", "b",
    "a", "a", "a", "b",
    "b",
    "a", "a", "b", "a",
#else
    "1", "*", "(", "3", "+", "2", ")", "+", "2", "*", "3",
#endif
    "",
  });

  /* Index of the next token to hand out */
  int s_pos = 0;

  /*
   * Returns the next token. Once the input is used up it keeps
   * returning the end-of-input token "" instead of indexing past the
   * end of s_init.
   */
  string scan()
  {
    if (s_pos >= sizeof(s_init)) {
      return("");
    }
    return(s_init[s_pos++]);
  }
}
object(scan) scanner = scan();
/*
 * Test driver: prints the grammar, compiles it, parses the canned
 * input from `scanner` and prints the result, all on stderr. Memory
 * usage is printed before and after compilation when the
 * _memory_usage efun is available.
 *
 * The command line arguments are not used.
 */
int main(int argc, string *argv)
{
mixed result;
/* Casting the parser to a string gives its printable form */
werror("Grammar:\n\n" + (string) g);
#if efun(_memory_usage)
werror(sprintf("Memory usage:\n%O\n", _memory_usage()));
#endif
werror("Compiling...\n");
g->verbose = 0;
g->compile();
werror("Compilation finished!\n");
#if efun(_memory_usage)
werror(sprintf("Memory usage:\n%O\n", _memory_usage()));
#endif
/* NOTE(review): Grammar_parser.pmod passes the scan function to
 * parse() instead of assigning a scanner object -- confirm which
 * form parser.pike expects.
 */
g->scanner=scanner;
result = g->parse();
/* Appending "" turns the result into a string for the %s format */
werror(sprintf("Result of parsing: \"%s\"\n", result + ""));
}
This diff is collapsed.
/*
* $Id: priority.pike,v 1.1 1997/03/03 23:50:19 grubba Exp $
*
* Rule priority specification
*
* Henrik Grubbström 1996-12-05
*/
/* Priority value (set from the first argument to create()) */
int value;
/* Associativity (set from the second argument to create())
 *
 * -1 - left
 * 0 - none
 * +1 - right
 */
int assoc;
/*
 * Initializes the priority specification.
 *
 * p - priority value
 * a - associativity: -1 left, 0 none, +1 right
 */
void create(int p, int a)
{
  assoc = a;
  value = p;
}
/*
* $Id: rule.pike,v 1.1 1997/03/03 23:50:21 grubba Exp $
*
* A BNF-rule.
*
* Henrik Grubbström 1996-11-24
*/
/*
* Object variables
*/
/* Nonterminal this rule reduces to */
int nonterminal;
/* The actual rule: the right hand side, where integers are
 * nonterminals and strings are tokens (create() treats any string
 * element as a token)
 */
array(string|int) symbols;
/* Action to do when reducing this rule
 *
 * function - call this function
 * string - call this function by name in the object given to the parser
 */
function|string action;
/* Variables used when compiling */
/* This rule contains tokens (set by create()) */
int has_tokens = 0;
/* This rule has this many nonnullable symbols at the moment
 * (create() starts it at the number of symbols in the rule)
 */
int num_nonnullables = 0;
/*
multiset(int) prefix_nonterminals = (<>);
multiset(string) prefix_tokens = (<>);
*/
/* Number of this rule (used for conflict resolving) */
int number = 0;
/*
* Functions
*/
/*
 * Initializes the rule.
 *
 * nt - nonterminal the rule reduces to
 * r  - right hand side; string elements are tokens
 * a  - optional action, either a function or the name of one
 *
 * has_tokens is set if any element of r is a string, and
 * num_nonnullables starts out as the number of symbols in r.
 */
void create(int nt, array(string|int) r, function|string|void a)
{
  int idx;

  nonterminal = nt;
  symbols = r;
  action = a;
  num_nonnullables = sizeof(r);

  /* Stop at the first token; one is enough to set the flag */
  for (idx = 0; idx < sizeof(r); idx++) {
    if (stringp(r[idx])) {
      has_tokens = 1;
      break;
    }
  }
}
/*
* $Id: scanner.pike,v 1.1 1997/03/03 23:50:22 grubba Exp $
*
* Scanner using reg-exps.
*
* Henrik Grubbström 1996-12-16
*/
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment