parser.c

/*
 * $Id: parser.c,v 0.4 1991/09/21 12:04:23 ceder Exp $
 * Copyright (C) 1991  Lysator Academic Computer Association.
 *
 * This file is part of the LysKOM server.
 * 
 * LysKOM is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by 
 * the Free Software Foundation; either version 1, or (at your option) 
 * any later version.
 * 
 * LysKOM is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with LysKOM; see the file COPYING.  If not, write to
 * Lysator, c/o ISY, Linkoping University, S-581 83 Linkoping, SWEDEN,
 * or the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, 
 * MA 02139, USA.
 *
 * Please mail bug reports to bug-lyskom@lysator.liu.se. 
 */
/*
 *  client/parser.c  --  Routines to parse commands
 *
 *
 *  Copyright (C) 1990	Lysator Computer Club,
 *			Linkoping University,  Sweden
 *
 *  Everyone is granted permission to copy, modify and redistribute
 *  this code, provided the people they give it to can.
 *
 *
 *  Author:	Thomas Bellman
 *		Lysator Computer Club
 *		Linkoping University
 *		Sweden
 *
 *  email:	Bellman@Lysator.LiU.SE
 *
 *
 *  Any opinions expressed in this code are the author's PERSONAL opinions,
 *  and does NOT, repeat NOT, represent any official standpoint of Lysator,
 *  even if so stated.
 */


static char *rcsid = "$Id: parser.c,v 0.4 1991/09/21 12:04:23 ceder Exp $";

#include <stddef.h>

#include <s-string.h>
#include <s-collat-tabs.h>

#include <misc-types.h>

#include "parser.h"


/*
 *  Linkage markers.  EXPORT expands to nothing and only documents that
 *  a definition is meant to be visible outside this file; PRIVATE
 *  expands to `static'.
 */
#define EXPORT			/* To emphasize export of objects */
#define PRIVATE		static


/*
 *  Select the allocator hidden behind MALLOC / REALLOC / FREE:
 *    - SERVER defined:  smalloc / srealloc / sfree  (<server/smalloc.h>)
 *    - CLIENT defined:  zmalloc / zrealloc / zfree  (<zmalloc.h>)
 *    - otherwise:       malloc / realloc / free     (<malloc.h>)
 *  The code below only ever uses the upper-case names.
 */
#if defined (SERVER)
#	include <server/smalloc.h>
#	define	MALLOC	smalloc
#	define	REALLOC	srealloc
#	define	FREE	sfree
#elif defined (CLIENT)
#	include	<zmalloc.h>
#	define	MALLOC	zmalloc
#	define	REALLOC	zrealloc
#	define	FREE	zfree
#else
#	include	<malloc.h>
#	define  MALLOC  malloc
#	define  REALLOC realloc
#	define  FREE	free
#endif


/*
 *  Blank out parenthesized "expressions" in the string STR, in place.
 *  Every character from an opening parenthesis up to and including
 *  its matching closing parenthesis is overwritten with BLANKER; the
 *  length of STR is not changed.  E g with BLANKER '_' the string
 *  "Foo (Bar (vfslck)) Gazonk" becomes "Foo ______________ Gazonk".
 *  A superfluous closing parenthesis (one without a matching opening
 *  parenthesis) is overwritten with BLANKER as well, and does not
 *  affect the nesting of whatever follows it.
 */
EXPORT  void
remove_parenthesis (String	* str,
		    char	  blanker)

{
    String_size		  pos;		/* Current character in STR */
    int			  depth	= 0;	/* Number of unclosed '(' so far */


    for ( pos = 0 ;  pos < s_strlen(*str) ;  pos++ )
    {
	switch (str->string[pos])
	{
	case '(':
	    depth++;
	    break;

	case ')':
	    /* The closing parenthesis itself always goes away. */
	    str->string[pos] = blanker;
	    /* Never let an unmatched ')' drive the depth negative. */
	    if (depth > 0)
		depth--;
	    break;

	default:
	    break;
	}

	/* Anything still inside a parenthesis is blanked. */
	if (depth > 0)
	    str->string[pos] = blanker;
    }
}


/*
 *  Split SOURCE into a list of tokens (words) delimited by any of the
 *  characters in SEPARATORS.  Parenthesized text is ignored: a private
 *  copy of SOURCE is run through remove_parenthesis() with the first
 *  character of SEPARATORS as blanker before it is split.
 *
 *  The list is terminated by an entry whose word is EMPTY_STRING and
 *  whose start_in_string is END_OF_STRING.
 *
 *  Returns NULL if memory could not be allocated.  Otherwise the
 *  caller owns the returned list and releases it with free_tokens().
 *  The word of each entry is a substring of SOURCE itself (not of the
 *  private copy), so the words must not be freed separately.
 */
EXPORT  Parse_token *
tokenize (const String	  source,
	  const String	  separators)

{
    const int		  grow_by	= 10;	/* Entries added per REALLOC */
    Parse_token		* list		= NULL;
    Parse_token		* grown;		/* Result of REALLOC */
    String		  scratch	= EMPTY_STRING;
    String		  found;		/* Token inside SCRATCH */
    String		  word;			/* Same token inside SOURCE */
    String_size		  cursor	= 0;	/* Position kept by s_strtok */
    int			  used		= 0;	/* Entries filled in */
    int			  capacity	= 0;	/* Entries allocated */


    /* Work on a copy, since the parentheses are blanked in place. */
    if (s_strcpy (&scratch, source) == FAILURE)
	return  NULL;

    remove_parenthesis (&scratch, separators.string[0]);

    for (;;)
    {
	found = s_strtok (scratch, &cursor, separators);
	if (s_empty (found))
	    break;

	/* Take the same span out of SOURCE, so that the token stays
	 * usable after SCRATCH has been cleared.			*/
	word = s_fsubstr (source,
			  cursor - s_strlen (found),
			  cursor - 1);

	/* Grow the list when every allocated entry is in use. */
	if (used >= capacity)
	{
	    capacity += grow_by;
	    grown = REALLOC (list, capacity * sizeof(Parse_token));
	    if (grown == NULL)
	    {
		/* Out of memory; the old list is still ours to free. */
		free_tokens (list);
		s_clear (&scratch);
		return  NULL;
	    }
	    list = grown;
	}

	list [used].word = word;
	list [used].start_in_string = cursor - s_strlen (word);
	used++;
    }
    s_clear (&scratch);

    /* Make sure there is one more entry for the terminator. */
    if (capacity <= used)
    {
	capacity++;
	grown = REALLOC (list, capacity * sizeof(Parse_token));
	if (grown == NULL)
	{
	    free_tokens (list);
	    return  NULL;
	}
	list = grown;
    }

    list [used].word = EMPTY_STRING;
    list [used].start_in_string = END_OF_STRING;

    return  list;
}


/*
 *  Return the number of tokens (words) in TOKEN_LIST, not counting
 *  the terminating entry whose word is empty.  Used to set the
 *  NUMBER_OF_WORDS field in a 'Matching_info' object.
 */
extern  int
count_words (const Parse_token   * token_list)

{
    int		n;

    /* Walk forward until the terminating (empty) entry is reached. */
    for ( n = 0 ;  ! s_empty (token_list[n].word) ;  n++ )
	continue;

    return  n;
}


/*
 *  Release a token list.  Only the list itself is released; the
 *  words of the entries are left alone.  Passing NULL is a no-op.
 */
EXPORT  void
free_tokens (Parse_token   * token_list)

{
    /* FREE is never handed a NULL pointer. */
    if (token_list == NULL)
	return;

    FREE (token_list);
}


/*
 *  Compare SOURCE and PATTERN word by word and return the number
 *  (counted from zero) of the first word of SOURCE that does not
 *  match PATTERN.  The comparison also stops when either list runs
 *  out of words, so the result is at most the length of the shorter
 *  list.  Words are compared with s_usr_strhead() using COLLAT_TAB:
 *  a word "foo" in SOURCE matches "foobar" in PATTERN, but not vice
 *  versa.
 */
EXPORT  int
match (Parse_token	* source,
       Parse_token	* pattern,
       char		  collat_tab [ COLLAT_TAB_SIZE ] )

{
    int		n;

    for ( n = 0 ;  ;  n++ )
    {
	/* Pattern exhausted? */
	if (s_streq (pattern[n].word, EMPTY_STRING))
	    break;

	/* Source exhausted? */
	if (s_streq (source[n].word, EMPTY_STRING))
	    break;

	/* Source word is not a head of the pattern word? */
	if (! s_usr_strhead (source[n].word,
			     pattern[n].word,
			     collat_tab))
	    break;
    }

    return  n;
}


/*
 *  Search the table MATCH_TABLE for the entries that best match
 *  SOURCE_STRING.  The table is terminated by an entry whose name is
 *  empty.
 *
 *  SOURCE_STRING is split into words with tokenize(), using SEPARATORS.
 *  For every table entry, match() tells how many leading words of the
 *  source match the entry's tokens (compared using COLLAT_TAB).  An
 *  entry is then discarded if
 *    - ALLOW_TRAILING_WORDS is false and some source word was left
 *      unmatched, or
 *    - NUMBER_OF_WORDS_MUST_MATCH is true and the number of matched
 *      words differs from the number of words in the entry, or
 *    - no word at all matched.
 *  Of the remaining entries, those with the largest number of matched
 *  words win; ties are broken by the highest priority, and all entries
 *  that tie on both are reported.
 *
 *  The result:
 *    no_of_matches  Number of entries found, or -1 if memory could
 *		     not be allocated.
 *    indexes	     The table indexes of the entries found.  NULL when
 *		     no_of_matches is 0 or -1.  Allocated with
 *		     MALLOC/REALLOC; presumably the caller releases it
 *		     with the corresponding FREE.
 *    arguments	     The part of SOURCE_STRING that starts at the first
 *		     unmatched word, or EMPTY_STRING if there is none.
 *		     Trailing separators are stripped when exactly one
 *		     entry was found; the value is only meaningful in
 *		     that case.
 *
 *  If SOURCE_STRING contains no words, a single match with index -1 is
 *  reported.
 *
 *  NOTE(review): the best priority seen starts out at 1, so an entry
 *  that matches exactly one word and has a priority below 1 is never
 *  reported.  Confirm that this is intended.
 */

EXPORT  Parse_info
parse (String		  source_string,
       Matching_info	* match_table,
       Bool		  allow_trailing_words,
       Bool		  number_of_words_must_match,
       String		  separators,
       char		  collat_tab [ COLLAT_TAB_SIZE ] )

{
    Parse_info		  answer;
    int			* temp_indexes;
    int			  entry;		/* Index in MATCH_TABLE */
    int			  size_of_index_list;
    Parse_token		* source_words;
    int			  no_of_source_words;
    int			  first_non_matching;
    int			  best_match;
    int			  highest_priority;

    const int		  chunk_size	= 20;


    /* Give the result defined contents right away, so that no path
     * below returns indeterminate fields to the caller.		*/
    answer.no_of_matches = 0;
    answer.indexes = NULL;
    answer.arguments = EMPTY_STRING;

    source_words = tokenize(source_string, separators);
    if (source_words == NULL)
    {
	answer.no_of_matches = -1;
	return  answer;
    }

    no_of_source_words = count_words(source_words);

    /* Check if SOURCE_STRING was empty of words */
    if (no_of_source_words == 0)
    {
	free_tokens (source_words);
	answer.indexes = MALLOC (1 * sizeof(*answer.indexes));
	if (answer.indexes == NULL)
	{
	    /* Out of memory. */
	    answer.no_of_matches = -1;
	    return  answer;
	}
	answer.indexes[0] = -1;
	answer.no_of_matches = 1;
	return  answer;
    }


    size_of_index_list = 0;
    best_match = 1;			/* At least one word */
    highest_priority = 1;
    for ( entry = 0 ;  ! s_empty (match_table[entry].name) ;  entry++ )
    {
	first_non_matching = match (source_words, match_table[entry].tokens,
				    collat_tab);

	if (   (    ! allow_trailing_words
		&&  first_non_matching < no_of_source_words)
	    || (    number_of_words_must_match
		&&  first_non_matching != count_words (match_table[entry].tokens)))
	{
	    continue;			/* Try next entry in table */
	}

	/* Fewer matching words than the best so far. */
	if (first_non_matching < best_match)
	    continue;			/* Try next entry in table */

	/* Same number of words, but lower priority. */
	if (    first_non_matching == best_match
	    &&  highest_priority > match_table[entry].priority)
	    continue;

	/*  If we reach this far, then we have a match that should be
	 *  inserted in the table.  But if it is a better match than any
	 *  before, then we clear the table first.			*/
	if (    first_non_matching > best_match
	    ||  match_table[entry].priority > highest_priority)
	{
	    highest_priority = match_table[entry].priority;
	    best_match = first_non_matching;
	    answer.no_of_matches = 0;
	}

	/* Insert the match in the table */

	/* Increase the size if necessary */
	if (answer.no_of_matches >= size_of_index_list)
	{
	    /* Size the list by its element type, not by the pointer. */
	    temp_indexes = REALLOC (answer.indexes,
				    (size_of_index_list += chunk_size)
				    * sizeof(*answer.indexes));
	    if (temp_indexes == NULL)
	    {
		/* Out of memory.  The old list is still allocated, but
		 * there is none yet if the very first request failed. */
		free_tokens (source_words);
		if (answer.indexes != NULL)
		    FREE (answer.indexes);
		answer.indexes = NULL;	/* Don't return a dangling pointer */
		answer.no_of_matches = -1;
		return  answer;
	    }

	    answer.indexes = temp_indexes;
	}

	answer.indexes[answer.no_of_matches] = entry;
	/*  Find out where the arguments start.
	 *  This value should not be used if more than one match is found.
	 */
	/* Special hack needed if no parameters */
	if (s_empty (source_words [first_non_matching].word))
	    answer.arguments = EMPTY_STRING;
	else
	    answer.arguments =
		s_fsubstr(source_string,
			  source_words[first_non_matching].
			  start_in_string,
			  END_OF_STRING);
	answer.no_of_matches++;
    }

    /* All matches found by now */

    /* Strip trailing blanks from the argument */
    if (answer.no_of_matches == 1)
	answer.arguments = s_strip_trailing (answer.arguments, separators);

    free_tokens (source_words);
    return  answer;

}   /* END: parse() */