diff --git a/Notation.lhs b/Notation.lhs index ad8a4f5445968a16f70da0db0ed0c14db8afbd1b..39739cf3a0bff78d2687452a99f3b57f7e5d0610 100644 --- a/Notation.lhs +++ b/Notation.lhs @@ -30,149 +30,113 @@ letter. both LysType and TypeField \end{itemize} } - -A LysType our notation for our different ``core'' types, -which all in some way hold a TypeField. -\begin{code} -data LysType = SimpleType TypeField - | BitstringType [TypeField] - | EnumType [TypeField] - | StructureType [TypeField] - | SelectionType [TypeField] - | ArrayType TypeField +The \emph{only} possible (non-derived) types are +\begin{code} +data LysType = INT32 | INT16 | INT8 | BOOL | FLOAT | HOLLERITH + | BITSTRING [String] + | ENUMERATION [(String, Int)] + | ENUMERATION_OF String -- must be name of a selection + | STRUCTURE [(String, LysType)] + | SELECTION [(Int, String, (String, LysType))] + | ARRAY LysType deriving (Show) \end{code} -A type field holds all the data we have about a type, -including nested types. And is represented as: -\begin{code} -data TypeField = OnlyType String - | BitStringField String - | StructureField TypeBinding - | EnumField Int String - | SelectionField Int String TypeBinding - | ArrayField TypeField - deriving (Show) -\end{code} - -\subsection{Parsing Types} - -Types come in a few forms. But they all share similarities. - -First out are the simple primitive types. These are INT32, -INT16, INT8, BOOL, FLOAT \& HOLLERITH. They all represent -a number of different (obvious) types, except for HOLLERITH -which is a string (see section \ref{item:holler}). 
-They can all be parsed with: +This means that the parser for type signatures is simply: \begin{code} -typeWordParser :: GenParser Char () TypeField -typeWordParser = OnlyType <$> word +lystypeParser :: GenParser Char () LysType +lystypeParser = + (try (string "INT8") >> return INT8) + <|> (try (string "INT16") >> return INT16) + <|> (try (string "INT32") >> return INT32) + <|> (try (string "BOOL") >> return BOOL) + <|> (try (string "FLOAT") >> return FLOAT) + <|> (try (string "HOLLERITH") >> return HOLLERITH) -- see section \ref{item:holler}. + <|> (try arrayTypeParser >>= return . ARRAY) + <|> (try bitstringTypeParser >>= return . BITSTRING) + <|> (try selectionParser >>= return . SELECTION) + <|> (try enumTypeParser >>= return . ENUMERATION) + <|> (try enumOfTypeParser >>= return . ENUMERATION_OF) + <|> (try structTypeParser >>= return . STRUCTURE) \end{code} -\hr - \subsubsection{Arrays} -Arrays are a simple type which represents a list of one -type. In the specification they are written as +Arbitrary length lists of a single type. Specified by + \begin{verbatim} ARRAY <type> \end{verbatim} -Where <type> is which types they hold. - -\begin{code} -arrayParser :: GenParser Char () TypeField -arrayParser = ArrayField - <$> (string "ARRAY" - *> whitespaces - *> typeWordParser) -\end{code} - -\hr - -We then have the structure types BISTRINGS, ENUMERATIONS, -SELECTIONS, \& STRUCTURES. -They all have in common that the handle a list of -declarations, surrounded by parenthesis. Therefore we start -by creating a general listParser; Which takes a parser for each -field and returns a list of fields. +and parsed as \begin{code} -listParser :: GenParser Char () TypeField -> GenParser Char () [TypeField] -listParser fieldParser - = withDelim' "()" (many $ withWS fieldParser) +arrayTypeParser :: GenParser Char () LysType +arrayTypeParser = string "ARRAY" *> whitespaces *> lystypeParser \end{code} -Many of them also have a name before the their list. 
This -parsers takes a parser for the actual list, and checks if -a set string appears before it. - \begin{code} -specialTypeParser - :: String - -> GenParser Char () [TypeField] - -> GenParser Char () [TypeField] -specialTypeParser str subParser = - string str *> whitespaces *> subParser +listParser fieldParser = withDelim' "()" (many $ withWS fieldParser) \end{code} \subsubsection{Bitstring} -Bitstrings are the simplest type. They represent a number -of bits. A sample bistring structure in the BNF could look -like: +List of boolean. Specified as \begin{verbatim} -BITSTRING ( name; - other-name; - ) +BITSTRING ( name; ... ) \end{verbatim} - -Where both `name' and `other-name' declare one bit filed -each, and each tell what that field should contain. It also -implies that this specific bitstring holds exactly two bits, -since it has two fields. +where each name is a descriptive string of that bit. \begin{code} -bitstringFieldParser :: GenParser Char () TypeField -bitstringFieldParser = BitStringField <$> word - <* whitespaces - <* char ';' - -bitstringParser = specialTypeParser "BITSTRING" (listParser bitstringFieldParser) +bitstringTypeParser :: GenParser Char () [String] +bitstringTypeParser = string "BITSTRING" + *> whitespaces + *> listParser (word <* whitespaces <* char ';') \end{code} \subsubsection{Selections} -\TODO{understand Selections} -If I understand correctly selections are a form of type unions. -\footnote{Please correct me} -They declare a <name> <n> mapping which is used for -specifing which type <type> will be used. I don't know why -the <tail> field extists, but it creates a secound name. +Tagged unions. 
\begin{verbatim} selection ( - <n> = <name> <tail> : <type>; + <n>=<name> <tail> : <type>; ) \end{verbatim} +\begin{quote} +Protocol A: Simple Data Types + +given +\begin{verbatim} +description ::= SELECTION ( + 1=name the_name : HOLLERITH; + 2=age years : INT32; + ) +\end{verbatim} +[the] two legal messages of the type `description' are `1 4HJohn' and `2 18'. +\end{quote} + +<name> and <tail> names only for the reader (of the protocol). + \begin{code} -selectionFieldParser :: GenParser Char () TypeField +selectionFieldParser :: GenParser Char () (Int, String, (String, LysType)) selectionFieldParser - = SelectionField - <$> (intParser <* char '=') - <*> (withWS word) - <*> (bindingParser <* whitespaces <* char ';') + = (,,) + <$> (intParser <* char '=') <*> (withWS word) + <*> structFieldParser -- structFieldParser already consumes the closing ';' -selectionParser = specialTypeParser "SELECTION" (listParser selectionFieldParser) +selectionParser = string "SELECTION" *> whitespaces *> listParser selectionFieldParser \end{code} \subsubsection{Enemurations} -An enumeration works just as expected. It declares a number -of symbols, as well as integer representations for them all. +Named subset of integer, equivalent to C. Can either be declared +directly through the ENUMERATION statement\footnote{ + there actually aren't any ENUMERATION statements in protocol A +}, or derived from a +selection through ENUMERATION-OF. 
-An example BNF of it would be: +The ``regular'' case looks like \begin{verbatim} ENUMERATION ( name = 1; other = 2; @@ -180,96 +144,49 @@ ENUMERATION ( name = 1; \end{verbatim} \begin{code} -enumFieldParser :: GenParser Char () TypeField -enumFieldParser = flip EnumField - <$> word - <*> withDelim' "=;" intParser +enumFieldParser :: GenParser Char () (String, Int) +enumFieldParser = do + w <- word + withWS (string "=") + i <- intParser + whitespaces + string ";" + return (w, i) + +-- (,) <$> word <* char '=' *> intParser <* whitespaces <* char ';' +enumTypeParser = string "ENUMERATION" *> whitespaces *> listParser enumFieldParser \end{code} -They can also appear on the form +The second form is \begin{verbatim} ENUMERATION-OF (<selection-type>) \end{verbatim} Which builds an enumeration from the <n> and <name> field in -a selection. See above. - -The parser for the BNF here would be -\footnote{The parser might work. But I can't figure out the types for it.} -\begin{code} --- enumSelectionParser :: GenParser Char () TypeField --- enumSelectionParser = SelectEnumField --- <$> word "ENUMERATION-OF" --- *> withDelim "()" ( typeWordParser --- <|> selectionParser --- <?> "Selection") -\end{code} +a selection. \begin{code} -enumParser = specialTypeParser "ENUMERATION" (listParser enumFieldParser) - -- <|> enumSelectionParser - -- <?> "BNF Enum declaration" -\end{code} - -\TODO{move the following somewhere else.} - -On evaluating the inner selection is expanded, and bound -translated to an enum with. -\begin{code} --- makeEnum :: LysType -> LysType --- makeEnum (SelectionType []) = [] --- makeEnum (SelectionType (s:xs)) --- = EnumField n name : makeEnum xs --- where (SelectionField n name _ _) = s +enumOfTypeParser = string "ENUMERATION-OF" *> whitespaces *> withDelim "()" word \end{code} \subsubsection{Structures} A sturcture is just a simple compound data type, on the form \begin{verbatim} -( field-name : TYPE; - other-name : TYPE; -) +( field-name : TYPE; ... 
) \end{verbatim} -Note here that with my implementation the last semicolon is -optional. This is to better work with how the BNF specifies -RPC requests (see section \ref{item:rpc}) +% Note here that with my implementation the last semicolon is +% optional. This is to better work with how the BNF specifies +% RPC requests (see section \ref{item:rpc}) \begin{code} -structFieldParser :: GenParser Char () TypeField -structFieldParser = StructureField - <$> bindingParser - <* whitespaces - <* ( char ';' - <|> lookAhead (char ')') - <?> "Struct Field End") -\end{code} - -The reason for the \lstinline{lookahead (char ')')} in the -above code is since we want to check for the structure -ending, but don't consume it. Since the actual parsing of -the surrounding parenthisis is done in the ``listParser''. +structFieldParser :: GenParser Char () (String, LysType) +structFieldParser = (,) + <$> (word <* withWS (string ":")) + <*> lystypeParser <* whitespaces <* char ';' -\begin{code} -structParser = listParser structFieldParser -\end{code} - -\hr - -We can now create a general type parser, which juts binds -all our above parsers into one. It also shows that the -reason for LysType existing besides TypeField was so that we -didn't have to wory about a type being single or multiple. 
- -\begin{code} -typeParser :: GenParser Char () LysType -typeParser = (BitstringType <$> try bitstringParser) - <|> ( EnumType <$> try enumParser) - <|> (SelectionType <$> try selectionParser) - <|> ( ArrayType <$> try arrayParser) - <|> (StructureType <$> try structParser) - <|> ( SimpleType <$> typeWordParser) - <?> "LysType" +structTypeParser :: GenParser Char () [(String, LysType)] +structTypeParser = listParser structFieldParser \end{code} \subsection{Bindings} @@ -295,16 +212,20 @@ bindingParser :: GenParser Char () TypeBinding bindingParser = TypeBinding <$> (word <* withWS (try (string "::=") - <|> string ":" - <?> "Name Type Separator")) - <*> typeParser + <?> "::= expected")) + <*> lystypeParser \end{code} +\begin{verbatim} +$ parseTest bindingParser "a ::= ARRAY INT32" +TypeBinding "a" (ARRAY INT32) +\end{verbatim} + \subsection{Extra Helpers} \TODO{These are here, they should maybe be moved.} \begin{code} -maybeTypeParser = option Nothing $ Just <$> typeParser +maybeTypeParser = option Nothing $ Just <$> lystypeParser maybeBindingParser = option Nothing $ Just <$> bindingParser \end{code} diff --git a/doc/main.tex b/doc/main.tex index 791441e0e68167e4d7fea0bfcaa0b8e0dd7380de..7751edc8a1561837635b0d66c5d9f405d8c553ff 100644 --- a/doc/main.tex +++ b/doc/main.tex @@ -51,7 +51,7 @@ The most up to date version of this document, along with its source code, can be found at \mbox{\url{https://git.lysator.liu.se/hugo/hskom}}. -\chapter {From BNF to AST} +\chapter {Parsing protocol-a.txt} \label{cha:bnfast} What we had from the outset was an info page detailing Protocol A, as well as a @@ -69,7 +69,7 @@ chapter. \section{Types \& Bindings} \input{lhs/Notation.lhs} -\section{RPC \& Async} +\section{RPC \& Async Declarations} \label{item:rpc} \input{lhs/RPC.lhs} @@ -81,7 +81,7 @@ chapter. 
\input{lhs/AstHaskell.lhs} -\chapter {From Incomming Message to Haskell} +\chapter {Parsing Line-data} \label{cha:incomming} Everything so far has simple been about parsing a BNF file, and generating Haskell code from it. Now we start actually looking towards actual data!