Grammar

Formal definition of the Zimbu grammar.

NOTE: this is work in progress!


Notation:

MAINFILE    starting point for a program
IMPORTFILE  starting point for an imported file

one-item    non-terminal
"abc"       terminal representing string constant "abc"
"a" .. "z"  terminal: a character in the range from "a" to "z"
"^abc"      terminal: any character but "a", "b" or "c"
TAB         terminal, Tab character
CR          terminal, Carriage Return character, ASCII 0x0d
NL          terminal, New Line character, ASCII 0x0a
ANY         terminal, any character let through by the preprocessor

->          produces
|           alternative
;           end of rule
()          group items into one non-terminal
?           preceding item is optional
*           preceding item appears zero or more times
+           preceding item appears one or more times
!           anything but next item

# text      comment, ignored


PREPROCESSING

All CR characters are silently removed from the input.
All TAB, NUL and other control characters result in an error.  Control characters
are 0x00 - 0x1f (excluding NL and CR) and 0x7f.
All BOM (Byte Order Mark) characters are silently removed from the input.
All invalid UTF-8 results in an error.


#
# toplevel
#

MAINFILE         -> skip
                    import*
                    file-item*
                    main
                    file-item*
                    ;

IMPORTFILE       -> skip
                    import*
                    ( module-def | class-def | enum-def )
                    ;

import           -> "IMPORT"  sep
                    ( """  file-name  """ | "<"  file-name  ">" )
                    sep-with-eol ;

file-item        -> ( var-def | module-def | class-def | enum-def | method-def ) ;

main             -> "MAIN()"  sep-with-eol
                      block-item+
                    block-end  ;

var-def          -> type  sep  var-decl
                    ( skip  ","  sep  var-decl )*  line-sep ;

module-def       -> "MODULE"  sep  group-name  sep-with-eol
                      block-item*
                    block-end  ;

class-def        -> "CLASS"  sep  group-name  sep-with-eol
                      block-item*
                    block-end  ;

enum-def         -> "ENUM"  sep  group-name  sep-with-eol
                       (( var-name sep )*  var-name  sep-with-eol )?
                    block-end ;

method-def       -> function-def | proc-def | new-def | equal-def ;

function-def     -> "FUNC"  sep  var-name  method-args  ":"  sep  type  method-common ;

proc-def         -> "PROC"  sep  var-name  method-args  method-common  ;

new-def          -> "NEW"  method-args  method-common  ;

equal-def        -> "EQUAL"  method-args  method-common  ;

method-args      -> "("  sep-with-eol?  arg-defs?  ")"  ;

method-common    -> sep-with-eol
                      block-item*
                    block-end  ;

block-item       -> ( file-item
                    | assignment
                    | function-call
                    | conditional
                    | while
                    | do-until
                    | for-in
                    | switch
                    | return
                    | exit
                    | break
                    | continue
                    | nop
                    | block
                    ) ;

assignment       -> comp-name  sep  "="  sep  expr  line-sep ;

function-call    -> comp-name  skip  "("  arguments?  ")"  line-sep ;

conditional      -> "IF"  sep  expr  line-sep
                    block-item+
                    elseif-part*
                    else-part?
                    block-end  ;

elseif-part      -> "ELSEIF"  sep  expr  line-sep
                     block-item+  ;

else-part        -> "ELSE"  line-sep
                     block-item+  ;

while            -> "WHILE"  loop-name?  sep  expr  line-sep
                       block-item+
                     block-end  ;

do-until         -> "DO"  loop-name?  line-sep
                       block-item+
                     "UNTIL"  sep  expr sep-with-eol  ;

for-in           -> "FOR"  loop-name?  sep  var-name  sep  "IN"  sep  expr  line-sep
                       block-item+
                     block-end  ;

switch           ->  "SWITCH"  sep  expr  line-sep
                       switch-item+
                     block-end  ;

switch-item      ->  ( ( "CASE"  sep  expr  line-sep )+ | "DEFAULT"  line-sep )
                        block-item+
                     ;

return           ->  "RETURN"  ( sep  expr )?  line-sep  ;


exit             ->  "EXIT"  sep  expr  line-sep  ;

break            ->  "BREAK"  loop-name?  line-sep  ;

continue         ->  "CONTINUE"  loop-name?  line-sep  ;

nop              ->  "NOP"  line-sep  ;

block            ->  "{"  line-sep
                        block-item+
                     block-end  ;

#
# above: whole-line items
# below: parts of a line items
#

var-decl         -> var-name  var-init? ;

var-init         -> sep  "="  sep  expr ;


arg-defs         -> arg-def  ( ","  sep  arg-def ) *  skip  ;

arg-def          -> type  sep  "&"?  var-name ;

arguments        -> "&"?  expr  ( ","   sep   "&"?  expr )*  ;

comp-name        -> var-name  comp-follow*
                    | member-name  comp-follow*
                    | group-name  comp-follow+
                    ;

comp-follow      -> ( dot-item  |  paren-item  |  bracket-item  |  angle-item  )  ;

dot-item         -> sep-with-eol?  "."  ( var-name | member-name ) ;


paren-item       -> "("  arguments?  ")"  ;

bracket-item     -> "[" skip  expr  skip  "]"  ;

angle-item       -> "<"  arguments  ">"  ;

#
# expressions
#

expr             -> alt-expr  ;

alt-expr         -> or-expr  ( sep  "?"  sep  alt-expr  sep  ":"  sep  alt-expr )?  ;

or-expr          -> and-expr  ( sep  "||"  sep  and-expr )*  ;

and-expr         -> comp-expr  ( sep  "&&"  sep  comp-expr )*  ;

comp-expr        -> concat-expr  ( sep  ( "==" | "!=" | ">" | ">=" | "<" | "<=" | "IS" | "ISNOT" | "ISA" | "ISNOTA" )  sep  concat-expr )*  ;

concat-expr      -> bitwise-expr  ( sep   ".."  sep  bitwise-expr )* ;

bitwise-expr     -> shift-expr ( sep  ( "&" | "|"  | "^" )  sep  shift-expr )* ;

shift-expr       -> add-expr  ( sep ( ">>" | "<<" )  sep  add-expr )* ;


add-expr         -> mult-expr  ( sep  ( "+" | "-" )  sep  mult-expr )*  ;

mult-expr        -> incr-expr  ( sep  ( "*" | "/" | "%" )  sep  incr-expr )*  ;

incr-expr        -> ( "++" | "--" )?  neg-expr  ( "++" | "--" )?  ;

neg-expr         -> ( "-" | "!" )?  dot-expr  ;

dot-expr         -> paren-expr  ( TODO )?  ;

paren-expr       -> "("  skip  expr  skip  ")"  |  base-expr ;

base-expr        -> ( "EOF" | "NIL" | "THIS" | "TRUE" | "FALSE" | "OK" | "FAIL" | new-item | string | char | number | list | dict | comp-name )  ;

string           -> """  ( "^\"" | "\"  ANY )*  """  ;

char             -> "'"  ( "^\'" | "\"  ANY )  "'"  ;

number           -> decimal-number | hex-number | binary-number  ;

decimal-number   -> digit  ( digit | "'")*  ;

hex-number       -> ( "0x" | "0X" ) ( "0" .. "9" | "a" .. "f" | "A" .. "F" | "'" )+  ;


binary-number    -> ( "0b" | "0B" ) ( "0" | "1" | "'" )+  ;

list             -> "["  ( skip  ( expr  ","  sep )*  expr  ( ","  sep)? )?  skip  "]"  ;

dict             -> empty-dict | non-empty-dict ;

empty-dict       -> "[:]" ;

non-empty-dict   -> "["  ( skip  ( dict-item  ","  sep )*  dict-item  ","? )?  skip  "]"  ;


dict-item        -> expr  skip  ":"  sep  expr  ;

new-item         -> "NEW"  "("  arguments?  ")"  ;

type             -> comp-name  ;

#
# identifiers
#

loop-name        -> "."  var-name ;

file-name        -> ( ! EOL ) + ;

group-name       -> upper  id-char*  lower  id-char* ;

var-name         -> lower  id-char* ;

member-name      -> upper  id-char*  lower  id-char* | lower  id-char* ;

id-char          -> alpha | digit | "_" ;

alpha            -> upper | lower ;

upper            -> "A" .. "Z" ;

lower            -> "a" .. "z" ;

digit            -> "0" .. "9" ;

block-end        -> "}"  sep-with-eol  ;

#
# white space and comments
#

# line separator: either a semicolon or an EOL with optional white space and comments
line-sep         -> semicolon | sep-with-eol ;

# a semicolon with mandatory following white space
semicolon        -> ";" white ;

# at least one line break, with optional comments and white space
sep-with-eol     -> ( white comment )? EOL skip ;


# mandatory white space with optional comments
sep              -> ( white | EOL ) skip ;

# optional white space, comments and line breaks
skip             -> ( ( white | EOL ) ( white | comment | EOL )* )? ;

# White space
white            -> " "+ ;

# One comment, up to EOL
comment          -> "#" ( ! EOL ) * ;