SIDL Grammar Reference

The following document provides a reference for the SIDL grammar in extended BNF form. This grammar is taken directly from the JavaCC source code in the Babel parser. This document is intended as a reference guide to the SIDL grammar and not as a learning reference. We are developing a separate SIDL grammar introductory overview.

Please direct any questions or comments to components@llnl.gov.

/*
 * The following lexical tokens are ignored.
 */
SKIP : {
   < " " >
 | < "\n" >
 | < "\r" >
 | < "\t" >
 | < "//" (~["\n","\r"])* ("\n" | "\r" | "\r\n") >
 | < "/**/" >
 | < "/*" (~["*"])+ "*" ("*" | ~["*","/"] (~["*"])* "*")* "/" >
}

/*
 * The following lexical states define the transitions necessary to
 * parse documentation comments.  Documentation comments may appear
 * anywhere in the file, although they are only saved if they precede
 * definition or method productions.  Documentation comments are
 * represented by "special tokens" in the token list.
 */
SPECIAL_TOKEN : {
   < T_COMMENT : "/**" > : BEGIN_DOC_COMMENT
}

<BEGIN_DOC_COMMENT> SKIP : {
   < " " >
 | < "\t" >
 | < "*/" >                   : DEFAULT
 | < ("\n" | "\r" | "\r\n") > : LINE_DOC_COMMENT
 | < "" >                     : IN_DOC_COMMENT
}

<LINE_DOC_COMMENT> SKIP : {
   < " " >
 | < "\t" >
 | < "*/" >      : DEFAULT
 | < "*" (" ")?> : IN_DOC_COMMENT
 | < "" >        : IN_DOC_COMMENT
}

<IN_DOC_COMMENT> SPECIAL_TOKEN : {
   < "*/" >                   : DEFAULT
 | < ("\n" | "\r" | "\r\n") > : LINE_DOC_COMMENT
}

<IN_DOC_COMMENT> MORE : {
   < ~[] >
}

/*
 * The following keywords are the lexical tokens in the SIDL grammar.
 */
TOKEN : {
   < T_ABSTRACT       : "abstract" >
 | < T_CLASS          : "class" >
 | < T_COPY           : "copy" >
 | < T_ENUM           : "enum" >
 | < T_EXTENDS        : "extends" >
 | < T_IMPORT         : "import" >
 | < T_IN             : "in" >
 | < T_INOUT          : "inout" >
 | < T_FINAL          : "final" >
 | < T_IMPLEMENTS     : "implements" >
 | < T_IMPLEMENTS_ALL : "implements-all" >
 | < T_INTERFACE      : "interface" >
 | < T_LOCAL          : "local" >
 | < T_ONEWAY         : "oneway" >
 | < T_OUT            : "out" >
 | < T_PACKAGE        : "package" >
 | < T_STATIC         : "static" >
 | < T_THROWS         : "throws" >
 | < T_VERSION        : "version" >
 | < T_VOID           : "void" >
 | < T_ARRAY          : "array" >
 | < T_BOOLEAN        : "bool" >
 | < T_CHAR           : "char" >
 | < T_DCOMPLEX       : "dcomplex" >
 | < T_DOUBLE         : "double" >
 | < T_FCOMPLEX       : "fcomplex" >
 | < T_FLOAT          : "float" >
 | < T_INT            : "int" >
 | < T_LONG           : "long" >
 | < T_OPAQUE         : "opaque" >
 | < T_STRING         : "string" >
 | < T_IDENTIFIER     : <T_LETTER> (<T_LETTER> | <T_DIGIT> | "_")* >
 | < T_VERSION_STRING : <T_INTEGER> ("." <T_INTEGER>)+ >
 | < T_INTEGER        : (["-","+"])? (<T_DIGIT>)+ >
 | < T_DIGIT          : ["0"-"9"] >
 | < T_LETTER         : ["a"-"z","A"-"Z"] >
 | < T_CLOSE_ANGLE    : ">" >
 | < T_CLOSE_CURLY    : "}" >
 | < T_CLOSE_PAREN    : ")" >
 | < T_COMMA          : "," >
 | < T_EQUALS         : "=" >
 | < T_OPEN_ANGLE     : "<" >
 | < T_OPEN_CURLY     : "{" >
 | < T_OPEN_PAREN     : "(" >
 | < T_SEMICOLON      : ";" >
 | < T_SCOPE          : "." >
}

/**
 * A SIDL Specification contains zero or more version productions followed
 * by zero or more import productions followed by zero or more package
 * productions followed by the end-of-file.
 */
SPECIFICATION :=
   ( VERSION )*
   ( IMPORT  )*
   ( PACKAGE )*
   <EOF>

/**
 * A SIDL Version begins with "version" followed by a package identifier.
 * The version number is specified in the general form "V1.V2...Vn" where
 * Vi is a non-negative integer.  Only one version statement is allowed
 * for each package identifier; if two version statements exist that name
 * the same package, a <code>SymbolException</code> is thrown.  Because all
 * scoped identifiers must start with a package name, the identifier in the
 * version statement will be a package name (if it exists).
 */
VERSION :=
   <T_VERSION> IDENTIFIER ( <T_INTEGER> | <T_VERSION_STRING> ) <T_SEMICOLON>

/**
 * A SIDL Import production begins with an "import" token and is followed
 * by a scoped identifier that must be a package name.  The scoped identifier
 * must be defined and it must be a package.  A particular package may only
 * be included in one import statement.  The import package name is added to
 * the default search path.  At the end of the parse, any import statements
 * that were not used to resolve a symbol name are output as warnings.
 */
IMPORT :=
   <T_IMPORT> SCOPED_IDENTIFIER <T_SEMICOLON>

/**
 * The SIDL package specification begins with a "package" token followed by
 * a scoped identifier.  The new package namespace begins with an open curly
 * brace, a set of zero or more definitions, and a close curly brace.  The
 * closing curly brace may be followed by an optional semicolon.  The package
 * identifier must have a version defined for it, and it must not have been
 * previously defined as a symbol or used as a forward reference.  The parent
 * of the package must itself be a package and must have been defined.  The
 * symbols within the curly braces will be defined within the package scope.
 */
PACKAGE :=
   <T_PACKAGE> SCOPED_IDENTIFIER
   <T_OPEN_CURLY> ( DEFINITION )* <T_CLOSE_CURLY>
   [ <T_SEMICOLON> ]

/**
 * A SIDL Definition production consists of a class, interface, enumerated
 * type, or package.
 */
DEFINITION :=
   ( CLASS | ENUM | INTERFACE | PACKAGE )

/**
 * A SIDL class specification begins with an optional abstract keyword
 * followed by the class token followed by an identifier.  The abstract
 * keyword is required if and only if there are abstract methods in the
 * class.  The class keyword is followed by an identifier.  The identifier
 * string may not have been previously defined, although it may have been
 * used as a forward reference.  The identifier string may be preceded
 * by a documentation comment.  A class may optionally extend another class;
 * if no class is specified, then the class will automatically extend the
 * SIDL base class (unless it is itself the SIDL base class).  Then parse
 * the implements-all and implements clauses.  The interfaces parsed during
 * implements-all are saved in a set and then all those methods are defined
 * at the end of the class definition.  The methods block begins with an
 * open curly-brace followed by zero or more methods followed by a close
 * curly-brace and optional semicolon.
 */
CLASS :=
   [ <T_ABSTRACT> ] <T_CLASS> IDENTIFIER

   [ <T_EXTENDS> SCOPED_IDENTIFIER ]
   [ <T_IMPLEMENTS_ALL> SCOPED_IDENTIFIER ( <T_COMMA> SCOPED_IDENTIFIER )* ]
   [ <T_IMPLEMENTS> SCOPED_IDENTIFIER ( <T_COMMA> SCOPED_IDENTIFIER )* ]

   <T_OPEN_CURLY> ( CLASS_METHOD )* <T_CLOSE_CURLY>

   [ <T_SEMICOLON> ]

/**
 * The SIDL enumeration specification begins with an "enum" token followed by
 * an identifier.  The enumerator list consists of an open curly brace, one
 * or more comma-separated enumerators, and a close curly brace.  The closing
 * curly brace may be followed by an optional semicolon.  The enumeration symbol
 * identifier must have a version defined for it, and it must not have been
 * previously defined as a symbol.  Forward references are not allowed for
 * enumerated types.  This routine creates the enumerated class and then
 * grabs the list of enumeration symbols and their optional values.  The
 * trailing comma is there by request of Ben Allan, since it made it easier
 * for him to automatically generate SIDL enum statements from Java.
 */
ENUM :=
   <T_ENUM> IDENTIFIER

   <T_OPEN_CURLY>
      ENUMERATOR ( <T_COMMA> ENUMERATOR )* [ <T_COMMA> ]
   <T_CLOSE_CURLY>

   [ <T_SEMICOLON> ] 

/**
 * The SIDL enumerator specification consists of an identifier followed
 * by an optional assignment statement beginning with an equals and followed
 * by an integer value.  This routine adds the new enumeration symbol to
 * the list and then returns.
 */
ENUMERATOR :=
   IDENTIFIER [ <T_EQUALS> <T_INTEGER> ]

/**
 * A SIDL interface specification begins with the interface token followed
 * by an identifier.  An interface may have an extends block consisting of
 * a comma-separated sequence of interfaces.  The methods block begins with
 * an open curly-brace followed by zero or more methods followed by a close
 * curly-brace and optional semicolon.  Interfaces may be preceded by a
 * documentation comment.  The identifier string may not have been previously
 * defined, although it may have been used as a forward reference.  If the
 * interface does not extend another interface, then it must extend the base
 * SIDL interface (unless, of course, this is the definition for the base
 * SIDL interface).
 */
INTERFACE :=
   <T_INTERFACE> IDENTIFIER

   [ <T_EXTENDS> SCOPED_IDENTIFIER ( <T_COMMA> SCOPED_IDENTIFIER )* ]

   <T_OPEN_CURLY>
      ( METHOD )*
   <T_CLOSE_CURLY>

   [ <T_SEMICOLON> ]

/**
 * This production parses the SIDL method description for a class method.
 * A class method may start with abstract, final, or static.  An error is
 * thrown if the method has already been defined in the class object or if
 * the method name is the same as the class name.  An error is also thrown
 * if a method has been defined in a parent class and (1) the signatures
 * do not match, (2) either of the methods is static, (3) the existing method
 * is final, or (4) the new method is abstract but the existing method was
 * not abstract.
 */
CLASS_METHOD :=
   [ ( <T_ABSTRACT> | <T_FINAL> | <T_STATIC> ) ] METHOD

/**
 * The SIDL method production has a return type, a method identifier,
 * an optional argument list, an optional communication modifier, and
 * an optional throws clause.  The return type may be void (no return
 * type) or any valid SIDL type.  The method is built piece by piece.
 */
METHOD :=
   ( <T_VOID> | [ <T_COPY> ] TYPE ) IDENTIFIER

   <T_OPEN_PAREN>
      [ ARGUMENT ( <T_COMMA> ARGUMENT )* ]
   <T_CLOSE_PAREN>

   [ <T_LOCAL> | <T_ONEWAY> ]

   [ <T_THROWS> SCOPED_IDENTIFIER ( <T_COMMA> SCOPED_IDENTIFIER )* ]

   <T_SEMICOLON>

/**
 * Parse a SIDL argument.  Arguments begin with an optional copy modifier
 * followed by in, out, or inout followed by a type and a formal argument.
 * The argument is returned on the top of the argument stack.  This routine
 * also checks that the copy modifier is used only for symbol objects.  For
 * all other types, copy is redundant.
 */
ARGUMENT :=
   [ <T_COPY> ] ( <T_IN> | <T_OUT> | <T_INOUT>) TYPE IDENTIFIER

/**
 * A SIDL type consists of one of the standard built-in types (bool,
 * char, dcomplex, double, fcomplex, float, int, long, opaque, and string),
 * a user-defined type (interface, class, or enum), or an array.  This
 * production parses the type and pushes the resulting type object on
 * the top of the argument stack.
 */
TYPE :=
   ( <T_BOOLEAN>
   | <T_CHAR>
   | <T_DCOMPLEX>
   | <T_DOUBLE>
   | <T_FCOMPLEX>
   | <T_FLOAT>
   | <T_INT>
   | <T_LONG>
   | <T_OPAQUE>
   | <T_STRING>
   | ARRAY
   | SYMBOL_TYPE )

/**
 * Parse an array construct and push the resulting type on top of the stack.
 * Currently, only dimensions one through four (inclusive) are supported.
 */
ARRAY :=
   <T_ARRAY> <T_OPEN_ANGLE> TYPE [ <T_COMMA> <T_INTEGER> ] <T_CLOSE_ANGLE>

/**
 * This production parses a scoped identifier and verifies that it is
 * either a forward reference or a symbol that may be used as a type
 * (either an enum, an interface, or a class).
 */
SYMBOL_TYPE :=
   SCOPED_IDENTIFIER

/**
 * All SIDL scoped names are of the general form "ID ( . ID )*".  Each
 * identifier ID is a string of letters, numbers, and underscores that
 * must begin with a letter.  The scope resolution operator "." separates
 * the identifiers in a name.
 */
SCOPED_IDENTIFIER :=
   IDENTIFIER ( <T_SCOPE> IDENTIFIER)*

/**
 * A SIDL identifier must start with a letter and may be followed by any
 * number of letters, numbers, or underscores.  It may not be a reserved
 * word in any of the SIDL implementation languages (e.g., C or C++).  Note
 * that the reserved words are not implemented in the grammar but checked
 * after the token has been parsed.
 */
IDENTIFIER :=
   <T_IDENTIFIER>