Source

semantic / c.bnf

Diff from to

File c.bnf

-# Simple BNF notation for top-level C elements.
+# C & C++ BNF language specification
 #
-# Copyright (C) 1999, 2000 Eric M. Ludlam
+# Copyright (C) 1999, 2000, 2001 Eric M. Ludlam
 #
 # Author: Eric M. Ludlam <zappo@gnu.org>
 # X-RCS: $Id$
 # Boston, MA 02111-1307, USA.
 #
 # $Log$
-# Revision 1.20  2000/10/30 01:34:23  zappo
-# Tidied up opt-array stuff.
+# Revision 1.37  2001/06/03 14:01:13  zappo
+# Multi-inheritance for classes.
+# Structs like classes for C++.
+# No ; on name spaces.
+# Throws for methods.
+# (Bugs & patches from "Jesper Nordenberg" <mayhem@home.se>
+# and Norbert Lindlbauer <Norbert_Lindlbauer@betaresearch.de>)
 #
-# Revision 1.19  2000/09/27 02:08:04  zappo
-# Moved `type' to be before `function' and `variable' in `declaration'.
+# Revision 1.36  2001/05/25 01:12:10  zappo
+# (expression): Added string.
 #
-# Revision 1.18  2000/09/21 03:35:08  zappo
-# Fixed up struct/union variable declaration section to use new lexical
-# tokens for struct/union.
+# Revision 1.35  2001/05/09 03:25:17  zappo
+# Added inline method support.
+# Store protection elements
+# Change enum parts to be integer variables.
 #
-# Revision 1.17  2000/09/19 04:22:31  zappo
-# Added %keywordtable
+# Revision 1.34  2001/05/01 16:52:40  zappo
+# Revamped lots of summary strings.
+# Added `type' as a possible expansion in classsubparts.
 #
-# Revision 1.16  2000/09/14 19:23:27  zappo
-# Updated to use %token to make new tokens for use in RULES.
+# Revision 1.33  2001/04/21 14:37:55  zappo
+# Spelling error for integer.
 #
-# Revision 1.15  2000/09/11 23:00:32  zappo
-# Use new BNF settings section.
+# Revision 1.32  2001/04/13 02:24:45  zappo
+# Added built in types (void, char, etc) and summaries.
 #
-# Revision 1.14  2000/07/05 14:41:31  zappo
-# Support multiple continuous decl modifiers.
-# Added modifiers into variables.
+# Revision 1.31  2001/04/07 02:43:48  zappo
+# Added name spaces, and references.
 #
-# Revision 1.13  2000/07/01 18:15:31  zappo
-# Updated for new elements in the tokens.
+# Revision 1.30  2001/03/10 16:18:15  zappo
+# Added lots of summaries to %tokens.
+# variables and functions now share declmod and typedecl start
+# match, and then get recombobulated later.  Prevents massive
+# reparsing.
 #
-# Revision 1.12  2000/06/13 14:37:48  zappo
-# The table has moved.
+# Revision 1.29  2001/02/24 15:24:24  zappo
+# Added a few optimizations for structure parts.
+# Used some %tokens in a few spots where I wasn't using them before.
 #
-# Revision 1.11  2000/05/06 01:32:57  zappo
-# Use EXPANDFULL for enums, and for argument lists to functions.
+# Revision 1.28  2001/02/20 20:36:19  zappo
+# Removed unused %put calls.
 #
-# Revision 1.10  2000/04/29 12:54:15  zappo
-# Added support for Linux __P items.
-# Added support for "..." fn prototypes.
+# Revision 1.27  2001/02/09 19:49:26  zappo
+# Added paren testing to argument lists.
 #
-# Revision 1.9  2000/04/28 18:57:49  zappo
-# Added system flag to include files.
+# Revision 1.26  2001/02/09 11:47:32  zappo
+# Added type separation characters.
+# Added all constituents of DECLMOD into their own tokens.
+# Created a rule to create a list of declmods.
 #
-# Revision 1.8  2000/04/25 14:46:42  zappo
-# structparts now uses EXPANDFULL to get positional information for it's
-# sub parts.  This simplified STRUCTSUBPARTS nonterminal
+# Revision 1.25  2001/02/02 04:14:53  zappo
+# Added c++ to the list of language modes.
+# Added lots of operator symbol tokens.
+# Added DECLMOD symbol w/ complex regexp.
+# Added CLASS, OPERATOR, PUBLIC, PRIVATE, and PROTECTED token keywords.
+# Support parsing of a class.
+# Add class bit parsing for functions.
+# Add destructor bit parsing for functions.
+# Allow operator symbols for methods.
+# Added parent and destructor fields to extra-spec for functions.
+# The function rule now returns a function or prototype.
 #
-# Revision 1.7  2000/04/23 15:34:28  zappo
-# Added copyright information.
+# Revision 1.24  2001/01/31 15:26:10  zappo
+# Added `codeblock' rule and `%scopestart'.
 #
-# Revision 1.6  2000/04/20 23:55:03  zappo
-# Macros split from 'variable' nt.
-# Added knr-argument parsing.
+# Revision 1.23  2001/01/24 21:08:41  zappo
+# Added support for new token formats that use ASSOC.
 #
-# Revision 1.5  2000/04/16 22:34:07  zappo
-# Added `filename' for include rules.
+# Revision 1.22  2001/01/06 14:35:40  zappo
+# Put `type' `t' onto some tokens.
+# Struct and enums now match the braces, and return nil.
+# Enum parts now have a token type of 'enum.
+# Added ... to function arg lists.
+# Match parens for arg lists.
 #
-# Revision 1.4  1999/12/17 20:53:16  zappo
-# Added a splice , for varnamelist.
-#
-# Revision 1.3  1999/06/06 14:20:24  zappo
-# Fixed up some language definitions.
-#
-# Revision 1.2  1999/05/18 14:08:56  zappo
-# Fixed up the returns of some nonterminals.
+# [...]
 #
 # Revision 1.1  1999/05/17 17:28:30  zappo
 # Initial revision
 #
 
 %start         declaration
+%scopestart    codeblock
 %outputfile    semantic-c.el
 %parsetable    semantic-toplevel-c-bovine-table
 %keywordtable  semantic-c-keyword-table
-%languagemode  c-mode
+%languagemode  (c-mode c++-mode)
 %setupfunction semantic-default-c-setup
 
 %(setq semantic-expand-nonterminal 'semantic-expand-c-nonterminal
        semantic-flex-extensions semantic-flex-c-extensions
        semantic-dependency-include-path semantic-default-c-path
        imenu-create-index-function 'semantic-create-imenu-index
+       semantic-type-relation-separator-character '("." "->")
+       semantic-command-separation-character ";"
        document-comment-start "/*"
        document-comment-line-prefix " *"
        document-comment-end " */"
        )%
 
-%token HASH punctuation "#"
 %token INCLUDE "include"
 %token DEFINE "define"
+%token HASH punctuation "#"
 %token PERIOD punctuation "."
 %token COLON punctuation ":"
 %token SEMICOLON punctuation ";"
 %token STAR punctuation "*"
+%token AMPERSAND punctuation "&"
+%token DIVIDE punctuation "/"
+%token PLUS punctuation "+"
+%token MINUS punctuation "-"
+%token BANG punctuation "!"
 %token EQUAL punctuation "="
+%token LESS punctuation "<"
+%token GREATER punctuation ">"
 %token COMA punctuation ","
+%token TILDE punctuation "~"
+
+%token EXTERN "extern"
+%put EXTERN summary "Declaration Modifier: extern <type> <name> ..."
+%token STATIC "static"
+%put STATIC summary "Declaration Modifier: static <type> <name> ..."
+%token CONST "const"
+%put CONST summary "Declaration Modifier: const <type> <name> ..."
+%token VOLATILE "volatile"
+%put VOLATILE summary "Declaration Modifier: volatile <type> <name> ..."
+%token SIGNED "signed"
+%put SIGNED summary "Numeric Type Modifier: signed <numeric type> <name> ..."
+%token UNSIGNED "unsigned"
+%put UNSIGNED summary "Numeric Type Modifier: unsigned <numeric type> <name> ..."
+
+%token INLINE "inline"
+%put INLINE summary "Function Modifier: inline <return  type> <name>(...) {...};"
+%token VIRTUAL "virtual"
+%put VIRTUAL summary "Method Modifier: virtual <type> <name>(...) ..."
+
 %token STRUCT "struct"
+%put STRUCT summary "Structure Type Declaration: struct [name] { ... };"
 %token UNION "union"
+%put UNION summary "Union Type Declaration: union [name] { ... };"
 %token ENUM "enum"
+%put ENUM summary "Enumeration Type Declaration: enum [name] { ... };"
 %token TYPEDEF "typedef"
+%put TYPEDEF summary "Arbitrary Type Declaration: typedef <typedeclaration> <name>;"
+%token CLASS "class"
+%put CLASS summary "Class Declaration: class <name>[:parents] { ... };"
+%token NAMESPACE "namespace"
+%put NAMESPACE summary "Namespace Declaration: namespace <name> { ... };"
 
+%token THROW "throw"
+%put THROW summary "<type> <methoddef> (<method args>) throw (<exception>) ..."
 
-declaration : include
-	    | macro
+# Leave these alone for now.
+%token OPERATOR "operator"
+%token PUBLIC "public"
+%token PRIVATE "private"
+%token PROTECTED "protected"
+
+
+# These aren't used for parsing, but this is a useful place to describe the keywords.
+%token IF "if"
+%token ELSE "else"
+%put {IF ELSE} summary  "if (<condition>) { code } [ else { code } ]"
+
+%token DO "do"
+%token WHILE "while"
+%put DO summary " do { code } while (<condition>);"
+%put WHILE summary "do { code } while (<condition>); or while (<condition>) { code };"
+
+%token FOR "for"
+%put FOR summary "for(<init>; <condition>; <increment>) { code }"
+
+%token SWITCH "switch"
+%token CASE "case"
+%token DEFAULT "default"
+%put {SWITCH CASE DEFAULT} summary
+"switch (<variable>) { case <constvalue>: code; ... default: code; }"
+
+%token RETURN "return"
+%put RETURN summary "return <value>;"
+
+%token BREAK "break"
+%put BREAK summary "Non-local exit within a loop or switch (for, do/while, switch): break;"
+%token CONTINUE "continue"
+%put CONTINUE summary "Non-local continue within a loop (for, do/while): continue;"
+
+%token SIZEOF "sizeof"
+%put SIZEOF summary "Compile time macro: sizeof(<type or variable>) // size in bytes"
+
+# Types
+%token VOID "void"
+%put VOID summary "Built in typeless type: void"
+%token CHAR "char"
+%put CHAR summary "Integral Character Type: (0 to 255)"
+%token SHORT "short"
+%put SHORT summary "Integral Primitive Type: (-32768 to 32767)"
+%token INT "int"
+%put INT summary "Integral Primitive Type: (-2147483648 to 2147483647)"
+%token LONG "long"
+%put LONG summary "Integral primitive type (-9223372036854775808 to 9223372036854775807)"
+%token FLOAT "float"
+%put FLOAT summary "Primitive floating-point type (single-precision 32-bit IEEE 754)"
+%token DOUBLE "double"
+%put DOUBLE summary "Primitive floating-point type (double-precision 64-bit IEEE 754)"
+
+declaration : macro
 	    | type
-	    | function
-	    | variable
-	    | prototype
+	    | var-or-fun
 	    | define
 	    ;
-		
-include : HASH INCLUDE punctuation "<" filename punctuation ">"
-	  ( ,$4 include t nil )
-        | HASH INCLUDE string
-	  ( (read $3) include nil nil )
-        ;
+
+# Rule named by the %scopestart setting above.  Accepts the same
+# kinds of elements as `declaration' except `macro'.
+# NOTE(review): presumably the entry point used when parsing the
+# inside of a function body -- confirm against the parser setup.
+codeblock : define
+	  | var-or-fun
+	  | type # type is less likely to be used here.
+	  ;
+
+# A preprocessor line: `#' followed by either a define or an include.
+# The value built by macro-or-include is spliced in unchanged.
+macro : HASH macro-or-include
+	( ,$2 )
+      ;
+
+# The part of a preprocessor line that follows the `#':
+#  - DEFINE: a `variable' named by the symbol ($2), with the optional
+#    expression ($3) as its value and the `const' attribute set.
+#  - INCLUDE <file>: an `include' named by the assembled filename,
+#    with its third field set to t.
+#  - INCLUDE "file": an `include' whose name is the string with its
+#    quotes removed by `read', with its third field nil.
+# NOTE(review): the third field looks like the "system include" flag
+# mentioned in the revision 1.9 log entry -- confirm.
+macro-or-include : DEFINE symbol opt-expression
+		   ( $2 variable nil $3
+			(ASSOC const t)
+			nil
+			)
+		 | INCLUDE LESS filename GREATER
+		   ( ,$3 include t nil )
+		 | INCLUDE string
+		   ( (read $2) include nil nil )
+		 ;
+
+# This is used in struct parts.
+# Unlike macro-or-include, this rule matches the leading `#' itself,
+# so the positions are: $1 = HASH, $2 = DEFINE, $3 = macro name,
+# $4 = optional expression.  The result is a `variable' named by the
+# macro name, carrying the expression as its value and flagged const.
+define : HASH DEFINE symbol opt-expression
+	 ( $3 variable nil $4
+	      (ASSOC const t)
+	      nil
+	      )
+       ;
 
 filename : symbol PERIOD symbol
 	   ( (concat $1 $2 $3) )
-         | symbol punctuation "/" filename
+         | symbol DIVIDE filename
 	   ( (concat $1 $2 (car $3)) )
 	 ;
-	  
-structparts : semantic-list
-	      (EXPANDFULL $1 structsubparts)
+
+# In C++, structures can have the same things as classes.
+# So delete this someday in the future.
+#
+#structparts : semantic-list
+#	      (EXPANDFULL $1 structsubparts)
+#	    ;
+#
+#structsubparts : open-paren "{"
+#		 ( nil )
+#	       | close-paren "}"
+#		 ( nil )
+#	       | var-or-fun
+#	       | define
+#	       # sometimes there are defines in structs.
+#	       ;
+
+classparts : semantic-list
+	     (EXPANDFULL $1 classsubparts)
 	    ;
 
-structsubparts : variable
-	       | define
-	       # sometimes there are defines in structs.
+# One element found while expanding the braces of a class/struct body
+# (see classparts).  The brace tokens themselves produce nil.
+classsubparts : open-paren "{"
+		( nil )
+	      | close-paren "}"
+		( nil )
+	      # In C++, this label in a classsubpart represents
+	      # PUBLIC, PRIVATE or PROTECTED bits.
+	      | opt-class-protection COLON
+		( ,$1 label )
+	      | var-or-fun
+	      | type
+	      # sometimes there are defines in classes; pass the
+	      # define through unchanged.
+	      | define
+	      | EMPTY
+	      ;
+
+# Optional inheritance clause of a class: a `:' followed by the list
+# of parents.  Yields the class-parents value, or an empty result when
+# there is no clause.
+opt-class-parents : COLON class-parents
+		    ( $2 )
+		  | EMPTY
+		    ( )
+		  ;
+
+# Comma separated list of parent classes.  Only the parent names ($2)
+# are collected; the protection keyword in front of each parent ($1)
+# is matched but not stored in the result.
+class-parents : opt-class-protection symbol COMA class-parents
+		( ,(cons $2 $4 ) )
+	      | opt-class-protection symbol
+		( $2 )
+	      ;
+
+# One of the C++ access keywords.
+# NOTE(review): despite the `opt-' prefix there is no EMPTY
+# alternative here, so every rule using this nonterminal requires a
+# keyword to be present -- confirm this is intended.
+opt-class-protection : PUBLIC
+		     | PRIVATE
+		     | PROTECTED
+		     ;
+
+namespaceparts : semantic-list
+		 (EXPANDFULL $1 namespacesubparts)
 	       ;
 
+# One element found while expanding the braces of a namespace body
+# (see namespaceparts).  The brace tokens themselves produce nil.
+namespacesubparts : open-paren "{"
+		    ( nil )
+		  | close-paren "}"
+		    ( nil )
+		  | type
+                  | var-or-fun
+		  | define
+		  | opt-class-protection COLON
+		    ( $1 protection )
+		  # In C++, this label in a classsubpart represents
+		  # PUBLIC or PRIVATE bits.  Ignore them for now.
+		  # NOTE(review): this alternative uses `$1 protection'
+		  # while classsubparts uses `,$1 label' -- confirm
+		  # which form is wanted.
+		  | EMPTY
+		  ;
 
 enumparts : semantic-list
 	    (EXPANDFULL $1 enumsubparts)
 	  ;
 
 enumsubparts : symbol opt-assign
-	       ( $1 )
+	       ( $1 variable "int" ,$2 (ASSOC const t) nil)
+	     | open-paren "{"
+	       ( nil )
+	     | close-paren "}"
+	       ( nil )
 	     ;
 
 opt-name : symbol
 	   ( nil )
          ;
 
-typesimple : STRUCT opt-name structparts
-	     ( ,$2 type $1 $3 nil nil nil )
+# A struct, class, union or enum definition with its body.
+# Structs and classes share one alternative; $3 carries the parents.
+typesimple : struct-or-class opt-name opt-class-parents classparts
+	     ( ,$2 type ,$1 $4 ,$3 nil nil )
-	   | UNION opt-name structparts
+	   # structparts is commented out in this revision, so union
+	   # bodies are parsed with classparts as well.
+	   | UNION opt-name classparts
 	     ( ,$2 type $1 $3 nil nil nil )
 	   | ENUM opt-name enumparts
 	     ( $3 type $1 nil $2 nil nil )
            ;
 
+# Keyword that introduces a struct or class; both are handled by the
+# same alternative in typesimple.
+struct-or-class: STRUCT
+	       | CLASS
+	       ;
+
 type : typesimple SEMICOLON
        ( ,$1 )
+     | NAMESPACE symbol namespaceparts
+       ( $2 type $1 $3 nil nil nil )
      ;
 
 opt-stars : STAR opt-stars
 	    ( 0 )
           ;
 
-declmods : symbol "\\(_+\\)?\\(extern\\|static\\|const\\|volitile\\|signed\\|unsigned\\)" declmods
-	   ( ,(cons $1 $2 ) )
-	 | symbol "\\(_+\\)?\\(extern\\|static\\|const\\|volitile\\|signed\\|unsigned\\)"
-	   ( $1 )
+declmods : DECLMOD declmods
+	   ( ,(cons ,(car ,$1) $2 ) )
+	 | DECLMOD
+	   ( ,$1 )
 	 | EMPTY
 	   ()
 	 ;
 
-# dont deal with the stars just yet.
-typeform : typeformbase opt-stars
+# Any single declaration modifier keyword.  Used by declmods to build
+# the list of modifiers in front of a declaration.
+DECLMOD : EXTERN
+	| STATIC
+	| CONST
+	| VOLATILE
+	| SIGNED
+	| UNSIGNED
+	| VIRTUAL
+	| INLINE
+	;
+
+# Don't deal with the stars or reference just yet.
+typeform : typeformbase opt-stars opt-ref
 	   ( ,$1 )
 	 ;
 
+# C++: A type can be modified into a reference by "&"
+# The match is optional; typeform does not use its value.
+opt-ref : AMPERSAND
+	| EMPTY
+	;
+
 typeformbase : typesimple
 	       ( ,$1 )
 	     | STRUCT symbol
 	       ( $2 type $1 )
 	     | ENUM symbol
 	       ( $2 type $1 )
+	     | builtintype
+	       ( ,$1 )
 	     | symbol
 	       ( $1 )
 	     ;
-  
+
+# The built in type keywords declared as tokens above.  Matched by
+# typeformbase ahead of the plain `symbol' alternative.
+builtintype : VOID
+	    | CHAR
+	    | SHORT
+	    | INT
+	    | LONG
+	    | FLOAT
+	    | DOUBLE
+	    ;
+
+# A variable or a function.  Both start with the same modifiers and
+# type, so that prefix is matched once here and the partial result of
+# var-or-func-decl is combined with it by
+# semantic-c-reconstitute-token (defined outside this grammar).
+var-or-fun : declmods typeform var-or-func-decl
+	     ( ,(semantic-c-reconstitute-token ,$3 $1 $2 ) )
+	   # it is possible for a function to not have a type, and
+	   # it is then assumed to be an int.  How annoying.
+	   | declmods var-or-func-decl
+	     ( ,(semantic-c-reconstitute-token ,$2 $1 nil ) )
+	   ;
+
+# The part of a declaration that follows the modifiers and type.
+#  - Function form yields: name, 'function, parent class ($1),
+#    destructor flag ($2), argument list ($4) and throws ($5).
+#  - Variable form yields: the list of names and 'variable.
+# NOTE(review): the value of fun-or-proto-end ($6) is matched but not
+# included in the function result -- confirm whether the prototype
+# flag is meant to be passed on.
+var-or-func-decl : opt-class opt-destructor functionname arg-list 
+		   opt-throw
+		   fun-or-proto-end
+		   ( ,$3 'function 
+			 ;; Extra stuff goes in here.
+			 ;; Continue with the stuff we found in
+			 ;; this definition
+			$1 $2 $4 $5)
+		 | varnamelist  SEMICOLON
+		   ( $1 'variable )
+		 ;
+
+# Optional exception specification after a method's argument list:
+# `throw' followed by a parenthesized list, which is expanded with
+# throw-exception-list.
+opt-throw : THROW semantic-list
+	     ( EXPAND $2 throw-exception-list )
+	   | EMPTY
+	   ;
+
+# Is this true?  I don't actually know.
+# Contents of a throw(...) list: the opening paren is consumed by the
+# last alternative, then symbols separated by commas are collected
+# until a symbol followed by the closing paren.
+throw-exception-list : symbol COMA throw-exception-list
+			( ,(cons $1 $3) )
+		      | symbol close-paren ")"
+			( $1 )
+		     | open-paren "(" throw-exception-list
+			( ,$2 )
+		      ;
+
 opt-bits : COLON symbol
 	   ( $2 )
 	 | EMPTY
 	     ( nil )
 	   ;
 
-macro : HASH DEFINE symbol opt-expression
-	( $3 variable nil t $4 nil nil )
-      ;
-
-variable : variabledef SEMICOLON
-	   ( ,$1 )
-	 ;
-
-variabledef : declmods typeform varnamelist
-	      ( $3 variable $2 (if $1 (string-match "const" (car $1))) nil 
-	      (if (and $1 (string-match "const" (car $1))) (cdr $1) $1) nil )
-	    ;
-
 opt-restrict : symbol "\\<\\(__\\)?restrict\\>"
 	     | EMPTY
 	     ;
 
 # I should store more in this def, but leave it simple for now.
 variablearg : declmods typeform varname
-	      ( (car $3) variable $2 (if $1 (string-match "const" (car $1)))
-	      nil (if (and $1 (string-match "const" (car $1))) (cdr $1) $1)
-	      nil )
+	      ( (car $3) variable $2 nil
+		 (ASSOC const (if (member "const" $1) t nil)
+			typemodifiers (delete "const" $1))
+		 nil
+		 )
 	    ;
 
 varnamelist : varname COMA varnamelist
 	      ( $1 )
 	    ;
 
+# Optional `Class::' qualifier in front of a function name.  Yields
+# the class name, or nil when there is no qualifier.
+opt-class : symbol COLON COLON
+	    ( $1 )
+	  | EMPTY
+	    ( nil )
+	  ;
+
+# Optional `~' in front of a function name.  Yields t when present
+# and nil otherwise.
+opt-destructor : TILDE
+		 ( t )
+	       | EMPTY
+		 ( nil )
+	       ;
+
 arg-list : symbol "\\<__?P\\>" semantic-list
 	   (EXPAND $2 arg-list-p)
-	 | semantic-list knr-arguments
+	 | semantic-list "^(" knr-arguments
 	   ( ,$2 )
-	 | semantic-list
+	 | semantic-list "^("
 	   (EXPANDFULL $1 arg-sub-list)
 	 ;
 
 
 arg-sub-list : variablearg
 	       ( ,$1 )
-	     | PERIOD PERIOD PERIOD
-	       close-paren ")"
+	     | PERIOD PERIOD PERIOD close-paren ")"
 	       ( "..." )
+	     | open-paren "("
+	       ( nil )
+	     | close-paren ")"
+	       ( nil )
 	     ;
 
-functiondef : declmods typeform symbol arg-list
-	      ( $3 function $2 $4 $1 nil )
-            ;
+# Operator spellings accepted after the `operator' keyword (see
+# functionname).  Two-token operators are listed first and are
+# returned as one joined string; the single-token alternatives have
+# no explicit action.
+operatorsym : LESS LESS
+	      ( "<<" )
+	    | GREATER GREATER
+	      ( ">>" )
+	    | EQUAL EQUAL
+	      ( "==" )
+	    | LESS EQUAL
+	      ( "<=" )
+	    | GREATER EQUAL
+	      ( ">=" )
+	    | BANG EQUAL
+	      ( "!=" )
+	    | LESS
+	    | GREATER
+	    | STAR
+	    | PLUS
+	    | MINUS
+	    | DIVIDE
+	    | EQUAL
+	    ;
 
-prototype : functiondef SEMICOLON
-	    ( ,$1 )
-	  ;
+# Name of a function: either `operator' followed by an operator
+# symbol (the operatorsym value is spliced in) or a plain symbol.
+functionname : OPERATOR operatorsym
+	       ( ,$2 )
+	     | symbol
+	       ( $1 )
+	     ;
 
-function : functiondef semantic-list
-	   ( ,$1 )
-         ;
+# What ends a function declaration: a `;' yields t, and a following
+# semantic-list yields nil.
+# NOTE(review): presumably t marks a prototype and nil a definition
+# with a body -- confirm against the code that consumes this value.
+fun-or-proto-end: SEMICOLON
+		  ( t )
+		| semantic-list
+		  ( nil )
+		;
 
 opt-expression : expression
 	       | EMPTY ( nil )
 	       ;
 
-# Use expressiosn for parsing only.  Don't actually return anything
-# for now.  Hopefully we can't fix this later.
+# Use expression for parsing only.  Don't actually return anything
+# for now.  Hopefully we can fix this later.
 expression : symbol
-	     ( nil )
+	     ( )
            | punctuation "[!*&~]" symbol
-	     ( nil )
+	     ( )
+	   | string
+	     ( )
            | semantic-list
-	     ( nil )
+	     ( )
 	   # | expression "+-*/%^|&" expression
 	   # ( nil )
 	   ;