Source

semantic / c.bnf

Diff from to
 %put TYPEDEF summary "Arbitrary Type Declaration: typedef <typedeclaration> <name>;"
 %token CLASS "class"
 %put CLASS summary "Class Declaration: class <name>[:parents] { ... };"
+%token TYPENAME "typename"
+%put TYPENAME summary "typename is used to handle a qualified name as a typename;"
 %token NAMESPACE "namespace"
 %put NAMESPACE summary "Namespace Declaration: namespace <name> { ... };"
 %token USING "using"
 %put VOID summary "Built in typeless type: void"
 %token CHAR "char"
 %put CHAR summary "Integral Character Type: (0 to 256)"
+%token WCHAR "wchar_t"
+%put WCHAR summary "Wide Character Type"
 %token SHORT "short"
 %put SHORT summary "Integral Primitive Type: (-32768 to 32767)"
 %token INT "int"
 
 declaration : macro
 	    | type
+# TODO: Klaus Berndl <klaus.berndl@sdm.de>: Is the define here
+# necessary or even wrong? Is this part not already covered by macro??
+	    | define
 	    | var-or-fun
-	    | define
 	    | extern-c
 	    | template
 	    | using
 	  ( nil )
 	;
 
+# Klaus Berndl <klaus.berndl@sdm.de>: At least one-liners should be
+# parsed correctly! Multi-line macros (every line ends with a
+# backslash) that are not one code-block can not be parsed because
+# \+newline is flexed as nothing and therefore we can not distinguish
+# whether the lines > 1 belong to the macro or are separate statements!
+# Maybe we need special backslash recognition and then better
+# macro-parsing. For the moment it's good enough.
+#
+# macro-expression-list: body of a one-line macro made of several
+# expressions.  Both alternatives have the action ( nil ), so nothing
+# about the body is recorded.
+# NOTE(review): as written, SEMICOLON follows the recursive list
+# instead of separating `expression' from it, so the first alternative
+# describes "expr expr ... ; ;" rather than "expr ; expr ;".  Confirm
+# whether `expression SEMICOLON macro-expression-list' was intended.
+macro-expression-list : expression macro-expression-list SEMICOLON
+			( nil )
+		      | expression
+			( nil )
+		      ;
+
+
+# macro-def: the replacement text of a #define; it is the last element
+# of the macro-or-include and define rules.  The first two alternatives
+# have the action ( nil ); the EMPTY alternative has no action.
+# NOTE(review): macro-expression-list already has a bare `expression'
+# alternative, so the second alternative here looks redundant -- confirm.
+macro-def : macro-expression-list
+            ( nil )
+	  | expression
+	    ( nil )
+	  | EMPTY 
+	  ;
+
 macro : HASH macro-or-include
 	( ,$2 )
       ;
 
-macro-or-include : DEFINE symbol opt-define-arglist opt-expression
+macro-or-include : DEFINE symbol opt-define-arglist macro-def
 		   ( $2 variable nil $3
 			(ASSOC const t)
 			nil
 		   ;
 
 # This is used in struct parts.
-define : HASH DEFINE symbol opt-expression
-	 ( $2 variable nil $3
+define : HASH DEFINE symbol opt-define-arglist macro-def
+	 ( $3 variable nil $4
 	      (ASSOC const t)
 	      nil
 	      )
 
 class-parents : opt-class-protection opt-class-declmods
 		namespace-symbol COMA class-parents
-		( ,(cons (car $4) $6 ) )
+		( ,(cons $3 $5 ) )
 	      | opt-class-protection opt-class-declmods namespace-symbol
-		( ,$4 )
+		( ,$3 )
 	      ;
 
 opt-class-declmods : class-declmods opt-class-declmods
 	   | UNION opt-name unionparts
 	     ( ,$2 type $1 $3 nil nil nil )
 	   | ENUM opt-name enumparts
-	     ( ,$2 type $1 $3 nil nil nil )	
-           | TYPEDEF typeformbase typedef-symbol-list
+	     ( ,$2 type $1 $3 nil nil nil )
+# Klaus Berndl <klaus.berndl@sdm.de>: a typedef can be a typeformbase
+# with all this declmods stuff.
+           | TYPEDEF declmods typeformbase cv-declmods typedef-symbol-list
 	   ## We put the type this typedef renames into PARENT
 	   ## but will move it in the expand function.
-	     ( $3 type $1 nil $2 nil nil )
+	     ( $5 type $1 nil $3 nil nil )
            ;
 
 typedef-symbol-list : typedefname COMA typedef-symbol-list
 		      ( $1 )
 		    ;
 
+# TODO: Klaus Berndl <klaus.berndl@sdm.de>: symbol -> namespace-symbol?!
+# Answer: Probably symbol is correct here!
 typedefname : opt-stars symbol opt-bits opt-array
 	      ( $1 $2 )
 	    ;
 
 type : typesimple SEMICOLON
        ( ,$1 )
+     # named namespaces like "namespace XXX {"
      | NAMESPACE symbol namespaceparts
        ( $2 type $1 $3 nil nil nil )
+     # unnamed namespaces like "namespace {"
+     | NAMESPACE namespaceparts
+       ( "unnamed" type $1 $2 nil nil nil )
      ;
 
+# Klaus Berndl <klaus.berndl@sdm.de>: 
+# We must parse "using namespace XXX" too
+
 # Using is vaguely like an include statement in the named portions
 # of the code.  We should probably specify a new token type for this.
 using : USING typeformbase SEMICOLON
 	( nil )
-      | USING symbol COLON COLON typeformbase SEMICOLON;
+      | USING NAMESPACE typeformbase SEMICOLON
+        ( nil )
       ;
 
 template : TEMPLATE template-specifier opt-friend template-definition
 			       ( )
 			     ;
 
-template-var : template-type opt-stars opt-template-equal
-	       ( ,(cons (concat (car $1) (make-string (car ,$2) ?*))
-			      (cdr $1)))
+#template-var : template-type opt-stars opt-template-equal
+#	       ( ,(cons (concat (car $1) (make-string (car ,$2) ?*))
+#			      (cdr $1)))
+## Klaus Berndl <klaus.berndl@sdm.de>: for template-types the
+## template-var can also be literals or constants.
+## Example: map<ClassX, ClassY, 10> map_size10_var; This parses also
+## template<class T, 0> which is nonsense but who cares....
+#	     | string
+#	       ( $1 )
+#	     | number
+#	       ( $1 )
+#	     ;
+
+# template-var: one entry of a template argument/parameter list.
+# Four alternatives: a template-type with optional default, a string
+# literal, a number literal, or a (possibly qualified) symbol with
+# optional stars and reference.
+template-var : 
+# Klaus Berndl <klaus.berndl@sdm.de>: The following handles all
+# template-vars of template-definitions
+# The action rebuilds the template-type value from its car and cdr;
+# the value of opt-template-equal ($2) is not used.
+	       template-type opt-template-equal
+               ( ,(cons (car $1) (cdr $1)) )
+# Klaus Berndl <klaus.berndl@sdm.de>: for template-types the
+# template-var can also be literals or constants.
+# Example: map<ClassX, ClassY, 10> map_size10_var; This parses also
+# template<class T, 0> which is nonsense but who cares....
+             | string
+	       ( $1 )
+	     | number
+	       ( $1 )
+# Klaus Berndl <klaus.berndl@sdm.de>: In template-types arguments can
+# be any symbols with optional address-of operator (&) and optional
+# dereferencing operator (*)
+# Example map<ClassX, ClassY, *size_var_ptr> sized_map_var.
+# Only the namespace-symbol ($3) is used in the action; the stars and
+# the reference marker are dropped.
+             | opt-stars opt-ref namespace-symbol
+	       ( ,$3 )
 	     ;
 
 opt-template-equal : EQUAL symbol LESS template-specifier-types GREATER
 		( $2 type "class" nil nil)
 	      | STRUCT symbol
 		( $2 type "struct" nil nil)
-	      | builtintype
-		( ,$1 type nil nil nil)
-	      | symbol
-		( $1 type nil nil nil)
+# TODO: Klaus Berndl <klaus.berndl@sdm.de>: For the moment it is ok
+# that we parse the C++ keyword typename as a class....
+	      | TYPENAME symbol
+		( $2 type "class" nil nil)
+# Klaus Berndl <klaus.berndl@sdm.de>: template-types can be all
+# flavors of variable-args but here the argument is ignored, only the
+# type stuff is needed.
+	      | declmods typeformbase cv-declmods opt-stars
+	        opt-ref variablearg-opt-name
+	        ( (car $2) type nil nil
+		  (ASSOC const (if (member "const" (append $1 $3)) t nil)
+			 typemodifiers (delete "const" (append $1 $3))
+			 reference (car ,$5)
+			 pointer (car $4)
+			 )
+		  )
 	      ;
 
 template-definition : type
 
 DECLMOD : EXTERN
 	| STATIC
-	| CONST
-	| VOLATILE
-	| SIGNED
-	| UNSIGNED
+        | CVDECLMOD
+# Klaus Berndl <klaus.berndl@sdm.de>: IMHO signed and unsigned are not
+# declmods; they are only valid for some built-in types like
+# short, int etc... whereas "real" declmods are valid for all types,
+# built-in and user-defined!
+#	| SIGNED
+#	| UNSIGNED
 	| INLINE
 	| REGISTER
 	| FRIEND
+# Klaus Berndl <klaus.berndl@sdm.de>: There can be a few cases where
+# TYPENAME is not allowed in C++-syntax but better than not
+# recognizing the allowed situations.
+        | TYPENAME
 	| METADECLMOD
-	;; This is a hack in case we are in a class.
+	# This is a hack in case we are in a class.
 	| VIRTUAL
 	;
 
 	      ()
 	    ;
 
+# CVDECLMOD: the const/volatile qualifiers.  They are referenced from
+# DECLMOD and, through cv-declmods, by rules that accept qualifiers
+# written after the type.
+CVDECLMOD : CONST
+          | VOLATILE
+          ;
+
+# cv-declmods: a possibly empty sequence of CVDECLMOD qualifiers.
+# The recursive alternative conses (car $1) onto the value of the
+# remaining sequence ($2); the EMPTY alternative has an empty action.
+# NOTE(review): the single-CVDECLMOD alternative overlaps with the
+# recursive one followed by EMPTY -- confirm whether it is ever taken.
+cv-declmods : CVDECLMOD cv-declmods
+	      ( ,(cons ,(car ,$1) $2 ) )
+	    | CVDECLMOD
+              ( ,$1 )
+            | EMPTY
+              ()
+            ;
+
 METADECLMOD : VIRTUAL
 	    | MUTABLE
 	    ;
 	       ( $2 type $1 )
 	     | builtintype
 	       ( ,$1 )
-	     | symbol template-specifier
-	       ( $1 type "class" )
-	     | namespace-symbol opt-stars opt-template-specifier
-	       ( ,$1  )
+#	     | symbol template-specifier
+#	       ( $1 type "class" )
+#	     | namespace-symbol opt-stars opt-template-specifier
+#	     | namespace-symbol opt-template-specifier
+	     | namespace-symbol
+#	       ( ,$1 )
+	       ( ,$1 type "class" )
 	     | symbol
 	       ( $1 )
 	     ;
 
-builtintype : VOID
-	    | CHAR
-	    | SHORT
-	    | INT
-	    | LONG
-	    | FLOAT
-	    | DOUBLE
+# signedmod: signedness keyword.  Used by builtintype, either in front
+# of a builtintype-types name or on its own.
+signedmod : UNSIGNED
+          | SIGNED
+	  ;
+
+# Klaus Berndl <klaus.berndl@sdm.de>: builtintype-types was builtintype
+#
+# builtintype-types: names of the built-in types without a signedness
+# modifier.  Two-token forms (LONG INT, LONG DOUBLE, LONG LONG) are
+# joined into one string with a single space.  Plain LONG is listed
+# last, after all the alternatives that start with LONG.
+# NOTE(review): there is no alternative for the three-token form
+# "long long int" nor for "short int" -- confirm whether that is wanted.
+builtintype-types : VOID
+                  | CHAR
+# Klaus Berndl <klaus.berndl@sdm.de>: Added WCHAR
+                  | WCHAR
+                  | SHORT
+                  | INT
+		  | LONG INT
+		    ( (concat $1 " " $2) )
+                  | FLOAT
+                  | DOUBLE
+		  | LONG DOUBLE
+		    ( (concat $1 " " $2) )
+# TODO: Klaus Berndl <klaus.berndl@sdm.de>: Is there a long long? I
+# think so.  (Yes: `long long' is a standard type since C99 and C++11.)
+		  | LONG LONG
+		    ( (concat $1 " " $2) )
+                  | LONG
+                  ;
+
+# builtintype: a built-in type with an optional signedness modifier.
+# With a modifier, the car of the modifier and the car of the type
+# name are joined with a single space.
+builtintype : signedmod builtintype-types
+              ( (concat (car $1) " " (car $2)) )
+            | builtintype-types
+	      ( ,$1 )
+# Klaus Berndl <klaus.berndl@sdm.de>: unsigned is a synonym for unsigned
+# int and signed for signed int. To make this confusing stuff clear we
+# add the int here.
+	    | signedmod
+	    ( (concat (car $1) " int") )
 	    ;
 
-codeblock-var-or-fun : declmods typeformbase metadeclmod
+# Klaus Berndl <klaus.berndl@sdm.de>: This parses also nonsense like
+# "const volatile int const volatile const const volatile a ..." but
+# IMHO nobody writes such code. Normally we should define a rule like
+# typeformbase-mode which exactly defines the different allowed cases
+# and combinations of declmods (minus the CVDECLMOD) typeformbase and
+# cv-declmods so we could recognize more invalid code but IMHO this is
+# not worth the effort...
+codeblock-var-or-fun : declmods typeformbase declmods
 		       opt-ref var-or-func-decl
 		       ( ,(semantic-c-reconstitute-token ,$5 $1 $2 ) )
 		     ;
 	    | EMPTY
 	    ;
 
-opt-initializers: COLON symbol semantic-list opt-initializers
-		| COMA symbol semantic-list opt-initializers
+# Klaus Berndl <klaus.berndl@sdm.de>: symbol -> namespace-symbol
+opt-initializers: COLON namespace-symbol semantic-list opt-initializers
+		| COMA namespace-symbol semantic-list opt-initializers
 		| EMPTY
 		;
 
 	     | EMPTY
 	     ;
 
-varname : opt-stars opt-restrict symbol opt-bits opt-array opt-assign
-	  ( $3 ,$1 ,$4 ,$5 ,$6 )
+
+# Klaus Berndl <klaus.berndl@sdm.de>: symbol -> namespace-symbol?! I
+# think so. Can be that then also some invalid C++-syntax is parsed
+# but this is better than not parsing valid syntax.
+varname : opt-stars opt-restrict namespace-symbol opt-bits opt-array opt-assign
+	  ( ,$3 ,$1 ,$4 ,$5 ,$6 )
 	;
 
-# I should store more in this def, but leave it simple for now.
-variablearg : declmods typeformbase opt-ref opt-stars variablearg-opt-name
+# I should store more in this def, but leave it simple for now. Klaus
+# Berndl <klaus.berndl@sdm.de>: const and volatile can be written
+# after the type!
+variablearg : declmods typeformbase cv-declmods opt-ref variablearg-opt-name
 	      ( (list $5) variable $2 nil
-		 (ASSOC const (if (member "const" $1) t nil)
-			typemodifiers (delete "const" $1)
-			reference (car ,$3)
-			)
+		(ASSOC const (if (member "const" (append $1 $3)) t nil)
+		       typemodifiers (delete "const" (append $1 $3))
+		       reference (car ,$4)
+		       )
 		 nil
 		 )
 	    ;
 
 variablearg-opt-name: varname
 		      ( ,$1)
-		    | EMPTY
-		      ( ""  0 nil nil nil )
+# Klaus Berndl <klaus.berndl@sdm.de>: This allows variableargs without
+# an arg-name to be parsed correctly even if there are several pointers (*)
+		    | opt-stars
+		      ( "" ,$1 nil nil nil )
 		    ;
 
 varnamelist : varname COMA varnamelist
 	      ( $1 )
 	    ;
 
-namespace-symbol : symbol COLON COLON namespace-symbol
-		   ( (concat $1 "::" (car $4)) )
-		 | symbol
+# Klaus Berndl <klaus.berndl@sdm.de>: 
+# Is necessary to parse stuff like 
+#     class list_of_facts : public list<fact>, public entity
+# and
+#     list <shared_ptr<item> >::const_iterator l;
+# Parses also invalid(?) and senseless(?) c++-syntax like
+#     symbol<template-spec>::symbol1<template-spec1>::test_iterator
+# but better parsing too much than too little
+namespace-symbol : symbol opt-template-specifier COLON COLON namespace-symbol
+		   ( (concat $1 "::" (car $5)) )
+		 | symbol opt-template-specifier
 		   ( $1 )
 		 ;
 
-opt-class : symbol COLON COLON
-	    ( $1 )
+#namespace-symbol : symbol COLON COLON namespace-symbol
+#		   ( (concat $1 "::" (car $4)) )
+#		 | symbol
+#		   ( $1 )
+#		 ;
+
+# namespace-opt-class: a chain of "name::" qualifiers.  The recursive
+# alternative joins the first name and the car of the rest with "::".
+# NOTE(review): only the non-recursive alternative accepts an
+# opt-template-specifier, so a templated qualifier ends the chain
+# matched here -- confirm that "A<T>::B::" is handled elsewhere.
+namespace-opt-class : symbol COLON COLON namespace-opt-class
+		      ( (concat $1 "::" (car $4)) )
+# Klaus Berndl <klaus.berndl@sdm.de>: We must recognize
+# template-specifiers here so we can parse correctly the
+# method-implementations of template-classes outside the
+# template-class-declaration
+# Example: TemplateClass1<T>::method_1(...)
+		    | symbol opt-template-specifier COLON COLON
+		      ( $1 )
+		    ;
+
+# Klaus Berndl <klaus.berndl@sdm.de>: The opt-class of a func-decl
+# must be able to recognize opt-classes with namespaces, e.g.
+# Test1::Test2::classname::
+opt-class : namespace-opt-class
+	    ( ,$1 )
 	  | EMPTY
 	    ( nil )
 	  ;
 	      ( "!=" )
 	    | MINUS GREATER
 	      ( "->" )
+# Klaus Berndl <klaus.berndl@sdm.de>: We have to parse also
+# operator() and operator[]
+            | semantic-list "()"
+	      ( "()" )
+            | semantic-list "\\[\\]"
+	      ( "[]" )
 	    | LESS
 	    | GREATER
 	    | STAR
 		  ( 'pure-virtual )
 		;
 
-opt-expression : expression
-	       | EMPTY ( nil )
-	       ;
-
 type-cast : semantic-list
 	    ( EXPAND $1 type-cast-list )
 	  ;
 type-cast-list : open-paren typeformbase close-paren
 	       ;
 
-function-call: symbol semantic-list
+# Klaus Berndl <klaus.berndl@sdm.de>: symbol -> namespace-symbol!
+function-call: namespace-symbol semantic-list
              ;
 
+# string-seq: one or more adjacent string tokens.  The recursive
+# alternative concatenates the first string with the car of the value
+# of the remaining sequence.  Used by the expression rule in place of a
+# single string.
+string-seq : string string-seq
+             ( (concat $1 (car $2)) )
+           | string
+             ( $1 )
+           ;
+
 # Use expression for parsing only.  Don't actually return anything
 # for now.  Hopefully we can fix this later.
 expression : number
-	     ( (identity start) (identity end) )
+ 	     ( (identity start) (identity end) ) 
 	   | function-call
 	     ( (identity start) (identity end) )
-	   | symbol
+# Klaus Berndl <klaus.berndl@sdm.de>: symbol -> namespace-symbol!
+	   | namespace-symbol
 	     ( (identity start) (identity end) )
-	   | string
+# Klaus Berndl <klaus.berndl@sdm.de>: C/C++ allows sequences of
+# strings which are concatenated by the precompiler to one string
+	   | string-seq
 	     ( (identity start) (identity end))
            | type-cast expression  # A cast to some other type
 	     ( (identity start) (identity end) )