Commits

Andrew Dunstan committed 1b30aac

Efficiency changes for lexer

Comments (0)

Files changed (1)

src/backend/utils/adt/json.c

 	JSON_PARSE_END				/* saw the end of a document, expect nothing */
 }	JsonParseContext;
 
-static void json_lex(JsonLexContext *lex);
-static void json_lex_string(JsonLexContext *lex);
-static void json_lex_number(JsonLexContext *lex, char *s);
+static inline void json_lex(JsonLexContext *lex);
+static inline void json_lex_string(JsonLexContext *lex);
+static inline void json_lex_number(JsonLexContext *lex, char *s);
 static inline void parse_scalar(JsonLexContext *lex, JsonSemAction sem);
 static void parse_object_field(JsonLexContext *lex, JsonSemAction sem);
 static void parse_object(JsonLexContext *lex, JsonSemAction sem);
 /*
  * Lex one token from the input stream.
  */
-static void
+static inline void
 json_lex(JsonLexContext *lex)
 {
 	char	   *s;
 		lex->token_terminator = s;
 		lex->token_type = JSON_TOKEN_END;
 	}
-	else if (strchr("{}[],:", s[0]))
-	{
-		/* Single-character token, some kind of punctuation mark. */
-		lex->prev_token_terminator = lex->token_terminator;
-		lex->token_terminator = s + 1;
-		switch (s[0])
+	else	
+		switch(*s)
 		{
+			/* Single-character token, some kind of punctuation mark. */
 			case '{':
+				lex->prev_token_terminator = lex->token_terminator;
+				lex->token_terminator = s + 1;
 				lex->token_type = JSON_TOKEN_OBJECT_START;
 				break;
 			case '}':
+				lex->prev_token_terminator = lex->token_terminator;
+				lex->token_terminator = s + 1;
 				lex->token_type = JSON_TOKEN_OBJECT_END;
 				break;
 			case '[':
+				lex->prev_token_terminator = lex->token_terminator;
+				lex->token_terminator = s + 1;
 				lex->token_type = JSON_TOKEN_ARRAY_START;
 				break;
 			case ']':
+				lex->prev_token_terminator = lex->token_terminator;
+				lex->token_terminator = s + 1;
 				lex->token_type = JSON_TOKEN_ARRAY_END;
 				break;
 			case ',':
+				lex->prev_token_terminator = lex->token_terminator;
+				lex->token_terminator = s + 1;
 				lex->token_type = JSON_TOKEN_COMMA;
 				break;
 			case ':':
+				lex->prev_token_terminator = lex->token_terminator;
+				lex->token_terminator = s + 1;
 				lex->token_type = JSON_TOKEN_COLON;
 				break;
-			default:
-				break;
-		}
-	}
-	else if (*s == '"')
-	{
-		/* String. */
-		json_lex_string(lex);
-		lex->token_type = JSON_TOKEN_STRING;
-	}
-	else if (*s == '-')
-	{
-		/* Negative number. */
-		json_lex_number(lex, s + 1);
-		lex->token_type = JSON_TOKEN_NUMBER;
-	}
-	else if (*s >= '0' && *s <= '9')
-	{
-		/* Positive number. */
-		json_lex_number(lex, s);
-		lex->token_type = JSON_TOKEN_NUMBER;
-	}
-	else
-	{
-		char	   *p;
-
-		/*
-		 * We're not dealing with a string, number, legal punctuation mark, or
-		 * end of string.  The only legal tokens we might find here are true,
-		 * false, and null, but for error reporting purposes we scan until we
-		 * see a non-alphanumeric character.  That way, we can report the
-		 * whole word as an unexpected token, rather than just some
-		 * unintuitive prefix thereof.
-		 */
-		for (p = s; JSON_ALPHANUMERIC_CHAR(*p) && p - s < lex->input_length - len; p++)
-			 /* skip */ ;
 
-		/*
-		 * We got some sort of unexpected punctuation or an otherwise
-		 * unexpected character, so just complain about that one character.
-		 */
-		if (p == s)
-		{
-			lex->prev_token_terminator = lex->token_terminator;
-			lex->token_terminator = s + 1;
-			report_invalid_token(lex);
-		}
+			case '"':
+				/* String. */
+				json_lex_string(lex);
+				lex->token_type = JSON_TOKEN_STRING;
+				break;
+			case '-':
+				/* Negative number. */
+				json_lex_number(lex, s + 1);
+				lex->token_type = JSON_TOKEN_NUMBER;
+				break;
+			case '0':
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9':
+				/* Positive number. */
+				json_lex_number(lex, s);
+				lex->token_type = JSON_TOKEN_NUMBER;
+				break;
+			default:
+			{
+				char	   *p;
 
-		/*
-		 * We've got a real alphanumeric token here.  If it happens to be
-		 * true, false, or null, all is well.  If not, error out.
-		 */
-		lex->prev_token_terminator = lex->token_terminator;
-		lex->token_terminator = p;
-		if (p - s == 4)
-		{
-			if (memcmp(s, "true", 4) == 0)
-				lex->token_type = JSON_TOKEN_TRUE;
-			else if (memcmp(s, "null", 4) == 0)
-				lex->token_type = JSON_TOKEN_NULL;
-			else
-				report_invalid_token(lex);
-		}
-		else if (p - s == 5 && memcmp(s, "false", 5) == 0)
-			lex->token_type = JSON_TOKEN_FALSE;
-		else
-			report_invalid_token(lex);
-	}
+				/*
+				 * We're not dealing with a string, number, legal punctuation mark, or
+				 * end of string.  The only legal tokens we might find here are true,
+				 * false, and null, but for error reporting purposes we scan until we
+				 * see a non-alphanumeric character.  That way, we can report the
+				 * whole word as an unexpected token, rather than just some
+				 * unintuitive prefix thereof.
+				 */
+				for (p = s; JSON_ALPHANUMERIC_CHAR(*p) && p - s < lex->input_length - len; p++)
+					/* skip */ ;
+				
+				/*
+				 * We got some sort of unexpected punctuation or an otherwise
+				 * unexpected character, so just complain about that one character.
+				 */
+				if (p == s)
+				{
+					lex->prev_token_terminator = lex->token_terminator;
+					lex->token_terminator = s + 1;
+					report_invalid_token(lex);
+				}
+				
+				/*
+				 * We've got a real alphanumeric token here.  If it happens to be
+				 * true, false, or null, all is well.  If not, error out.
+				 */
+				lex->prev_token_terminator = lex->token_terminator;
+				lex->token_terminator = p;
+				if (p - s == 4)
+				{
+					if (memcmp(s, "true", 4) == 0)
+						lex->token_type = JSON_TOKEN_TRUE;
+					else if (memcmp(s, "null", 4) == 0)
+						lex->token_type = JSON_TOKEN_NULL;
+					else
+						report_invalid_token(lex);
+				}
+				else if (p - s == 5 && memcmp(s, "false", 5) == 0)
+					lex->token_type = JSON_TOKEN_FALSE;
+				else
+					report_invalid_token(lex);
+				
+			}
+		} /* end of switch */
 }
 
 /*
  * The next token in the input stream is known to be a string; lex it.
  */
-static void
+static inline void
 json_lex_string(JsonLexContext *lex)
 {
 	char	   *s;
  *
  *-------------------------------------------------------------------------
  */
-static void
+static inline void
 json_lex_number(JsonLexContext *lex, char *s)
 {
 	bool		error = false;