Commits

Andrew Dunstan  committed b70fab3

avoid need to create cstrings for parser

  • Participants
  • Parent commits f5b621a

Comments (0)

Files changed (3)

File src/backend/utils/adt/json.c

 	JSON_PARSE_END				/* saw the end of a document, expect nothing */
 }	JsonParseContext;
 
-static void json_validate_cstring(char *input);
 static void json_lex(JsonLexContext *lex);
 static void json_lex_string(JsonLexContext *lex);
 static void json_lex_number(JsonLexContext *lex, char *s);
 /*
  * json_in - input function for the json type (shown here as a diff).
  *
  * Reads argument 0 as a C string, converts it to a text value, and runs the
  * JSON parser over that text purely to validate it.  The same text value is
  * then returned, so the conversion is done once, up front, instead of on
  * return as the removed lines did.
  *
  * The lexer context is built from the text value because
  * makeJsonLexContext() now takes a text pointer and records the input
  * length, rather than relying on a NUL-terminated string.
  */
 Datum
 json_in(PG_FUNCTION_ARGS)
 {
-	char	   *text = PG_GETARG_CSTRING(0);
+	char	   *json = PG_GETARG_CSTRING(0);	/* raw input string */
+	text       *result = cstring_to_text(json);	/* value that is validated and returned */
+	JsonLexContext *lex;
 
-	json_validate_cstring(text);
+	/* validate it */
+	lex = makeJsonLexContext(result,false);	/* false: no strval buffer is allocated */
+	/*
+	 * NOTE(review): pg_parse_json() is not visible in this view; presumably
+	 * it reports an error and does not return when the input is malformed,
+	 * and NullSemAction presumably means "no semantic callbacks" -- confirm.
+	 */
+	pg_parse_json(lex, NullSemAction);
 
 	/* Internal representation is the same as text, for now */
-	PG_RETURN_TEXT_P(cstring_to_text(text));
+	PG_RETURN_TEXT_P(result);
 }
 
 /*
 	text	   *result;
 	char	   *str;
 	int			nbytes;
+	JsonLexContext *lex;
 
 	str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
 
-	/*
-	 * We need a null-terminated string to pass to json_validate_cstring().
-	 * Rather than make a separate copy, make the temporary result one byte
-	 * bigger than it needs to be.
-	 */
-	result = palloc(nbytes + 1 + VARHDRSZ);
+	result = palloc(nbytes + VARHDRSZ);
 	SET_VARSIZE(result, nbytes + VARHDRSZ);
 	memcpy(VARDATA(result), str, nbytes);
-	str = VARDATA(result);
-	str[nbytes] = '\0';
 
 	/* Validate it. */
-	json_validate_cstring(str);
+	lex = makeJsonLexContext(result, false);
+	pg_parse_json(lex, NullSemAction);
 
 	PG_RETURN_TEXT_P(result);
 }
  */
 
 JsonLexContext *
-makeJsonLexContext(char *json, bool need_escapes)
+makeJsonLexContext(text *json, bool need_escapes)
 {
 	/*
 	 * Allocate and initialise a lexer context positioned at the start of the
 	 * given text value.
 	 *
 	 * json:         the text value to lex.  Its data is not copied; the
 	 *               context's pointers refer directly into it.
 	 * need_escapes: when true, a StringInfo is created in lex->strval;
 	 *               when false, strval keeps whatever palloc0 left there
 	 *               (json_lex_string tests it against NULL).
 	 *
 	 * Returns the newly allocated context.
 	 */
 	JsonLexContext *lex = palloc0(sizeof(JsonLexContext));
 
-	lex->input = lex->token_terminator = lex->line_start = json;
+	lex->input = lex->token_terminator = lex->line_start = VARDATA(json);
 	lex->line_number = 1;
 	/*
 	 * Payload length in bytes, header excluded.  The lexer compares offsets
 	 * against this instead of looking for a terminating NUL byte.
 	 *
 	 * NOTE(review): VARDATA/VARSIZE presumably require a detoasted datum
 	 * with a full-size header; the callers visible in this diff use
 	 * PG_GETARG_TEXT_P, cstring_to_text or SET_VARSIZE -- confirm that no
 	 * caller passes a short-header value.
 	 */
+	lex->input_length = VARSIZE(json) - VARHDRSZ;
 	if (need_escapes)
 		lex->strval = makeStringInfo();
 	return lex;
 }
 
 /*
- * Check whether supplied input is valid JSON.
- */
-static void
-json_validate_cstring(char *input)
-{
-	JsonLexContext *lex = makeJsonLexContext(input, false);
-
-	pg_parse_json(lex, NullSemAction);
-}
-
-/*
  * Lex one token from the input stream.
  */
 static void
 json_lex(JsonLexContext *lex)
 {
 	char	   *s;
-
+	int         len;
 	/* Skip leading whitespace. */
 	s = lex->token_terminator;
-	while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r')
+	len = s - lex->input;
+	while (len < lex->input_length &&
+		   (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
 	{
 		if (*s == '\n')
 			++lex->line_number;
 		++s;
+		++len;
 	}
 	lex->token_start = s;
 
 	/* Determine token type. */
-	if (*s == '\0')
+	if (len >= lex->input_length)
 	{
 		lex->token_start = NULL;
 		lex->prev_token_terminator = lex->token_terminator;
 		 * whole word as an unexpected token, rather than just some
 		 * unintuitive prefix thereof.
 		 */
-		for (p = s; JSON_ALPHANUMERIC_CHAR(*p); p++)
+		for (p = s; JSON_ALPHANUMERIC_CHAR(*p) && p - s < lex->input_length - len; p++)
 			 /* skip */ ;
 
 		/*
 json_lex_string(JsonLexContext *lex)
 {
 	char	   *s;
-
+	int         len;
 	if (lex->strval != NULL)
 		resetStringInfo(lex->strval);
 
-	for (s = lex->token_start + 1; *s != '"'; s++)
+	len = lex->token_start - lex->input;
+	len++;
+	for (s = lex->token_start + 1; *s != '"'; s++, len++)
 	{
-		/* Per RFC4627, these characters MUST be escaped. */
-		if ((unsigned char) *s < 32)
+		/* Premature end of the string. */
+		if (len >= lex->input_length)
 		{
-			/* A NUL byte marks the (premature) end of the string. */
-			if (*s == '\0')
-			{
-				lex->token_terminator = s;
-				report_invalid_token(lex);
-			}
+			lex->token_terminator = s;
+			report_invalid_token(lex);
+		}
+		else if ((unsigned char) *s < 32)
+		{
+			/* Per RFC4627, these characters MUST be escaped. */
 			/* Since *s isn't printable, exclude it from the context string */
 			lex->token_terminator = s;
 			ereport(ERROR,
 		{
 			/* OK, we have an escape character. */
 			s++;
-			if (*s == '\0')
+			len++;
+			if (len >= lex->input_length)
 			{
 				lex->token_terminator = s;
 				report_invalid_token(lex);
 				for (i = 1; i <= 4; i++)
 				{
 					s++;
-					if (*s == '\0')
+					len++;
+					if (len >= lex->input_length)
 					{
 						lex->token_terminator = s;
 						report_invalid_token(lex);
 {
 	bool		error = false;
 	char	   *p;
+	int         len;
 
+	len = s - lex->input;
 	/* Part (1): leading sign indicator. */
 	/* Caller already did this for us; so do nothing. */
 
 	/* Part (2): parse main digit string. */
 	if (*s == '0')
+	{
 		s++;
+		len++;
+	}
 	else if (*s >= '1' && *s <= '9')
 	{
 		do
 		{
 			s++;
-		} while (*s >= '0' && *s <= '9');
+			len++;
+		} while (*s >= '0' && *s <= '9' && len < lex->input_length);
 	}
 	else
 		error = true;
 
 	/* Part (3): parse optional decimal portion. */
-	if (*s == '.')
+	if (len < lex->input_length && *s == '.')
 	{
 		s++;
-		if (*s < '0' || *s > '9')
+		len++;
+		if (len == lex->input_length || *s < '0' || *s > '9')
 			error = true;
 		else
 		{
 			do
 			{
 				s++;
-			} while (*s >= '0' && *s <= '9');
+				len++;
+			} while (*s >= '0' && *s <= '9' && len < lex->input_length);
 		}
 	}
 
 	/* Part (4): parse optional exponent. */
-	if (*s == 'e' || *s == 'E')
+	if (len < lex->input_length && (*s == 'e' || *s == 'E'))
 	{
 		s++;
-		if (*s == '+' || *s == '-')
+		len++;
+		if (len < lex->input_length && (*s == '+' || *s == '-'))
+		{
 			s++;
-		if (*s < '0' || *s > '9')
+			len++;
+		}
+		if (len == lex->input_length || *s < '0' || *s > '9')
 			error = true;
 		else
 		{
 			do
 			{
 				s++;
-			} while (*s >= '0' && *s <= '9');
+				len++;
+			} while (len < lex->input_length && *s >= '0' && *s <= '9');
 		}
 	}
 
 	 * here should be considered part of the token for error-reporting
 	 * purposes.
 	 */
-	for (p = s; JSON_ALPHANUMERIC_CHAR(*p); p++)
+	for (p = s; JSON_ALPHANUMERIC_CHAR(*p) && len < lex->input_length; p++, len++)
 		error = true;
 	lex->prev_token_terminator = lex->token_terminator;
 	lex->token_terminator = p;
 	 * suffixing "..." if not ending at end of line.
 	 */
 	prefix = (context_start > line_start) ? "..." : "";
-	suffix = (lex->token_type != JSON_TOKEN_END && *context_end != '\0' && *context_end != '\n' && *context_end != '\r') ? "..." : "";
+	suffix = (lex->token_type != JSON_TOKEN_END && context_end  - lex->input < lex->input_length && *context_end != '\n' && *context_end != '\r') ? "..." : "";
 
 	return errcontext("JSON data, line %d: %s%s%s",
 					  line_number, prefix, ctxt, suffix);

File src/backend/utils/adt/jsonfuncs.c

 static void get_scalar(void *state, char *token, JsonTokenType tokentype);
 
 /* common worker function for json_get* functions */
-static text *get_worker(char *json, char *field, int elem_index, char **path,
+static text *get_worker(text *json, char *field, int elem_index, char **path,
 		   int npath, bool normalize_results);
 
 /* semantic action functions for json_array_length */
 static void unnest_scalar(void *state, char *token, JsonTokenType tokentype);
 
 /* turn a json object into a hash table */
-static HTAB *get_json_object_as_hash(char *jsonstr, char *funcname, bool use_json_as_text);
+static HTAB *get_json_object_as_hash(text *json, char *funcname, bool use_json_as_text);
 
 /* semantic action functions for get_json_object_as_hash */
 static void hash_object_field_start(void *state, char *fname, bool isnull);
 	if (SRF_IS_FIRSTCALL())
 	{
 		text	   *json = PG_GETARG_TEXT_P(0);
-		char	   *jsonstr = text_to_cstring(json);
-		JsonLexContext *lex = makeJsonLexContext(jsonstr, true);
+		JsonLexContext *lex = makeJsonLexContext(json, true);
 		JsonSemAction sem;
 
 		MemoryContext oldcontext;
 {
 	text	   *json = PG_GETARG_TEXT_P(0);
 	text	   *fname = PG_GETARG_TEXT_P(1);
-	char	   *jsonstr = text_to_cstring(json);
 	char	   *fnamestr = text_to_cstring(fname);
 	text	   *result;
 
-	result = get_worker(jsonstr, fnamestr, -1, NULL, -1, false);
+	result = get_worker(json, fnamestr, -1, NULL, -1, false);
 
 	if (result != NULL)
 		PG_RETURN_TEXT_P(result);
 {
 	text	   *json = PG_GETARG_TEXT_P(0);
 	text	   *fname = PG_GETARG_TEXT_P(1);
-	char	   *jsonstr = text_to_cstring(json);
 	char	   *fnamestr = text_to_cstring(fname);
 	text	   *result;
 
-	result = get_worker(jsonstr, fnamestr, -1, NULL, -1, true);
+	result = get_worker(json, fnamestr, -1, NULL, -1, true);
 	if (result != NULL)
 		PG_RETURN_TEXT_P(result);
 	else
 {
 	text	   *json = PG_GETARG_TEXT_P(0);
 	int			element = PG_GETARG_INT32(1);
-	char	   *jsonstr = text_to_cstring(json);
 	text	   *result;
 
-	result = get_worker(jsonstr, NULL, element, NULL, -1, false);
+	result = get_worker(json, NULL, element, NULL, -1, false);
 
 	if (result != NULL)
 		PG_RETURN_TEXT_P(result);
 {
 	text	   *json = PG_GETARG_TEXT_P(0);
 	int			element = PG_GETARG_INT32(1);
-	char	   *jsonstr = text_to_cstring(json);
 	text	   *result;
 
-	result = get_worker(jsonstr, NULL, element, NULL, -1, true);
+	result = get_worker(json, NULL, element, NULL, -1, true);
 
 	if (result != NULL)
 		PG_RETURN_TEXT_P(result);
 {
 	text	   *json = PG_GETARG_TEXT_P(0);
 	ArrayType  *path = PG_GETARG_ARRAYTYPE_P(1);
-	char	   *jsonstr = text_to_cstring(json);
 	text	   *result;
 	Datum	   *pathtext;
 	bool	   *pathnulls;
 							"json_get_path_as_text")));
 	}
 
-	result = get_worker(jsonstr, NULL, -1, pathstr, npath, false);
+	result = get_worker(json, NULL, -1, pathstr, npath, false);
 
 	if (result != NULL)
 		PG_RETURN_TEXT_P(result);
 {
 	text	   *json = PG_GETARG_TEXT_P(0);
 	ArrayType  *path = PG_GETARG_ARRAYTYPE_P(1);
-	char	   *jsonstr = text_to_cstring(json);
 	text	   *result;
 	Datum	   *pathtext;
 	bool	   *pathnulls;
 							"json_get_path_as_text")));
 	}
 
-	result = get_worker(jsonstr, NULL, -1, pathstr, npath, true);
+	result = get_worker(json, NULL, -1, pathstr, npath, true);
 
 	if (result != NULL)
 		PG_RETURN_TEXT_P(result);
 }
 
 static text *
-get_worker(char *json,
+get_worker(text *json,
 		   char *field,
 		   int elem_index,
 		   char **path,
 json_array_length(PG_FUNCTION_ARGS)
 {
 	text	   *json = PG_GETARG_TEXT_P(0);
-	char	   *jsonstr = text_to_cstring(json);
 
 	AlenState	state;
-	JsonLexContext *lex = makeJsonLexContext(jsonstr, false);
+	JsonLexContext *lex = makeJsonLexContext(json, false);
 	JsonSemAction sem;
 
 	state = palloc0(sizeof(alenState));
 json_each(PG_FUNCTION_ARGS)
 {
 	text	   *json = PG_GETARG_TEXT_P(0);
-	char	   *jsonstr = text_to_cstring(json);
-	JsonLexContext *lex = makeJsonLexContext(jsonstr, true);
+	JsonLexContext *lex = makeJsonLexContext(json, true);
 	JsonSemAction sem;
 	ReturnSetInfo *rsi;
 	MemoryContext old_cxt;
 json_each_as_text(PG_FUNCTION_ARGS)
 {
 	text	   *json = PG_GETARG_TEXT_P(0);
-	char	   *jsonstr = text_to_cstring(json);
-	JsonLexContext *lex = makeJsonLexContext(jsonstr, true);
+	JsonLexContext *lex = makeJsonLexContext(json, true);
 	JsonSemAction sem;
 	ReturnSetInfo *rsi;
 	MemoryContext old_cxt;
 json_unnest(PG_FUNCTION_ARGS)
 {
 	text	   *json = PG_GETARG_TEXT_P(0);
-	char	   *jsonstr = text_to_cstring(json);
-	JsonLexContext *lex = makeJsonLexContext(jsonstr, true);
+	JsonLexContext *lex = makeJsonLexContext(json, true);
 	JsonSemAction sem;
 	ReturnSetInfo *rsi;
 	MemoryContext old_cxt;
 	Oid			argtype = get_fn_expr_argtype(fcinfo->flinfo, 0);
 	text	   *json = PG_GETARG_TEXT_P(1);
 	bool		use_json_as_text = PG_GETARG_BOOL(2);
-	char	   *jsonstr = text_to_cstring(json);
 	HTAB	   *json_hash;
 	HeapTupleHeader rec;
 	Oid			tupType;
 		tupTypmod = HeapTupleHeaderGetTypMod(rec);
 	}
 
-	json_hash = get_json_object_as_hash(jsonstr, "json_populate_record", use_json_as_text);
+	json_hash = get_json_object_as_hash(json, "json_populate_record", use_json_as_text);
 
 	/*
 	 * if the input json is empty, we can only skip the rest if we were passed
  * error messages.
  */
 static HTAB *
-get_json_object_as_hash(char *jsonstr, char *funcname, bool use_json_as_text)
+get_json_object_as_hash(text *json, char *funcname, bool use_json_as_text)
 {
 	HASHCTL		ctl;
 	HTAB	   *tab;
 	JHashState	state;
-	JsonLexContext *lex = makeJsonLexContext(jsonstr, true);
+	JsonLexContext *lex = makeJsonLexContext(json, true);
 	JsonSemAction sem;
 
 	memset(&ctl, 0, sizeof(ctl));
 	Oid			argtype = get_fn_expr_argtype(fcinfo->flinfo, 0);
 	text	   *json = PG_GETARG_TEXT_P(1);
 	bool		use_json_as_text = PG_GETARG_BOOL(2);
-	char	   *jsonstr;
 	ReturnSetInfo *rsi;
 	MemoryContext old_cxt;
 	Oid			tupType;
 	tupTypmod = tupdesc->tdtypmod;
 	ncolumns = tupdesc->natts;
 
-	jsonstr = text_to_cstring(json);
-	lex = makeJsonLexContext(jsonstr, true);
+	lex = makeJsonLexContext(json, true);
 
 	/*
 	 * We arrange to look up the needed I/O info just once per series of

File src/include/utils/jsonapi.h

 typedef struct JsonLexContext
 {
 	char	   *input;
+	int			input_length;
 	char	   *token_start;
 	char	   *token_terminator;
 	char	   *prev_token_terminator;
 extern void pg_parse_json(JsonLexContext *lex, JsonSemAction sem);
 
 /* constructor for JsonLexContext, with or without strval element */
-extern JsonLexContext *makeJsonLexContext(char *json, bool need_escapes);
+extern JsonLexContext *makeJsonLexContext(text *json, bool need_escapes);
 
 #endif   /* JSONAPI_H */