Andrew Dunstan avatar Andrew Dunstan committed 34d8067

mostly comments

Comments (0)

Files changed (3)

src/backend/utils/adt/json.c

 
 /* Recursive Descent parser support routines */
 
+/*
+ * lex_peek
+ *
+ * what is the current look_ahead token?
+ */
 static inline JsonTokenType
 lex_peek(JsonLexContext *lex)
 {
 	return lex->token_type;
 }
 
+/*
+ * lex_accept
+ *
+ * accept the look_ahead token and move the lexer to the next token if the
+ * look_ahead token matches the token parameter. In that case, and if required,
+ * also hand back the de-escaped lexeme.
+ *
+ * returns true if the token matched, false otherwise.
+ */
 static inline bool
 lex_accept(JsonLexContext *lex, JsonTokenType token, char **lexeme)
 {
 	return false;
 }
 
+/*
+ * lex_expect
+ *
+ * move the lexer to the next token if the current look_ahead token matches
+ * the parameter token. Otherwise, report an error.
+ */
 static inline void
 lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token)
 {
 }
 
 /*
+ * makeJsonLexContext
+ *
  * lex constructor, with or without StringInfo object
  * for de-escaped lexemes.
+ *
+ * Without is better as it makes the processing faster, so only make one
+ * if really required.
  */
-
 JsonLexContext *
 makeJsonLexContext(text *json, bool need_escapes)
 {
 }
 
 /*
- * parse routines
+ * pg_parse_json
+ *
+ * Publicly visible entry point for the JSON parser.
+ *
+ * lex is a lexing context, set up for the json to be processed by calling
+ * makeJsonLexContext(). sem is a structure of function pointers to semantic
+ * action routines to be called at appropriate spots during parsing, and a
+ * pointer to a state object to be passed to those routines.
  */
 void
 pg_parse_json(JsonLexContext *lex, JsonSemAction sem)
 
 }
 
+/*
+ *	Recursive Descent parse routines. There is one for each structural
+ *	element in a json document:
+ *	  - scalar (string, number, true, false, null)
+ *	  - array  ( [ ] )
+ *	  - array element
+ *	  - object ( { } )
+ *	  - object field
+ */
 static inline void
 parse_scalar(JsonLexContext *lex, JsonSemAction sem)
 {
 
 	valaddr = sfunc == NULL ? NULL : &val;
 
+	/* a scalar must be a string, a number, true, false, or null */
 	switch (tok)
 	{
 		case JSON_TOKEN_TRUE:
 static void
 parse_object_field(JsonLexContext *lex, JsonSemAction sem)
 {
+	/*
+	 * an object field is "fieldname" : value where value can be a scalar,
+	 * object or array
+	 */
+
 	char	   *fname = NULL;	/* keep compiler quiet */
 	json_ofield_action ostart = sem->object_field_start;
 	json_ofield_action oend = sem->object_field_end;
 static void
 parse_object(JsonLexContext *lex, JsonSemAction sem)
 {
+	/*
+	 * an object is a possibly empty sequence of object fields, separated by
+	 * commas and surrounded by curly braces.
+	 */
 	json_struct_action ostart = sem->object_start;
 	json_struct_action oend = sem->object_end;
 	JsonTokenType tok;
 	if (ostart != NULL)
 		(*ostart) (sem->semstate);
 
+	/*
+	 * Data inside an object is at a higher nesting level than the object
+	 * itself. Note that we increment this after we call the semantic routine
+	 * for the object start and restore it before we call the routine for the
+	 * object end.
+	 */
 	lex->lex_level++;
 
 	/* we know this will succeed, just clearing the token */
 	if (astart != NULL)
 		(*astart) (sem->semstate, isnull);
 
+	/* an array element is any object, array or scalar */
 	switch (tok)
 	{
 		case JSON_TOKEN_OBJECT_START:
 static void
 parse_array(JsonLexContext *lex, JsonSemAction sem)
 {
+	/*
+	 * an array is a possibly empty sequence of array elements, separated by
+	 * commas and surrounded by square brackets.
+	 */
 	json_struct_action astart = sem->array_start;
 	json_struct_action aend = sem->array_end;
 
 	if (astart != NULL)
 		(*astart) (sem->semstate);
 
+	/*
+	 * Data inside an array is at a higher nesting level than the array
+	 * itself. Note that we increment this after we call the semantic routine
+	 * for the array start and restore it before we call the routine for the
+	 * array end.
+	 */
 	lex->lex_level++;
 
 	lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START);
 			{
 				/*
 				 * Simpler processing if we're not bothered about de-escaping
+				 *
+				 * It's very tempting to remove the strchr() call here and
+				 * replace it with a switch statement, but testing so far has
+				 * shown it's not a performance win.
 				 */
 				lex->token_terminator = s + pg_mblen(s);
 				ereport(ERROR,

src/backend/utils/adt/jsonfuncs.c

  * jsonfuncs.c
  *		Functions to process JSON data type.
  *
- * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
  * SQL function json_object_keys
  *
  * Returns the set of keys for the object argument.
+ *
+ * This SRF operates in value-per-call mode. It processes the
+ * object during the first call, and the keys are simply stashed
+ * in an array, whose size is expanded as necessary. This is probably
+ * safe enough for a list of keys of a single object, since they are
+ * limited in size to NAMEDATALEN and the number of keys is unlikely to
+ * be so huge that it has major memory implications.
  */
 
 PG_FUNCTION_INFO_V1(json_object_keys);
 {
 	OkeysState	_state = (OkeysState) state;
 
+	/* only collecting keys for the top level object */
 	if (_state->lex->lex_level != 1)
 		return;
+
+	/* enlarge result array if necessary */
 	if (_state->result_count >= _state->result_size)
 	{
 		_state->result_size *= 2;
 		_state->result =
 			repalloc(_state->result, sizeof(char *) * _state->result_size);
 	}
+
+	/* save a copy of the field name */
 	_state->result[_state->result_count++] = pstrdup(fname);
 }
 
 {
 	OkeysState	_state = (OkeysState) state;
 
+	/* top level must be a json object */
 	if (_state->lex->lex_level == 0)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 {
 	OkeysState	_state = (OkeysState) state;
 
+	/* top level must be a json object */
 	if (_state->lex->lex_level == 0)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
  * different setup options.
  */
 
+
+/*
+ * SQL function json_get(json, text) -> json
+ *
+ * return json for named field
+ *
+ * also used for json -> text operator
+ */
 PG_FUNCTION_INFO_V1(json_get_ofield);
 
 Datum
 		PG_RETURN_NULL();
 }
 
+/*
+ * SQL function json_get_as_text(json, text) -> text
+ *
+ * return text for named field. If the field is a
+ * string the de-escaped value of the string is delivered.
+ *
+ * also used for json ->> text operator
+ */
+
 PG_FUNCTION_INFO_V1(json_get_ofield_as_text);
 
 Datum
 		PG_RETURN_NULL();
 }
 
+/*
+ * SQL function json_get(json, int) -> json
+ *
+ * return json for numbered field
+ *
+ * also used for json -> int operator
+ */
+
 PG_FUNCTION_INFO_V1(json_get_aelem);
 
 Datum
 }
 
 
+/*
+ * SQL function json_get_as_text(json, int) -> text
+ *
+ * return text for numbered field. If the field is a
+ * string the de-escaped value of the string is delivered.
+ *
+ * also used for json ->> int operator
+ */
+
 PG_FUNCTION_INFO_V1(json_get_aelem_as_text);
 
 Datum
 }
 
 
+/*
+ * SQL function json_get_path(json, variadic text[]) -> json
+ *
+ * return json for object pointed to by path contained in second
+ * parameter. If the json structure referred to by a path element is
+ * an array, the path element is treated as a (zero based) index. If
+ * it's an object it is treated as a field name. Since SQL arrays are
+ * homogeneous, integer arguments for array indexes must be passed as text.
+ *
+ * There is also a non-variadic function json_get_path_op
+ * that maps to this function and is used in the construction of the
+ * json -> text[] operator.
+ */
+
 PG_FUNCTION_INFO_V1(json_get_path);
 
 Datum
 		PG_RETURN_NULL();
 }
 
+/*
+ * SQL function json_get_path_as_text(json, variadic text[]) -> text
+ *
+ * return text for object pointed to by path contained in second
+ * parameter. If the json structure referred to by a path element is
+ * an array, the path element is treated as a (zero based) index. If
+ * it's an object it is treated as a field name. Since SQL arrays are
+ * homogeneous, integer arguments for array indexes must be passed as text.
+ *
+ * If the field is a string the de-escaped value of the string is delivered.
+ *
+ * There is also a non-variadic function json_get_path_as_text_op
+ * that maps to this function and is used in the construction of the
+ * json ->> text[] operator.
+ */
+
 PG_FUNCTION_INFO_V1(json_get_path_as_text);
 
 Datum
 		PG_RETURN_NULL();
 }
 
+/*
+ * get_worker
+ *
+ * common worker for json_get* functions
+ */
 static text *
 get_worker(text *json,
 		   char *field,
 	JsonLexContext *lex = makeJsonLexContext(json, true);
 	JsonSemAction sem;
 
+	/* only allowed to use one of these */
+	Assert(elem_index < 0 || (path == NULL && field == NULL));
+	Assert(path == NULL || field == NULL);
+
 	state = palloc0(sizeof(getState));
 	sem = palloc0(sizeof(jsonSemAction));
 
 	state->lex = lex;
+	/* is it "_as_text" variant? */
 	state->normalize_results = normalize_results;
 	if (field != NULL)
 	{
+		/* single text argument */
 		state->search_type = JSON_SEARCH_OBJECT;
 		state->search_term = field;
 	}
 	else if (path != NULL)
 	{
+		/* path array argument */
 		int			i;
 		long int	ind;
 		char	   *endptr;
 		state->pathok[0] = true;
 		state->array_level_index = palloc(sizeof(int) * npath);
 		state->path_level_index = palloc(sizeof(int) * npath);
+
+		/*
+		 * we have no idea at this stage what structure the document is so
+		 * just convert anything in the path that we can to an integer and set
+		 * all the other integers to -1 which will never match.
+		 */
 		for (i = 0; i < npath; i++)
 		{
 			ind = strtol(path[i], &endptr, 10);
 	}
 	else
 	{
+		/* single integer argument */
 		state->search_type = JSON_SEARCH_ARRAY;
 		state->search_index = elem_index;
 		state->array_index = -1;
 	}
 
 	sem->semstate = (void *) state;
+
+	/*
+	 * Not all variants need all the semantic routines. Only set the ones
+	 * that are actually needed for maximum efficiency.
+	 */
 	sem->object_start = get_object_start;
 	sem->array_start = get_array_start;
 	sem->scalar = get_scalar;
 {
 	GetState	_state = (GetState) state;
 
+	/* json structure check */
 	if (_state->lex->lex_level == 0 && _state->search_type == JSON_SEARCH_ARRAY)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 	if (lex_level == 1 && _state->search_type == JSON_SEARCH_OBJECT &&
 		strcmp(fname, _state->search_term) == 0)
 	{
+		/* single field search and we have a match at the right nesting level */
 		get_next = true;
 	}
 	else if (_state->search_type == JSON_SEARCH_PATH &&
 			 _state->pathok[_state->lex->lex_level - 1] &&
 			 strcmp(fname, _state->path[lex_level - 1]) == 0)
 	{
+		/* path search, path so far is ok, and we have a match */
+
+		/* if not at end of path just mark path ok */
 		if (lex_level < _state->npath)
 			_state->pathok[lex_level] = true;
 
+		/* end of path, so we want this value */
 		if (lex_level == _state->npath)
 			get_next = true;
 	}
 
 	if (get_next)
 	{
+		/*
+		 * If tresult is already set it means we've already made this match.
+		 * So complain about it.
+		 */
 		if (_state->tresult != NULL || _state->result_start != NULL)
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 		if (_state->normalize_results &&
 			_state->lex->token_type == JSON_TOKEN_STRING)
 		{
+			/* for as_text variants, tell get_scalar to set it for us */
 			_state->next_scalar = true;
 		}
 		else
 		{
+			/* for non-as_text variants, just note the json starting point */
 			_state->result_start = _state->lex->token_start;
 		}
 	}
 	bool		get_last = false;
 	int			lex_level = _state->lex->lex_level;
 
+
+	/* same tests as in get_object_field_start, mutatis mutandis */
 	if (lex_level == 1 && _state->search_type == JSON_SEARCH_OBJECT &&
 		strcmp(fname, _state->search_term) == 0)
 	{
 			get_last = true;
 	}
 
+	/* for as_text variants our work is already done */
 	if (get_last && _state->result_start != NULL)
 	{
+		/*
+		 * make a text object from the string from the previously noted json
+		 * start up to the end of the previous token (the lexer is by now
+		 * ahead of us on whatever came after what we're interested in).
+		 */
 		int			len = _state->lex->prev_token_terminator - _state->result_start;
 
 		_state->tresult = cstring_to_text_with_len(_state->result_start, len);
 	GetState	_state = (GetState) state;
 	int			lex_level = _state->lex->lex_level;
 
+	/* json structure check */
 	if (lex_level == 0 && _state->search_type == JSON_SEARCH_OBJECT)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("cannot call json_get(fieldname) on a non-object")));
-	else if (_state->search_type == JSON_SEARCH_PATH &&
-			 lex_level <= _state->npath)
+	/* initialize array count for this nesting level */
+	if (_state->search_type == JSON_SEARCH_PATH &&
+		lex_level <= _state->npath)
 		_state->array_level_index[lex_level] = -1;
 }
 
 
 	if (lex_level == 1 && _state->search_type == JSON_SEARCH_ARRAY)
 	{
+		/* single integer search */
 		_state->array_index++;
 		if (_state->array_index == _state->search_index)
 			get_next = true;
 			 lex_level <= _state->npath &&
 			 _state->pathok[lex_level - 1])
 	{
+		/*
+		 * path search, path so far is ok
+		 *
+		 * increment the array counter. no point doing this if we already know
+		 * the path is bad.
+		 *
+		 * then check if we have a match.
+		 */
+
 		if (++_state->array_level_index[lex_level - 1] ==
 			_state->path_level_index[lex_level - 1])
 		{
 			if (lex_level == _state->npath)
+			{
+				/* match and at end of path, so get value */
 				get_next = true;
+			}
 			else
+			{
+				/* not at end of path just mark path ok */
 				_state->pathok[lex_level] = true;
+			}
 		}
 
 	}
 
+	/* same logic as for objects */
 	if (get_next)
 	{
 		if (_state->normalize_results &&
 	bool		get_last = false;
 	int			lex_level = _state->lex->lex_level;
 
+	/* same logic as in get_object_end, modified for arrays */
+
 	if (lex_level == 1 && _state->search_type == JSON_SEARCH_ARRAY &&
 		_state->array_index == _state->search_index)
 	{
 				 errmsg("cannot call json_get on a scalar")));
 	if (_state->next_scalar)
 	{
+		/* a de-escaped text value is wanted, so supply it */
 		_state->tresult = cstring_to_text(token);
+		/* make sure the next call to get_scalar doesn't overwrite it */
 		_state->next_scalar = false;
 	}
 
 }
 
 /*
- * SQL function json_array_length
+ * SQL function json_array_length(json) -> int
  */
 
 PG_FUNCTION_INFO_V1(json_array_length);
 	PG_RETURN_INT32(state->count);
 }
 
+/*
+ * These next two checks ensure that the json is an array (since it can't be
+ * a scalar or an object).
+ */
+
 static void
 alen_object_start(void *state)
 {
 	AlenState	_state = (AlenState) state;
 
+	/* json structure check */
 	if (_state->lex->lex_level == 0)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 {
 	AlenState	_state = (AlenState) state;
 
+	/* json structure check */
 	if (_state->lex->lex_level == 0)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 {
 	AlenState	_state = (AlenState) state;
 
+	/* just count up all the level one elements */
 	if (_state->lex->lex_level == 1)
 		_state->count++;
 }
  * SQL function json_each
  *
  * decompose a json object into key value pairs.
+ *
+ * Unlike json_object_keys() this SRF operates in materialize mode,
+ * stashing its results into a Tuplestore object as it goes.
+ * The construction of tuples is done using a temporary memory context
+ * that is cleared out after each tuple is built.
  */
 
 PG_FUNCTION_INFO_V1(json_each);
  *
  * decompose a json object into key value pairs with
  * de-escaped scalar string values.
+ *
+ * See also comments for json_each
  */
 
 PG_FUNCTION_INFO_V1(json_each_as_text);
 {
 	EachState	_state = (EachState) state;
 
+	/* json structure check */
 	if (_state->lex->lex_level == 0)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 {
 	EachState	_state = (EachState) state;
 
+	/* json structure check */
 	if (_state->lex->lex_level == 0)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("cannot call json_each on a scalar")));
 
+	/* supply de-escaped value if required */
 	if (_state->next_scalar)
 		_state->normalized_scalar = token;
 }
  * SQL function json_unnest
  *
  * get the elements from a json array
+ *
+ * a lot of this processing is similar to the json_each* functions
  */
 
 PG_FUNCTION_INFO_V1(json_unnest);
 json_unnest(PG_FUNCTION_ARGS)
 {
 	text	   *json = PG_GETARG_TEXT_P(0);
-	JsonLexContext *lex = makeJsonLexContext(json, true);
+
+	/* unnest doesn't need any escaped strings, so use false here */
+	JsonLexContext *lex = makeJsonLexContext(json, false);
 	JsonSemAction sem;
 	ReturnSetInfo *rsi;
 	MemoryContext old_cxt;
 {
 	UnnestState _state = (UnnestState) state;
 
+	/* json structure check */
 	if (_state->lex->lex_level == 0)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 {
 	UnnestState _state = (UnnestState) state;
 
+	/* json structure check */
 	if (_state->lex->lex_level == 0)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("cannot call json_unnest on a scalar")));
+
+	/*
+	 * json_unnest always returns json, so there's no need to think about
+	 * de-escaped values here.
+	 */
 }
 
 /*

src/include/utils/jsonapi.h

  * jsonapi.h
  *	  Declarations for JSON API support.
  *
- * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * src/include/utils/jsonapi.h
 	JSON_TOKEN_END,
 }	JsonTokenType;
 
+
+/*
+ * All the fields in this structure should be treated as read-only.
+ *
+ * If strval is not null, then it should contain the de-escaped value
+ * of the lexeme if it's a string. Otherwise most of these field names
+ * should be self-explanatory.
+ *
+ * line_number and line_start are principally for use by the parser's
+ * error reporting routines.
+ * token_terminator and prev_token_terminator point to the character
+ * AFTER the end of the token, i.e. where there would be a nul byte
+ * if we were using nul-terminated strings.
+ */
 typedef struct JsonLexContext
 {
 	char	   *input;
 
 
 /*
- * any of these actions can be NULL, in which case nothig is done.
+ * Semantic Action structure for use in parsing json.
+ * Any of these actions can be NULL, in which case nothing is done at that
+ * point. Likewise, semstate can be NULL. Using an all-NULL structure amounts
+ * to doing a pure parse with no side-effects, and is therefore exactly
+ * what the json input routines do.
  */
 typedef struct jsonSemAction
 {
  */
 extern void pg_parse_json(JsonLexContext *lex, JsonSemAction sem);
 
-/* constructor for JsonLexContext, with or without strval element */
+/*
+ * constructor for JsonLexContext, with or without strval element.
+ * If supplied, the strval element will contain a de-escaped version of
+ * the lexeme. However, doing this imposes a performance penalty, so
+ * it should be avoided if the de-escaped lexeme is not required.
+ */
 extern JsonLexContext *makeJsonLexContext(text *json, bool need_escapes);
 
 #endif   /* JSONAPI_H */
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.