Commits

Andrew Dunstan committed 970981d

json_each and json_get_path

Comments (0)

Files changed (6)

src/backend/utils/adt/json.c

 
     if (oend != NULL)
         (*oend) (sem->semstate, fname, isnull);
+
+	if (fname != NULL)
+		pfree(fname);
 }
 
 static void 

src/backend/utils/adt/jsonfuncs.c

 
 #include "postgres.h"
 
+#include <limits.h>
+
 #include "fmgr.h"
 #include "funcapi.h"
+#include "miscadmin.h"
+#include "access/htup_details.h"
+#include "catalog/pg_type.h"
 #include "lib/stringinfo.h"
 #include "mb/pg_wchar.h"
+#include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/json.h"
 #include "utils/jsonapi.h"
+#include "utils/memutils.h"
 
 /* semantic action functions for json_object_keys */
 static void okeys_object_start(void *state);
 static void get_array_element_start(void *state, bool isnull);
 static void get_array_element_end(void *state, bool isnull);
 static void get_scalar(void *state, char *token, JsonTokenType tokentype);
-/* common workeer function for json_get* functions */
-static text *get_worker(char *json, char *field, int elem_index, bool normalize_results);
+/* common worker function for json_get* functions */
+static text *get_worker(char *json, char *field, int elem_index, char **path, 
+						int npath, bool normalize_results);
 
 /* semantic action functions for json_array_length */
 static void alen_object_start(void *state);
 typedef enum
 {
 	JSON_SEARCH_OBJECT = 1,
-	JSON_SEARCH_ARRAY
+	JSON_SEARCH_ARRAY,
+	JSON_SEARCH_PATH
 }	JsonSearch;
 
 /* stats for json_object_keys */
 	bool		result_is_null;
 	bool		normalize_results;
 	bool		next_scalar;
+	char      **path;
+	int         npath;
+	char      **current_path;
+	bool       *pathok;
+	int        *array_level_index;
+	int        *path_level_index;
 }	getState, *GetState;
 
 /* state for json_array_length */
 	char	   *fnamestr = text_to_cstring(fname);
 	text	   *result;
 
-	result = get_worker(jsonstr, fnamestr, -1, false);
+	result = get_worker(jsonstr, fnamestr, -1, NULL, -1, false);
 
 	if (result != NULL)
 		PG_RETURN_TEXT_P(result);
 	char	   *fnamestr = text_to_cstring(fname);
 	text	   *result;
 
-	result = get_worker(jsonstr, fnamestr, -1, true);
-
+	result = get_worker(jsonstr, fnamestr, -1, NULL, -1, true);
 	if (result != NULL)
 		PG_RETURN_TEXT_P(result);
 	else
 	char	   *jsonstr = text_to_cstring(json);
 	text	   *result;
 
-	result = get_worker(jsonstr, NULL, element, false);
+	result = get_worker(jsonstr, NULL, element, NULL, -1, false);
 
 	if (result != NULL)
 		PG_RETURN_TEXT_P(result);
 	char	   *jsonstr = text_to_cstring(json);
 	text	   *result;
 
-	result = get_worker(jsonstr, NULL, element, true);
+	result = get_worker(jsonstr, NULL, element, NULL, -1, true);
 
 	if (result != NULL)
 		PG_RETURN_TEXT_P(result);
 }
 
 
+PG_FUNCTION_INFO_V1(json_get_path);
+
+Datum
+json_get_path(PG_FUNCTION_ARGS)
+{
+	text	   *json = PG_GETARG_TEXT_P(0);
+	ArrayType  *path = PG_GETARG_ARRAYTYPE_P(1);
+	char	   *jsonstr = text_to_cstring(json);
+	text	   *result;
+    Datum      *pathtext;
+    bool       *pathnulls;
+    int         npath;
+    char      **pathstr;
+	int         i;
+
+	if (array_contains_nulls(path))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("cannot call json_get_path with null elements in path parameter")));
+
+
+    deconstruct_array(path, TEXTOID, -1, false, 'i',
+                      &pathtext, &pathnulls, &npath);
+    
+    pathstr = palloc(npath * sizeof(char *));
+
+    for (i = 0; i < npath; i++)
+	{
+		pathstr[i] = TextDatumGetCString(pathtext[i]);
+		if (*pathstr[i] == '\0')
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("cannot call json_get_path with empty elements in path parameter")));
+	}
+
+	result = get_worker(jsonstr, NULL, -1, pathstr, npath, false);
+
+	if (result != NULL)
+		PG_RETURN_TEXT_P(result);
+	else
+		PG_RETURN_NULL();
+}
+
 static text *
-get_worker(char *json, char *field, int elem_index, bool normalize_results)
+get_worker(char *json, char *field, int elem_index, char **path, int npath, bool normalize_results)
 {
 	GetState	state;
 	JsonLexContext lex;
 		state->search_type = JSON_SEARCH_OBJECT;
 		state->search_term = field;
 	}
+	else if (path != NULL)
+	{
+		int i;
+		long int ind;
+		char *endptr;
+
+		state->search_type = JSON_SEARCH_PATH;
+		state->path = path;
+		state->npath = npath;
+		state->current_path = palloc(sizeof(char *) * npath);
+		state->pathok = palloc(sizeof(bool) * npath);
+		state->pathok[0] = true;
+		state->array_level_index = palloc(sizeof(int) * npath);
+		state->path_level_index = palloc(sizeof(int) * npath);
+		for (i=0; i < npath; i++)
+		{
+			ind = strtol(path[i],&endptr,10);
+			if (*endptr == '\0' && ind <= INT_MAX && ind >= 0)
+				state->path_level_index[i] = (int) ind;
+			else
+				state->path_level_index[i] = -1;
+ 		}
+	}
 	else
 	{
 		state->search_type = JSON_SEARCH_ARRAY;
 	sem->array_start = get_array_start;
 	sem->array_end = get_array_end;
 	sem->scalar = get_scalar;
-	if (field != NULL)
+	if (field != NULL || path != NULL)
 	{
 		sem->object_field_start = get_object_field_start;
 		sem->object_field_end = get_object_field_end;
 	}
-	else
+	if (field == NULL)
 	{
 		sem->array_element_start = get_array_element_start;
 		sem->array_element_end = get_array_element_end;
 	GetState	_state = (GetState) state;
 
 	_state->lex_level++;
-	if (_state->lex_level == 1 && _state->search_type != JSON_SEARCH_OBJECT)
+	if (_state->lex_level == 1 && _state->search_type == JSON_SEARCH_ARRAY)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 				 errmsg("cannot call json_get(int) on a non-array")));
 get_object_field_start(void *state, char *fname, bool isnull)
 {
 	GetState	_state = (GetState) state;
+	bool        get_next = false;
 
 	if (_state->lex_level == 1 && _state->search_type == JSON_SEARCH_OBJECT &&
 		strcmp(fname, _state->search_term) == 0)
 	{
-		if (_state->result_start != NULL)
+		get_next = true;
+	}
+	else if (_state->search_type == JSON_SEARCH_PATH && 
+			 _state->lex_level <= _state->npath &&_state->pathok[_state->lex_level - 1] 
+			 && strcmp(fname, _state->path[_state->lex_level - 1]) == 0)
+	{
+		if (_state->lex_level < _state->npath)
+			_state->pathok[_state->lex_level] = true;
+
+		if (_state->lex_level == _state->npath)
+			get_next = true;
+	}
+
+	if (get_next)
+	{
+		if (_state->tresult != NULL || _state->result_start != NULL)
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 					 errmsg("field name is not unique in json object")));
+
 		if (_state->normalize_results && _state->lex->token_type == JSON_TOKEN_STRING)
 		{
 			_state->next_scalar = true;
 get_object_field_end(void *state, char *fname, bool isnull)
 {
 	GetState	_state = (GetState) state;
+	bool        get_last = false;
 
 	if (_state->lex_level == 1 && _state->search_type == JSON_SEARCH_OBJECT &&
 	 _state->result_start != NULL && strcmp(fname, _state->search_term) == 0)
 	{
+		get_last = true;
+	}
+	else if (_state->search_type == JSON_SEARCH_PATH && 
+			 _state->lex_level <= _state->npath &&_state->pathok[_state->lex_level - 1] 
+			 && strcmp(fname, _state->path[_state->lex_level - 1]) == 0)
+	{
+		/* done with this field so reset pathok */
+		if (_state->lex_level < _state->npath)
+			_state->pathok[_state->lex_level] = false;
+
+		if (_state->lex_level == _state->npath)
+			get_last = true;
+	}
+
+	if (get_last)
+	{
 		int			len = _state->lex->prev_token_terminator - _state->result_start;
 
 		_state->tresult = cstring_to_text_with_len(_state->result_start, len);
 	}
+	/* 
+	 * don't need to reset _state->result_start b/c we're only returning one datum,
+	 * the conditions should not occur more than once, and this lets us check 
+	 * cheaply that they don't (see object_field_start() )
+	 */
 }
 
 static void
 	GetState	_state = (GetState) state;
 
 	_state->lex_level++;
-	if (_state->lex_level == 1 && _state->search_type != JSON_SEARCH_ARRAY)
+	if (_state->lex_level == 1 && _state->search_type == JSON_SEARCH_OBJECT)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 				 errmsg("cannot call json_get(fieldname) on a non-object")));
+	else if (_state->search_type == JSON_SEARCH_PATH && 
+			 _state->lex_level <= _state->npath)
+		_state->array_level_index[_state->lex_level  - 1] = -1;
 }
 
 static void
 get_array_element_start(void *state, bool isnull)
 {
 	GetState	_state = (GetState) state;
+	bool        get_next = false;
 
 	if (_state->lex_level == 1 && _state->search_type == JSON_SEARCH_ARRAY)
 	{
 		_state->array_index++;
 		if (_state->array_index == _state->search_index)
+			get_next = true;
+	}
+	else if (_state->search_type == JSON_SEARCH_PATH &&  
+			 _state->lex_level <= _state->npath && 
+			 _state->pathok[_state->lex_level - 1])
+	{
+		if (++_state->array_level_index[_state->lex_level  - 1] == _state->path_level_index[_state->lex_level  - 1])
 		{
-			if (_state->normalize_results && _state->lex->token_type == JSON_TOKEN_STRING)
-				_state->next_scalar = true;
-			else
-				_state->result_start = _state->lex->token_start;
+			if (_state->lex_level ==  _state->npath)
+				get_next = true;
+			else 
+				_state->pathok[_state->lex_level] = true;
 		}
+		
+	}
+
+	if (get_next)
+	{
+		if (_state->normalize_results && _state->lex->token_type == JSON_TOKEN_STRING)
+			_state->next_scalar = true;
+		else
+			_state->result_start = _state->lex->token_start;
 	}
 }
 
 get_array_element_end(void *state, bool isnull)
 {
 	GetState	_state = (GetState) state;
+	bool        get_last = false;
 
 	if (_state->lex_level == 1 && _state->search_type == JSON_SEARCH_ARRAY &&
 		_state->array_index == _state->search_index && _state->result_start != NULL)
 	{
+		get_last = true;
+	}
+	else if (_state->search_type == JSON_SEARCH_PATH && 
+			 _state->lex_level <= _state->npath &&_state->pathok[_state->lex_level - 1] 
+			 && _state->array_level_index[_state->lex_level  - 1] == _state->path_level_index[_state->lex_level  - 1])
+	{
+		/* done with this element so reset pathok */
+		if (_state->lex_level < _state->npath)
+			_state->pathok[_state->lex_level] = false;
+
+		if (_state->lex_level == _state->npath)
+			get_last = true;
+	}
+	if (get_last)
+	{
 		int			len = _state->lex->prev_token_terminator - _state->result_start;
 
 		_state->tresult = cstring_to_text_with_len(_state->result_start, len);
 {
 	GetState	_state = (GetState) state;
 
-	if (_state->lex_level == 0)
+	if (_state->lex_level == 0 && _state->search_type != JSON_SEARCH_PATH) 
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 				 errmsg("cannot call json_get on a scalar")));
 		_state->count++;
 }
 
+/* semantic action functions for json_get* functions */
+static void each_object_start(void *state);
+static void each_object_end(void *state);
+static void each_object_field_start(void *state, char *fname, bool isnull);
+static void each_object_field_end(void *state, char *fname, bool isnull);
+static void each_array_start(void *state);
+static void each_scalar(void *state, char *token, JsonTokenType tokentype);
+/* common worker function for json_get* functions */
+/*static text *each_worker(char *json, char *field, int elem_index, bool normalize_results); */
+
+
+typedef struct
+{
+	JsonLexContext *lex;
+	int			lex_level;
+    Tuplestorestate *tuple_store;
+    TupleDesc   ret_tdesc;
+	MemoryContext tmp_cxt;
+	char  *result_start;
+} eachState, *EachState;
+
+
+PG_FUNCTION_INFO_V1(json_each);
+
+Datum
+json_each(PG_FUNCTION_ARGS)
+{
+
+	text	   *json = PG_GETARG_TEXT_P(0);
+	char	   *jsonstr = text_to_cstring(json);
+	JsonLexContext lex;
+	JsonSemAction sem;
+	ReturnSetInfo *rsi;
+	MemoryContext old_cxt;
+	TupleDesc   tupdesc;
+	EachState	state;
+
+	state = palloc0(sizeof(eachState));
+	sem = palloc0(sizeof(jsonSemAction));
+
+	rsi = (ReturnSetInfo *) fcinfo->resultinfo;
+
+	if (!rsi || !IsA(rsi, ReturnSetInfo) ||
+		(rsi->allowedModes & SFRM_Materialize) == 0 ||
+		rsi->expectedDesc == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("set-valued function called in context that "
+						"cannot accept a set")));
+
+
+	rsi->returnMode = SFRM_Materialize;
+
+	(void) get_call_result_type(fcinfo, NULL, &tupdesc);
+
+	/* make these in a sufficiently long-lived memory context */
+	old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory);
+
+	state->ret_tdesc = CreateTupleDescCopy(tupdesc);
+	BlessTupleDesc(	state->ret_tdesc);
+	state->tuple_store = 
+		tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random,
+							  false, work_mem);
+
+	MemoryContextSwitchTo(old_cxt);
+
+	sem->semstate = (void *) state;
+	sem->object_start = each_object_start;
+	sem->object_end = each_object_end;
+	sem->array_start = each_array_start;
+	sem->scalar = each_scalar;
+	sem->object_field_start = each_object_field_start;
+	sem->object_field_end = each_object_field_end;
+
+	/* Set up lexing context. */
+	lex.input = jsonstr;
+	lex.token_terminator = lex.input;
+	lex.line_number = 1;
+	lex.line_start = jsonstr;
+	lex.strval = makeStringInfo();
+
+	state->lex = &lex;
+	state->lex_level = 0;
+	state->tmp_cxt = AllocSetContextCreate(CurrentMemoryContext,
+                                  "json_each temporary cxt",
+                                  ALLOCSET_DEFAULT_MINSIZE,
+                                  ALLOCSET_DEFAULT_INITSIZE,
+                                  ALLOCSET_DEFAULT_MAXSIZE);
+
+	pg_parse_json(&lex, sem);
+	
+	rsi->setResult = state->tuple_store;
+	rsi->setDesc = state->ret_tdesc;
+
+	PG_RETURN_NULL();
+}
+
+static void each_object_start(void *state)
+{
+	EachState	_state = (EachState) state;
+
+	_state->lex_level++;
+}
+static void each_object_end(void *state)
+{
+	EachState	_state = (EachState) state;
+
+	_state->lex_level--;
+}
+static void each_object_field_start(void *state, char *fname, bool isnull)
+{
+	EachState	_state = (EachState) state;
+
+	/* save a pointer to where the value starts */
+	if (_state->lex_level == 1)
+		_state->result_start = _state->lex->token_start;
+}
+
+static void each_object_field_end(void *state, char *fname, bool isnull)
+{
+	EachState	_state = (EachState) state;
+	MemoryContext old_cxt;
+	int len;
+	text *val;
+	HeapTuple   tuple;
+	Datum values[2];
+	static bool nulls[2] = {false,false};
+
+	/* skip over nested objects */
+	if (_state->lex_level != 1)
+		return;
+
+	/* use the tmp context so we can clean up after each tuple is done */
+	old_cxt = MemoryContextSwitchTo(_state->tmp_cxt);
+
+	len = _state->lex->prev_token_terminator - _state->result_start;
+	val = cstring_to_text_with_len(_state->result_start, len);
+
+	values[0] = CStringGetTextDatum(fname);
+	values[1] = PointerGetDatum(val);
+
+	tuple = heap_form_tuple(_state->ret_tdesc, values, nulls);
+	
+	tuplestore_puttuple(_state->tuple_store, tuple);
+
+	/* clean up and switch back */
+	MemoryContextSwitchTo(old_cxt);
+    MemoryContextReset(_state->tmp_cxt);
+}
+
+static void each_array_start(void *state)
+{
+	EachState	_state = (EachState) state;
+
+	if (_state->lex_level == 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("cannot call json_each on an array")));
+}
+
+static void each_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+	EachState	_state = (EachState) state;
+
+	if (_state->lex_level == 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("cannot call json_each on a scalar")));
+}

src/include/catalog/pg_proc.h

 DESCR("get json object keys");
 DATA(insert OID = 5006 (  json_array_length PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 23 "114" _null_ _null_ _null_ _null_ json_array_length _null_ _null_ _null_ ));
 DESCR("length of json array");
+DATA(insert OID = 5007 (  json_each PGNSP PGUID 12 1 100 0 0 f f f f t t s 1 0 2249 "114" "{114,25,114}" "{i,o,o}" "{from_json,key,value}" _null_ json_each _null_ _null_ _null_ ));
+DESCR("key value pairs of a json object");
+DATA(insert OID = 5008 (  json_get_path	   PGNSP PGUID 12 1 0 25 0 f f f f t f s 2 0 114 "114 1009" "{114,1009}" "{i,v}" "{from_json,path_elems}" _null_ json_get_path _null_ _null_ _null_ ));
+DESCR("get json with path elements");
 
 
 /* uuid */

src/include/utils/json.h

 extern Datum json_get_ofield(PG_FUNCTION_ARGS);
 extern Datum json_object_keys(PG_FUNCTION_ARGS);
 extern Datum json_array_length(PG_FUNCTION_ARGS);
+extern Datum json_each(PG_FUNCTION_ARGS);
+extern Datum json_get_path(PG_FUNCTION_ARGS);
 
 #endif   /* JSON_H */

src/test/regress/expected/json.out

 ERROR:  cannot call json_array_length on an object
 SELECT json_array_length('4');
 ERROR:  cannot call json_array_length on a scalar
+-- each
+select json_each('{"f1":[1,2,3],"f2":{"f3":1},"f4":null}');
+     json_each     
+-------------------
+ (f1,"[1,2,3]")
+ (f2,"{""f3"":1}")
+ (f4,null)
+(3 rows)
+
+select * from json_each('{"f1":[1,2,3],"f2":{"f3":1},"f4":null,"f5":99,"f6":"stringy"}') q;
+ key |   value   
+-----+-----------
+ f1  | [1,2,3]
+ f2  | {"f3":1}
+ f4  | null
+ f5  | 99
+ f6  | "stringy"
+(5 rows)
+
+-- get_path
+select json_get_path('{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}','f4','f6');
+ json_get_path 
+---------------
+ "stringy"
+(1 row)
+
+select json_get_path('{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}','f2');
+ json_get_path 
+---------------
+ {"f3":1}
+(1 row)
+
+select json_get_path('{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}','f2',0::text);
+ json_get_path 
+---------------
+ "f3"
+(1 row)
+
+select json_get_path('{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}','f2',1::text);
+ json_get_path 
+---------------
+ 1
+(1 row)
+

src/test/regress/sql/json.sql

 
 SELECT json_array_length('4');
 
+-- each
+
+select json_each('{"f1":[1,2,3],"f2":{"f3":1},"f4":null}');
+select * from json_each('{"f1":[1,2,3],"f2":{"f3":1},"f4":null,"f5":99,"f6":"stringy"}') q;
+
+-- get_path
+
+select json_get_path('{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}','f4','f6');
+select json_get_path('{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}','f2');
+select json_get_path('{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}','f2',0::text);
+select json_get_path('{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}','f2',1::text);