Commits

Andrew Dunstan committed b3fdf67

json_unnest

  • Parent commits 970981d

Files changed (5)

src/backend/utils/adt/jsonfuncs.c

 static void alen_scalar(void *state, char *token, JsonTokenType tokentype);
 static void alen_array_element_start(void *state, bool isnull);
 
+/* semantic action functions for json_each */
+static void each_object_start(void *state);
+static void each_object_end(void *state);
+static void each_object_field_start(void *state, char *fname, bool isnull);
+static void each_object_field_end(void *state, char *fname, bool isnull);
+static void each_array_start(void *state);
+static void each_scalar(void *state, char *token, JsonTokenType tokentype);
+
+/* semantic action functions for json_unnest */
+static void unnest_object_start(void *state);
+static void unnest_array_start(void *state);
+static void unnest_array_end(void *state);
+static void unnest_array_element_start(void *state, bool isnull);
+static void unnest_array_element_end(void *state, bool isnull);
+static void unnest_scalar(void *state, char *token, JsonTokenType tokentype);
+
+/* search type classification for json_get* functions */
 typedef enum
 {
 	JSON_SEARCH_OBJECT = 1,
 	JSON_SEARCH_PATH
 }	JsonSearch;
 
-/* stats for json_object_keys */
+/* state for json_object_keys */
 typedef struct
 {
 	int			lex_level;
 	int         count;
 }	alenState, *AlenState;
 
+/* state for json_each */
+typedef struct
+{
+	JsonLexContext *lex;
+	int			lex_level;
+    Tuplestorestate *tuple_store;
+    TupleDesc   ret_tdesc;
+	MemoryContext tmp_cxt;
+	char  *result_start;
+} eachState, *EachState;
+
+/* state for json_unnest */
+typedef struct
+{
+	JsonLexContext *lex;
+	int			lex_level;
+    Tuplestorestate *tuple_store;
+    TupleDesc   ret_tdesc;
+	MemoryContext tmp_cxt;
+	char  *result_start;
+} unnestState, *UnnestState;
 
 
 PG_FUNCTION_INFO_V1(json_object_keys);
 	if (array_contains_nulls(path))
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-				 errmsg("cannot call json_get_path with null elements in path parameter")));
+				 errmsg("cannot call json_get_path with null path elements")));
 
 
     deconstruct_array(path, TEXTOID, -1, false, 'i',
 	{
 		pathstr[i] = TextDatumGetCString(pathtext[i]);
 		if (*pathstr[i] == '\0')
-			ereport(ERROR,
-					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-					 errmsg("cannot call json_get_path with empty elements in path parameter")));
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("cannot call json_get_path with empty path elements")));
 	}
 
 	result = get_worker(jsonstr, NULL, -1, pathstr, npath, false);
 }
 
 static text *
-get_worker(char *json, char *field, int elem_index, char **path, int npath, bool normalize_results)
+get_worker(char *json, 
+		   char *field, 
+		   int elem_index, 
+		   char **path, 
+		   int npath, 
+		   bool normalize_results)
 {
 	GetState	state;
 	JsonLexContext lex;
 		get_next = true;
 	}
 	else if (_state->search_type == JSON_SEARCH_PATH && 
-			 _state->lex_level <= _state->npath &&_state->pathok[_state->lex_level - 1] 
-			 && strcmp(fname, _state->path[_state->lex_level - 1]) == 0)
+			 _state->lex_level <= _state->npath &&
+			 _state->pathok[_state->lex_level - 1] &&
+			 strcmp(fname, _state->path[_state->lex_level - 1]) == 0)
 	{
 		if (_state->lex_level < _state->npath)
 			_state->pathok[_state->lex_level] = true;
 					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 					 errmsg("field name is not unique in json object")));
 
-		if (_state->normalize_results && _state->lex->token_type == JSON_TOKEN_STRING)
+		if (_state->normalize_results && 
+			_state->lex->token_type == JSON_TOKEN_STRING)
 		{
 			_state->next_scalar = true;
 		}
 		get_last = true;
 	}
 	else if (_state->search_type == JSON_SEARCH_PATH && 
-			 _state->lex_level <= _state->npath &&_state->pathok[_state->lex_level - 1] 
-			 && strcmp(fname, _state->path[_state->lex_level - 1]) == 0)
+			 _state->lex_level <= _state->npath &&
+			 _state->pathok[_state->lex_level - 1] &&
+			 strcmp(fname, _state->path[_state->lex_level - 1]) == 0)
 	{
 		/* done with this field so reset pathok */
 		if (_state->lex_level < _state->npath)
 			 _state->lex_level <= _state->npath && 
 			 _state->pathok[_state->lex_level - 1])
 	{
-		if (++_state->array_level_index[_state->lex_level  - 1] == _state->path_level_index[_state->lex_level  - 1])
+		if (++_state->array_level_index[_state->lex_level  - 1] == 
+			_state->path_level_index[_state->lex_level  - 1])
 		{
 			if (_state->lex_level ==  _state->npath)
 				get_next = true;
 
 	if (get_next)
 	{
-		if (_state->normalize_results && _state->lex->token_type == JSON_TOKEN_STRING)
+		if (_state->normalize_results && 
+			_state->lex->token_type == JSON_TOKEN_STRING)
+		{
 			_state->next_scalar = true;
+		}
 		else
+		{
 			_state->result_start = _state->lex->token_start;
+		}
 	}
 }
 
 		get_last = true;
 	}
 	else if (_state->search_type == JSON_SEARCH_PATH && 
-			 _state->lex_level <= _state->npath &&_state->pathok[_state->lex_level - 1] 
-			 && _state->array_level_index[_state->lex_level  - 1] == _state->path_level_index[_state->lex_level  - 1])
+			 _state->lex_level <= _state->npath &&
+			 _state->pathok[_state->lex_level - 1] &&
+			 _state->array_level_index[_state->lex_level  - 1] == 
+			 _state->path_level_index[_state->lex_level  - 1])
 	{
 		/* done with this element so reset pathok */
 		if (_state->lex_level < _state->npath)
 	}
 	if (get_last)
 	{
-		int			len = _state->lex->prev_token_terminator - _state->result_start;
+		int	len = _state->lex->prev_token_terminator - _state->result_start;
 
 		_state->tresult = cstring_to_text_with_len(_state->result_start, len);
 	}
 		_state->count++;
 }
 
-/* semantic action functions for json_get* functions */
-static void each_object_start(void *state);
-static void each_object_end(void *state);
-static void each_object_field_start(void *state, char *fname, bool isnull);
-static void each_object_field_end(void *state, char *fname, bool isnull);
-static void each_array_start(void *state);
-static void each_scalar(void *state, char *token, JsonTokenType tokentype);
-/* common worker function for json_get* functions */
-/*static text *each_worker(char *json, char *field, int elem_index, bool normalize_results); */
-
-
-typedef struct
-{
-	JsonLexContext *lex;
-	int			lex_level;
-    Tuplestorestate *tuple_store;
-    TupleDesc   ret_tdesc;
-	MemoryContext tmp_cxt;
-	char  *result_start;
-} eachState, *EachState;
-
-
 PG_FUNCTION_INFO_V1(json_each);
 
 Datum
 				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 				 errmsg("cannot call json_each on a scalar")));
 }
+
+PG_FUNCTION_INFO_V1(json_unnest);
+
+Datum
+json_unnest(PG_FUNCTION_ARGS)
+{
+
+	text	   *json = PG_GETARG_TEXT_P(0);
+	char	   *jsonstr = text_to_cstring(json);
+	JsonLexContext lex;
+	JsonSemAction sem;
+	ReturnSetInfo *rsi;
+	MemoryContext old_cxt;
+	TupleDesc   tupdesc;
+	UnnestState	state;
+
+	state = palloc0(sizeof(unnestState));
+	sem = palloc0(sizeof(jsonSemAction));
+
+	rsi = (ReturnSetInfo *) fcinfo->resultinfo;
+
+	if (!rsi || !IsA(rsi, ReturnSetInfo) ||
+		(rsi->allowedModes & SFRM_Materialize) == 0 ||
+		rsi->expectedDesc == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("set-valued function called in context that "
+						"cannot accept a set")));
+
+
+	rsi->returnMode = SFRM_Materialize;
+
+	/* it's a simple type, so don't use get_call_result_type() */
+	tupdesc = rsi->expectedDesc;
+
+	/* make these in a sufficiently long-lived memory context */
+	old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory);
+
+	state->ret_tdesc = CreateTupleDescCopy(tupdesc);
+	BlessTupleDesc(state->ret_tdesc);
+	state->tuple_store = 
+		tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random,
+							  false, work_mem);
+
+	MemoryContextSwitchTo(old_cxt);
+
+	sem->semstate = (void *) state;
+	sem->object_start = unnest_object_start;
+	sem->array_start = unnest_array_start;
+	sem->array_end = unnest_array_end;
+	sem->scalar = unnest_scalar;
+	sem->array_element_start = unnest_array_element_start;
+	sem->array_element_end = unnest_array_element_end;
+
+	/* Set up lexing context. */
+	lex.input = jsonstr;
+	lex.token_terminator = lex.input;
+	lex.line_number = 1;
+	lex.line_start = jsonstr;
+	lex.strval = makeStringInfo();
+
+	state->lex = &lex;
+	state->lex_level = 0;
+	state->tmp_cxt = AllocSetContextCreate(CurrentMemoryContext,
+										   "json_unnest temporary cxt",
+										   ALLOCSET_DEFAULT_MINSIZE,
+										   ALLOCSET_DEFAULT_INITSIZE,
+										   ALLOCSET_DEFAULT_MAXSIZE);
+
+	pg_parse_json(&lex, sem);
+	
+	rsi->setResult = state->tuple_store;
+	rsi->setDesc = state->ret_tdesc;
+
+	PG_RETURN_NULL();
+}
+
+static void
+unnest_array_start(void *state)
+{
+	UnnestState	_state = (UnnestState) state;
+
+	_state->lex_level++;
+}
+
+static void
+unnest_array_end(void *state)
+{
+	UnnestState	_state = (UnnestState) state;
+
+	_state->lex_level--;
+}
+
+static void
+unnest_array_element_start(void *state, bool isnull)
+{
+	UnnestState	_state = (UnnestState) state;
+
+	/* save a pointer to where the value starts */
+	if (_state->lex_level == 1)
+		_state->result_start = _state->lex->token_start;
+}
+
+static void
+unnest_array_element_end(void *state, bool isnull)
+{
+	UnnestState	_state = (UnnestState) state;
+	MemoryContext old_cxt;
+	int len;
+	text *val;
+	HeapTuple   tuple;
+	Datum values[1];
+	static bool nulls[1] = {false};
+
+	/* skip elements of nested arrays; we only want top-level elements */
+	if (_state->lex_level != 1)
+		return;
+
+	/* use the tmp context so we can clean up after each tuple is done */
+	old_cxt = MemoryContextSwitchTo(_state->tmp_cxt);
+
+	len = _state->lex->prev_token_terminator - _state->result_start;
+	val = cstring_to_text_with_len(_state->result_start, len);
+
+	values[0] = PointerGetDatum(val);
+
+	tuple = heap_form_tuple(_state->ret_tdesc, values, nulls);
+	
+	tuplestore_puttuple(_state->tuple_store, tuple);
+
+	/* clean up and switch back */
+	MemoryContextSwitchTo(old_cxt);
+	MemoryContextReset(_state->tmp_cxt);
+}
+
+static void
+unnest_object_start(void *state)
+{
+	UnnestState	_state = (UnnestState) state;
+
+	if (_state->lex_level == 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("cannot call json_unnest on an object")));
+}
+
+static void
+unnest_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+	UnnestState	_state = (UnnestState) state;
+
+	if (_state->lex_level == 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("cannot call json_unnest on a scalar")));
+}
+

src/include/catalog/pg_proc.h

 DESCR("key value pairs of a json object");
 DATA(insert OID = 5008 (  json_get_path	   PGNSP PGUID 12 1 0 25 0 f f f f t f s 2 0 114 "114 1009" "{114,1009}" "{i,v}" "{from_json,path_elems}" _null_ json_get_path _null_ _null_ _null_ ));
 DESCR("get json with path elements");
+DATA(insert OID = 5009 (  json_unnest      PGNSP PGUID 12 1 100 0 0 f f f f t t s 1 0 114 "114" "{114,114}" "{i,o}" "{from_json,value}" _null_ json_unnest _null_ _null_ _null_ ));
+DESCR("elements of a json array");
 
 
 /* uuid */

src/include/utils/json.h

 extern Datum json_array_length(PG_FUNCTION_ARGS);
 extern Datum json_each(PG_FUNCTION_ARGS);
 extern Datum json_get_path(PG_FUNCTION_ARGS);
+extern Datum json_unnest(PG_FUNCTION_ARGS);
 
 #endif   /* JSON_H */

src/test/regress/expected/json.out

  1
 (1 row)
 
+--unnest
+select json_unnest('[1,true,[1,[2,3]],null,{"f1":1,"f2":[7,8,9]},false]');
+      json_unnest      
+-----------------------
+ 1
+ true
+ [1,[2,3]]
+ null
+ {"f1":1,"f2":[7,8,9]}
+ false
+(6 rows)
+

src/test/regress/sql/json.sql

 select json_get_path('{"f2":{"f3":1},"f4":{"f5":99,"f6":"stringy"}}','f2');
 select json_get_path('{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}','f2',0::text);
 select json_get_path('{"f2":["f3",1],"f4":{"f5":99,"f6":"stringy"}}','f2',1::text);
+
+--unnest
+
+select json_unnest('[1,true,[1,[2,3]],null,{"f1":1,"f2":[7,8,9]},false]');
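
For reference, a hedged usage sketch that is not part of this commit: it feeds each unnested element back into json_get_path from the same file. The FROM-clause call and the alias t(value), which leans on the OUT parameter name in the pg_proc entry, are assumptions; if the materialized set-returning path behaves as it does for json_each, this should yield three rows with the json values 1, 2 and 3.

-- illustrative only: expand a top-level json array into rows, then pull
-- the "f1" field out of each element
select json_get_path(t.value, 'f1') as f1
from json_unnest('[{"f1":1},{"f1":2},{"f1":3}]') as t(value);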