Andrew Dunstan avatar Andrew Dunstan committed d89031c

Working populate_recordset.

Comments (0)

Files changed (5)

src/backend/utils/adt/jsonfuncs.c

 	bool		found;
 	char		name[NAMEDATALEN];
 
+	/* ignore field names >= NAMEDATALEN - they can't match a record field */
+	if (strlen(fname) >= NAMEDATALEN)
+		return;
+
 	memset(name, 0, NAMEDATALEN);
 	strncpy(name, fname, NAMEDATALEN);
 
 	if (_state->lex->lex_level == 0)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-			   errmsg("cannot call %s on an array", _state->function_name)));
+			   errmsg("cannot call %s on a scalar", _state->function_name)));
+
+	_state->saved_scalar = token;
+}
+
+
+/*
+ * Semantic-action state shared by the json_populate_recordset parser
+ * callbacks; one instance lives for the duration of one pg_parse_json call.
+ */
+typedef struct populateRecordsetState
+{
+	JsonLexContext *lex;		/* lexer; consulted for lex_level/token_type */
+	HTAB *json_hash;			/* field-name -> value hash for current object */
+	char * saved_scalar;		/* last scalar token, filed by object_field_end */
+	Tuplestorestate *tuple_store;	/* accumulates one tuple per JSON object */
+	TupleDesc ret_tdesc;		/* blessed copy of the result tuple descriptor */
+	HeapTupleHeader rec;		/* caller-supplied default record, or NULL */
+	RecordIOData *my_extra;		/* cached per-column input-function info */
+	MemoryContext fn_mcxt;		/* flinfo->fn_mcxt, for fmgr_info_cxt lookups */
+} populateRecordsetState, *PopulateRecordsetState;
+
+static void populate_one_record(PopulateRecordsetState state);
+static void populate_recordset_object_field_start(void *state, char *fname, bool isnull);
+static void populate_recordset_object_field_end(void *state, char *fname, bool isnull);
+static void populate_recordset_scalar(void *state, char *token, JsonTokenType tokentype);
+static void populate_recordset_object_start(void *state);
+static void populate_recordset_object_end(void *state);
+static void populate_recordset_array_start(void *state);
+
+/*
+ * SQL function json_populate_recordset
+ *
+ * set fields in a set of records from the argument json
+ *
+ * Code adapted shamelessly from hstore's populate_record
+ * which is in turn partly adapted from record_out.
+ *
+ * The json is decomposed into a hash table, in which each
+ * field in the record is then looked up by name.
+ */
+
+PG_FUNCTION_INFO_V1(json_populate_recordset);
+
+Datum
+json_populate_recordset(PG_FUNCTION_ARGS)
+{
+	Oid			argtype = get_fn_expr_argtype(fcinfo->flinfo, 0);
+	text	   *json;
+	char	   *jsonstr;
+	ReturnSetInfo *rsi;
+	MemoryContext old_cxt;
+	Oid			tupType;
+	int32		tupTypmod;
+	HeapTupleHeader rec;
+	TupleDesc	tupdesc;
+	RecordIOData *my_extra;
+	int			ncolumns;
+	JsonLexContext *lex;
+	JsonSemAction sem;
+	PopulateRecordsetState state;
+
+	if (!type_is_rowtype(argtype))
+		ereport(ERROR,
+				(errcode(ERRCODE_DATATYPE_MISMATCH),
+				 errmsg("first argument must be a rowtype")));
+
+	/* we return a set, so the caller must be able to accept one */
+	rsi = (ReturnSetInfo *) fcinfo->resultinfo;
+
+	if (!rsi || !IsA(rsi, ReturnSetInfo) ||
+		(rsi->allowedModes & SFRM_Materialize) == 0 ||
+		rsi->expectedDesc == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("set-valued function called in context that "
+						"cannot accept a set")));
+
+	rsi->returnMode = SFRM_Materialize;
+
+	/*
+	 * get the tupdesc from the result set info - it must be a record type
+	 * because we already checked that arg1 is a record type.
+	 */
+	(void) get_call_result_type(fcinfo, NULL, &tupdesc);
+
+	/*
+	 * if the json is null send back an empty set.  This check must come
+	 * before any PG_GETARG on argument 1: the function is not strict, so
+	 * fetching/detoasting a null Datum here would be unsafe.  It also lets
+	 * us skip all the allocation work below on the early-return path.
+	 */
+	if (PG_ARGISNULL(1))
+		PG_RETURN_NULL();
+
+	/* only now is it safe to fetch the json argument */
+	json = PG_GETARG_TEXT_P(1);
+
+	if (PG_ARGISNULL(0))
+		rec = NULL;
+	else
+		rec = PG_GETARG_HEAPTUPLEHEADER(0);
+
+	tupType = tupdesc->tdtypeid;
+	tupTypmod = tupdesc->tdtypmod;
+	ncolumns = tupdesc->natts;
+
+	jsonstr = text_to_cstring(json);
+	lex = makeJsonLexContext(jsonstr, true);
+
+	state = palloc0(sizeof(populateRecordsetState));
+	sem = palloc0(sizeof(jsonSemAction));
+
+	/* make these in a sufficiently long-lived memory context */
+	old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory);
+
+	state->ret_tdesc = CreateTupleDescCopy(tupdesc);
+	BlessTupleDesc(state->ret_tdesc);
+	state->tuple_store =
+		tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random,
+							  false, work_mem);
+
+	MemoryContextSwitchTo(old_cxt);
+
+	/*
+	 * We arrange to look up the needed I/O info just once per series of
+	 * calls, assuming the record type doesn't change underneath us.
+	 */
+	my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra;
+	if (my_extra == NULL ||
+		my_extra->ncolumns != ncolumns)
+	{
+		fcinfo->flinfo->fn_extra =
+			MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+							   sizeof(RecordIOData) - sizeof(ColumnIOData)
+							   + ncolumns * sizeof(ColumnIOData));
+		my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra;
+		my_extra->record_type = InvalidOid;
+		my_extra->record_typmod = 0;
+	}
+
+	if (my_extra->record_type != tupType ||
+		my_extra->record_typmod != tupTypmod)
+	{
+		MemSet(my_extra, 0,
+			   sizeof(RecordIOData) - sizeof(ColumnIOData)
+			   + ncolumns * sizeof(ColumnIOData));
+		my_extra->record_type = tupType;
+		my_extra->record_typmod = tupTypmod;
+		my_extra->ncolumns = ncolumns;
+	}
+
+	/* wire up the parser callbacks that do the real work */
+	sem->semstate = (void *) state;
+	sem->array_start = populate_recordset_array_start;
+	sem->scalar = populate_recordset_scalar;
+	sem->object_field_start = populate_recordset_object_field_start;
+	sem->object_field_end = populate_recordset_object_field_end;
+	sem->object_start = populate_recordset_object_start;
+	sem->object_end = populate_recordset_object_end;
+
+	state->lex = lex;
+	state->my_extra = my_extra;
+	state->rec = rec;
+	state->fn_mcxt = fcinfo->flinfo->fn_mcxt;
+
+	pg_parse_json(lex, sem);
+
+	rsi->setResult = state->tuple_store;
+	rsi->setDesc = state->ret_tdesc;
+
+	PG_RETURN_NULL();
+}
+
+/*
+ * Build one result tuple from the field hash accumulated for the current
+ * JSON object and append it to the tuplestore.  Fields absent from the
+ * hash keep the value from state->rec if one was supplied, else null.
+ */
+static void populate_one_record(PopulateRecordsetState state)
+{
+	HTAB *json_hash = state->json_hash;
+	Datum *values;
+	bool *nulls;
+	char fname[NAMEDATALEN];
+	int i;
+	RecordIOData *my_extra = state->my_extra;
+	int ncolumns = my_extra->ncolumns;
+	TupleDesc tupdesc = state->ret_tdesc;
+	JsonHashEntry hashentry;
+	HeapTupleHeader rec = state->rec;
+	HeapTuple	rettuple;
+
+	values = (Datum *) palloc(ncolumns * sizeof(Datum));
+	nulls = (bool *) palloc(ncolumns * sizeof(bool));
+
+	if (rec)
+	{
+		HeapTupleData tuple;
+
+		/* Build a temporary HeapTuple control structure */
+		tuple.t_len = HeapTupleHeaderGetDatumLength(rec);
+		ItemPointerSetInvalid(&(tuple.t_self));
+		tuple.t_tableOid = InvalidOid;
+		tuple.t_data = rec;
+
+		/* Break down the tuple into fields */
+		heap_deform_tuple(&tuple, tupdesc, values, nulls);
+	}
+	else
+	{
+		/* no default record supplied - start from all nulls */
+		for (i = 0; i < ncolumns; ++i)
+		{
+			values[i] = (Datum) 0;
+			nulls[i] = true;
+		}
+	}
+
+	for (i = 0; i < ncolumns; ++i)
+	{
+		ColumnIOData *column_info = &my_extra->columns[i];
+		Oid			column_type = tupdesc->attrs[i]->atttypid;
+		char	   *value;
+
+		/* Ignore dropped columns in datatype */
+		if (tupdesc->attrs[i]->attisdropped)
+		{
+			nulls[i] = true;
+			continue;
+		}
+
+		/* hash keys were zero-padded to NAMEDATALEN, so pad ours too */
+		memset(fname, 0, NAMEDATALEN);
+		strncpy(fname, NameStr(tupdesc->attrs[i]->attname), NAMEDATALEN);
+		hashentry = hash_search(json_hash, fname, HASH_FIND, NULL);
+
+		/*
+		 * we can't just skip here if the key wasn't found since we might have
+		 * a domain to deal with. If we were passed in a non-null record
+		 * datum, we assume that the existing values are valid (if they're
+		 * not, then it's not our fault), but if we were passed in a null,
+		 * then every field which we don't populate needs to be run through
+		 * the input function just in case it's a domain type.
+		 */
+		if (hashentry == NULL && rec)
+			continue;
+
+		/*
+		 * Prepare to convert the column value from text
+		 */
+		if (column_info->column_type != column_type)
+		{
+			getTypeInputInfo(column_type,
+							 &column_info->typiofunc,
+							 &column_info->typioparam);
+			fmgr_info_cxt(column_info->typiofunc, &column_info->proc,
+						  state->fn_mcxt);
+			column_info->column_type = column_type;
+		}
+		if (hashentry == NULL || hashentry->isnull)
+		{
+			/*
+			 * need InputFunctionCall to happen even for nulls, so that domain
+			 * checks are done
+			 */
+			values[i] = InputFunctionCall(&column_info->proc, NULL,
+										  column_info->typioparam,
+										  tupdesc->attrs[i]->atttypmod);
+			nulls[i] = true;
+		}
+		else
+		{
+			value = hashentry->val;
+
+			values[i] = InputFunctionCall(&column_info->proc, value,
+										  column_info->typioparam,
+										  tupdesc->attrs[i]->atttypmod);
+			nulls[i] = false;
+		}
+	}
+
+	rettuple = heap_form_tuple(tupdesc, values, nulls);
+
+	tuplestore_puttuple(state->tuple_store, rettuple);
+
+	/*
+	 * the tuplestore copied the tuple, so release the per-record scratch
+	 * arrays rather than letting them accumulate for every JSON object
+	 */
+	pfree(values);
+	pfree(nulls);
+}
+
+/*
+ * Object-start handler: each object at nesting level 1 is one record;
+ * set up a fresh name->value hash to collect its fields.
+ */
+static void populate_recordset_object_start(void *state)
+{
+	PopulateRecordsetState _state = (PopulateRecordsetState) state;
+	int lex_level = _state->lex->lex_level;
+	HASHCTL		ctl;
+
+	/* level 0 means the top-level json is an object, not an array */
+	if (lex_level == 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			   errmsg("cannot call populate_recordset on an object")));
+	/* deeper than 1 means an object nested inside a record object */
+	else if (lex_level > 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			   errmsg("cannot call populate_recordset with nested objects")));
+
+	/* set up a new hash for this entry */
+	memset(&ctl, 0, sizeof(ctl));
+	ctl.keysize = NAMEDATALEN;
+	ctl.entrysize = sizeof(jsonHashEntry);
+	ctl.hcxt = CurrentMemoryContext;
+	_state->json_hash = hash_create("json object hashtable",
+									100,
+									&ctl,
+									HASH_ELEM | HASH_CONTEXT);
+}
+
+/*
+ * Object-end handler: the just-completed object is one record.  Turn its
+ * accumulated field hash into a tuple, then release the hash.
+ */
+static void populate_recordset_object_end(void *state)
+{
+	PopulateRecordsetState _state = (PopulateRecordsetState) state;
+
+	populate_one_record(_state);
+	hash_destroy(_state->json_hash);
+	/* don't leave a dangling pointer; object_start builds the next hash */
+	_state->json_hash = NULL;
+}
+
+/*
+ * Array-start handler: the only legal array is the outermost one that
+ * wraps the record objects (lex_level 0); anything deeper is rejected.
+ */
+static void populate_recordset_array_start(void *state)
+{
+	PopulateRecordsetState _state = (PopulateRecordsetState) state;
+	if (_state->lex->lex_level != 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			   errmsg("cannot call populate_recordset with nested arrays")));
+}
+
+/*
+ * Scalar handler: reject a bare top-level scalar; otherwise remember the
+ * token so populate_recordset_object_field_end can file it under the
+ * field name currently being parsed.
+ */
+static void populate_recordset_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+	PopulateRecordsetState _state = (PopulateRecordsetState) state;
+
+	if (_state->lex->lex_level == 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			   errmsg("cannot call populate_recordset on a scalar")));
 
 	_state->saved_scalar = token;
 }
+
+/*
+ * Field-start handler: record fields must be scalars (or null), so reject
+ * a field whose value opens a nested array or object.
+ */
+static void
+populate_recordset_object_field_start(void *state, char *fname, bool isnull)
+{
+	PopulateRecordsetState _state = (PopulateRecordsetState) state;
+
+	/* token_type here is the token beginning the field's value */
+	if (_state->lex->token_type == JSON_TOKEN_ARRAY_START ||
+		_state->lex->token_type == JSON_TOKEN_OBJECT_START)
+	{
+		/*
+		 * NOTE(review): this message says "nested object" even when the
+		 * offending value is an array - consider wording covering both
+		 */
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+		errmsg("cannot call populate_recordset on a nested object")));
+	}
+}
+
+/*
+ * Field-end handler: enter the field name into the current record's hash,
+ * storing the scalar value saved by populate_recordset_scalar (NULL value
+ * is represented by isnull).
+ */
+static void
+populate_recordset_object_field_end(void *state, char *fname, bool isnull)
+{
+	PopulateRecordsetState _state = (PopulateRecordsetState) state;
+	JsonHashEntry hashentry;
+	bool		found;
+	char		name[NAMEDATALEN];
+
+	/* ignore field names >= NAMEDATALEN - they can't match a record field */
+	if (strlen(fname) >= NAMEDATALEN)
+		return;
+
+	/* hash keys are fixed-size, so zero-pad the name to NAMEDATALEN */
+	memset(name, 0, NAMEDATALEN);
+	strncpy(name, fname, NAMEDATALEN);
+
+	hashentry = hash_search(_state->json_hash, name, HASH_ENTER, &found);
+
+	/*
+	 * NOTE(review): this is a user-triggerable error (duplicate field name
+	 * in the input json), so ereport with an errcode would fit better than
+	 * elog, which is meant for internal errors
+	 */
+	if (found)
+		elog(ERROR, "duplicate key");
+
+	hashentry->isnull = isnull;
+	hashentry->val = _state->saved_scalar;
+}
+

src/include/catalog/pg_proc.h

 DESCR("key value pairs of a json object");
 DATA(insert OID = 5012 (  json_populate_record PGNSP PGUID 12 1 0 0 0 f f f f f f s 2 0 2283 "2283 114" _null_ _null_ _null_ _null_ json_populate_record _null_ _null_ _null_ ));
 DESCR("get record fields from a json object");
+DATA(insert OID = 5013 (  json_populate_recordset PGNSP PGUID 12 1 100 0 0 f f f f f t s 2 0 2283 "2283 114" _null_ _null_ _null_ _null_ json_populate_recordset _null_ _null_ _null_ ));
+DESCR("get record fields from a json object");
 
 /* uuid */
 DATA(insert OID = 2952 (  uuid_in		   PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2950 "2275" _null_ _null_ _null_ _null_ uuid_in _null_ _null_ _null_ ));

src/include/utils/json.h

 extern Datum json_get_path_as_text(PG_FUNCTION_ARGS);
 extern Datum json_unnest(PG_FUNCTION_ARGS);
 extern Datum json_populate_record(PG_FUNCTION_ARGS);
+extern Datum json_populate_recordset(PG_FUNCTION_ARGS);
 
 #endif   /* JSON_H */

src/test/regress/expected/json.out

  blurfl | 3 | Mon Dec 31 15:30:56 2012
 (1 row)
 
+-- populate_recordset
+select * from json_populate_recordset(null::jpop,'[{"a":"blurfl","x":43.2},{"b":3,"c":"2012-01-20 10:42:53"}]') q;
+   a    | b |            c             
+--------+---+--------------------------
+ blurfl |   | 
+        | 3 | Fri Jan 20 10:42:53 2012
+(2 rows)
+

src/test/regress/sql/json.sql

 
 select * from json_populate_record(null::jpop,'{"a":"blurfl","x":43.2}') q;
 select * from json_populate_record(row('x',3,'2012-12-31 15:30:56')::jpop,'{"a":"blurfl","x":43.2}') q;
+
+-- populate_recordset
+
+select * from json_populate_recordset(null::jpop,'[{"a":"blurfl","x":43.2},{"b":3,"c":"2012-01-20 10:42:53"}]') q;
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.