see also #2450 which may or may not be related if we choose to support these return types as composite types.
figure out how to support all of PG's bizarro SETOF, RECORD, etc. types w/ emphasis on JSON fucntions
that is, all the functions in http://www.postgresql.org/docs/9.4/static/functions-json.html need to be possible without building subclasses of functions.
Official response
Comments (19)
-
reporter -
reporter Issue
#3555was marked as a duplicate of this issue. -
reporter also queries of the form:
SELECT elem FROM json_foo_bar(expr) AS elem
users see lots of PG examples illustrating this non-SQL syntax and we need to ensure some system of establishing an equivalent approach. The function as FROM system we have in http://docs.sqlalchemy.org/en/rel_1_0/core/tutorial.html#functions at the very least needs a much more flexible version for Postgresql.
-
reporter Issue
#3565was marked as a duplicate of this issue. -
reporter queries of the form:
SELECT json_foo_bar(table.expr).foo, json_foo_bar(table.expr).bar FROM table
must also be supported in some way, see
#3555 -
reporter ATTENTION VIEWERS:
Here is an initial implementation you can use for now:
from sqlalchemy.sql import functions from sqlalchemy.sql.selectable import FromClause from sqlalchemy.sql.elements import ColumnClause from sqlalchemy.ext.compiler import compiles class FunctionColumn(ColumnClause): def __init__(self, function, name, type_=None): self.function = self.table = function self.name = self.key = name self.type_ = type_ self.is_literal = False @property def _from_objects(self): return [] def _make_proxy(self, selectable, name=None, attach=True, name_is_truncatable=False, **kw): co = ColumnClause(self.name, self.type) co.table = selectable co._proxies = [self] if selectable._is_clone_of is not None: co._is_clone_of = \ selectable._is_clone_of.columns.get(co.key) if attach: selectable._columns[co.key] = co return co @compiles(FunctionColumn) def _compile_function_column(element, compiler, **kw): return "(%s).%s" % ( compiler.process(element.function, **kw), compiler.preparer.quote(element.name) ) class ColumnFunction(functions.FunctionElement): __visit_name__ = 'function' @property def columns(self): return FromClause.columns.fget(self) def _populate_column_collection(self): for name in self.column_names: self._columns[name] = FunctionColumn(self, name)
Example one - the query illustrated in
#3565:class jsonb_array_elements_text(ColumnFunction): name = 'jsonb_array_elements_text' column_names = ['value'] from sqlalchemy import func, Float, Integer, Column, create_engine from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import Session Base = declarative_base() class Test(Base): __tablename__ = 'test' id = Column(Integer, primary_key=True) data = Column(JSONB) e = create_engine("postgresql://scott:tiger@localhost/test", echo='debug') Base.metadata.drop_all(e) Base.metadata.create_all(e) session = Session(e) session.add_all([ Test(data={"depth": [0.0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06]}) ]) session.commit() elem = jsonb_array_elements_text(Test.data['depth']).alias('elem') subq = session.query( func.max(elem.c.value.cast(Float))).\ select_from(elem) stmnt = session.query( Test.id, subq.as_scalar().label("maxdepth")).\ order_by("maxdepth") print stmnt.all()
renders as:
SELECT test.id AS test_id, (SELECT max(CAST(value AS FLOAT)) AS max_1 FROM jsonb_array_elements_text(test.data -> %(data_1)s) AS elem) AS maxdepth FROM test ORDER BY maxdepth 2015-10-26 11:47:30,611 INFO sqlalchemy.engine.base.Engine {'data_1': 'depth'} 2015-10-26 11:47:30,612 DEBUG sqlalchemy.engine.base.Engine Col ('test_id', 'maxdepth') 2015-10-26 11:47:30,612 DEBUG sqlalchemy.engine.base.Engine Row (1, 0.06) [(1, 0.06)]
Example two - the query illustrated in
#3555:from sqlalchemy import * from sqlalchemy.dialects import postgresql from sqlalchemy.sql.expression import Grouping m = MetaData() assets_transactions = Table( 'assets_transactions', m, Column('id', Integer, primary_key=True), Column('contents', postgresql.JSONB) ) e = create_engine("postgresql://scott:tiger@localhost/test", echo=True) m.drop_all(e) m.create_all(e) e.execute( assets_transactions.insert(), [ {"contents": {"1": 1, "4": 2}}, {"contents": {"2": 2, "5": 3}}, {"contents": {"3": 3, "6": 4}}, ] ) class jsonb_each(ColumnFunction): name = 'jsonb_each' column_names = ['key', 'value'] print e.execute( select([ assets_transactions.c.id, jsonb_each(assets_transactions.c.contents).c.key, jsonb_each(assets_transactions.c.contents).c.value, ]).select_from(assets_transactions) ).fetchall()
renders as:
SELECT assets_transactions.id, (jsonb_each(assets_transactions.contents)).key, (jsonb_each(assets_transactions.contents)).value FROM assets_transactions 2015-10-26 11:50:10,993 INFO sqlalchemy.engine.base.Engine {} [(1, u'1', 1), (1, u'4', 2), (2, u'2', 2), (2, u'5', 3), (3, u'3', 3), (3, u'6', 4)]
Example three: "row_to_json(aliasname)":
from sqlalchemy.sql import Alias, ColumnElement from sqlalchemy.ext.compiler import compiles class as_row(ColumnElement): def __init__(self, expr): assert isinstance(expr, Alias) self.expr = expr @compiles(as_row) def _gen_as_row(element, compiler, **kw): return compiler.visit_alias(element.expr, ashint=True, **kw) if __name__ == '__main__': from sqlalchemy import Column, Integer, create_engine, func from sqlalchemy.orm import Session from sqlalchemy.ext.declarative import declarative_base Base = declarative_base() class A(Base): __tablename__ = 'a' id = Column(Integer, primary_key=True) x = Column(Integer) y = Column(Integer) e = create_engine("postgresql://scott:tiger@localhost/test", echo=True) Base.metadata.drop_all(e) Base.metadata.create_all(e) s = Session(e) s.add_all([ A(x=1, y=2), A(x=5, y=4) ]) s.commit() subq = s.query(A).subquery() print s.query(func.row_to_json(as_row(subq))).select_from(subq).all()
-
reporter For the issue in
#3594.Given:
from sqlalchemy import * from sqlalchemy.orm import * from sqlalchemy.dialects.postgresql import array, ARRAY from sqlalchemy.ext.declarative import declarative_base Base = declarative_base() class A(Base): __tablename__ = 'a' id = Column(Integer, primary_key=True) refs = Column(ARRAY(String)) class B(Base): __tablename__ = 'b' id = Column(Integer, primary_key=True) ref = Column(String) e = create_engine("postgresql://scott:tiger@localhost/test", echo=True) Base.metadata.create_all(e)
There's a few ways this can work, here's the most flexible. Modify the above recipe as:
class FunctionColumn(ColumnClause): def __init__(self, function, name, type_=None): self.function = self.table = function self.name = self.key = name self.type_ = type_ self.is_literal = False @property def _from_objects(self): return [] def _make_proxy(self, selectable, name=None, attach=True, name_is_truncatable=False, **kw): if self.name == self.function.name: name = selectable.name else: name = self.name co = ColumnClause(name, self.type) co.key = self.name co._proxies = [self] if selectable._is_clone_of is not None: co._is_clone_of = \ selectable._is_clone_of.columns.get(co.key) co.table = selectable co.named_with_table = False if attach: selectable._columns[co.key] = co return co @compiles(FunctionColumn) def _compile_function_column(element, compiler, **kw): if kw.get('asfrom', False): return "(%s).%s" % ( compiler.process(element.function, **kw), compiler.preparer.quote(element.name) ) else: return element.name
Basically Postgresql takes a name that we put in the FROM clause and it magically becomes a name we can reference directly in the columns clause...very awkward, but there we go. The feature here will need to somehow make this pattern apparent and even more strangely allow the user to differentiate between the two variants here, e.g. "SELECT foobar().foo" vs. "SELECT foobar FROM foobar()".
The query can then be emitted either with or without an alias:
e = create_engine("postgresql://scott:tiger@localhost/test", echo=True) Base.metadata.create_all(e) class unnest_func(ColumnFunction): name = 'unnest' column_names = ['unnest', 'ordinality'] from sqlalchemy.ext.compiler import compiles @compiles(unnest_func) def _compile_unnest_func(element, compiler, **kw): return compiler.visit_function(element, **kw) + " WITH ORDINALITY" s = Session(e) unnest = unnest_func(A.refs) q = s.query(A.id, A.refs, unnest.c.unnest, unnest.c.ordinality, B.id, B.ref).\ select_from(A).outerjoin(unnest, true()).outerjoin(B, unnest.c.unnest == B.ref) q.all() unnest = unnest_func(A.refs).alias('unnested') q = s.query(A.id, A.refs, unnest.c.unnest, unnest.c.ordinality, B.id, B.ref).\ select_from(A).outerjoin(unnest, true()).outerjoin(B, unnest.c.unnest == B.ref) q.all()
producing:
SELECT a.id AS a_id, a.refs AS a_refs, unnest AS unnest, ordinality AS ordinality, b.id AS b_id, b.ref AS b_ref FROM a LEFT OUTER JOIN unnest(a.refs) WITH ORDINALITY ON true LEFT OUTER JOIN b ON unnest = b.ref
and
SELECT a.id AS a_id, a.refs AS a_refs, unnested.unnested AS unnested_unnested, unnested.ordinality AS unnested_ordinality, b.id AS b_id, b.ref AS b_ref FROM a LEFT OUTER JOIN unnest(a.refs) WITH ORDINALITY AS unnested ON true LEFT OUTER JOIN b ON unnested.unnested = b.ref
both of which are accepted by PG.
To get the form with "AS x(unnest, ordinality)" you'd need to add another construct like a subclass of Alias or something, however it seems like the "unnest" construct can be aliased in the tradtional way and the column names become "(function name, ordinality)". Again the amount of special functions and behaviors that would be part of a standard feature keep on growing, making this a difficult feature to add as "done" without missing one of PG's weird non-SQL things.
-
reporter Issue
#3594was marked as a duplicate of this issue. -
reporter mentioned earlier, queries of the form:
SELECT elem FROM json_foo_bar(expr) AS elem
ARE supported. Just call
func.json_foo_bar(expr).alias('elem')
. This was implemented in#3137in 0.9.8. -
reporter Example Three - recordset where we need "AS q(a sometype, b sometype)". Need a new alias subclass:
from sqlalchemy.sql import functions from sqlalchemy.sql.selectable import FromClause, Alias from sqlalchemy.sql.elements import ColumnClause from sqlalchemy.ext.compiler import compiles class FunctionColumn(ColumnClause): def __init__(self, function, name, type_=None): self.function = self.table = function self.name = self.key = name self.type = type_ self.is_literal = False @property def _from_objects(self): return [] def _make_proxy(self, selectable, name=None, attach=True, name_is_truncatable=False, **kw): if self.name == self.function.name: name = selectable.name else: name = self.name co = ColumnClause(name, self.type) co.key = self.name co._proxies = [self] if selectable._is_clone_of is not None: co._is_clone_of = \ selectable._is_clone_of.columns.get(co.key) co.table = selectable co.named_with_table = False if attach: selectable._columns[co.key] = co return co @compiles(FunctionColumn) def _compile_function_column(element, compiler, **kw): if kw.get('asfrom', False): return "(%s).%s" % ( compiler.process(element.function, **kw), compiler.preparer.quote(element.name) ) else: return element.name class PGAlias(Alias): pass @compiles(PGAlias) def _compile_pg_alias(element, compiler, **kw): text = compiler.visit_alias(element, **kw) if kw['asfrom']: text += "(%s)" % ( ", ".join( "%s %s" % ( col.name, compiler.visit_typeclause(col)) for col in element.element.c ) ) return text class ColumnFunction(functions.FunctionElement): __visit_name__ = 'function' @property def columns(self): return FromClause.columns.fget(self) def _populate_column_collection(self): for name, type_ in self.column_names: self._columns[name] = FunctionColumn(self, name, type_) def alias(self, name): return PGAlias(self, name) from sqlalchemy import * # noqa from sqlalchemy.orm import * # noqa from sqlalchemy.ext.declarative import declarative_base # noqa from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.dialects import postgresql class jsonb_to_recordset_func(ColumnFunction): # noqa name = 'jsonb_to_recordset' column_names = [('goods_id', Integer()), ('quantity', Integer())] Base = declarative_base() class A(Base): __tablename__ = 'a' id = Column(Integer, primary_key=True) supplemental_items = Column(JSONB) e = create_engine("postgresql://scott:tiger@localhost/test", echo=True) Base.metadata.drop_all(e) Base.metadata.create_all(e) s = Session(e) s.add(A(supplemental_items=[{"goods_id": 5, "quantity": 10}])) s.commit() jr = jsonb_to_recordset_func(A.supplemental_items).alias('j') q = s.query(A.id, jr.c.goods_id, jr.c.quantity) print q.all()
-
reporter Issue
#3729was marked as a duplicate of this issue. -
reporter Example four - the alias name is the column name, we will need to get this use case as well:
class generate_series(ColumnFunction): name = 'generate_series' def alias(self, name): self.column_names = [name] return super(generate_series, self).alias(name) x = generate_series(1, 2).alias('x') y = generate_series(1, 2).alias('y') from sqlalchemy import select stmt = select([x.c.x, y.c.y]).select_from(x).select_from(y) print stmt
output:
SELECT x, y FROM generate_series(:generate_series_1, :generate_series_2) AS x, generate_series(:generate_series_3, :generate_series_4) AS y
-
reporter - changed milestone to 1.3
- edited description
this is fairly important but is also an enormous lift, including new APIs that would need careful consideration, trying to expedite 1.2 for now.
-
Great that you are still working on this. Just want to let you know I really admire the effort and dedication you have put in and continue to put in SQLAlchemy! Almost ridiculous that such a huge, useful and important tool is all the work of only one man! I'd be glad to assist on this, but not sure where to start.
-
Trying out this recipe to wrap one of my
RETURNS TABLE
functions I see that the columns of the function do not use the correct alias, and thus I'm getting an "ambiguous column" error from PG, but I wasn't able to figure out if/how that can be fixed.The following script illustrate the issue:
m = MetaData() values = Table( 'values', m, Column('id', Integer, primary_key=True), Column('value', String) ) e = create_engine("postgresql://localhost/test", echo=True) m.drop_all(e) m.create_all(e) e.execute( values.insert(), [ {"value": "one"}, {"value": "two"}, {"value": "three"}, ] ) e.execute("""\ create or replace function value_ids() returns table (id integer) as $$ select id from values; $$ language sql """) class value_ids(ColumnFunction): name = 'value_ids' column_names = ['id'] vi = value_ids().alias('vi') vv = values.alias('vv') stmt = select([ vi.c.id, vv.c.value ]).select_from(vi.join(vv, vv.c.id == vi.c.id)) print(str(stmt)) print(e.execute(stmt).fetchall())
that emits the query:
SELECT id, vv.value FROM value_ids() AS vi JOIN values AS vv ON vv.id = id
and gives an execution error:
psycopg2.ProgrammingError: ERROR: column reference "id" is ambiguousi
Thanks in advance for any hint!
-
reporter @lele I never get time to look at these can you please paste / attach which "ColumnFunction" recipe you're using (full MCVE is fine so I can just run it) ? thanks
-
Sure, sorry for the confusion!
Here is the complete script:
from sqlalchemy import create_engine, select, Column, Integer, String, Table, MetaData from sqlalchemy.sql import functions from sqlalchemy.sql.selectable import FromClause from sqlalchemy.sql.elements import ColumnClause from sqlalchemy.ext.compiler import compiles class FunctionColumn(ColumnClause): def __init__(self, function, name, type_=None): self.function = self.table = function self.name = self.key = name self.type_ = type_ self.is_literal = False @property def _from_objects(self): return [] def _make_proxy(self, selectable, name=None, attach=True, name_is_truncatable=False, **kw): co = ColumnClause(self.name, self.type) co._proxies = [self] if selectable._is_clone_of is not None: co._is_clone_of = \ selectable._is_clone_of.columns.get(co.key) if attach: selectable._columns[co.key] = co return co @compiles(FunctionColumn) def _compile_function_column(element, compiler, **kw): return "(%s).%s" % ( compiler.process(element.function, **kw), compiler.preparer.quote(element.name) ) class ColumnFunction(functions.FunctionElement): __visit_name__ = 'function' @property def columns(self): return FromClause.columns.fget(self) def _populate_column_collection(self): for name in self.column_names: self._columns[name] = FunctionColumn(self, name) m = MetaData() values = Table( 'values', m, Column('id', Integer, primary_key=True), Column('value', String) ) e = create_engine("postgresql://localhost/test", echo=True) m.drop_all(e) m.create_all(e) e.execute( values.insert(), [ {"value": "one"}, {"value": "two"}, {"value": "three"}, ] ) e.execute("""\ create or replace function value_ids() returns table (id integer) as $$ select id from values; $$ language sql """) class value_ids(ColumnFunction): name = 'value_ids' column_names = ['id'] vi = value_ids().alias('vi') vv = values.alias('vv') stmt = select([ vi.c.id, vv.c.value ]).select_from(vi.join(vv, vv.c.id == vi.c.id)) print(str(stmt)) print(e.execute(stmt).fetchall())
-
reporter @lele add "co.table = selectable":
def _make_proxy(self, selectable, name=None, attach=True, name_is_truncatable=False, **kw): co = ColumnClause(self.name, self.type) co.table = selectable co._proxies = [self] if selectable._is_clone_of is not None: co._is_clone_of = \ selectable._is_clone_of.columns.get(co.key) if attach: selectable._columns[co.key] = co return co
-
Wonderful, thank you so much!
- Log in to comment
ATTENTION VIEWERS:
Here is an initial implementation you can use for now:
Example one - the query illustrated in
#3565:renders as:
Example two - the query illustrated in
#3555:renders as:
Example three: "row_to_json(aliasname)":