Cannot undefer cte-based column property on both the query and a joinedloaded self-referential relationship

Issue #3727 duplicate
Adrian created an issue
from sqlalchemy import *
from sqlalchemy.dialects.postgresql import ARRAY, array
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import *


def _patch_sa_cte_fix():
    import pkg_resources
    sa_dist = pkg_resources.get_distribution('sqlalchemy')
    if sa_dist.parsed_version > pkg_resources.parse_version('1.0.12'):
        raise Exception('Remove this monkeypatch; SQLAlchemy contains the CTE fix')
    from sqlalchemy.sql.selectable import CTE
    from sqlalchemy.sql.elements import _clone
    def _copy_internals(self, clone=_clone, **kw):
        super(CTE, self)._copy_internals(clone, **kw)
        if self._cte_alias is not None:
            self._cte_alias = self
        self._restates = frozenset([clone(elem, **kw) for elem in self._restates])
    CTE._copy_internals = _copy_internals

_patch_sa_cte_fix()
Base = declarative_base()


class Category(Base):
    __tablename__ = 'categories'
    id = Column(Integer, primary_key=True)
    parent_id = Column(
        Integer,
        ForeignKey('categories.id'),
        index=True,
        nullable=True
    )
    is_deleted = Column(
        Boolean,
        nullable=False,
        default=False
    )
    children = relationship(
        'Category',
        primaryjoin=(id == remote(parent_id)) & ~remote(is_deleted),
        lazy=True,
        backref=backref(
            'parent',
            primaryjoin=(remote(id) == parent_id),
            lazy=True
        )
    )

    @staticmethod
    def _create_column_prop():
        cat_alias = aliased(Category)
        cte_query = (select([cat_alias.id, cast(array([]), ARRAY(Integer)).label('parents')])
                     .where(cat_alias.parent_id.is_(None) & ~cat_alias.is_deleted)
                     .cte(recursive=True))
        parent_query = (select([cat_alias.id, cte_query.c.parents.op('||')(cat_alias.parent_id)])
                        .where((cat_alias.parent_id == cte_query.c.id) & ~cat_alias.is_deleted))
        cte_query = cte_query.union_all(parent_query)
        query = select([func.count()]).where(cte_query.c.parents.contains(array([Category.id])))
        Category.deep_children_count = column_property(query, deferred=True)


Category._create_column_prop()

e = create_engine('postgresql:///test', echo=True)
Base.metadata.create_all(e)
s = Session(e)


root = Category(id=0, children=[
    Category(id=1),
    Category(id=2, children=[
        Category(id=3),
        Category(id=4),
        Category(id=5, children=[
            Category(id=6),
            Category(id=7)
        ])
    ])
])
s.add(root)
s.flush()
s.expire_all()

print

cat = s.query(Category).options(undefer('deep_children_count'),
                                joinedload('children').undefer('deep_children_count')).get(2)
print cat
print cat.deep_children_count
print cat.children

sqlalchemy.exc.CompileError: Multiple, unrelated CTEs found with the same name: u'anon_2'

Removing one of the undefers or using subqueryload instead of joinedload works, but it still looks like a bug to me.

Full traceback:

Traceback (most recent call last):
  File "satest.py", line 90, in <module>
    joinedload('children').undefer('deep_children_count')).get(2)
  File "/home/adrian/dev/indico/env/lib/python2.7/site-packages/sqlalchemy/orm/query.py", line 831, in get
    return self._get_impl(ident, loading.load_on_ident)
  File "/home/adrian/dev/indico/env/lib/python2.7/site-packages/sqlalchemy/orm/query.py", line 864, in _get_impl
    return fallback_fn(self, key)
  File "/home/adrian/dev/indico/env/lib/python2.7/site-packages/sqlalchemy/orm/loading.py", line 219, in load_on_ident
    return q.one()
  File "/home/adrian/dev/indico/env/lib/python2.7/site-packages/sqlalchemy/orm/query.py", line 2693, in one
    ret = list(self)
  File "/home/adrian/dev/indico/env/lib/python2.7/site-packages/sqlalchemy/orm/query.py", line 2736, in __iter__
    return self._execute_and_instances(context)
  File "/home/adrian/dev/indico/env/lib/python2.7/site-packages/sqlalchemy/orm/query.py", line 2751, in _execute_and_instances
    result = conn.execute(querycontext.statement, self._params)
  File "/home/adrian/dev/indico/env/lib/python2.7/site-packages/sqlalchemy/engine/base.py", line 914, in execute
    return meth(self, multiparams, params)
  File "/home/adrian/dev/indico/env/lib/python2.7/site-packages/sqlalchemy/sql/elements.py", line 323, in _execute_on_connection
    return connection._execute_clauseelement(self, multiparams, params)
  File "/home/adrian/dev/indico/env/lib/python2.7/site-packages/sqlalchemy/engine/base.py", line 1003, in _execute_clauseelement
    inline=len(distilled_params) > 1)
  File "<string>", line 1, in <lambda>
  File "/home/adrian/dev/indico/env/lib/python2.7/site-packages/sqlalchemy/sql/elements.py", line 494, in compile
    return self._compiler(dialect, bind=bind, **kw)
  File "/home/adrian/dev/indico/env/lib/python2.7/site-packages/sqlalchemy/sql/elements.py", line 500, in _compiler
    return dialect.statement_compiler(dialect, self, **kw)
  File "/home/adrian/dev/indico/env/lib/python2.7/site-packages/sqlalchemy/sql/compiler.py", line 395, in __init__
    Compiled.__init__(self, dialect, statement, **kwargs)
  File "/home/adrian/dev/indico/env/lib/python2.7/site-packages/sqlalchemy/sql/compiler.py", line 190, in __init__
    self.string = self.process(self.statement, **compile_kwargs)
  File "/home/adrian/dev/indico/env/lib/python2.7/site-packages/sqlalchemy/sql/compiler.py", line 213, in process
    return obj._compiler_dispatch(self, **kwargs)
  File "/home/adrian/dev/indico/env/lib/python2.7/site-packages/sqlalchemy/sql/visitors.py", line 81, in _compiler_dispatch
    return meth(self, **kw)
  File "/home/adrian/dev/indico/env/lib/python2.7/site-packages/sqlalchemy/sql/compiler.py", line 1585, in visit_select
    for name, column in select._columns_plus_names
  File "/home/adrian/dev/indico/env/lib/python2.7/site-packages/sqlalchemy/sql/compiler.py", line 1357, in _label_select_column
    **column_clause_args
  File "/home/adrian/dev/indico/env/lib/python2.7/site-packages/sqlalchemy/sql/visitors.py", line 93, in _compiler_dispatch
    return meth(self, **kw)
  File "/home/adrian/dev/indico/env/lib/python2.7/site-packages/sqlalchemy/sql/compiler.py", line 615, in visit_label
    OPERATORS[operators.as_] + \
  File "/home/adrian/dev/indico/env/lib/python2.7/site-packages/sqlalchemy/sql/visitors.py", line 93, in _compiler_dispatch
    return meth(self, **kw)
  File "/home/adrian/dev/indico/env/lib/python2.7/site-packages/sqlalchemy/sql/compiler.py", line 527, in visit_grouping
    return "(" + grouping.element._compiler_dispatch(self, **kwargs) + ")"
  File "/home/adrian/dev/indico/env/lib/python2.7/site-packages/sqlalchemy/sql/visitors.py", line 81, in _compiler_dispatch
    return meth(self, **kw)
  File "/home/adrian/dev/indico/env/lib/python2.7/site-packages/sqlalchemy/sql/compiler.py", line 1608, in visit_select
    text, select, inner_columns, froms, byfrom, kwargs)
  File "/home/adrian/dev/indico/env/lib/python2.7/site-packages/sqlalchemy/sql/compiler.py", line 1700, in _compose_select_body
    for f in froms])
  File "/home/adrian/dev/indico/env/lib/python2.7/site-packages/sqlalchemy/sql/visitors.py", line 81, in _compiler_dispatch
    return meth(self, **kw)
  File "/home/adrian/dev/indico/env/lib/python2.7/site-packages/sqlalchemy/sql/compiler.py", line 1194, in visit_cte
    cte_name)
sqlalchemy.exc.CompileError: Multiple, unrelated CTEs found with the same name: u'anon_2'

Comments (7)

  1. Mike Bayer repo owner

    I'm not sure immediately how this could be resolved without major changes. The CTE is a total outlier of a construct; if the column_property were made against a scalar subquery, the naming scheme falls under the realm of column naming. CTEs are FROM objects so without cloning it, it would have the same name each time. then again joined eager loading is cloning things as it makes aliases though how this should interact with an embedded CTE is not clear (the current behavior is probably what's expected in most other case).

    Short answer I don't have any solution for this in the near term :). I'd have to find some time for a deep dive and that is not this week.

  2. Adrian reporter

    Maybe adding some way to mark as CTE as "global" could be a solution.. So if you know that a CTE does not need to be repeated, no matter how/where it's used within a query it would be only added with the WITH statement once and referenced using that same name all the time.

  3. Mike Bayer repo owner

    your use case can be made to work completely using #3058. It's a little bit manual but since you are doing deferred() anyway, it would work fine in your case:

    class Category(Base):
        __tablename__ = 'categories'
        id = Column(Integer, primary_key=True)
        parent_id = Column(
            Integer,
            ForeignKey('categories.id'),
            index=True,
            nullable=True
        )
        is_deleted = Column(
            Boolean,
            nullable=False,
            default=False
        )
        children = relationship(
            'Category',
            primaryjoin=(id == remote(parent_id)) & ~remote(is_deleted),
            lazy=True,
            backref=backref(
                'parent',
                primaryjoin=(remote(id) == parent_id),
                lazy=True
            )
        )
    
        deep_children_count = column_property(null(), deferred=True)
    
    
    def _create_column_prop():
        cat_alias = aliased(Category)
        cte_query = (select([cat_alias.id, cast(array([]), ARRAY(Integer)).label('parents')])
                     .where(cat_alias.parent_id.is_(None) & ~cat_alias.is_deleted)
                     .cte(recursive=True))
        parent_query = (select([cat_alias.id, cte_query.c.parents.op('||')(cat_alias.parent_id)])
                        .where((cat_alias.parent_id == cte_query.c.id) & ~cat_alias.is_deleted))
        cte_query = cte_query.union_all(parent_query)
        return select([func.count()]).where(cte_query.c.parents.contains(array([Category.id])))
    
    
    e = create_engine('postgresql://scott:tiger@localhost/test', echo=True)
    Base.metadata.drop_all(e)
    Base.metadata.create_all(e)
    s = Session(e)
    
    
    root = Category(id=0, children=[
        Category(id=1),
        Category(id=2, children=[
            Category(id=3),
            Category(id=4),
            Category(id=5, children=[
                Category(id=6),
                Category(id=7)
            ])
        ])
    ])
    s.add(root)
    s.flush()
    s.expire_all()
    
    print
    
    cat = s.query(Category).options(
        undefer('deep_children_count', expression=_create_column_prop()),
        joinedload('children').undefer(
            'deep_children_count', expression=_create_column_prop())).get(2)
    print cat
    print cat.deep_children_count
    print cat.children
    

    the other way it would work would be if the CTE() itself knew to clone itself to a new name at copy time, however this is difficult because the expression refers to a Column from the previous CTE, and that has to be moved as well, which would require a new mechanism within the clause-copying system, there's not any precedent for that kind of pattern. That is, it's totally doable but is likely to have bugs for months / years, as has been the case with all other translation systems the first few years they are introduced (not to mention CTEs themselves).

    So I'm going to dupe as #3058 for now.

  4. Log in to comment