eager loading + single inh polymorphic join issue

Issue #3593 resolved
Mike Bayer repo owner created an issue

breaks as of 0.8.1 !! 637232709770de034caf67c is the changeset. this is a deep one, taking out polymorphic resolves so this is an old school one.

from __future__ import print_function, unicode_literals

import sqlalchemy as sa
import sqlalchemy.orm
import sqlalchemy.ext.declarative
import sqlalchemy.ext.orderinglist

Base = sqlalchemy.ext.declarative.declarative_base()


class Node(Base):
    __tablename__ = 'todo_elements'

    element_id = sa.Column(sa.Integer, nullable=False, primary_key=True)
    element_type = sa.Column(sa.String(20), nullable=False)

    __mapper_args__ = {
        'polymorphic_on': element_type,
        'with_polymorphic': ('*', None),
    }


class NodeWithEdges(Node):
    __mapper_args__ = {'polymorphic_identity': 'todo.list'}


class LeafNode(Node):
    __mapper_args__ = {'polymorphic_identity': 'todo.item'}

    my_flag = sa.Column(sa.Boolean, default=False)


class Edge(Base):
    __tablename__ = 'todo_links'
    __table_args__ = (
        sa.PrimaryKeyConstraint('parent_id', 'child_id'),
        sa.ForeignKeyConstraint(['parent_id'], [Node.element_id]),
        sa.ForeignKeyConstraint(['child_id'], [Node.element_id]),
    )

    parent_id = sa.Column(sa.Integer, nullable=False)
    child_id = sa.Column(sa.Integer, nullable=False)


Edge.child = sa.orm.relationship(
    Node,
    uselist=False,
    primaryjoin=Edge.child_id == Node.element_id,
    lazy=False,
    cascade='all',
    passive_updates=False,
    join_depth=8,
)


NodeWithEdges.links = sa.orm.relationship(
    Edge,
    primaryjoin=NodeWithEdges.element_id == Edge.parent_id,
    lazy=False,
    cascade='all, delete-orphan',
    single_parent=True,
    passive_updates=False,
    join_depth=8,
)


#engine = sa.create_engine('sqlite:///:memory:', echo=True)
engine = sa.create_engine('postgresql://scott:tiger@localhost/test', echo=True)

Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)

Session = sa.orm.sessionmaker(bind=engine)
session = Session()


#
# 1 --> 2 --> 3
#         --> 4
#         --> 5
#

n1 = NodeWithEdges(element_id=1)
n2 = NodeWithEdges(element_id=2)
n3 = LeafNode(element_id=3)
n4 = LeafNode(element_id=4, my_flag=True)
n5 = LeafNode(element_id=5)

e12 = Edge(parent_id=n1.element_id, child_id=n2.element_id)
e23 = Edge(parent_id=n2.element_id, child_id=n3.element_id)
e24 = Edge(parent_id=n2.element_id, child_id=n4.element_id)
e25 = Edge(parent_id=n2.element_id, child_id=n5.element_id)

session.add_all([n1, n2, n3, n4, n5, e12, e23, e24, e25])
session.commit()
session.expunge_all()

new_n1 = session.query(NodeWithEdges).filter(
    NodeWithEdges.element_id == 1).all()[0] #first()
#print(session.identity_map.keys())


def traverse(node, f, depth=0):
    f(node, depth)
    if hasattr(node, 'links'):
        for link in node.links:
            traverse(link.child, f, depth + 1)


def indent_print(node, depth):
    print('  ' * depth + str(node.element_id))
    if hasattr(node, 'my_flag'):
        print(node.my_flag)

# recursion overflow after commit 
traverse(new_n1, indent_print)

Comments (7)

  1. Mike Bayer reporter

    It's the SQL rendering, so that's somewhat of a relief.

    Shorter test:

    import sqlalchemy as sa
    import sqlalchemy.orm
    from sqlalchemy.orm import joinedload, joinedload_all
    import sqlalchemy.ext.declarative
    import sqlalchemy.ext.orderinglist
    
    Base = sqlalchemy.ext.declarative.declarative_base()
    
    
    class Node(Base):
        __tablename__ = 'todo_elements'
    
        element_id = sa.Column(sa.Integer, nullable=False, primary_key=True)
        element_type = sa.Column(sa.String(20), nullable=False)
    
        __mapper_args__ = {
            'polymorphic_on': element_type,
            'with_polymorphic': ('*', None),
        }
    
    
    class NodeWithEdges(Node):
        __mapper_args__ = {'polymorphic_identity': 'todo.list'}
    
    
    class LeafNode(Node):
        __mapper_args__ = {'polymorphic_identity': 'todo.item'}
    
        my_flag = sa.Column(sa.Boolean, default=False)
    
    
    class Edge(Base):
        __tablename__ = 'todo_links'
        __table_args__ = (
            sa.PrimaryKeyConstraint('parent_id', 'child_id'),
            sa.ForeignKeyConstraint(['parent_id'], [Node.element_id]),
            sa.ForeignKeyConstraint(['child_id'], [Node.element_id]),
        )
    
        parent_id = sa.Column(sa.Integer, nullable=False)
        child_id = sa.Column(sa.Integer, nullable=False)
    
    
    Edge.child = sa.orm.relationship(
        Node,
        primaryjoin=Edge.child_id == Node.element_id,
    )
    
    
    NodeWithEdges.links = sa.orm.relationship(
        Edge,
        primaryjoin=NodeWithEdges.element_id == Edge.parent_id,
    )
    
    engine = sa.create_engine('postgresql://scott:tiger@localhost/test', echo=True)
    
    Base.metadata.drop_all(engine)
    Base.metadata.create_all(engine)
    
    Session = sa.orm.sessionmaker(bind=engine)
    session = Session()
    
    
    #
    # 1 --> 2 --> 3
    #         --> 4
    #         --> 5
    #
    
    n1 = NodeWithEdges(element_id=1)
    n2 = NodeWithEdges(element_id=2)
    n3 = LeafNode(element_id=3)
    n4 = LeafNode(element_id=4, my_flag=True)
    n5 = LeafNode(element_id=5)
    
    e12 = Edge(parent_id=n1.element_id, child_id=n2.element_id)
    e23 = Edge(parent_id=n2.element_id, child_id=n3.element_id)
    e24 = Edge(parent_id=n2.element_id, child_id=n4.element_id)
    e25 = Edge(parent_id=n2.element_id, child_id=n5.element_id)
    
    session.add_all([n1, n2, n3, n4, n5, e12, e23, e24, e25])
    session.commit()
    session.expunge_all()
    
    new_n1 = session.query(NodeWithEdges).options(
            joinedload_all(
                NodeWithEdges.links,
                Edge.child,
                NodeWithEdges.links
            )
        ).filter(
        NodeWithEdges.element_id == 1).all()[0]
    
    
    def traverse(node, f, depth=0):
        f(node, depth)
        if hasattr(node, 'links'):
            for link in node.links:
                if link.parent_id != node.element_id:
                #if link.child is node:
                    import pdb
                    pdb.set_trace()
                else:
                    traverse(link.child, f, depth + 1)
    
    def indent_print(node, depth):
        print('  ' * depth + str(node.element_id))
        if hasattr(node, 'my_flag'):
            print(node.my_flag)
    
    traverse(new_n1, indent_print)
    

    the good SQL in 0.8.0:

    SELECT todo_elements.element_id AS todo_elements_element_id, todo_elements.element_type AS todo_elements_element_type, todo_links_1.parent_id AS todo_links_1_parent_id, todo_links_1.child_id AS todo_links_1_child_id, todo_elements_1.element_id AS todo_elements_1_element_id, todo_elements_1.element_type AS todo_elements_1_element_type, todo_links_2.parent_id AS todo_links_2_parent_id, todo_links_2.child_id AS todo_links_2_child_id, todo_elements_1.my_flag AS todo_elements_1_my_flag 
    FROM todo_elements LEFT OUTER JOIN todo_links AS todo_links_1 ON todo_elements.element_id = todo_links_1.parent_id LEFT OUTER JOIN todo_elements AS todo_elements_1 ON todo_links_1.child_id = todo_elements_1.element_id LEFT OUTER JOIN todo_links AS todo_links_2 ON todo_elements_1.element_id = todo_links_2.parent_id 
    WHERE todo_elements.element_id = %(element_id_1)s AND todo_elements.element_type IN (%(element_type_1)s)
    

    the bad, the last ON clause gets messed up:

    SELECT todo_elements.element_id AS todo_elements_element_id, todo_elements.element_type AS todo_elements_element_type, todo_links_1.parent_id AS todo_links_1_parent_id, todo_links_1.child_id AS todo_links_1_child_id, todo_elements_1.element_id AS todo_elements_1_element_id, todo_elements_1.element_type AS todo_elements_1_element_type, todo_links_2.parent_id AS todo_links_2_parent_id, todo_links_2.child_id AS todo_links_2_child_id, todo_elements_1.my_flag AS todo_elements_1_my_flag 
    FROM todo_elements LEFT OUTER JOIN todo_links AS todo_links_1 ON todo_elements.element_id = todo_links_1.parent_id LEFT OUTER JOIN todo_elements AS todo_elements_1 ON todo_links_1.child_id = todo_elements_1.element_id LEFT OUTER JOIN todo_links AS todo_links_2 ON todo_elements.element_id = todo_links_2.parent_id 
    WHERE todo_elements.element_id = %(element_id_1)s AND todo_elements.element_type IN (%(element_type_1)s)
    
  2. Mike Bayer reporter

    here's the patch, needs a test:

    diff --git a/lib/sqlalchemy/orm/strategies.py b/lib/sqlalchemy/orm/strategies.py
    index 7de470d..21152e3 100644
    --- a/lib/sqlalchemy/orm/strategies.py
    +++ b/lib/sqlalchemy/orm/strategies.py
    @@ -1325,8 +1325,17 @@ class JoinedLoader(AbstractRelationshipLoader):
    
             if adapter:
                 if getattr(adapter, 'aliased_class', None):
    +                # joining from an adapted entity.  The adapted entity
    +                # might be a "with_polymorphic", so resolve that to our
    +                # specific mapper's entity before looking for our attribute
    +                # name on it.
    +                efm = inspect(adapter.aliased_class).\
    +                    _entity_for_mapper(self.parent)
    +
    +                # look for our attribute on the adapted entity, else fall back
    +                # to our straight property
                     onclause = getattr(
    -                    adapter.aliased_class, self.key,
    +                    efm.entity, self.key,
                         self.parent_property)
                 else:
                     onclause = getattr(
    

    this might be portable to even 0.9, I doubt I want to port it there but this bug does go back to 0.8.1.

  3. Mike Bayer reporter

    revised:

    diff --git a/lib/sqlalchemy/orm/strategies.py b/lib/sqlalchemy/orm/strategies.py
    index 7de470d..21152e3 100644
    --- a/lib/sqlalchemy/orm/strategies.py
    +++ b/lib/sqlalchemy/orm/strategies.py
    @@ -1325,8 +1325,17 @@ class JoinedLoader(AbstractRelationshipLoader):
    
             if adapter:
                 if getattr(adapter, 'aliased_class', None):
    +                # joining from an adapted entity.  The adapted entity
    +                # might be a "with_polymorphic", so resolve that to our
    +                # specific mapper's entity before looking for our attribute
    +                # name on it.
    +                efm = inspect(adapter.aliased_class).\
    +                    _entity_for_mapper(self.parent)
    +
    +                # look for our attribute on the adapted entity, else fall back
    +                # to our straight property
                     onclause = getattr(
    -                    adapter.aliased_class, self.key,
    +                    efm.entity, self.key,
                         self.parent_property)
                 else:
                     onclause = getattr(
    diff --git a/lib/sqlalchemy/orm/util.py b/lib/sqlalchemy/orm/util.py
    index 4351c8d..46183a4 100644
    --- a/lib/sqlalchemy/orm/util.py
    +++ b/lib/sqlalchemy/orm/util.py
    @@ -537,7 +537,11 @@ class AliasedInsp(InspectionAttr):
         def _entity_for_mapper(self, mapper):
             self_poly = self.with_polymorphic_mappers
             if mapper in self_poly:
    -            return getattr(self.entity, mapper.class_.__name__)._aliased_insp
    +            if mapper is self.mapper:
    +                return self
    +            else:
    +                return getattr(
    +                    self.entity, mapper.class_.__name__)._aliased_insp
             elif mapper.isa(self.mapper):
                 return self
             else:
    

    need to test both:

    Node.links = sa.orm.relationship(
        Edge,
        primaryjoin=NodeWithEdges.element_id == Edge.parent_id,
    )
    

    and

    NodeWithEdges.links = sa.orm.relationship(
        Edge,
        primaryjoin=NodeWithEdges.element_id == Edge.parent_id,
    )
    
  4. Mike Bayer reporter
    • Fixed bug which is actually a regression that occurred between versions 0.8.0 and 0.8.1, due 🎫2714. The case where joined eager loading needs to join out over a subclass-bound relationship when "with_polymorphic" were also used would fail to join from the correct entity. fixes #3593

    → <<cset 1202e140b987>>

  5. Mike Bayer reporter
    • Fixed bug which is actually a regression that occurred between versions 0.8.0 and 0.8.1, due 🎫2714. The case where joined eager loading needs to join out over a subclass-bound relationship when "with_polymorphic" were also used would fail to join from the correct entity. fixes #3593

    (cherry picked from commit 1202e140b9876cf202c56d2f41bbbb573e15a39d)

    → <<cset 2aeda9bd21eb>>

  6. Bertrand Croq

    Upgrading from SQLAlchemy 1.0.9 to 1.0.10 breaks one of my applications :

      File ".../lib/python2.7/site-packages/SQLAlchemy-1.0.10-py2.7-linux-x86_64.egg/sqlalchemy/orm/query.py", line 2728, in __iter__
        context = self._compile_context()
      File ".../lib/python2.7/site-packages/SQLAlchemy-1.0.10-py2.7-linux-x86_64.egg/sqlalchemy/orm/query.py", line 3196, in _compile_context
        strategy(*rec[1:])
      File ".../lib/python2.7/site-packages/SQLAlchemy-1.0.10-py2.7-linux-x86_64.egg/sqlalchemy/orm/strategies.py", line 1333, in _create_eager_join
        _entity_for_mapper(self.parent)
      File ".../lib/python2.7/site-packages/SQLAlchemy-1.0.10-py2.7-linux-x86_64.egg/sqlalchemy/orm/util.py", line 549, in _entity_for_mapper
        mapper, self)
    AssertionError: mapper Mapper|BaseClass|relation_name doesn't correspond to <AliasedInsp at 0x7fab70121f90; PolymClass>
    

    I had to remove lazy=False from some relations to make it work again.

    I work on a simple test to report the problem in a cleaner way.

    EDIT: https://gist.github.com/bcroq/312877e01102f14a8948

  7. Log in to comment