object eager loaded on scalar relationship via two separate paths, approach for "existing" row, populate key if not populated?

Issue #3431 resolved
Mike Bayer repo owner created an issue

test:

from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()


class A(Base):
    __tablename__ = 'a'
    id = Column(Integer, primary_key=True)
    b_id = Column(ForeignKey('b.id'))
    c_id = Column(ForeignKey('c.id'))

    b = relationship("B")
    c = relationship("C")


class B(Base):
    __tablename__ = 'b'
    id = Column(Integer, primary_key=True)
    c_id = Column(ForeignKey('c.id'))

    c = relationship("C")


class C(Base):
    __tablename__ = 'c'
    id = Column(Integer, primary_key=True)
    d_id = Column(ForeignKey('d.id'))
    d = relationship("D")


class D(Base):
    __tablename__ = 'd'
    id = Column(Integer, primary_key=True)


e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)

s = Session(e)

c = C(d=D())

s.add(
    A(b=B(c=c), c=c)
)
s.commit()

c_alias_1 = aliased(C)
c_alias_2 = aliased(C)

q = s.query(A)
q = q.join(A.b).join(c_alias_1, B.c).join(c_alias_1.d)
q = q.options(contains_eager(A.b).contains_eager(B.c, alias=c_alias_1).contains_eager(C.d))
q = q.join(c_alias_2, A.c)
q = q.options(contains_eager(A.c, alias=c_alias_2))


a1 = q.all()[0]
assert 'd' in a1.c.__dict__

this test relies on the order in which we load attributes, and for some reason PYTHONHASHSEED isn't doing the job in all cases, so test with this patch:

diff --git a/lib/sqlalchemy/orm/loading.py b/lib/sqlalchemy/orm/loading.py
index 50afaf6..8de66de 100644
--- a/lib/sqlalchemy/orm/loading.py
+++ b/lib/sqlalchemy/orm/loading.py
@@ -295,6 +295,13 @@ def _instance_processor(
     quick_populators = path.get(
         context.attributes, "memoized_setups", _none_set)

+    import random
+    props = list(props)
+    random.shuffle(props)
+
+    print(
+        "PROPS!!!!",[p.key for p in props]
+    )
     for prop in props:
         if prop in quick_populators:

there's no way to force this loader to happen in all cases because ultimately when the C object is already there, the "isnew" flag is not set and we naturally hit the "existing" loader. However, the "existing" loader can do a check here and populate the key, and we are checking anyway:

diff --git a/lib/sqlalchemy/orm/strategies.py b/lib/sqlalchemy/orm/strategies.py
index 78e9293..dff51b9 100644
--- a/lib/sqlalchemy/orm/strategies.py
+++ b/lib/sqlalchemy/orm/strategies.py
@@ -1559,13 +1559,15 @@ class JoinedLoader(AbstractRelationshipLoader):
             # call _instance on the row, even though the object has
             # been created, so that we further descend into properties
             existing = _instance(row)
-            if existing is not None \
-                and key in dict_ \
-                    and existing is not dict_[key]:
-                util.warn(
-                    "Multiple rows returned with "
-                    "uselist=False for eagerly-loaded attribute '%s' "
-                    % self)
+            if existing is not None:
+                if key in dict_:
+                    if existing is not dict_[key]:
+                        util.warn(
+                            "Multiple rows returned with "
+                            "uselist=False for eagerly-loaded attribute '%s' "
+                            % self)
+                else:
+                    dict_[key] = existing

         def load_scalar_from_joined_exec(state, dict_, row):
             _instance(row)

             # this is an inlined path just for column-based attributes.

would need to do the deep thinking here to see if this is right.

Comments (9)

  1. Mike Bayer reporter

    so the subtlety here is that the "isnew" flag is intended to track if the identity we're working on was already initialized in a previous row. in this test, we only have one row; we're hitting the same identity twice in the same row, and on the second time we hit it, "isnew" is false. But really "isnew" shouldn't be false; we're still on the "isnew" row! Not sure if that's how this should be approached.

  2. Mike Bayer reporter

    lets try to see where this condition happens:

    diff --git a/lib/sqlalchemy/orm/strategies.py b/lib/sqlalchemy/orm/strategies.py
    index 78e9293..c5117f7 100644
    --- a/lib/sqlalchemy/orm/strategies.py
    +++ b/lib/sqlalchemy/orm/strategies.py
    @@ -1559,13 +1559,16 @@ class JoinedLoader(AbstractRelationshipLoader):
                 # call _instance on the row, even though the object has
                 # been created, so that we further descend into properties
                 existing = _instance(row)
    -            if existing is not None \
    -                and key in dict_ \
    -                    and existing is not dict_[key]:
    -                util.warn(
    -                    "Multiple rows returned with "
    -                    "uselist=False for eagerly-loaded attribute '%s' "
    -                    % self)
    +            if existing is not None:
    +                if key in dict_:
    +                    if existing is not dict_[key]:
    +                        util.warn(
    +                            "Multiple rows returned with "
    +                            "uselist=False for eagerly-loaded attribute '%s' "
    +                            % self)
    +                else:
    +                    raise Exception("why not?")
    +                    #dict_[key] = existing
    
             def load_scalar_from_joined_exec(state, dict_, row):
                 _instance(row)
    

    only one test raises it, test_assorted_eager EagerTest9.test_joinedload_on_path.

  3. Mike Bayer reporter

    also this is just for the scalar loader. Does this case exist for collections as well ?

  4. Mike Bayer reporter

    it does not! if we do:

    class C(Base):
        __tablename__ = 'c'
        id = Column(Integer, primary_key=True)
        d_id = Column(ForeignKey('d.id'))
        d = relationship("D", uselist=True)
    
    # ...
    
    c = C(d=[D()])
    
    
    # ...
    
    assert 'd' in a1.c.__dict__ and len(a1.c.__dict__['d'])
    

    this passes. Because the "existing" loader for joined collection loading is obviously always going to append new entries to the list. This clarifies what "isnew" is trying to do for scalar loaders. it's just so that we can do that "multiple rows returned" check. It is likely fine to populate the dict if not populated.

  5. Mike Bayer reporter

    here's another variant on this, add this test case as well. For this one, PYTHONHASHSEED does nothing. Only deleting the .pyc files makes it happen.

    """This test case demonstrates a bug where contains_eager does not
    work properly when there are multiple join paths leading to the same
    row.  More specifically, we have a schema with some foreign key
    references shown as edges:
    
      .----------.
      |          |
      |         \ /
    LDA -> A -> LD -> User
    
    In the instance of the DB below, each table contains exactly one row.
    
    If we do a query that does explicit joins and asks for eager loading
    along the join path LDA -> A -> LD -> User, but NOT along the join
    path LDA -> LD -> User, there is a 50% chance that sqlalchemy will
    fail to eagerly load the User instance.
    
    The bug appears to depend on the order of iteration in loading.py over
    the list "populators", which I assume is arbitrarily ordered.  The
    ordering seems to depend on the names of the tables and/or columns.  I
    suspect it might stem from the ordering of a dict keyed by name at
    some point.
    
    This is the loop in loading.py whose iteration order seems to
    determine whether or not the bug happens.  If the bug does not happen
    for you, then try using reversed(populators) below.
    
            if only_load_props is None:
                for key, populator in populators:
                    populator(state, dict_, row)
    
    I suspect the bug is stemming from code that reuses an existing
    instance of the class LD that came from the non-eager-loading
    joinpath, but it is failing to reconsider whether the instance needs
    to have more fields populated to account for eager loading
    requirements of the eager-loading joinpath.
    
    I have tested with the SQLAlchemy-1.0.8 release version and SQLAlchemy
    0.9.7.
    
    When the bug happens, the final line of this file has this exception:
    
    sqlalchemy.orm.exc.DetachedInstanceError: Parent instance <LD at
    0x2ca26d0> is not bound to a Session; lazy load operation of attribute
    'user' cannot proceed
    
    """
    from sqlalchemy import *
    from sqlalchemy.orm import *
    from sqlalchemy.ext.declarative import declarative_base
    
    Base = declarative_base()
    
    class User(Base):
        __tablename__ = 'cs_user'
    
        id = Column(Integer, primary_key=True)
        data = Column(Integer)
    
    class LD(Base):
        """Child.  The column we reference 'A' with is an integer."""
    
        __tablename__ = 'cs_ld'
    
        id = Column(Integer, primary_key=True)
        user_id = Column(Integer, ForeignKey('cs_user.id'))
        user = relationship(User, primaryjoin=user_id == User.id)
    
    class A(Base):
        """Child.  The column we reference 'A' with is an integer."""
    
        __tablename__ = 'cs_a'
    
        id = Column(Integer, primary_key=True)
        ld_id = Column(Integer, ForeignKey('cs_ld.id'))
        ld = relationship(LD, primaryjoin=ld_id == LD.id)
    
    class LDA(Base):
        """Child.  The column we reference 'A' with is an integer."""
    
        __tablename__ = 'cs_lda'
    
        id = Column(Integer, primary_key=True)
        ld_id = Column(Integer, ForeignKey('cs_ld.id'))
        a_id = Column(Integer, ForeignKey('cs_a.id'))
        ld = relationship(LD, primaryjoin=ld_id == LD.id)
        a = relationship(A, primaryjoin=a_id == A.id)
    
    # we demonstrate with SQLite, but the important part
    # is the CAST rendered in the SQL output.
    
    #e = create_engine('postgresql://scott:tiger@localhost/test', echo=True)
    e = create_engine('sqlite://', echo=True)
    Base.metadata.drop_all(e)
    Base.metadata.create_all(e)
    
    s = Session(e)
    
    u0 = User(data=42)
    l0 = LD(user=u0)
    z0 = A(ld=l0)
    lz0 = LDA(ld=l0, a=z0)
    s.add_all([
        u0, l0, z0, lz0
    ])
    s.commit()
    
    l_ac = aliased(LD)
    u_ac = aliased(User)
    
    lz_test = (s.query(LDA)
               .join('ld')
               .options(contains_eager('ld'))
               .join('a', (l_ac, 'ld'), (u_ac, 'user'))
               .options(contains_eager('a')
                        .contains_eager('ld', alias=l_ac)
                        .contains_eager('user', alias=u_ac))
               .first())
    s.expunge_all()
    s.commit()
    s.close()
    print 'TEST0', lz_test
    
    # Exception here:
    print 'TEST5', lz_test.a.ld.user.data
    
  6. Mike Bayer reporter
    • Fixed bug which would cause an eagerly loaded many-to-one attribute to not be loaded, if the joined eager load were from a row where the same entity were present multiple times, some calling for the attribute to be eagerly loaded and others not. The logic here is revised to take in the attribute even though a different loader path has handled the parent entity already. fixes #3431

    → <<cset 594a974ede97>>

  7. Log in to comment