object eager loaded on scalar relationship via two separate paths, approach for "existing" row, populate key if not populated?

Issue #3431 resolved

Mike Bayer repo owner created an issue 2015-05-24

test:

from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()


class A(Base):
    __tablename__ = 'a'
    id = Column(Integer, primary_key=True)
    b_id = Column(ForeignKey('b.id'))
    c_id = Column(ForeignKey('c.id'))

    b = relationship("B")
    c = relationship("C")


class B(Base):
    __tablename__ = 'b'
    id = Column(Integer, primary_key=True)
    c_id = Column(ForeignKey('c.id'))

    c = relationship("C")


class C(Base):
    __tablename__ = 'c'
    id = Column(Integer, primary_key=True)
    d_id = Column(ForeignKey('d.id'))
    d = relationship("D")


class D(Base):
    __tablename__ = 'd'
    id = Column(Integer, primary_key=True)


e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)

s = Session(e)

c = C(d=D())

s.add(
    A(b=B(c=c), c=c)
)
s.commit()

c_alias_1 = aliased(C)
c_alias_2 = aliased(C)

q = s.query(A)
q = q.join(A.b).join(c_alias_1, B.c).join(c_alias_1.d)
q = q.options(contains_eager(A.b).contains_eager(B.c, alias=c_alias_1).contains_eager(C.d))
q = q.join(c_alias_2, A.c)
q = q.options(contains_eager(A.c, alias=c_alias_2))


a1 = q.all()[0]
assert 'd' in a1.c.__dict__

this test relies on the order in which we load attributes, and for some reason PYTHONHASHSEED isn't doing the job in all cases, so test with this patch:

diff --git a/lib/sqlalchemy/orm/loading.py b/lib/sqlalchemy/orm/loading.py
index 50afaf6..8de66de 100644
--- a/lib/sqlalchemy/orm/loading.py
+++ b/lib/sqlalchemy/orm/loading.py
@@ -295,6 +295,13 @@ def _instance_processor(
     quick_populators = path.get(
         context.attributes, "memoized_setups", _none_set)

+    import random
+    props = list(props)
+    random.shuffle(props)
+
+    print(
+        "PROPS!!!!",[p.key for p in props]
+    )
     for prop in props:
         if prop in quick_populators:

there's no way to force this loader to happen in all cases because ultimately when the C object is already there, the "isnew" flag is not set and we naturally hit the "existing" loader. However, the "existing" loader can do a check here and populate the key, and we are checking anyway:

diff --git a/lib/sqlalchemy/orm/strategies.py b/lib/sqlalchemy/orm/strategies.py
index 78e9293..dff51b9 100644
--- a/lib/sqlalchemy/orm/strategies.py
+++ b/lib/sqlalchemy/orm/strategies.py
@@ -1559,13 +1559,15 @@ class JoinedLoader(AbstractRelationshipLoader):
             # call _instance on the row, even though the object has
             # been created, so that we further descend into properties
             existing = _instance(row)
-            if existing is not None \
-                and key in dict_ \
-                    and existing is not dict_[key]:
-                util.warn(
-                    "Multiple rows returned with "
-                    "uselist=False for eagerly-loaded attribute '%s' "
-                    % self)
+            if existing is not None:
+                if key in dict_:
+                    if existing is not dict_[key]:
+                        util.warn(
+                            "Multiple rows returned with "
+                            "uselist=False for eagerly-loaded attribute '%s' "
+                            % self)
+                else:
+                    dict_[key] = existing

         def load_scalar_from_joined_exec(state, dict_, row):
             _instance(row)

             # this is an inlined path just for column-based attributes.

would need to do the deep thinking here to see if this is right.

Comments (9)

Mike Bayer reporter
so the subtlety here is that the "isnew" flag is intended to track if the identity we're working on was already initialized in a previous row. in this test, we only have one row; we're hitting the same identity twice in the same row, and on the second time we hit it, "isnew" is false. But really "isnew" shouldn't be false; we're still on the "isnew" row! Not sure if that's how this should be approached.
- 2015-05-24T01:06:04+00:00

Mike Bayer reporter

lets try to see where this condition happens:

diff --git a/lib/sqlalchemy/orm/strategies.py b/lib/sqlalchemy/orm/strategies.py
index 78e9293..c5117f7 100644
--- a/lib/sqlalchemy/orm/strategies.py
+++ b/lib/sqlalchemy/orm/strategies.py
@@ -1559,13 +1559,16 @@ class JoinedLoader(AbstractRelationshipLoader):
             # call _instance on the row, even though the object has
             # been created, so that we further descend into properties
             existing = _instance(row)
-            if existing is not None \
-                and key in dict_ \
-                    and existing is not dict_[key]:
-                util.warn(
-                    "Multiple rows returned with "
-                    "uselist=False for eagerly-loaded attribute '%s' "
-                    % self)
+            if existing is not None:
+                if key in dict_:
+                    if existing is not dict_[key]:
+                        util.warn(
+                            "Multiple rows returned with "
+                            "uselist=False for eagerly-loaded attribute '%s' "
+                            % self)
+                else:
+                    raise Exception("why not?")
+                    #dict_[key] = existing

         def load_scalar_from_joined_exec(state, dict_, row):
             _instance(row)

only one test raises it, test_assorted_eager EagerTest9.test_joinedload_on_path.

2015-05-24T01:14:13+00:00

Mike Bayer reporter
also this is just for the scalar loader. Does this case exist for collections as well ?
- 2015-05-24T01:21:23+00:00
Mike Bayer reporter
it does not! if we do:
```
class C(Base):
    __tablename__ = 'c'
    id = Column(Integer, primary_key=True)
    d_id = Column(ForeignKey('d.id'))
    d = relationship("D", uselist=True)

# ...

c = C(d=[D()])


# ...

assert 'd' in a1.c.__dict__ and len(a1.c.__dict__['d'])
```
this passes. Because the "existing" loader for joined collection loading is obviously always going to append new entries to the list. This clarifies what "isnew" is trying to do for scalar loaders. it's just so that we can do that "multiple rows returned" check. It is likely fine to populate the dict if not populated.
- 2015-05-24T01:25:34+00:00
Mike Bayer reporter
- changed title to object eager loaded on scalar relationship via two separate paths, approach for "existing" row, populate key if not populated?
- 2015-05-24T01:25:59+00:00

Mike Bayer reporter

here's another variant on this, add this test case as well. For this one, PYTHONHASHSEED does nothing. Only deleting the .pyc files makes it happen.

"""This test case demonstrates a bug where contains_eager does not
work properly when there are multiple join paths leading to the same
row.  More specifically, we have a schema with some foreign key
references shown as edges:

  .----------.
  |          |
  |         \ /
LDA -> A -> LD -> User

In the instance of the DB below, each table contains exactly one row.

If we do a query that does explicit joins and asks for eager loading
along the join path LDA -> A -> LD -> User, but NOT along the join
path LDA -> LD -> User, there is a 50% chance that sqlalchemy will
fail to eagerly load the User instance.

The bug appears to depend on the order of iteration in loading.py over
the list "populators", which I assume is arbitrarily ordered.  The
ordering seems to depend on the names of the tables and/or columns.  I
suspect it might stem from the ordering of a dict keyed by name at
some point.

This is the loop in loading.py whose iteration order seems to
determine whether or not the bug happens.  If the bug does not happen
for you, then try using reversed(populators) below.

        if only_load_props is None:
            for key, populator in populators:
                populator(state, dict_, row)

I suspect the bug is stemming from code that reuses an existing
instance of the class LD that came from the non-eager-loading
joinpath, but it is failing to reconsider whether the instance needs
to have more fields populated to account for eager loading
requirements of the eager-loading joinpath.

I have tested with the SQLAlchemy-1.0.8 release version and SQLAlchemy
0.9.7.

When the bug happens, the final line of this file has this exception:

sqlalchemy.orm.exc.DetachedInstanceError: Parent instance <LD at
0x2ca26d0> is not bound to a Session; lazy load operation of attribute
'user' cannot proceed

"""
from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()

class User(Base):
    __tablename__ = 'cs_user'

    id = Column(Integer, primary_key=True)
    data = Column(Integer)

class LD(Base):
    """Child.  The column we reference 'A' with is an integer."""

    __tablename__ = 'cs_ld'

    id = Column(Integer, primary_key=True)
    user_id = Column(Integer, ForeignKey('cs_user.id'))
    user = relationship(User, primaryjoin=user_id == User.id)

class A(Base):
    """Child.  The column we reference 'A' with is an integer."""

    __tablename__ = 'cs_a'

    id = Column(Integer, primary_key=True)
    ld_id = Column(Integer, ForeignKey('cs_ld.id'))
    ld = relationship(LD, primaryjoin=ld_id == LD.id)

class LDA(Base):
    """Child.  The column we reference 'A' with is an integer."""

    __tablename__ = 'cs_lda'

    id = Column(Integer, primary_key=True)
    ld_id = Column(Integer, ForeignKey('cs_ld.id'))
    a_id = Column(Integer, ForeignKey('cs_a.id'))
    ld = relationship(LD, primaryjoin=ld_id == LD.id)
    a = relationship(A, primaryjoin=a_id == A.id)

# we demonstrate with SQLite, but the important part
# is the CAST rendered in the SQL output.

#e = create_engine('postgresql://scott:tiger@localhost/test', echo=True)
e = create_engine('sqlite://', echo=True)
Base.metadata.drop_all(e)
Base.metadata.create_all(e)

s = Session(e)

u0 = User(data=42)
l0 = LD(user=u0)
z0 = A(ld=l0)
lz0 = LDA(ld=l0, a=z0)
s.add_all([
    u0, l0, z0, lz0
])
s.commit()

l_ac = aliased(LD)
u_ac = aliased(User)

lz_test = (s.query(LDA)
           .join('ld')
           .options(contains_eager('ld'))
           .join('a', (l_ac, 'ld'), (u_ac, 'user'))
           .options(contains_eager('a')
                    .contains_eager('ld', alias=l_ac)
                    .contains_eager('user', alias=u_ac))
           .first())
s.expunge_all()
s.commit()
s.close()
print 'TEST0', lz_test

# Exception here:
print 'TEST5', lz_test.a.ld.user.data

2015-09-03T02:06:54+00:00

Mike Bayer reporter
- changed status to resolved
- Fixed bug which would cause an eagerly loaded many-to-one attribute to not be loaded, if the joined eager load were from a row where the same entity were present multiple times, some calling for the attribute to be eagerly loaded and others not. The logic here is revised to take in the attribute even though a different loader path has handled the parent entity already. fixes ~~#3431~~
→ <<cset 594a974ede97>>
- 2016-02-17T23:03:21+00:00
Mike Bayer reporter
Issue ~~#3681~~ was marked as a duplicate of this issue.
- 2016-03-17T15:28:45+00:00
Mike Bayer reporter
see also ~~#3811~~, ~~#3822~~
- 2016-10-13T16:29:56+00:00
Log in to comment

Assignee: –

Type: bug

Priority: major

Status: resolved

Component: orm

Milestone: 1.1

Votes: 0

Watchers: 3