object eager loaded on scalar relationship via two separate paths, approach for "existing" row, populate key if not populated?
test:
from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class A(Base):
__tablename__ = 'a'
id = Column(Integer, primary_key=True)
b_id = Column(ForeignKey('b.id'))
c_id = Column(ForeignKey('c.id'))
b = relationship("B")
c = relationship("C")
class B(Base):
__tablename__ = 'b'
id = Column(Integer, primary_key=True)
c_id = Column(ForeignKey('c.id'))
c = relationship("C")
class C(Base):
__tablename__ = 'c'
id = Column(Integer, primary_key=True)
d_id = Column(ForeignKey('d.id'))
d = relationship("D")
class D(Base):
__tablename__ = 'd'
id = Column(Integer, primary_key=True)
e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)
s = Session(e)
c = C(d=D())
s.add(
A(b=B(c=c), c=c)
)
s.commit()
c_alias_1 = aliased(C)
c_alias_2 = aliased(C)
q = s.query(A)
q = q.join(A.b).join(c_alias_1, B.c).join(c_alias_1.d)
q = q.options(contains_eager(A.b).contains_eager(B.c, alias=c_alias_1).contains_eager(C.d))
q = q.join(c_alias_2, A.c)
q = q.options(contains_eager(A.c, alias=c_alias_2))
a1 = q.all()[0]
assert 'd' in a1.c.__dict__
this test relies on the order in which we load attributes, and for some reason PYTHONHASHSEED isn't doing the job in all cases, so test with this patch:
diff --git a/lib/sqlalchemy/orm/loading.py b/lib/sqlalchemy/orm/loading.py
index 50afaf6..8de66de 100644
--- a/lib/sqlalchemy/orm/loading.py
+++ b/lib/sqlalchemy/orm/loading.py
@@ -295,6 +295,13 @@ def _instance_processor(
quick_populators = path.get(
context.attributes, "memoized_setups", _none_set)
+ import random
+ props = list(props)
+ random.shuffle(props)
+
+ print(
+ "PROPS!!!!",[p.key for p in props]
+ )
for prop in props:
if prop in quick_populators:
there's no way to force this loader to happen in all cases because ultimately when the C object is already there, the "isnew" flag is not set and we naturally hit the "existing" loader. However, the "existing" loader can do a check here and populate the key, and we are checking anyway:
diff --git a/lib/sqlalchemy/orm/strategies.py b/lib/sqlalchemy/orm/strategies.py
index 78e9293..dff51b9 100644
--- a/lib/sqlalchemy/orm/strategies.py
+++ b/lib/sqlalchemy/orm/strategies.py
@@ -1559,13 +1559,15 @@ class JoinedLoader(AbstractRelationshipLoader):
# call _instance on the row, even though the object has
# been created, so that we further descend into properties
existing = _instance(row)
- if existing is not None \
- and key in dict_ \
- and existing is not dict_[key]:
- util.warn(
- "Multiple rows returned with "
- "uselist=False for eagerly-loaded attribute '%s' "
- % self)
+ if existing is not None:
+ if key in dict_:
+ if existing is not dict_[key]:
+ util.warn(
+ "Multiple rows returned with "
+ "uselist=False for eagerly-loaded attribute '%s' "
+ % self)
+ else:
+ dict_[key] = existing
def load_scalar_from_joined_exec(state, dict_, row):
_instance(row)
# this is an inlined path just for column-based attributes.
would need to do the deep thinking here to see if this is right.
Comments (9)
-
reporter -
reporter lets try to see where this condition happens:
diff --git a/lib/sqlalchemy/orm/strategies.py b/lib/sqlalchemy/orm/strategies.py index 78e9293..c5117f7 100644 --- a/lib/sqlalchemy/orm/strategies.py +++ b/lib/sqlalchemy/orm/strategies.py @@ -1559,13 +1559,16 @@ class JoinedLoader(AbstractRelationshipLoader): # call _instance on the row, even though the object has # been created, so that we further descend into properties existing = _instance(row) - if existing is not None \ - and key in dict_ \ - and existing is not dict_[key]: - util.warn( - "Multiple rows returned with " - "uselist=False for eagerly-loaded attribute '%s' " - % self) + if existing is not None: + if key in dict_: + if existing is not dict_[key]: + util.warn( + "Multiple rows returned with " + "uselist=False for eagerly-loaded attribute '%s' " + % self) + else: + raise Exception("why not?") + #dict_[key] = existing def load_scalar_from_joined_exec(state, dict_, row): _instance(row)
only one test raises it, test_assorted_eager EagerTest9.test_joinedload_on_path.
-
reporter also this is just for the scalar loader. Does this case exist for collections as well ?
-
reporter it does not! if we do:
class C(Base): __tablename__ = 'c' id = Column(Integer, primary_key=True) d_id = Column(ForeignKey('d.id')) d = relationship("D", uselist=True) # ... c = C(d=[D()]) # ... assert 'd' in a1.c.__dict__ and len(a1.c.__dict__['d'])
this passes. Because the "existing" loader for joined collection loading is obviously always going to append new entries to the list. This clarifies what "isnew" is trying to do for scalar loaders. it's just so that we can do that "multiple rows returned" check. It is likely fine to populate the dict if not populated.
-
reporter - changed title to object eager loaded on scalar relationship via two separate paths, approach for "existing" row, populate key if not populated?
-
reporter here's another variant on this, add this test case as well. For this one, PYTHONHASHSEED does nothing. Only deleting the .pyc files makes it happen.
"""This test case demonstrates a bug where contains_eager does not work properly when there are multiple join paths leading to the same row. More specifically, we have a schema with some foreign key references shown as edges: .----------. | | | \ / LDA -> A -> LD -> User In the instance of the DB below, each table contains exactly one row. If we do a query that does explicit joins and asks for eager loading along the join path LDA -> A -> LD -> User, but NOT along the join path LDA -> LD -> User, there is a 50% chance that sqlalchemy will fail to eagerly load the User instance. The bug appears to depend on the order of iteration in loading.py over the list "populators", which I assume is arbitrarily ordered. The ordering seems to depend on the names of the tables and/or columns. I suspect it might stem from the ordering of a dict keyed by name at some point. This is the loop in loading.py whose iteration order seems to determine whether or not the bug happens. If the bug does not happen for you, then try using reversed(populators) below. if only_load_props is None: for key, populator in populators: populator(state, dict_, row) I suspect the bug is stemming from code that reuses an existing instance of the class LD that came from the non-eager-loading joinpath, but it is failing to reconsider whether the instance needs to have more fields populated to account for eager loading requirements of the eager-loading joinpath. I have tested with the SQLAlchemy-1.0.8 release version and SQLAlchemy 0.9.7. When the bug happens, the final line of this file has this exception: sqlalchemy.orm.exc.DetachedInstanceError: Parent instance <LD at 0x2ca26d0> is not bound to a Session; lazy load operation of attribute 'user' cannot proceed """ from sqlalchemy import * from sqlalchemy.orm import * from sqlalchemy.ext.declarative import declarative_base Base = declarative_base() class User(Base): __tablename__ = 'cs_user' id = Column(Integer, primary_key=True) data = Column(Integer) class LD(Base): """Child. The column we reference 'A' with is an integer.""" __tablename__ = 'cs_ld' id = Column(Integer, primary_key=True) user_id = Column(Integer, ForeignKey('cs_user.id')) user = relationship(User, primaryjoin=user_id == User.id) class A(Base): """Child. The column we reference 'A' with is an integer.""" __tablename__ = 'cs_a' id = Column(Integer, primary_key=True) ld_id = Column(Integer, ForeignKey('cs_ld.id')) ld = relationship(LD, primaryjoin=ld_id == LD.id) class LDA(Base): """Child. The column we reference 'A' with is an integer.""" __tablename__ = 'cs_lda' id = Column(Integer, primary_key=True) ld_id = Column(Integer, ForeignKey('cs_ld.id')) a_id = Column(Integer, ForeignKey('cs_a.id')) ld = relationship(LD, primaryjoin=ld_id == LD.id) a = relationship(A, primaryjoin=a_id == A.id) # we demonstrate with SQLite, but the important part # is the CAST rendered in the SQL output. #e = create_engine('postgresql://scott:tiger@localhost/test', echo=True) e = create_engine('sqlite://', echo=True) Base.metadata.drop_all(e) Base.metadata.create_all(e) s = Session(e) u0 = User(data=42) l0 = LD(user=u0) z0 = A(ld=l0) lz0 = LDA(ld=l0, a=z0) s.add_all([ u0, l0, z0, lz0 ]) s.commit() l_ac = aliased(LD) u_ac = aliased(User) lz_test = (s.query(LDA) .join('ld') .options(contains_eager('ld')) .join('a', (l_ac, 'ld'), (u_ac, 'user')) .options(contains_eager('a') .contains_eager('ld', alias=l_ac) .contains_eager('user', alias=u_ac)) .first()) s.expunge_all() s.commit() s.close() print 'TEST0', lz_test # Exception here: print 'TEST5', lz_test.a.ld.user.data
-
reporter - changed status to resolved
- Fixed bug which would cause an eagerly loaded many-to-one attribute
to not be loaded, if the joined eager load were from a row where the
same entity were present multiple times, some calling for the attribute
to be eagerly loaded and others not. The logic here is revised to
take in the attribute even though a different loader path has
handled the parent entity already. fixes
#3431
→ <<cset 594a974ede97>>
-
reporter Issue
#3681was marked as a duplicate of this issue. -
reporter - Log in to comment
so the subtlety here is that the "isnew" flag is intended to track if the identity we're working on was already initialized in a previous row. in this test, we only have one row; we're hitting the same identity twice in the same row, and on the second time we hit it, "isnew" is false. But really "isnew" shouldn't be false; we're still on the "isnew" row! Not sure if that's how this should be approached.