1. Mike Bayer
  2. sqlalchemy

Wiki

Clone wiki

sqlalchemy / UsageRecipes / RelationshipToLatest

RelationshipToLatest

Apply a correlated subquery to relationship() in order to retrieve only the "Xth" item of a collection.

from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
import datetime

class A(Base):
    __tablename__ = 'a'
    id = Column(Integer, primary_key=True)
    bs = relationship("B")

    latest_b = relationship(lambda: B, 
                    primaryjoin=lambda: and_(
                            A.id==B.a_id, 
                            B.date==select([func.max(B.date)]).
                                      where(B.a_id==A.id).
                                      correlate(A.__table__)
                        )
                    )

class B(Base):
    __tablename__ = 'b'
    id = Column(Integer, primary_key=True)
    a_id = Column(Integer, ForeignKey('a.id'))
    date = Column(Date)

    def __repr__(self):
        return "B(date=%r)" % self.date


e = create_engine('sqlite://', echo=True)
Base.metadata.create_all(e)
s = Session(e)

s.add_all([
    A(bs=[
            B(date=datetime.date(2011, 10, 5)),
            B(date=datetime.date(2011, 8, 4)),
            B(date=datetime.date(2011, 9, 17)),
        ]),
    A(bs=[
            B(date=datetime.date(2011, 10, 5)),
            B(date=datetime.date(2011, 8, 4)),
            B(date=datetime.date(2011, 9, 17)),
        ]),
])
s.commit()

for obj in s.query(A).options(joinedload(A.latest_b)):
        print obj.latest_b

The SQL we see when using joinedload() in this way will apply the correlated subquery into the ON clause of the join:

SELECT a.id AS a_id, b_1.id AS b_1_id, b_1.a_id AS b_1_a_id, b_1.date AS b_1_date 
    FROM a 
    LEFT OUTER JOIN b AS b_1 ON 
        a.id = b_1.a_id AND 
        b_1.date = (
             SELECT max(b_1.date) AS max_1 FROM b AS b_1 
             WHERE b_1.a_id = a.id
            )

We can build SQL with a nicer query plan by instead using contains_eager() in conjunction with explicit usage of join():

subq = s.query(B.a_id, func.max(B.date).label('date')).group_by(B.a_id).subquery()
for obj in s.query(A).join(A.bs).\
                join(subq, A.bs).\
                filter(subq.c.date==B.date).options(contains_eager(A.bs)):
    print obj.bs

Producing instead of a correlated subquery in an ON clause, two separate targets to JOIN on. The set intersection is also easier to see here:

SELECT b.id AS b_id, b.a_id AS b_a_id, b.date AS b_date, a.id AS a_id 
    FROM a 
    JOIN b ON a.id = b.a_id 
    JOIN (
          SELECT b.a_id AS a_id, max(b.date) AS date 
              FROM b GROUP BY b.a_id
        ) AS anon_1 ON a.id = anon_1.a_id 
    WHERE anon_1.date = b.date

Updated