fine grained eager load support

Issue #777 resolved
Mike Bayer repo owner created an issue
from sqlalchemy import *
from sqlalchemy.orm import *

engine = create_engine('sqlite:///:memory:', echo=True)

metadata = MetaData()
accounts_table = Table('accounts', metadata,
    Column('account_id', Integer, primary_key=True),
    Column('name', String(40)),
)
transactions_table = Table('transactions', metadata,
    Column('transaction_id', Integer, primary_key=True),
    Column('name', String(40)),
)
entries_table = Table('entries', metadata,
    Column('entry_id', Integer, primary_key=True),
    Column('name', String(40)),
    Column('account_id', Integer, ForeignKey(accounts_table.c.account_id)),
    Column('transaction_id', Integer, ForeignKey(transactions_table.c.transaction_id)),
)
metadata.create_all(engine)

class Account(object):
    def __init__(self, name):
        self.name = name
    def __repr__(self):
        return "<Account(%r)>" % self.name

class Transaction(object):
    def __init__(self, name):
        self.name = name
    def __repr__(self):
        return "<Transaction(%r)>" % self.name

class Entry(object):
    def __init__(self, name):
        self.name = name
    def __repr__(self):
        return "<Entry(%r)>" % self.name

mapper(Account, accounts_table)
mapper(Transaction, transactions_table)
mapper(Entry, entries_table, properties = dict(
    account = relation(Account, uselist=False, backref=backref('entries', lazy=True)),
    transaction = relation(Transaction, uselist=False, backref=backref('entries', lazy=False, join_depth=3)),
))

Session = sessionmaker(bind=engine, autoflush=True, transactional=True)
session = Session()

# Account 1 with its entries
acc1 = Account('acc1')
ent11 = Entry('ent11')
ent12 = Entry('ent12')
acc1.entries.append(ent11)
acc1.entries.append(ent12)

# Account 2 with its entries
acc2 = Account('acc2')
ent21 = Entry('ent21')
ent22 = Entry('ent22')
acc2.entries.append(ent21)
acc2.entries.append(ent22)

# Make connection between both accounts with transactions
tx1 = Transaction('tx1')
tx1.entries.append(ent11)
tx1.entries.append(ent21)

tx2 = Transaction('tx2')
tx2.entries.append(ent12)
tx2.entries.append(ent22)

session.save(acc1)
session.flush()
session.clear()

# I want to retrieve in ONE select all transactions engaged with acc1 and the account.name of each entry:
acc = session.query(Account).options(eagerload_all('entries.transaction.entries.account')).first()
assert acc.name == 'acc1'
assert acc.entries[0](0).transaction.entries[0](0).account.name == 'acc1'  # no new SELECT, that's fine.
assert acc.entries[0](0).transaction.entries[1](1).account.name == 'acc2'  # <== execute a new SELECT to retrieve all entries of acc2 (I just want 'acc2')

Comments (3)

  1. Mike Bayer reporter

    to do this requires another major refactoring of the mechanics of query/mapper/interfaces/strategies, which builds upon the major refactorings we've done so far in 0.4. We've gotten the idea of a "path" introduced using the LoaderStack, where EagerLoader can track its current position against an AliasedClauses object and the correct row decorator to use. this concept has to be expanded into the StrategizedProperty and PropertyOption classes so that mapper options are keyed by path and not the actual property instance. Also, the "path" tracked by AliasedClauses should be replaced by one consistent "path building" concept, which occurs both within setup()/setup_query() as well as _instance()/populate_instance(). The idea of hanging attributes and options off a particular path should be strongly implemented.

    this fragment of a patch, which changes just a small number of the actual amount of code that would have to change, suggests the direction to go:

    Index: lib/sqlalchemy/orm/interfaces.py
    ===================================================================
    --- lib/sqlalchemy/orm/interfaces.py    (revision 3507)
    +++ lib/sqlalchemy/orm/interfaces.py    (working copy)
    @@ -431,7 +431,7 @@
         """
    
         def _get_context_strategy(self, context):
    -        return self._get_strategy(context.attributes.get(("loaderstrategy", self), self.strategy.__class__))
    +        return self._get_strategy(context.attributes.get(("loaderstrategy", context.path), self.strategy.__class__))
    
         def _get_strategy(self, cls):
             try:
    @@ -457,35 +457,6 @@
             if self.is_primary():
                 self.strategy.init_class_attribute()
    
    -class LoaderStack(object):
    -    """a stack object used during load operations to track the 
    -    current position among a chain of mappers to eager loaders."""
    -    
    -    def __init__(self):
    -        self.__stack = [       
    -    def push_property(self, key):
    -        self.__stack.append(key)
    -        return tuple(self.__stack)
    -        
    -    def push_mapper(self, mapper):
    -        self.__stack.append(mapper.base_mapper)
    -        return tuple(self.__stack)
    -        
    -    def pop(self):
    -        self.__stack.pop()
    -        
    -    def snapshot(self):
    -        """return an 'snapshot' of this stack.
    -        
    -        this is a tuple form of the stack which can be used as a hash key.
    -        """
    -        
    -        return tuple(self.__stack)
    -        
    -    def __str__(self):
    -        return "->".join([str(s) for s in self.__stack](]
    -))
    -        
     class OperationContext(object):
         """Serve as a context during a query construction or instance
         loading operation.
    @@ -501,9 +472,24 @@
             for opt in util.flatten_iterator(options):
                 self.accept_option(opt)
    
    +    def push_property(self, key):
    +        self.path = self.path + (key)
    +        
    +    def push_mapper(self, mapper):
    +        self.path = self.path + (mapper)
    +        
    +    def pop(self):
    +        self.path = tuple(self.path[-1](-1))
    +
         def accept_option(self, opt):
             pass
    -
    +    
    +    def get_path_attribute(self, key):
    +        return self.attributes.get((self.path, key))
    +    
    +    def set_path_attribute(self, key, value):
    +        self.attributes[key)]((self.path,) = value
    +        
     class MapperOption(object):
         """Describe a modification to an OperationContext or Query."""
    
    @@ -580,10 +566,12 @@
             except AttributeError:
                 l = [            mapper = context.mapper
    +            l.append(mapper)
                 for token in self.key.split('.'):
                     prop = mapper.get_property(token, resolve_synonyms=True)
    -                l.append(prop)
    +                l.append(prop.key)
                     mapper = getattr(prop, 'mapper', None)
    +                l.append(mapper)
                 self.__prop = l
             return l
    
    Index: lib/sqlalchemy/orm/mapper.py
    ===================================================================
    --- lib/sqlalchemy/orm/mapper.py        (revision 3507)
    +++ lib/sqlalchemy/orm/mapper.py        (working copy)
    @@ -1478,13 +1478,13 @@
         def populate_instance(self, selectcontext, instance, row, ispostselect=None, isnew=False, **flags):
             """populate an instance from a result row."""
    
    -        snapshot = selectcontext.stack.push_mapper(self)
    +        selectcontext.push_mapper(self)
             # retrieve a set of "row population" functions derived from the MapperProperties attached
             # to this Mapper.  These are keyed in the select context based primarily off the 
             # "snapshot" of the stack, which represents a path from the lead mapper in the query to this one,
             # including relation() names.  the key also includes "self", and allows us to distinguish between
             # other mappers within our inheritance hierarchy
    -        populators = selectcontext.attributes.get(((isnew or ispostselect) and 'new_populators' or 'existing_populators', self, snapshot, ispostselect), None)
    +        populators = selectcontext.get_path_attribute(((isnew or ispostselect) and 'new_populators' or 'existing_populators', ispostselect), None)
             if populators is None:
                 # no populators; therefore this is the first time we are receiving a row for
                 # this result set.  issue create_row_processor() on all MapperProperty objects
    @@ -1505,8 +1505,8 @@
                 if poly_select_loader is not None:
                     post_processors.append(poly_select_loader)
    
    -            selectcontext.attributes[('new_populators', self, snapshot, ispostselect)](]
    ) = new_populators
    -            selectcontext.attributes[self, snapshot, ispostselect)](('existing_populators',) = existing_populators
    +            selectcontext.set_path_attribute(('new_populators', ispostselect), new_populators)
    +            selectcontext.set_path_attribute(('existing_populators', ispostselect), existing_populators)
                 selectcontext.attributes[self, ispostselect)](('post_processors',) = post_processors
                 if isnew or ispostselect:
                     populators = new_populators
    @@ -1516,7 +1516,7 @@
             for p in populators:
                 p(instance, row, ispostselect=ispostselect, isnew=isnew, **flags)
    
    -        selectcontext.stack.pop()
    +        selectcontext.pop()
    
             if self.non_primary:
                 selectcontext.attributes[instance)](('populating_mapper',) = self
    
  2. Log in to comment