super-self-referential m2m joins need to use annotations

Issue #3458 new
Mike Bayer repo owner created an issue

e.g. "node.id == node.id" essentially, one side is on the secondary, we need to make use of remote() annotations on "secondary" for this case. just need to re-review how the annotation/join thing works and add it here.

from sqlalchemy import (
    Column,
    Integer,
    and_,
    create_engine,
)
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import relationship
from sqlalchemy.orm import sessionmaker
from sqlalchemy.sql import select


Base = declarative_base()


class Node(Base):
    __tablename__ = 'node'

    id = Column(Integer, primary_key=True, nullable=False)

    left = Column(Integer, nullable=False)
    right = Column(Integer, nullable=False)

    def __repr__(self):
        return "<Node {self.id}>".format(self=self)

_node_parent = Node.__table__.alias()
_node_child = Node.__table__.alias()
_node_bridge = (
    select([_node_child.c.id.label('child_id'), _node_parent.c.id.label('parent_id')])
    .where(and_(
        _node_parent.c.left < _node_child.c.left,
        _node_child.c.right < _node_parent.c.right,
    ))
    .order_by(_node_child.c.id, _node_parent.c.left)
    .distinct(_node_child.c.id).alias()
)
Node.parent = relationship(
    Node,
    secondary=_node_bridge,
    primaryjoin=Node.id == _node_bridge.c.child_id,
    secondaryjoin=_node_bridge.c.parent_id == Node.id,
    viewonly=True,
    uselist=False
)


engine = create_engine('postgresql://scott:tiger@localhost/test', echo=True)
Base.metadata.drop_all(bind=engine)
Base.metadata.create_all(bind=engine)
session = sessionmaker(bind=engine)()

session.query(Node).delete()

node1 = Node(id=1, left=1, right=4)
node2 = Node(id=2, left=2, right=3)
node3 = Node(id=3, left=5, right=6)
session.add_all([node1, node2, node3])
session.commit()


# Note that the bridge table itself has the correct results -- the only
# parent/child link is that 1 is the parent of 2.
# Output:
# (2, 1)
for row in session.execute(_node_bridge):
    print(row)


# PROBLEM 1
# Output:
# <Node 1> <Node 1>
# <Node 2> <Node 1>
# <Node 3> <Node 1>
# This is incorrect -- node 3 has no parent.
# Generated SQL looks like:
#   SELECT node.id AS node_id, node."left" AS node_left, node."right" AS node_right, node_1.id AS node_1_id, node_1."left" AS node_1_left, node_1."right" AS node_1_right
#   FROM node
#   LEFT OUTER JOIN (
#       (
#           SELECT DISTINCT ON (node_2.id) node_2.id AS child_id, node_3.id AS parent_id
#           FROM node AS node_2, node AS node_3
#           WHERE node_3."left" < node_2."left" AND node_2."right" < node_3."right"
#           ORDER BY node_2.id, node_3."left"
#       ) AS anon_1
#       -- why are we joining node a second time here?
#       JOIN node AS node_1 ON anon_1.parent_id = node_1.id
#   -- what on earth is up with this ON clause?
#   ) ON anon_1.child_id = anon_1.child_id
q = (
    session.query(Node)
    .options(
        joinedload(Node.parent)
    )
    .all()
)
for node in q:
    print(node, node.parent)

Comments (3)

  1. Log in to comment