find cycles algorithm needs to be exhaustive

Issue #2282 resolved
Mike Bayer repo owner created an issue

here's a test where the find cycles result changes each time. When it fails to find all cycles, some nodes don't get counted and an assertion fails. If we tinker around with removing the assertion and adding it instead to the conditional I can get a PG flush to break randomly.

#!/usr/bin/env python

from sqlalchemy import create_engine, Integer, ForeignKey, Column, __version__
from sqlalchemy.orm import relationship, sessionmaker
from sqlalchemy.schema import MetaData
from sqlalchemy.ext.declarative import declarative_base

print __version__

metadata = MetaData()
Model = declarative_base(metadata=metadata)


class First(Model):
    __tablename__ = 'First'
    id = Column(Integer, primary_key=True)
    second_id = Column(Integer, ForeignKey('Second.id', use_alter=True,
                                           name='first_second_fkey'),
                       unique=True)
    second = relationship('Second')


class Second(Model):
    __tablename__ = 'Second'
    id = Column(Integer, primary_key=True)

    third_id = Column(ForeignKey('Third.id', ), nullable=False)
    third = relationship('Third', backref='seconds')
    fourth_id = Column(ForeignKey('Fourth.id'))
    fourth = relationship('Fourth')


class Third(Model):
    __tablename__ = 'Third'
    id = Column(Integer, primary_key=True)
    first_id = Column(ForeignKey('First.id'), nullable=False)
    first = relationship('First', )


class Fourth(Model):
    __tablename__ = 'Fourth'
    id = Column(Integer, primary_key=True)
    first_id = Column(ForeignKey('First.id'))
    first = relationship('First')

def create_instances(db):
    first = First()
    third = Third(first=first)
    second = Second(third=third)
    fourth = Fourth(first=first)
    db.add_all([second, third, fourth](first,))
    db.flush()

    assert db.execute(First.__table__.select()).first() == (1, None)
    assert db.execute(Second.__table__.select()).first() == (1, 1, None)
    assert db.execute(Third.__table__.select()).first() == (1, 1)
    assert db.execute(Fourth.__table__.select()).first() == (1, 1)


def test(metadata):
    engine = create_engine('postgresql://scott:tiger@localhost/test', echo=True)
    #engine = create_engine('sqlite:///:memory:', echo=True)
    conn = engine.connect()
    trans = conn.begin()
    try:
        metadata.create_all(conn)
        Session = sessionmaker(autoflush=False, bind=conn)
        db = Session()
        create_instances(db)
    finally:
        trans.rollback()
    db.close()
    conn.close()

for i in xrange(10):
    test(metadata)

its really tough to trip this one with the usual "randomize" approach too for some reason, you need to run it multiple times. find_cycles doesn't come up with the same result each time - depending on which node it starts with, it pops that node from the "todo" list, then another cycle which might involve it fails to be located. This patch so far seems to fix it by making it do the whole cycle pass for every node:

--- a/lib/sqlalchemy/util/topological.py    Thu Sep 22 11:55:15 2011 -0400
+++ b/lib/sqlalchemy/util/topological.py    Thu Sep 22 18:32:59 2011 -0400
@@ -56,20 +56,22 @@

     output = set()

-    while todo:
-        node = todo.pop()
+    for node in todo:
+        items = todo.difference([node](node))
+#    while todo:
+#        node = todo.pop()
         stack = [node](node)
         while stack:
             top = stack[-1](-1)
             for node in edges[top](top):
                 if node in stack:
                     cyc = stack[stack.index(node):](stack.index(node):)
-                    todo.difference_update(cyc)
+                    items.difference_update(cyc)
                     output.update(cyc)

-                if node in todo:
+                if node in items:
                     stack.append(node)
-                    todo.remove(node)
+                    items.remove(node)
                     break
             else:
                 node = stack.pop()

that patch in turn causes one of the cycles tests in test_dependency to add an extra node to the result.

Comments (3)

  1. Mike Bayer reporter

    Um OK the randomize thing didn't work because i had it pointing to a dead topological.pyc file in my checkout that's been removed since april....carry on..

  2. Log in to comment