bind param replacement in join_condition() doesn't work for heterogeneous types

Issue #3530 resolved
Mike Bayer repo owner created an issue
from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()


class CastToIntegerType(TypeDecorator):
    impl = String

    def column_expression(self, col):
        return cast(col, Integer)

    def bind_expression(self,col):
        return cast(col, String)

class Person(Base):
    __tablename__ = 'person'
    id = Column('id_string', CastToIntegerType, primary_key=True)

    pets = relationship('Pets',
        primaryjoin='foreign(Pets.person_id)==Person.id')

class Pets(Base):
    __tablename__ = 'pets'
    id = Column('id', Integer, primary_key=True)
    person_id = Column('person_id', Integer, ForeignKey('person.id_string'), primary_key=True)


e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)

s = Session(e)
s.add_all([Person(id="5", pets=[Pets(id="1")])])
s.commit()

p1 = s.query(Person).first()
p1.pets

generates:

SELECT pets.id AS pets_id, pets.person_id AS pets_person_id 
FROM pets 
WHERE pets.person_id = CAST(? AS VARCHAR)

should generate:

SELECT pets.id AS pets_id, pets.person_id AS pets_person_id 
FROM pets 
WHERE pets.person_id = ?

that is, inside of create_lazy_clause()->col_to_bind() we are taking the type of the column itself when we create the bindparam but not taking into account the thing we're comparing to.

this would fix that:

diff --git a/lib/sqlalchemy/orm/relationships.py b/lib/sqlalchemy/orm/relationships.py
index 552ce8b..6ac3ac0 100644
--- a/lib/sqlalchemy/orm/relationships.py
+++ b/lib/sqlalchemy/orm/relationships.py
@@ -2782,6 +2782,7 @@ class JoinCondition(object):
     def create_lazy_clause(self, reverse_direction=False):
         binds = util.column_dict()
         equated_columns = util.column_dict()
+        types = util.column_dict()

         has_secondary = self.secondaryjoin is not None

@@ -2790,12 +2791,15 @@ class JoinCondition(object):
             for l, r in self.local_remote_pairs:
                 lookup[l].append((l, r))
                 equated_columns[r] = l
+                types[l] = r.type
         elif not reverse_direction:
             for l, r in self.local_remote_pairs:
                 equated_columns[r] = l
+                types[l] = r.type
         else:
             for l, r in self.local_remote_pairs:
                 equated_columns[l] = r
+                types[r] = l.type

         def col_to_bind(col):

@@ -2808,7 +2812,8 @@ class JoinCondition(object):
             ):
                 if col not in binds:
                     binds[col] = sql.bindparam(
-                        None, None, type_=col.type, unique=True)
+                        None, None,
+                        type_=types.get(col, col.type), unique=True)
                 return binds[col]
             return None

Comments (9)

  1. Mike Bayer reporter

    but...this fix assumes that the datatype on the "bind" side deals with a Python type that is compatible with the "column" side..... although, if Pets and Person had Integer / String as the Python-facing types, then you'd be putting CAST() into the primaryjoin....I guess.

  2. Mike Bayer reporter

    what we're trying to avoid is the double CAST.

    With this mapping:

    class Person(Base):
        __tablename__ = 'person'
        id = Column('id_string', CastToIntegerType, primary_key=True)
    
        pets = relationship('Pets',
            primaryjoin="foreign(Pets.person_id) == cast(Person.id, Integer)")
    

    we get for lazyload:

    WHERE pets.person_id = CAST(CAST(%(param_1)s AS VARCHAR) AS INTEGER)
    

    for the joinedload we get:

    LEFT OUTER JOIN pets AS pets_1 ON pets_1.person_id = CAST(person.id_string AS INTEGER)
    

    so...double CAST, but it's not clear we really want to be changing anything here. CAST at compile time could perhaps collapse itself but that seems risky.

  3. Mike Bayer reporter

    here's a more pure test where the patch is needed:

    from sqlalchemy import *
    from sqlalchemy.orm import *
    from sqlalchemy.ext.declarative import declarative_base
    
    Base = declarative_base()
    
    
    class GoofyA(TypeDecorator):
        impl = String
    
        def process_bind_param(self, value, dialect):
            return "PREFIX_A:" + value
    
        def process_result_value(self, value, dialect):
            return value[9:]
    
    
    class GoofyB(TypeDecorator):
        impl = String
    
        def process_bind_param(self, value, dialect):
            return "PREFIX_B:" + value
    
        def process_result_value(self, value, dialect):
            return value[9:]
    
    
    class A(Base):
        __tablename__ = 'a'
        id = Column(GoofyA, primary_key=True)
        bs = relationship("B")
    
    
    class B(Base):
        __tablename__ = 'b'
        id = Column(Integer, primary_key=True)
        a_id = Column(GoofyB, ForeignKey('a.id'))
    
    e = create_engine("sqlite://", echo=True)
    Base.metadata.create_all(e)
    
    
    s = Session(e)
    
    s.add(A(id='a1', bs=[B()]))
    s.commit()
    
    
    a1 = s.query(A).first()
    print a1.bs
    
  4. Mike Bayer reporter

    so, hypothesis, if a primaryjoin has "a == b" inside of it, while a and b can have different SQL representations, the Python type has to be the same. Otherwise, even persistence doesn't work, as it fails with these two types:

    class GoofyA(TypeDecorator):
        impl = String
    
        def process_bind_param(self, value, dialect):
            return "PREFIX_A:%d" % (int(value) * 10)
    
        def process_result_value(self, value, dialect):
            return int(value[9:]) / 10
    
    
    class GoofyB(TypeDecorator):
        impl = String
    
        def process_bind_param(self, value, dialect):
            return "PREFIX_B:" + value.capitalize()
    
        def process_result_value(self, value, dialect):
            return value[9:].lower()
    

    because the a.id type as produced in Python can't be sent to B.a_id, it isn't compatible.

    so this patch might be good to go.

  5. Mike Bayer reporter

    BANG. Here's where it breaks. Great, I knew I wasn't crazy:

    from sqlalchemy import *
    from sqlalchemy.orm import *
    from sqlalchemy.ext.declarative import declarative_base
    
    from sqlalchemy.dialects.postgresql import INET, CIDR
    
    Base = declarative_base()
    
    
    class InetAddress(object):
        def __init__(self, element):
            self.inet_element = element
    
    
    class CidrAddress(object):
        def __init__(self, element):
            self.cidr_element = element
    
    
    class Inet(TypeDecorator):
        impl = INET
    
        def process_bind_param(self, value, dialect):
            assert isinstance(value, InetAddress)
            return value.inet_element
    
        def process_result_value(self, value, dialect):
            return InetAddress(value)
    
    
    class Cidr(TypeDecorator):
        impl = CIDR
    
        def process_bind_param(self, value, dialect):
            assert isinstance(value, CidrAddress)
            return value.cidr_element
    
        def process_result_value(self, value, dialect):
            return CidrAddress(value)
    
    
    class IPA(Base):
        __tablename__ = 'ip_address'
    
        id = Column(Integer, primary_key=True)
        v4address = Column(Inet)
    
        network = relationship(
            "Network",
            primaryjoin="IPA.v4address.op('<<', is_comparison=True)"
                        "(foreign(Network.v4representation))",
                        viewonly=True)
    
    
    class Network(Base):
        __tablename__ = 'network'
    
        id = Column(Integer, primary_key=True)
        v4representation = Column(Cidr)
    
    
    e = create_engine("postgresql://scott:tiger@localhost/test", echo=True)
    Base.metadata.drop_all(e)
    Base.metadata.create_all(e)
    
    s = Session(e)
    s.add_all([
        IPA(v4address=InetAddress("192.168.1.2")),
        Network(v4representation=CidrAddress("192.168.1.0/24"))])
    s.commit()
    
    
    ip1 = s.query(IPA).first()
    print ip1.network
    

    that is, both types have an entirely different Python representation because the SQL expression in fact is a comparison between two different types. This is valid. So. What's the solution to the original issue?

  6. Mike Bayer reporter

    works for me - the code right now is correct.

    This is the best way to do the original mapping / type:

    class CastToIntegerType(TypeDecorator):
        impl = String
    
        def column_expression(self, col):
            return cast(col, Integer)
    
        def process_bind_param(self, value, dialect):
            return str(value)
    
    
    class Person(Base):
        __tablename__ = 'person'
        id = Column('id_string', CastToIntegerType, primary_key=True)
    
        pets = relationship(
            'Pets',
            primaryjoin='foreign(Pets.person_id)==cast(Person.id, Integer)')
    

    we keep the Python conversion of int to string in Python, then cast in the primaryjoin. That way lazyload gives us:

    SELECT pets.id AS pets_id, pets.person_id AS pets_person_id
    FROM pets
    WHERE pets.person_id = CAST(%(param_1)s AS INTEGER)
    2015-09-16 14:37:00,449 INFO sqlalchemy.engine.base.Engine {'param_1': '5'}
    

    joinedload gives us:

    SELECT CAST(anon_1.person_id_string AS INTEGER) AS anon_1_person_id_string, pets_1.id AS pets_1_id, pets_1.person_id AS pets_1_person_id
    FROM (SELECT person.id_string AS person_id_string
    FROM person
     LIMIT %(param_1)s) AS anon_1 LEFT OUTER JOIN pets AS pets_1 ON pets_1.person_id = CAST(anon_1.person_id_string AS INTEGER)
    

    so no double cast, and also the fact that we have string coersion still happening in the lazyload is appropriate, as the type converters take place the same way as they would in the joined load.

  7. Log in to comment