bind param replacement in join_condition() doesn't work for heterogeneous types
from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class CastToIntegerType(TypeDecorator):
impl = String
def column_expression(self, col):
return cast(col, Integer)
def bind_expression(self,col):
return cast(col, String)
class Person(Base):
__tablename__ = 'person'
id = Column('id_string', CastToIntegerType, primary_key=True)
pets = relationship('Pets',
primaryjoin='foreign(Pets.person_id)==Person.id')
class Pets(Base):
__tablename__ = 'pets'
id = Column('id', Integer, primary_key=True)
person_id = Column('person_id', Integer, ForeignKey('person.id_string'), primary_key=True)
e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)
s = Session(e)
s.add_all([Person(id="5", pets=[Pets(id="1")])])
s.commit()
p1 = s.query(Person).first()
p1.pets
generates:
SELECT pets.id AS pets_id, pets.person_id AS pets_person_id
FROM pets
WHERE pets.person_id = CAST(? AS VARCHAR)
should generate:
SELECT pets.id AS pets_id, pets.person_id AS pets_person_id
FROM pets
WHERE pets.person_id = ?
that is, inside of create_lazy_clause()->col_to_bind() we are taking the type of the column itself when we create the bindparam but not taking into account the thing we're comparing to.
this would fix that:
diff --git a/lib/sqlalchemy/orm/relationships.py b/lib/sqlalchemy/orm/relationships.py
index 552ce8b..6ac3ac0 100644
--- a/lib/sqlalchemy/orm/relationships.py
+++ b/lib/sqlalchemy/orm/relationships.py
@@ -2782,6 +2782,7 @@ class JoinCondition(object):
def create_lazy_clause(self, reverse_direction=False):
binds = util.column_dict()
equated_columns = util.column_dict()
+ types = util.column_dict()
has_secondary = self.secondaryjoin is not None
@@ -2790,12 +2791,15 @@ class JoinCondition(object):
for l, r in self.local_remote_pairs:
lookup[l].append((l, r))
equated_columns[r] = l
+ types[l] = r.type
elif not reverse_direction:
for l, r in self.local_remote_pairs:
equated_columns[r] = l
+ types[l] = r.type
else:
for l, r in self.local_remote_pairs:
equated_columns[l] = r
+ types[r] = l.type
def col_to_bind(col):
@@ -2808,7 +2812,8 @@ class JoinCondition(object):
):
if col not in binds:
binds[col] = sql.bindparam(
- None, None, type_=col.type, unique=True)
+ None, None,
+ type_=types.get(col, col.type), unique=True)
return binds[col]
return None
Comments (9)
-
reporter -
reporter - changed milestone to 1.1
- edited description
compared to
#3531this fix is probably the more localized one, easier to test -
reporter but...this fix assumes that the datatype on the "bind" side deals with a Python type that is compatible with the "column" side..... although, if Pets and Person had Integer / String as the Python-facing types, then you'd be putting CAST() into the primaryjoin....I guess.
-
reporter what we're trying to avoid is the double CAST.
With this mapping:
class Person(Base): __tablename__ = 'person' id = Column('id_string', CastToIntegerType, primary_key=True) pets = relationship('Pets', primaryjoin="foreign(Pets.person_id) == cast(Person.id, Integer)")
we get for lazyload:
WHERE pets.person_id = CAST(CAST(%(param_1)s AS VARCHAR) AS INTEGER)
for the joinedload we get:
LEFT OUTER JOIN pets AS pets_1 ON pets_1.person_id = CAST(person.id_string AS INTEGER)
so...double CAST, but it's not clear we really want to be changing anything here. CAST at compile time could perhaps collapse itself but that seems risky.
-
reporter here's a more pure test where the patch is needed:
from sqlalchemy import * from sqlalchemy.orm import * from sqlalchemy.ext.declarative import declarative_base Base = declarative_base() class GoofyA(TypeDecorator): impl = String def process_bind_param(self, value, dialect): return "PREFIX_A:" + value def process_result_value(self, value, dialect): return value[9:] class GoofyB(TypeDecorator): impl = String def process_bind_param(self, value, dialect): return "PREFIX_B:" + value def process_result_value(self, value, dialect): return value[9:] class A(Base): __tablename__ = 'a' id = Column(GoofyA, primary_key=True) bs = relationship("B") class B(Base): __tablename__ = 'b' id = Column(Integer, primary_key=True) a_id = Column(GoofyB, ForeignKey('a.id')) e = create_engine("sqlite://", echo=True) Base.metadata.create_all(e) s = Session(e) s.add(A(id='a1', bs=[B()])) s.commit() a1 = s.query(A).first() print a1.bs
-
reporter so, hypothesis, if a primaryjoin has "a == b" inside of it, while a and b can have different SQL representations, the Python type has to be the same. Otherwise, even persistence doesn't work, as it fails with these two types:
class GoofyA(TypeDecorator): impl = String def process_bind_param(self, value, dialect): return "PREFIX_A:%d" % (int(value) * 10) def process_result_value(self, value, dialect): return int(value[9:]) / 10 class GoofyB(TypeDecorator): impl = String def process_bind_param(self, value, dialect): return "PREFIX_B:" + value.capitalize() def process_result_value(self, value, dialect): return value[9:].lower()
because the a.id type as produced in Python can't be sent to B.a_id, it isn't compatible.
so this patch might be good to go.
-
reporter BANG. Here's where it breaks. Great, I knew I wasn't crazy:
from sqlalchemy import * from sqlalchemy.orm import * from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.dialects.postgresql import INET, CIDR Base = declarative_base() class InetAddress(object): def __init__(self, element): self.inet_element = element class CidrAddress(object): def __init__(self, element): self.cidr_element = element class Inet(TypeDecorator): impl = INET def process_bind_param(self, value, dialect): assert isinstance(value, InetAddress) return value.inet_element def process_result_value(self, value, dialect): return InetAddress(value) class Cidr(TypeDecorator): impl = CIDR def process_bind_param(self, value, dialect): assert isinstance(value, CidrAddress) return value.cidr_element def process_result_value(self, value, dialect): return CidrAddress(value) class IPA(Base): __tablename__ = 'ip_address' id = Column(Integer, primary_key=True) v4address = Column(Inet) network = relationship( "Network", primaryjoin="IPA.v4address.op('<<', is_comparison=True)" "(foreign(Network.v4representation))", viewonly=True) class Network(Base): __tablename__ = 'network' id = Column(Integer, primary_key=True) v4representation = Column(Cidr) e = create_engine("postgresql://scott:tiger@localhost/test", echo=True) Base.metadata.drop_all(e) Base.metadata.create_all(e) s = Session(e) s.add_all([ IPA(v4address=InetAddress("192.168.1.2")), Network(v4representation=CidrAddress("192.168.1.0/24"))]) s.commit() ip1 = s.query(IPA).first() print ip1.network
that is, both types have an entirely different Python representation because the SQL expression in fact is a comparison between two different types. This is valid. So. What's the solution to the original issue?
-
reporter what works is LateTypeCoerce in
#3531. so we are back to the beginning. -
reporter - changed status to resolved
works for me - the code right now is correct.
This is the best way to do the original mapping / type:
class CastToIntegerType(TypeDecorator): impl = String def column_expression(self, col): return cast(col, Integer) def process_bind_param(self, value, dialect): return str(value) class Person(Base): __tablename__ = 'person' id = Column('id_string', CastToIntegerType, primary_key=True) pets = relationship( 'Pets', primaryjoin='foreign(Pets.person_id)==cast(Person.id, Integer)')
we keep the Python conversion of int to string in Python, then cast in the primaryjoin. That way lazyload gives us:
SELECT pets.id AS pets_id, pets.person_id AS pets_person_id FROM pets WHERE pets.person_id = CAST(%(param_1)s AS INTEGER) 2015-09-16 14:37:00,449 INFO sqlalchemy.engine.base.Engine {'param_1': '5'}
joinedload gives us:
SELECT CAST(anon_1.person_id_string AS INTEGER) AS anon_1_person_id_string, pets_1.id AS pets_1_id, pets_1.person_id AS pets_1_person_id FROM (SELECT person.id_string AS person_id_string FROM person LIMIT %(param_1)s) AS anon_1 LEFT OUTER JOIN pets AS pets_1 ON pets_1.person_id = CAST(anon_1.person_id_string AS INTEGER)
so no double cast, and also the fact that we have string coersion still happening in the lazyload is appropriate, as the type converters take place the same way as they would in the joined load.
- Log in to comment
see
#3531for a different bug in a variant of this test case. either issue would allow this to work.