Commits

Michael Granger committed 92cc211

Honor Ruby's default_internal encoding when connecting via the synchronous interface; fixes #33

Comments (0)

Files changed (2)

  * UTILITY FUNCTIONS
  **************************************************************************/
 
-static void free_pgconn(PGconn *);
-static void pgresult_check(VALUE, VALUE);
-
-static PGconn *get_pgconn(VALUE self);
-static VALUE pgconn_finish(VALUE self);
-static VALUE pgresult_clear(VALUE self);
-static VALUE pgresult_aref(VALUE self, VALUE index);
-static VALUE make_column_result_array( VALUE self, int col );
+static void free_pgconn( PGconn * );
+static void pgresult_check( VALUE, VALUE );
+
+static PGconn *get_pgconn( VALUE );
+static VALUE pgconn_finish( VALUE );
+static VALUE pgresult_clear( VALUE );
+static VALUE pgresult_aref( VALUE, VALUE );
+static VALUE make_column_result_array( VALUE, int );
 
 #ifdef M17N_SUPPORTED
-# define ASSOCIATE_INDEX(obj, index_holder) rb_enc_associate_index((obj), enc_get_index((index_holder)))
-static rb_encoding * pgconn_get_client_encoding_as_rb_encoding(PGconn* conn);
-static int enc_get_index(VALUE val);
+# define ASSOCIATE_INDEX( obj, index_holder ) rb_enc_associate_index((obj), enc_get_index((index_holder)))
+static rb_encoding * pgconn_get_client_encoding_as_rb_encoding( PGconn * );
+static const char *  pgconn_get_rb_encoding_as_pg_encname( rb_encoding * );
+static int enc_get_index( VALUE );
 #else
-# define ASSOCIATE_INDEX(obj, index_holder) /* nothing */
+# define ASSOCIATE_INDEX( obj, index_holder ) /* nothing */
 #endif
 
 static PQnoticeReceiver default_notice_receiver = NULL;
  *   # As an Array
  *   PGconn.connect( nil, 5432, nil, nil, 'test', nil, nil )
  *  
- * On failure, it raises a PGError.
+ * If the Ruby default internal encoding is set (i.e., Encoding.default_internal != nil), the
+ * connection will have its +client_encoding+ set accordingly.
+ * 
+ * @raises [PGError] if the connection fails.
  */
 static VALUE
 pgconn_init(int argc, VALUE *argv, VALUE self)
 	PGconn *conn = NULL;
 	VALUE conninfo;
 	VALUE error;
+#ifdef M17N_SUPPORTED	
+	rb_encoding *enc;
+	const char *encname;
+#endif
 
 	conninfo = parse_connect_args(argc, argv, self);
 	conn = PQconnectdb(StringValuePtr(conninfo));
 		rb_exc_raise(error);
 	}
 
+#ifdef M17N_SUPPORTED
+	/* If Ruby has its Encoding.default_internal set, set PostgreSQL's client_encoding 
+	 * to match */
+	if (( enc = rb_default_internal_encoding() )) {
+		encname = pgconn_get_rb_encoding_as_pg_encname( enc );
+		if ( PQsetClientEncoding(conn, encname) != 0 )
+			rb_warn( "Failed to set the default_internal encoding to %s: '%s'",
+			         encname, PQerrorMessage(conn) );
+	}
+#endif
+
 	if (rb_block_given_p()) {
 		return rb_ensure(rb_yield, self, pgconn_finish, self);
 	}
  * This is an asynchronous version of PGconn.connect().
  *
  * Use PGconn#connect_poll to poll the status of the connection.
+ *
+ * NOTE: this does *not* set the connection's +client_encoding+ for you if
+ * Encoding.default_internal is set. To set it after the connection is established,
+ * call PGconn#internal_encoding=. You can also have it set automatically by setting
+ * ENV['PGCLIENTENCODING'], or by including the 'options' connection parameter.
+ * 
  */
 static VALUE
 pgconn_s_connect_start(int argc, VALUE *argv, VALUE self)
 	VALUE ary = rb_ary_new();
 	VALUE hash;
 	int i = 0;
-	
+
 	for(i = 0; options[i].keyword != NULL; i++) {
 		hash = rb_hash_new();
 		if(options[i].keyword)
 {
 	char *encrypted = NULL;
 	VALUE rval = Qnil;
-	
+
 	Check_Type(password, T_STRING);
 	Check_Type(username, T_STRING);
 
 		else
 			paramFormats[i] = NUM2INT(param_format);
 	}
-	
+
 	result = PQexecParams(conn, StringValuePtr(command), nParams, paramTypes, 
 		(const char * const *)paramValues, paramLengths, paramFormats, resultFormat);
 
 		else
 			paramFormats[i] = NUM2INT(param_format);
 	}
-	
+
 	result = PQexecPrepared(conn, StringValuePtr(name), nParams, 
 		(const char * const *)paramValues, paramLengths, paramFormats, 
 		resultFormat);
 		else
 			paramFormats[i] = NUM2INT(param_format);
 	}
-	
+
 	result = PQsendQueryParams(conn, StringValuePtr(command), nParams, paramTypes, 
 		(const char * const *)paramValues, paramLengths, paramFormats, resultFormat);
 
 		else
 			paramFormats[i] = NUM2INT(param_format);
 	}
-	
+
 	result = PQsendQueryPrepared(conn, StringValuePtr(name), nParams, 
 		(const char * const *)paramValues, paramLengths, paramFormats, 
 		resultFormat);
 		rb_raise(rb_ePGError,"Invalid connection!");
 
 	ret = PQcancel(cancel, errbuf, 256);
-	if(ret == 1) 
+	if(ret == 1)
 		retval = Qnil;
 	else
 		retval = rb_str_new2(errbuf);
 	if (notify == NULL) {
 		return Qnil;
 	}
-	
+
 	hash = rb_hash_new();
 	relname = rb_tainted_str_new2(notify->relname);
 	be_pid = INT2NUM(notify->be_pid);
 	extra = rb_tainted_str_new2(PGNOTIFY_EXTRA(notify));
-	
+
 	rb_hash_aset(hash, sym_relname, relname);
 	rb_hash_aset(hash, sym_be_pid, be_pid);
 	rb_hash_aset(hash, sym_extra, extra);
 	} else if (ret < 0) {
 		rb_sys_fail(0);
 	}
-	
+
     if ( (ret = PQconsumeInput(conn)) != 1 ) {
 		rb_raise(rb_ePGError, "PQconsumeInput == %d: %s", ret, PQerrorMessage(conn));
 	}
-	
+
     while ((notify = PQnotifies(conn)) != NULL) {
         relname = rb_tainted_str_new2(notify->relname);
         be_pid = INT2NUM(notify->be_pid);
 	PGresult *result;
 	VALUE rb_pgresult;
 	int status;
-	
+
 	if (rb_block_given_p()) {
 		result = PQexec(conn, "BEGIN");
 		rb_pgresult = new_pgresult(result, conn);
 			pgresult_check(self, rb_pgresult);
 			rb_jump_tag(status);
 		}
-			
+
 	}
 	else {
 		/* no block supplied? */
 	 * double-quotes. */
 	char buffer[NAMEDATALEN*2+2];
 	unsigned int i=0,j=0;
-	
+
 	if(strlen(str) >= NAMEDATALEN) {
 		rb_raise(rb_eArgError, 
 			"Input string is longer than NAMEDATALEN-1 (%d)",
 
 	if( col_number < 0 || col_number >= PQnfields(pgresult)) 
 		rb_raise(rb_eArgError,"Invalid column index: %d", col_number);
-	
+
 	n = PQftablecol(pgresult, col_number);
 	return INT2FIX(n);
 }
 				PQgetlength(result, tuple_num, field_num));
 
 			/* associate client encoding for text format only */
-			if(0 == PQfformat(result, field_num)) { 
+			if(0 == PQfformat(result, field_num)) {
+				fflush( stdout );
 				ASSOCIATE_INDEX(val, self);
 			} else {
 #ifdef M17N_SUPPORTED
+				fflush( stdout );
 				rb_enc_associate(val, rb_ascii8bit_encoding());
 #endif
 			}
 
 	if ( fnum < 0 )
 		rb_raise( rb_eIndexError, "no such field '%s' in result", fieldname );
-	
+
 	return make_column_result_array( self, fnum );
 }
 
 	int row = PQntuples( result );
 	VALUE ary = rb_ary_new2( row );
 	VALUE val = Qnil;
-	
+
 	if ( col >= PQnfields(result) )
 		rb_raise( rb_eIndexError, "no column %d in result", col );
 
 
 		rb_ary_store( ary, row, val );
 	}
-	
+
 	return ary;
 }
 
  * The mapping from canonical encoding names in PostgreSQL to ones in Ruby.
  */
 static const char * const (enc_pg2ruby_mapping[][2]) = {
-	    {"BIG5",          "Big5"       },
-	    {"EUC_CN",        "GB2312"     },
-	    {"EUC_JP",        "EUC-JP"     },
-	    {"EUC_JIS_2004",  "EUC-JP"     },
-	    {"EUC_KR",        "EUC-KR"     },
-	    {"EUC_TW",        "EUC-TW"     },
-	    {"GB18030",       "GB18030"    },
-	    {"GBK",           "GBK"        },
-	    {"ISO_8859_5",    "ISO-8859-5" },
-	    {"ISO_8859_6",    "ISO-8859-6" },
-	    {"ISO_8859_7",    "ISO-8859-7" },
-	    {"ISO_8859_8",    "ISO-8859-8" },
-	    /* {"JOHAB",         "JOHAB"     }, dummy */
-	    {"KOI8",          "KOI8-U"     },
-	    {"LATIN1",        "ISO-8859-1" },
-	    {"LATIN2",        "ISO-8859-2" },
-	    {"LATIN3",        "ISO-8859-3" },
-	    {"LATIN4",        "ISO-8859-4" },
-	    {"LATIN5",        "ISO-8859-5" },
-	    {"LATIN6",        "ISO-8859-6" },
-	    {"LATIN7",        "ISO-8859-7" },
-	    {"LATIN8",        "ISO-8859-8" },
-	    {"LATIN9",        "ISO-8859-9" },
+	    {"BIG5",          "Big5"        },
+	    {"EUC_CN",        "GB2312"      },
+	    {"EUC_JP",        "EUC-JP"      },
+	    {"EUC_JIS_2004",  "EUC-JP"      },
+	    {"EUC_KR",        "EUC-KR"      },
+	    {"EUC_TW",        "EUC-TW"      },
+	    {"GB18030",       "GB18030"     },
+	    {"GBK",           "GBK"         },
+	    {"ISO_8859_5",    "ISO-8859-5"  },
+	    {"ISO_8859_6",    "ISO-8859-6"  },
+	    {"ISO_8859_7",    "ISO-8859-7"  },
+	    {"ISO_8859_8",    "ISO-8859-8"  },
+	    /* {"JOHAB",         "JOHAB"       }, dummy */
+	    {"KOI8",          "KOI8-R"      },
+	    {"KOI8R",         "KOI8-R"      },
+	    {"KOI8U",         "KOI8-U"      },
+	    {"LATIN1",        "ISO-8859-1"  },
+	    {"LATIN2",        "ISO-8859-2"  },
+	    {"LATIN3",        "ISO-8859-3"  },
+	    {"LATIN4",        "ISO-8859-4"  },
+	    {"LATIN5",        "ISO-8859-5"  },
+	    {"LATIN6",        "ISO-8859-6"  },
+	    {"LATIN7",        "ISO-8859-7"  },
+	    {"LATIN8",        "ISO-8859-8"  },
+	    {"LATIN9",        "ISO-8859-9"  },
 	    {"LATIN10",       "ISO-8859-10" },
-	    {"MULE_INTERNAL", "Emacs-Mule" },
+	    {"MULE_INTERNAL", "Emacs-Mule"  },
 	    {"SJIS",          "Windows-31J" },
 	    {"SHIFT_JIS_2004","Windows-31J" },
-	    /*{"SQL_ASCII",     NULL        },  special case*/
+	    /* {"SQL_ASCII",     NULL          },  special case*/
 	    {"UHC",           "CP949"       },
 	    {"UTF8",          "UTF-8"       },
 	    {"WIN866",        "IBM866"      },
 	return enc;
 }
 
+
+/*
+ * Map a Ruby encoding to the name PostgreSQL uses for the same character set.
+ *
+ * Looks up rb_enc_name( enc ) in the Ruby-name column (index 1) of
+ * enc_pg2ruby_mapping and returns the PostgreSQL name (index 0) of the FIRST
+ * matching row.
+ *
+ * Several Ruby names are listed on more than one row: "ISO-8859-5" appears for
+ * both ISO_8859_5 and LATIN5, "EUC-JP" for both EUC_JP and EUC_JIS_2004, and
+ * "Windows-31J" for both SJIS and SHIFT_JIS_2004. Scanning to the end of the
+ * table would hand back the later alias; PostgreSQL's LATIN5 is ISO 8859-9, so
+ * returning it for Ruby's ISO-8859-5 would select the wrong character set.
+ * Stopping at the first row returns the earlier, same-named entry instead.
+ *
+ * Returns "SQL_ASCII" when the table has no row for the encoding. The result
+ * points at a string literal; callers must not free or modify it.
+ */
+static const char *
+pgconn_get_rb_encoding_as_pg_encname( rb_encoding *enc )
+{
+	const char *rb_encname = rb_enc_name( enc );
+	const size_t nmappings = sizeof(enc_pg2ruby_mapping)/sizeof(enc_pg2ruby_mapping[0]);
+	size_t i;  /* size_t to match the sizeof-derived bound (no signed/unsigned compare) */
+
+	for ( i = 0; i < nmappings; ++i ) {
+		if ( strcmp(rb_encname, enc_pg2ruby_mapping[i][1]) == 0 )
+			return enc_pg2ruby_mapping[i][0];
+	}
+
+	/* No known PostgreSQL equivalent: fall back to SQL_ASCII. */
+	return "SQL_ASCII";
+}
+
+
 /*
  * call-seq:
  *   conn.internal_encoding() -> Encoding
 	rb_define_singleton_method(rb_cPGconn, "conndefaults", pgconn_s_conndefaults, 0);
 
 	/******     PGconn CLASS CONSTANTS: Connection Status     ******/
-	
+
 	/* Connection succeeded */
 	rb_define_const(rb_cPGconn, "CONNECTION_OK", INT2FIX(CONNECTION_OK));
 	/* Connection failed */
 	rb_define_const(rb_cPGconn, "PGRES_POLLING_OK", INT2FIX(PGRES_POLLING_OK));
 
 	/******     PGconn CLASS CONSTANTS: Transaction Status     ******/
-	
+
 	/* Transaction is currently idle (#transaction_status) */
 	rb_define_const(rb_cPGconn, "PQTRANS_IDLE", INT2FIX(PQTRANS_IDLE));
 	/* Transaction is currently active; query has been sent to the server, but not yet completed. (#transaction_status) */

spec/m17n_spec.rb

 					res = conn.exec( stmt, [], 0 )
 					out_string = res[0]['column1']
 				end
-				out_string.should == 'foo'.encode(Encoding::ASCII_8BIT)
+				out_string.should == 'foo'.encode( Encoding::ASCII_8BIT )
 				out_string.encoding.should == Encoding::ASCII_8BIT
 			end
 		end
 
-		it "should use client encoding for escaped string" do
-			original = "string to escape".force_encoding("euc-jp")
-			@conn.set_client_encoding("euc_jp")
-			escaped  = @conn.escape(original)
+		it "uses the client encoding for escaped string" do
+			original = "string to escape".force_encoding( "euc-jp" )
+			@conn.set_client_encoding( "euc_jp" )
+			escaped  = @conn.escape( original )
 			escaped.encoding.should == Encoding::EUC_JP
 		end
+	end
+
+	describe "Ruby 1.9.x default_internal encoding" do
+
+		# Verifies that a connection opened through the synchronous PGconn.connect
+		# adopts Encoding.default_internal as its client encoding, and that text
+		# values read through it are tagged with that encoding.
+		it "honors the Encoding.default_internal if it's set and the synchronous interface is used" do
+			# Seed a table through the shared connection, with that connection's
+			# encoding set to ISO-8859-1 for the duration of the insert.
+			# NOTE(review): the table is not dropped in this example; presumably a
+			# surrounding hook or rollback cleans it up -- confirm.
+			@conn.transaction do |txn_conn|
+				txn_conn.internal_encoding = Encoding::ISO8859_1
+				txn_conn.exec( "CREATE TABLE defaultinternaltest ( foo text )" )
+				txn_conn.exec( "INSERT INTO defaultinternaltest VALUES ('Grün und Weiß')" )
+			end
+
+			begin
+				# Encoding.default_internal is process-global, so remember the
+				# current value and put it back in the ensure clause below.
+				prev_encoding = Encoding.default_internal
+				Encoding.default_internal = Encoding::UTF_8
+
+				# A fresh connection made while default_internal is UTF-8 should
+				# report UTF-8 and return UTF-8-tagged strings.
+				conn = PGconn.connect( @conninfo )
+				conn.internal_encoding.should == Encoding::UTF_8
+				res = conn.exec( "SELECT foo FROM defaultinternaltest" )
+				res[0]['foo'].encoding.should == Encoding::UTF_8
+			ensure
+				# conn is nil if PGconn.connect itself raised.
+				conn.finish if conn
+				Encoding.default_internal = prev_encoding
+			end
+		end
 
 	end