Commits

ph...@9ae0c189-cd1f-4510-a509-f4891f5cf20d  committed 2513bb3

Modified Files:
LibXML.xs LibXML.pm dom.c dom.h
+ garbage collection problems with documents fixed.
+ encoding bugs fixed (the DOM is definitely in UTF-8 now)
+ encodeToUTF8()
+ decodeFromUTF8()

  • Participants
  • Parent commits 9d74424

Comments (0)

Files changed (4)

 use vars qw($VERSION @ISA @EXPORT);
 use Carp;
 
-$VERSION = "0.94";
+$VERSION = "0.92";
 require Exporter;
 require DynaLoader;
 
               XML_ENTITY_DECL
               XML_NAMESPACE_DECL
               XML_XINCLUDE_START
-              XML_XINCLUDE_END );
+              XML_XINCLUDE_END
+              encodeToUTF8
+              decodeFromUTF8
+            );
 
 
 sub new {
     return $result;
 }
 
-
 sub XML_ELEMENT_NODE(){1;}
 sub XML_ATTRIBUTE_NODE(){2;}
 sub XML_TEXT_NODE(){3;}
         }
         else {
             if (real_dom->encoding == NULL) {
-                real_dom->encoding = xmlStrdup("UTF-8");
+                real_dom->encoding = "UTF-8";
             }
 
             proxy = make_proxy_node( (xmlNodePtr)real_dom ); 
     OUTPUT:
         RETVAL
 
+SV*
+encodeToUTF8( encoding, string )
+        const char * encoding
+        const char * string
+    PREINIT:
+        char * tstr;
+    CODE:
+        tstr =  domEncodeString( encoding, string ); /* convert string from the named encoding to UTF-8; may yield NULL */
+        RETVAL = newSVpvn( (char *)tstr, xmlStrlen( tstr ) ); /* copy into a new Perl scalar; NOTE(review): tstr is not freed in this XSUB */
+    OUTPUT:
+        RETVAL
+
+SV*
+decodeFromUTF8( encoding, string ) 
+        const char * encoding
+        const char * string
+    PREINIT:
+        char * tstr;
+    CODE: 
+        tstr =  domDecodeString( encoding, string ); /* convert the UTF-8 string into the named encoding; may yield NULL */
+        RETVAL = newSVpvn( (char *)tstr, xmlStrlen( tstr ) ); /* copy into a new Perl scalar; NOTE(review): tstr is not freed in this XSUB */
+    OUTPUT:
+        RETVAL
+
 
 MODULE = XML::LibXML         PACKAGE = XML::LibXML::Document
 
 void
 DESTROY(self)
         ProxyObject* self
+    PREINIT:
+        xmlDocPtr real_node;
     CODE:
-
+       
+   
 
 SV *
 toString(self, format=0)
 	        croak("Failed to convert doc to string");
     	} else {
             RETVAL = newSVpvn((char *)result, (STRLEN)len);
-	    xmlFree(result);
-	}
-        xmlReconciliateNs( real_dom, xmlDocGetRootElement( real_dom ) );
+            xmlFree(result);
+        }
+        xmlReconciliateNs(real_dom,xmlDocGetRootElement(real_dom));
     OUTPUT:
         RETVAL
 
     PREINIT:
         char * CLASS = "XML::LibXML::Element";
         xmlNodePtr newNode;
+        xmlDocPtr real_dom;
     CODE:
-        newNode = xmlNewNode( 0 , name );
-        newNode->doc =(xmlDocPtr)((ProxyObject*)SvIV((SV*)SvRV(dom)))->object;
+        real_dom = (xmlDocPtr)((ProxyObject*)SvIV((SV*)SvRV(dom)))->object;
+
+        newNode = xmlNewNode( NULL , 
+                              domEncodeString( real_dom->encoding, name ) );
+        newNode->doc = real_dom;
+        # warn( newNode->name );
         RETVAL = make_proxy_node(newNode);
         RETVAL->extra = dom;
         SvREFCNT_inc(dom);
          xmlChar *prefix;
          xmlChar *lname = NULL;
          xmlNsPtr ns = NULL;
+         xmlDocPtr real_dom;
      CODE:
-         if (nsURI != NULL && strlen(nsURI) != 0) {
-             lname = xmlSplitQName2(qname, &prefix);
-             ns = domNewNs (0 , prefix , nsURI);
-         }
-         newNode = xmlNewNode( ns , lname );
-         newNode->doc = (xmlDocPtr)((ProxyObject*)SvIV((SV*)SvRV(dom)))->object;
-         RETVAL = make_proxy_node(newNode);
-         RETVAL->extra = dom;
-         SvREFCNT_inc(dom);
+        real_dom = (xmlDocPtr)((ProxyObject*)SvIV((SV*)SvRV(dom)))->object;
+        if ( nsURI != NULL && strlen(nsURI)!=0 ){
+            lname = xmlSplitQName2(qname, &prefix);
+            ns = domNewNs (0 , 
+                           domEncodeString( real_dom->encoding, prefix ) , 
+                           nsURI);
+        }
+        else {
+            lname = qname;
+        }
+        newNode = xmlNewNode( ns ,
+                              domEncodeString( real_dom->encoding, lname ) );
+        newNode->doc =(xmlDocPtr)((ProxyObject*)SvIV((SV*)SvRV(dom)))->object;
+        RETVAL = make_proxy_node(newNode);
+        RETVAL->extra = dom;
+        SvREFCNT_inc(dom);
      OUTPUT:
-         RETVAL
+        RETVAL
 
 ProxyObject *
 createTextNode( dom, content )
     PREINIT:
         char * CLASS = "XML::LibXML::Text";
         xmlNodePtr newNode;
+        xmlDocPtr real_dom;
     CODE:
-        newNode = xmlNewDocText( (xmlDocPtr)((ProxyObject*)SvIV((SV*)SvRV(dom)))->object, content );
+        real_dom = (xmlDocPtr)((ProxyObject*)SvIV((SV*)SvRV(dom)))->object;
+
+        newNode = xmlNewDocText( real_dom, 
+                                 domEncodeString( real_dom->encoding,
+                                                  content ) );
         RETVAL = make_proxy_node(newNode);
         RETVAL->extra = dom;
         SvREFCNT_inc(dom);
     PREINIT:
         char * CLASS = "XML::LibXML::Comment";
         xmlNodePtr newNode;
+        xmlDocPtr real_dom;
     CODE:
-        newNode = xmlNewDocComment( (xmlDocPtr)((ProxyObject*)SvIV((SV*)SvRV(dom)))->object, content );
+        real_dom = (xmlDocPtr)((ProxyObject*)SvIV((SV*)SvRV(dom)))->object;
+        content = domEncodeString( real_dom->encoding, content );
+
+        newNode = xmlNewDocComment( real_dom, content );
         RETVAL = make_proxy_node(newNode);
         RETVAL->extra = dom;
         SvREFCNT_inc(dom);
     PREINIT:
         char * CLASS = "XML::LibXML::CDATASection";
         xmlNodePtr newNode;
+        xmlDocPtr real_dom;
     CODE:
-        newNode = domCreateCDATASection( (xmlDocPtr)((ProxyObject*)SvIV((SV*)SvRV(dom)))->object, content );
+        real_dom = (xmlDocPtr)((ProxyObject*)SvIV((SV*)SvRV(dom)))->object;
+        content = domEncodeString( real_dom->encoding, content );
+
+        newNode = domCreateCDATASection( real_dom, content );
         RETVAL = make_proxy_node(newNode);
         RETVAL->extra = dom;
         SvREFCNT_inc(dom);
     PREINIT:
         const char* CLASS = "XML::LibXML::Attr";
         xmlNodePtr newNode;
+        xmlDocPtr real_dom;
     CODE:
+        real_dom = (xmlDocPtr)((ProxyObject*)SvIV((SV*)SvRV(dom)))->object;
+        name  = domEncodeString( real_dom->encoding, name );
+        value = domEncodeString( real_dom->encoding, value );
+
         newNode = (xmlNodePtr)xmlNewProp(NULL, name , value );
         newNode->doc = (xmlDocPtr)((ProxyObject*)SvIV((SV*)SvRV(dom)))->object;
         if ( newNode->children!=NULL ) {
         const char* CLASS = "XML::LibXML::Attr";
         xmlNodePtr newNode;
         xmlChar *prefix;
-        xmlChar *lname = NULL;
-        xmlNsPtr ns = NULL;
+        xmlChar *lname =NULL;
+        xmlNsPtr ns=NULL;
+        xmlDocPtr real_dom;
     CODE:
-        lname = qname;
-        if (nsURI != NULL && strlen(nsURI) != 0) {
+        real_dom = (xmlDocPtr)((ProxyObject*)SvIV((SV*)SvRV(dom)))->object;
+        if ( nsURI != NULL && strlen( nsURI ) != 0 ){
             lname = xmlSplitQName2(qname, &prefix);
-            if (lname == NULL) {
-                lname = qname;
-            }
             ns = domNewNs (0 , prefix , nsURI);
         }
-        newNode = (xmlNodePtr)xmlNewNsProp(NULL, ns, lname , value );
-        newNode->doc = (xmlDocPtr)((ProxyObject*)SvIV((SV*)SvRV(dom)))->object;
+        else{
+            lname = qname;
+        }
+        lname = domEncodeString( real_dom->encoding, lname );
+        value = domEncodeString( real_dom->encoding, value );
+        if ( ns != NULL ) {
+            newNode = (xmlNodePtr) xmlNewNsProp(NULL, ns, lname , value );
+        }
+        else {
+            newNode = (xmlNodePtr) xmlNewProp( NULL, lname, value );
+        }
+        newNode->doc = real_dom;
+
         if ( newNode->children!=NULL ) {
-            newNode->children->doc = (xmlDocPtr)((ProxyObject*)SvIV((SV*)SvRV(dom)))->object;
+            newNode->children->doc = real_dom;
         }
         RETVAL = make_proxy_node(newNode);
         RETVAL->extra = dom;
         if (node == NULL) {
            XSRETURN_UNDEF;
         }
-
         real_node = node->object;
+        if (node->extra != NULL) {
+            SvREFCNT_dec(node->extra);
+        }
+     
         if ( real_node != NULL ) {
-            if( real_node->type == XML_DOCUMENT_NODE ){
-                Safefree(node); 
-                # warn("xmlFreeDoc(%d)\n", real_node);
-                xmlFreeDoc((xmlDocPtr)real_node);
-                node->object = NULL;
-                node->extra  = &PL_sv_undef;
+            if ( real_node->type == XML_DOCUMENT_NODE ){
+                if ( node->extra == NULL ) {
+                    xmlFreeDoc((xmlDocPtr)node->object);
+                }
+                else {
+                    node->extra  = NULL;
+                }
             }
-            else {
-                /**
-                 * this block should remove old (unbound) nodes from the system
-                 * but for some reason this condition is not valid ... :(
-                 **/
-                if (node->extra != NULL) {
-                    SvREFCNT_dec(node->extra);
-                }
-                Safefree(node); 
-            }
-            # warn( "Free node\n" );
-        }
-	
+            Safefree(node);
+        } 
+ 
 int 
 getType( node ) 
         xmlNodePtr node
                 RETVAL->extra = self->extra ;
                 SvREFCNT_inc(self->extra);                
             }
-            if ( ret == (xmlNodePtr)((xmlNodePtr)self->object)->doc) {
-                CLASS = "XML::LibXML::Document";
-            }
         }
     OUTPUT:
         RETVAL
     CODE:
         if( node != NULL ) {
             if ( node->type != XML_ATTRIBUTE_NODE ){
-                content = node->content;
+                if ( node->doc != NULL ){
+                    content = domDecodeString( node->doc->encoding,
+                                               node->content );
+                }
+                else {
+                    content = node->content;
+                }
             }
             else if ( node->children != NULL ) {
-                content = node->children->content;
+                if ( node->doc != NULL ){
+                    content = domDecodeString( node->doc->encoding,
+                                               node->children->content );
+                }
+                else {
+                    content = node->children->content;
+                }
             }
         }
+
         if ( content != 0 ){
             RETVAL = newSVpvn( (char *)content, xmlStrlen( content ) );
         }
         const char * lname;
     CODE:
         if( node != NULL ) {
-            lname =  node->name;
+            if ( node->doc != NULL ) {
+                lname = domDecodeString( node->doc->encoding, node->name );
+            }
+            else {
+                lname =  node->name;
+            }
             RETVAL = newSVpvn( (char *)lname, xmlStrlen( lname ) );
         }
         else {
         if( node != NULL 
             && node->ns != NULL
             && node->ns->prefix != NULL ) {
-            prefix =  node->ns->prefix;
+            if ( node->doc != NULL ) {
+                prefix = domDecodeString( node->doc->encoding, 
+                                          node->ns->prefix );
+            }
+            else {
+                prefix =  node->ns->prefix;
+            }
+
             RETVAL = newSVpvn( (char *)prefix, xmlStrlen( prefix ) );
         }
         else {
         char * name
         char * value
     CODE:
+        if( elem->doc != NULL ) {
+            name  = domEncodeString( elem->doc->encoding, name );
+            value = domEncodeString( elem->doc->encoding, value );
+        }
         xmlSetProp( elem, name, value );
 
 void
         xmlChar *lname = NULL;
         xmlNsPtr ns = NULL;
     CODE:
+        if( elem->doc != NULL ) {
+            qname  = domEncodeString( elem->doc->encoding, qname );
+            value = domEncodeString( elem->doc->encoding, value );
+        }
+
         if ( nsURI != NULL && strlen(nsURI) != 0 ) {
             lname = xmlSplitQName2(qname, &prefix);
+        
             ns = domNewNs (elem , prefix , nsURI);
             xmlSetNsProp( elem, ns, lname, value );
         }
 	    char * content;
     CODE:
         content = xmlGetProp( elem->object, name );
+        if( ((xmlNodePtr)elem->object)->doc != NULL ){
+            content = domDecodeString( ((xmlNodePtr)elem->object)->doc->encoding, content );
+        }
         if ( content != NULL ) {
             RETVAL  = newSVpvn( content, xmlStrlen( content ) );
         }
         if ( att != NULL && att->children != NULL ) {
             content = att->children->content;
         }
+
+        if( ((xmlNodePtr)elem->object)->doc != NULL ){
+            content = domDecodeString( ((xmlNodePtr)elem->object)->doc->encoding, content );
+        }
+
         if ( content != NULL ) {
             RETVAL  = newSVpvn( content, xmlStrlen( content ) );
         }
     CODE:
         if ( self->doc != NULL && xmlString != NULL ) {
             if ( self->doc != NULL ) {
+                xmlString = domEncodeString( self->doc->encoding, xmlString );
                 tn = xmlNewDocText( self->doc, xmlString ); 
             }
             else {
         char * childname
         char * xmlString
     CODE:
+        if( self->doc != NULL ) {
+            childname = domEncodeString( self->doc->encoding, childname );
+            xmlString = domEncodeString( self->doc->encoding, xmlString );
+        }
         xmlNewTextChild( self, NULL, childname, xmlString );
 
 MODULE = XML::LibXML         PACKAGE = XML::LibXML::Text
         xmlNodePtr node
         char * value 
     CODE:
+        if ( node->doc != NULL ) {
+            value = domEncodeString( node->doc->encoding, value );
+        }
         domSetNodeValue( node, value );
 
 ProxyObject *
                                    xmlStrlen( attr->children->content )  );
             }
             else {
-                # we have to decode the string!
-                xmlBufferPtr in  = xmlBufferCreate();
-                xmlBufferPtr out = xmlBufferCreate();                
-                xmlCharEncodingHandlerPtr handler;
-                int len = -1;
-                handler = xmlGetCharEncodingHandler( xmlParseCharEncoding(attr->doc->encoding) );
-                if( handler != NULL ) {
-                    xmlBufferCat( in, attr->children->content ) ;
-                
-                    xmlCharEncOutFunc( handler, out, NULL );
-                    len = xmlCharEncOutFunc( handler, out, in );
-                    if ( len >= 0 ) {
-                        RETVAL =  newSVpvn( out->content, 
-                                        out->use );
-                    }
-                    else {
-                        RETVAL = &PL_sv_undef;
-                    }                
-                }
-                else {
-                    croak("handler error" );
-                }
+                char *str = domDecodeString( attr->doc->encoding, 
+                                             attr->children->content );
+                RETVAL =  newSVpvn( str, xmlStrlen( str ) );
             }
         }
         else {
         xmlNodePtr attr 
         char * value
     CODE:
+        if ( attr->doc != NULL ) {
+            value = domEncodeString( attr->doc->encoding, value );
+        }
+
         if ( attr->children != NULL ) {
             domSetNodeValue( attr->children , value );
         }
   return nodes;
 }
 
+/** 
+ * domEncodeString converts `string` from the character encoding named by
+ * `encoding` into UTF-8.
+ *
+ * encoding: name of the encoding `string` is currently in; it is resolved
+ *           with xmlParseCharEncoding(). When NULL, the input is simply
+ *           duplicated.
+ * string:   the text to convert. When NULL, the function returns NULL.
+ *
+ * Returns a newly allocated copy (made with xmlStrdup), or NULL when
+ * `string` is NULL, the encoding name is not recognised (enc <= 0) or the
+ * conversion reports an error. Nothing in this function releases the
+ * returned buffer, so freeing it is left to the caller.
+ **/ 
+xmlChar*
+domEncodeString( const char *encoding, const char *string ){
+  xmlCharEncoding enc;
+  xmlChar *ret = NULL;
+
+  if ( string != NULL ) {
+    if( encoding != NULL ) {
+      enc = xmlParseCharEncoding( encoding );
+      if ( enc > 0 ) { /* libxml2: XML_CHAR_ENCODING_ERROR is -1, XML_CHAR_ENCODING_NONE is 0 */
+        if( enc > 1 ) { /* libxml2: 1 is XML_CHAR_ENCODING_UTF8; anything above is converted */
+          xmlBufferPtr in, out;
+          xmlCharEncodingHandlerPtr coder ;
+          in  = xmlBufferCreate();
+          out = xmlBufferCreate();
+          
+          coder = xmlGetCharEncodingHandler( enc ); /* NOTE(review): coder is not checked for NULL before use */
+          
+          xmlBufferCCat( in, string ); /* load the input bytes into the source buffer */
+          
+          if ( xmlCharEncInFunc( coder, out, in ) >= 0 ) {
+            ret = xmlStrdup( out->content ); /* copy the result out before the buffers are freed below */
+          }
+          else {
+            /* conversion failed: ret stays NULL */
+          }
+          
+          xmlBufferFree( in );
+          xmlBufferFree( out );
+        }
+        else {
+          /* input is already UTF-8: return a plain copy */
+          ret = xmlStrdup( string );
+        }
+      }
+      else {
+        /* encoding name not recognised: ret stays NULL */
+      }
+    }
+    else {
+      /* no encoding given: return a plain copy of the input */
+      ret = xmlStrdup( string );
+    }
+  }
+  return ret;
+}
+
+/**
+ * domDecodeString converts the UTF-8 string `string` into the character
+ * encoding named by `encoding`.
+ *
+ * encoding: name of the target encoding; it is resolved with
+ *           xmlParseCharEncoding(). When NULL, the input is simply
+ *           duplicated.
+ * string:   the UTF-8 text to convert. When NULL, the function returns NULL.
+ *
+ * Returns a newly allocated copy (made with xmlStrdup), or NULL when
+ * `string` is NULL, the encoding name is not recognised (enc <= 0) or the
+ * conversion reports an error. Nothing in this function releases the
+ * returned buffer, so freeing it is left to the caller.
+ **/
+char*
+domDecodeString( const char *encoding, const xmlChar *string){
+  char *ret=NULL;
+  xmlBufferPtr in, out; /* NOTE(review): never used; shadowed by the declarations in the conversion branch */
+ 
+  if ( string != NULL ) {
+    if( encoding != NULL ) {
+      xmlCharEncoding enc = xmlParseCharEncoding( encoding );
+      /* libxml2: XML_CHAR_ENCODING_ERROR is -1, XML_CHAR_ENCODING_NONE is 0, XML_CHAR_ENCODING_UTF8 is 1 */
+      if ( enc > 0 ) {
+        if( enc > 1 ) { /* target is not UTF-8: run the output converter */
+          xmlBufferPtr in, out;
+          xmlCharEncodingHandlerPtr coder;
+          in  = xmlBufferCreate();
+          out = xmlBufferCreate();
+
+          coder = xmlGetCharEncodingHandler( enc ); /* NOTE(review): coder is not checked for NULL before use */
+          xmlBufferCat( in, string );        
+          
+          if ( xmlCharEncOutFunc( coder, out, in ) >= 0 ) {
+            ret=xmlStrdup(out->content); /* copy the result out before the buffers are freed below */
+          }
+          else {
+            /* conversion failed: ret stays NULL */
+          }
+
+          xmlBufferFree( in );
+          xmlBufferFree( out );
+        }
+        else {
+          ret = xmlStrdup(string); /* target is UTF-8: return a plain copy */
+        }
+      }
+      else {
+        /* encoding name not recognised: ret stays NULL */
+      }
+    }
+    else {
+      /* no encoding given: return a plain copy of the input */
+      ret = xmlStrdup( string );
+    }
+  }
+  return ret;
+}
+
 xmlNodePtr
 domUnbindNode( xmlNodePtr );
 
  * If the node belongs to a namespace it returns the prefix and 
  * the local name. otherwise only the local name is returned.
  **/
+
+/* esther 0179-3929246; schlesische str 14; 10997 bln; estherman@gmx.de */
+
 const xmlChar*
 domName(xmlNodePtr node) {
   xmlChar *qname = NULL; 
   if ( node ) {
     if (node->ns != NULL) {
       if (node->ns->prefix != NULL) {
-        qname = xmlStrdup( node->ns->prefix );
-        qname = xmlStrcat( qname , ":" );
-        qname = xmlStrcat( qname , node->name );
+        xmlChar *tname = xmlStrdup( node->ns->prefix );
+        tname = xmlStrcat( tname , ":" );
+        tname = xmlStrcat( tname , node->name );
+        if ( node->doc != NULL ) {
+          qname = domDecodeString( node->doc->encoding , tname );
+          xmlFree( tname );
+        }
+        else {
+          qname = tname;
+        }
       } 
       else {
+        if ( node->doc != NULL ) {
+          qname = domDecodeString( node->doc->encoding , node->name );
+        }
+        else {
+          qname = xmlStrdup( node->name );
+        }
+      }
+    } 
+    else {
+      if ( node->doc != NULL ) {
+        qname = domDecodeString( node->doc->encoding , node->name );
+      }
+      else {
         qname = xmlStrdup( node->name );
       }
-    } 
-    else {
-      qname = xmlStrdup( node->name );
     }
   }
   return qname;
 }
 
 void
-domSetName( xmlNodePtr node, xmlChar* name ) {
+domSetName( xmlNodePtr node, char* name ) {
+  xmlChar* str = NULL;  
   /* TODO: add ns support */
   if ( node == NULL || name == NULL ) 
     return ;
     /* required since node->name is const! */
     xmlFree( (void*) node->name );
   }
-  node->name = xmlStrdup( name );
+
+  if ( node->doc != NULL ) {
+    str = domEncodeString( node->doc->encoding , name );
+  }
+  else {
+    str = xmlStrdup( name );
+  }
+  warn( str );
+  node->name = str;
 }
 
 xmlNodePtr
 domAppendChild( xmlNodePtr self,
-		xmlNodePtr newChild ){
+                xmlNodePtr newChild ){
   /* unbind the new node if nessecary ...  */
 
   newChild = domIsNotParentOf( newChild, self );
 
 void
 domSetNodeValue( xmlNodePtr n , xmlChar* val ){
-  xmlDocPtr doc = NULL;
-  
   if ( n == NULL ) 
     return;
 
     xmlFree( n->content );
   }
 
-  doc = n->doc;
-
-  if ( doc != NULL ) {
-    xmlCharEncodingHandlerPtr handler = xmlGetCharEncodingHandler( xmlParseCharEncoding(doc->encoding) );
-
-    if ( handler != NULL ){
-       xmlBufferPtr in  = xmlBufferCreate();
-       xmlBufferPtr out = xmlBufferCreate();   
-       int len=-1;
-
-       xmlBufferCat( in, val );
-       len = xmlCharEncInFunc( handler, out, in );
-
-       if ( len >= 0 ) {
-         n->content = xmlStrdup( out->content );
-       }
-       else {
-         printf( "\nencoding error %d \n", len );
-         n->content = xmlStrdup( "" );
-       }
-    }
-    else {
-      /* handler error => no output */ 
-      n->content = xmlStrdup( "" );
-    }
-  }
-  else {    
-    /* take data as UTF-8 */
-    n->content = xmlStrdup( val );
-  }
+  n->content = xmlStrdup( val );
 }
 
 
   xmlNodePtr elem = NULL;
 
   if ( ( self != NULL ) && ( strNodeContent != NULL ) ) {
+    strNodeContent = domEncodeString( self->encoding, strNodeContent );
     elem = xmlNewCDataBlock( self, strNodeContent, xmlStrlen(strNodeContent) );
     elem->next = NULL;
     elem->prev = NULL;
   /* prefix is not in use */
   if (ns == NULL) {
     ns = xmlNewNs( elem , href , prefix );
-    ns->next = NULL;
   } else {
     /* prefix is in use; if it has same URI, let it go, otherwise it's
        an error */
 xmlNodePtr 
 domReadWellBalancedString( xmlDocPtr doc, xmlChar* string );
 
+xmlChar*
+domEncodeString( const char *encoding, const char *string );
+char*
+domDecodeString( const char *encoding, const xmlChar *string);
+
 /**
  * part A:
  *