Commits

Tobias Pape committed fc1de55

Small dotviewer fixes.

* use strunicode throughout, also in msgstruct.
-> avoids hard-coding "utf-8" in msgstruct.py
* do not fail on garbage (+tests)

Comments (0)

Files changed (3)

dotviewer/msgstruct.py

 import sys, os
 from struct import pack, unpack, calcsize
+from strunicode import forceencoded
 
 MAGIC = -0x3b83728b
 
 long_max = 2147483647
 
 
-def _encodeme(x):
-    if type(x) is unicode:
-        x = x.encode('utf-8')
-    return x
-
 def message(tp, *values):
     #print >> sys.stderr, tp, values
     typecodes = ['']
-    values = map(_encodeme, values)
+    values = map(forceencoded, values)
     for v in values:
         if type(v) is str:
             typecodes.append('%ds' % len(v))

dotviewer/strunicode.py

 RAW_ENCODING = "utf-8"
-
+ENCODING_ERROR_HANDLING = "replace"
 
 def forceunicode(name):
-    return name if isinstance(name, unicode) else name.decode(RAW_ENCODING)
+    """ returns `name` as unicode, even if it wasn't before  """
+    return name if isinstance(name, unicode) else name.decode(RAW_ENCODING, ENCODING_ERROR_HANDLING)
 
 
 def forcestr(name):
-    return name if isinstance(name, str) else name.encode(RAW_ENCODING)
+    """ returns `name` as (possibly `RAW_ENCODING` encoded) string, even if it wasn't before  """
+    return name if isinstance(name, str) else name.encode(RAW_ENCODING, ENCODING_ERROR_HANDLING)
+
+def forceencoded(name):
+    """ returns `name` as encoded string if it was unicode before """
+    return name.encode(RAW_ENCODING, ENCODING_ERROR_HANDLING) if isinstance(name, unicode) else name

dotviewer/test/test_unicode_util.py

 #
 import py
 import codecs
-from dotviewer.strunicode import RAW_ENCODING, forcestr, forceunicode
+from dotviewer.strunicode import RAW_ENCODING, forcestr, forceunicode, forceencoded
 
 SOURCE1 = u"""digraph G{
 λ -> b
     def test_idempotent(self):
         x = u"a"
         assert forceunicode(forcestr(x)) == x
-        
+
         x = u"λ"
         assert forceunicode(forcestr(x)) == x
 
         x_u = forceunicode(x_e)
         assert forceunicode(x_u) == x_u
 
-    def test_file(self):       
+    def test_file(self):
         udir = py.path.local.make_numbered_dir(prefix='usession-dot-', keep=3)
         full_filename = str(udir.join(FILENAME))
         f = codecs.open(full_filename, 'wb', RAW_ENCODING)
         f3.close()
         result = (c == SOURCE1)
         assert result
+
+    def test_only_unicode_encode(self):
+
+        sut =      [1,   u"a", "miau", u"λ"]
+        expected = [int, str,  str   , str ]
+
+        results = map(forceencoded, sut)
+
+
+        for result, expected_type in zip(results, expected):
+            assert isinstance(result, expected_type)
+
+    def test_forceunicode_should_not_fail(self):
+
+        garbage = "\xef\xff\xbb\xbf\xce\xbb\xff\xff" # garbage with a lambda
+
+        result = forceunicode(garbage)
+        assert True, "should not raise"
+
+    def test_forcestr_should_not_fail(self):
+
+        garbage = u"\xef\xff\xbb\xbf\xce\xbb\xff\xff" # garbage
+
+        result = forcestr(garbage)
+        assert True, "should not raise"
+
+