Commits

masklinn committed ccf62e5

Tokenize 'unicode' strings correctly; also improve the tokenizer's handling of numbers and names

Comments (0)

Files changed (3)

         var Special = '[:;.,`@]';
         var Funny = group(Operator, Bracket, Special);
 
-        var ContStr = group("'[^']*'", '"[^"]*"');
+        var ContStr = group("[uU]?'([^']*)'", '[uU]?"([^"]*)"');
 
         var PseudoToken = Whitespace + group(Number, Funny, ContStr, Name);
 
+        var number_pattern = new RegExp('^' + Number + '$');
+        var string_pattern = new RegExp('^' + ContStr + '$');
+        var name_pattern = new RegExp('^' + Name + '$');
+        var strip = new RegExp('^' + Whitespace);
         return function tokenize(s) {
             var max=s.length, tokens = [], start, end = undefined;
             // /g flag makes repeated exec() have memory
                 start = pseudomatch.index;
                 end = pseudoprog.lastIndex;
                 // strip leading space caught by Whitespace
-                var token = s.slice(start, end).replace(new RegExp('^' + Whitespace), '');
+                var token = s.slice(start, end).replace(strip, '');
                 var initial = token[0];
 
-                if (/\d/.test(initial) || (initial === '.' && token !== '.')) {
+                if (number_pattern.test(token)) {
                     tokens.push(create(symbols['(number)'], {
                         value: parseFloat(token)
                     }));
-                } else if (/'|"/.test(initial)) {
+                } else if (string_pattern.test(token)) {
+                    var m = string_pattern.exec(token);
                     tokens.push(create(symbols['(string)'], {
-                        value: token.slice(1, -1)
+                        value: m[2] || m[3]
                     }));
                 } else if (token in symbols) {
                     var symbol;
                         symbol = symbols[token];
                     }
                     tokens.push(create(symbol));
-                } else if (/[_a-zA-Z]/.test(initial)) {
+                } else if (name_pattern.test(token)) {
                     tokens.push(create(symbols['(name)'], {
                         value: token
                     }));

test/builtins/str.js

     });
 };
 
-describe('Convert from JS', function () {
-    it('should produce a py.str', function () {
-        expect(py.str.fromJSON("foo"))
-            .to.be.a(ctor(py.str));
+describe('py.str', function () {
+    describe('Literal syntax', function () {
+        it('should accept string literals', function () {
+            expect(py.eval('"foo"')).to.be("foo");
+        });
+        it('should accept unicode literals', function () {
+            expect(py.eval('u"foo"')).to.be("foo");
+        });
+        it('should have the right type', function () {
+            expect(ev('"foo"').__class__).to.be(py.str);
+            expect(ev("'foo'").__class__).to.be(py.str);
+        });
+        it('should yield the corresponding JS string', function () {
+            expect(py.eval('"somestring"')).to.be('somestring');
+            expect(py.eval("'somestring'")).to.be('somestring');
+        });
+
     });
-    it('should do roundtrip', function () {
-        expect(py.str.fromJSON("foo").toJSON())
-            .to.be("foo");
-    });
-});
-describe('Convert other py types', function () {
-    describe('No __str__ defined', function () {
-        it("should use object's", function () {
-            var t = py.type('t', null, {});
-            expect(py.eval('str(t())', {t: t}))
-                .to.be("<t object>");
+    describe('Convert from JS', function () {
+        it('should produce a py.str', function () {
+            expect(py.str.fromJSON("foo"))
+                .to.be.a(ctor(py.str));
         });
-    });
-    it('should accept a py.str', function () {
-        var t = makeT(function () {return py.str.fromJSON("Wheee");})
-        expect(py.eval('str(t())', {t: t}))
-               .to.be("Wheee");
-    });
-    it('should reject a py.object non py.str', function () {
-        var t = makeT(function () {return py.float.fromJSON(42);});
-        expect(function () { py.eval('str(t())', {t: t}); })
-            .to.throwException(
-                /^TypeError: __str__ returned non-string \(type float\)$/);
-    });
-    it('should reject a non-py js object', function () {
-        var t = makeT(function () { return new String("foo"); });
-        expect(function () { py.eval('str(t())', {t: t}); })
-            .to.throwException(
-                /^TypeError: __str__ returned non-string \(type String\)$/);
-    });
-    it('should reject a js primitive', function () {
-        var t = makeT(function () { return "foo"; });
-        expect(function () { py.eval('str(t())', {t: t}); })
-            .to.throwException(
-                /^TypeError: __str__ returned non-string \(type string\)/);
-    });
-});
-describe('Python protocols', function () {
-    describe('boolean', function () {
-        it('should be true if non-empty', function () {
-            expect(py.eval('bool("2008")'))
-                .to.be(true);
-        });
-        it('should be false if empty', function () {
-            expect(py.eval('bool("")')) .to.be(false);
-        });
-    });
-    describe('str', function () {
-        it('should be an identity', function () {
-            expect(py.eval('str("foo")'))
+        it('should do roundtrip', function () {
+            expect(py.str.fromJSON("foo").toJSON())
                 .to.be("foo");
         });
     });
+    describe('Convert other py types', function () {
+        describe('No __str__ defined', function () {
+            it("should use object's", function () {
+                var t = py.type('t', null, {});
+                expect(py.eval('str(t())', {t: t}))
+                    .to.be("<t object>");
+            });
+        });
+        it('should accept a py.str', function () {
+            var t = makeT(function () {return py.str.fromJSON("Wheee");})
+            expect(py.eval('str(t())', {t: t}))
+                .to.be("Wheee");
+        });
+        it('should reject a py.object non py.str', function () {
+            var t = makeT(function () {return py.float.fromJSON(42);});
+            expect(function () { py.eval('str(t())', {t: t}); })
+                .to.throwException(
+                    /^TypeError: __str__ returned non-string \(type float\)$/);
+        });
+        it('should reject a non-py js object', function () {
+            var t = makeT(function () { return new String("foo"); });
+            expect(function () { py.eval('str(t())', {t: t}); })
+                .to.throwException(
+                    /^TypeError: __str__ returned non-string \(type String\)$/);
+        });
+        it('should reject a js primitive', function () {
+            var t = makeT(function () { return "foo"; });
+            expect(function () { py.eval('str(t())', {t: t}); })
+                .to.throwException(
+                    /^TypeError: __str__ returned non-string \(type string\)/);
+        });
+    });
+    describe('Python protocols', function () {
+        describe('boolean', function () {
+            it('should be true if non-empty', function () {
+                expect(py.eval('bool("2008")'))
+                    .to.be(true);
+            });
+            it('should be false if empty', function () {
+                expect(py.eval('bool("")')) .to.be(false);
+            });
+        });
+        describe('str', function () {
+            it('should be an identity', function () {
+                expect(py.eval('str("foo")'))
+                    .to.be("foo");
+            });
+        });
+    });
 });
             expect(py.eval('None')).to.be(null);
         });
     });
-    describe('String', function () {
-        it('should have the right type', function () {
-            expect(ev('"foo"').__class__).to.be(py.str);
-            expect(ev("'foo'").__class__).to.be(py.str);
-        });
-        it('should yield the corresponding JS string', function () {
-            expect(py.eval('"somestring"')).to.be('somestring');
-            expect(py.eval("'somestring'")).to.be('somestring');
-        });
-    });
     describe('Tuple', function () {
         it('shoud have the right type', function () {
             expect(ev('()').__class__).to.be(py.tuple);