committed ed9497d

Add basic support for PEP 414 (explicit `u`/`U` string-literal prefix); documentation and tests are not yet included.

# Lib/tokenize.py

` Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"`
` # Tail end of """ string.`
` Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'`
`-Triple = group("[bB]?[rR]?'''", '[bB]?[rR]?"""')`
`+Triple = group("[bBuU]?[rR]?'''", '[bBuU]?[rR]?"""')`
` # Single-line ' or " string.`
`-String = group(r"[bB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",`
`-               r'[bB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')`
`+String = group(r"[bBuU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",`
`+               r'[bBuU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')`
` `
` # Because of leftmost-then-longest match semantics, be sure to put the`
` # longest operators first (e.g., if = came before ==, == would get`
` Token = Ignore + PlainToken`
` `
` # First (or only) line of ' or " string.`
`-ContStr = group(r"[bB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +`
`+ContStr = group(r"[bBuU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +`
`                 group("'", r'\\\r?\n'),`
`-                r'[bB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +`
`+                r'[bBuU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +`
`                 group('"', r'\\\r?\n'))`
` PseudoExtras = group(r'\\\r?\n', Comment, Triple)`
` PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)`
`            "bR'''": Single3, 'bR"""': Double3,`
`            "Br'''": Single3, 'Br"""': Double3,`
`            "BR'''": Single3, 'BR"""': Double3,`
`-           'r': None, 'R': None, 'b': None, 'B': None}`
`+           "u'''": Single3, 'u"""': Double3,`
`+           "ur'''": Single3, 'ur"""': Double3,`
`+           "R'''": Single3, 'R"""': Double3,`
`+           "U'''": Single3, 'U"""': Double3,`
`+           "uR'''": Single3, 'uR"""': Double3,`
`+           "Ur'''": Single3, 'Ur"""': Double3,`
`+           "UR'''": Single3, 'UR"""': Double3,`
`+           'r': None, 'R': None, 'b': None, 'B': None,`
`+           'u': None, 'U': None}`
` `
` triple_quoted = {}`
` for t in ("'''", '"""',`
`           "r'''", 'r"""', "R'''", 'R"""',`
`           "b'''", 'b"""', "B'''", 'B"""',`
`           "br'''", 'br"""', "Br'''", 'Br"""',`
`-          "bR'''", 'bR"""', "BR'''", 'BR"""'):`
`+          "bR'''", 'bR"""', "BR'''", 'BR"""',`
`+          "u'''", 'u"""', "U'''", 'U"""',`
`+          "ur'''", 'ur"""', "Ur'''", 'Ur"""',`
`+          "uR'''", 'uR"""', "UR'''", 'UR"""'):`
`     triple_quoted[t] = t`
` single_quoted = {}`
` for t in ("'", '"',`
`           "r'", 'r"', "R'", 'R"',`
`           "b'", 'b"', "B'", 'B"',`
`           "br'", 'br"', "Br'", 'Br"',`
`-          "bR'", 'bR"', "BR'", 'BR"' ):`
`+          "bR'", 'bR"', "BR'", 'BR"' ,`
`+          "u'", 'u"', "U'", 'U"',`
`+          "ur'", 'ur"', "Ur'", 'Ur"',`
`+          "uR'", 'uR"', "UR'", 'UR"' ):`
`     single_quoted[t] = t`
` `
` tabsize = 8`

# Parser/tokenizer.c

`     /* Identifier (most frequent token!) */`
`     nonascii = 0;`
`     if (is_potential_identifier_start(c)) {`
`-        /* Process b"", r"", br"" and rb"" */`
`-        int saw_b = 0, saw_r = 0;`
`+        /* Process b"", r"", u"", br"", rb"" and ur"" */`
`+        int saw_b = 0, saw_r = 0, saw_u = 0;`
`         while (1) {`
`-            if (!saw_b && (c == 'b' || c == 'B'))`
`+            if (!(saw_b || saw_u) && (c == 'b' || c == 'B'))`
`                 saw_b = 1;`
`+            /* The u prefix exists only for backwards compatibility, so we don't`
`+               want to support it in arbitrary order like byte literals. */`
`+            else if (!(saw_b || saw_u || saw_r) && (c == 'u' || c == 'U'))`
`+                saw_u = 1;`
`             else if (!saw_r && (c == 'r' || c == 'R'))`
`                 saw_r = 1;`
`             else`

# Python/ast.c

`                 quote = *++s;`
`                 *bytesmode = 1;`
`             }`
`+            else if (quote == 'u' || quote == 'U') {`
`+                quote = *++s;`
`+            }`
`             else if (quote == 'r' || quote == 'R') {`
`                 quote = *++s;`
`                 rawmode = 1;`