Improve the cleanpath function.

    The previous version was more readable but not as powerful. I decided to use a regex while trying to get rid of the date. Splitting on '\t' didn't work because sometimes spaces were used instead, and some of the patches also contained spaces in the filename, so splitting on spaces wasn't an option. Using a regex also simplified matching leading a/ b/ A/ prefixes, dirs starting with python-, Python-, Python2, Python3, dates starting with a weekday or with a year, and things like "(working copy)". There are still a few cases left, though. Windows paths are not converted, and I've seen lines like "b/C:\Python32\Lib\distutils\msvc9compiler_manifest.py" (quotes included). The regex covers 98% of the cases I've seen, though.

 from __future__ import print_function
+import re
 import sys
 import json
     cached = True
   return (filename, cached)
+days = 'Mon|Tue|Wed|Thu|Fri|Sat|Sun'
+path_re = re.compile(r'^(?:[ab]/)?(?:python[-23][^/]*/)?(.*?)\s*'
+                     r'(?:\s(?:%s|20[01]\d|199\d|\(\w+\s)\b.*)?(?:\.orig)?$' %
+                     days, re.I)
 def cleanpath(source, target):
-    # best-effort function to clean up the path
-    path = source
-    if not source or source == 'dev/null':
-        path = target
-    # some paths are followed by the date
-    path = path.split()[0]
-    if path.startswith(('a/', 'b/')):
-        path = path[2:]
-    if path.endswith('.orig'):
-        path = path[:-5]
-    parts = path.split('/')
-    if parts[0].startswith('Python-'):
-        parts = parts[1:]
-    path = '/'.join(parts)
+    # clean up the path by removing leading a/, b/, or python* dirs,
+    # and trailing dates, or '(working copy)', or '.orig' extensions
+    path = target
+    if not target or target == 'dev/null':
+        path = source
+    # if this fails the regex is broken
+    path = path_re.match(path).group(1)
     return path
 issue_files = {}    # 'number' => []
