megies avatar megies committed 8585af0

git hook: try to encode/decode with correct encoding (try to detect from file header)

Comments (0)

Files changed (1)

 # -*- coding: utf-8 -*-
 from __future__ import with_statement
+import re
 import os
 import sys
 import stat
     # Copy staged versions to temporary directory
     tmpdir = mkdtemp()
     files_to_check = []
+    coding_pattern = r'coding[=:]\s*([-\w.]+)'
         for file_ in files_modified:
+            # try to detect encoding of python file according to PEP 0263
+            with open(file_) as fh:
+                for i_ in xrange(2):
+                    line = fh.readline()
+                    match = re.search(coding_pattern, line)
+                    if match:
+                        coding = match.groups()[0]
+                        break
+                else:
+                    coding = None
             # get the staged version of the file
             gitcmd_getstaged = ["git", "show", ":%s" % file_]
-            _, out, _ = run(gitcmd_getstaged, raw_output=True)
+            _, out, _ = run(gitcmd_getstaged, raw_output=True, coding=coding)
             # write the staged version to temp dir with its full path to
             # avoid overwriting files with the same name
             dirname, filename = os.path.split(os.path.abspath(file_))
             prefix = os.path.commonprefix([dirname, tmpdir])
-            print dirname, tmpdir, prefix
             dirname = os.path.relpath(dirname, start=prefix)
             dirname = os.path.join(tmpdir, dirname)
             if not os.path.isdir(dirname):
             filename = os.path.join(dirname, filename)
+            # encode for output if we know the encoding
+            if coding:
+                out = out.encode(coding)
+            # write staged version of file to temporary directory
             with open(filename, "wb") as fh:
         # Run the checks
         report = flake8_style.check_files(files_to_check)
+    # remove temporary directory
         shutil.rmtree(tmpdir, ignore_errors=True)
     return 0
-def run(command, raw_output=False):
+def run(command, raw_output=False, coding=None):
     if isinstance(command, basestring):
         command = command.split()
     p = Popen(command, stdout=PIPE, stderr=PIPE)
     # string objects. This is simply less mysterious than using b'.py' in the
     # endswith method. That should work but might still fail horribly.
     if hasattr(stdout, 'decode'):
-        stdout = stdout.decode()
+        if coding:
+            stdout = stdout.decode(coding)
+        else:
+            stdout = stdout.decode()
     if hasattr(stderr, 'decode'):
-        stderr = stderr.decode()
+        if coding:
+            stderr = stderr.decode(coding)
+        else:
+            stderr = stderr.decode()
     if not raw_output:
         stdout = [line.strip() for line in stdout.splitlines()]
         stderr = [line.strip() for line in stderr.splitlines()]
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.