Commits

Michael Heemskerk committed 8c367b8

CRUC-4793: Unit tests for planned workaround for JVM bug on windows regarding non-ascii characters

  • Participants
  • Parent commits 98824dc

Comments (0)

Files changed (6)

processutils/.classpath

 <?xml version="1.0" encoding="UTF-8"?>
 <classpath>
 	<classpathentry kind="src" output="target/classes" path="src/main/java"/>
+	<classpathentry kind="src" path="src/test/java"/>
 	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5"/>
 	<classpathentry kind="con" path="org.maven.ide.eclipse.MAVEN2_CLASSPATH_CONTAINER"/>
 	<classpathentry kind="output" path="target/classes"/>

processutils/pom.xml

                 </exclusion>
             </exclusions>
         </dependency>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.5</version>
+            <scope>test</scope>
+	    </dependency>
     </dependencies>
 </project>

processutils/src/main/java/com/atlassian/utils/process/ExternalProcess.java

                         processException = new ProcessException(e);
                     }
                 }
+                boolean done = true;
+                
             }
         };
 
     /**
      * Stores the given value in this instance's {@code timeout} field.
      *
      * @param timeout the new timeout value; the unit is not visible in this excerpt
      *                (presumably milliseconds - TODO confirm against the code that reads the field)
      */
     public void setTimeout(long timeout) {
         this.timeout = timeout;
     }
+    
+    /**
+     * Calls {@code shutdown()} on the static {@code pumpThreadPool}, if one exists.
+     * Does nothing when {@code pumpThreadPool} is {@code null}.
+     */
+    public static void shutdown() {
+        if (pumpThreadPool == null) {
+            return;
+        }
+        pumpThreadPool.shutdown();
+    }
 }

processutils/src/test/java/com/atlassian/utils/process/CallEcho.java

+package com.atlassian.utils.process;
+
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+
+public class CallEcho {
+    /**
+     * Helper method to print out a string as a list of bytes (integers)
+     * @param value
+     * @return
+     */
+    private static String getBytes(String value) throws UnsupportedEncodingException {
+        if (value == null) {
+            return null;
+        }
+        StringBuilder builder = new StringBuilder();
+        String sep = "";
+        for (byte b : value.getBytes("UTF-8")) {
+            builder.append(sep).append(b);
+            sep = " ";
+        }
+        return builder.toString();
+    }
+    
+    
+    /**
+     * @param args
+     */
+    public static void main(String[] args) throws IOException, InterruptedException {
+        Properties testStrings = new Properties();
+        testStrings.load(CallEcho.class.getResourceAsStream("echostrings.properties"));
+        
+        StringBuilder echoString = new StringBuilder();
+        String separator = "";
+        for (String arg : args) {
+            if (testStrings.containsKey(arg)) {
+                echoString.append(separator).append(testStrings.getProperty(arg));
+                separator = " ";
+            }
+        }
+        
+        final String echo = echoString.toString();
+        if (!echo.isEmpty()) {
+            final String[] result = new String[1];
+            List<String> echoCmd = Arrays.asList("echo", echo);
+            // the command line arguments will be converted using the default encoding (file.encoding). The lineoutputhandler needs to use the same encoding
+            // to process the results. Otherwise, encoding errors will occur.
+            ExternalProcess process = new ExternalProcessBuilder().command(echoCmd).handlers(new LineOutputHandler(System.getProperty("file.encoding")) {
+                @Override
+                protected void processLine(int lineNum, String line) {
+                    result[0] = line;
+                }
+            }).build();
+            process.setTimeout(250);
+            process.execute();
+            
+            // output the expected and actual strings as a list of bytes to prevent encodings to mess up the results
+            System.out.println(System.getProperty("file.encoding"));
+            System.out.println(getBytes(echo));
+            System.out.println(getBytes(result[0]));
+        }
+        ExternalProcess.shutdown();
+    }
+}

processutils/src/test/java/com/atlassian/utils/process/ExternalProcessTest.java

+package com.atlassian.utils.process;
+
+import static org.junit.Assert.*;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.junit.Test;
+
+public class ExternalProcessTest {
+    private static class EchoResult {
+        public String input;
+        public String output;
+        public String reportedEncoding;
+    }
+    
+    /*
+     * Starts the CallEcho as an separate process. This is necessary because the jvm uses the default encoding (-Dfile.encoding) for encoding commandline
+     * arguments that are provided to external processes. This test tests whether encodings that should be compatible are actually compatible. This is
+     * needed for http://jira.atlassian.com/browse/CRUC-4793 which provides a workaround for a JVM bug on windows.
+     */
+    protected EchoResult spawnEcho(String encoding, String... testStrings) throws Exception {
+        List<String> cmd = new ArrayList<String>(Arrays.asList("java", "-Dfile.encoding=" + encoding, CallEcho.class.getName()));
+        cmd.addAll(Arrays.asList(testStrings));
+        
+        ProcessBuilder processBuilder = new ProcessBuilder(cmd);
+        processBuilder.environment().put("CLASSPATH", System.getProperty("java.class.path"));
+        Process process = processBuilder.start();
+        
+        BufferedReader reader = null;
+        try {
+            reader = new BufferedReader(new InputStreamReader(process.getInputStream()));
+            EchoResult result = new EchoResult();
+            result.reportedEncoding = reader.readLine();
+            result.input = reader.readLine();
+            result.output = reader.readLine();
+            
+            return result;
+        } finally {
+            reader.close();
+        }
+    }
+    
+    protected void assertNotEquals(String message, String value1, String value2) {
+        assertFalse(message, (value1 == null && value2 == null) || (value1 != null && value1.equals(value2)));
+    }
+    
+    @Test
+    public void testIncompatibleCommandLineArgumentsASCII() throws Exception {
+        EchoResult result = spawnEcho("ASCII", "iso-8859-1");
+        assertNotEquals("ACII - incompatibility expected", result.input, result.output);
+
+        result = spawnEcho("ASCII", "unicode-chinese");
+        assertNotEquals("ACII - incompatibility expected", result.input, result.output);
+}
+
+    @Test
+    public void testCommandLineArgumentsUTF8() throws Exception {
+        EchoResult result = spawnEcho("UTF-8", "iso-8859-1", "win-1252-not-in-iso-8859-1", "unicode-chinese", "latin-ext-A", "latin-ext-B");
+        assertEquals("UTF-8 - no incompatibility expected", result.input, result.output);
+    }
+    
+    @Test
+    public void testCommandLineArgumentsWin1252() throws Exception {
+        if (Charset.isSupported("windows-1252")) {
+            EchoResult result = spawnEcho("windows-1252", "iso-8859-1", "win-1252-not-in-iso-8859-1");
+            assertEquals("Windows cp 1252 - 8 - no incompatibility expected", result.input, result.output);
+        }
+    }
+    
+    @Test
+    public void testCommandLineArgumentsIso88591() throws Exception {
+        EchoResult result = spawnEcho("windows-1252", "iso-8859-1");
+        assertEquals("ISO-8859-1 - no incompatibility expected", result.input, result.output);
+    }
+}

processutils/src/test/java/com/atlassian/utils/process/echostrings.properties

+# see http://www.iana.org/assignments/character-sets for canonical charsets
+
+# collection of iso-8859-1 unicode chars, see http://en.wikipedia.org/wiki/ISO/IEC_8859-1
+# (every character must be written as a \uXXXX escape, including the first one)
+iso-8859-1=\u00A0\u00A1\u00A2\u00A3\u00A4\u00A5\u00A6\u00A7\u00A8\u00A9\u00AA\u00AB\u00AC\u00AD\u00AE\u00AF\u00E0\u00E1\u00E2\u00E3\u00E4\u00E5\u00E6
+
+# latin-extended
+latin-ext-A=\u0100\u0101\u0102\u0103\u0104\u0105\u0106\u0107\u0108
+latin-ext-B=\u0190\u0191\u0192\u0193\u0194\u0195\u0196\u0197\u0198
+
+# collection of windows code page 1252 characters that have no corresponding counterpart in iso-8859-1
+win-1252-not-in-iso-8859-1=\u20AC\u201A\u0192\u0161\u0153\u0178
+
+unicode-chinese=\u4ea0\u4ea1\u4ea2\u4ea3\u4ea4\u4ea5\u4ea6\u5100\u5101\u5102\u5103