Commits

Anonymous committed 37d6715

WW-1495 FastByteArrayOutputStream has an encoding bug

Thanks Brat (Atlassian) and Alexey for reporting this and providing a patch. Thanks guys, all the best to you and Atlassian.

git-svn-id: http://svn.opensymphony.com/svn/webwork/trunk@3000573baa09-0c28-0410-bef9-dab3c582ae83

Comments (0)

Files changed (3)

src/java/com/opensymphony/webwork/util/FastByteArrayOutputStream.java

  * does not copy buffers when it's expanded. There's also no copying of the internal buffer
  * if it's contents is extracted with the writeTo(stream) method.
  *
- * @author Rickard Öberg
- * @version $Revision$
+ * @author Rickard �berg
+ * @author Brat Baker (Atlassian)
+ * @author Alexey
+ * @version $Date$ $Id$
  */
 public class FastByteArrayOutputStream extends OutputStream {
 
     }
 
     public void writeTo(Writer out, String encoding) throws IOException {
-        // Check if we have a list of buffers
-        if (buffers != null) {
-            Iterator iter = buffers.iterator();
+        /*
+          There is design tradeoff between being fast, correct and using too much memory when decoding bytes to strings.
 
-            while (iter.hasNext()) {
-                byte[] bytes = (byte[]) iter.next();
+         The rules are thus :
 
-                if (encoding != null) {
-                    out.write(new String(bytes, encoding));
-                } else {
-                    out.write(new String(bytes));
-                }
-            }
+         1. if there is only one buffer then its a simple String conversion
+
+              REASON : Fast!!!
+
+         2. uses full buffer allocation annd System.arrayCopy() to smooosh together the bytes
+              and then use String conversion
+
+              REASON : Fast at the expense of a known amount of memory (eg the used memory * 2)
+        */
+        if (buffers != null)
+        {
+            // RULE 2 : a balance between using some memory and speed
+            writeToViaSmoosh(out, encoding);
         }
+        else
+        {
+            // RULE 1 : fastest!
+            writeToViaString(out, encoding);
+        }
+    }
 
-        // write the internal buffer directly
-        if (encoding != null) {
-            out.write(new String(buffer, 0, index, encoding));
-        } else {
-            out.write(new String(buffer, 0, index));
+    /**
+     * This can <b>ONLY</b> be called if there is only a single buffer to write, instead
+     * use {@link #writeTo(java.io.Writer, String)}, which auto detects if
+     * {@link #writeToViaString(java.io.Writer, String)} is to be used or
+     * {@link #writeToViaSmoosh(java.io.Writer, String)}.
+     *
+     * @param out      the JspWriter
+     * @param encoding the encoding
+     * @throws IOException
+     */
+    void writeToViaString(Writer out, String encoding) throws IOException
+    {
+        byte[] bufferToWrite = buffer; // this is always the last buffer to write
+        int bufferToWriteLen = index;  // index points to our place in the last buffer
+        writeToImpl(out, encoding, bufferToWrite, bufferToWriteLen);
+    }
+
+    /**
+     * This is recommended to be used where there's more than 1 buffer to write, instead
+     * use {@link #writeTo(java.io.Writer, String)} which auto detects if
+     * {@link #writeToViaString(java.io.Writer, String)} is to be used or
+     * {@link #writeToViaSmoosh(java.io.Writer, String)}.
+     * 
+     * @param out
+     * @param encoding
+     * @throws IOException
+     */
+    void writeToViaSmoosh(Writer out, String encoding) throws IOException
+    {
+        byte[] bufferToWrite = toByteArray();
+        int bufferToWriteLen = bufferToWrite.length;
+        writeToImpl(out, encoding, bufferToWrite, bufferToWriteLen);
+    }
+
+    /**
+     * Write <code>bufferToWriteLen</code> of bytes from <code>bufferToWrite</code> to
+     * <code>out</code> encoding it at the same time.
+     * 
+     * @param out
+     * @param encoding
+     * @param bufferToWrite
+     * @param bufferToWriteLen
+     * @throws IOException
+     */
+    private void writeToImpl(Writer out, String encoding, byte[] bufferToWrite, int bufferToWriteLen)
+            throws IOException
+    {
+        String writeStr;
+        if (encoding != null)
+        {
+            writeStr = new String(bufferToWrite, 0, bufferToWriteLen, encoding);
+        }
+        else
+        {
+            writeStr = new String(bufferToWrite, 0, bufferToWriteLen);
         }
+        out.write(writeStr);
     }
 
     /**

src/test/com/opensymphony/webwork/util/EncodingTestCase.java

+package com.opensymphony.webwork.util;
+
+import junit.framework.Assert;
+import junit.framework.AssertionFailedError;
+import junit.framework.TestCase;
+
+import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
+
+/**
+ * Holds test strings and encodings.  There was more test written hence the base class.
+ *
+ * @author Brad Baker (Atlassian)
+ * @version $Date$ $Id$
+ */
+public abstract class EncodingTestCase extends TestCase
+{
+
+    static final int K = 1024;
+
+    static final String ASCII = "ASCII";
+    static final String UTF_8 = "UTF-8";
+    static final String LATIN = "ISO8859_1";
+    static final String UTF_16 = "UTF-16";
+    static final String KOI8_R = "KOI8_R";
+    static final String WINDOWS_CYRILLIC = "Cp1251";
+    static final String UBIG_ENDIAN = "UnicodeBig";
+    static final String ULITTLE_ENDIAN = "UnicodeLittle";
+    static final String UBIG_ENDIAN_UNMARKED = "UnicodeBigUnmarked";
+    static final String ULITTLE_ENDIAN_UNMARKED = "UnicodeLittleUnmarked";
+
+
+    static final String RUSSIAN_DESC1 = "\u0418\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0439\u0442\u0435 \u0434\u0430\u043d\u043d\u0443\u044e \u0444\u043e\u0440\u043c\u0443 \u0434\u043b\u044f \u0434\u043e\u0431\u0430\u0432\u043b\u0435\u043d\u0438\u044f \u043a\u043e\u043c\u043c\u0435\u043d\u0442\u0430\u0440\u0438\u044f";
+    static final String RUSSIAN_DESC_SHORT = "\u0418\u0441\u043f\u043e";
+    static final String ASCII_TEXT = "will this string be written to bytes and then back to chars as expected?";
+    static final String ASCII_TEXT_SHORT = "ok";
+    static final String CHINESE_TIMETRACKING = "\\u65f6\\u95f4\\u8ddf\\u8e2a\\u62a5\\u544a";
+    static final String HUNGRIAN_APPLET_PROBLEM = "Az Applet biztons\\u00e1gi be\\u00e1ll\\u00edt\\u00e1sai helytelenek.  Fogadja el az Applet tan\\u00fas\\u00edtv\\u00e1ny\\u00e1t a futtat\\u00e1shoz.";
+
+
+    protected abstract void implementEncodingTest(String expectedStr, String encoding, int bufferSize) throws Exception;
+
+    void assertEncoding(String expectedStr, String encoding) throws Exception
+    {
+        doAssertionsOnCombinations(new String[] { expectedStr }, new String[] { encoding });
+    }
+
+    public void assertEncodings(String[] testStrs, String[] encodings) throws Exception
+    {
+        doAssertionsOnCombinations(testStrs, encodings);
+    }
+
+    private void doAssertionsOnCombinations(String[] testStrs, String[] encodings) throws Exception
+    {
+        int[] bufferSizes = { 1, 3, 5, 10, 101, 1024, 8192 };
+        int[] outputLens = { 100 * K, 50 * K, 20 * K, 10 * K, 1 * K };
+
+        // given a bunch  of output buffer sizes
+        for (int i = 0; i < encodings.length; i++)
+        {
+            String encoding = encodings[i];
+
+            // but only if we have the encoding in this JDK
+            if (Charset.isSupported(encoding))
+            {
+                for (int j = 0; j < bufferSizes.length; j++)
+                {
+                    int bufferSize = bufferSizes[j];
+
+                    // with a number of strings
+                    for (int k = 0; k < testStrs.length; k++)
+                    {
+                        String testStr = testStrs[k];
+                        implementEncodingTest(testStr, encoding, bufferSize);
+
+                        // now make the strings bigger up to a bunch of sizes
+                        for (int l = 0; l < outputLens.length; l++)
+                        {
+                            int outputLen = outputLens[l];
+                            final String largeStr = makeRoughly(testStr, outputLen);
+                            try
+                            {
+                                implementEncodingTest(largeStr, encoding, bufferSize);
+                            }
+                            catch (AssertionFailedError afe)
+                            {
+                                throw afe;
+                            }
+                            catch (Throwable t)
+                            {
+                                t.printStackTrace();
+                                throw new RuntimeException("enc :" + encoding + " bufferSize:" + bufferSize + " outputlen:" + outputLen);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    protected String makeRoughly(String srcStr, int roughSize)
+    {
+        StringBuffer sb = new StringBuffer(srcStr);
+        while (sb.length() < roughSize)
+        {
+            sb.append(srcStr);
+        }
+        return sb.toString();
+    }
+
+
+    protected void assertTheyAreEqual(Object expected, Object actual, String encoding)
+            throws UnsupportedEncodingException
+    {
+        String expectedStr = expected.toString();
+        String actualStr = actual.toString();
+        try
+        {
+
+            Assert.assertEquals(expectedStr, actualStr);
+        }
+        catch (AssertionFailedError afe)
+        {
+
+            System.out.println("___ " + encoding + "__________");
+
+            System.out.println("EL:|" + expectedStr.length() + "|");
+            System.out.println("AL:|" + actualStr.length() + "|");
+
+
+            System.out.println("E:|" + expectedStr + "|");
+            System.out.println("A:|" + actualStr + "|");
+
+            byte[] expectedBytes = expectedStr.getBytes(encoding);
+            byte[] actualBytes = actualStr.getBytes(encoding);
+
+            System.out.println("E:|" + toHex(expectedBytes) + "|");
+            System.out.println("A:|" + toHex(actualBytes) + "|");
+
+            throw afe;
+        }
+    }
+
+    String toHex(byte[] bytes)
+    {
+        StringBuffer sb = new StringBuffer();
+        for (int i = 0; i < bytes.length; i++)
+        {
+            sb.append(Integer.toHexString(bytes[i]).toUpperCase());
+        }
+        return sb.toString();
+    }
+
+
+}

src/test/com/opensymphony/webwork/util/FastByteArrayOutputStreamTestCase.java

+package com.opensymphony.webwork.util;
+
+import java.io.IOException;
+import javax.servlet.jsp.JspWriter;
+
+/**
+ * A test class for {@link webwork.util.FastByteArrayOutputStream}
+ *
+ * @author Brad Baker (Atlassian)
+ * @since $Date$ $Id$
+ */
+public class FastByteArrayOutputStreamTestCase extends EncodingTestCase
+{
+
+    public void testLatinCharsets() throws Exception
+    {
+        assertEncoding(ASCII_TEXT, LATIN);
+        assertEncoding(ASCII_TEXT, ASCII);
+    }
+
+    public void testRussianCharsets() throws Exception
+    {
+        assertEncoding(RUSSIAN_DESC_SHORT, KOI8_R);
+        assertEncoding(RUSSIAN_DESC1, KOI8_R);
+
+        assertEncoding(RUSSIAN_DESC_SHORT, WINDOWS_CYRILLIC);
+        assertEncoding(RUSSIAN_DESC1, WINDOWS_CYRILLIC);
+    }
+
+    public void testUnicodeCharsets() throws Exception
+    {
+        String[] testStrs = {ASCII_TEXT_SHORT, ASCII_TEXT, RUSSIAN_DESC_SHORT, RUSSIAN_DESC1, CHINESE_TIMETRACKING, HUNGRIAN_APPLET_PROBLEM, };
+        String[] encodings = { UTF_8, UTF_16, UBIG_ENDIAN, ULITTLE_ENDIAN, UBIG_ENDIAN_UNMARKED, ULITTLE_ENDIAN_UNMARKED };
+
+        assertEncodings(testStrs, encodings);
+    }
+
+    protected void implementEncodingTest(final String srcStr, final String encoding, final int bufferSize)
+            throws Exception
+    {
+        FastByteArrayOutputStream bout = new FastByteArrayOutputStream(bufferSize);
+
+        byte[] bytes = srcStr.getBytes(encoding);
+        bout.write(bytes);
+
+        JspWriter writer = new StringCapturingJspWriter();
+        bout.writeTo(writer, encoding);
+
+        String actualStr = writer.toString();
+        String expectedStr = new String(bytes, encoding);
+        assertTheyAreEqual(expectedStr, actualStr, encoding);
+    }
+
+
+    /**
+     * Before it was changed to use {@link java.nio.charset.CharsetDecoder} is took an this time
+     * <p/>
+     * Total Call Time = 1112.0ms
+     * Average Call Time = 0.001112ms
+     * <p/>
+     * Now with the change it takes this time
+     * <p/>
+     * <p/>
+     * The idea is that it did not get significantly worse in performance
+     *
+     * @throws IOException
+     */
+    public void testPerformanceOfWriteToJspWriter() throws IOException
+    {
+        final String NINEK_STR = makeRoughly(ASCII, 9 * K);
+        final String FIFTYK_STR = makeRoughly(ASCII, 50 * K);
+        final String ONEHUNDREDK_STR = makeRoughly(ASCII, 100 * K);
+
+        testPerformanceOfWriteToJspWriter(new TextSource()
+        {
+            public String getDesc()
+            {
+                return "With < than 8K of data";
+            }
+
+            public String getText(final int times)
+            {
+                return ASCII_TEXT;
+            }
+        });
+
+        testPerformanceOfWriteToJspWriter(new TextSource()
+        {
+            public String getDesc()
+            {
+                return "With > than 8K of data";
+            }
+
+            public String getText(final int times)
+            {
+                return NINEK_STR;
+            }
+        });
+
+        testPerformanceOfWriteToJspWriter(new TextSource()
+        {
+            public String getDesc()
+            {
+                return "With a 2/3 mix of small data and 1/3 > 8K of data";
+            }
+
+            public String getText(final int times)
+            {
+                if (times % 3 == 0)
+                {
+                    return NINEK_STR;
+                }
+                return ASCII_TEXT;
+            }
+        });
+
+        testPerformanceOfWriteToJspWriter(new TextSource()
+        {
+            public String getDesc()
+            {
+                return "With a 1/2 mix of small data and 1/2 > 8K of data";
+            }
+
+            public String getText(final int times)
+            {
+                if (times % 2 == 0)
+                {
+                    return NINEK_STR;
+                }
+                return ASCII_TEXT;
+            }
+        });
+
+        testPerformanceOfWriteToJspWriter(new TextSource()
+        {
+            public String getDesc()
+            {
+                return "With 50K of data";
+            }
+
+            public String getText(final int times)
+            {
+                return FIFTYK_STR;
+            }
+        });
+
+        testPerformanceOfWriteToJspWriter(new TextSource()
+        {
+            public String getDesc()
+            {
+                return "With 100K of data";
+            }
+
+            public String getText(final int times)
+            {
+                return ONEHUNDREDK_STR;
+            }
+        });
+
+
+    }
+
+    
+    public void testPerformanceOfWriteToJspWriter(TextSource textSource) throws IOException
+    {
+        NoopJspWriter noopJspWriter = new NoopJspWriter();
+        String[] methods = {
+                "writeTo (using hueristics)",
+                "writeToViaSmoosh",
+        };
+
+        System.out.println("====================");
+        System.out.println(textSource.getDesc());
+        System.out.println();
+        float bestTime = Float.MAX_VALUE;
+        String bestMethod = methods[0];
+        for (int methodIndex = 0; methodIndex < methods.length; methodIndex++)
+        {
+            String method = methods[methodIndex];
+
+            float totalTime = 0;
+            final int MAX_TIMES = 10;
+            final int MAX_ITERATIONS = 100;
+            for (int times = 0; times < MAX_TIMES; times++)
+            {
+                String srcText = textSource.getText(times);
+                for (int i = 0; i < MAX_ITERATIONS; i++)
+                {
+                    FastByteArrayOutputStream bout = new FastByteArrayOutputStream();
+                    bout.write(srcText.getBytes(UTF_8));
+
+                    // just time the JspWriter output. And let it warm u first as well
+                    if (times > 3)
+                    {
+                        long then = System.currentTimeMillis();
+                        switch (methodIndex)
+                        {
+                            case 0:
+                                bout.writeTo(noopJspWriter, UTF_8);
+                                break;
+                            case 1:
+                                bout.writeToViaSmoosh(noopJspWriter, UTF_8);
+                                break;
+                        }
+                        long now = System.currentTimeMillis();
+                        totalTime += (now - then);
+                    }
+                }
+            }
+            float avgTime = totalTime / MAX_TIMES / MAX_ITERATIONS;
+            System.out.println(method + "  - Total Call Time = " + totalTime + "ms");
+            System.out.println(method + " - Average Call Time = " + avgTime + "ms");
+            System.out.println();
+
+            if (avgTime < bestTime) {
+                bestTime = avgTime;
+                bestMethod = method;
+            }
+        }
+        System.out.println(bestMethod + " was the best method - Average Call Time = " + bestTime + "ms");
+        System.out.println("____________________\n");
+
+    }
+
+    interface TextSource
+    {
+        String getDesc();
+
+        String getText(int times);
+    }
+
+    static class StringCapturingJspWriter extends NoopJspWriter
+    {
+        StringCapturingJspWriter()
+        {
+            super(true);
+        }
+    }
+
+
+    static class NoopJspWriter extends JspWriter
+    {
+        final StringBuffer sb = new StringBuffer();
+        final boolean capture;
+
+        NoopJspWriter()
+        {
+            this(false);
+        }
+
+        NoopJspWriter(boolean capture)
+        {
+            super(0, false);
+            this.capture = capture;
+        }
+
+        NoopJspWriter(final int i, final boolean b)
+        {
+            super(i, b);
+            this.capture = false;
+        }
+
+        public String toString()
+        {
+            return sb.toString();
+        }
+
+        public void clear() throws IOException
+        {
+        }
+
+        public void clearBuffer() throws IOException
+        {
+        }
+
+        public void close() throws IOException
+        {
+        }
+
+        public void flush() throws IOException
+        {
+        }
+
+        public int getRemaining()
+        {
+            return 0;
+        }
+
+        public void newLine() throws IOException
+        {
+        }
+
+        public void print(final char c) throws IOException
+        {
+            if (capture)
+            {
+                sb.append(c);
+            }
+        }
+
+        public void print(final double v) throws IOException
+        {
+            if (capture)
+            {
+                sb.append(v);
+            }
+        }
+
+        public void print(final float v) throws IOException
+        {
+            if (capture)
+            {
+                sb.append(v);
+            }
+        }
+
+        public void print(final int i) throws IOException
+        {
+            if (capture)
+            {
+                sb.append(i);
+            }
+        }
+
+        public void print(final long l) throws IOException
+        {
+            if (capture)
+            {
+                sb.append(l);
+            }
+        }
+
+        public void print(final Object o) throws IOException
+        {
+            if (capture)
+            {
+                sb.append(o);
+            }
+        }
+
+        public void print(final String s) throws IOException
+        {
+            if (capture)
+            {
+                sb.append(s);
+            }
+        }
+
+        public void print(final boolean b) throws IOException
+        {
+            if (capture)
+            {
+                sb.append(b);
+            }
+        }
+
+        public void print(final char[] chars) throws IOException
+        {
+            if (capture)
+            {
+                sb.append(chars);
+            }
+        }
+
+        public void println() throws IOException
+        {
+            print('\n');
+        }
+
+        public void println(final char c) throws IOException
+        {
+            print(c);
+            println();
+        }
+
+        public void println(final double v) throws IOException
+        {
+            print(v);
+            println();
+        }
+
+        public void println(final float v) throws IOException
+        {
+            print(v);
+            println();
+        }
+
+        public void println(final int i) throws IOException
+        {
+            print(i);
+            println();
+        }
+
+        public void println(final long l) throws IOException
+        {
+            print(l);
+            println();
+        }
+
+        public void println(final Object o) throws IOException
+        {
+            print(o);
+            println();
+        }
+
+        public void println(final String s) throws IOException
+        {
+            print(s);
+            println();
+        }
+
+        public void println(final boolean b) throws IOException
+        {
+            print(b);
+            println();
+        }
+
+        public void println(final char[] chars) throws IOException
+        {
+            print(chars);
+            println();
+        }
+
+        public void write(final char cbuf[], final int off, final int len) throws IOException
+        {
+            String s = new String(cbuf, off, len);
+            print(s);
+        }
+    }
+}