8339699: Optimize DataOutputStream writeUTF

Reviewed-by: liach, bpb
wangweij · Oct 4, 2024 · b42fbf4 · b42fbf4
1 parent 5592894
commit b42fbf4
Show file tree

Hide file tree

Showing 5 changed files with 252 additions and 182 deletions.
diff --git a/src/java.base/share/classes/java/io/DataOutputStream.java b/src/java.base/share/classes/java/io/DataOutputStream.java
@@ -1,5 +1,6 @@
 /*
- * Copyright (c) 1994, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1994, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,8 +26,13 @@
 
 package java.io;
 
+import jdk.internal.access.JavaLangAccess;
+import jdk.internal.access.SharedSecrets;
 import jdk.internal.util.ByteArray;
 
+import static jdk.internal.util.ModifiedUtf.putChar;
+import static jdk.internal.util.ModifiedUtf.utfLen;
+
 /**
  * A data output stream lets an application write primitive Java data
  * types to an output stream in a portable way. An application can
@@ -44,6 +50,8 @@
  * @since   1.0
  */
 public class DataOutputStream extends FilterOutputStream implements DataOutput {
+    private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
+
     /**
      * The number of bytes written to the data output stream so far.
      * If this counter overflows, it will be wrapped to Integer.MAX_VALUE.
@@ -352,15 +360,11 @@ public final void writeUTF(String str) throws IOException {
      *             {@code str} would exceed 65535 bytes in length
      * @throws     IOException  if some other I/O error occurs.
      */
+    @SuppressWarnings("deprecation")
     static int writeUTF(String str, DataOutput out) throws IOException {
         final int strlen = str.length();
-        int utflen = strlen; // optimized for ASCII
-
-        for (int i = 0; i < strlen; i++) {
-            int c = str.charAt(i);
-            if (c >= 0x80 || c == 0)
-                utflen += (c >= 0x800) ? 2 : 1;
-        }
+        int countNonZeroAscii = JLA.countNonZeroAscii(str);
+        int utflen = utfLen(str, countNonZeroAscii);
 
         if (utflen > 65535 || /* overflow */ utflen < strlen)
             throw new UTFDataFormatException(tooLongMsg(str, utflen));
@@ -377,25 +381,11 @@ static int writeUTF(String str, DataOutput out) throws IOException {
         int count = 0;
         ByteArray.setUnsignedShort(bytearr, count, utflen);
         count += 2;
-        int i = 0;
-        for (i = 0; i < strlen; i++) { // optimized for initial run of ASCII
-            int c = str.charAt(i);
-            if (c >= 0x80 || c == 0) break;
-            bytearr[count++] = (byte) c;
-        }
+        str.getBytes(0, countNonZeroAscii, bytearr, count);
+        count += countNonZeroAscii;
 
-        for (; i < strlen; i++) {
-            int c = str.charAt(i);
-            if (c < 0x80 && c != 0) {
-                bytearr[count++] = (byte) c;
-            } else if (c >= 0x800) {
-                bytearr[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
-                bytearr[count++] = (byte) (0x80 | ((c >>  6) & 0x3F));
-                bytearr[count++] = (byte) (0x80 | ((c >>  0) & 0x3F));
-            } else {
-                bytearr[count++] = (byte) (0xC0 | ((c >>  6) & 0x1F));
-                bytearr[count++] = (byte) (0x80 | ((c >>  0) & 0x3F));
-            }
+        for (int i = countNonZeroAscii; i < strlen;) {
+            count = putChar(bytearr, count, str.charAt(i++));
         }
         out.write(bytearr, 0, utflen + 2);
         return utflen + 2;

diff --git a/src/java.base/share/classes/java/io/ObjectOutputStream.java b/src/java.base/share/classes/java/io/ObjectOutputStream.java
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 1996, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,8 +35,13 @@
 import java.util.StringJoiner;
 
 import jdk.internal.util.ByteArray;
+import jdk.internal.access.JavaLangAccess;
+import jdk.internal.access.SharedSecrets;
 import sun.reflect.misc.ReflectUtil;
 
+import static jdk.internal.util.ModifiedUtf.putChar;
+import static jdk.internal.util.ModifiedUtf.utfLen;
+
 /**
  * An ObjectOutputStream writes primitive data types and graphs of Java objects
  * to an OutputStream.  The objects can be read (reconstituted) using an
@@ -169,6 +175,7 @@
 public class ObjectOutputStream
     extends OutputStream implements ObjectOutput, ObjectStreamConstants
 {
+    private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
 
     private static class Caches {
         /** cache of subclass security audit results */
@@ -885,7 +892,7 @@ public void writeChars(String str) throws IOException {
      *          stream
      */
     public void writeUTF(String str) throws IOException {
-        bout.writeUTF(str);
+        bout.writeUTFInternal(str, false); // false: no type code byte is written before the length
     }
 
     /**
@@ -1317,14 +1324,7 @@ private void writeNonProxyDesc(ObjectStreamClass desc, boolean unshared)
      */
     private void writeString(String str, boolean unshared) throws IOException {
         handles.assign(unshared ? null : str);
-        long utflen = bout.getUTFLength(str);
-        if (utflen <= 0xFFFF) {
-            bout.writeByte(TC_STRING);
-            bout.writeUTF(str, utflen);
-        } else {
-            bout.writeByte(TC_LONGSTRING);
-            bout.writeLongUTF(str, utflen);
-        }
+        bout.writeUTFInternal(str, true); // true: TC_STRING or TC_LONGSTRING is written before the length
     }
 
     /**
@@ -1994,26 +1994,27 @@ public void writeDouble(double v) throws IOException {
             }
         }
 
-        public void writeBytes(String s) throws IOException {
-            int endoff = s.length();
-            int cpos = 0;
-            int csize = 0;
-            for (int off = 0; off < endoff; ) {
-                if (cpos >= csize) {
-                    cpos = 0;
-                    csize = Math.min(endoff - off, CHAR_BUF_SIZE);
-                    s.getChars(off, off + csize, cbuf, 0);
-                }
-                if (pos >= MAX_BLOCK_SIZE) {
+        @SuppressWarnings("deprecation") // String.getBytes(int, int, byte[], int) is deprecated
+        void writeBytes(String s, int len) throws IOException { // writes the first len chars of s, one byte per char
+            int pos = this.pos; // local copy of the buffer position; stored back before drain() and on exit
+            for (int strpos = 0; strpos < len;) {
+                int rem = MAX_BLOCK_SIZE - pos; // room left before pos reaches MAX_BLOCK_SIZE
+                int csize = Math.min(len - strpos, rem); // chunk size: whatever is left, capped by the room in buf
+                s.getBytes(strpos, strpos + csize, buf, pos); // each byte receives the low-order 8 bits of its char
+                pos += csize;
+                strpos += csize;
+
+                if (pos == MAX_BLOCK_SIZE) {
+                    this.pos = pos; // store the position back before calling drain()
                     drain();
+                    pos = 0; // continue filling buf from index 0
                 }
-                int n = Math.min(csize - cpos, MAX_BLOCK_SIZE - pos);
-                int stop = pos + n;
-                while (pos < stop) {
-                    buf[pos++] = (byte) cbuf[cpos++];
-                }
-                off += n;
             }
+            this.pos = pos;
+        }
+
+        public void writeBytes(String s) throws IOException {
+            writeBytes(s, s.length());
         }
 
         public void writeChars(String s) throws IOException {
@@ -2026,8 +2027,78 @@ public void writeChars(String s) throws IOException {
             }
         }
 
-        public void writeUTF(String s) throws IOException {
-            writeUTF(s, getUTFLength(s));
+        public void writeUTF(String str) throws IOException {
+            writeUTFInternal(str, false);
+        }
+
+        /**
+         * Writes the modified UTF-8 encoding of {@code str}, preceded by its
+         * encoded length.
+         *
+         * <p>An encoded length of at most 0xFFFF bytes is written as a 2-byte
+         * value. A larger length is written as an 8-byte value, which is only
+         * done when {@code writeHeader} is {@code true}.
+         *
+         * @param str the string to write
+         * @param writeHeader if {@code true}, first write a {@code TC_STRING}
+         *        or {@code TC_LONGSTRING} type code matching the length form
+         * @throws UTFDataFormatException if {@code writeHeader} is {@code false}
+         *         and the encoded length exceeds 0xFFFF bytes
+         * @throws IOException if an I/O error occurs
+         */
+        private void writeUTFInternal(String str, boolean writeHeader) throws IOException {
+            int strlen = str.length();
+            int countNonZeroAscii = JLA.countNonZeroAscii(str);
+            // Accumulate in a long: the encoded length can reach three times the
+            // string length, which an int cannot always hold.
+            long utflen = strlen;
+            for (int i = countNonZeroAscii; i < strlen; i++) {
+                int c = str.charAt(i);
+                if (c >= 0x80 || c == 0) {
+                    utflen += (c >= 0x800) ? 2 : 1;
+                }
+            }
+            if (utflen <= 0xFFFF) {
+                if (writeHeader) {
+                    writeByte(TC_STRING);
+                }
+                writeShort((int) utflen);
+            } else if (writeHeader) {
+                writeByte(TC_LONGSTRING);
+                writeLong(utflen);
+            } else {
+                // Plain writeUTF has no long form: reject the string, as the
+                // replaced writeUTF(String, long) did, rather than emit an
+                // 8-byte length.
+                throw new UTFDataFormatException();
+            }
+
+            if (countNonZeroAscii != 0) {
+                writeBytes(str, countNonZeroAscii);
+            }
+            if (countNonZeroAscii != strlen) {
+                writeMoreUTF(str, countNonZeroAscii);
+            }
+        }
+
+        /**
+         * Encodes the characters of {@code str} from index {@code stroff} to the
+         * end into {@code buf}, calling {@code drain()} first whenever
+         * {@code pos} plus the next character's size reaches {@code MAX_BLOCK_SIZE}.
+         */
+        private void writeMoreUTF(String str, int stroff) throws IOException {
+            int pos = this.pos;
+            for (int strlen = str.length(); stroff < strlen;) {
+                char c = str.charAt(stroff++);
+                int csize = c != 0 && c < 0x80 ? 1 : c >= 0x800 ? 3 : 2;
+                if (pos + csize >= MAX_BLOCK_SIZE) {
+                    this.pos = pos;
+                    drain();
+                    pos = 0;
+                }
+                pos = putChar(buf, pos, c);
+            }
+            this.pos = pos;
         }
 
 
@@ -2153,112 +2193,6 @@ void writeDoubles(double[] v, int off, int len) throws IOException {
                 }
             }
         }
-
-        /**
-         * Returns the length in bytes of the UTF encoding of the given string.
-         */
-        long getUTFLength(String s) {
-            int len = s.length();
-            long utflen = 0;
-            for (int off = 0; off < len; ) {
-                int csize = Math.min(len - off, CHAR_BUF_SIZE);
-                s.getChars(off, off + csize, cbuf, 0);
-                for (int cpos = 0; cpos < csize; cpos++) {
-                    char c = cbuf[cpos];
-                    if (c >= 0x0001 && c <= 0x007F) {
-                        utflen++;
-                    } else if (c > 0x07FF) {
-                        utflen += 3;
-                    } else {
-                        utflen += 2;
-                    }
-                }
-                off += csize;
-            }
-            return utflen;
-        }
-
-        /**
-         * Writes the given string in UTF format.  This method is used in
-         * situations where the UTF encoding length of the string is already
-         * known; specifying it explicitly avoids a prescan of the string to
-         * determine its UTF length.
-         */
-        void writeUTF(String s, long utflen) throws IOException {
-            if (utflen > 0xFFFFL) {
-                throw new UTFDataFormatException();
-            }
-            writeShort((int) utflen);
-            if (utflen == (long) s.length()) {
-                writeBytes(s);
-            } else {
-                writeUTFBody(s);
-            }
-        }
-
-        /**
-         * Writes given string in "long" UTF format.  "Long" UTF format is
-         * identical to standard UTF, except that it uses an 8 byte header
-         * (instead of the standard 2 bytes) to convey the UTF encoding length.
-         */
-        void writeLongUTF(String s) throws IOException {
-            writeLongUTF(s, getUTFLength(s));
-        }
-
-        /**
-         * Writes given string in "long" UTF format, where the UTF encoding
-         * length of the string is already known.
-         */
-        void writeLongUTF(String s, long utflen) throws IOException {
-            writeLong(utflen);
-            if (utflen == (long) s.length()) {
-                writeBytes(s);
-            } else {
-                writeUTFBody(s);
-            }
-        }
-
-        /**
-         * Writes the "body" (i.e., the UTF representation minus the 2-byte or
-         * 8-byte length header) of the UTF encoding for the given string.
-         */
-        private void writeUTFBody(String s) throws IOException {
-            int limit = MAX_BLOCK_SIZE - 3;
-            int len = s.length();
-            for (int off = 0; off < len; ) {
-                int csize = Math.min(len - off, CHAR_BUF_SIZE);
-                s.getChars(off, off + csize, cbuf, 0);
-                for (int cpos = 0; cpos < csize; cpos++) {
-                    char c = cbuf[cpos];
-                    if (pos <= limit) {
-                        if (c <= 0x007F && c != 0) {
-                            buf[pos++] = (byte) c;
-                        } else if (c > 0x07FF) {
-                            buf[pos + 2] = (byte) (0x80 | ((c >> 0) & 0x3F));
-                            buf[pos + 1] = (byte) (0x80 | ((c >> 6) & 0x3F));
-                            buf[pos + 0] = (byte) (0xE0 | ((c >> 12) & 0x0F));
-                            pos += 3;
-                        } else {
-                            buf[pos + 1] = (byte) (0x80 | ((c >> 0) & 0x3F));
-                            buf[pos + 0] = (byte) (0xC0 | ((c >> 6) & 0x1F));
-                            pos += 2;
-                        }
-                    } else {    // write one byte at a time to normalize block
-                        if (c <= 0x007F && c != 0) {
-                            write(c);
-                        } else if (c > 0x07FF) {
-                            write(0xE0 | ((c >> 12) & 0x0F));
-                            write(0x80 | ((c >> 6) & 0x3F));
-                            write(0x80 | ((c >> 0) & 0x3F));
-                        } else {
-                            write(0xC0 | ((c >> 6) & 0x1F));
-                            write(0x80 | ((c >> 0) & 0x3F));
-                        }
-                    }
-                }
-                off += csize;
-            }
-        }
     }
 
     /**

diff --git a/src/java.base/share/classes/jdk/internal/classfile/impl/BufWriterImpl.java b/src/java.base/share/classes/jdk/internal/classfile/impl/BufWriterImpl.java
@@ -38,6 +38,9 @@
 import jdk.internal.access.SharedSecrets;
 import jdk.internal.vm.annotation.ForceInline;
 
+import static jdk.internal.util.ModifiedUtf.putChar;
+import static jdk.internal.util.ModifiedUtf.utfLen;
+
 public final class BufWriterImpl implements BufWriter {
     private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
 
@@ -162,14 +165,7 @@ public void writeBytes(BufWriterImpl other) {
     void writeUTF(String str) {
         int strlen = str.length();
         int countNonZeroAscii = JLA.countNonZeroAscii(str);
-        int utflen = strlen;
-        if (countNonZeroAscii != strlen) {
-            for (int i = countNonZeroAscii; i < strlen; i++) {
-                int c = str.charAt(i);
-                if (c >= 0x80 || c == 0)
-                    utflen += (c >= 0x800) ? 2 : 1;
-            }
-        }
+        int utflen = utfLen(str, countNonZeroAscii);
         if (utflen > 65535) {
             throw new IllegalArgumentException("string too long");
         }
@@ -185,20 +181,8 @@ void writeUTF(String str) {
         str.getBytes(0, countNonZeroAscii, elems, offset);
         offset += countNonZeroAscii;
 
-        for (int i = countNonZeroAscii; i < strlen; ++i) {
-            char c = str.charAt(i);
-            if (c >= '\001' && c <= '\177') {
-                elems[offset++] = (byte) c;
-            } else if (c > '\u07FF') {
-                elems[offset    ] = (byte) (0xE0 | c >> 12 & 0xF);
-                elems[offset + 1] = (byte) (0x80 | c >> 6 & 0x3F);
-                elems[offset + 2] = (byte) (0x80 | c      & 0x3F);
-                offset += 3;
-            } else {
-                elems[offset    ] = (byte) (0xC0 | c >> 6 & 0x1F);
-                elems[offset + 1] = (byte) (0x80 | c      & 0x3F);
-                offset += 2;
-            }
+        for (int i = countNonZeroAscii; i < strlen; i++) {
+            offset = putChar(elems, offset, str.charAt(i));
         }
 
         this.offset = offset;