Skip to content

Commit

Permalink
8339699: Optimize DataOutputStream writeUTF
Browse files Browse the repository at this point in the history
Reviewed-by: liach, bpb
  • Loading branch information
wenshao committed Oct 4, 2024
1 parent 5592894 commit b42fbf4
Show file tree
Hide file tree
Showing 5 changed files with 252 additions and 182 deletions.
42 changes: 16 additions & 26 deletions src/java.base/share/classes/java/io/DataOutputStream.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* Copyright (c) 1994, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1994, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand All @@ -25,8 +26,13 @@

package java.io;

import jdk.internal.access.JavaLangAccess;
import jdk.internal.access.SharedSecrets;
import jdk.internal.util.ByteArray;

import static jdk.internal.util.ModifiedUtf.putChar;
import static jdk.internal.util.ModifiedUtf.utfLen;

/**
* A data output stream lets an application write primitive Java data
* types to an output stream in a portable way. An application can
Expand All @@ -44,6 +50,8 @@
* @since 1.0
*/
public class DataOutputStream extends FilterOutputStream implements DataOutput {
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();

/**
* The number of bytes written to the data output stream so far.
* If this counter overflows, it will be wrapped to Integer.MAX_VALUE.
Expand Down Expand Up @@ -352,15 +360,11 @@ public final void writeUTF(String str) throws IOException {
* {@code str} would exceed 65535 bytes in length
* @throws IOException if some other I/O error occurs.
*/
@SuppressWarnings("deprecation")
static int writeUTF(String str, DataOutput out) throws IOException {
final int strlen = str.length();
int utflen = strlen; // optimized for ASCII

for (int i = 0; i < strlen; i++) {
int c = str.charAt(i);
if (c >= 0x80 || c == 0)
utflen += (c >= 0x800) ? 2 : 1;
}
int countNonZeroAscii = JLA.countNonZeroAscii(str);
int utflen = utfLen(str, countNonZeroAscii);

if (utflen > 65535 || /* overflow */ utflen < strlen)
throw new UTFDataFormatException(tooLongMsg(str, utflen));
Expand All @@ -377,25 +381,11 @@ static int writeUTF(String str, DataOutput out) throws IOException {
int count = 0;
ByteArray.setUnsignedShort(bytearr, count, utflen);
count += 2;
int i = 0;
for (i = 0; i < strlen; i++) { // optimized for initial run of ASCII
int c = str.charAt(i);
if (c >= 0x80 || c == 0) break;
bytearr[count++] = (byte) c;
}
str.getBytes(0, countNonZeroAscii, bytearr, count);
count += countNonZeroAscii;

for (; i < strlen; i++) {
int c = str.charAt(i);
if (c < 0x80 && c != 0) {
bytearr[count++] = (byte) c;
} else if (c >= 0x800) {
bytearr[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
bytearr[count++] = (byte) (0x80 | ((c >> 6) & 0x3F));
bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
} else {
bytearr[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
bytearr[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
}
for (int i = countNonZeroAscii; i < strlen;) {
count = putChar(bytearr, count, str.charAt(i++));
}
out.write(bytearr, 0, utflen + 2);
return utflen + 2;
Expand Down
202 changes: 68 additions & 134 deletions src/java.base/share/classes/java/io/ObjectOutputStream.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* Copyright (c) 1996, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -34,8 +35,13 @@
import java.util.StringJoiner;

import jdk.internal.util.ByteArray;
import jdk.internal.access.JavaLangAccess;
import jdk.internal.access.SharedSecrets;
import sun.reflect.misc.ReflectUtil;

import static jdk.internal.util.ModifiedUtf.putChar;
import static jdk.internal.util.ModifiedUtf.utfLen;

/**
* An ObjectOutputStream writes primitive data types and graphs of Java objects
* to an OutputStream. The objects can be read (reconstituted) using an
Expand Down Expand Up @@ -169,6 +175,7 @@
public class ObjectOutputStream
extends OutputStream implements ObjectOutput, ObjectStreamConstants
{
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();

private static class Caches {
/** cache of subclass security audit results */
Expand Down Expand Up @@ -885,7 +892,7 @@ public void writeChars(String str) throws IOException {
* stream
*/
public void writeUTF(String str) throws IOException {
bout.writeUTF(str);
bout.writeUTFInternal(str, false);
}

/**
Expand Down Expand Up @@ -1317,14 +1324,7 @@ private void writeNonProxyDesc(ObjectStreamClass desc, boolean unshared)
*/
private void writeString(String str, boolean unshared) throws IOException {
handles.assign(unshared ? null : str);
long utflen = bout.getUTFLength(str);
if (utflen <= 0xFFFF) {
bout.writeByte(TC_STRING);
bout.writeUTF(str, utflen);
} else {
bout.writeByte(TC_LONGSTRING);
bout.writeLongUTF(str, utflen);
}
bout.writeUTFInternal(str, true);
}

/**
Expand Down Expand Up @@ -1994,26 +1994,27 @@ public void writeDouble(double v) throws IOException {
}
}

public void writeBytes(String s) throws IOException {
int endoff = s.length();
int cpos = 0;
int csize = 0;
for (int off = 0; off < endoff; ) {
if (cpos >= csize) {
cpos = 0;
csize = Math.min(endoff - off, CHAR_BUF_SIZE);
s.getChars(off, off + csize, cbuf, 0);
}
if (pos >= MAX_BLOCK_SIZE) {
@SuppressWarnings("deprecation")
void writeBytes(String s, int len) throws IOException {
int pos = this.pos;
for (int strpos = 0; strpos < len;) {
int rem = MAX_BLOCK_SIZE - pos;
int csize = Math.min(len - strpos, rem);
s.getBytes(strpos, strpos + csize, buf, pos);
pos += csize;
strpos += csize;

if (pos == MAX_BLOCK_SIZE) {
this.pos = pos;
drain();
pos = 0;
}
int n = Math.min(csize - cpos, MAX_BLOCK_SIZE - pos);
int stop = pos + n;
while (pos < stop) {
buf[pos++] = (byte) cbuf[cpos++];
}
off += n;
}
this.pos = pos;
}

/**
 * Writes the whole of {@code s} by delegating to the two-argument
 * {@code writeBytes} overload with the string's full length.
 *
 * @param s the string to write
 * @throws IOException if the delegated write fails
 */
public void writeBytes(String s) throws IOException {
    final int fullLength = s.length();
    writeBytes(s, fullLength);
}

public void writeChars(String s) throws IOException {
Expand All @@ -2026,8 +2027,47 @@ public void writeChars(String s) throws IOException {
}
}

public void writeUTF(String s) throws IOException {
writeUTF(s, getUTFLength(s));
/**
 * Writes {@code str} through {@code writeUTFInternal} with the header flag
 * off, so no {@code TC_STRING}/{@code TC_LONGSTRING} type byte is emitted
 * ahead of the length.
 *
 * @param str the string to write
 * @throws IOException if the delegated write fails
 */
public void writeUTF(String str) throws IOException {
    final boolean writeHeader = false;
    writeUTFInternal(str, writeHeader);
}

/**
 * Writes {@code str} in modified UTF-8 format, preceded by its encoded
 * length.
 *
 * <p>If the encoded length fits in 16 bits, the length is written with
 * {@code writeShort}; otherwise it is written with {@code writeLong}
 * (the "long" UTF form). When {@code writeHeader} is {@code true} the
 * matching type byte ({@code TC_STRING} or {@code TC_LONGSTRING}) is
 * written first.
 *
 * <p>When {@code writeHeader} is {@code false} there is no type byte to
 * tell a reader which length width was used, so only the 16-bit form is
 * legal; a longer string is rejected before anything is written.
 *
 * @param str         the string to write
 * @param writeHeader whether to emit the TC_STRING/TC_LONGSTRING type byte
 * @throws UTFDataFormatException if {@code writeHeader} is {@code false}
 *         and the encoded length exceeds 65535 bytes
 * @throws IOException if an I/O error occurs while writing
 */
private void writeUTFInternal(String str, boolean writeHeader) throws IOException {
    int strlen = str.length();
    int countNonZeroAscii = JLA.countNonZeroAscii(str);

    // Accumulate the encoded length in a long. Each char encodes to at most
    // 3 bytes, so an int total can overflow for very long strings and would
    // then be mistaken for a short (<= 0xFFFF) length below.
    // The first countNonZeroAscii chars take one byte each; only the
    // remainder needs inspecting.
    long utflen = strlen;
    for (int i = countNonZeroAscii; i < strlen; i++) {
        char c = str.charAt(i);
        if (c >= 0x80 || c == 0) {
            utflen += (c >= 0x800) ? 2 : 1;
        }
    }

    if (utflen <= 0xFFFF) {
        if (writeHeader) {
            writeByte(TC_STRING);
        }
        writeShort((int) utflen);
    } else {
        if (!writeHeader) {
            // Header-less output always carries a 2-byte length; refuse
            // rather than silently switching to an 8-byte length.
            throw new UTFDataFormatException();
        }
        writeByte(TC_LONGSTRING);
        writeLong(utflen);
    }

    // Bulk-copy the leading one-byte chars, then encode whatever follows.
    if (countNonZeroAscii != 0) {
        writeBytes(str, countNonZeroAscii);
    }
    if (countNonZeroAscii != strlen) {
        writeMoreUTF(str, countNonZeroAscii);
    }
}

/**
 * Encodes the chars of {@code str} from index {@code stroff} to the end
 * into {@code buf} via {@code putChar}, draining the buffer whenever the
 * next encoded char would reach {@code MAX_BLOCK_SIZE}.
 *
 * @param str    the string being written
 * @param stroff index of the first char to encode
 * @throws IOException if draining the buffer fails
 */
private void writeMoreUTF(String str, int stroff) throws IOException {
    // Work on a local cursor; the field is synced before drain() and on exit.
    int cursor = this.pos;
    final int end = str.length();
    while (stroff < end) {
        final char ch = str.charAt(stroff);
        stroff++;

        // Encoded width: 1 byte for non-zero ASCII, 3 bytes from 0x800 up,
        // 2 bytes otherwise (including the zero char).
        final int width;
        if (ch != 0 && ch < 0x80) {
            width = 1;
        } else if (ch >= 0x800) {
            width = 3;
        } else {
            width = 2;
        }

        if (cursor + width >= MAX_BLOCK_SIZE) {
            this.pos = cursor;
            drain();
            cursor = 0;
        }
        cursor = putChar(buf, cursor, ch);
    }
    this.pos = cursor;
}


Expand Down Expand Up @@ -2153,112 +2193,6 @@ void writeDoubles(double[] v, int off, int len) throws IOException {
}
}
}

/**
* Returns the length in bytes of the UTF encoding of the given string.
*/
long getUTFLength(String s) {
    final int total = s.length();
    long encoded = 0;
    int consumed = 0;
    while (consumed < total) {
        // Copy the next slice (at most CHAR_BUF_SIZE chars) into cbuf.
        final int chunk = Math.min(total - consumed, CHAR_BUF_SIZE);
        s.getChars(consumed, consumed + chunk, cbuf, 0);
        for (int k = 0; k < chunk; k++) {
            final char ch = cbuf[k];
            if (ch > 0x07FF) {
                encoded += 3;
            } else if (ch == 0 || ch > 0x007F) {
                // The zero char and 0x0080..0x07FF take two bytes.
                encoded += 2;
            } else {
                encoded++;
            }
        }
        consumed += chunk;
    }
    return encoded;
}

/**
* Writes the given string in UTF format. This method is used in
* situations where the UTF encoding length of the string is already
* known; specifying it explicitly avoids a prescan of the string to
* determine its UTF length.
*/
void writeUTF(String s, long utflen) throws IOException {
    // The length header written below is a short, so larger values are rejected.
    final boolean tooLong = utflen > 0xFFFFL;
    if (tooLong) {
        throw new UTFDataFormatException();
    }
    writeShort((int) utflen);

    // If the supplied length equals the char count, the string is written
    // with writeBytes; otherwise it goes through the UTF body encoder.
    final boolean oneBytePerChar = (utflen == (long) s.length());
    if (!oneBytePerChar) {
        writeUTFBody(s);
        return;
    }
    writeBytes(s);
}

/**
* Writes given string in "long" UTF format. "Long" UTF format is
* identical to standard UTF, except that it uses an 8 byte header
* (instead of the standard 2 bytes) to convey the UTF encoding length.
*/
void writeLongUTF(String s) throws IOException {
    // Compute the encoded length first, then hand off to the overload
    // that takes it explicitly.
    final long encodedLength = getUTFLength(s);
    writeLongUTF(s, encodedLength);
}

/**
* Writes given string in "long" UTF format, where the UTF encoding
* length of the string is already known.
*/
void writeLongUTF(String s, long utflen) throws IOException {
    // The length header goes out first, via writeLong.
    writeLong(utflen);

    // Same body selection as the short form: writeBytes when the length
    // equals the char count, the UTF body encoder otherwise.
    final boolean oneBytePerChar = (utflen == (long) s.length());
    if (!oneBytePerChar) {
        writeUTFBody(s);
        return;
    }
    writeBytes(s);
}

/**
* Writes the "body" (i.e., the UTF representation minus the 2-byte or
* 8-byte length header) of the UTF encoding for the given string.
*/
private void writeUTFBody(String s) throws IOException {
// Highest buffer position at which a full 3-byte encoding can still be
// stored directly into buf without running past MAX_BLOCK_SIZE.
int limit = MAX_BLOCK_SIZE - 3;
int len = s.length();
// Process the string in slices of at most CHAR_BUF_SIZE chars, staged in cbuf.
for (int off = 0; off < len; ) {
int csize = Math.min(len - off, CHAR_BUF_SIZE);
s.getChars(off, off + csize, cbuf, 0);
for (int cpos = 0; cpos < csize; cpos++) {
char c = cbuf[cpos];
// Fast path: enough room for any encoding, so store bytes straight into buf.
if (pos <= limit) {
if (c <= 0x007F && c != 0) {
// Non-zero ASCII: one byte.
buf[pos++] = (byte) c;
} else if (c > 0x07FF) {
// Three-byte form: 1110xxxx 10xxxxxx 10xxxxxx.
buf[pos + 2] = (byte) (0x80 | ((c >> 0) & 0x3F));
buf[pos + 1] = (byte) (0x80 | ((c >> 6) & 0x3F));
buf[pos + 0] = (byte) (0xE0 | ((c >> 12) & 0x0F));
pos += 3;
} else {
// Two-byte form (also used for the zero char): 110xxxxx 10xxxxxx.
buf[pos + 1] = (byte) (0x80 | ((c >> 0) & 0x3F));
buf[pos + 0] = (byte) (0xC0 | ((c >> 6) & 0x1F));
pos += 2;
}
// Slow path near the end of the buffer: same encodings, emitted through
// write(int). NOTE(review): presumably write(int) drains the buffer when
// it fills - confirm against its definition.
} else { // write one byte at a time to normalize block
if (c <= 0x007F && c != 0) {
write(c);
} else if (c > 0x07FF) {
write(0xE0 | ((c >> 12) & 0x0F));
write(0x80 | ((c >> 6) & 0x3F));
write(0x80 | ((c >> 0) & 0x3F));
} else {
write(0xC0 | ((c >> 6) & 0x1F));
write(0x80 | ((c >> 0) & 0x3F));
}
}
}
off += csize;
}
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@
import jdk.internal.access.SharedSecrets;
import jdk.internal.vm.annotation.ForceInline;

import static jdk.internal.util.ModifiedUtf.putChar;
import static jdk.internal.util.ModifiedUtf.utfLen;

public final class BufWriterImpl implements BufWriter {
private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();

Expand Down Expand Up @@ -162,14 +165,7 @@ public void writeBytes(BufWriterImpl other) {
void writeUTF(String str) {
int strlen = str.length();
int countNonZeroAscii = JLA.countNonZeroAscii(str);
int utflen = strlen;
if (countNonZeroAscii != strlen) {
for (int i = countNonZeroAscii; i < strlen; i++) {
int c = str.charAt(i);
if (c >= 0x80 || c == 0)
utflen += (c >= 0x800) ? 2 : 1;
}
}
int utflen = utfLen(str, countNonZeroAscii);
if (utflen > 65535) {
throw new IllegalArgumentException("string too long");
}
Expand All @@ -185,20 +181,8 @@ void writeUTF(String str) {
str.getBytes(0, countNonZeroAscii, elems, offset);
offset += countNonZeroAscii;

for (int i = countNonZeroAscii; i < strlen; ++i) {
char c = str.charAt(i);
if (c >= '\001' && c <= '\177') {
elems[offset++] = (byte) c;
} else if (c > '\u07FF') {
elems[offset ] = (byte) (0xE0 | c >> 12 & 0xF);
elems[offset + 1] = (byte) (0x80 | c >> 6 & 0x3F);
elems[offset + 2] = (byte) (0x80 | c & 0x3F);
offset += 3;
} else {
elems[offset ] = (byte) (0xC0 | c >> 6 & 0x1F);
elems[offset + 1] = (byte) (0x80 | c & 0x3F);
offset += 2;
}
for (int i = countNonZeroAscii; i < strlen; i++) {
offset = putChar(elems, offset, str.charAt(i));
}

this.offset = offset;
Expand Down
Loading

0 comments on commit b42fbf4

Please sign in to comment.