Skip to content

Commit

Permalink
[JOHNZON-403] improved long string performance (#121)
Browse files Browse the repository at this point in the history
* prototype improved large string parsing

* fix regression, fix build

* properly close JsonReader in test
  • Loading branch information
jungm authored Feb 9, 2024
1 parent 9cafc42 commit c7afc7b
Show file tree
Hide file tree
Showing 2 changed files with 112 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
import java.io.Reader;
import java.math.BigDecimal;
import java.nio.charset.Charset;
import java.util.LinkedList;
import java.util.List;
import java.util.NoSuchElementException;

//This class represents both the Json tokenizer and the Json parser.
Expand Down Expand Up @@ -74,6 +76,8 @@ public class JsonStreamParserImpl extends JohnzonJsonParserImpl implements JsonC
private char[] fallBackCopyBuffer;
private boolean releaseFallBackCopyBufferLength = true;
private int fallBackCopyBufferLength;
// when boundaries of fallBackCopyBuffer have been reached
private List<Buffer> previousFallBackCopyBuffers;

// location (line, column, offset)
// We try to calculate this efficiently so we do not just increment the values per char read
Expand Down Expand Up @@ -115,6 +119,16 @@ private static final class StructureElement {
}
}

/**
 * A fallback copy buffer that has been set aside, paired with the number of
 * leading chars in it that hold value data.
 *
 * Instances are immutable holders: both fields are assigned once in the
 * constructor and only read afterwards.
 */
private static final class Buffer {
    // backing array; only the first {@code length} chars are meaningful
    private final char[] buffer;
    // number of chars of value data stored at the start of {@code buffer}
    private final int length;

    /**
     * @param buffer the char array that was set aside
     * @param length the number of chars used at the start of {@code buffer}
     */
    public Buffer(final char[] buffer, final int length) {
        this.buffer = buffer;
        this.length = length;
    }
}

//detect charset according to RFC 4627
public JsonStreamParserImpl(final InputStream inputStream, final int maxStringLength,
final BufferStrategy.BufferProvider<char[]> bufferProvider, final BufferStrategy.BufferProvider<char[]> valueBuffer,
Expand Down Expand Up @@ -165,7 +179,7 @@ private JsonStreamParserImpl(final InputStream inputStream, final Reader reader,
// Appends a single char to the value buffer (fallBackCopyBuffer) and advances
// fallBackCopyBufferLength by one.
// NOTE(review): this span comes from a rendered diff, so the doAutoAdjust(1) call and the
// createNewFallBackCopyBuffer() call below are presumably the removed and the added side of
// the same change rather than two consecutive calls - confirm against the real file.
private void appendToCopyBuffer(final char c) {
// checked one slot early: the branch is taken while the last slot of the array is still free
if (fallBackCopyBufferLength >= fallBackCopyBuffer.length - 1) {
doAutoAdjust(1);
createNewFallBackCopyBuffer();
}
fallBackCopyBuffer[fallBackCopyBufferLength++] = c;
}
Expand All @@ -180,40 +194,39 @@ private void copyCurrentValue() {
}

if (fallBackCopyBufferLength >= fallBackCopyBuffer.length - length) { // not good at runtime but handled
doAutoAdjust(length);
} else {
System.arraycopy(buffer, startOfValueInBuffer, fallBackCopyBuffer, fallBackCopyBufferLength, length);
createNewFallBackCopyBuffer();
}

System.arraycopy(buffer, startOfValueInBuffer, fallBackCopyBuffer, fallBackCopyBufferLength, length);
fallBackCopyBufferLength += length;
}

startOfValueInBuffer = endOfValueInBuffer = -1;
}

private void doAutoAdjust(final int length) {
// Creates new fallBackCopyBuffer and stores the old instance in previousFallBackCopyBuffers,
// this is much faster than resizing (recreating + copying) fallBackCopyBuffer
private void createNewFallBackCopyBuffer() {
if (!autoAdjust) {
throw new ArrayIndexOutOfBoundsException("Buffer too small for such a long string");
}

final char[] newArray = new char[fallBackCopyBuffer.length + Math.max(getBufferExtends(fallBackCopyBuffer.length), length)];
// TODO: log to adjust size once?
System.arraycopy(fallBackCopyBuffer, 0, newArray, 0, fallBackCopyBufferLength);
if (startOfValueInBuffer != -1) {
System.arraycopy(buffer, startOfValueInBuffer, newArray, fallBackCopyBufferLength, length);
if (previousFallBackCopyBuffers == null) {
previousFallBackCopyBuffers = new LinkedList<>();
}
if (releaseFallBackCopyBufferLength) {
bufferProvider.release(fallBackCopyBuffer);
releaseFallBackCopyBufferLength = false;
}
fallBackCopyBuffer = newArray;

previousFallBackCopyBuffers.add(new Buffer(fallBackCopyBuffer, fallBackCopyBufferLength));
fallBackCopyBuffer = valueProvider.newBuffer();
fallBackCopyBufferLength = 0;
}

/**
* @param currentLength length of the buffer
* @return the amount of bytes the current buffer should get extended with
*/
protected int getBufferExtends(int currentLength) {
return currentLength / 4;
// Hands every set-aside fallback buffer back to valueProvider and then drops the list.
// Does nothing when no buffer has been set aside.
private void releasePreviousFallBackCopyBuffers() {
    if (previousFallBackCopyBuffers != null) {
        for (final Buffer retired : previousFallBackCopyBuffers) {
            valueProvider.release(retired.buffer);
        }
        previousFallBackCopyBuffers = null;
    }
}


Expand Down Expand Up @@ -443,6 +456,7 @@ protected final Event internalNext() {
currentIntegralNumber = Integer.MIN_VALUE;
}

releasePreviousFallBackCopyBuffers();
if (fallBackCopyBufferLength != 0) {
fallBackCopyBufferLength = 0;
}
Expand Down Expand Up @@ -898,6 +912,7 @@ private Event handleLiteral() {
@Override
public String getString() {
if (previousEvent == KEY_NAME || previousEvent == VALUE_STRING || previousEvent == VALUE_NUMBER) {
combinePreviousFallbackBuffersToCurrent();

//if there a content in the value buffer read from them, if not use main buffer
return fallBackCopyBufferLength > 0 ? new String(fallBackCopyBuffer, 0, fallBackCopyBufferLength) : new String(buffer,
Expand All @@ -907,6 +922,30 @@ public String getString() {
}
}

// Merges all set-aside fallback buffers, followed by the current fallback buffer, into one
// freshly allocated char[] that then becomes the current fallback buffer, so callers can
// read the whole value from a single array.
// The set-aside buffers are released as part of the merge; nothing happens when there are none.
private void combinePreviousFallbackBuffersToCurrent() {
    if (previousFallBackCopyBuffers == null) {
        return;
    }

    // total number of chars collected so far: current buffer plus every set-aside chunk
    int totalLength = fallBackCopyBufferLength;
    for (final Buffer chunk : previousFallBackCopyBuffers) {
        totalLength += chunk.length;
    }
    final char[] merged = new char[totalLength];

    // chunks are copied in list order, the current buffer goes last
    int offset = 0;
    for (final Buffer chunk : previousFallBackCopyBuffers) {
        System.arraycopy(chunk.buffer, 0, merged, offset, chunk.length);
        offset += chunk.length;
    }
    System.arraycopy(fallBackCopyBuffer, 0, merged, offset, fallBackCopyBufferLength);

    releasePreviousFallBackCopyBuffers();
    fallBackCopyBuffer = merged;
    fallBackCopyBufferLength = totalLength;
}

@Override
public boolean isIntegralNumber() {

Expand All @@ -929,6 +968,7 @@ public int getInt() {
} else if (isCurrentNumberIntegral && currentIntegralNumber != Integer.MIN_VALUE) {
return currentIntegralNumber;
} else if (isCurrentNumberIntegral) {
combinePreviousFallbackBuffersToCurrent();
//if there a content in the value buffer read from them, if not use main buffer
final Integer retVal = fallBackCopyBufferLength > 0 ? parseIntegerFromChars(fallBackCopyBuffer, 0, fallBackCopyBufferLength)
: parseIntegerFromChars(buffer, startOfValueInBuffer, endOfValueInBuffer);
Expand All @@ -949,6 +989,7 @@ public long getLong() {
} else if (isCurrentNumberIntegral && currentIntegralNumber != Integer.MIN_VALUE) {
return currentIntegralNumber;
} else if (isCurrentNumberIntegral) {
combinePreviousFallbackBuffersToCurrent();
//if there a content in the value buffer read from them, if not use main buffer
final Long retVal = fallBackCopyBufferLength > 0 ? parseLongFromChars(fallBackCopyBuffer, 0, fallBackCopyBufferLength)
: parseLongFromChars(buffer, startOfValueInBuffer, endOfValueInBuffer);
Expand Down Expand Up @@ -984,6 +1025,8 @@ public BigDecimal getBigDecimal() {
} else if (isCurrentNumberIntegral && currentIntegralNumber != Integer.MIN_VALUE) {
return new BigDecimal(currentIntegralNumber);
}

combinePreviousFallbackBuffersToCurrent();
//if there a content in the value buffer read from them, if not use main buffer
return (/*currentBigDecimalNumber = */fallBackCopyBufferLength > 0 ? new BigDecimal(fallBackCopyBuffer, 0,
fallBackCopyBufferLength) : new BigDecimal(buffer, startOfValueInBuffer, (endOfValueInBuffer - startOfValueInBuffer)));
Expand All @@ -1004,6 +1047,7 @@ public void close() {
if (releaseFallBackCopyBufferLength) {
valueProvider.release(fallBackCopyBuffer);
}
releasePreviousFallBackCopyBuffers();

try {
in.close();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.johnzon.core;

import jakarta.json.Json;
import jakarta.json.JsonReader;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;

import java.io.StringReader;

/**
 * Manual timing check for parsing a JSON document whose single string value is very long.
 *
 * The class is disabled via {@code @Ignore}, so it only runs when enabled by hand.
 * The elapsed time of one parse (after warmup) is printed to {@code System.err}, and the
 * length of the parsed value is verified so a broken parse cannot pass unnoticed.
 */
@Ignore
public class HugeStringTest {
    /** Number of chars in the string value of the generated document. */
    private static final int VALUE_LENGTH = 50 * 1024 * 1024 + 1;

    /** Number of untimed parse rounds executed before the measured one. */
    private static final int WARMUP_ROUNDS = 10;

    @Test
    public void test() {
        final String json = "{\"data\":\"" + "a".repeat(VALUE_LENGTH) + "\"}";

        // Warmup
        for (int i = 0; i < WARMUP_ROUNDS; i++) {
            Assert.assertEquals(VALUE_LENGTH, readDataLength(json));
        }

        // nanoTime is intended for measuring elapsed time; currentTimeMillis follows the
        // wall clock and can jump when the system time is adjusted
        final long start = System.nanoTime();
        final int parsedLength = readDataLength(json);
        final long elapsedMillis = (System.nanoTime() - start) / 1_000_000L;

        System.err.println("Took " + elapsedMillis + "ms");
        Assert.assertEquals(VALUE_LENGTH, parsedLength);
    }

    /**
     * Parses {@code json} with a {@link JsonReader} and closes the reader afterwards.
     *
     * @param json a JSON object containing a string member named {@code data}
     * @return the length of the parsed {@code data} value
     */
    private static int readDataLength(final String json) {
        try (JsonReader reader = Json.createReader(new StringReader(json))) {
            return reader.readObject().getString("data").length();
        }
    }
}

0 comments on commit c7afc7b

Please sign in to comment.