diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000000..35da4aa33c --- /dev/null +++ b/.editorconfig @@ -0,0 +1,11 @@ +[*] +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace=true + +[{*.kt,*.kts}] +ij_kotlin_imports_layout = * +ij_kotlin_packages_to_use_import_on_demand=com.amazon.ion.**,java.util.* + +[src/test/**.kt] +ktlint_ignore_back_ticked_identifier=true diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 2e7088d885..e366788b52 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -3,10 +3,7 @@ permissions: read-all on: push: - branches: [ master ] pull_request: - branches: [ master ] - jobs: build-and-test: diff --git a/THIRD_PARTY_LICENSES.md b/THIRD_PARTY_LICENSES.md index 73629fbdc9..6abe761391 100644 --- a/THIRD_PARTY_LICENSES.md +++ b/THIRD_PARTY_LICENSES.md @@ -15,3 +15,19 @@ **3** **Group:** `org.jetbrains.kotlin` **Name:** `kotlin-stdlib-common` **Version:** `1.9.0` > - **POM Project URL**: [https://kotlinlang.org/](https://kotlinlang.org/) > - **POM License**: Apache License, Version 2.0 - [https://www.apache.org/licenses/LICENSE-2.0](https://www.apache.org/licenses/LICENSE-2.0) + +**4** **Group:** `org.jetbrains.kotlin` **Name:** `kotlin-stdlib-jdk7` **Version:** `1.9.0` +> - **POM Project URL**: [https://kotlinlang.org/](https://kotlinlang.org/) +> - **POM License**: Apache License, Version 2.0 - [https://www.apache.org/licenses/LICENSE-2.0](https://www.apache.org/licenses/LICENSE-2.0) + +**5** **Group:** `org.jetbrains.kotlin` **Name:** `kotlin-stdlib-jdk8` **Version:** `1.9.0` +> - **POM Project URL**: [https://kotlinlang.org/](https://kotlinlang.org/) +> - **POM License**: Apache License, Version 2.0 - [https://www.apache.org/licenses/LICENSE-2.0](https://www.apache.org/licenses/LICENSE-2.0) + +**6** **Group:** `org.jetbrains.kotlinx` **Name:** `kotlinx-collections-immutable-jvm` **Version:** `0.3.6` +> - **POM Project URL**: 
[https://github.com/Kotlin/kotlinx.collections.immutable](https://github.com/Kotlin/kotlinx.collections.immutable) +> - **POM License**: Apache License, Version 2.0 - [https://www.apache.org/licenses/LICENSE-2.0](https://www.apache.org/licenses/LICENSE-2.0) + +## Unknown + +**7** **Group:** `org.jetbrains.kotlinx` **Name:** `kotlinx-collections-immutable` **Version:** `0.3.6` diff --git a/build.gradle.kts b/build.gradle.kts index 9df5b7f987..83f0bb614b 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -1,13 +1,14 @@ +import com.diffplug.gradle.spotless.SpotlessTask import com.github.jk1.license.filter.LicenseBundleNormalizer import com.github.jk1.license.render.InventoryMarkdownReportRenderer import com.github.jk1.license.render.TextReportRenderer +import java.time.Instant +import java.util.Properties import org.gradle.kotlin.dsl.support.unzipTo import org.gradle.kotlin.dsl.support.zipTo import org.jetbrains.kotlin.gradle.dsl.KotlinCompile import org.jetbrains.kotlin.gradle.dsl.KotlinJvmOptions import proguard.gradle.ProGuardTask -import java.time.Instant -import java.util.Properties buildscript { repositories { @@ -72,6 +73,7 @@ java { dependencies { implementation("org.jetbrains.kotlin:kotlin-stdlib:1.9.0") + implementation("org.jetbrains.kotlinx:kotlinx-collections-immutable:0.3.6") testImplementation("org.junit.jupiter:junit-jupiter:5.7.1") testCompileOnly("junit:junit:4.13") @@ -79,6 +81,11 @@ dependencies { testImplementation("org.hamcrest:hamcrest:2.2") testImplementation("pl.pragmatists:JUnitParams:1.1.1") testImplementation("com.google.code.tempus-fugit:tempus-fugit:1.1") + + // Used for the conformance test runner, because IonValue is not suitable for it. + testImplementation("com.amazon.ion:ion-element:1.2.0") + // Force the tests to use the locally built version rather than a version transitively provided by `ion-element`. 
+ testImplementation(project) } group = "com.amazon.ion" @@ -90,9 +97,9 @@ description = "A Java implementation of the Amazon Ion data notation." val isCI: Boolean = System.getenv("CI") == "true" val githubRepositoryUrl = "https://github.com/amazon-ion/ion-java/" val isReleaseVersion: Boolean = !version.toString().endsWith("SNAPSHOT") -// The name we're checking for corresponds to the name that is set in the `publish-release-artifacts.yml` file. -val isReleaseWorkflow: Boolean = System.getenv("GITHUB_WORKFLOW") == "Publish Release Artifacts" -val generatedResourcesDir = "${layout.buildDirectory}/generated/main/resources" +// Workflows triggered by a new release always have a tag ref. +val isReleaseWorkflow: Boolean = (System.getenv("GITHUB_REF") ?: "").startsWith("refs/tags/") +val generatedResourcesDir = layout.buildDirectory.dir("generated/main/resources") sourceSets { main { @@ -105,7 +112,7 @@ licenseReport { // though ion-java does not depend on ion-java-cli. By default, the license report generator includes // the current project (ion-java) and all its subprojects. projects = arrayOf(project) - outputDir = "${layout.buildDirectory}/reports/licenses" + outputDir = layout.buildDirectory.dir("reports/licenses").get().asFile.path renderers = arrayOf(InventoryMarkdownReportRenderer(), TextReportRenderer()) // Dependencies use inconsistent titles for Apache-2.0, so we need to specify mappings filters = arrayOf( @@ -118,41 +125,52 @@ licenseReport { ) } +// Spotless eagerly checks for the `ratchetFrom` git ref even if there are no spotless tasks in the task +// graph, so we're going to use a git tag to create our own lazy evaluation and setting of `ratchetFrom`. +// See https://github.com/diffplug/spotless/issues/1902 +val SPOTLESS_TAG = "spotless-check-${Instant.now().epochSecond}-DELETE-ME" + /** - * This is the `git remote` name that corresponds to amazon-ion/ion-java.
- * It is used for applying the "spotless" checks only to things that are changed - * compared to the master branch of the source repo. + * This is the commit where the current branch most recently forked from master. We use this as + * our "ratchetFrom" base so that changes in master don't cause unexpected formatting failures in + * feature branches. */ -val sourceRepoRemoteName: String by lazy { +val sourceRepoRachetFromCommit: String by lazy { val git = System.getenv("GIT_CLI") ?: "git" fun String.isSourceRepo(): Boolean { - val url = "$git remote get-url ${this@isSourceRepo}".runCommand() + val url = "$git remote get-url ${this@isSourceRepo}".trim().runCommand() return "amazon-ion/ion-java" in url || "amzn/ion-java" in url } - var name = "$git remote".runCommand().lines().firstOrNull { it.isSourceRepo() } + var remoteName = "$git remote".runCommand().trim().lines().firstOrNull { it.isSourceRepo() } if (isCI) { // When running on a CI environment e.g. GitHub Actions, we might need to automatically add the remote - if (name == null) { - name = "ci_source_repository" - "$git remote add $name $githubRepositoryUrl".runCommand() + if (remoteName == null) { + remoteName = "ci_source_repository" + "$git remote add $remoteName $githubRepositoryUrl".runCommand(log = logger::quiet) + logger.quiet("Added remote repository ") } // ...and make sure that we have indeed fetched that remote - "$git fetch $name".runCommand() + "$git fetch --unshallow --no-tags --no-recurse-submodules $remoteName master".runCommand() } - name ?: throw Exception( + remoteName ?: throw Exception( """ |No git remote found for amazon-ion/ion-java. Try again after running: | | git remote add -f $githubRepositoryUrl - """.trimMargin() + """.trimMargin() ) + + // TODO: We might need to use the PR base ref when this is running as part of a CI check for a PR.
+ logger.quiet("Finding spotless ratchetFrom base...") + "$git merge-base $remoteName/master HEAD".runCommand(log = logger::quiet).trim() } -fun String.runCommand(workingDir: File = rootProject.projectDir): String { +fun String.runCommand(workingDir: File = rootProject.projectDir, log: (String) -> Unit = logger::info): String { + log("$ $this") val parts = this.split("\\s".toRegex()) val proc = ProcessBuilder(*parts.toTypedArray()) .directory(workingDir) @@ -160,7 +178,14 @@ fun String.runCommand(workingDir: File = rootProject.projectDir): String { .redirectError(ProcessBuilder.Redirect.PIPE) .start() proc.waitFor(30, TimeUnit.SECONDS) - return proc.inputStream.bufferedReader().readText() + val stdOut = proc.inputStream.bufferedReader().readText() + val stdErr = proc.errorStream.bufferedReader().readText() + if (stdOut.isNotBlank()) log(stdOut) + if (stdErr.isNotBlank()) logger.warn(stdErr) + if (proc.exitValue() != 0) { + throw Exception("Failed to run command: $this") + } + return stdOut } spotless { @@ -171,7 +196,10 @@ spotless { // release branch. if (isReleaseWorkflow) return@spotless - ratchetFrom("$sourceRepoRemoteName/master") + "git tag -f $SPOTLESS_TAG".runCommand() + ratchetFrom(SPOTLESS_TAG) + // Make sure this always gets cleaned up. We can't do it inline here, so we'll do it once the task graph is created. + gradle.taskGraph.addTaskExecutionGraphListener { "git tag -d $SPOTLESS_TAG".runCommand() } val shortFormLicenseHeader = """ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. @@ -179,6 +207,8 @@ spotless { """.trimIndent() java { + // Note that the order of these is important. Each of these is an individual formatter + // that is applied sequentially. 
licenseHeader(shortFormLicenseHeader) removeUnusedImports() } @@ -368,7 +398,7 @@ tasks { } ktlint { - version.set("0.40.0") + version.set("0.45.2") outputToConsole.set(true) } @@ -384,7 +414,9 @@ tasks { } ) - val spotbugsBaselineFile = "$rootDir/config/spotbugs/baseline.xml" + val spotbugsConfigDir = "$rootDir/config/spotbugs" + excludeFilter.set(file("$spotbugsConfigDir/exclude.xml")) + val spotbugsBaselineFile = "$spotbugsConfigDir/baseline.xml" val baselining = project.hasProperty("baseline") // e.g. `./gradlew :spotbugsMain -Pbaseline` @@ -416,7 +448,7 @@ tasks { commandLine( "xsltproc", "--output", spotbugsBaselineFile, - "$rootDir/config/spotbugs/baseline.xslt", + "$spotbugsConfigDir/baseline.xslt", "${outputLocation.get()}" ) } @@ -441,7 +473,7 @@ tasks { * for why this is done with a properties file rather than the Jar manifest. */ val generateJarInfo by creating { - val propertiesFile = File("$generatedResourcesDir/${project.name}.properties") + val propertiesFile = generatedResourcesDir.get().file("${project.name}.properties").asFile doLast { propertiesFile.parentFile.mkdirs() val properties = Properties() @@ -528,6 +560,11 @@ tasks { dependsOn(jar) includeConfigs.set(listOf("runtimeClasspath")) } + + withType { + doFirst { "git tag -f $SPOTLESS_TAG $sourceRepoRachetFromCommit".runCommand() } + doLast { "git tag -d $SPOTLESS_TAG".runCommand() } + } } publishing { diff --git a/config/spotbugs/exclude.xml b/config/spotbugs/exclude.xml new file mode 100644 index 0000000000..0b19f53745 --- /dev/null +++ b/config/spotbugs/exclude.xml @@ -0,0 +1,28 @@ + + + + + + + + + + + + diff --git a/ion-java-cli/build.gradle.kts b/ion-java-cli/build.gradle.kts index 8ddd311a4e..a1c515aeda 100644 --- a/ion-java-cli/build.gradle.kts +++ b/ion-java-cli/build.gradle.kts @@ -1,6 +1,8 @@ plugins { java application + // Apply GraalVM Native Image plugin + id("org.graalvm.buildtools.native") version "0.10.3" } description = "A CLI that implements the standard interface defined 
by ion-test-driver." @@ -15,8 +17,33 @@ repositories { dependencies { implementation("args4j:args4j:2.33") implementation(rootProject) + + implementation("info.picocli:picocli:4.7.6") + annotationProcessor("info.picocli:picocli-codegen:4.7.6") +} + +tasks.withType { + options.compilerArgs.add("-Aproject=${project.group}/${project.name}") } application { mainClass.set("com.amazon.tools.cli.IonJavaCli") } + +// Defines an ion-java-cli:nativeCompile task which produces ion-java-cli/build/native/nativeCompile/jion +// You need to have GRAALVM_HOME pointed at a GraalVM installation +// You can get one of those via e.g. `sdk install java 17.0.9-graalce` +// See: https://sdkman.io/ +graalvmNative { + testSupport.set(false) + binaries { + named("main") { + imageName.set("jion") + mainClass.set("com.amazon.tools.cli.SimpleIonCli") + buildArgs.add("-O4") + } + } + binaries.all { + buildArgs.add("--verbose") + } +} diff --git a/ion-java-cli/src/main/java/com/amazon/tools/cli/OutputFormat.java b/ion-java-cli/src/main/java/com/amazon/tools/cli/OutputFormat.java index 15a658fcd3..3f1609becf 100644 --- a/ion-java-cli/src/main/java/com/amazon/tools/cli/OutputFormat.java +++ b/ion-java-cli/src/main/java/com/amazon/tools/cli/OutputFormat.java @@ -11,78 +11,39 @@ * Represents the different Ion output formats supported by the command line com.amazon.tools in this package. */ public enum OutputFormat { - /** - * Nicely spaced, 'prettified' text Ion. - */ - PRETTY { - @Override - public IonWriter createIonWriter(OutputStream outputStream) { - return IonTextWriterBuilder.pretty().build(outputStream); - } - - @Override - public IonWriter createIonWriterWithImports(OutputStream outputStream, SymbolTable[] imports) { - return IonTextWriterBuilder.pretty().withImports(imports).build(outputStream); - } - }, - /** - * Minimally spaced text Ion. 
- */ - TEXT { - @Override - public IonWriter createIonWriter(OutputStream outputStream) { - return IonTextWriterBuilder.standard().build(outputStream); - } - - @Override - public IonWriter createIonWriterWithImports(OutputStream outputStream, SymbolTable[] imports) { - return IonTextWriterBuilder.standard().withImports(imports).build(outputStream); - } - }, - /** - * Compact, read-optimized binary Ion. - */ - BINARY { - @Override - public IonWriter createIonWriter(OutputStream outputStream) { - return IonBinaryWriterBuilder.standard().build(outputStream); - } - - @Override - public IonWriter createIonWriterWithImports(OutputStream outputStream, SymbolTable[] imports) { - return IonBinaryWriterBuilder.standard().withImports(imports).build(outputStream); - } - }, - /** - * Event Stream - */ - EVENTS { - @Override - public IonWriter createIonWriter(OutputStream outputStream) { - return IonTextWriterBuilder.pretty().build(outputStream); - } - - @Override - public IonWriter createIonWriterWithImports(OutputStream outputStream, SymbolTable[] imports) { - return IonTextWriterBuilder.pretty().withImports(imports).build(outputStream); - } - }, - /** - * None - */ - NONE { - @Override - public IonWriter createIonWriter(OutputStream outputStream) { - NoOpOutputStream out = new NoOpOutputStream(); - return IonTextWriterBuilder.pretty().build(out); - } - - @Override - public IonWriter createIonWriterWithImports(OutputStream outputStream, SymbolTable[] imports) { - return IonTextWriterBuilder.pretty().withImports(imports).build(outputStream); - } - }; - - abstract IonWriter createIonWriter(OutputStream outputStream); - abstract IonWriter createIonWriterWithImports(OutputStream outputStream, SymbolTable[] symbolTable); + /** Nicely spaced, 'prettified' text Ion */ PRETTY, + /** Minimally spaced text Ion */ TEXT, + /** Compact, read-optimized binary Ion */ BINARY, + /** Event Stream */ EVENTS, + /** No output, /dev/null */ NONE; + + IonWriter createIonWriter(OutputStream 
outputStream) { + return createIonWriter(this, outputStream); + } + + IonWriter createIonWriterWithImports(OutputStream outputStream, SymbolTable[] symbolTable) { + return createIonWriter(this, outputStream, symbolTable); + } + + private static IonWriter createIonWriter(OutputFormat format, OutputStream outputStream) { + switch (format) { + case TEXT: return IonTextWriterBuilder.standard().build(outputStream); + case PRETTY: return IonTextWriterBuilder.pretty().build(outputStream); + case EVENTS: return IonTextWriterBuilder.pretty().build(outputStream); + case BINARY: return IonBinaryWriterBuilder.standard().build(outputStream); + case NONE: return IonTextWriterBuilder.standard().build(new NoOpOutputStream()); + default: throw new IllegalStateException("Unsupported output format: " + format); + } + } + + private static IonWriter createIonWriter(OutputFormat format, OutputStream out, SymbolTable... symbols) { + switch (format) { + case TEXT: return IonTextWriterBuilder.standard().withImports(symbols).build(out); + case PRETTY: return IonTextWriterBuilder.pretty().withImports(symbols).build(out); + case EVENTS: return IonTextWriterBuilder.standard().withImports(symbols).build(out); + case BINARY: return IonBinaryWriterBuilder.standard().withImports(symbols).build(out); + case NONE: return IonTextWriterBuilder.standard().withImports(symbols).build(new NoOpOutputStream()); + default: throw new IllegalStateException("Unsupported output format: " + format); + } + } } diff --git a/ion-java-cli/src/main/java/com/amazon/tools/cli/SimpleIonCli.java b/ion-java-cli/src/main/java/com/amazon/tools/cli/SimpleIonCli.java new file mode 100644 index 0000000000..34c4068cff --- /dev/null +++ b/ion-java-cli/src/main/java/com/amazon/tools/cli/SimpleIonCli.java @@ -0,0 +1,159 @@ +package com.amazon.tools.cli; + + +import com.amazon.ion.IonEncodingVersion; +import com.amazon.ion.IonReader; +import com.amazon.ion.IonWriter; +import com.amazon.ion.system.IonReaderBuilder; +import 
picocli.CommandLine; +import picocli.CommandLine.Command; +import picocli.CommandLine.HelpCommand; +import picocli.CommandLine.Option; +import picocli.CommandLine.Parameters; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileDescriptor; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.SequenceInputStream; +import java.util.Arrays; + +@Command( + name = SimpleIonCli.NAME, + version = SimpleIonCli.VERSION, + subcommands = {HelpCommand.class}, + mixinStandardHelpOptions = true +) +class SimpleIonCli { + + public static final String NAME = "jion"; + public static final String VERSION = "2024-10-31"; + //TODO: Replace with InputStream.nullInputStream in JDK 11+ + public static final InputStream EMPTY = new ByteArrayInputStream(new byte[0]); + + public static void main(String[] args) { + CommandLine commandLine = new CommandLine(new SimpleIonCli()) + .setCaseInsensitiveEnumValuesAllowed(true) + .setUsageHelpAutoWidth(true); + System.exit(commandLine.execute(args)); + } + + @Option(names={"-v", "--ion-version"}, description = "Output Ion version", defaultValue = "1.0", + converter = IonEncodingVersionConverter.class, scope = CommandLine.ScopeType.INHERIT) + IonEncodingVersion ionVersion; + + @Option(names={"-f", "--format", "--output-format"}, defaultValue = "pretty", + description = "Output format, from the set (text | pretty | binary | debug | none).", + paramLabel = "", + scope = CommandLine.ScopeType.INHERIT) + OutputFormat outputFormat; + + @Option(names={"-o", "--output"}, paramLabel = "FILE", description = "Output file", + scope = CommandLine.ScopeType.INHERIT) + File outputFile; + + @Command(name = "cat", aliases = {"process"}, + description = "concatenate FILE(s) in the requested Ion output format", + mixinStandardHelpOptions = true) + int cat( @Parameters(paramLabel = 
"FILE") File... files) { + + + //TODO: Handle stream cutoff- java.io.IOException: Broken pipe + //TODO: This is not resilient to problems with a single file. Should it be? + try (InputStream in = getInputStream(files); + IonReader reader = IonReaderBuilder.standard().build(in); + OutputStream out = getOutputStream(outputFile); + IonWriter writer = getWriter(ionVersion, outputFormat, out)) { + // getInputStream will look for stdin if we don't supply + writer.writeValues(reader); + } catch (IOException e) { + System.err.println(e.getMessage()); + return CommandLine.ExitCode.SOFTWARE; + } + + // process files + return CommandLine.ExitCode.OK; + } + + private static InputStream getInputStream(File... files) { + if (files == null || files.length == 0) return new FileInputStream(FileDescriptor.in); + + // As convenient as this formulation is I'm not sure of the ordering guarantees here + // Revisit if that is ever problematic + return Arrays.stream(files) + .map(SimpleIonCli::getInputStream) + .reduce(EMPTY, SequenceInputStream::new); + } + + private static InputStream getInputStream(File inputFile) { + try { + return new FileInputStream(inputFile); + } catch (FileNotFoundException e) { + throw cloak(e); + } + } + + // Removing some boilerplate from checked-exception consuming paths, without RuntimeException wrapping + // JLS Section 18.4 covers type inference for generic methods, + // including the rule that `throws T` is inferred as RuntimeException if possible. + // See e.g. https://www.rainerhahnekamp.com/en/ignoring-exceptions-in-java/ + private static T cloak(Throwable t) throws T { + @SuppressWarnings("unchecked") + T result = (T) t; + return result; + } + + private static FileOutputStream getOutputStream(File outputFile) throws IOException { + // non-line-buffered stdout, or the requested file output + return outputFile == null ? 
new FileOutputStream(FileDescriptor.out) : new FileOutputStream(outputFile); + } + + private static IonWriter getWriter(IonEncodingVersion version, OutputFormat format, OutputStream out) { + if (version == IonEncodingVersion.ION_1_0) return getWriter_1_0(format, out); + if (version == IonEncodingVersion.ION_1_1) return getWriter_1_1(format, out); + throw new IllegalArgumentException("Unrecognized IonEncodingVersion: " + version); + } + + private static IonWriter getWriter_1_0(OutputFormat format, OutputStream out) { + switch (format) { + case Pretty: return IonEncodingVersion.ION_1_0.textWriterBuilder().withPrettyPrinting().build(out); + case Text: return IonEncodingVersion.ION_1_0.textWriterBuilder().build(out); + case Binary: return IonEncodingVersion.ION_1_0.binaryWriterBuilder().build(out); + case Debug: throw new UnsupportedOperationException("Not yet supported, pending ion-java #1005"); + case None: return IonEncodingVersion.ION_1_0.textWriterBuilder().build(new NoOpOutputStream()); + default: throw new IllegalArgumentException("Unrecognized or unsupported output format: " + format); + } + } + + private static IonWriter getWriter_1_1(OutputFormat format, OutputStream out) { + switch (format) { + case Pretty: return IonEncodingVersion.ION_1_1.textWriterBuilder().withPrettyPrinting().build(out); + case Text: return IonEncodingVersion.ION_1_1.textWriterBuilder().build(out); + case Binary: return IonEncodingVersion.ION_1_1.binaryWriterBuilder().build(out); + case Debug: throw new UnsupportedOperationException("Not yet supported, pending ion-java #1005"); + case None: return IonEncodingVersion.ION_1_1.textWriterBuilder().build(new NoOpOutputStream()); + default: throw new IllegalArgumentException("Unrecognized or unsupported output format: " + format); + } + } + + private enum OutputFormat { + Pretty, Text, Binary, Debug, None + } + + private static class IonEncodingVersionConverter implements CommandLine.ITypeConverter> { + + @Override + public IonEncodingVersion 
convert(String ionVersion) { + switch (ionVersion) { + case "1.0": return IonEncodingVersion.ION_1_0; + case "1.1": return IonEncodingVersion.ION_1_1; + default: throw new IllegalArgumentException("Unrecognized or unsupported Ion version: " + ionVersion); + } + } + } +} diff --git a/ion-tests b/ion-tests index ef0451a72a..c2aca01615 160000 --- a/ion-tests +++ b/ion-tests @@ -1 +1 @@ -Subproject commit ef0451a72a39f572175ad8e90b1a77110e9aec4c +Subproject commit c2aca0161515ba8b153c0d949c882705306cf67e diff --git a/src/main/java/com/amazon/ion/IonEncodingVersion.java b/src/main/java/com/amazon/ion/IonEncodingVersion.java new file mode 100644 index 0000000000..da7801824a --- /dev/null +++ b/src/main/java/com/amazon/ion/IonEncodingVersion.java @@ -0,0 +1,78 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion; + +import com.amazon.ion.impl._Private_IonTextWriterBuilder_1_1; +import com.amazon.ion.system.IonBinaryWriterBuilder; +import com.amazon.ion.system.IonBinaryWriterBuilder_1_1; +import com.amazon.ion.system.IonTextWriterBuilder; +import com.amazon.ion.system.IonTextWriterBuilder_1_1; +import com.amazon.ion.system._Private_IonBinaryWriterBuilder_1_1; + +/** + * Represents an Ion encoding version supported by this library. + *

+ * Instances may be used to retrieve writer builders for the relevant Ion version. For example, to construct an + * Ion 1.1 binary writer builder, use {@code ION_1_1.binaryWriterBuilder();} + *

+ * + * @param the type of binary writer builder compatible with this version. + */ +public abstract class IonEncodingVersion { + + /** + * Ion 1.0, see the binary and + * text specification. + */ + public static final IonEncodingVersion ION_1_0 = new IonEncodingVersion(0) { + + @Override + public IonBinaryWriterBuilder binaryWriterBuilder() { + return IonBinaryWriterBuilder.standard(); + } + + @Override + public IonTextWriterBuilder textWriterBuilder() { + return IonTextWriterBuilder.standard(); + } + }; + + /** + * Ion 1.1, TODO link to the finalized specification. + */ + public static final IonEncodingVersion ION_1_1 = new IonEncodingVersion(1) { + + @Override + public IonBinaryWriterBuilder_1_1 binaryWriterBuilder() { + return _Private_IonBinaryWriterBuilder_1_1.standard(); + } + + @Override + public IonTextWriterBuilder_1_1 textWriterBuilder() { + return _Private_IonTextWriterBuilder_1_1.standard(); + } + }; + + private final int minorVersion; + + private IonEncodingVersion(int minorVersion) { + this.minorVersion = minorVersion; + } + + /** + * Provides a new mutable binary writer builder for IonWriter instances that write this version of the Ion encoding. + * @return a new mutable writer builder. + */ + public abstract BinaryWriterBuilder binaryWriterBuilder(); + + /** + * Provides a new mutable text writer builder for IonWriter instances that write this version of the Ion encoding. + * @return a new mutable writer builder. + */ + public abstract TextWriterBuilder textWriterBuilder(); + + @Override + public String toString() { + return String.format("Ion 1.%d", minorVersion); + } +} diff --git a/src/main/java/com/amazon/ion/IonWriter.java b/src/main/java/com/amazon/ion/IonWriter.java index 4efc70e0c4..93f3fd3ca7 100644 --- a/src/main/java/com/amazon/ion/IonWriter.java +++ b/src/main/java/com/amazon/ion/IonWriter.java @@ -1,18 +1,5 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion; import com.amazon.ion.facet.Faceted; @@ -518,4 +505,8 @@ public void writeClob(byte[] value, int start, int len) */ public void writeBlob(byte[] value, int start, int len) throws IOException; + + public default void writeObject(WriteAsIon obj) { + obj.writeTo(this); + } } diff --git a/src/main/java/com/amazon/ion/MacroAwareIonReader.kt b/src/main/java/com/amazon/ion/MacroAwareIonReader.kt new file mode 100644 index 0000000000..5d06a333c8 --- /dev/null +++ b/src/main/java/com/amazon/ion/MacroAwareIonReader.kt @@ -0,0 +1,60 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion + +import java.io.Closeable +import java.io.IOException + +/** + * An enhancement to an Ion reader that supports macro-aware transcoding. + */ +interface MacroAwareIonReader : Closeable { + + /** + * Performs a macro-aware transcode of all values in the stream. This is + * shorthand for calling [prepareTranscodeTo], then calling [transcodeNext] + * repetitively until it returns `false`. + * @param writer the writer to which the reader's stream will be transcoded. + */ + @Throws(IOException::class) + fun transcodeAllTo(writer: MacroAwareIonWriter) + + /** + * Prepares the reader to perform a macro-aware transcode to the given + * writer. 
This must be called before calling [transcodeNext], but is not + * necessary if calling [transcodeAllTo]. + * @param writer the writer to which the reader's stream will be transcoded. + */ + fun prepareTranscodeTo(writer: MacroAwareIonWriter) + + /** + * Performs a macro-aware transcode of the next value read by this reader + * to the writer previously provided to a call to [prepareTranscodeTo]. + * For Ion 1.0 streams, this functions similarly to providing a system-level + * [IonReader] to [IonWriter.writeValue]. For Ion 1.1 streams, the transcoded + * stream will include the same symbol tables, encoding directives, and + * e-expression invocations as the source stream. In both cases, the + * transcoded stream will be data-model equivalent to the source stream. + * + * The following limitations should be noted: + * 1. Encoding directives with no effect on the encoding context may be + * elided from the transcoded stream. An example would be an encoding + * directive that re-exports the existing context but adds no new + * macros or new symbols. + * 2. When transcoding from text to text, comments will not be preserved. + * 3. Open content in encoding directives (e.g. macro invocations that + * expand to nothing) will not be preserved. + * 4. Granular details of the binary encoding, like inlining vs. interning + * for a particular symbol or length-prefixing vs. delimiting for a + * particular container, may not be preserved. It is up to the user + * to provide a writer configured to match these details if important. + * + * To get a [MacroAwareIonReader] use `_Private_IonReaderBuilder.buildMacroAware`. + * To get a [MacroAwareIonWriter] use [IonEncodingVersion.textWriterBuilder] or + * [IonEncodingVersion.binaryWriterBuilder]. + * @return true if a value was transcoded; false if the end of the stream was reached. + * @throws IOException if thrown during writing. 
+ */ + @Throws(IOException::class) + fun transcodeNext(): Boolean +} diff --git a/src/main/java/com/amazon/ion/MacroAwareIonWriter.kt b/src/main/java/com/amazon/ion/MacroAwareIonWriter.kt new file mode 100644 index 0000000000..633931d01b --- /dev/null +++ b/src/main/java/com/amazon/ion/MacroAwareIonWriter.kt @@ -0,0 +1,69 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion + +import com.amazon.ion.impl.macro.* + +/** + * Extension of the IonWriter interface that supports writing macros. + * + * TODO: Consider exposing this as a Facet. + * + * TODO: See if we can have some sort of safe reference to a macro. + */ +interface MacroAwareIonWriter : IonWriter { + + /** + * Starts a new encoding segment with an Ion version marker, flushing + * the previous segment (if any) and resetting the encoding context. + */ + fun startEncodingSegmentWithIonVersionMarker() + + /** + * Starts a new encoding segment with an encoding directive, flushing + * the previous segment (if any). + * @param macros the macros added in the new segment. + * @param isMacroTableAppend true if the macros from the previous segment + * are to remain available. + * @param symbols the symbols added in the new segment. + * @param isSymbolTableAppend true if the symbols from the previous + * segment are to remain available. + * @param encodingDirectiveAlreadyWritten true if the encoding directive + * that begins the new segment has already been written to this writer. + * If false, the writer will write an encoding directive consistent + * with the arguments provided to this method, using verbose + * s-expression syntax. + */ + fun startEncodingSegmentWithEncodingDirective( + macros: Map, + isMacroTableAppend: Boolean, + symbols: List, + isSymbolTableAppend: Boolean, + encodingDirectiveAlreadyWritten: Boolean + ) + + /** + * Starts writing a macro invocation, adding it to the macro table, if needed.
+ */ + fun startMacro(macro: Macro) + + /** + * Starts writing a macro invocation, adding it to the macro table, if needed. + */ + fun startMacro(name: String, macro: Macro) + + /** + * Ends and steps out of the current macro invocation. + */ + fun endMacro() + + /** + * Starts writing an expression group. May only be called while the writer is in a macro invocation. + */ + fun startExpressionGroup() + + /** + * Ends and steps out of the current expression group. + */ + fun endExpressionGroup() +} diff --git a/src/main/java/com/amazon/ion/SystemSymbols.java b/src/main/java/com/amazon/ion/SystemSymbols.java index 57d5745947..73f8344f20 100644 --- a/src/main/java/com/amazon/ion/SystemSymbols.java +++ b/src/main/java/com/amazon/ion/SystemSymbols.java @@ -1,18 +1,5 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion; /** @@ -130,4 +117,11 @@ private SystemSymbols() { } * The maximum ID of the IDs of system symbols defined by Ion 1.0. 
*/ public static final int ION_1_0_MAX_ID = 9; + + // Ion 1.1 Symbols + + /** + * The name of the default module in Ion 1.1 + */ + public static final String DEFAULT_MODULE = "_"; } diff --git a/src/main/java/com/amazon/ion/Timestamp.java b/src/main/java/com/amazon/ion/Timestamp.java index 13057cf689..92c0c1dfda 100644 --- a/src/main/java/com/amazon/ion/Timestamp.java +++ b/src/main/java/com/amazon/ion/Timestamp.java @@ -1,18 +1,5 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion; import static com.amazon.ion.impl._Private_Utils.safeEquals; @@ -689,6 +676,19 @@ else if (shouldCheckFraction) offset, APPLY_OFFSET_NO, CHECK_FRACTION_YES); } + /** + * @return a new Timestamp from the given components in local time, without validating the fractional seconds. + */ + @Deprecated + public static Timestamp _private_createFromLocalTimeFieldsUnchecked(Precision p, int year, int month, int day, + int hour, int minute, int second, + BigDecimal frac, Integer offset) + { + return new Timestamp(p, year, month, day, + hour, minute, second, frac, + offset, APPLY_OFFSET_YES, CHECK_FRACTION_NO); + } + /** * Creates a new Timestamp from a {@link Calendar}, preserving the * {@link Calendar}'s precision and local offset from UTC. 
diff --git a/src/main/java/com/amazon/ion/WriteAsIon.kt b/src/main/java/com/amazon/ion/WriteAsIon.kt new file mode 100644 index 0000000000..c195762132 --- /dev/null +++ b/src/main/java/com/amazon/ion/WriteAsIon.kt @@ -0,0 +1,19 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion + +/** + * Indicates that the implementing class has a standardized/built-in way to serialize as Ion. + */ +interface WriteAsIon { + + /** + * Writes this object to an IonWriter capable of producing macro invocations. + */ + fun writeToMacroAware(writer: MacroAwareIonWriter) = writeTo(writer as IonWriter) + + /** + * Writes this object to a standard [IonWriter]. + */ + fun writeTo(writer: IonWriter) +} diff --git a/src/main/java/com/amazon/ion/_private/SuppressFBWarnings.kt b/src/main/java/com/amazon/ion/_private/SuppressFBWarnings.kt new file mode 100644 index 0000000000..8cb0e2240f --- /dev/null +++ b/src/main/java/com/amazon/ion/_private/SuppressFBWarnings.kt @@ -0,0 +1,18 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion._private + +/** + * SpotBugs looks for an annotation called `SuppressFBWarnings` regardless of the package it is in. + * This will allow us to disable some SpotBugs rules when we want e.g. to allow switch case fallthrough. + * + * We are implementing our own annotation so that we don't have to declare any dependency on another package just for this. + */ +annotation class SuppressFBWarnings( + /** + * The set of FindBugs/SpotBugs warnings that are to be suppressed in the annotated element. + * The value can be a bug category, kind, or pattern. 
+ * For examples of some bug types, see https://spotbugs.readthedocs.io/en/stable/bugDescriptions.html + */ + vararg val value: String = [] +) diff --git a/src/main/java/com/amazon/ion/apps/macroize/InvocationSubstitute.java b/src/main/java/com/amazon/ion/apps/macroize/InvocationSubstitute.java new file mode 100644 index 0000000000..84a2b6143b --- /dev/null +++ b/src/main/java/com/amazon/ion/apps/macroize/InvocationSubstitute.java @@ -0,0 +1,178 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.apps.macroize; + +import com.amazon.ion.IonContainer; +import com.amazon.ion.IonSequence; +import com.amazon.ion.IonSexp; +import com.amazon.ion.IonStruct; +import com.amazon.ion.IonSystem; +import com.amazon.ion.IonValue; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +/** + * Describes a macro invocation that should be substituted into a datagram in place of the literal value that is + * currently there. + * TODO this is needed because we currently don't have a way of describing a macro invocation in the DOM. If that + * changes, this may go away. + */ +class InvocationSubstitute { + + static final String INVOCATION_ANNOTATION = "$ion_invocation"; + static final String EMPTY_GROUP_ANNOTATION = "$ion_empty"; + private IonContainer parent; + private int indexToReplace; + private final String fieldNameToReplace; + private final String shapeName; + private final List parameters; + private final SuggestedSignature signature; + private final IonSystem system; + + /** + * @param system the IonSystem that owns the parent container. + * @param parent the parent container that holds the value to be replaced with an invocation. + * @param indexToReplace the index in the parent of the value to be replaced. + * @param fieldNameToReplace the field name of the value to be replaced, if in a struct. + * @param shapeName the name of the macro to invoke. 
+ * @param signature the signature of the macro to invoke. + */ + InvocationSubstitute( + IonSystem system, + IonContainer parent, + int indexToReplace, + String fieldNameToReplace, + String shapeName, + SuggestedSignature signature + ) { + this.system = system; + this.parent = parent; + this.indexToReplace = indexToReplace; + this.fieldNameToReplace = fieldNameToReplace; + this.shapeName = shapeName; + this.parameters = extractArguments(parent, indexToReplace, fieldNameToReplace, signature); + this.signature = signature; + } + + /** + * Retrieves the IonValue to be replaced with an invocation. + * @param parent the parent container of the value to replace. + * @param indexToReplace the index in the parent of the value to be replaced. + * @param fieldNameToReplace the field name of the value to be replaced, if in a struct. + * @return the selected value, or null if no matching child is found in the parent. + */ + private static IonValue select(IonContainer parent, int indexToReplace, String fieldNameToReplace) { + IonValue target = null; + if (fieldNameToReplace == null || !(parent instanceof IonStruct)) { + Iterator children = parent.iterator(); + int index = 0; + while (index <= indexToReplace) { + index++; + if (!children.hasNext()) { + return null; + } + target = children.next(); + } + } else { + target = ((IonStruct) parent).get(fieldNameToReplace); + } + return target; + } + + /** + * @return an IonSexp that is used to represent an empty expression group. + */ + private IonSexp emptyExpressionGroup() { + IonSexp empty = system.newEmptySexp(); + empty.addTypeAnnotation(EMPTY_GROUP_ANNOTATION); + return empty; + } + + /** + * Extracts the values from the source data that must be passed into the invocation that will replace the current + * value. + * @param parent the parent container of the value to replace. + * @param indexToReplace the index in the parent of the value to be replaced. + * @param fieldNameToReplace the field name of the value to be replaced, if in a struct. + * @param signature the signature of the invocation.
+ * @return the list of arguments. + */ + private List extractArguments( + IonContainer parent, + int indexToReplace, + String fieldNameToReplace, + SuggestedSignature signature + ) { + IonStruct targetStruct = (IonStruct) select(parent, indexToReplace, fieldNameToReplace); + if (targetStruct == null) { + throw new IllegalArgumentException("Failed to extract parameters for " + fieldNameToReplace); + } + List parameters = new ArrayList<>(); + for (String argument : signature.allParameters()) { + IonValue parameter = targetStruct.get(argument); + if (parameter == null) { + // This is a missing optional + parameters.add(emptyExpressionGroup()); + } else { + parameters.add(parameter); + } + } + // Remove all the optionals that occur contiguously at the end of the invocation. + int tailOptionalCount = 0; + for (int i = parameters.size() - 1; i >= 0; i--) { + String[] annotations = parameters.get(i).getTypeAnnotations(); + if (annotations.length == 1 && annotations[0].equals(EMPTY_GROUP_ANNOTATION)) { + tailOptionalCount++; + } else { + break; + } + } + if (tailOptionalCount > 0) { + parameters = parameters.subList(0, parameters.size() - tailOptionalCount); + } + return parameters; + } + + /** + * Substitutes the target value with an invocation. + * @param nextDepthSubstitutes the substitutes at the next-greater depth. If the target values of those substitutes + * were children of the value substituted in this method, then their parent and index + * to replace must be updated to point at the new invocation. + */ + public void substitute(List nextDepthSubstitutes) { + IonValue target = select(parent, indexToReplace, fieldNameToReplace); + String fieldName = target == null ? 
null : target.getFieldName(); + IonSexp invocation = system.newEmptySexp(); + invocation.addTypeAnnotation(INVOCATION_ANNOTATION); + invocation.add(system.newSymbol(shapeName)); + for (IonValue value : parameters) { + value.removeFromContainer(); + invocation.add(value); + } + IonValue replaced; + if (fieldName == null) { + IonSequence parentSequence = ((IonSequence) parent); + if (indexToReplace >= parentSequence.size()) { + parentSequence.add(invocation); + replaced = null; + } else { + replaced = parentSequence.set(indexToReplace, invocation); + } + } else { + replaced = ((IonStruct) parent).get(fieldName); + ((IonStruct) parent).put(fieldName, invocation); + } + + if (nextDepthSubstitutes != null) { + for (InvocationSubstitute nextDepthSubstitute : nextDepthSubstitutes) { + if (nextDepthSubstitute.parent == replaced) { + nextDepthSubstitute.parent = invocation; + // The first index of an invocation starts at 1, since the macro name comes first. + nextDepthSubstitute.indexToReplace = signature.indexOf(nextDepthSubstitute.shapeName) + 1; + } + } + } + } +} diff --git a/src/main/java/com/amazon/ion/apps/macroize/Macroize.java b/src/main/java/com/amazon/ion/apps/macroize/Macroize.java new file mode 100644 index 0000000000..28d5d3cfba --- /dev/null +++ b/src/main/java/com/amazon/ion/apps/macroize/Macroize.java @@ -0,0 +1,599 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.apps.macroize; + +import com.amazon.ion.IonContainer; +import com.amazon.ion.IonDatagram; +import com.amazon.ion.IonException; +import com.amazon.ion.IonReader; +import com.amazon.ion.IonSystem; +import com.amazon.ion.IonType; +import com.amazon.ion.IonValue; +import com.amazon.ion.IonWriter; +import com.amazon.ion.SymbolToken; +import com.amazon.ion.Timestamp; +import com.amazon.ion.impl.BufferedOutputStreamFastAppendable; +import com.amazon.ion.impl.IonRawTextWriter_1_1; +import com.amazon.ion.impl.IonRawWriter_1_1; +import com.amazon.ion.impl._Private_IonTextAppender; +import com.amazon.ion.impl._Private_IonTextWriterBuilder_1_1; +import com.amazon.ion.impl.bin.BlockAllocatorProviders; +import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1; +import com.amazon.ion.impl.bin.WriteBuffer; +import com.amazon.ion.system.IonReaderBuilder; +import com.amazon.ion.system.IonSystemBuilder; +import com.amazon.ion.system.IonTextWriterBuilder; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.math.BigDecimal; +import java.math.BigInteger; +import java.math.MathContext; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Re-writes a stream of Ion data to the Ion 1.1 equivalent, leveraging Ion 1.1 macros. 
+ */ +public class Macroize { + + private static final IonSystem SYSTEM = IonSystemBuilder.standard().build(); + + public static void main(String[] args) throws IOException { + // TODO replace argument handling with a library like pico CLI + String specFile = null; + boolean outputBinary = false; + int i; + for (i = 0; i < args.length; i++) { + switch (args[i]) { + case "--spec": + specFile = args[++i]; + break; + case "--format": + switch(args[++i]) { + case "binary": + outputBinary = true; + break; + case "text": + outputBinary = false; + break; + default: + throw new IllegalArgumentException("Unrecognized format: " + args[i]); + } + break; + case "--help": + case "-h": + System.out.println("IonJava Macroize Tool v0.1"); + System.out.println("Usage:\n--spec [--format ] "); + System.exit(0); + break; + default: + if (i == args.length - 1) { + // This is the final argument; it must be the input file name. + break; + } + throw new IllegalArgumentException("Unrecognized option: " + args[i]); + } + } + if (specFile == null) { + throw new IllegalArgumentException("Expected a spec file to be provided via the --spec option."); + } + + String inputFileWithSuffix = args[args.length - 1]; + Path inputPath = checkPath(inputFileWithSuffix); + String outputFileSuffix = outputBinary ? ".10n" : ".ion"; + String inputName = inputPath.toFile().getName(); + int dotIndex = inputName.lastIndexOf('.'); + String inputNameWithoutSuffix = (dotIndex < 0) ? 
inputName : inputName.substring(0, dotIndex); + Path specPath = checkPath(specFile); + + Path invocationsPath = Files.createTempFile(inputNameWithoutSuffix + "-invocations", ".ion"); + invocationsPath.toFile().deleteOnExit(); + Path headlessPath = Files.createTempFile(inputNameWithoutSuffix + "-headless-1-1", outputFileSuffix); + headlessPath.toFile().deleteOnExit(); + Path parentDirectory = inputPath.toAbsolutePath().getParent(); + if (parentDirectory == null) { + throw new IllegalArgumentException("Invalid input path: " + inputPath); + } + Path convertedPath = parentDirectory.resolve(inputNameWithoutSuffix + "-1-1" + outputFileSuffix); + + macroize( + () -> IonReaderBuilder.standard().build(Files.newInputStream(inputPath)), + () -> IonTextWriterBuilder.standard().build(Files.newOutputStream(invocationsPath)), + () -> IonReaderBuilder.standard().build(Files.newInputStream(invocationsPath)), + () -> Files.newOutputStream(headlessPath), + () -> Files.newOutputStream(convertedPath), + () -> appendCopy(headlessPath, convertedPath), + () -> IonReaderBuilder.standard().build(Files.newInputStream(specPath)), + outputBinary, + System.out + ); + System.out.println("Ion 1.1 file written to: " + convertedPath.toAbsolutePath()); + } + + /** + * Re-writes a stream of Ion data to the Ion 1.1 equivalent, leveraging Ion 1.1 macros. + * @param inputReaderSupplier supplies an IonReader over the input data. + * @param invocationsWriterSupplier supplies an IonWriter to write a description of where macro invocations should be substituted into the stream. + * @param invocationsReaderSupplier supplies an IonReader over the macro invocation description stream. + * @param headlessOutputSupplier supplies an OutputStream to which the body of the converted stream will be written (i.e., without a preceding encoding context). + * @param fullOutputSupplier supplies an OutputStream to which the entire converted stream (including encoding context) will be written. 
+ * @param assembleFullOutput the procedure for appending the headless stream to the end of the stream containing the encoding context, creating the full output. + * @param specReaderSupplier supplies an IonReader over the spec file that informs the conversion. + * @param outputBinary true if the stream will be converted to binary Ion 1.1; false if it will be converted to text Ion 1.1. + * @param log an appendable log of any messages produced during the conversion, such as statistics and status. + * @throws IOException if thrown during the conversion. + */ + static void macroize( + ThrowingSupplier inputReaderSupplier, + ThrowingSupplier invocationsWriterSupplier, + ThrowingSupplier invocationsReaderSupplier, + ThrowingSupplier headlessOutputSupplier, + ThrowingSupplier fullOutputSupplier, + ThrowingProcedure assembleFullOutput, + ThrowingSupplier specReaderSupplier, + boolean outputBinary, + Appendable log + ) throws IOException { + // Read the input data into memory. + IonDatagram source; + try (IonReader reader = inputReaderSupplier.get()) { + source = SYSTEM.getLoader().load(reader); + } + + // Prepare the context and the spec to be used during the conversion. + ManualEncodingContext context = new ManualEncodingContext(); + MacroizeSpec spec = new MacroizeSpec(); + try (IonReader reader = specReaderSupplier.get()) { + spec.readSpec(reader, context); + } + + // Using the spec, produce a marked up text Ion 1.0 representation of the input that + // indicates which structs should be replaced with macro invocations. + try (IonWriter writer = invocationsWriterSupplier.get()) { + writeMacroMatchesUsingMarkedUpIon10(writer, source, spec, log); + } + + // Go through the marked up invocations and re-write to Ion 1.1, intercepting the special marked up + // Ion 1.0 values and replacing them with proper Ion 1.1 e-expressions. 
+ log.append("\n\nConverting to 1.1\n"); + IonRawWriter_1_1 writer = newRawWriter_1_1(headlessOutputSupplier.get(), outputBinary); + try (IonReader reader = invocationsReaderSupplier.get()) { + while (reader.next() != null) { + replaceMatchesWithInvocations(reader, writer, context, outputBinary, spec.textPatterns); + } + } finally { + writer.close(); + } + + // Write the symbol and macro tables + IonRawWriter_1_1 symbolTableWriter = newRawWriter_1_1(fullOutputSupplier.get(), outputBinary); + try { + symbolTableWriter.writeIVM(); + context.writeTo(symbolTableWriter); + } finally { + symbolTableWriter.close(); + } + // Now, copy the headless Ion 1.1 data to the end. + assembleFullOutput.execute(); + log.append("\nDone.\n"); + } + + /** + * Substitute value literals that match any of the specified macros with invocation instructions, represented using + * annotated Ion 1.0 s-expressions of the form `$ion_invocation::(name_of_macro arguments...)`. This intermediate + * form is used to make it possible to mutate the existing IonValue structure, which does not support modeling + * macro invocations. If this is supported in the future, this can likely be simplified. + * @param writer the writer. + * @param source the source data. + * @param spec the spec containing the macros to match. + * @param log an appendable log. + * @throws IOException if thrown during writing. 
+ */ + private static void writeMacroMatchesUsingMarkedUpIon10( + IonWriter writer, + IonDatagram source, + MacroizeSpec spec, + Appendable log + ) throws IOException { + Map suggestedSignatures = spec.matchMacros(source, log); + for (int topLevelValueIndex = 0; topLevelValueIndex < source.size(); topLevelValueIndex++) { + IonValue topLevelValue = source.get(topLevelValueIndex); + if (!IonType.isContainer(topLevelValue.getType())) { + topLevelValue.writeTo(writer); + continue; + } + // key: depth, value: invocations at that depth + Map> invocationSubstitutes = new HashMap<>(); + findMatch(topLevelValue, source, topLevelValueIndex, spec.customMatchers, suggestedSignatures, invocationSubstitutes, 0); + matchMacrosRecursive((IonContainer) topLevelValue, spec.customMatchers, suggestedSignatures, invocationSubstitutes, 1); + // Iterate over all invocation matches, sorted by depth from shallowest to deepest. + for ( + Map.Entry> substitutesByDepth + : invocationSubstitutes.entrySet().stream().sorted(Map.Entry.comparingByKey()).collect(Collectors.toList()) + ) { + int depth = substitutesByDepth.getKey(); + for (InvocationSubstitute substitute : substitutesByDepth.getValue()) { + substitute.substitute(invocationSubstitutes.get(depth + 1)); + } + // 'topLevelValue' has been replaced with an invocation; update it with the replacement before writing. + if (depth == 0) { + topLevelValue = source.get(topLevelValueIndex); + } + } + topLevelValue.writeTo(writer); + } + } + + /** + * Attempts to match the given value with any of the given macro matchers. + * @param value the value to attempt to match. + * @param parent the value's parent container (which may be an IonDatagram if 'value' is at the top level). + * @param containerIndex the index of 'value' within 'parent'. + * @param customMacroMatchers the macro matchers to evaluate. + * @param suggestedSignatures the macro signatures available. 
+ * @param substituteInvocations receives the invocation substitutes identified for this value, organized by depth. + * @param depth the depth at which the given container resides. + * @return true if a match was found. + */ + private static boolean findMatch( + IonValue value, + IonContainer parent, + int containerIndex, + List customMacroMatchers, + Map suggestedSignatures, + Map> substituteInvocations, + int depth + ) { + // TODO efficiency is not a main concern for the first release of this tool, but if it does become + // important, then it should be considered how the following might be optimized. Currently every value + // at every depth must be compared against all macro matchers. + for (MacroizeMacroMatcher customMacroMatcher : customMacroMatchers) { + if (customMacroMatcher.match(value)) { + String name = customMacroMatcher.name(); + InvocationSubstitute substitute = new InvocationSubstitute(SYSTEM, parent, containerIndex, value.getFieldName(), name, suggestedSignatures.get(name)); + substituteInvocations.computeIfAbsent(depth, k -> new ArrayList<>()).add(substitute); + return true; + } + } + return false; + } + + /** + * Recursively visits the given container, evaluating it against the possible macro matches. + * @param container a container. + * @param customMacroMatchers the macro matchers to evaluate. + * @param suggestedSignatures the macro signatures available. + * @param substituteInvocations receives the invocation substitutes identified for this value, organized by depth. + * @param depth the depth at which the given container resides. + * @return the name of the macro that this container matched, or null if there was no match.
+ */ + private static String matchMacrosRecursive( + IonContainer container, + List customMacroMatchers, + Map suggestedSignatures, + Map> substituteInvocations, + int depth + ) { + Iterator children = container.iterator(); + int containerIndex = 0; + Set childFields = new LinkedHashSet<>(); + while (children.hasNext()) { + IonValue child = children.next(); + if (findMatch(child, container, containerIndex, customMacroMatchers, suggestedSignatures, substituteInvocations, depth)) { + // A custom matcher was matched; don't descend further. + containerIndex++; + continue; + } + if (container.getType() == IonType.STRUCT) { + childFields.add(child.getFieldName()); + } + switch (child.getType()) { + case STRUCT: + case LIST: + case SEXP: + String shapeName = matchMacrosRecursive((IonContainer) child, customMacroMatchers, suggestedSignatures, substituteInvocations, depth + 1); + if (shapeName != null) { + InvocationSubstitute substitute = new InvocationSubstitute(SYSTEM, container, containerIndex, child.getFieldName(), shapeName, suggestedSignatures.get(shapeName)); + substituteInvocations.computeIfAbsent(depth, k -> new ArrayList<>()).add(substitute); + } + break; + default: + break; + } + containerIndex++; + } + String shapeName = getNameOfShape(container); + if (shapeName == null) { + return null; + } + SuggestedSignature suggestedSignature = suggestedSignatures.get(shapeName); + if (suggestedSignature != null && suggestedSignature.isCompatible(childFields)) { + if (container.getType() == IonType.STRUCT) { + return shapeName; + } + } + return null; + } + + /** + * Iterates through a stream that may contain macro invocation markup (e.g. + * `$ion_invocation::(name_of_macro arguments...)`), replacing these special marked up s-expressions with + * actual Ion 1.1 e-expressions. + * TODO the structure of this method is copied from `AbstractIonWriter.writeValueRecursive`, though several changes + * were made to fit this purpose. Ideally the code could be shared somehow. 
+ * @param reader the reader over the marked-up Ion 1.0 stream. + * @param writer an Ion 1.1 raw writer. + * @param context the encoding context, containing the symbols and macros that will be used in the Ion 1.1 stream. + * @param isBinary true if the output encoding is binary; false if it is text. + * @param textPatterns the text patterns to match and replace when writing. + */ + private static void replaceMatchesWithInvocations( + IonReader reader, + IonRawWriter_1_1 writer, + ManualEncodingContext context, + boolean isBinary, + List textPatterns + ) { + // The IonReader does not need to be at the top level (getDepth()==0) when the function is called. + // We take note of its initial depth so we can avoid advancing the IonReader beyond the starting value. + int startingDepth = writer.depth(); + + // The IonReader will be at `startingDepth` when the function is first called and then again when we + // have finished traversing all of its children. This boolean tracks which of those two states we are + // in when `getDepth() == startingDepth`. + boolean alreadyProcessedTheStartingValue = false; + + // The IonType of the IonReader's current value. + IonType type; + + while (true) { + // Each time we reach the top of the loop we are in one of three states: + // 1. We have not yet begun processing the starting value. + // 2. We are currently traversing the starting value's children. + // 3. We have finished processing the starting value. + if (writer.depth() == startingDepth) { + // The IonReader is at the starting depth. We're either beginning our traversal or finishing it. + if (alreadyProcessedTheStartingValue) { + // We're finishing our traversal. + break; + } + // We're beginning our traversal. Don't advance the cursor; instead, use the current + // value's IonType. + type = reader.getType(); + // We've begun processing the starting value. 
+ alreadyProcessedTheStartingValue = true; + } else { + // We're traversing the starting value's children (that is: values at greater depths). We need to + // advance the cursor by calling next(). + type = reader.next(); + } + + if (type == null) { + // There are no more values at this level. If we're at the starting level, we're done. + if (writer.depth() == startingDepth) { + break; + } + // Otherwise, step out once and then try to move forward again. + reader.stepOut(); + writer.stepOut(); + continue; + } + + final SymbolToken fieldName = reader.getFieldNameSymbol(); + if (fieldName != null && !writer._private_hasFieldName() && writer.isInStruct()) { + // TODO apply text patterns to field names + writer.writeFieldName(context.internSymbol(fieldName.getText())); + } + if (fieldName == null && writer.isInStruct()) { + throw new IonException("Missing field name"); + } + final SymbolToken[] annotations = reader.getTypeAnnotationSymbols(); + boolean isEexp = false; + boolean isEmptyExpressionGroup = false; + if (annotations.length == 1 && annotations[0].getText().equals(InvocationSubstitute.INVOCATION_ANNOTATION)) { + isEexp = true; + } else if (annotations.length == 1 && annotations[0].getText().equals(InvocationSubstitute.EMPTY_GROUP_ANNOTATION)) { + isEmptyExpressionGroup = true; + } else { + for (SymbolToken annotation : annotations) { + // TODO apply text patterns to annotations + writer.writeAnnotations(context.internSymbol(annotation.getText())); + } + } + if (reader.isNullValue()) { + writer.writeNull(type); + continue; + } + + switch (type) { + case BOOL: + final boolean booleanValue = reader.booleanValue(); + writer.writeBool(booleanValue); + break; + case INT: + switch (reader.getIntegerSize()) { + case INT: + final int intValue = reader.intValue(); + writer.writeInt(intValue); + break; + case LONG: + final long longValue = reader.longValue(); + writer.writeInt(longValue); + break; + case BIG_INTEGER: + final BigInteger bigIntegerValue = 
reader.bigIntegerValue(); + writer.writeInt(bigIntegerValue); + break; + default: + throw new IllegalStateException(); + } + break; + case FLOAT: + final double doubleValue = reader.doubleValue(); + writer.writeFloat(doubleValue); + break; + case DECIMAL: + BigDecimal decimalValue = reader.decimalValue(); + if (decimalValue.precision() > 16) { + decimalValue = decimalValue.round(MathContext.DECIMAL64); + } + writer.writeDecimal(decimalValue); + break; + case TIMESTAMP: + final Timestamp timestampValue = reader.timestampValue(); + writer.writeTimestamp(timestampValue); + break; + case SYMBOL: + final SymbolToken symbolToken = reader.symbolValue(); + writer.writeSymbol(context.internSymbol(symbolToken.getText())); + break; + case STRING: + final String stringValue = reader.stringValue(); + boolean isMatched = false; + for (TextPattern stringPattern : textPatterns) { + if (stringPattern.matches(stringValue)) { + stringPattern.invoke(stringValue, context, writer, isBinary); + isMatched = true; + break; + } + } + if (!isMatched) { + writer.writeString(stringValue); + } + break; + case CLOB: + final byte[] clobValue = reader.newBytes(); + writer.writeClob(clobValue); + break; + case BLOB: + final byte[] blobValue = reader.newBytes(); + writer.writeBlob(blobValue); + break; + case SEXP: + reader.stepIn(); + if (isEmptyExpressionGroup) { + writer.stepInExpressionGroup(false); + } else if (isEexp) { + reader.next(); + String macroName = reader.stringValue(); + if (isBinary) { + writer.stepInEExp(context.getMacroId(macroName), false, context.getMacro(macroName)); + } else { + writer.stepInEExp(macroName); + } + } else { + writer.stepInSExp(false); + } + break; + case LIST: + reader.stepIn(); + writer.stepInList(false); + break; + case STRUCT: + reader.stepIn(); + writer.stepInStruct(false); + break; + default: + throw new IllegalStateException("Unexpected type: " + type); + } + } + } + + /** + * Checks that the file with the given name exists and can be read. 
+ * @param name the file name. + * @return a Path to the file. + */ + private static Path checkPath(String name) { + File file = new File(name); + if (!file.canRead()) { + throw new IllegalArgumentException("Cannot read file: " + name); + } + return file.toPath(); + } + + /** + * Appends a copy of the contents of 'from' to the end of the contents of 'to'. + * @param from the path to copy from. + * @param to the path to append to. + * @throws IOException if thrown during the copy. + */ + private static void appendCopy(Path from, Path to) throws IOException { + try (OutputStream output = new FileOutputStream(to.toFile(), true)) { + Files.copy(from, output); + } + } + + private static IonRawWriter_1_1 newRawWriter_1_1(OutputStream out, boolean isBinary) { + return isBinary ? newRawBinaryWriter_1_1(out) : newRawTextWriter_1_1(out); + } + + private static IonRawWriter_1_1 newRawBinaryWriter_1_1(OutputStream out) { + return new IonRawBinaryWriter_1_1( + out, + new WriteBuffer(BlockAllocatorProviders.basicProvider().vendAllocator(32768), () -> {}), + 0 + ); + } + + private static IonRawWriter_1_1 newRawTextWriter_1_1(OutputStream out) { + _Private_IonTextWriterBuilder_1_1 builder = _Private_IonTextWriterBuilder_1_1.standard() + .withNewLineType(IonTextWriterBuilder.NewLineType.LF) + .withPrettyPrinting(); + BufferedOutputStreamFastAppendable appendable = new BufferedOutputStreamFastAppendable( + out, + BlockAllocatorProviders.basicProvider().vendAllocator(4096), + 1.0 + ); + return new IonRawTextWriter_1_1( + builder, + _Private_IonTextAppender.forFastAppendable(appendable, StandardCharsets.UTF_8) + ); + } + + /** + * Sanitizes the given string so that it may be used as a macro name. + * @param original the original string. + * @return the sanitized name. 
+ */ + private static String sanitizeName(String original) { + String sanitized = original.replaceAll("[.:\\-/]", "_"); + if (!Character.isAlphabetic(sanitized.charAt(0))) { + return "z" + sanitized; // This is arbitrary. + } + return sanitized; + } + + /** + * Gets a name describing the given container. This will either be its field name, if in a struct, or the field + * name of its parent sequence, if applicable. Otherwise, this method will return null. + * @param container the value for which to get a shape name. + * @return the name, or null if no name can be determined. + */ + private static String getNameOfShape(IonContainer container) { + String shapeName = container.getFieldName(); + if (shapeName == null) { + // Homogeneous sequences of structs are common. In this case use the field name of the sequence, if any. + IonContainer parentContainer = container.getContainer(); + if (parentContainer != null && (parentContainer.getType() == IonType.LIST || parentContainer.getType() == IonType.SEXP)) { + shapeName = parentContainer.getFieldName(); + } + } + if (shapeName == null) { + return null; + } + return sanitizeName(shapeName); + } +} diff --git a/src/main/java/com/amazon/ion/apps/macroize/MacroizeMacroMatcher.java b/src/main/java/com/amazon/ion/apps/macroize/MacroizeMacroMatcher.java new file mode 100644 index 0000000000..f009de329a --- /dev/null +++ b/src/main/java/com/amazon/ion/apps/macroize/MacroizeMacroMatcher.java @@ -0,0 +1,41 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.apps.macroize; + +import com.amazon.ion.IonReader; +import com.amazon.ion.impl.macro.Macro; +import com.amazon.ion.impl.macro.MacroMatcher; +import com.amazon.ion.impl.macro.MacroRef; + +/** + * A {@link MacroMatcher} that uses a {@link ManualEncodingContext} and can produce {@link SuggestedSignature}s. 
+ */
+class MacroizeMacroMatcher extends MacroMatcher {
+
+    /**
+     * Builds the matcher from the macro definition at the reader's current position and registers the resulting
+     * macro in the given context under this matcher's name.
+     * @param macroReader the reader supplying the macro definition.
+     * @param symbolTable the encoding context used to resolve macro references by name and to receive the new macro.
+     */
+    public MacroizeMacroMatcher(IonReader macroReader, ManualEncodingContext symbolTable) {
+        // Macro references are resolved by name only; the cast fails for any other kind of reference.
+        super(macroReader, ref -> symbolTable.getMacro(((MacroRef.ByName) ref).getName()));
+        symbolTable.addMacro(name(), macro());
+    }
+
+    /**
+     * @return the suggested signature for this matcher.
+     * @throws UnsupportedOperationException if any parameter has OneOrMore or ZeroOrMore cardinality.
+     */
+    SuggestedSignature getSignature() {
+        SuggestedSignature signature = new SuggestedSignature();
+        for (Macro.Parameter parameter : macro().getSignature()) {
+            // ZeroOrOne parameters are recorded as optional and ExactlyOne as required.
+            switch (parameter.getCardinality()) {
+                case ZeroOrOne:
+                    signature.addOptional(parameter.getVariableName());
+                    break;
+                case ExactlyOne:
+                    signature.addRequired(parameter.getVariableName());
+                    break;
+                case OneOrMore:
+                    throw new UnsupportedOperationException("TODO: + not yet supported");
+                case ZeroOrMore:
+                    throw new UnsupportedOperationException("TODO: * not yet supported");
+            }
+        }
+        return signature;
+    }
+}
diff --git a/src/main/java/com/amazon/ion/apps/macroize/MacroizeSpec.java b/src/main/java/com/amazon/ion/apps/macroize/MacroizeSpec.java
new file mode 100644
index 0000000000..ac9227526d
--- /dev/null
+++ b/src/main/java/com/amazon/ion/apps/macroize/MacroizeSpec.java
@@ -0,0 +1,184 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+package com.amazon.ion.apps.macroize;
+
+import com.amazon.ion.IonContainer;
+import com.amazon.ion.IonDatagram;
+import com.amazon.ion.IonException;
+import com.amazon.ion.IonReader;
+import com.amazon.ion.IonType;
+import com.amazon.ion.IonValue;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Specifies how a particular stream of Ion data should be written using Ion 1.1. This spec is read from an Ion file
+ * that contains a struct with the following shape:
+ *
+ * <pre>
+ * {@code
+ *     {
+ *         macros: [(macro ...) ...] // The elements are Ion 1.1 TDL macro definitions
+ *         textPatterns: [(verbatim | prefix | substring ...) ...] // The elements refer to {@link TextPattern} types
+ *     }
+ * }
+ * </pre>
+ * The textPattern elements may have the following shape:
+ * <pre>
+ * {@code
+ *     (verbatim [string...]) // Each string in the list is a string to write as a symbol using make_string
+ *     (prefix string [string...]) // The standalone string is the prefix; the optional list elements are potential suffixes.
+ *     (substring string [string...]) // The standalone string is a target substring; the optional list elements are potential prefixes or suffixes.
+ * }
+ * </pre>
+ * Note the following known limitations, which may be fixed in the future:
+ * <ul>
+ *     <li>Within macro definitions that expand to structs, variable names must match the field name,
+ *     e.g., {foo: (%foo)}</li>
+ *     <li>The tool only attempts to match suggested macros to container values.</li>
+ *     <li>Nested macro invocations are not yet supported.</li>
+ * </ul>
+ */ +class MacroizeSpec { + final List customMatchers = new ArrayList<>(); + final List textPatterns = new ArrayList<>(); + + /** + * Reads the spec from the given reader. It is assumed that next() has not yet been called to position the reader + * on the spec struct. + * @param reader the reader. + * @param context the encoding context. + */ + void readSpec(IonReader reader, ManualEncodingContext context) { + if (reader.next() != IonType.STRUCT) { + throw new IonException("Expected struct."); + } + reader.stepIn(); + while (reader.next() != null) { + if (reader.getType() != IonType.LIST) { + throw new IonException("Expected list."); + } + switch (reader.getFieldName()) { + case "macros": + readMacroMatchers(reader, context, customMatchers); + break; + case "textPatterns": + readTextPatterns(reader, context, textPatterns); + break; + default: + throw new IonException("Expected 'macros' or 'textPatterns'."); + } + } + } + + private static void readMacroMatchers(IonReader reader, ManualEncodingContext symbolTable, List matchers) { + reader.stepIn(); + while (reader.next() != null) { + matchers.add(new MacroizeMacroMatcher(reader, symbolTable)); + } + reader.stepOut(); + } + + private static void readTextPatterns(IonReader reader, ManualEncodingContext symbolTable, List patterns) { + reader.stepIn(); + while (reader.next() != null) { + if (reader.getType() != IonType.SEXP) { + throw new IonException("Expected s-exp."); + } + reader.stepIn(); + if (!IonType.isText(reader.next())) { + throw new IonException("Expected pattern type name."); + } + switch (reader.stringValue()) { + case "verbatim": + patterns.add(new VerbatimTextPattern(symbolTable, readStringList(reader))); + break; + case "prefix": + if (!IonType.isText(reader.next())) { + throw new IonException("Expected prefix."); + } + patterns.add(new PrefixTextPattern(symbolTable, reader.stringValue(), readStringList(reader))); + break; + case "substring": + if (!IonType.isText(reader.next())) { + throw new 
IonException("Expected substring."); + } + patterns.add(new SubstringTextPattern(symbolTable, reader.stringValue(), readStringList(reader))); + break; + default: + throw new IonException("Expected 'stringAsSymbol', 'prefix', or 'contains'."); + } + reader.stepOut(); + } + reader.stepOut(); + } + + private static List readStringList(IonReader reader) { + List strings = new ArrayList<>(); + if (reader.next() == null) { + return strings; + } + if (reader.getType() != IonType.LIST) { + throw new IonException("Expected list of strings."); + } + reader.stepIn(); + while (reader.next() != null) { + if (IonType.isText(reader.getType())) { + strings.add(reader.stringValue()); + } + } + reader.stepOut(); + return strings; + } + + private void recursiveMatch(IonContainer container, Map matchCounter) { + for (IonValue child : container) { + for (MacroizeMacroMatcher customMatcher : customMatchers) { + if (customMatcher.match(child)) { + matchCounter.compute(customMatcher.name(), (key, existingValue) -> { + if (existingValue == null) { + existingValue = 0; + } + return existingValue + 1; + }); + } + } + switch (child.getType()) { + case STRUCT: + case LIST: + case SEXP: + recursiveMatch((IonContainer) child, matchCounter); + break; + default: + break; + } + } + } + + /** + * Match values from the given source against the macro matchers supplied by the spec. Logs the number of + * occurrences of each macro match and assembles suggested signatures for each matcher with at least one match. + * @param source the source data. + * @param log the log to receive messages about occurrences. + * @return a map from macro name to suggested signature for each name with at least one match. + * @throws IOException if thrown when logging occurrences. 
+ */ + Map matchMacros(IonDatagram source, Appendable log) throws IOException { + Map customMacroMatches = new HashMap<>(); + Map suggestedSignatures = new HashMap<>(); + recursiveMatch(source, customMacroMatches); + + for (MacroizeMacroMatcher customMacroMatcher : customMatchers) { + String matcherName = customMacroMatcher.name(); + Integer occurrences = customMacroMatches.get(matcherName); + if (occurrences != null && occurrences > 0) { + suggestedSignatures.put(matcherName, customMacroMatcher.getSignature()); + log.append(String.format("%n%n === %s (total occurrences: %d)%n", matcherName, occurrences)); + } + } + return suggestedSignatures; + } +} diff --git a/src/main/java/com/amazon/ion/apps/macroize/ManualEncodingContext.java b/src/main/java/com/amazon/ion/apps/macroize/ManualEncodingContext.java new file mode 100644 index 0000000000..79e9b0380b --- /dev/null +++ b/src/main/java/com/amazon/ion/apps/macroize/ManualEncodingContext.java @@ -0,0 +1,200 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.apps.macroize; + +import com.amazon.ion.impl.IonRawWriter_1_1; +import com.amazon.ion.impl.SystemSymbols_1_1; +import com.amazon.ion.impl.macro.Expression; +import com.amazon.ion.impl.macro.Expression.TemplateBodyExpression; +import com.amazon.ion.impl.macro.Macro; +import com.amazon.ion.impl.macro.SystemMacro; +import com.amazon.ion.impl.macro.TemplateMacro; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.function.Consumer; + +/** + * An encoding context that is manipulated manually. To be used alongside an IonRawWriter_1_1. + * TODO consider whether this class may be replaced by something similar from the core library. 
+ */ +class ManualEncodingContext { + private final Map symbolToId = new HashMap<>(); + private final Map macroNameToId = new HashMap<>(); + private final Map macroNameToMacro = new HashMap<>(); + + int symbolMaxId = 0; + int macroMaxId = -1; + + public ManualEncodingContext() { + // Intern the Ion 1.1 special symbols that aren't in the system symbol table. + // TODO these should be written inline instead of added to the symbol table. + internSymbol("%"); + internSymbol("?"); + } + + /** + * Adds the given macro to the macro table. + * @param macroName the name of the macro. + * @param macro the macro. + */ + public void addMacro(String macroName, TemplateMacro macro) { + macroNameToId.put(macroName, ++macroMaxId); + macroNameToMacro.put(macroName, macro); + // Intern the symbols that will occur in the macro signature and template body. + internSymbol(macroName); + for (Expression.TemplateBodyExpression expression : macro.getBody()) { + if (expression instanceof TemplateBodyExpression.FieldName) { + internSymbol(((TemplateBodyExpression.FieldName) expression).getValue().getText()); + } + } + for (Macro.Parameter parameter : macro.getSignature()) { + internSymbol(parameter.getVariableName()); + } + } + + /** + * Gets the mapping to the given symbol in the symbol table, or creates a mapping if none yet exists. + * @param symbol the symbol to intern. + * @return the symbol ID. + */ + public int internSymbol(String symbol) { + return symbolToId.computeIfAbsent(symbol, k -> ++symbolMaxId); + } + + /** + * @param symbol a symbol. + * @return true if the symbol already has a mapping in the symbol table. + */ + public boolean hasSymbol(String symbol) { + return symbolToId.get(symbol) != null; + } + + /** + * @param macroName the name of a macro. + * @return the ID of the given macro in the macro table, if present. + */ + public int getMacroId(String macroName) { + return macroNameToId.get(macroName); + } + + /** + * @param macroName the name of a macro. 
+ * @return the macro, if present in the macro table. + */ + public TemplateMacro getMacro(String macroName) { + return macroNameToMacro.get(macroName); + } + + /** + * Writes the encoding context to the given writer. It is assumed that the symbols in the symbol table are used + * to encode the macro table, so the symbol table is written first in its own encoding directive, followed by + * the macro table. + * @param writer the writer. + */ + public void writeTo(IonRawWriter_1_1 writer) { + // write the symbol table + writer.stepInEExp(SystemMacro.SetSymbols); + writer.stepInExpressionGroup(false); + List> symbols = new ArrayList<>(symbolToId.entrySet()); + symbols.sort(Map.Entry.comparingByValue()); + symbols.forEach(e -> writer.writeString(e.getKey())); + writer.stepOut(); + writer.stepOut(); + + // write the macro table + if (macroNameToId.isEmpty()) { + return; + } + writer.stepInEExp(SystemMacro.SetMacros); + writer.stepInExpressionGroup(false); + List> macros = new ArrayList<>(macroNameToId.entrySet()); + macros.sort(Map.Entry.comparingByValue()); + for (Map.Entry macroAndId : macros) { + TemplateMacro macro = macroNameToMacro.get(macroAndId.getKey()); + writeMacroTo(writer, macroAndId.getKey(), macro); + } + writer.stepOut(); + writer.stepOut(); + } + + /** + * Writes the given macro. + * @param writer the writer. + * @param name the name of the macro to write. + * @param macro the macro to write. + */ + private void writeMacroTo(IonRawWriter_1_1 writer, String name, TemplateMacro macro) { + writeMacroTo(writer, name, macro, symbol -> writer.writeSymbol(internSymbol(symbol)), symbol -> writer.writeFieldName(internSymbol(symbol))); + } + + /** + * Writes the given macro. + * @param writer the writer. + * @param name the name of the macro to write. + * @param macro the macro to write. + * @param symbolWriter function that writes a symbol value. + * @param fieldNameWriter function that writes a field name. 
+ */ + private static void writeMacroTo(IonRawWriter_1_1 writer, String name, TemplateMacro macro, Consumer symbolWriter, Consumer fieldNameWriter) { + writer.stepInSExp(false); + writer.writeSymbol(SystemSymbols_1_1.MACRO); + symbolWriter.accept(name); + writer.stepInSExp(false); + List signature = macro.getSignature(); + for (Macro.Parameter parameter : signature) { + symbolWriter.accept(parameter.getVariableName()); + if (parameter.getCardinality() != Macro.ParameterCardinality.ExactlyOne) { + symbolWriter.accept("?"); + } + } + writer.stepOut(); + List body = macro.getBody(); + int index = 0; + int[] numberOfTimesToStepOut = new int[body.size() + 1]; + Arrays.fill(numberOfTimesToStepOut, 0); + for (Expression.TemplateBodyExpression expression : body) { + for (int i = 0; i < numberOfTimesToStepOut[index]; i++) { + writer.stepOut(); + } + if (expression instanceof Expression.ExpressionGroup) { + // Note: assumes that template bodies are composed of either structs or system macro invocations. Will + // need to be generalized to fit other use cases as necessary. 
+ writer.stepInSExp(true); + symbolWriter.accept("."); + writer.writeAnnotations(SystemSymbols_1_1.ION); + writer.writeSymbol(SystemSymbols_1_1.MAKE_STRING); + writer.stepInSExp(true); + symbolWriter.accept(".."); + numberOfTimesToStepOut[((Expression.ExpressionGroup) expression).getEndExclusive()]++; + } else if (expression instanceof TemplateBodyExpression.FieldName) { + fieldNameWriter.accept(((TemplateBodyExpression.FieldName) expression).getValue().getText()); + } else if (expression instanceof TemplateBodyExpression.VariableRef) { + writer.stepInSExp(true); + symbolWriter.accept("%"); + symbolWriter.accept(signature.get(((TemplateBodyExpression.VariableRef) expression).getSignatureIndex()).getVariableName()); + writer.stepOut(); + } else if (expression instanceof Expression.TextValue) { + writer.writeString(((Expression.TextValue) expression).getStringValue()); + } else if (expression instanceof Expression.ListValue) { + writer.stepInList(true); + numberOfTimesToStepOut[((Expression.ListValue) expression).getEndExclusive()]++; + } else if (expression instanceof Expression.StructValue) { + writer.stepInStruct(true); + numberOfTimesToStepOut[((Expression.StructValue) expression).getEndExclusive()]++; + } else if (expression instanceof Expression.BoolValue) { + writer.writeBool(((Expression.BoolValue) expression).getValue()); + } else { + throw new UnsupportedOperationException("TODO: unsupported expression type"); + } + index++; + } + for (int i = 0; i < numberOfTimesToStepOut[body.size()]; i++) { + writer.stepOut(); + } + writer.stepOut(); + } +} diff --git a/src/main/java/com/amazon/ion/apps/macroize/PrefixTextPattern.java b/src/main/java/com/amazon/ion/apps/macroize/PrefixTextPattern.java new file mode 100644 index 0000000000..124b8789ba --- /dev/null +++ b/src/main/java/com/amazon/ion/apps/macroize/PrefixTextPattern.java @@ -0,0 +1,52 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.apps.macroize; + +import com.amazon.ion.impl.IonRawWriter_1_1; +import com.amazon.ion.impl.macro.SystemMacro; + +import java.util.List; + +/** + * Writes a String value as a make_string invocation whose first argument is a symbol and whose second argument + * is either a symbol or a string. This allows for strings with common prefixes to be written compactly, even if + * they may have high-cardinality suffixes. + */ +class PrefixTextPattern implements TextPattern { // TODO unify with SubstringTextPattern? + private final String commonPrefix; + + /** + * @param context the encoding context. + * @param commonPrefix the prefix. + * @param suffixes recurring suffixes, if any. May be empty. If a suffix not present in this list is encountered + * in the data, that suffix will be written as a string instead of a symbol. + */ + PrefixTextPattern(ManualEncodingContext context, String commonPrefix, List suffixes) { + this.commonPrefix = commonPrefix; + context.internSymbol(commonPrefix); + for (String suffix : suffixes) { + context.internSymbol(suffix); + } + } + + @Override + public boolean matches(String candidate) { + return candidate.startsWith(commonPrefix); + } + + @Override + public void invoke(String match, ManualEncodingContext table, IonRawWriter_1_1 writer, boolean isBinary) { + // TODO consider whether these could/should be written using a custom macro that itself calls make_string. 
+ writer.stepInEExp(SystemMacro.MakeString); + writer.stepInExpressionGroup(true); + writer.writeSymbol(table.internSymbol(commonPrefix)); + String suffix = match.replace(commonPrefix, ""); + if (table.hasSymbol(suffix)) { + writer.writeSymbol(table.internSymbol(suffix)); + } else { + writer.writeString(suffix); + } + writer.stepOut(); + writer.stepOut(); + } +} diff --git a/src/main/java/com/amazon/ion/apps/macroize/SubstringTextPattern.java b/src/main/java/com/amazon/ion/apps/macroize/SubstringTextPattern.java new file mode 100644 index 0000000000..2c2481d8df --- /dev/null +++ b/src/main/java/com/amazon/ion/apps/macroize/SubstringTextPattern.java @@ -0,0 +1,62 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.apps.macroize; + +import com.amazon.ion.impl.IonRawWriter_1_1; +import com.amazon.ion.impl.macro.SystemMacro; + +import java.util.List; + +/** + * Writes a String value as a make_string invocation with a prefix, a recurring substring, and a suffix. This allows for + * strings with common substrings to be written compactly, even if they may have high-cardinality prefixes and/or + * suffixes. + */ +class SubstringTextPattern implements TextPattern { + + private final String substring; + + /** + * @param context the encoding context. + * @param substring the prefix. + * @param prefixesAndSuffixes recurring prefixes and/or suffixes, if any. May be empty. If a prefix or suffix + * not present in this list is encountered in the data, it will be written as a string + * instead of a symbol. 
+ */ + SubstringTextPattern(ManualEncodingContext context, String substring, List prefixesAndSuffixes) { + this.substring = substring; + context.internSymbol(substring); + for (String prefixOrSuffix : prefixesAndSuffixes) { + context.internSymbol(prefixOrSuffix); + } + } + + @Override + public boolean matches(String candidate) { + return candidate.contains(substring); + } + + private void writeComponent(String component, ManualEncodingContext table, IonRawWriter_1_1 writer) { + if (table.hasSymbol(component)) { + writer.writeSymbol(table.internSymbol(component)); + } else { + writer.writeString(component); + } + } + + @Override + public void invoke(String match, ManualEncodingContext table, IonRawWriter_1_1 writer, boolean isBinary) { + writer.stepInEExp(SystemMacro.MakeString); + writer.stepInExpressionGroup(true); + String[] components = match.split(substring); + if (!components[0].isEmpty()) { + writeComponent(components[0], table, writer); + } + writer.writeSymbol(table.internSymbol(substring)); + if (components.length > 1 && !components[1].isEmpty()) { + writeComponent(components[1], table, writer); + } + writer.stepOut(); + writer.stepOut(); + } +} diff --git a/src/main/java/com/amazon/ion/apps/macroize/SuggestedSignature.java b/src/main/java/com/amazon/ion/apps/macroize/SuggestedSignature.java new file mode 100644 index 0000000000..1778286f75 --- /dev/null +++ b/src/main/java/com/amazon/ion/apps/macroize/SuggestedSignature.java @@ -0,0 +1,58 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.apps.macroize; + +import java.util.LinkedHashSet; +import java.util.Set; + +/** + * Represents a simple suggested macro signature. TODO support + and * cardinalities. + */ +class SuggestedSignature { + + // Names of the required parameters (! cardinality), in the order they were added. + private final Set required = new LinkedHashSet<>(); + // Names of the optional parameters (? 
cardinality), in the order they were added. + private final Set optional = new LinkedHashSet<>(); + // Names of all parameters (required and optional), in the order they were added. + private final Set all = new LinkedHashSet<>(); + + public void addRequired(String argument) { + required.add(argument); + all.add(argument); + } + + public void addOptional(String argument) { + optional.add(argument); + all.add(argument); + } + + public Set allParameters() { + return all; + } + + /** + * Gets the index of the target parameter in the sequence of all parameters. It is up to the caller to ensure + * the target parameter exists. + * @param targetParameter the target parameter + * @return the index of the target parameter. + */ + public int indexOf(String targetParameter) { + int index = 0; + for (String parameter : all) { + if (targetParameter.equals(parameter)) { + return index; + } + index++; + } + return index; + } + + /** + * @param candidate a set of parameter names to attempt to match to this signature. + * @return true if the given parameters are compatible with this signature. + */ + public boolean isCompatible(Set candidate) { + return candidate.containsAll(required) && all.containsAll(candidate); + } +} diff --git a/src/main/java/com/amazon/ion/apps/macroize/TextPattern.java b/src/main/java/com/amazon/ion/apps/macroize/TextPattern.java new file mode 100644 index 0000000000..e150ffa7ab --- /dev/null +++ b/src/main/java/com/amazon/ion/apps/macroize/TextPattern.java @@ -0,0 +1,26 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.apps.macroize; + +import com.amazon.ion.impl.IonRawWriter_1_1; + +/** + * A string pattern to match in some source data. + */ +interface TextPattern { + + /** + * @param candidate a string to evaluate against the pattern. + * @return true if the candidate matches this pattern. 
+ */ + boolean matches(String candidate); + + /** + * Writes this pattern from the given match. It is up to the caller to ensure the given string is actually a match. + * @param match the match from which to write the pattern. + * @param table the context to use when writing. + * @param writer the writer to which the pattern will be written. + * @param isBinary true if the output format is binary. + */ + void invoke(String match, ManualEncodingContext table, IonRawWriter_1_1 writer, boolean isBinary); +} diff --git a/src/main/java/com/amazon/ion/apps/macroize/ThrowingProcedure.java b/src/main/java/com/amazon/ion/apps/macroize/ThrowingProcedure.java new file mode 100644 index 0000000000..df4aec23d1 --- /dev/null +++ b/src/main/java/com/amazon/ion/apps/macroize/ThrowingProcedure.java @@ -0,0 +1,10 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.apps.macroize; + +import java.io.IOException; + +@FunctionalInterface +interface ThrowingProcedure { + void execute() throws IOException; +} diff --git a/src/main/java/com/amazon/ion/apps/macroize/ThrowingSupplier.java b/src/main/java/com/amazon/ion/apps/macroize/ThrowingSupplier.java new file mode 100644 index 0000000000..a89031134e --- /dev/null +++ b/src/main/java/com/amazon/ion/apps/macroize/ThrowingSupplier.java @@ -0,0 +1,10 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.apps.macroize; + +import java.io.IOException; + +@FunctionalInterface +interface ThrowingSupplier { + T get() throws IOException; +} diff --git a/src/main/java/com/amazon/ion/apps/macroize/VerbatimTextPattern.java b/src/main/java/com/amazon/ion/apps/macroize/VerbatimTextPattern.java new file mode 100644 index 0000000000..37d484293d --- /dev/null +++ b/src/main/java/com/amazon/ion/apps/macroize/VerbatimTextPattern.java @@ -0,0 +1,44 @@ +// Copyright Amazon.com, Inc. 
or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.apps.macroize; + +import com.amazon.ion.impl.IonRawWriter_1_1; +import com.amazon.ion.impl.macro.SystemMacro; + +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * Writes a String value as a make_string invocation whose argument is a symbol. This allows recurring text to be + * added to the symbol table and encoded using an ID while retaining the String type. + */ +class VerbatimTextPattern implements TextPattern { + + // The strings to write using make_string invocations. + private final Set targets; + + /** + * @param context the encoding context. + * @param strings the strings to be written using make_string invocations. + */ + VerbatimTextPattern(ManualEncodingContext context, List strings) { + this.targets = new HashSet<>(); + targets.addAll(strings); + for (String target : strings) { + context.internSymbol(target); + } + } + + @Override + public boolean matches(String candidate) { + return targets.contains(candidate); + } + + @Override + public void invoke(String match, ManualEncodingContext table, IonRawWriter_1_1 writer, boolean isBinary) { + writer.stepInEExp(SystemMacro.MakeString); + writer.writeSymbol(table.internSymbol(match)); + writer.stepOut(); + } +} diff --git a/src/main/java/com/amazon/ion/impl/BufferedAppendableFastAppendable.kt b/src/main/java/com/amazon/ion/impl/BufferedAppendableFastAppendable.kt new file mode 100644 index 0000000000..6afed56d90 --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/BufferedAppendableFastAppendable.kt @@ -0,0 +1,52 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl + +import com.amazon.ion.impl.bin.* +import com.amazon.ion.util.* +import java.io.Closeable +import java.io.Flushable + +/** + * A [_Private_FastAppendable] that buffers data to a [StringBuilder]. 
Only when
+ * [flush] is called is the data written to the wrapped [Appendable].
+ *
+ * This is necessary for cases where an [IonManagedWriter_1_1] over Ion text needs to emit encoding directives that are
+ * not known in advance. The [AppendableFastAppendable] class has no buffering, so system and user values would be
+ * emitted in the wrong order.
+ *
+ * Once [IonManagedWriter_1_1] supports an auto-flush feature, then this class will have very little practical
+ * difference from [AppendableFastAppendable] for the case where no system values are needed.
+ *
+ * TODO:
+ * - Add proper tests
+ *
+ * @see BufferedOutputStreamFastAppendable
+ * @see AppendableFastAppendable
+ */
+internal class BufferedAppendableFastAppendable(
+    private val wrapped: Appendable,
+    private val buffer: StringBuilder = StringBuilder()
+) : _Private_FastAppendable, Flushable, Closeable, Appendable by buffer {
+
+    // All of the append variants write to the in-memory buffer through the delegated Appendable methods.
+    override fun appendAscii(c: Char) { append(c) }
+    override fun appendAscii(csq: CharSequence?) { append(csq) }
+    override fun appendAscii(csq: CharSequence?, start: Int, end: Int) { append(csq, start, end) }
+    override fun appendUtf16(c: Char) { append(c) }
+
+    override fun appendUtf16Surrogate(leadSurrogate: Char, trailSurrogate: Char) {
+        append(leadSurrogate)
+        append(trailSurrogate)
+    }
+
+    /**
+     * Flushes the buffered data and then closes the wrapped [Appendable] if it is [Closeable]. The wrapped
+     * resource is closed even when the flush fails; the failure still propagates to the caller.
+     */
+    override fun close() {
+        try {
+            flush()
+        } finally {
+            if (wrapped is Closeable) wrapped.close()
+        }
+    }
+
+    /**
+     * Writes the buffered data to the wrapped [Appendable], flushes it if it is [Flushable], and then empties
+     * the buffer.
+     */
+    override fun flush() {
+        wrapped.append(buffer)
+        if (wrapped is Flushable) wrapped.flush()
+        buffer.setLength(0)
+    }
+}
diff --git a/src/main/java/com/amazon/ion/impl/BufferedOutputStreamFastAppendable.kt b/src/main/java/com/amazon/ion/impl/BufferedOutputStreamFastAppendable.kt
new file mode 100644
index 0000000000..6410a22597
--- /dev/null
+++ b/src/main/java/com/amazon/ion/impl/BufferedOutputStreamFastAppendable.kt
@@ -0,0 +1,188 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
/**
 * A [_Private_FastAppendable] that buffers data to blocks of memory which are managed by a [BlockAllocator]. Only when
 * [flush] is called are the blocks written to the wrapped [OutputStream].
 *
 * This is necessary for cases where an [IonManagedWriter_1_1] over Ion text needs to emit encoding directives that are
 * not known in advance. The [OutputStreamFastAppendable] class only buffers a fixed amount of data, so it is not safe
 * to use if there are system values to be written. For a sufficiently large user value, an [OutputStreamFastAppendable]
 * can end up flushing partial or whole user values to the [OutputStream] before the [IonManagedWriter_1_1] can
 * write the system value that it depends on.
 *
 * Once [IonManagedWriter_1_1] supports an auto-flush feature, then this class will have very little practical
 * difference from [OutputStreamFastAppendable] for the case where no system values are needed.
 *
 * TODO:
 *  - Add proper tests
 *
 * @see BufferedAppendableFastAppendable
 * @see OutputStreamFastAppendable
 */
internal class BufferedOutputStreamFastAppendable(
    private val out: OutputStream,
    private val allocator: BlockAllocator,
    /**
     * The minimum utilization of a block before a longer value
     * can skip the end of a block and just start a new block.
     */
    minBlockUtilization: Double = 1.0,
) : OutputStream(), _Private_FastAppendable {

    init {
        // 0.0 would have the possibility of wasting entire blocks.
        // 0.5 is somewhat arbitrary, but at least sensible that you should use at least
        // half of a block before moving on to the next block.
        require(minBlockUtilization in 0.5..1.0) { "minBlockUtilization must be between 0.5 and 1" }
        require(allocator.blockSize > 10) { "allocator.blockSize must be greater than 10" }
    }

    /** Largest number of trailing bytes in a block that may be left unused in order to keep a value contiguous. */
    private val maxBlockWaste: Int = (allocator.blockSize * (1.0 - minBlockUtilization)).toInt()

    /** Index of [current] within [blocks]; -1 before the first block is taken, [Int.MIN_VALUE] once closed. */
    private var index = -1
    private val blocks = mutableListOf<Block>()
    private var current: Block = nextBlock()

    /**
     * Advances to the next block, allocating a new one when every previously allocated block is in use.
     * @throws IllegalStateException if this stream has been closed.
     */
    private fun nextBlock(): Block {
        check(index >= -1) { "This output stream is closed." }
        index++
        if (index >= blocks.size) blocks.add(allocator.allocateBlock())
        return blocks[index]
    }

    /**
     * Flushes buffered data to the wrapped stream and releases all blocks. Calling this more than once has no
     * further effect.
     */
    override fun close() {
        // Already closed: the blocks are gone, so flushing again would fail on the empty block list.
        if (index < 0) return
        try {
            flush()
        } finally {
            blocks.onEach { it.close() }.clear()
            index = Int.MIN_VALUE
        }
    }

    /**
     * Writes every buffered block to the wrapped stream, resets the blocks for reuse, and flushes the wrapped stream.
     * @throws IllegalStateException if this stream has been closed.
     */
    override fun flush() {
        check(index >= 0) { "This output stream is closed." }
        blocks.forEach { block ->
            out.write(block.data, 0, block.limit)
            block.reset()
        }
        index = 0
        current = blocks[index]
        out.flush()
    }

    override fun write(b: Int) {
        if (current.remaining() < 1) current = nextBlock()
        val block = current
        block.data[block.limit] = b.toByte()
        block.limit++
    }

    override fun write(b: ByteArray, off: Int, len: Int) {
        if (len > current.remaining()) {
            if (current.remaining() < maxBlockWaste && len < allocator.blockSize) {
                // Little enough is left in this block that it is acceptable to skip it and keep the bytes contiguous.
                current = nextBlock()
            } else {
                writeBytesSlow(b, off, len)
                return
            }
        }
        val block = current
        System.arraycopy(b, off, block.data, block.limit, len)
        block.limit += len
    }

    // slow in the sense that we do all kind of block boundary checking
    private fun writeBytesSlow(bytes: ByteArray, startOffset: Int, length: Int) {
        var off = startOffset
        var len = length
        while (len > 0) {
            val block = current
            val amount = minOf(len, block.remaining())
            System.arraycopy(bytes, off, block.data, block.limit, amount)
            block.limit += amount
            off += amount
            len -= amount
            if (block.remaining() == 0) {
                current = nextBlock()
            }
        }
    }

    override fun append(c: Char): Appendable = apply { if (c.code < 0x80) appendAscii(c) else appendUtf16(c) }

    override fun append(csq: CharSequence): Appendable = apply { append(csq, 0, csq.length) }

    override fun append(csq: CharSequence, start: Int, end: Int): Appendable {
        for (i in start until end) {
            append(csq[i])
        }
        return this
    }

    override fun appendAscii(c: Char) {
        assert(c.code < 0x80)
        write(c.code)
    }

    override fun appendAscii(csq: CharSequence) = appendAscii(csq, 0, csq.length)

    override fun appendAscii(csq: CharSequence, start: Int, end: Int) {
        if (csq is String) {
            // Using deprecated String.getBytes intentionally, since it is
            // correct behavior in this case, and much faster.
            var pos = start
            val len = end - start
            if (len > current.remaining() && current.remaining() < maxBlockWaste && len < allocator.blockSize) {
                current = nextBlock()
            }
            while (true) {
                val copyAmount = minOf(current.remaining(), end - pos)
                csq.copyBytes(pos, pos + copyAmount, current.data, current.limit)
                current.limit += copyAmount
                pos += copyAmount
                if (pos >= end) return
                current = nextBlock()
            }
        } else {
            append(csq, start, end)
        }
    }

    /** Encodes a single non-ASCII UTF-16 code unit as a two- or three-byte UTF-8 sequence. */
    override fun appendUtf16(c: Char) {
        assert(c.code >= 0x80)
        // Reserve room for the longest (3-byte) form so the encoded bytes never straddle two blocks.
        if (current.remaining() < 3) {
            current = nextBlock()
        }
        if (c.code < 0x800) {
            current.data[current.limit++] = (0xff and (0xC0 or (c.code shr 6))).toByte()
            current.data[current.limit++] = (0xff and (0x80 or (c.code and 0x3F))).toByte()
        } else if (c.code < 0x10000) {
            current.data[current.limit++] = (0xff and (0xE0 or (c.code shr 12))).toByte()
            current.data[current.limit++] = (0xff and (0x80 or (c.code shr 6 and 0x3F))).toByte()
            current.data[current.limit++] = (0xff and (0x80 or (c.code and 0x3F))).toByte()
        }
    }

    override fun appendUtf16Surrogate(leadSurrogate: Char, trailSurrogate: Char) {
        // Here we must convert a UTF-16 surrogate pair to UTF-8 bytes.
        val c = _Private_IonConstants.makeUnicodeScalar(leadSurrogate.code, trailSurrogate.code)
        assert(c >= 0x10000)
        if (current.remaining() < 4) {
            current = nextBlock()
        }
        current.data[current.limit++] = (0xff and (0xF0 or (c shr 18))).toByte()
        current.data[current.limit++] = (0xff and (0x80 or (c shr 12 and 0x3F))).toByte()
        current.data[current.limit++] = (0xff and (0x80 or (c shr 6 and 0x3F))).toByte()
        current.data[current.limit++] = (0xff and (0x80 or (c and 0x3F))).toByte()
    }

    /** Helper function to wrap [java.lang.String.getBytes]. */
    private fun String.copyBytes(srcBegin: Int, srcEnd: Int, dst: ByteArray, dstBegin: Int) {
        (this as java.lang.String).getBytes(srcBegin, srcEnd, dst, dstBegin)
    }
}
/**
 * Reads encoding directives from the given [IonReader]. This performs a similar function to
 * [IonReaderContinuableCoreBinary.EncodingDirectiveReader], though that one requires more logic to handle continuable
 * input. The two could be unified at the expense of higher complexity than is needed by the non-continuable text
 * implementation. If the text reader is replaced with a continuable implementation in the future,
 * IonReaderContinuableCoreBinary.EncodingDirectiveReader should be moved to the top level and shared by both readers.
 * If that were to happen, this class would no longer be needed.
 */
internal class EncodingDirectiveReader(private val reader: IonReader, private val readerAdapter: ReaderAdapter) {

    // Address most recently assigned to a macro declared by a directive; -1 when none has been assigned.
    private var localMacroMaxOffset: Int = -1
    private var state: State = State.READING_VALUE

    var isSymbolTableAppend = false
    var isMacroTableAppend = false
    var newSymbols: MutableList<String> = ArrayList(8)
    var newMacros: MutableMap<MacroRef, Macro> = HashMap()
    var isSymbolTableAlreadyClassified = false
    var isMacroTableAlreadyClassified = false

    private enum class State {
        IN_DIRECTIVE_SEXP,
        IN_MODULE_DIRECTIVE_SEXP_AWAITING_MODULE_NAME,
        IN_MODULE_DIRECTIVE_SEXP,
        IN_SYMBOL_TABLE_SEXP,
        IN_SYMBOL_TABLE_LIST,
        IN_MACRO_TABLE_SEXP,
        COMPILING_MACRO,
        READING_VALUE
    }

    /**
     * Inspects the directive keyword the reader is positioned on and moves to the matching state.
     * Only the `module` directive is accepted; any other keyword results in an [IonException].
     */
    private fun classifyDirective() {
        errorIf(reader.type != IonType.SYMBOL) { "Ion encoding directives must start with a directive keyword; found ${reader.type}" }
        val name: String = reader.stringValue()
        // TODO: Add support for `import` and `encoding` directives
        when (name) {
            SystemSymbols_1_1.MODULE.text -> state = State.IN_MODULE_DIRECTIVE_SEXP_AWAITING_MODULE_NAME
            SystemSymbols_1_1.IMPORT.text -> throw IonException("'import' directive not yet supported")
            SystemSymbols_1_1.ENCODING.text -> throw IonException("'encoding' directive not yet supported")
            else -> throw IonException("'$name' is not a valid directive keyword")
        }
    }

    /**
     * Inspects the leading text token of an s-expression inside a module directive and moves to the symbol table
     * or macro table state; any other clause results in an [IonException].
     */
    private fun classifySexpWithinModuleDirective() {
        val name: String = reader.stringValue()
        state = when (name) {
            SystemSymbols_1_1.SYMBOL_TABLE.text -> State.IN_SYMBOL_TABLE_SEXP
            SystemSymbols_1_1.MACRO_TABLE.text -> State.IN_MACRO_TABLE_SEXP
            else -> throw IonException("'$name' clause not supported in module definition")
        }
    }

    /**
     * Classifies a symbol table as either 'set' or 'append'. The caller must ensure the reader is positioned within a
     * symbol table (after the symbol 'symbol_table') before calling. Returns true if the end of the symbol table has
     * been reached; otherwise, returns false with the reader positioned within a list in the symbol table.
     */
    private fun classifySymbolTable(): Boolean {
        val type: IonType = reader.type
        if (isSymbolTableAlreadyClassified) {
            if (type != IonType.LIST) { // TODO support module name imports
                throw IonException("symbol_table s-expression must contain list(s) of symbols.")
            }
            reader.stepIn()
            state = State.IN_SYMBOL_TABLE_LIST
            return false
        }
        isSymbolTableAlreadyClassified = true
        if (IonType.isText(type)) {
            if (DEFAULT_MODULE == reader.stringValue() && !isSymbolTableAppend) {
                isSymbolTableAppend = true
                if (reader.next() == null) {
                    return true
                }
                if (reader.type != IonType.LIST) {
                    throw IonException("symbol_table s-expression must begin with a list.")
                }
            } else {
                throw IonException("symbol_table s-expression must begin with either '_' or a list.")
            }
        } else if (type != IonType.LIST) {
            throw IonException("symbol_table s-expression must begin with either '_' or a list.")
        }
        reader.stepIn()
        state = State.IN_SYMBOL_TABLE_LIST
        return false
    }

    /**
     * Classifies a macro table as either 'set' or 'append'. The caller must ensure the reader is positioned within a
     * macro table (after the symbol 'macro_table') before calling. Returns true if the end of the macro table has
     * been reached; otherwise, returns false with the reader positioned on an s-expression in the macro table.
     */
    private fun classifyMacroTable(): Boolean {
        val type: IonType = reader.type
        if (isMacroTableAlreadyClassified) {
            if (type != IonType.SEXP) {
                throw IonException("macro_table s-expression must contain s-expressions.")
            }
            return false
        }
        isMacroTableAlreadyClassified = true
        if (IonType.isText(type)) {
            if (DEFAULT_MODULE == reader.stringValue() && !isMacroTableAppend) {
                isMacroTableAppend = true
                if (reader.next() == null) {
                    return true
                }
                if (reader.type != IonType.SEXP) {
                    throw IonException("macro_table s-expression must begin with s-expression(s).")
                }
            } else {
                throw IonException("macro_table s-expression must begin with either '_' or s-expression(s).")
            }
        } else if (type == IonType.SEXP) {
            // 'set' rather than 'append': macro addresses assigned by this directive restart from zero.
            localMacroMaxOffset = -1
        } else {
            throw IonException("macro_table s-expression must begin with either '_' or s-expression(s).")
        }
        return false
    }

    /**
     * Utility function to make error cases more concise.
     * @param condition the condition under which an IonException should be thrown
     * @param lazyErrorMessage the message to use in the exception
     */
    private inline fun errorIf(condition: Boolean, lazyErrorMessage: () -> String) {
        if (condition) {
            throw IonException(lazyErrorMessage())
        }
    }

    /**
     * Reads an encoding directive. After this method returns, the caller should access this class's properties to
     * retrieve the symbols and macros declared within the directive.
     */
    fun readEncodingDirective(encodingContext: EncodingContext) {

        val macroCompiler = MacroCompiler({ key -> resolveMacro(encodingContext, key) }, readerAdapter)

        reader.stepIn()
        state = State.IN_DIRECTIVE_SEXP
        while (true) {
            when (state) {

                State.IN_DIRECTIVE_SEXP -> {
                    errorIf(reader.next() == null) { "invalid Ion directive; missing directive keyword" }
                    classifyDirective()
                }

                State.IN_MODULE_DIRECTIVE_SEXP_AWAITING_MODULE_NAME -> {
                    errorIf(reader.next() == null) { "invalid module directive; missing module name" }
                    errorIf(reader.type != IonType.SYMBOL) { "invalid module directive; module name must be a symbol" }
                    // TODO: Support other module names
                    errorIf(DEFAULT_MODULE != reader.stringValue()) { "IonJava currently supports only the default module" }
                    state = State.IN_MODULE_DIRECTIVE_SEXP
                }

                State.IN_MODULE_DIRECTIVE_SEXP -> {
                    if (reader.next() == null) {
                        // End of the directive: leave the s-expression and hand control back to the caller.
                        reader.stepOut()
                        state = State.READING_VALUE
                        return
                    }
                    if (reader.type != IonType.SEXP) {
                        throw IonException("module definition must contain only s-expressions.")
                    }
                    reader.stepIn()
                    if (reader.next() == null || !IonType.isText(reader.type)) {
                        throw IonException("S-expressions within module definitions must begin with a text token.")
                    }
                    classifySexpWithinModuleDirective()
                }

                State.IN_SYMBOL_TABLE_SEXP -> {
                    if (reader.next() == null || classifySymbolTable()) {
                        reader.stepOut()
                        state = State.IN_MODULE_DIRECTIVE_SEXP
                        continue
                    }
                }

                State.IN_SYMBOL_TABLE_LIST -> {
                    if (reader.next() == null) {
                        reader.stepOut()
                        state = State.IN_SYMBOL_TABLE_SEXP
                        continue
                    }
                    if (!IonType.isText(reader.type)) {
                        throw IonException("The symbol_table must contain text.")
                    }
                    newSymbols.add(reader.stringValue())
                }

                State.IN_MACRO_TABLE_SEXP -> {
                    if (reader.next() == null || classifyMacroTable()) {
                        reader.stepOut()
                        state = State.IN_MODULE_DIRECTIVE_SEXP
                        continue
                    }
                    // COMPILING_MACRO is observable through isMacroCompilationInProgress() while the compiler runs.
                    state = State.COMPILING_MACRO
                    val newMacro: Macro = macroCompiler.compileMacro()
                    // Every macro is registered by address; named macros are additionally registered by name.
                    newMacros[byId(++localMacroMaxOffset)] = newMacro
                    macroCompiler.macroName?.let { macroName -> newMacros[byName(macroName)] = newMacro }
                    state = State.IN_MACRO_TABLE_SEXP
                }

                // TODO handle other legal encoding directive s-expression shapes.
                // TODO add strict enforcement of the schema around e.g. repeats

                State.COMPILING_MACRO, State.READING_VALUE -> throw IllegalStateException(state.toString())
            }
        }
    }

    /**
     * Looks up a macro, preferring macros declared by the directive currently being read over those already
     * present in the given encoding context.
     */
    private fun resolveMacro(context: EncodingContext, address: MacroRef): Macro? {
        return newMacros[address] ?: context.macroTable.get(address)
    }

    /**
     * @return true if the reader is currently being used by the [MacroCompiler].
     */
    fun isMacroCompilationInProgress(): Boolean {
        return state == State.COMPILING_MACRO
    }

    /**
     * Prepares the EncodingDirectiveReader to read a new encoding directive.
     */
    fun reset() {
        isSymbolTableAppend = false
        isSymbolTableAlreadyClassified = false
        newSymbols.clear()
        isMacroTableAppend = false
        newMacros.clear()
        isMacroTableAlreadyClassified = false
    }
}
com.amazon.ion.impl.IonTypeID.ONE_ANNOTATION_SID_LOWER_NIBBLE_1_1; +import static com.amazon.ion.impl.IonTypeID.SYSTEM_MACRO_INVOCATION_ID; +import static com.amazon.ion.impl.IonTypeID.SYSTEM_SYMBOL_VALUE; +import static com.amazon.ion.impl.IonTypeID.TWO_ANNOTATION_FLEX_SYMS_LOWER_NIBBLE_1_1; +import static com.amazon.ion.impl.IonTypeID.TWO_ANNOTATION_SIDS_LOWER_NIBBLE_1_1; +import static com.amazon.ion.impl.IonTypeID.TYPE_IDS_1_1; +import static com.amazon.ion.impl.bin.Ion_1_1_Constants.FLEX_SYM_MAX_SYSTEM_SYMBOL; +import static com.amazon.ion.impl.bin.Ion_1_1_Constants.FLEX_SYM_SYSTEM_SYMBOL_OFFSET; import static com.amazon.ion.util.IonStreamUtils.throwAsIonException; /** @@ -104,11 +117,29 @@ private static class RefillableState { */ final int maximumBufferSize; + /** + * The number of bytes shifted left in the buffer during the current operation to make room for more bytes. This + * is needed when rewinding to a previous location, as any saved indices at that location will need to be + * shifted by this amount. + */ + long pendingShift = 0; + /** * The source of data, for refillable streams. */ final InputStream inputStream; + /** + * Index of the first "pinned" byte in the buffer. Pinned bytes must be preserved in the buffer until un-pinned. + */ + long pinOffset = -1; + + /** + * The target depth to which the reader should seek. This is used when a container is determined to be oversize + * while buffering one of its children. + */ + int targetSeekDepth = -1; + /** * Handler invoked when a single value would exceed `maximumBufferSize`. */ @@ -124,6 +155,21 @@ private static class RefillableState { */ int individualBytesSkippedWithoutBuffering = 0; + /** + * The last byte that was read without being buffered (due to the buffer exceeding the maximum size). This + * allows for one byte to be un-read even if an oversize value is being skipped. Un-reading is necessary + * when the cursor probes for, but does not find, an end delimiter. 
+ */ + int lastUnbufferedByte = -1; + + /** + * Whether to skip over annotation sequences rather than recording them for consumption by the user. This is + * used when probing forward in the stream for the end of a delimited container while remaining logically + * positioned on the current value. This is only needed in 'slow' mode because in quick mode the entire + * container is assumed to be buffered in its entirety and no probing occurs. + */ + private boolean skipAnnotations = false; + RefillableState(InputStream inputStream, int capacity, int maximumBufferSize, State initialState) { this.inputStream = inputStream; this.capacity = capacity; @@ -133,6 +179,32 @@ private static class RefillableState { } + /** + * Marks an argument group. + */ + private static class ArgumentGroupMarker { + + /** + * Marks the start index of the current page in the argument group. + */ + long pageStartIndex = -1; + + /** + * Marks the end index of the current page in the argument group. If -1, this indicates that the argument + * group is delimited and the end of the page has not yet been found. + */ + long pageEndIndex = -1; + + /** + * For tagless groups, the primitive type of the tagless values in the group; otherwise, null. When null, + * there is always a single page of values in the group, and the end is reached either when an end delimiter + * is found (for delimited groups), or when the cursor's `peekIndex` reaches `pageEndIndex`. When non-null, + * there may be multiple pages of tagless values in the group; whenever the cursor reaches `pageEndIndex`, it + * must read a FlexUInt at that position to calculate the end index of the next page. + */ + TaglessEncoding taglessEncoding = null; + } + /** * Dummy state that indicates the cursor has been terminated and that additional API calls will have no effect. 
*/ @@ -153,6 +225,9 @@ private static class RefillableState { */ Marker parent = null; + ArgumentGroupMarker[] argumentGroupStack = new ArgumentGroupMarker[CONTAINER_STACK_INITIAL_CAPACITY]; + int argumentGroupIndex = -1; + /** * The start offset into the user-provided byte array, or 0 if the user provided an InputStream. */ @@ -183,7 +258,13 @@ private static class RefillableState { * Marker for the sequence of annotation symbol IDs on the current value. If there are no annotations on * the current value, the startIndex will be negative. */ - final Marker annotationSequenceMarker = new Marker(-1, 0); + final Marker annotationSequenceMarker = new Marker(-1, -1); + + /** + * Holds both inline text markers and symbol IDs. If representing a symbol ID, the symbol ID value will + * be contained in the endIndex field, and the startIndex field will be -1. + */ + final MarkerList annotationTokenMarkers = new MarkerList(8); /** * Indicates whether the current value is annotated. @@ -193,7 +274,7 @@ private static class RefillableState { /** * Marker representing the current value. */ - final Marker valueMarker = new Marker(-1, 0); + final Marker valueMarker = new Marker(-1, -1); /** * The index of the first byte in the header of the value at which the reader is currently positioned. @@ -205,6 +286,11 @@ private static class RefillableState { */ IonTypeID valueTid = null; + /** + * Marker for the current inlineable field name. + */ + final Marker fieldTextMarker = new Marker(-1, -1); + /** * The consumer to be notified when Ion version markers are encountered. */ @@ -223,7 +309,7 @@ private static class RefillableState { /** * The major version of the Ion encoding currently being read. */ - private int majorVersion = -1; + private int majorVersion = 1; /** * The minor version of the Ion encoding currently being read. @@ -282,6 +368,24 @@ private static class RefillableState { */ private long lastReportedByteTotal = 0; + /** + * The ID of the current macro invocation. 
When `isSystemInvocation` is true, a positive value indicates a system + * macro address, while a negative value indicates a system symbol ID. When `isSystemInvocation` is false, a + * positive value indicates a user macro address, while a negative value indicates that the cursor's current token + * is not a macro invocation. + */ + private long macroInvocationId = -1; + + /** + * True if the given token represents a system invocation (either a system macro invocation or a system symbol + * value). When true, `macroInvocationId` is used to retrieve the ID of the system token. + */ + private boolean isSystemInvocation = false; + + /** + * The type of the current value, if tagless. Otherwise, null. + */ + TaglessEncoding taglessType = null; /** * @return the given configuration's DataHandler, or null if that DataHandler is a no-op. @@ -316,6 +420,10 @@ private static BufferConfiguration.DataHandler getDataHandler(IonBufferConfigura containerStack[i] = new Marker(-1, -1); } + for (int i = 0; i < CONTAINER_STACK_INITIAL_CAPACITY; i++) { + argumentGroupStack[i] = new ArgumentGroupMarker(); + } + this.buffer = buffer; this.startOffset = offset; this.offset = offset; @@ -448,6 +556,10 @@ private static IonBufferConfiguration getFixedSizeConfigurationFor( containerStack[i] = new Marker(-1, -1); } + for (int i = 0; i < CONTAINER_STACK_INITIAL_CAPACITY; i++) { + argumentGroupStack[i] = new ArgumentGroupMarker(); + } + this.buffer = new byte[configuration.getInitialBufferSize()]; this.startOffset = 0; this.offset = 0; @@ -498,19 +610,26 @@ private long availableAt(long index) { * Ensures that there is space for at least 'minimumNumberOfBytesRequired' additional bytes in the buffer, * growing the buffer if necessary. May consolidate buffered bytes to the beginning of the buffer, shifting indices * accordingly. - * @param minimumNumberOfBytesRequired the minimum number of additional bytes to buffer. 
+ * @param numberOfBytes the number of bytes starting at `index` that need to be present. + * @param index the index after which to fill. * @return true if the buffer has sufficient capacity; otherwise, false. */ - private boolean ensureCapacity(long minimumNumberOfBytesRequired) { + private boolean ensureCapacity(long numberOfBytes, long index) { + int maximumFreeSpace = refillableState.maximumBufferSize; + int startOffset = (int) offset; + if (refillableState.pinOffset > -1) { + maximumFreeSpace -= (int) (offset - refillableState.pinOffset); + startOffset = (int) refillableState.pinOffset; + } + long minimumNumberOfBytesRequired = numberOfBytes + (index - startOffset); if (minimumNumberOfBytesRequired < 0) { throw new IonException("The number of bytes required cannot be represented in a Java long."); } - if (freeSpaceAt(offset) >= minimumNumberOfBytesRequired) { + refillableState.bytesRequested = minimumNumberOfBytesRequired; + if (freeSpaceAt(startOffset) >= minimumNumberOfBytesRequired) { // No need to shift any bytes or grow the buffer. return true; } - int maximumFreeSpace = refillableState.maximumBufferSize; - int startOffset = (int) offset; if (minimumNumberOfBytesRequired > maximumFreeSpace) { refillableState.isSkippingCurrentValue = true; return false; @@ -522,7 +641,9 @@ private boolean ensureCapacity(long minimumNumberOfBytesRequired) { moveBytesToStartOfBuffer(newBuffer, startOffset); refillableState.capacity = newSize; buffer = newBuffer; + ByteOrder byteOrder = byteBuffer.order(); byteBuffer = ByteBuffer.wrap(buffer, (int) offset, (int) refillableState.capacity); + byteBuffer.order(byteOrder); } else { // The current capacity can accommodate the requested size; move the existing bytes to the beginning // to make room for the remaining requested bytes to be filled at the end. 
@@ -534,14 +655,13 @@ private boolean ensureCapacity(long minimumNumberOfBytesRequired) { /** * Attempts to fill the buffer so that it contains at least `numberOfBytes` after `index`. * @param index the index after which to fill. - * @param numberOfBytes the number of bytes after `index` that need to be present. + * @param numberOfBytes the number of bytes starting at `index` that need to be present. * @return false if not enough bytes were available in the stream to satisfy the request; otherwise, true. */ private boolean fillAt(long index, long numberOfBytes) { long shortfall = numberOfBytes - availableAt(index); if (shortfall > 0) { - refillableState.bytesRequested = numberOfBytes + (index - offset); - if (ensureCapacity(refillableState.bytesRequested)) { + if (ensureCapacity(numberOfBytes, index)) { // Fill all the free space, not just the shortfall; this reduces I/O. shortfall = refill(refillableState.bytesRequested); } else { @@ -572,6 +692,9 @@ private void moveBytesToStartOfBuffer(byte[] destinationBuffer, int fromIndex) { shiftIndicesLeft(fromIndex); } offset = 0; + if (refillableState.pinOffset > 0) { + refillableState.pinOffset = 0; + } limit = size; } @@ -590,6 +713,11 @@ private long freeSpaceAt(long index) { */ private int readByteWithoutBuffering() { int b = -1; + if (refillableState.lastUnbufferedByte > -1) { + b = refillableState.lastUnbufferedByte; + refillableState.lastUnbufferedByte = -1; + return b; + } try { b = refillableState.inputStream.read(); } catch (EOFException e) { @@ -655,11 +783,25 @@ private void shiftIndicesLeft(int shiftAmount) { valueMarker.startIndex -= shiftAmount; valueMarker.endIndex -= shiftAmount; checkpoint -= shiftAmount; + if (fieldTextMarker.startIndex > -1) { + fieldTextMarker.startIndex -= shiftAmount; + fieldTextMarker.endIndex -= shiftAmount; + } if (annotationSequenceMarker.startIndex > -1) { annotationSequenceMarker.startIndex -= shiftAmount; annotationSequenceMarker.endIndex -= shiftAmount; } + // Note: even 
provisional annotation token markers must be shifted because a shift may occur between + // provisional creation and commit. + for (int i = 0; i < annotationTokenMarkers.provisionalSize(); i++) { + Marker marker = annotationTokenMarkers.provisionalGet(i); + if (marker.startIndex > -1) { + marker.startIndex -= shiftAmount; + marker.endIndex -= shiftAmount; + } + } shiftContainerEnds(shiftAmount); + refillableState.pendingShift = shiftAmount; refillableState.totalDiscardedBytes += shiftAmount; } @@ -689,7 +831,7 @@ private long refill(long minimumNumberOfBytesRequired) { if (numberOfBytesFilled > 0) { limit += numberOfBytesFilled; } - shortfall = minimumNumberOfBytesRequired - availableAt(offset); + shortfall = minimumNumberOfBytesRequired - availableAt(refillableState.pinOffset > -1 ? refillableState.pinOffset : offset); } while (shortfall > 0 && numberOfBytesFilled >= 0); return shortfall; } @@ -728,6 +870,8 @@ private boolean slowSeek(long numberOfBytes) { } while (shortfall > 0 && skipped > 0); if (shortfall <= 0) { refillableState.bytesRequested = 0; + // The value has been entirely skipped, so its endIndex is now the buffer's limit. 
+ valueMarker.endIndex = limit; refillableState.state = State.READY; return false; } @@ -885,6 +1029,7 @@ private boolean slowReadAnnotationWrapperHeader_1_0(IonTypeID valueTid) { if (peekIndex >= valueMarker.endIndex) { throw new IonException("Annotation wrapper must wrap a value."); } + valueMarker.typeId = valueTid; return false; } @@ -955,221 +1100,1124 @@ private boolean slowReadFieldName_1_0() { /* ---- Ion 1.1 ---- */ - private long uncheckedReadVarUInt_1_1() { - throw new UnsupportedOperationException(); - } - - private long slowReadVarUInt_1_1() { - throw new UnsupportedOperationException(); - } - - private boolean uncheckedReadAnnotationWrapperHeader_1_1(IonTypeID valueTid) { - throw new UnsupportedOperationException(); - } - - private boolean slowReadAnnotationWrapperHeader_1_1(IonTypeID valueTid) { - throw new UnsupportedOperationException(); - } - - private long calculateEndIndex_1_1(IonTypeID valueTid, boolean isAnnotated) { - throw new UnsupportedOperationException(); - } - - private void uncheckedReadFieldName_1_1() { - throw new UnsupportedOperationException(); - } - - private boolean slowReadFieldName_1_1() { - throw new UnsupportedOperationException(); - } - - private boolean uncheckedIsDelimitedEnd_1_1() { - throw new UnsupportedOperationException(); - } - - private boolean slowIsDelimitedEnd_1_1() { - throw new UnsupportedOperationException(); - } - - boolean skipRemainingDelimitedContainerElements_1_1() { - throw new UnsupportedOperationException(); - } - - private void seekPastDelimitedContainer_1_1() { - throw new UnsupportedOperationException(); - } - - private boolean slowFindDelimitedEnd_1_1() { - throw new UnsupportedOperationException(); - } - - private boolean slowSeekToDelimitedEnd_1_1() { - throw new UnsupportedOperationException(); - } - - private boolean slowFillDelimitedContainer_1_1() { - throw new UnsupportedOperationException(); - } - - private boolean slowSkipRemainingDelimitedContainerElements_1_1() { - throw new 
UnsupportedOperationException(); - } - - /* ---- End: version-dependent parsing methods ---- */ - - /* ---- Begin: version-agnostic parsing, utility, and public API methods ---- */ - /** - * Attempts to make the cursor READY by finishing the operation that was in progress last time the end of the stream - * was reached. This should not be called when the cursor state is already READY. - * @return true if the cursor is ready; otherwise, false. + * Reads a 3+ byte FlexUInt into a long. After this method returns, `peekIndex` points to the first byte after the + * end of the FlexUInt. + * @param firstByte the first byte of the FlexUInt. + * @return the value. */ - private boolean slowMakeBufferReady() { - boolean isReady; - switch (refillableState.state) { - case SEEK: - isReady = !slowSeek(refillableState.bytesRequested); - break; - case FILL: - isReady = fillAt(offset, refillableState.bytesRequested); - break; - case FILL_DELIMITED: - refillableState.state = State.READY; - isReady = slowFindDelimitedEnd_1_1(); - break; - case SEEK_DELIMITED: - isReady = slowSeekToDelimitedEnd_1_1(); - break; - case TERMINATED: - isReady = false; - break; - default: - throw new IllegalStateException(); + private long uncheckedReadLargeFlexUInt_1_1(int firstByte) { + if (firstByte == 0) { + // Note: this is conservative, as 9-byte flex subfields (with a continuation bit in the second byte) can fit + // in a long. However, the flex subfields parsed by the methods in this class are used only in cases that + // require an int anyway (symbol IDs, decimal scale), so the added complexity is not warranted. 
+ throw new IonException("Flex subfield exceeds the length of a long."); } - if (!isReady) { - event = Event.NEEDS_DATA; + byte length = (byte) (Integer.numberOfTrailingZeros(firstByte) + 1); + long result = firstByte >>> length; + for (byte i = 1; i < length; i++) { + result |= ((long) (buffer[(int) (peekIndex++)] & SINGLE_BYTE_MASK) << (8 * i - length)); } - return isReady; + return result; } /** - * Sets `checkpoint` to the current `peekIndex`, which is at the given type of location. - * @param location the type of checkpoint location. Must not be BEFORE_UNANNOTATED_TYPE_ID. + * Reads a FlexUInt. NOTE: the FlexUInt must fit in a `long`. This must only be called when it is known that the + * buffer already contains all the bytes in the FlexUInt. + * @return the value. */ - private void setCheckpoint(CheckpointLocation location) { - checkpointLocation = location; - checkpoint = peekIndex; + private long uncheckedReadFlexUInt_1_1() { + // Up-cast to int, ensuring the most significant bit in the byte is not treated as the sign. + int currentByte = buffer[(int)(peekIndex++)] & SINGLE_BYTE_MASK; + if ((currentByte & 1) == 1) { // TODO perf: analyze whether these special case checks are a net positive + // Single byte; shift out the continuation bit. + return currentByte >>> 1; + } + if ((currentByte & 2) != 0) { + // Two bytes; upcast the second byte to int, ensuring the most significant bit is not treated as the sign. + // Make room for the six value bits in the first byte. Or with those six value bits after shifting out the + // two continuation bits. + return ((buffer[(int) peekIndex++] & SINGLE_BYTE_MASK) << 6) | (currentByte >>> 2); + } + return uncheckedReadLargeFlexUInt_1_1(currentByte); } /** - * Sets `checkpoint` to the current `peekIndex`, which must be before an unannotated type ID, and seeks the - * buffer to that point. + * Reads the length of a FlexUInt (or FlexInt) at the given position. + * Does not alter the state of the peekIndex or anything else. 
+ * @return the number of bytes used to encode the FlexUInt (or FlexInt) that starts a "position" */ - private void setCheckpointBeforeUnannotatedTypeId() { - reset(); - offset = peekIndex; - checkpointLocation = CheckpointLocation.BEFORE_UNANNOTATED_TYPE_ID; - checkpoint = peekIndex; + private long uncheckedReadLengthOfFlexUInt_1_1(long position) { + int length = 1; + while (true) { + int numZeros = Integer.numberOfTrailingZeros(buffer[(int) position]); + if (numZeros < 8) { + length += numZeros; + return length; + } else { + // We don't actually know the length without looking at even more bytes, + // so look at another. + length += 8; + position++; + } + } } /** - * Validates and sets the given marker, which must fit within its parent container (if applicable). The resulting - * `startIndex` will be set to `peekIndex` (the next byte to be consumed from the cursor's buffer), and its - * `endIndex` will be set to the given value. - * @param endIndex the value's end index. - * @param markerToSet the marker to set. + * Reads a multi-byte FlexUInt into a long, ensuring enough data is available in the buffer. After this method + * returns, `peekIndex` points to the first byte after the end of the FlexUInt. + * @param firstByte the first byte of the FlexUInt. + * @return the value. */ - private void setMarker(long endIndex, Marker markerToSet) { - if (parent != null && endIndex > parent.endIndex && parent.endIndex > DELIMITED_MARKER) { - throw new IonException("Value exceeds the length of its parent container."); + private long slowReadLargeFlexUInt_1_1(int firstByte) { + if (firstByte == 0) { + // Note: this is conservative, as 9-byte FlexUInts (with a continuation bit in the second byte) can fit + // in a long. However, the FlexUInt parsing methods in this class are only used to calculate value length, + // and the added complexity is not warranted to increase the maximum value size above 2^56 - 1 (72 PB). 
+ throw new IonException("Found a FlexUInt that was too large to fit in a `long`"); } - markerToSet.startIndex = peekIndex; - markerToSet.endIndex = endIndex; + byte length = (byte) (Integer.numberOfTrailingZeros(firstByte) + 1); + if (!fillAt(peekIndex, length - 1)) { + return -1; + } + long result = firstByte >>> length; + for (byte i = 1; i < length; i++) { + result |= ((long) (buffer[(int) (peekIndex++)] & SINGLE_BYTE_MASK) << (8 * i - length)); + } + return result; } /** - * Determines whether the cursor has reached the end of the current container. If true, `event` will be set to - * END_CONTAINER and information about the current value will be reset. - * @return true if the end of the current container has been reached; otherwise, false. + * Reads a FlexUInt, ensuring enough data is available in the buffer. NOTE: the FlexUInt must fit in a `long`. + * @return the value. */ - private boolean checkContainerEnd() { - if (parent.endIndex > peekIndex) { - return false; - } - if (parent.endIndex == DELIMITED_MARKER) { - return isSlowMode ? slowIsDelimitedEnd_1_1() : uncheckedIsDelimitedEnd_1_1(); + private long slowReadFlexUInt_1_1() { + int currentByte = slowReadByte(); + if (currentByte < 0) { + return -1; } - if (parent.endIndex == peekIndex) { - event = Event.END_CONTAINER; - valueTid = null; - fieldSid = -1; - return true; + if ((currentByte & 1) == 1) { + return currentByte >>> 1; } - throw new IonException("Contained values overflowed the parent container length."); + return slowReadLargeFlexUInt_1_1(currentByte); } /** - * Resets state specific to the current value. + * Reads the length of a FlexUInt (or FlexInt) at the given position. + * Does not alter the state of the peekIndex. May fill data, if needed. 
+ * @return the number of bytes used to encode the FlexUInt (or FlexInt) that starts a "position" + * or -1 if the end of the stream has been reached */ - private void reset() { - valueMarker.typeId = null; - valueMarker.startIndex = -1; - valueMarker.endIndex = -1; - fieldSid = -1; - hasAnnotations = false; + private long slowReadLengthOfFlexUInt_1_1(long position) { + int length = 1; + while (true) { + if (!fillAt(position, 1)) { + return -1; + } + int numZeros = Integer.numberOfTrailingZeros(buffer[(int) position]); + if (numZeros < 8) { + length += numZeros; + return length; + } else { + // We don't actually know the length without looking at even more bytes, + // so add 8 to length, and then look at the next byte. + length += 8; + position++; + } + } } /** - * Reads the final three bytes of an IVM. `peekIndex` must point to the first byte after the opening `0xE0` byte. - * After return, `majorVersion`, `minorVersion`, and `typeIds` will be updated accordingly, and `peekIndex` will - * point to the first byte after the IVM. + * Reads the header of an Ion 1.1 annotation wrapper. This must only be called when it is known that the buffer + * already contains all the bytes in the header. Sets `valueMarker` with the start and end indices of the wrapped + * value. Sets `annotationSequenceMarker` with the start and end indices of the sequence of annotation SIDs, if + * applicable, or fills `annotationTokenMarkers` if the annotation wrapper contains FlexSyms. After + * successful return, `peekIndex` will point at the type ID byte of the wrapped value. + * @param valueTid the type ID of the annotation wrapper. + * @return true if the length of the wrapped value extends beyond the bytes currently buffered; otherwise, false. 
*/ - private void readIvm() { - if (limit < peekIndex + IVM_REMAINING_LENGTH) { - throw new IonException("Incomplete Ion version marker."); - } - majorVersion = buffer[(int) (peekIndex++)]; - minorVersion = buffer[(int) (peekIndex++)]; - if ((buffer[(int) (peekIndex++)] & SINGLE_BYTE_MASK) != IVM_FINAL_BYTE) { - throw new IonException("Invalid Ion version marker."); - } - if (majorVersion != 1) { - throw new IonException(String.format("Unsupported Ion version: %d.%d", majorVersion, minorVersion)); - } - if (minorVersion == 0) { - typeIds = IonTypeID.TYPE_IDS_1_0; + private boolean uncheckedReadAnnotationWrapperHeader_1_1(IonTypeID valueTid) { + annotationTokenMarkers.clear(); + if (valueTid.variableLength) { + // Opcodes 0xE6 (variable-length annotation SIDs) and 0xE9 (variable-length annotation FlexSyms) + int annotationsLength = (int) uncheckedReadFlexUInt_1_1(); + annotationSequenceMarker.typeId = valueTid; + annotationSequenceMarker.startIndex = peekIndex; + annotationSequenceMarker.endIndex = annotationSequenceMarker.startIndex + annotationsLength; + peekIndex = annotationSequenceMarker.endIndex; } else { - throw new IonException(String.format("Unsupported Ion version: %d.%d", majorVersion, minorVersion)); + if (valueTid.isInlineable) { + // Opcodes 0xE7 (one annotation FlexSym) and 0xE8 (two annotation FlexSyms) + Marker provisionalMarker = annotationTokenMarkers.provisionalElement(); + uncheckedReadFlexSym_1_1(provisionalMarker); + if (provisionalMarker.endIndex < 0) { + return true; + } + if (valueTid.lowerNibble == TWO_ANNOTATION_FLEX_SYMS_LOWER_NIBBLE_1_1) { + // Opcode 0xE8 (two annotation FlexSyms) + provisionalMarker = annotationTokenMarkers.provisionalElement(); + uncheckedReadFlexSym_1_1(provisionalMarker); + if (provisionalMarker.endIndex < 0) { + return true; + } + annotationTokenMarkers.commit(); + } + annotationTokenMarkers.commit(); + } else { + // Opcodes 0xE4 (one annotation SID) and 0xE5 (two annotation SIDs) + int annotationSid = (int) 
uncheckedReadFlexUInt_1_1(); + annotationTokenMarkers.provisionalElement().endIndex = annotationSid; + if (valueTid.lowerNibble == TWO_ANNOTATION_SIDS_LOWER_NIBBLE_1_1) { + // Opcode 0xE5 (two annotation SIDs) + annotationSid = (int) uncheckedReadFlexUInt_1_1(); + annotationTokenMarkers.provisionalElement().endIndex = annotationSid; + annotationTokenMarkers.commit(); + } + annotationTokenMarkers.commit(); + } } - ivmConsumer.ivmEncountered(majorVersion, minorVersion); + return false; } /** - * Validates and skips a NOP pad. After return, `peekIndex` will point to the first byte after the NOP pad. - * @param endIndex the endIndex of the NOP pad. - * @param isAnnotated true if the NOP pad occurs within an annotation wrapper (which is illegal); otherwise, false. + * Skips a non-length-prefixed annotation sequence (opcodes E4, E5, E7, or E8), ensuring enough space is available + * in the buffer. After this method returns, `peekIndex` points to the first byte after the end of the annotation + * sequence. + * @param valueTid the type ID of the annotation sequence to skip. + * @return true if there are not enough bytes in the stream to complete the annotation sequence; otherwise, false. */ - private void uncheckedSeekPastNopPad(long endIndex, boolean isAnnotated) { - if (isAnnotated) { - throw new IonException( - "Invalid annotation wrapper: NOP pad may not occur inside an annotation wrapper." 
- ); - } - if (endIndex > limit) { - throw new IonException("Invalid NOP pad."); - } - peekIndex = endIndex; - if (parent != null) { - checkContainerEnd(); + private boolean slowSkipNonPrefixedAnnotations_1_1(IonTypeID valueTid) { + if (valueTid.isInlineable) { + // Opcodes 0xE7 (one annotation FlexSym) and 0xE8 (two annotation FlexSyms) + if (slowSkipFlexSym_1_1(null) == FlexSymType.INCOMPLETE) { + return true; + } + if (valueTid.lowerNibble == TWO_ANNOTATION_FLEX_SYMS_LOWER_NIBBLE_1_1) { + // Opcode 0xE8 (two annotation FlexSyms) + return slowSkipFlexSym_1_1(null) == FlexSymType.INCOMPLETE; + } + } else { + // Opcodes 0xE4 (one annotation SID) and 0xE5 (two annotation SIDs) + int annotationSid = (int) slowReadFlexUInt_1_1(); + if (annotationSid < 0) { + return true; + } + if (valueTid.lowerNibble == TWO_ANNOTATION_SIDS_LOWER_NIBBLE_1_1) { + // Opcode 0xE5 (two annotation SIDs) + annotationSid = (int) slowReadFlexUInt_1_1(); + return annotationSid < 0; + } } + return false; } /** - * Validates and skips a NOP pad. After return, `peekIndex` will point to the first byte after the NOP pad. - * @param valueLength the length of the NOP pad. - * @param isAnnotated true if the NOP pad occurs within an annotation wrapper (which is illegal); otherwise, false. - * @return true if not enough data was available to seek past the NOP pad; otherwise, false. - */ + * Reads the header of an Ion 1.1 annotation wrapper, ensuring enough data is available in the buffer. Sets + * `valueMarker` with the start and end indices of the wrapped value. Sets `annotationSequenceMarker` with the start + * and end indices of the sequence of annotation SIDs, if applicable, or fills `annotationTokenMarkers` if the + * annotation wrapper contains FlexSyms. After successful return, `peekIndex` will point at the type ID byte of the + * wrapped value. + * @param valueTid the type ID of the annotation wrapper. 
+ * @return true if there are not enough bytes in the stream to complete the value; otherwise, false. + */ + private boolean slowReadAnnotationWrapperHeader_1_1(IonTypeID valueTid) { + if (!refillableState.skipAnnotations) { + annotationTokenMarkers.clear(); + } + if (valueTid.variableLength) { + // Opcodes 0xE6 (variable-length annotation SIDs) and 0xE9 (variable-length annotation FlexSyms) + // At this point the value must be at least 3 more bytes: one for the smallest-possible annotations + // length, one for the smallest-possible annotation, and 1 for the smallest-possible value + // representation. + if (!fillAt(peekIndex, 3)) { + return true; + } + int annotationsLength = (int) slowReadFlexUInt_1_1(); + if (annotationsLength < 0) { + return true; + } + if (!fillAt(peekIndex, annotationsLength)) { + return true; + } + long annotationsEnd = peekIndex + annotationsLength; + if (!refillableState.skipAnnotations) { + annotationSequenceMarker.typeId = valueTid; + annotationSequenceMarker.startIndex = peekIndex; + annotationSequenceMarker.endIndex = annotationsEnd; + } + peekIndex = annotationsEnd; + } else { + // At this point the value must have at least one more byte for each annotation FlexSym (one for lower + // nibble 7, two for lower nibble 8), plus one for the smallest-possible value representation. + if (!fillAt(peekIndex, (valueTid.lowerNibble == ONE_ANNOTATION_FLEX_SYM_LOWER_NIBBLE_1_1 || valueTid.lowerNibble == ONE_ANNOTATION_SID_LOWER_NIBBLE_1_1) ? 
2 : 3)) { + return true; + } + if (refillableState.skipAnnotations) { + return slowSkipNonPrefixedAnnotations_1_1(valueTid); + } + if (valueTid.isInlineable) { + // Opcodes 0xE7 (one annotation FlexSym) and 0xE8 (two annotation FlexSyms) + Marker provisionalMarker = annotationTokenMarkers.provisionalElement(); + if (slowReadFlexSym_1_1(provisionalMarker)) { + return true; + } + if (valueTid.lowerNibble == TWO_ANNOTATION_FLEX_SYMS_LOWER_NIBBLE_1_1) { + // Opcode 0xE8 (two annotation FlexSyms) + provisionalMarker = annotationTokenMarkers.provisionalElement(); + if (slowReadFlexSym_1_1(provisionalMarker)) { + return true; + } + annotationTokenMarkers.commit(); + } + annotationTokenMarkers.commit(); + } else { + // Opcodes 0xE4 (one annotation SID) and 0xE5 (two annotation SIDs) + int annotationSid = (int) slowReadFlexUInt_1_1(); + if (annotationSid < 0) { + return true; + } + annotationTokenMarkers.provisionalElement().endIndex = annotationSid; + if (valueTid.lowerNibble == TWO_ANNOTATION_SIDS_LOWER_NIBBLE_1_1) { + // Opcode 0xE5 (two annotation SIDs) + annotationSid = (int) slowReadFlexUInt_1_1(); + if (annotationSid < 0) { + return true; + } + annotationTokenMarkers.provisionalElement().endIndex = annotationSid; + annotationTokenMarkers.commit(); + } + annotationTokenMarkers.commit(); + } + + } + valueMarker.typeId = valueTid; + return false; + } + + /** + * Calculates the end index for the given type ID and sets `event` based on the type of value encountered, if any. + * At the time of invocation, `peekIndex` must point to the first byte after the value's type ID byte. After return, + * `peekIndex` will point to the first byte in the value's representation, or, in the case of a NOP pad, the first + * byte that follows the pad. + * @param valueTid the type ID of the value. + * @param isAnnotated true if the value is annotated. + * @return the end index of the value or NOP pad. 
+ */ + private long calculateEndIndex_1_1(IonTypeID valueTid, boolean isAnnotated) { + if (valueTid.isDelimited) { + event = Event.START_CONTAINER; + return DELIMITED_MARKER; + } + long length = valueTid.length; + if (valueTid.variableLength) { + length = uncheckedReadFlexUInt_1_1(); + } else if (length < 0) { + // The value is a FlexInt or FlexUInt, so read the continuation bits to determine the length. + length = uncheckedReadLengthOfFlexUInt_1_1(peekIndex); + } + long endIndex = peekIndex + length; + if (valueTid.type != null && valueTid.type.ordinal() >= LIST_TYPE_ORDINAL) { + event = Event.START_CONTAINER; + } else if (valueTid.isNopPad) { + uncheckedSeekPastNopPad(endIndex, isAnnotated); + } else { + event = Event.START_SCALAR; + } + return endIndex; + } + + /** + * Reads the field name at `peekIndex`. After this method returns `peekIndex` points to the first byte of the + * value that follows the field name. If the field name contained a symbol ID, `fieldSid` is set to that symbol ID. + * If it contained inline text, `fieldSid` is set to -1, and the start and end indexes of the inline text are + * described by `fieldTextMarker`. + */ + private void uncheckedReadFieldName_1_1() { + if (parent.typeId.isInlineable) { + fieldSid = (int) uncheckedReadFlexSym_1_1(fieldTextMarker); + } else { + // 0 in field name position of a SID struct indicates that all field names that follow are represented as + // using FlexSyms. + if (buffer[(int) peekIndex] == FlexInt.ZERO) { + peekIndex++; + parent.typeId = IonTypeID.STRUCT_WITH_FLEX_SYMS_ID; + fieldSid = (int) uncheckedReadFlexSym_1_1(fieldTextMarker); + } else { + fieldSid = (int) uncheckedReadFlexUInt_1_1(); + fieldTextMarker.startIndex = -1; + fieldTextMarker.endIndex = fieldSid; + } + } + } + + /** + * Reads a 3+ byte FlexInt into a long. After this method returns, `peekIndex` points to the first byte after the + * end of the FlexUInt. + * @param firstByte the first byte of the FlexInt. + * @return the value. 
+ */ + private long uncheckedReadLargeFlexInt_1_1(int firstByte) { + firstByte &= SINGLE_BYTE_MASK; + // FlexInts are essentially just FlexUInts that interpret the most significant bit as a sign that needs to be + // extended. + long result = uncheckedReadLargeFlexUInt_1_1(firstByte); + if (buffer[(int) peekIndex - 1] < 0) { + // Sign extension. The mask must be a long (-1L): with an int operand, `>>>` uses only the low five bits + // of the shift distance, which corrupts negative values that need more than 31 bits (5+ byte FlexInts). + result |= ~(-1L >>> Long.numberOfLeadingZeros(result)); + } + return result; + } + + /** + * Reads a FlexInt into a long. After this method returns, `peekIndex` points to the first byte after the + * end of the FlexInt. + * @return the value. + */ + private long uncheckedReadFlexInt_1_1() { + // The following up-cast to int performs sign extension, if applicable. + int currentByte = buffer[(int)(peekIndex++)]; + if ((currentByte & 1) == 1) { + // Single byte; shift out the continuation bit while preserving the sign. + return currentByte >> 1; + } + if ((currentByte & 2) != 0) { + // Two bytes; up-cast the second byte to int, thereby performing sign extension. Make room for the six + // value bits in the first byte. Or with those six value bits after shifting out the two continuation bits. + return buffer[(int) peekIndex++] << 6 | ((currentByte & SINGLE_BYTE_MASK) >>> 2); + } + return uncheckedReadLargeFlexInt_1_1(currentByte); + } + + /** + * Reads a FlexSym. After this method returns, `peekIndex` points to the first byte after the end of the FlexSym. + * When the FlexSym contains inline text, the given Marker's start and end indices are populated with the start and + * end of the UTF-8 byte sequence, and this method returns -1. When the FlexSym contains a symbol ID, the given + * Marker's endIndex is set to the symbol ID value and its startIndex is set to -1. When this FlexSym wraps a + * delimited end marker, neither the Marker's startIndex nor its endIndex is set. + * @param markerToSet the marker to populate. + * @return the user-space symbol ID value if one was present, otherwise -1.
+ */ + private long uncheckedReadFlexSym_1_1(Marker markerToSet) { + long result = uncheckedReadFlexInt_1_1(); + if (result == 0) { + int nextByte = buffer[(int)(peekIndex++)]; + if (isFlexSymSystemSymbolOrSid0(nextByte & SINGLE_BYTE_MASK)) { + setSystemSymbolMarker(markerToSet, (byte)(nextByte - FLEX_SYM_SYSTEM_SYMBOL_OFFSET)); + return -1; + } else if (nextByte != OpCodes.DELIMITED_END_MARKER) { + throw new IonException("FlexSym 0 may only precede symbol zero, system symbol, or delimited end."); + } + markerToSet.typeId = IonTypeID.DELIMITED_END_ID; + return -1; + } else if (result < 0) { + markerToSet.startIndex = peekIndex; + markerToSet.endIndex = peekIndex - result; + peekIndex = markerToSet.endIndex; + return -1; + } else { + markerToSet.startIndex = -1; + markerToSet.endIndex = result; + } + return result; + } + + /* + * Determines whether a byte (specifically, the byte following a FlexSym escape byte) represents a system symbol. + * + * @param byteAfterEscapeCode The unsigned value of the byte after the FlexSym escape byte + * @return true if the byte is in the reserved range for system symbols or $0. + */ + private static boolean isFlexSymSystemSymbolOrSid0(int byteAfterEscapeCode) { + return byteAfterEscapeCode >= FLEX_SYM_SYSTEM_SYMBOL_OFFSET && byteAfterEscapeCode <= FLEX_SYM_MAX_SYSTEM_SYMBOL; + } + + /** + * Reads a FlexInt into a long, ensuring enough space is available in the buffer. After this method returns false, + * `peekIndex` points to the first byte after the end of the FlexInt and `markerToSet.endIndex` contains the + * FlexInt value. + * @param firstByte the first (least-significant) byte of the FlexInt. + * @param markerToSet the marker to populate. + * @return true if there are not enough bytes to complete the FlexSym; otherwise, false. 
+ */ + private boolean slowReadLargeFlexInt_1_1(int firstByte, Marker markerToSet) { + firstByte &= SINGLE_BYTE_MASK; + // FlexInts are essentially just FlexUInts that interpret the most significant bit as a sign that needs to be + // extended. + long result = slowReadLargeFlexUInt_1_1(firstByte); + if (result < 0) { + return true; + } + if (buffer[(int) peekIndex - 1] < 0) { + // Sign extension. The mask must be a long (-1L): with an int operand, `>>>` uses only the low five bits + // of the shift distance, which corrupts negative values that need more than 31 bits (5+ byte FlexInts). + result |= ~(-1L >>> Long.numberOfLeadingZeros(result)); + } + markerToSet.endIndex = result; + return false; + } + + /** + * Reads a FlexInt into a long, ensuring enough space is available in the buffer. After this method returns false, + * `peekIndex` points to the first byte after the end of the FlexInt and `markerToSet.endIndex` contains the + * FlexInt value. + * @param markerToSet the marker to populate. + * @return true if there are not enough bytes to complete the FlexInt; otherwise, false. + */ + private boolean slowReadFlexInt_1_1(Marker markerToSet) { + int currentByte = slowReadByte(); + if (currentByte < 0) { + return true; + } + if ((currentByte & 1) == 1) { + // Single byte; shift out the continuation bit while preserving the sign. The downcast to byte and implicit + // upcast back to int results in sign extension. + markerToSet.endIndex = ((byte) currentByte) >> 1; + return false; + } + return slowReadLargeFlexInt_1_1(currentByte, markerToSet); + } + + /** + * Reads a FlexSym, ensuring enough space is available in the buffer. After this method returns, `peekIndex` + * points to the first byte after the end of the FlexSym. When the FlexSym contains inline text, the given Marker's + * start and end indices are populated with the start and end of the UTF-8 byte sequence. + * When the FlexSym contains a symbol ID, the given Marker's endIndex is set to the symbol ID value and its + * startIndex is set to -1. When this FlexSym wraps a delimited end marker, neither the Marker's startIndex nor its + * endIndex is set.
+ * @param markerToSet the marker to populate. + * @return true if there are not enough bytes to complete the FlexSym; otherwise, false. + */ + private boolean slowReadFlexSym_1_1(Marker markerToSet) { + if (slowReadFlexInt_1_1(markerToSet)) { + return true; + } + long result = markerToSet.endIndex; + markerToSet.endIndex = -1; + if (result == 0) { + int nextByte = slowReadByte(); + if (nextByte < 0) { + return true; + } + if (isFlexSymSystemSymbolOrSid0(nextByte)) { + setSystemSymbolMarker(markerToSet, nextByte - FLEX_SYM_SYSTEM_SYMBOL_OFFSET); + return false; + } else if ((byte) nextByte != OpCodes.DELIMITED_END_MARKER) { + throw new IonException("FlexSyms may only wrap symbol zero, empty string, or delimited end."); + } + markerToSet.typeId = DELIMITED_END_ID; + } else if (result < 0) { + markerToSet.startIndex = peekIndex; + markerToSet.endIndex = peekIndex - result; + peekIndex = markerToSet.endIndex; + } else { + markerToSet.startIndex = -1; + markerToSet.endIndex = result; + } + return false; + } + + /** + * FlexSym encoding types. 
+ */ + private enum FlexSymType { + INCOMPLETE { + @Override + IonTypeID typeIdFor(int length) { + throw new IllegalStateException("The FlexSym is incomplete."); + } + }, + INLINE_TEXT { + @Override + IonTypeID typeIdFor(int length) { + if (length <= 0xF) { + return TYPE_IDS_1_1[0xA0 | length]; + } + return TYPE_IDS_1_1[OpCodes.VARIABLE_LENGTH_INLINE_SYMBOL & SINGLE_BYTE_MASK]; + } + }, + SYMBOL_ID { + @Override + IonTypeID typeIdFor(int length) { + if (length == 0) { + return TYPE_IDS_1_1[OpCodes.SYMBOL_ADDRESS_1_BYTE & SINGLE_BYTE_MASK]; + } + if (length < 3) { + return TYPE_IDS_1_1[0xE0 | length]; + } + return TYPE_IDS_1_1[OpCodes.SYMBOL_ADDRESS_MANY_BYTES & SINGLE_BYTE_MASK]; + } + }, + SYSTEM_SYMBOL_ID { + @Override + IonTypeID typeIdFor(int length) { + return SYSTEM_SYMBOL_VALUE; + } + }, + STRUCT_END { + @Override + IonTypeID typeIdFor(int length) { + throw new IllegalStateException("The special struct end FlexSym is not associated with a type ID."); + } + }; + + /** + * Classifies a special FlexSym (beginning with FlexInt zero) based on the byte that follows. + * @param specialByte the byte that followed FlexInt zero. + * @return the FlexSymType that corresponds to the given special byte. + */ + static FlexSymType classifySpecialFlexSym(int specialByte) { + if (specialByte < 0) { + return FlexSymType.INCOMPLETE; + } + if (isFlexSymSystemSymbolOrSid0(specialByte)) { + return FlexSymType.SYSTEM_SYMBOL_ID; + } + if ((byte) specialByte == OpCodes.DELIMITED_END_MARKER) { + return FlexSymType.STRUCT_END; + } + throw new IonException("FlexSyms may only wrap symbol zero, empty string, or delimited end."); + } + + /** + * Gets the most appropriate IonTypeID for a FlexSym of this type and the given length. + * @param length the length of the FlexSym. + * @return an Ion 1.1 IonTypeID with appropriate values for 'length' and 'isInlineable'. + */ + abstract IonTypeID typeIdFor(int length); + } + + /** + * Skips a FlexSym. 
After this method returns, `peekIndex` points to the first byte after the end of the FlexSym. + * @param markerToSet if the method returns `INLINE_TEXT`, will have `startIndex` and `endIndex` set to the bounds of + * the inline UTF-8 byte sequence. + * @return the type of FlexSym that was skipped. + */ + private FlexSymType uncheckedSkipFlexSym_1_1(Marker markerToSet) { + long result = uncheckedReadFlexInt_1_1(); + if (result == 0) { + markerToSet.startIndex = peekIndex + 1; + markerToSet.endIndex = markerToSet.startIndex; + int specialByte = buffer[(int) peekIndex++] & SINGLE_BYTE_MASK; + FlexSymType type = FlexSymType.classifySpecialFlexSym(specialByte); + if (type == FlexSymType.SYSTEM_SYMBOL_ID) { + setSystemSymbolMarker(markerToSet, (byte)(specialByte - FLEX_SYM_SYSTEM_SYMBOL_OFFSET)); + } + return type; + } else if (result < 0) { + markerToSet.startIndex = peekIndex; + markerToSet.endIndex = peekIndex - result; + peekIndex = markerToSet.endIndex; + return FlexSymType.INLINE_TEXT; + } + return FlexSymType.SYMBOL_ID; + } + + /** + * Skips a FlexSym, ensuring enough space is available in the buffer. After this method returns, `peekIndex` points + * to the first byte after the end of the FlexSym. + * @param markerToSet if non-null and the method returns `INLINE_TEXT`, will have `startIndex` and `endIndex` set + * to the bounds of the inline UTF-8 byte sequence. + * @return INCOMPLETE if there are not enough bytes in the stream to complete the FlexSym; otherwise, the type + * of FlexSym that was skipped. + */ + private FlexSymType slowSkipFlexSym_1_1(Marker markerToSet) { + long result = slowReadFlexUInt_1_1(); + if (result < 0) { + return FlexSymType.INCOMPLETE; + } + if (buffer[(int) peekIndex - 1] < 0) { + // Sign extension.
+ result |= ~(-1 >>> Long.numberOfLeadingZeros(result)); + } + if (result == 0) { + int specialByte = slowReadByte(); + FlexSymType flexSymType = FlexSymType.classifySpecialFlexSym(specialByte); + if (markerToSet != null && flexSymType != FlexSymType.INCOMPLETE) { + markerToSet.startIndex = peekIndex; + markerToSet.endIndex = peekIndex; + } + if (markerToSet != null && flexSymType == FlexSymType.SYSTEM_SYMBOL_ID) { + // FIXME: See if we can set the SID in the endIndex here without causing the slow reader to get confused + // about where the end of the value is for tagless symbols. + // I.e. use setSystemSymbolMarker(markerToSet, (byte)(specialByte - FLEX_SYM_SYSTEM_SYMBOL_OFFSET)); + markerToSet.typeId = SYSTEM_SYMBOL_VALUE; + markerToSet.startIndex = peekIndex - 1; + } + return flexSymType; + } else if (result < 0) { + if (markerToSet != null) { + markerToSet.startIndex = peekIndex; + markerToSet.endIndex = peekIndex - result; + } + peekIndex -= result; + return FlexSymType.INLINE_TEXT; + } + return FlexSymType.SYMBOL_ID; + } + + /** + * Reads the field name FlexSym at `peekIndex`, ensuring enough bytes are available in the buffer. After this method + * returns `peekIndex` points to the first byte of the value that follows the field name. If the field name + * contained a symbol ID, `fieldSid` is set to that symbol ID. If it contained inline text, `fieldSid` is set to -1, + * and the start and end indexes of the inline text are described by `fieldTextMarker`. + * @return true if there are not enough bytes in the stream to complete the field name; otherwise, false. + */ + private boolean slowReadFieldNameFlexSym_1_1() { + if (slowReadFlexSym_1_1(fieldTextMarker)) { + return true; + } + if (fieldTextMarker.startIndex < 0) { + fieldSid = (int) fieldTextMarker.endIndex; + } + return false; + } + + /** + * Reads the field name FlexSym or FlexUInt at `peekIndex`, ensuring enough bytes are available in the buffer. 
After + * this method returns `peekIndex` points to the first byte of the value that follows the field name. If the field + * name contained a symbol ID, `fieldSid` is set to that symbol ID. If it contained inline text, `fieldSid` is set + * to -1, and the start and end indexes of the inline text are described by `fieldTextMarker`. + * @return true if there are not enough bytes in the stream to complete the field name; otherwise, false. + */ + private boolean slowReadFieldName_1_1() { + // The value must have at least 2 more bytes: 1 for the smallest-possible field SID and 1 for + // the smallest-possible representation. + if (!fillAt(peekIndex, 2)) { + return true; + } + if (parent.typeId.isInlineable) { + return slowReadFieldNameFlexSym_1_1(); + } else { + // 0 in field name position of a SID struct indicates that all field names that follow are represented as + // using FlexSyms. + if (buffer[(int) peekIndex] == FlexInt.ZERO) { + peekIndex++; + setCheckpoint(CheckpointLocation.BEFORE_UNANNOTATED_TYPE_ID); + parent.typeId = IonTypeID.STRUCT_WITH_FLEX_SYMS_ID; + return slowReadFieldNameFlexSym_1_1(); + } else { + fieldSid = (int) slowReadFlexUInt_1_1(); + fieldTextMarker.startIndex = -1; + fieldTextMarker.endIndex = fieldSid; + return fieldSid < 0; + } + } + } + + /** + * Determines whether the current delimited container has reached its end. + * @return true if the container is at its end; otherwise, false. + */ + private boolean uncheckedIsDelimitedEnd_1_1() { + if (parent.typeId.type == IonType.STRUCT) { + uncheckedReadFieldName_1_1(); + if (fieldSid < 0 && fieldTextMarker.typeId != null && fieldTextMarker.typeId.lowerNibble == OpCodes.DELIMITED_END_MARKER) { + parent.endIndex = peekIndex; + event = Event.END_CONTAINER; + return true; + } + } else if (buffer[(int) peekIndex] == OpCodes.DELIMITED_END_MARKER) { + peekIndex++; + parent.endIndex = peekIndex; + event = Event.END_CONTAINER; + return true; + } + return false; + } + + /** + * Un-reads one byte. 
It is up to the caller to ensure the provided byte is actually the last byte read. + * @param b the byte to un-read. + */ + private void unreadByte(int b) { + if (refillableState.isSkippingCurrentValue) { + refillableState.lastUnbufferedByte = b; + } else { + peekIndex--; + } + } + + /** + * Determines whether the cursor is at the end of a delimited struct. + * @param currentByte the byte on which the cursor is currently positioned. + * @return true if the struct is at its end or if not enough data is available; otherwise, false. + */ + private boolean slowIsDelimitedStructEnd_1_1(int currentByte) { + if (currentByte == FlexInt.ZERO) { + // This is a special FlexSym in field position. Determine whether the next byte is DELIMITED_END_MARKER. + currentByte = slowReadByte(); + if (currentByte < 0) { + return true; + } + if (currentByte == (OpCodes.DELIMITED_END_MARKER & SINGLE_BYTE_MASK)) { + event = Event.END_CONTAINER; + valueTid = null; + fieldSid = -1; + return true; + } + // Note: slowReadByte() increments the peekIndex, but if the delimiter end is not found, the byte + // needs to remain available. + unreadByte(currentByte); + } + return false; + } + + /** + * Determines whether the current delimited container has reached its end, ensuring enough bytes are available + * in the stream. + * @return true if the container is at its end or if not enough data is available; otherwise, false. + */ + private boolean slowIsDelimitedEnd_1_1() { + int b = slowReadByte(); + if (b < 0) { + return true; + } + if (parent.typeId.type == IonType.STRUCT && slowIsDelimitedStructEnd_1_1(b)) { + parent.endIndex = peekIndex; + return true; + } else if (b == (OpCodes.DELIMITED_END_MARKER & SINGLE_BYTE_MASK)) { + parent.endIndex = peekIndex; + event = Event.END_CONTAINER; + valueTid = null; + fieldSid = -1; + return true; + } + // Note: slowReadByte() increments the peekIndex, but if the delimiter end is not found, the byte + // needs to remain available. 
+ unreadByte(b); + return false; + } + + /** + * Skips past the remaining elements of the current delimited container. + * @return true if the end of the stream was reached before skipping past all remaining elements; otherwise, false. + */ + boolean uncheckedSkipRemainingDelimitedContainerElements_1_1() { + // TODO this needs to be updated to handle the case where the container contains non-prefixed macro invocations, + // as the length of these invocations is unknown to the cursor. Input from the macro evaluator is needed. + while (event != Event.END_CONTAINER) { + event = Event.NEEDS_DATA; + while (uncheckedNextToken()); + if (event == Event.NEEDS_DATA) { + return true; + } + } + return false; + } + + /** + * Skips past the remaining elements of the current delimited container, ensuring enough bytes are available in + * the stream. + * @return true if the end of the stream was reached before skipping past all remaining elements; otherwise, false. + */ + private boolean slowSkipRemainingDelimitedContainerElements_1_1() { + // TODO this needs to be updated to handle the case where the container contains non-prefixed macro invocations, + // as the length of these invocations is unknown to the cursor. Input from the macro evaluator is needed. + while (event != Event.END_CONTAINER) { + slowNextToken(); + if (event == Event.START_CONTAINER && valueMarker.endIndex == DELIMITED_MARKER) { + seekPastDelimitedContainer_1_1(); + } + if (event == Event.NEEDS_DATA) { + return true; + } + } + return false; + } + + /** + * Seek past a delimited container that was never stepped into. + */ + private void seekPastDelimitedContainer_1_1() { + stepIntoContainer(); + stepOutOfContainer(); + } + + /** + * Locates the end of the delimited container on which the reader is currently positioned. + * @return true if the end of the container was found; otherwise, false.
+ */ + private boolean slowFindDelimitedEnd_1_1() { + // Pin the current buffer offset so that all bytes encountered while finding the end of the delimited container + // are buffered. If the pin is already set, do not overwrite; this indicates a retry after previously + // running out of data. + if (refillableState.pinOffset < 0) { + refillableState.pinOffset = offset; + } + if (parent == null) { + // At depth zero, there can not be any more upward recursive calls to which the shift needs to be + // conveyed. + refillableState.pendingShift = 0; + } + // Save the cursor's current state so that it can return to this position after finding the delimited end. + long savedPeekIndex = peekIndex; + long savedStartIndex = valueMarker.startIndex; + long savedEndIndex = valueMarker.endIndex; + int savedFieldSid = fieldSid; + IonTypeID savedFieldTid = fieldTextMarker.typeId; + long savedFieldTextStartIndex = fieldTextMarker.startIndex; + long savedFieldTextEndIndex = fieldTextMarker.endIndex; + IonTypeID savedValueTid = valueMarker.typeId; + IonTypeID savedAnnotationTid = annotationSequenceMarker.typeId; + long savedAnnotationStartIndex = annotationSequenceMarker.startIndex; + long savedAnnotationsEndIndex = annotationSequenceMarker.endIndex; + CheckpointLocation savedCheckpointLocation = checkpointLocation; + long savedCheckpoint = checkpoint; + int savedContainerIndex = containerIndex; + Marker savedParent = parent; + boolean savedHasAnnotations = hasAnnotations; + // The cursor remains logically positioned at the current value despite probing forward for the end of the + // delimited value. Accordingly, do not overwrite the existing annotations with any annotations found during + // the probe. + refillableState.skipAnnotations = true; + // ------------ + + // TODO performance: the following line causes the end indexes of any child delimited containers that are not + // contained within a length-prefixed container to be calculated. 
Currently these are thrown away, but storing + // them in case those containers are later accessed could make them faster to skip. This would require some + // additional complexity. + seekPastDelimitedContainer_1_1(); + + refillableState.skipAnnotations = false; + boolean isReady = event != Event.NEEDS_DATA; + if (refillableState.isSkippingCurrentValue) { + // This delimited container is oversized. The cursor must seek past it. + refillableState.state = State.SEEK_DELIMITED; + refillableState.targetSeekDepth = savedContainerIndex; + refillableState.pendingShift = 0; + return isReady; + } + + // Restore the state of the cursor at the start of the delimited container. + long pendingShift = refillableState.pendingShift; + valueMarker.startIndex = savedStartIndex - pendingShift; + valueMarker.endIndex = (savedEndIndex == DELIMITED_MARKER) ? DELIMITED_MARKER : (savedEndIndex - pendingShift); + fieldSid = savedFieldSid; + valueMarker.typeId = savedValueTid; + valueTid = savedValueTid; + annotationSequenceMarker.typeId = savedAnnotationTid; + annotationSequenceMarker.startIndex = savedAnnotationStartIndex - pendingShift; + annotationSequenceMarker.endIndex = savedAnnotationsEndIndex - pendingShift; + fieldTextMarker.typeId = savedFieldTid; + fieldTextMarker.startIndex = savedFieldTextStartIndex - pendingShift; + fieldTextMarker.endIndex = savedFieldTextEndIndex - pendingShift; + checkpointLocation = savedCheckpointLocation; + checkpoint = savedCheckpoint - pendingShift; + containerIndex = savedContainerIndex; + hasAnnotations = savedHasAnnotations; + + savedPeekIndex -= pendingShift; + parent = savedParent; + if (isReady) { + // Record the endIndex so that it does not need to be calculated repetitively. + valueMarker.endIndex = peekIndex; + event = Event.START_CONTAINER; + refillableState.state = State.READY; + refillableState.pinOffset = -1; + } else { + // The fill is not complete, but there is currently no more data. 
The cursor will have to resume the fill + // before processing the next request. + refillableState.state = State.FILL_DELIMITED; + } + + peekIndex = savedPeekIndex; + return isReady; + } + + /** + * Seeks to the end of the delimited container at `refillableState.targetSeekDepth`. + * @return true if the end of the container was reached; otherwise, false. + */ + private boolean slowSeekToDelimitedEnd_1_1() { + refillableState.state = State.READY; + refillableState.isSkippingCurrentValue = true; + while (containerIndex > refillableState.targetSeekDepth) { + stepOutOfContainer(); + if (event == Event.NEEDS_DATA) { + refillableState.state = State.SEEK_DELIMITED; + refillableState.isSkippingCurrentValue = false; + return false; + } + } + // The end of the container has been reached. Report the number of bytes skipped and exit seek mode. + if (dataHandler != null) { + reportSkippedData(); + } + refillableState.totalDiscardedBytes += refillableState.individualBytesSkippedWithoutBuffering; + refillableState.individualBytesSkippedWithoutBuffering = 0; + refillableState.isSkippingCurrentValue = false; + event = Event.NEEDS_INSTRUCTION; + return true; + } + + /* ---- End: version-dependent parsing methods ---- */ + + /* ---- Begin: version-agnostic parsing, utility, and public API methods ---- */ + + /** + * Attempts to make the cursor READY by finishing the operation that was in progress last time the end of the stream + * was reached. This should not be called when the cursor state is already READY. + * @return true if the cursor is ready; otherwise, false. 
+ */ + private boolean slowMakeBufferReady() { + boolean isReady; + switch (refillableState.state) { + case SEEK: + isReady = !slowSeek(refillableState.bytesRequested); + break; + case FILL: + isReady = fillAt(offset, refillableState.bytesRequested); + break; + case FILL_DELIMITED: + refillableState.state = State.READY; + isReady = slowFindDelimitedEnd_1_1(); + break; + case SEEK_DELIMITED: + isReady = slowSeekToDelimitedEnd_1_1(); + break; + case TERMINATED: + isReady = false; + break; + default: + throw new IllegalStateException(); + } + if (!isReady) { + event = Event.NEEDS_DATA; + } + return isReady; + } + + /** + * Sets `checkpoint` to the current `peekIndex`, which is at the given type of location. + * @param location the type of checkpoint location. Unlike `setCheckpointBeforeUnannotatedTypeId()`, this method only records the checkpoint; it does not call `reset()` or move `offset`. + */ + private void setCheckpoint(CheckpointLocation location) { + checkpointLocation = location; + checkpoint = peekIndex; + } + + /** + * Sets `checkpoint` to the current `peekIndex`, which must be before an unannotated type ID, and seeks the + * buffer to that point. + */ + private void setCheckpointBeforeUnannotatedTypeId() { + reset(); + offset = peekIndex; + checkpointLocation = CheckpointLocation.BEFORE_UNANNOTATED_TYPE_ID; + checkpoint = peekIndex; + } + + /** + * Validates and sets the given marker, which must fit within its parent container (if applicable). The resulting + * `startIndex` will be set to `peekIndex` (the next byte to be consumed from the cursor's buffer), and its + * `endIndex` will be set to the given value. + * @param endIndex the value's end index. + * @param markerToSet the marker to set.
+ */ + private void setMarker(long endIndex, Marker markerToSet) { + if (parent != null && endIndex > parent.endIndex && parent.endIndex > DELIMITED_MARKER) { + throw new IonException(String.format("Value [%d:%d] exceeds the length of its parent container [%d:%d].", peekIndex, endIndex, parent.startIndex, parent.endIndex)); + } + markerToSet.startIndex = peekIndex; + markerToSet.endIndex = endIndex; + } + + /** + * Determines whether the cursor has reached the end of the current container. If true, `event` will be set to + * END_CONTAINER and information about the current value will be reset. + * @return true if the end of the current container has been reached; otherwise, false. + */ + private boolean checkContainerEnd() { + if (peekIndex < valueMarker.endIndex || parent.endIndex > peekIndex) { + return false; + } + if (parent.endIndex == DELIMITED_MARKER) { + return isSlowMode ? slowIsDelimitedEnd_1_1() : uncheckedIsDelimitedEnd_1_1(); + } + if (parent.endIndex == peekIndex) { + event = Event.END_CONTAINER; + valueTid = null; + fieldSid = -1; + return true; + } + throw new IonException("Contained values overflowed the parent container length."); + } + + /** + * Resets state specific to the current value. + */ + private void reset() { + valueTid = null; + valueMarker.typeId = null; + valueMarker.startIndex = -1; + valueMarker.endIndex = -1; + fieldSid = -1; + fieldTextMarker.typeId = null; + fieldTextMarker.startIndex = -1; + fieldTextMarker.endIndex = -1; + hasAnnotations = false; + annotationSequenceMarker.typeId = null; + annotationSequenceMarker.startIndex = -1; + annotationSequenceMarker.endIndex = -1; + macroInvocationId = -1; + isSystemInvocation = false; + taglessType = null; + } + + /** + * Reads the final three bytes of an IVM. `peekIndex` must point to the first byte after the opening `0xE0` byte. + * After return, `majorVersion`, `minorVersion`, and `typeIds` will be updated accordingly, and `peekIndex` will + * point to the first byte after the IVM. 
+ */ + private void readIvm() { + if (limit < peekIndex + IVM_REMAINING_LENGTH) { + throw new IonException("Incomplete Ion version marker."); + } + majorVersion = buffer[(int) (peekIndex++)]; + minorVersion = buffer[(int) (peekIndex++)]; + if ((buffer[(int) (peekIndex++)] & SINGLE_BYTE_MASK) != IVM_FINAL_BYTE) { + throw new IonException("Invalid Ion version marker."); + } + if (majorVersion != 1) { + throw new IonException(String.format("Unsupported Ion version: %d.%d", majorVersion, minorVersion)); + } + if (minorVersion == 0) { + typeIds = IonTypeID.TYPE_IDS_1_0; + byteBuffer.order(ByteOrder.BIG_ENDIAN); + } else if (minorVersion == 1) { + typeIds = IonTypeID.TYPE_IDS_1_1; + byteBuffer.order(ByteOrder.LITTLE_ENDIAN); + } else { + throw new IonException(String.format("Unsupported Ion version: %d.%d", majorVersion, minorVersion)); + } + ivmConsumer.ivmEncountered(majorVersion, minorVersion); + } + + /** + * Validates and skips a NOP pad. After return, `peekIndex` will point to the first byte after the NOP pad. + * @param endIndex the endIndex of the NOP pad. + * @param isAnnotated true if the NOP pad occurs within an annotation wrapper (which is illegal); otherwise, false. + */ + private void uncheckedSeekPastNopPad(long endIndex, boolean isAnnotated) { + if (isAnnotated) { + throw new IonException( + "Invalid annotation wrapper: NOP pad may not occur inside an annotation wrapper." + ); + } + if (endIndex > limit) { + throw new IonException("Invalid NOP pad."); + } + peekIndex = endIndex; + if (parent != null) { + checkContainerEnd(); + } + } + + /** + * Validates and skips a NOP pad. After return, `peekIndex` will point to the first byte after the NOP pad. + * @param valueLength the length of the NOP pad. + * @param isAnnotated true if the NOP pad occurs within an annotation wrapper (which is illegal); otherwise, false. + * @return true if not enough data was available to seek past the NOP pad; otherwise, false. 
+ */ private boolean slowSeekPastNopPad(long valueLength, boolean isAnnotated) { if (isAnnotated) { throw new IonException( @@ -1200,6 +2248,92 @@ private void validateAnnotationWrapperEndIndex(long endIndex) { } } + /* + * The given Marker's endIndex is set to the system symbol ID value and its startIndex is set to -1 + * @param markerToSet the marker to set. + */ + private void setSystemSymbolMarker(Marker markerToSet, int systemSid) { + event = Event.START_SCALAR; + markerToSet.typeId = SYSTEM_SYMBOL_VALUE; + markerToSet.startIndex = -1; + markerToSet.endIndex = systemSid; + } + + /** + * Sets the given marker to represent the current system macro invocation. + * Before calling this method, `macroInvocationId` must be set from the one-byte FixedUInt that represents the ID. + * @param markerToSet the marker to set. + */ + private void setSystemMacroInvocationMarker(Marker markerToSet) { + isSystemInvocation = true; + event = Event.NEEDS_INSTRUCTION; + markerToSet.typeId = SYSTEM_MACRO_INVOCATION_ID; + markerToSet.startIndex = peekIndex; + markerToSet.endIndex = -1; + } + + /** + * Sets the given marker to represent the current user macro invocation. + * @param valueTid the type ID of the macro invocation. + * @param markerToSet the Marker to set with information parsed from the macro invocation. After return, the + * marker's type ID will be set, startIndex will point to the first byte of the invocation's + * body, and endIndex will either be -1 (when not a system symbol or prefixed invocation), or + * will be set to the end of the invocation. + * @param length the declared length of the invocation. Ignored unless this is a length-prefixed invocation + * (denoted by `valueTid.variableLength == true`). + */ + private void setUserMacroInvocationMarker(IonTypeID valueTid, Marker markerToSet, long length) { + // It's not yet known whether the invocation represents a scalar or container, or even if it is complete. 
+ // A higher-level reader must provide additional instructions to evaluate the invocation. + event = Event.NEEDS_INSTRUCTION; + markerToSet.typeId = valueTid; + markerToSet.startIndex = peekIndex; + // Unless this is a length-prefixed invocation, the end index of the macro invocation cannot be known until + // evaluation. + markerToSet.endIndex = valueTid.variableLength ? peekIndex + length : -1; + } + + /** + * Reads a macro invocation header, assuming enough bytes are already buffered. `peekIndex` must be positioned on the + * first byte that follows the opcode. After return, `peekIndex` will be positioned after any macro address + * byte(s), and `macroInvocationId` will be set to the address of the macro being invoked. + * @param valueTid the type ID of the macro invocation. + * @param markerToSet the Marker to set with information parsed from the macro invocation. After return, the + * marker's type ID will be set, startIndex will point to the first byte of the invocation's + * body, and endIndex will either be -1 (when not a system symbol or prefixed invocation), or + * will be set to the end of the invocation. + */ + private void uncheckedReadMacroInvocationHeader(IonTypeID valueTid, Marker markerToSet) { + if (valueTid.macroId < 0) { + if (valueTid.lowerNibble == 0x4) { + // Opcode 0xF4: Read the macro ID as a FlexUInt. + macroInvocationId = uncheckedReadFlexUInt_1_1(); + } else if (valueTid.variableLength) { + // Opcode 0xF5: Read the macro ID as a FlexUInt, then read the length as a FlexUInt. + macroInvocationId = uncheckedReadFlexUInt_1_1(); + setUserMacroInvocationMarker(valueTid, markerToSet, uncheckedReadFlexUInt_1_1()); + return; + } else { + // Opcode 0xEF: system macro invocation + macroInvocationId = buffer[(int) peekIndex++]; + setSystemMacroInvocationMarker(markerToSet); + return; + } + } else if (valueTid.length > 0) { + // Opcodes 0x4_: the rest of the macro ID follows in a 1-byte FixedUInt.
+ // Opcodes 0x5_: the rest of the macro ID follows in a 2-byte FixedUInt. + int remainingId = buffer[(int) peekIndex++] & SINGLE_BYTE_MASK; + if (valueTid.length > 1) { + remainingId |= ((buffer[(int) peekIndex++] & SINGLE_BYTE_MASK) << 8); + } + macroInvocationId = valueTid.macroId + remainingId; + } else { + // Opcodes 0x00 - 0x3F -- the opcode is the macro ID. + macroInvocationId = valueTid.macroId; + } + setUserMacroInvocationMarker(valueTid, markerToSet, -1); + } + /** * Reads a value header, consuming the value's annotation wrapper header, if any. Upon invocation, * `peekIndex` must be positioned on the first byte that follows the given type ID byte. After return, `peekIndex` @@ -1214,7 +2348,7 @@ private void validateAnnotationWrapperEndIndex(long endIndex) { private boolean uncheckedReadHeader(final int typeIdByte, final boolean isAnnotated, final Marker markerToSet) { IonTypeID valueTid = typeIds[typeIdByte]; if (!valueTid.isValid) { - throw new IonException("Invalid type ID."); + throw new IonException("Invalid type ID: " + valueTid.theByte); } else if (valueTid.type == IonTypeID.ION_TYPE_ANNOTATION_WRAPPER) { if (isAnnotated) { throw new IonException("Nested annotation wrappers are invalid."); @@ -1223,7 +2357,10 @@ private boolean uncheckedReadHeader(final int typeIdByte, final boolean isAnnota return true; } hasAnnotations = true; - return uncheckedReadHeader(buffer[(int)(peekIndex++)] & SINGLE_BYTE_MASK, true, valueMarker); + return uncheckedReadHeader(buffer[(int) (peekIndex++)] & SINGLE_BYTE_MASK, true, valueMarker); + } else if (minorVersion == 1 && valueTid.isMacroInvocation) { + uncheckedReadMacroInvocationHeader(valueTid, markerToSet); + return true; } else { long endIndex = minorVersion == 0 ? 
calculateEndIndex_1_0(valueTid, isAnnotated) @@ -1235,6 +2372,9 @@ private boolean uncheckedReadHeader(final int typeIdByte, final boolean isAnnota if (endIndex > limit) { isValueIncomplete = true; } + if (minorVersion == 1 && valueTid.isNull && valueTid.length > 0) { + valueTid = IonTypeID.NULL_TYPE_IDS_1_1[buffer[(int)(peekIndex++)] & SINGLE_BYTE_MASK]; + } } markerToSet.typeId = valueTid; if (event == Event.START_CONTAINER) { @@ -1279,7 +2419,19 @@ private boolean slowReadHeader(final int typeIdByte, final boolean isAnnotated, } return true; } - markerToSet.typeId = valueTid; + if (minorVersion == 1) { + if (valueTid.isMacroInvocation) { + setCheckpointAfterValueHeader(); + return true; + } + if (valueTid.isNull && valueTid.length > 0) { + int nullTypeIndex = slowReadByte(); + if (nullTypeIndex < 0) { + return true; + } + markerToSet.typeId = IonTypeID.NULL_TYPE_IDS_1_1[nullTypeIndex]; + } + } if (checkpointLocation == CheckpointLocation.AFTER_SCALAR_HEADER) { return true; } @@ -1292,6 +2444,68 @@ private boolean slowReadHeader(final int typeIdByte, final boolean isAnnotated, return false; } + /** + * Reads a macro invocation header, ensuring enough bytes are buffered. `peekIndex` must be positioned on the + * first byte that follows the opcode. After return, `peekIndex` will be positioned after any macro address + * byte(s), and `macroInvocationId` will be set to the address of the macro being invoked. + * @param valueTid the type ID of the macro invocation. + * @param markerToSet the Marker to set with information parsed from the macro invocation. After returning `false`, + * the marker's type ID will be set, startIndex will point to the first byte of the invocation's + * body, and endIndex will either be -1 (when not a system symbol or prefixed invocation), or + * will be set to the end of the invocation. + * @param macroId the ID of the invocation, if known. 
This is only the case for opcode 0xF5 (denoted by + * `valueTid.variableLength == true`), which has its macro ID encoded as a FlexUInt before its + * length. + * @return true if not enough data was available in the stream to complete the header; otherwise, false. + */ + private boolean slowReadMacroInvocationHeader(IonTypeID valueTid, Marker markerToSet, long macroId) { + if (valueTid.macroId < 0) { + if (valueTid.lowerNibble == 0x4) { + // Opcode 0xF4: Read the macro ID as a FlexUInt. + macroInvocationId = slowReadFlexUInt_1_1(); + if (macroInvocationId < 0) { + return true; + } + } else if (valueTid.variableLength) { + // Opcode 0xF5: The macro ID was already read as a FlexUInt. Now read the length as a FlexUInt. + macroInvocationId = macroId; + long length = slowReadFlexUInt_1_1(); + if (length < 0) { + return true; + } + setUserMacroInvocationMarker(valueTid, markerToSet, length); + return false; + } else { + // Opcode 0xEF: system macro invocation or system symbol value. + int truncatedId = slowReadByte(); + if (truncatedId < 0) { + return true; + } + // The downcast to byte then upcast to long results in sign extension, treating the byte as a FixedInt. + macroInvocationId = (byte) truncatedId; + setSystemMacroInvocationMarker(markerToSet); + return false; + } + } else if (valueTid.length > 0) { + // Opcode 0x4: the rest of the macro ID follows in a 1-byte FixedUInt. + // Opcode 0x5: the rest of the macro ID follows in a 2-byte FixedUInt. + if (!fillAt(peekIndex, valueTid.length)) { + return true; + } + int remainingId = slowPeekByte(); + if (valueTid.length > 1) { + remainingId |= ((byte) slowPeekByte() << 8); + } + macroInvocationId = valueTid.macroId + remainingId; + } else { + // Opcodes 0x00 - 0x3F -- the opcode is the macro ID. 
+ macroInvocationId = valueTid.macroId; + } + setUserMacroInvocationMarker(valueTid, markerToSet, -1); + setCheckpoint(CheckpointLocation.BEFORE_UNANNOTATED_TYPE_ID); + return false; + } + /** * Reads a value header, ensuring enough bytes are buffered. Upon invocation, `peekIndex` must * be positioned on the first byte that follows the given type ID byte. After return, `peekIndex` @@ -1308,12 +2522,13 @@ private boolean slowReadValueHeader(IonTypeID valueTid, boolean isAnnotated, Mar if (valueTid.isDelimited) { endIndex = DELIMITED_MARKER; } else if (valueTid.variableLength) { - // At this point the value must be at least 2 more bytes: 1 for the smallest-possible value length - // and 1 for the smallest-possible value representation. - if (!fillAt(peekIndex, 2)) { + valueLength = minorVersion == 0 ? slowReadVarUInt_1_0() : slowReadFlexUInt_1_1(); + if (valueLength < 0) { return true; } - valueLength = minorVersion == 0 ? slowReadVarUInt_1_0() : slowReadVarUInt_1_1(); + } else if (valueTid.length < 0 && minorVersion > 0) { + // The value is itself a FlexInt or FlexUInt, so read the continuation bits to determine the length. + valueLength = slowReadLengthOfFlexUInt_1_1(peekIndex); if (valueLength < 0) { return true; } @@ -1328,14 +2543,22 @@ private boolean slowReadValueHeader(IonTypeID valueTid, boolean isAnnotated, Mar return true; } valueLength = 0; + } else if (minorVersion == 1 && valueTid.isMacroInvocation) { + // Note: The 0xF5 opcode is variable-length, but unlike other variable-length opcodes, it encodes the + // macro ID, rather than the length, as the first FlexUInt following the opcode. Therefore, for opcode + // 0xF5, `valueLength` below refers to the ID of the invocation. For the other macro invocation opcodes, + // this value is not used. 
+ return slowReadMacroInvocationHeader(valueTid, markerToSet, valueLength); } else { setCheckpoint(CheckpointLocation.AFTER_SCALAR_HEADER); event = Event.START_SCALAR; } if (endIndex != DELIMITED_MARKER) { - if (refillableState.isSkippingCurrentValue) { + if (refillableState.isSkippingCurrentValue && valueLength > 0) { // Any bytes that were skipped directly from the input must still be included in the logical endIndex so - // that the rest of the oversized value's bytes may be skipped. + // that the rest of the oversized value's bytes may be skipped. However, if the value's length is 0, + // then the type ID byte must have been skipped. In this case, the skipped type ID byte is already + // accounted for in the peekIndex. endIndex = peekIndex + valueLength + refillableState.individualBytesSkippedWithoutBuffering; } else { endIndex = peekIndex + valueLength; @@ -1349,6 +2572,7 @@ private boolean slowReadValueHeader(IonTypeID valueTid, boolean isAnnotated, Mar validateAnnotationWrapperEndIndex(endIndex); } setMarker(endIndex, markerToSet); + markerToSet.typeId = valueTid; return false; } @@ -1374,6 +2598,30 @@ private void pushContainer() { parent = containerStack[containerIndex]; } + + /** + * Doubles the size of the cursor's argument group stack. + */ + private void growArgumentGroupStack() { + ArgumentGroupMarker[] newStack = new ArgumentGroupMarker[argumentGroupStack.length * 2]; + System.arraycopy(argumentGroupStack, 0, newStack, 0, argumentGroupStack.length); + for (int i = argumentGroupStack.length; i < newStack.length; i++) { + newStack[i] = new ArgumentGroupMarker(); + } + argumentGroupStack = newStack; + } + + /** + * Push a Marker representing the current argument group onto the stack. + * @return the marker at the new top of the stack. 
+ */ + private ArgumentGroupMarker pushArgumentGroup() { + if (++argumentGroupIndex >= argumentGroupStack.length) { + growArgumentGroupStack(); + } + return argumentGroupStack[argumentGroupIndex]; + } + /** * Step into the current container. */ @@ -1413,29 +2661,72 @@ private Event slowStepIntoContainer() { if (containerIndex == refillableState.fillDepth) { isSlowMode = false; } - parent.typeId = valueMarker.typeId; - parent.endIndex = valueTid.isDelimited ? DELIMITED_MARKER : valueMarker.endIndex; - setCheckpointBeforeUnannotatedTypeId(); + parent.typeId = valueMarker.typeId; + parent.endIndex = valueTid.isDelimited ? DELIMITED_MARKER : valueMarker.endIndex; + setCheckpointBeforeUnannotatedTypeId(); + valueTid = null; + hasAnnotations = false; + event = Event.NEEDS_INSTRUCTION; + return event; + } + + @Override + public Event stepIntoContainer() { + if (isSlowMode) { + if (containerIndex != refillableState.fillDepth - 1) { + if (valueMarker.endIndex > DELIMITED_MARKER && valueMarker.endIndex <= limit) { + refillableState.fillDepth = containerIndex + 1; + } else { + return slowStepIntoContainer(); + } + } + isSlowMode = false; + } + uncheckedStepIntoContainer(); + return event; + } + + /** + * Steps into an e-expression, treating it as a logical container. + */ + void stepIntoEExpression() { + if (valueTid == null || !valueTid.isMacroInvocation) { + throw new IonException("Must be positioned on an e-expression."); + } + pushContainer(); + parent.typeId = valueTid; + // TODO support length prefixed e-expressions. + // TODO when the length is known to be within the buffer, exit slow mode. + parent.endIndex = DELIMITED_MARKER; + valueTid = null; + event = Event.NEEDS_INSTRUCTION; + reset(); + } + + /** + * Steps out of an e-expression, restoring the context of the parent container (if any). 
+ */ + void stepOutOfEExpression() { + if (parent == null) { + throw new IonException("Cannot step out at the top level."); + } + if (!parent.typeId.isMacroInvocation) { + throw new IonException("Not positioned within an e-expression."); + } + // TODO support early step-out when support for lazy parsing of e-expressions is added (including continuable + // reading). + if (valueMarker.endIndex > peekIndex) { + peekIndex = valueMarker.endIndex; + } + setCheckpointBeforeUnannotatedTypeId(); + if (--containerIndex >= 0) { + parent = containerStack[containerIndex]; + } else { + parent = null; + containerIndex = -1; + } valueTid = null; - hasAnnotations = false; event = Event.NEEDS_INSTRUCTION; - return event; - } - - @Override - public Event stepIntoContainer() { - if (isSlowMode) { - if (containerIndex != refillableState.fillDepth - 1) { - if (valueMarker.endIndex > DELIMITED_MARKER && valueMarker.endIndex <= limit) { - refillableState.fillDepth = containerIndex + 1; - } else { - return slowStepIntoContainer(); - } - } - isSlowMode = false; - } - uncheckedStepIntoContainer(); - return event; } /** @@ -1459,7 +2750,7 @@ public Event stepOutOfContainer() { } // Seek past the remaining bytes at this depth and pop from the stack. if (parent.endIndex == DELIMITED_MARKER) { - if (skipRemainingDelimitedContainerElements_1_1()) { + if (uncheckedSkipRemainingDelimitedContainerElements_1_1()) { return event; } } else { @@ -1563,6 +2854,15 @@ private void reportConsumedData() { lastReportedByteTotal = totalNumberOfBytesRead; } + /** + * Reports the total number of bytes skipped without buffering since the last report. + */ + private void reportSkippedData() { + long totalNumberOfBytesRead = getTotalOffset() + refillableState.individualBytesSkippedWithoutBuffering; + dataHandler.onData((int) (totalNumberOfBytesRead - lastReportedByteTotal)); + lastReportedByteTotal = totalNumberOfBytesRead; + } + /** * Advances to the next token, seeking past the previous value if necessary. 
After return `event` will convey * the result (e.g. START_SCALAR, END_CONTAINER) @@ -1596,7 +2896,11 @@ private boolean uncheckedNextToken() { return true; } } else { - if (uncheckedNextContainedToken()) { + if (parent.typeId.isMacroInvocation) { + // When traversing a macro invocation, the cursor must visit each parameter; after visiting each one, + // peekIndex will point to the first byte in the next parameter or value. + valuePreHeaderIndex = peekIndex; + } else if (uncheckedNextContainedToken()) { return false; } if (peekIndex >= limit) { @@ -1682,11 +2986,19 @@ private void slowNextToken() { * @return true if not enough data was available in the stream; otherwise, false. */ private boolean slowSkipRemainingValueBytes() { + // TODO this needs to be updated to handle the case where the value is a non-prefixed macro invocation, + // as the length of these invocations is unknown to the cursor. Input from the macro evaluator is needed. if (valueMarker.endIndex == DELIMITED_MARKER && valueTid != null && valueTid.isDelimited) { seekPastDelimitedContainer_1_1(); if (event == Event.NEEDS_DATA) { return true; } + } else if (refillableState.pinOffset > -1) { + // Bytes in the buffer are being pinned, so buffer the remaining bytes instead of seeking past them. + if (!fillAt(refillableState.pinOffset, valueMarker.endIndex - refillableState.pinOffset)) { + return true; + } + offset = valueMarker.endIndex; } else if (limit >= valueMarker.endIndex) { offset = valueMarker.endIndex; } else if (slowSeek(valueMarker.endIndex - offset)) { @@ -1724,7 +3036,15 @@ private Event slowOverflowableNextToken() { */ private void seekPastOversizedValue() { refillableState.oversizedValueHandler.onOversizedValue(); - if (refillableState.state != State.TERMINATED) { + if (refillableState.state == State.SEEK_DELIMITED) { + // Discard all buffered bytes.
+ slowSeek(availableAt(offset)); + refillableState.pinOffset = -1; + refillableState.totalDiscardedBytes += refillableState.individualBytesSkippedWithoutBuffering; + refillableState.state = State.SEEK_DELIMITED; + peekIndex = offset; + shiftContainerEnds(refillableState.individualBytesSkippedWithoutBuffering); + } else if (refillableState.state != State.TERMINATED) { slowSeek(valueMarker.endIndex - offset - refillableState.individualBytesSkippedWithoutBuffering); refillableState.totalDiscardedBytes += refillableState.individualBytesSkippedWithoutBuffering; peekIndex = offset; @@ -1767,6 +3087,503 @@ private Event slowNextValue() { return slowOverflowableNextToken(); } + /** + * Skips any bytes remaining in the previous value, positioning the cursor on the next token. + * @return true if not enough data was available in the stream to skip the previous value; otherwise, false. + */ + private boolean slowSkipToNextToken() { + if ((refillableState.state != State.READY && !slowMakeBufferReady())) { + return true; + } + if (checkpointLocation == CheckpointLocation.AFTER_SCALAR_HEADER || checkpointLocation == CheckpointLocation.AFTER_CONTAINER_HEADER) { + return slowSkipRemainingValueBytes(); + } + return false; + } + + /** + * Reads the length and type of the FlexSym that starts at the given position, ensuring enough bytes are available + * in the stream. After this method returns with a value greater than or equal to zero, `valueTid` and + * `valueMarker.typeId` will be set to the IonTypeID that most closely corresponds to the length and type of the + * FlexSym. + * @return the length of the FlexSym, or -1 if not enough bytes are available in the stream to determine the length. 
+ */ + private long readFlexSymLengthAndType_1_1() { + FlexSymType flexSymType; + if (isSlowMode) { + flexSymType = slowSkipFlexSym_1_1(valueMarker); + if (flexSymType == FlexSymType.INCOMPLETE) { + return -1; + } + } else { + flexSymType = uncheckedSkipFlexSym_1_1(valueMarker); + } + int lengthOfFlexSym = (int) (peekIndex - valueMarker.startIndex); + peekIndex = valueMarker.startIndex; + valueTid = flexSymType.typeIdFor(lengthOfFlexSym); + valueMarker.typeId = valueTid; + return lengthOfFlexSym; + } + + /** + * Calculates the length and type of variable-length primitive value, ensuring enough bytes are available in the + * stream. + * @param taglessEncoding the variable-length primitive type of the tagless value that starts at `peekIndex`. + * @return the length of the value, or -1 if not enough bytes are available in the stream to determine the length. + */ + private long calculateTaglessLengthAndType(TaglessEncoding taglessEncoding) { + // TODO length calculation for these types could be deferred until they are consumed to avoid duplicate + // work. This would trade some added complexity for a potential performance gain that would need to be + // quantified. + long length; + switch (taglessEncoding) { + case FLEX_UINT: + case FLEX_INT: + length = isSlowMode ? slowReadLengthOfFlexUInt_1_1(peekIndex) : uncheckedReadLengthOfFlexUInt_1_1(peekIndex); + break; + case FLEX_SYM: + length = readFlexSymLengthAndType_1_1(); + break; + default: + throw new IllegalStateException("Length is built into the primitive type's IonTypeID."); + } + if (valueTid == SYSTEM_SYMBOL_VALUE) { + return 1; + } + if (length >= 0) { + valueMarker.endIndex = peekIndex + length; + } + return length; + } + + /** + * Skips any bytes remaining in the current token, positioning the cursor on the first byte of the next token. + * @return true if not enough data was available in the stream to skip the previous value; otherwise, false. 
+ */ + private boolean skipToNextToken() { + event = Event.NEEDS_DATA; + if (isSlowMode) { + if (slowSkipToNextToken()) { + return true; + } + } else { + if (peekIndex < valueMarker.endIndex) { + peekIndex = valueMarker.endIndex; + } else if (valueTid != null && valueTid.isDelimited) { + seekPastDelimitedContainer_1_1(); + } + } + if (dataHandler != null) { + reportConsumedData(); + } + reset(); + return false; + } + + /** + * Advances the cursor to the next value, assuming that it is tagless with the given type, skipping the current + * value (if any). This method may return: + *
<ul>
+ * <li>NEEDS_DATA, if not enough data is available in the stream</li>
+ * <li>START_SCALAR, if the reader is now positioned on a scalar value</li>
+ * </ul>
+ * @param taglessEncoding the {@link TaglessEncoding} of the tagless value on which to position the cursor. + * @return an Event conveying the result of the operation. + */ + public Event nextTaglessValue(TaglessEncoding taglessEncoding) { + event = Event.NEEDS_DATA; + if (isSlowMode) { + if (slowSkipToNextToken()) { + return event; + } + } else { + if (peekIndex < valueMarker.endIndex) { + peekIndex = valueMarker.endIndex; + } else if (valueTid != null && valueTid.isDelimited) { + seekPastDelimitedContainer_1_1(); + } + } + if (dataHandler != null) { + reportConsumedData(); + } + reset(); + taglessType = taglessEncoding; + valueTid = taglessEncoding.typeID; + valueMarker.typeId = valueTid; + valueMarker.startIndex = peekIndex; + valuePreHeaderIndex = peekIndex; + if (valueTid.variableLength) { + if (calculateTaglessLengthAndType(taglessEncoding) < 0) { + return event; + } + } else { + valueMarker.endIndex = peekIndex + valueTid.length; + } + setCheckpoint(CheckpointLocation.AFTER_SCALAR_HEADER); + event = Event.START_SCALAR; + return event; + } + + /** + * Fills the argument encoding bitmap (AEB) of the given byte width that is expected to occur at + * the cursor's current `peekIndex`. This method may return: + *
<ul>
+ * <li>NEEDS_DATA, if not enough data is available in the stream</li>
+ * <li>NEEDS_INSTRUCTION, if the AEB was filled and the cursor is now positioned on the first byte of the
+ * macro invocation.</li>
+ * </ul>
+ * After return, `valueMarker` is set with the start and end indices of the AEB. + * @param numberOfBytes the byte width of the AEB. + * @return an Event conveying the result of the operation. + */ + public Event fillArgumentEncodingBitmap(int numberOfBytes) { + event = Event.NEEDS_DATA; + valueMarker.typeId = null; + valueMarker.startIndex = peekIndex; + valueMarker.endIndex = peekIndex + numberOfBytes; + if (isSlowMode && !fillAt(peekIndex, numberOfBytes)) { + return event; + } + peekIndex = valueMarker.endIndex; + setCheckpoint(CheckpointLocation.BEFORE_UNANNOTATED_TYPE_ID); + event = Event.NEEDS_INSTRUCTION; + return event; + } + + /** + * Reads the group continuation FlexUInt on which the cursor is currently positioned. + * @return the value of the continuation, or -1 if the end of the stream was reached. + */ + private long readGroupContinuation() { + long groupContinuation; + if (isSlowMode) { + groupContinuation = slowReadFlexUInt_1_1(); + if (groupContinuation < 0) { + return -1; + } + setCheckpoint(CheckpointLocation.BEFORE_UNANNOTATED_TYPE_ID); + } else { + groupContinuation = uncheckedReadFlexUInt_1_1(); + } + return groupContinuation; + } + + /** + * Positions the cursor after the previous token, then enters the tagged argument group that occurs at that + * position. It is up to the caller to ensure that a group actually exists at that location. This method may return: + *
<ul>
+ * <li>NEEDS_DATA, if not enough data is available in the stream to complete the operation.</li>
+ * <li>NEEDS_INSTRUCTION, if the cursor successfully entered the argument group. Subsequently, the user must
+ * invoke {@link #nextGroupedValue()} to position it on the next value.</li>
+ * </ul>
+ * @return an Event conveying the result of the operation. + */ + public Event enterTaggedArgumentGroup() { + if (skipToNextToken()) { + return event; + } + long groupContinuation = readGroupContinuation(); + if (groupContinuation < 0) { + return event; + } + ArgumentGroupMarker group = pushArgumentGroup(); + group.pageStartIndex = peekIndex; + if (groupContinuation == 0) { + // Delimited argument group. + group.pageEndIndex = -1; + } else { + group.pageEndIndex = peekIndex + groupContinuation; + } + group.taglessEncoding = null; + valueMarker.endIndex = peekIndex; + event = Event.NEEDS_INSTRUCTION; + return event; + } + + /** + * Positions the cursor after the previous token, then enters the tagless argument group that occurs at that + * position. It is up to the caller to ensure that a group actually exists at that location. This method may return: + *
<ul>
+ * <li>NEEDS_DATA, if not enough data is available in the stream to complete the operation.</li>
+ * <li>NEEDS_INSTRUCTION, if the cursor successfully entered the argument group. Subsequently, the user must
+ * invoke {@link #nextGroupedValue()} to position it on the next value.</li>
+ * </ul>
+ * @param taglessEncoding the primitive type of the values in the group. + * @return an Event conveying the result of the operation. + */ + public Event enterTaglessArgumentGroup(TaglessEncoding taglessEncoding) { + if (skipToNextToken()) { + return event; + } + long indexBeforeFirstContinuation = peekIndex; + long groupContinuation = readGroupContinuation(); + if (groupContinuation < 0) { + return event; + } + if (groupContinuation == 0) { + // This is an empty group. Rather than storing extra state to track this rare special case, simply + // rewind and cause the continuation to be read again during nextGroupedValue(). + peekIndex = indexBeforeFirstContinuation; + } + ArgumentGroupMarker group = pushArgumentGroup();; + group.pageStartIndex = peekIndex; + group.pageEndIndex = peekIndex + groupContinuation; + group.taglessEncoding = taglessEncoding; + valueMarker.endIndex = peekIndex; + event = Event.NEEDS_INSTRUCTION; + return event; + } + + /** + * Attempts to fill the current page of the current argument group. This should only be called when it has been + * determined that the page is not already buffered in its entirety. + * @param group the group containing the page to fill. + * @return true if not enough data was available to fill the page; otherwise, false. + * @throws IonException if the cursor is not in 'slow' mode, indicating unexpected EOF. + */ + private boolean fillArgumentGroupPage(ArgumentGroupMarker group) { + if (isSlowMode) { + // Fill the entire page. + if (!fillAt(group.pageStartIndex, group.pageEndIndex - group.pageStartIndex)) { + event = Event.NEEDS_DATA; + return true; + } + // TODO performance: exit slow mode until the page is finished. + } else { + throw new IonException("Unexpected EOF: argument group extended beyond the end of the buffer."); + } + return false; + } + + /** + * Sets the checkpoint based on whether a scalar or container header has just been read. 
It is up to the caller + * to ensure that the cursor is positioned immediately after a value header. + */ + private void setCheckpointAfterValueHeader() { + switch (event) { + case START_SCALAR: + setCheckpoint(CheckpointLocation.AFTER_SCALAR_HEADER); + break; + case START_CONTAINER: + setCheckpoint(CheckpointLocation.AFTER_CONTAINER_HEADER); + break; + case NEEDS_INSTRUCTION: + // A macro invocation header has just been read. + setCheckpoint(CheckpointLocation.BEFORE_UNANNOTATED_TYPE_ID); + break; + default: + throw new IllegalStateException(); + } + } + + /** + * Positions the cursor on the next value in the tagged group. Upon return, the value will be filled and + * `valueMarker` set to the value's start and end indices. + * @param group the group to which the value belongs. + * @return an Event conveying the result of the operation. + */ + private Event nextGroupedTaggedValue(ArgumentGroupMarker group) { + boolean isUserValue; // if false, the header represents no-op padding + if (group.pageEndIndex < 0) { + // Delimited. 
+ int b; + if (isSlowMode) { + b = slowReadByte(); + if (b < 0) { + event = Event.NEEDS_DATA; + return event; + } + if (b == (OpCodes.DELIMITED_END_MARKER & SINGLE_BYTE_MASK)) { + group.pageEndIndex = peekIndex; + setCheckpoint(CheckpointLocation.BEFORE_UNANNOTATED_TYPE_ID); + event = Event.NEEDS_INSTRUCTION; + return event; + } + isUserValue = slowReadHeader(b, false, valueMarker); + } else { + b = buffer[(int)(peekIndex++)] & SINGLE_BYTE_MASK; + if (b == (OpCodes.DELIMITED_END_MARKER & SINGLE_BYTE_MASK)) { + group.pageEndIndex = peekIndex; + event = Event.NEEDS_INSTRUCTION; + return event; + } + isUserValue = uncheckedReadHeader(b, false, valueMarker); + setCheckpointAfterValueHeader(); + } + } else { + if (peekIndex == group.pageEndIndex) { + // End of the group + event = Event.NEEDS_INSTRUCTION; + return event; + } + if (group.pageEndIndex > limit && fillArgumentGroupPage(group)) { + return event; + } + isUserValue = uncheckedReadHeader(buffer[(int)(peekIndex++)] & SINGLE_BYTE_MASK, false, valueMarker); + setCheckpointAfterValueHeader(); + } + valueTid = valueMarker.typeId; + if (!isUserValue) { + throw new IonException("No-op padding is not currently supported in argument groups."); + } + return event; + } + + /** + * Positions the cursor on the next value in the tagless group. Upon return, the value will be filled and + * `valueMarker` set to the value's start and end indices. + * @param group the group to which the value belongs. + * @return an Event conveying the result of the operation. + */ + private Event nextGroupedTaglessValue(ArgumentGroupMarker group) { + if (peekIndex == group.pageEndIndex) { + // End of the page. 
+ long continuation = readGroupContinuation(); + if (continuation == 0) { + // End of the group + event = Event.NEEDS_INSTRUCTION; + return event; + } + group.pageEndIndex = peekIndex + continuation; + } + if (group.pageEndIndex > limit && fillArgumentGroupPage(group)) { + return event; + } + // TODO performance: for fixed-width tagless types, the following could be skipped after the first value. + nextTaglessValue(group.taglessEncoding); + return event; + } + + /** + * Positions the cursor on the next value in the group. Upon return, the value will be filled and `valueMarker` set + * to the value's start and end indices. This method may return: + *
<ul>
+ * <li>NEEDS_DATA, if not enough data is available in the stream</li>
+ * <li>START_SCALAR, if the reader is now positioned on a scalar value</li>
+ * <li>START_CONTAINER, if the reader is now positioned on a container value</li>
+ * <li>NEEDS_INSTRUCTION, if the cursor reached the end of the argument group. Subsequently, the caller must
+ * call {@link #exitArgumentGroup()}.</li>
+ * </ul>
+ * @return an Event conveying the result of the operation. + */ + public Event nextGroupedValue() { + ArgumentGroupMarker group = argumentGroupStack[argumentGroupIndex]; + if (peekIndex < valueMarker.endIndex) { + peekIndex = valueMarker.endIndex; + } + if (group.taglessEncoding == null) { + return nextGroupedTaggedValue(group); + } + return nextGroupedTaglessValue(group); + } + + /** + * Seeks the cursor to the end of the current page of the argument group. + * @param group the group in which to seek. + * @return true if there was not enough data to complete the seek; otherwise, false. + */ + private boolean seekToEndOfArgumentGroupPage(ArgumentGroupMarker group) { + if (isSlowMode) { + if (slowSeek(group.pageEndIndex - offset)) { + return true; + } + peekIndex = offset; + } else { + peekIndex = group.pageEndIndex; + } + return false; + } + + // Dummy delimited container to be used when seeking forward to a delimited end marker of a synthetic container, + // like an argument group. + private static final IonTypeID DUMMY_DELIMITED_CONTAINER = TYPE_IDS_1_1[OpCodes.DELIMITED_SEXP & SINGLE_BYTE_MASK]; + + /** + * Seeks to the end of the current delimited argument group. + * @return true if not enough data was available to complete the seek; otherwise, false. + */ + private boolean seekToEndOfDelimitedArgumentGroup() { + // Push a dummy delimited container onto the stack, preparing the cursor to seek forward to the delimited end + // marker applicable at the current depth. + pushContainer(); + parent.endIndex = -1; + parent.typeId = DUMMY_DELIMITED_CONTAINER; + boolean isEof; + if (isSlowMode) { + isEof = slowSkipRemainingDelimitedContainerElements_1_1(); + } else { + isEof = uncheckedSkipRemainingDelimitedContainerElements_1_1(); + } + // Pop the dummy delimited container from the stack. 
+ if (--containerIndex >= 0) { + parent = containerStack[containerIndex]; + } else { + parent = null; + containerIndex = -1; + } + return isEof; + } + + /** + * Exits the cursor's current tagged argument group. + * @param group the group to exit. + * @return an Event conveying the result of the operation (either NEEDS_DATA or NEEDS_INSTRUCTION). + */ + private Event exitTaggedArgumentGroup(ArgumentGroupMarker group) { + if (group.pageEndIndex < 0) { + if (seekToEndOfDelimitedArgumentGroup()) { + return event; + } + } else if (seekToEndOfArgumentGroupPage(group)) { + return event; + } + event = Event.NEEDS_INSTRUCTION; + return event; + } + + /** + * Exits the cursor's current tagless argument group. + * @param group the group to exit. + * @return an Event conveying the result of the operation (either NEEDS_DATA or NEEDS_INSTRUCTION). + */ + private Event exitTaglessArgumentGroup(ArgumentGroupMarker group) { + long continuation = -1; + while (continuation != 0) { + if (seekToEndOfArgumentGroupPage(group)) { + return event; + } + continuation = readGroupContinuation(); + if (continuation < 0) { + return event; + } + group.pageEndIndex = peekIndex + continuation; + } + event = Event.NEEDS_INSTRUCTION; + return event; + } + + /** + * Exits the cursor's current argument group. This method may return: + *
<ul>
+ * <li>NEEDS_DATA, if not enough data is available in the stream to exit the group.</li>
+ * <li>NEEDS_INSTRUCTION, if the cursor successfully exited the argument group. Subsequently, the user must
+ * invoke a method on the cursor to position it on the next value.</li>
+ * </ul>
+ * @return an Event conveying the result of the operation. + */ + public Event exitArgumentGroup() { + ArgumentGroupMarker group = argumentGroupStack[argumentGroupIndex]; + if (group.pageEndIndex >= 0 && peekIndex >= group.pageEndIndex) { + event = Event.NEEDS_INSTRUCTION; + return event; + } + event = Event.NEEDS_DATA; + if (group.taglessEncoding == null) { + return exitTaggedArgumentGroup(group); + } + return exitTaglessArgumentGroup(group); + } + @Override public Event fillValue() { event = Event.VALUE_READY; @@ -1795,7 +3612,7 @@ private Event slowFillValue() { event = Event.NEEDS_DATA; if (valueMarker.endIndex == DELIMITED_MARKER) { - if (slowFillDelimitedContainer_1_1()) { + if (!slowFindDelimitedEnd_1_1()) { return event; } } @@ -1835,6 +3652,14 @@ Marker getValueMarker() { return valueMarker; } + long getMacroInvocationId() { + return macroInvocationId; + } + + boolean isSystemInvocation() { + return isSystemInvocation; + } + /** * Slices the buffer using the given offset and limit. Slices are treated as if they were at the top level. This * can be used to seek the reader to a "span" of bytes that represent a value in the stream. @@ -1937,6 +3762,7 @@ public void close() { } buffer = null; containerStack = null; + argumentGroupStack = null; byteBuffer = null; terminate(); } diff --git a/src/main/java/com/amazon/ion/impl/IonRawTextWriter_1_1.kt b/src/main/java/com/amazon/ion/impl/IonRawTextWriter_1_1.kt new file mode 100644 index 0000000000..146d67d0f2 --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/IonRawTextWriter_1_1.kt @@ -0,0 +1,476 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl + +import com.amazon.ion.* +import com.amazon.ion.impl.IonRawTextWriter_1_1.ContainerType.* +import com.amazon.ion.impl.IonRawTextWriter_1_1.ContainerType.List +import com.amazon.ion.impl.bin.* +import com.amazon.ion.impl.macro.* +import com.amazon.ion.system.* +import com.amazon.ion.util.* +import java.io.OutputStream +import java.math.BigDecimal +import java.math.BigInteger + +/** + * A raw writer for Ion 1.1 text. This should be combined with managed writer to handle concerns such as macros and + * possible symbol interning. + * + * Notes: + * - Never writes using "long string" syntax in order to simplify the writer. + * - Does not try to resolve symbol tokens. That is the concern of the managed writer. + * - To make it easier to concatenate streams, this eagerly emits a top-level separator after each top-level syntax item. + */ +class IonRawTextWriter_1_1 internal constructor( + private val options: _Private_IonTextWriterBuilder_1_1, + private val output: _Private_IonTextAppender, +) : IonRawWriter_1_1, PrivateIonRawWriter_1_1 { + + companion object { + const val IVM = "\$ion_1_1" + + @JvmStatic + fun from(output: OutputStream, blockSize: Int, options: IonTextWriterBuilder_1_1): IonRawTextWriter_1_1 { + val bufferedOutput = BufferedOutputStreamFastAppendable( + output, + BlockAllocatorProviders.basicProvider().vendAllocator(blockSize) + ) + return IonRawTextWriter_1_1( + options as _Private_IonTextWriterBuilder_1_1, + _Private_IonTextAppender.forFastAppendable(bufferedOutput, Charsets.UTF_8) + ) + } + } + + enum class ContainerType { + List, + SExp, + Struct, + EExpression, + ExpressionGroup, + Top, + } + + private var closed = false + + private val ancestorContainersStack: ArrayList = ArrayList() + private var currentContainer: ContainerType = Top + private var currentContainerHasValues = false + + private var isPendingSeparator = false + private var isPendingLeadingWhitespace = false + + 
private var fieldNameText: CharSequence? = null + private var fieldNameId: Int = -1 + private var hasFieldName = false + + private var annotationsTextBuffer = arrayOfNulls(8) + private var annotationsIdBuffer = IntArray(8) + private var numAnnotations = 0 + + private inline fun openValue(valueWriterExpression: () -> Unit) { + if (currentContainer == Struct) { + confirm(hasFieldName) { "Values in a struct require a field name." } + } + val separatorCharacter = when (currentContainer) { + List, Struct -> "," + EExpression, SExp, ExpressionGroup -> " " + Top -> options.topLevelSeparator() + } + + if (options.isPrettyPrintOn && !forceNoNewlines) { + if (isPendingSeparator && !IonTextUtils.isAllWhitespace(separatorCharacter)) { + // Only bother if the separator is non-whitespace. + output.appendAscii(separatorCharacter) + } + if (isPendingSeparator || isPendingLeadingWhitespace) { + output.appendAscii(options.lineSeparator()) + output.appendAscii(" ".repeat(ancestorContainersStack.size * 2)) + } + } else if (isPendingSeparator) { + output.appendAscii(separatorCharacter) + } + + isPendingSeparator = false + + if (hasFieldName) { + if (fieldNameText != null) { + output.printSymbol(fieldNameText) + output.appendAscii(':') + if (options.isPrettyPrintOn) output.appendAscii(" ") + fieldNameText = null + } else { + output.appendAscii("$") + output.printInt(fieldNameId.toLong()) + output.appendAscii(":") + if (options.isPrettyPrintOn) output.appendAscii(" ") + fieldNameId = -1 + } + } + + for (i in 0 until numAnnotations) { + if (annotationsTextBuffer[i] != null) { + output.printSymbol(annotationsTextBuffer[i]) + annotationsTextBuffer[i] = null + } else { + output.appendAscii("$") + output.printInt(annotationsIdBuffer[i].toLong()) + annotationsIdBuffer[i] = -1 + } + output.appendAscii("::") + } + + hasFieldName = false + numAnnotations = 0 + valueWriterExpression() + } + + private inline fun closeValue(valueWriterExpression: () -> Unit) { + valueWriterExpression() + if 
(currentContainer == Top) { + output.appendAscii(options.topLevelSeparator()) + isPendingSeparator = false + } else { + isPendingSeparator = true + } + isPendingLeadingWhitespace = false + currentContainerHasValues = true + } + + private inline fun writeScalar(valueWriterExpression: () -> Unit) { + // Note—it doesn't matter which order we combine these. The result will be the same because of where + // valueWriterExpression is called in openValue and closeValue. + openValue { closeValue(valueWriterExpression) } + } + + override fun close() { + if (closed) return + flush() + output.close() + closed = true + } + + override fun flush() { + if (closed) return + confirm(depth() == 0) { "Cannot call finish() while in a container" } + confirm(numAnnotations == 0) { "Cannot call finish with dangling annotations" } + output.flush() + } + + override fun writeIVM() { + confirm(currentContainer == Top) { "IVM can only be written at the top level of an Ion stream." } + confirm(numAnnotations == 0) { "Cannot write an IVM with annotations" } + output.appendAscii(IVM) + output.appendAscii(options.topLevelSeparator()) + isPendingSeparator = false + } + + override fun isInStruct(): Boolean = currentContainer == Struct + + override fun depth(): Int = ancestorContainersStack.size + + /** + * Ensures that there is enough space in the annotation buffers for [n] annotations. + * If more space is needed, it over-allocates by 8 to ensure that we're not continually allocating when annotations + * are being added one by one. + */ + private inline fun ensureAnnotationSpace(n: Int) { + // We only need to check the size of one of the arrays because we always keep them the same size. 
+ if (annotationsIdBuffer.size < n) { + val oldIds = annotationsIdBuffer + annotationsIdBuffer = IntArray(n + 8) + oldIds.copyInto(annotationsIdBuffer) + val oldText = annotationsTextBuffer + annotationsTextBuffer = arrayOfNulls(n + 8) + oldText.copyInto(annotationsTextBuffer) + } + } + + override fun writeAnnotations(annotation0: Int) { + ensureAnnotationSpace(numAnnotations + 1) + annotationsIdBuffer[numAnnotations++] = annotation0 + } + + override fun writeAnnotations(annotation0: Int, annotation1: Int) { + ensureAnnotationSpace(numAnnotations + 2) + annotationsIdBuffer[numAnnotations++] = annotation0 + annotationsIdBuffer[numAnnotations++] = annotation1 + } + + override fun writeAnnotations(annotations: IntArray) { + ensureAnnotationSpace(numAnnotations + annotations.size) + annotations.copyInto(annotationsIdBuffer, numAnnotations) + numAnnotations += annotations.size + } + + override fun writeAnnotations(annotation0: SystemSymbols_1_1) = writeAnnotations(annotation0.text) + + override fun writeAnnotations(annotation0: CharSequence) { + ensureAnnotationSpace(numAnnotations + 1) + annotationsTextBuffer[numAnnotations++] = annotation0 + } + + override fun writeAnnotations(annotation0: CharSequence, annotation1: CharSequence) { + ensureAnnotationSpace(numAnnotations + 2) + annotationsTextBuffer[numAnnotations++] = annotation0 + annotationsTextBuffer[numAnnotations++] = annotation1 + } + + override fun writeAnnotations(annotations: Array) { + if (annotations.isEmpty()) return + ensureAnnotationSpace(numAnnotations + annotations.size) + annotations.copyInto(annotationsTextBuffer, numAnnotations) + numAnnotations += annotations.size + } + + override fun _private_clearAnnotations() { + numAnnotations = 0 + } + + override fun _private_hasFirstAnnotation(sid: Int, text: String?): Boolean { + if (numAnnotations == 0) return false + if (sid >= 0 && annotationsIdBuffer[0] == sid) { + return true + } + if (text != null && annotationsTextBuffer[0] == text) { + return true + 
} + return false + } + + override fun _private_hasFieldName(): Boolean = hasFieldName + + override fun writeFieldName(sid: Int) { + confirm(currentContainer == Struct) { "Cannot write field name outside of a struct." } + confirm(!hasFieldName) { "Field name already set." } + fieldNameId = sid + hasFieldName = true + } + + override fun writeFieldName(text: CharSequence) { + confirm(currentContainer == Struct) { "Cannot write field name outside of a struct." } + confirm(!hasFieldName) { "Field name already set." } + fieldNameText = text + hasFieldName = true + } + + override fun writeFieldName(symbol: SystemSymbols_1_1) = writeFieldName(symbol.text) + + override fun writeNull() = writeScalar { + output.appendAscii("null") + } + + override fun writeNull(type: IonType) = writeScalar { + val nullimage = if (options._untyped_nulls) { "null" } else { + when (type) { + IonType.NULL -> "null" + IonType.BOOL -> "null.bool" + IonType.INT -> "null.int" + IonType.FLOAT -> "null.float" + IonType.DECIMAL -> "null.decimal" + IonType.TIMESTAMP -> "null.timestamp" + IonType.SYMBOL -> "null.symbol" + IonType.STRING -> "null.string" + IonType.BLOB -> "null.blob" + IonType.CLOB -> "null.clob" + IonType.SEXP -> "null.sexp" + IonType.LIST -> "null.list" + IonType.STRUCT -> "null.struct" + else -> throw IllegalStateException("unexpected type $type") + } + } + output.appendAscii(nullimage) + } + + override fun writeBool(value: Boolean) = writeScalar { output.appendAscii(if (value) "true" else "false") } + + override fun writeInt(value: Long) = writeScalar { output.printInt(value) } + override fun writeInt(value: BigInteger) = writeScalar { output.printInt(value) } + + override fun writeFloat(value: Float) = writeFloat(value.toDouble()) + override fun writeFloat(value: Double) = writeScalar { output.printFloat(options, value) } + + override fun writeDecimal(value: BigDecimal) = writeScalar { output.printDecimal(options, value) } + + override fun writeTimestamp(value: Timestamp) = 
writeScalar { + writeTimestampHelper( + toMillis = { value.millis }, + toString = { value.toString() }, + ) + } + + private inline fun writeTimestampHelper(toMillis: () -> Long, toString: () -> String) { + if (options._timestamp_as_millis) { + output.appendAscii("${toMillis()}") + } else if (options._timestamp_as_string) { + // Timestamp is ASCII-safe so this is easy + output.appendAscii('"') + output.appendAscii(toString()) + output.appendAscii('"') + } else { + output.appendAscii(toString()) + } + } + + override fun writeSymbol(id: Int) = writeScalar { + output.appendAscii('$') + output.printInt(id.toLong()) + } + + override fun writeSymbol(text: CharSequence) = writeScalar { + when (IonTextUtils.symbolVariant(text)) { + IonTextUtils.SymbolVariant.IDENTIFIER -> output.appendAscii(text) + IonTextUtils.SymbolVariant.OPERATOR -> if (currentContainer == SExp) output.appendAscii(text) else output.printQuotedSymbol(text) + IonTextUtils.SymbolVariant.QUOTED -> output.printQuotedSymbol(text) + } + } + + override fun writeSymbol(symbol: SystemSymbols_1_1) = writeSymbol(symbol.text) + + override fun writeString(value: CharSequence) = writeScalar { output.printString(value) } + + override fun writeBlob(value: ByteArray, start: Int, length: Int) = writeScalar { output.printBlob(options, value, start, length) } + + override fun writeClob(value: ByteArray, start: Int, length: Int) = writeScalar { output.printClob(options, value, start, length) } + + override fun stepInList(usingLengthPrefix: Boolean) { + openValue { output.appendAscii("[") } + ancestorContainersStack.add(currentContainer) + currentContainer = List + currentContainerHasValues = false + isPendingLeadingWhitespace = true + } + + override fun stepInSExp(usingLengthPrefix: Boolean) { + startSexp { output.appendAscii("(") } + } + + private inline fun startSexp(openingTokens: () -> Unit) { + openValue(openingTokens) + ancestorContainersStack.add(currentContainer) + currentContainer = SExp + currentContainerHasValues 
= false + isPendingLeadingWhitespace = true + } + + override fun stepInStruct(usingLengthPrefix: Boolean) { + openValue { output.appendAscii("{") } + ancestorContainersStack.add(currentContainer) + currentContainer = Struct + currentContainerHasValues = false + isPendingLeadingWhitespace = true + } + + override fun stepInEExp(name: CharSequence) { + confirm(numAnnotations == 0) { "Cannot annotate a macro invocation" } + openValue { + output.appendAscii("(:") + output.printSymbol(name) + } + ancestorContainersStack.add(currentContainer) + currentContainer = EExpression + currentContainerHasValues = false + isPendingSeparator = true // Treat the macro name as if it is a value that needs a separator. + } + + override fun stepInEExp(id: Int, usingLengthPrefix: Boolean, macro: Macro) { + confirm(numAnnotations == 0) { "Cannot annotate a macro invocation" } + openValue { + output.appendAscii("(:") + output.printInt(id.toLong()) + } + ancestorContainersStack.add(currentContainer) + currentContainer = EExpression + currentContainerHasValues = false + isPendingSeparator = true // Treat the macro id as if it is a value that needs a separator. + } + + override fun stepInEExp(systemMacro: SystemMacro) { + confirm(numAnnotations == 0) { "Cannot annotate a macro invocation" } + openValue { + output.appendAscii("(:\$ion::") + output.printSymbol(systemMacro.macroName) + } + ancestorContainersStack.add(currentContainer) + currentContainer = EExpression + currentContainerHasValues = false + isPendingSeparator = true // Treat the macro name as if it is a value that needs a separator. 
+ } + + override fun stepInExpressionGroup(usingLengthPrefix: Boolean) { + confirm(numAnnotations == 0) { "Cannot annotate an expression group" } + confirm(currentContainer == EExpression) { "Can only create an expression group in a macro invocation" } + openValue { output.appendAscii("(::") } + ancestorContainersStack.add(currentContainer) + currentContainer = ExpressionGroup + currentContainerHasValues = false + isPendingLeadingWhitespace = true + isPendingSeparator = true + } + + override fun stepOut() { + confirm(numAnnotations == 0) { "Cannot step out with a dangling annotation" } + confirm(!hasFieldName) { "Cannot step out with a dangling field name" } + val endChar = when (currentContainer) { + Struct -> '}' + SExp, EExpression, ExpressionGroup -> ')' + List -> ']' + Top -> throw IonException("Nothing to step out of.") + } + + currentContainer = ancestorContainersStack.removeLast() + + closeValue { + if (options.isPrettyPrintOn && currentContainerHasValues && !forceNoNewlines) { + output.appendAscii(options.lineSeparator()) + output.appendAscii(" ".repeat(ancestorContainersStack.size * 2)) + } + output.appendAscii(endChar) + } + } + + private var forceNoNewlines: Boolean = false + override fun forceNoNewlines(boolean: Boolean) { forceNoNewlines = boolean } + + override fun writeMacroParameterCardinality(cardinality: Macro.ParameterCardinality) { + output.appendAscii(cardinality.sigil) + } + + override fun stepInTdlExpressionGroup() { + startSexp { output.appendAscii("(..") } + isPendingSeparator = true + } + + override fun stepInTdlMacroInvocation(macroRef: Int) { + startSexp { + output.appendAscii("(.") + output.printInt(macroRef.toLong()) + } + isPendingSeparator = true + } + + override fun stepInTdlMacroInvocation(macroRef: String) { + startSexp { + output.appendAscii("(.") + output.appendAscii(macroRef) + } + isPendingSeparator = true + } + + override fun stepInTdlSystemMacroInvocation(systemSymbol: SystemSymbols_1_1) { + startSexp { + 
output.appendAscii("(.\$ion::") + output.appendAscii(systemSymbol.text) + } + isPendingSeparator = true + } + + override fun writeTdlVariableExpansion(variableName: String) { + writeScalar { + output.appendAscii("(%") + output.appendAscii(variableName) + output.appendAscii(")") + } + } +} diff --git a/src/main/java/com/amazon/ion/impl/IonRawWriter_1_1.kt b/src/main/java/com/amazon/ion/impl/IonRawWriter_1_1.kt new file mode 100644 index 0000000000..f5e4f02a91 --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/IonRawWriter_1_1.kt @@ -0,0 +1,227 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl + +import com.amazon.ion.IonType +import com.amazon.ion.Timestamp +import com.amazon.ion.impl.macro.* +import java.math.BigDecimal +import java.math.BigInteger + +/** + * Writes Ion 1.1 data to an output source. + * + * This interface allows the user to write Ion data without being concerned about which output format is being used. + */ +interface IonRawWriter_1_1 { + + /** + * Indicates that writing is completed and all buffered data should be written and flushed as if this were the end + * of the Ion data stream. For example, an Ion binary writer will finalize any local symbol table, write all + * top-level values, and then flush. + * + * This method may only be called when all top-level values are completely written and [`stepped out`][stepOut]. + * + * Implementations should allow the application to continue writing further top-level values following the semantics + * for concatenating Ion data streams. + */ + fun flush() + + /** + * Closes this stream and releases any system resources associated with it. + * If the stream is already closed then invoking this method has no effect. + * + * If the cursor is between top-level values, this method will [flush] before closing the underlying output stream. + * If not, the resulting data may be incomplete and invalid Ion. 
+ */ + fun close() + + /** Returns true if the writer is currently in a struct (indicating that field names are required). */ + fun isInStruct(): Boolean + + /** Returns the current depth of containers the writer is at. This is 0 if the writer is at top-level. */ + fun depth(): Int + + /** + * Writes the Ion 1.1 IVM. IVMs can only be written at the top level of an Ion stream. + * @throws com.amazon.ion.IonException if in any container. + */ + fun writeIVM() + + /** + * *Attempts* to reset the current annotations. This is not guaranteed to succeed, and will + * throw an [IllegalStateException] if annotations have eagerly been written to the output + * buffer. + * + * TODO: Decide if this is something that should be public. It seems advantageous for the 1.1 + * writer if we reserve the ability for it to eagerly write annotations, and if we want + * to keep this behavior, then it's best _not_ to expose this method. + */ + fun _private_clearAnnotations() + + /** + * Returns true if the writer has at least one annotation set and the first annotation matches the + * given sid OR text. + */ + fun _private_hasFirstAnnotation(sid: Int, text: String?): Boolean + + /** + * Writes one annotation for the next value. + * [writeAnnotations] may be called more than once to build up a list of annotations. + */ + fun writeAnnotations(annotation0: SystemSymbols_1_1) + + /** + * Writes one annotation for the next value. + * [writeAnnotations] may be called more than once to build up a list of annotations. + */ + fun writeAnnotations(annotation0: Int) + + /** + * Writes two annotations for the next value. + * [writeAnnotations] may be called more than once to build up a list of annotations. + */ + fun writeAnnotations(annotation0: Int, annotation1: Int) + + /** + * Writes any number of annotations for the next value. + * [writeAnnotations] may be called more than once to build up a list of annotations. 
+ */ + fun writeAnnotations(annotations: IntArray) + + /** + * Writes one annotation for the next value. + * [writeAnnotations] may be called more than once to build up a list of annotations. + */ + fun writeAnnotations(annotation0: CharSequence) + + /** + * Writes two annotations for the next value. + * [writeAnnotations] may be called more than once to build up a list of annotations. + */ + fun writeAnnotations(annotation0: CharSequence, annotation1: CharSequence) + + /** + * Writes any number of annotations for the next value. + * [writeAnnotations] may be called more than once to build up a list of annotations. + */ + fun writeAnnotations(annotations: Array) + + /** + * TODO: Consider making this a public method. It's probably safe to do so. + */ + fun _private_hasFieldName(): Boolean + + /** + * Writes the field name for the next value. Must be called while in a struct and must be called before [writeAnnotations]. + * @throws com.amazon.ion.IonException if annotations are already written for the value or if not in a struct. + */ + fun writeFieldName(symbol: SystemSymbols_1_1) + + /** + * Writes the field name for the next value. Must be called while in a struct and must be called before [writeAnnotations]. + * @throws com.amazon.ion.IonException if annotations are already written for the value or if not in a struct. + */ + fun writeFieldName(text: CharSequence) + + /** + * Writes the field name for the next value. Must be called while in a struct and must be called before [writeAnnotations]. + * @throws com.amazon.ion.IonException if annotations are already written for the value or if not in a struct. + */ + fun writeFieldName(sid: Int) + + /** + * Steps into a List. + * + * The [usingLengthPrefix] parameter is a suggestion. Implementations may ignore it if it is not relevant for that + * particular implementation. All implementations must document their specific behavior for this method. 
+ */ + fun stepInList(usingLengthPrefix: Boolean) + + /** + * Steps into a SExp. + * + * The [usingLengthPrefix] parameter is a suggestion. Implementations may ignore it if it is not relevant for that + * particular implementation. All implementations must document their specific behavior for this method. + */ + fun stepInSExp(usingLengthPrefix: Boolean) + + /** + * Steps into a Struct. + * + * The [usingLengthPrefix] parameter is a suggestion. Implementations may ignore it if it is not relevant for that + * particular implementation. All implementations must document their specific behavior for this method. + */ + fun stepInStruct(usingLengthPrefix: Boolean) + + /** + * Steps into an expression group. + * An expression group is not a container in the Ion data model, but it is a container from an encoding perspective. + */ + fun stepInExpressionGroup(usingLengthPrefix: Boolean) + + /** + * Writes a macro invocation for the given macro name. + * A macro is not a container in the Ion data model, but it is a container from an encoding perspective. + */ + fun stepInEExp(name: CharSequence) + + /** + * Writes a macro invocation for the given id corresponding to a macro in the macro table. + * A macro is not a container in the Ion data model, but it is a container from an encoding perspective. + */ + fun stepInEExp(id: Int, usingLengthPrefix: Boolean, macro: Macro) + + /** + * Writes a system macro invocation for the given system macro. + * A macro is not a container in the Ion data model, but it is a container from an encoding perspective. + * + * TODO: Consider adding `usingLengthPrefix: Boolean`. + */ + fun stepInEExp(systemMacro: SystemMacro) + + /** + * Steps out of the current container. 
+ */ + fun stepOut() + + // TODO: Doc comments for the uninteresting functions + + fun writeNull() + fun writeNull(type: IonType) + + fun writeBool(value: Boolean) + + fun writeInt(value: Long) + fun writeInt(value: BigInteger) + + fun writeFloat(value: Float) + fun writeFloat(value: Double) + + fun writeDecimal(value: BigDecimal) + + /** + * TODO: Consider adding a function for writing a timestamp that doesn't require creating a [Timestamp] instance, so + * that users don't have to allocate an intermediate between their data type and the Ion writer. E.g.: + * ``` + * fun writeTimestamp(precision: Timestamp.Precision, + * year: Int, month: Int?, day: Int?, + * hour: Int?, minute: Int?, second: Int?, + * fractionalSeconds: BigDecimal?, + * offsetMinutes: Int?) + * ``` + */ + fun writeTimestamp(value: Timestamp) + + fun writeSymbol(id: Int) + fun writeSymbol(text: CharSequence) + fun writeSymbol(symbol: SystemSymbols_1_1) + + fun writeString(value: CharSequence) + + fun writeBlob(value: ByteArray) = writeBlob(value, 0, value.size) + fun writeBlob(value: ByteArray, start: Int, length: Int) + + fun writeClob(value: ByteArray) = writeClob(value, 0, value.size) + fun writeClob(value: ByteArray, start: Int, length: Int) +} diff --git a/src/main/java/com/amazon/ion/impl/IonReaderContinuableApplication.java b/src/main/java/com/amazon/ion/impl/IonReaderContinuableApplication.java index 81b80f41bc..fae3b059ee 100644 --- a/src/main/java/com/amazon/ion/impl/IonReaderContinuableApplication.java +++ b/src/main/java/com/amazon/ion/impl/IonReaderContinuableApplication.java @@ -1,9 +1,7 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 - package com.amazon.ion.impl; -import com.amazon.ion.IonType; import com.amazon.ion.SymbolTable; import com.amazon.ion.SymbolToken; import com.amazon.ion.UnknownSymbolException; @@ -69,29 +67,4 @@ interface IonReaderContinuableApplication extends IonReaderContinuableCore { * */ SymbolToken[] getTypeAnnotationSymbols(); - - /** - * Gets the current value's field name as a symbol token (text + ID). - * If the text of the token isn't known, the result's - * {@link SymbolToken#getText()} will be null. - * If the symbol ID of the token isn't known, the result's - * {@link SymbolToken#getSid()} will be - * {@link SymbolTable#UNKNOWN_SYMBOL_ID}. - * At least one of the two fields will be defined. - * - * @return null if there is no current value or if the current value is - * not a field of a struct. - * - */ - SymbolToken getFieldNameSymbol(); - - /** - * Returns the current value as a symbol token (text + ID). - * This is only valid when {@link #getType()} returns - * {@link IonType#SYMBOL}. - * - * @return null if {@link #isNullValue()} - * - */ - SymbolToken symbolValue(); } diff --git a/src/main/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinary.java b/src/main/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinary.java index 213c300415..1a0d6f62ce 100644 --- a/src/main/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinary.java +++ b/src/main/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinary.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.io.InputStream; +import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; @@ -43,29 +44,23 @@ class IonReaderContinuableApplicationBinary extends IonReaderContinuableCoreBinary implements IonReaderContinuableApplication { // The UTF-8 encoded bytes representing the text `$ion_symbol_table`. 
- private static final byte[] ION_SYMBOL_TABLE_UTF8; - - static { - ION_SYMBOL_TABLE_UTF8 = SystemSymbols.ION_SYMBOL_TABLE.getBytes(StandardCharsets.UTF_8); - } + private static final byte[] ION_SYMBOL_TABLE_UTF8 = SystemSymbols.ION_SYMBOL_TABLE.getBytes(StandardCharsets.UTF_8); + private static final byte[] IMPORTS_UTF8 = SystemSymbols.IMPORTS.getBytes(StandardCharsets.UTF_8); + private static final byte[] SYMBOLS_UTF8 = SystemSymbols.SYMBOLS.getBytes(StandardCharsets.UTF_8); + private static final byte[] NAME_UTF8 = SystemSymbols.NAME.getBytes(StandardCharsets.UTF_8); + private static final byte[] VERSION_UTF8 = SystemSymbols.VERSION.getBytes(StandardCharsets.UTF_8); + private static final byte[] MAX_ID_UTF8 = SystemSymbols.MAX_ID.getBytes(StandardCharsets.UTF_8); // An IonCatalog containing zero shared symbol tables. private static final IonCatalog EMPTY_CATALOG = new SimpleCatalog(); // Initial capacity of the ArrayList used to hold the text in the current symbol table. - private static final int SYMBOLS_LIST_INITIAL_CAPACITY = 128; + static final int SYMBOLS_LIST_INITIAL_CAPACITY = 128; // The imports for Ion 1.0 data with no shared user imports. private static final LocalSymbolTableImports ION_1_0_IMPORTS = new LocalSymbolTableImports(SharedSymbolTable.getSystemSymbolTable(1)); - // The text representations of the symbol table that is currently in scope, indexed by symbol ID. If the element at - // a particular index is null, that symbol has unknown text. - private String[] symbols; - - // The maximum offset into the 'symbols' array that points to a valid local symbol. - private int localSymbolMaxOffset = -1; - // The catalog used by the reader to resolve shared symbol table imports. private final IonCatalog catalog; @@ -75,7 +70,7 @@ class IonReaderContinuableApplicationBinary extends IonReaderContinuableCoreBina // The shared symbol tables imported by the local symbol table that is currently in scope. 
private LocalSymbolTableImports imports = ION_1_0_IMPORTS; - // The first lowest local symbol ID in the symbol table. + // The first (lowest) local symbol ID in the symbol table. private int firstLocalSymbolId = imports.getMaxId() + 1; // The cached SymbolTable representation of the current local symbol table. Invalidated whenever a local @@ -83,7 +78,7 @@ class IonReaderContinuableApplicationBinary extends IonReaderContinuableCoreBina private SymbolTable cachedReadOnlySymbolTable = null; // The reusable annotation iterator. - private final AnnotationSequenceIterator annotationIterator = new AnnotationSequenceIterator(); + private final AnnotationMarkerIterator annotationTextIterator = new AnnotationMarkerIterator(); // ------ @@ -97,15 +92,9 @@ class IonReaderContinuableApplicationBinary extends IonReaderContinuableCoreBina IonReaderContinuableApplicationBinary(IonReaderBuilder builder, byte[] bytes, int offset, int length) { super(builder.getBufferConfiguration(), bytes, offset, length); this.catalog = builder.getCatalog() == null ? EMPTY_CATALOG : builder.getCatalog(); - symbols = new String[SYMBOLS_LIST_INITIAL_CAPACITY]; symbolTableReader = new SymbolTableReader(); - resetImports(); - registerIvmNotificationConsumer((x, y) -> { - // Note: for Ion 1.1 support, use the versions to set the proper system symbol table and local symbol table - // processing logic. - resetSymbolTable(); - resetImports(); - }); + resetImports(getIonMajorVersion(), getIonMinorVersion()); + registerIvmNotificationConsumer((x, y) -> resetEncodingContext()); } /** @@ -118,15 +107,9 @@ class IonReaderContinuableApplicationBinary extends IonReaderContinuableCoreBina IonReaderContinuableApplicationBinary(final IonReaderBuilder builder, final InputStream inputStream, byte[] alreadyRead, int alreadyReadOff, int alreadyReadLen) { super(builder.getBufferConfiguration(), inputStream, alreadyRead, alreadyReadOff, alreadyReadLen); this.catalog = builder.getCatalog() == null ? 
EMPTY_CATALOG : builder.getCatalog(); - symbols = new String[SYMBOLS_LIST_INITIAL_CAPACITY]; symbolTableReader = new SymbolTableReader(); - resetImports(); - registerIvmNotificationConsumer((x, y) -> { - // Note: for Ion 1.1 support, use the versions to set the proper system symbol table and local symbol table - // processing logic. - resetSymbolTable(); - resetImports(); - }); + resetImports(getIonMajorVersion(), getIonMinorVersion()); + registerIvmNotificationConsumer((x, y) -> resetEncodingContext()); registerOversizedValueHandler( () -> { boolean mightBeSymbolTable = true; @@ -161,38 +144,92 @@ class IonReaderContinuableApplicationBinary extends IonReaderContinuableCoreBina /** * Reusable iterator over the annotations on the current value. */ - private class AnnotationSequenceIterator implements Iterator { + private class AnnotationMarkerIterator implements Iterator { - // All of the annotation SIDs on the current value. - private IntList annotationSids; - // The index into `annotationSids` containing the next annotation to be returned. - private int index = 0; + // TODO perf: try splitting into separate iterators for SIDs and FlexSyms + boolean isSids; + // The byte position of the annotation to return from the next call to next(). 
+ long nextAnnotationPeekIndex; - void reset() { - index = 0; - annotationSids = getAnnotationSidList(); - } + long target; @Override public boolean hasNext() { - return index < annotationSids.size(); + return nextAnnotationPeekIndex < target; } @Override public String next() { - int sid = annotationSids.get(index); - String annotation = getSymbol(sid); - if (annotation == null) { - throw new UnknownSymbolException(sid); + if (isSids) { + long savedPeekIndex = peekIndex; + peekIndex = nextAnnotationPeekIndex; + int sid; + if (minorVersion == 0) { + byte b = buffer[(int) peekIndex++]; + if (b < 0) { + sid = b & 0x7F; + } else { + sid = readVarUInt_1_0(b); + } + } else { + sid = (int) readFlexInt_1_1(); + } + nextAnnotationPeekIndex = peekIndex; + peekIndex = savedPeekIndex; + return convertToString(sid); } - index++; - return annotation; + Marker marker = annotationTokenMarkers.get((int) nextAnnotationPeekIndex++); + if (marker.startIndex < 0) { + if (marker.typeId == IonTypeID.SYSTEM_SYMBOL_VALUE) { + return getSystemSymbolToken(marker).assumeText(); + } else { + // This means the endIndex represents the token's symbol ID. + return convertToString((int) marker.endIndex); + } + } + // The token is inline UTF-8 text. + java.nio.ByteBuffer utf8InputBuffer = prepareByteBuffer(marker.startIndex, marker.endIndex); + return utf8Decoder.decode(utf8InputBuffer, (int) (marker.endIndex - marker.startIndex)); + } + + SymbolToken nextSymbolToken() { + if (isSids) { + long savedPeekIndex = peekIndex; + peekIndex = nextAnnotationPeekIndex; + int sid = minorVersion == 0 ? 
readVarUInt_1_0() : (int) readFlexInt_1_1(); + nextAnnotationPeekIndex = peekIndex; + peekIndex = savedPeekIndex; + return getSymbolToken(sid); + } + Marker marker = annotationTokenMarkers.get((int) nextAnnotationPeekIndex++); + if (marker.typeId == IonTypeID.SYSTEM_SYMBOL_VALUE) { + if (marker.startIndex < 0) { + return getSystemSymbolToken(marker); + } else { + throw new IllegalStateException("This should be unreachable."); + } + } + if (marker.startIndex < 0) { + // This means the endIndex represents the token's symbol ID. + return getSymbolToken((int) marker.endIndex); + } + // The token is inline UTF-8 text. + ByteBuffer utf8InputBuffer = prepareByteBuffer(marker.startIndex, marker.endIndex); + return new SymbolTokenImpl(utf8Decoder.decode(utf8InputBuffer, (int) (marker.endIndex - marker.startIndex)), -1); } @Override public void remove() { throw new UnsupportedOperationException("This iterator does not support element removal."); } + + private String convertToString(int symbolId) { + String annotation = getSymbol(symbolId); + if (annotation == null) { + throw new UnknownSymbolException(symbolId); + } + return annotation; + } } /** @@ -433,24 +470,20 @@ public SymbolTable[] getImportedTablesNoCopy() { } } - /** - * Reset the local symbol table to the system symbol table. - */ - private void resetSymbolTable() { - // The following line is not required for correctness, but it frees the references to the old symbols, - // potentially allowing them to be garbage collected. - Arrays.fill(symbols, 0, localSymbolMaxOffset + 1, null); - localSymbolMaxOffset = -1; + @Override + protected void resetSymbolTable() { + super.resetSymbolTable(); cachedReadOnlySymbolTable = null; } - /** - * Reset the list of imported shared symbol tables. - */ - private void resetImports() { - // Note: when support for the next version of Ion is added, conditionals on 'majorVersion' and 'minorVersion' - // must be added here. 
- imports = ION_1_0_IMPORTS; + + @Override + protected void resetImports(int major, int minor) { + if (minor == 0) { + imports = ION_1_0_IMPORTS; + } else { + imports = LocalSymbolTableImports.EMPTY; + } firstLocalSymbolId = imports.getMaxId() + 1; } @@ -478,7 +511,8 @@ protected void restoreSymbolTable(SymbolTable symbolTable) { // Note: this will only happen when `symbolTable` is the system symbol table. resetSymbolTable(); cachedReadOnlySymbolTable = symbolTable; - resetImports(); + // FIXME: This should take into account the version at the point in the stream. + resetImports(1, 0); localSymbolMaxOffset = -1; } } @@ -537,12 +571,8 @@ private String getSymbolString(int sid, LocalSymbolTableImports importedSymbols, return localSymbols[sid - (importedSymbols.getMaxId() + 1)]; } - /** - * Retrieves the String text for the given symbol ID. - * @param sid a symbol ID. - * @return a String. - */ - String getSymbol(int sid) { + @Override + public String getSymbol(int sid) { if (sid < firstLocalSymbolId) { return imports.findKnownSymbol(sid); } @@ -553,12 +583,8 @@ String getSymbol(int sid) { return symbols[localSymbolOffset]; } - /** - * Creates a SymbolToken representation of the given symbol ID. - * @param sid a symbol ID. - * @return a SymbolToken. - */ - private SymbolToken getSymbolToken(int sid) { + @Override + protected SymbolToken getSymbolToken(int sid) { int symbolTableSize = localSymbolMaxOffset + firstLocalSymbolId + 1; // +1 because the max ID is 0-indexed. if (sid >= symbolTableSize) { throw new UnknownSymbolException(sid); @@ -571,13 +597,6 @@ private SymbolToken getSymbolToken(int sid) { return new SymbolTokenImpl(text, sid); } - private void growSymbolsArray(int shortfall) { - int newSize = nextPowerOfTwo(symbols.length + shortfall); - String[] resized = new String[newSize]; - System.arraycopy(symbols, 0, resized, 0, localSymbolMaxOffset + 1); - symbols = resized; - } - /** * Uses the underlying raw reader to read the symbol tables from the stream. 
Capable of resuming if not enough * data is currently available to complete the symbol table. @@ -611,25 +630,46 @@ private void finishReadingSymbolTableStruct() { stepOutOfContainer(); if (!hasSeenImports) { resetSymbolTable(); - resetImports(); - } - if (newSymbols != null) { - int numberOfNewSymbols = newSymbols.size(); - int numberOfAvailableSlots = symbols.length - (localSymbolMaxOffset + 1); - int shortfall = numberOfNewSymbols - numberOfAvailableSlots; - if (shortfall > 0) { - growSymbolsArray(shortfall); - } - int i = localSymbolMaxOffset; - for (String newSymbol : newSymbols) { - symbols[++i] = newSymbol; - } - localSymbolMaxOffset += newSymbols.size(); + resetImports(getIonMajorVersion(), getIonMinorVersion()); } + installSymbols(newSymbols); state = State.READING_VALUE; } + /** + * Gets the symbol ID for the Marker representing a symbol token. + * @param marker the symbol token marker. + * @return a symbol ID, or -1 if unknown or not a system symbol. + */ + private int mapInlineTextToSystemSid(Marker marker) { + if (marker.startIndex < 0) { + // Symbol ID is already populated. + return (int) marker.endIndex; + } + if (bytesMatch(SYMBOLS_UTF8, buffer, (int) marker.startIndex, (int) marker.endIndex)) { + return SYMBOLS_SID; + } + if (bytesMatch(IMPORTS_UTF8, buffer, (int) marker.startIndex, (int) marker.endIndex)) { + return IMPORTS_SID; + } + if (bytesMatch(NAME_UTF8, buffer, (int) marker.startIndex, (int) marker.endIndex)) { + return NAME_SID; + } + if (bytesMatch(VERSION_UTF8, buffer, (int) marker.startIndex, (int) marker.endIndex)) { + return VERSION_SID; + } + if (bytesMatch(MAX_ID_UTF8, buffer, (int) marker.startIndex, (int) marker.endIndex)) { + return MAX_ID_SID; + } + // Not a system symbol. 
+ return -1; + } + private void readSymbolTableStructField() { + if (minorVersion > 0) { + readSymbolTableStructField_1_1(); + return; + } if (fieldSid == SYMBOLS_SID) { state = State.ON_SYMBOL_TABLE_SYMBOLS; if (hasSeenSymbols) { @@ -645,17 +685,42 @@ private void readSymbolTableStructField() { } } + private void readSymbolTableStructField_1_1() { + if (matchesSystemSymbol_1_1(fieldTextMarker, SystemSymbols_1_1.SYMBOLS)) { + state = State.ON_SYMBOL_TABLE_SYMBOLS; + if (hasSeenSymbols) { + throw new IonException("Symbol table contained multiple symbols fields."); + } + hasSeenSymbols = true; + } else if (matchesSystemSymbol_1_1(fieldTextMarker, SystemSymbols_1_1.IMPORTS)) { + state = State.ON_SYMBOL_TABLE_IMPORTS; + if (hasSeenImports) { + throw new IonException("Symbol table contained multiple imports fields."); + } + hasSeenImports = true; + } + } + private void startReadingImportsList() { - resetImports(); + resetImports(getIonMajorVersion(), getIonMinorVersion()); resetSymbolTable(); newImports = new ArrayList<>(3); - newImports.add(getSystemSymbolTable()); + if (minorVersion == 0) { + newImports.add(getSystemSymbolTable()); + } state = State.READING_SYMBOL_TABLE_IMPORTS_LIST; } private void preparePossibleAppend() { - if (symbolValueId() != ION_SYMBOL_TABLE_SID) { - resetSymbolTable(); + if (minorVersion > 0) { + prepareScalar(); + if (!matchesSystemSymbol_1_1(valueMarker, SystemSymbols_1_1.ION_SYMBOL_TABLE)) { + resetSymbolTable(); + } + } else { + if (symbolValueId() != ION_SYMBOL_TABLE_SID) { + resetSymbolTable(); + } } state = State.ON_SYMBOL_TABLE_FIELD; } @@ -712,6 +777,9 @@ private void finishReadingImportStruct() { private void startReadingImportStructField() { int fieldId = getFieldId(); + if (minorVersion > 0 && fieldId < 0) { + fieldId = mapInlineTextToSystemSid(fieldTextMarker); + } if (fieldId == NAME_SID) { state = State.READING_SYMBOL_TABLE_IMPORT_NAME; } else if (fieldId == VERSION_SID) { @@ -847,7 +915,7 @@ void readSymbolTable() { } 
readImportMaxId(); break; - default: throw new IllegalStateException(); + default: throw new IllegalStateException(state.toString()); } } } @@ -875,27 +943,6 @@ private enum State { // The current state. private State state = State.READING_VALUE; - /** - * @return true if current value has a sequence of annotations that begins with `$ion_symbol_table`; otherwise, - * false. - */ - boolean startsWithIonSymbolTable() { - long savedPeekIndex = peekIndex; - peekIndex = annotationSequenceMarker.startIndex; - int sid = minorVersion == 0 ? readVarUInt_1_0() : readVarUInt_1_1(); - peekIndex = savedPeekIndex; - return ION_SYMBOL_TABLE_SID == sid; - } - - /** - * @return true if the reader is positioned on a symbol table; otherwise, false. - */ - private boolean isPositionedOnSymbolTable() { - return hasAnnotations && - super.getType() == IonType.STRUCT && - startsWithIonSymbolTable(); - } - @Override public Event nextValue() { Event event; @@ -935,64 +982,66 @@ public SymbolTable getSymbolTable() { return cachedReadOnlySymbolTable; } - @Override - public String stringValue() { - String value; - IonType type = super.getType(); - if (type == IonType.STRING) { - value = super.stringValue(); - } else if (type == IonType.SYMBOL) { - int sid = symbolValueId(); - if (sid < 0) { - // The raw reader uses this to denote null.symbol. - return null; - } - value = getSymbol(sid); - if (value == null) { - throw new UnknownSymbolException(sid); - } - } else { - throw new IllegalStateException("Invalid type requested."); - } - return value; - } - - @Override - public SymbolToken symbolValue() { - int sid = symbolValueId(); - if (sid < 0) { - // The raw reader uses this to denote null.symbol. 
- return null; - } - return getSymbolToken(sid); - } - @Override public String[] getTypeAnnotations() { + if (isEvaluatingEExpression) { + return macroEvaluatorIonReader.getTypeAnnotations(); + } if (!hasAnnotations) { return _Private_Utils.EMPTY_STRING_ARRAY; } - IntList annotationSids = getAnnotationSidList(); - String[] annotationArray = new String[annotationSids.size()]; - for (int i = 0; i < annotationArray.length; i++) { - String symbol = getSymbol(annotationSids.get(i)); - if (symbol == null) { - throw new UnknownSymbolException(annotationSids.get(i)); + if (annotationSequenceMarker.startIndex >= 0) { + if (annotationSequenceMarker.typeId != null && annotationSequenceMarker.typeId.isInlineable) { + getAnnotationMarkerList(); + } else { + IntList annotationSids = getAnnotationSidList(); + String[] annotationArray = new String[annotationSids.size()]; + for (int i = 0; i < annotationArray.length; i++) { + String symbol = getSymbol(annotationSids.get(i)); + if (symbol == null) { + throw new UnknownSymbolException(annotationSids.get(i)); + } + annotationArray[i] = symbol; + } + return annotationArray; } - annotationArray[i] = symbol; + } + String[] annotationArray = new String[annotationTokenMarkers.size()]; + annotationTextIterator.nextAnnotationPeekIndex = 0; + annotationTextIterator.target = annotationTokenMarkers.size(); + annotationTextIterator.isSids = false; + while (annotationTextIterator.hasNext()) { + annotationArray[(int) annotationTextIterator.nextAnnotationPeekIndex] = annotationTextIterator.next(); } return annotationArray; } @Override public SymbolToken[] getTypeAnnotationSymbols() { + if (isEvaluatingEExpression) { + return macroEvaluatorIonReader.getTypeAnnotationSymbols(); + } if (!hasAnnotations) { return SymbolToken.EMPTY_ARRAY; } - IntList annotationSids = getAnnotationSidList(); - SymbolToken[] annotationArray = new SymbolToken[annotationSids.size()]; - for (int i = 0; i < annotationArray.length; i++) { - annotationArray[i] = 
getSymbolToken(annotationSids.get(i)); + if (annotationSequenceMarker.startIndex >= 0) { + if (annotationSequenceMarker.typeId != null && annotationSequenceMarker.typeId.isInlineable) { + getAnnotationMarkerList(); + } else { + IntList annotationSids = getAnnotationSidList(); + SymbolToken[] annotationArray = new SymbolToken[annotationSids.size()]; + for (int i = 0; i < annotationArray.length; i++) { + annotationArray[i] = getSymbolToken(annotationSids.get(i)); + } + return annotationArray; + } + } + SymbolToken[] annotationArray = new SymbolToken[annotationTokenMarkers.size()]; + annotationTextIterator.nextAnnotationPeekIndex = 0; + annotationTextIterator.target = annotationTokenMarkers.size(); + annotationTextIterator.isSids = false; + while (annotationTextIterator.hasNext()) { + annotationArray[(int) annotationTextIterator.nextAnnotationPeekIndex] = annotationTextIterator.nextSymbolToken(); } return annotationArray; } @@ -1017,15 +1066,34 @@ public void remove() { @Override public Iterator iterateTypeAnnotations() { + if (isEvaluatingEExpression) { + return macroEvaluatorIonReader.iterateTypeAnnotations(); + } if (!hasAnnotations) { return EMPTY_ITERATOR; } - annotationIterator.reset(); - return annotationIterator; + if (annotationSequenceMarker.startIndex >= 0) { + if (annotationSequenceMarker.typeId != null && annotationSequenceMarker.typeId.isInlineable) { + // Note: this could be made more efficient by parsing from the marker sequence iteratively. 
+ getAnnotationMarkerList(); + } else { + annotationTextIterator.nextAnnotationPeekIndex = annotationSequenceMarker.startIndex; + annotationTextIterator.target = annotationSequenceMarker.endIndex; + annotationTextIterator.isSids = true; + return annotationTextIterator; + } + } + annotationTextIterator.nextAnnotationPeekIndex = 0; + annotationTextIterator.target = annotationTokenMarkers.size(); + annotationTextIterator.isSids = false; + return annotationTextIterator; } @Override public String getFieldName() { + if (fieldTextMarker.startIndex > -1 || isEvaluatingEExpression || fieldTextMarker.typeId == IonTypeID.SYSTEM_SYMBOL_VALUE) { + return getFieldText(); + } if (fieldSid < 0) { return null; } @@ -1036,12 +1104,4 @@ public String getFieldName() { return fieldName; } - @Override - public SymbolToken getFieldNameSymbol() { - if (fieldSid < 0) { - return null; - } - return getSymbolToken(fieldSid); - } - } diff --git a/src/main/java/com/amazon/ion/impl/IonReaderContinuableCore.java b/src/main/java/com/amazon/ion/impl/IonReaderContinuableCore.java index c6866e449f..97b71357dd 100644 --- a/src/main/java/com/amazon/ion/impl/IonReaderContinuableCore.java +++ b/src/main/java/com/amazon/ion/impl/IonReaderContinuableCore.java @@ -1,6 +1,5 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 - package com.amazon.ion.impl; import com.amazon.ion.Decimal; @@ -9,18 +8,24 @@ import com.amazon.ion.IonInt; import com.amazon.ion.IonType; import com.amazon.ion.IvmNotificationConsumer; +import com.amazon.ion.SymbolTable; +import com.amazon.ion.SymbolToken; import com.amazon.ion.Timestamp; import com.amazon.ion.UnknownSymbolException; import java.math.BigDecimal; import java.math.BigInteger; import java.util.Date; +import java.util.function.Consumer; /** * IonCursor with the core IonReader interface methods. Useful for adapting an IonCursor implementation into a * system-level IonReader. 
*/ -interface IonReaderContinuableCore extends IonCursor { +// TODO this is currently public because it is used by MacroCompiler, which exists in a different Java package. +// consider ways of not exposing this interface, either by moving MacroCompiler into com.amazon.ion.impl, or using +// the _Private_ naming convention for this interface. +public interface IonReaderContinuableCore extends IonCursor { /** * Returns the depth into the Ion value that this reader has traversed. @@ -34,6 +39,12 @@ interface IonReaderContinuableCore extends IonCursor { */ IonType getType(); + /** + * Returns the type of the current value in the raw encoding, or + * null if there is no current value. + */ + IonType getEncodingType(); + /** * Returns an {@link IntegerSize} representing the smallest-possible * Java type of the Ion {@code int} at the current value. @@ -71,24 +82,58 @@ interface IonReaderContinuableCore extends IonCursor { * @return the symbol ID of the field name, if the current value is a * field within a struct. * If the current value is not a field, or if the symbol ID cannot be - * determined, this method returns a value less than one. + * determined, this method returns a value less than zero. + * If this method returns less than zero and the reader is positioned + * on a value with a field name, then the text for the field name can be + * retrieved using {@link #getFieldText()}. * */ @Deprecated int getFieldId(); /** - * Gets the symbol IDs of the annotations attached to the current value. + * @return true if the value on which the reader is currently positioned has field + * name text available for reading via {@link #getFieldText()}. If this + * method returns false but the reader is positioned on a value with a field name, + * then the field name symbol ID can be retrieved using {@link #getFieldId()}. + */ + boolean hasFieldText(); + + /** + * Reads the text for the current field name. 
It is the caller's responsibility to + * ensure {@link #hasFieldText()} returns true before calling this method. + * @return the field name text. + */ + String getFieldText(); + + /** + * Gets the current value's field name as a symbol token (text + ID). + * If the text of the token isn't known, the result's + * {@link SymbolToken#getText()} will be null. + * If the symbol ID of the token isn't known, the result's + * {@link SymbolToken#getSid()} will be + * {@link SymbolTable#UNKNOWN_SYMBOL_ID}. + * At least one of the two fields will be defined. + * + * @return null if there is no current value or if the current value is + * not a field of a struct. + * + */ + SymbolToken getFieldNameSymbol(); + + /** + * Consumes SymbolTokens representing the annotations attached to the current value. + * Each SymbolToken provided will contain *either* a symbol ID, *or* its symbol + * text, depending on how it was encoded. *
 <p>
 * This is an "expert method": correct use requires deep understanding * of the Ion binary format. You almost certainly don't want to use it. - * - * @return the symbol IDs of the annotations on the current value. - * If the current value has no annotations, this method returns an empty array. - * + *
 <p>
 + * It is the caller's responsibility to ensure {@link #hasAnnotations()} returns + * true before calling this method. */ @Deprecated - int[] getAnnotationIds(); + void consumeAnnotationTokens(Consumer consumer); /** * Returns the current value as an boolean. @@ -180,11 +225,6 @@ interface IonReaderContinuableCore extends IonCursor { */ String stringValue(); - /** - * Reads the symbol ID of the symbol value that begins at `valueMarker.startIndex` and ends at - * `valueMarker.endIndex`. - * @return -1 if the value is null - */ /** * Gets the symbol ID of the current symbol value. *
 <p>
@@ -192,11 +232,38 @@ interface IonReaderContinuableCore extends IonCursor { * of the Ion binary format. You almost certainly don't want to use it. * * @return the symbol ID of the value. - * If the symbol ID cannot be determined, this method returns a value less than one. + * If the symbol ID cannot be determined, this method returns a value less than zero. + * If this is the case and the reader is positioned on a symbol value, then the text for the + * symbol can be retrieved using {@link #getSymbolText()}. */ @Deprecated int symbolValueId(); + /** + * @return true if the value on which the reader is currently positioned is a + * symbol with text available for reading via {@link #getSymbolText()}. If this + * method returns false but the reader is positioned on a symbol value, then + * the value's symbol ID can be retrieved using {@link #symbolValueId()}. + */ + boolean hasSymbolText(); + + /** + * Reads the text for the current symbol value. It is the caller's responsibility to + * ensure {@link #hasSymbolText()} returns true before calling this method. + * @return the symbol value text. + */ + String getSymbolText(); + + /** + * Returns the current value as a symbol token (text + ID). + * This is only valid when {@link #getType()} returns + * {@link IonType#SYMBOL}. + * + * @return null if {@link #isNullValue()} + * + */ + SymbolToken symbolValue(); + /** * Gets the size in bytes of the current lob value. * This is only valid when {@link #getType()} returns {@link IonType#BLOB} @@ -253,4 +320,17 @@ interface IonReaderContinuableCore extends IonCursor { */ boolean hasAnnotations(); + /** + * Resets the reader's encoding context back to the one applicable immediately + * after an Ion version marker. + */ + void resetEncodingContext(); + + /** + * Retrieves the String text for the given symbol ID, if the text is available. + * @param sid a symbol ID. + * @return a String or null.
+ */ + String getSymbol(int sid); + } diff --git a/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java b/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java index f0ce3e41be..90ae2fb7ac 100644 --- a/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java +++ b/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java @@ -5,23 +5,62 @@ import com.amazon.ion.Decimal; import com.amazon.ion.IntegerSize; import com.amazon.ion.IonBufferConfiguration; +import com.amazon.ion.IonCursor; import com.amazon.ion.IonException; +import com.amazon.ion.IonReader; import com.amazon.ion.IonType; +import com.amazon.ion.MacroAwareIonReader; +import com.amazon.ion.MacroAwareIonWriter; +import com.amazon.ion.SymbolTable; +import com.amazon.ion.SymbolToken; import com.amazon.ion.Timestamp; +import com.amazon.ion.UnknownSymbolException; +import com.amazon.ion._private.SuppressFBWarnings; import com.amazon.ion.impl.bin.IntList; +import com.amazon.ion.impl.bin.OpCodes; +import com.amazon.ion.impl.bin.PresenceBitmap; import com.amazon.ion.impl.bin.utf8.Utf8StringDecoder; import com.amazon.ion.impl.bin.utf8.Utf8StringDecoderPool; - +import com.amazon.ion.impl.macro.EncodingContext; +import com.amazon.ion.impl.macro.Expression; +import com.amazon.ion.impl.macro.EExpressionArgsReader; +import com.amazon.ion.impl.macro.IonReaderFromReaderAdapter; +import com.amazon.ion.impl.macro.Macro; +import com.amazon.ion.impl.macro.MacroCompiler; +import com.amazon.ion.impl.macro.MacroTable; +import com.amazon.ion.impl.macro.MutableMacroTable; +import com.amazon.ion.impl.macro.ReaderAdapter; +import com.amazon.ion.impl.macro.ReaderAdapterContinuable; +import com.amazon.ion.impl.macro.MacroEvaluator; +import com.amazon.ion.impl.macro.MacroEvaluatorAsIonReader; +import com.amazon.ion.impl.macro.MacroRef; +import com.amazon.ion.impl.macro.SystemMacro; + +import java.io.IOException; import java.io.InputStream; import java.math.BigDecimal; import 
java.math.BigInteger; import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import java.util.Date; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.function.Consumer; + +import static com.amazon.ion.SystemSymbols.ION_SYMBOL_TABLE_SID; +import static com.amazon.ion.SystemSymbols.DEFAULT_MODULE; +import static com.amazon.ion.impl.IonReaderContinuableApplicationBinary.SYMBOLS_LIST_INITIAL_CAPACITY; +import static com.amazon.ion.impl.IonTypeID.SYSTEM_SYMBOL_VALUE; +import static com.amazon.ion.impl.bin.Ion_1_1_Constants.*; /** * An IonCursor capable of raw parsing of binary Ion streams. */ -class IonReaderContinuableCoreBinary extends IonCursorBinary implements IonReaderContinuableCore { +class IonReaderContinuableCoreBinary extends IonCursorBinary implements IonReaderContinuableCore, MacroAwareIonReader { // Isolates the highest bit in a byte. private static final int HIGHEST_BIT_BITMASK = 0x80; @@ -30,6 +69,7 @@ class IonReaderContinuableCoreBinary extends IonCursorBinary implements IonReade private static final int LOWER_SEVEN_BITS_BITMASK = 0x7F; private static final int SINGLE_BYTE_MASK = 0xFF; + private static final int TWO_BYTE_MASK = 0xFFFF; // Isolates the lowest six bits in a byte. private static final int LOWER_SIX_BITS_BITMASK = 0x3F; @@ -61,7 +101,8 @@ class IonReaderContinuableCoreBinary extends IonCursorBinary implements IonReade // The second-most significant bit in the most significant byte of a VarInt is the sign. private static final int VAR_INT_SIGN_BITMASK = 0x40; - // 32-bit floats must declare length 4. + private static final int FLOAT_16_BYTE_LENGTH = 2; + private static final int FLOAT_32_BYTE_LENGTH = 4; // Initial capacity of the ArrayList used to hold the symbol IDs of the annotations on the current value. 
@@ -81,6 +122,43 @@ class IonReaderContinuableCoreBinary extends IonCursorBinary implements IonReade // The symbol IDs for the annotations on the current value. private final IntList annotationSids; + // The core MacroEvaluator that this core reader delegates to when evaluating a macro invocation. + private final MacroEvaluator macroEvaluator = new MacroEvaluator(); + + // The IonReader-like MacroEvaluator that this core reader delegates to when evaluating a macro invocation. + protected MacroEvaluatorAsIonReader macroEvaluatorIonReader = new MacroEvaluatorAsIonReader(macroEvaluator); + + // The encoding context (macro table) that is currently active. + private EncodingContext encodingContext = EncodingContext.getDefault(); + + // Adapts this reader for use in code that supports multiple reader types. + private final ReaderAdapter readerAdapter = new ReaderAdapterContinuable(this); + + // Adapts this reader for use in code that supports IonReader. + private final IonReader asIonReader = new IonReaderFromReaderAdapter(readerAdapter); + + // Reads encoding directives from the stream. + private final EncodingDirectiveReader encodingDirectiveReader = new EncodingDirectiveReader(); + + // Reads macro invocation arguments as expressions and feeds them to the MacroEvaluator. + private final EExpressionArgsReader expressionArgsReader = new BinaryEExpressionArgsReader(); + + // The text representations of the symbol table that is currently in scope, indexed by symbol ID. If the element at + // a particular index is null, that symbol has unknown text. + protected String[] symbols = new String[SYMBOLS_LIST_INITIAL_CAPACITY]; + + // The maximum offset into the 'symbols' array that points to a valid local symbol. + protected int localSymbolMaxOffset = -1; + + // The maximum offset into the macro table that points to a valid local macro. + private int localMacroMaxOffset = -1; + + // Indicates whether the reader is currently evaluating an e-expression. 
+ protected boolean isEvaluatingEExpression = false; + + // The writer that will perform a macro-aware transcode, if requested. + private MacroAwareIonWriter macroAwareTranscoder = null; + /** * Constructs a new reader from the given byte array. * @param configuration the configuration to use. The buffer size and oversized value configuration are unused, as @@ -125,6 +203,22 @@ class IonReaderContinuableCoreBinary extends IonCursorBinary implements IonReade new byte[12], }; + /** + * Returns a new or reused array of the requested size. + * @param requestedSize the size of the scratch space to retrieve. + * @return a byte array. + */ + private byte[] getScratchForSize(int requestedSize) { + byte[] bytes = null; + if (requestedSize < scratchForSize.length) { + bytes = scratchForSize[requestedSize]; + } + if (bytes == null) { + bytes = new byte[requestedSize]; + } + return bytes; + } + /** * Copy the requested number of bytes from the buffer into a scratch buffer of exactly the requested length. * @param startIndex the start index from which to copy. @@ -134,13 +228,7 @@ class IonReaderContinuableCoreBinary extends IonCursorBinary implements IonReade private byte[] copyBytesToScratch(long startIndex, int length) { // Note: using reusable scratch buffers makes reading ints and decimals 1-5% faster and causes much less // GC churn. - byte[] bytes = null; - if (length < scratchForSize.length) { - bytes = scratchForSize[length]; - } - if (bytes == null) { - bytes = new byte[length]; - } + byte[] bytes = getScratchForSize(length); // The correct number of bytes will be requested from the buffer, so the limit is set at the capacity to // avoid having to calculate a limit. System.arraycopy(buffer, (int) startIndex, bytes, 0, bytes.length); @@ -165,6 +253,25 @@ int readVarUInt_1_0() { return result; } + /** + * Reads a 2+ byte VarUInt, given the first byte. When called, `peekIndex` must point at the second byte in the + * VarUInt. 
When this method returns, `peekIndex` will point at the first byte that follows the VarUInt. + * NOTE: the VarUInt must fit in an `int`. + * @param currentByte the first byte in the VarUInt. + * @return the value. + */ + int readVarUInt_1_0(byte currentByte) { + int result = currentByte & LOWER_SEVEN_BITS_BITMASK; + do { + if (peekIndex >= limit) { + throw new IonException("Malformed data: declared length exceeds the number of bytes remaining in the stream."); + } + currentByte = buffer[(int) (peekIndex++)]; + result = (result << VALUE_BITS_PER_VARUINT_BYTE) | (currentByte & LOWER_SEVEN_BITS_BITMASK); + } while (currentByte >= 0); + return result; + } + /** * Reads a 2+ byte VarInt, given the first byte. When called, `peekIndex` must point at the second byte in the * VarInt. at `peekIndex`.When this method returns, `peekIndex` will point at the first byte that follows the @@ -341,8 +448,8 @@ private Timestamp readTimestamp_1_0() { offset = readVarInt_1_0(firstByte); } int year = readVarUInt_1_0(); - int month = 0; - int day = 0; + int month = 1; + int day = 1; int hour = 0; int minute = 0; int second = 0; @@ -421,194 +528,1848 @@ private boolean classifyInteger_1_0() { return (buffer[(int) (valueMarker.startIndex)] & SINGLE_BYTE_MASK) <= MOST_SIGNIFICANT_BYTE_OF_MAX_INTEGER; } - int readVarUInt_1_1() { - throw new UnsupportedOperationException(); - } - - private int readVarSym_1_1(Marker marker) { - throw new UnsupportedOperationException(); + /** + * Reads a 3+ byte FlexUInt into a long. After this method returns, `peekIndex` points to the first byte after the + * end of the FlexUInt. + * @param firstByte the first byte of the FlexUInt. + * @return the value. + */ + private long readLargeFlexUInt_1_1(int firstByte) { + byte length = 0; + int bitShift = 0; + if (firstByte == 0) { + length = 7; // Don't include the skipped zero byte. 
+ bitShift = -7; + firstByte = buffer[(int) peekIndex++] & SINGLE_BYTE_MASK; + if (firstByte == 0) { + throw new IonException("Flex subfield exceeds the length of a long."); + } + } + length += (byte) (Integer.numberOfTrailingZeros(firstByte) + 1); + bitShift += length; + long result = firstByte >>> bitShift; + for (byte i = 1; i < length; i++) { + result |= ((long) (buffer[(int) (peekIndex++)] & SINGLE_BYTE_MASK) << (8 * i - bitShift)); + } + return result; } - private BigDecimal readBigDecimal_1_1() { - throw new UnsupportedOperationException(); + /** + * Reads a FlexUInt into a long. After this method returns, `peekIndex` points to the first byte after the end of + * the FlexUInt. + * @return the value. + */ + long readFlexUInt_1_1() { + // Up-cast to int, ensuring the most significant bit in the byte is not treated as the sign. + int currentByte = buffer[(int)(peekIndex++)] & SINGLE_BYTE_MASK; + if ((currentByte & 1) == 1) { + // Single byte; shift out the continuation bit. + return currentByte >>> 1; + } + if ((currentByte & 2) != 0) { + // Two bytes; upcast the second byte to int, ensuring the most significant bit is not treated as the sign. + // Make room for the six value bits in the first byte. Or with those six value bits after shifting out the + // two continuation bits. + return ((buffer[(int) peekIndex++] & SINGLE_BYTE_MASK) << 6 ) | (currentByte >>> 2); + } + return readLargeFlexUInt_1_1(currentByte); } - private Decimal readDecimal_1_1() { - throw new UnsupportedOperationException(); + /** + * Reads a 3+ byte FlexInt into a long. After this method returns, `peekIndex` points to the first byte after the + * end of the FlexInt. + * @return the value. + */ + long readLargeFlexInt_1_1(int firstByte) { + firstByte &= SINGLE_BYTE_MASK; + // FlexInts are essentially just FlexUInts that interpret the most significant bit as a sign that needs to be + // extended. 
+ long result = readLargeFlexUInt_1_1(firstByte); + if (buffer[(int) peekIndex - 1] < 0) { + // Sign extension. + result |= ~(-1L >>> Long.numberOfLeadingZeros(result)); + } + return result; } - private long readLong_1_1() { - throw new UnsupportedOperationException(); + /** + * Reads a FlexInt into a long. After this method returns, `peekIndex` points to the first byte after the end of + * the FlexInt. + * @return the value. + */ + long readFlexInt_1_1() { + // The following up-cast to int performs sign extension, if applicable. + int currentByte = buffer[(int)(peekIndex++)]; + if ((currentByte & 1) == 1) { + // Single byte; shift out the continuation bit while preserving the sign. + return currentByte >> 1; + } + if ((currentByte & 2) != 0) { + // Two bytes; up-cast the second byte to int, thereby performing sign extension. Make room for the six + // value bits in the first byte. Or with those six value bits after shifting out the two continuation bits. + return buffer[(int) peekIndex++] << 6 | ((currentByte & SINGLE_BYTE_MASK) >>> 2); + } + return readLargeFlexInt_1_1(currentByte); } - private BigInteger readBigInteger_1_1() { - throw new UnsupportedOperationException(); + /** + * Reads a FlexSym. After this method returns, `peekIndex` points to the first byte after the end of the FlexSym. + * When the FlexSym contains inline text, the given Marker's start and end indices are populated with the start and + * end of the UTF-8 byte sequence, and this method returns -1. When the FlexSym contains a symbol ID, the given + * Marker's endIndex is set to the symbol ID value and its startIndex is not set. When this FlexSym wraps a + * delimited end marker, neither the Marker's startIndex nor its endIndex is set. + * @param markerToSet the marker to populate. + * @return the symbol ID value if one was present, otherwise -1. 
+ */ + private long readFlexSym_1_1(Marker markerToSet) { + // TODO find a factoring that reduces duplication with IonCursorBinary, taking into account performance. + long result = readFlexInt_1_1(); + if (result == 0) { + int nextByte = buffer[(int)(peekIndex++)]; + // We pretend $0 is a system symbol to reduce the number of branches here. + if (nextByte >= FLEX_SYM_SYSTEM_SYMBOL_OFFSET || nextByte <= (byte) (FLEX_SYM_SYSTEM_SYMBOL_OFFSET + Byte.MAX_VALUE)) { + markerToSet.typeId = SYSTEM_SYMBOL_VALUE; + markerToSet.startIndex = -1; + markerToSet.endIndex = (byte)(nextByte - FLEX_SYM_SYSTEM_SYMBOL_OFFSET); + } else if (nextByte != OpCodes.DELIMITED_END_MARKER) { + throw new IonException("FlexSym 0 may only precede symbol zero, system symbol, or delimited end."); + } + return -1; + } else if (result < 0) { + markerToSet.startIndex = peekIndex; + markerToSet.endIndex = peekIndex - result; + peekIndex = markerToSet.endIndex; + return -1; + } else { + markerToSet.endIndex = result; + } + return result; } - private Timestamp readTimestamp_1_1() { - throw new UnsupportedOperationException(); + /** + * Reads a FixedInt into a long. After this method returns, `peekIndex` points to the first byte after the end of + * the FixedInt. + * @return the value. + */ + private long readFixedInt_1_1() { + if (peekIndex >= valueMarker.endIndex) { + return 0; + } + long startIndex = peekIndex; + peekIndex = valueMarker.endIndex; + // Note: the following line performs sign extension via the cast to long without masking with 0xFF. 
+ long value = buffer[(int) --peekIndex]; + while (peekIndex > startIndex) { + value = (value << 8) | (buffer[(int) --peekIndex] & SINGLE_BYTE_MASK); + } + peekIndex = valueMarker.endIndex; + return value; } - private boolean readBoolean_1_1() { - throw new UnsupportedOperationException(); + /** + * Reads a FixedInt or FixedUInt as a BigInteger, first copying the value into scratch space and converting it to + * its equivalent big-endian two's complement representation. If the provided length is longer than the actual + * length of the value, the most significant byte in the two's complement representation will be zero, maintaining + * a positive sign. + * @param length the number of bytes remaining in the FixedInt or FixedUInt representation. + * @return a new BigInteger that represents the value. + */ + private BigInteger readLargeFixedIntOrFixedUIntAsBigInteger(int length) { + // FixedInt is a little-endian two's complement representation. Simply reverse the bytes. + byte[] bytes = getScratchForSize(length); + // Clear the most significant byte in case the scratch space is padded to accommodate an unsigned value with + // its highest bit set. + bytes[0] = 0; + int copyIndex = bytes.length; + for (long i = peekIndex; i < valueMarker.endIndex; i++) { + bytes[--copyIndex] = buffer[(int) i]; + } + peekIndex = valueMarker.endIndex; + return new BigInteger(bytes); } - @Override - public Event nextValue() { - lobBytesRead = 0; - return super.nextValue(); + /** + * Reads a FixedUInt value into a BigInteger. + * @return the value. + */ + private BigInteger readFixedUIntAsBigInteger_1_1(int length) { + if (buffer[(int) valueMarker.endIndex - 1] < 0) { + // The most-significant bit is set; pad the length by one byte so that the value remains unsigned. + length += 1; + } + return readLargeFixedIntOrFixedUIntAsBigInteger(length); } /** - * Prepares the ByteBuffer to wrap a slice of the underlying buffer. - * @param startIndex the start of the slice. 
- * @param endIndex the end of the slice. - * @return the ByteBuffer. + * Reads a FlexUInt or FlexInt value into a BigInteger. + * @param length the byte length of the encoded FlexUInt or FlexInt to read. + * @return the value. */ - ByteBuffer prepareByteBuffer(long startIndex, long endIndex) { - // Setting the limit to the capacity first is required because setting the position will fail if the new - // position is outside the limit. - byteBuffer.limit(buffer.length); - byteBuffer.position((int) startIndex); - byteBuffer.limit((int) endIndex); - return byteBuffer; + private BigInteger readLargeFlexIntOrFlexUIntAsBigInteger(int length) { + int bitShift = length; + int maskForLength = (SINGLE_BYTE_MASK >>> (8 - bitShift)); + int numberOfLeadingZeroBytes = 0; + // First count the leading zeroes and calculate the number of bits that need to be shifted out of each + // encoded byte. + for (long i = peekIndex; i < valueMarker.endIndex; i++) { + int b = buffer[(int) i] & SINGLE_BYTE_MASK; + if (b == 0) { + bitShift -= 8; + numberOfLeadingZeroBytes++; + maskForLength = (SINGLE_BYTE_MASK >>> (8 - bitShift)); + continue; // Skip over any bytes that contain only continuation bits. + } + break; + } + // FlexInt and FlexUInt are little-endian. Reverse the bytes and shift out the continuation bits. + byte[] bytes = getScratchForSize(length - numberOfLeadingZeroBytes); + int copyIndex = bytes.length; + for (long i = peekIndex + numberOfLeadingZeroBytes; i < valueMarker.endIndex; i++) { + int b = buffer[(int) i] & SINGLE_BYTE_MASK; + if (copyIndex < bytes.length) { + bytes[copyIndex] |= (byte) ((b & maskForLength) << (8 - bitShift)); + } + if (--copyIndex == 0 && !taglessType.isUnsigned) { + bytes[copyIndex] = (byte) ((byte) b >> bitShift); // Sign extend most significant byte. + } else { + bytes[copyIndex] = (byte) (b >>> bitShift); + } + } + peekIndex = valueMarker.endIndex; + return new BigInteger(bytes); } - /** - * Reads a UInt. 
- * @param startIndex the index of the first byte in the UInt value. - * @param endIndex the index of the first byte after the end of the UInt value. + * Reads a tagless int value into a BigInteger. * @return the value. */ - private long readUInt(long startIndex, long endIndex) { - long result = 0; - for (long i = startIndex; i < endIndex; i++) { - result = (result << VALUE_BITS_PER_UINT_BYTE) | buffer[(int) i] & SINGLE_BYTE_MASK; + private BigInteger readTaglessIntAsBigInteger_1_1() { + BigInteger value; + int length = (int) (valueMarker.endIndex - peekIndex); + if (valueTid.variableLength) { + value = readLargeFlexIntOrFlexUIntAsBigInteger(length); + } else if (length < LONG_SIZE_IN_BYTES || !taglessType.isUnsigned) { + // Note: all fixed-width tagless signed ints fit in a Java long. + value = BigInteger.valueOf(readTaglessInt_1_1()); + } else { + value = readFixedUIntAsBigInteger_1_1(length); } - return result; + return value; } - @Override - public boolean isNullValue() { - return valueTid != null && valueTid.isNull; - } + /** + * Reads a FixedInt value into a BigInteger. + * @return the value. + */ + private BigInteger readFixedIntAsBigInteger_1_1() { + BigInteger value; + int length = (int) (valueMarker.endIndex - peekIndex); + if (length <= LONG_SIZE_IN_BYTES) { + value = BigInteger.valueOf(readFixedInt_1_1()); + } else { + value = readLargeFixedIntOrFixedUIntAsBigInteger(length); + } + return value; + } /** - * Performs any logic necessary to prepare a scalar value for parsing. Subclasses may wish to provide additional - * logic, such as ensuring that the value is present in the buffer. + * Reads into a BigDecimal the decimal value that begins at `peekIndex` and ends at `valueMarker.endIndex`. + * @return the value. 
*/ - void prepareScalar() { - if (valueMarker.endIndex > limit) { - throw new IonException("Malformed data: declared length exceeds the number of bytes remaining in the stream."); + private BigDecimal readBigDecimal_1_1() { + int scale = (int) -readFlexInt_1_1(); + BigDecimal value; + int length = (int) (valueMarker.endIndex - peekIndex); + if (length <= LONG_SIZE_IN_BYTES) { + // No need to allocate a BigInteger to hold the coefficient. + value = BigDecimal.valueOf(readFixedInt_1_1(), scale); + } else { + // The coefficient may overflow a long, so a BigInteger is required. + value = new BigDecimal(readLargeFixedIntOrFixedUIntAsBigInteger(length), scale); } + return value; } - @Override - public IntegerSize getIntegerSize() { - if (valueTid == null || valueTid.type != IonType.INT || valueTid.isNull) { - return null; + /** + * Reads into a Decimal the decimal value that begins at `peekIndex` and ends at `valueMarker.endIndex`. + * @return the value. + */ + private Decimal readDecimal_1_1() { + int scale = (int) -readFlexInt_1_1(); + BigInteger coefficient; + if (valueMarker.endIndex > peekIndex) { + // NOTE: there is a BigDecimal.valueOf(long unscaledValue, int scale) factory method that avoids allocating + // a BigInteger for coefficients that fit in a long. See its use in readBigDecimal() above. Unfortunately, + // it is not possible to use this for Decimal because the necessary BigDecimal constructor is + // package-private. If a compatible BigDecimal constructor is added in a future JDK revision, a + // corresponding factory method should be added to Decimal to enable this optimization. 
+ coefficient = readFixedIntAsBigInteger_1_1(); + if (coefficient.signum() == 0) { + return Decimal.negativeZero(scale); + } } - prepareScalar(); - if (valueTid.length < 0) { - return IntegerSize.BIG_INTEGER; - } else if (valueTid.length < INT_SIZE_IN_BYTES) { - return IntegerSize.INT; - } else if (valueTid.length == INT_SIZE_IN_BYTES) { - return (minorVersion != 0 || classifyInteger_1_0()) ? IntegerSize.INT : IntegerSize.LONG; - } else if (valueTid.length < LONG_SIZE_IN_BYTES) { - return IntegerSize.LONG; - } else if (valueTid.length == LONG_SIZE_IN_BYTES) { - return (minorVersion != 0 || classifyInteger_1_0()) ? IntegerSize.LONG : IntegerSize.BIG_INTEGER; + else { + coefficient = BigInteger.ZERO; } - return IntegerSize.BIG_INTEGER; - } - - private void throwDueToInvalidType(IonType type) { - throw new IllegalStateException( - String.format("Invalid type. Required %s but found %s.", type, valueTid == null ? null : valueTid.type) - ); + return Decimal.valueOf(coefficient, scale); } - @Override - public int byteSize() { - if (valueTid == null || !IonType.isLob(valueTid.type) || valueTid.isNull) { - throw new IonException("Reader must be positioned on a blob or clob."); + /** + * Reads the tagless int bounded by 'valueMarker` into a long. + * @return the value. + */ + private long readTaglessInt_1_1() { + // TODO performance: the fixed width types all correspond to Java primitives and could therefore be read + // using ByteBuffer, possibly more quickly than using the following methods, especially if several in a row + // can be read without requiring the cursor's state to be modified before each one. 
+ if (taglessType.isUnsigned) { + if (taglessType == TaglessEncoding.FLEX_UINT) { + return readFlexUInt_1_1(); + } + return readFixedUInt_1_1(valueMarker.startIndex, valueMarker.endIndex); } - prepareScalar(); - return (int) (valueMarker.endIndex - valueMarker.startIndex); - } - - @Override - public byte[] newBytes() { - byte[] bytes = new byte[byteSize()]; - // The correct number of bytes will be requested from the buffer, so the limit is set at the capacity to - // avoid having to calculate a limit. - System.arraycopy(buffer, (int) valueMarker.startIndex, bytes, 0, bytes.length); - return bytes; + if (taglessType == TaglessEncoding.FLEX_INT) { + return readFlexInt_1_1(); + } + return readFixedInt_1_1(); } - @Override - public int getBytes(byte[] bytes, int offset, int len) { - int length = Math.min(len, byteSize() - lobBytesRead); - // The correct number of bytes will be requested from the buffer, so the limit is set at the capacity to - // avoid having to calculate a limit. - System.arraycopy(buffer, (int) (valueMarker.startIndex + lobBytesRead), bytes, offset, length); - lobBytesRead += length; - return length; + /** + * Reads the FixedInt bounded by `valueMarker` into a `long`. + * @return the value. + */ + private long readLong_1_1() { + peekIndex = valueMarker.startIndex; + if (taglessType != null) { + return readTaglessInt_1_1(); + } + return readFixedInt_1_1(); } /** - * Loads the scalar converter with an integer value that fits the Ion int on which the reader is positioned. + * Reads the FixedInt bounded by `valueMarker` into a BigInteger. + * @return the value. 
*/ - private void prepareToConvertIntValue() { - if (getIntegerSize() == IntegerSize.BIG_INTEGER) { - scalarConverter.addValue(bigIntegerValue()); - scalarConverter.setAuthoritativeType(_Private_ScalarConversions.AS_TYPE.bigInteger_value); - } else { - scalarConverter.addValue(longValue()); - scalarConverter.setAuthoritativeType(_Private_ScalarConversions.AS_TYPE.long_value); + private BigInteger readBigInteger_1_1() { + peekIndex = valueMarker.startIndex; + if (taglessType != null) { + return readTaglessIntAsBigInteger_1_1(); } + return readFixedIntAsBigInteger_1_1(); } - @Override - public BigDecimal bigDecimalValue() { - BigDecimal value = null; - if (valueTid.type == IonType.DECIMAL) { - if (valueTid.isNull) { - return null; - } - prepareScalar(); - peekIndex = valueMarker.startIndex; - if (peekIndex >= valueMarker.endIndex) { - value = BigDecimal.ZERO; - } else { - value = minorVersion == 0 ? readBigDecimal_1_0() : readBigDecimal_1_1(); - } - } else if (valueTid.type == IonType.INT) { - if (valueTid.isNull) { - return null; - } - prepareToConvertIntValue(); - scalarConverter.cast(scalarConverter.get_conversion_fnid(_Private_ScalarConversions.AS_TYPE.decimal_value)); - value = scalarConverter.getBigDecimal(); - scalarConverter.clear(); - } else if (valueTid.type == IonType.FLOAT) { - scalarConverter.addValue(doubleValue()); - scalarConverter.setAuthoritativeType(_Private_ScalarConversions.AS_TYPE.double_value); - scalarConverter.cast(scalarConverter.get_conversion_fnid(_Private_ScalarConversions.AS_TYPE.decimal_value)); - value = scalarConverter.getDecimal(); - scalarConverter.clear(); + /** + * Reads the fraction component of an Ion 1.1 long form timestamp. + * @return the value as a BigDecimal. + */ + private BigDecimal readTimestampFraction_1_1() { + // The fractional seconds are encoded as a (scale, coefficient) pair, + // which is similar to a decimal. 
The primary difference is that the scale represents a negative + // exponent because it is illegal for the fractional seconds value to be greater than or equal to 1.0 + // or less than 0.0. The coefficient is encoded as a FixedUInt (instead of FixedInt) to prevent the + // encoding of fractional seconds less than 0.0. The scale is encoded as a FlexUInt (instead of FlexInt) + // to discourage the encoding of decimal numbers greater than 1.0. + BigDecimal value; + peekIndex = valueMarker.startIndex + L_TIMESTAMP_SECOND_BYTE_LENGTH; + int scale = (int) readFlexUInt_1_1(); + int length = (int) (valueMarker.endIndex - peekIndex); + if (length >= LONG_SIZE_IN_BYTES) { + value = new BigDecimal(readFixedUIntAsBigInteger_1_1(length), scale); + } else if (length > 0) { + value = BigDecimal.valueOf(readFixedUInt_1_1(peekIndex, valueMarker.endIndex), scale); } else { - throwDueToInvalidType(IonType.DECIMAL); + value = BigDecimal.valueOf(0, scale); + } + if (BigDecimal.ONE.compareTo(value) < 1) { + throw new IllegalArgumentException(String.format("Fractional seconds %s must be greater than or equal to 0 and less than 1", value)); } return value; } - @Override - public Decimal decimalValue() { - Decimal value = null; - if (valueTid.type == IonType.DECIMAL) { + /** + * Reads an Ion 1.1 long form timestamp. + * @return the value. + */ + @SuppressFBWarnings("SF_SWITCH_FALLTHROUGH") + private Timestamp readTimestampLongForm_1_1() { + int year; + int month = 1; + int day = 1; + int hour = 0; + int minute = 0; + int second = 0; + BigDecimal fractionalSecond = null; + boolean isOffsetUnknown = true; + int offset = 0; + int length = (int) (valueMarker.endIndex - valueMarker.startIndex); + if (length > L_TIMESTAMP_SECOND_BYTE_LENGTH) { + // Fractional component. 
+ fractionalSecond = readTimestampFraction_1_1(); + length = L_TIMESTAMP_SECOND_BYTE_LENGTH; + } + Timestamp.Precision precision = L_TIMESTAMP_PRECISION_FOR_LENGTH[length]; + long bits = 0; + for (int i = length - 1; i >= 0 ; i--) { + bits = (bits << 8) | (buffer[i + (int) valueMarker.startIndex] & SINGLE_BYTE_MASK); + } + switch (length) { + case L_TIMESTAMP_SECOND_BYTE_LENGTH: + second = (int) ((bits & L_TIMESTAMP_SECOND_MASK) >>> L_TIMESTAMP_SECOND_BIT_OFFSET); + case L_TIMESTAMP_MINUTE_BYTE_LENGTH: + offset = (int) ((bits & L_TIMESTAMP_OFFSET_MASK) >>> L_TIMESTAMP_OFFSET_BIT_OFFSET); + if ((offset ^ TWELVE_BIT_MASK) != 0) { + isOffsetUnknown = false; + offset -= L_TIMESTAMP_OFFSET_BIAS; + } + minute = (int) ((bits & L_TIMESTAMP_MINUTE_MASK) >>> L_TIMESTAMP_MINUTE_BIT_OFFSET); + hour = (int) (bits & L_TIMESTAMP_HOUR_MASK) >>> L_TIMESTAMP_HOUR_BIT_OFFSET; + case L_TIMESTAMP_DAY_OR_MONTH_BYTE_LENGTH: + day = (int) (bits & L_TIMESTAMP_DAY_MASK) >>> L_TIMESTAMP_DAY_BIT_OFFSET; + if (length == L_TIMESTAMP_DAY_OR_MONTH_BYTE_LENGTH) { + // Month and Day precision share the same length. If the day subfield is 0, the timestamp has + // month precision. Otherwise, it has day precision. + precision = day == 0 ? Timestamp.Precision.MONTH : Timestamp.Precision.DAY; + } + month = (int) (bits & L_TIMESTAMP_MONTH_MASK) >>> L_TIMESTAMP_MONTH_BIT_OFFSET; + case L_TIMESTAMP_YEAR_BYTE_LENGTH: + year = (int) (bits & L_TIMESTAMP_YEAR_MASK); + break; + default: + throw new IonException("Illegal timestamp encoding."); + } + try { + return Timestamp._private_createFromLocalTimeFieldsUnchecked( + precision, + year, + month, + day, + hour, + minute, + second, + fractionalSecond, + isOffsetUnknown ? null : offset + ); + } catch (IllegalArgumentException e) { + throw new IonException("Illegal timestamp encoding. ", e); + } + } + + /** + * Reads an Ion 1.1 timestamp in either the long or short form. + * @return the value. 
+ */ + @SuppressFBWarnings("SF_SWITCH_FALLTHROUGH") + private Timestamp readTimestamp_1_1() { + if (valueTid.variableLength) { + return readTimestampLongForm_1_1(); + } + Timestamp.Precision precision = S_TIMESTAMP_PRECISION_FOR_TYPE_ID_OFFSET[valueTid.lowerNibble]; + int year = 0; + int month = 1; + int day = 1; + int hour = 0; + int minute = 0; + int second = 0; + BigDecimal fractionalSecond = null; + Integer offset = null; + long bits = 0; + for (int i = (int) Math.min(valueMarker.endIndex, valueMarker.startIndex + 8) - 1; i >= valueMarker.startIndex ; i--) { + bits = (bits << 8) | (buffer[i] & SINGLE_BYTE_MASK); + } + switch (precision) { + case FRACTION: + case SECOND: + int unscaledValue = -1; + int scale = -1; + int bound = -1; + switch (valueTid.lowerNibble) { + case S_O_TIMESTAMP_NANOSECOND_LOWER_NIBBLE: + // The least-significant 24 bits of the nanoseconds field are contained in the long. + unscaledValue = (int) ((bits & S_O_TIMESTAMP_NANOSECOND_EIGHTH_BYTE_MASK) >>> S_O_TIMESTAMP_FRACTION_BIT_OFFSET); + // The most-significant 6 bits of the nanoseconds field are contained in the ninth byte. 
+ unscaledValue |= (int) ((buffer[(int) valueMarker.endIndex - 1] & S_O_TIMESTAMP_NANOSECOND_NINTH_BYTE_MASK)) << S_O_TIMESTAMP_NANOSECOND_BITS_IN_EIGHTH_BYTE; + bound = MAX_NANOSECONDS; + scale = NANOSECOND_SCALE; + break; + case S_U_TIMESTAMP_NANOSECOND_LOWER_NIBBLE: + unscaledValue = (int) ((bits & S_U_TIMESTAMP_NANOSECOND_MASK) >>> S_U_TIMESTAMP_FRACTION_BIT_OFFSET); + bound = MAX_NANOSECONDS; + scale = NANOSECOND_SCALE; + break; + case S_O_TIMESTAMP_MICROSECOND_LOWER_NIBBLE: + unscaledValue = (int) ((bits & S_O_TIMESTAMP_MICROSECOND_MASK) >>> S_O_TIMESTAMP_FRACTION_BIT_OFFSET); + bound = MAX_MICROSECONDS; + scale = MICROSECOND_SCALE; + break; + case S_U_TIMESTAMP_MICROSECOND_LOWER_NIBBLE: + unscaledValue = (int) ((bits & S_U_TIMESTAMP_MICROSECOND_MASK) >>> S_U_TIMESTAMP_FRACTION_BIT_OFFSET); + bound = MAX_MICROSECONDS; + scale = MICROSECOND_SCALE; + break; + case S_O_TIMESTAMP_MILLISECOND_LOWER_NIBBLE: + unscaledValue = (int) ((bits & S_O_TIMESTAMP_MILLISECOND_MASK) >>> S_O_TIMESTAMP_FRACTION_BIT_OFFSET); + bound = MAX_MILLISECONDS; + scale = MILLISECOND_SCALE; + break; + case S_U_TIMESTAMP_MILLISECOND_LOWER_NIBBLE: + unscaledValue = (int) ((bits & S_U_TIMESTAMP_MILLISECOND_MASK) >>> S_U_TIMESTAMP_FRACTION_BIT_OFFSET); + bound = MAX_MILLISECONDS; + scale = MILLISECOND_SCALE; + break; + default: + // Second. 
+ break; + } + if (unscaledValue >= 0) { + if (unscaledValue > bound) { + throw new IonException("Timestamp fraction must be between 0 and 1."); + } + fractionalSecond = BigDecimal.valueOf(unscaledValue, scale); + } + if (valueTid.lowerNibble >= S_O_TIMESTAMP_MINUTE_LOWER_NIBBLE) { + second = (int) ((bits & S_O_TIMESTAMP_SECOND_MASK) >>> S_O_TIMESTAMP_SECOND_BIT_OFFSET); + } else { + second = (int) ((bits & S_U_TIMESTAMP_SECOND_MASK) >>> S_U_TIMESTAMP_SECOND_BIT_OFFSET); + } + case MINUTE: + if (valueTid.lowerNibble >= S_O_TIMESTAMP_MINUTE_LOWER_NIBBLE) { + offset = (int) (((bits & S_O_TIMESTAMP_OFFSET_MASK) >>> S_O_TIMESTAMP_OFFSET_BIT_OFFSET) - S_O_TIMESTAMP_OFFSET_BIAS) * S_O_TIMESTAMP_OFFSET_INCREMENT; + } else { + offset = (bits & S_U_TIMESTAMP_UTC_FLAG) == 0 ? null : 0; + } + minute = (int) (bits & S_TIMESTAMP_MINUTE_MASK) >>> S_TIMESTAMP_MINUTE_BIT_OFFSET; + hour = (int) (bits & S_TIMESTAMP_HOUR_MASK) >>> S_TIMESTAMP_HOUR_BIT_OFFSET; + case DAY: + day = (int) (bits & S_TIMESTAMP_DAY_MASK) >>> S_TIMESTAMP_DAY_BIT_OFFSET; + case MONTH: + month = (int) (bits & S_TIMESTAMP_MONTH_MASK) >>> S_TIMESTAMP_MONTH_BIT_OFFSET; + case YEAR: + // Year is encoded as the number of years since 1970. + year = S_TIMESTAMP_YEAR_BIAS + (int) (bits & S_TIMESTAMP_YEAR_MASK); + } + try { + return Timestamp._private_createFromLocalTimeFieldsUnchecked( + precision, + year, + month, + day, + hour, + minute, + second, + fractionalSecond, + offset + ); + } catch (IllegalArgumentException e) { + throw new IonException("Illegal timestamp encoding. ", e); + } + } + + /** + * Reads the boolean value using the type ID of the current value. + * @return the value. + */ + private boolean readBoolean_1_1() { + // Boolean 'true' is 0x6E; 'false' is 0x6F. + return valueTid.lowerNibble == 0xE; + } + + /** + * Determines whether the bytes between [start, end) in 'buffer' match the target bytes. + * @param target the target bytes. + * @param buffer the bytes to match. 
+ * @param start index of the first byte to match. + * @param end index of the first byte after the last byte to match. + * @return true if the bytes match; otherwise, false. + */ + static boolean bytesMatch(byte[] target, byte[] buffer, int start, int end) { + // TODO if this ends up on a critical performance path, see if it's faster to copy the bytes into a + // pre-allocated buffer and then perform a comparison. It's possible that a combination of System.arraycopy() + // and Arrays.equals(byte[], byte[]) is faster because it can be more easily optimized with native code by the + // JVM—both are annotated with @HotSpotIntrinsicCandidate. + int length = end - start; + if (length != target.length) { + return false; + } + for (int i = 0; i < target.length; i++) { + if (target[i] != buffer[start + i]) { + return false; + } + } + return true; + } + + /** + * @return true if current value has a sequence of annotations that begins with `$ion`; otherwise, false. + */ + boolean startsWithIonAnnotation() { + if (minorVersion > 0) { + Marker marker = annotationTokenMarkers.get(0); + return matchesSystemSymbol_1_1(marker, SystemSymbols_1_1.ION); + } + return false; + } + + @Override + public String getSymbol(int sid) { + // Only symbol IDs declared in Ion 1.1 encoding directives (not Ion 1.0 symbol tables) are resolved by the + // core reader. In Ion 1.0, 'symbols' is never populated by the core reader. + if (sid > 0 && sid - 1 <= localSymbolMaxOffset) { + return symbols[sid - 1]; + } + return null; + } + + /** + * Returns true if the symbol at `marker`... + *

<ul>
+ *   <li>is a system symbol with the same ID as the expected System Symbol</li>
+ *   <li>is an inline symbol with the same utf8 bytes as the expected System Symbol</li>
+ *   <li>is a user symbol that maps to the same text as the expected System Symbol</li>
+ * </ul>

+ */ + boolean matchesSystemSymbol_1_1(Marker marker, SystemSymbols_1_1 systemSymbol) { + if (marker.typeId == IonTypeID.SYSTEM_SYMBOL_VALUE) { + return systemSymbol.getText().equals(getSystemSymbolToken(marker).getText()); + } else if (marker.startIndex < 0) { + // This is a local symbol whose ID is stored in marker.endIndex. + return systemSymbol.getText().equals(getSymbol((int) marker.endIndex)); + } else { + // This is an inline symbol with UTF-8 bytes bounded by the marker. + return bytesMatch(systemSymbol.getUtf8Bytes(), buffer, (int) marker.startIndex, (int) marker.endIndex); + } + } + + /** + * @return true if the reader is positioned on an encoding directive; otherwise, false. + */ + private boolean isPositionedOnEncodingDirective() { + return event == Event.START_CONTAINER + && hasAnnotations + && valueTid.type == IonType.SEXP + && parent == null + && startsWithIonAnnotation(); + } + + /** + * @return true if the macro evaluator is positioned on an encoding directive; otherwise, false. + */ + private boolean isPositionedOnEvaluatedEncodingDirective() { + if (macroEvaluatorIonReader.getType() != IonType.SEXP) { + return false; + } + Iterator annotations = macroEvaluatorIonReader.iterateTypeAnnotations(); + return annotations.hasNext() + && annotations.next().equals(SystemSymbols_1_1.ION.getText()); + } + + /** + * Grows the `symbols` array to the next power of 2 that will fit the current need. + */ + protected void growSymbolsArray(int shortfall) { + int newSize = nextPowerOfTwo(symbols.length + shortfall); + String[] resized = new String[newSize]; + System.arraycopy(symbols, 0, resized, 0, localSymbolMaxOffset + 1); + symbols = resized; + } + + /** + * Reset the local symbol table to the system symbol table. + */ + protected void resetSymbolTable() { + // The following line is not required for correctness, but it frees the references to the old symbols, + // potentially allowing them to be garbage collected. 
+ Arrays.fill(symbols, 0, localSymbolMaxOffset + 1, null); + localSymbolMaxOffset = -1; + } + + /** + * Reset the list of imported shared symbol tables. + */ + protected void resetImports(int major, int minor) { + // The core reader does not currently handle imports, though we may find this necessary as we add + // support for shared modules. + } + + /** + * Installs the given symbols at the end of the `symbols` array. + * @param newSymbols the symbols to install. + */ + protected void installSymbols(List newSymbols) { + if (newSymbols != null && !newSymbols.isEmpty()) { + int numberOfNewSymbols = newSymbols.size(); + int numberOfAvailableSlots = symbols.length - (localSymbolMaxOffset + 1); + int shortfall = numberOfNewSymbols - numberOfAvailableSlots; + if (shortfall > 0) { + growSymbolsArray(shortfall); + } + int i = localSymbolMaxOffset; + for (String newSymbol : newSymbols) { + symbols[++i] = newSymbol; + } + localSymbolMaxOffset += newSymbols.size(); + } + } + + /** + * @return the {@link EncodingContext} currently active, or {@code null}. + */ + EncodingContext getEncodingContext() { + return encodingContext; + } + + /** + * Reads encoding directives from the stream. Capable of resuming if not enough data is currently available to + * complete the encoding directive. 
+ */ + private class EncodingDirectiveReader { + + boolean isSymbolTableAppend = false; + boolean isMacroTableAppend = false; + List newSymbols = new ArrayList<>(8); + Map newMacros = new LinkedHashMap<>(); + MacroCompiler macroCompiler = new MacroCompiler(this::resolveMacro, readerAdapter); + + boolean isSymbolTableAlreadyClassified = false; + boolean isMacroTableAlreadyClassified = false; + + /** Resolves a macro reference, consulting the macros declared by the directive being read before the active encoding context's macro table. Returns null if neither contains it. */ private Macro resolveMacro(MacroRef macroRef) { + Macro newMacro = newMacros.get(macroRef); + if (newMacro == null) { + newMacro = encodingContext.getMacroTable().get(macroRef); + } + return newMacro; + } + + /** Fills the current value unless an e-expression is being evaluated. Returns true if fillValue() reported NEEDS_DATA or NEEDS_INSTRUCTION, i.e. the value cannot be read yet. */ private boolean valueUnavailable() { + if (isEvaluatingEExpression) { + return false; + } + Event event = fillValue(); + return event == Event.NEEDS_DATA || event == Event.NEEDS_INSTRUCTION; + } + + /** Requires the current value to be a symbol naming a directive and updates `state` accordingly. Every keyword other than MODULE currently results in an IonException. */ private void classifyDirective() { + errorIf(getEncodingType() != IonType.SYMBOL, "Ion encoding directives must start with a directive keyword."); + String name = getSymbolText(); + // TODO: Add support for `import` and `encoding` directives + if (SystemSymbols_1_1.MODULE.getText().equals(name)) { + state = State.IN_MODULE_DIRECTIVE_SEXP_AWAITING_MODULE_NAME; + } else if (SystemSymbols_1_1.IMPORT.getText().equals(name)) { + throw new IonException("'import' directive not yet supported"); + } else if (SystemSymbols_1_1.ENCODING.getText().equals(name)) { + throw new IonException("'encoding' directive not yet supported"); + } else { + throw new IonException(String.format("'%s' is not a valid directive keyword", name)); + } + } + + /** Updates `state` based on the clause keyword under the cursor (SYMBOL_TABLE or MACRO_TABLE). Any other clause results in an IonException. */ private void classifySexpWithinModuleDirective() { + String name = getSymbolText(); + if (SystemSymbols_1_1.SYMBOL_TABLE.getText().equals(name)) { + state = State.IN_SYMBOL_TABLE_SEXP; + } else if (SystemSymbols_1_1.MACRO_TABLE.getText().equals(name)) { + state = State.IN_MACRO_TABLE_SEXP; + } else { + // TODO: add support for 'module' and 'import' clauses + throw new IonException(String.format("'%s' clause not supported in module definition", name));
+ } + } + + /** + * Classifies a symbol table as either 'set' or 'append'. The caller must ensure the reader is positioned within + * a symbol table (after the symbol 'symbol_table') before calling. Upon return, the reader will be positioned + * on a list in the symbol table. + * NOTE(review): when the first value is the default module name, `state` becomes IN_APPENDED_SYMBOL_TABLE and + * this method returns without advancing, so the reader is not yet on a list in that case. + */ + private void classifySymbolTable() { + IonType type = getEncodingType(); + if (isSymbolTableAlreadyClassified) { + if (type != IonType.LIST) { // TODO support module name imports + throw new IonException("symbol_table s-expression must contain list(s) of symbols."); + } + state = State.ON_SYMBOL_TABLE_LIST; + return; + } + isSymbolTableAlreadyClassified = true; + if (IonType.isText(type)) { + if (DEFAULT_MODULE.equals(stringValue()) && !isSymbolTableAppend) { + state = State.IN_APPENDED_SYMBOL_TABLE; + } else { + throw new IonException("symbol_table s-expression must begin with either '_' or a list."); + } + } else if (type == IonType.LIST) { + state = State.ON_SYMBOL_TABLE_LIST; + } else { + throw new IonException("symbol_table s-expression must begin with either '_' or a list."); + } + } + + /** + * Classifies a macro table as either 'set' or 'append'. The caller must ensure the reader is positioned within + * a macro table (after the symbol 'macro_table') before calling. Upon return, the reader will be positioned + * on an s-expression in the macro table.
+ */ + private void classifyMacroTable() { + IonType type = getEncodingType(); + if (isMacroTableAlreadyClassified) { + if (type != IonType.SEXP) { + throw new IonException("macro_table s-expression must contain s-expression(s)."); + } + state = State.ON_MACRO_SEXP; + return; + } + isMacroTableAlreadyClassified = true; + if (IonType.isText(type)) { + if (DEFAULT_MODULE.equals(stringValue()) && !isMacroTableAppend) { + state = State.IN_APPENDED_MACRO_TABLE; + } else { + throw new IonException("macro_table s-expression must begin with either '_' or s-expression(s)."); + } + } else if (type == IonType.SEXP) { + localMacroMaxOffset = -1; + state = State.ON_MACRO_SEXP; + } else { + throw new IonException("macro_table s-expression must contain s-expression(s)."); + } + } + + private void stepOutOfSexpWithinEncodingDirective() { + stepOutOfContainer(); + state = State.IN_MODULE_DIRECTIVE_SEXP_BODY; + } + + /** + * Install `newMacros`, initializing a macro evaluator capable of evaluating them. + */ + private void installMacros() { + if (!isMacroTableAppend) { + encodingContext = new EncodingContext(new MutableMacroTable(MacroTable.empty())); + } else if (!encodingContext.isMutable()) { // we need to append, but can't + encodingContext = new EncodingContext(new MutableMacroTable(encodingContext.getMacroTable())); + } + + if (newMacros.isEmpty()) return; // our work is done + + encodingContext.getMacroTable().putAll(newMacros); + } + + /** + * Install any new symbols and macros, step out of the encoding directive, and resume reading raw values. + */ + private void finishEncodingDirective() { + if (!isSymbolTableAppend) { + resetSymbolTable(); + } + installSymbols(newSymbols); + installMacros(); + stepOutOfContainer(); + state = State.READING_VALUE; + } + + /** + * Navigate to the next value at the core level (without interpretation by subclasses). + * @return the event that conveys the result of the operation. 
+ */ + private Event coreNextValue() { + if (isEvaluatingEExpression) { + evaluateNext(); + return event; + } else { + return IonReaderContinuableCoreBinary.super.nextValue(); + } + } + + /** + * Utility function to make error cases more concise. + * @param condition the condition under which an IonException should be thrown + * @param errorMessage the message to use in the exception + */ + private void errorIf(boolean condition, String errorMessage) { + if (condition) { + throw new IonException(errorMessage); + } + } + + /** + * Read an encoding directive. If the stream ends before the encoding directive finishes, `event` will be + * `NEEDS_DATA` and this method can be called again when more data is available. + * Progress is tracked in `state`, so a later call resumes at the step that ran out of data. When the end of the + * directive is reached, the collected symbols and macros are installed and `state` returns to `READING_VALUE`. + * Note that the local variable `event` declared in this method shadows the reader's `event` field. + */ + void readEncodingDirective() { + Event event; + while (true) { + switch (state) { + case ON_DIRECTIVE_SEXP: + if (Event.NEEDS_DATA == stepIntoContainer()) { + return; + } + state = State.IN_DIRECTIVE_SEXP; + break; + case IN_DIRECTIVE_SEXP: + event = coreNextValue(); + if (event == Event.NEEDS_DATA) { + return; + } + errorIf(event == Event.END_CONTAINER, "invalid Ion directive; missing directive keyword"); + classifyDirective(); + break; + case IN_MODULE_DIRECTIVE_SEXP_AWAITING_MODULE_NAME: + event = coreNextValue(); + if (event == Event.NEEDS_DATA) { + return; + } + errorIf(event == Event.END_CONTAINER, "invalid module definition; missing module name"); + errorIf(getEncodingType() != IonType.SYMBOL, "invalid module definition; module name must be a symbol"); + // TODO: Support other module names + errorIf(!DEFAULT_MODULE.equals(getSymbolText()), "IonJava currently supports only the default module"); + state = State.IN_MODULE_DIRECTIVE_SEXP_BODY; + break; + case IN_MODULE_DIRECTIVE_SEXP_BODY: + event = coreNextValue(); + if (event == Event.NEEDS_DATA) { + return; + } + if (event == Event.END_CONTAINER) { + finishEncodingDirective(); + return; + } + if (getEncodingType() != IonType.SEXP) { + throw new IonException("module definitions must contain only
s-expressions."); + } + state = State.ON_SEXP_IN_MODULE_DIRECTIVE; + break; + case ON_SEXP_IN_MODULE_DIRECTIVE: + if (Event.NEEDS_DATA == stepIntoContainer()) { + return; + } + state = State.IN_SEXP_IN_MODULE_DIRECTIVE; + break; + case IN_SEXP_IN_MODULE_DIRECTIVE: + if (Event.NEEDS_DATA == coreNextValue()) { + return; + } + if (!IonType.isText(getEncodingType())) { + throw new IonException("S-expressions within module definitions must begin with a text token."); + } + state = State.CLASSIFYING_SEXP_IN_MODULE_DIRECTIVE; + break; + case CLASSIFYING_SEXP_IN_MODULE_DIRECTIVE: + if (valueUnavailable()) { + return; + } + classifySexpWithinModuleDirective(); + break; + case IN_SYMBOL_TABLE_SEXP: + event = coreNextValue(); + if (event == Event.NEEDS_DATA) { + return; + } + if (event == Event.END_CONTAINER) { + stepOutOfSexpWithinEncodingDirective(); + break; + } + classifySymbolTable(); + break; + case IN_APPENDED_SYMBOL_TABLE: + event = coreNextValue(); + if (Event.NEEDS_DATA == event) { + return; + } + isSymbolTableAppend = true; + if (Event.END_CONTAINER == event) { + // Nothing to append.
+ stepOutOfSexpWithinEncodingDirective(); + break; + } + if (getEncodingType() != IonType.LIST) { + throw new IonException("symbol_table s-expression must begin with a list."); + } + state = State.ON_SYMBOL_TABLE_LIST; + break; + case ON_SYMBOL_TABLE_LIST: + if (Event.NEEDS_DATA == stepIntoContainer()) { + return; + } + state = State.IN_SYMBOL_TABLE_LIST; + break; + case IN_SYMBOL_TABLE_LIST: + event = coreNextValue(); + if (event == Event.NEEDS_DATA) { + return; + } + if (event == Event.END_CONTAINER) { + stepOutOfContainer(); + state = State.IN_SYMBOL_TABLE_SEXP; + break; + } + if (!IonType.isText(getEncodingType())) { + throw new IonException("The symbol_table must contain text."); + } + state = State.ON_SYMBOL; + break; + case ON_SYMBOL: + if (valueUnavailable()) { + return; + } + newSymbols.add(stringValue()); + state = State.IN_SYMBOL_TABLE_LIST; + break; + case IN_MACRO_TABLE_SEXP: + event = coreNextValue(); + if (event == Event.NEEDS_DATA) { + return; + } + if (event == Event.END_CONTAINER) { + stepOutOfSexpWithinEncodingDirective(); + break; + } + classifyMacroTable(); + break; + case IN_APPENDED_MACRO_TABLE: + event = coreNextValue(); + if (Event.NEEDS_DATA == event) { + return; + } + isMacroTableAppend = true; + if (event == Event.END_CONTAINER) { + // Nothing to append + stepOutOfSexpWithinEncodingDirective(); + break; + } if (getEncodingType() != IonType.SEXP) { + throw new IonException("macro_table s-expression must contain s-expressions."); + } + state = State.ON_MACRO_SEXP; + break; + case ON_MACRO_SEXP: + if (valueUnavailable()) { + return; + } + state = State.COMPILING_MACRO; + Macro newMacro = macroCompiler.compileMacro(); + newMacros.put(MacroRef.byId(++localMacroMaxOffset), newMacro); + String macroName = macroCompiler.getMacroName(); + if (macroName != null) { + newMacros.put(MacroRef.byName(macroName), newMacro); + } + state = State.IN_MACRO_TABLE_SEXP; + break; + default: + throw new IllegalStateException(state.toString()); + } + } + } + +
void resetState() { + isSymbolTableAppend = false; + isSymbolTableAlreadyClassified = false; + newSymbols.clear(); + isMacroTableAppend = false; + isMacroTableAlreadyClassified = false; + newMacros.clear(); + } + } + + /** + * The reader's state. `READING_VALUE` indicates that the reader is reading a raw value; all other states + * indicate that the reader is in the middle of reading an encoding directive. + */ + private enum State { + ON_DIRECTIVE_SEXP, + IN_DIRECTIVE_SEXP, + IN_MODULE_DIRECTIVE_SEXP_AWAITING_MODULE_NAME, + IN_MODULE_DIRECTIVE_SEXP_BODY, + ON_SEXP_IN_MODULE_DIRECTIVE, + IN_SEXP_IN_MODULE_DIRECTIVE, + CLASSIFYING_SEXP_IN_MODULE_DIRECTIVE, + IN_SYMBOL_TABLE_SEXP, + IN_APPENDED_SYMBOL_TABLE, + ON_SYMBOL_TABLE_LIST, + IN_SYMBOL_TABLE_LIST, + ON_SYMBOL, + IN_MACRO_TABLE_SEXP, + IN_APPENDED_MACRO_TABLE, + ON_MACRO_SEXP, + COMPILING_MACRO, + READING_VALUE, + } + + // The current state. + private State state = State.READING_VALUE; + + /** + * Reads macro invocation arguments as expressions and feeds them to the MacroEvaluator. + */ + private class BinaryEExpressionArgsReader extends EExpressionArgsReader { + + BinaryEExpressionArgsReader() { + super (readerAdapter); + } + + /** + * Reads a single (non-grouped) expression. + * @param parameter the parameter. + * @param expressions receives the expressions as they are materialized. + */ + private void readSingleExpression(Macro.Parameter parameter, List expressions) { + Macro.ParameterEncoding encoding = parameter.getType(); + if (encoding == Macro.ParameterEncoding.Tagged) { + IonReaderContinuableCoreBinary.super.nextValue(); + } else { + nextTaglessValue(encoding.taglessEncodingKind); + } + if (event == Event.NEEDS_DATA) { + throw new UnsupportedOperationException("TODO: support continuable parsing of macro arguments."); + } + readValueAsExpression(false, expressions); + } + + /** + * Reads a group expression. + * @param parameter the parameter. 
+ * @param expressions receives the expressions as they are materialized. + */ + private void readGroupExpression(Macro.Parameter parameter, List expressions, boolean requireSingleton) { + Macro.ParameterEncoding encoding = parameter.getType(); + if (encoding == Macro.ParameterEncoding.Tagged) { + enterTaggedArgumentGroup(); + } else { + enterTaglessArgumentGroup(encoding.taglessEncodingKind); + } + if (event == Event.NEEDS_DATA) { + throw new UnsupportedOperationException("TODO: support continuable parsing of macro arguments."); + } + int startIndex = expressions.size(); + expressions.add(Expression.Placeholder.INSTANCE); + boolean isSingleton = true; + while (nextGroupedValue() != Event.NEEDS_INSTRUCTION || isMacroInvocation()) { + readValueAsExpression(false, expressions); + isSingleton = false; + } + if (requireSingleton && !isSingleton) { + throw new IonException(String.format( + "Parameter %s with cardinality %s must not contain multiple expressions.", + parameter.getVariableName(), + parameter.getCardinality().name()) + ); + } + if (exitArgumentGroup() == Event.NEEDS_DATA) { + throw new UnsupportedOperationException("TODO: support continuable parsing of macro arguments."); + } + expressions.set(startIndex, new Expression.ExpressionGroup(startIndex, expressions.size())); + } + + /** + * Adds an expression that conveys that the parameter was not present (void). + * @param expressions receives the expressions as they are materialized. 
+ */ + private void addVoidExpression(List expressions) { + int startIndex = expressions.size(); + expressions.add(new Expression.ExpressionGroup(startIndex, startIndex + 1)); + } + + /** Reads the argument for one parameter, choosing between a single expression, an expression group, or a void expression based on the parameter's cardinality and its presence value. */ @Override + protected void readParameter(Macro.Parameter parameter, long parameterPresence, List expressions, boolean isTrailing) { + switch (parameter.getCardinality()) { + case ZeroOrOne: + if (parameterPresence == PresenceBitmap.EXPRESSION) { + readSingleExpression(parameter, expressions); + } else if (parameterPresence == PresenceBitmap.VOID) { + addVoidExpression(expressions); + } else if (parameterPresence == PresenceBitmap.GROUP) { + readGroupExpression(parameter, expressions, true); + } else { + throw new IllegalStateException("Unreachable: presence bitmap validated but reserved bits found."); + } + break; + case ExactlyOne: + // TODO determine if a group with a single element is valid here. + readSingleExpression(parameter, expressions); + break; + case OneOrMore: + if (parameterPresence == PresenceBitmap.EXPRESSION) { + readSingleExpression(parameter, expressions); + } else if (parameterPresence == PresenceBitmap.GROUP) { + readGroupExpression(parameter, expressions, false); + } else { + throw new IonException(String.format( + "Invalid void argument for non-voidable parameter: %s", + parameter.getVariableName()) + ); + } + break; + case ZeroOrMore: + if (parameterPresence == PresenceBitmap.EXPRESSION) { + readSingleExpression(parameter, expressions); + } else if (parameterPresence == PresenceBitmap.GROUP) { + readGroupExpression(parameter, expressions, false); + } else if (parameterPresence == PresenceBitmap.VOID) { + addVoidExpression(expressions); + } else { + throw new IllegalStateException("Unreachable: presence bitmap validated but reserved bits found."); + } + break; + } + } + + /** Looks up the macro being invoked: system invocations are resolved through SystemMacro, all others through the active encoding context's macro table. Throws if the macro cannot be found. */ @Override + protected Macro loadMacro() { + Macro macro; + long id = getMacroInvocationId(); + if (isSystemInvocation()) { + macro = SystemMacro.get((int) id); + if (macro == null) { + throw
new UnsupportedOperationException("System macro " + id + " not yet supported."); + } + } else { + if (id > Integer.MAX_VALUE) { + throw new IonException("Macro addresses larger than 2147483647 are not supported by this implementation."); + } + MacroRef address = MacroRef.byId((int) id); + macro = encodingContext.getMacroTable().get(address); + + if (macro == null) { + throw new IonException(String.format("Encountered an unknown macro address: %d.", id)); + } + } + return macro; + } + + /** Initializes a presence bitmap for the signature and, only if it occupies at least one byte, reads it from the buffer at valueMarker.startIndex and validates it. */ @Override + protected PresenceBitmap loadPresenceBitmapIfNecessary(List signature) { + PresenceBitmap presenceBitmap = new PresenceBitmap(); + presenceBitmap.initialize(signature); + if (presenceBitmap.getByteSize() > 0) { + if (fillArgumentEncodingBitmap(presenceBitmap.getByteSize()) == IonCursor.Event.NEEDS_DATA) { + throw new UnsupportedOperationException("TODO: support continuable parsing of AEBs."); + } + presenceBitmap.readFrom(buffer, (int) valueMarker.startIndex); + presenceBitmap.validate(); + } + return presenceBitmap; + } + + @Override + protected boolean isMacroInvocation() { + return valueTid != null && valueTid.isMacroInvocation; + } + + @Override + protected boolean isContainerAnExpressionGroup() { + // In binary, expression groups are denoted by the AEB rather than by container syntax.
+ return false; + } + + /** Returns the annotations of the current value as a new list, or an empty list if the value has none. */ @Override + protected List getAnnotations() { + if (!hasAnnotations) { + return Collections.emptyList(); + } + List out = new ArrayList<>(); + consumeAnnotationTokens(out::add); + return out; + } + + /** Advances the core reader; returns false only when the end of the current container was reached. */ @Override + protected boolean nextRaw() { + return IonReaderContinuableCoreBinary.super.nextValue() != Event.END_CONTAINER; + } + + @Override + protected void stepInRaw() { + IonReaderContinuableCoreBinary.super.stepIntoContainer(); + } + + @Override + protected void stepOutRaw() { + IonReaderContinuableCoreBinary.super.stepOutOfContainer(); + } + + @Override + protected void stepIntoEExpression() { + IonReaderContinuableCoreBinary.super.stepIntoEExpression(); + } + + @Override + protected void stepOutOfEExpression() { + IonReaderContinuableCoreBinary.super.stepOutOfEExpression(); + } + } + + /** + * For minor version 0 the first annotation symbol ID is read at the start of the annotation sequence, with + * `peekIndex` saved and restored around the read; for minor version 1 the first annotation token marker is + * compared against the system symbol. + * @return true if current value has a sequence of annotations that begins with `$ion_symbol_table`; otherwise, + * false. + */ + protected boolean startsWithIonSymbolTable() { + if (minorVersion == 0 && annotationSequenceMarker.startIndex >= 0) { + long savedPeekIndex = peekIndex; + peekIndex = annotationSequenceMarker.startIndex; + int sid = readVarUInt_1_0(); + peekIndex = savedPeekIndex; + return ION_SYMBOL_TABLE_SID == sid; + } else if (minorVersion == 1) { + Marker marker = annotationTokenMarkers.get(0); + return matchesSystemSymbol_1_1(marker, SystemSymbols_1_1.ION_SYMBOL_TABLE); + } + return false; + } + + /** + * @return true if the reader is positioned on a symbol table; otherwise, false. + */ + protected boolean isPositionedOnSymbolTable() { + return hasAnnotations && + getEncodingType() == IonType.STRUCT && + startsWithIonSymbolTable(); + } + + /** + * Consumes the next value (if any) from the MacroEvaluator, setting `event` based on the result. + * @return true if evaluation of the current invocation has completed; otherwise, false.
+ */ + private boolean evaluateNext() { + IonType type = macroEvaluatorIonReader.next(); + if (type == null) { + if (macroEvaluatorIonReader.getDepth() == 0) { + // Evaluation of this macro is complete. Resume reading from the stream. + isEvaluatingEExpression = false; + event = Event.NEEDS_INSTRUCTION; + return true; + } else { + event = Event.END_CONTAINER; + } + } else { + if (IonType.isContainer(type)) { + event = Event.START_CONTAINER; + } else { + event = Event.START_SCALAR; + } + } + return false; + } + + /** Prepares the writer and then transcodes values until transcodeNext() returns false. */ @Override + public void transcodeAllTo(MacroAwareIonWriter writer) throws IOException { + prepareTranscodeTo(writer); + while (transcodeNext()); + } + + /** Registers an IVM callback that resets the encoding context and starts a new encoding segment on the writer, then records the writer as the transcoding target. */ @Override + public void prepareTranscodeTo(MacroAwareIonWriter writer) { + registerIvmNotificationConsumer((major, minor) -> { + resetEncodingContext(); + // Which IVM to write is inherent to the writer implementation. + // We don't have a single implementation that writes both formats. + writer.startEncodingSegmentWithIonVersionMarker(); + }); + macroAwareTranscoder = writer; + } + + /** Transcodes the next value literal, e-expression, or encoding directive to the writer set by prepareTranscodeTo. Returns false when the reader needs more data or reaches the end of the current container. */ @Override + public boolean transcodeNext() throws IOException { + if (macroAwareTranscoder == null) { + throw new IllegalArgumentException("prepareTranscodeTo must be called before transcodeNext."); + } + // NOTE: this method is structured very similarly to nextValue(). During performance analysis, we should + // see if the methods can be unified without sacrificing hot path performance. Performance of this method + // is not considered critical.
+ lobBytesRead = 0; + while (true) { + if (parent == null || state != State.READING_VALUE) { + boolean isEncodingDirective = false; + if (state != State.READING_VALUE && state != State.COMPILING_MACRO) { + boolean isEncodingDirectiveFromEExpression = isEvaluatingEExpression; + encodingDirectiveReader.readEncodingDirective(); + if (state != State.READING_VALUE) { + throw new IonException("Unexpected EOF when writing encoding-level value."); + } + // If the encoding directive was expanded from an e-expression, that expression has already been + // written. In that case, just make sure the writer is using the new context. Otherwise, also write + // the encoding directive. + macroAwareTranscoder.startEncodingSegmentWithEncodingDirective( + encodingDirectiveReader.newMacros, + encodingDirectiveReader.isMacroTableAppend, + encodingDirectiveReader.newSymbols, + encodingDirectiveReader.isSymbolTableAppend, + isEncodingDirectiveFromEExpression + ); + isEncodingDirective = true; + } + if (isEvaluatingEExpression) { + if (evaluateNext()) { + if (isEncodingDirective) { + continue; + } + // This is the end of a top-level macro invocation that expanded to a user value. + return true; + } + } else { + event = super.nextValue(); + } + if (minorVersion == 1 && parent == null && isPositionedOnEncodingDirective()) { + encodingDirectiveReader.resetState(); + state = State.ON_DIRECTIVE_SEXP; + continue; + } + } else if (isEvaluatingEExpression) { + if (evaluateNext()) { + // This is the end of a contained macro invocation; continue iterating through the parent container. + continue; + } + } else { + event = super.nextValue(); + } + if (valueTid != null && valueTid.isMacroInvocation) { + expressionArgsReader.beginEvaluatingMacroInvocation(macroEvaluator); + macroEvaluatorIonReader.transcodeArgumentsTo(macroAwareTranscoder); + isEvaluatingEExpression = true; + if (evaluateNext()) { + // This macro invocation expands to nothing; continue iterating until a user value is found.
+ continue; + } + if (parent == null && isPositionedOnEvaluatedEncodingDirective()) { + encodingDirectiveReader.resetState(); + state = State.ON_DIRECTIVE_SEXP; + continue; + } + } + if (isEvaluatingEExpression) { + // EExpressions are not expanded and provided to the writer; only the raw encoding is transferred. + continue; + } + break; + } + if (event == Event.NEEDS_DATA || event == Event.END_CONTAINER) { + return false; + } + transcodeValueLiteral(); + return true; + } + + /** + * Transcodes a value literal to the macroAwareTranscoder. The caller must ensure that the reader is positioned + * on a value literal (i.e. a scalar or container value not expanded from an e-expression) before calling this + * method. + * @throws IOException if thrown by the writer during transcoding. + */ + private void transcodeValueLiteral() throws IOException { + if (parent == null && isPositionedOnSymbolTable()) { + if (minorVersion > 0) { + // TODO finalize handling of Ion 1.0-style symbol tables in Ion 1.1: https://github.com/amazon-ion/ion-java/issues/1002 + throw new IonException("Macro-aware transcoding of Ion 1.1 data containing Ion 1.0-style symbol tables not yet supported."); + } + // Ion 1.0 symbol tables are transcoded verbatim for now; this may change depending on the resolution to + // https://github.com/amazon-ion/ion-java/issues/1002. + macroAwareTranscoder.writeValue(asIonReader); + } else if (event == Event.START_CONTAINER && !isNullValue()) { + // Containers need to be transcoded recursively to avoid expanding macro invocations at any depth. + if (isInStruct()) { + macroAwareTranscoder.setFieldNameSymbol(getFieldNameSymbol()); + } + macroAwareTranscoder.setTypeAnnotationSymbols(asIonReader.getTypeAnnotationSymbols()); + macroAwareTranscoder.stepIn(getEncodingType()); + super.stepIntoContainer(); + while (transcodeNext()); // TODO make this iterative.
+ super.stepOutOfContainer(); + macroAwareTranscoder.stepOut(); + } else { + // The reader is now positioned on a scalar literal. Write the value. + // Note: writeValue will include any field name and/or annotations on the scalar. + macroAwareTranscoder.writeValue(asIonReader); + } + } + + /** Advances to the next user-visible value, transparently reading encoding directives and evaluating macro invocations along the way. Returns NEEDS_DATA if an encoding directive could not be completed with the data available. */ @Override + public Event nextValue() { + lobBytesRead = 0; + while (true) { + if (parent == null || state != State.READING_VALUE) { + if (state != State.READING_VALUE && state != State.COMPILING_MACRO) { + encodingDirectiveReader.readEncodingDirective(); + if (state != State.READING_VALUE) { + event = Event.NEEDS_DATA; + break; + } + } + if (isEvaluatingEExpression) { + if (evaluateNext()) { + continue; + } + } else { + event = super.nextValue(); + } + if (minorVersion == 1 && parent == null && isPositionedOnEncodingDirective()) { + encodingDirectiveReader.resetState(); + state = State.ON_DIRECTIVE_SEXP; + continue; + } + } else if (isEvaluatingEExpression) { + if (evaluateNext()) { + continue; + } + } else { + event = super.nextValue(); + } + if (valueTid != null && valueTid.isMacroInvocation) { + if (encodingContext == EncodingContext.getDefault() && !isSystemInvocation()) { + // The default encoding context is active and this is not a system macro invocation, so there is no active user macro table. Do not attempt evaluation, + // but allow the user to do a raw read of the parameters if this is a core-level reader. + // TODO this is used in the tests for the core binary reader. If it cannot be useful elsewhere, remove + // and refactor the tests.
+ if (this instanceof IonReaderContinuableApplicationBinary) { + throw new IonException("The user-level binary reader encountered a macro invocation without an active macro table."); + } + } else { + expressionArgsReader.beginEvaluatingMacroInvocation(macroEvaluator); + isEvaluatingEExpression = true; + if (evaluateNext()) { + continue; + } + if (parent == null && isPositionedOnEvaluatedEncodingDirective()) { + encodingDirectiveReader.resetState(); + state = State.ON_DIRECTIVE_SEXP; + continue; + } + } + } + break; + } + return event; + } + + /** While an e-expression is being evaluated, reports VALUE_READY without touching the stream; otherwise defers to the superclass. */ @Override + public Event fillValue() { + if (isEvaluatingEExpression) { + event = Event.VALUE_READY; + return event; + } + return super.fillValue(); + } + + /** While an e-expression is being evaluated, steps into the container provided by the macro evaluator; otherwise defers to the superclass. */ @Override + public Event stepIntoContainer() { + if (isEvaluatingEExpression) { + macroEvaluatorIonReader.stepIn(); + event = Event.NEEDS_INSTRUCTION; + return event; + } + return super.stepIntoContainer(); + } + + /** While an e-expression is being evaluated, steps out of the container provided by the macro evaluator; otherwise defers to the superclass. */ @Override + public Event stepOutOfContainer() { + if (isEvaluatingEExpression) { + macroEvaluatorIonReader.stepOut(); + event = Event.NEEDS_INSTRUCTION; + return event; + } + return super.stepOutOfContainer(); + } + + /** + * Prepares the ByteBuffer to wrap a slice of the underlying buffer. + * @param startIndex the start of the slice. + * @param endIndex the end of the slice. + * @return the ByteBuffer. + */ + ByteBuffer prepareByteBuffer(long startIndex, long endIndex) { + // Setting the limit to the capacity first is required because setting the position will fail if the new + // position is outside the limit. + byteBuffer.limit(buffer.length); + byteBuffer.position((int) startIndex); + byteBuffer.limit((int) endIndex); + return byteBuffer; + } + + + /** + * Reads a UInt (big-endian). + * @param startIndex the index of the first byte in the UInt value. + * @param endIndex the index of the first byte after the end of the UInt value. + * @return the value.
+ */ + private long readUInt(long startIndex, long endIndex) { + long result = 0; + for (long i = startIndex; i < endIndex; i++) { + result = (result << VALUE_BITS_PER_UINT_BYTE) | buffer[(int) i] & SINGLE_BYTE_MASK; + } + return result; + } + + /** + * Reads a FixedUInt (little-endian), for the range of bytes given by `startInclusive` and `endExclusive`. + * @return the value. + */ + private long readFixedUInt_1_1(long startInclusive, long endExclusive) { + long result = 0; + for (int i = (int) startInclusive; i < endExclusive; i++) { + result |= ((long) (buffer[i] & SINGLE_BYTE_MASK) << ((i - startInclusive) * VALUE_BITS_PER_UINT_BYTE)); + } + return result; + } + + @Override + public boolean isNullValue() { + if (isEvaluatingEExpression) { + return macroEvaluatorIonReader.isNullValue(); + } + return valueTid != null && valueTid.isNull; + } + + /** + * Performs any logic necessary to prepare a scalar value for parsing. Subclasses may wish to provide additional + * logic, such as ensuring that the value is present in the buffer. + */ + void prepareScalar() { + if (valueMarker.endIndex > limit) { + throw new IonException("Malformed data: declared length exceeds the number of bytes remaining in the stream."); + } + } + + /** + * Determines whether the tagless integer starting at `valueMarker.startIndex` and ending at `valueMarker.endIndex` + * crosses a type boundary. Callers must only invoke this method when the integer's size is known to be either + * 4 or 8 bytes. + * @return true if the value fits in the Java integer type that matches its Ion serialized size; false if it + * requires the next larger size. + */ + private boolean classifyFixedWidthTaglessInteger_1_1() { + if (!taglessType.isUnsigned || taglessType.typeID.variableLength) { + return true; + } + // UInt values with the most significant bit set will not fit in the signed Java primitive of the same width. 
+ return buffer[(int) valueMarker.endIndex - 1] >= 0; + } + + /** + * Selects and returns the size of the current integer value from the given options. Callers must only invoke this + * method when the integer's size is known to be either 4 or 8 bytes, and it is the caller's responsibility to + * provide correct values to 'smaller' and 'larger'. + * @param smaller the smaller of the possible sizes. + * @param larger the larger of the possible sizes. + * @return the matching size. + */ + private IntegerSize classifyFixedWidthInteger(IntegerSize smaller, IntegerSize larger) { + if (minorVersion == 0) { + return classifyInteger_1_0() ? smaller : larger; + } + if (taglessType == null) { + return smaller; + } + return classifyFixedWidthTaglessInteger_1_1() ? smaller : larger; + } + + // The maximum most-significant byte of a positive 5-byte FlexInt or FlexUInt value that can fit in + // a Java int. Integer.MAX_VALUE is 0x7FFFFFFF and a 5-byte Flex integer requires a right-shift of 5 bits. + // 0x0FFF... >> 5 == 0x007F..., so all less significant byte values are guaranteed to fit and therefore do not + // need to be examined individually. + private static final int MAX_POSITIVE_FLEX_MSB_JAVA_INT = 0x0F; + + // The maximum most-significant byte of a positive 10-byte FlexInt or FlexUInt value that can fit in + // a Java long. Long.MAX_VALUE is 0x7FFFFFFFFFFFFFFF and a 10-byte Flex integer requires a right-shift of 10 bits. + // 0x01FFFF... >> 10 == 0x00007F..., so all less significant byte values are guaranteed to fit and therefore do not + // need to be examined individually. + private static final int MAX_POSITIVE_FLEX_MSB_JAVA_LONG = 0x01; + + // The minimum most-significant byte of a negative 5-byte FlexInt that can fit in a Java int. + // Integer.MIN_VALUE is 0x80000000 and a 5-byte FlexInt requires a right-shift of 5 bits. + // (int)(0xF000... >> 5) == 0x80...
Any bits set in the less significant bytes would lessen the magnitude + // and therefore do not need to be examined individually. + private static final int MIN_NEGATIVE_FLEX_MSB_JAVA_INT = (byte) 0xF0; + + // The minimum most-significant byte of a negative 10-byte FlexInt that can fit in a Java long. + // Long.MIN_VALUE is 0x8000000000000000 and a 10-byte FlexInt requires a right-shift of 10 bits. + // (long) (0xFE0000... >> 10) == 0x80... Any bits set in the less significant bytes would lessen the magnitude + // and therefore do not need to be examined individually. + private static final int MIN_NEGATIVE_FLEX_MSB_JAVA_LONG = (byte) 0xFE; + + /** + * Classifies a 5- or 10-byte FlexInt or FlexUInt according to the Java integer size required to represent it without + * data loss. + * @param maxPositiveMsb the maximum most-significant byte of a positive encoded integer that would allow the + * value to fit in the smaller of the two Java types applicable to the relevant boundary. + * @param minNegativeMsb the minimum most-significant byte of a negative encoded integer that would allow the + * value to fit in the smaller of the two Java types applicable to the relevant boundary. + * @return true if the encoded value fits in the smaller of the two Java types applicable to the relevant boundary; + * otherwise, false. + */ + private boolean classifyVariableWidthTaglessIntegerAtBoundary_1_1(int maxPositiveMsb, int minNegativeMsb) { + int mostSignificantByte = buffer[(int) valueMarker.endIndex - 1]; + if (taglessType.isUnsigned) { + return (mostSignificantByte & SINGLE_BYTE_MASK) <= maxPositiveMsb; + } + return mostSignificantByte >= minNegativeMsb && mostSignificantByte <= maxPositiveMsb; + } + + /** + * Classifies the current variable-length integer (FlexInt or FlexUInt) according to the IntegerSize required to + * represent it without data loss. For efficiency, does not attempt to find the smallest-possible size for + * overpadded representations.
+ * @param length the byte length of the FlexInt or FlexUInt to classify. + * @return an IntegerSize capable of holding the value without data loss. + */ + private IntegerSize classifyVariableWidthTaglessInteger_1_1(int length) { + if (length < 5) { + // Flex integers of less than 5 bytes cannot hit the Java int boundaries. + return IntegerSize.INT; + } + if (length == 5) { + return classifyVariableWidthTaglessIntegerAtBoundary_1_1(MAX_POSITIVE_FLEX_MSB_JAVA_INT, MIN_NEGATIVE_FLEX_MSB_JAVA_INT) + ? IntegerSize.INT + : IntegerSize.LONG; + } + if (length < 10) { + // Flex integers of less than 10 bytes cannot hit the Java long boundaries. + return IntegerSize.LONG; + } + if (length == 10) { + return classifyVariableWidthTaglessIntegerAtBoundary_1_1(MAX_POSITIVE_FLEX_MSB_JAVA_LONG, MIN_NEGATIVE_FLEX_MSB_JAVA_LONG) + ? IntegerSize.LONG + : IntegerSize.BIG_INTEGER; + } + return IntegerSize.BIG_INTEGER; + } + + @Override + public IntegerSize getIntegerSize() { + if (isEvaluatingEExpression) { + return macroEvaluatorIonReader.getIntegerSize(); + } + if (valueTid == null || valueTid.type != IonType.INT || valueTid.isNull) { + return null; + } + prepareScalar(); + int length; + if (valueTid.variableLength) { + length = (int) (valueMarker.endIndex - valueMarker.startIndex); + if (taglessType != null) { + // FlexUInt or FlexInt + return classifyVariableWidthTaglessInteger_1_1(length); + } + } else { + length = valueTid.length; + } + if (length < 0) { + return IntegerSize.BIG_INTEGER; + } else if (length < INT_SIZE_IN_BYTES) { + return IntegerSize.INT; + } else if (length == INT_SIZE_IN_BYTES) { + return classifyFixedWidthInteger(IntegerSize.INT, IntegerSize.LONG); + } else if (length < LONG_SIZE_IN_BYTES) { + return IntegerSize.LONG; + } else if (length == LONG_SIZE_IN_BYTES) { + return classifyFixedWidthInteger(IntegerSize.LONG, IntegerSize.BIG_INTEGER); + } + return IntegerSize.BIG_INTEGER; + } + + private void throwDueToInvalidType(IonType type) { + throw new 
IllegalStateException( + String.format("Invalid type. Required %s but found %s.", type, valueTid == null ? null : valueTid.type) + ); + } + + @Override + public int byteSize() { + if (isEvaluatingEExpression) { + return macroEvaluatorIonReader.byteSize(); + } + if (valueTid == null || !IonType.isLob(valueTid.type) || valueTid.isNull) { + throw new IonException("Reader must be positioned on a blob or clob."); + } + prepareScalar(); + return (int) (valueMarker.endIndex - valueMarker.startIndex); + } + + @Override + public byte[] newBytes() { + if (isEvaluatingEExpression) { + return macroEvaluatorIonReader.newBytes(); + } + byte[] bytes = new byte[byteSize()]; + // The correct number of bytes will be requested from the buffer, so the limit is set at the capacity to + // avoid having to calculate a limit. + System.arraycopy(buffer, (int) valueMarker.startIndex, bytes, 0, bytes.length); + return bytes; + } + + @Override + public int getBytes(byte[] bytes, int offset, int len) { + if (isEvaluatingEExpression) { + return macroEvaluatorIonReader.getBytes(bytes, offset, len); + } + int length = Math.min(len, byteSize() - lobBytesRead); + // The correct number of bytes will be requested from the buffer, so the limit is set at the capacity to + // avoid having to calculate a limit. + System.arraycopy(buffer, (int) (valueMarker.startIndex + lobBytesRead), bytes, offset, length); + lobBytesRead += length; + return length; + } + + @Override + public void resetEncodingContext() { + resetSymbolTable(); + int minorVersion = getIonMinorVersion(); + resetImports(getIonMajorVersion(), minorVersion); + if (minorVersion > 0) { + // TODO reset macro table + installSymbols(SystemSymbols_1_1.allSymbolTexts()); + } + } + + /** + * Loads the scalar converter with an integer value that fits the Ion int on which the reader is positioned. 
+ */ + private void prepareToConvertIntValue() { + if (getIntegerSize() == IntegerSize.BIG_INTEGER) { + scalarConverter.addValue(bigIntegerValue()); + scalarConverter.setAuthoritativeType(_Private_ScalarConversions.AS_TYPE.bigInteger_value); + } else { + scalarConverter.addValue(longValue()); + scalarConverter.setAuthoritativeType(_Private_ScalarConversions.AS_TYPE.long_value); + } + } + + @Override + public BigDecimal bigDecimalValue() { + if (isEvaluatingEExpression) { + return macroEvaluatorIonReader.bigDecimalValue(); + } + BigDecimal value = null; + if (valueTid.type == IonType.DECIMAL) { + if (valueTid.isNull) { + return null; + } + prepareScalar(); + peekIndex = valueMarker.startIndex; + if (peekIndex >= valueMarker.endIndex) { + value = BigDecimal.ZERO; + } else { + value = minorVersion == 0 ? readBigDecimal_1_0() : readBigDecimal_1_1(); + } + } else if (valueTid.type == IonType.INT) { + if (valueTid.isNull) { + return null; + } + prepareToConvertIntValue(); + scalarConverter.cast(scalarConverter.get_conversion_fnid(_Private_ScalarConversions.AS_TYPE.decimal_value)); + value = scalarConverter.getBigDecimal(); + scalarConverter.clear(); + } else if (valueTid.type == IonType.FLOAT) { + scalarConverter.addValue(doubleValue()); + scalarConverter.setAuthoritativeType(_Private_ScalarConversions.AS_TYPE.double_value); + scalarConverter.cast(scalarConverter.get_conversion_fnid(_Private_ScalarConversions.AS_TYPE.decimal_value)); + value = scalarConverter.getDecimal(); + scalarConverter.clear(); + } else { + throwDueToInvalidType(IonType.DECIMAL); + } + return value; + } + + @Override + public Decimal decimalValue() { + if (isEvaluatingEExpression) { + return macroEvaluatorIonReader.decimalValue(); + } + Decimal value = null; + if (valueTid.type == IonType.DECIMAL) { if (valueTid.isNull) { return null; } @@ -641,6 +2402,9 @@ public Decimal decimalValue() { @Override public long longValue() { + if (isEvaluatingEExpression) { + return 
macroEvaluatorIonReader.longValue(); + } long value; if (valueTid.isNull) { throwDueToInvalidType(IonType.INT); @@ -671,6 +2435,9 @@ public long longValue() { @Override public BigInteger bigIntegerValue() { + if (isEvaluatingEExpression) { + return macroEvaluatorIonReader.bigIntegerValue(); + } BigInteger value; if (valueTid.type == IonType.INT) { if (valueTid.isNull) { @@ -704,8 +2471,54 @@ public int intValue() { return (int) longValue(); } + // IEEE-754 half-precision (s=sign, e=exponent, f=fraction): seee_eeff_ffff_ffff + private static final int FLOAT_16_SIGN_MASK = 0b1000_0000_0000_0000; + private static final int FLOAT_16_EXPONENT_MASK = 0b0111_1100_0000_0000; + private static final int FLOAT_16_FRACTION_MASK = 0b0000_0011_1111_1111; + + // float64 bias: 1023; float16 bias: 15. Shift left to align with the masked exponent bits. + private static final int FLOAT_16_TO_64_EXPONENT_BIAS_CONVERSION = (1023 - 15) << Integer.numberOfTrailingZeros(FLOAT_16_EXPONENT_MASK); + // The float16 sign bit has bit index 15; the float64 sign bit has bit index 63. + private static final int FLOAT_16_TO_64_SIGN_SHIFT = 63 - 15; + // The 5 float16 exponent bits start at index 10; the 11 float64 exponent bits start at index 52. + private static final int FLOAT_16_TO_64_EXPONENT_SHIFT = 52 - 10; + // The most significant float16 fraction bit is at index 9; the most significant float64 fraction bit is at index 51. + private static final int FLOAT_16_TO_64_FRACTION_SHIFT = 51 - 9; + + /** + * Reads the next two bytes from the given ByteBuffer as a 16-bit float, returning the value as a Java double. + * @param byteBuffer a buffer positioned at the first byte of the 16-bit float. + * @return the value. 
+ */ + private static double readFloat16(ByteBuffer byteBuffer) { + int bits = byteBuffer.getShort() & TWO_BYTE_MASK; + int sign = bits & FLOAT_16_SIGN_MASK; + int exponent = bits & FLOAT_16_EXPONENT_MASK; + int fraction = bits & FLOAT_16_FRACTION_MASK; + if (exponent == 0) { + if (fraction == 0) { + return sign == 0 ? 0e0 : -0e0; + } + // Denormalized + throw new UnsupportedOperationException("Support for denormalized half-precision floats not yet added."); + } else if ((exponent ^ FLOAT_16_EXPONENT_MASK) == 0) { + if (fraction == 0) { + return sign == 0 ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY; + } + return Double.NaN; + } + return Double.longBitsToDouble( + ((long) sign << FLOAT_16_TO_64_SIGN_SHIFT) + | ((long) (exponent + FLOAT_16_TO_64_EXPONENT_BIAS_CONVERSION) << FLOAT_16_TO_64_EXPONENT_SHIFT) + | ((long) fraction << FLOAT_16_TO_64_FRACTION_SHIFT) + ); + } + @Override public double doubleValue() { + if (isEvaluatingEExpression) { + return macroEvaluatorIonReader.doubleValue(); + } double value; if (valueTid.isNull) { throwDueToInvalidType(IonType.FLOAT); @@ -717,7 +2530,12 @@ public double doubleValue() { return 0.0d; } ByteBuffer bytes = prepareByteBuffer(valueMarker.startIndex, valueMarker.endIndex); - if (length == FLOAT_32_BYTE_LENGTH) { + if (length == FLOAT_16_BYTE_LENGTH) { + if (minorVersion == 0) { + throw new IonException("Ion 1.0 floats may may only have length 0, 4, or 8."); + } + value = readFloat16(bytes); + } else if (length == FLOAT_32_BYTE_LENGTH) { value = bytes.getFloat(); } else { // Note: there is no need to check for other lengths here; the type ID byte is validated during next(). 
@@ -742,6 +2560,9 @@ public double doubleValue() { @Override public Timestamp timestampValue() { + if (isEvaluatingEExpression) { + return macroEvaluatorIonReader.timestampValue(); + } if (valueTid == null || IonType.TIMESTAMP != valueTid.type) { throwDueToInvalidType(IonType.TIMESTAMP); } @@ -767,6 +2588,9 @@ public Date dateValue() { @Override public boolean booleanValue() { + if (isEvaluatingEExpression) { + return macroEvaluatorIonReader.booleanValue(); + } if (valueTid == null || IonType.BOOL != valueTid.type || valueTid.isNull) { throwDueToInvalidType(IonType.BOOL); } @@ -774,10 +2598,13 @@ public boolean booleanValue() { return minorVersion == 0 ? readBoolean_1_0() : readBoolean_1_1(); } - @Override - public String stringValue() { - if (valueTid == null || IonType.STRING != valueTid.type) { - throwDueToInvalidType(IonType.STRING); + /** + * Decodes the UTF-8 bytes between `valueMarker.startIndex` and `valueMarker.endIndex` into a String. + * @return the value. + */ + String readString() { + if (isEvaluatingEExpression) { + return macroEvaluatorIonReader.stringValue(); } if (valueTid.isNull) { return null; @@ -787,8 +2614,64 @@ public String stringValue() { return utf8Decoder.decode(utf8InputBuffer, (int) (valueMarker.endIndex - valueMarker.startIndex)); } + @Override + public String stringValue() { + String value; + IonType type = getEncodingType(); + if (type == IonType.STRING || isEvaluatingEExpression) { + value = readString(); + } else if (type == IonType.SYMBOL) { + if (valueTid.isInlineable) { + value = readString(); + } else if (valueTid == IonTypeID.SYSTEM_SYMBOL_VALUE) { + value = getSymbolText(); + } else { + int sid = symbolValueId(); + if (sid < 0) { + // The raw reader uses this to denote null.symbol. 
+ return null; + } + value = getSymbol(sid); + if (value == null) { + throw new UnknownSymbolException(sid); + } + } + } else { + throw new IllegalStateException("Invalid type requested."); + } + return value; + } + + @Override + public boolean hasSymbolText() { + if (isEvaluatingEExpression) { + return macroEvaluatorIonReader.getType() == IonType.SYMBOL && !macroEvaluatorIonReader.isNullValue(); + } + if (valueTid == null || IonType.SYMBOL != valueTid.type) { + return false; + } + return valueTid.isInlineable || valueTid == IonTypeID.SYSTEM_SYMBOL_VALUE; + } + + @Override + public String getSymbolText() { + if (isEvaluatingEExpression) { + return macroEvaluatorIonReader.symbolValue().assumeText(); + } + if (valueMarker.typeId == IonTypeID.SYSTEM_SYMBOL_VALUE) { + return getSystemSymbolToken(valueMarker).getText(); + } + return readString(); + } + @Override public int symbolValueId() { + if (isEvaluatingEExpression) { + if (macroEvaluatorIonReader.getType() != IonType.SYMBOL || macroEvaluatorIonReader.isNullValue()) { + throwDueToInvalidType(IonType.SYMBOL); + } + return macroEvaluatorIonReader.symbolValue().getSid(); + } if (valueTid == null || IonType.SYMBOL != valueTid.type) { throwDueToInvalidType(IonType.SYMBOL); } @@ -796,7 +2679,26 @@ public int symbolValueId() { return -1; } prepareScalar(); - return (int) readUInt(valueMarker.startIndex, valueMarker.endIndex); + if (minorVersion == 0) { + return (int) readUInt(valueMarker.startIndex, valueMarker.endIndex); + } else { + if (taglessType != null) { + // It is the caller's responsibility to call 'symbolValueId()' only when 'hasSymbolText()' is false, + // meaning that the tagless FlexSym is encoded as a FlexInt representing a symbol ID. 
+ peekIndex = valueMarker.startIndex; + return (int) readFlexInt_1_1(); + } + if (valueTid.length == 1){ + return (int) readFixedUInt_1_1(valueMarker.startIndex, valueMarker.endIndex); + } else if (valueTid.length == 2){ + return (int) readFixedUInt_1_1(valueMarker.startIndex, valueMarker.endIndex) + 256; + } else if (valueTid.length == -1) { + peekIndex = valueMarker.startIndex; + return (int) readFlexUInt_1_1() + 65792; + } else { + throw new IllegalStateException("Illegal length " + valueTid.length + " for " + valueMarker); + } + } } /** @@ -813,45 +2715,206 @@ IntList getAnnotationSidList() { } } else { while (peekIndex < annotationSequenceMarker.endIndex) { - annotationSids.add(readVarUInt_1_1()); + annotationSids.add((int) readFlexUInt_1_1()); } } peekIndex = savedPeekIndex; return annotationSids; } + /** + * Creates a SymbolToken representation of the given symbol ID. + * @param sid a symbol ID. + * @return a SymbolToken. + */ + protected SymbolToken getSymbolToken(int sid) { + return new SymbolTokenImpl(getSymbol(sid), sid); + } + + protected final SymbolToken getSystemSymbolToken(Marker marker) { + long id; + if (marker.startIndex == -1) { + id = marker.endIndex; + } else { + prepareScalar(); + id = readFixedUInt_1_1(marker.startIndex, marker.endIndex); + + // FIXME: This is a hack that works as long as our system symbol table doesn't grow to + // more than ~95 symbols. We need this hack because when we have to read the FixedInt, + // we don't know whether it's a tagless FlexSym or a Regular value. + // Possible solutions include: + // * changing the spec so that FlexSym System SIDs line up with the regular System SIDs + // * Introducing a dummy IonTypeID that indicates that we need to add the bias + // * Update IonCursorBinary.slowSkipFlexSym_1_1() to put the id into valueMarker.endIndex, + // though that seems to have its own problems. 
+ if (id >= FLEX_SYM_SYSTEM_SYMBOL_OFFSET) { + id = id - FLEX_SYM_SYSTEM_SYMBOL_OFFSET; + } + } + // In some cases, we pretend that $0 is a system symbol, so we must handle it here. + if (id == 0) { + return _Private_Utils.SYMBOL_0; + } + SystemSymbols_1_1 systemSymbol = SystemSymbols_1_1.get((int) id); + if (systemSymbol == null) { + throw new IonException("Unknown system symbol ID: " + id); + } + return systemSymbol.getToken(); + } + @Override - public int[] getAnnotationIds() { - getAnnotationSidList(); - int[] annotationArray = new int[annotationSids.size()]; - for (int i = 0; i < annotationArray.length; i++) { - annotationArray[i] = annotationSids.get(i); + public void consumeAnnotationTokens(Consumer consumer) { + if (annotationSequenceMarker.startIndex >= 0) { + if (annotationSequenceMarker.typeId != null && annotationSequenceMarker.typeId.isInlineable) { + getAnnotationMarkerList(); + } else { + getAnnotationSidList(); + for (int i = 0; i < annotationSids.size(); i++) { + consumer.accept(getSymbolToken(annotationSids.get(i))); + } + } + } + for (int i = 0; i < annotationTokenMarkers.size(); i++) { + Marker marker = annotationTokenMarkers.get(i); + if (marker.startIndex < 0) { + // This means the endIndex represents the token's symbol ID. + if (minorVersion == 1 && marker.typeId == IonTypeID.SYSTEM_SYMBOL_VALUE) { + consumer.accept(getSystemSymbolToken(marker)); + } else { + consumer.accept(getSymbolToken((int) marker.endIndex)); + } + } else { + // The token is inline UTF-8 text. + ByteBuffer utf8InputBuffer = prepareByteBuffer(marker.startIndex, marker.endIndex); + consumer.accept(new SymbolTokenImpl(utf8Decoder.decode(utf8InputBuffer, (int) (marker.endIndex - marker.startIndex)), -1)); + } + } + } + + /** + * Gets the annotation markers for the current value, reading them from the buffer first if necessary. + * @return the annotation markers, or an empty list if the current value is not annotated. 
+ */ + MarkerList getAnnotationMarkerList() { + annotationTokenMarkers.clear(); + long savedPeekIndex = peekIndex; + peekIndex = annotationSequenceMarker.startIndex; + while (peekIndex < annotationSequenceMarker.endIndex) { + Marker provisionalMarker = annotationTokenMarkers.provisionalElement(); + int annotationSid = (int) readFlexSym_1_1(provisionalMarker); + if (annotationSid >= 0) { + provisionalMarker.endIndex = annotationSid; + } else if (provisionalMarker.endIndex < 0) { + break; + } + annotationTokenMarkers.commit(); } - return annotationArray; + peekIndex = savedPeekIndex; + return annotationTokenMarkers; } @Override public int getFieldId() { + if (isEvaluatingEExpression) { + return macroEvaluatorIonReader.getFieldId(); + } return fieldSid; } + @Override + public boolean hasFieldText() { + if (isEvaluatingEExpression) { + return macroEvaluatorIonReader.getFieldName() != null; + } + return fieldTextMarker.startIndex > -1 || fieldTextMarker.typeId == IonTypeID.SYSTEM_SYMBOL_VALUE; + } + + @Override + public String getFieldText() { + if (isEvaluatingEExpression) { + return macroEvaluatorIonReader.getFieldName(); + } + if (fieldTextMarker.typeId == IonTypeID.SYSTEM_SYMBOL_VALUE) { + return getSystemSymbolToken(fieldTextMarker).getText(); + } + ByteBuffer utf8InputBuffer = prepareByteBuffer(fieldTextMarker.startIndex, fieldTextMarker.endIndex); + return utf8Decoder.decode(utf8InputBuffer, (int) (fieldTextMarker.endIndex - fieldTextMarker.startIndex)); + } + + @Override + public SymbolToken getFieldNameSymbol() { + if (isEvaluatingEExpression) { + return macroEvaluatorIonReader.getFieldNameSymbol(); + } + if (fieldTextMarker.startIndex > -1) { + return new SymbolTokenImpl(getFieldText(), SymbolTable.UNKNOWN_SYMBOL_ID); + } + if (fieldTextMarker.typeId == IonTypeID.SYSTEM_SYMBOL_VALUE) { + return getSystemSymbolToken(fieldTextMarker); + } + if (fieldSid < 0) { + return null; + } + return getSymbolToken(fieldSid); + } + + @Override + public SymbolToken 
symbolValue() { + if (isEvaluatingEExpression) { + return macroEvaluatorIonReader.symbolValue(); + } + if (valueTid == SYSTEM_SYMBOL_VALUE) { + return getSystemSymbolToken(valueMarker); + } + if (valueTid.isInlineable) { + return new SymbolTokenImpl(getSymbolText(), SymbolTable.UNKNOWN_SYMBOL_ID); + } + + int sid = symbolValueId(); + if (sid < 0) { + // The raw reader uses this to denote null.symbol. + return null; + } + return getSymbolToken(sid); + } + @Override public boolean isInStruct() { + if (isEvaluatingEExpression) { + return macroEvaluatorIonReader.isInStruct(); + } return parent != null && parent.typeId.type == IonType.STRUCT; } + @Override + public final IonType getEncodingType() { + if (isEvaluatingEExpression) { + return macroEvaluatorIonReader.getType(); + } + return valueTid == null ? null : valueTid.type; + } + @Override public IonType getType() { + if (isEvaluatingEExpression) { + return macroEvaluatorIonReader.getType(); + } return valueTid == null ? null : valueTid.type; } @Override public int getDepth() { + if (isEvaluatingEExpression) { + return containerIndex + 1 + macroEvaluatorIonReader.getDepth(); + } return containerIndex + 1; } @Override public void close() { + if (macroEvaluatorIonReader != null) { + macroEvaluatorIonReader.close(); + } utf8Decoder.close(); super.close(); } diff --git a/src/main/java/com/amazon/ion/impl/IonReaderContinuableTopLevelBinary.java b/src/main/java/com/amazon/ion/impl/IonReaderContinuableTopLevelBinary.java index e46118bc0f..5fba999a33 100644 --- a/src/main/java/com/amazon/ion/impl/IonReaderContinuableTopLevelBinary.java +++ b/src/main/java/com/amazon/ion/impl/IonReaderContinuableTopLevelBinary.java @@ -1,6 +1,5 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 - package com.amazon.ion.impl; import com.amazon.ion.IonBufferConfiguration; @@ -219,6 +218,9 @@ void prepareScalar() { if (event == Event.NEEDS_INSTRUCTION) { throw new OversizedValueException(); } + } else { + super.prepareScalar(); + return; } } throw new IonException("Unexpected EOF."); diff --git a/src/main/java/com/amazon/ion/impl/IonReaderNonContinuableSystem.java b/src/main/java/com/amazon/ion/impl/IonReaderNonContinuableSystem.java index 36112fa7bc..c33020dc3e 100644 --- a/src/main/java/com/amazon/ion/impl/IonReaderNonContinuableSystem.java +++ b/src/main/java/com/amazon/ion/impl/IonReaderNonContinuableSystem.java @@ -1,6 +1,5 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 - package com.amazon.ion.impl; import com.amazon.ion.Decimal; @@ -19,8 +18,10 @@ import java.math.BigDecimal; import java.math.BigInteger; import java.util.ArrayDeque; +import java.util.ArrayList; import java.util.Date; import java.util.Iterator; +import java.util.List; import java.util.Queue; import static com.amazon.ion.IonCursor.Event.NEEDS_DATA; @@ -34,11 +35,34 @@ */ final class IonReaderNonContinuableSystem implements IonReader { + private static final SymbolToken IVM_1_0 = new SymbolTokenImpl(SystemSymbols.ION_1_0, SystemSymbols.ION_1_0_SID); + private static final SymbolToken IVM_1_1 = new SymbolTokenImpl("$ion_1_1", -1); + + /** + * Represents an IVM that was read that has not yet been exposed as a Symbol value. 
+ */ + private enum PendingIvm { + ION_1_0(IVM_1_0), + ION_1_1(IVM_1_1); + + private final SymbolToken token; + PendingIvm(SymbolToken symbolToken) { + token = symbolToken; + } + + static PendingIvm pendingIvmForVersionOrNull(int major, int minor) { + if (major != 1) return null; + if (minor == 0) return ION_1_0; + if (minor == 1) return ION_1_1; + return null; + } + } + private final IonReaderContinuableCore reader; private IonType type = null; private IonType typeAfterIvm = null; - private final Queue pendingIvmSids = new ArrayDeque<>(1); - private int pendingIvmSid = -1; + private final Queue pendingIvms = new ArrayDeque<>(1); + private PendingIvm pendingIvm = null; /** * Constructs a new non-continuable system-level reader over the given continuable reader. @@ -46,8 +70,13 @@ final class IonReaderNonContinuableSystem implements IonReader { */ IonReaderNonContinuableSystem(IonReaderContinuableCore reader) { this.reader = reader; - reader.registerIvmNotificationConsumer((x, y) -> { - pendingIvmSids.add(SystemSymbols.ION_1_0_SID); // TODO generalize for Ion 1.1 + reader.registerIvmNotificationConsumer((major, minor) -> { + PendingIvm ivm = PendingIvm.pendingIvmForVersionOrNull(major, minor); + if (ivm == null) { + throw new IllegalStateException("The parser should have already thrown upon encountering this illegal IVM."); + } + reader.resetEncodingContext(); + pendingIvms.add(ivm); }); } @@ -62,8 +91,8 @@ public boolean hasNext() { * @return true if a value is ready to be presented to the user; otherwise, false. */ private boolean handleIvm() { - Integer ivmSid = pendingIvmSids.poll(); - if (ivmSid != null) { + PendingIvm nextPendingIvm = pendingIvms.poll(); + if (nextPendingIvm != null) { // An IVM has been found between values. if (typeAfterIvm == null) { // Only save the type of the next user value the first time an IVM is encountered before that value. 
@@ -71,11 +100,11 @@ private boolean handleIvm() { } // For consistency with the legacy implementation, the system reader surfaces IVMs as symbol values. type = IonType.SYMBOL; - pendingIvmSid = ivmSid; + pendingIvm = nextPendingIvm; return true; - } else if (pendingIvmSid != -1) { + } else if (pendingIvm != null) { // All preceding IVMs have been surfaced. Restore the value that follows. - pendingIvmSid = -1; + pendingIvm = null; type = typeAfterIvm; typeAfterIvm = null; return true; @@ -154,7 +183,7 @@ public IntegerSize getIntegerSize() { @Override public boolean isNullValue() { - return pendingIvmSid == -1 && reader.isNullValue(); + return pendingIvm == null && reader.isNullValue(); } @Override @@ -218,16 +247,20 @@ public Timestamp timestampValue() { @Override public String stringValue() { - if (pendingIvmSid != -1) { - return getSymbolTable().findKnownSymbol(pendingIvmSid); + if (pendingIvm != null) { + return pendingIvm.token.getText(); } prepareScalar(); String value; if (type == IonType.SYMBOL) { - int sid = reader.symbolValueId(); - value = getSymbolTable().findKnownSymbol(sid); - if (value == null) { - throw new UnknownSymbolException(sid); + if (reader.hasSymbolText()) { + value = reader.getSymbolText(); + } else { + int sid = reader.symbolValueId(); + value = getSymbolText(sid); + if (value == null) { + throw new UnknownSymbolException(sid); + } } } else { value = reader.stringValue(); @@ -235,6 +268,19 @@ public String stringValue() { return value; } + /** + * Attempts to match the given symbol ID to text. + * @param sid the symbol ID. + * @return the matching symbol text, or null. + */ + private String getSymbolText(int sid) { + if (reader.getIonMinorVersion() == 0) { + // In Ion 1.0, the system symbol table is always available. 
+ return getSymbolTable().findKnownSymbol(sid); + } + return reader.getSymbol(sid); + } + @Override public int byteSize() { prepareScalar(); @@ -260,47 +306,55 @@ public T asFacet(Class facetType) { @Override public SymbolTable getSymbolTable() { - // TODO generalize for Ion 1.1 + // TODO generalize for Ion 1.1, whose system symbol table is not necessarily active. return SharedSymbolTable.getSystemSymbolTable(reader.getIonMajorVersion()); } @Override public String[] getTypeAnnotations() { - if (pendingIvmSid != -1 || !reader.hasAnnotations()) { + if (pendingIvm != null || !reader.hasAnnotations()) { return _Private_Utils.EMPTY_STRING_ARRAY; } - int[] annotationIds = reader.getAnnotationIds(); - String[] annotations = new String[annotationIds.length]; - SymbolTable symbolTable = getSymbolTable(); - for (int i = 0; i < annotationIds.length; i++) { - int sid = annotationIds[i]; - String annotation = symbolTable.findKnownSymbol(sid); - if (annotation == null) { - throw new UnknownSymbolException(sid); + // Note: it is not expected that the system reader is used in performance-sensitive applications; hence, + // no effort is made to optimize the following.
+ List annotations = new ArrayList<>(); + reader.consumeAnnotationTokens((token) -> { + String text = token.getText(); + if (text == null) { + int sid = token.getSid(); + text = getSymbolText(sid); + if (text == null) { + throw new UnknownSymbolException(sid); + } } - annotations[i] = annotation; - } - return annotations; + annotations.add(text); + }); + return annotations.toArray(_Private_Utils.EMPTY_STRING_ARRAY); } @Override public SymbolToken[] getTypeAnnotationSymbols() { - if (pendingIvmSid != -1 || !reader.hasAnnotations()) { + if (pendingIvm != null || !reader.hasAnnotations()) { return SymbolToken.EMPTY_ARRAY; } - int[] annotationIds = reader.getAnnotationIds(); - SymbolToken[] annotationSymbolTokens = new SymbolToken[annotationIds.length]; - SymbolTable symbolTable = getSymbolTable(); - for (int i = 0; i < annotationIds.length; i++) { - int sid = annotationIds[i]; - annotationSymbolTokens[i] = new SymbolTokenImpl(symbolTable.findKnownSymbol(sid), sid); - } - return annotationSymbolTokens; + // Note: it is not expected that the system reader is used in performance-sensitive applications; hence, + // no effort is made to optimize the following.
+ List annotations = new ArrayList<>(); + reader.consumeAnnotationTokens((token) -> { + String text = token.getText(); + if (text != null) { + annotations.add(token); + } else { + int sid = token.getSid(); + annotations.add(new SymbolTokenImpl(getSymbolText(sid), sid)); + } + }); + return annotations.toArray(SymbolToken.EMPTY_ARRAY); } @Override public Iterator iterateTypeAnnotations() { - if (pendingIvmSid != -1 || !reader.hasAnnotations()) { + if (pendingIvm != null || !reader.hasAnnotations()) { return _Private_Utils.emptyIterator(); } return _Private_Utils.stringIterator(getTypeAnnotations()); @@ -313,11 +367,14 @@ public int getFieldId() { @Override public String getFieldName() { + if (reader.hasFieldText()) { + return reader.getFieldText(); + } int sid = reader.getFieldId(); if (sid < 0) { return null; } - String name = getSymbolTable().findKnownSymbol(sid); + String name = getSymbolText(sid); if (name == null) { throw new UnknownSymbolException(sid); } @@ -326,23 +383,36 @@ public String getFieldName() { @Override public SymbolToken getFieldNameSymbol() { - int sid = reader.getFieldId(); - if (sid < 0) { - return null; + String fieldText; + int sid = -1; + if (reader.hasFieldText()) { + fieldText = reader.getFieldText(); + } else { + sid = reader.getFieldId(); + if (sid < 0) { + return null; + } + fieldText = getSymbolText(sid); } - return new SymbolTokenImpl(getSymbolTable().findKnownSymbol(sid), sid); + return new SymbolTokenImpl(fieldText, sid); } @Override public SymbolToken symbolValue() { - int sid; - if (pendingIvmSid != -1) { - sid = pendingIvmSid; + String symbolText; + int sid = -1; + if (pendingIvm != null) { + return pendingIvm.token; } else { prepareScalar(); - sid = reader.symbolValueId(); + if (reader.hasSymbolText()) { + symbolText = reader.getSymbolText(); + } else { + sid = reader.symbolValueId(); + symbolText = getSymbolText(sid); + } } - return new SymbolTokenImpl(getSymbolTable().findKnownSymbol(sid), sid); + return new 
SymbolTokenImpl(symbolText, sid); } @Override diff --git a/src/main/java/com/amazon/ion/impl/IonReaderTextRawTokensX.java b/src/main/java/com/amazon/ion/impl/IonReaderTextRawTokensX.java index 5a2f89ba80..49d6aa67a5 100644 --- a/src/main/java/com/amazon/ion/impl/IonReaderTextRawTokensX.java +++ b/src/main/java/com/amazon/ion/impl/IonReaderTextRawTokensX.java @@ -100,6 +100,17 @@ public Appendable append(char c) throws IOException // Used for tracking terminator characters when skipping a container private final ArrayList containerSkipTerminatorStack = new ArrayList<>(CONTAINER_STACK_INITIAL_CAPACITY); + // The Ion encoding minor version currently active. + private int minorVersion; + + /** + * Sets the Ion minor version. + * @param minorVersion the version. + */ + void setMinorVersion(int minorVersion) { + this.minorVersion = minorVersion; + } + /** * IonTokenReader constructor requires a UnifiedInputStream * as the source of bytes/chars that serve as the basic input @@ -547,6 +558,8 @@ private final int skip_to_end(SavePoint sp) throws IOException skip_over_struct(); c = read_char(); break; + case IonTokenConstsX.TOKEN_OPEN_PAREN_COLON: + case IonTokenConstsX.TOKEN_OPEN_PAREN_DOUBLE_COLON: case IonTokenConstsX.TOKEN_OPEN_PAREN: skip_over_sexp(); // you can't save point a scanned sexp (right now anyway) c = read_char(); @@ -633,6 +646,18 @@ public final int nextToken() throws IOException case ']': return next_token_finish(IonTokenConstsX.TOKEN_CLOSE_SQUARE, false); case '(': + if (minorVersion > 0) { + c2 = read_char(); + if (c2 == ':') { + c2 = read_char(); + if (c2 == ':') { + return next_token_finish(IonTokenConstsX.TOKEN_OPEN_PAREN_DOUBLE_COLON, true); + } + unread_char(c2); + return next_token_finish(IonTokenConstsX.TOKEN_OPEN_PAREN_COLON, true); + } + unread_char(c2); + } return next_token_finish(IonTokenConstsX.TOKEN_OPEN_PAREN, true); // CAS: 9 nov 2009 case ')': return next_token_finish(IonTokenConstsX.TOKEN_CLOSE_PAREN, false); diff --git 
a/src/main/java/com/amazon/ion/impl/IonReaderTextRawX.java b/src/main/java/com/amazon/ion/impl/IonReaderTextRawX.java index 4449ef4122..7a96b1c4f4 100644 --- a/src/main/java/com/amazon/ion/impl/IonReaderTextRawX.java +++ b/src/main/java/com/amazon/ion/impl/IonReaderTextRawX.java @@ -1,18 +1,5 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl; import static com.amazon.ion.SymbolTable.UNKNOWN_SYMBOL_ID; @@ -30,7 +17,6 @@ import com.amazon.ion.impl._Private_ScalarConversions.AS_TYPE; import com.amazon.ion.impl._Private_ScalarConversions.ValueVariant; import java.io.IOException; -import java.math.BigInteger; import java.util.Iterator; /** @@ -72,7 +58,6 @@ abstract class IonReaderTextRawX implements IonTextReader { - public abstract BigInteger bigIntegerValue(); // static final boolean _object_parser = false; static final boolean _debug = false; @@ -126,7 +111,10 @@ private final String get_state_name(int state) { static final int ACTION_FINISH_LOB = 13; static final int ACTION_FINISH_DATAGRAM = 14; static final int ACTION_EOF = 15; - static final int ACTION_count = 16; + + static final int ACTION_START_E_EXPRESSION = 16; + static final int ACTION_START_EXPRESSION_GROUP = 17; + @SuppressWarnings("unused") private final String get_action_name(int action) { switch(action) { @@ 
-144,6 +132,8 @@ private final String get_action_name(int action) { case ACTION_FINISH_CONTAINER: return "ACTION_FINISH_CONTAINER"; case ACTION_FINISH_LOB: return "ACTION_FINISH_LOB"; case ACTION_FINISH_DATAGRAM: return "ACTION_FINISH_DATAGRAM"; + case ACTION_START_E_EXPRESSION: return "ACTION_START_E_EXPRESSION"; + case ACTION_START_EXPRESSION_GROUP: return "ACTION_START_EXPRESSION_GROUP"; case ACTION_EOF: return "ACTION_EOF"; default: return ""; } @@ -169,6 +159,8 @@ static final int[][] makeTransitionActionArray() actions[STATE_BEFORE_ANNOTATION_DATAGRAM][IonTokenConstsX.TOKEN_SYMBOL_IDENTIFIER] = ACTION_LOAD_ANNOTATION; actions[STATE_BEFORE_ANNOTATION_DATAGRAM][IonTokenConstsX.TOKEN_SYMBOL_QUOTED] = ACTION_LOAD_ANNOTATION; actions[STATE_BEFORE_ANNOTATION_DATAGRAM][IonTokenConstsX.TOKEN_OPEN_PAREN] = ACTION_START_SEXP; + actions[STATE_BEFORE_ANNOTATION_DATAGRAM][IonTokenConstsX.TOKEN_OPEN_PAREN_COLON] = ACTION_START_E_EXPRESSION; + actions[STATE_BEFORE_ANNOTATION_DATAGRAM][IonTokenConstsX.TOKEN_OPEN_PAREN_DOUBLE_COLON] = ACTION_START_EXPRESSION_GROUP; actions[STATE_BEFORE_ANNOTATION_DATAGRAM][IonTokenConstsX.TOKEN_OPEN_BRACE] = ACTION_START_STRUCT; actions[STATE_BEFORE_ANNOTATION_DATAGRAM][IonTokenConstsX.TOKEN_OPEN_SQUARE] = ACTION_START_LIST; actions[STATE_BEFORE_ANNOTATION_DATAGRAM][IonTokenConstsX.TOKEN_OPEN_DOUBLE_BRACE] = ACTION_START_LOB; @@ -252,6 +244,45 @@ static int[] makeTransition2ActionArray() { return a; } + private class ContainerState { + IonType type; + boolean isEExpression; + boolean isExpressionGroup; + + private void setFlags() { + switch (type) { + case LIST: + _container_is_struct = false; + _container_prohibits_commas = false; + break; + case DATAGRAM: + case SEXP: + _container_is_struct = false; + _container_prohibits_commas = true; + break; + case STRUCT: + _container_is_struct = true; + _container_prohibits_commas = false; + break; + default: + throw new IllegalStateException("type must be a container, not a " + type); + } + } + + 
void init(IonType type) { + this.type = type; + this.isEExpression = _container_is_e_expression; + this.isExpressionGroup = _container_is_expression_group; + setFlags(); + } + + void restore() { + _container_is_e_expression = isEExpression; + _container_is_expression_group = isExpressionGroup; + setFlags(); + } + } + // // actual class members (preceding values are just parsing // control constants). @@ -262,10 +293,13 @@ static int[] makeTransition2ActionArray() { boolean _eof; int _state; - IonType[] _container_state_stack = new IonType[DEFAULT_STACK_DEPTH]; + ContainerState[] _container_state_stack = new ContainerState[DEFAULT_STACK_DEPTH]; int _container_state_top; boolean _container_is_struct; // helper bool's set on push and pop and used boolean _container_prohibits_commas; // frequently during state transitions actions + boolean _container_is_e_expression; + boolean _container_is_expression_group; + boolean _is_expression_syntax_allowed; boolean _has_next_called; IonType _value_type; @@ -295,13 +329,26 @@ enum LOB_STATE { EMPTY, READ, FINISHED } LOB_STATE _lob_loaded; byte[] _lob_bytes; int _lob_actual_len; + int minorVersion = 0; protected IonReaderTextRawX() { super(); _nesting_parent = null; + for (int i = 0; i < _container_state_stack.length; i++) { + _container_state_stack[i] = new ContainerState(); + } } + /** + * Sets the Ion minor version. + * @param minorVersion the version. + */ + void setMinorVersion(int minorVersion) { + this.minorVersion = minorVersion; + _scanner.setMinorVersion(minorVersion); + _is_expression_syntax_allowed = false; + } /** * @return This implementation always returns null. 
@@ -350,6 +397,9 @@ protected final void re_init(UnifiedInputStreamX iis _container_state_top = 0; _container_is_struct = false; _container_prohibits_commas = false; + _container_is_e_expression = false; + _container_is_expression_group = false; + _is_expression_syntax_allowed = false; _has_next_called = false; _value_type = null; _value_keyword = 0; @@ -463,6 +513,8 @@ protected final boolean has_next_raw_value() { finish_value(null); clear_value(); parse_to_next_value(); + // Any expression syntax for the current container must have already occurred. + _is_expression_syntax_allowed = false; } catch (IOException e) { throw new IonException(e); @@ -472,6 +524,34 @@ protected final boolean has_next_raw_value() { return (_eof != true); } + /** + * Loads the raw value on which the reader is currently positioned. + */ + private void loadValueRaw() { + if (_value_type == null && _scanner.isUnfinishedToken()) { + try { + token_contents_load(_scanner.getToken()); + } + catch (IOException e) { + throw new IonException(e); + } + } + } + + /** + * Returns the type of the next raw value in the stream. Does not delegate any responsibilities to + * superclasses, ensuring that this method does not consume system values. + * @return the type of the next raw value, or null if there is none. + */ + protected final IonType nextRaw() { + if (!has_next_raw_value()) { + return null; + } + loadValueRaw(); + _has_next_called = false; + return _value_type; + } + /** * returns the type of the next value in the stream. 
* it calls hasNext to assure that the value has been properly @@ -487,14 +567,7 @@ public IonType next() if (!hasNext()) { return null; } - if (_value_type == null && _scanner.isUnfinishedToken()) { - try { - token_contents_load(_scanner.getToken()); - } - catch (IOException e) { - throw new IonException(e); - } - } + loadValueRaw(); _has_next_called = false; return _value_type; } @@ -545,29 +618,8 @@ private final void clear_value() clear_fieldname(); _v.clear(); _value_start_offset = -1; - } - - private final void set_container_flags(IonType t) { - switch (t) { - case LIST: - _container_is_struct = false; - _container_prohibits_commas = false; - break; - case SEXP: - _container_is_struct = false; - _container_prohibits_commas = true; - break; - case STRUCT: - _container_is_struct = true; - _container_prohibits_commas = false; - break; - case DATAGRAM: - _container_is_struct = false; - _container_prohibits_commas = true; - break; - default: - throw new IllegalArgumentException("type must be a container, not a "+t.toString()); - } + _container_is_e_expression = false; + _container_is_expression_group = false; } private int get_state_after_value() @@ -599,7 +651,7 @@ private final int get_state_after_annotation() { int state_after_annotation; switch(get_state_int()) { case STATE_AFTER_VALUE_CONTENTS: - IonType container = top_state(); + IonType container = top_state().type; switch(container) { case STRUCT: case LIST: @@ -633,13 +685,13 @@ private final int get_state_after_annotation() { } private final int get_state_after_container() { - IonType container = top_state(); + IonType container = top_state().type; int new_state = get_state_after_container(container); return new_state; } private final int get_state_after_container(int token) { - IonType container = top_state(); + IonType container = top_state().type; switch(container) { case STRUCT: @@ -772,7 +824,6 @@ private final SymbolToken parseSymbolToken(String context, return new SymbolTokenImpl(text, sid); } - 
protected final void parse_to_next_value() throws IOException { int t; @@ -1061,6 +1112,15 @@ else if (t == IonTokenConstsX.TOKEN_DOT) { set_state(STATE_EOF); _eof = true; return; + case ACTION_START_E_EXPRESSION: + case ACTION_START_EXPRESSION_GROUP: + _container_is_e_expression = action == ACTION_START_E_EXPRESSION; + _container_is_expression_group = action == ACTION_START_EXPRESSION_GROUP; + _value_type = IonType.SEXP; + temp_state = STATE_BEFORE_ANNOTATION_SEXP; + set_state(temp_state); + _is_expression_syntax_allowed = false; + return; default: parse_error("unexpected token encountered: "+IonTokenConstsX.getTokenName(t)); } } @@ -1200,17 +1260,19 @@ private final void push_container_state(IonType newContainer) int oldlen = _container_state_stack.length; if (_container_state_top >= oldlen) { int newlen = oldlen * 2; - IonType[] temp = new IonType[newlen]; + ContainerState[] temp = new ContainerState[newlen]; System.arraycopy(_container_state_stack, 0, temp, 0, oldlen); + for (int i = oldlen; i < temp.length; i++) { + temp[i] = new ContainerState(); + } _container_state_stack = temp; } - set_container_flags(newContainer); - _container_state_stack[_container_state_top++] = newContainer; + _container_state_stack[_container_state_top++].init(newContainer); } private final void pop_container_state() { _container_state_top--; - set_container_flags(top_state()); + top_state().restore(); _eof = false; _has_next_called = false; @@ -1218,10 +1280,8 @@ private final void pop_container_state() { set_state(new_state); } - private final IonType top_state() { - int top = _container_state_top - 1; - IonType top_container = _container_state_stack[top]; - return top_container; + private final ContainerState top_state() { + return _container_state_stack[_container_state_top - 1]; } public IonType getType() @@ -1260,14 +1320,14 @@ private boolean is_in_struct_internal() public IonType getContainerType() { if (_container_state_top == 0) return IonType.DATAGRAM; - return 
_container_state_stack[_container_state_top - 1]; + return _container_state_stack[_container_state_top - 1].type; } public int getDepth() { int depth = _container_state_top; if (depth > 0) { int debugging_depth = depth; - IonType top_type = _container_state_stack[0]; + IonType top_type = _container_state_stack[0].type; if (_nesting_parent == null) { if (IonType.DATAGRAM.equals(top_type)) { depth--; @@ -1336,7 +1396,9 @@ public void stepIn() switch (_value_type) { case STRUCT: case LIST: + break; case SEXP: + _is_expression_syntax_allowed = minorVersion > 0; break; default: throw new IllegalStateException("Unexpected value type: " + _value_type); @@ -1363,6 +1425,24 @@ public void stepIn() if (_debug) System.out.println("stepInto() new depth: "+getDepth()); } + + /** + * Forces the end of a container, assuming that the reader is already positioned after the container's end + * delimiter. + */ + protected final void endContainerRaw() { + pop_container_state(); + _scanner.tokenIsFinished(); + try { + finish_value(null); + } + catch (IOException e) { + throw new IonException(e); + } + + clear_value(); + } + public void stepOut() { if (getDepth() < 1) { @@ -1389,16 +1469,7 @@ public void stepOut() catch (IOException e) { throw new IonException(e); } - pop_container_state(); - _scanner.tokenIsFinished(); - try { - finish_value(null); - } - catch (IOException e) { - throw new IonException(e); - } - - clear_value(); + endContainerRaw(); if (_debug) System.out.println("stepOUT() new depth: "+getDepth()); } diff --git a/src/main/java/com/amazon/ion/impl/IonReaderTextSystemX.java b/src/main/java/com/amazon/ion/impl/IonReaderTextSystemX.java index c187682d53..e636c0a0db 100644 --- a/src/main/java/com/amazon/ion/impl/IonReaderTextSystemX.java +++ b/src/main/java/com/amazon/ion/impl/IonReaderTextSystemX.java @@ -1,20 +1,10 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl; +import static com.amazon.ion.SystemSymbols.ION_1_0; +import static com.amazon.ion.SystemSymbols.ION_SYMBOL_TABLE; +import static com.amazon.ion.SystemSymbols.ION_SYMBOL_TABLE_SID; import static com.amazon.ion.impl._Private_ScalarConversions.getValueTypeName; import com.amazon.ion.Decimal; @@ -30,19 +20,42 @@ import com.amazon.ion.IonTimestamp; import com.amazon.ion.IonType; import com.amazon.ion.IonValue; +import com.amazon.ion.MacroAwareIonReader; +import com.amazon.ion.MacroAwareIonWriter; import com.amazon.ion.SymbolTable; import com.amazon.ion.SymbolToken; import com.amazon.ion.Timestamp; import com.amazon.ion.UnknownSymbolException; +import com.amazon.ion.UnsupportedIonVersionException; import com.amazon.ion.impl.IonReaderTextRawTokensX.IonReaderTextTokenException; import com.amazon.ion.impl.IonTokenConstsX.CharacterSequence; import com.amazon.ion.impl._Private_ScalarConversions.AS_TYPE; import com.amazon.ion.impl._Private_ScalarConversions.CantConvertException; +import com.amazon.ion.impl.bin.PresenceBitmap; +import com.amazon.ion.impl.macro.EExpressionArgsReader; +import com.amazon.ion.impl.macro.EncodingContext; +import com.amazon.ion.impl.macro.Expression; +import com.amazon.ion.impl.macro.Macro; +import com.amazon.ion.impl.macro.MacroEvaluator; +import 
com.amazon.ion.impl.macro.MacroEvaluatorAsIonReader; +import com.amazon.ion.impl.macro.MacroRef; +import com.amazon.ion.impl.macro.MacroTable; +import com.amazon.ion.impl.macro.MutableMacroTable; +import com.amazon.ion.impl.macro.ReaderAdapter; +import com.amazon.ion.impl.macro.ReaderAdapterIonReader; +import com.amazon.ion.impl.macro.SystemMacro; +import org.jetbrains.annotations.NotNull; + import java.io.IOException; import java.math.BigDecimal; import java.math.BigInteger; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import java.util.Date; -import java.lang.Character; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; /** * This reader calls the {@link IonReaderTextRawX} for low level events. @@ -56,15 +69,42 @@ */ class IonReaderTextSystemX extends IonReaderTextRawX - implements _Private_ReaderWriter + implements _Private_ReaderWriter, MacroAwareIonReader { private static int UNSIGNED_BYTE_MAX_VALUE = 255; SymbolTable _system_symtab; + SymbolTable _symbols; + + // The core MacroEvaluator that this core reader delegates to when evaluating a macro invocation. + private final MacroEvaluator macroEvaluator = new MacroEvaluator(); + + // The IonReader-like MacroEvaluator that this core reader delegates to when evaluating a macro invocation. + protected final MacroEvaluatorAsIonReader macroEvaluatorIonReader = new MacroEvaluatorAsIonReader(macroEvaluator); + + // The encoding context (macro table) that is currently active. + private EncodingContext encodingContext = EncodingContext.getDefault(); + + // Adapts this reader for use in code that supports multiple reader types. + private final ReaderAdapter readerAdapter = new ReaderAdapterIonReader(this); + + // Reads encoding directives from the stream. + private EncodingDirectiveReader encodingDirectiveReader = null; + + // Reads macro invocation arguments as expressions and feeds them to the MacroEvaluator. 
+ private final EExpressionArgsReader expressionArgsReader = new TextEExpressionArgsReader(); + + // Indicates whether the reader is currently evaluating an e-expression. + boolean isEvaluatingEExpression = false; + + // The writer that will perform a macro-aware transcode, if requested. + private MacroAwareIonWriter macroAwareTranscoder = null; + protected IonReaderTextSystemX(UnifiedInputStreamX iis) { _system_symtab = _Private_Utils.systemSymtab(1); // TODO check IVM to determine version: amazon-ion/ion-java/issues/19 + _symbols = _system_symtab; init_once(); init(iis, IonType.DATAGRAM); } @@ -75,7 +115,7 @@ protected IonReaderTextSystemX(UnifiedInputStreamX iis) // into a base class (the *Value() methods also share a lot of similarity). public IntegerSize getIntegerSize() { - if (_value_type != IonType.INT || _v.isNull()) + if (_value_type != IonType.INT || isNullValue()) { return null; } @@ -83,8 +123,62 @@ public IntegerSize getIntegerSize() return _Private_ScalarConversions.getIntegerSize(_v.getAuthoritativeType()); } + /** + * Loads a scalar value (except lob values) from the macro evaluator. + */ + private void loadScalarValueFromMacro() { + switch (_value_type) { + case NULL: + _v.setValueToNull(_value_type); + break; + case BOOL: + _v.setValue(macroEvaluatorIonReader.booleanValue()); + break; + case INT: + switch (macroEvaluatorIonReader.getIntegerSize()) { + case INT: + _v.setValue(macroEvaluatorIonReader.intValue()); + break; + case LONG: + _v.setValue(macroEvaluatorIonReader.longValue()); + break; + case BIG_INTEGER: + _v.setValue(macroEvaluatorIonReader.bigIntegerValue()); + break; + } + break; + case FLOAT: + _v.setValue(macroEvaluatorIonReader.doubleValue()); + break; + case DECIMAL: + _v.setValue(macroEvaluatorIonReader.decimalValue()); + break; + case TIMESTAMP: + _v.setValue(macroEvaluatorIonReader.timestampValue()); + break; + case SYMBOL: + // TODO determine how to handle symbols with unknown text. 
+ _v.setValue(macroEvaluatorIonReader.stringValue()); + break; + case STRING: + _v.setValue(macroEvaluatorIonReader.stringValue()); + break; + case CLOB: // see load_lob_contents + case BLOB: // see load_lob_contents + case LIST: + case SEXP: + case STRUCT: + case DATAGRAM: + throw new IllegalStateException(String.format("Type %s is not loaded by this method.", _value_type)); + } + } + private void load_once() { + if (isEvaluatingEExpression) { + loadScalarValueFromMacro(); + return; + } if (_v.isEmpty()) { try { load_scalar_value(); @@ -471,46 +565,64 @@ private final void cast_cached_value(int new_type) } } + /** + * Loads annotations, either from the stream or from a macro. + * @return the annotations. + */ + private SymbolToken[] loadAnnotations() { + SymbolToken[] annotations; + if (isEvaluatingEExpression) { + annotations = macroEvaluatorIonReader.getTypeAnnotationSymbols(); + _annotation_count = annotations == null ? 0 : annotations.length; + } else { + // The annotations are eagerly read from the stream into `_annotations`. + annotations = _annotations; + } + return annotations; + } + // // public value routines // public SymbolToken[] getTypeAnnotationSymbols() { + SymbolToken[] annotations = loadAnnotations(); final int count = _annotation_count; if (count == 0) return SymbolToken.EMPTY_ARRAY; - resolveAnnotationSymbols(count); + resolveAnnotationSymbols(annotations, count); SymbolToken[] result = new SymbolToken[count]; - System.arraycopy(_annotations, 0, result, 0, count); + System.arraycopy(annotations, 0, result, 0, count); return result; } public String[] getTypeAnnotations() { - resolveAnnotationSymbols(_annotation_count); - return _Private_Utils.toStrings(_annotations, _annotation_count); + SymbolToken[] annotations = loadAnnotations(); + resolveAnnotationSymbols(annotations, _annotation_count); + return _Private_Utils.toStrings(annotations, _annotation_count); } /** * Resolve annotations with the current symbol table. 
*/ - private void resolveAnnotationSymbols(int count) { + private void resolveAnnotationSymbols(SymbolToken[] annotations, int count) { SymbolTable symbols = getSymbolTable(); for (int i = 0; i < count; i++) { - SymbolToken sym = _annotations[i]; + SymbolToken sym = annotations[i]; SymbolToken updated = _Private_Utils.localize(symbols, sym); if (updated != sym) { - _annotations[i] = updated; + annotations[i] = updated; } } } public boolean isNullValue() { - return _v.isNull(); + return (isEvaluatingEExpression && macroEvaluatorIonReader.isNullValue()) || _v.isNull(); } public boolean booleanValue() @@ -601,7 +713,7 @@ public Timestamp timestampValue() public final String stringValue() { if (! IonType.isText(_value_type)) throw new IllegalStateException("Unexpected value type: " + _value_type); - if (_v.isNull()) return null; + if (isNullValue()) return null; load_or_cast_cached_value(AS_TYPE.string_value); String text = _v.getString(); @@ -623,7 +735,7 @@ public SymbolTable getSymbolTable() SymbolTable symtab = super.getSymbolTable(); if (symtab == null) { - symtab = _system_symtab; + symtab = _symbols; } return symtab; } @@ -632,8 +744,13 @@ public SymbolTable getSymbolTable() @Override public final int getFieldId() { - // Superclass handles hoisting logic - int id = super.getFieldId(); + int id; + if (isEvaluatingEExpression) { + id = getFieldNameSymbol().getSid(); + } else { + // Superclass handles hoisting logic + id = super.getFieldId(); + } if (id == SymbolTable.UNKNOWN_SYMBOL_ID) { String fieldname = getRawFieldName(); @@ -649,6 +766,9 @@ public final int getFieldId() @Override public final String getFieldName() { + if (isEvaluatingEExpression) { + _field_name = macroEvaluatorIonReader.getFieldName(); + } // Superclass handles hoisting logic String text = getRawFieldName(); if (text == null) @@ -670,7 +790,12 @@ public final String getFieldName() @Override public SymbolToken getFieldNameSymbol() { - SymbolToken sym = super.getFieldNameSymbol(); + SymbolToken 
sym; + if (isEvaluatingEExpression) { + sym = macroEvaluatorIonReader.getFieldNameSymbol(); + } else { + sym = super.getFieldNameSymbol(); + } if (sym != null) { sym = _Private_Utils.localize(getSymbolTable(), sym); @@ -681,7 +806,7 @@ public SymbolToken getFieldNameSymbol() public SymbolToken symbolValue() { if (_value_type != IonType.SYMBOL) throw new IllegalStateException("Unexpected value type: " + _value_type); - if (_v.isNull()) return null; + if (isNullValue()) return null; load_or_cast_cached_value(AS_TYPE.string_value); if (! _v.hasValueOfType(AS_TYPE.int_value)) @@ -739,6 +864,13 @@ private final long load_lob_save_point() throws IOException } private int load_lob_contents() throws IOException { + if (isEvaluatingEExpression) { + // TODO performance: reduce allocation / copying. Can getBytes() be used? + _lob_bytes = macroEvaluatorIonReader.newBytes(); + _lob_actual_len = _lob_bytes.length; + _lob_loaded = LOB_STATE.FINISHED; + return _lob_actual_len; + } if (_lob_loaded == LOB_STATE.EMPTY) { load_lob_save_point(); } @@ -1009,4 +1141,440 @@ public SymbolTable pop_passed_symbol_table() { return null; } + + /** + * Sets the active symbol table. + * @param symbolTable the symbol table to make active. + */ + protected void setSymbolTable(SymbolTable symbolTable) { + _symbols = symbolTable; + } + + /** + * While reading an encoding directive, the reader allows itself to be controlled by the MacroCompiler during + * compilation of a macro. While this is happening, the reader should never attempt to read another encoding + * directive. + * @return true if the reader is not in the process of compiling a macro; false if it is. + */ + private boolean macroCompilationNotInProgress() { + return encodingDirectiveReader == null || !encodingDirectiveReader.isMacroCompilationInProgress(); + } + + /** + * @return true if current value has a sequence of annotations that begins with `$ion`; otherwise, false. 
+ */ + boolean startsWithIonAnnotation() { + if (isEvaluatingEExpression) { + return SystemSymbols_1_1.ION.getText().equals(macroEvaluatorIonReader.iterateTypeAnnotations().next()); + } + // TODO also resolve symbol identifiers and compare against text that looks like $ion + return SystemSymbols_1_1.ION.getText().equals(_annotations[0].getText()); + } + + /** + * @return true if the current value has at least one annotation. + */ + private boolean hasAnnotations() { + return _annotation_count > 0 || (isEvaluatingEExpression && macroEvaluatorIonReader.hasAnnotations()); + } + + /** + * @return true if the reader is positioned on an encoding directive; otherwise, false. + */ + private boolean isPositionedOnEncodingDirective() { + return hasAnnotations() + && _value_type == IonType.SEXP + && !isNullValue() + && macroCompilationNotInProgress() + && startsWithIonAnnotation(); + } + + /** + * Reads an encoding directive and installs any symbols and/or macros found within. Upon calling this method, + * the reader must be positioned on a top-level s-expression annotated with `$ion`. + */ + private void readEncodingDirective() { + if (encodingDirectiveReader == null) { + encodingDirectiveReader = new EncodingDirectiveReader(this, readerAdapter); + } + encodingDirectiveReader.reset(); + encodingDirectiveReader.readEncodingDirective(encodingContext); + List newSymbols = encodingDirectiveReader.getNewSymbols(); + if (encodingDirectiveReader.isSymbolTableAppend()) { + SymbolTable current = getSymbolTable(); + if (current.isSystemTable()) { + // TODO determine the best way to handle the Ion 1.1 system symbols. 
+ List withSystemSymbols = new ArrayList<>(SystemSymbols_1_1.allSymbolTexts()); + withSystemSymbols.addAll(newSymbols); + setSymbolTable(new LocalSymbolTable( + LocalSymbolTableImports.EMPTY, + withSystemSymbols + )); + } else { + LocalSymbolTable currentLocal = (LocalSymbolTable) current; + for (String appendedSymbol : newSymbols) { + currentLocal.putSymbol(appendedSymbol); + } + } + } else { + setSymbolTable(new LocalSymbolTable( + // TODO handle shared symbol table imports declared in the encoding directive + LocalSymbolTableImports.EMPTY, + newSymbols + )); + } + installMacros(); + } + + // This is essentially copied from IonReaderContinuableCoreBinary.EncodingDirectiveReader.installMacros + // See the comment for EncodingDirectiveReader.kt + private void installMacros() { + boolean isMacroTableAppend = encodingDirectiveReader.isMacroTableAppend(); + Map newMacros = encodingDirectiveReader.getNewMacros(); + + if (!isMacroTableAppend) { + encodingContext = new EncodingContext(new MutableMacroTable(MacroTable.empty()), true); + } else if (!encodingContext.isMutable() && !newMacros.isEmpty()){ // we need to append, but can't + encodingContext = new EncodingContext(new MutableMacroTable(encodingContext.getMacroTable()), true); + } + if (!newMacros.isEmpty()) encodingContext.getMacroTable().putAll(newMacros); + } + + + /** + * Reads macro invocation arguments as expressions and feeds them to the MacroEvaluator. + */ + private class TextEExpressionArgsReader extends EExpressionArgsReader { + + TextEExpressionArgsReader() { + super(readerAdapter); + } + + @Override + protected void readParameter(Macro.Parameter parameter, long parameterPresence, List expressions, boolean isTrailing) { + if (IonReaderTextSystemX.this.nextRaw() == null) { + // Add an empty expression group if nothing present. 
+ int index = expressions.size() + 1; + expressions.add(new Expression.ExpressionGroup(index, index)); + return; + } + readValueAsExpression(isTrailing && parameter.getCardinality().canBeMulti, expressions); + } + + @Override + protected Macro loadMacro() { + IonReaderTextSystemX.this.stepIn(); + if (IonReaderTextSystemX.this.nextRaw() == null) { + throw new IonException("Macro invocation missing address."); + } + List annotations = getAnnotations(); + boolean isSystemMacro = !annotations.isEmpty() && SystemSymbols_1_1.ION.getText().equals(annotations.get(0).getText()); + MacroRef address; + if (_value_type == IonType.SYMBOL) { + String name = stringValue(); + if (name == null) { + throw new IonException("Macros invoked by name must have non-null name."); + } + address = MacroRef.byName(name); + } else if (_value_type == IonType.INT) { + long id = longValue(); + if (id > Integer.MAX_VALUE) { + throw new IonException("Macro addresses larger than 2147483647 are not supported by this implementation."); + } + address = MacroRef.byId((int) id); + } else { + throw new IonException("E-expressions must begin with an address."); + } + + Macro macro = isSystemMacro ? SystemMacro.get(address) : encodingContext.getMacroTable().get(address); + if (macro == null) { + throw new IonException(String.format("Encountered an unknown macro address: %s.", address)); + } + return macro; + } + + @Override + protected PresenceBitmap loadPresenceBitmapIfNecessary(List signature) { + // Text Ion does not use a presence bitmap. + return null; + } + + @Override + protected boolean isMacroInvocation() { + return _container_is_e_expression; + } + + @Override + protected boolean isContainerAnExpressionGroup() { + return _container_is_expression_group; + } + + @Override + protected List getAnnotations() { + return _annotation_count == 0 ? 
Collections.emptyList() : Arrays.asList(getTypeAnnotationSymbols()); + } + + @Override + protected boolean nextRaw() { + return IonReaderTextSystemX.this.nextRaw() != null; + } + + @Override + protected void stepInRaw() { + IonReaderTextSystemX.this.stepIn(); + } + + @Override + protected void stepOutRaw() { + IonReaderTextSystemX.this.stepOut(); + } + + @Override + protected void stepIntoEExpression() { + // Do nothing; the text reader must have already stepped into the e-expression in order to read its address. + } + + @Override + protected void stepOutOfEExpression() { + // In text, e-expressions are traversed and handled in the same way as s-expressions. + IonReaderTextSystemX.this.stepOut(); + } + } + + /** + * Consumes the next value (if any) from the MacroEvaluator, setting `_value_type` based on the result. + * @return true if this call causes the evaluator to reach the end of the current invocation; otherwise, false. + */ + private boolean evaluateNext() { + _value_type = macroEvaluatorIonReader.next(); + if (_value_type == null && macroEvaluatorIonReader.getDepth() == 0) { + // Evaluation of this macro is complete. Resume reading from the stream. + isEvaluatingEExpression = false; + return true; + } + return false; + } + + /** + * Advances the reader, if necessary and possible, to the next value, reading any Ion 1.1+ encoding directives + * found along the way. + * @return true if the reader is positioned on a value; otherwise, false. 
+ */ + protected final boolean has_next_system_value() { + while (!_has_next_called && !_eof) { + if (isEvaluatingEExpression) { + if (evaluateNext()) { + continue; + } + _has_next_called = true; + } else { + has_next_raw_value(); + } + if (minorVersion > 0 && _value_type != null && IonType.DATAGRAM.equals(getContainerType()) && isPositionedOnEncodingDirective()) { + readEncodingDirective(); + continue; + } + if (_container_is_e_expression) { + expressionArgsReader.beginEvaluatingMacroInvocation(macroEvaluator); + isEvaluatingEExpression = true; + continue; + } + break; + } + return !_eof; + } + + @Override + public boolean hasNext() + { + return has_next_system_value(); + } + + @Override + public void transcodeAllTo(MacroAwareIonWriter writer) throws IOException { + prepareTranscodeTo(writer); + while (transcodeNext()); + } + + @Override + public void prepareTranscodeTo(@NotNull MacroAwareIonWriter writer) { + macroAwareTranscoder = writer; + } + + @Override + public boolean transcodeNext() throws IOException { + // TODO consider improving the readability of this method and its binary counterpart: https://github.com/amazon-ion/ion-java/issues/1004 + if (macroAwareTranscoder == null) { + throw new IllegalArgumentException("prepareTranscodeTo must be called before transcodeNext."); + } + boolean isSystemValue = false; + while (true) { + if (isEvaluatingEExpression) { + if (evaluateNext()) { + if (isSystemValue) { + continue; + } + return !_eof; + } + } else { + nextRaw(); + } + isSystemValue = false; + if (_value_type != null && getDepth() == 0) { + if (IonType.SYMBOL == getType() && handlePossibleIonVersionMarker()) { + // Which IVM to write is inherent to the writer implementation. + // We don't have a single implementation that writes both formats. 
+ macroAwareTranscoder.startEncodingSegmentWithIonVersionMarker(); + isSystemValue = true; + continue; + } + if (minorVersion > 0 && isPositionedOnEncodingDirective()) { + boolean isEncodingDirectiveFromEExpression = isEvaluatingEExpression; + readEncodingDirective(); + macroAwareTranscoder.startEncodingSegmentWithEncodingDirective( + encodingDirectiveReader.getNewMacros(), + encodingDirectiveReader.isMacroTableAppend(), + encodingDirectiveReader.getNewSymbols(), + encodingDirectiveReader.isSymbolTableAppend(), + isEncodingDirectiveFromEExpression + ); + isSystemValue = true; + continue; + } + } + if (_container_is_e_expression) { + expressionArgsReader.beginEvaluatingMacroInvocation(macroEvaluator); + macroEvaluatorIonReader.transcodeArgumentsTo(macroAwareTranscoder); + isEvaluatingEExpression = true; + continue; + } else if (isEvaluatingEExpression) { + // This is an e-expression that yields user values. Its arguments have already been transcoded. + continue; + } + if (_eof) { + return false; + } + transcodeValueLiteral(); + return !_eof; + } + } + + /** + * @return true if the reader is positioned on an Ion 1.0 symbol table; otherwise, false. Note: the caller must + * ensure this is called only at the top level. + */ + boolean isPositionedOnSymbolTable() { + return _annotation_count > 0 && (ION_SYMBOL_TABLE.equals(_annotations[0].getText()) || ION_SYMBOL_TABLE_SID == _annotations[0].getSid()); + } + + // Matches "$ion_x_y", where x and y are integers. + private static final Pattern ION_VERSION_MARKER_REGEX = Pattern.compile("^\\$ion_[0-9]+_[0-9]+$"); + + /** + * @param text the text of a symbol value. + * @return true if the text denotes an IVM; otherwise, false. + */ + static boolean isIonVersionMarker(String text) + { + return text != null && ION_VERSION_MARKER_REGEX.matcher(text).matches(); + } + + /** + * Resets the symbol table after an IVM is encountered. May be overridden if additional side effects are required. 
+ */ + void symbol_table_reset() { + setSymbolTable(_system_symtab); + } + + /** + * Determines whether the top-level symbol value on which the reader is positioned is an Ion version marker. + * If it is, sets the reader's Ion version accordingly and resets the symbol table. + * @return true if the symbol represented an Ion version marker; otherwise, false. Note: the caller must + * ensure this is called only at the top level with the reader positioned on a symbol value. + */ + boolean handlePossibleIonVersionMarker() { + if (_annotation_count == 0) + { + // $ion_1_0 is read as an IVM only if it is not annotated + String version = symbolValue().getText(); + if (isIonVersionMarker(version)) + { + if (ION_1_0.equals(version) || "$ion_1_1".equals(version)) + { + setMinorVersion(version.charAt(version.length() - 1) - '0'); + if (_value_keyword != IonTokenConstsX.KEYWORD_sid) + { + symbol_table_reset(); + } + _has_next_called = false; + } + else + { + throw new UnsupportedIonVersionException(version); + } + return true; + } + } + return false; + } + + /** + * Transcodes a value literal to the macroAwareTranscoder. The caller must ensure that the reader is positioned + * on a value literal (i.e. a scalar or container value not expanded from an e-expression) before calling this + * method. + * @throws IOException if thrown by the writer during transcoding. + */ + private void transcodeValueLiteral() throws IOException { + if (getDepth() == 0 && isPositionedOnSymbolTable()) { + if (minorVersion > 0) { + // TODO finalize handling of Ion 1.0-style symbol tables in Ion 1.1: https://github.com/amazon-ion/ion-java/issues/1002 + throw new IonException("Macro-aware transcoding of Ion 1.1 data containing Ion 1.0-style symbol tables not yet supported."); + } + // Ion 1.0 symbol tables are transcoded verbatim for now; this may change depending on the resolution to + // https://github.com/amazon-ion/ion-java/issues/1002. 
+ macroAwareTranscoder.writeValue(this); + } else if (IonType.isContainer(getType()) && !isNullValue()) { + // Containers need to be transcoded recursively to avoid expanding macro invocations at any depth. + if (isInStruct()) { + macroAwareTranscoder.setFieldNameSymbol(getFieldNameSymbol()); + } + macroAwareTranscoder.setTypeAnnotationSymbols(getTypeAnnotationSymbols()); + macroAwareTranscoder.stepIn(getType()); + super.stepIn(); + while (transcodeNext()); // TODO make this iterative. + super.stepOut(); + macroAwareTranscoder.stepOut(); + } else { + // The reader is now positioned on a scalar literal. Write the value. + // Note: writeValue will include any field name and/or annotations on the scalar. + macroAwareTranscoder.writeValue(this); + } + } + + @Override + public void stepIn() { + if (isEvaluatingEExpression) { + macroEvaluatorIonReader.stepIn(); + } + super.stepIn(); + } + + @Override + public void stepOut() { + if (isEvaluatingEExpression) { + macroEvaluatorIonReader.stepOut(); + // The reader is already positioned after the container. Simply pop the information about this container + // from the stack without seeking forward to find the delimiter. + endContainerRaw(); + return; + } + super.stepOut(); + } + + /** + * @return the {@link EncodingContext} currently active, or {@code null}. + */ + EncodingContext getEncodingContext() { + return encodingContext; + } } diff --git a/src/main/java/com/amazon/ion/impl/IonReaderTextUserX.java b/src/main/java/com/amazon/ion/impl/IonReaderTextUserX.java index 61e09041ef..23d32370e6 100644 --- a/src/main/java/com/amazon/ion/impl/IonReaderTextUserX.java +++ b/src/main/java/com/amazon/ion/impl/IonReaderTextUserX.java @@ -1,22 +1,7 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. 
- * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl; -import static com.amazon.ion.SystemSymbols.ION_1_0; -import static com.amazon.ion.SystemSymbols.ION_SYMBOL_TABLE; import com.amazon.ion.IonCatalog; import com.amazon.ion.IonType; @@ -28,8 +13,6 @@ import com.amazon.ion.SymbolToken; import com.amazon.ion.TextSpan; import com.amazon.ion.UnknownSymbolException; -import com.amazon.ion.UnsupportedIonVersionException; -import java.util.regex.Pattern; /** * The text user reader add support for symbols and recognizes, @@ -56,7 +39,6 @@ class IonReaderTextUserX extends IonReaderTextSystemX implements _Private_ReaderWriter { - private static final Pattern ION_VERSION_MARKER_REGEX = Pattern.compile("^\\$ion_[0-9]+_[0-9]+$"); /** * This is the physical start-of-stream offset when this reader was created. @@ -68,7 +50,6 @@ class IonReaderTextUserX // IonSystem _system; now in IonReaderTextSystemX where it could be null IonCatalog _catalog; - SymbolTable _symbols; protected IonReaderTextUserX(IonCatalog catalog, @@ -77,7 +58,7 @@ protected IonReaderTextUserX(IonCatalog catalog, int physicalStartOffset) { super(uis); - _symbols = _system_symtab; + setSymbolTable(_system_symtab); _physical_start_offset = physicalStartOffset; _catalog = catalog; _lstFactory = lstFactory; @@ -89,6 +70,7 @@ protected IonReaderTextUserX(IonCatalog catalog, this(catalog, lstFactory, uis, 0); } + /** * this looks forward to see if there is an upcoming value * and if there is it returns true. 
It may have to clean up @@ -121,7 +103,7 @@ private final boolean has_next_user_value() { // first move to the next value regardless of whether // it's a system value or a user value - has_next_raw_value(); + has_next_system_value(); // system values are only at the datagram level // we don't care about them if they're buried @@ -131,36 +113,16 @@ private final boolean has_next_user_value() if (_value_type != null && !isNullValue() && IonType.DATAGRAM.equals(getContainerType())) { switch (_value_type) { case STRUCT: - if (_annotation_count > 0 && ION_SYMBOL_TABLE.equals(_annotations[0].getText())) { - _symbols = _lstFactory.newLocalSymtab(_catalog, + if (isPositionedOnSymbolTable()) { + setSymbolTable(_lstFactory.newLocalSymtab(_catalog, this, - true); + true)); push_symbol_table(_symbols); _has_next_called = false; } break; case SYMBOL: - if (_annotation_count == 0) - { - // $ion_1_0 is read as an IVM only if it is not annotated - String version = symbolValue().getText(); - if (isIonVersionMarker(version)) - { - if (ION_1_0.equals(version)) - { - if (_value_keyword != IonTokenConstsX.KEYWORD_sid) - { - symbol_table_reset(); - push_symbol_table(_system_symtab); - } - _has_next_called = false; - } - else - { - throw new UnsupportedIonVersionException(version); - } - } - } + handlePossibleIonVersionMarker(); break; default: break; @@ -170,17 +132,13 @@ private final boolean has_next_user_value() return (!_eof); } - private static boolean isIonVersionMarker(String text) - { - return text != null && ION_VERSION_MARKER_REGEX.matcher(text).matches(); - } - - private final void symbol_table_reset() + @Override + final void symbol_table_reset() { IonType t = next(); assert( IonType.SYMBOL.equals(t) ); - _symbols = _system_symtab; - return; + setSymbolTable(_system_symtab); + push_symbol_table(_system_symtab); // TODO install the correct system symbol table for the active Ion version. 
} private void validateSymbolToken(SymbolToken symbol) { diff --git a/src/main/java/com/amazon/ion/impl/IonReaderTreeUserX.java b/src/main/java/com/amazon/ion/impl/IonReaderTreeUserX.java index 6977da27a6..d028532115 100644 --- a/src/main/java/com/amazon/ion/impl/IonReaderTreeUserX.java +++ b/src/main/java/com/amazon/ion/impl/IonReaderTreeUserX.java @@ -1,23 +1,12 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl; import static com.amazon.ion.SymbolTable.UNKNOWN_SYMBOL_ID; import static com.amazon.ion.SystemSymbols.ION_1_0_SID; import static com.amazon.ion.SystemSymbols.ION_SYMBOL_TABLE; +import static com.amazon.ion.SystemSymbols.ION_SYMBOL_TABLE_SID; +import static com.amazon.ion.impl.IonReaderTextUserX.isIonVersionMarker; import com.amazon.ion.IonCatalog; import com.amazon.ion.IonDatagram; @@ -29,6 +18,7 @@ import com.amazon.ion.Span; import com.amazon.ion.SpanProvider; import com.amazon.ion.SymbolTable; +import com.amazon.ion.SymbolToken; final class IonReaderTreeUserX @@ -111,9 +101,9 @@ boolean next_helper_user() sid = _system_symtab.findSymbol(name); } } - if (sid == ION_1_0_SID + if ((sid == ION_1_0_SID || isIonVersionMarker(sym.symbolValue().getText())) && _next.getTypeAnnotationSymbols().length == 0) { - // $ion_1_0 is read as an IVM only if it is not annotated + // $ion_1_0 and other version markers are read as an IVM only if unannotated SymbolTable symbols = _system_symtab; _symbols = symbols; push_symbol_table(symbols); @@ -122,7 +112,7 @@ boolean next_helper_user() } } else if (IonType.STRUCT.equals(next_type) - && _next.findTypeAnnotation(ION_SYMBOL_TABLE) == 0 + && _next_has_ion_symbol_table_annotation() ) { assert(_next instanceof IonStruct); // read a local symbol table @@ -143,6 +133,14 @@ else if (IonType.STRUCT.equals(next_type) } return (next_type != null); } + + private boolean _next_has_ion_symbol_table_annotation() { + SymbolToken[] annotations = _next.getTypeAnnotationSymbols(); + if (annotations.length == 0) return false; + return annotations[0].getSid() == ION_SYMBOL_TABLE_SID + || ION_SYMBOL_TABLE.equals(annotations[0].getText()); // value equality: annotation text may be null or a non-interned String + } + // // This code handles the skipped symbol table // support - it is cloned in IonReaderTextUserX diff --git a/src/main/java/com/amazon/ion/impl/IonTokenConstsX.java b/src/main/java/com/amazon/ion/impl/IonTokenConstsX.java index 33a8e8e365..73605df8fa 
100644 --- a/src/main/java/com/amazon/ion/impl/IonTokenConstsX.java +++ b/src/main/java/com/amazon/ion/impl/IonTokenConstsX.java @@ -1,18 +1,5 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl; import com.amazon.ion.IonException; @@ -81,8 +68,11 @@ public static class CharacterSequence { public static final int TOKEN_BINARY = 26; - public static final int TOKEN_MAX = 26; - public static final int TOKEN_count = 27; + public static final int TOKEN_OPEN_PAREN_COLON = 27; + public static final int TOKEN_OPEN_PAREN_DOUBLE_COLON = 28; + + public static final int TOKEN_MAX = 28; + public static final int TOKEN_count = 29; public static final int KEYWORD_unrecognized = -1; public static final int KEYWORD_none = 0; @@ -139,6 +129,9 @@ public final static String getTokenName(int t) { case TOKEN_OPEN_DOUBLE_BRACE: return "TOKEN_OPEN_DOUBLE_BRACE"; case TOKEN_CLOSE_DOUBLE_BRACE: return "TOKEN_CLOSE_DOUBLE_BRACE"; + case TOKEN_OPEN_PAREN_COLON: return "TOKEN_OPEN_PAREN_COLON"; + case TOKEN_OPEN_PAREN_DOUBLE_COLON: return "TOKEN_OPEN_PAREN_DOUBLE_COLON"; + default: return ""; } } diff --git a/src/main/java/com/amazon/ion/impl/IonTypeID.java b/src/main/java/com/amazon/ion/impl/IonTypeID.java index 20b3cf9860..29c8407217 100644 --- a/src/main/java/com/amazon/ion/impl/IonTypeID.java +++ 
b/src/main/java/com/amazon/ion/impl/IonTypeID.java @@ -1,9 +1,11 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 - package com.amazon.ion.impl; import com.amazon.ion.IonType; +import com.amazon.ion.impl.bin.OpCodes; + +import static com.amazon.ion.impl.bin.OpCodes.*; /** * Holds pre-computed information about a binary Ion type ID byte. @@ -21,6 +23,12 @@ final class IonTypeID { private static final int ANNOTATION_WRAPPER_MAX_LENGTH = 0xE; static final int ORDERED_STRUCT_NIBBLE = 0x1; + // Ion 1.1 annotation wrapper lower nibbles (upper nibble 0xE) + static final int ONE_ANNOTATION_SID_LOWER_NIBBLE_1_1 = 0x4; + static final int TWO_ANNOTATION_SIDS_LOWER_NIBBLE_1_1 = 0x5; + static final int ONE_ANNOTATION_FLEX_SYM_LOWER_NIBBLE_1_1 = 0x7; + static final int TWO_ANNOTATION_FLEX_SYMS_LOWER_NIBBLE_1_1 = 0x8; + // NOTE: 'annotation wrapper' is not an IonType, but it is simplest to treat it as one for the purposes of this // implementation in order to have a direct mapping from binary type IDs to IonType enum values. IonType.DATAGRAM // does not have a type ID, so we will use it to mean 'annotation wrapper' instead. @@ -49,19 +57,89 @@ final class IonTypeID { null // The 0xF type code is illegal in Ion 1.0. }; + private static final IonType[] BINARY_TOKEN_TYPES_1_1 = new IonType[] { + null, // 0: macro invocation + null, // 1: macro invocation + null, // 2: macro invocation + null, // 3: macro invocation + null, // 4: macro invocation + null, // 5: macro invocation + null, // 6: int, float, bool + IonType.DECIMAL, + IonType.TIMESTAMP, + IonType.STRING, + IonType.SYMBOL, + IonType.LIST, + IonType.SEXP, + IonType.STRUCT, // symbol ID field names + null, // E: symbol ID, annotated value, NOP, null, system macro invocation + null // F: variable length macro, variable length of all types, delimited start/end + }; + // Singleton invalid type ID. 
private static final IonTypeID ALWAYS_INVALID_TYPE_ID = new IonTypeID((byte) 0xFF, 0); // Pre-compute all possible type ID bytes. static final IonTypeID[] TYPE_IDS_NO_IVM; static final IonTypeID[] TYPE_IDS_1_0; + static final IonTypeID[] TYPE_IDS_1_1; + static final IonTypeID[] NULL_TYPE_IDS_1_1; + static final IonTypeID STRUCT_WITH_FLEX_SYMS_ID; + static final IonTypeID DELIMITED_END_ID; + static final IonTypeID SYSTEM_SYMBOL_VALUE; + static final IonTypeID SYSTEM_MACRO_INVOCATION_ID; static { TYPE_IDS_NO_IVM = new IonTypeID[NUMBER_OF_BYTES]; TYPE_IDS_1_0 = new IonTypeID[NUMBER_OF_BYTES]; + TYPE_IDS_1_1 = new IonTypeID[NUMBER_OF_BYTES]; for (int b = 0x00; b < NUMBER_OF_BYTES; b++) { TYPE_IDS_NO_IVM[b] = ALWAYS_INVALID_TYPE_ID; TYPE_IDS_1_0[b] = new IonTypeID((byte) b, 0); + TYPE_IDS_1_1[b] = new IonTypeID((byte) b, 1); } + // In Ion 1.1, typed nulls are represented by the type ID 0xEB followed by a 1-byte UInt indicating the type. + // Therefore, the type of the typed null cannot be precomputed in Ion 1.1. In order to avoid adding more hot + // path branching to the reader, we create IonTypeIDs that mimic precomputed typed nulls in Ion 1.1 by reusing + // the typed null type IDs from Ion 1.0. When the type of the typed null is determined, the reader's current + // IonTypeID will be replaced with one of these. The index is the one-byte value that follows 0xEB. 
+ NULL_TYPE_IDS_1_1 = new IonTypeID[12]; + NULL_TYPE_IDS_1_1[0x0] = TYPE_IDS_1_0[0x1F]; // null.bool + NULL_TYPE_IDS_1_1[0x1] = TYPE_IDS_1_0[0x2F]; // null.int + NULL_TYPE_IDS_1_1[0x2] = TYPE_IDS_1_0[0x4F]; // null.float + NULL_TYPE_IDS_1_1[0x3] = TYPE_IDS_1_0[0x5F]; // null.decimal + NULL_TYPE_IDS_1_1[0x4] = TYPE_IDS_1_0[0x6F]; // null.timestamp + NULL_TYPE_IDS_1_1[0x5] = TYPE_IDS_1_0[0x8F]; // null.string + NULL_TYPE_IDS_1_1[0x6] = TYPE_IDS_1_0[0x7F]; // null.symbol + NULL_TYPE_IDS_1_1[0x7] = TYPE_IDS_1_0[0xAF]; // null.blob + NULL_TYPE_IDS_1_1[0x8] = TYPE_IDS_1_0[0x9F]; // null.clob + NULL_TYPE_IDS_1_1[0x9] = TYPE_IDS_1_0[0xBF]; // null.list + NULL_TYPE_IDS_1_1[0xA] = TYPE_IDS_1_0[0xCF]; // null.sexp + NULL_TYPE_IDS_1_1[0xB] = TYPE_IDS_1_0[0xDF]; // null.struct + + IonTypeID variableLengthStructId = TYPE_IDS_1_1[VARIABLE_LENGTH_STRUCT_WITH_SIDS & 0xFF]; + // This is used as a dummy ID when a struct switches to using FlexSym field names in the middle. The key + // here is that the type is STRUCT and the isInlineable flag is true. + STRUCT_WITH_FLEX_SYMS_ID = new IonTypeID( + variableLengthStructId.type, + variableLengthStructId.length, + variableLengthStructId.variableLength, + variableLengthStructId.isNull, + variableLengthStructId.isNopPad, + variableLengthStructId.lowerNibble, + variableLengthStructId.isValid, + variableLengthStructId.isNegativeInt, + variableLengthStructId.isMacroInvocation, + variableLengthStructId.macroId, + variableLengthStructId.isDelimited, + /* isInlineable */ true + ); + + // This is used as a dummy ID when a delimited container reaches its end. The key here is that the type ID's + // lower nibble is OpCodes.DELIMITED_END_MARKER. 
+ DELIMITED_END_ID = TYPE_IDS_1_1[DELIMITED_END_MARKER & 0xFF]; + + SYSTEM_SYMBOL_VALUE = TYPE_IDS_1_1[SYSTEM_SYMBOL & 0xFF]; + SYSTEM_MACRO_INVOCATION_ID = TYPE_IDS_1_1[OpCodes.SYSTEM_MACRO_INVOCATION & 0xFF]; } final IonType type; @@ -69,18 +147,19 @@ final class IonTypeID { final boolean variableLength; final boolean isNull; final boolean isNopPad; - final byte lowerNibble; + final byte lowerNibble; // TODO consider storing the entire byte rather than just the lower nibble final boolean isValid; final boolean isNegativeInt; - final boolean isTemplateInvocation; // Unused in Ion 1.0 - final int templateId; // Unused in Ion 1.0 - final boolean isDelimited; // Unused in Ion 1.0 - // For structs, denotes whether field names are VarSyms. For symbols, denotes whether the text is inline. - // For annotation wrappers, denotes whether tokens are VarSyms. - final boolean isInlineable; // Unused in Ion 1.0 + final boolean isMacroInvocation; + final int macroId; + final boolean isDelimited; + // For structs, denotes whether field names are FlexSyms. For symbols, denotes whether the text is inline. + // For annotation wrappers, denotes whether tokens are FlexSyms. + final boolean isInlineable; + final int theByte; /** - * Determines whether the Ion spec allows this particular upperNibble/lowerNibble pair. + * Determines whether the Ion 1.0 spec allows this particular upperNibble/lowerNibble pair. */ private static boolean isValid_1_0(byte upperNibble, byte lowerNibble, IonType type) { if (upperNibble == TYPE_CODE_INVALID) { @@ -109,7 +188,52 @@ private static boolean isValid_1_0(byte upperNibble, byte lowerNibble, IonType t return true; } + /** + * Determines whether the Ion 1.1 spec allows this particular upperNibble/lowerNibble pair. + */ + private static boolean isValid_1_1(byte id) { + return !( + id == 0x69 + || id == (byte) 0xD1 + || id == (byte) 0xE0 + || id == (byte) 0x8D + || id == (byte) 0x8E + || id == (byte) 0x8F + ); + } + + // Used for creating dummy type ids. 
+ private IonTypeID( + final IonType type, + final int length, + final boolean variableLength, + final boolean isNull, + final boolean isNopPad, + final byte lowerNibble, + final boolean isValid, + final boolean isNegativeInt, + final boolean isMacroInvocation, + final int macroId, + final boolean isDelimited, + final boolean isInlineable + ) { + this.type = type; + this.length = length; + this.variableLength = variableLength; + this.isNull = isNull; + this.isNopPad = isNopPad; + this.lowerNibble = lowerNibble; + this.isValid = isValid; + this.isNegativeInt = isNegativeInt; + this.isMacroInvocation = isMacroInvocation; + this.macroId = macroId; + this.isDelimited = isDelimited; + this.isInlineable = isInlineable; + theByte = -1; + } + private IonTypeID(byte id, int minorVersion) { + theByte = 0xFF & (int) id; if (minorVersion == 0) { byte upperNibble = (byte) ((id >> BITS_PER_NIBBLE) & LOW_NIBBLE_BITMASK); this.lowerNibble = (byte) (id & LOW_NIBBLE_BITMASK); @@ -136,12 +260,194 @@ private IonTypeID(byte id, int minorVersion) { } this.isNegativeInt = type == IonType.INT && upperNibble == NEGATIVE_INT_TYPE_CODE; this.length = length; - this.isTemplateInvocation = false; - this.templateId = -1; + this.isMacroInvocation = false; + this.macroId = -1; this.isDelimited = false; this.isInlineable = false; } else { - throw new IllegalStateException("Only Ion 1.0 is currently supported."); + isValid = isValid_1_1(id); + byte upperNibble = (byte) ((id >> BITS_PER_NIBBLE) & LOW_NIBBLE_BITMASK); + // For 0xF0 (delimited end byte) the entire byte is included. This avoids having to create a separate field + // just to identify this byte. + lowerNibble = (id == DELIMITED_END_MARKER) ? DELIMITED_END_MARKER : (byte) (id & LOW_NIBBLE_BITMASK); + isNegativeInt = false; // Not applicable for Ion 1.1; sign is conveyed by the representation. 
+ isMacroInvocation = (id >= 0x00 && id <= 0x5F) || id == E_EXPRESSION_WITH_FLEX_UINT_ADDRESS + || id == SYSTEM_MACRO_INVOCATION || id == LENGTH_PREFIXED_MACRO_INVOCATION; + boolean isNopPad = false; + boolean isNull = false; + int length = -1; + if (isMacroInvocation) { + if (upperNibble == 0x5) { + variableLength = false; + length = 2; + // This isn't the whole macro ID, but it's all the relevant bits from the type ID byte (the 4 + // least-significant bits), with pre-computed bias. + macroId = (lowerNibble << 16) + 4160; + } else if (upperNibble == 0x4) { + variableLength = false; + length = 1; + // This isn't the whole macro ID, but it's all the relevant bits from the type ID byte (the 4 + // least-significant bits), with pre-computed bias. + macroId = (lowerNibble << 8) + 64; + } else if (upperNibble < 0x4){ + variableLength = false; + macroId = id; + length = 0; + } else { + if (id == E_EXPRESSION_WITH_FLEX_UINT_ADDRESS) { + variableLength = false; + length = 1; + } else if (id == LENGTH_PREFIXED_MACRO_INVOCATION) { + variableLength = true; + } else { + // System invocation; ID follows as a 1-byte FixedInt. + variableLength = false; + length = 1; + } + macroId = -1; + } + type = null; + isInlineable = false; + } else { + macroId = -1; + variableLength = + (upperNibble == 0xF && lowerNibble >= 0x4) // Variable length, all types. + || id == ANNOTATIONS_MANY_SYMBOL_ADDRESS + || id == ANNOTATIONS_MANY_FLEX_SYM + || id == VARIABLE_LENGTH_NOP; + isInlineable = + id == DELIMITED_STRUCT + || id == VARIABLE_LENGTH_INLINE_SYMBOL + || id == ANNOTATIONS_1_FLEX_SYM + || id == ANNOTATIONS_2_FLEX_SYM + || id == ANNOTATIONS_MANY_FLEX_SYM + // Symbol values with inline text. 
+ || upperNibble == 0xA; + IonType typeFromUpperNibble = BINARY_TOKEN_TYPES_1_1[upperNibble]; + if (typeFromUpperNibble == null) { + if (!isValid) { + type = null; + } else if (upperNibble == 0x6) { + if (lowerNibble <= 0x8) { + type = IonType.INT; + length = lowerNibble; + } else if (id == BOOLEAN_TRUE || id == BOOLEAN_FALSE) { + type = IonType.BOOL; + length = 0; + } else { + type = IonType.FLOAT; + if (id == FLOAT_ZERO_LENGTH) { + length = 0; // 0e0 + } else if (id == FLOAT_16) { + length = 2; + } else if (id == FLOAT_32) { + length = 4; + } else if (id == FLOAT_64) { + length = 8; + } + } + } else if (upperNibble == 0xE) { + if (id == SYMBOL_ADDRESS_1_BYTE || id == SYMBOL_ADDRESS_2_BYTES || id == SYMBOL_ADDRESS_MANY_BYTES) { + type = IonType.SYMBOL; + length = id == SYMBOL_ADDRESS_MANY_BYTES ? -1 : lowerNibble; + } else if (lowerNibble <= 0x9) { + type = ION_TYPE_ANNOTATION_WRAPPER; + } else if (id == NULL_UNTYPED) { + type = IonType.NULL; + isNull = true; + length = 0; + } else if (id == NULL_TYPED) { + // Typed null. Type byte follows. + type = null; + isNull = true; + length = 1; + } else if (id == ONE_BYTE_NOP || id == VARIABLE_LENGTH_NOP) { + isNopPad = true; + type = null; + length = variableLength ? -1 : 0; + } else if (id == SYSTEM_SYMBOL) { + type = IonType.SYMBOL; + length = 1; + } else { // 0xF + // System macro invocation. 
+ type = null; + } + } else { // 0xF + if (id == DELIMITED_END_MARKER) { + type = null; + length = 0; + } else if (id == DELIMITED_STRUCT || id == VARIABLE_LENGTH_STRUCT_WITH_SIDS) { + type = IonType.STRUCT; + } else if (id == VARIABLE_LENGTH_INTEGER) { + type = IonType.INT; + } else if (id == VARIABLE_LENGTH_DECIMAL) { + type = IonType.DECIMAL; + } else if (id == VARIABLE_LENGTH_TIMESTAMP) { + type = IonType.TIMESTAMP; + } else if (id == VARIABLE_LENGTH_INLINE_SYMBOL) { + type = IonType.SYMBOL; + } else if (id == VARIABLE_LENGTH_STRING) { + type = IonType.STRING; + } else if (id == VARIABLE_LENGTH_BLOB) { + type = IonType.BLOB; + } else if (id == VARIABLE_LENGTH_CLOB) { + type = IonType.CLOB; + } else if (id == DELIMITED_LIST || id == VARIABLE_LENGTH_LIST) { + type = IonType.LIST; + } else if (id == DELIMITED_SEXP || id == VARIABLE_LENGTH_SEXP) { + type = IonType.SEXP; + } else { // 0x4, 0x5 + // E-Expression with FlexUInt Address or E-Expression with FlexUInt Length + type = null; + } + } + } else { + type = typeFromUpperNibble; + if (type == IonType.TIMESTAMP) { + // Short-form timestamps. Long-form timestamps use the upper nibble 0xF, forcing them to take + // the previous branch. 
+ switch (id) { + case TIMESTAMP_YEAR_PRECISION: + length = 1; + break; + case TIMESTAMP_MONTH_PRECISION: + case TIMESTAMP_DAY_PRECISION: + length = 2; + break; + case TIMESTAMP_MINUTE_PRECISION: + length = 4; + break; + case TIMESTAMP_SECOND_PRECISION: + case TIMESTAMP_MINUTE_PRECISION_WITH_OFFSET: + case TIMESTAMP_SECOND_PRECISION_WITH_OFFSET: + length = 5; + break; + case TIMESTAMP_MILLIS_PRECISION: + length = 6; + break; + case TIMESTAMP_MICROS_PRECISION: + case TIMESTAMP_MILLIS_PRECISION_WITH_OFFSET: + length = 7; + break; + case TIMESTAMP_NANOS_PRECISION: + case TIMESTAMP_MICROS_PRECISION_WITH_OFFSET: + length = 8; + break; + case TIMESTAMP_NANOS_PRECISION_WITH_OFFSET: + length = 9; + break; + default: + // Unreachable + } + } else { + length = lowerNibble; + } + } + } + isDelimited = id == DELIMITED_LIST || id == DELIMITED_SEXP || id == DELIMITED_STRUCT; + this.isNopPad = isNopPad; + this.isNull = isNull; + this.length = length; } } @@ -150,6 +456,6 @@ private IonTypeID(byte id, int minorVersion) { */ @Override public String toString() { - return String.format("%s(%s)", type, length); + return String.format("%02X(%s,%s)>", theByte, type, length); } } diff --git a/src/main/java/com/amazon/ion/impl/LocalSymbolTable.java b/src/main/java/com/amazon/ion/impl/LocalSymbolTable.java index 713d8998ae..62b6b90d05 100644 --- a/src/main/java/com/amazon/ion/impl/LocalSymbolTable.java +++ b/src/main/java/com/amazon/ion/impl/LocalSymbolTable.java @@ -1,18 +1,5 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. 
See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl; import static com.amazon.ion.SystemSymbols.IMPORTS; @@ -49,11 +36,11 @@ *

* Instances of this class are safe for use by multiple threads. */ -class LocalSymbolTable +public class LocalSymbolTable implements _Private_LocalSymbolTable { - static class Factory implements _Private_LocalSymbolTableFactory + public static class Factory implements _Private_LocalSymbolTableFactory { private Factory(){} // Should be accessed through the singleton @@ -88,7 +75,7 @@ public SymbolTable newLocalSymtab(SymbolTable defaultSystemSymtab, } - static final Factory DEFAULT_LST_FACTORY = new Factory(); + public static final Factory DEFAULT_LST_FACTORY = new Factory(); /** * The initial length of {@link #mySymbolNames}. diff --git a/src/main/java/com/amazon/ion/impl/LocalSymbolTableImports.java b/src/main/java/com/amazon/ion/impl/LocalSymbolTableImports.java index 3f6efe9d5c..0196096b3f 100644 --- a/src/main/java/com/amazon/ion/impl/LocalSymbolTableImports.java +++ b/src/main/java/com/amazon/ion/impl/LocalSymbolTableImports.java @@ -1,18 +1,5 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl; import static com.amazon.ion.SymbolTable.UNKNOWN_SYMBOL_ID; @@ -20,6 +7,7 @@ import com.amazon.ion.SymbolTable; import com.amazon.ion.SymbolToken; import java.util.Arrays; +import java.util.Collections; import java.util.List; /** @@ -33,6 +21,10 @@ // there are zero or one imported non-system shared symtab(s). final class LocalSymbolTableImports { + public static final LocalSymbolTableImports EMPTY = new LocalSymbolTableImports(Collections.emptyList()); + + private static final SymbolTable[] EMPTY_SYMBOL_TABLE_ARRAY = new SymbolTable[0]; + /** * The symtabs imported by a local symtab, never null or empty. The first * symtab must be a system symtab, the rest must be non-system shared @@ -139,10 +131,11 @@ final class LocalSymbolTableImports */ private static int prepBaseSids(int[] baseSids, SymbolTable[] imports) { - SymbolTable firstImport = imports[0]; + if (imports.length == 0) { + return 0; + } - assert firstImport.isSystemTable() - : "first symtab must be a system symtab"; + SymbolTable firstImport = imports[0]; baseSids[0] = 0; int total = firstImport.getMaxId(); @@ -170,7 +163,7 @@ String findKnownSymbol(int sid) { String name = null; - if (sid <= myMaxId) + if (sid > 0 && sid <= myMaxId) { int i, previousBaseSid = 0; for (i = 1; i < myImports.length; i++) @@ -251,13 +244,15 @@ SymbolTable getSystemSymbolTable() */ SymbolTable[] getImportedTables() { - int count = myImports.length - 1; // we don't include system symtab - SymbolTable[] imports = new SymbolTable[count]; - if (count > 0) - { - // defensive copy - System.arraycopy(myImports, 1, imports, 0, count); - } + // We have only the system symbol table, or we have none. + // None implies an empty system symbol table, as in Ion 1.1. 
+ if (myImports.length == 1 || myImports.length == 0) return EMPTY_SYMBOL_TABLE_ARRAY; + + int nonSystemTables = myImports.length - 1; // we don't include system symtab + + SymbolTable[] imports = new SymbolTable[nonSystemTables]; + // defensive copy + System.arraycopy(myImports, 1, imports, 0, nonSystemTables); return imports; } diff --git a/src/main/java/com/amazon/ion/impl/Marker.java b/src/main/java/com/amazon/ion/impl/Marker.java index 593e4fee90..548b12bb80 100644 --- a/src/main/java/com/amazon/ion/impl/Marker.java +++ b/src/main/java/com/amazon/ion/impl/Marker.java @@ -1,6 +1,5 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 - package com.amazon.ion.impl; /** @@ -25,11 +24,11 @@ class Marker { /** * @param startIndex index of the first byte in the slice. - * @param length the number of bytes in the slice. + * @param endIndex index of the first byte after the slice. */ - Marker(final int startIndex, final int length) { + Marker(final int startIndex, final int endIndex) { this.startIndex = startIndex; - this.endIndex = startIndex + length; + this.endIndex = endIndex; } /** diff --git a/src/main/java/com/amazon/ion/impl/MarkerList.java b/src/main/java/com/amazon/ion/impl/MarkerList.java new file mode 100644 index 0000000000..276f2745d8 --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/MarkerList.java @@ -0,0 +1,125 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl; + +/** + * A list of {@link Marker} values that grows as necessary, and serves as a pool to avoid excessive allocation. + */ +public class MarkerList { + + private Marker[] data; + private int numberOfValues; + private int provisionalIndex; + + /** + * Constructs a new MarkerList with the specified capacity. + * @param initialCapacity The number of Markers that can be stored in this MarkerList before it will need to be + * reallocated. 
+ */ + public MarkerList(final int initialCapacity) { + data = new Marker[initialCapacity]; + for (int i = 0; i < initialCapacity; i++) { + data[i] = new Marker(-1, -1); + } + numberOfValues = 0; + provisionalIndex = 0; + } + + /** + * Accessor. + * @return The number of Markers currently stored in the list. + */ + public int size() { + return numberOfValues; + } + + /** + * @return The number of markers, including provisional markers, in the list. + */ + public int provisionalSize() { + return provisionalIndex; + } + + /** + * @return {@code true} if there are Markers stored in the list. + */ + public boolean isEmpty() { + return numberOfValues == 0; + } + + /** + * Empties the list. + * Note that this method does not shrink the size of the backing data store. + */ + public void clear() { + numberOfValues = 0; + provisionalIndex = 0; + } + + /** + * Returns the {@code index}th Marker in the list. + * @param index The list index of the desired Marker. + * @return The Marker at index {@code index} in the list. + * @throws IndexOutOfBoundsException if the index is negative or greater than the number of committed Markers + * stored in the list. + */ + public Marker get(int index) { + if (index < 0 || index >= numberOfValues) { + throw new IndexOutOfBoundsException( + "Invalid index " + index + " requested from IntList with " + numberOfValues + " values." + ); + } + return data[index]; + } + + /** + * Returns the {@code index}th Marker in the list, even if that Marker is provisional. + * @param index The list index of the desired Marker. + * @return The Marker at index {@code index} in the list. + * @throws IndexOutOfBoundsException if the index is negative or greater than the number of Markers stored in the + * list. + */ + public Marker provisionalGet(int index) { + if (index < 0 || index >= provisionalIndex) { + throw new IndexOutOfBoundsException( + "Invalid index " + index + " requested from IntList with " + provisionalIndex + " values." 
+ ); + } + return data[index]; + } + + /** + * @return The Marker that, if committed, will become the next element in the list. Grows the list if necessary. + */ + public Marker provisionalElement() { + if (provisionalIndex == data.length) { + grow(); + } + Marker provisional = data[provisionalIndex]; + provisional.startIndex = -1; + provisional.endIndex = -1; + provisional.typeId = null; + provisionalIndex += 1; + return provisional; + } + + /** + * Commits a provisional element, increasing the size of the list by one. It is the caller's responsibility to + * ensure that a provisional element exists. + */ + public void commit() { + numberOfValues += 1; + } + + /** + * Reallocates the backing array to accommodate storing more ints. + */ + private void grow() { + Marker[] newData = new Marker[data.length * 2]; + System.arraycopy(data, 0, newData, 0, data.length); + for (int i = data.length; i < newData.length; i++) { + newData[i] = new Marker(-1, -1); + } + data = newData; + } +} diff --git a/src/main/java/com/amazon/ion/impl/PrivateIonRawWriter_1_1.kt b/src/main/java/com/amazon/ion/impl/PrivateIonRawWriter_1_1.kt new file mode 100644 index 0000000000..ae91af202b --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/PrivateIonRawWriter_1_1.kt @@ -0,0 +1,63 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl + +import com.amazon.ion.impl.macro.Macro + +/** + * Allows us to write encoding directives in a more optimized and/or readable way. + * Could be used to construct invalid data if used in the wrong way, so we don't + * expose this to users. + * + * Some functions may be meaningless to a particular underlying implementation. + */ +internal interface PrivateIonRawWriter_1_1 : IonRawWriter_1_1 { + /** + * Writes a parameter cardinality. 
+ */ + fun writeMacroParameterCardinality(cardinality: Macro.ParameterCardinality) + + /** + * Steps into a TDL Macro Invocation—an s-expression, followed by `.` and then the macro address + */ + fun stepInTdlMacroInvocation(macroRef: Int) + + /** + * Steps into a TDL Macro Invocation—an s-expression, followed by `.` and then the macro name + */ + fun stepInTdlMacroInvocation(macroRef: String) + + /** + * Steps into a TDL System Macro Invocation—an s-expression, followed by `.` and then the qualified macro name + */ + fun stepInTdlSystemMacroInvocation(systemSymbol: SystemSymbols_1_1) + + /** + * Steps in s-expression, writes `%` symbol, variable name, and then closes the s-expression. + */ + fun writeTdlVariableExpansion(variableName: String) + + /** + * Steps in s-expression and writes `..` symbol. + */ + fun stepInTdlExpressionGroup() + + /** + * Sets a flag that can override the newlines that are normally inserted by a pretty printer. + * + * Ignored by binary implementations. + * + * TODO: Once system symbols are implemented, consider replacing this with dedicated + * `startClause(SystemSymbol)` and `endClause()`, or similar. + * * This will allow the text writer to + * * start the clauses without added newlines + * * Skip checking whether to write annotations + * * This will allow the binary writer to + * * Leverage macros, when possible + * * Skip checking whether to write annotations + * * Skip checking whether a given string or SID is a system symbol + * * E.g. `startClause(SystemSymbol_1_1.MACRO_TABLE)` could directly write the + * bytes `F2 EF` followed by `E3` for "macro_table". + */ + fun forceNoNewlines(boolean: Boolean) = Unit +} diff --git a/src/main/java/com/amazon/ion/impl/SystemSymbols_1_1.kt b/src/main/java/com/amazon/ion/impl/SystemSymbols_1_1.kt new file mode 100644 index 0000000000..ac3981e566 --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/SystemSymbols_1_1.kt @@ -0,0 +1,141 @@ +// Copyright Amazon.com, Inc. or its affiliates. 
All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl + +import com.amazon.ion.* +import com.amazon.ion.SymbolTable.* +import java.util.* + +enum class SystemSymbols_1_1(val id: Int, val text: String) { + // System SID 0 is reserved. + ION( /* */ 1, "\$ion"), + ION_1_0( /* */ 2, "\$ion_1_0"), + ION_SYMBOL_TABLE( /* */ 3, "\$ion_symbol_table"), + NAME( /* */ 4, "name"), + VERSION( /* */ 5, "version"), + IMPORTS( /* */ 6, "imports"), + SYMBOLS( /* */ 7, "symbols"), + MAX_ID( /* */ 8, "max_id"), + ION_SHARED_SYMBOL_TABLE( /* */ 9, "\$ion_shared_symbol_table"), + ENCODING( /* */ 10, "encoding"), + ION_LITERAL( /* */ 11, "\$ion_literal"), + ION_SHARED_MODULE( /* */ 12, "\$ion_shared_module"), + MACRO( /* */ 13, "macro"), + MACRO_TABLE( /* */ 14, "macro_table"), + SYMBOL_TABLE( /* */ 15, "symbol_table"), + MODULE( /* */ 16, "module"), + EXPORT( /* */ 17, "export"), + IMPORT( /* */ 18, "import"), + FLEX_SYMBOL( /* */ 19, "flex_symbol"), + FLEX_INT( /* */ 20, "flex_int"), + FLEX_UINT( /* */ 21, "flex_uint"), + UINT8( /* */ 22, "uint8"), + UINT16( /* */ 23, "uint16"), + UINT32( /* */ 24, "uint32"), + UINT64( /* */ 25, "uint64"), + INT8( /* */ 26, "int8"), + INT16( /* */ 27, "int16"), + INT32( /* */ 28, "int32"), + INT64( /* */ 29, "int64"), + FLOAT16( /* */ 30, "float16"), + FLOAT32( /* */ 31, "float32"), + FLOAT64( /* */ 32, "float64"), + EMPTY_TEXT( /* */ 33, ""), + FOR( /* */ 34, "for"), + LITERAL( /* */ 35, "literal"), + IF_NONE( /* */ 36, "if_none"), + IF_SOME( /* */ 37, "if_some"), + IF_SINGLE( /* */ 38, "if_single"), + IF_MULTI( /* */ 39, "if_multi"), + NONE( /* */ 40, "none"), + VALUES( /* */ 41, "values"), + DEFAULT( /* */ 42, "default"), + META( /* */ 43, "meta"), + REPEAT( /* */ 44, "repeat"), + FLATTEN( /* */ 45, "flatten"), + DELTA( /* */ 46, "delta"), + SUM( /* */ 47, "sum"), + ANNOTATE( /* */ 48, "annotate"), + MAKE_STRING( /* */ 49, "make_string"), + MAKE_SYMBOL( /* */ 50, "make_symbol"), + MAKE_DECIMAL( /* */ 51, 
"make_decimal"), + MAKE_TIMESTAMP( /* */ 52, "make_timestamp"), + MAKE_BLOB( /* */ 53, "make_blob"), + MAKE_LIST( /* */ 54, "make_list"), + MAKE_SEXP( /* */ 55, "make_sexp"), + MAKE_FIELD( /* */ 56, "make_field"), + MAKE_STRUCT( /* */ 57, "make_struct"), + PARSE_ION( /* */ 58, "parse_ion"), + SET_SYMBOLS( /* */ 59, "set_symbols"), + ADD_SYMBOLS( /* */ 60, "add_symbols"), + SET_MACROS( /* */ 61, "set_macros"), + ADD_MACROS( /* */ 62, "add_macros"), + USE( /* */ 63, "use"), + ; + + val utf8Bytes = text.encodeToByteArray() + + val token: SymbolToken = SymbolTokenImpl(text, UNKNOWN_SYMBOL_ID) + + companion object { + private val ALL_VALUES: Array = entries.toTypedArray().apply { + // Put all system symbol enum values into an array, and ensure that they are sorted by ID in that array. + // This allows us to have O(1) lookup, but it doesn't rely on the enum's ordinal value, which could change. + Arrays.sort(this) { o1, o2 -> o1.id.compareTo(o2.id) } + } + init { + // Initialization checks to make sure that the system symbols are not misconfigured. + ALL_VALUES + .map { it.id } + .zipWithNext { a, b -> + check(b - a > -1) { "System symbols not sorted. Found $a before $b." } + check(b - a != 0) { "Duplicate ID $a in system symbols" } + check(b - a == 1) { "Gap in system symbols between $a and $b" } + } + } + + @JvmStatic + private val BY_NAME: HashMap = ALL_VALUES.fold(HashMap(ALL_VALUES.size)) { map, s -> + check(map.put(s.text, s) == null) { "Duplicate system symbol text: ${s.id}=${s.text}" } + map + } + + @JvmStatic + fun size() = ALL_VALUES.size + + // Private to avoid potential clashes with enum member names. + @JvmStatic + private val ALL_SYMBOL_TEXTS = ALL_VALUES.map { it.text } + + @JvmStatic + fun allSymbolTexts() = ALL_SYMBOL_TEXTS + + /** Returns true if the [id] is a valid system symbol ID. 
*/ + @JvmStatic + operator fun contains(id: Int): Boolean { + return id > 0 && id <= SystemSymbols_1_1.ALL_VALUES.size + } + + /** Returns true if the [text] is in the system symbol table. */ + @JvmStatic + operator fun contains(text: String): Boolean { + return SystemSymbols_1_1.BY_NAME.containsKey(text) + } + + /** + * Returns the system symbol corresponding to the given system symbol ID, + * or `null` if not a valid system symbol ID.*/ + @JvmStatic + operator fun get(id: Int): SystemSymbols_1_1? { + return if (contains(id)) { SystemSymbols_1_1.ALL_VALUES[id - 1] } else { null } + } + + /** + * Returns the system symbol corresponding to the given system symbol text, + * or `null` if not a valid system symbol text.*/ + @JvmStatic + operator fun get(text: String): SystemSymbols_1_1? { + return BY_NAME[text] + } + } +} diff --git a/src/main/java/com/amazon/ion/impl/TaglessEncoding.kt b/src/main/java/com/amazon/ion/impl/TaglessEncoding.kt new file mode 100644 index 0000000000..4d6c4e1f9e --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/TaglessEncoding.kt @@ -0,0 +1,27 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl + +/** + * The tagless encodings supported by Ion 1.1+. + * + * TODO: + * - Consider moving to [com.amazon.ion.impl.macro]. 
+ * - Try to make this `internal` or `package-private` + */ +enum class TaglessEncoding(@JvmField internal val typeID: IonTypeID, @JvmField val isUnsigned: Boolean) { + UINT8(IonTypeID.TYPE_IDS_1_1[0x61], true), + UINT16(IonTypeID.TYPE_IDS_1_1[0x62], true), + UINT32(IonTypeID.TYPE_IDS_1_1[0x64], true), + UINT64(IonTypeID.TYPE_IDS_1_1[0x68], true), + FLEX_UINT(IonTypeID.TYPE_IDS_1_1[0xF6], true), + INT8(IonTypeID.TYPE_IDS_1_1[0x61], false), + INT16(IonTypeID.TYPE_IDS_1_1[0x62], false), + INT32(IonTypeID.TYPE_IDS_1_1[0x64], false), + INT64(IonTypeID.TYPE_IDS_1_1[0x68], false), + FLEX_INT(IonTypeID.TYPE_IDS_1_1[0xF6], false), + FLOAT16(IonTypeID.TYPE_IDS_1_1[0x6B], false), + FLOAT32(IonTypeID.TYPE_IDS_1_1[0x6C], false), + FLOAT64(IonTypeID.TYPE_IDS_1_1[0x6D], false), + FLEX_SYM(IonTypeID.TYPE_IDS_1_1[0xFA], false) +} diff --git a/src/main/java/com/amazon/ion/impl/_Private_IonConstants.java b/src/main/java/com/amazon/ion/impl/_Private_IonConstants.java index d7c706b36d..df337368ef 100644 --- a/src/main/java/com/amazon/ion/impl/_Private_IonConstants.java +++ b/src/main/java/com/amazon/ion/impl/_Private_IonConstants.java @@ -1,21 +1,9 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl; import com.amazon.ion.IonException; +import com.amazon.ion._private.SuppressFBWarnings; /** * NOT FOR APPLICATION USE! @@ -108,13 +96,23 @@ public static final boolean isSurrogate(int c) { /** * The byte sequence indicating use of Ion 1.0 binary format. */ + @SuppressFBWarnings("MS_MUTABLE_ARRAY") public static final byte[] BINARY_VERSION_MARKER_1_0 = { (byte) 0xE0, (byte) 0x01, (byte) 0x00, (byte) 0xEA }; /** - * The number of bytes in {@link #BINARY_VERSION_MARKER_1_0}. + * The byte sequence indicating use of Ion 1.1 binary format. + */ + @SuppressFBWarnings("MS_MUTABLE_ARRAY") + public static final byte[] BINARY_VERSION_MARKER_1_1 = { (byte) 0xE0, + (byte) 0x01, + (byte) 0x01, + (byte) 0xEA }; + + /** + * The number of bytes in {@link #BINARY_VERSION_MARKER_1_0} */ public static final int BINARY_VERSION_MARKER_SIZE = BINARY_VERSION_MARKER_1_0.length; diff --git a/src/main/java/com/amazon/ion/impl/_Private_IonReaderBuilder.java b/src/main/java/com/amazon/ion/impl/_Private_IonReaderBuilder.java index c7e56f8abd..0868421811 100644 --- a/src/main/java/com/amazon/ion/impl/_Private_IonReaderBuilder.java +++ b/src/main/java/com/amazon/ion/impl/_Private_IonReaderBuilder.java @@ -1,6 +1,5 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 - package com.amazon.ion.impl; import com.amazon.ion.IonCatalog; @@ -8,6 +7,7 @@ import com.amazon.ion.IonReader; import com.amazon.ion.IonTextReader; import com.amazon.ion.IonValue; +import com.amazon.ion.MacroAwareIonReader; import com.amazon.ion.system.IonReaderBuilder; import com.amazon.ion.util.IonStreamUtils; @@ -21,6 +21,7 @@ import static com.amazon.ion.impl.LocalSymbolTable.DEFAULT_LST_FACTORY; import static com.amazon.ion.impl._Private_IonReaderFactory.makeReader; import static com.amazon.ion.impl._Private_IonReaderFactory.makeReaderText; +import static com.amazon.ion.impl._Private_IonReaderFactory.makeSystemReaderText; /** * {@link IonReaderBuilder} extension for internal use only. @@ -183,26 +184,26 @@ public void close() throws IOException { } @FunctionalInterface - interface IonReaderFromBytesFactoryText { - IonReader makeReader(IonCatalog catalog, byte[] ionData, int offset, int length, _Private_LocalSymbolTableFactory lstFactory); + interface IonReaderFromBytesFactoryText { + T makeReader(IonCatalog catalog, byte[] ionData, int offset, int length, _Private_LocalSymbolTableFactory lstFactory); } @FunctionalInterface - interface IonReaderFromBytesFactoryBinary { - IonReader makeReader(_Private_IonReaderBuilder builder, byte[] ionData, int offset, int length); + interface IonReaderFromBytesFactoryBinary { + T makeReader(_Private_IonReaderBuilder builder, byte[] ionData, int offset, int length); } - static IonReader buildReader( + static T buildReader( _Private_IonReaderBuilder builder, byte[] ionData, int offset, int length, - IonReaderFromBytesFactoryBinary binary, - IonReaderFromBytesFactoryText text + IonReaderFromBytesFactoryBinary binary, + IonReaderFromBytesFactoryText text ) { if (IonStreamUtils.isGzip(ionData, offset, length)) { try { - return buildReader( + return (T) buildReader( builder, new GZIPInputStream(new ByteArrayInputStream(ionData, offset, length)), _Private_IonReaderFactory::makeReaderBinary, 
@@ -257,20 +258,20 @@ private static boolean startsWithGzipHeader(byte[] buffer, int length) { } @FunctionalInterface - interface IonReaderFromInputStreamFactoryText { - IonReader makeReader(IonCatalog catalog, InputStream source, _Private_LocalSymbolTableFactory lstFactory); + interface IonReaderFromInputStreamFactoryText { + T makeReader(IonCatalog catalog, InputStream source, _Private_LocalSymbolTableFactory lstFactory); } @FunctionalInterface - interface IonReaderFromInputStreamFactoryBinary { - IonReader makeReader(_Private_IonReaderBuilder builder, InputStream source, byte[] alreadyRead, int alreadyReadOff, int alreadyReadLen); + interface IonReaderFromInputStreamFactoryBinary { + T makeReader(_Private_IonReaderBuilder builder, InputStream source, byte[] alreadyRead, int alreadyReadOff, int alreadyReadLen); } - static IonReader buildReader( + static T buildReader( _Private_IonReaderBuilder builder, InputStream source, - IonReaderFromInputStreamFactoryBinary binary, - IonReaderFromInputStreamFactoryText text + IonReaderFromInputStreamFactoryBinary binary, + IonReaderFromInputStreamFactoryText text ) { if (source == null) { throw new NullPointerException("Cannot build a reader from a null InputStream."); @@ -352,4 +353,33 @@ public IonTextReader build(String ionText) { return makeReaderText(validateCatalog(), ionText, lstFactory); } + /** + * Creates a new {@link MacroAwareIonReader} over the given data. + * @param ionData the data to read. + * @return a new MacroAwareIonReader instance. + */ + public MacroAwareIonReader buildMacroAware(byte[] ionData) { + return buildReader( + this, + ionData, + 0, + ionData.length, + (builder, data, offset, length) -> new IonReaderContinuableCoreBinary(builder.getBufferConfiguration(), data, offset,length), + (catalog, data, offset, length, factory) -> (IonReaderTextSystemX) makeSystemReaderText(catalog, data, offset, length, factory) + ); + } + + /** + * Creates a new {@link MacroAwareIonReader} over the given data. 
+ * @param ionData the data to read. + * @return a new MacroAwareIonReader instance. + */ + public MacroAwareIonReader buildMacroAware(InputStream ionData) { + return buildReader( + this, + ionData, + (builder, source, alreadyRead, alreadyReadOff, alreadyReadLen) -> new IonReaderContinuableCoreBinary(builder.getBufferConfiguration(), source, alreadyRead, alreadyReadOff, alreadyReadLen), + (catalog, source, factory) -> (IonReaderTextSystemX) makeSystemReaderText(catalog, source, factory) + ); + } } diff --git a/src/main/java/com/amazon/ion/impl/_Private_IonReaderFactory.java b/src/main/java/com/amazon/ion/impl/_Private_IonReaderFactory.java index 1f0e83ce57..04682cc9ee 100644 --- a/src/main/java/com/amazon/ion/impl/_Private_IonReaderFactory.java +++ b/src/main/java/com/amazon/ion/impl/_Private_IonReaderFactory.java @@ -1,18 +1,5 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl; import static com.amazon.ion.impl.UnifiedInputStreamX.makeStream; @@ -113,9 +100,9 @@ public static IonReader makeSystemReaderText(InputStream is) ); } - private static IonReader makeSystemReaderText(IonCatalog catalog, - InputStream is, - _Private_LocalSymbolTableFactory lstFactory) + public static IonReader makeSystemReaderText(IonCatalog catalog, + InputStream is, + _Private_LocalSymbolTableFactory lstFactory) { UnifiedInputStreamX uis; try @@ -129,11 +116,11 @@ private static IonReader makeSystemReaderText(IonCatalog catalog, return new IonReaderTextSystemX(uis); } - private static IonReader makeSystemReaderText(IonCatalog catalog, - byte[] bytes, - int offset, - int length, - _Private_LocalSymbolTableFactory lstFactory) { + public static IonReader makeSystemReaderText(IonCatalog catalog, + byte[] bytes, + int offset, + int length, + _Private_LocalSymbolTableFactory lstFactory) { UnifiedInputStreamX uis; try { diff --git a/src/main/java/com/amazon/ion/impl/_Private_IonTextWriterBuilder.java b/src/main/java/com/amazon/ion/impl/_Private_IonTextWriterBuilder.java index 9d8ae52c2a..b1f7c957ce 100644 --- a/src/main/java/com/amazon/ion/impl/_Private_IonTextWriterBuilder.java +++ b/src/main/java/com/amazon/ion/impl/_Private_IonTextWriterBuilder.java @@ -1,52 +1,24 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. 
or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl; -import static com.amazon.ion.impl._Private_Utils.initialSymtab; -import com.amazon.ion.IonCatalog; -import com.amazon.ion.IonSystem; -import com.amazon.ion.IonWriter; -import com.amazon.ion.SymbolTable; -import com.amazon.ion.system.IonSystemBuilder; import com.amazon.ion.system.IonTextWriterBuilder; import com.amazon.ion.system.SimpleCatalog; -import com.amazon.ion.util._Private_FastAppendable; -import java.io.OutputStream; /** + * Contains configuration common to text writers for all Ion versions. * NOT FOR APPLICATION USE! */ -public class _Private_IonTextWriterBuilder +public abstract class _Private_IonTextWriterBuilder> extends IonTextWriterBuilder { private final static CharSequence SPACE_CHARACTER = " "; - public static _Private_IonTextWriterBuilder standard() - { - return new _Private_IonTextWriterBuilder.Mutable(); - } - - public static _Private_IonTextWriterBuilder STANDARD = - standard().immutable(); - //========================================================================= - private boolean _pretty_print; + public boolean _pretty_print; // These options control whether the IonTextWriter will write standard ion or ion that is down-converted json. 
public boolean _blob_as_string; @@ -62,15 +34,15 @@ public static _Private_IonTextWriterBuilder standard() public boolean _untyped_nulls; public boolean _allow_invalid_sids; - private _Private_CallbackBuilder _callback_builder; + public _Private_CallbackBuilder _callback_builder; - private _Private_IonTextWriterBuilder() + _Private_IonTextWriterBuilder() { super(); } - private _Private_IonTextWriterBuilder(_Private_IonTextWriterBuilder that) + _Private_IonTextWriterBuilder(T that) { super(that); this._callback_builder = that._callback_builder ; @@ -89,40 +61,35 @@ private _Private_IonTextWriterBuilder(_Private_IonTextWriterBuilder that) this._allow_invalid_sids = that._allow_invalid_sids ; } - @Override - public final _Private_IonTextWriterBuilder copy() - { - return new Mutable(this); - } + public abstract T copy(); @Override - public _Private_IonTextWriterBuilder immutable() + public T immutable() { - return this; + return (T) this; } @Override - public _Private_IonTextWriterBuilder mutable() + public T mutable() { return copy(); } - //========================================================================= @Override - public final IonTextWriterBuilder withPrettyPrinting() + public IonTextWriterBuilder withPrettyPrinting() { - _Private_IonTextWriterBuilder b = mutable(); + T b = mutable(); b._pretty_print = true; return b; } @Override - public final IonTextWriterBuilder withJsonDowngrade() + public IonTextWriterBuilder withJsonDowngrade() { - _Private_IonTextWriterBuilder b = mutable(); + T b = mutable(); b.withMinimalSystemData(); @@ -150,8 +117,8 @@ public final IonTextWriterBuilder withJsonDowngrade() * @param allowInvalidSids whether to allow invalid SIDs. * @return the builder. 
*/ - public final _Private_IonTextWriterBuilder withInvalidSidsAllowed(boolean allowInvalidSids) { - _Private_IonTextWriterBuilder b = mutable(); + public final T withInvalidSidsAllowed(boolean allowInvalidSids) { + T b = mutable(); b._allow_invalid_sids = allowInvalidSids; return b; } @@ -179,7 +146,7 @@ final CharSequence topLevelSeparator() //========================================================================= - private _Private_IonTextWriterBuilder fillDefaults() + T fillDefaults() { // Ensure that we don't modify the user's builder. IonTextWriterBuilder b = copy(); @@ -206,87 +173,7 @@ private _Private_IonTextWriterBuilder fillDefaults() )); } - return (_Private_IonTextWriterBuilder) b.immutable(); - } - - - /** Assumes that {@link #fillDefaults()} has been called. */ - private IonWriter build(_Private_FastAppendable appender) - { - IonCatalog catalog = getCatalog(); - SymbolTable[] imports = getImports(); - - // TODO We shouldn't need a system here - IonSystem system = - IonSystemBuilder.standard().withCatalog(catalog).build(); - - SymbolTable defaultSystemSymtab = system.getSystemSymbolTable(); - - IonWriterSystemText systemWriter = - (getCallbackBuilder() == null - ? 
new IonWriterSystemText(defaultSystemSymtab, - this, - appender) - : new IonWriterSystemTextMarkup(defaultSystemSymtab, - this, - appender)); - - SymbolTable initialSymtab = - initialSymtab(((_Private_ValueFactory)system).getLstFactory(), defaultSystemSymtab, imports); - - return new IonWriterUser(catalog, system, systemWriter, initialSymtab, !_allow_invalid_sids); - } - - - @Override - public final IonWriter build(Appendable out) - { - _Private_IonTextWriterBuilder b = fillDefaults(); - - _Private_FastAppendable fast = new AppendableFastAppendable(out); - - return b.build(fast); - } - - - @Override - public final IonWriter build(OutputStream out) - { - _Private_IonTextWriterBuilder b = fillDefaults(); - - _Private_FastAppendable fast = new OutputStreamFastAppendable(out); - - return b.build(fast); - } - - //========================================================================= - - private static final class Mutable - extends _Private_IonTextWriterBuilder - { - private Mutable() { } - - private Mutable(_Private_IonTextWriterBuilder that) - { - super(that); - } - - @Override - public _Private_IonTextWriterBuilder immutable() - { - return new _Private_IonTextWriterBuilder(this); - } - - @Override - public _Private_IonTextWriterBuilder mutable() - { - return this; - } - - @Override - protected void mutationCheck() - { - } + return (T) b.immutable(); } //------------------------------------------------------------------------- @@ -328,10 +215,9 @@ public void setCallbackBuilder(_Private_CallbackBuilder builder) * @see #getCallbackBuilder() * @see #setCallbackBuilder(_Private_CallbackBuilder) */ - public final _Private_IonTextWriterBuilder - withCallbackBuilder(_Private_CallbackBuilder builder) + public final T withCallbackBuilder(_Private_CallbackBuilder builder) { - _Private_IonTextWriterBuilder b = mutable(); + T b = mutable(); b.setCallbackBuilder(builder); return b; } diff --git a/src/main/java/com/amazon/ion/impl/_Private_IonTextWriterBuilder_1_0.java 
b/src/main/java/com/amazon/ion/impl/_Private_IonTextWriterBuilder_1_0.java new file mode 100644 index 0000000000..5fbaf584e3 --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/_Private_IonTextWriterBuilder_1_0.java @@ -0,0 +1,124 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl; + +import com.amazon.ion.IonCatalog; +import com.amazon.ion.IonSystem; +import com.amazon.ion.IonWriter; +import com.amazon.ion.SymbolTable; +import com.amazon.ion.system.IonSystemBuilder; +import com.amazon.ion.util._Private_FastAppendable; + +import java.io.OutputStream; + +import static com.amazon.ion.impl._Private_Utils.initialSymtab; + +/** + * Contains configuration for Ion 1.0 text writers. + * NOT FOR APPLICATION USE! + */ +public class _Private_IonTextWriterBuilder_1_0 extends _Private_IonTextWriterBuilder<_Private_IonTextWriterBuilder_1_0> { + + public static _Private_IonTextWriterBuilder_1_0 standard() + { + return new _Private_IonTextWriterBuilder_1_0.Mutable(); + } + + public static final _Private_IonTextWriterBuilder_1_0 STANDARD = standard().immutable(); + + private _Private_IonTextWriterBuilder_1_0() + { + super(); + } + + private _Private_IonTextWriterBuilder_1_0(_Private_IonTextWriterBuilder_1_0 that) + { + super(that); + } + + @Override + public final _Private_IonTextWriterBuilder_1_0 copy() + { + return new Mutable(this); + } + + + /** Assumes that {@link #fillDefaults()} has been called. */ + private IonWriter build(_Private_FastAppendable appender) + { + IonCatalog catalog = getCatalog(); + SymbolTable[] imports = getImports(); + + // TODO We shouldn't need a system here + IonSystem system = + IonSystemBuilder.standard().withCatalog(catalog).build(); + + SymbolTable defaultSystemSymtab = system.getSystemSymbolTable(); + + IonWriterSystemText systemWriter = + (getCallbackBuilder() == null + ? 
new IonWriterSystemText(defaultSystemSymtab, + this, + appender) + : new IonWriterSystemTextMarkup(defaultSystemSymtab, + this, + appender)); + + SymbolTable initialSymtab = + initialSymtab(((_Private_ValueFactory)system).getLstFactory(), defaultSystemSymtab, imports); + + return new IonWriterUser(catalog, system, systemWriter, initialSymtab, !_allow_invalid_sids); + } + + + @Override + public final IonWriter build(Appendable out) + { + _Private_IonTextWriterBuilder_1_0 b = fillDefaults(); + + _Private_FastAppendable fast = new AppendableFastAppendable(out); + + return b.build(fast); + } + + + @Override + public final IonWriter build(OutputStream out) + { + _Private_IonTextWriterBuilder_1_0 b = fillDefaults(); + + _Private_FastAppendable fast = new OutputStreamFastAppendable(out); + + return b.build(fast); + } + + //========================================================================= + + private static final class Mutable + extends _Private_IonTextWriterBuilder_1_0 + { + private Mutable() { } + + private Mutable(_Private_IonTextWriterBuilder_1_0 that) + { + super(that); + } + + @Override + public _Private_IonTextWriterBuilder_1_0 immutable() + { + return new _Private_IonTextWriterBuilder_1_0(this); + } + + @Override + public _Private_IonTextWriterBuilder_1_0 mutable() + { + return this; + } + + @Override + protected void mutationCheck() + { + } + } +} diff --git a/src/main/java/com/amazon/ion/impl/_Private_IonTextWriterBuilder_1_1.java b/src/main/java/com/amazon/ion/impl/_Private_IonTextWriterBuilder_1_1.java new file mode 100644 index 0000000000..f6166f9986 --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/_Private_IonTextWriterBuilder_1_1.java @@ -0,0 +1,202 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl; + +import com.amazon.ion.IonCatalog; +import com.amazon.ion.IonWriter; +import com.amazon.ion.SymbolTable; +import com.amazon.ion.impl.bin.LengthPrefixStrategy; +import com.amazon.ion.impl.bin.IonManagedWriter_1_1; +import com.amazon.ion.impl.bin.ManagedWriterOptions_1_1; +import com.amazon.ion.impl.bin.SymbolInliningStrategy; +import com.amazon.ion.system.IonTextWriterBuilder_1_1; + +import java.io.OutputStream; +import java.nio.charset.Charset; +import java.util.Objects; + +/** + * Contains configuration for Ion 1.1 text writers. + * NOT FOR APPLICATION USE! + */ +public class _Private_IonTextWriterBuilder_1_1 + extends _Private_IonTextWriterBuilder<_Private_IonTextWriterBuilder_1_1> implements IonTextWriterBuilder_1_1 { + + public static _Private_IonTextWriterBuilder_1_1 standard() { + return new _Private_IonTextWriterBuilder_1_1.Mutable(); + } + + private SymbolInliningStrategy symbolInliningStrategy = SymbolInliningStrategy.ALWAYS_INLINE; + + private _Private_IonTextWriterBuilder_1_1() { + super(); + } + + private _Private_IonTextWriterBuilder_1_1(_Private_IonTextWriterBuilder_1_1 that) { + super(that); + symbolInliningStrategy = that.symbolInliningStrategy; + } + + @Override + public SymbolInliningStrategy getSymbolInliningStrategy() { + return symbolInliningStrategy; + } + + @Override + public void setSymbolInliningStrategy(SymbolInliningStrategy symbolInliningStrategy) { + mutationCheck(); + this.symbolInliningStrategy = Objects.requireNonNull(symbolInliningStrategy); + } + + @Override + public IonTextWriterBuilder_1_1 withSymbolInliningStrategy(SymbolInliningStrategy symbolInliningStrategy) { + _Private_IonTextWriterBuilder_1_1 b = mutable(); + b.setSymbolInliningStrategy(symbolInliningStrategy); + return b; + } + + // The following methods are overridden in order to resolve a clashing return type, as they are defined by + // multiple ancestors (version-agnostic abstract classes and the 
Ion 1.1 interface). + + @Override + public _Private_IonTextWriterBuilder_1_1 copy() + { + return new _Private_IonTextWriterBuilder_1_1.Mutable(this); + } + + @Override + public _Private_IonTextWriterBuilder_1_1 immutable() + { + return this; + } + + @Override + public _Private_IonTextWriterBuilder_1_1 mutable() + { + return copy(); + } + + @Override + public _Private_IonTextWriterBuilder_1_1 withCatalog(IonCatalog catalog) { + return (_Private_IonTextWriterBuilder_1_1) super.withCatalog(catalog); + } + + @Override + public _Private_IonTextWriterBuilder_1_1 withImports(SymbolTable[] imports) { + return (_Private_IonTextWriterBuilder_1_1) super.withImports(imports); + } + + @Override + public _Private_IonTextWriterBuilder_1_1 withPrettyPrinting() { + return (_Private_IonTextWriterBuilder_1_1) super.withPrettyPrinting(); + } + + @Override + public _Private_IonTextWriterBuilder_1_1 withJsonDowngrade() { + return (_Private_IonTextWriterBuilder_1_1) super.withJsonDowngrade(); + } + + @Override + public _Private_IonTextWriterBuilder_1_1 withCharset(Charset charset) { + return (_Private_IonTextWriterBuilder_1_1) super.withCharset(charset); + } + + @Override + public _Private_IonTextWriterBuilder_1_1 withCharsetAscii() { + return (_Private_IonTextWriterBuilder_1_1) super.withCharsetAscii(); + } + + @Override + public _Private_IonTextWriterBuilder_1_1 withMinimalSystemData() { + return (_Private_IonTextWriterBuilder_1_1) super.withMinimalSystemData(); + } + + @Override + public _Private_IonTextWriterBuilder_1_1 withLstMinimizing(LstMinimizing minimizing) { + return (_Private_IonTextWriterBuilder_1_1) super.withLstMinimizing(minimizing); + } + + @Override + public _Private_IonTextWriterBuilder_1_1 withLongStringThreshold(int threshold) { + return (_Private_IonTextWriterBuilder_1_1) super.withLongStringThreshold(threshold); + } + + @Override + public _Private_IonTextWriterBuilder_1_1 withNewLineType(NewLineType newLineType) { + return (_Private_IonTextWriterBuilder_1_1) 
super.withNewLineType(newLineType); + } + + @Override + public _Private_IonTextWriterBuilder_1_1 withWriteTopLevelValuesOnNewLines(boolean writeTopLevelValuesOnNewLines) { + return (_Private_IonTextWriterBuilder_1_1) super.withWriteTopLevelValuesOnNewLines(writeTopLevelValuesOnNewLines); + } + + @Override + public _Private_IonTextWriterBuilder_1_1 withMaximumTimestampPrecisionDigits(int maximumTimestampPrecisionDigits) { + return (_Private_IonTextWriterBuilder_1_1) super.withMaximumTimestampPrecisionDigits(maximumTimestampPrecisionDigits); + } + + @Override + public _Private_IonTextWriterBuilder_1_1 withIvmMinimizing(IvmMinimizing minimizing) { + return (_Private_IonTextWriterBuilder_1_1) super.withIvmMinimizing(minimizing); + } + + @Override + public IonWriter build(Appendable out) { + if (out == null) { + throw new NullPointerException("Cannot construct a writer with a null Appendable."); + } + _Private_IonTextWriterBuilder_1_1 b = fillDefaults(); + ManagedWriterOptions_1_1 options = new ManagedWriterOptions_1_1( + false, + true, + symbolInliningStrategy, + LengthPrefixStrategy.NEVER_PREFIXED, + // This could be made configurable. + ManagedWriterOptions_1_1.EExpressionIdentifierStrategy.BY_NAME + ); + return IonManagedWriter_1_1.textWriter(out, options, b); + } + + @Override + public IonWriter build(OutputStream out) { + if (out == null) { + throw new NullPointerException("Cannot construct a writer with a null OutputStream."); + } + + _Private_IonTextWriterBuilder_1_1 b = fillDefaults(); + ManagedWriterOptions_1_1 options = new ManagedWriterOptions_1_1( + false, + true, + symbolInliningStrategy, + LengthPrefixStrategy.NEVER_PREFIXED, + // This could be made configurable. 
+ ManagedWriterOptions_1_1.EExpressionIdentifierStrategy.BY_NAME + ); + return IonManagedWriter_1_1.textWriter(out, options, b); + } + + //========================================================================= + + private static final class Mutable extends _Private_IonTextWriterBuilder_1_1 { + private Mutable() { } + + private Mutable(_Private_IonTextWriterBuilder_1_1 that) { + super(that); + } + + @Override + public _Private_IonTextWriterBuilder_1_1 immutable() { + return new _Private_IonTextWriterBuilder_1_1(this); + } + + @Override + public _Private_IonTextWriterBuilder_1_1 mutable() { + return this; + } + + @Override + protected void mutationCheck() { + } + } +} diff --git a/src/main/java/com/amazon/ion/impl/_Private_IonWriter.java b/src/main/java/com/amazon/ion/impl/_Private_IonWriter.java index 519a14f3d1..9b41a3cae6 100644 --- a/src/main/java/com/amazon/ion/impl/_Private_IonWriter.java +++ b/src/main/java/com/amazon/ion/impl/_Private_IonWriter.java @@ -1,22 +1,12 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl; import com.amazon.ion.IonCatalog; +import com.amazon.ion.IonReader; import com.amazon.ion.IonWriter; +import com.amazon.ion.SystemSymbols; + import java.io.IOException; /** @@ -62,4 +52,50 @@ public interface _Private_IonWriter /** Indicates whether the writer is stream copy optimized through {@link #writeValue(com.amazon.ion.IonReader)}. */ public boolean isStreamCopyOptimized(); + + @FunctionalInterface + interface IntTransformer { + + /** + * Transforms an int to another int. + * @param original the int to transform. + * @return the transformed int. + */ + int transform(int original); + } + + /** + * Returns the provided int unchanged. + */ + IntTransformer IDENTITY_INT_TRANSFORMER = i -> i; + + /** + * Transforms Ion 1.0 local symbol IDs to the equivalent Ion 1.1 local symbol ID. Note: system symbols do not + * follow this path. + */ + IntTransformer ION_1_0_SID_TO_ION_1_1_SID = sid -> sid - SystemSymbols.ION_1_0_MAX_ID; + + /** + * Works the same as {@link IonWriter#writeValues(IonReader)}, but transforms all symbol IDs that would otherwise + * be written verbatim using the given transform function. This can be used to do a system-level transcode of + * Ion 1.0 data to Ion 1.1 while preserving symbol IDs that point to the same text. + * @param reader the reader from which to transcode. + * @param symbolIdTransformer the symbol ID transform function. + * @throws IOException if thrown during write. + */ + default void writeValues(IonReader reader, IntTransformer symbolIdTransformer) throws IOException { + throw new UnsupportedOperationException(); + } + + /** + * Works the same as {@link IonWriter#writeValue(IonReader)}, but transforms all symbol IDs that would otherwise + * be written verbatim using the given transform function. This can be used to do a system-level transcode of + * Ion 1.0 data to Ion 1.1 while preserving symbol IDs that point to the same text. 
+ * @param reader the reader from which to transcode. + * @param symbolIdTransformer the symbol ID transform function. + * @throws IOException if thrown during write. + */ + default void writeValue(IonReader reader, IntTransformer symbolIdTransformer) throws IOException { + throw new UnsupportedOperationException(); + } } diff --git a/src/main/java/com/amazon/ion/impl/_Private_RecyclingQueue.java b/src/main/java/com/amazon/ion/impl/_Private_RecyclingQueue.java index a52ccd7a35..22afbafd07 100644 --- a/src/main/java/com/amazon/ion/impl/_Private_RecyclingQueue.java +++ b/src/main/java/com/amazon/ion/impl/_Private_RecyclingQueue.java @@ -1,3 +1,5 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl; import java.util.ArrayList; @@ -52,7 +54,7 @@ public T next() { /** * @param initialCapacity the initial capacity of the underlying collection. - * @param elementFactory the factory used to create a new element on {@link #push()} when the queue has + * @param elementFactory the factory used to create a new element on {@link #push(Recycler)} when the queue has * not previously grown to the new depth. */ public _Private_RecyclingQueue(int initialCapacity, ElementFactory elementFactory) { @@ -73,7 +75,7 @@ public T get(int index) { /** * Pushes an element onto the top of the queue, instantiating a new element only if the queue has not * previously grown to the new depth. - * @return the element at the top of the queue after the push. This element must be initialized by the caller. + * @return the index of the element at the top of the queue after the push. This element must be initialized by the caller. */ public int push(Recycler recycler) { currentIndex++; @@ -87,6 +89,23 @@ public int push(Recycler recycler) { return currentIndex; } + /** + * Pushes an element onto the top of the queue, instantiating a new element only if the queue has not + * previously grown to the new depth. 
+ * @return the element at the top of the queue after the push. + */ + public T pushAndGet(Recycler recycler) { + currentIndex++; + if (currentIndex >= elements.size()) { + top = elementFactory.newElement(); + elements.add(top); + } else { + top = elements.get(currentIndex); + } + recycler.recycle(top); + return top; + } + /** * Reclaim the current element. */ @@ -119,4 +138,4 @@ public void clear() { public int size() { return currentIndex + 1; } -} \ No newline at end of file +} diff --git a/src/main/java/com/amazon/ion/impl/_Private_Utils.java b/src/main/java/com/amazon/ion/impl/_Private_Utils.java index 751fb46489..9c388683c9 100644 --- a/src/main/java/com/amazon/ion/impl/_Private_Utils.java +++ b/src/main/java/com/amazon/ion/impl/_Private_Utils.java @@ -1,18 +1,5 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl; import static com.amazon.ion.SymbolTable.UNKNOWN_SYMBOL_ID; @@ -193,6 +180,17 @@ public static SymbolTokenImpl newSymbolToken(String text, int sid) return new SymbolTokenImpl(text, sid); } + /** + * @return not null + */ + public static SymbolToken newSymbolToken(String text) + { + return new SymbolTokenImpl(text, UNKNOWN_SYMBOL_ID); + } + + /** Cached copy of $0 */ + public static final SymbolTokenImpl SYMBOL_0 = newSymbolToken((String) null, 0); + /** * @return not null */ diff --git a/src/main/java/com/amazon/ion/impl/bin/Block.java b/src/main/java/com/amazon/ion/impl/bin/Block.java index 9f03d2c252..206091ea76 100644 --- a/src/main/java/com/amazon/ion/impl/bin/Block.java +++ b/src/main/java/com/amazon/ion/impl/bin/Block.java @@ -1,18 +1,5 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl.bin; import java.io.Closeable; @@ -24,7 +11,7 @@ *

* This class and its implementations are not thread-safe. */ -/*package*/ abstract class Block implements Closeable +public abstract class Block implements Closeable { /** The data backing this block. */ public final byte[] data; diff --git a/src/main/java/com/amazon/ion/impl/bin/BlockAllocator.java b/src/main/java/com/amazon/ion/impl/bin/BlockAllocator.java index fe6cf1bb13..baf161c141 100644 --- a/src/main/java/com/amazon/ion/impl/bin/BlockAllocator.java +++ b/src/main/java/com/amazon/ion/impl/bin/BlockAllocator.java @@ -1,18 +1,5 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl.bin; import java.io.Closeable; @@ -25,7 +12,7 @@ *

* Implementations are not thread-safe. */ -/*package*/ abstract class BlockAllocator implements Closeable +public abstract class BlockAllocator implements Closeable { /*package*/ BlockAllocator() {} diff --git a/src/main/java/com/amazon/ion/impl/bin/BlockAllocatorProvider.java b/src/main/java/com/amazon/ion/impl/bin/BlockAllocatorProvider.java index 8e6ee59691..6cf1b191a7 100644 --- a/src/main/java/com/amazon/ion/impl/bin/BlockAllocatorProvider.java +++ b/src/main/java/com/amazon/ion/impl/bin/BlockAllocatorProvider.java @@ -1,18 +1,5 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl.bin; /** @@ -22,7 +9,7 @@ *

* Implementations must be thread-safe. */ -/*package*/ abstract class BlockAllocatorProvider +public abstract class BlockAllocatorProvider { /*package*/ BlockAllocatorProvider() {} diff --git a/src/main/java/com/amazon/ion/impl/bin/BlockAllocatorProviders.java b/src/main/java/com/amazon/ion/impl/bin/BlockAllocatorProviders.java index d9a17423e4..d2f9d57a09 100644 --- a/src/main/java/com/amazon/ion/impl/bin/BlockAllocatorProviders.java +++ b/src/main/java/com/amazon/ion/impl/bin/BlockAllocatorProviders.java @@ -1,24 +1,11 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl.bin; /** * Utility implementations of {@link BlockAllocatorProvider}. */ -/*package*/ final class BlockAllocatorProviders +public final class BlockAllocatorProviders { private BlockAllocatorProviders() {} diff --git a/src/main/java/com/amazon/ion/impl/bin/FixedInt.kt b/src/main/java/com/amazon/ion/impl/bin/FixedInt.kt new file mode 100644 index 0000000000..1052cd5d8f --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/bin/FixedInt.kt @@ -0,0 +1,68 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.bin + +import java.lang.Long.numberOfLeadingZeros + +/** + * Functions for encoding FixedInts and FixedUInts. 
+ * + * Expected usage is calling one of the `___length` functions, and then using the result as the input for + * [writeFixedIntOrUIntInto]. The length and write functions are separate so that callers can make decisions or + * compute other values based on the encoded size of the value. + */ +object FixedInt { + + /** + * Writes a FixedInt or FixedUInt encoding of [value] into [data] starting at [offset]. + * Use [fixedIntLength] or [fixedUIntLength] to get the value for the [numBytes] parameter. + */ + @JvmStatic + inline fun writeFixedIntOrUIntInto(data: ByteArray, offset: Int, value: Long, numBytes: Int) { + when (numBytes) { + 1 -> data[offset] = value.toByte() + 2 -> { + data[offset] = value.toByte() + data[offset + 1] = (value shr 8).toByte() + } + 3 -> { + data[offset] = value.toByte() + data[offset + 1] = (value shr 8).toByte() + data[offset + 2] = (value shr 16).toByte() + } + 4 -> { + data[offset] = value.toByte() + data[offset + 1] = (value shr 8).toByte() + data[offset + 2] = (value shr 16).toByte() + data[offset + 3] = (value shr 24).toByte() + } + else -> { + for (i in 0 until numBytes) { + data[offset + i] = (value shr 8 * i).toByte() + } + } + } + } + + /** Determine the length of FixedUInt for the provided value. */ + @JvmStatic + fun fixedUIntLength(value: Long): Int { + val numLeadingZeros = numberOfLeadingZeros(value) + val numMagnitudeBitsRequired = 64 - numLeadingZeros + return (numMagnitudeBitsRequired - 1) / 8 + 1 + } + + /** Determine the length of FixedInt for the provided value. 
*/ + @JvmStatic + fun fixedIntLength(value: Long): Int { + val numMagnitudeBitsRequired: Int + if (value < 0) { + val numLeadingOnes = numberOfLeadingZeros(value.inv()) + numMagnitudeBitsRequired = 64 - numLeadingOnes + } else { + val numLeadingZeros = numberOfLeadingZeros(value) + numMagnitudeBitsRequired = 64 - numLeadingZeros + } + return numMagnitudeBitsRequired / 8 + 1 + } +} diff --git a/src/main/java/com/amazon/ion/impl/bin/FlexInt.kt b/src/main/java/com/amazon/ion/impl/bin/FlexInt.kt new file mode 100644 index 0000000000..8324a3fe5f --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/bin/FlexInt.kt @@ -0,0 +1,214 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.bin + +import java.math.BigInteger + +/** + * Functions for encoding FlexInts and FlexUInts. + * + * Expected usage is calling one of the `___length` functions, and then using the result as the input for + * [writeFlexIntOrUIntInto]. The length and write functions are separate so that callers can make decisions or + * compute other values based on the encoded size of the value. + */ +object FlexInt { + + /** + * A byte representing zero, encoded as a FlexInt (or FlexUInt). + */ + const val ZERO: Byte = 0x01 + + /** Determine the length of FlexUInt for the provided value. */ + @JvmStatic + fun flexUIntLength(value: Long): Int { + val numLeadingZeros = java.lang.Long.numberOfLeadingZeros(value) + val numMagnitudeBitsRequired = 64 - numLeadingZeros + return (numMagnitudeBitsRequired - 1) / 7 + 1 + } + + /** Determine the length of FlexInt for the provided value. 
*/ + @JvmStatic + fun flexIntLength(value: Long): Int { + val numMagnitudeBitsRequired: Int + numMagnitudeBitsRequired = if (value < 0) { + val numLeadingOnes = java.lang.Long.numberOfLeadingZeros(value.inv()) + 64 - numLeadingOnes + } else { + val numLeadingZeros = java.lang.Long.numberOfLeadingZeros(value) + 64 - numLeadingZeros + } + return numMagnitudeBitsRequired / 7 + 1 + } + + @JvmStatic + fun writeFlexIntOrUInt4Into(data: ByteArray, offset: Int, value: Long) { + data[offset] = (0x08L or (value shl 4)).toByte() + data[offset + 1] = (value shr 4).toByte() + data[offset + 2] = (value shr 12).toByte() + data[offset + 3] = (value shr 20).toByte() + } + + @JvmStatic + fun writeFlexIntOrUInt5Into(data: ByteArray, offset: Int, value: Long) { + data[offset] = (0x10L or (value shl 5)).toByte() + data[offset + 1] = (value shr 3).toByte() + data[offset + 2] = (value shr 11).toByte() + data[offset + 3] = (value shr 19).toByte() + data[offset + 4] = (value shr 27).toByte() + } + + @JvmStatic + fun writeFlexIntOrUInt6Into(data: ByteArray, offset: Int, value: Long) { + data[offset] = (0x20L or (value shl 6)).toByte() + data[offset + 1] = (value shr 2).toByte() + data[offset + 2] = (value shr 10).toByte() + data[offset + 3] = (value shr 18).toByte() + data[offset + 4] = (value shr 26).toByte() + data[offset + 5] = (value shr 34).toByte() + } + + @JvmStatic + fun writeFlexIntOrUInt7Into(data: ByteArray, offset: Int, value: Long) { + data[offset] = (0x40L or (value shl 7)).toByte() + data[offset + 1] = (value shr 1).toByte() + data[offset + 2] = (value shr 9).toByte() + data[offset + 3] = (value shr 17).toByte() + data[offset + 4] = (value shr 25).toByte() + data[offset + 5] = (value shr 33).toByte() + data[offset + 6] = (value shr 41).toByte() + } + + @JvmStatic + fun writeFlexIntOrUInt8Into(data: ByteArray, offset: Int, value: Long) { + data[offset] = 0x80.toByte() + data[offset + 1] = (value shr 0).toByte() + data[offset + 2] = (value shr 8).toByte() + data[offset + 3] = 
(value shr 16).toByte() + data[offset + 4] = (value shr 24).toByte() + data[offset + 5] = (value shr 32).toByte() + data[offset + 6] = (value shr 40).toByte() + data[offset + 7] = (value shr 48).toByte() + } + + @JvmStatic + fun writeFlexIntOrUInt9Into(data: ByteArray, offset: Int, value: Long) { + data[offset] = 0 + data[offset + 1] = (0x01L or (value shl 1)).toByte() + data[offset + 2] = (value shr 7).toByte() + data[offset + 3] = (value shr 15).toByte() + data[offset + 4] = (value shr 23).toByte() + data[offset + 5] = (value shr 31).toByte() + data[offset + 6] = (value shr 39).toByte() + data[offset + 7] = (value shr 47).toByte() + data[offset + 8] = (value shr 55).toByte() + } + + @JvmStatic + fun writeFlexIntOrUInt10Into(data: ByteArray, offset: Int, value: Long) { + data[offset] = 0 + data[offset + 1] = (0x02L or (value shl 2)).toByte() + data[offset + 2] = (value shr 6).toByte() + data[offset + 3] = (value shr 14).toByte() + data[offset + 4] = (value shr 22).toByte() + data[offset + 5] = (value shr 30).toByte() + data[offset + 6] = (value shr 38).toByte() + data[offset + 7] = (value shr 46).toByte() + data[offset + 8] = (value shr 54).toByte() + data[offset + 9] = (value shr 62).toByte() + } + + /** + * Writes a FlexInt or FlexUInt encoding of [value] into [data] starting at [offset]. + * Use [flexIntLength] or [flexUIntLength] to get the value for the [numBytes] parameter. 
+ */ + @JvmStatic + inline fun writeFlexIntOrUIntInto(data: ByteArray, offset: Int, value: Long, numBytes: Int) { + + when (numBytes) { + 1 -> { + data[offset] = (0x01L or (value shl 1)).toByte() + } + 2 -> { + data[offset] = (0x02L or (value shl 2)).toByte() + data[offset + 1] = (value shr 6).toByte() + } + 3 -> { + data[offset] = (0x04L or (value shl 3)).toByte() + data[offset + 1] = (value shr 5).toByte() + data[offset + 2] = (value shr 13).toByte() + } + 4 -> { + writeFlexIntOrUInt4Into(data, offset, value) + } + 5 -> { + writeFlexIntOrUInt5Into(data, offset, value) + } + 6 -> { + writeFlexIntOrUInt6Into(data, offset, value) + } + 7 -> { + writeFlexIntOrUInt7Into(data, offset, value) + } + 8 -> { + writeFlexIntOrUInt8Into(data, offset, value) + } + 9 -> { + writeFlexIntOrUInt9Into(data, offset, value) + } + 10 -> { + writeFlexIntOrUInt10Into(data, offset, value) + } + } + } + + /** Determine the length of FlexUInt for the provided value. */ + @JvmStatic + fun flexUIntLength(value: BigInteger): Int { + return (value.bitLength() - 1) / 7 + 1 + } + + /** Determine the length of FlexInt for the provided value. */ + @JvmStatic + fun flexIntLength(value: BigInteger): Int { + return value.bitLength() / 7 + 1 + } + + /** + * Writes a FlexInt or FlexUInt encoding of [value] into [data] starting at [offset]. + * Use [flexIntLength] or [flexUIntLength] to get the value for the [numBytes] parameter. + */ + @JvmStatic + fun writeFlexIntOrUIntInto(data: ByteArray, offset: Int, value: BigInteger, numBytes: Int) { + // TODO: Should we branch to the implementation for long if the number is small enough? + // https://github.com/amazon-ion/ion-java/issues/614 + val valueBytes = value.toByteArray() + var i = 0 // `i` gets incremented for every byte written. + + // Start with leading zero bytes. + // If there's 1-8 total bytes, we need no leading zero-bytes. + // If there's 9-16 total bytes, we need one zero-byte + // If there's 17-24 total bytes, we need two zero-bytes, etc. 
+ while (i < (numBytes - 1) / 8) { + data[offset + i] = 0 + i++ + } + + // Write the last length bits, possibly also containing some value bits. + val remainingLengthBits = (numBytes - 1) % 8 + val lengthPart = (0x01 shl remainingLengthBits).toByte() + val valueBitOffset = remainingLengthBits + 1 + val valuePart = (valueBytes[valueBytes.size - 1].toInt() shl valueBitOffset).toByte() + data[offset + i] = (valuePart.toInt() or lengthPart.toInt()).toByte() + i++ + for (valueByteOffset in valueBytes.size - 1 downTo 1) { + // Technically it's only a nibble if the bitOffset is 4, so we call it nibble-ish + val highNibbleIsh = (valueBytes[valueByteOffset - 1].toInt() shl valueBitOffset).toByte() + val lowNibbleIsh = (valueBytes[valueByteOffset].toInt() and 0xFF shr 8 - valueBitOffset).toByte() + data[offset + i] = (highNibbleIsh.toInt() or lowNibbleIsh.toInt()).toByte() + i++ + } + if (i < numBytes) { + data[offset + i] = (valueBytes[0].toInt() shr 8 - valueBitOffset).toByte() + } + } +} diff --git a/src/main/java/com/amazon/ion/impl/bin/IonEncoder_1_1.java b/src/main/java/com/amazon/ion/impl/bin/IonEncoder_1_1.java index a543a54b47..0276c8278e 100644 --- a/src/main/java/com/amazon/ion/impl/bin/IonEncoder_1_1.java +++ b/src/main/java/com/amazon/ion/impl/bin/IonEncoder_1_1.java @@ -6,7 +6,6 @@ import com.amazon.ion.IonType; import com.amazon.ion.Timestamp; import com.amazon.ion.impl.bin.utf8.Utf8StringEncoder; -import com.amazon.ion.impl.bin.utf8.Utf8StringEncoderPool; import java.math.BigDecimal; import java.math.BigInteger; @@ -98,7 +97,7 @@ public static int writeIntValue(WriteBuffer buffer, final long value) { buffer.writeByte(OpCodes.INTEGER_ZERO_LENGTH); return 1; } - int length = WriteBuffer.fixedIntLength(value); + int length = FixedInt.fixedIntLength(value); buffer.writeByte((byte) (OpCodes.INTEGER_ZERO_LENGTH + length)); buffer.writeFixedInt(value); return 1 + length; @@ -131,14 +130,14 @@ public static int writeIntValue(WriteBuffer buffer, final BigInteger 
value) { * Writes a float to the given WriteBuffer using the Ion 1.1 encoding for Ion Floats. * @return the number of bytes written */ - public static int writeFloat(WriteBuffer buffer, final float value) { + public static int writeFloatValue(WriteBuffer buffer, final float value) { // TODO: Optimization to write a 16 bit float for non-finite and possibly other values if (value == 0.0) { buffer.writeByte(OpCodes.FLOAT_ZERO_LENGTH); return 1; } else { buffer.writeByte(OpCodes.FLOAT_32); - buffer.writeUInt32(floatToIntBits(value)); + buffer.writeUInt32(Integer.reverseBytes(floatToIntBits(value))); return 5; } } @@ -147,18 +146,30 @@ public static int writeFloat(WriteBuffer buffer, final float value) { * Writes a double to the given WriteBuffer using the Ion 1.1 encoding for Ion Floats. * @return the number of bytes written */ - public static int writeFloat(WriteBuffer buffer, final double value) { + public static int writeFloatValue(WriteBuffer buffer, final double value) { // TODO: Optimization to write a 16 bit float for non-finite and possibly other values - if (value == 0.0) { - buffer.writeByte(OpCodes.FLOAT_ZERO_LENGTH); - return 1; - } else if (!Double.isFinite(value) || value == (float) value) { - buffer.writeByte(OpCodes.FLOAT_32); - buffer.writeUInt32(floatToIntBits((float) value)); - return 5; + // We could check the number of significand bits and the value of the exponent + // to determine if it can be represented in a smaller format without having a + // complete representation of half-precision floating point numbers. 
+ + if (!Double.isFinite(value) || value == (float) value) { + int floatBits = Float.floatToIntBits((float) value); + switch (floatBits) { + case 0: // Positive zero + buffer.writeByte(OpCodes.FLOAT_ZERO_LENGTH); + return 1; + case FLOAT_32_NEGATIVE_ZERO_BITS: + buffer.writeByte(OpCodes.FLOAT_16); + buffer.writeFixedIntOrUInt(FLOAT_16_NEGATIVE_ZERO_BITS, 2); + return 3; + default: + buffer.writeByte(OpCodes.FLOAT_32); + buffer.writeUInt32(Integer.reverseBytes(floatToIntBits((float) value))); + return 5; + } } else { buffer.writeByte(OpCodes.FLOAT_64); - buffer.writeUInt64(doubleToRawLongBits(value)); + buffer.writeUInt64(Long.reverseBytes(doubleToRawLongBits(value))); return 9; } } @@ -169,39 +180,41 @@ public static int writeDecimalValue(WriteBuffer buffer, final BigDecimal value) } int exponent = -value.scale(); - - if (BigDecimal.ZERO.compareTo(value) == 0 && !Decimal.isNegativeZero(value)) { - if (exponent == 0) { + int numExponentBytes = FlexInt.flexIntLength(exponent); + + byte[] coefficientBytes = null; + int numCoefficientBytes; + if (BigDecimal.ZERO.compareTo(value) == 0) { + if (Decimal.isNegativeZero(value)) { + numCoefficientBytes = 1; + } else if (exponent == 0) { buffer.writeByte(OpCodes.DECIMAL_ZERO_LENGTH); return 1; } else { - // A decimal with a coefficient of +0 is encoded using opcode 6F. - // The opcode is followed by a FlexInt representing the exponent. 
- buffer.writeByte(OpCodes.POSITIVE_ZERO_DECIMAL); - return 1 + buffer.writeFlexInt(exponent); + numCoefficientBytes = 0; } - } - - BigInteger coefficient = value.unscaledValue(); - int numCoefficientBytes = WriteBuffer.flexIntLength(coefficient); - - int numExponentBytes = 0; - if (exponent != 0) { - numExponentBytes = WriteBuffer.fixedIntLength(exponent); + } else { + coefficientBytes = value.unscaledValue().toByteArray(); + numCoefficientBytes = coefficientBytes.length; } int opCodeAndLengthBytes = 1; - if (numExponentBytes + numCoefficientBytes < 15) { + if (numExponentBytes + numCoefficientBytes < 16) { int opCode = OpCodes.DECIMAL_ZERO_LENGTH + numExponentBytes + numCoefficientBytes; buffer.writeByte((byte) opCode); } else { - // Decimal values that require more than 14 bytes can be encoded using the variable-length decimal opcode: 0xF6. + // Decimal values that require more than 15 bytes can be encoded using the variable-length decimal opcode: 0xF6. buffer.writeByte(OpCodes.VARIABLE_LENGTH_DECIMAL); opCodeAndLengthBytes += buffer.writeFlexUInt(numExponentBytes + numCoefficientBytes); } - buffer.writeFlexInt(coefficient); - if (exponent != 0) { - buffer.writeFixedInt(exponent); + + buffer.writeFlexInt(exponent); + if (numCoefficientBytes > 0) { + if (coefficientBytes != null) { + buffer.writeFixedIntOrUInt(coefficientBytes); + } else { + buffer.writeByte((byte) 0); + } } return opCodeAndLengthBytes + numCoefficientBytes + numExponentBytes; @@ -313,7 +326,7 @@ private static int writeShortFormTimestampValue(WriteBuffer buffer, Timestamp va } } else { long localOffset = (value.getLocalOffset().longValue() / 15) + (14 * 4); - bits |= (localOffset & LEAST_SIGNIFICANT_7_BITS) << S_O_TIMESTAMP_OFFSET_BIT_OFFSET; + bits |= (localOffset & SEVEN_BIT_MASK) << S_O_TIMESTAMP_OFFSET_BIT_OFFSET; if (value.getPrecision() == Timestamp.Precision.MINUTE) { buffer.writeByte(OpCodes.TIMESTAMP_MINUTE_PRECISION_WITH_OFFSET); @@ -403,7 +416,6 @@ static int 
writeLongFormTimestampValue(WriteBuffer buffer, Timestamp value) { return 8; // OpCode + FlexUInt + 6 bytes data } - bits |= ((long) value.getSecond()) << L_TIMESTAMP_SECOND_BIT_OFFSET; int secondsScale = 0; if (value.getZFractionalSecond() != null) { @@ -416,27 +428,36 @@ static int writeLongFormTimestampValue(WriteBuffer buffer, Timestamp value) { } BigDecimal fractionalSeconds = value.getZFractionalSecond(); - BigInteger coefficient = fractionalSeconds.unscaledValue(); + long exponent = fractionalSeconds.scale(); - int numCoefficientBytes = WriteBuffer.flexUIntLength(coefficient); - int numExponentBytes = WriteBuffer.fixedUIntLength(exponent); + int numExponentBytes = FlexInt.flexUIntLength(exponent); + + BigInteger coefficient = fractionalSeconds.unscaledValue(); + byte[] coefficientBytes = null; + int numCoefficientBytes = 0; + if (!coefficient.equals(BigInteger.ZERO)) { + coefficientBytes = coefficient.toByteArray(); + numCoefficientBytes = coefficientBytes.length; + } + // Years-seconds data (7 bytes) + fraction coefficient + fraction exponent int dataLength = 7 + numCoefficientBytes + numExponentBytes; - buffer.writeFlexUInt(dataLength); + int numLengthBytes = buffer.writeFlexUInt(dataLength); buffer.writeFixedIntOrUInt(bits, 7); - buffer.writeFlexUInt(coefficient); - buffer.writeFixedUInt(exponent); - + buffer.writeFlexUInt(exponent); + if (coefficientBytes != null) { + buffer.writeFixedIntOrUInt(coefficientBytes); + } // OpCode + FlexUInt length + dataLength - return 1 + WriteBuffer.flexUIntLength(dataLength) + dataLength; + return 1 + numLengthBytes + dataLength; } /** * Writes a String to the given WriteBuffer using the Ion 1.1 encoding for Ion Strings. 
* @return the number of bytes written */ - public static int writeStringValue(WriteBuffer buffer, String value) { + public static int writeStringValue(WriteBuffer buffer, Utf8StringEncoder.Result value) { return writeInlineText(buffer, value, IonType.STRING, OpCodes.STRING_ZERO_LENGTH, OpCodes.VARIABLE_LENGTH_STRING); } @@ -444,21 +465,17 @@ public static int writeStringValue(WriteBuffer buffer, String value) { * Writes an inline Symbol to the given WriteBuffer using the Ion 1.1 encoding for Ion Symbols. * @return the number of bytes written */ - public static int writeSymbolValue(WriteBuffer buffer, String value) { + public static int writeSymbolValue(WriteBuffer buffer, Utf8StringEncoder.Result value) { return writeInlineText(buffer, value, IonType.SYMBOL, OpCodes.INLINE_SYMBOL_ZERO_LENGTH, OpCodes.VARIABLE_LENGTH_INLINE_SYMBOL); } - private static int writeInlineText(WriteBuffer buffer, String value, IonType type, byte zeroLengthOpCode, byte variableLengthOpCode) { + private static int writeInlineText(WriteBuffer buffer, Utf8StringEncoder.Result value, IonType type, byte zeroLengthOpCode, byte variableLengthOpCode) { if (value == null) { return writeNullValue(buffer, type); } - // TODO: When merging into the Ion 1.1 raw writer, keep a single instance of the Utf8StringEncoder - // instead of fetching one on every call. - Utf8StringEncoder.Result encoderResult = Utf8StringEncoderPool.getInstance().getOrCreate().encode(value); - - byte[] utf8Buffer = encoderResult.getBuffer(); - int numValueBytes = encoderResult.getEncodedLength(); + byte[] utf8Buffer = value.getBuffer(); + int numValueBytes = value.getEncodedLength(); int numLengthBytes = 0; if (numValueBytes <= 0xF) { @@ -474,10 +491,8 @@ private static int writeInlineText(WriteBuffer buffer, String value, IonType typ /** * Writes an interned Symbol's address to the given WriteBuffer using the Ion 1.1 encoding for Ion Symbols. 
* @return the number of bytes written - * - * TODO: Do we need to support Symbol Addresses greater than Long.MAX_VALUE? */ - public static int writeSymbolValue(WriteBuffer buffer, long value) { + public static int writeSymbolValue(WriteBuffer buffer, int value) { if (value < 0) { throw new IllegalArgumentException("Symbol Address cannot be negative; was: " + value); } else if (value < FIRST_2_BYTE_SYMBOL_ADDRESS) { @@ -499,34 +514,32 @@ public static int writeSymbolValue(WriteBuffer buffer, long value) { * Writes a Blob to the given WriteBuffer using the Ion 1.1 encoding for Ion Blobs. * @return the number of bytes written */ - public static int writeBlobValue(WriteBuffer buffer, byte[] value) { + public static int writeBlobValue(WriteBuffer buffer, byte[] value, int start, int length) { if (value == null) { return writeNullValue(buffer, IonType.BLOB); } buffer.writeByte(OpCodes.VARIABLE_LENGTH_BLOB); - int numLengthBytes = buffer.writeFlexUInt(value.length); - buffer.writeBytes(value); - return 1 + numLengthBytes + value.length; + int numLengthBytes = buffer.writeFlexUInt(length); + buffer.writeBytes(value, start, length); + return 1 + numLengthBytes + length; } /** * Writes a Clob to the given WriteBuffer using the Ion 1.1 encoding for Ion Clobs. * @return the number of bytes written */ - public static int writeClobValue(WriteBuffer buffer, byte[] value) { + public static int writeClobValue(WriteBuffer buffer, byte[] value, int start, int length) { if (value == null) { return writeNullValue(buffer, IonType.CLOB); } buffer.writeByte(OpCodes.VARIABLE_LENGTH_CLOB); - int numLengthBytes = buffer.writeFlexUInt(value.length); - buffer.writeBytes(value); - return 1 + numLengthBytes + value.length; + int numLengthBytes = buffer.writeFlexUInt(length); + buffer.writeBytes(value, start, length); + return 1 + numLengthBytes + length; } - // TODO: Implement FlexSym Annotations - /** * Writes annotations using the given symbol addresses. 
*/ @@ -546,7 +559,7 @@ public static int writeAnnotations(WriteBuffer buffer, long[] annotations) { } else { int numAddressBytes = 0; for (long ann : annotations) { - numAddressBytes += WriteBuffer.flexUIntLength(ann); + numAddressBytes += FlexInt.flexUIntLength(ann); } buffer.writeByte(OpCodes.ANNOTATIONS_MANY_SYMBOL_ADDRESS); int numLengthBytes = buffer.writeFlexUInt(numAddressBytes); diff --git a/src/main/java/com/amazon/ion/impl/bin/IonManagedBinaryWriter.java b/src/main/java/com/amazon/ion/impl/bin/IonManagedBinaryWriter.java index 61f5cbc170..dccbd3bebb 100644 --- a/src/main/java/com/amazon/ion/impl/bin/IonManagedBinaryWriter.java +++ b/src/main/java/com/amazon/ion/impl/bin/IonManagedBinaryWriter.java @@ -1,23 +1,11 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl.bin; import static com.amazon.ion.IonType.LIST; import static com.amazon.ion.IonType.STRUCT; import static com.amazon.ion.SystemSymbols.IMPORTS_SID; +import static com.amazon.ion.SystemSymbols.ION_1_0; import static com.amazon.ion.SystemSymbols.ION_1_0_MAX_ID; import static com.amazon.ion.SystemSymbols.ION_SYMBOL_TABLE_SID; import static com.amazon.ion.SystemSymbols.MAX_ID_SID; @@ -1033,8 +1021,14 @@ public void writeSymbol(String content) throws IOException writeSymbolToken(intern(content)); } - private boolean handleIVM(int sid) throws IOException { - if (user.isIVM(sid)) + private boolean handleIVM(SymbolToken symbol) throws IOException { + if (getDepth() != 0 || user.hasAnnotations()) { + return false; + } + // A symbol's text always takes precedence over its symbol ID. Only symbols with unknown text are compared + // against SID 2. + String text = symbol.getText(); + if (ION_1_0.equals(text) || (text == null && user.isIVM(symbol.getSid()))) { if (user.hasWrittenValuesSinceFinished()) { @@ -1054,7 +1048,7 @@ private boolean handleIVM(int sid) throws IOException { public void writeSymbolToken(SymbolToken token) throws IOException { - if (token != null && handleIVM(token.getSid())) + if (token != null && handleIVM(token)) { return; } diff --git a/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt b/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt new file mode 100644 index 0000000000..7848944fc2 --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt @@ -0,0 +1,1031 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.bin + +import com.amazon.ion.* +import com.amazon.ion.SymbolTable.* +import com.amazon.ion.impl.* +import com.amazon.ion.impl._Private_IonWriter.* +import com.amazon.ion.impl.bin.LengthPrefixStrategy.* +import com.amazon.ion.impl.bin.SymbolInliningStrategy.* +import com.amazon.ion.impl.macro.* +import com.amazon.ion.system.* +import java.io.OutputStream +import java.math.BigDecimal +import java.math.BigInteger +import java.util.* + +/** + * A managed writer for Ion 1.1 that is generic over whether the raw encoding is text or binary. + * + * TODO: + * - Handling of shared symbol tables + * - Proper handling of user-supplied symbol tables + * - Auto-flush (for binary and text) + * + * TODO: What package does this really belong in? + * + * See also [ManagedWriterOptions_1_1], [SymbolInliningStrategy], and [LengthPrefixStrategy]. + */ +internal class IonManagedWriter_1_1( + private val userData: IonRawWriter_1_1, + private val systemData: PrivateIonRawWriter_1_1, + private val options: ManagedWriterOptions_1_1, + private val onClose: () -> Unit, +) : _Private_IonWriter, MacroAwareIonWriter { + + internal fun getRawUserWriter(): IonRawWriter_1_1 = userData + + companion object { + private val ION_VERSION_MARKER_REGEX = Regex("^\\\$ion_\\d+_\\d+$") + + // These are chosen subjectively to be neither too big nor too small. 
+ private const val MAX_PARAMETERS_IN_ONE_LINE_SIGNATURE = 4 + private const val MAX_SYMBOLS_IN_SINGLE_LINE_SYMBOL_TABLE = 10 + private const val MAX_EXPRESSIONS_IN_SINGLE_LINE_MACRO_BODY = 8 + + @JvmStatic + fun textWriter(output: OutputStream, managedWriterOptions: ManagedWriterOptions_1_1, textOptions: _Private_IonTextWriterBuilder_1_1): IonManagedWriter_1_1 { + // TODO support all options configurable via IonTextWriterBuilder_1_1 + val appender = { + val bufferedOutput = BufferedOutputStreamFastAppendable(output, BlockAllocatorProviders.basicProvider().vendAllocator(4096)) + _Private_IonTextAppender.forFastAppendable(bufferedOutput, Charsets.UTF_8) + } + + return IonManagedWriter_1_1( + userData = IonRawTextWriter_1_1( + options = textOptions, + output = appender(), + ), + systemData = IonRawTextWriter_1_1( + options = textOptions, + output = appender(), + ), + options = managedWriterOptions.copy(internEncodingDirectiveSymbols = false), + onClose = output::close, + ) + } + + @JvmStatic + fun textWriter(output: Appendable, managedWriterOptions: ManagedWriterOptions_1_1, textOptions: _Private_IonTextWriterBuilder_1_1): IonManagedWriter_1_1 { + val appender = { + val bufferedOutput = BufferedAppendableFastAppendable(output) + _Private_IonTextAppender.forFastAppendable(bufferedOutput, Charsets.UTF_8) + } + + return IonManagedWriter_1_1( + userData = IonRawTextWriter_1_1( + options = textOptions, + output = appender(), + ), + systemData = IonRawTextWriter_1_1( + options = textOptions, + output = appender(), + ), + options = managedWriterOptions.copy(internEncodingDirectiveSymbols = false), + onClose = {}, + ) + } + + @JvmStatic + fun binaryWriter(output: OutputStream, managedWriterOptions: ManagedWriterOptions_1_1, binaryOptions: _Private_IonBinaryWriterBuilder_1_1): IonManagedWriter_1_1 { + // TODO: Add autoflush + return IonManagedWriter_1_1( + userData = IonRawBinaryWriter_1_1( + out = output, + buffer = 
WriteBuffer(BlockAllocatorProviders.basicProvider().vendAllocator(binaryOptions.blockSize)) {}, + lengthPrefixPreallocation = 1 + ), + systemData = IonRawBinaryWriter_1_1( + out = output, + buffer = WriteBuffer(BlockAllocatorProviders.basicProvider().vendAllocator(binaryOptions.blockSize)) {}, + lengthPrefixPreallocation = 1 + ), + options = managedWriterOptions.copy(internEncodingDirectiveSymbols = true), + onClose = output::close, + ) + } + } + + // Since this is Ion 1.1, we must always start with the IVM. + private var needsIVM: Boolean = true + + // We take a slightly different approach here by handling the encoding context as a prior encoding context + // plus a list of symbols added by the current encoding context. + /** The symbol table for the prior encoding context */ + private var symbolTable: HashMap = HashMap() + + /** Symbols to be interned since the prior encoding context. */ + private var newSymbols: HashMap = LinkedHashMap() // Preserves insertion order. + + /** The macro table of the prior encoding context. Map value is the user-space address. */ + private var macroTable: HashMap = LinkedHashMap() + /** Macros to be added since the last encoding directive was flushed. Map value is the user-space address. */ + private var newMacros: HashMap = LinkedHashMap() + /** Macro names by user-space address, including new macros. */ + private var macroNames = ArrayList() + /** Macro definitions by user-space address, including new macros. */ + private var macrosById = ArrayList() + /** The first symbol ID in the current encoding context. */ + private var firstLocalSid: Int = 0 + /** True if the current encoding context contains the system symbols. */ + private var areSystemSymbolsInScope = true + + /** + * Transformer for symbol IDs encountered during writeValues. Can be used to upgrade Ion 1.0 symbol IDs to the + * Ion 1.1 equivalents. + */ + private var sidTransformer: IntTransformer? 
= null + + /** + * Adds a new symbol to the table for this writer, or finds an existing definition of it. This writer does not + * implement [IonWriter.getSymbolTable], so this method supplies some of that functionality. + * + * @return an SID for the given symbol text + * @see SymbolTable.intern + */ + fun intern(text: String): Int { + // Check the current symbol table + var sid = symbolTable[text] + if (sid != null) return sid + // Check the to-be-appended symbols + sid = newSymbols[text] + if (sid != null) return sid + // Add to the to-be-appended symbols + sid = firstLocalSid + symbolTable.size + newSymbols.size + 1 + newSymbols[text] = sid + return sid + } + + /** + * Checks for macro invocations in the body of a TemplateMacro and ensures that those macros are added to the + * macro table. + * + * This is essentially a recursive, memoized, topological sort. Given a dependency graph, it runs in O(V + E) time. + * Memoization is done using the macro table, so the O(V + E) cost is only paid the first time a macro is added to + * the macro table. + */ + private fun addMacroDependencies(macro: Macro) { + macro.dependencies.forEach { + if (it !is SystemMacro && it !in macroTable && it !in newMacros) { + addMacroDependencies(it) + assignMacroAddress(it) + } + } + } + + /** + * Adds a named macro to the macro table + * + * Steps: + * - If the name is not already in use... + * - And the macro is already in `newMacros`... + * 1. Get the address of the macro in `newMacros` + * 2. Add the name to `macroNames` for that address + * 3. Return the address + * - Else... + * 1. Add a new entry for the macro to `newMacros` and get a new address + * 2. Add the name to `macroNames` for the new address + * 3. Return the new address + * - If the name is already in use... + * - And it is associated with the same macro... + * 1. Return the address associated with the name + * - And it is associated with a different macro... + * - This is where the managed writer must take an opinion.
(Or be configurable.) + * - It could mangle the name + * - It could remove the name from a macro in macroTable, but then it would have to immediately flush to + * make sure that any prior e-expressions are still valid. In addition, we would need to re-export all + * the other macros from `_` (the default module). + * - For now, we're just throwing an Exception. + * + * Visible for testing. + */ + internal fun getOrAssignMacroAddressAndName(name: String, macro: Macro): Int { + // TODO: This is O(n), but could be O(1). + var existingAddress = macroNames.indexOf(name) + if (existingAddress < 0) { + // Name is not already in use + existingAddress = newMacros.getOrDefault(macro, -1) + + val address = if (existingAddress < 0) { + // Macro is not in newMacros + + // If it's in macroTable, we can skip adding dependencies + if (macro !in macroTable) addMacroDependencies(macro) + // Add to newMacros and get a macro address + assignMacroAddress(macro) + } else { + // Macro already exists in newMacros, but doesn't have a name + existingAddress + } + // Set the name of the macro + macroNames[address] = name + return address + } else if (macrosById[existingAddress] == macro) { + // Macro already in table, and already using the same name + return existingAddress + } else { + // Name is already in use for a different macro. + // This macro may or may not be in the table under a different name, but that's + // not particularly relevant unless we want to try to fall back to a different name. + TODO("Name shadowing is not supported yet. Call finish() before attempting to shadow an existing macro.") + } + } + + /** + * Steps for adding an anonymous macro to the macro table + * 1. Check macroTable, if found, return that address + * 2. Check newMacros, if found, return that address + * 3. 
Add to newMacros, return new address + * + * Visible for testing + */ + internal fun getOrAssignMacroAddress(macro: Macro): Int { + var address = macroTable.getOrDefault(macro, -1) + if (address >= 0) return address + address = newMacros.getOrDefault(macro, -1) + if (address >= 0) return address + + addMacroDependencies(macro) + return assignMacroAddress(macro) + } + + override fun startEncodingSegmentWithIonVersionMarker() { + if (!newSymbols.isEmpty() || !newMacros.isEmpty()) { + throw IonException("Cannot start a new encoding segment while the previous segment is active.") + } + needsIVM = false + flush() + systemData.writeIVM() + resetEncodingContext() + } + + override fun startEncodingSegmentWithEncodingDirective( + macros: Map, + isMacroTableAppend: Boolean, + symbols: List, + isSymbolTableAppend: Boolean, + encodingDirectiveAlreadyWritten: Boolean + ) { + // It is assumed that the IVM is written manually when using endEncodingSegment. + needsIVM = false + // First, flush the previous segment. This method begins a new segment. + flush() + firstLocalSid = if (isSymbolTableAppend) { + if (areSystemSymbolsInScope) SystemSymbols_1_1.size() else 0 + } else { + symbolTable.clear() + areSystemSymbolsInScope = false + 0 + } + for (symbol in symbols) { + intern(symbol) + } + if (!isMacroTableAppend) { + macroNames.clear() + macrosById.clear() + macroTable.clear() + newMacros.clear() + } + for (entry in macros.entries) { + when (entry.key) { + is MacroRef.ByName -> getOrAssignMacroAddressAndName((entry.key as MacroRef.ByName).name, entry.value) + is MacroRef.ById -> getOrAssignMacroAddress(entry.value) + } + } + if (encodingDirectiveAlreadyWritten) { + // This prevents another encoding directive from being written for this context. 
+ symbolTable.putAll(newSymbols) + newSymbols.clear() + macroTable.putAll(newMacros) + newMacros.clear() + } else { + writeVerboseEncodingDirective() + } + } + + /** Unconditionally adds a macro to the macro table data structures and returns the new address. */ + private fun assignMacroAddress(macro: Macro): Int { + val address = macrosById.size + macrosById.add(macro) + macroNames.add(null) + newMacros[macro] = address + return address + } + + // Only called by `finish()` + private fun resetEncodingContext() { + if (depth != 0) throw IllegalStateException("Cannot reset the encoding context while stepped in any value.") + symbolTable.clear() + macroNames.clear() + macrosById.clear() + macroTable.clear() + newMacros.clear() + + needsIVM = true + firstLocalSid = 0 + areSystemSymbolsInScope = true + } + + /** Helper function for writing encoding directives */ + private inline fun writeSystemSexp(content: PrivateIonRawWriter_1_1.() -> Unit) { + systemData.stepInSExp(usingLengthPrefix = false) + systemData.content() + systemData.stepOut() + } + + /** Helper function for writing encoding directives */ + private inline fun writeSystemMacro(macro: SystemMacro, content: PrivateIonRawWriter_1_1.() -> Unit) { + systemData.stepInEExp(macro) + systemData.content() + systemData.stepOut() + } + + /** + * Writes an encoding directive for the current encoding context, and updates internal state accordingly. + * This always appends to the current encoding context. If there is nothing to append, calling this function + * is a no-op. 
+ */ + private fun writeEncodingDirective() { + if (newSymbols.isEmpty() && newMacros.isEmpty()) return + + writeSymbolTableClause() + symbolTable.putAll(newSymbols) + newSymbols.clear() + // NOTE: Once we have emitted the symbol table update with set/add_symbols those symbols become available + // for use in set/add_macros (if relevant) + + writeMacroTableClause() + macroTable.putAll(newMacros) + newMacros.clear() + } + + /** + * Writes an encoding directive for the current encoding context using the verbose `$ion::(module _ ...)` syntax, + * and updates internal state accordingly. This always appends to the current encoding context. If there is nothing + * to append, calling this function is a no-op. + */ + private fun writeVerboseEncodingDirective() { + if (newSymbols.isEmpty() && newMacros.isEmpty()) return + + systemData.writeAnnotations(SystemSymbols_1_1.ION) + writeSystemSexp { + writeSymbol(SystemSymbols_1_1.MODULE) + writeSymbol(SystemSymbols.DEFAULT_MODULE) + writeVerboseSymbolTableClause() + writeVerboseMacroTableClause() + } + symbolTable.putAll(newSymbols) + newSymbols.clear() + macroTable.putAll(newMacros) + newMacros.clear() + } + + /** + * Writes the `(symbol_table ...)` clause into the encoding expression by invoking + * the `add_symbols` or `set_symbols` system macro. + * If the symbol table would be empty, writes nothing, which is equivalent + * to an empty symbol table. 
+ */ + private fun writeSymbolTableClause() { + val hasSymbolsToAdd = newSymbols.isNotEmpty() + val hasSymbolsToRetain = symbolTable.isNotEmpty() + if (!hasSymbolsToAdd) return + + val macro = if (!hasSymbolsToRetain) SystemMacro.SetSymbols else SystemMacro.AddSymbols + + // Add new symbols + writeSystemMacro(macro) { + stepInExpressionGroup(usingLengthPrefix = false) + if (newSymbols.size <= MAX_SYMBOLS_IN_SINGLE_LINE_SYMBOL_TABLE) forceNoNewlines(true) + newSymbols.forEach { (text, _) -> writeString(text) } + stepOut() + } + systemData.forceNoNewlines(false) + } + + /** + * Writes the `(symbol_table ...)` clause into the encoding expression using the + * verbose s-expression syntax. + * If the symbol table would be empty, writes nothing, which is equivalent + * to an empty symbol table. + */ + private fun writeVerboseSymbolTableClause() { + val hasSymbolsToAdd = newSymbols.isNotEmpty() + val hasSymbolsToRetain = symbolTable.isNotEmpty() + if (!hasSymbolsToAdd && !hasSymbolsToRetain) return + + writeSystemSexp { + forceNoNewlines(true) + systemData.writeSymbol(SystemSymbols_1_1.SYMBOL_TABLE) + + // Add previous symbol table + if (hasSymbolsToRetain) { + if (newSymbols.size > 0) forceNoNewlines(false) + writeSymbol(SystemSymbols.DEFAULT_MODULE) + } + + // Add new symbols + if (hasSymbolsToAdd) { + stepInList(usingLengthPrefix = false) + if (newSymbols.size <= MAX_SYMBOLS_IN_SINGLE_LINE_SYMBOL_TABLE) forceNoNewlines(true) + newSymbols.forEach { (text, _) -> writeString(text) } + stepOut() + } + forceNoNewlines(true) + } + systemData.forceNoNewlines(false) + } + + /** + * Writes the `(macro_table ...)` clause into the encoding expression by invoking + * the `add_macros` or `set_macros` system macro. + * If the macro table would be empty, writes nothing, which is equivalent + * to an empty macro table. 
+ */ + private fun writeMacroTableClause() { + val hasMacrosToAdd = newMacros.isNotEmpty() + val hasMacrosToRetain = macroTable.isNotEmpty() + if (!hasMacrosToAdd) return + + val macro = if (!hasMacrosToRetain) SystemMacro.SetMacros else SystemMacro.AddMacros + + writeSystemMacro(macro) { + forceNoNewlines(false) + stepInExpressionGroup(usingLengthPrefix = false) + newMacros.forEach { (macro, address) -> + val name = macroNames[address] + when (macro) { + is TemplateMacro -> writeMacroDefinition(name, macro) + is SystemMacro -> { + if (name != macro.macroName) { + exportSystemMacro(macro, name) + } + // Else, no need to export the macro since it's already known by the desired name + } + } + } + stepOut() + } + systemData.forceNoNewlines(false) + } + + /** + * Writes the `(macro_table ...)` clause into the encoding expression using the + * verbose s-expression syntax. + * If the macro table would be empty, writes nothing, which is equivalent + * to an empty macro table. + */ + private fun writeVerboseMacroTableClause() { + val hasMacrosToAdd = newMacros.isNotEmpty() + val hasMacrosToRetain = macroTable.isNotEmpty() + if (!hasMacrosToAdd && !hasMacrosToRetain) return + + writeSystemSexp { + forceNoNewlines(true) + writeSymbol(SystemSymbols_1_1.MACRO_TABLE) + if (newMacros.size > 0) forceNoNewlines(false) + if (hasMacrosToRetain) { + writeSymbol(SystemSymbols.DEFAULT_MODULE) + } + forceNoNewlines(false) + newMacros.forEach { (macro, address) -> + val name = macroNames[address] + when (macro) { + is TemplateMacro -> writeMacroDefinition(name, macro) + is SystemMacro -> { + if (name != macro.macroName) { + exportSystemMacro(macro, name) + } + // Else, no need to export the macro since it's already known by the desired name + } + } + } + forceNoNewlines(true) + } + systemData.forceNoNewlines(false) + } + + private fun exportSystemMacro(macro: SystemMacro, alias: String?) 
    {
        // Writes `(export $ion::<macro name> <alias>?)`; the alias is only written when it is non-null and
        // differs from the system macro's own name.
        writeSystemSexp {
            forceNoNewlines(true)
            writeSymbol(SystemSymbols_1_1.EXPORT)
            writeAnnotations(SystemSymbols_1_1.ION)
            writeSymbol(macro.macroName)
            if (alias != null && alias != macro.macroName) {
                writeSymbol(alias)
            }
        }
        systemData.forceNoNewlines(false)
    }

    /**
     * Writes a `(macro <name-or-null> (<signature>) <body>)` s-expression for [macro] to the system-data writer.
     *
     * The template body is stored as a flat list of expressions; containers, expression groups and macro
     * invocations carry an `endExclusive` index instead of nested children. While walking the list this
     * function steps in when it meets such an expression and records, per index, how many step-outs are owed
     * once that index is reached.
     *
     * @param name the macro's name, or `null` for an unnamed macro (written as `null`)
     * @param macro the template macro whose signature and body are serialized
     * @throws IllegalStateException if the body invokes a non-system macro that is in neither `macroTable`
     *   nor `newMacros`
     */
    private fun writeMacroDefinition(name: String?, macro: TemplateMacro) {
        writeSystemSexp {
            forceNoNewlines(true)
            writeSymbol(SystemSymbols_1_1.MACRO)
            if (name != null) writeSymbol(name) else writeNull()

            if (macro.signature.size > MAX_PARAMETERS_IN_ONE_LINE_SIGNATURE) forceNoNewlines(false)

            // Signature
            writeSystemSexp {
                macro.signature.forEach { parameter ->
                    // The encoding is written as an annotation unless it is the Tagged encoding.
                    if (parameter.type != Macro.ParameterEncoding.Tagged) {
                        writeAnnotations(parameter.type.ionTextName)
                    }
                    writeSymbol(parameter.variableName)
                    // A cardinality is written only when it is not ExactlyOne.
                    if (parameter.cardinality != Macro.ParameterCardinality.ExactlyOne) {
                        writeMacroParameterCardinality(parameter.cardinality)
                    }
                }
            }

            if (macro.body.size > MAX_EXPRESSIONS_IN_SINGLE_LINE_MACRO_BODY) forceNoNewlines(false)

            // Template Body

            // TODO: See if there's any benefit to using a smaller number type, if we can
            //       memoize this in the macro definition, or replace it with a list of precomputed
            //       step-out indices.
            /** Tracks where and how many times to step out. */
            // One slot more than the body size, so an `endExclusive` equal to `body.size` is a valid index.
            val numberOfTimesToStepOut = IntArray(macro.body.size + 1)

            macro.body.forEachIndexed { index, expression ->
                // Close everything whose `endExclusive` is this index before writing the expression itself.
                if (numberOfTimesToStepOut[index] > 0) {
                    repeat(numberOfTimesToStepOut[index]) { stepOut() }
                }

                when (expression) {
                    is Expression.DataModelValue -> {
                        expression.annotations.forEach {
                            if (it.text != null) {
                                // TODO: If it's already in the symbol table we could check the
                                //       symbol-inline strategy and possibly write a SID.
                                writeAnnotations(it.text)
                            } else {
                                writeAnnotations(it.sid)
                            }
                        }

                        if (expression is Expression.NullValue) {
                            writeNull(expression.type)
                        } else when (expression.type) {
                            IonType.NULL -> error("Unreachable")
                            IonType.BOOL -> writeBool((expression as Expression.BoolValue).value)
                            IonType.INT -> {
                                if (expression is Expression.LongIntValue)
                                    writeInt(expression.value)
                                else
                                    writeInt((expression as Expression.BigIntValue).value)
                            }
                            IonType.FLOAT -> writeFloat((expression as Expression.FloatValue).value)
                            IonType.DECIMAL -> writeDecimal((expression as Expression.DecimalValue).value)
                            IonType.TIMESTAMP -> writeTimestamp((expression as Expression.TimestampValue).value)
                            IonType.SYMBOL -> {
                                val symbolToken = (expression as Expression.SymbolValue).value
                                if (symbolToken.text != null) {
                                    // TODO: If it's already in the symbol table we could check the
                                    //       symbol-inline strategy and possibly write a SID.
                                    writeSymbol(symbolToken.text)
                                } else {
                                    writeSymbol(symbolToken.sid)
                                }
                            }
                            IonType.STRING -> writeString((expression as Expression.StringValue).value)
                            IonType.CLOB -> writeClob((expression as Expression.ClobValue).value)
                            IonType.BLOB -> writeBlob((expression as Expression.BlobValue).value)
                            // Containers: step in now and register the step-out owed at `endExclusive`.
                            IonType.LIST -> {
                                expression as Expression.HasStartAndEnd
                                stepInList(usingLengthPrefix = false)
                                numberOfTimesToStepOut[expression.endExclusive]++
                            }
                            IonType.SEXP -> {
                                expression as Expression.HasStartAndEnd
                                stepInSExp(usingLengthPrefix = false)
                                numberOfTimesToStepOut[expression.endExclusive]++
                            }
                            IonType.STRUCT -> {
                                expression as Expression.HasStartAndEnd
                                stepInStruct(usingLengthPrefix = false)
                                numberOfTimesToStepOut[expression.endExclusive]++
                            }
                            IonType.DATAGRAM -> error("Unreachable")
                        }
                    }
                    is Expression.FieldName -> {
                        val text = expression.value.text
                        if (text == null) {
                            writeFieldName(expression.value.sid)
                        } else {
                            // TODO: If it's already in the symbol table we could check the symbol-inline strategy
                            //       and possibly write a SID.
                            writeFieldName(text)
                        }
                    }
                    is Expression.ExpressionGroup -> {
                        stepInTdlExpressionGroup()
                        numberOfTimesToStepOut[expression.endExclusive]++
                    }
                    is Expression.MacroInvocation -> {
                        val invokedMacro = expression.macro
                        if (invokedMacro is SystemMacro) {
                            stepInTdlSystemMacroInvocation(invokedMacro.systemSymbol)
                        } else {
                            // Look in the committed table first, then among the macros pending in this directive.
                            val invokedAddress = macroTable[invokedMacro]
                                ?: newMacros[invokedMacro]
                                ?: throw IllegalStateException("A macro in the macro table is missing a dependency")
                            val invokedName = macroNames[invokedAddress]
                            if (options.invokeTdlMacrosByName && invokedName != null) {
                                stepInTdlMacroInvocation(invokedName)
                            } else {
                                stepInTdlMacroInvocation(invokedAddress)
                            }
                        }
                        numberOfTimesToStepOut[expression.endExclusive]++
                    }
                    is Expression.VariableRef -> writeTdlVariableExpansion(macro.signature[expression.signatureIndex].variableName)
                    else -> error("Unreachable")
                }
            }

            // Step out for anything where endExclusive is beyond the end of the expression list.
            repeat(numberOfTimesToStepOut.last()) { stepOut() }
            forceNoNewlines(true)
        }
        systemData.forceNoNewlines(false)
    }

    /** Not implemented; calling it fails with the `TODO` error below. */
    override fun getCatalog(): IonCatalog {
        TODO("Not part of the public API.")
    }

    /** No facets supported */
    override fun asFacet(facetType: Class?): T? = null

    /** Not implemented; calling it fails with the `TODO` error below. */
    override fun getSymbolTable(): SymbolTable {
        TODO("Why do we need to expose this to users in the first place?")
    }

    /** Sets the pending field name from text; the SID-or-inline decision is made by `handleSymbolToken`. */
    override fun setFieldName(name: String) {
        handleSymbolToken(UNKNOWN_SYMBOL_ID, name, SymbolKind.FIELD_NAME, userData)
    }

    /** Sets the pending field name from a symbol token, forwarding both its SID and its text. */
    override fun setFieldNameSymbol(name: SymbolToken) {
        handleSymbolToken(name.sid, name.text, SymbolKind.FIELD_NAME, userData)
    }

    /** Adds one annotation, given as text, to the value that will be written next. */
    override fun addTypeAnnotation(annotation: String) {
        handleSymbolToken(UNKNOWN_SYMBOL_ID, annotation, SymbolKind.ANNOTATION, userData)
    }

    override fun setTypeAnnotations(annotations: Array?)
    {
        // Replaces any annotations already set: clear first, then add one by one.
        // Interning happens in addTypeAnnotation
        userData._private_clearAnnotations()
        annotations?.forEach { addTypeAnnotation(it) }
    }

    /** Replaces the pending annotations with the given symbol tokens; a `null` array just clears them. */
    override fun setTypeAnnotationSymbols(annotations: Array?) {
        userData._private_clearAnnotations()
        annotations?.forEach { handleSymbolToken(it.sid, it.text, SymbolKind.ANNOTATION, userData) }
    }

    /**
     * Steps into a list, s-expression or struct on the user-data writer.
     *
     * @throws IonException when a top-level struct's first annotation is the Ion symbol table annotation
     * @throws IllegalArgumentException when [containerType] is not LIST, SEXP or STRUCT (including `null`)
     */
    override fun stepIn(containerType: IonType?) {
        // The length-prefix decision is made for the depth the new container will have.
        val newDepth = depth + 1
        when (containerType) {
            IonType.LIST -> userData.stepInList(options.writeLengthPrefix(ContainerType.LIST, newDepth))
            IonType.SEXP -> userData.stepInSExp(options.writeLengthPrefix(ContainerType.SEXP, newDepth))
            IonType.STRUCT -> {
                if (depth == 0 && userData._private_hasFirstAnnotation(SystemSymbols_1_1.ION_SYMBOL_TABLE.id, SystemSymbols_1_1.ION_SYMBOL_TABLE.text)) {
                    throw IonException("User-defined symbol tables not permitted by the Ion 1.1 managed writer.")
                }
                userData.stepInStruct(options.writeLengthPrefix(ContainerType.STRUCT, newDepth))
            }
            else -> throw IllegalArgumentException("Not a container type: $containerType")
        }
    }

    override fun stepOut() = userData.stepOut()

    override fun isInStruct(): Boolean = userData.isInStruct()

    /** Writes a typed null of [type] when the receiver is `null`, otherwise passes the value to [writeNotNull]. */
    private inline fun T?.writeMaybeNull(type: IonType, writeNotNull: (T) -> Unit) {
        if (this == null) {
            writeNull(type)
        } else {
            writeNotNull(this)
        }
    }

    /** Writes a symbol value from text; `null` is written as a typed null symbol. */
    override fun writeSymbol(content: String?) {
        if (content == null) {
            userData.writeNull(IonType.SYMBOL)
        } else {
            handleSymbolToken(UNKNOWN_SYMBOL_ID, content, SymbolKind.VALUE, userData)
        }
    }

    /**
     * Writes a symbol value from a token; `null` is written as a typed null symbol.
     *
     * @throws IonException when called at depth 0 with text equal to `SystemSymbols_1_1.ION_1_0.text`
     */
    override fun writeSymbolToken(content: SymbolToken?) {
        if (content == null) {
            userData.writeNull(IonType.SYMBOL)
        } else {
            val text: String? = content.text
            // TODO: Check to see if the SID refers to a user symbol with text that looks like an IVM
            if (text == SystemSymbols_1_1.ION_1_0.text && depth == 0) throw IonException("Can't write a top-level symbol that is the same as the IVM.")
            handleSymbolToken(content.sid, content.text, SymbolKind.VALUE, userData)
        }
    }

    // The three `write` helpers below route a symbol to the raw-writer call matching its position
    // (value, field name or annotation), for a SID, inline text, or a system symbol respectively.
    private inline fun IonRawWriter_1_1.write(kind: SymbolKind, sid: Int) = when (kind) {
        SymbolKind.VALUE -> writeSymbol(sid)
        SymbolKind.FIELD_NAME -> writeFieldName(sid)
        SymbolKind.ANNOTATION -> writeAnnotations(sid)
    }

    private inline fun IonRawWriter_1_1.write(kind: SymbolKind, text: String) = when (kind) {
        SymbolKind.VALUE -> writeSymbol(text)
        SymbolKind.FIELD_NAME -> writeFieldName(text)
        SymbolKind.ANNOTATION -> writeAnnotations(text)
    }

    private inline fun IonRawWriter_1_1.write(kind: SymbolKind, symbol: SystemSymbols_1_1) = when (kind) {
        SymbolKind.VALUE -> writeSymbol(symbol)
        SymbolKind.FIELD_NAME -> writeFieldName(symbol)
        SymbolKind.ANNOTATION -> writeAnnotations(symbol)
    }

    /**
     * Helper function that determines whether to write a symbol token as a SID or inline symbol
     *
     * The branches are tried in this order:
     * 1. no text: an unknown SID is an error, SID 0 is written as-is, any other SID goes through
     *    `sidTransformer` when one is set;
     * 2. [preserveEncoding] is set and [sid] is negative: the text is written inline;
     * 3. `options.shouldWriteInline` says so: the text is written inline;
     * 4. the text is a system symbol: the system symbol is written;
     * 5. otherwise the text is interned and the resulting SID is written.
     *
     * @throws UnknownSymbolException when [text] is `null` and [sid] is `UNKNOWN_SYMBOL_ID`
     */
    private inline fun handleSymbolToken(sid: Int, text: String?, kind: SymbolKind, rawWriter: IonRawWriter_1_1, preserveEncoding: Boolean = false) {
        if (text == null) {
            // No text. Decide whether to write $0 or some other SID
            if (sid == UNKNOWN_SYMBOL_ID) {
                // No (known) SID either.
                throw UnknownSymbolException("Cannot write a symbol token with unknown text and unknown SID.")
            } else if (sid == 0) {
                rawWriter.write(kind, 0)
            } else {
                rawWriter.write(kind, sidTransformer?.transform(sid) ?: sid)
            }
        } else if (preserveEncoding && sid < 0) {
            rawWriter.write(kind, text)
        } else if (options.shouldWriteInline(kind, text)) {
            rawWriter.write(kind, text)
        } else if (SystemSymbols_1_1.contains(text)) {
            rawWriter.write(kind, SystemSymbols_1_1[text]!!)
        } else {
            rawWriter.write(kind, intern(text))
        }
    }

    // Scalar writers: each forwards to the user-data writer; nullable overloads write a typed null for `null`.
    override fun writeNull() = userData.writeNull()
    override fun writeNull(type: IonType?) = userData.writeNull(type ?: IonType.NULL)
    override fun writeBool(value: Boolean) = userData.writeBool(value)
    override fun writeInt(value: Long) = userData.writeInt(value)

    override fun writeInt(value: BigInteger?) = value.writeMaybeNull(IonType.INT, userData::writeInt)
    override fun writeFloat(value: Double) = userData.writeFloat(value)
    override fun writeDecimal(value: BigDecimal?) = value.writeMaybeNull(IonType.DECIMAL, userData::writeDecimal)
    override fun writeTimestamp(value: Timestamp?) = value.writeMaybeNull(IonType.TIMESTAMP, userData::writeTimestamp)
    override fun writeString(value: String?) = value.writeMaybeNull(IonType.STRING, userData::writeString)

    override fun writeClob(value: ByteArray?) = value.writeMaybeNull(IonType.CLOB, userData::writeClob)
    override fun writeClob(value: ByteArray?, start: Int, len: Int) = value.writeMaybeNull(IonType.CLOB) { userData.writeClob(it, start, len) }

    override fun writeBlob(value: ByteArray?) = value.writeMaybeNull(IonType.BLOB, userData::writeBlob)
    override fun writeBlob(value: ByteArray?, start: Int, len: Int) = value.writeMaybeNull(IonType.BLOB) { userData.writeBlob(it, start, len) }

    override fun isFieldNameSet(): Boolean {
        return userData._private_hasFieldName()
    }

    /** The depth reported here is the depth of the user-data writer. */
    override fun getDepth(): Int {
        return userData.depth()
    }

    /**
     * At the top level this finishes the current segment via `finish()`; inside a container it writes the
     * symbol `$ion_1_1` as an ordinary value instead.
     */
    override fun writeIonVersionMarker() {
        if (depth == 0) {
            // Make sure we write out any symbol tables and buffered values before the IVM
            finish()
        } else {
            writeSymbol("\$ion_1_1")
        }
    }

    @Deprecated("Use IonValue.writeTo(IonWriter) instead.")
    override fun writeValue(value: IonValue) = value.writeTo(this)

    @Deprecated("Use writeTimestamp instead.")
    override fun writeTimestampUTC(value: Date?)
{ + TODO("Use writeTimestamp instead.") + } + + override fun isStreamCopyOptimized(): Boolean = false + + override fun writeValues(reader: IonReader, symbolIdTransformer: IntTransformer) { + sidTransformer = symbolIdTransformer + try { + writeValues(reader) + } finally { + sidTransformer = null + } + } + + override fun writeValues(reader: IonReader) { + // There's a possibility that we could have interference between encoding contexts if we're transferring from a + // system reader. However, this is the same behavior as the other implementations. + + val startingDepth = reader.depth + while (true) { + val nextType = reader.next() + if (nextType == null) { + // Nothing more *and* we're at the starting depth? We're all done. + if (reader.depth == startingDepth) return + // Otherwise, step out and continue. + userData.stepOut() + reader.stepOut() + } else { + transferScalarOrStepIn(reader, nextType) + } + } + } + + override fun writeValue(reader: IonReader, symbolIdTransformer: IntTransformer) { + sidTransformer = symbolIdTransformer + try { + writeValue(reader) + } finally { + sidTransformer = null + } + } + + override fun writeValue(reader: IonReader) { + // There's a possibility that we could have interference between encoding contexts if we're transferring from a + // system reader. However, this is the same behavior as the other implementations. + + if (reader.type == null) return + val startingDepth = reader.depth + transferScalarOrStepIn(reader, reader.type) + if (reader.depth != startingDepth) { + // We stepped into a container, so write the content of the container and then step out. + writeValues(reader) + reader.stepOut() + userData.stepOut() + } + } + + override fun writeObject(obj: WriteAsIon) { + obj.writeToMacroAware(this) + } + + /** + * Can only be called when the reader is positioned on a value. Having [currentType] in the + * function signature helps to enforce that requirement because [currentType] is not allowed + * to be `null`. 
     */
    private fun transferScalarOrStepIn(reader: IonReader, currentType: IonType) {
        // TODO: If the Ion 1.1 symbol table differs at all from the Ion 1.0 symbol table, and we're copying
        //       from Ion 1.0, we will have to adjust any SIDs that we are writing.

        // Step 1: annotations. The Ion symbol table annotation is matched by text and written as a system
        // symbol; every other annotation goes through handleSymbolToken with preserveEncoding = true.
        reader.typeAnnotationSymbols.forEach {
            if (it.text == SystemSymbols_1_1.ION_SYMBOL_TABLE.text) {
                userData.writeAnnotations(SystemSymbols_1_1.ION_SYMBOL_TABLE)
            } else {
                handleSymbolToken(it.sid, it.text, SymbolKind.ANNOTATION, userData, preserveEncoding = true)
            }
        }
        // Step 2: field name, only when the value is being written into a struct.
        if (isInStruct) {
            // TODO: Can't use reader.fieldId, reader.fieldName because it will throw UnknownSymbolException.
            //       However, this might mean we're unnecessarily constructing `SymbolToken` instances.
            val fieldName = reader.fieldNameSymbol
            // If there is no field name, it still may have been set externally, e.g.
            // writer.setFieldName(...); writer.writeValue(reader);
            // This occurs when serializing a sequence of Expressions, which hold field names separate from
            // values.
            if (fieldName != null) {
                handleSymbolToken(fieldName.sid, fieldName.text, SymbolKind.FIELD_NAME, userData, preserveEncoding = true)
            }
        }

        // Step 3: the value itself. Scalars are copied; containers are only stepped into on both sides, and
        // the caller is responsible for copying their content and stepping out.
        if (reader.isNullValue) {
            userData.writeNull(currentType)
        } else when (currentType) {
            IonType.BOOL -> userData.writeBool(reader.booleanValue())
            IonType.INT -> {
                if (reader.integerSize == IntegerSize.BIG_INTEGER) {
                    userData.writeInt(reader.bigIntegerValue())
                } else {
                    userData.writeInt(reader.longValue())
                }
            }
            IonType.FLOAT -> userData.writeFloat(reader.doubleValue())
            IonType.DECIMAL -> userData.writeDecimal(reader.decimalValue())
            IonType.TIMESTAMP -> userData.writeTimestamp(reader.timestampValue())
            IonType.SYMBOL -> {
                if (reader.isCurrentValueAnIvm()) {
                    // TODO: What about the case where it's an IVM, but the writer is not at depth==0? Should we write
                    //       it as a symbol or just ignore it? (This can only happen if the writer is stepped in, but
                    //       the reader starts at depth==0.)

                    // Just in case—call finish to flush the current system values, then user values, and then write the IVM.
                    finish()
                } else {
                    val symbol = reader.symbolValue()
                    handleSymbolToken(symbol.sid, symbol.text, SymbolKind.VALUE, userData, preserveEncoding = true)
                }
            }
            IonType.STRING -> userData.writeString(reader.stringValue())
            IonType.CLOB -> userData.writeClob(reader.newBytes())
            IonType.BLOB -> userData.writeBlob(reader.newBytes())
            // TODO: See if we can preserve the encoding of containers (delimited vs length-prefixed)
            // NOTE(review): the depth passed to writeLengthPrefix here is the reader's depth before stepping in,
            // whereas stepIn() passes the writer's depth + 1 — confirm this difference is intended.
            IonType.LIST -> {
                userData.stepInList(options.writeLengthPrefix(ContainerType.LIST, reader.depth))
                reader.stepIn()
            }
            IonType.SEXP -> {
                userData.stepInSExp(options.writeLengthPrefix(ContainerType.SEXP, reader.depth))
                reader.stepIn()
            }
            IonType.STRUCT -> {
                userData.stepInStruct(options.writeLengthPrefix(ContainerType.STRUCT, reader.depth))
                reader.stepIn()
            }
            else -> TODO("NULL and DATAGRAM are unreachable.")
        }
    }

    // Reports whether the reader's current value should be treated as an Ion version marker: it must be an
    // unannotated symbol at depth 0 whose text matches the IVM pattern, or, when the text is unknown, whose
    // SID is 2.
    private fun IonReader.isCurrentValueAnIvm(): Boolean {
        if (depth != 0 || type != IonType.SYMBOL || typeAnnotationSymbols.isNotEmpty()) return false
        val symbol = symbolValue() ?: return false
        if (symbol.text == null) {
            // TODO FIX: Ion 1.1 system symbols can be removed from the encoding context, so an IVM may not always
            //           have symbol ID 2.
+ return symbol.sid == 2 + } + return ION_VERSION_MARKER_REGEX.matches(symbol.assumeText()) + } + + // Stream termination + + override fun close() { + flush() + systemData.close() + userData.close() + onClose() + } + + override fun flush() { + if (needsIVM) { + systemData.writeIVM() + needsIVM = false + } + writeEncodingDirective() + systemData.flush() + userData.flush() + } + + override fun finish() { + flush() + resetEncodingContext() + } + + override fun startMacro(macro: Macro) { + if (macro is SystemMacro) { + startSystemMacro(macro) + } else { + val address = getOrAssignMacroAddress(macro) + // Note: macroNames[address] will be null if the macro is unnamed. + startMacro(macroNames[address], address, macro) + } + } + + override fun startMacro(name: String, macro: Macro) { + if (macro is SystemMacro && macro.macroName == name) { + startSystemMacro(macro) + } else { + val address = getOrAssignMacroAddressAndName(name, macro) + startMacro(name, address, macro) + } + } + + private fun startMacro(name: String?, address: Int, definition: Macro) { + val useNames = options.eExpressionIdentifierStrategy == ManagedWriterOptions_1_1.EExpressionIdentifierStrategy.BY_NAME + if (useNames && name != null) { + userData.stepInEExp(name) + } else { + val includeLengthPrefix = options.writeLengthPrefix(ContainerType.EEXP, depth + 1) + userData.stepInEExp(address, includeLengthPrefix, definition) + } + } + + private fun startSystemMacro(macro: SystemMacro) = userData.stepInEExp(macro) + + override fun startExpressionGroup() { + userData.stepInExpressionGroup(options.writeLengthPrefix(ContainerType.EXPRESSION_GROUP, depth + 1)) + } + + override fun endMacro() { + userData.stepOut() + } + + override fun endExpressionGroup() { + userData.stepOut() + } +} diff --git a/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter.java b/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter.java index e2073126c0..c437380cdc 100644 --- 
a/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter.java +++ b/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter.java @@ -278,39 +278,6 @@ public String toString() } } - private static class PatchPoint - { - /** position of the data being patched out. */ - public long oldPosition; - /** length of the data being patched out.*/ - public int oldLength; - /** size of the container data or annotations.*/ - public long length; - public PatchPoint() - { - oldPosition = -1; - oldLength = -1; - length = -1; - } - - @Override - public String toString() - { - return "(PP old::(" + oldPosition + " " + oldLength + ") patch::(" + length + ")"; - } - - public PatchPoint initialize(final long oldPosition, final int oldLength, final long length) { - this.oldPosition = oldPosition; - this.oldLength = oldLength; - this.length = length; - return this; - } - - public PatchPoint clear() { - return initialize(-1, -1, -1); - } - } - /*package*/ enum StreamCloseMode { NO_CLOSE, @@ -1076,12 +1043,12 @@ private void patchSingleByteTypedOptimisticValue(final byte type, final Containe if (info.length <= 0xD) { // we fit -- overwrite the type byte - buffer.writeUInt8At(info.position - 1, type | info.length); + buffer.writeByteAt(info.position - 1, type | info.length); } else { // side patch - buffer.writeUInt8At(info.position - 1, type | 0xE); + buffer.writeByteAt(info.position - 1, type | 0xE); addPatchPoint(info, info.position, 0, info.length); } } diff --git a/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter_1_1.kt b/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter_1_1.kt new file mode 100644 index 0000000000..36fe0b2681 --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter_1_1.kt @@ -0,0 +1,982 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion.impl.bin

import com.amazon.ion.*
import com.amazon.ion.impl.*
import com.amazon.ion.impl.bin.IonEncoder_1_1.*
import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1.ContainerType.*
import com.amazon.ion.impl.bin.Ion_1_1_Constants.*
import com.amazon.ion.impl.bin.utf8.*
import com.amazon.ion.impl.macro.*
import com.amazon.ion.util.*
import java.io.OutputStream
import java.lang.Double.doubleToRawLongBits
import java.lang.Float.floatToIntBits
import java.lang.IllegalArgumentException
import java.math.BigDecimal
import java.math.BigInteger

class IonRawBinaryWriter_1_1 internal constructor(
    private val out: OutputStream,
    private val buffer: WriteBuffer,
    private val lengthPrefixPreallocation: Int,
) : IonRawWriter_1_1, PrivateIonRawWriter_1_1 {

    /**
     * Types of encoding containers.
     */
    enum class ContainerType {
        LIST,
        SEXP,
        STRUCT,
        EEXP,
        EXPR_GROUP,
        /**
         * Represents the top level stream. The [containerStack] always has [ContainerInfo] for [TOP] at the bottom
         * of the stack so that we never have to check if [currentContainer] is null.
         *
         * TODO: Test if performance is better if we just check currentContainer for nullness.
         */
        TOP,
        /**
         * Represents a group of annotations. May only contain FlexSyms or FlexUInt symbol IDs.
         */
        ANNOTATIONS,
    }

    /**
     * Mutable bookkeeping record for one open container. Every property is a `var` and [reset]
     * re-initializes an existing instance, so instances can be reused.
     */
    private class ContainerInfo(
        var type: ContainerType? = null,
        var isLengthPrefixed: Boolean = true,
        var usesFlexSym: Boolean = false,
        var position: Long = -1,
        /**
         * Where should metadata such as the length prefix and/or the presence bitmap be written,
         * relative to the start of this container.
         */
        var metadataOffset: Int = 1,
        /**
         * The number of bytes for everything following the length-prefix (if applicable) in this container.
         */
        var length: Long = 0,
        // TODO: Test if performance is better with an Object Reference or an index into the PatchPoint queue.
        var patchPoint: PatchPoint? = null,
        /**
         * The number of elements in the expression group or arguments to the macro.
         * This is updated when _finishing_ writing a value or expression group.
         */
        var numChildren: Int = 0,
        /**
         * The kind of tagless encoding to use if this is a tagless expression group.
         */
        var taglessEncodingKind: TaglessEncoding? = null
    ) {
        /**
         * Clears this [ContainerInfo] of old data and initializes it with the given new data.
         */
        // NOTE(review): `taglessEncodingKind` is not assigned here, so it keeps its previous value across a
        // reset — confirm that every user of that field sets it explicitly before reading it.
        fun reset(type: ContainerType, position: Long, isLengthPrefixed: Boolean = true, metadataOffset: Int = 1) {
            this.type = type
            this.isLengthPrefixed = isLengthPrefixed
            this.position = position
            this.metadataOffset = metadataOffset
            usesFlexSym = false
            length = 0
            patchPoint = null
            numChildren = 0
        }
    }

    companion object {
        /** Flag to indicate that annotations need to be written using FlexSyms */
        private const val FLEX_SYMS_REQUIRED = -1

        /**
         * Annotations container always requires at least one length prefix byte. In practice, it's almost certain to
         * never require more than one byte for SID annotations. We assume that it will infrequently require more than
         * one byte for FlexSym annotations.
         */
        private const val ANNOTATIONS_LENGTH_PREFIX_ALLOCATION_SIZE = 1

        /**
         * Create a new instance for the given OutputStream with the given block size and length preallocation.
         */
        @JvmStatic
        fun from(out: OutputStream, blockSize: Int, preallocation: Int): IonRawBinaryWriter_1_1 {
            return IonRawBinaryWriter_1_1(out, WriteBuffer(BlockAllocatorProviders.basicProvider().vendAllocator(blockSize)) {}, preallocation)
        }
    }

    private val utf8StringEncoder = Utf8StringEncoderPool.getInstance().getOrCreate()

    // Pending annotations for the next value are kept in two parallel arrays, indexed by annotation position:
    // one written by the text and system-symbol overloads of writeAnnotations, one by the SID overloads.
    private var annotationsFlexSymBuffer = arrayOfNulls(8)
    private var annotationsIdBuffer = IntArray(8)
    private var numAnnotations = 0
    /**
     * Flag indicating whether to use FlexSyms to write the annotations.
     * When FlexSyms are required, the flag should be
     * set to `-1` so that we can `xor` it with [numAnnotations] to get a distinct integer that represents the number
     * and type of annotations required.
     */
    private var annotationFlexSymFlag = 0

    private var hasFieldName = false

    private var closed = false

    private val patchPoints = _Private_RecyclingQueue(512) { PatchPoint() }
    private val containerStack = _Private_RecyclingStack(8) { ContainerInfo() }
    private val presenceBitmapStack = _Private_RecyclingStack(8) { PresenceBitmap() }

    // The stack always starts with the TOP pseudo-container at position 0.
    private var currentContainer: ContainerInfo = containerStack.push { it.reset(TOP, 0L) }

    /**
     * Writes the buffered bytes to [out], substituting the final length values at every recorded patch point,
     * then resets the buffer and the patch point queue. Does nothing once the writer is closed.
     *
     * The two `confirm` calls guard against flushing while inside a container or with annotations that have
     * not been attached to a value.
     */
    // NOTE(review): both confirm messages mention "finish" although this method is flush() — confirm whether
    // the wording is intentional.
    override fun flush() {
        if (closed) return
        confirm(depth() == 0) { "Cannot call finish() while in a container" }
        confirm(numAnnotations == 0) { "Cannot call finish with dangling annotations" }

        if (patchPoints.isEmpty) {
            // nothing to patch--write 'em out!
            buffer.writeTo(out)
        } else {
            // Offset into `buffer` of the first byte that has not been copied to `out` yet.
            var bufferPosition: Long = 0

            // Patch length values are long, so they always fit in 10 bytes or fewer.
            val flexUIntScratch = ByteArray(10)

            val iterator = patchPoints.iterate()

            while (iterator.hasNext()) {
                val patch = iterator.next()
                // Patch points with a negative length are skipped.
                if (patch.length < 0) {
                    continue
                }
                // write up to the thing to be patched
                val bufferLength = patch.oldPosition - bufferPosition
                buffer.writeTo(out, bufferPosition, bufferLength)

                // write out the patch
                // TODO: See if there's a measurable performance benefit if we write directly to the output stream vs using the flexUIntScratch
                val numBytes = FlexInt.flexUIntLength(patch.length)
                FlexInt.writeFlexIntOrUIntInto(flexUIntScratch, 0, patch.length, numBytes)
                out.write(flexUIntScratch, 0, numBytes)

                // skip over the preallocated field
                bufferPosition = patch.oldPosition
                bufferPosition += patch.oldLength.toLong()
            }
            // Copy whatever follows the last patch point.
            buffer.writeTo(out, bufferPosition, buffer.position() - bufferPosition)
        }

        buffer.reset()
        patchPoints.clear()

        // TODO: Stream flush mode
    }

    /** Flushes, closes the buffer and marks the writer closed; calling it again is a no-op. */
    override fun close() {
        if (closed) return
        flush()
        buffer.close()
        closed = true
    }

    override fun depth(): Int = containerStack.size() - 1 // "Top" doesn't count when counting depth.

    override fun isInStruct(): Boolean = currentContainer.type == STRUCT

    /** Writes the binary Ion 1.1 version marker; only allowed at the top level and without pending annotations. */
    override fun writeIVM() {
        confirm(currentContainer.type == TOP) { "IVM can only be written at the top level of an Ion stream." }
        confirm(numAnnotations == 0) { "Cannot write an IVM with annotations" }
        buffer.writeBytes(_Private_IonConstants.BINARY_VERSION_MARKER_1_1)
    }

    /**
     * Ensures that there is enough space in the annotation buffers for [n] annotations.
     * If more space is needed, it over-allocates by 8 to ensure that we're not continually allocating when annotations
     * are being added one by one.
+ */ + private inline fun ensureAnnotationSpace(n: Int) { + if (annotationsIdBuffer.size < n || annotationsFlexSymBuffer.size < n) { + val oldIds = annotationsIdBuffer + annotationsIdBuffer = IntArray(n + 8) + oldIds.copyInto(annotationsIdBuffer) + val oldText = annotationsFlexSymBuffer + annotationsFlexSymBuffer = arrayOfNulls(n + 8) + oldText.copyInto(annotationsFlexSymBuffer) + } + } + + override fun writeAnnotations(annotation0: SystemSymbols_1_1) { + ensureAnnotationSpace(numAnnotations + 1) + annotationsFlexSymBuffer[numAnnotations++] = annotation0 + annotationFlexSymFlag = FLEX_SYMS_REQUIRED + } + + override fun writeAnnotations(annotation0: Int) { + confirm(annotation0 >= 0) { "Invalid SID: $annotation0" } + ensureAnnotationSpace(numAnnotations + 1) + annotationsIdBuffer[numAnnotations++] = annotation0 + } + + override fun writeAnnotations(annotation0: Int, annotation1: Int) { + confirm(annotation0 >= 0 && annotation1 >= 0) { "One or more invalid SIDs: $annotation0, $annotation1" } + ensureAnnotationSpace(numAnnotations + 2) + annotationsIdBuffer[numAnnotations++] = annotation0 + annotationsIdBuffer[numAnnotations++] = annotation1 + } + + override fun writeAnnotations(annotations: IntArray) { + confirm(annotations.all { it >= 0 }) { "One or more invalid SIDs: ${annotations.filter { it < 0 }.joinToString()}" } + ensureAnnotationSpace(numAnnotations + annotations.size) + annotations.copyInto(annotationsIdBuffer, numAnnotations) + numAnnotations += annotations.size + } + + override fun writeAnnotations(annotation0: CharSequence) { + ensureAnnotationSpace(numAnnotations + 1) + annotationsFlexSymBuffer[numAnnotations++] = annotation0 + annotationFlexSymFlag = FLEX_SYMS_REQUIRED + } + + override fun writeAnnotations(annotation0: CharSequence, annotation1: CharSequence) { + ensureAnnotationSpace(numAnnotations + 2) + annotationsFlexSymBuffer[numAnnotations++] = annotation0 + annotationsFlexSymBuffer[numAnnotations++] = annotation1 + annotationFlexSymFlag = 
FLEX_SYMS_REQUIRED + } + + override fun writeAnnotations(annotations: Array) { + if (annotations.isEmpty()) return + ensureAnnotationSpace(numAnnotations + annotations.size) + annotations.copyInto(annotationsFlexSymBuffer, numAnnotations) + numAnnotations += annotations.size + annotationFlexSymFlag = FLEX_SYMS_REQUIRED + } + + /** + * Discards all pending annotations without writing them. + */ + override fun _private_clearAnnotations() { + // Drop every pending text annotation, not just the first: writeFlexSymFromAnnotationsBuffer() prefers a non-null + // text slot over the SID slot, so a stale entry at index >= 1 would be written in place of a later SID annotation. + annotationsFlexSymBuffer.fill(null, 0, numAnnotations) + numAnnotations = 0 + annotationFlexSymFlag = 0 + // erase the first entries to ensure old values don't leak into `_private_hasFirstAnnotation()` + annotationsIdBuffer[0] = -1 + annotationsFlexSymBuffer[0] = null + } + + /** + * Returns true if the first pending annotation matches [sid] (when non-negative) or [text] (when non-null). + * Returns false when there are no pending annotations. + */ + override fun _private_hasFirstAnnotation(sid: Int, text: String?): Boolean { + if (numAnnotations == 0) return false + val firstText = annotationsFlexSymBuffer[0] + // Text slots are nulled whenever they are written out or cleared, so a non-null slot means the first pending + // annotation is text. In that case annotationsIdBuffer[0] may still hold the SID of an earlier value, so it + // must not be consulted. + if (firstText != null) return text != null && firstText == text + return sid >= 0 && annotationsIdBuffer[0] == sid + } + + /** + * Helper function for handling annotations and field names when starting a value. + */ + private inline fun openValue(valueWriterExpression: () -> Unit) { + + if (isInStruct()) { + confirm(hasFieldName) { "Values in a struct must have a field name." } + } else if (currentContainer.type == EEXP) { + presenceBitmapStack.peek()[currentContainer.numChildren] = PresenceBitmap.EXPRESSION + } + + // Start at 1, assuming there's an annotations OpCode byte. + // We'll clear this if there are no annotations. 
+ var annotationsTotalLength = 1L + + // Effect of the xor: if annotationsFlexSymFlag is -1, then we're matching `-1 * numAnnotations - 1` + when (numAnnotations xor annotationFlexSymFlag) { + 0, -1 -> annotationsTotalLength = 0 + 1 -> { + buffer.writeByte(OpCodes.ANNOTATIONS_1_SYMBOL_ADDRESS) + annotationsTotalLength += buffer.writeFlexUInt(annotationsIdBuffer[0]) + } + 2 -> { + buffer.writeByte(OpCodes.ANNOTATIONS_2_SYMBOL_ADDRESS) + annotationsTotalLength += buffer.writeFlexUInt(annotationsIdBuffer[0]) + annotationsTotalLength += buffer.writeFlexUInt(annotationsIdBuffer[1]) + } + -2 -> { + // If there's only one annotation, and we know that at least one has text, we don't need to check + // whether this is SID. + buffer.writeByte(OpCodes.ANNOTATIONS_1_FLEX_SYM) + annotationsTotalLength += writeFlexSymFromAnnotationsBuffer(0) + annotationsFlexSymBuffer[0] = null + } + -3 -> { + buffer.writeByte(OpCodes.ANNOTATIONS_2_FLEX_SYM) + annotationsTotalLength += writeFlexSymFromAnnotationsBuffer(0) + annotationsTotalLength += writeFlexSymFromAnnotationsBuffer(1) + } + else -> annotationsTotalLength += writeManyAnnotations() + } + currentContainer.length += annotationsTotalLength + + numAnnotations = 0 + annotationFlexSymFlag = 0 + hasFieldName = false + valueWriterExpression() + } + + /** + * Writes a FlexSym annotation for the specified position in the annotations buffers. 
+ */ + private fun writeFlexSymFromAnnotationsBuffer(i: Int): Int { + val annotationText = annotationsFlexSymBuffer[i] + return if (annotationText != null) { + annotationsFlexSymBuffer[i] = null + if (annotationText is SystemSymbols_1_1) { + buffer.writeFlexSym(annotationText) + } else { + buffer.writeFlexSym(utf8StringEncoder.encode(annotationText.toString())) + } + } else { + buffer.writeFlexSym(annotationsIdBuffer[i]) + } + } + + /** + * Writes 3 or more annotations for SIDs or FlexSyms + */ + private fun writeManyAnnotations(): Long { + currentContainer = containerStack.push { it.reset(ANNOTATIONS, position = buffer.position()) } + if (annotationFlexSymFlag == FLEX_SYMS_REQUIRED) { + buffer.writeByte(OpCodes.ANNOTATIONS_MANY_FLEX_SYM) + buffer.reserve(ANNOTATIONS_LENGTH_PREFIX_ALLOCATION_SIZE) + for (i in 0 until numAnnotations) { + currentContainer.length += writeFlexSymFromAnnotationsBuffer(i) + } + } else { + buffer.writeByte(OpCodes.ANNOTATIONS_MANY_SYMBOL_ADDRESS) + buffer.reserve(ANNOTATIONS_LENGTH_PREFIX_ALLOCATION_SIZE) + for (i in 0 until numAnnotations) { + currentContainer.length += buffer.writeFlexUInt(annotationsIdBuffer[i]) + } + } + + val numAnnotationsBytes = currentContainer.length + val numLengthPrefixBytes = writeCurrentContainerLength(ANNOTATIONS_LENGTH_PREFIX_ALLOCATION_SIZE) + + // Set the new current container + containerStack.pop() + currentContainer = containerStack.peek() + + return numLengthPrefixBytes + numAnnotationsBytes + } + + /** + * Helper function for writing scalar values that builds on [openValue] and also includes updating + * the length of the current container. + * + * @param valueWriterExpression should be a function that writes the scalar value to the buffer, and + * returns the number of bytes that were written. 
+ */ + private inline fun writeScalar(valueWriterExpression: () -> Int) = openValue { + val numBytesWritten = valueWriterExpression() + currentContainer.length += numBytesWritten + currentContainer.numChildren++ + } + + /** + * Helper function for writing scalar values that could be tagless. + * + * The tagless path is taken when the current container is an e-expression whose next parameter has a tagless + * encoding, or an expression group that has a tagless encoding; otherwise the value is written via [writeScalar]. + * Tagless values cannot carry annotations. + * + * @param taggedEncoder should be a function that writes the scalar value to the buffer, and returns the number of bytes that were written. + * @param taglessEncoder should be a function that, given the required [TaglessEncoding], writes the scalar value to the buffer _without an opcode_ and returns the number of bytes that were written. + * @throws IllegalArgumentException if the enclosing e-expression already has as many arguments as its signature allows. + */ + private inline fun writeTaggedOrTaglessScalar( + taggedEncoder: () -> Int, + taglessEncoder: (TaglessEncoding) -> Int, + ) { + val primitiveType = when (currentContainer.type) { + EEXP -> { + val signature = presenceBitmapStack.peek().signature + if (currentContainer.numChildren >= signature.size) throw IllegalArgumentException("Too many arguments for macro with signature $signature") + signature[currentContainer.numChildren].type.taglessEncodingKind + } + EXPR_GROUP -> currentContainer.taglessEncodingKind + else -> null + } + if (primitiveType != null) { + confirm(numAnnotations == 0) { "Tagless values cannot be annotated" } + if (currentContainer.type == EEXP) { + presenceBitmapStack.peek()[currentContainer.numChildren] = PresenceBitmap.EXPRESSION + } + val numBytesWritten = taglessEncoder(primitiveType) + currentContainer.length += numBytesWritten + currentContainer.numChildren++ + } else { + writeScalar { taggedEncoder() } + } + } + + override fun writeFieldName(sid: Int) { + confirm(currentContainer.type == STRUCT) { "Can only write a field name inside of a struct." 
} + if (sid == 0 && !currentContainer.usesFlexSym) switchCurrentStructToFlexSym() + + currentContainer.length += if (currentContainer.usesFlexSym) { + buffer.writeFlexSym(sid) + } else { + buffer.writeFlexUInt(sid) + } + hasFieldName = true + } + + override fun writeFieldName(text: CharSequence) { + confirm(currentContainer.type == STRUCT) { "Can only write a field name inside of a struct." } + if (!currentContainer.usesFlexSym) switchCurrentStructToFlexSym() + + currentContainer.length += buffer.writeFlexSym(utf8StringEncoder.encode(text.toString())) + hasFieldName = true + } + + override fun writeFieldName(symbol: SystemSymbols_1_1) { + confirm(currentContainer.type == STRUCT) { "Can only write a field name inside of a struct." } + if (!currentContainer.usesFlexSym) switchCurrentStructToFlexSym() + currentContainer.length += buffer.writeFlexSym(symbol) + hasFieldName = true + } + + override fun _private_hasFieldName(): Boolean = hasFieldName + + private fun switchCurrentStructToFlexSym() { + // To switch, we need to insert the sid-to-flexsym switch marker. 
+ buffer.writeByte(SID_TO_FLEX_SYM_SWITCH_MARKER) + currentContainer.length += 1 + currentContainer.usesFlexSym = true + } + + override fun writeNull() = writeScalar { writeNullValue(buffer, IonType.NULL) } + + override fun writeNull(type: IonType) = writeScalar { writeNullValue(buffer, type) } + + override fun writeBool(value: Boolean) = writeScalar { writeBoolValue(buffer, value) } + + /** + * Writes an int. When the enclosing macro signature or expression group requires a tagless encoding, the value is + * range-checked against that encoding and written without an opcode; otherwise it is written as a tagged int. + */ + override fun writeInt(value: Long) = writeTaggedOrTaglessScalar( + taggedEncoder = { writeIntValue(buffer, value) }, + taglessEncoder = { primitiveType -> + when (primitiveType) { + TaglessEncoding.UINT8 -> { + confirm((value and 0xFF) == value) { "value $value is not a valid uint8" } + buffer.writeFixedIntOrUInt(value, 1) + } + TaglessEncoding.UINT16 -> { + confirm((value and 0xFFFF) == value) { "value $value is not a valid uint16" } + buffer.writeFixedIntOrUInt(value, 2) + } + TaglessEncoding.UINT32 -> { + confirm((value and 0xFFFFFFFF) == value) { "value $value is not a valid uint32" } + buffer.writeFixedIntOrUInt(value, 4) + } + TaglessEncoding.UINT64 -> { + confirm(value >= 0) { "value $value is not a valid uint64" } + buffer.writeFixedIntOrUInt(value, 8) + } + TaglessEncoding.FLEX_UINT -> { + confirm(value >= 0) { "value $value is not a valid flex_uint" } + buffer.writeFlexUInt(value) + } + TaglessEncoding.INT8 -> { + confirm(value.toByte().toLong() == value) { "value $value is not a valid int8" } + buffer.writeFixedIntOrUInt(value, 1) + } + TaglessEncoding.INT16 -> { + confirm(value.toShort().toLong() == value) { "value $value is not a valid int16" } + buffer.writeFixedIntOrUInt(value, 2) + } + TaglessEncoding.INT32 -> { + confirm(value.toInt().toLong() == value) { "value $value is not a valid int32" } + buffer.writeFixedIntOrUInt(value, 4) + } + TaglessEncoding.INT64 -> buffer.writeFixedIntOrUInt(value, 8) + TaglessEncoding.FLEX_INT -> buffer.writeFlexInt(value) + else -> throw IonException("Cannot write an int when the macro signature requires $primitiveType.") + } + } + ) 
+ + /** + * Writes an arbitrary-precision int. When a tagless encoding is required, the value's sign and bit length are + * checked against that encoding before it is written without an opcode; otherwise it is written as a tagged int. + */ + override fun writeInt(value: BigInteger) = writeTaggedOrTaglessScalar( + taggedEncoder = { writeIntValue(buffer, value) }, + taglessEncoder = { primitiveType -> + when (primitiveType) { + TaglessEncoding.UINT8 -> { + confirm(value.signum() >= 0 && value.bitLength() <= 8) { "value $value is not a valid uint8" } + buffer.writeFixedIntOrUInt(value.toLong(), 1) + } + TaglessEncoding.UINT16 -> { + confirm(value.signum() >= 0 && value.bitLength() <= 16) { "value $value is not a valid uint16" } + buffer.writeFixedIntOrUInt(value.toLong(), 2) + } + TaglessEncoding.UINT32 -> { + confirm(value.signum() >= 0 && value.bitLength() <= 32) { "value $value is not a valid uint32" } + buffer.writeFixedIntOrUInt(value.toLong(), 4) + } + TaglessEncoding.UINT64 -> { + confirm(value.signum() >= 0 && value.bitLength() <= 64) { "value $value is not a valid uint64" } + buffer.writeFixedIntOrUInt(value.toLong(), 8) + } + TaglessEncoding.FLEX_UINT -> { + confirm(value.signum() >= 0) { "value $value is not a valid flex_uint" } + buffer.writeFlexUInt(value) + } + TaglessEncoding.INT8 -> { + confirm(value.bitLength() < 8) { "value $value is not a valid int8" } + buffer.writeFixedIntOrUInt(value.toLong(), 1) + } + TaglessEncoding.INT16 -> { + confirm(value.bitLength() < 16) { "value $value is not a valid int16" } + buffer.writeFixedIntOrUInt(value.toLong(), 2) + } + TaglessEncoding.INT32 -> { + confirm(value.bitLength() < 32) { "value $value is not a valid int32" } + buffer.writeFixedIntOrUInt(value.toLong(), 4) + } + TaglessEncoding.INT64 -> { + confirm(value.bitLength() < 64) { "value $value is not a valid int64" } + buffer.writeFixedIntOrUInt(value.toLong(), 8) + } + TaglessEncoding.FLEX_INT -> buffer.writeFlexInt(value) + else -> throw IonException("Cannot write an int when the macro signature requires $primitiveType.") + } + } + ) + + override fun writeFloat(value: Float) = writeTaggedOrTaglessScalar( + taggedEncoder = { writeFloatValue(buffer, value) }, + taglessEncoder = { 
primitiveType -> + when (primitiveType) { + TaglessEncoding.FLOAT16 -> TODO("Writing FLOAT16 not supported yet") + TaglessEncoding.FLOAT32 -> buffer.writeFixedIntOrUInt(floatToIntBits(value).toLong(), 4) + TaglessEncoding.FLOAT64 -> buffer.writeFixedIntOrUInt(doubleToRawLongBits(value.toDouble()), 8) + else -> throw IonException("Cannot write a float when the macro signature requires $primitiveType.") + } + } + ) + + /** Writes a double, either tagged or as a tagless FLOAT32/FLOAT64 when the macro signature requires it. */ override fun writeFloat(value: Double) = writeTaggedOrTaglessScalar( + taggedEncoder = { writeFloatValue(buffer, value) }, + taglessEncoder = { primitiveType -> + when (primitiveType) { + TaglessEncoding.FLOAT16 -> TODO("Writing FLOAT16 not supported yet") + // Bounds check for Double->Float would be surprising to some users since floating point numbers + // normally just accept loss of precision for any operation instead of throwing an Exception. + // The value is therefore narrowed with toFloat() without a range check. + TaglessEncoding.FLOAT32 -> buffer.writeFixedIntOrUInt(floatToIntBits(value.toFloat()).toLong(), 4) + TaglessEncoding.FLOAT64 -> buffer.writeFixedIntOrUInt(doubleToRawLongBits(value), 8) + else -> throw IonException("Cannot write a float when the macro signature requires $primitiveType.") + } + } + ) + + override fun writeDecimal(value: BigDecimal) = writeScalar { writeDecimalValue(buffer, value) } + + override fun writeTimestamp(value: Timestamp) = writeScalar { writeTimestampValue(buffer, value) } + + override fun writeSymbol(id: Int) { + confirm(id >= 0) { "Invalid SID: $id" } + writeTaggedOrTaglessScalar( + taggedEncoder = { writeSymbolValue(buffer, id) }, + taglessEncoder = { primitiveType -> + when (primitiveType) { + TaglessEncoding.FLEX_SYM -> buffer.writeFlexSym(id) + else -> throw IonException("Cannot write a symbol when the macro signature requires $primitiveType.") + } + } + ) + } + + override fun writeSymbol(text: CharSequence) = writeTaggedOrTaglessScalar( + taggedEncoder = { writeSymbolValue(buffer, utf8StringEncoder.encode(text.toString())) }, + taglessEncoder = { primitiveType -> + when 
(primitiveType) { + TaglessEncoding.FLEX_SYM -> buffer.writeFlexSym(utf8StringEncoder.encode(text.toString())) + else -> throw IonException("Cannot write a symbol when the macro signature requires $primitiveType.") + } + } + ) + + override fun writeSymbol(symbol: SystemSymbols_1_1) = writeScalar { + buffer.writeByte(OpCodes.SYSTEM_SYMBOL) + buffer.writeByte(symbol.id.toByte()) + 2 + } + + override fun writeString(value: CharSequence) = writeScalar { writeStringValue(buffer, utf8StringEncoder.encode(value.toString())) } + + override fun writeBlob(value: ByteArray, start: Int, length: Int) = writeScalar { writeBlobValue(buffer, value, start, length) } + + override fun writeClob(value: ByteArray, start: Int, length: Int) = writeScalar { writeClobValue(buffer, value, start, length) } + + override fun stepInList(usingLengthPrefix: Boolean) { + openValue { + currentContainer = containerStack.push { it.reset(LIST, buffer.position(), usingLengthPrefix) } + if (usingLengthPrefix) { + buffer.writeByte(OpCodes.VARIABLE_LENGTH_LIST) + buffer.reserve(lengthPrefixPreallocation) + } else { + buffer.writeByte(OpCodes.DELIMITED_LIST) + } + } + } + + override fun stepInSExp(usingLengthPrefix: Boolean) { + openValue { + currentContainer = containerStack.push { it.reset(SEXP, buffer.position(), usingLengthPrefix) } + if (usingLengthPrefix) { + buffer.writeByte(OpCodes.VARIABLE_LENGTH_SEXP) + buffer.reserve(lengthPrefixPreallocation) + } else { + buffer.writeByte(OpCodes.DELIMITED_SEXP) + } + } + } + + override fun stepInStruct(usingLengthPrefix: Boolean) { + openValue { + currentContainer = containerStack.push { it.reset(STRUCT, buffer.position(), usingLengthPrefix) } + if (usingLengthPrefix) { + buffer.writeByte(OpCodes.VARIABLE_LENGTH_STRUCT_WITH_SIDS) + buffer.reserve(lengthPrefixPreallocation) + } else { + buffer.writeByte(OpCodes.DELIMITED_STRUCT) + currentContainer.usesFlexSym = true + } + } + } + + override fun stepInEExp(name: CharSequence) { + throw 
UnsupportedOperationException("Binary writer requires macros to be invoked by their ID.") + } + + // Void can be written as an empty expression group. + override fun stepInEExp(id: Int, usingLengthPrefix: Boolean, macro: Macro) { + // Length-prefixed e-expression format: + // F5 <flexuint-address> <flexuint-length> <presence-bitmap> <args...> + // Non-length-prefixed e-expression format: + // <opcode-and/or-address> <presence-bitmap> <args...>
+ confirm(numAnnotations == 0) { "Cannot annotate an E-Expression" } + + if (currentContainer.type == STRUCT && !hasFieldName) { + if (!currentContainer.usesFlexSym) switchCurrentStructToFlexSym() + buffer.writeByte(FlexInt.ZERO) + currentContainer.length++ + } + + currentContainer = containerStack.push { it.reset(EEXP, buffer.position(), usingLengthPrefix) } + + if (usingLengthPrefix) { + buffer.writeByte(OpCodes.LENGTH_PREFIXED_MACRO_INVOCATION) + currentContainer.metadataOffset += buffer.writeFlexUInt(id) + buffer.reserve(lengthPrefixPreallocation) + } else { + if (id < 64) { + buffer.writeByte(id.toByte()) + } else if (id < 4160) { + val biasedId = id - 64 + val lowNibble = biasedId / 256 + val adjustedId = biasedId % 256L + buffer.writeByte((OpCodes.BIASED_E_EXPRESSION_ONE_BYTE_FIXED_INT + lowNibble).toByte()) + currentContainer.metadataOffset += buffer.writeFixedUInt(adjustedId) + } else if (id < 1_052_736) { + val biasedId = id - 4160 + val lowNibble = biasedId / (256 * 256) + val adjustedId = biasedId % (256 * 256L) + buffer.writeByte((OpCodes.BIASED_E_EXPRESSION_TWO_BYTE_FIXED_INT + lowNibble).toByte()) + currentContainer.metadataOffset += buffer.writeFixedIntOrUInt(adjustedId, 2) + } else { + buffer.writeByte(OpCodes.E_EXPRESSION_WITH_FLEX_UINT_ADDRESS) + currentContainer.metadataOffset += buffer.writeFlexUInt(id) + } + } + + val presenceBits = presenceBitmapStack.push { it.initialize(macro.signature) } + if (presenceBits.byteSize > 0) { + // Reserve for presence bits + buffer.reserve(presenceBits.byteSize) + currentContainer.length += presenceBits.byteSize + } + + // No need to clear any of the annotation fields because we already asserted that there are no annotations + hasFieldName = false + } + + override fun stepInEExp(systemMacro: SystemMacro) { + confirm(numAnnotations == 0) { "Cannot annotate an E-Expression" } + + if (currentContainer.type == STRUCT && !hasFieldName) { + // This allows the e-expression to be written in field-name position. 
+ // TODO: Confirm that this is still in the spec. + if (!currentContainer.usesFlexSym) switchCurrentStructToFlexSym() + buffer.writeByte(FlexInt.ZERO) + currentContainer.length++ + } + + currentContainer = containerStack.push { it.reset(EEXP, buffer.position(), isLengthPrefixed = false) } + + buffer.writeByte(OpCodes.SYSTEM_MACRO_INVOCATION) + buffer.writeByte(systemMacro.id) + currentContainer.metadataOffset += 1 // to account for the macro ID. + + val presenceBits = presenceBitmapStack.push { it.initialize(systemMacro.signature) } + if (presenceBits.byteSize > 0) { + // Reserve for presence bits + buffer.reserve(presenceBits.byteSize) + currentContainer.length += presenceBits.byteSize + } + + // No need to clear any of the annotation fields because we already asserted that there are no annotations + hasFieldName = false + } + + /** + * Starts an expression group for the next parameter of the enclosing e-expression. + * + * @throws IllegalArgumentException if the enclosing macro's signature has no parameter left for this argument. + */ + override fun stepInExpressionGroup(usingLengthPrefix: Boolean) { + confirm(numAnnotations == 0) { "Cannot annotate an expression group" } + confirm(currentContainer.type == EEXP) { "Can only create an expression group in a macro invocation" } + + val signature = presenceBitmapStack.peek().signature + // Same guard as writeTaggedOrTaglessScalar() and stepOut(): report a surplus argument as an + // IllegalArgumentException rather than letting the signature lookup fail with an IndexOutOfBoundsException. + if (currentContainer.numChildren >= signature.size) throw IllegalArgumentException("Too many arguments for macro with signature $signature") + val encoding = signature[currentContainer.numChildren].type + + currentContainer = containerStack.push { it.reset(EXPR_GROUP, buffer.position(), usingLengthPrefix, metadataOffset = 0) } + currentContainer.taglessEncodingKind = encoding.taglessEncodingKind + + if (encoding.taglessEncodingKind != null) { + // Tagless groups always need a length (although it is actually the count of expressions in the group) + buffer.reserve(maxOf(1, lengthPrefixPreallocation)) + } else if (usingLengthPrefix) { + // Reserve length prefix for a tagged expression group + buffer.reserve(maxOf(1, lengthPrefixPreallocation)) + } else { + // At the start of a tagged expression group, signals that it is delimited. 
+ buffer.writeByte(FlexInt.ZERO) + currentContainer.length++ + } + // No need to clear any of the annotation fields because we already asserted that there are no annotations + } + + /** + * Continues the current expression group. In most cases, this is a no-op. When in a tagless, delimited + * expression group, this finished the current "segment" of the expression group and starts a new segment. + * If the current segment is empty, this does nothing. + * + * TODO: Determine whether this should be called by the managed writer, or if some continuation strategy + * should be configured in this class. + */ + fun continueExpressionGroup() { + confirm(currentContainer.type == EXPR_GROUP) { "Can only call this method when directly in an expression group." } + val primitiveType = currentContainer.taglessEncodingKind + if (!currentContainer.isLengthPrefixed && primitiveType != null && currentContainer.length > 0) { + var thisContainerTotalLength = currentContainer.length + val thisContainerNumChildren = currentContainer.numChildren + thisContainerTotalLength += writeCurrentContainerLength(lengthPrefixPreallocation) + containerStack.pop() + containerStack.peek().length += thisContainerTotalLength + currentContainer = containerStack.push { it.reset(EXPR_GROUP, buffer.position(), isLengthPrefixed = false, metadataOffset = 0) } + currentContainer.taglessEncodingKind = primitiveType + // Carry over numChildren into the next segment (but not length) + currentContainer.numChildren = thisContainerNumChildren + // Reserve for the next pre-allocation + buffer.reserve(1) + } + } + + override fun stepOut() { + confirm(!hasFieldName) { "Cannot step out with dangling field name." } + confirm(numAnnotations == 0) { "Cannot step out with dangling annotations." } + + // The length of the current container. 
By the end of this method, the total must include + // any opcodes, length prefixes, or other data that is not counted in ContainerInfo.length + var thisContainerTotalLength: Long = currentContainer.length + + // currentContainer.type is non-null for any initialized ContainerInfo + when (currentContainer.type.assumeNotNull()) { + LIST, SEXP, STRUCT -> { + // Add one byte to account for the op code + thisContainerTotalLength++ + // Write closing delimiter if we're in a delimited container. + // Update length prefix if we're in a prefixed container. + if (currentContainer.isLengthPrefixed) { + val contentLength = currentContainer.length + if (contentLength <= 0xF && !currentContainer.usesFlexSym) { + // TODO: Right now, this is skipped if we switch to FlexSym after starting a struct + // because we have no way to differentiate a struct that started as FlexSym + // from a struct that switched to FlexSym. + // Clean up any unused space that was pre-allocated. + buffer.shiftBytesLeft(currentContainer.length.toInt(), lengthPrefixPreallocation) + val zeroLengthOpCode = when (currentContainer.type) { + LIST -> OpCodes.LIST_ZERO_LENGTH + SEXP -> OpCodes.SEXP_ZERO_LENGTH + STRUCT -> OpCodes.STRUCT_SID_ZERO_LENGTH + else -> TODO("Unreachable") + } + buffer.writeByteAt(currentContainer.position, zeroLengthOpCode + contentLength) + } else { + thisContainerTotalLength += writeCurrentContainerLength(lengthPrefixPreallocation) + } + } else { + if (isInStruct()) { + // Need a 0 FlexInt before the end delimiter + buffer.writeByte(FlexInt.ZERO) + thisContainerTotalLength += 1 + } + thisContainerTotalLength += 1 // For the end marker + buffer.writeByte(OpCodes.DELIMITED_END_MARKER) + } + } + EEXP -> { + // Add to account for the opcode and/or address + thisContainerTotalLength += currentContainer.metadataOffset + + val presenceBitmap = presenceBitmapStack.pop() + val requiresWritingPresenceBits = presenceBitmap.byteSize > 0 + val presenceBitmapPosition = if 
(currentContainer.isLengthPrefixed) { + // TODO: If the length is 0, see if we can go back and rewrite this as a non-length-prefixed e-exp + thisContainerTotalLength += writeCurrentContainerLength(lengthPrefixPreallocation) + currentContainer.position + currentContainer.metadataOffset + lengthPrefixPreallocation + } else { + currentContainer.position + currentContainer.metadataOffset + } + + if (requiresWritingPresenceBits) { + presenceBitmap.writeTo(buffer, presenceBitmapPosition) + } + } + EXPR_GROUP -> { + val isTagless = currentContainer.taglessEncodingKind != null + // TODO: Consider whether we can rewrite groups that have only one expression as a single expression + + // Elide empty containers if we're going to be writing a presence bitmap + if (currentContainer.numChildren == 0 && presenceBitmapStack.peek().byteSize > 0) { + // NOTE: This check is safe after calling `continueExpressionGroup` because that function + // only resets the container length, not the number of children. + + // It is not always safe to truncate like this without clearing the patch points for the + // truncated part of the buffer. However, it is safe to do so here because we can only get to + // this particular branch if this expression group is empty, ergo it contains no patch points. + buffer.truncate(currentContainer.position) + thisContainerTotalLength = 0 + } else if (isTagless && currentContainer.length == 0L) { + // If we've called `continueExpressionGroup` and then `stepOut` without adding any more items... 
+ buffer.truncate(currentContainer.position) + buffer.writeByte(FlexInt.ZERO) + thisContainerTotalLength++ + } else if (isTagless) { + // End tagless group -- write the number of expressions, end with FlexUInt 0 + thisContainerTotalLength += writeCurrentContainerLength(maxOf(1, lengthPrefixPreallocation)) + buffer.writeByte(FlexInt.ZERO) + thisContainerTotalLength++ + } else if (currentContainer.isLengthPrefixed) { + // Length-prefixed, tagged -- write the number of bytes + thisContainerTotalLength += writeCurrentContainerLength(maxOf(1, lengthPrefixPreallocation)) + } else { + // Delimited, tagged -- start with `01` end with `F0` + buffer.writeByte(OpCodes.DELIMITED_END_MARKER) + thisContainerTotalLength++ + } + } + ANNOTATIONS -> TODO("Unreachable.") + TOP -> throw IonException("Nothing to step out of.") + } + + // Set the new current container + val justExitedContainer = containerStack.pop() + currentContainer = containerStack.peek() + + if (currentContainer.type == EEXP) { + val signature = presenceBitmapStack.peek().signature + if (currentContainer.numChildren >= signature.size) throw IllegalArgumentException("Too many arguments for macro with signature $signature") + presenceBitmapStack.peek()[currentContainer.numChildren] = when (justExitedContainer.type) { + LIST, SEXP, STRUCT, EEXP -> PresenceBitmap.EXPRESSION + EXPR_GROUP -> if (thisContainerTotalLength == 0L) PresenceBitmap.VOID else PresenceBitmap.GROUP + else -> TODO("Unreachable") + } + } + + // Update the length of the new current container to include the length of the container that we just stepped out of. + currentContainer.length += thisContainerTotalLength + currentContainer.numChildren++ + } + + /** + * Writes the length of the current container and returns the number of bytes needed to do so. + * Transparently handles PatchPoints as necessary. + * + * @param numPreAllocatedLengthPrefixBytes the number of bytes that were pre-allocated for the length prefix of the + * current container. 
+ */ + private fun writeCurrentContainerLength(numPreAllocatedLengthPrefixBytes: Int): Int { + val lengthToWrite = currentContainer.length + val lengthPosition = currentContainer.position + currentContainer.metadataOffset + val lengthPrefixBytesRequired = FlexInt.flexUIntLength(lengthToWrite) + if (lengthPrefixBytesRequired == numPreAllocatedLengthPrefixBytes) { + // We have enough space, so write in the correct length. + buffer.writeFlexIntOrUIntAt(lengthPosition, lengthToWrite, lengthPrefixBytesRequired) + } else { + addPatchPointsToStack() + // All ContainerInfos are in the stack, so we know that its patchPoint is non-null. + currentContainer.patchPoint.assumeNotNull().apply { + oldPosition = lengthPosition + oldLength = numPreAllocatedLengthPrefixBytes + length = lengthToWrite + } + } + return lengthPrefixBytesRequired + } + + private fun addPatchPointsToStack() { + // TODO: We may be able to improve this by skipping patch points on ancestors that are delimited containers, + // since the patch points for delimited containers will go unused anyway. However, the additional branching + // may negate the effect of any reduction in allocations. + + // If we're adding a patch point we first need to ensure that all of our ancestors (containing values) already + // have a patch point. No container can be smaller than the contents, so all outer layers also require patches. + // Instead of allocating iterator, we share one iterator instance within the scope of the container stack and + // reset the cursor every time we track back to the ancestors. 
+ val stackIterator: ListIterator = containerStack.iterator() + // Walk down the stack until we find an ancestor which already has a patch point + while (stackIterator.hasNext() && stackIterator.next().patchPoint == null); + + // The iterator cursor is now positioned on an ancestor container that has a patch point + // Ascend back up the stack, fixing the ancestors which need a patch point assigned before us + while (stackIterator.hasPrevious()) { + val ancestor = stackIterator.previous() + if (ancestor.patchPoint == null) { + ancestor.patchPoint = patchPoints.pushAndGet { it.clear() } + } + } + } + + override fun writeMacroParameterCardinality(cardinality: Macro.ParameterCardinality) { + // TODO: Write as a system symbol + writeSymbol(cardinality.sigil.toString()) + } + + override fun stepInTdlMacroInvocation(macroRef: Int) { + stepInSExp(usingLengthPrefix = false) + writeSymbol(".") + writeInt(macroRef.toLong()) + } + + override fun stepInTdlMacroInvocation(macroRef: String) { + stepInSExp(usingLengthPrefix = false) + writeSymbol(".") + writeSymbol(macroRef) + } + + override fun stepInTdlSystemMacroInvocation(systemSymbol: SystemSymbols_1_1) { + stepInSExp(usingLengthPrefix = false) + writeSymbol(".") + writeAnnotations(SystemSymbols_1_1.ION) + writeSymbol(systemSymbol) + } + + override fun writeTdlVariableExpansion(variableName: String) { + stepInSExp(usingLengthPrefix = false) + writeSymbol("%") + writeSymbol(variableName) + stepOut() + } + + override fun stepInTdlExpressionGroup() { + stepInSExp(usingLengthPrefix = false) + // TODO: Write as a system symbol + writeSymbol("..") + } +} diff --git a/src/main/java/com/amazon/ion/impl/bin/Ion_1_1_Constants.java b/src/main/java/com/amazon/ion/impl/bin/Ion_1_1_Constants.java index 8df501b349..14305e3830 100644 --- a/src/main/java/com/amazon/ion/impl/bin/Ion_1_1_Constants.java +++ b/src/main/java/com/amazon/ion/impl/bin/Ion_1_1_Constants.java @@ -1,39 +1,150 @@ +// Copyright Amazon.com, Inc. or its affiliates. 
All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl.bin; +import com.amazon.ion.Timestamp; +import com.amazon.ion._private.SuppressFBWarnings; +import com.amazon.ion.impl.SystemSymbols_1_1; + /** * Contains constants (other than OpCodes) which are generally applicable to both reading and writing binary Ion 1.1 */ public class Ion_1_1_Constants { private Ion_1_1_Constants() {} + public static final String TDL_MACRO_INVOCATION_SIGIL = "."; + public static final String TDL_EXPRESSION_GROUP_SIGIL = ".."; + public static final String TDL_VARIABLE_EXPANSION_SIGIL = "%"; + + // When writing system symbols (or $0) in a flex sym, the SID must be offset to + // avoid clashing with E-Expression op codes. + public static final int FLEX_SYM_SYSTEM_SYMBOL_OFFSET = 0x60; + public static final int FLEX_SYM_MAX_SYSTEM_SYMBOL = 0x60 + SystemSymbols_1_1.size(); + static final int FIRST_2_BYTE_SYMBOL_ADDRESS = 256; static final int FIRST_MANY_BYTE_SYMBOL_ADDRESS = 65792; + static final byte SID_TO_FLEX_SYM_SWITCH_MARKER = FlexInt.ZERO; + + public static final int MAX_NANOSECONDS = 999999999; + public static final int NANOSECOND_SCALE = 9; + public static final int MAX_MICROSECONDS = 999999; + public static final int MICROSECOND_SCALE = 6; + public static final int MAX_MILLISECONDS = 999; + public static final int MILLISECOND_SCALE = 3; + + //////// Special Float Value Constants //////// + + public static final int FLOAT_16_NEGATIVE_ZERO_BITS = 0x8000; + public static final int FLOAT_32_NEGATIVE_ZERO_BITS = 0x80000000; + //////// Timestamp Field Constants //////// // S_TIMESTAMP_* is applicable to all short-form timestamps - static final int S_TIMESTAMP_MONTH_BIT_OFFSET = 7; - static final int S_TIMESTAMP_DAY_BIT_OFFSET = 11; - static final int S_TIMESTAMP_HOUR_BIT_OFFSET = 16; - static final int S_TIMESTAMP_MINUTE_BIT_OFFSET = 21; + public static final int S_TIMESTAMP_YEAR_BIAS = 1970; + public static final int S_TIMESTAMP_MONTH_BIT_OFFSET = 7; + 
public static final int S_TIMESTAMP_DAY_BIT_OFFSET = 11; + public static final int S_TIMESTAMP_HOUR_BIT_OFFSET = 16; + public static final int S_TIMESTAMP_MINUTE_BIT_OFFSET = 21; // S_U_TIMESTAMP_* is applicable to all short-form timestamps with a `U` bit - static final int S_U_TIMESTAMP_UTC_FLAG = 1 << 27; - static final int S_U_TIMESTAMP_SECOND_BIT_OFFSET = 28; - static final int S_U_TIMESTAMP_FRACTION_BIT_OFFSET = 34; + public static final int S_U_TIMESTAMP_NANOSECOND_LOWER_NIBBLE = 0x7; + public static final int S_U_TIMESTAMP_MICROSECOND_LOWER_NIBBLE = 0x6; + public static final int S_U_TIMESTAMP_MILLISECOND_LOWER_NIBBLE = 0x5; + public static final int S_U_TIMESTAMP_UTC_FLAG = 1 << 27; + public static final int S_U_TIMESTAMP_SECOND_BIT_OFFSET = 28; + public static final int S_U_TIMESTAMP_FRACTION_BIT_OFFSET = 34; // S_O_TIMESTAMP_* is applicable to all short-form timestamps with `o` (offset) bits - static final int S_O_TIMESTAMP_OFFSET_BIT_OFFSET = 27; - static final int S_O_TIMESTAMP_SECOND_BIT_OFFSET = 34; + public static final int S_O_TIMESTAMP_NANOSECOND_LOWER_NIBBLE = 0xC; + public static final int S_O_TIMESTAMP_MICROSECOND_LOWER_NIBBLE = 0xB; + public static final int S_O_TIMESTAMP_MILLISECOND_LOWER_NIBBLE = 0xA; + public static final int S_O_TIMESTAMP_MINUTE_LOWER_NIBBLE = 0x8; + public static final int S_O_TIMESTAMP_OFFSET_BIT_OFFSET = 27; + public static final int S_O_TIMESTAMP_SECOND_BIT_OFFSET = 34; + public static final int S_O_TIMESTAMP_FRACTION_BIT_OFFSET = 40; + public static final int S_O_TIMESTAMP_NANOSECOND_BITS_IN_EIGHTH_BYTE = 24; + + // Explicit offsets are encoded in increments of 15 minutes, from -56. 
+ public static final int S_O_TIMESTAMP_OFFSET_BIAS = 56; + public static final int S_O_TIMESTAMP_OFFSET_INCREMENT = 15; // L_TIMESTAMP_* is applicable to all long-form timestamps - static final int L_TIMESTAMP_MONTH_BIT_OFFSET = 14; - static final int L_TIMESTAMP_DAY_BIT_OFFSET = 18; - static final int L_TIMESTAMP_HOUR_BIT_OFFSET = 23; - static final int L_TIMESTAMP_MINUTE_BIT_OFFSET = 28; - static final int L_TIMESTAMP_OFFSET_BIT_OFFSET = 34; - static final int L_TIMESTAMP_SECOND_BIT_OFFSET = 46; - static final int L_TIMESTAMP_UNKNOWN_OFFSET_VALUE = 0b111111111111; + public static final int L_TIMESTAMP_MONTH_BIT_OFFSET = 14; + public static final int L_TIMESTAMP_DAY_BIT_OFFSET = 18; + public static final int L_TIMESTAMP_HOUR_BIT_OFFSET = 23; + public static final int L_TIMESTAMP_MINUTE_BIT_OFFSET = 28; + public static final int L_TIMESTAMP_OFFSET_BIT_OFFSET = 34; + public static final int L_TIMESTAMP_SECOND_BIT_OFFSET = 46; + public static final int L_TIMESTAMP_UNKNOWN_OFFSET_VALUE = 0b111111111111; + public static final int L_TIMESTAMP_SECOND_BYTE_LENGTH = 7; + public static final int L_TIMESTAMP_MINUTE_BYTE_LENGTH = 6; + public static final int L_TIMESTAMP_DAY_OR_MONTH_BYTE_LENGTH = 3; + public static final int L_TIMESTAMP_YEAR_BYTE_LENGTH = 2; + public static final int L_TIMESTAMP_OFFSET_BIAS = 1440; // 24 hours * 60 min/hour + + //////// Lookup tables //////// + @SuppressFBWarnings("MS_MUTABLE_ARRAY") + public static final Timestamp.Precision[] S_TIMESTAMP_PRECISION_FOR_TYPE_ID_OFFSET = new Timestamp.Precision[] { + Timestamp.Precision.YEAR, // 0x70 + Timestamp.Precision.MONTH, // 0x71 + Timestamp.Precision.DAY, // 0x72 + Timestamp.Precision.MINUTE, // 0x73 (minute UTC) + Timestamp.Precision.SECOND, // 0x74 (second UTC) + Timestamp.Precision.SECOND, // 0x75 (millisecond UTC) + Timestamp.Precision.SECOND, // 0x76 (microsecond UTC) + Timestamp.Precision.SECOND, // 0x77 (nanosecond UTC) + Timestamp.Precision.MINUTE, // 0x78 (minute offset) + 
Timestamp.Precision.SECOND, // 0x79 (second offset) + Timestamp.Precision.SECOND, // 0x7A (millisecond offset) + Timestamp.Precision.SECOND, // 0x7B (microsecond offset) + Timestamp.Precision.SECOND, // 0x7C (nanosecond offset) + }; + + @SuppressFBWarnings("MS_MUTABLE_ARRAY") + public static final Timestamp.Precision[] L_TIMESTAMP_PRECISION_FOR_LENGTH = new Timestamp.Precision[] { + null, // Length 0: illegal + null, // Length 1: illegal + Timestamp.Precision.YEAR, + null, // Length 3: Month or Day; additional examination required. + null, // Length 4: illegal + null, // Length 5: illegal + Timestamp.Precision.MINUTE, + Timestamp.Precision.SECOND + }; //////// Bit masks //////// - static final long LEAST_SIGNIFICANT_7_BITS = 0b01111111L; + public static final int FOUR_BIT_MASK = 0xF; + public static final int FIVE_BIT_MASK = 0x1F; + public static final int SIX_BIT_MASK = 0x3F; + public static final int SEVEN_BIT_MASK = 0x7F; + public static final int TEN_BIT_MASK = 0x3FF; + public static final int TWELVE_BIT_MASK = 0xFFF; + public static final int FOURTEEN_BIT_MASK = 0x3FFF; + public static final int TWENTY_BIT_MASK = 0xFFFFF; + public static final int TWENTY_FOUR_BIT_MASK = 0xFFFFFF; + public static final int THIRTY_BIT_MASK = 0x3FFFFFFF; + + public static final long L_TIMESTAMP_SECOND_MASK = (long) SIX_BIT_MASK << L_TIMESTAMP_SECOND_BIT_OFFSET; + public static final long L_TIMESTAMP_OFFSET_MASK = (long) TWELVE_BIT_MASK << L_TIMESTAMP_OFFSET_BIT_OFFSET; + public static final long L_TIMESTAMP_MINUTE_MASK = (long) SIX_BIT_MASK << L_TIMESTAMP_MINUTE_BIT_OFFSET; + public static final int L_TIMESTAMP_HOUR_MASK = FIVE_BIT_MASK << L_TIMESTAMP_HOUR_BIT_OFFSET; + public static final int L_TIMESTAMP_DAY_MASK = FIVE_BIT_MASK << L_TIMESTAMP_DAY_BIT_OFFSET; + public static final int L_TIMESTAMP_MONTH_MASK = FOUR_BIT_MASK << L_TIMESTAMP_MONTH_BIT_OFFSET; + public static final int L_TIMESTAMP_YEAR_MASK = FOURTEEN_BIT_MASK; + + public static final long 
S_O_TIMESTAMP_NANOSECOND_EIGHTH_BYTE_MASK = (long) TWENTY_FOUR_BIT_MASK << S_O_TIMESTAMP_FRACTION_BIT_OFFSET; + public static final long S_O_TIMESTAMP_NANOSECOND_NINTH_BYTE_MASK = SIX_BIT_MASK; + public static final long S_U_TIMESTAMP_NANOSECOND_MASK = (long) THIRTY_BIT_MASK << S_U_TIMESTAMP_FRACTION_BIT_OFFSET; + public static final long S_O_TIMESTAMP_MICROSECOND_MASK = (long) TWENTY_BIT_MASK << S_O_TIMESTAMP_FRACTION_BIT_OFFSET; + public static final long S_U_TIMESTAMP_MICROSECOND_MASK = (long) TWENTY_BIT_MASK << S_U_TIMESTAMP_FRACTION_BIT_OFFSET; + public static final long S_O_TIMESTAMP_MILLISECOND_MASK = (long) TEN_BIT_MASK << S_O_TIMESTAMP_FRACTION_BIT_OFFSET; + public static final long S_U_TIMESTAMP_MILLISECOND_MASK = (long) TEN_BIT_MASK << S_U_TIMESTAMP_FRACTION_BIT_OFFSET; + public static final long S_O_TIMESTAMP_SECOND_MASK = (long) SIX_BIT_MASK << S_O_TIMESTAMP_SECOND_BIT_OFFSET; + public static final long S_U_TIMESTAMP_SECOND_MASK = (long) SIX_BIT_MASK << S_U_TIMESTAMP_SECOND_BIT_OFFSET; + public static final long S_O_TIMESTAMP_OFFSET_MASK = (long) SEVEN_BIT_MASK << S_O_TIMESTAMP_OFFSET_BIT_OFFSET; + public static final int S_TIMESTAMP_MINUTE_MASK = SIX_BIT_MASK << S_TIMESTAMP_MINUTE_BIT_OFFSET; + public static final int S_TIMESTAMP_HOUR_MASK = FIVE_BIT_MASK << S_TIMESTAMP_HOUR_BIT_OFFSET; + public static final int S_TIMESTAMP_DAY_MASK = FIVE_BIT_MASK << S_TIMESTAMP_DAY_BIT_OFFSET; + public static final int S_TIMESTAMP_MONTH_MASK = FOUR_BIT_MASK << S_TIMESTAMP_MONTH_BIT_OFFSET; + public static final int S_TIMESTAMP_YEAR_MASK = SEVEN_BIT_MASK; } diff --git a/src/main/java/com/amazon/ion/impl/bin/LengthPrefixStrategy.kt b/src/main/java/com/amazon/ion/impl/bin/LengthPrefixStrategy.kt new file mode 100644 index 0000000000..c760d68faf --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/bin/LengthPrefixStrategy.kt @@ -0,0 +1,41 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.bin + +/** + * TODO: Proper documentation. + * + * See [SymbolInliningStrategy] for a similar strategy interface. + */ +fun interface LengthPrefixStrategy { + /** + * Indicates whether a container should be written using a length prefix. + * + * TODO: See if we can add other context, such as annotations that are going to be added to this container, + * the field name (if this container is in a struct), or the delimited/prefixed status of the parent + * container. + * + * With more context, we could enable strategies like: + * - Write lists with annotation `X` as a delimited container. + */ + fun writeLengthPrefix(containerType: ContainerType, depth: Int): Boolean + + companion object { + @JvmField + val NEVER_PREFIXED = LengthPrefixStrategy { _, _ -> false } + @JvmField + val ALWAYS_PREFIXED = LengthPrefixStrategy { _, _ -> true } + } + + enum class ContainerType { + LIST, + STRUCT, + SEXP, + /** + * These are only containers at an encoding/syntax level. + * There isn't really a "delimited" option for macros, but there is a length-prefix option. + */ + EEXP, + EXPRESSION_GROUP, + } +} diff --git a/src/main/java/com/amazon/ion/impl/bin/ManagedWriterOptions_1_1.kt b/src/main/java/com/amazon/ion/impl/bin/ManagedWriterOptions_1_1.kt new file mode 100644 index 0000000000..4c940f4fe5 --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/bin/ManagedWriterOptions_1_1.kt @@ -0,0 +1,34 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.bin + +/** + * Options that are specific to Ion 1.1 and handled in the managed writer. + * These are (mostly) generalizable to both text and binary. + * + * TODO: data classes cannot be changed in a backward compatible way because of the auto-generated `copy` method. 
+ * See if we can get away with using a non-"data" class here, but if not then replace this with a public + * interface, a public builder, and a private/internal implementation class. + */ +data class ManagedWriterOptions_1_1( + /** + * Whether the symbols in the encoding directive should be interned or not. + * For binary, almost certainly want this to be true, and for text, it's + * more readable if it's false. + */ + val internEncodingDirectiveSymbols: Boolean, + val invokeTdlMacrosByName: Boolean, + val symbolInliningStrategy: SymbolInliningStrategy, + val lengthPrefixStrategy: LengthPrefixStrategy, + val eExpressionIdentifierStrategy: EExpressionIdentifierStrategy, +) : SymbolInliningStrategy by symbolInliningStrategy, LengthPrefixStrategy by lengthPrefixStrategy { + + /** + * Indicates whether e-expressions should be written using macro + * names or macro addresses (when a choice is available). + */ + enum class EExpressionIdentifierStrategy { + BY_NAME, + BY_ADDRESS, + } +} diff --git a/src/main/java/com/amazon/ion/impl/bin/OpCodes.java b/src/main/java/com/amazon/ion/impl/bin/OpCodes.java index a84c4bc853..93b28ea2e1 100644 --- a/src/main/java/com/amazon/ion/impl/bin/OpCodes.java +++ b/src/main/java/com/amazon/ion/impl/bin/OpCodes.java @@ -1,3 +1,5 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl.bin; /** @@ -6,38 +8,51 @@ public class OpCodes { private OpCodes() {} - public static final byte INTEGER_ZERO_LENGTH = 0x50; - // 0x51-0x58 are additional lengths of integers. 
- // 0x59 Reserved - public static final byte FLOAT_ZERO_LENGTH = 0x5A; - public static final byte FLOAT_16 = 0x5B; - public static final byte FLOAT_32 = 0x5C; - public static final byte FLOAT_64 = 0x5D; - public static final byte BOOLEAN_TRUE = 0x5E; - public static final byte BOOLEAN_FALSE = 0x5F; + public static final byte BIASED_E_EXPRESSION_ONE_BYTE_FIXED_INT = 0x40; + public static final byte BIASED_E_EXPRESSION_TWO_BYTE_FIXED_INT = 0x50; - public static final byte DECIMAL_ZERO_LENGTH = 0x60; - // 0x61-0x6E are additional lengths of decimals. - public static final byte POSITIVE_ZERO_DECIMAL = 0x6F; + public static final byte INTEGER_ZERO_LENGTH = 0x60; + // 0x61-0x68 are additional lengths of integers. + // 0x69 Reserved + public static final byte FLOAT_ZERO_LENGTH = 0x6A; + public static final byte FLOAT_16 = 0x6B; + public static final byte FLOAT_32 = 0x6C; + public static final byte FLOAT_64 = 0x6D; + public static final byte BOOLEAN_TRUE = 0x6E; + public static final byte BOOLEAN_FALSE = 0x6F; - public static final byte TIMESTAMP_YEAR_PRECISION = 0x70; - public static final byte TIMESTAMP_MONTH_PRECISION = 0x71; - public static final byte TIMESTAMP_DAY_PRECISION = 0x72; - public static final byte TIMESTAMP_MINUTE_PRECISION = 0x73; - public static final byte TIMESTAMP_SECOND_PRECISION = 0x74; - public static final byte TIMESTAMP_MILLIS_PRECISION = 0x75; - public static final byte TIMESTAMP_MICROS_PRECISION = 0x76; - public static final byte TIMESTAMP_NANOS_PRECISION = 0x77; - public static final byte TIMESTAMP_MINUTE_PRECISION_WITH_OFFSET = 0x78; - public static final byte TIMESTAMP_SECOND_PRECISION_WITH_OFFSET = 0x79; - public static final byte TIMESTAMP_MILLIS_PRECISION_WITH_OFFSET = 0x7A; - public static final byte TIMESTAMP_MICROS_PRECISION_WITH_OFFSET = 0x7B; - public static final byte TIMESTAMP_NANOS_PRECISION_WITH_OFFSET = 0x7C; - // 0x7D-0x7F Reserved + public static final byte DECIMAL_ZERO_LENGTH = 0x70; - public static final byte 
STRING_ZERO_LENGTH = (byte) 0x80; + public static final byte TIMESTAMP_YEAR_PRECISION = (byte) 0x80; + public static final byte TIMESTAMP_MONTH_PRECISION = (byte) 0x81; + public static final byte TIMESTAMP_DAY_PRECISION = (byte) 0x82; + public static final byte TIMESTAMP_MINUTE_PRECISION = (byte) 0x83; + public static final byte TIMESTAMP_SECOND_PRECISION = (byte) 0x84; + public static final byte TIMESTAMP_MILLIS_PRECISION = (byte) 0x85; + public static final byte TIMESTAMP_MICROS_PRECISION = (byte) 0x86; + public static final byte TIMESTAMP_NANOS_PRECISION = (byte) 0x87; + public static final byte TIMESTAMP_MINUTE_PRECISION_WITH_OFFSET = (byte) 0x88; + public static final byte TIMESTAMP_SECOND_PRECISION_WITH_OFFSET = (byte) 0x89; + public static final byte TIMESTAMP_MILLIS_PRECISION_WITH_OFFSET = (byte) 0x8A; + public static final byte TIMESTAMP_MICROS_PRECISION_WITH_OFFSET = (byte) 0x8B; + public static final byte TIMESTAMP_NANOS_PRECISION_WITH_OFFSET = (byte) 0x8C; + // 0x8D-0x8F Reserved - public static final byte INLINE_SYMBOL_ZERO_LENGTH = (byte) 0x90; + public static final byte STRING_ZERO_LENGTH = (byte) 0x90; + // 0x91-0x9F are additional lengths of strings. + + public static final byte INLINE_SYMBOL_ZERO_LENGTH = (byte) 0xA0; + // 0xA1-0xAF are additional lengths of symbols. + + public static final byte LIST_ZERO_LENGTH = (byte) 0xB0; + // 0xB1-0xBF are additional lengths of lists. + + public static final byte SEXP_ZERO_LENGTH = (byte) 0xC0; + // 0xC1-0xCF are additional lengths of sexps. + + public static final byte STRUCT_SID_ZERO_LENGTH = (byte) 0xD0; + // 0xD1 Reserved + // 0xD2-0xDF are additional lengths of structs. 
public static final byte SYMBOL_ADDRESS_1_BYTE = (byte) 0xE1; public static final byte SYMBOL_ADDRESS_2_BYTES = (byte) 0xE2; @@ -50,15 +65,25 @@ private OpCodes() {} public static final byte ANNOTATIONS_MANY_FLEX_SYM = (byte) 0xE9; public static final byte NULL_UNTYPED = (byte) 0xEA; public static final byte NULL_TYPED = (byte) 0xEB; - // 0xEC, 0xED NOP - // 0xEE Reserved - // 0xEF System Macro Invocation + public static final byte ONE_BYTE_NOP = (byte) 0xEC; + public static final byte VARIABLE_LENGTH_NOP = (byte) 0xED; + public static final byte SYSTEM_SYMBOL = (byte) 0xEE; + public static final byte SYSTEM_MACRO_INVOCATION = (byte) 0xEF; - public static final byte VARIABLE_LENGTH_INTEGER = (byte) 0xF5; - public static final byte VARIABLE_LENGTH_DECIMAL = (byte) 0xF6; - public static final byte VARIABLE_LENGTH_TIMESTAMP = (byte) 0xF7; - public static final byte VARIABLE_LENGTH_STRING = (byte) 0xF8; - public static final byte VARIABLE_LENGTH_INLINE_SYMBOL = (byte) 0xF9; + public static final byte DELIMITED_END_MARKER = (byte) 0xF0; + public static final byte DELIMITED_LIST = (byte) 0xF1; + public static final byte DELIMITED_SEXP = (byte) 0xF2; + public static final byte DELIMITED_STRUCT = (byte) 0xF3; + public static final byte E_EXPRESSION_WITH_FLEX_UINT_ADDRESS = (byte) 0xF4; + public static final byte LENGTH_PREFIXED_MACRO_INVOCATION = (byte) 0xF5; + public static final byte VARIABLE_LENGTH_INTEGER = (byte) 0xF6; + public static final byte VARIABLE_LENGTH_DECIMAL = (byte) 0xF7; + public static final byte VARIABLE_LENGTH_TIMESTAMP = (byte) 0xF8; + public static final byte VARIABLE_LENGTH_STRING = (byte) 0xF9; + public static final byte VARIABLE_LENGTH_INLINE_SYMBOL = (byte) 0xFA; + public static final byte VARIABLE_LENGTH_LIST = (byte) 0xFB; + public static final byte VARIABLE_LENGTH_SEXP = (byte) 0xFC; + public static final byte VARIABLE_LENGTH_STRUCT_WITH_SIDS = (byte) 0xFD; public static final byte VARIABLE_LENGTH_BLOB = (byte) 0xFE; public static final byte 
VARIABLE_LENGTH_CLOB = (byte) 0xFF; } diff --git a/src/main/java/com/amazon/ion/impl/bin/PatchPoint.java b/src/main/java/com/amazon/ion/impl/bin/PatchPoint.java new file mode 100644 index 0000000000..8355549da1 --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/bin/PatchPoint.java @@ -0,0 +1,44 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.bin; + +/** + * Represents a slice of bytes that need to be overwritten by a variable length, unsigned integer that is too large + * to fit into the specified slice. + */ +class PatchPoint { + /** + * position of the data being patched out. + */ + public long oldPosition; + /** + * length of the data being patched out. + */ + public int oldLength; + /** + * size of the container data or annotations. + */ + public long length; + + public PatchPoint() { + oldPosition = -1; + oldLength = -1; + length = -1; + } + + @Override + public String toString() { + return "(PP old::(" + oldPosition + " " + oldLength + ") patch::(" + length + ")"; + } + + public PatchPoint initialize(final long oldPosition, final int oldLength, final long length) { + this.oldPosition = oldPosition; + this.oldLength = oldLength; + this.length = length; + return this; + } + + public PatchPoint clear() { + return initialize(-1, -1, -1); + } +} diff --git a/src/main/java/com/amazon/ion/impl/bin/PresenceBitmap.kt b/src/main/java/com/amazon/ion/impl/bin/PresenceBitmap.kt new file mode 100644 index 0000000000..d8a27b6e2f --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/bin/PresenceBitmap.kt @@ -0,0 +1,237 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.bin + +import com.amazon.ion.* +import com.amazon.ion.impl.macro.* +import com.amazon.ion.impl.macro.Macro.* + +/** + * Utility class for setting, storing, reading, and writing presence bits. 
+ * + * This class provides an API that maps 1:1 with parameters, with a maximum of 128 parameters. + * + * ### Usage – Binary Writer + * When stepping into an E-Expression, obtain a [PresenceBitmap] instance, [initialize] using the macro signature, and + * then reserve the correct number of bytes (see [byteSize]) to later encode the presence bits. + * While in the E-Expression, track the number of expressions or expression groups that have been written with that + * E-Expression as the immediate parent—this is the _parameter_ index. For each expression or expression group that is + * written directly in that container, call [PresenceBitmap.set] with the _parameter_ index and one of [VOID], + * [EXPRESSION], or [GROUP]. To omit an argument, callers to the binary writer will need to write an empty expression + * group (which should be elided and the corresponding presence bits set to `00`) or the binary writer must expose a + * `writeNoExpression()` method or similar. + * When stepping out of the E-Expression, use [PresenceBitmap.writeTo] to encode them into the appropriate location. + * + * ### Usage – Binary Reader + * When stepping into an E-Expression, obtain a [PresenceBitmap] instance, [initialize] using the macro signature, ensure + * that [byteSize] number of bytes is available in the reader's buffer, and call [readFrom] to populate the + * [PresenceBitmap] instance. Then, the presence bits for each parameter can be accessed by its _parameter_ index. + * + * ### Implementation Notes + * + * - We pretend that all parameters (including `!` (required) parameter) will get presence bits, and when reading we + * set the bits for the positions of the `!` parameters to `01` (single expression). + * - Since all the parameter cardinalities (other than `!`) use the same presence bit semantics, the writer doesn't + * need to inspect the signature to figure out what bits to put in our presence bits buffer. 
+ * - Because we have dummy bits for `!` parameters, [PresenceBitmap] can present an API that corresponds 1:1 with + * parameters, so we don't need to separately keep track of a presence bit index and the parameter count. + * - Why longs instead of an array? + * - An array would add another level of indirection + * - An array would require a loop in order to reset all the bytes to zero. + * - Why only 128 parameters? + * - Until proven otherwise, we should not assume that an arbitrarily large number of parameters MUST be supported. + * - The number of parameters could be increased (within limits). It seems reasonable to try to keep this class small + * enough to fit in a single cache line for a modern system—typically 64 bytes. + * + * TODO: Consider whether we can "compile" a specific function that can read the presence bits when we compile a macro. + * That _might_ be more efficient than this approach. + */ +internal class PresenceBitmap { + + companion object { + const val VOID = 0b00L + const val EXPRESSION = 0b01L + const val GROUP = 0b10L + const val RESERVED = 0b11L + + private const val TWO_BIT_MASK = 0b11L + private const val PRESENCE_BITS_SIZE_THRESHOLD = 0 + private const val PB_SLOTS_PER_BYTE = 4 + private const val PB_SLOTS_PER_LONG = 32 + private const val PB_BITS_PER_SLOT = 2 + + const val MAX_SUPPORTED_PARAMETERS = PB_SLOTS_PER_LONG * 4 + } + + var signature: List = emptyList() + private set + + /** The number of parameters for which presence bits must be written.
*/ + private var size: Int = 0 + + /** The total number of parameters in the macro signature */ + val totalParameterCount: Int + get() = signature.size + + /** The first 32 presence bits slots */ + private var a: Long = 0 + /** The second 32 presence bits slots */ + private var b: Long = 0 + /** The third 32 presence bits slots */ + private var c: Long = 0 + /** The fourth 32 presence bits slots */ + private var d: Long = 0 + + /** The number of bytes required to encode this [PresenceBitmap] */ + val byteSize: Int + get() = size divideByRoundingUp PB_SLOTS_PER_BYTE + + /** Resets this [PresenceBitmap] for the given [signature]. */ + fun initialize(signature: List) { + if (signature.size > MAX_SUPPORTED_PARAMETERS) throw IonException("Macros with more than 128 parameters are not supported by this implementation.") + this.signature = signature + a = 0 + b = 0 + c = 0 + d = 0 + // TODO – performance: consider calculating this once for a macro when it is compiled + // Calculate the actual number of presence bits that will be encoded for the given signature. + val nonRequiredParametersCount = signature.count { it.cardinality != ParameterCardinality.ExactlyOne } + val usePresenceBits = nonRequiredParametersCount > PRESENCE_BITS_SIZE_THRESHOLD || signature.any { it.type.taglessEncodingKind != null } + size = if (usePresenceBits) nonRequiredParametersCount else 0 + } + + /** + * Checks that all presence bits are valid for their corresponding parameters. + * Throws [IonException] if any are not.
+ */ + fun validate() { + val parameters = signature.iterator() + var i = 0 + while (parameters.hasNext()) { + val p = parameters.next() + val v = getUnchecked(i++) + val isValid = when (p.cardinality) { + ParameterCardinality.ZeroOrOne -> v == VOID || v == EXPRESSION + ParameterCardinality.ExactlyOne -> v == EXPRESSION + ParameterCardinality.OneOrMore -> v == EXPRESSION || v == GROUP + ParameterCardinality.ZeroOrMore -> v != RESERVED + } + if (!isValid) throw IonException("Invalid argument for parameter: $p") + } + } + + /** + * Populates this [PresenceBitmap] from the given [ByteArray] that is positioned on the first + * byte that (potentially) contains presence bits. + * + * When complete, the buffer is positioned on the first byte that does not contain presence bits. + */ + fun readFrom(bytes: ByteArray, startInclusive: Int) { + // Doesn't always contain the full byte. We shift the bits over every time we read a value + // so that the next value is always the least significant bits. + var currentByte: Long = -1 + var currentPosition: Int = startInclusive + var bitmapIndex = 0 + var i = 0 + + val parameters = signature.iterator() + while (parameters.hasNext()) { + val p = parameters.next() + if (p.cardinality == ParameterCardinality.ExactlyOne) { + setUnchecked(i++, EXPRESSION) + } else { + if (bitmapIndex % PB_SLOTS_PER_BYTE == 0) { + currentByte = bytes[currentPosition++].toLong() + } + setUnchecked(i++, currentByte and TWO_BIT_MASK) + currentByte = currentByte shr PB_BITS_PER_SLOT + bitmapIndex++ + } + } + } + + /** + * Gets by _parameter_ index, which includes _required_ parameters that have no presence bits. + * After [readFrom], the slots corresponding to a required parameter hold [EXPRESSION]. + */ + operator fun get(index: Int): Long { + if (index >= totalParameterCount || index < 0) throw IndexOutOfBoundsException("$index") + return getUnchecked(index) + } + + /** Gets a presence bits "slot" without any bounds checking. See [get].
*/ + private inline fun getUnchecked(index: Int): Long { + val shift = (index % PB_SLOTS_PER_LONG) * PB_BITS_PER_SLOT + when (index / PB_SLOTS_PER_LONG) { + 0 -> return (a shr shift) and TWO_BIT_MASK + 1 -> return (b shr shift) and TWO_BIT_MASK + 2 -> return (c shr shift) and TWO_BIT_MASK + 3 -> return (d shr shift) and TWO_BIT_MASK + else -> TODO("Unreachable") + } + } + + /** + * Sets a presence bits "slot" using bitwise OR with the existing contents. + * + * It is not possible to reset individual presence bits, nor + * is it possible to change the presence bits for a required parameter. + */ + operator fun set(index: Int, value: Long) { + if (index >= totalParameterCount || index < 0) throw IndexOutOfBoundsException("$index") + setUnchecked(index, value) + } + + /** Sets a presence bits "slot" without any bounds checking. See [set]. */ + private inline fun setUnchecked(index: Int, value: Long) { + val shiftedBits = (value shl ((index % PB_SLOTS_PER_LONG) * PB_BITS_PER_SLOT)) + when (index / PB_SLOTS_PER_LONG) { + 0 -> a = a or shiftedBits + 1 -> b = b or shiftedBits + 2 -> c = c or shiftedBits + 3 -> d = d or shiftedBits + } + } + + /** + * Writes this [PresenceBitmap] to [buffer] at the given [position]. 
+ */ + fun writeTo(buffer: WriteBuffer, position: Long) { + if (size == 0) return + var resultBuffer: Long = 0 + var resultPosition = 0 + var writePosition = position + var i = 0 + val parameters = signature.iterator() + + while (parameters.hasNext()) { + val parameter = parameters.next() + val bits = getUnchecked(i++) + if (parameter.cardinality == ParameterCardinality.ExactlyOne) continue + val destShift = resultPosition * PB_BITS_PER_SLOT + resultBuffer = resultBuffer or (bits shl destShift) + resultPosition++ + if (resultPosition == PB_SLOTS_PER_LONG) { + buffer.writeFixedIntOrUIntAt(writePosition, resultBuffer, Long.SIZE_BYTES) + writePosition += Long.SIZE_BYTES + resultPosition = 0 + resultBuffer = 0 + } + } + + val numBytes = resultPosition divideByRoundingUp PB_SLOTS_PER_BYTE + if (numBytes > 0) buffer.writeFixedIntOrUIntAt(writePosition, resultBuffer, numBytes) + } + + /** + * Integer division that rounds up instead of down. + * E.g.: + * - 0/4 = 0 + * - 1/4 = 1 + * - ... + * - 4/4 = 1 + * - 5/4 = 2 + */ + private infix fun Int.divideByRoundingUp(other: Int): Int = (this + (other - 1)) / other +} diff --git a/src/main/java/com/amazon/ion/impl/bin/SymbolInliningStrategy.kt b/src/main/java/com/amazon/ion/impl/bin/SymbolInliningStrategy.kt new file mode 100644 index 0000000000..94a74b8519 --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/bin/SymbolInliningStrategy.kt @@ -0,0 +1,58 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.bin + +/** + * A strategy to determine whether a SymbolToken with known text should be encoded by Symbol ID (SID) or as inline text. + * Symbols with unknown text are always written as a SID because the text is unknown. + * + * Some possible implementation ideas: + * + * - A simple implementation could elect to inline symbols that are less than `N` characters long.
+ * - A domain-specific implementation could choose to inline symbols with specific prefixes. E.g. annotations starting + * with `org.example` always get written inline. + * - A stateful implementation could keep track of how often a symbol is used, and elect to write the symbol inline + * until it has been used at least `N` times. + * - A streaming-oriented implementation could keep track of the symbols that are used, and inline any symbols not + * already in the symbol table. Once a top-level value is complete, some other component could inspect the list of + * new symbols and emit a Local Symbol Table append with those symbols so that they can be interned for use in future + * top-level values. + */ +fun interface SymbolInliningStrategy { + /** + * Represents the different kinds of usage of a symbol token. + */ + enum class SymbolKind { + VALUE, + FIELD_NAME, + ANNOTATION, + } + + /** + * Indicates whether a particular symbol text should be written inline (as opposed to writing as a SID). + */ + fun shouldWriteInline(symbolKind: SymbolKind, text: String): Boolean + + companion object { + /** + * A [SymbolInliningStrategy] that causes all symbols to be written as a SID, + * interning the text in the Local Symbol Table if necessary. + * + * This is equivalent to the behavior of symbols in Ion 1.0. + */ + @JvmField + val NEVER_INLINE = object : SymbolInliningStrategy { + override fun shouldWriteInline(symbolKind: SymbolKind, text: String): Boolean = false + override fun toString(): String = "NEVER_INLINE" + } + + /** + * A [SymbolInliningStrategy] that causes all symbols with known text to have their text written inline.
+ */ + @JvmField + val ALWAYS_INLINE = object : SymbolInliningStrategy { + override fun shouldWriteInline(symbolKind: SymbolKind, text: String): Boolean = true + override fun toString(): String = "ALWAYS_INLINE" + } + } +} diff --git a/src/main/java/com/amazon/ion/impl/bin/WriteBuffer.java b/src/main/java/com/amazon/ion/impl/bin/WriteBuffer.java index 2955147be7..2bb73772c1 100644 --- a/src/main/java/com/amazon/ion/impl/bin/WriteBuffer.java +++ b/src/main/java/com/amazon/ion/impl/bin/WriteBuffer.java @@ -1,20 +1,10 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl.bin; +import com.amazon.ion.impl.SystemSymbols_1_1; +import com.amazon.ion.impl.bin.utf8.Utf8StringEncoder; + import java.io.Closeable; import java.io.IOException; import java.io.OutputStream; @@ -22,21 +12,32 @@ import java.util.ArrayList; import java.util.List; +import static com.amazon.ion.impl.bin.Ion_1_1_Constants.FLEX_SYM_SYSTEM_SYMBOL_OFFSET; + /** * A facade over {@link Block} management and low-level Ion encoding concerns for the {@link IonRawBinaryWriter}. + * + * The allocator must always have a block size of at least 10 bytes, otherwise writing a FlexInt or FlexUInt may result + * in an IndexOutOfBoundsException. 
The number 10 is chosen because it is the maximum number of bytes required to write + * a long value as a FlexInt or VarInt. */ -/*package*/ final class WriteBuffer implements Closeable +public final class WriteBuffer implements Closeable { private final BlockAllocator allocator; private final List blocks; private Block current; private int index; private Runnable endOfBlockCallBack; - + private byte[] scratch = new byte[32]; public WriteBuffer(final BlockAllocator allocator, Runnable endOfBlockCallBack) { this.allocator = allocator; + + if (allocator.getBlockSize() < 10) { + throw new IllegalArgumentException("WriteBuffer requires an allocator with a block size of at least 10."); + } + this.blocks = new ArrayList(); // initial seed of the first block @@ -90,10 +91,46 @@ public void truncate(final long position) { final int index = index(position); final int offset = offset(position); - final Block block = blocks.get(index); - this.index = index; - block.limit = offset; - current = block; + while (this.index != index) { + blocks.remove(this.index--); + } + current = blocks.get(index); + current.limit = offset; + } + + /** + * Moves forward without writing any data. + * + * There is no guarantee as to what values the reserved bytes will have. + * Only use this method if you will overwrite the bytes later with valid data, or if you have already written data + * to these bytes. + * + * Returns the position of the first reserved byte. + */ + public long reserve(int numBytes) { + long startOfReservedBytes = position(); + // It would also fit in the current block if numBytes == current.remaining(), but then we would have to + // increment `index` and check whether to allocate a new block. So, we'll optimize the early return for the most + // common situation, and lump the == case into the slower path. 
+ if (numBytes < current.remaining()) { + current.limit += numBytes; + return startOfReservedBytes; + } + + while (numBytes > 0) { + int numBytesInThisBlock = Math.min(current.remaining(), numBytes); + current.limit += numBytesInThisBlock; + numBytes -= numBytesInThisBlock; + + if (current.remaining() == 0) { + if (index == blocks.size() - 1) { + allocateNewBlock(); + } + index++; + current = blocks.get(index); + } + } + return startOfReservedBytes; } /** Returns the amount of capacity left in the current block. */ @@ -1227,7 +1264,7 @@ else if (magnitude < VAR_INT_6_OCTET_MIN_VALUE && remaining >= 5) public void writeVarUIntDirect1At(final long position, final long value) { - writeUInt8At(position, (value & VAR_INT_MASK) | VAR_INT_FINAL_OCTET_SIGNAL_MASK); + writeByteAt(position, (value & VAR_INT_MASK) | VAR_INT_FINAL_OCTET_SIGNAL_MASK); } private void writeVarUIntDirect2StraddlingAt(final int index, final int offset, final long value) @@ -1255,7 +1292,11 @@ public void writeVarUIntDirect2At(long position, long value) block.data[offset + 1] = (byte) ((value & VAR_INT_MASK) | VAR_INT_FINAL_OCTET_SIGNAL_MASK); } - public void writeUInt8At(final long position, final long value) + public void writeByteAt(final long position, final byte value) { + writeByteAt(position, (long) value); + } + + public void writeByteAt(final long position, final long value) { final int index = index(position); final int offset = offset(position); @@ -1277,30 +1318,25 @@ public void writeLowerNibbleAt(final long position, final long value) { block.data[offset] = (byte) (bitValue & 0xF0 | value) ; } - /** Get the length of FlexInt for the provided value. 
*/ - public static int flexIntLength(final long value) { - int numMagnitudeBitsRequired; - if (value < 0) { - int numLeadingOnes = Long.numberOfLeadingZeros(~value); - numMagnitudeBitsRequired = 64 - numLeadingOnes; - } else { - int numLeadingZeros = Long.numberOfLeadingZeros(value); - numMagnitudeBitsRequired = 64 - numLeadingZeros; - } - return numMagnitudeBitsRequired / 7 + 1; - } - /** Writes a FlexInt to this WriteBuffer, returning the number of bytes that were needed to encode the value */ public int writeFlexInt(final long value) { - int numBytes = flexIntLength(value); - return writeFlexIntOrUInt(value, numBytes); + int numBytes = FlexInt.flexIntLength(value); + // writeFlexIntOrUIntAt does not advance index or limit, so we reserve the bytes, and then write out the number + long position = reserve(numBytes); + writeFlexIntOrUIntAt(position, value, numBytes); + return numBytes; } - /** Get the length of FlexUInt for the provided value. */ - public static int flexUIntLength(final long value) { - int numLeadingZeros = Long.numberOfLeadingZeros(value); - int numMagnitudeBitsRequired = 64 - numLeadingZeros; - return (numMagnitudeBitsRequired - 1) / 7 + 1; + /** Writes a FlexUInt to this WriteBuffer, returning the number of bytes that were needed to encode the value */ + public int writeFlexUInt(final int value) { + if (value < 0) { + throw new IllegalArgumentException("Attempted to write a FlexUInt for " + value); + } + int numBytes = FlexInt.flexUIntLength(value); + // writeFlexIntOrUIntAt does not advance index or limit, so we reserve the bytes, and then write out the number + long position = reserve(numBytes); + writeFlexIntOrUIntAt(position, value, numBytes); + return numBytes; } /** Writes a FlexUInt to this WriteBuffer, returning the number of bytes that were needed to encode the value */ @@ -1308,131 +1344,71 @@ public int writeFlexUInt(final long value) { if (value < 0) { throw new IllegalArgumentException("Attempted to write a FlexUInt for " + value); } 
- int numBytes = flexUIntLength(value); - return writeFlexIntOrUInt(value, numBytes); + int numBytes = FlexInt.flexUIntLength(value); + // writeFlexIntOrUIntAt does not advance index or limit, so we reserve the bytes, and then write out the number + long position = reserve(numBytes); + writeFlexIntOrUIntAt(position, value, numBytes); + return numBytes; } /** + * Writes a FlexInt or FlexUInt to this WriteBuffer at the specified position. + * * Because the flex int and flex uint encodings are so similar, we can use this method to write either one as long * as we provide the correct number of bytes needed to encode the value. + * + * If the allocator's block size is ever less than 10 bytes, this may throw an IndexOutOfBoundsException. */ - private int writeFlexIntOrUInt(final long value, final int numBytes) { - if (numBytes == 1) { - writeByte((byte) (0x01 | (byte)(value << 1))); - } else if (numBytes == 2) { - writeByte((byte) (0x02 | (byte)(value << 2))); - writeByte((byte) (value >> 6)); - } else if (numBytes == 3) { - writeByte((byte) (0x04 | (byte)(value << 3))); - writeByte((byte) (value >> 5)); - writeByte((byte) (value >> 13)); - } else if (numBytes == 4) { - writeByte((byte) (0x08 | (byte)(value << 4))); - writeByte((byte) (value >> 4)); - writeByte((byte) (value >> 12)); - writeByte((byte) (value >> 20)); + public void writeFlexIntOrUIntAt(final long position, final long value, final int numBytes) { + int index = index(position); + Block block = blocks.get(index); + int dataOffset = offset(position); + if (dataOffset + numBytes < block.capacity()) { + FlexInt.writeFlexIntOrUIntInto(block.data, dataOffset, value, numBytes); } else { - // Finally, fall back to a loop based approach. - - int i = 0; // `i` gets incremented for every byte written. - - // Start with leading zero bytes. - // If there's 1-8 total bytes, we need no leading zero-bytes. 
- // If there's 9-16 total bytes, we need one zero-byte - // If there's 17-24 total bytes, we need two zero-bytes, etc. - for (; i < (numBytes - 1)/8; i++) { - writeByte((byte) 0); + FlexInt.writeFlexIntOrUIntInto(scratch, 0, value, numBytes); + if (index == blocks.size() - 1) { + allocateNewBlock(); } - - // Write the last length bits, possibly also containing some value bits. - int remainingLengthBits = (numBytes - 1) % 8; - byte lengthPart = (byte) (0x01 << remainingLengthBits); - - int valueBitOffset = remainingLengthBits + 1; - byte valuePart = (byte) (value << valueBitOffset); - - writeByte((byte) (valuePart | lengthPart)); - i++; - - int valueByteOffset = 1; - for (; i < numBytes; i++) { - writeByte((byte) (value >> (8 * valueByteOffset - valueBitOffset))); - valueByteOffset++; + for (int i = 0; i < numBytes; i++) { + writeByteAt(position + i, scratch[i]); } - } - return numBytes; - } - - public static int flexIntLength(final BigInteger value) { - return value.bitLength() / 7 + 1; - } - - public static int flexUIntLength(final BigInteger value) { - return (value.bitLength() - 1) / 7 + 1; } + /** Writes a FlexInt to this WriteBuffer, returning the number of bytes that were needed to encode the value */ public int writeFlexInt(final BigInteger value) { - int numBytes = flexIntLength(value); - return writeFlexIntOrUIntForBigInteger(value, numBytes); + int numBytes = FlexInt.flexIntLength(value); + if (numBytes > current.remaining()) { + if (scratch.length < numBytes) { + scratch = new byte[numBytes]; + } + FlexInt.writeFlexIntOrUIntInto(scratch, 0, value, numBytes); + writeBytesSlow(scratch, 0, numBytes); + } else { + FlexInt.writeFlexIntOrUIntInto(current.data, current.limit, value, numBytes); + current.limit += numBytes; + } + return numBytes; } + /** Writes a FlexUInt to this WriteBuffer, returning the number of bytes that were needed to encode the value */ public int writeFlexUInt(final BigInteger value) { if (value.signum() < 0) { throw new 
IllegalArgumentException("Attempted to write a FlexUInt for " + value); } - int numBytes = flexUIntLength(value); - return writeFlexIntOrUIntForBigInteger(value, numBytes); - } - - private int writeFlexIntOrUIntForBigInteger(final BigInteger value, final int numBytes) { - // TODO: Should we branch to the implementation for long if the number is small enough? - // https://github.com/amazon-ion/ion-java/issues/614 - byte[] valueBytes = value.toByteArray(); - - int i = 0; // `i` gets incremented for every byte written. - - // Start with leading zero bytes. - // If there's 1-8 total bytes, we need no leading zero-bytes. - // If there's 9-16 total bytes, we need one zero-byte - // If there's 17-24 total bytes, we need two zero-bytes, etc. - for (; i < (numBytes - 1)/8; i++) { - writeByte((byte) 0); - } - - // Write the last length bits, possibly also containing some value bits. - int remainingLengthBits = (numBytes - 1) % 8; - byte lengthPart = (byte) (0x01 << remainingLengthBits); - int valueBitOffset = remainingLengthBits + 1; - byte valuePart = (byte) (valueBytes[valueBytes.length - 1] << valueBitOffset); - writeByte((byte) (valuePart | lengthPart)); - i++; - - for (int valueByteOffset = valueBytes.length - 1; valueByteOffset > 0; valueByteOffset--) { - // Technically it's only a nibble if the bitOffset is 4, so we call it nibble-ish - byte highNibbleIsh = (byte) (valueBytes[valueByteOffset - 1] << (valueBitOffset)); - byte lowNibbleIsh = (byte) ((valueBytes[valueByteOffset] & 0xFF) >> (8 - valueBitOffset)); - writeByte((byte) (highNibbleIsh | lowNibbleIsh)); - i++; - } - if (i < numBytes) { - writeByte((byte) ((valueBytes[0]) >> (8 - valueBitOffset))); - } - - return numBytes; - } - - /** Get the length of FixedInt for the provided value. 
*/ - public static int fixedIntLength(final long value) { - int numMagnitudeBitsRequired; - if (value < 0) { - int numLeadingOnes = Long.numberOfLeadingZeros(~value); - numMagnitudeBitsRequired = 64 - numLeadingOnes; + int numBytes = FlexInt.flexUIntLength(value); + if (numBytes > current.remaining()) { + if (scratch.length < numBytes) { + scratch = new byte[numBytes]; + } + FlexInt.writeFlexIntOrUIntInto(scratch, 0, value, numBytes); + writeBytesSlow(scratch, 0, numBytes); } else { - int numLeadingZeros = Long.numberOfLeadingZeros(value); - numMagnitudeBitsRequired = 64 - numLeadingZeros; + FlexInt.writeFlexIntOrUIntInto(current.data, current.limit, value, numBytes); + current.limit += numBytes; } - return numMagnitudeBitsRequired / 8 + 1; + return numBytes; } /** @@ -1440,15 +1416,8 @@ public static int fixedIntLength(final long value) { * Returns the number of bytes that were needed to encode the value. */ public int writeFixedInt(final long value) { - int numBytes = fixedIntLength(value); - return _writeFixedIntOrUInt(value, numBytes); - } - - /** Get the length of FixedUInt for the provided value. */ - public static int fixedUIntLength(final long value) { - int numLeadingZeros = Long.numberOfLeadingZeros(value); - int numMagnitudeBitsRequired = 64 - numLeadingZeros; - return (numMagnitudeBitsRequired - 1) / 8 + 1; + int numBytes = FixedInt.fixedIntLength(value); + return writeFixedIntOrUInt(value, numBytes); } /** @@ -1459,7 +1428,7 @@ public int writeFixedUInt(final long value) { if (value < 0) { throw new IllegalArgumentException("Attempted to write a FixedUInt for " + value); } - int numBytes = fixedUIntLength(value); + int numBytes = FixedInt.fixedUIntLength(value); return _writeFixedIntOrUInt(value, numBytes); } @@ -1516,6 +1485,74 @@ private int _writeFixedIntOrUInt(final long value, final int numBytes) { return numBytes; } + /** + * Writes a FixedInt or FixedUInt to this WriteBuffer at the specified position. 
+ * If the allocator's block size is ever less than 8 bytes, this may throw an IndexOutOfBoundsException. + */ + public void writeFixedIntOrUIntAt(final long position, final long value, final int numBytes) { + int index = index(position); + Block block = blocks.get(index); + int dataOffset = offset(position); + if (dataOffset + numBytes < block.capacity()) { + FixedInt.writeFixedIntOrUIntInto(block.data, dataOffset, value, numBytes); + } else { + FixedInt.writeFixedIntOrUIntInto(scratch, 0, value, numBytes); + if (index == blocks.size() - 1) { + allocateNewBlock(); + } + for (int i = 0; i < numBytes; i++) { + writeByteAt(position + i, scratch[i]); + } + } + } + + /** + * Writes a FixedInt or FixedUInt for an arbitrarily large integer that is represented + * as a byte array in which the most significant byte is the first in the array, and the least + * significant byte is the last in the array. + */ + public int writeFixedIntOrUInt(final byte[] value) { + for (int i = value.length - 1; i >= 0; i--) { + writeByte(value[i]); + } + return value.length; + } + + /** + * Writes a FlexSym with a symbol id. + */ + public int writeFlexSym(int sid) { + if (sid != 0) { + return writeFlexInt(sid); + } else { + writeByte(FlexInt.ZERO); + writeByte((byte) FLEX_SYM_SYSTEM_SYMBOL_OFFSET); + return 2; + } + } + + /** + * Writes a FlexSym with inline text. + */ + public int writeFlexSym(Utf8StringEncoder.Result text) { + if (text.getEncodedLength() == 0) { + return writeFlexSym(SystemSymbols_1_1.EMPTY_TEXT); + } else { + int numLengthBytes = writeFlexInt(-text.getEncodedLength()); + writeBytes(text.getBuffer(), 0, text.getEncodedLength()); + return numLengthBytes + text.getEncodedLength(); + } + } + + /** + * Writes a FlexSym that refers to a system symbol. + */ + public int writeFlexSym(SystemSymbols_1_1 symbol) { + writeByte(FlexInt.ZERO); + writeByte((byte) (symbol.getId() + FLEX_SYM_SYSTEM_SYMBOL_OFFSET)); + return 2; + } + /** Write the entire buffer to output stream. 
*/ public void writeTo(final OutputStream out) throws IOException { diff --git a/src/main/java/com/amazon/ion/impl/lite/IonDatagramLite.java b/src/main/java/com/amazon/ion/impl/lite/IonDatagramLite.java index 6edf35a668..cd528be673 100644 --- a/src/main/java/com/amazon/ion/impl/lite/IonDatagramLite.java +++ b/src/main/java/com/amazon/ion/impl/lite/IonDatagramLite.java @@ -323,7 +323,14 @@ public ListIterator listIterator(int index) @Override public IonValue set(int index, IonValue element){ - throw new UnsupportedOperationException(); + if (((IonValueLite) element)._context.getContextSymbolTable() != getContextForIndex(null, index).getContextSymbolTable()) { + // Note: this isn't impossible to support, but it requires care in the case where 'element' may depend + // on symbol table mappings unique to its own context. In order to sidestep this complexity until a use + // case is identified for it, only setting the element at an index that uses the same symbol table is + // currently supported. + throw new UnsupportedOperationException(); + } + return super.set(index, element); } @Override diff --git a/src/main/java/com/amazon/ion/impl/macro/EExpressionArgsReader.java b/src/main/java/com/amazon/ion/impl/macro/EExpressionArgsReader.java new file mode 100644 index 0000000000..0549b0ef98 --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/macro/EExpressionArgsReader.java @@ -0,0 +1,287 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.macro; + +import com.amazon.ion.IonType; +import com.amazon.ion.SymbolToken; +import com.amazon.ion.impl.bin.PresenceBitmap; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * An {@link EExpressionArgsReader} reads an E-Expression from a {@link ReaderAdapter}, constructs + * a list of {@link Expression}s representing the E-Expression and its arguments, and prepares a {@link MacroEvaluator} + * to evaluate these expressions. + *

+ * There are two sources of expressions: the template macro definitions and the macro arguments. + * The {@link MacroEvaluator} merges those. + *

+ * The {@link Expression} model does not (yet) support lazily reading values, so for now, all macro arguments must + * be read eagerly. + */ +public abstract class EExpressionArgsReader { + + private final ReaderAdapter reader; + + /** + * Constructor. + * @param reader the {@link ReaderAdapter} from which to read {@link Expression}s. + * @see ReaderAdapterIonReader + * @see ReaderAdapterContinuable + */ + public EExpressionArgsReader(ReaderAdapter reader) { + this.reader = reader; + } + + /** + * @return true if the value upon which the reader is positioned represents a macro invocation; otherwise, false. + */ + protected abstract boolean isMacroInvocation(); + + /** + * @return true if the container value on which the reader is positioned represents an expression group; otherwise, + * false. + */ + protected abstract boolean isContainerAnExpressionGroup(); + + /** + * Eagerly collects the annotations on the current value. + * @return the annotations, or an empty list if there are none. + */ + protected abstract List getAnnotations(); + + /** + * Navigates the reader to the next raw value, without interpreting any system values. + * @return true if there is a next value; false if the end of the container was reached. + */ + protected abstract boolean nextRaw(); + + /** + * Steps into a container on which the reader has been positioned by calling {@link #nextRaw()}. + */ + protected abstract void stepInRaw(); + + /** + * Steps out of a container on which the reader had been positioned by calling {@link #nextRaw()}. + */ + protected abstract void stepOutRaw(); + + /** + * Steps into an e-expression. + */ + protected abstract void stepIntoEExpression(); + + /** + * Steps out of an e-expression. + */ + protected abstract void stepOutOfEExpression(); + + /** + * Reads a single parameter to a macro invocation. + * @param parameter information about the parameter from the macro signature. 
+ * @param parameterPresence the presence bits dedicated to this parameter (unused in text). + * @param expressions receives the expressions as they are materialized. + * @param isTrailing true if this parameter is the last one in the signature; otherwise, false (unused in binary). + */ + protected abstract void readParameter(Macro.Parameter parameter, long parameterPresence, List expressions, boolean isTrailing); + + /** + * Reads the macro's address and attempts to resolve that address to a Macro from the macro table. + * @return the loaded macro. + */ + protected abstract Macro loadMacro(); + + /** + * Reads the argument encoding bitmap into a PresenceBitmap. This is only applicable to binary. + * @param signature the macro signature. + * @return a PresenceBitmap created from the argument encoding bitmap, or null. + */ + protected abstract PresenceBitmap loadPresenceBitmapIfNecessary(List signature); + + /** + * Reads a scalar value from the stream into an expression. + * @param type the type of scalar. + * @param annotations any annotations on the scalar. + * @param expressions receives the expressions as they are materialized. 
+ */ + private void readScalarValueAsExpression( + IonType type, + List annotations, + List expressions + ) { + Expression.EExpressionBodyExpression expression; + if (reader.isNullValue()) { + expression = new Expression.NullValue(annotations, type); + } else { + switch (type) { + case BOOL: + expression = new Expression.BoolValue(annotations, reader.booleanValue()); + break; + case INT: + switch (reader.getIntegerSize()) { + case INT: + case LONG: + expression = new Expression.LongIntValue(annotations, reader.longValue()); + break; + case BIG_INTEGER: + expression = new Expression.BigIntValue(annotations, reader.bigIntegerValue()); + break; + default: + throw new IllegalStateException(); + } + break; + case FLOAT: + expression = new Expression.FloatValue(annotations, reader.doubleValue()); + break; + case DECIMAL: + expression = new Expression.DecimalValue(annotations, reader.decimalValue()); + break; + case TIMESTAMP: + expression = new Expression.TimestampValue(annotations, reader.timestampValue()); + break; + case SYMBOL: + expression = new Expression.SymbolValue(annotations, reader.symbolValue()); + break; + case STRING: + expression = new Expression.StringValue(annotations, reader.stringValue()); + break; + case CLOB: + expression = new Expression.ClobValue(annotations, reader.newBytes()); + break; + case BLOB: + expression = new Expression.BlobValue(annotations, reader.newBytes()); + break; + default: + throw new IllegalStateException(); + } + } + expressions.add(expression); + } + + /** + * Reads a container value from the stream into a list of expressions that will eventually be passed to + * the MacroEvaluator responsible for evaluating the e-expression to which this container belongs. + * @param type the type of container. + * @param annotations any annotations on the container. + * @param expressions receives the expressions as they are materialized. 
+ */ + private void readContainerValueAsExpression( + IonType type, + List annotations, + List expressions + ) { + int startIndex = expressions.size(); + expressions.add(Expression.Placeholder.INSTANCE); + boolean isExpressionGroup = isContainerAnExpressionGroup(); + // Eagerly parse the container, "compiling" it into expressions to be evaluated later. + stepInRaw(); + while (nextRaw()) { + if (type == IonType.STRUCT) { + expressions.add(new Expression.FieldName(reader.getFieldNameSymbol())); + } + readValueAsExpression(false, expressions); // TODO avoid recursion + } + stepOutRaw(); + // Overwrite the placeholder with an expression representing the actual type of the container and the + // start and end indices of its expressions. + Expression.EExpressionBodyExpression expression; + if (isExpressionGroup) { + expression = new Expression.ExpressionGroup(startIndex, expressions.size()); + } else { + switch (type) { + case LIST: + expression = new Expression.ListValue(annotations, startIndex, expressions.size()); + break; + case SEXP: + expression = new Expression.SExpValue(annotations, startIndex, expressions.size()); + break; + case STRUCT: + // TODO consider whether templateStructIndex could be leveraged or should be removed + expression = new Expression.StructValue(annotations, startIndex, expressions.size(), Collections.emptyMap()); + break; + default: + throw new IllegalStateException(); + } + } + expressions.set(startIndex, expression); + } + + /** + * Reads the rest of the stream into a single expression group. + * @param expressions receives the expressions as they are materialized. 
+ */ + private void readStreamAsExpressionGroup( + List expressions + ) { + int startIndex = expressions.size(); + expressions.add(Expression.Placeholder.INSTANCE); + do { + readValueAsExpression(false, expressions); // TODO avoid recursion + } while (nextRaw()); + expressions.set(startIndex, new Expression.ExpressionGroup(startIndex, expressions.size())); + } + + /** + * Reads a value from the stream into expression(s) that will eventually be passed to the MacroEvaluator + * responsible for evaluating the e-expression to which this value belongs. + * @param isImplicitRest true if this is the final parameter in the signature, it is variadic, and the format + * supports implicit rest parameters (text only); otherwise, false. + * @param expressions receives the expressions as they are materialized. + */ + protected void readValueAsExpression(boolean isImplicitRest, List expressions) { + if (isMacroInvocation()) { + collectEExpressionArgs(expressions); // TODO avoid recursion + return; + } + IonType type = reader.encodingType(); + List annotations = getAnnotations(); + if (isImplicitRest && !isContainerAnExpressionGroup()) { + readStreamAsExpressionGroup(expressions); + } else if (IonType.isContainer(type)) { + readContainerValueAsExpression(type, annotations, expressions); + } else { + readScalarValueAsExpression(type, annotations, expressions); + } + } + + /** + * Collects the expressions that compose the current macro invocation. + * @param expressions receives the expressions as they are materialized. 
+ */ + private void collectEExpressionArgs(List expressions) { + if (reader.isInStruct()) { + expressions.add(new Expression.FieldName(reader.getFieldNameSymbol())); + } + Macro macro = loadMacro(); + List signature = macro.getSignature(); + PresenceBitmap presenceBitmap = loadPresenceBitmapIfNecessary(signature); + int invocationStartIndex = expressions.size(); + expressions.add(Expression.Placeholder.INSTANCE); + int numberOfParameters = signature.size(); + stepIntoEExpression(); + for (int i = 0; i < numberOfParameters; i++) { + readParameter( + signature.get(i), + presenceBitmap == null ? 0 : presenceBitmap.get(i), + expressions, + i == (numberOfParameters - 1) + ); + } + stepOutOfEExpression(); + expressions.set(invocationStartIndex, new Expression.EExpression(macro, invocationStartIndex, expressions.size())); + } + + /** + * Materializes the expressions that compose the macro invocation on which the reader is positioned and feeds + * them to the macro evaluator. + */ + public void beginEvaluatingMacroInvocation(MacroEvaluator macroEvaluator) { + // TODO performance: use a pool of expression lists to avoid repetitive allocations. + List expressions = new ArrayList<>(); + // TODO performance: avoid fully materializing all expressions up-front. + collectEExpressionArgs(expressions); + macroEvaluator.initExpansion(expressions); + } +} diff --git a/src/main/java/com/amazon/ion/impl/macro/EncodingContext.kt b/src/main/java/com/amazon/ion/impl/macro/EncodingContext.kt new file mode 100644 index 0000000000..046f97acd4 --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/macro/EncodingContext.kt @@ -0,0 +1,25 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.macro + +/** + * When we implement modules, this will likely need to be replaced. + * For now, it is a placeholder for what is to come and a container for the macro table. 
+ */ +class EncodingContext { + + val macroTable: MacroTable + val isMutable: Boolean + + @JvmOverloads + constructor(macroTable: MacroTable, isMutable: Boolean = true) { + this.macroTable = macroTable + this.isMutable = isMutable + } + + companion object { + @JvmStatic + @get:JvmName("getDefault") + val DEFAULT = EncodingContext(SystemMacro.SYSTEM_MACRO_TABLE, false) + } +} diff --git a/src/main/java/com/amazon/ion/impl/macro/Environment.kt b/src/main/java/com/amazon/ion/impl/macro/Environment.kt new file mode 100644 index 0000000000..dfbd05f759 --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/macro/Environment.kt @@ -0,0 +1,30 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.macro + +/** + * An `Environment` contains variable bindings for a given macro evaluation. + * + * The [arguments] is a list of expressions for the arguments that were passed to the current macro. + * It may also contain other expressions if the current macro invocation is part of a larger evaluation. + * + * The [argumentIndices] is a mapping from parameter index to the start of the corresponding expression in [arguments]. + * + * The [parentEnvironment] is an environment to use if any of the expressions in this environment + * contains a variable that references something from an outer macro invocation. 
+ */ +data class Environment private constructor( + // Any variables found here have to be looked up in [parentEnvironment] + val arguments: List, + // TODO: Replace with IntArray + val argumentIndices: List, + val parentEnvironment: Environment?, +) { + fun createChild(arguments: List, argumentIndices: List) = Environment(arguments, argumentIndices, this) + companion object { + @JvmStatic + val EMPTY = Environment(emptyList(), emptyList(), null) + @JvmStatic + fun create(arguments: List, argumentIndices: List) = Environment(arguments, argumentIndices, null) + } +} diff --git a/src/main/java/com/amazon/ion/impl/macro/Expression.kt b/src/main/java/com/amazon/ion/impl/macro/Expression.kt new file mode 100644 index 0000000000..555214f7b6 --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/macro/Expression.kt @@ -0,0 +1,241 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.macro + +import com.amazon.ion.* +import java.math.BigDecimal +import java.math.BigInteger + +/** + * In-memory expression model. + * + * We cannot use [`IonValue`](com.amazon.ion.IonValue) for this because `IonValue` requires references to parent + * containers and to an IonSystem which makes it impractical for reading and writing macro definitions. Furthermore, + * there is information we need to capture that cannot be expressed in the IonValue model, such as macro invocations + * and variable references. + * + * Template bodies are compiled into a list of expressions, without nesting, for ease and efficiency of evaluating + * e-expressions. Because of this, the container types do not have other values nested in them; rather they contain a + * range that indicates which of the following expressions are part of that container. + * + * TODO: Consider creating an enum or integer-based expression type id so that we can `switch` efficiently on it. 
+ */ +sealed interface Expression { + + /** Interface for expressions that "contain" other expressions */ + sealed interface HasStartAndEnd : Expression { + /** + * The position of this expression in its containing list. + * Child expressions (if any) start at `selfIndex + 1`. + */ + val selfIndex: Int + /** + * The index of the first child expression (if any). + * Always equal to `selfIndex + 1`. + */ + val startInclusive: Int get() = selfIndex + 1 + /** + * The exclusive end of the child expressions (if any). + * If there are no child expressions, will be equal to [startInclusive]. + */ + val endExclusive: Int + } + + /** Marker interface representing expressions that can be present in E-Expressions. */ + sealed interface EExpressionBodyExpression : Expression + + /** Marker interface representing expressions in the body of a template. */ + sealed interface TemplateBodyExpression : Expression + + /** + * Marker interface for things that are part of the Ion data model. + * These expressions are the only ones that may be the output from the macro evaluator. + * All [DataModelExpression]s are also valid to use as [TemplateBodyExpression]s and [EExpressionBodyExpression]s. + */ + sealed interface DataModelExpression : Expression, EExpressionBodyExpression, TemplateBodyExpression + + /** + * Interface for expressions that are _values_ in the Ion data model. + */ + sealed interface DataModelValue : DataModelExpression { + val annotations: List + val type: IonType + + fun withAnnotations(annotations: List): DataModelValue + } + + /** Expressions that represent Ion container types */ + sealed interface DataModelContainer : HasStartAndEnd, DataModelValue + + /** + * A temporary placeholder that is used only while a macro or e-expression is partially compiled. + */ + object Placeholder : TemplateBodyExpression, EExpressionBodyExpression + + /** + * A group of expressions that form the argument for one macro parameter. 
+ * + * TODO: Should we include the parameter name for ease of debugging? + * We'll hold off for now and see how the macro evaluator shakes out. + * + * @property selfIndex the index of the first expression of the expression group (i.e. this instance) + * @property endExclusive the index one past the last expression contained in the expression group (exclusive) + */ + data class ExpressionGroup(override val selfIndex: Int, override val endExclusive: Int) : EExpressionBodyExpression, TemplateBodyExpression, HasStartAndEnd + + // Scalars + data class NullValue(override val annotations: List = emptyList(), override val type: IonType) : DataModelValue { + override fun withAnnotations(annotations: List) = copy(annotations = annotations) + } + + data class BoolValue(override val annotations: List = emptyList(), val value: Boolean) : DataModelValue { + override val type: IonType get() = IonType.BOOL + override fun withAnnotations(annotations: List) = copy(annotations = annotations) + } + + sealed interface IntValue : DataModelValue { + val bigIntegerValue: BigInteger + val longValue: Long + } + + data class LongIntValue(override val annotations: List = emptyList(), val value: Long) : IntValue { + override val type: IonType get() = IonType.INT + override fun withAnnotations(annotations: List) = copy(annotations = annotations) + override val bigIntegerValue: BigInteger get() = BigInteger.valueOf(value) + override val longValue: Long get() = value + } + + data class BigIntValue(override val annotations: List = emptyList(), val value: BigInteger) : IntValue { + override val type: IonType get() = IonType.INT + override fun withAnnotations(annotations: List) = copy(annotations = annotations) + override val bigIntegerValue: BigInteger get() = value + override val longValue: Long get() = value.longValueExact() + } + + data class FloatValue(override val annotations: List = emptyList(), val value: Double) : DataModelValue { + override val type: IonType get() = IonType.FLOAT + override fun
withAnnotations(annotations: List) = copy(annotations = annotations) + } + + data class DecimalValue(override val annotations: List = emptyList(), val value: BigDecimal) : DataModelValue { + override val type: IonType get() = IonType.DECIMAL + override fun withAnnotations(annotations: List) = copy(annotations = annotations) + } + + data class TimestampValue(override val annotations: List = emptyList(), val value: Timestamp) : DataModelValue { + override val type: IonType get() = IonType.TIMESTAMP + override fun withAnnotations(annotations: List) = copy(annotations = annotations) + } + + sealed interface TextValue : DataModelValue { + val stringValue: String + } + + data class StringValue(override val annotations: List = emptyList(), val value: String) : TextValue { + override val type: IonType get() = IonType.STRING + override val stringValue: String get() = value + override fun withAnnotations(annotations: List) = copy(annotations = annotations) + } + + data class SymbolValue(override val annotations: List = emptyList(), val value: SymbolToken) : TextValue { + override val type: IonType get() = IonType.SYMBOL + override val stringValue: String get() = value.assumeText() + override fun withAnnotations(annotations: List) = copy(annotations = annotations) + } + + sealed interface LobValue : DataModelValue { + // TODO: Consider replacing this with a ByteArray "View" that is backed by the original + // data source to avoid eagerly copying data. 
+ val value: ByteArray + } + + // We must override hashcode and equals in the lob types because `value` is a `byte[]` + data class BlobValue(override val annotations: List = emptyList(), override val value: ByteArray) : LobValue { + override val type: IonType get() = IonType.BLOB + override fun withAnnotations(annotations: List) = copy(annotations = annotations) + override fun hashCode(): Int = annotations.hashCode() * 31 + value.contentHashCode() + override fun equals(other: Any?): Boolean { + if (this === other) return true + if (other !is BlobValue) return false + if (other.annotations != this.annotations) return false + return value === other.value || value.contentEquals(other.value) + } + } + + data class ClobValue(override val annotations: List = emptyList(), override val value: ByteArray) : LobValue { + override val type: IonType get() = IonType.CLOB + override fun withAnnotations(annotations: List) = copy(annotations = annotations) + override fun hashCode(): Int = annotations.hashCode() * 31 + value.contentHashCode() + override fun equals(other: Any?): Boolean { + if (this === other) return true + if (other !is ClobValue) return false + if (other.annotations != this.annotations) return false + return value === other.value || value.contentEquals(other.value) + } + } + + /** + * An Ion List that could contain variables or macro invocations. + * + * @property selfIndex the index of the first expression of the list (i.e. this instance) + * @property endExclusive the index one past the last expression contained in the list (exclusive) + */ + data class ListValue( + override val annotations: List = emptyList(), + override val selfIndex: Int, + override val endExclusive: Int + ) : DataModelContainer { + override val type: IonType get() = IonType.LIST + override fun withAnnotations(annotations: List) = copy(annotations = annotations) + } + + /** + * An Ion SExp that could contain variables or macro invocations.
+ */ + data class SExpValue( + override val annotations: List = emptyList(), + override val selfIndex: Int, + override val endExclusive: Int + ) : DataModelContainer { + override val type: IonType get() = IonType.SEXP + override fun withAnnotations(annotations: List) = copy(annotations = annotations) + } + + /** + * An Ion Struct that could contain variables or macro invocations. + */ + data class StructValue( + override val annotations: List = emptyList(), + override val selfIndex: Int, + override val endExclusive: Int, + val templateStructIndex: Map> + ) : DataModelContainer { + override val type: IonType get() = IonType.STRUCT + override fun withAnnotations(annotations: List) = copy(annotations = annotations) + } + + data class FieldName(val value: SymbolToken) : DataModelExpression + + /** + * A reference to a variable that needs to be expanded. + */ + data class VariableRef(val signatureIndex: Int) : TemplateBodyExpression + + /** + * A macro invocation that needs to be expanded. + */ + data class MacroInvocation( + val macro: Macro, + override val selfIndex: Int, + override val endExclusive: Int + ) : TemplateBodyExpression, HasStartAndEnd + + /** + * An e-expression that needs to be expanded. + */ + data class EExpression( + val macro: Macro, + override val selfIndex: Int, + override val endExclusive: Int + ) : EExpressionBodyExpression, HasStartAndEnd +} diff --git a/src/main/java/com/amazon/ion/impl/macro/ExpressionBuilderDsl.kt b/src/main/java/com/amazon/ion/impl/macro/ExpressionBuilderDsl.kt new file mode 100644 index 0000000000..7a1756361c --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/macro/ExpressionBuilderDsl.kt @@ -0,0 +1,193 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.macro + +import com.amazon.ion.* +import com.amazon.ion.impl.* +import com.amazon.ion.impl.macro.Expression.* +import java.math.BigInteger +import kotlin.reflect.KFunction1 + +/** + * Nothing in this file should be made public because it would expose the shaded kotlin std library in our public API. + */ + +/** A marker annotation for a [type-safe builder](https://kotlinlang.org/docs/type-safe-builders.html). */ +@DslMarker +internal annotation class ExpressionBuilderDslMarker + +/** Base DSL; functions are common for [DataModelExpression], [TemplateBodyExpression], and [EExpressionBodyExpression]. */ +internal interface ValuesDsl { + fun annotated(annotations: List, valueFn: KFunction1, value: T) + fun annotated(annotation: SystemSymbols_1_1, valueFn: KFunction1, value: T) = + annotated(listOf(annotation.token), valueFn, value) + fun nullValue(value: IonType = IonType.NULL) + fun bool(value: Boolean) + fun int(value: Long) + fun int(value: BigInteger) + fun float(value: Double) + fun decimal(value: Decimal) + fun timestamp(value: Timestamp) + fun symbol(value: SymbolToken) + fun symbol(value: String) = symbol(_Private_Utils.newSymbolToken(value)) + fun symbol(value: SystemSymbols_1_1) = symbol(value.token) + fun string(value: String) + fun clob(value: ByteArray) + fun blob(value: ByteArray) + + /** Helper interface for use when building the content of a struct */ + interface Fields { + fun fieldName(fieldName: SymbolToken) + fun fieldName(fieldName: String) = fieldName(_Private_Utils.newSymbolToken(fieldName)) + } +} + +/** DSL for building [DataModelExpression] lists. 
*/ +@ExpressionBuilderDslMarker +internal interface DataModelDsl : ValuesDsl { + fun list(content: DataModelDsl.() -> Unit) + fun sexp(content: DataModelDsl.() -> Unit) + fun struct(content: Fields.() -> Unit) + + @ExpressionBuilderDslMarker + interface Fields : ValuesDsl.Fields, DataModelDsl +} + +/** DSL for building [TemplateBodyExpression] lists. */ +@ExpressionBuilderDslMarker +internal interface TemplateDsl : ValuesDsl { + fun macro(macro: Macro, arguments: InvocationBody.() -> Unit) + fun variable(signatureIndex: Int) + fun list(content: TemplateDsl.() -> Unit) + fun sexp(content: TemplateDsl.() -> Unit) + fun struct(content: Fields.() -> Unit) + + @ExpressionBuilderDslMarker + interface Fields : ValuesDsl.Fields, TemplateDsl + + @ExpressionBuilderDslMarker + interface InvocationBody : TemplateDsl { + fun expressionGroup(content: TemplateDsl.() -> Unit) + } +} + +/** DSL for building [EExpressionBodyExpression] lists. */ +@ExpressionBuilderDslMarker +internal interface EExpDsl : ValuesDsl { + fun eexp(macro: Macro, arguments: InvocationBody.() -> Unit) + fun list(content: EExpDsl.() -> Unit) + fun sexp(content: EExpDsl.() -> Unit) + fun struct(content: Fields.() -> Unit) + + @ExpressionBuilderDslMarker + interface Fields : ValuesDsl.Fields, EExpDsl + + @ExpressionBuilderDslMarker + interface InvocationBody : EExpDsl { + fun expressionGroup(content: EExpDsl.() -> Unit) + } +} + +/** + * The implementation of all the expression builder DSL interfaces. + * + * How does this work? We implement everything in one class, but methods are exposed by being selective + * about which interface we are using at any given time. For example, if you want to build a template + * expression, you will get an interface that will not allow you to create an E-Expression. Likewise, if + * you are building a struct, you will not get an interface with a method to create an expression group + * in the middle of a struct (you must create a macro/eexp first). 
+ */ +internal sealed class ExpressionBuilderDsl : ValuesDsl, ValuesDsl.Fields { + + companion object { + // Entry points to the DSL builders. + fun templateBody(block: TemplateDsl.() -> Unit): List = Template().apply(block).build() + fun dataModel(block: DataModelDsl.() -> Unit): List = DataModel().apply(block).build() + fun eExpBody(block: EExpDsl.() -> Unit): List = EExp().apply(block).build() + } + + protected val expressions = mutableListOf() + private var pendingAnnotations = mutableListOf() + + override fun annotated(annotations: List, valueFn: KFunction1, value: T) { + pendingAnnotations.addAll(annotations) + valueFn.invoke(value) + } + + override fun nullValue(value: IonType) = scalar(::NullValue, value) + override fun bool(value: Boolean) = scalar(::BoolValue, value) + override fun int(value: Long) = scalar(::LongIntValue, value) + override fun int(value: BigInteger) = scalar(::BigIntValue, value) + override fun float(value: Double) = scalar(::FloatValue, value) + override fun decimal(value: Decimal) = scalar(::DecimalValue, value) + override fun timestamp(value: Timestamp) = scalar(::TimestampValue, value) + override fun symbol(value: SymbolToken) = scalar(::SymbolValue, value) + override fun string(value: String) = scalar(::StringValue, value) + override fun clob(value: ByteArray) = scalar(::ClobValue, value) + override fun blob(value: ByteArray) = scalar(::BlobValue, value) + + override fun fieldName(fieldName: SymbolToken) { expressions.add(FieldName(fieldName)) } + + protected fun newStruct(annotations: List, structStart: Int, structEndExclusive: Int): StructValue { + val nestedStructs = expressions + .subList(structStart + 1, structEndExclusive) + .filterIsInstance() + val templateStructIndex = expressions + .mapIndexed { i, it -> it to i } + // Find all field names that are _not_ part of a nested struct + .filter { (expr, i) -> + expr is FieldName && + nestedStructs.none { i > it.selfIndex && i < it.endExclusive } && + structStart < i && + i < 
structEndExclusive + } + .groupBy({ (expr, _) -> (expr as FieldName).value.text }) { (_, index) -> index + 1 } + return StructValue(annotations, structStart, structEndExclusive, templateStructIndex) + } + + fun build(): List = expressions.map { it as T } + + // Helpers + private fun takePendingAnnotations(): List = pendingAnnotations.also { pendingAnnotations = mutableListOf() } + + private fun scalar(constructor: (List, T) -> Expression, value: T) { + expressions.add(constructor(takePendingAnnotations(), value)) + } + + protected fun container(content: T.() -> Unit, constructor: (Int, Int) -> Expression) { + val selfIndex = expressions.size + expressions.add(Placeholder) + (this as T).content() + expressions[selfIndex] = constructor(selfIndex, /* endExclusive= */ expressions.size) + } + + protected fun containerWithAnnotations(content: T.() -> Unit, constructor: (List, Int, Int) -> Expression) { + val ann = takePendingAnnotations() + container(content) { start, end -> constructor(ann, start, end) } + } + + // Subclasses for each expression variant so that we don't have conflicting signatures between their list, sexp, etc. implementations. 
+ + class DataModel : ExpressionBuilderDsl(), DataModelDsl, DataModelDsl.Fields { + override fun list(content: DataModelDsl.() -> Unit) = containerWithAnnotations(content, ::ListValue) + override fun sexp(content: DataModelDsl.() -> Unit) = containerWithAnnotations(content, ::SExpValue) + override fun struct(content: DataModelDsl.Fields.() -> Unit) = containerWithAnnotations(content, ::newStruct) + } + + class EExp : ExpressionBuilderDsl(), EExpDsl, EExpDsl.Fields, EExpDsl.InvocationBody { + override fun sexp(content: EExpDsl.() -> Unit) = containerWithAnnotations(content, ::SExpValue) + override fun list(content: EExpDsl.() -> Unit) = containerWithAnnotations(content, ::ListValue) + override fun struct(content: EExpDsl.Fields.() -> Unit) = containerWithAnnotations(content, ::newStruct) + override fun eexp(macro: Macro, arguments: EExpDsl.InvocationBody.() -> Unit) = container(arguments) { start, end -> EExpression(macro, start, end) } + override fun expressionGroup(content: EExpDsl.() -> Unit) = container(content, ::ExpressionGroup) + } + + class Template : ExpressionBuilderDsl(), TemplateDsl, TemplateDsl.Fields, TemplateDsl.InvocationBody { + override fun list(content: TemplateDsl.() -> Unit) = containerWithAnnotations(content, ::ListValue) + override fun sexp(content: TemplateDsl.() -> Unit) = containerWithAnnotations(content, ::SExpValue) + override fun struct(content: TemplateDsl.Fields.() -> Unit) = containerWithAnnotations(content, ::newStruct) + override fun variable(signatureIndex: Int) { expressions.add(VariableRef(signatureIndex)) } + override fun macro(macro: Macro, arguments: TemplateDsl.InvocationBody.() -> Unit) = container(arguments) { start, end -> MacroInvocation(macro, start, end) } + override fun expressionGroup(content: TemplateDsl.() -> Unit) = container(content, ::ExpressionGroup) + } +} diff --git a/src/main/java/com/amazon/ion/impl/macro/IonReaderFromReaderAdapter.kt b/src/main/java/com/amazon/ion/impl/macro/IonReaderFromReaderAdapter.kt 
new file mode 100644 index 0000000000..79853e9287 --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/macro/IonReaderFromReaderAdapter.kt @@ -0,0 +1,85 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.macro + +import com.amazon.ion.* +import java.lang.UnsupportedOperationException +import java.math.BigDecimal +import java.math.BigInteger +import java.util.* + +/** + * An [IonReader] that delegates to a [ReaderAdapter]. + */ +internal class IonReaderFromReaderAdapter(val reader: ReaderAdapter) : IonReader { + + override fun close() { + // Do nothing. ReaderAdapter does not implement close(). + } + + override fun asFacet(facetType: Class?): T { + throw UnsupportedOperationException() + } + + override fun hasNext(): Boolean { + throw UnsupportedOperationException() + } + + override fun next(): IonType? = if (reader.nextValue()) reader.encodingType()!! else null + + override fun stringValue(): String = reader.stringValue() + + override fun intValue(): Int = reader.intValue() + + override fun bigDecimalValue(): BigDecimal = reader.decimalValue() + + override fun decimalValue(): Decimal = reader.ionDecimalValue() + + override fun dateValue(): Date = TODO("Not yet implemented") + + override fun doubleValue(): Double = reader.doubleValue() + + override fun stepIn() = reader.stepIntoContainer() + + override fun stepOut() = reader.stepOutOfContainer() + + override fun getDepth(): Int = reader.getDepth() + + override fun getSymbolTable(): SymbolTable = TODO("Not yet implemented") + + override fun getType(): IonType? 
= reader.encodingType() + + override fun getTypeAnnotationSymbols(): Array = reader.getTypeAnnotationSymbols().toTypedArray() + + override fun iterateTypeAnnotations(): MutableIterator = TODO("Not yet implemented") + + override fun getFieldId(): Int = TODO("Not yet implemented") + + override fun getFieldName(): String = TODO("Not yet implemented") + + override fun booleanValue(): Boolean = reader.booleanValue() + + override fun isNullValue(): Boolean = reader.isNullValue() + + override fun longValue(): Long = reader.longValue() + + override fun bigIntegerValue(): BigInteger = reader.bigIntegerValue() + + override fun timestampValue(): Timestamp = reader.timestampValue() + + override fun newBytes(): ByteArray = reader.newBytes() + + override fun getBytes(buffer: ByteArray?, offset: Int, len: Int): Int = TODO("Not yet implemented") + + override fun symbolValue(): SymbolToken = reader.symbolValue() + + override fun byteSize(): Int = TODO("Not yet implemented") + + override fun getIntegerSize(): IntegerSize = reader.getIntegerSize() + + override fun getTypeAnnotations(): Array = TODO("Not yet implemented") + + override fun getFieldNameSymbol(): SymbolToken = reader.getFieldNameSymbol() + + override fun isInStruct(): Boolean = reader.isInStruct() +} diff --git a/src/main/java/com/amazon/ion/impl/macro/Macro.kt b/src/main/java/com/amazon/ion/impl/macro/Macro.kt new file mode 100644 index 0000000000..7d6bf2266f --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/macro/Macro.kt @@ -0,0 +1,76 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.macro + +import com.amazon.ion.impl.TaglessEncoding + +/** + * A [Macro] is either a [SystemMacro] or a [TemplateMacro]. + */ +sealed interface Macro { + val signature: List + val body: List? 
+ val dependencies: Iterable + + data class Parameter(val variableName: String, val type: ParameterEncoding, val cardinality: ParameterCardinality) { + override fun toString() = "$type::$variableName${cardinality.sigil}" + } + + // TODO: See if we can DRY up ParameterEncoding and PrimitiveType + enum class ParameterEncoding(val ionTextName: String, @JvmField val taglessEncodingKind: TaglessEncoding? = null) { + // TODO: Update this to support macro shapes + Tagged("any"), + Uint8("uint8", TaglessEncoding.UINT8), + Uint16("uint16", TaglessEncoding.UINT16), + Uint32("uint32", TaglessEncoding.UINT32), + Uint64("uint64", TaglessEncoding.UINT64), + FlexUint("flex_uint", TaglessEncoding.FLEX_UINT), + Int8("int8", TaglessEncoding.INT8), + Int16("int16", TaglessEncoding.INT16), + Int32("int32", TaglessEncoding.INT32), + Int64("int64", TaglessEncoding.INT64), + FlexInt("flex_int", TaglessEncoding.FLEX_INT), + Float16("float16", TaglessEncoding.FLOAT16), + Float32("float32", TaglessEncoding.FLOAT32), + Float64("float64", TaglessEncoding.FLOAT64), + FlexSym("flex_sym", TaglessEncoding.FLEX_SYM), + ; + companion object { + @JvmStatic + fun fromPrimitiveType(taglessEncoding: TaglessEncoding) = when (taglessEncoding) { + TaglessEncoding.UINT8 -> Uint8 + TaglessEncoding.UINT16 -> Uint16 + TaglessEncoding.UINT32 -> Uint32 + TaglessEncoding.UINT64 -> Uint64 + TaglessEncoding.FLEX_UINT -> FlexUint + TaglessEncoding.INT8 -> Int8 + TaglessEncoding.INT16 -> Int16 + TaglessEncoding.INT32 -> Int32 + TaglessEncoding.INT64 -> Int64 + TaglessEncoding.FLEX_INT -> FlexInt + TaglessEncoding.FLOAT16 -> Float16 + TaglessEncoding.FLOAT32 -> Float32 + TaglessEncoding.FLOAT64 -> Float64 + TaglessEncoding.FLEX_SYM -> FlexSym + } + } + } + + enum class ParameterCardinality(@JvmField val sigil: Char, @JvmField val canBeVoid: Boolean, @JvmField val canBeMulti: Boolean) { + ZeroOrOne('?', true, false), + ExactlyOne('!', false, false), + OneOrMore('+', false, true), + ZeroOrMore('*', true, true); + + 
companion object { + @JvmStatic + fun fromSigil(sigil: String): ParameterCardinality? = when (sigil.singleOrNull()) { + '?' -> ZeroOrOne + '!' -> ExactlyOne + '+' -> OneOrMore + '*' -> ZeroOrMore + else -> null + } + } + } +} diff --git a/src/main/java/com/amazon/ion/impl/macro/MacroCompiler.kt b/src/main/java/com/amazon/ion/impl/macro/MacroCompiler.kt new file mode 100644 index 0000000000..f64da15a6f --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/macro/MacroCompiler.kt @@ -0,0 +1,312 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.macro + +import com.amazon.ion.* +import com.amazon.ion.impl.* +import com.amazon.ion.impl.bin.Ion_1_1_Constants.* +import com.amazon.ion.impl.macro.Expression.* +import com.amazon.ion.util.* + +/** + * [MacroCompiler] wraps an Ion reader. When directed to do so, it will take over advancing and getting values from the + * reader in order to read one [TemplateMacro]. + */ +internal class MacroCompiler( + private val getMacro: (MacroRef) -> Macro?, + private val reader: ReaderAdapter +) { + + /** The name of the macro that was read. Returns `null` if no macro name is available. */ + var macroName: String? = null + private set // Only mutable internally + + private val signature: MutableList = mutableListOf() + private val expressions: MutableList = mutableListOf() + + /** + * Compiles a template macro definition from the reader. Caller is responsible for positioning the reader at—but not + * stepped into—the macro template s-expression. 
+ */ + fun compileMacro(): TemplateMacro { + macroName = null + signature.clear() + expressions.clear() + + confirm(reader.encodingType() == IonType.SEXP) { "macro compilation expects a sexp starting with the keyword `macro`" } + reader.confirmNoAnnotations("a macro definition sexp") + reader.readContainer { + reader.nextValue() + confirm(reader.encodingType() == IonType.SYMBOL && reader.stringValue() == "macro") { "macro compilation expects a sexp starting with the keyword `macro`" } + + reader.nextAndCheckType(IonType.SYMBOL, IonType.NULL, "macro name") + + reader.confirmNoAnnotations("macro name") + if (reader.encodingType() != IonType.NULL) { + macroName = reader.stringValue().also { confirm(isIdentifierSymbol(it)) { "invalid macro name: '$it'" } } + } + reader.nextAndCheckType(IonType.SEXP, "macro signature") + reader.confirmNoAnnotations("macro signature") + reader.readSignature() + confirm(reader.nextValue()) { "Macro definition is missing a template body expression." } + reader.compileTemplateBodyExpression() + confirm(!reader.nextValue()) { "Unexpected ${reader.encodingType()} after template body expression." } + } + return TemplateMacro(signature.toList(), expressions.toList()) + } + + /** + * Reads the macro signature, populating parameters in [signature]. + * Caller is responsible for making sure that the reader is positioned on (but not stepped into) the signature sexp. + */ + private fun ReaderAdapter.readSignature() { + var pendingParameter: Macro.Parameter? 
= null + + forEachInContainer { + if (encodingType() != IonType.SYMBOL) throw IonException("parameter must be a symbol; found ${encodingType()}") + + val symbolText = stringValue() + + val cardinality = Macro.ParameterCardinality.fromSigil(symbolText) + + if (cardinality != null) { + confirmNoAnnotations("cardinality sigil") + // The symbol is a cardinality modifier + if (pendingParameter == null) { + throw IonException("Found an orphaned cardinality in macro signature") + } else { + signature.add(pendingParameter!!.copy(cardinality = cardinality)) + pendingParameter = null + return@forEachInContainer + } + } + + // If we have a pending parameter, add it to the signature before we read the next parameter + if (pendingParameter != null) signature.add(pendingParameter!!) + + // Read the next parameter name + val annotations = getTypeAnnotationSymbols() + val parameterEncoding = when (annotations.size) { + 0 -> Macro.ParameterEncoding.Tagged + 1 -> { + val encodingText = annotations[0].text + val encoding = Macro.ParameterEncoding.entries.singleOrNull { it.ionTextName == encodingText } + if (encoding == null) { + // TODO: Check for macro-shaped parameter encodings, and only if it's still null, we throw. + throw IonException("unsupported parameter encoding $annotations") + } + encoding + } + 2 -> TODO("Qualified references for macro-shaped parameters") + else -> throw IonException("unsupported parameter encoding $annotations") + } + confirm(isIdentifierSymbol(symbolText)) { "invalid parameter name: '$symbolText'" } + confirm(signature.none { it.variableName == symbolText }) { "redeclaration of parameter '$symbolText'" } + pendingParameter = Macro.Parameter(symbolText, parameterEncoding, Macro.ParameterCardinality.ExactlyOne) + } + // If we have a pending parameter that hasn't been added to the signature, add it here. + if (pendingParameter != null) signature.add(pendingParameter!!)
+ } + + private fun isIdentifierSymbol(symbol: String): Boolean { + if (symbol.isEmpty()) return false + + // If the symbol's text matches an Ion keyword, it's not an identifier symbol. + // Eg, the symbol 'false' must be quoted and is not an identifier symbol. + if (_Private_IonTextAppender.isIdentifierKeyword(symbol)) return false + + if (!_Private_IonTextAppender.isIdentifierStart(symbol[0].code)) return false + + return symbol.all { c -> _Private_IonTextAppender.isIdentifierPart(c.code) } + } + + /** + * Compiles the current value on the reader into a [TemplateBodyExpression] and adds it to [expressions]. + * Caller is responsible for ensuring that the reader is positioned on a value. + * + * If called when the reader is not positioned on any value, throws [IllegalStateException]. + */ + private fun ReaderAdapter.compileTemplateBodyExpression() { + // NOTE: `toList()` does not allocate for an empty list. + val annotations: List = getTypeAnnotationSymbols() + + if (isNullValue()) { + expressions.add(NullValue(annotations, encodingType()!!)) + } else when (encodingType()) { + IonType.BOOL -> expressions.add(BoolValue(annotations, booleanValue())) + IonType.INT -> expressions.add( + when (integerSize()!!) 
{ + IntegerSize.INT, + IntegerSize.LONG -> LongIntValue(annotations, longValue()) + IntegerSize.BIG_INTEGER -> BigIntValue(annotations, bigIntegerValue()) + } + ) + IonType.FLOAT -> expressions.add(FloatValue(annotations, doubleValue())) + IonType.DECIMAL -> expressions.add(DecimalValue(annotations, decimalValue())) + IonType.TIMESTAMP -> expressions.add(TimestampValue(annotations, timestampValue())) + IonType.STRING -> expressions.add(StringValue(annotations, stringValue())) + IonType.BLOB -> expressions.add(BlobValue(annotations, newBytes())) + IonType.CLOB -> expressions.add(ClobValue(annotations, newBytes())) + IonType.SYMBOL -> expressions.add(SymbolValue(annotations, symbolValue())) + IonType.LIST -> compileList(annotations) + IonType.SEXP -> compileSExpression(annotations) + IonType.STRUCT -> compileStruct(annotations) + // IonType.NULL, IonType.DATAGRAM, null + else -> throw IllegalStateException("Found ${encodingType()}; this should be unreachable.") + } + } + + /** + * Compiles a struct in a macro template. + * When calling, the reader should be positioned at the struct, but not stepped into it. + * If this function returns normally, it will be stepped out of the struct. + * Caller will need to call [IonReader.next] to get the next value. + */ + private fun ReaderAdapter.compileStruct(annotations: List) { + val start = expressions.size + expressions.add(Placeholder) + val templateStructIndex = mutableMapOf>() + forEachInContainer { + val fieldName: SymbolToken = fieldNameSymbol() + expressions.add(FieldName(fieldName)) + fieldName.text?.let { + val valueIndex = expressions.size + // Default is an array list with capacity of 1, since the most common case is that a field name occurs once. 
+ templateStructIndex.getOrPut(it) { ArrayList(1) } += valueIndex + } + compileTemplateBodyExpression() + } + val end = expressions.size + expressions[start] = StructValue(annotations, start, end, templateStructIndex) + } + + /** + * Compiles a list or sexp in a macro template. + * When calling, the reader should be positioned at the sequence, but not stepped into it. + * If this function returns normally, it will be stepped out of the sequence. + * Caller will need to call [IonReader.next] to get the next value. + */ + private fun ReaderAdapter.compileList(annotations: List) { + val start = expressions.size + stepIntoContainer() + expressions.add(Placeholder) + compileExpressionTail(start) { end -> ListValue(annotations, start, end) } + } + + /** + * Compiles an unclassified S-Expression in a template body expression. + * When calling, the reader should be positioned at the sexp, but not stepped into it. + * If this function returns normally, it will be stepped out of the sexp. + * Caller will need to call [IonReader.next] to get the next value. 
+ */ + private fun ReaderAdapter.compileSExpression(sexpAnnotations: List) { + val start = expressions.size + stepIntoContainer() + expressions.add(Placeholder) + if (nextValue()) { + if (encodingType() == IonType.SYMBOL) { + when (stringValue()) { + TDL_VARIABLE_EXPANSION_SIGIL -> { + confirm(sexpAnnotations.isEmpty()) { "Variable expansion may not be annotated" } + confirmNoAnnotations("Variable expansion operator") + compileVariableExpansion(start) + return + } + + TDL_EXPRESSION_GROUP_SIGIL -> { + confirm(sexpAnnotations.isEmpty()) { "Expression group may not be annotated" } + confirmNoAnnotations("Expression group operator") + compileExpressionTail(start) { end -> ExpressionGroup(start, end) } + return + } + + TDL_MACRO_INVOCATION_SIGIL -> { + confirm(sexpAnnotations.isEmpty()) { "Macro invocation may not be annotated" } + confirmNoAnnotations("Macro invocation operator") + nextValue() + val macro = readMacroReference() + compileExpressionTail(start) { end -> MacroInvocation(macro, start, end) } + return + } + } + } + // Compile the value we're already positioned on before compiling the rest of the s-expression + compileTemplateBodyExpression() + } + compileExpressionTail(start) { end -> SExpValue(sexpAnnotations, start, end) } + } + + /** + * Must be positioned on the (expected) macro reference. + */ + private fun ReaderAdapter.readMacroReference(): Macro { + + val annotations = getTypeAnnotationSymbols() + val isQualifiedSystemMacro = annotations.size == 1 && SystemSymbols_1_1.ION.text == annotations[0].getText() + + val macroRef = when (encodingType()) { + IonType.SYMBOL -> { + val macroName = stringValue() + // TODO: Come up with a consistent strategy for handling special forms. 
+ MacroRef.ByName(macroName) + } + + IonType.INT -> { + val sid = intValue() + if (sid < 0) throw IonException("Macro ID must be non-negative: $sid") + MacroRef.ById(intValue()) + } + else -> throw IonException("macro invocation must start with an id (int) or identifier (symbol); found ${encodingType() ?: "nothing"}\"") + } + val m = if (isQualifiedSystemMacro) SystemMacro.getMacroOrSpecialForm(macroRef) else getMacro(macroRef) + return m ?: throw IonException("Unrecognized macro: $macroRef") + } + + private fun ReaderAdapter.compileVariableExpansion(placeholderIndex: Int) { + nextValue() + confirm(encodingType() == IonType.SYMBOL) { "Variable names must be symbols" } + val name = stringValue() + confirmNoAnnotations("on variable reference '$name'") + val index = signature.indexOfFirst { it.variableName == name } + confirm(index >= 0) { "variable '$name' is not recognized" } + expressions[placeholderIndex] = VariableRef(index) + confirm(!nextValue()) { "Variable expansion should contain only the variable name." } + stepOutOfContainer() + } + + private inline fun ReaderAdapter.compileExpressionTail(seqStart: Int, constructor: (Int) -> TemplateBodyExpression) { + forEachRemaining { compileTemplateBodyExpression() } + val seqEnd = expressions.size + expressions[seqStart] = constructor(seqEnd) + stepOutOfContainer() + } + + // Helper functions + + /** Utility method for checking that annotations are empty or a single array with the given annotations */ + private fun List.isEmptyOr(text: String): Boolean = isEmpty() || (size == 1 && this[0].assumeText() == text) + + /** Throws [IonException] if any annotations are on the current value in this [IonReader]. */ + private fun ReaderAdapter.confirmNoAnnotations(location: String) { + confirm(!hasAnnotations()) { "found annotations on $location" } + } + + /** Moves to the next type and throw [IonException] if it is not the `expected` [IonType]. 
*/ + private fun ReaderAdapter.nextAndCheckType(expected: IonType, location: String) { + confirm(nextValue() && encodingType() == expected) { "$location must be a $expected; found ${encodingType() ?: "nothing"}" } + } + + /** Moves to the next type and throw [IonException] if it is not the `expected` [IonType]. */ + private fun ReaderAdapter.nextAndCheckType(expected0: IonType, expected1: IonType, location: String) { + confirm(nextValue() && (encodingType() == expected0 || encodingType() == expected1)) { "$location must be a $expected0 or $expected1; found ${encodingType() ?: "nothing"}" } + } + + /** Steps into a container, executes [block], and steps out. */ + private inline fun ReaderAdapter.readContainer(block: () -> Unit) { stepIntoContainer(); block(); stepOutOfContainer() } + + /** Executes [block] for each remaining value at the current reader depth. */ + private inline fun ReaderAdapter.forEachRemaining(block: (IonType) -> Unit) { while (nextValue()) { block(encodingType()!!) } } + + /** Steps into a container, executes [block] for each value at that reader depth, and steps out. */ + private inline fun ReaderAdapter.forEachInContainer(block: (IonType) -> Unit) = readContainer { forEachRemaining(block) } +} diff --git a/src/main/java/com/amazon/ion/impl/macro/MacroEvaluator.kt b/src/main/java/com/amazon/ion/impl/macro/MacroEvaluator.kt new file mode 100644 index 0000000000..008890994a --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/macro/MacroEvaluator.kt @@ -0,0 +1,804 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.macro + +import com.amazon.ion.* +import com.amazon.ion.impl._Private_RecyclingStack +import com.amazon.ion.impl._Private_Utils.newSymbolToken +import com.amazon.ion.impl.macro.Expression.* +import java.io.ByteArrayOutputStream +import java.math.BigDecimal +import java.math.BigInteger + +/** + * Evaluates an EExpression from a List of [EExpressionBodyExpression] and the [TemplateBodyExpression]s + * given in the macro table of the [EncodingContext]. + * + * General Usage: + * - To start evaluating an e-expression, call [initExpansion] + * - Call [expandNext] to get the next field name or value, or null + * if the end of the container or end of expansion has been reached. + * - Call [stepIn] when positioned on a container to step into that container. + * - Call [stepOut] to step out of the current container. + * + * TODO: Add expansion limit + */ +class MacroEvaluator { + + /** + * Implementations must update [ExpansionInfo.i] in order for [ExpansionInfo.hasNext] to work properly. + */ + private fun interface Expander { + fun nextExpression(expansionInfo: ExpansionInfo, macroEvaluator: MacroEvaluator): Expression + + /** + * Read the expanded values from one argument, returning exactly one value. + * Throws an exception if there is not exactly one expanded value. + */ + fun readExactlyOneExpandedArgumentValue(expansionInfo: ExpansionInfo, macroEvaluator: MacroEvaluator, argName: String): DataModelExpression { + return readZeroOrOneExpandedArgumentValues(expansionInfo, macroEvaluator, argName) + ?: throw IonException("Argument $argName expanded to nothing.") + } + + /** + * Read the expanded values from one argument, returning zero or one values. + * Throws an exception if there is more than one expanded value. + */ + fun readZeroOrOneExpandedArgumentValues(expansionInfo: ExpansionInfo, macroEvaluator: MacroEvaluator, argName: String): DataModelExpression? { + var value: DataModelExpression? 
= null + readExpandedArgumentValues(expansionInfo, macroEvaluator) { + if (value == null) { + value = it + } else { + throw IonException("Too many values for argument $argName") + } + true // Continue expansion + } + return value + } + + /** + * Reads the expanded values from one argument. + * + * The callback should return true to continue the expansion or false to abandon the expansion early. + */ + fun readExpandedArgumentValues(expansionInfo: ExpansionInfo, macroEvaluator: MacroEvaluator, callback: (DataModelExpression) -> Boolean) { + val i = expansionInfo.i + expansionInfo.nextSourceExpression() + + macroEvaluator.pushExpansion( + expansionKind = ExpansionKind.Values, + argsStartInclusive = i, + // There can only be one top-level expression for an argument (it's either a value, macro, or + // expression group) so we can set the end to one more than the start. + argsEndExclusive = i + 1, + environment = expansionInfo.environment ?: Environment.EMPTY, + expressions = expansionInfo.expressions!!, + ) + + val depth = macroEvaluator.expansionStack.size() + var expr = macroEvaluator.expandNext(depth) + var continueExpansion: Boolean + while (expr != null) { + continueExpansion = callback(expr) + if (!continueExpansion) break + expr = macroEvaluator.expandNext(depth) + } + // Step back out to the original depth (in case we exited the expansion early) + while (macroEvaluator.expansionStack.size() > depth) { + macroEvaluator.expansionStack.pop() + } + } + + /** + * Reads the first expanded value from one argument. + * + * Does not perform any sort of cardinality check, and leaves the evaluator stepped into the level of the + * returned expression. Returns null if the argument expansion produces no values. + */ + fun readFirstExpandedArgumentValue(expansionInfo: ExpansionInfo, macroEvaluator: MacroEvaluator): DataModelExpression? 
{ + val i = expansionInfo.i + expansionInfo.nextSourceExpression() + + macroEvaluator.pushExpansion( + expansionKind = ExpansionKind.Values, + argsStartInclusive = i, + // There can only be one top-level expression for an argument (it's either a value, macro, or + // expression group) so we can set the end to one more than the start. + argsEndExclusive = i + 1, + environment = expansionInfo.environment ?: Environment.EMPTY, + expressions = expansionInfo.expressions!!, + ) + + val depth = macroEvaluator.expansionStack.size() + return macroEvaluator.expandNext(depth) + } + } + + private object SimpleExpander : Expander { + override fun nextExpression(expansionInfo: ExpansionInfo, macroEvaluator: MacroEvaluator): Expression { + return expansionInfo.nextSourceExpression() + } + } + + private object AnnotateExpander : Expander { + // TODO: Handle edge cases mentioned in https://github.com/amazon-ion/ion-docs/issues/347 + override fun nextExpression(expansionInfo: ExpansionInfo, macroEvaluator: MacroEvaluator): Expression { + val annotations = mutableListOf() + + readExpandedArgumentValues(expansionInfo, macroEvaluator) { + when (it) { + is StringValue -> annotations.add(newSymbolToken(it.value)) + is SymbolValue -> annotations.add(it.value) + is DataModelValue -> throw IonException("Annotation arguments must be string or symbol; found: ${it.type}") + is FieldName -> TODO("Unreachable. 
Must encounter a StructValue first.") + } + } + + val valueToAnnotate = readExactlyOneExpandedArgumentValue(expansionInfo, macroEvaluator, SystemMacro.Annotate.signature[1].variableName) + + // It cannot be a FieldName expression because we haven't stepped into a struct, so it must be DataModelValue + valueToAnnotate as DataModelValue + // Combine the annotations + annotations.addAll(valueToAnnotate.annotations) + return valueToAnnotate.withAnnotations(annotations) + } + } + + private object MakeStringExpander : Expander { + override fun nextExpression(expansionInfo: ExpansionInfo, macroEvaluator: MacroEvaluator): Expression { + val sb = StringBuilder() + readExpandedArgumentValues(expansionInfo, macroEvaluator) { + when (it) { + is StringValue -> sb.append(it.value) + is SymbolValue -> sb.append(it.value.assumeText()) + is DataModelValue -> throw IonException("Invalid argument type for 'make_string': ${it.type}") + is FieldName -> TODO("Unreachable. We shouldn't be able to get here without first encountering a StructValue.") + } + true // continue expansion + } + return StringValue(value = sb.toString()) + } + } + + private object MakeSymbolExpander : Expander { + override fun nextExpression(expansionInfo: ExpansionInfo, macroEvaluator: MacroEvaluator): Expression { + val sb = StringBuilder() + readExpandedArgumentValues(expansionInfo, macroEvaluator) { + when (it) { + is StringValue -> sb.append(it.value) + is SymbolValue -> sb.append(it.value.assumeText()) + is DataModelValue -> throw IonException("Invalid argument type for 'make_symbol': ${it.type}") + is FieldName -> TODO("Unreachable. 
We shouldn't be able to get here without first encountering a StructValue.") + } + true // continue expansion + } + return SymbolValue(value = newSymbolToken(sb.toString())) + } + } + + private object MakeBlobExpander : Expander { + override fun nextExpression(expansionInfo: ExpansionInfo, macroEvaluator: MacroEvaluator): Expression { + // TODO: See if we can create a `ByteArrayView` or similar class based on the principles of a Persistent + // Collection in order to minimize copying (and therefore allocation). + val baos = ByteArrayOutputStream() + readExpandedArgumentValues(expansionInfo, macroEvaluator) { + when (it) { + is LobValue -> baos.write(it.value) + is DataModelValue -> throw IonException("Invalid argument type for 'make_blob': ${it.type}") + is FieldName -> TODO("Unreachable. We shouldn't be able to get here without first encountering a StructValue.") + } + true // continue expansion + } + return BlobValue(value = baos.toByteArray()) + } + } + + private object MakeDecimalExpander : Expander { + override fun nextExpression(expansionInfo: ExpansionInfo, macroEvaluator: MacroEvaluator): Expression { + val coefficient = readExactlyOneExpandedArgumentValue(expansionInfo, macroEvaluator, SystemMacro.MakeDecimal.signature[0].variableName) + .let { it as? IntValue } + ?.bigIntegerValue + ?: throw IonException("Coefficient must be an integer") + val exponent = readExactlyOneExpandedArgumentValue(expansionInfo, macroEvaluator, SystemMacro.MakeDecimal.signature[1].variableName) + .let { it as? IntValue } + ?.bigIntegerValue + ?: throw IonException("Exponent must be an integer") + + return DecimalValue(value = BigDecimal(coefficient, -1 * exponent.intValueExact())) + } + } + + private object MakeTimestampExpander : Expander { + private fun readOptionalIntArg( + signatureIndex: Int, + expansionInfo: ExpansionInfo, + macroEvaluator: MacroEvaluator + ): Int? 
{ + if (expansionInfo.i == expansionInfo.endExclusive) return null + val parameterName = SystemMacro.MakeTimestamp.signature[signatureIndex].variableName + val arg = readZeroOrOneExpandedArgumentValues(expansionInfo, macroEvaluator, parameterName) + return arg?.let { + it as? IntValue ?: throw IonException("$parameterName must be an integer") + it.longValue.toInt() + } + } + + override fun nextExpression(expansionInfo: ExpansionInfo, macroEvaluator: MacroEvaluator): Expression { + val year = readExactlyOneExpandedArgumentValue(expansionInfo, macroEvaluator, SystemMacro.MakeTimestamp.signature[0].variableName) + .let { it as? IntValue ?: throw IonException("year must be an integer") } + .longValue.toInt() + val month = readOptionalIntArg(1, expansionInfo, macroEvaluator) + val day = readOptionalIntArg(2, expansionInfo, macroEvaluator) + val hour = readOptionalIntArg(3, expansionInfo, macroEvaluator) + val minute = readOptionalIntArg(4, expansionInfo, macroEvaluator) + val second = if (expansionInfo.i == expansionInfo.endExclusive) { + null + } else when (val arg = readZeroOrOneExpandedArgumentValues(expansionInfo, macroEvaluator, SystemMacro.MakeTimestamp.signature[5].variableName)) { + null -> null + is DecimalValue -> arg.value + is IntValue -> arg.longValue.toBigDecimal() + else -> throw IonException("second must be a decimal") + } + val offsetMinutes = readOptionalIntArg(6, expansionInfo, macroEvaluator) + + try { + val ts = if (second != null) { + month ?: throw IonException("make_timestamp: month is required when second is present") + day ?: throw IonException("make_timestamp: day is required when second is present") + hour ?: throw IonException("make_timestamp: hour is required when second is present") + minute ?: throw IonException("make_timestamp: minute is required when second is present") + Timestamp.forSecond(year, month, day, hour, minute, second, offsetMinutes) + } else if (minute != null) { + month ?: throw IonException("make_timestamp: month is 
required when minute is present") + day ?: throw IonException("make_timestamp: day is required when minute is present") + hour ?: throw IonException("make_timestamp: hour is required when minute is present") + Timestamp.forMinute(year, month, day, hour, minute, offsetMinutes) + } else if (hour != null) { + throw IonException("make_timestamp: minute is required when hour is present") + } else { + if (offsetMinutes != null) throw IonException("make_timestamp: offset_minutes is prohibited when hours and minute are not present") + if (day != null) { + month ?: throw IonException("make_timestamp: month is required when day is present") + Timestamp.forDay(year, month, day) + } else if (month != null) { + Timestamp.forMonth(year, month) + } else { + Timestamp.forYear(year) + } + } + return TimestampValue(value = ts) + } catch (e: IllegalArgumentException) { + throw IonException(e.message) + } + } + } + + private object SumExpander : Expander { + override fun nextExpression(expansionInfo: ExpansionInfo, macroEvaluator: MacroEvaluator): Expression { + val a = readExactlyOneExpandedArgumentValue(expansionInfo, macroEvaluator, "a") + val b = readExactlyOneExpandedArgumentValue(expansionInfo, macroEvaluator, "b") + if (a !is IntValue || b !is IntValue) throw IonException("operands of sum must be integers") + // TODO: Use LongIntValue when possible. + return BigIntValue(value = a.bigIntegerValue + b.bigIntegerValue) + } + } + + private object DeltaExpander : Expander { + override fun nextExpression(expansionInfo: ExpansionInfo, macroEvaluator: MacroEvaluator): Expression { + // TODO: Optimize to use Long and only fallback to BigInteger if needed. 
+ // TODO: Optimize for lazy evaluation + if (expansionInfo.additionalState == null) { + val position = expansionInfo.i + var runningTotal = BigInteger.ZERO + val values = ArrayDeque() + readExpandedArgumentValues(expansionInfo, macroEvaluator) { + when (it) { + is IntValue -> { + runningTotal += it.bigIntegerValue + values += runningTotal + } + is DataModelValue -> throw IonException("Invalid argument type for 'delta': ${it.type}") + is FieldName -> TODO("Unreachable. We shouldn't be able to get here without first encountering a StructValue.") + } + true // continue expansion + } + + if (values.isEmpty()) { + // Return fake, empty expression group + return ExpressionGroup(position, position) + } + + expansionInfo.additionalState = values + expansionInfo.i = position + } + + val valueQueue = expansionInfo.additionalState as ArrayDeque + val nextValue = valueQueue.removeFirst() + if (valueQueue.isEmpty()) { + expansionInfo.i = expansionInfo.endExclusive + } + return BigIntValue(value = nextValue) + } + } + + private enum class IfExpander(private val minInclusive: Int, private val maxExclusive: Int) : Expander { + IF_NONE(0, 1), + IF_SOME(1, -1), + IF_SINGLE(1, 2), + IF_MULTI(2, -1), + ; + + override fun nextExpression(expansionInfo: ExpansionInfo, macroEvaluator: MacroEvaluator): Expression { + var n = 0 + readExpandedArgumentValues(expansionInfo, macroEvaluator) { + n++ + // If there's no max, then we'll only continue the expansion if we haven't yet reached the min + // If there is a max, then we'll continue the expansion until we reach the max + if (maxExclusive < 0) n < minInclusive else n < maxExclusive + } + val isConditionTrue = n >= minInclusive && (maxExclusive < 0 || n < maxExclusive) + // Save the current expansion index. 
This is the index of the "true" expression + val trueExpressionPosition = expansionInfo.i + // Now we are positioned on the "false" expression + expansionInfo.nextSourceExpression() + if (isConditionTrue) { + // If the condition is true, we can set the EXCLUSIVE END of this expansion to the position of the + // "false" expression, and then we reset the current index to the position of the "true" expression. + expansionInfo.endExclusive = expansionInfo.i + expansionInfo.i = trueExpressionPosition + } + return expansionInfo.nextSourceExpression() + } + } + + private object RepeatExpander : Expander { + /** + * Initializes the counter of the number of iterations remaining. + * [ExpansionInfo.additionalState] is the number of iterations remaining. Once initialized, it is always `Int`. + */ + private fun init(expansionInfo: ExpansionInfo, macroEvaluator: MacroEvaluator): Int { + val nExpression = readExactlyOneExpandedArgumentValue(expansionInfo, macroEvaluator, "n") + var iterationsRemaining = when (nExpression) { + is LongIntValue -> nExpression.value.toInt() + is BigIntValue -> { + if (nExpression.value.bitLength() >= Int.SIZE_BITS) { + throw IonException("ion-java does not support repeats of more than ${Int.MAX_VALUE}") + } + nExpression.value.intValueExact() + } + else -> throw IonException("The first argument of repeat must be a non-negative integer") + } + if (iterationsRemaining < 0) { + throw IonException("The first argument of repeat must be a non-negative integer") + } + // Decrement because we're starting the first iteration right away. + iterationsRemaining-- + expansionInfo.additionalState = iterationsRemaining + return iterationsRemaining + } + + override fun nextExpression(expansionInfo: ExpansionInfo, macroEvaluator: MacroEvaluator): Expression { + val repeatsRemainingAfterTheCurrentOne = expansionInfo.additionalState as? 
Int + ?: init(expansionInfo, macroEvaluator) + + if (repeatsRemainingAfterTheCurrentOne < 0) { + expansionInfo.nextSourceExpression() + return ExpressionGroup(0, 0) + } + + val repeatedExpressionIndex = expansionInfo.i + val next = readFirstExpandedArgumentValue(expansionInfo, macroEvaluator) + next ?: return ExpressionGroup(0, 0) + if (repeatsRemainingAfterTheCurrentOne > 0) { + expansionInfo.additionalState = repeatsRemainingAfterTheCurrentOne - 1 + expansionInfo.i = repeatedExpressionIndex + } + return next + } + } + + private object MakeFieldExpander : Expander { + // This is wrong! + override fun nextExpression(expansionInfo: ExpansionInfo, macroEvaluator: MacroEvaluator): Expression { + /** + * Uses [ExpansionInfo.additionalState] to track whether the expansion is on the field name or value. + * If unset, reads the field name. If set to 0, reads the field value. + */ + return when (expansionInfo.additionalState) { + // First time, get the field name + null -> { + val fieldName = readExactlyOneExpandedArgumentValue(expansionInfo, macroEvaluator, "field_name") + val fieldNameExpression = when (fieldName) { + is SymbolValue -> FieldName(fieldName.value) + else -> throw IonException("the first argument of make_field must expand to exactly one symbol value") + } + expansionInfo.additionalState = 0 + fieldNameExpression + } + 0 -> { + val value = readExactlyOneExpandedArgumentValue(expansionInfo, macroEvaluator, "value") + expansionInfo.additionalState = 1 + value + } + else -> throw IllegalStateException("Unreachable") + } + } + } + + private enum class ExpansionKind(val expander: Expander) { + Container(SimpleExpander), + TemplateBody(SimpleExpander), + Values(SimpleExpander), + Annotate(AnnotateExpander), + MakeString(MakeStringExpander), + MakeSymbol(MakeSymbolExpander), + MakeBlob(MakeBlobExpander), + MakeDecimal(MakeDecimalExpander), + MakeTimestamp(MakeTimestampExpander), + MakeField(MakeFieldExpander), + Sum(SumExpander), + Delta(DeltaExpander), + 
IfNone(IfExpander.IF_NONE), + IfSome(IfExpander.IF_SOME), + IfSingle(IfExpander.IF_SINGLE), + IfMulti(IfExpander.IF_MULTI), + Repeat(RepeatExpander), + ; + + companion object { + @JvmStatic + fun forSystemMacro(macro: SystemMacro): ExpansionKind { + return when (macro) { + SystemMacro.IfNone -> IfNone + SystemMacro.IfSome -> IfSome + SystemMacro.IfSingle -> IfSingle + SystemMacro.IfMulti -> IfMulti + SystemMacro.None -> Values // "none" takes no args, so we can treat it as an empty "values" expansion + SystemMacro.Values -> Values + SystemMacro.Annotate -> Annotate + SystemMacro.MakeString -> MakeString + SystemMacro.MakeSymbol -> MakeSymbol + SystemMacro.MakeDecimal -> MakeDecimal + SystemMacro.MakeTimestamp -> MakeTimestamp + SystemMacro.MakeBlob -> MakeBlob + SystemMacro.MakeField -> MakeField + SystemMacro.Repeat -> Repeat + SystemMacro.Sum -> Sum + SystemMacro.Delta -> Delta + else -> if (macro.body != null) { + TemplateBody + } else { + TODO("System macro ${macro.macroName} needs either a template body or a hard-coded expander.") + } + } + } + } + } + + private inner class ExpansionInfo : Iterator { + /** The [ExpansionKind]. */ + @JvmField var expansionKind: ExpansionKind = ExpansionKind.Values + /** + * The evaluation [Environment]—i.e. variable bindings. + */ + @JvmField var environment: Environment? = null + /** + * The [Expression]s being expanded. This MUST be the original list, not a sublist because + * (a) we don't want to be allocating new sublists all the time, and (b) the + * start and end indices of the expressions may be incorrect if a sublist is taken. + */ + @JvmField var expressions: List? = null + // /** Start of [expressions] that are applicable for this [ExpansionInfo] */ + // TODO: Do we actually need this for anything other than debugging? 
+ // @JvmField var startInclusive: Int = 0 + /** End of [expressions] that are applicable for this [ExpansionInfo] */ + @JvmField var endExclusive: Int = 0 + /** Current position within [expressions] of this expansion */ + @JvmField var i: Int = 0 + + /** + * Field for storing any additional state required in an expander. + * + * TODO: Once all system macros are implemented, see if we can make this an int instead + * + * There is currently some lost space in ExpansionInfo. We can add one more `additionalState` field without + * actually increasing the object size. + */ + @JvmField + var additionalState: Any? = null + + /** Checks if this expansion can produce any more expressions */ + override fun hasNext(): Boolean = i < endExclusive + + /** Returns the next expression from this expansion */ + override fun next(): Expression { + return expansionKind.expander.nextExpression(this, this@MacroEvaluator) + } + + /** + * Returns the next expression from the input expressions ([expressions]) of this Expansion. + * This is intended for use in [Expander] implementations. + */ + fun nextSourceExpression(): Expression { + val next = expressions!![i] + i++ + if (next is HasStartAndEnd) i = next.endExclusive + return next + } + + override fun toString() = """ + |ExpansionInfo( + | expansionKind: $expansionKind, + | environment: $environment, + | expressions: [ + | ${expressions!!.joinToString(",\n| ") { it.toString() } } + | ], + | endExclusive: $endExclusive, + | i: $i, + |) + """.trimMargin() + } + + private val expansionStack = _Private_RecyclingStack(8) { ExpansionInfo() } + + private var currentExpr: DataModelExpression? = null + + /** + * Initialize the macro evaluator with an E-Expression. + */ + fun initExpansion(encodingExpressions: List) { + // Pretend that the whole thing is a "values" expansion so that we don't have to care about what + // the first expression actually is. 
+ pushExpansion(ExpansionKind.Values, 0, encodingExpressions.size, Environment.EMPTY, encodingExpressions) + } + + /** + * Returns the e-expression argument expressions that this MacroEvaluator would evaluate. + */ + fun getArguments(): List { + return expansionStack.peek().expressions!! + } + + /** + * Evaluate the macro expansion until the next [DataModelExpression] can be returned. + * Returns null if at the end of a container or at the end of the expansion. + */ + fun expandNext(): DataModelExpression? { + return expandNext(-1) + } + + /** + * Evaluate the macro expansion until the next [DataModelExpression] can be returned. + * Returns null if at the end of a container or at the end of the expansion. + * + * Treats [minDepth] as the minimum expansion depth allowed—i.e. it will not step out any further than + * [minDepth]. This is used for built-in macros when they need to delegate something to the macro evaluator + * but don't want the macro evaluator to step out beyond the invoking built-in macro. + */ + private fun expandNext(minDepth: Int): DataModelExpression? { + + /* ==== Evaluation Algorithm ==== + 01 | Check the top expansion in the expansion stack + 02 | If there is none, return null (macro expansion is over) + 03 | If there is one, but it has no more expressions... + 04 | If the expansion kind is a data-model container type, return null (user needs to step out) + 05 | If the expansion kind is not a data-model container type, automatically step out + 06 | If there is one, and it has more expressions... 
+ 07 | If it is a scalar, return that + 08 | If it is a container, return that (user needs to step in) + 09 | If it is a variable, using parent Environment, push variable ExpansionInfo onto the stack and goto 1 + 10 | If it is an expression group, using current Environment, push expression group ExpansionInfo onto the stack and goto 1 + 11 | If it is a macro invocation, create updated Environment, push ExpansionInfo onto stack, and goto 1 + 12 | If it is an e-expression, using empty Environment, push ExpansionInfo onto stack and goto 1 + */ + + currentExpr = null + while (!expansionStack.isEmpty) { + if (!expansionStack.peek().hasNext()) { + if (expansionStack.peek().expansionKind == ExpansionKind.Container) { + // End of container. User needs to step out. + // TODO: Do we need something to distinguish End-Of-Expansion from End-Of-Container? + return null + } else { + // End of a macro invocation or something else that is not part of the data model, + // so we seamlessly close this out and continue with the parent expansion. + if (expansionStack.size() > minDepth) { + expansionStack.pop() + continue + } else { + // End of expansion for something internal. + return null + } + } + } + when (val currentExpr = expansionStack.peek().next()) { + Placeholder -> TODO("unreachable") + is MacroInvocation -> pushTdlMacroExpansion(currentExpr) + is EExpression -> pushEExpressionExpansion(currentExpr) + is VariableRef -> pushVariableExpansion(currentExpr) + is ExpressionGroup -> pushExpressionGroup(currentExpr) + is DataModelExpression -> { + this.currentExpr = currentExpr + break + } + } + } + return currentExpr + } + + /** + * Steps out of the current [DataModelContainer]. + */ + fun stepOut() { + // step out of anything we find until we have stepped out of a container. 
+ while (expansionStack.pop()?.expansionKind != ExpansionKind.Container) { + if (expansionStack.isEmpty) throw IonException("Nothing to step out of.") + } + } + + /** + * Steps in to the current [DataModelContainer]. + * Throws [IonException] if not positioned on a container. + */ + fun stepIn() { + val expression = requireNotNull(currentExpr) { "Not positioned on a value" } + expression as? DataModelContainer ?: throw IonException("Not positioned on a container.") + val currentExpansion = expansionStack.peek() + pushExpansion(ExpansionKind.Container, expression.startInclusive, expression.endExclusive, currentExpansion.environment!!, currentExpansion.expressions!!) + } + + /** + * Push a variable onto the expansion stack. + * + * Variables are a little bit different from other expansions. There is only one (top) expression + * in a variable expansion. It can be another variable, a value, a macro invocation, or an expression group. + * Furthermore, the current environment becomes the "source expressions" for the expansion, and the + * parent of the current environment becomes the environment in which the variable is expanded (thus + * maintaining the proper scope of variables). + */ + private fun pushVariableExpansion(expression: VariableRef) { + val currentEnvironment = expansionStack.peek().environment ?: Environment.EMPTY + val argumentExpressionIndex = currentEnvironment.argumentIndices[expression.signatureIndex] + + // Argument was elided; don't push anything so that we skip the empty expansion + if (argumentExpressionIndex < 0) return + + pushExpansion( + expansionKind = ExpansionKind.Values, + argsStartInclusive = argumentExpressionIndex, + // There can only be one expression for an argument. It's either a value, macro, or expression group. 
+ argsEndExclusive = argumentExpressionIndex + 1, + environment = currentEnvironment.parentEnvironment ?: Environment.EMPTY, + expressions = currentEnvironment.arguments + ) + } + + private fun pushExpressionGroup(expr: ExpressionGroup) { + val currentExpansion = expansionStack.peek() + pushExpansion(ExpansionKind.Values, expr.startInclusive, expr.endExclusive, currentExpansion.environment!!, currentExpansion.expressions!!) + } + + /** + * Push a macro from a TDL macro invocation, found in the current expansion, to the expansion stack + */ + private fun pushTdlMacroExpansion(expression: MacroInvocation) { + val currentExpansion = expansionStack.peek() + pushMacro( + macro = expression.macro, + argsStartInclusive = expression.startInclusive, + argsEndExclusive = expression.endExclusive, + currentExpansion.environment!!, + encodingExpressions = currentExpansion.expressions!!, + ) + } + + /** + * Push a macro from the e-expression [expression] onto the expansionStack, handling concerns such as + * looking up the macro reference, setting up the environment, etc. + */ + private fun pushEExpressionExpansion(expression: EExpression) { + val currentExpansion = expansionStack.peek() + pushMacro( + macro = expression.macro, + argsStartInclusive = expression.startInclusive, + argsEndExclusive = expression.endExclusive, + environment = Environment.EMPTY, + encodingExpressions = currentExpansion.expressions!!, + ) + } + + /** + * Pushes a macro invocation to the expansionStack + */ + private fun pushMacro( + macro: Macro, + argsStartInclusive: Int, + argsEndExclusive: Int, + environment: Environment, + encodingExpressions: List, + ) { + val argIndices = calculateArgumentIndices(macro, encodingExpressions, argsStartInclusive, argsEndExclusive) + val templateBody = macro.body + if (templateBody == null) { + // If there's no template body, it must be a system macro. 
+ macro as SystemMacro + val kind = ExpansionKind.forSystemMacro(macro) + pushExpansion(kind, argsStartInclusive, argsEndExclusive, environment, encodingExpressions) + } else { + pushExpansion( + ExpansionKind.TemplateBody, + argsStartInclusive = 0, + argsEndExclusive = templateBody.size, + expressions = templateBody, + environment = environment.createChild(encodingExpressions, argIndices) + ) + } + } + + /** + * Pushes an expansion to the expansion stack. + */ + private fun pushExpansion( + expansionKind: ExpansionKind, + argsStartInclusive: Int, + argsEndExclusive: Int, + environment: Environment, + expressions: List, + ) { + expansionStack.push { + it.expansionKind = expansionKind + it.environment = environment + it.expressions = expressions + it.i = argsStartInclusive + it.endExclusive = argsEndExclusive + it.additionalState = null + } + } + + /** + * Given a [Macro] (or more specifically, its signature), calculates the position of each of its arguments + * in [encodingExpressions]. The result is a list that can be used to map from a parameter's + * signature index to the encoding expression index. Any trailing, optional arguments that are + * elided have a value of -1. + * + * This function also validates that the correct number of parameters are present. If there are + * too many parameters or too few parameters, this will throw [IonException]. + */ + private fun calculateArgumentIndices( + macro: Macro, + encodingExpressions: List, + argsStartInclusive: Int, + argsEndExclusive: Int + ): List { + // TODO: For TDL macro invocations, see if we can calculate this during the "compile" step. + var numArgs = 0 + val argsIndices = IntArray(macro.signature.size) + var currentArgIndex = argsStartInclusive + for (p in macro.signature) { + if (currentArgIndex >= argsEndExclusive) { + if (!p.cardinality.canBeVoid) throw IonException("No value provided for parameter ${p.variableName}") + // Elided rest parameter. 
+ argsIndices[numArgs] = -1 + } else { + argsIndices[numArgs] = currentArgIndex + currentArgIndex = when (val expr = encodingExpressions[currentArgIndex]) { + is HasStartAndEnd -> expr.endExclusive + else -> currentArgIndex + 1 + } + } + numArgs++ + } + while (currentArgIndex < argsEndExclusive) { + currentArgIndex = when (val expr = encodingExpressions[currentArgIndex]) { + is HasStartAndEnd -> expr.endExclusive + else -> currentArgIndex + 1 + } + numArgs++ + } + if (numArgs > macro.signature.size) { + throw IonException("Too many arguments. Expected ${macro.signature.size}, but found $numArgs") + } + return argsIndices.toList() + } +} diff --git a/src/main/java/com/amazon/ion/impl/macro/MacroEvaluatorAsIonReader.kt b/src/main/java/com/amazon/ion/impl/macro/MacroEvaluatorAsIonReader.kt new file mode 100644 index 0000000000..4945e5c47a --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/macro/MacroEvaluatorAsIonReader.kt @@ -0,0 +1,200 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.macro + +import com.amazon.ion.* +import com.amazon.ion.impl.* +import java.math.BigDecimal +import java.math.BigInteger +import java.util.* + +/** + * This class is an example of how we might wrap the macro evaluator's [Expression] model, adapting it to an [IonReader]. + * + * TODO: + * - Consider merging this with [MacroEvaluator]. + * - Error handling is inconsistent with other [IonReader] implementations + * - Testing + */ +class MacroEvaluatorAsIonReader( + private val evaluator: MacroEvaluator, +) : IonReader { + + private class ContainerInfo { + @JvmField var currentFieldName: Expression.FieldName? = null + @JvmField var container: Expression.DataModelContainer? = null + } + private val containerStack = _Private_RecyclingStack(8) { ContainerInfo() } + + private var currentFieldName: Expression.FieldName? = null + private var currentValueExpression: Expression.DataModelValue? 
= null + + private var queuedFieldName: Expression.FieldName? = null + private var queuedValueExpression: Expression.DataModelValue? = null + + private fun queueNext() { + queuedValueExpression = null + while (queuedValueExpression == null) { + val nextCandidate = evaluator.expandNext() ?: return + when (nextCandidate) { + is Expression.FieldName -> queuedFieldName = nextCandidate + is Expression.DataModelValue -> queuedValueExpression = nextCandidate + } + } + } + + @Deprecated("Deprecated in Java") + override fun hasNext(): Boolean { + if (queuedValueExpression == null) queueNext() + return queuedValueExpression != null + } + + override fun next(): IonType? { + if (!hasNext()) { + currentValueExpression = null + return null + } + currentValueExpression = queuedValueExpression + currentFieldName = queuedFieldName + queuedValueExpression = null + return getType() + } + + /** + * Transcodes the e-expression argument expressions provided to this MacroEvaluator + * without evaluation. + * @param writer the writer to which the expressions will be transcoded. + */ + fun transcodeArgumentsTo(writer: MacroAwareIonWriter) { + var index = 0 + val arguments: List = evaluator.getArguments() + val numberOfContainerEndsAtExpressionIndex = IntArray(arguments.size + 1) + + while (index < arguments.size) { + for (i in 0 until numberOfContainerEndsAtExpressionIndex[index]) { + writer.stepOut() + } + when (val argument = arguments[index]) { + is Expression.DataModelContainer -> { + if (hasAnnotations()) { + writer.setTypeAnnotationSymbols(*typeAnnotationSymbols!!) 
+ } + writer.stepIn(argument.type) + numberOfContainerEndsAtExpressionIndex[argument.endExclusive]++ + } + is Expression.DataModelValue -> { + currentValueExpression = argument + writer.writeValue(this) + } + is Expression.FieldName -> { + queuedFieldName = argument + writer.setFieldNameSymbol(argument.value) + } + is Expression.EExpression -> { + writer.startMacro(argument.macro) + numberOfContainerEndsAtExpressionIndex[argument.endExclusive]++ + } + is Expression.ExpressionGroup -> { + writer.startExpressionGroup() + numberOfContainerEndsAtExpressionIndex[argument.endExclusive]++ + } + else -> throw IllegalStateException("Unexpected branch") + } + index++ + } + for (i in 0 until numberOfContainerEndsAtExpressionIndex[index]) { + writer.stepOut() + } + } + + override fun stepIn() { + // This is essentially a no-op for Lists and SExps + containerStack.peek()?.currentFieldName = this.currentFieldName + + val containerToStepInto = currentValueExpression + evaluator.stepIn() + containerStack.push { + it.container = containerToStepInto as Expression.DataModelContainer + it.currentFieldName = null + } + currentFieldName = null + currentValueExpression = null + queuedFieldName = null + queuedValueExpression = null + } + + override fun stepOut() { + evaluator.stepOut() + containerStack.pop() + // This is essentially a no-op for Lists and SExps + currentFieldName = containerStack.peek()?.currentFieldName + currentValueExpression = null // Must call `next()` to get the next value + queuedFieldName = null + queuedValueExpression = null + } + + override fun close() { /* Nothing to do (yet) */ } + override fun asFacet(facetType: Class?): Nothing? = null + override fun getDepth(): Int = containerStack.size() + override fun getSymbolTable(): SymbolTable? = null + + override fun getType(): IonType? 
= currentValueExpression?.type + + fun hasAnnotations(): Boolean = currentValueExpression != null && currentValueExpression!!.annotations.isNotEmpty() + + override fun getTypeAnnotations(): Array? = currentValueExpression?.annotations?.let { Array(it.size) { i -> it[i].assumeText() } } + override fun getTypeAnnotationSymbols(): Array? = currentValueExpression?.annotations?.toTypedArray() + // TODO: Make this into an iterator that unwraps the SymbolTokens as it goes instead of allocating a new list + override fun iterateTypeAnnotations(): MutableIterator { + return currentValueExpression?.annotations?.mapTo(mutableListOf()) { it.assumeText() }?.iterator() + ?: return Collections.emptyIterator() + } + + override fun isInStruct(): Boolean = containerStack.peek()?.container?.type == IonType.STRUCT + + override fun getFieldId(): Int = currentFieldName?.value?.sid ?: 0 + override fun getFieldName(): String? = currentFieldName?.value?.text + override fun getFieldNameSymbol(): SymbolToken? = currentFieldName?.value + + override fun isNullValue(): Boolean = currentValueExpression is Expression.NullValue + override fun booleanValue(): Boolean = (currentValueExpression as Expression.BoolValue).value + + override fun getIntegerSize(): IntegerSize { + // TODO: Make this more efficient, more precise + return when (val intExpression = currentValueExpression as Expression.IntValue) { + is Expression.LongIntValue -> if (intExpression.value.toInt().toLong() == intExpression.value) { + IntegerSize.INT + } else { + IntegerSize.LONG + } + is Expression.BigIntValue -> IntegerSize.BIG_INTEGER + } + } + + /** TODO: Throw on data loss */ + override fun intValue(): Int = longValue().toInt() + + override fun longValue(): Long = when (val intExpression = currentValueExpression as Expression.IntValue) { + is Expression.LongIntValue -> intExpression.value + is Expression.BigIntValue -> intExpression.value.longValueExact() + } + + override fun bigIntegerValue(): BigInteger = when (val 
intExpression = currentValueExpression as Expression.IntValue) { + is Expression.LongIntValue -> intExpression.value.toBigInteger() + is Expression.BigIntValue -> intExpression.value + } + + override fun doubleValue(): Double = (currentValueExpression as Expression.FloatValue).value + override fun bigDecimalValue(): BigDecimal = (currentValueExpression as Expression.DecimalValue).value + override fun decimalValue(): Decimal = Decimal.valueOf(bigDecimalValue()) + override fun timestampValue(): Timestamp = (currentValueExpression as Expression.TimestampValue).value + override fun dateValue(): Date = timestampValue().dateValue() + override fun stringValue(): String = (currentValueExpression as Expression.TextValue).stringValue + override fun symbolValue(): SymbolToken = (currentValueExpression as Expression.SymbolValue).value + override fun byteSize(): Int = (currentValueExpression as Expression.LobValue).value.size + override fun newBytes(): ByteArray = (currentValueExpression as Expression.LobValue).value.copyOf() + + override fun getBytes(buffer: ByteArray?, offset: Int, len: Int): Int { + TODO("Not yet implemented") + } +} diff --git a/src/main/java/com/amazon/ion/impl/macro/MacroMatcher.java b/src/main/java/com/amazon/ion/impl/macro/MacroMatcher.java new file mode 100644 index 0000000000..7bf2edcfcd --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/macro/MacroMatcher.java @@ -0,0 +1,257 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.macro; + +import com.amazon.ion.IonReader; +import com.amazon.ion.IonType; +import com.amazon.ion.IonValue; +import com.amazon.ion.system.IonReaderBuilder; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Iterator; +import java.util.function.Function; + +/** + * Matches source data to macro definitions. 
+ * TODO not supported yet: nested invocations + */ +public class MacroMatcher { + + private final TemplateMacro macro; + private final String name; + + /** + * Creates a matcher for the given TDL text. + * @param macroText the TDL text that defines a single macro. + * @param macroTable the macro table's mapping function. + */ + public MacroMatcher(String macroText, Function macroTable) { + try (IonReader macroReader = IonReaderBuilder.standard().build(macroText)) { + MacroCompiler compiler = new MacroCompiler(macroTable::apply, new ReaderAdapterIonReader(macroReader)); + macroReader.next(); + macro = compiler.compileMacro(); + name = compiler.getMacroName(); + } catch (IOException e) { + throw new IllegalStateException(e); + } + } + + /** + * Creates a matcher for the macro on which the given reader is positioned. + * @param macroReader the reader positioned on a TDL definition of a single macro. + * @param macroTable the macro table's mapping function. + */ + public MacroMatcher(IonReader macroReader, Function macroTable) { + MacroCompiler compiler = new MacroCompiler(macroTable::apply, new ReaderAdapterIonReader(macroReader)); + macro = compiler.compileMacro(); + name = compiler.getMacroName(); + } + + /** + * @return the name of the macro. + */ + public String name() { + return name; + } + + /** + * @return the macro. + */ + public TemplateMacro macro() { + return macro; + } + + private T requireExpressionType(Expression.TemplateBodyExpression expression, Class requiredType) { + if (requiredType.isAssignableFrom(expression.getClass())) { + return requiredType.cast(expression); + } + return null; + } + + /** + * Attempts to match the value on which the reader is positioned to this matcher's macro by iterating over the value + * and the macro body in lockstep until either an incompatibility is found (no match) or the value and body end + * (match). + * @param reader a reader positioned on a value to attempt to match to this matcher's macro. 
+ * @return true if the value matches this matcher's macro. + */ + public boolean match(IonReader reader) { + Iterator bodyIterator = macro.getBody().iterator(); + int index = 0; + int[] numberOfContainerEndsAtExpressionIndex = new int[macro.getBody().size() + 1]; + while (true) { + for (int i = 0; i < numberOfContainerEndsAtExpressionIndex[index]; i++) { + if (reader.next() != null) { + return false; + } + reader.stepOut(); + } + IonType type = reader.next(); + boolean hasNextExpression = bodyIterator.hasNext(); + Expression.TemplateBodyExpression expression = null; + if (hasNextExpression) { + expression = bodyIterator.next(); + } else if (type != null) { + return false; + } + if (type == null) { + if (expression instanceof Expression.FieldName) { + expression = bodyIterator.next(); + } + if (expression instanceof Expression.VariableRef) { + if (macro.getSignature().get(((Expression.VariableRef) expression).getSignatureIndex()).getCardinality().canBeVoid) { + // This is a trailing optional argument that is omitted in the source data, which is still + // considered compatible with the signature. + continue; + } + return false; + } else if (hasNextExpression) { + return false; + } + break; + } + index++; + if (expression instanceof Expression.FieldName) { + if (!((Expression.FieldName) expression).getValue().assumeText().equals(reader.getFieldName())) { + return false; + } + if (!bodyIterator.hasNext()) { + throw new IllegalStateException("dangling field name"); + } + expression = bodyIterator.next(); + index++; + } + if (expression instanceof Expression.VariableRef) { + // For now, a variable matches any value at the current position. + // TODO check cardinality and encoding type. 
+ continue; + } + if (expression instanceof Expression.ExpressionGroup) { + throw new UnsupportedOperationException("TODO: handle expression groups"); + } + if (expression instanceof Expression.MacroInvocation) { + throw new UnsupportedOperationException("TODO: handle nested invocations"); + } + if (expression instanceof Expression.DataModelValue) { + Expression.DataModelValue dataModelValueExpression = (Expression.DataModelValue) expression; + if (!Arrays.asList(reader.getTypeAnnotationSymbols()).equals(dataModelValueExpression.getAnnotations())) { + return false; + } + } + switch (type) { + case NULL: + Expression.NullValue nullValue = requireExpressionType(expression, Expression.NullValue.class); + if (nullValue == null) { + return false; + } + break; + case BOOL: + Expression.BoolValue boolValue = requireExpressionType(expression, Expression.BoolValue.class); + if (boolValue == null || (boolValue.getValue() != reader.booleanValue())) { + return false; + } + break; + case INT: + switch (reader.getIntegerSize()) { + case INT: + case LONG: + Expression.LongIntValue intValue = requireExpressionType(expression, Expression.LongIntValue.class); + if (intValue == null || (intValue.getValue() != reader.longValue())) { + return false; + } + break; + case BIG_INTEGER: + Expression.BigIntValue bigIntValue = requireExpressionType(expression, Expression.BigIntValue.class); + if (bigIntValue == null || (!bigIntValue.getBigIntegerValue().equals(reader.bigIntegerValue()))) { + return false; + } + break; + } + break; + case FLOAT: + Expression.FloatValue floatValue = requireExpressionType(expression, Expression.FloatValue.class); + if (floatValue == null || (Double.compare(floatValue.getValue(), reader.doubleValue()) != 0)) { + return false; + } + break; + case DECIMAL: + Expression.DecimalValue decimalValue = requireExpressionType(expression, Expression.DecimalValue.class); + if (decimalValue == null || (!decimalValue.getValue().equals(reader.bigDecimalValue()))) { + return 
false; + } + break; + case TIMESTAMP: + Expression.TimestampValue timestampValue = requireExpressionType(expression, Expression.TimestampValue.class); + if (timestampValue == null || (!timestampValue.getValue().equals(reader.timestampValue()))) { + return false; + } + break; + case SYMBOL: + Expression.SymbolValue symbolValue = requireExpressionType(expression, Expression.SymbolValue.class); + if (symbolValue == null || (!symbolValue.getValue().assumeText().equals(reader.symbolValue().assumeText()))) { + return false; + } + break; + case STRING: + Expression.StringValue stringValue = requireExpressionType(expression, Expression.StringValue.class); + if (stringValue == null || (!stringValue.getValue().equals(reader.stringValue()))) { + return false; + } + break; + case CLOB: + Expression.ClobValue clobValue = requireExpressionType(expression, Expression.ClobValue.class); + if (clobValue == null || (!Arrays.equals(clobValue.getValue(), reader.newBytes()))) { + return false; + } + break; + case BLOB: + Expression.BlobValue blobValue = requireExpressionType(expression, Expression.BlobValue.class); + if (blobValue == null || (!Arrays.equals(blobValue.getValue(), reader.newBytes()))) { + return false; + } + break; + case LIST: + reader.stepIn(); + Expression.ListValue listValue = requireExpressionType(expression, Expression.ListValue.class); + if (listValue == null) { + return false; + } + numberOfContainerEndsAtExpressionIndex[listValue.getEndExclusive()]++; + break; + case SEXP: + reader.stepIn(); + Expression.SExpValue sexpValue = requireExpressionType(expression, Expression.SExpValue.class); + if (sexpValue == null) { + return false; + } + numberOfContainerEndsAtExpressionIndex[sexpValue.getEndExclusive()]++; + break; + case STRUCT: + reader.stepIn(); + Expression.StructValue structValue = requireExpressionType(expression, Expression.StructValue.class); + if (structValue == null) { + return false; + } + 
numberOfContainerEndsAtExpressionIndex[structValue.getEndExclusive()]++; + break; + case DATAGRAM: + throw new IllegalStateException(); + } + } + return true; + } + + /** + * @see #match(IonReader) + * @param value the value to attempt to match. + * @return true if the value matches this matcher's macro. + */ + public boolean match(IonValue value) { + try (IonReader domReader = IonReaderBuilder.standard().build(value)) { + return match(domReader); + } catch (IOException e) { + throw new IllegalStateException(e); + } + } +} diff --git a/src/main/java/com/amazon/ion/impl/macro/MacroRef.kt b/src/main/java/com/amazon/ion/impl/macro/MacroRef.kt new file mode 100644 index 0000000000..9fd669a54f --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/macro/MacroRef.kt @@ -0,0 +1,22 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.macro + +/** + * A reference to a particular macro, either by name or by template id. + */ +sealed interface MacroRef { + // TODO: See if these could be inline value classes + @JvmInline value class ByName(val name: String) : MacroRef + // Ion is not limited to Int.MAX_VALUE macro addresses, but Java collection sizes are limited by Int.MAX_VALUE + @JvmInline value class ById(val id: Int) : MacroRef + // TODO: Since system macros have an independent address space, do we need to have a `SystemById` variant? + + companion object { + @JvmStatic + fun byId(id: Int): MacroRef = ById(id) + + @JvmStatic + fun byName(name: String): MacroRef = ByName(name) + } +} diff --git a/src/main/java/com/amazon/ion/impl/macro/MacroTable.kt b/src/main/java/com/amazon/ion/impl/macro/MacroTable.kt new file mode 100644 index 0000000000..1ec4b9fc8e --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/macro/MacroTable.kt @@ -0,0 +1,16 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.macro + +interface MacroTable { // Lookup from a MacroRef address to a Macro. + fun get(address: MacroRef): Macro? // Nullable result; MutableMacroTable treats null as "no mapping here" and asks its antecedent. + fun putAll(mappings: Map): Unit = throw UnsupportedOperationException() // Default rejects additions; implementations that accept them override this. + + companion object { + @JvmStatic + @get:JvmName("empty") + val EMPTY = object : MacroTable { // Table whose get() returns null for every address; the getter is renamed to `empty` for JVM callers. + override fun get(address: MacroRef): Macro? = null + } + } +} diff --git a/src/main/java/com/amazon/ion/impl/macro/MutableMacroTable.kt b/src/main/java/com/amazon/ion/impl/macro/MutableMacroTable.kt new file mode 100644 index 0000000000..62b3c0416f --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/macro/MutableMacroTable.kt @@ -0,0 +1,18 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.macro + +// This needs modeling attention. +// - do we want to model an antecedent chain, or have a flat mutable table? +// - antecedent allows cheap reference to immutable system table or empty table +// - flat mutable table allows simpler implementation, GC of unneeded values, constant number of lookups +// - at some point we'll need the capability to communicate immutable encoding contexts to interpret Ion bytes, but +// this is neither here nor there +class MutableMacroTable(private val antecedent: MacroTable) : MacroTable { // MacroTable that keeps its own mappings and falls back to `antecedent`. + private val macroTable = HashMap() // Mappings added through putAll. + + override fun get(address: MacroRef): Macro? 
{ + return macroTable[address] ?: antecedent.get(address) // A local mapping wins; otherwise defer to the antecedent (which may also return null). + } + override fun putAll(mappings: Map) = macroTable.putAll(mappings) // Stores into the local map only; the antecedent is never modified. +} diff --git a/src/main/java/com/amazon/ion/impl/macro/ParameterFactory.kt b/src/main/java/com/amazon/ion/impl/macro/ParameterFactory.kt new file mode 100644 index 0000000000..3678ddc528 --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/macro/ParameterFactory.kt @@ -0,0 +1,19 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.macro + +import com.amazon.ion.impl.macro.Macro.* + +/** + * Convenience functions for concisely creating [Macro.Parameter]s. + */ +object ParameterFactory { + @JvmStatic + fun zeroToManyTagged(name: String) = Parameter(name, ParameterEncoding.Tagged, ParameterCardinality.ZeroOrMore) + @JvmStatic + fun zeroOrOneTagged(name: String) = Parameter(name, ParameterEncoding.Tagged, ParameterCardinality.ZeroOrOne) + @JvmStatic + fun oneToManyTagged(name: String) = Parameter(name, ParameterEncoding.Tagged, ParameterCardinality.OneOrMore) + @JvmStatic + fun exactlyOneTagged(name: String) = Parameter(name, ParameterEncoding.Tagged, ParameterCardinality.ExactlyOne) +} diff --git a/src/main/java/com/amazon/ion/impl/macro/ReaderAdapter.kt b/src/main/java/com/amazon/ion/impl/macro/ReaderAdapter.kt new file mode 100644 index 0000000000..3a0715a55a --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/macro/ReaderAdapter.kt @@ -0,0 +1,42 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.macro + +import com.amazon.ion.* +import java.math.BigDecimal +import java.math.BigInteger + +/** + * Provides a single abstraction over any Ion reader, e.g. [IonReader] or IonReaderContinuableCore. + * @see ReaderAdapterIonReader + * @see ReaderAdapterContinuable + */ +internal interface ReaderAdapter { + + fun hasAnnotations(): Boolean + fun fieldNameSymbol(): SymbolToken + fun encodingType(): IonType? + + /** Returns true if positioned on a value; false if at container or stream end. */ + fun nextValue(): Boolean + fun getDepth(): Int + fun stringValue(): String + fun intValue(): Int + fun decimalValue(): BigDecimal + fun ionDecimalValue(): Decimal + fun doubleValue(): Double + fun stepIntoContainer() + fun stepOutOfContainer() + fun getTypeAnnotationSymbols(): List + fun integerSize(): IntegerSize? 
+ fun booleanValue(): Boolean + fun isNullValue(): Boolean + fun longValue(): Long + fun bigIntegerValue(): BigInteger + fun timestampValue(): Timestamp + fun newBytes(): ByteArray + fun symbolValue(): SymbolToken + fun getIntegerSize(): IntegerSize + fun getFieldNameSymbol(): SymbolToken + fun isInStruct(): Boolean +} diff --git a/src/main/java/com/amazon/ion/impl/macro/ReaderAdapterContinuable.kt b/src/main/java/com/amazon/ion/impl/macro/ReaderAdapterContinuable.kt new file mode 100644 index 0000000000..cece480f7b --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/macro/ReaderAdapterContinuable.kt @@ -0,0 +1,134 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.macro + +import com.amazon.ion.* +import com.amazon.ion.impl.* +import java.math.BigDecimal +import java.math.BigInteger + +/** + * A [ReaderAdapter] that wraps an [IonReaderContinuableCore]. + */ +internal class ReaderAdapterContinuable(val reader: IonReaderContinuableCore) : ReaderAdapter { + + // TODO: Make sure that we can throw exceptions if there's an over-sized value. + + override fun hasAnnotations(): Boolean = reader.hasAnnotations() + + override fun fieldNameSymbol(): SymbolToken = reader.fieldNameSymbol + + override fun encodingType(): IonType? = reader.encodingType + + override fun nextValue(): Boolean { + val event = reader.nextValue() + return event != IonCursor.Event.NEEDS_DATA && event != IonCursor.Event.END_CONTAINER + } + + override fun getDepth(): Int = reader.depth + + /** + * Ensures that the value on which the reader is positioned is fully buffered. + */ + private fun prepareValue() { + // TODO performance: fill entire expression groups up-front so that the reader will usually not be in slow + // mode when this is called. 
+ if (reader.fillValue() != IonCursor.Event.VALUE_READY) { + throw IonException("TODO: support continuable reading and oversize value handling via this adapter.") + } + } + + override fun stringValue(): String { + prepareValue() + return reader.stringValue() + } + + override fun intValue(): Int { + prepareValue() + return reader.intValue() + } + + override fun decimalValue(): BigDecimal { + prepareValue() + return reader.decimalValue() + } + + override fun ionDecimalValue(): Decimal { + prepareValue() + return reader.decimalValue() + } + + override fun doubleValue(): Double { + prepareValue() + return reader.doubleValue() + } + + override fun stepIntoContainer() { + // Note: the following line ensures the entire container is buffered. This improves performance when reading the + // container's elements because there is less work to do per element. However, very large containers would + // increase memory usage. The current implementation already assumes this risk by eagerly materializing + // macro invocation arguments. However, if that is changed, then removing the following line should also be + // considered. 
+ prepareValue() + reader.stepIntoContainer() + } + + override fun stepOutOfContainer() { + reader.stepOutOfContainer() + } + + override fun getTypeAnnotationSymbols(): List { + if (!reader.hasAnnotations()) { + return emptyList() + } + val annotations = arrayListOf() + reader.consumeAnnotationTokens { annotations += it } + return annotations + } + + override fun symbolValue(): SymbolToken { + prepareValue() + return reader.symbolValue() + } + + override fun getIntegerSize(): IntegerSize { + prepareValue() + return reader.integerSize + } + + override fun getFieldNameSymbol(): SymbolToken = reader.fieldNameSymbol + + override fun isInStruct(): Boolean = reader.isInStruct + + override fun newBytes(): ByteArray { + prepareValue() + return reader.newBytes() + } + + override fun timestampValue(): Timestamp { + prepareValue() + return reader.timestampValue() + } + + override fun bigIntegerValue(): BigInteger { + prepareValue() + return reader.bigIntegerValue() + } + + override fun longValue(): Long { + prepareValue() + return reader.longValue() + } + + override fun isNullValue(): Boolean = reader.isNullValue + + override fun booleanValue(): Boolean { + prepareValue() + return reader.booleanValue() + } + + override fun integerSize(): IntegerSize? { + prepareValue() + return reader.integerSize + } +} diff --git a/src/main/java/com/amazon/ion/impl/macro/ReaderAdapterIonReader.kt b/src/main/java/com/amazon/ion/impl/macro/ReaderAdapterIonReader.kt new file mode 100644 index 0000000000..91683b68ad --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/macro/ReaderAdapterIonReader.kt @@ -0,0 +1,61 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.macro + +import com.amazon.ion.* +import java.math.BigDecimal +import java.math.BigInteger + +/** + * A [ReaderAdapter] that wraps an [IonReader]. 
+ */ +internal class ReaderAdapterIonReader(val reader: IonReader) : ReaderAdapter { + + // TODO performance: when there are annotations, this causes a redundant allocation if the allocations are + // later consumed. + override fun hasAnnotations(): Boolean = reader.typeAnnotations.isNotEmpty() + + override fun fieldNameSymbol(): SymbolToken = reader.fieldNameSymbol + + override fun encodingType(): IonType? = reader.type + + override fun nextValue(): Boolean = reader.next() != null + override fun getDepth(): Int = reader.depth + + override fun stringValue(): String = reader.stringValue() + + override fun intValue(): Int = reader.intValue() + + override fun decimalValue(): BigDecimal = reader.bigDecimalValue() + override fun ionDecimalValue(): Decimal = reader.decimalValue() + + override fun doubleValue(): Double = reader.doubleValue() + + override fun stepIntoContainer() = reader.stepIn() + + override fun stepOutOfContainer() = reader.stepOut() + + override fun getTypeAnnotationSymbols(): List = reader.typeAnnotationSymbols.asList() + + override fun integerSize(): IntegerSize? 
= reader.integerSize + + override fun booleanValue(): Boolean = reader.booleanValue() + + override fun isNullValue(): Boolean = reader.isNullValue + + override fun longValue(): Long = reader.longValue() + + override fun bigIntegerValue(): BigInteger = reader.bigIntegerValue() + + override fun timestampValue(): Timestamp = reader.timestampValue() + + override fun newBytes(): ByteArray = reader.newBytes() + + override fun symbolValue(): SymbolToken = reader.symbolValue() + + override fun getIntegerSize(): IntegerSize = reader.integerSize + + override fun getFieldNameSymbol(): SymbolToken = reader.fieldNameSymbol + + override fun isInStruct(): Boolean = reader.isInStruct +} diff --git a/src/main/java/com/amazon/ion/impl/macro/SystemMacro.kt b/src/main/java/com/amazon/ion/impl/macro/SystemMacro.kt new file mode 100644 index 0000000000..eaa7e89dc7 --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/macro/SystemMacro.kt @@ -0,0 +1,274 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.macro + +import com.amazon.ion.SystemSymbols +import com.amazon.ion.impl.* +import com.amazon.ion.impl.SystemSymbols_1_1.* +import com.amazon.ion.impl.macro.ExpressionBuilderDsl.Companion.templateBody +import com.amazon.ion.impl.macro.ParameterFactory.exactlyOneTagged +import com.amazon.ion.impl.macro.ParameterFactory.zeroOrOneTagged +import com.amazon.ion.impl.macro.ParameterFactory.zeroToManyTagged + +/** + * Macros that are built in, rather than being defined by a template. + */ +enum class SystemMacro( + val id: Byte, + val systemSymbol: SystemSymbols_1_1, + override val signature: List, + override val body: List? = null +) : Macro { + // Technically not system macros, but special forms. However, it's easier to model them as if they are macros in TDL. + // We give them an ID of -1 to distinguish that they are not addressable outside TDL. 
+ IfNone(-1, IF_NONE, listOf(zeroToManyTagged("stream"), zeroToManyTagged("true_branch"), zeroToManyTagged("false_branch"))), + IfSome(-1, IF_SOME, listOf(zeroToManyTagged("stream"), zeroToManyTagged("true_branch"), zeroToManyTagged("false_branch"))), + IfSingle(-1, IF_SINGLE, listOf(zeroToManyTagged("stream"), zeroToManyTagged("true_branch"), zeroToManyTagged("false_branch"))), + IfMulti(-1, IF_MULTI, listOf(zeroToManyTagged("stream"), zeroToManyTagged("true_branch"), zeroToManyTagged("false_branch"))), + + // The real macros + None(0, NONE, emptyList()), + Values(1, VALUES, listOf(zeroToManyTagged("values"))), + Default( + 2, DEFAULT, listOf(zeroToManyTagged("expr"), zeroToManyTagged("default_expr")), + templateBody { + macro(IfNone) { variable(0); variable(1); variable(0) } + } + ), + Meta(3, META, listOf(zeroToManyTagged("values")), templateBody { macro(None) {} }), + Repeat(4, REPEAT, listOf(exactlyOneTagged("n"), zeroToManyTagged("value"))), + Flatten(5, FLATTEN, listOf(zeroToManyTagged("values")), null), // TODO: flatten + Delta(6, DELTA, listOf(zeroToManyTagged("deltas"))), + Sum(7, SUM, listOf(exactlyOneTagged("a"), exactlyOneTagged("b"))), + + Annotate(8, ANNOTATE, listOf(zeroToManyTagged("ann"), exactlyOneTagged("value"))), + MakeString(9, MAKE_STRING, listOf(zeroToManyTagged("text"))), + MakeSymbol(10, MAKE_SYMBOL, listOf(zeroToManyTagged("text"))), + MakeDecimal(11, MAKE_DECIMAL, listOf(exactlyOneTagged("coefficient"), exactlyOneTagged("exponent"))), + MakeTimestamp( + 12, MAKE_TIMESTAMP, + listOf( + exactlyOneTagged("year"), + zeroOrOneTagged("month"), + zeroOrOneTagged("day"), + zeroOrOneTagged("hour"), + zeroOrOneTagged("minute"), + zeroOrOneTagged("second"), + zeroOrOneTagged("offset_minutes"), + ) + ), + MakeBlob(13, MAKE_BLOB, listOf(zeroToManyTagged("bytes"))), + MakeList(14, MAKE_LIST, listOf(zeroToManyTagged("sequences")), null), // TODO: make_list + MakeSExp(15, MAKE_SEXP, listOf(zeroToManyTagged("sequences")), null), // TODO: make_sexp + 
MakeField( + 16, MAKE_FIELD, + listOf( + Macro.Parameter("field_name", Macro.ParameterEncoding.FlexSym, Macro.ParameterCardinality.ExactlyOne), exactlyOneTagged("value") + ) + ), + MakeStruct(17, MAKE_STRUCT, listOf(zeroToManyTagged("structs")), null), // TODO: make_struct + ParseIon(18, PARSE_ION, listOf(zeroToManyTagged("data")), null), // TODO: parse_ion + + /** + * ```ion + * (macro set_symbols (symbols*) + * $ion::(module _ + * (symbol_table [(%symbols)]) + * (macro_table _) + * )) + * ``` + */ + SetSymbols( + 19, SET_SYMBOLS, listOf(zeroToManyTagged("symbols")), + templateBody { + annotated(ION, ::sexp) { + symbol(MODULE) + symbol(SystemSymbols.DEFAULT_MODULE) + sexp { + symbol(SYMBOL_TABLE) + list { variable(0) } + } + sexp { + symbol(MACRO_TABLE) + symbol(SystemSymbols.DEFAULT_MODULE) + } + } + } + ), + + /** + * ```ion + * (macro add_symbols (symbols*) + * $ion::(module _ + * (symbol_table _ [(%symbols)]) + * (macro_table _) + * )) + * ``` + */ + AddSymbols( + 20, ADD_SYMBOLS, listOf(zeroToManyTagged("symbols")), + templateBody { + annotated(ION, ::sexp) { + symbol(MODULE) + symbol(com.amazon.ion.SystemSymbols.DEFAULT_MODULE) + sexp { + symbol(SYMBOL_TABLE) + symbol(com.amazon.ion.SystemSymbols.DEFAULT_MODULE) + list { variable(0) } + } + sexp { + symbol(MACRO_TABLE) + symbol(SystemSymbols.DEFAULT_MODULE) + } + } + } + ), + + /** + * ```ion + * (macro set_macros (macros*) + * $ion::(module _ + * (symbol_table _) + * (macro_table (%macros)) + * )) + * ``` + */ + SetMacros( + 21, SET_MACROS, listOf(zeroToManyTagged("macros")), + templateBody { + annotated(ION, ::sexp) { + symbol(MODULE) + symbol(SystemSymbols.DEFAULT_MODULE) + sexp { + symbol(SYMBOL_TABLE) + symbol(SystemSymbols.DEFAULT_MODULE) + } + sexp { + symbol(MACRO_TABLE) + variable(0) + } + } + } + ), + + /** + * ```ion + * (macro add_macros (macros*) + * $ion::(module _ + * (symbol_table _) + * (macro_table _ (%macros)) + * )) + * ``` + */ + AddMacros( + 22, ADD_MACROS, 
listOf(zeroToManyTagged("macros")), + templateBody { + annotated(ION, ::sexp) { + symbol(MODULE) + symbol(SystemSymbols.DEFAULT_MODULE) + sexp { + symbol(SYMBOL_TABLE) + symbol(SystemSymbols.DEFAULT_MODULE) + } + sexp { + symbol(MACRO_TABLE) + symbol(SystemSymbols.DEFAULT_MODULE) + variable(0) + } + } + } + ), + + /** + * ```ion + * (macro use (catalog_key version?) + * $ion::(module _ + * (import the_module catalog_key (.default (%version) 1)) + * (symbol_table _ the_module) + * (macro_table _ the_module) + * )) + * ``` + */ + Use( + 23, USE, listOf(exactlyOneTagged("catalog_key"), zeroOrOneTagged("version")), + templateBody { + val theModule = _Private_Utils.newSymbolToken("the_module") + annotated(ION, ::sexp) { + symbol(MODULE) + symbol(SystemSymbols.DEFAULT_MODULE) + sexp { + symbol(IMPORT) + symbol(theModule) + variable(0) + // This is equivalent to `(.default (%version) 1)`, but eliminates a layer of indirection. + macro(IfNone) { + variable(1) + int(1) + variable(1) + } + } + sexp { + symbol(SYMBOL_TABLE) + symbol(SystemSymbols.DEFAULT_MODULE) + symbol(theModule) + } + sexp { + symbol(MACRO_TABLE) + symbol(SystemSymbols.DEFAULT_MODULE) + symbol(theModule) + } + } + } + ), + ; + + val macroName: String get() = this.systemSymbol.text + + override val dependencies: List + get() = body + ?.filterIsInstance() + ?.map(Expression.MacroInvocation::macro) + ?.distinct() + ?: emptyList() + + companion object : MacroTable { + + private val MACROS_BY_NAME: Map = SystemMacro.entries.associateBy { it.macroName } + + // TODO: Once all of the macros are implemented, replace this with an array as in SystemSymbols_1_1 + private val MACROS_BY_ID: Map = SystemMacro.entries + .filterNot { it.id < 0 } + .associateBy { it.id } + + @JvmStatic + fun size() = MACROS_BY_ID.size + + /** Gets a [SystemMacro] by its address in the system table; returns null for any [id] outside 0..[Byte.MAX_VALUE] rather than letting [Int.toByte] wrap it onto a valid address (e.g. 256 -> 0) */ + @JvmStatic + operator fun get(id: Int): SystemMacro? 
= if (id in 0..Byte.MAX_VALUE) MACROS_BY_ID[id.toByte()] else null + + /** Gets, by name, a [SystemMacro] with an address in the system table (i.e. that can be invoked as E-Expressions) */ + @JvmStatic + operator fun get(name: String): SystemMacro? = MACROS_BY_NAME[name]?.takeUnless { it.id < 0 } + + @JvmStatic + override operator fun get(address: MacroRef): SystemMacro? { + return when (address) { + is MacroRef.ById -> get(address.id) + is MacroRef.ByName -> get(address.name) + } + } + + /** Gets a [SystemMacro] by address or by name; lookups by name also include those which are not in the system table (i.e. special forms) */ + @JvmStatic + fun getMacroOrSpecialForm(ref: MacroRef): SystemMacro? { + return when (ref) { + is MacroRef.ById -> get(ref.id) + is MacroRef.ByName -> MACROS_BY_NAME[ref.name] + } + } + + @JvmStatic + val SYSTEM_MACRO_TABLE = this + } +} diff --git a/src/main/java/com/amazon/ion/impl/macro/TemplateMacro.kt b/src/main/java/com/amazon/ion/impl/macro/TemplateMacro.kt new file mode 100644 index 0000000000..bdcaaba01b --- /dev/null +++ b/src/main/java/com/amazon/ion/impl/macro/TemplateMacro.kt @@ -0,0 +1,34 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.macro + +/** + * Represents a template macro. A template macro is defined by a signature, and a list of template expressions. + * A template macro only gains a name and/or ID when it is added to a macro table. + */ +class TemplateMacro(override val signature: List, override val body: List) : + Macro { + // TODO: Consider rewriting the body of the macro if we discover that there are any macros invoked using only + // constants as arguments—either at compile time or lazily. 
+ // For example, the body of: (macro foo (x) (values (make_string "foo" "bar") x)) + // could be rewritten as: (values "foobar" x) + + private val cachedHashCode by lazy { signature.hashCode() * 31 + body.hashCode() } + override fun hashCode(): Int = cachedHashCode + + override fun equals(other: Any?): Boolean { + if (this === other) return true + if (other !is TemplateMacro) return false + // Check the hashCode as a quick check before we dive into the actual data. + if (cachedHashCode != other.cachedHashCode) return false + if (signature != other.signature) return false + if (body != other.body) return false + return true + } + + override val dependencies: List by lazy { + body.filterIsInstance() + .map { it.macro } + .distinct() + } +} diff --git a/src/main/java/com/amazon/ion/system/IonBinaryWriterBuilder_1_1.java b/src/main/java/com/amazon/ion/system/IonBinaryWriterBuilder_1_1.java new file mode 100644 index 0000000000..ede17960be --- /dev/null +++ b/src/main/java/com/amazon/ion/system/IonBinaryWriterBuilder_1_1.java @@ -0,0 +1,99 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.system; + +import com.amazon.ion.IonWriter; +import com.amazon.ion.impl.bin.LengthPrefixStrategy; + +/** + * The builder for creating {@link IonWriter}s emitting the 1.1 version of the Ion binary format. + *

+ * Builders may be configured once and reused to construct multiple + * objects. + *

+ * Instances of this class are not safe for use by multiple threads + * unless they are {@linkplain #immutable() immutable}. + * + */ +public interface IonBinaryWriterBuilder_1_1 extends IonWriterBuilder_1_1 { + + // TODO add auto-flush (see IonBinaryWriterBuilder.withAutoFlushEnabled) + // TODO consider adding stream-copy optimization (see IonBinaryWriterBuilder withStreamCopyOptimized) + // TODO consider adding user-configurable length prefix preallocation (see _Private_IonManagedBinaryWriterBuilder.withPaddedLengthPreallocation) + // TODO consider allowing symbol/macro table block size to be configured separately (see _Private_IonManagedBinaryWriterBuilder.withSymbolsBlockSize) + // TODO add Ion 1.1-specific configuration + + /** + * Gets the size of the blocks of memory the writer will allocate to hold encoded bytes between flushes. + * + * @return the block size currently configured. + * + * @see #setBlockSize(int) + * @see #withBlockSize(int) + */ + int getBlockSize(); + + /** + * Sets the size of the blocks of memory the writer will allocate to hold encoded bytes between flushes. + * + * @param size the block size in bytes. If unset, the default block size of 32768 bytes will be used. + * + * @see #getBlockSize() + * @see #withBlockSize(int) + */ + void setBlockSize(int size); + + /** + * Declares the size of the blocks of memory the writer will allocate to hold encoded bytes between flushes. + * + * @param size the block size in bytes. If unset, the default block size of 32768 bytes will be used. + * + * @return this instance, if mutable; + * otherwise a mutable copy of this instance. + * + * @see #getBlockSize() + * @see #setBlockSize(int) + */ + IonBinaryWriterBuilder_1_1 withBlockSize(int size); + + /** + * Gets the LengthPrefixStrategy that will be used to determine which containers will use a delimited encoding + * vs a length-prefixed encoding. 
+ * + * @return the LengthPrefixStrategy currently configured + * + * @see #setLengthPrefixStrategy(LengthPrefixStrategy) + * @see #withLengthPrefixStrategy(LengthPrefixStrategy) + */ + LengthPrefixStrategy getLengthPrefixStrategy(); + + /** + * Sets the LengthPrefixStrategy that will be used to determine which containers will use a delimited encoding + * vs a length-prefixed encoding. + * + * @param lengthPrefixStrategy If unset, the default strategy of {@link LengthPrefixStrategy#ALWAYS_PREFIXED} + * will be used. + * + * @see #getLengthPrefixStrategy() + * @see #withLengthPrefixStrategy(LengthPrefixStrategy) + */ + void setLengthPrefixStrategy(LengthPrefixStrategy lengthPrefixStrategy); + + /** + * Declares the LengthPrefixStrategy that will be used to determine which containers will use a delimited + * encoding vs a length-prefixed encoding. + * + * @param lengthPrefixStrategy If unset, the default strategy of {@link LengthPrefixStrategy#ALWAYS_PREFIXED} + * will be used. + * + * @return this instance, if mutable; + * otherwise a mutable copy of this instance. + * + * @see #getLengthPrefixStrategy() + * @see #setLengthPrefixStrategy(LengthPrefixStrategy) + */ + IonBinaryWriterBuilder_1_1 withLengthPrefixStrategy(LengthPrefixStrategy lengthPrefixStrategy); + + // NOTE: Unlike in Ion 1.0, local symbol table append is always enabled in the Ion 1.1 writers. + // NOTE: Unlike in Ion 1.0, writing float 32 is always enabled in the Ion 1.1 writers. +} diff --git a/src/main/java/com/amazon/ion/system/IonTextWriterBuilder.java b/src/main/java/com/amazon/ion/system/IonTextWriterBuilder.java index 40785cd1f4..685a4839c1 100644 --- a/src/main/java/com/amazon/ion/system/IonTextWriterBuilder.java +++ b/src/main/java/com/amazon/ion/system/IonTextWriterBuilder.java @@ -1,18 +1,5 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). 
- * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.system; import static com.amazon.ion.system.IonWriterBuilder.InitialIvmHandling.SUPPRESS; @@ -21,13 +8,15 @@ import com.amazon.ion.IonWriter; import com.amazon.ion.SymbolTable; import com.amazon.ion.Timestamp; -import com.amazon.ion.impl._Private_IonTextWriterBuilder; +import com.amazon.ion.impl._Private_IonTextWriterBuilder_1_0; import com.amazon.ion.impl._Private_Utils; import java.io.OutputStream; import java.nio.charset.Charset; /** - * The builder for creating {@link IonWriter}s emitting the Ion text syntax. + * The builder for creating {@link IonWriter}s emitting the Ion text syntax + * in any encoding version. Subclasses may provide version-specific + * configuration. *

* WARNING: This class should not be extended by code outside of * this library. @@ -161,7 +150,7 @@ public CharSequence getCharSequence() { */ public static IonTextWriterBuilder standard() { - return _Private_IonTextWriterBuilder.standard(); + return _Private_IonTextWriterBuilder_1_0.standard(); } /** @@ -261,13 +250,13 @@ protected IonTextWriterBuilder(IonTextWriterBuilder that) @Override - public final IonTextWriterBuilder withCatalog(IonCatalog catalog) + public IonTextWriterBuilder withCatalog(IonCatalog catalog) { return super.withCatalog(catalog); } @Override - public final IonTextWriterBuilder withImports(SymbolTable... imports) + public IonTextWriterBuilder withImports(SymbolTable... imports) { return super.withImports(imports); } @@ -331,7 +320,7 @@ public void setCharset(Charset charset) * @see #getCharset() * @see #setCharset(Charset) */ - public final IonTextWriterBuilder withCharset(Charset charset) + public IonTextWriterBuilder withCharset(Charset charset) { IonTextWriterBuilder b = mutable(); b.setCharset(charset); @@ -344,7 +333,7 @@ public final IonTextWriterBuilder withCharset(Charset charset) * @return this instance, if mutable; * otherwise a mutable copy of this instance. 
*/ - public final IonTextWriterBuilder withCharsetAscii() + public IonTextWriterBuilder withCharsetAscii() { return withCharset(ASCII); } @@ -370,7 +359,7 @@ public final IonTextWriterBuilder withCharsetAscii() * */ - public final IonTextWriterBuilder withMinimalSystemData() + public IonTextWriterBuilder withMinimalSystemData() { IonTextWriterBuilder b = mutable(); b.setInitialIvmHandling(SUPPRESS); @@ -536,7 +525,7 @@ public void setIvmMinimizing(IvmMinimizing minimizing) * */ - public final IonTextWriterBuilder + public IonTextWriterBuilder withIvmMinimizing(IvmMinimizing minimizing) { IonTextWriterBuilder b = mutable(); @@ -598,7 +587,7 @@ public void setLstMinimizing(LstMinimizing minimizing) * */ - public final IonTextWriterBuilder + public IonTextWriterBuilder withLstMinimizing(LstMinimizing minimizing) { IonTextWriterBuilder b = mutable(); @@ -655,7 +644,7 @@ public void setLongStringThreshold(int threshold) * @return this instance, if mutable; * otherwise a mutable copy of this instance. */ - public final IonTextWriterBuilder withLongStringThreshold(int threshold) + public IonTextWriterBuilder withLongStringThreshold(int threshold) { IonTextWriterBuilder b = mutable(); b.setLongStringThreshold(threshold); @@ -708,7 +697,7 @@ public void setNewLineType(NewLineType newLineType) * @return this instance, if mutable; * otherwise a mutable copy of this instance. 
*/ - public final IonTextWriterBuilder withNewLineType(NewLineType newLineType) + public IonTextWriterBuilder withNewLineType(NewLineType newLineType) { IonTextWriterBuilder b = mutable(); b.setNewLineType(newLineType); @@ -758,7 +747,7 @@ public void setWriteTopLevelValuesOnNewLines(boolean writeTopLevelValuesOnNewLin * @see #getWriteTopLevelValuesOnNewLines() * @see #setWriteTopLevelValuesOnNewLines(boolean) */ - public final IonTextWriterBuilder withWriteTopLevelValuesOnNewLines(boolean writeTopLevelValuesOnNewLines) + public IonTextWriterBuilder withWriteTopLevelValuesOnNewLines(boolean writeTopLevelValuesOnNewLines) { IonTextWriterBuilder b = mutable(); b.setWriteTopLevelValuesOnNewLines(writeTopLevelValuesOnNewLines); @@ -800,7 +789,7 @@ public void setMaximumTimestampPrecisionDigits(int maximumTimestampPrecisionDigi * @see #getMaximumTimestampPrecisionDigits() * @see #setMaximumTimestampPrecisionDigits(int) */ - public final IonTextWriterBuilder withMaximumTimestampPrecisionDigits(int maximumTimestampPrecisionDigits) { + public IonTextWriterBuilder withMaximumTimestampPrecisionDigits(int maximumTimestampPrecisionDigits) { IonTextWriterBuilder b = mutable(); b.setMaximumTimestampPrecisionDigits(maximumTimestampPrecisionDigits); return b; diff --git a/src/main/java/com/amazon/ion/system/IonTextWriterBuilder_1_1.java b/src/main/java/com/amazon/ion/system/IonTextWriterBuilder_1_1.java new file mode 100644 index 0000000000..50ab297c23 --- /dev/null +++ b/src/main/java/com/amazon/ion/system/IonTextWriterBuilder_1_1.java @@ -0,0 +1,368 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.system; + +import com.amazon.ion.IonWriter; +import com.amazon.ion.Timestamp; + +import java.nio.charset.Charset; + +/** + * The builder for creating {@link IonWriter}s emitting the 1.1 version of the Ion text format. + *

+ * Builders may be configured once and reused to construct multiple + * objects. + *

+ * Instances of this class are not safe for use by multiple threads + * unless they are {@linkplain #immutable() immutable}. + * + */ +public interface IonTextWriterBuilder_1_1 extends IonWriterBuilder_1_1 { + // TODO add any configuration specific to writing 1.1 text. + + /** + * Gets the charset denoting the output encoding. + * Only ASCII and UTF-8 are supported. + * + * @return may be null, denoting the default of UTF-8. + * + * @see #setCharset(Charset) + * @see #withCharset(Charset) + */ + Charset getCharset(); + + /** + * Sets the charset denoting the output encoding. + * Only ASCII and UTF-8 are supported. + * + * @param charset may be null, denoting the default of UTF-8. + * + * @see #getCharset() + * @see #withCharset(Charset) + * + * @throws UnsupportedOperationException if this is immutable. + */ + void setCharset(Charset charset); + + /** + * Declares the charset denoting the output encoding, + * returning a new mutable builder if this is immutable. + * + * @param charset may be null, denoting the default of UTF-8. + * + * @return this instance, if mutable; + * otherwise a mutable copy of this instance. + * + * @see #getCharset() + * @see #setCharset(Charset) + */ + IonTextWriterBuilder_1_1 withCharset(Charset charset); + + /** + * Declares the output encoding to be {@code US-ASCII}. + * + * @return this instance, if mutable; + * otherwise a mutable copy of this instance. + */ + IonTextWriterBuilder_1_1 withCharsetAscii(); + + /** + * Declares that this builder should minimize system-level output + * (Ion version markers and local symbol tables). + *

+ * This is equivalent to: + *

    + *
  • {@link #setIvmMinimizing(IonWriterBuilder.IvmMinimizing) + * setIvmMinimizing}{@code (}{@link IonWriterBuilder.IvmMinimizing#DISTANT DISTANT}{@code )} + *
  • {@link #setLstMinimizing(IonTextWriterBuilder.LstMinimizing) + * setLstMinimizing}{@code (}{@link IonTextWriterBuilder.LstMinimizing#EVERYTHING EVERYTHING}{@code )}. + * This requires all macros to be expanded. + *
+ * + * @return this instance, if mutable; + * otherwise a mutable copy of this instance. + * + */ + IonTextWriterBuilder_1_1 withMinimalSystemData(); + + + /** + * Declares that this builder should use basic pretty-printing. + * Does not alter the handling of system data. + * Calling this method alters several other configuration properties, + * so code should call it first, then make any necessary overrides. + *

+ * The specifics of this configuration may change between releases of this + * library, so automated processes should not depend on the exact output + * formatting. + * + * @return this instance, if mutable; + * otherwise a mutable copy of this instance. + */ + IonTextWriterBuilder_1_1 withPrettyPrinting(); + + /** + * Declares that this builder should downgrade the writers' output to + * JSON compatibility. This format cannot round-trip back to Ion with full + * fidelity. + *

+ * The specific conversions are as follows: + *

    + *
  • System data is suppressed per {@link #withMinimalSystemData()}. + *
  • All annotations are suppressed. + *
  • Nulls of any type are printed as JSON {@code null}. + *
  • Blobs are printed as strings, containing Base64. + *
  • Clobs are printed as strings, containing only Unicode code points + * U+00 through U+FF. + *
  • Sexps are printed as lists. + *
  • Symbols are printed as strings. + *
  • Timestamps are printed as strings, using Ion timestamp format. + *
+ * + * @return this instance, if mutable; + * otherwise a mutable copy of this instance. + */ + IonTextWriterBuilder_1_1 withJsonDowngrade(); + + /** + * {@inheritDoc} + * + * @return the IVM minimizing strategy. + * The default value ({@code null}) indicates that no minimization occurs + * and IVMs are emitted as received by the writer. + * + * @see #setIvmMinimizing(IonWriterBuilder.IvmMinimizing) + * @see #withIvmMinimizing(IonWriterBuilder.IvmMinimizing) + * + + */ + IonWriterBuilder.IvmMinimizing getIvmMinimizing(); + + /** + * Sets the strategy for reducing or eliminating non-initial Ion version + * markers. When null, IVMs are emitted as they are written. + * + * @param minimizing the IVM minimization strategy. + * Null indicates that all explicitly-written IVMs will be emitted. + * + * @see #getIvmMinimizing() + * @see #withIvmMinimizing(IonWriterBuilder.IvmMinimizing) + * + * @throws UnsupportedOperationException if this is immutable. + * + + */ + void setIvmMinimizing(IonWriterBuilder.IvmMinimizing minimizing); + + /** + * Declares the strategy for reducing or eliminating non-initial Ion version + * markers, returning a new mutable builder if this is immutable. + * When null, IVMs are emitted as they are written. + * + * @param minimizing the IVM minimization strategy. + * Null indicates that all explicitly-written IVMs will be emitted. + * + * @return this instance, if mutable; + * otherwise a mutable copy of this instance. + * + * @see #setIvmMinimizing(IonWriterBuilder.IvmMinimizing) + * @see #getIvmMinimizing() + * + + */ + IonTextWriterBuilder_1_1 withIvmMinimizing(IonWriterBuilder.IvmMinimizing minimizing); + + /** + * Gets the strategy for reducing or eliminating local symbol tables. + * By default, LST data is emitted as received or when necessary + * (for example, binary data will always collect and emit local symbols). 
+ * + * @see #setLstMinimizing(IonTextWriterBuilder.LstMinimizing) + * @see #withLstMinimizing(IonTextWriterBuilder.LstMinimizing) + * + + */ + IonTextWriterBuilder.LstMinimizing getLstMinimizing(); + + /** + * Sets the strategy for reducing or eliminating local symbol tables. + * By default, LST data is emitted as received or when necessary + * (for example, binary data will always collect and emit local symbols). + * + * @param minimizing the LST minimization strategy. + * Null indicates that LSTs will be emitted as received. + * + * @see #getLstMinimizing() + * @see #withLstMinimizing(IonTextWriterBuilder.LstMinimizing) + * + * @throws UnsupportedOperationException if this is immutable. + * + + */ + void setLstMinimizing(IonTextWriterBuilder.LstMinimizing minimizing); + + /** + * Sets the strategy for reducing or eliminating local symbol tables. + * By default, LST data is emitted as received or when necessary + * (for example, binary data will always collect and emit local symbols). + * + * @param minimizing the LST minimization strategy. + * Null indicates that LSTs will be emitted as received. + * + * @return this instance, if mutable; + * otherwise a mutable copy of this instance. + * + * @see #getLstMinimizing() + * @see #setLstMinimizing(IonTextWriterBuilder.LstMinimizing) + * + */ + IonTextWriterBuilder_1_1 withLstMinimizing(IonTextWriterBuilder.LstMinimizing minimizing); + + /** + * Gets the length beyond which string and clob content will be rendered + * as triple-quoted "long strings". + * At present, such content will only line-break on extant newlines. + * + * @return the threshold for printing triple-quoted strings and clobs. + * Zero means no limit. + * + * @see #setLongStringThreshold(int) + * @see #withLongStringThreshold(int) + */ + int getLongStringThreshold(); + + /** + * Sets the length beyond which string and clob content will be rendered + * as triple-quoted "long strings". 
+ * At present, such content will only line-break on extant newlines. + * + * @param threshold the new threshold; zero means none. + * + * @see #getLongStringThreshold() + * @see #withLongStringThreshold(int) + * + * @throws UnsupportedOperationException if this is immutable. + */ + void setLongStringThreshold(int threshold); + + /** + * Declares the length beyond which string and clob content will be rendered + * as triple-quoted "long strings". + * At present, such content will only line-break on extant newlines. + * + * @param threshold the new threshold; zero means none. + * + * @see #getLongStringThreshold() + * @see #setLongStringThreshold(int) + * + * @return this instance, if mutable; + * otherwise a mutable copy of this instance. + */ + IonTextWriterBuilder_1_1 withLongStringThreshold(int threshold); + + /** + * Gets the character sequence that will be written as a line separator. + * The default is {@link IonTextWriterBuilder.NewLineType#PLATFORM_DEPENDENT} + * + * @return the character sequence to be written between top-level values; null means the default should be used. + * + * @see #setNewLineType(IonTextWriterBuilder.NewLineType) + * @see #withNewLineType(IonTextWriterBuilder.NewLineType) + */ + IonTextWriterBuilder.NewLineType getNewLineType(); + + /** + * Sets the character sequence that will be written as a line separator. + * The default is {@link IonTextWriterBuilder.NewLineType#PLATFORM_DEPENDENT} + * + * @param newLineType the character sequence to be written between top-level values; null means the default should be used. + * + * @see #getNewLineType() + * @see #withNewLineType(IonTextWriterBuilder.NewLineType) + * + * @throws UnsupportedOperationException if this is immutable. + */ + void setNewLineType(IonTextWriterBuilder.NewLineType newLineType); + + /** + * Declares the character sequence that will be written as a line separator. 
+ * The default is {@link IonTextWriterBuilder.NewLineType#PLATFORM_DEPENDENT} + * + * @param newLineType the character sequence to be written between top-level values; null means the default should be used. + * + * @see #getNewLineType() + * @see #setNewLineType(IonTextWriterBuilder.NewLineType) + * + * @return this instance, if mutable; + * otherwise a mutable copy of this instance. + */ + IonTextWriterBuilder_1_1 withNewLineType(IonTextWriterBuilder.NewLineType newLineType); + + /** + * Gets whether each top level value for standard printing should start on a new line. The default value is {@code false}. + * When false, the IonTextWriter will insert a single space character (U+0020) between top-level values. + * When pretty-printing, this setting is ignored; the pretty printer will always start top-level values on a new line. + * + * @return value indicating whether standard printing will insert a newline between top-level values + * + * @see #setWriteTopLevelValuesOnNewLines(boolean) + * @see #withWriteTopLevelValuesOnNewLines(boolean) + */ + boolean getWriteTopLevelValuesOnNewLines(); + + /** + * Sets whether each top level value for standard printing should start on a new line. The default value is {@code false}. + * When false, the IonTextWriter will insert a single space character (U+0020) between top-level values. + * When pretty-printing, this setting is ignored; the pretty printer will always start top-level values on a new line. + * + * @param writeTopLevelValuesOnNewLines value indicating whether standard printing will insert a newline between top-level values + * + * @see #getWriteTopLevelValuesOnNewLines() + * @see #withWriteTopLevelValuesOnNewLines(boolean) + */ + void setWriteTopLevelValuesOnNewLines(boolean writeTopLevelValuesOnNewLines); + + /** + * Declares whether each top level value for standard printing should start on a new line. The default value is {@code false}. 
+ * When false, the IonTextWriter will insert a single space character (U+0020) between top-level values. + * When pretty-printing, this setting is ignored; the pretty printer will always start top-level values on a new line. + * + * @param writeTopLevelValuesOnNewLines value indicating whether standard printing will insert a newline between top-level values + * + * @see #getWriteTopLevelValuesOnNewLines() + * @see #setWriteTopLevelValuesOnNewLines(boolean) + */ + IonTextWriterBuilder_1_1 withWriteTopLevelValuesOnNewLines(boolean writeTopLevelValuesOnNewLines); + + /** + * Gets the maximum number of digits of fractional second precision allowed to be written for timestamp values. + * + * @return the currently configured maximum. + * + * @see #setMaximumTimestampPrecisionDigits(int) + * @see #withMaximumTimestampPrecisionDigits(int) + */ + int getMaximumTimestampPrecisionDigits(); + + /** + * Sets the maximum number of digits of fractional second precision allowed to be written for timestamp values. + * Default: {@link Timestamp#DEFAULT_MAXIMUM_DIGITS_TEXT}. + * + * @see #getMaximumTimestampPrecisionDigits() + * @see #withMaximumTimestampPrecisionDigits(int) + */ + void setMaximumTimestampPrecisionDigits(int maximumTimestampPrecisionDigits); + + /** + * Sets the maximum number of digits of fractional second precision allowed to be written for timestamp values. + * Default: {@link Timestamp#DEFAULT_MAXIMUM_DIGITS_TEXT}. + * + * @return this instance, if mutable; otherwise a mutable copy of this instance. 
+ * + * @see #getMaximumTimestampPrecisionDigits() + * @see #setMaximumTimestampPrecisionDigits(int) + */ + IonTextWriterBuilder_1_1 withMaximumTimestampPrecisionDigits(int maximumTimestampPrecisionDigits); + + IonWriter build(Appendable out); +} diff --git a/src/main/java/com/amazon/ion/system/IonWriterBuilder_1_1.java b/src/main/java/com/amazon/ion/system/IonWriterBuilder_1_1.java new file mode 100644 index 0000000000..669f5a5ca2 --- /dev/null +++ b/src/main/java/com/amazon/ion/system/IonWriterBuilder_1_1.java @@ -0,0 +1,182 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.system; + +import com.amazon.ion.IonCatalog; +import com.amazon.ion.IonWriter; +import com.amazon.ion.SymbolTable; +import com.amazon.ion.impl.bin.SymbolInliningStrategy; + +import java.io.OutputStream; + + +/** + * The builder for creating {@link IonWriter}s emitting the 1.1 version of either + * the text or binary Ion formats. + *

+ * Builders may be configured once and reused to construct multiple + * objects. + *

+ * Instances of this class are not safe for use by multiple threads + * unless they are {@linkplain #immutable() immutable}. + * + */ +public interface IonWriterBuilder_1_1> { + + /** + * Gets the catalog to use when building an {@link IonWriter}. + * The catalog is needed to resolve manually-written imports (not common). + * By default, this property is null. + * + * @see #setCatalog(IonCatalog) + * @see #withCatalog(IonCatalog) + */ + IonCatalog getCatalog(); + + /** + * Sets the catalog to use when building an {@link IonWriter}. + * + * @param catalog the catalog to use in built writers. + * If null, the writer will be unable to resolve manually-written imports + * and may throw an exception. + * + * @see #getCatalog() + * @see #withCatalog(IonCatalog) + * + * @throws UnsupportedOperationException if this is immutable. + */ + void setCatalog(IonCatalog catalog); + + /** + * Declares the catalog to use when building an {@link IonWriter}, + * returning a new mutable builder if this is immutable. + * + * @param catalog the catalog to use in built writers. + * If null, the writer will be unable to resolve manually-written imports + * and may throw an exception. + * + * @return this instance, if mutable; + * otherwise a mutable copy of this instance. + * + * @see #getCatalog() + * @see #setCatalog(IonCatalog) + */ + T withCatalog(IonCatalog catalog); + + /** + * Gets the imports that will be used to construct the initial local + * symbol table. + * + * @return may be null or empty. + * + * @see #setImports(SymbolTable...) + * @see #withImports(SymbolTable...) + */ + SymbolTable[] getImports(); + + /** + * Sets the shared symbol tables that will be used to construct the + * initial local symbol table. + *

+ * If the imports sequence is not null and not empty, the output stream + * will be bootstrapped with a local symbol table that uses the given + * {@code imports}. + * + * @param imports a sequence of shared symbol tables. + * The first (and only the first) may be a system table. + * + * @see #getImports() + * @see #withImports(SymbolTable...) + * + * @throws UnsupportedOperationException if this is immutable. + */ + void setImports(SymbolTable... imports); + + /** + * Declares the imports to use when building an {@link IonWriter}, + * returning a new mutable builder if this is immutable. + *

+ * If the imports sequence is not null and not empty, the output stream + * will be bootstrapped with a local symbol table that uses the given + * {@code imports}. + * + * @param imports a sequence of shared symbol tables. + * The first (and only the first) may be a system table. + * + * @return this instance, if mutable; + * otherwise a mutable copy of this instance. + * + * @see #getImports() + * @see #setImports(SymbolTable...) + */ + T withImports(SymbolTable... imports); + + /** + * Gets the SymbolInliningStrategy that will be used to determine which symbols will be written with inline text. + * + * @return the SymbolInliningStrategy currently configured + * + * @see #setSymbolInliningStrategy(SymbolInliningStrategy) + * @see #withSymbolInliningStrategy(SymbolInliningStrategy) + */ + SymbolInliningStrategy getSymbolInliningStrategy(); + + /** + * Sets the SymbolInliningStrategy that will be used to determine which symbols will be written with inline text. + * + * @param symbolInliningStrategy if unset, the default of {@link SymbolInliningStrategy#NEVER_INLINE} will be used. + * + * @see #getSymbolInliningStrategy() + * @see #withSymbolInliningStrategy(SymbolInliningStrategy) + */ + void setSymbolInliningStrategy(SymbolInliningStrategy symbolInliningStrategy); + + /** + * Declares the SymbolInliningStrategy that will be used to determine which symbols will be written with inline text. + * + * @param symbolInliningStrategy if unset, the default of {@link SymbolInliningStrategy#NEVER_INLINE} will be used. + * + * @return this instance, if mutable; + * otherwise a mutable copy of this instance. + * + * @see #getSymbolInliningStrategy() + * @see #setSymbolInliningStrategy(SymbolInliningStrategy) + */ + T withSymbolInliningStrategy(SymbolInliningStrategy symbolInliningStrategy); + + /** + * Creates a mutable copy of this builder. + * + * @return a new builder with the same configuration as {@code this}.
+ */ + T copy(); + + /** + * Returns an immutable builder configured exactly like this one. + * + * @return this instance, if immutable; + * otherwise an immutable copy of this instance. + */ + T immutable(); + + /** + * Returns a mutable builder configured exactly like this one. + * + * @return this instance, if mutable; + * otherwise a mutable copy of this instance. + */ + T mutable(); + + /** + * Builds a new writer based on this builder's configuration + * properties. + * + * @param out the stream that will receive Ion data. + * Must not be null. + * + * @return a new {@link IonWriter} instance; not {@code null}. + */ + IonWriter build(OutputStream out); + + // TODO add a build() method that returns a 1.1-specific writer interface, allowing opt-in to new APIs. +} diff --git a/src/main/java/com/amazon/ion/system/_Private_IonBinaryWriterBuilder_1_1.java b/src/main/java/com/amazon/ion/system/_Private_IonBinaryWriterBuilder_1_1.java new file mode 100644 index 0000000000..8b7306fa57 --- /dev/null +++ b/src/main/java/com/amazon/ion/system/_Private_IonBinaryWriterBuilder_1_1.java @@ -0,0 +1,197 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.system; + +import com.amazon.ion.IonWriter; +import com.amazon.ion._private.SuppressFBWarnings; +import com.amazon.ion.impl._Private_IonConstants; +import com.amazon.ion.impl.bin.LengthPrefixStrategy; +import com.amazon.ion.impl.bin.IonManagedWriter_1_1; +import com.amazon.ion.impl.bin.ManagedWriterOptions_1_1; +import com.amazon.ion.impl.bin.SymbolInliningStrategy; + +import java.io.OutputStream; +import java.util.Objects; + +/** + * NOT FOR APPLICATION USE. 
+ */ +public class _Private_IonBinaryWriterBuilder_1_1 + extends IonWriterBuilderBase<_Private_IonBinaryWriterBuilder_1_1> + implements IonBinaryWriterBuilder_1_1 +{ + + public static final int DEFAULT_BLOCK_SIZE = 32768; + // A block must be able to hold at least the IVM and the smallest-possible value. + public static final int MINIMUM_BLOCK_SIZE = 5; + public static final int MAXIMUM_BLOCK_SIZE = _Private_IonConstants.ARRAY_MAXIMUM_SIZE; + + private int blockSize = DEFAULT_BLOCK_SIZE; + private LengthPrefixStrategy lengthPrefixStrategy = LengthPrefixStrategy.ALWAYS_PREFIXED; + private SymbolInliningStrategy symbolInliningStrategy = SymbolInliningStrategy.NEVER_INLINE; + + /** + * @return a new mutable builder. + */ + public static _Private_IonBinaryWriterBuilder_1_1 standard() + { + return new _Private_IonBinaryWriterBuilder_1_1.Mutable(); + } + + private _Private_IonBinaryWriterBuilder_1_1() { + + } + + private _Private_IonBinaryWriterBuilder_1_1(_Private_IonBinaryWriterBuilder_1_1 that) { + super(that); + blockSize = that.blockSize; + lengthPrefixStrategy = that.lengthPrefixStrategy; + symbolInliningStrategy = that.symbolInliningStrategy; + } + + @Override + public int getBlockSize() { + return blockSize; + } + + @Override + public void setBlockSize(int size) { + mutationCheck(); + if (size < MINIMUM_BLOCK_SIZE || size > MAXIMUM_BLOCK_SIZE) { + throw new IllegalArgumentException( + String.format("Block size must be between %d and %d bytes.", MINIMUM_BLOCK_SIZE, MAXIMUM_BLOCK_SIZE) + ); + } + blockSize = size; + } + + @Override + public IonBinaryWriterBuilder_1_1 withBlockSize(int size) { + _Private_IonBinaryWriterBuilder_1_1 b = mutable(); + b.setBlockSize(size); + return b; + } + + // Note: The IvmHandling / IvmMinimizing behavior is copied from the Ion 1.0 binary writer (IonBinaryWriterBuilder). + + /** + * @return always {@link IonWriterBuilder.InitialIvmHandling#ENSURE}. 
+ */ + @Override + public InitialIvmHandling getInitialIvmHandling() + { + return InitialIvmHandling.ENSURE; + } + + /** + * @return always null. + */ + @Override + public IvmMinimizing getIvmMinimizing() + { + return null; + } + + // LengthPrefixStrategy is an interface. We have no way to make a defensive copy or ensure immutability. + // It is unclear why SpotBugs flagged these methods and not the similar methods for SymbolInliningStrategy. + @SuppressFBWarnings("EI_EXPOSE_REP") + @Override + public LengthPrefixStrategy getLengthPrefixStrategy() { + return lengthPrefixStrategy; + } + + @SuppressFBWarnings("EI_EXPOSE_REP2") + @Override + public void setLengthPrefixStrategy(LengthPrefixStrategy lengthPrefixStrategy) { + mutationCheck(); + this.lengthPrefixStrategy = Objects.requireNonNull(lengthPrefixStrategy); + } + + @Override + public IonBinaryWriterBuilder_1_1 withLengthPrefixStrategy(LengthPrefixStrategy lengthPrefixStrategy) { + _Private_IonBinaryWriterBuilder_1_1 b = mutable(); + b.setLengthPrefixStrategy(lengthPrefixStrategy); + return b; + } + + @Override + public SymbolInliningStrategy getSymbolInliningStrategy() { + return symbolInliningStrategy; + } + + @Override + public void setSymbolInliningStrategy(SymbolInliningStrategy symbolInliningStrategy) { + mutationCheck(); + this.symbolInliningStrategy = Objects.requireNonNull(symbolInliningStrategy); + } + + @Override + public IonBinaryWriterBuilder_1_1 withSymbolInliningStrategy(SymbolInliningStrategy symbolInliningStrategy) { + _Private_IonBinaryWriterBuilder_1_1 b = mutable(); + b.setSymbolInliningStrategy(symbolInliningStrategy); + return b; + } + + @Override + public IonWriter build(OutputStream out) { + if (out == null) { + throw new IllegalArgumentException("Cannot construct a writer with a null OutputStream."); + } + ManagedWriterOptions_1_1 options = new ManagedWriterOptions_1_1( + true, + false, + symbolInliningStrategy, + lengthPrefixStrategy, + 
ManagedWriterOptions_1_1.EExpressionIdentifierStrategy.BY_ADDRESS + ); + return IonManagedWriter_1_1.binaryWriter(out, options, this); + } + + // Note: the copy/immutable/mutable pattern is copied from _Private_IonBinaryWriterBuilder. + + @Override + public final _Private_IonBinaryWriterBuilder_1_1 copy() + { + return new Mutable(this); + } + + @Override + public _Private_IonBinaryWriterBuilder_1_1 immutable() + { + return this; + } + + @Override + public _Private_IonBinaryWriterBuilder_1_1 mutable() + { + return copy(); + } + + private static final class Mutable + extends _Private_IonBinaryWriterBuilder_1_1 + { + private Mutable() { } + + private Mutable(_Private_IonBinaryWriterBuilder_1_1 that) + { + super(that); + } + + @Override + public _Private_IonBinaryWriterBuilder_1_1 immutable() + { + return new _Private_IonBinaryWriterBuilder_1_1(this); + } + + @Override + public _Private_IonBinaryWriterBuilder_1_1 mutable() + { + return this; + } + + @Override + protected void mutationCheck() + { + } + } +} diff --git a/src/main/java/com/amazon/ion/util/Assumptions.kt b/src/main/java/com/amazon/ion/util/Assumptions.kt new file mode 100644 index 0000000000..b1d86b0d42 --- /dev/null +++ b/src/main/java/com/amazon/ion/util/Assumptions.kt @@ -0,0 +1,49 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.util + +import com.amazon.ion.IonException +import kotlin.contracts.ExperimentalContracts +import kotlin.contracts.contract + +/** + * Similar to the `!!` operator, this function assumes that the value is not null. Unlike the + * `!!` operator, this function does it without actually checking if the value is null. + * + * Why? This has no branches. If we actually checked if it was null, then there would be branching. 
+ */ +@OptIn(ExperimentalContracts::class) +internal inline fun T?.assumeNotNull(): T { + contract { returns() implies (this@assumeNotNull != null) } + privateAssumeNotNull(this) + return this +} + +/** + * Supporting function for `assumeNotNull`. + * This function exists just to hold the contract to trick the Kotlin compiler into deducing that a value is not null. + */ +@OptIn(ExperimentalContracts::class) +private inline fun privateAssumeNotNull(value: T?) { + contract { returns() implies (value != null) } +} + +/** + * Tell the compiler that some condition is true. Must have a comment indicating why it is safe to trick the compiler. + */ +@OptIn(ExperimentalContracts::class) +internal inline fun assumeUnchecked(assumption: Boolean) { + contract { returns() implies assumption } +} + +/** + * Checks an assumption, throwing an [IonException] with a lazily created message if the assumption is false. + * + * This is named `confirm` because `check` and `require` are already similar functions in the Kotlin Std Lib, and + * `expect`, `verify`, and `assert` are used for test frameworks. + */ +internal inline fun confirm(assumption: Boolean, lazyMessage: () -> String) { + if (!assumption) { + throw IonException(lazyMessage()) + } +} diff --git a/src/main/java/com/amazon/ion/util/IonStreamUtils.java b/src/main/java/com/amazon/ion/util/IonStreamUtils.java index f7ba3a9c84..8ceb34d165 100644 --- a/src/main/java/com/amazon/ion/util/IonStreamUtils.java +++ b/src/main/java/com/amazon/ion/util/IonStreamUtils.java @@ -1,21 +1,9 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. 
This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.util; import static com.amazon.ion.impl._Private_IonConstants.BINARY_VERSION_MARKER_1_0; +import static com.amazon.ion.impl._Private_IonConstants.BINARY_VERSION_MARKER_1_1; import static com.amazon.ion.util.GzipOrRawInputStream.GZIP_HEADER; import com.amazon.ion.IonException; @@ -70,7 +58,8 @@ public static boolean isIonBinary(byte[] buffer) */ public static boolean isIonBinary(byte[] buffer, int offset, int length) { - return cookieMatches(BINARY_VERSION_MARKER_1_0, buffer, offset, length); + return cookieMatches(BINARY_VERSION_MARKER_1_0, buffer, offset, length) + || cookieMatches(BINARY_VERSION_MARKER_1_1, buffer, offset, length); } diff --git a/src/main/java/com/amazon/ion/util/IonTextUtils.java b/src/main/java/com/amazon/ion/util/IonTextUtils.java index 089aa4a89a..6e54a42234 100644 --- a/src/main/java/com/amazon/ion/util/IonTextUtils.java +++ b/src/main/java/com/amazon/ion/util/IonTextUtils.java @@ -1,18 +1,5 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.util; import static com.amazon.ion.impl._Private_IonConstants.isHighSurrogate; @@ -21,7 +8,7 @@ import static com.amazon.ion.impl._Private_IonTextAppender.ZERO_PADDING; import static com.amazon.ion.impl._Private_IonTextAppender.isIdentifierKeyword; import static com.amazon.ion.impl._Private_IonTextAppender.symbolNeedsQuoting; -import static com.amazon.ion.impl._Private_IonTextWriterBuilder.STANDARD; +import static com.amazon.ion.impl._Private_IonTextWriterBuilder_1_0.STANDARD; import com.amazon.ion.SymbolToken; import com.amazon.ion.impl._Private_IonTextAppender; diff --git a/src/main/java/com/amazon/ion/util/Printer.java b/src/main/java/com/amazon/ion/util/Printer.java index 24379ee6f7..59c2b2d983 100644 --- a/src/main/java/com/amazon/ion/util/Printer.java +++ b/src/main/java/com/amazon/ion/util/Printer.java @@ -1,18 +1,5 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.util; import static com.amazon.ion.SystemSymbols.IMPORTS; @@ -45,7 +32,7 @@ import com.amazon.ion.Timestamp; import com.amazon.ion.impl._Private_IonSymbol; import com.amazon.ion.impl._Private_IonSystem; -import com.amazon.ion.impl._Private_IonTextWriterBuilder; +import com.amazon.ion.impl._Private_IonTextWriterBuilder_1_0; import com.amazon.ion.impl._Private_IonValue; import com.amazon.ion.impl._Private_IonValue.SymbolTableProvider; import com.amazon.ion.system.IonTextWriterBuilder; @@ -441,8 +428,8 @@ public void print(IonValue value, Appendable out) boolean dg = value instanceof IonDatagram; - _Private_IonTextWriterBuilder o = - _Private_IonTextWriterBuilder.standard(); + _Private_IonTextWriterBuilder_1_0 o = + _Private_IonTextWriterBuilder_1_0.standard(); o.setCharset(IonTextWriterBuilder.ASCII); if (dg) { diff --git a/src/test/java/com/amazon/ion/BinaryByteArrayIteratorSystemProcessingTest.java b/src/test/java/com/amazon/ion/BinaryByteArrayIteratorSystemProcessingTest.java index 8ce34efe35..f1ad1e0cdc 100644 --- a/src/test/java/com/amazon/ion/BinaryByteArrayIteratorSystemProcessingTest.java +++ b/src/test/java/com/amazon/ion/BinaryByteArrayIteratorSystemProcessingTest.java @@ -1,22 +1,14 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion; +import org.junit.Test; + import java.util.Iterator; +import static com.amazon.ion.TestUtils.cleanCommentedHexBytes; +import static com.amazon.ion.TestUtils.hexStringToByteArray; + public class BinaryByteArrayIteratorSystemProcessingTest extends IteratorSystemProcessingTestCase @@ -48,4 +40,74 @@ protected Iterator systemIterate() { return system().systemIterate(system().newSystemReader(myBytes)); } + + private void prepareBinary(String commentedHexBytes) { + myMissingSymbolTokensHaveText = false; + myBytes = hexStringToByteArray(cleanCommentedHexBytes(commentedHexBytes)); + } + + @Test + public void inlineFieldName() { + prepareBinary( + "E0 01 01 EA | Ion 1.1 IVM \n" + + "FD | Variable-length struct \n" + + "0F | Length 7 \n" + + "01 | Switch to FlexSym field names \n" + + "FF | Inline field name, length 1 \n" + + "61 | UTF-8 byte 'a' \n" + + "D3 | Struct length 3 \n" + + "09 | Field name SID 4 ('name') \n" + + "A1 | Inline symbol value, length 1 \n" + + "62 | UTF-8 byte 'b' \n" + ); + Iterator iterator = systemIterate(); + assertTrue(iterator.hasNext()); + IonValue shouldBeAnIvm = iterator.next(); + assertEquals(IonType.SYMBOL, shouldBeAnIvm.getType()); + assertEquals("$ion_1_1", ((IonSymbol) shouldBeAnIvm).stringValue()); + assertTrue(iterator.hasNext()); + IonStruct struct = (IonStruct) iterator.next(); + assertEquals(1, struct.size()); + IonStruct nested = (IonStruct) struct.get("a"); + assertEquals("a", nested.getFieldName()); + assertEquals(1, nested.size()); + IonSymbol b = (IonSymbol) nested.get("name"); + assertEquals("name", b.getFieldName()); + assertEquals("b", b.stringValue()); + SymbolToken bToken = b.symbolValue(); + assertEquals("b", bToken.getText()); + assertEquals(-1, bToken.getSid()); + assertFalse(iterator.hasNext()); + } + + @Test + public void inlineAnnotation() { + prepareBinary( + "E0 01 01 EA | Ion 1.1 IVM \n" + + "E8 | Two annotation FlexSyms follow \n" + + "09 | Annotation 
SID 4 ('name') \n" + + "FF | Inline field name, length 1 \n" + + "61 | UTF-8 byte 'a' \n" + + "6F | boolean false\n" + ); + Iterator iterator = systemIterate(); + assertTrue(iterator.hasNext()); + IonValue shouldBeAnIvm = iterator.next(); + assertEquals(IonType.SYMBOL, shouldBeAnIvm.getType()); + assertEquals("$ion_1_1", ((IonSymbol) shouldBeAnIvm).stringValue()); + assertTrue(iterator.hasNext()); + IonBool value = (IonBool) iterator.next(); + String[] annotations = value.getTypeAnnotations(); + assertEquals(2, annotations.length); + assertEquals("name", annotations[0]); + assertEquals("a", annotations[1]); + SymbolToken[] annotationTokens = value.getTypeAnnotationSymbols(); + assertEquals(2, annotationTokens.length); + assertEquals("name", annotationTokens[0].getText()); + assertEquals(4, annotationTokens[0].getSid()); + assertEquals("a", annotationTokens[1].getText()); + assertEquals(-1, annotationTokens[1].getSid()); + assertFalse(value.booleanValue()); + assertFalse(iterator.hasNext()); + } } diff --git a/src/test/java/com/amazon/ion/BinaryReaderSystemProcessingTest.java b/src/test/java/com/amazon/ion/BinaryReaderSystemProcessingTest.java index c9798c5a57..a109ae5e56 100644 --- a/src/test/java/com/amazon/ion/BinaryReaderSystemProcessingTest.java +++ b/src/test/java/com/amazon/ion/BinaryReaderSystemProcessingTest.java @@ -1,18 +1,5 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. 
- */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion; @@ -20,9 +7,12 @@ import java.math.BigDecimal; import java.math.BigInteger; +import java.util.Iterator; import static com.amazon.ion.SystemSymbols.ION_1_0; import static com.amazon.ion.SystemSymbols.ION_1_0_SID; +import static com.amazon.ion.TestUtils.cleanCommentedHexBytes; +import static com.amazon.ion.TestUtils.hexStringToByteArray; public class BinaryReaderSystemProcessingTest extends ReaderSystemProcessingTestCase @@ -208,4 +198,90 @@ public void testSystemReaderReadsUserValues() checkTopEof(); } + + private void prepareBinary(String commentedHexBytes) { + myMissingSymbolTokensHaveText = false; + myBytes = hexStringToByteArray(cleanCommentedHexBytes(commentedHexBytes)); + } + + @Test + public void inlineFieldName() throws Exception { + prepareBinary( + "E0 01 01 EA | Ion 1.1 IVM \n" + + "FD | Variable-length struct \n" + + "0F | Length 7 \n" + + "01 | Switch to FlexSym field names \n" + + "FF | Inline field name, length 1 \n" + + "61 | UTF-8 byte 'a' \n" + + "D3 | Struct length 3 \n" + + "09 | Field name SID 4 ('name') \n" + + "A1 | Inline symbol value, length 1 \n" + + "62 | UTF-8 byte 'b' \n" + ); + IonReader reader = systemRead(); + + assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("$ion_1_1", reader.symbolValue().getText()); + + assertEquals(IonType.STRUCT, reader.next()); + reader.stepIn(); + assertEquals(IonType.STRUCT, reader.next()); + assertEquals("a", reader.getFieldName()); + SymbolToken aToken = reader.getFieldNameSymbol(); + assertEquals("a", aToken.getText()); + assertEquals(-1, aToken.getSid()); + reader.stepIn(); + assertNull(reader.getFieldNameSymbol()); + assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("name", reader.getFieldName()); + assertEquals("b", reader.stringValue()); + SymbolToken bToken = reader.symbolValue(); + assertEquals("b", bToken.getText()); + assertEquals(-1, 
bToken.getSid()); + assertNull(reader.next()); + assertNull(reader.getFieldName()); + reader.stepOut(); + assertNull(reader.getFieldName()); + assertNull(reader.next()); + reader.stepOut(); + assertNull(reader.next()); + reader.close(); + } + + @Test + public void inlineAnnotation() throws Exception { + prepareBinary( + "E0 01 01 EA | Ion 1.1 IVM \n" + + "E8 | Two annotation FlexSyms follow \n" + + "09 | Annotation SID 4 ('name') \n" + + "FF | Inline field name, length 1 \n" + + "61 | UTF-8 byte 'a' \n" + + "6F | boolean false\n" + ); + IonReader reader = systemRead(); + + assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("$ion_1_1", reader.symbolValue().getText()); + + assertEquals(IonType.BOOL, reader.next()); + String[] annotations = reader.getTypeAnnotations(); + assertEquals(2, annotations.length); + assertEquals("name", annotations[0]); + assertEquals("a", annotations[1]); + SymbolToken[] annotationTokens = reader.getTypeAnnotationSymbols(); + assertEquals(2, annotationTokens.length); + assertEquals("name", annotationTokens[0].getText()); + assertEquals(4, annotationTokens[0].getSid()); + assertEquals("a", annotationTokens[1].getText()); + assertEquals(-1, annotationTokens[1].getSid()); + Iterator annotationIterator = reader.iterateTypeAnnotations(); + assertTrue(annotationIterator.hasNext()); + assertEquals("name", annotationIterator.next()); + assertTrue(annotationIterator.hasNext()); + assertEquals("a", annotationIterator.next()); + assertFalse(reader.booleanValue()); + assertNull(reader.next()); + assertEquals(0, reader.getTypeAnnotations().length); + reader.close(); + } } diff --git a/src/test/java/com/amazon/ion/DatagramTest.java b/src/test/java/com/amazon/ion/DatagramTest.java index 831ccfd036..458e3b6b2c 100644 --- a/src/test/java/com/amazon/ion/DatagramTest.java +++ b/src/test/java/com/amazon/ion/DatagramTest.java @@ -1,18 +1,5 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion; import static com.amazon.ion.SymbolTable.UNKNOWN_SYMBOL_ID; @@ -673,15 +660,13 @@ public void testGetAssignedSymbolTable() ((_Private_IonValue)dg).getAssignedSymbolTable(); } - /** - * TODO amazon-ion/ion-java/issues/50 Datagram.set() should work, but it's documented to throw - */ - @Test(expected = UnsupportedOperationException.class) + @Test public void testSet() { IonDatagram dg = system().newDatagram(); dg.add().newNull(); dg.set(0, system().newBool(true)); + assertEquals(system().getLoader().load("true"), dg); } @Test diff --git a/src/test/java/com/amazon/ion/FakeSymbolToken.java b/src/test/java/com/amazon/ion/FakeSymbolToken.java index 426e95a40e..10d6528900 100644 --- a/src/test/java/com/amazon/ion/FakeSymbolToken.java +++ b/src/test/java/com/amazon/ion/FakeSymbolToken.java @@ -1,25 +1,14 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. 
See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion; +import com.amazon.ion.impl._Private_SymbolToken; + /** * NOT SUITABLE FOR PUBLIC USE since it doesn't enforce correctness. */ public class FakeSymbolToken - implements SymbolToken + implements SymbolToken, _Private_SymbolToken { private final String myText; private final int mySid; @@ -45,4 +34,28 @@ public int getSid() { return mySid; } + + @Override + public String toString() + { + return "SymbolToken::{text:" + myText + ",id:" + mySid + "}"; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || !(o instanceof SymbolToken)) return false; + + SymbolToken other = (SymbolToken) o; + if(getText() == null || other.getText() == null){ + return getText() == other.getText(); + } + return getText().equals(other.getText()); + } + + @Override + public int hashCode() { + if(getText() != null) return getText().hashCode(); + return 0; + } } diff --git a/src/test/java/com/amazon/ion/IonEncodingVersionTest.java b/src/test/java/com/amazon/ion/IonEncodingVersionTest.java new file mode 100644 index 0000000000..a267463188 --- /dev/null +++ b/src/test/java/com/amazon/ion/IonEncodingVersionTest.java @@ -0,0 +1,23 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion; + +import com.amazon.ion.system.IonBinaryWriterBuilder; +import com.amazon.ion.system.IonBinaryWriterBuilder_1_1; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNotSame; + +public class IonEncodingVersionTest { + + @Test + public void vendsBinaryWriterBuilders() { + IonBinaryWriterBuilder writerBuilder_1_0 = IonEncodingVersion.ION_1_0.binaryWriterBuilder(); + assertNotNull(writerBuilder_1_0); + assertNotSame(writerBuilder_1_0, IonEncodingVersion.ION_1_0.binaryWriterBuilder()); + IonBinaryWriterBuilder_1_1 writerBuilder_1_1 = IonEncodingVersion.ION_1_1.binaryWriterBuilder(); + assertNotNull(writerBuilder_1_1); + assertNotSame(writerBuilder_1_1, IonEncodingVersion.ION_1_1.binaryWriterBuilder()); + } +} diff --git a/src/test/java/com/amazon/ion/Ion_1_1_RoundTripTest.kt b/src/test/java/com/amazon/ion/Ion_1_1_RoundTripTest.kt new file mode 100644 index 0000000000..1a1764188d --- /dev/null +++ b/src/test/java/com/amazon/ion/Ion_1_1_RoundTripTest.kt @@ -0,0 +1,627 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
/**
 * Suite of tests for running round trip tests on user and system values for various Ion 1.1 encodings.
 *
 * Every `@Nested` class below pairs one writer configuration with one reader configuration and inherits
 * the parameterized tests declared in [Ion_1_1_RoundTripBase] (or [Ion_1_1_RoundTripTextBase] for text).
 * The test inputs come from [testData].
 */
class Ion_1_1_RoundTripTest {

    // Writer: Text

    @Nested
    inner class Text : Ion_1_1_RoundTripTextBase() {
        private val builder = ION_1_1.textWriterBuilder()
            .withNewLineType(IonTextWriterBuilder.NewLineType.LF)
            .withSymbolInliningStrategy(SymbolInliningStrategy.ALWAYS_INLINE)

        override val writerFn: (OutputStream) -> IonWriter = builder::build
        override val newWriterForAppendable: (Appendable) -> IonWriter = builder::build
    }

    @Nested
    inner class TextWithSymbolTable : Ion_1_1_RoundTripTextBase() {
        private val builder = ION_1_1.textWriterBuilder()
            .withNewLineType(IonTextWriterBuilder.NewLineType.LF)
            .withSymbolInliningStrategy(SymbolInliningStrategy.NEVER_INLINE)

        override val writerFn: (OutputStream) -> IonWriter = builder::build
        override val newWriterForAppendable: (Appendable) -> IonWriter = builder::build
    }

    // Writer: Interned/Prefixed

    @Nested
    inner class BinaryWithInternedSymbolsAndPrefixedContainers_ReaderContinuableBufferDefault : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INTERNED_PREFIXED
        override val readerFn: (ByteArray) -> IonReader = READER_CONTINUABLE_BUFFER_DEFAULT
    }

    @Nested
    inner class BinaryWithInternedSymbolsAndPrefixedContainers_ReaderContinuableBuffer16 : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INTERNED_PREFIXED
        override val readerFn: (ByteArray) -> IonReader = READER_CONTINUABLE_BUFFER_16
    }

    @Nested
    inner class BinaryWithInternedSymbolsAndPrefixedContainers_ReaderContinuableStreamDefault : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INTERNED_PREFIXED
        override val readerFn: (ByteArray) -> IonReader = READER_CONTINUABLE_STREAM_DEFAULT
    }

    @Nested
    inner class BinaryWithInternedSymbolsAndPrefixedContainers_ReaderContinuableStream16 : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INTERNED_PREFIXED
        override val readerFn: (ByteArray) -> IonReader = READER_CONTINUABLE_STREAM_16
    }

    @Nested
    inner class BinaryWithInternedSymbolsAndPrefixedContainers_ReaderNonContinuableBufferDefault : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INTERNED_PREFIXED
        override val readerFn: (ByteArray) -> IonReader = READER_NON_CONTINUABLE_BUFFER_DEFAULT
    }

    @Nested
    inner class BinaryWithInternedSymbolsAndPrefixedContainers_ReaderNonContinuableBuffer16 : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INTERNED_PREFIXED
        override val readerFn: (ByteArray) -> IonReader = READER_NON_CONTINUABLE_BUFFER_16
    }

    @Nested
    inner class BinaryWithInternedSymbolsAndPrefixedContainers_ReaderNonContinuableStreamDefault : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INTERNED_PREFIXED
        override val readerFn: (ByteArray) -> IonReader = READER_NON_CONTINUABLE_STREAM_DEFAULT
    }

    @Nested
    inner class BinaryWithInternedSymbolsAndPrefixedContainers_ReaderNonContinuableStream16 : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INTERNED_PREFIXED
        override val readerFn: (ByteArray) -> IonReader = READER_NON_CONTINUABLE_STREAM_16
    }

    // Writer: Inline/Prefixed

    @Nested
    inner class BinaryWithInlineSymbolsAndPrefixedContainers_ReaderContinuableBufferDefault : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INLINE_PREFIXED
        override val readerFn: (ByteArray) -> IonReader = READER_CONTINUABLE_BUFFER_DEFAULT
    }

    @Nested
    inner class BinaryWithInlineSymbolsAndPrefixedContainers_ReaderContinuableBuffer16 : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INLINE_PREFIXED
        override val readerFn: (ByteArray) -> IonReader = READER_CONTINUABLE_BUFFER_16
    }

    @Nested
    inner class BinaryWithInlineSymbolsAndPrefixedContainers_ReaderContinuableStreamDefault : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INLINE_PREFIXED
        override val readerFn: (ByteArray) -> IonReader = READER_CONTINUABLE_STREAM_DEFAULT
    }

    @Nested
    inner class BinaryWithInlineSymbolsAndPrefixedContainers_ReaderContinuableStream16 : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INLINE_PREFIXED
        override val readerFn: (ByteArray) -> IonReader = READER_CONTINUABLE_STREAM_16
    }

    @Nested
    inner class BinaryWithInlineSymbolsAndPrefixedContainers_ReaderNonContinuableBufferDefault : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INLINE_PREFIXED
        override val readerFn: (ByteArray) -> IonReader = READER_NON_CONTINUABLE_BUFFER_DEFAULT
    }

    @Nested
    inner class BinaryWithInlineSymbolsAndPrefixedContainers_ReaderNonContinuableBuffer16 : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INLINE_PREFIXED
        override val readerFn: (ByteArray) -> IonReader = READER_NON_CONTINUABLE_BUFFER_16
    }

    @Nested
    inner class BinaryWithInlineSymbolsAndPrefixedContainers_ReaderNonContinuableStreamDefault : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INLINE_PREFIXED
        override val readerFn: (ByteArray) -> IonReader = READER_NON_CONTINUABLE_STREAM_DEFAULT
    }

    @Nested
    inner class BinaryWithInlineSymbolsAndPrefixedContainers_ReaderNonContinuableStream16 : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INLINE_PREFIXED
        override val readerFn: (ByteArray) -> IonReader = READER_NON_CONTINUABLE_STREAM_16
    }

    // Writer: Inline/Delimited

    @Nested
    inner class BinaryWithInlineSymbolsAndDelimitedContainers_ReaderContinuableBufferDefault : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INLINE_DELIMITED
        override val readerFn: (ByteArray) -> IonReader = READER_CONTINUABLE_BUFFER_DEFAULT
    }

    @Nested
    inner class BinaryWithInlineSymbolsAndDelimitedContainers_ReaderContinuableBuffer16 : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INLINE_DELIMITED
        override val readerFn: (ByteArray) -> IonReader = READER_CONTINUABLE_BUFFER_16
    }

    @Nested
    inner class BinaryWithInlineSymbolsAndDelimitedContainers_ReaderContinuableStreamDefault : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INLINE_DELIMITED
        override val readerFn: (ByteArray) -> IonReader = READER_CONTINUABLE_STREAM_DEFAULT
    }

    @Nested
    inner class BinaryWithInlineSymbolsAndDelimitedContainers_ReaderContinuableStream16 : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INLINE_DELIMITED
        override val readerFn: (ByteArray) -> IonReader = READER_CONTINUABLE_STREAM_16
    }

    @Nested
    inner class BinaryWithInlineSymbolsAndDelimitedContainers_ReaderNonContinuableBufferDefault : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INLINE_DELIMITED
        override val readerFn: (ByteArray) -> IonReader = READER_NON_CONTINUABLE_BUFFER_DEFAULT
    }

    @Nested
    inner class BinaryWithInlineSymbolsAndDelimitedContainers_ReaderNonContinuableBuffer16 : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INLINE_DELIMITED
        override val readerFn: (ByteArray) -> IonReader = READER_NON_CONTINUABLE_BUFFER_16
    }

    @Nested
    inner class BinaryWithInlineSymbolsAndDelimitedContainers_ReaderNonContinuableStreamDefault : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INLINE_DELIMITED
        override val readerFn: (ByteArray) -> IonReader = READER_NON_CONTINUABLE_STREAM_DEFAULT
    }

    @Nested
    inner class BinaryWithInlineSymbolsAndDelimitedContainers_ReaderNonContinuableStream16 : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INLINE_DELIMITED
        override val readerFn: (ByteArray) -> IonReader = READER_NON_CONTINUABLE_STREAM_16
    }

    // Writer: Interned / Delimited

    @Nested
    inner class BinaryWithInternedSymbolsAndDelimitedContainers_ReaderContinuableBufferDefault : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INTERNED_DELIMITED
        override val readerFn: (ByteArray) -> IonReader = READER_CONTINUABLE_BUFFER_DEFAULT
    }

    @Nested
    inner class BinaryWithInternedSymbolsAndDelimitedContainers_ReaderContinuableBuffer16 : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INTERNED_DELIMITED
        override val readerFn: (ByteArray) -> IonReader = READER_CONTINUABLE_BUFFER_16
    }

    @Nested
    inner class BinaryWithInternedSymbolsAndDelimitedContainers_ReaderContinuableStreamDefault : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INTERNED_DELIMITED
        override val readerFn: (ByteArray) -> IonReader = READER_CONTINUABLE_STREAM_DEFAULT
    }

    @Nested
    inner class BinaryWithInternedSymbolsAndDelimitedContainers_ReaderContinuableStream16 : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INTERNED_DELIMITED
        override val readerFn: (ByteArray) -> IonReader = READER_CONTINUABLE_STREAM_16
    }

    @Nested
    inner class BinaryWithInternedSymbolsAndDelimitedContainers_ReaderNonContinuableBufferDefault : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INTERNED_DELIMITED
        override val readerFn: (ByteArray) -> IonReader = READER_NON_CONTINUABLE_BUFFER_DEFAULT
    }

    @Nested
    inner class BinaryWithInternedSymbolsAndDelimitedContainers_ReaderNonContinuableBuffer16 : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INTERNED_DELIMITED
        override val readerFn: (ByteArray) -> IonReader = READER_NON_CONTINUABLE_BUFFER_16
    }

    @Nested
    inner class BinaryWithInternedSymbolsAndDelimitedContainers_ReaderNonContinuableStreamDefault : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INTERNED_DELIMITED
        override val readerFn: (ByteArray) -> IonReader = READER_NON_CONTINUABLE_STREAM_DEFAULT
    }

    @Nested
    inner class BinaryWithInternedSymbolsAndDelimitedContainers_ReaderNonContinuableStream16 : Ion_1_1_RoundTripBase() {
        override val writerFn: (OutputStream) -> IonWriter = WRITER_INTERNED_DELIMITED
        override val readerFn: (ByteArray) -> IonReader = READER_NON_CONTINUABLE_STREAM_16
    }

    // Macro-aware Ion 1.1 transcode

    @Nested
    inner class BinaryMacroAwareTranscode_ReaderNonContinuableBufferDefault {

        // TODO refactor the following method into a base class and add nested inner class implementations to exercise
        //  all combinations of reading from [ByteArray, InputStream] in [Text, Binary], and to [Text, Binary].

        /**
         * Transcodes binary input to Ion 1.1 text with a macro-aware reader/writer pair and checks that the
         * transcoded text yields the same values as the input. Text inputs are skipped by returning early,
         * so they are reported as passing.
         */
        @ParameterizedTest(name = "{0}")
        @MethodSource("com.amazon.ion.Ion_1_1_RoundTripTest#testData")
        fun testEncodingDirectivesAndMacroInvocationsArePreservedWhenPerformingLowLevelTranscode(name: String, ion: ByteArray) {
            if (!ion.isIonBinary()) {
                return
            }
            val actual = StringBuilder()
            val reader: MacroAwareIonReader = (IonReaderBuilder.standard() as _Private_IonReaderBuilder).buildMacroAware(ion)
            val writer: MacroAwareIonWriter = ION_1_1.textWriterBuilder().build(actual) as MacroAwareIonWriter

            reader.transcodeAllTo(writer)

            reader.close()
            writer.close()

            assertReadersHaveEquivalentValues(
                ION.newReader(ion),
                ION.newReader(actual.toString())
            )
        }
    }

    /**
     * Base class that contains text-specific cases
     */
    abstract class Ion_1_1_RoundTripTextBase : Ion_1_1_RoundTripBase() {
        /** Creates the writer under test on top of an [Appendable] instead of an [OutputStream]. */
        abstract val newWriterForAppendable: (Appendable) -> IonWriter
        override val readerFn: (ByteArray) -> IonReader = IonReaderBuilder.standard()::build

        /**
         * Loads the input, writes every loaded value through [newWriterForAppendable], and checks that the
         * resulting text yields the same values as the input.
         */
        @ParameterizedTest(name = "{0}")
        @MethodSource("com.amazon.ion.Ion_1_1_RoundTripTest#testData")
        fun testUserValuesSurviveRoundTripWrittenToAppendable(name: String, ion: ByteArray) {
            val data: List<IonValue> = ION.loader.load(ion)
            val appendable = StringBuilder()
            val writer = newWriterForAppendable(appendable)
            data.forEach { it.writeTo(writer) }
            writer.close()
            val actual = appendable.toString()

            if (DEBUG_MODE) {
                println("Expected:")
                ion.printDisplayString()
                println("Actual:")
                println(actual)
            }

            assertReadersHaveEquivalentValues(
                ION.newReader(ion),
                ION.newReader(actual)
            )
        }
    }

    /**
     * Base class holding the round-trip tests shared by all writer/reader combinations. Subclasses supply
     * the writer factory ([writerFn]) and the reader factory ([readerFn]) to exercise.
     */
    abstract class Ion_1_1_RoundTripBase {

        abstract val writerFn: (OutputStream) -> IonWriter
        abstract val readerFn: (ByteArray) -> IonReader
        val systemReaderFn: (ByteArray) -> IonReader = ION::newSystemReader

        /** Copies each value with `IonValue.writeTo(writer)`. */
        @ParameterizedTest(name = "{0}")
        @MethodSource("com.amazon.ion.Ion_1_1_RoundTripTest#testData")
        fun testUserValuesArePreservedWhenTransferringUserValues(name: String, ion: ByteArray) {

            // Read and compare the data.
            val actual = roundTripToByteArray { w -> newReader(ion).let(::iterate).forEach { it.writeTo(w) } }

            printDebugInfo(ion, actual)

            assertReadersHaveEquivalentValues(
                readerFn(ion),
                readerFn(actual)
            )
        }

        /** Copies each value with `IonWriter.writeValue(reader)`. */
        @ParameterizedTest(name = "{0}")
        @MethodSource("com.amazon.ion.Ion_1_1_RoundTripTest#testData")
        fun testUserValuesArePreservedWhenTransferringUserValuesUsingWriteValueForReader(name: String, ion: ByteArray) {

            // Read and compare the data.
            val actual = roundTripToByteArray { w -> newReader(ion).let { r -> while (r.next() != null) w.writeValue(r) } }

            printDebugInfo(ion, actual)

            assertReadersHaveEquivalentValues(
                readerFn(ion),
                readerFn(actual)
            )
        }

        /** Copies each value with `IonWriter.writeValue(ionValue)`. */
        @ParameterizedTest(name = "{0}")
        @MethodSource("com.amazon.ion.Ion_1_1_RoundTripTest#testData")
        fun testUserValuesArePreservedWhenTransferringUserValuesUsingWriteValueForIonValue(name: String, ion: ByteArray) {
            // Read and compare the data.
            val actual = roundTripToByteArray { w -> newReader(ion).let(::iterate).forEach { w.writeValue(it) } }

            printDebugInfo(ion, actual)

            assertReadersHaveEquivalentValues(
                readerFn(ion),
                readerFn(actual)
            )
        }

        @ParameterizedTest(name = "{0}")
        @MethodSource("com.amazon.ion.Ion_1_1_RoundTripTest#testData")
        @Disabled("Re-interpreting system directives is not supported yet.")
        open fun testUserValuesArePreservedWhenTransferringSystemValues(name: String, ion: ByteArray) {

            // Read and compare the data.
            val actual = roundTripSystemValues(ion)

            printDebugInfo(ion, actual)

            // Check the user values
            assertReadersHaveEquivalentValues(
                readerFn(ion),
                readerFn(actual)
            )
        }

        @ParameterizedTest(name = "{0}")
        @MethodSource("com.amazon.ion.Ion_1_1_RoundTripTest#testData")
        @Disabled("Re-interpreting system directives is not supported yet.")
        open fun testSystemValuesArePreservedWhenTransferringSystemValues(name: String, ion: ByteArray) {

            // Read and compare the data.
            val actual = roundTripSystemValues(ion)

            printDebugInfo(ion, actual)

            // Check the system values
            assertReadersHaveEquivalentValues(
                systemReaderFn(ion),
                // Skip the initial IVM since it ends up being doubled when we're copying.
                systemReaderFn(actual).apply { next() }
            )
        }

        /**
         * Copies the system-level values of [ion] through the writer's `_Private_IonWriter.writeValues`,
         * passing the same symbol-id mapping (`x - 9`) that both system-value tests use.
         */
        private fun roundTripSystemValues(ion: ByteArray): ByteArray = roundTripToByteArray { w ->
            w as _Private_IonWriter
            w.writeValues(newSystemReader(ion)) { x -> x - 9 }
        }

        /**
         * Runs [block] against a fresh writer from [writerFn] and returns the bytes it produced.
         * The backing stream asserts that it is closed at most once.
         */
        private fun roundTripToByteArray(block: _Private_IonSystem.(IonWriter) -> Unit): ByteArray {
            // Create a new copy of the data in Ion 1.1
            val baos = object : ByteArrayOutputStream() {
                var closed = false
                override fun close() {
                    assertFalse(closed)
                    closed = true
                    super.close()
                }
            }
            val writer = writerFn(baos)
            block(ION, writer)
            writer.close()
            return baos.toByteArray()
        }
    }

    @OptIn(ExperimentalStdlibApi::class)
    companion object {

        /** When `true`, tests print the expected and actual data. */
        @JvmStatic
        protected val DEBUG_MODE = false

        @JvmStatic
        protected val ION = IonSystemBuilder.standard().build() as _Private_IonSystem
        private val ION_VERSION_MARKER_REGEX = Regex("^\\\$ion_[0-9]+_[0-9]+$")

        @JvmStatic
        private val BUFFER_CONFIGURATION_INITIAL_SIZE_16: IonBufferConfiguration =
            IonBufferConfiguration.Builder.standard().withInitialBufferSize(16).build()

        // Reader factories: {non-continuable, continuable} x {ByteArray, InputStream} x {default buffer, 16-byte buffer}

        @JvmStatic
        protected val READER_NON_CONTINUABLE_BUFFER_DEFAULT: (ByteArray) -> IonReader =
            IonReaderBuilder.standard()::build

        @JvmStatic
        protected val READER_NON_CONTINUABLE_STREAM_DEFAULT: (ByteArray) -> IonReader =
            { IonReaderBuilder.standard().build(ByteArrayInputStream(it)) }

        @JvmStatic
        protected val READER_NON_CONTINUABLE_BUFFER_16: (ByteArray) -> IonReader =
            IonReaderBuilder.standard().withBufferConfiguration(BUFFER_CONFIGURATION_INITIAL_SIZE_16)::build

        @JvmStatic
        protected val READER_NON_CONTINUABLE_STREAM_16: (ByteArray) -> IonReader = {
            IonReaderBuilder.standard()
                .withBufferConfiguration(BUFFER_CONFIGURATION_INITIAL_SIZE_16)
                .build(ByteArrayInputStream(it))
        }

        @JvmStatic
        protected val READER_CONTINUABLE_BUFFER_DEFAULT: (ByteArray) -> IonReader =
            IonReaderBuilder.standard().withIncrementalReadingEnabled(true)::build

        @JvmStatic
        protected val READER_CONTINUABLE_STREAM_DEFAULT: (ByteArray) -> IonReader =
            { IonReaderBuilder.standard().withIncrementalReadingEnabled(true).build(ByteArrayInputStream(it)) }

        @JvmStatic
        protected val READER_CONTINUABLE_BUFFER_16: (ByteArray) -> IonReader =
            IonReaderBuilder.standard()
                .withIncrementalReadingEnabled(true)
                .withBufferConfiguration(BUFFER_CONFIGURATION_INITIAL_SIZE_16)::build

        @JvmStatic
        protected val READER_CONTINUABLE_STREAM_16: (ByteArray) -> IonReader = {
            IonReaderBuilder.standard()
                .withIncrementalReadingEnabled(true)
                .withBufferConfiguration(BUFFER_CONFIGURATION_INITIAL_SIZE_16)
                .build(ByteArrayInputStream(it))
        }

        // Binary writer factories: {interned, inline} symbols x {prefixed, delimited} containers

        @JvmStatic
        protected val WRITER_INTERNED_PREFIXED: (OutputStream) -> IonWriter = ION_1_1.binaryWriterBuilder()
            .withSymbolInliningStrategy(SymbolInliningStrategy.NEVER_INLINE)
            .withLengthPrefixStrategy(LengthPrefixStrategy.ALWAYS_PREFIXED)::build

        @JvmStatic
        protected val WRITER_INLINE_PREFIXED: (OutputStream) -> IonWriter = ION_1_1.binaryWriterBuilder()
            .withSymbolInliningStrategy(SymbolInliningStrategy.ALWAYS_INLINE)
            .withLengthPrefixStrategy(LengthPrefixStrategy.ALWAYS_PREFIXED)::build

        @JvmStatic
        protected val WRITER_INTERNED_DELIMITED: (OutputStream) -> IonWriter = ION_1_1.binaryWriterBuilder()
            .withSymbolInliningStrategy(SymbolInliningStrategy.NEVER_INLINE)
            .withLengthPrefixStrategy(LengthPrefixStrategy.NEVER_PREFIXED)::build

        @JvmStatic
        protected val WRITER_INLINE_DELIMITED: (OutputStream) -> IonWriter = ION_1_1.binaryWriterBuilder()
            .withSymbolInliningStrategy(SymbolInliningStrategy.ALWAYS_INLINE)
            .withLengthPrefixStrategy(LengthPrefixStrategy.NEVER_PREFIXED)::build

        /**
         * Asserts that both readers yield equal sequences of values.
         *
         * Pairs of unannotated Ion version marker symbols are treated as equal without being compared.
         * Both readers are closed before returning, including when an assertion fails.
         *
         * @param expectedDataReader reader over the reference data.
         * @param actualDataReader reader over the data under test.
         */
        @JvmStatic
        fun assertReadersHaveEquivalentValues(expectedDataReader: IonReader, actualDataReader: IonReader) {
            try {
                // Read and compare the data.
                val expectedData: Iterator<IonValue> = ION.iterate(expectedDataReader)
                val actualData: Iterator<IonValue> = ION.iterate(actualDataReader)

                var expectedCount = 0
                while (expectedData.hasNext() && actualData.hasNext()) {
                    val expected = expectedData.next()
                    try {
                        val actual = actualData.next()

                        if (expected is IonSymbol && actual is IonSymbol) {
                            if (expected.typeAnnotationSymbols.isEmpty() &&
                                isIonVersionMarker(expected.symbolValue()) &&
                                actual.typeAnnotationSymbols.isEmpty() &&
                                isIonVersionMarker(actual.symbolValue())
                            ) {
                                // Both are IVMs. We won't actually compare them because we
                                // could be comparing data from different Ion versions
                                continue
                            }
                        }

                        assertEquals(expected, actual, "value $expectedCount is different")
                    } catch (e: IonException) {
                        throw AssertionError(
                            "Encountered IonException when reading the transcribed version of value #$expectedCount\nExpected: $expected",
                            e
                        )
                    }
                    expectedCount++
                }

                // Make sure that both are fully consumed.
                var actualCount = expectedCount
                while (expectedData.hasNext()) {
                    expectedData.next()
                    expectedCount++
                }
                while (actualData.hasNext()) {
                    actualData.next()
                    actualCount++
                }

                assertEquals(expectedCount, actualCount, "Data is unequal length")
            } finally {
                // Release both readers even when a comparison above failed.
                try {
                    expectedDataReader.close()
                } finally {
                    actualDataReader.close()
                }
            }
        }

        /**
         * Returns `true` if [symbol] has symbol id 2 or text of the form `$ion_<digits>_<digits>`.
         * Returns `false` for `null` and for symbols without text (other than symbol id 2).
         */
        @JvmStatic
        fun isIonVersionMarker(symbol: SymbolToken?): Boolean {
            symbol ?: return false
            if (symbol.sid == 2) return true
            val text = symbol.text ?: return false
            return ION_VERSION_MARKER_REGEX.matches(text)
        }

        /**
         * Checks if this ByteArray contains Ion Binary, i.e. starts with `E0 01 00 EA` or `E0 01 01 EA`.
         * Arrays shorter than four bytes are never Ion Binary.
         */
        fun ByteArray.isIonBinary(): Boolean {
            return size >= 4 &&
                get(0) == 0xE0.toByte() &&
                get(1) == 0x01.toByte() &&
                (get(2) == 0x00.toByte() || get(2) == 0x01.toByte()) &&
                get(3) == 0xEA.toByte()
        }

        /**
         * Prints this ByteArray as hex octets if this contains Ion Binary, otherwise prints as UTF-8 decoded string.
         * Hex output is printed 32 octets per line (8 groups of 4).
         */
        @JvmStatic
        protected fun ByteArray.printDisplayString() {
            if (isIonBinary()) {
                map { it.toHexString(HexFormat.UpperCase) }
                    .chunked(4)
                    .chunked(8)
                    .forEach { row ->
                        println(row.joinToString(" ") { group -> group.joinToString(" ") })
                    }
            } else {
                println(toString(Charsets.UTF_8))
            }
        }

        /** Prints [expected] and [actual] when [DEBUG_MODE] is enabled; otherwise does nothing. */
        fun printDebugInfo(expected: ByteArray, actual: ByteArray) {
            if (DEBUG_MODE) {
                println("Expected:")
                expected.printDisplayString()
                println("Actual:")
                actual.printDisplayString()
            }
        }

        /** Builds a test case from Ion text; the text doubles as the test case name. */
        private fun ionText(text: String): Array<Any> = arrayOf(text, text.encodeToByteArray())

        /** Builds a named test case from a string of space-separated hex octets. */
        private fun ionBinary(name: String, bytes: String): Array<Any> = arrayOf(name, hexStringToByteArray(bytes))

        // Arguments here are an array containing a String for the test case name, and a ByteArray of the test data.
        @JvmStatic
        fun testData() = listOf(
            ionText("\$ion_1_1 true \$ion_1_0 true \$ion_1_1 true"),
            ionBinary("Binary IVMs", "E0 01 01 EA 6F E0 01 00 EA 10 E0 01 01 EA 6F"),
            ionBinary("{a:{$4:b}}", "E0 01 01 EA FD 0F 01 FF 61 D3 09 A1 62"),
            ionText("""a::a::c::a::0 a::a::0"""),
            ionText("""a::a::c::a::0 a::0"""),
            ionText("""foo::bar::baz::false foo::0"""),
            ionText("""a::b::c::0 d::0"""),
            ionText("""a::0 b::c::d::0"""),
            ionText("""a::b::c::d::0 a::b::c::0"""),
            ionText("""a::b::c::d::0 a::0 a::0"""),
            ionText("""abc"""),
            // This test case has a top-level annotation that is the same number of utf-8 bytes as $ion_symbol_table
            ionText("fake_symbol_table::{}"),
            ionText(
                """
                ${'$'}ion_1_0
                ${'$'}ion_symbol_table::{
                    symbols:[ "a", "b", "c", "d", "e" ]
                }
                $10 $11 $12 $13 $14
                ${'$'}ion_1_0
                ${'$'}ion_symbol_table::{
                    symbols:[ "rock", "paper", "scissors", "lizard", "spock" ]
                }
                $10 $11 $12 $13 $14
                """.trimIndent()
            ),
            ionText("foo::(bar::baz::{abc: zar::qux::xyz::123, def: 456})")
        ) + files().flatMap { f ->
            val ion = ION.loader.load(f)
            // If there are embedded documents, flatten them into separate test cases.
            if (ion.size == 1 && ion.first().hasTypeAnnotation("embedded_documents")) {
                (ion.first() as IonContainer).mapIndexed { i, ionValue ->
                    arrayOf<Any>("${f.path}[$i]", (ionValue as IonString).stringValue().toByteArray(Charsets.UTF_8))
                }
            } else {
                listOf(arrayOf<Any>(f.path, f.readBytes()))
            }
        }

        /** The "good" ion-tests files that are in neither the global nor the local skip list. */
        @JvmStatic
        fun files() = testdataFiles(
            And(GLOBAL_SKIP_LIST, LOCAL_SKIP_LIST),
            GOOD_IONTESTS_FILES
        )

        /** Accepts every file whose name is not listed below. */
        @JvmField
        val LOCAL_SKIP_LIST = setOf(
            // Has an unknown, imported symbol
            "symbolTablesUnknownText.ion",
            // Skipped because there are no user values in these, and IonReaderNonContinuableSystem will throw an exception.
            "blank.ion",
            "empty.ion",
            "emptyThreeByteNopPad.10n",
            "nopPad16Bytes.10n",
            "nopPadOneByte.10n",
            "T15.10n",
        ).let { FilenameFilter { _, name -> name !in it } }
    }
}
- * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion; import static com.amazon.ion.BitUtils.bytes; @@ -21,6 +8,8 @@ import com.amazon.ion.impl._Private_IonConstants; import com.amazon.ion.impl._Private_Utils; import com.amazon.ion.util.IonStreamUtils; +import org.junit.jupiter.params.converter.ArgumentConversionException; +import org.junit.jupiter.params.converter.TypedArgumentConverter; import java.io.ByteArrayOutputStream; import java.io.File; @@ -29,10 +18,14 @@ import java.math.BigDecimal; import java.math.BigInteger; import java.nio.charset.Charset; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; import java.util.zip.GZIPOutputStream; @@ -139,7 +132,7 @@ public boolean accept(File dir, String name) public static final FilenameFilter GLOBAL_SKIP_LIST = new And( // Skips documentation that accompanies some test vectors - NOT_MARKDOWN_FILTER, + NOT_MARKDOWN_FILTER, new FileIsNot( "bad/clobWithNullCharacter.ion" // TODO amazon-ion/ion-java/43 ,"bad/emptyAnnotatedInt.10n" // TODO amazon-ion/ion-java/55 @@ -150,10 +143,11 @@ public boolean accept(File dir, String name) ,"good/whitespace.ion" ,"good/item1.10n" // TODO amazon-ion/ion-java#126 (roundtrip symbols with 
unknown text) ,"bad/typecodes/type_6_length_0.10n" // TODO amazon-ion/ion-java#272 - ,"good/typecodes/T7-large.10n" // TODO amazon-ion/ion-java#273 + ,"good/typecodes/T7-large.10n" // TODO amazon-ion/ion-java#273 ,"good/equivs/clobNewlines.ion" // TODO amazon-ion/ion-java#274 ,"bad/minLongWithLenTooSmall.10n" // Note: The long itself is fine. The data ends with 0x01, a 2-byte NOP pad header. It is not worth adding the logic to detect this as unexpected EOF. ,"bad/nopPadTooShort.10n" // Note: There are fewer bytes than the NOP pad header declares. It is not worth adding the logic to detect this as unexpected EOF. + ,"bad/invalidVersionMarker_ion_1_1.ion" // We're working on Ion 1.1 support. ) ); @@ -567,8 +561,18 @@ public static boolean symbolTableEquals(final SymbolTable first, final SymbolTab public static class BinaryIonAppender { private final ByteArrayOutputStream out = new ByteArrayOutputStream(); + public BinaryIonAppender(int minorVersion) throws Exception { + if (minorVersion == 0) { + out.write(_Private_IonConstants.BINARY_VERSION_MARKER_1_0); + } else if (minorVersion == 1) { + out.write(_Private_IonConstants.BINARY_VERSION_MARKER_1_1); + } else { + throw new IllegalStateException(); + } + } + public BinaryIonAppender() throws Exception { - out.write(_Private_IonConstants.BINARY_VERSION_MARKER_1_0); + this(0); } public BinaryIonAppender append(int... data) throws Exception { @@ -585,6 +589,16 @@ public byte[] toByteArray() { } } + /** + * Returns the given data prepended with an IVM for the requested 1.x minor version. + * @param minorVersion the IVM version to prepend. + * @param data the data. + * @return the data with an IVM prepended. + */ + public static byte[] withIvm(int minorVersion, byte[] data) throws Exception { + return new TestUtils.BinaryIonAppender(minorVersion).append(data).toByteArray(); + } + /** * Compresses the given bytes using GZIP. * @param bytes the bytes to compress. @@ -598,4 +612,179 @@ public static byte[] gzippedBytes(int... 
bytes) throws Exception { } return out.toByteArray(); } + + /** + * Utility method to make it easier to write test cases that assert specific sequences of bytes. + */ + public static String byteArrayToHex(byte[] bytes) { + StringBuilder sb = new StringBuilder(); + for (byte b : bytes) { + sb.append(String.format("%02X ", b)); + } + return sb.toString().trim(); + } + + /** + * Determines the number of bytes needed to represent a series of hexadecimal digits. + */ + public static int byteLengthFromHexString(String hexString) { + return (hexString.replaceAll("[^\\dA-F]", "").length()) / 2; + } + + /** + * Converts a string of octets in the given radix to a byte array. Octets must be separated by a space. + * @param octetString the string of space-separated octets. + * @param radix the radix of the octets in the string. + * @return a new byte array. + */ + private static byte[] octetStringToByteArray(String octetString, int radix) { + if (octetString.isEmpty()) return new byte[0]; + String[] bytesAsStrings = octetString.split(" "); + byte[] bytesAsBytes = new byte[bytesAsStrings.length]; + for (int i = 0; i < bytesAsBytes.length; i++) { + bytesAsBytes[i] = (byte) (Integer.parseInt(bytesAsStrings[i], radix) & 0xFF); + } + return bytesAsBytes; + } + + /** + * Converts a string of hex octets, such as "BE EF", to a byte array. + */ + public static byte[] hexStringToByteArray(String hexString) { + return octetStringToByteArray(hexString, 16); + } + + /** + * Converts a byte array to a string of bits, such as "00110110 10001001". + * The purpose of this method is to make it easier to read and write test assertions. 
+ */ + public static String byteArrayToBitString(byte[] bytes) { + StringBuilder s = new StringBuilder(); + for (byte aByte : bytes) { + for (int bit = 7; bit >= 0; bit--) { + if (((0x01 << bit) & aByte) != 0) { + s.append("1"); + } else { + s.append("0"); + } + } + s.append(" "); + } + return s.toString().trim(); + } + + /** + * Determines the number of bytes needed to represent a series of hexadecimal digits. + */ + public static int byteLengthFromBitString(String bitString) { + return (bitString.replaceAll("[^01]", "").length()) / 8; + } + + /** + * Converts a string of bits, such as "00110110 10001001", to a byte array. + */ + public static byte[] bitStringToByteArray(String bitString) { + return octetStringToByteArray(bitString, 2); + } + + /** + * @param hexBytes a string containing white-space delimited pairs of hex digits representing the expected output. + * The string may contain multiple lines. Anything after a `|` character on a line is ignored, so + * you can use `|` to add comments. 
+ */ + public static String cleanCommentedHexBytes(String hexBytes) { + return Stream.of(hexBytes.split("\n")) + .map(it -> it.replaceAll("\\|.*$", "").trim()) + .filter(it -> !it.trim().isEmpty()) + .collect(Collectors.joining(" ")) + .replaceAll("\\s+", " ") + .toUpperCase() + .trim(); + } + + /** + * Converts a String to a Timestamp for a @Parameterized test + */ + public static class StringToTimestamp extends TypedArgumentConverter { + protected StringToTimestamp() { + super(String.class, Timestamp.class); + } + + @Override + protected Timestamp convert(String source) throws ArgumentConversionException { + if (source == null) return null; + return Timestamp.valueOf(source); + } + } + + /** + * Converts a String to a Decimal for a @Parameterized test + */ + public static class StringToDecimal extends TypedArgumentConverter { + protected StringToDecimal() { + super(String.class, Decimal.class); + } + + @Override + protected Decimal convert(String source) throws ArgumentConversionException { + if (source == null) return null; + return Decimal.valueOf(source); + } + } + + /** + * Converts a Hex String to a Byte Array for a @Parameterized test + */ + public static class HexStringToByteArray extends TypedArgumentConverter { + + private static final CharsetEncoder ASCII_ENCODER = StandardCharsets.US_ASCII.newEncoder(); + + protected HexStringToByteArray() { + super(String.class, byte[].class); + } + + @Override + protected byte[] convert(String source) throws ArgumentConversionException { + if (source == null) return null; + if (source.trim().isEmpty()) return new byte[0]; + String[] octets = source.split(" "); + byte[] result = new byte[octets.length]; + for (int i = 0; i < octets.length; i++) { + if (octets[i].length() == 1) { + char c = octets[i].charAt(0); + if (!ASCII_ENCODER.canEncode(c)) { + throw new IllegalArgumentException("Cannot convert non-ascii character: " + c); + } + result[i] = (byte) c; + } else { + result[i] = (byte) Integer.parseInt(octets[i], 16); 
+ } + } + return result; + } + } + + /** + * Converts a String of symbol ids to a long[] for a @Parameterized test + */ + public static class SymbolIdsToLongArray extends TypedArgumentConverter { + protected SymbolIdsToLongArray() { + super(String.class, long[].class); + } + + @Override + protected long[] convert(String source) throws ArgumentConversionException { + if (source == null) return null; + int size = (int) source.chars().filter(i -> i == '$').count(); + String[] sids = source.split("\\$"); + long[] result = new long[size]; + int i = 0; + for (String sid : sids) { + if (sid.isEmpty()) continue; + result[i] = Long.parseLong(sid.trim()); + i++; + } + return result; + } + } } diff --git a/src/test/java/com/amazon/ion/apps/macroize/MacroizeTest.java b/src/test/java/com/amazon/ion/apps/macroize/MacroizeTest.java new file mode 100644 index 0000000000..47c66e18a0 --- /dev/null +++ b/src/test/java/com/amazon/ion/apps/macroize/MacroizeTest.java @@ -0,0 +1,75 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.apps.macroize; + +import com.amazon.ion.IonDatagram; +import com.amazon.ion.IonSystem; +import com.amazon.ion.system.IonReaderBuilder; +import com.amazon.ion.system.IonSystemBuilder; +import com.amazon.ion.system.IonTextWriterBuilder; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class MacroizeTest { + + private static final IonSystem SYSTEM = IonSystemBuilder.standard().build(); + + private static void testMacroize( + String input, + String spec, + boolean outputBinary, + Map expectedOccurrences + ) throws IOException { + StringBuilder invocations = new StringBuilder(); + ByteArrayOutputStream headless = new ByteArrayOutputStream(); + ByteArrayOutputStream context = new ByteArrayOutputStream(); + ByteArrayOutputStream complete = new ByteArrayOutputStream(); + StringBuilder log = new StringBuilder(); + Macroize.macroize( + () -> IonReaderBuilder.standard().build(input), + () -> IonTextWriterBuilder.pretty().build(invocations), + () -> IonReaderBuilder.standard().build(invocations.toString()), + () -> headless, + () -> context, + () -> { + complete.write(context.toByteArray()); + complete.write(headless.toByteArray()); + }, + () -> IonReaderBuilder.standard().build(spec), + outputBinary, + log + ); + IonDatagram from10 = SYSTEM.getLoader().load(input); + IonDatagram from11 = SYSTEM.getLoader().load(complete.toByteArray()); + assertEquals(from10, from11); + for (Map.Entry expectedOccurrence : expectedOccurrences.entrySet()) { + assertTrue(log.toString().contains( + String.format("%s (total occurrences: %d)", expectedOccurrence.getKey(), expectedOccurrence.getValue())) + ); + } + // TODO assert 
that the text patterns were matched as expected + } + + @ParameterizedTest(name = "outputBinary={0}") + @ValueSource(booleans = {true, false}) + public void macroizeWithSpec(boolean outputBinary) throws IOException { + String spec = "{macros: [(macro foobar (foo bar?) {foo: (%foo), bar: (%bar)})], textPatterns: [(verbatim [baz]), (prefix \"/user/files/\" [a, b])]}"; + String input = "{foo: 1, bar: 2} {foo: 3} \"baz\" {foobar: {foo: 4, bar: 5}, path: \"/user/files/a\"} \"/user/files/c\""; + Map expectedOccurrences = new HashMap() {{ + put("foobar", 3); + }}; + testMacroize(input, spec, outputBinary, expectedOccurrences); + } + + // TODO add tests that exercise using every Ion type in macro definitions + // TODO test substring text pattern + // TODO address known limitations, as documented in the top-level JavaDoc on MacroizeSpec +} diff --git a/src/test/java/com/amazon/ion/conformance/Config.kt b/src/test/java/com/amazon/ion/conformance/Config.kt new file mode 100644 index 0000000000..daba009e9a --- /dev/null +++ b/src/test/java/com/amazon/ion/conformance/Config.kt @@ -0,0 +1,20 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.conformance + +import com.amazon.ion.system.IonReaderBuilder +import java.io.File + +/** Top-level configuration for running the conformance tests */ +data class Config( + /** Controls whether debug printing should be turned on. */ + val debugEnabled: Boolean = true, + /** If a NotImplementedError is encountered, should we fail the test or ignore it. */ + val failUnimplemented: Boolean = false, + /** Use for a skip list, or for running only one or two tests. Return true to run the test. */ + val testFilter: (File, String) -> Boolean = { _, _ -> true }, + /** The reader builder (i.e. the reader configuration) to use for all tests. 
*/ + val readerBuilder: IonReaderBuilder, +) { + fun newCaseBuilder(file: File) = ConformanceTestBuilder(this, file) +} diff --git a/src/test/java/com/amazon/ion/conformance/ConformanceTestBuilder.kt b/src/test/java/com/amazon/ion/conformance/ConformanceTestBuilder.kt new file mode 100644 index 0000000000..41e9f89038 --- /dev/null +++ b/src/test/java/com/amazon/ion/conformance/ConformanceTestBuilder.kt @@ -0,0 +1,101 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.conformance + +import com.amazon.ion.* +import com.amazon.ion.system.* +import com.amazon.ionelement.api.IonElement +import com.amazon.ionelement.api.SeqElement +import com.amazon.ionelement.api.location +import java.io.File +import org.junit.jupiter.api.DynamicContainer.dynamicContainer +import org.junit.jupiter.api.DynamicNode +import org.junit.jupiter.api.DynamicTest.dynamicTest +import org.opentest4j.AssertionFailedError +import org.opentest4j.TestAbortedException + +data class ConformanceTestBuilder( + val config: Config, + /** File that the cases are being created from. */ + val file: File, + // Internal fields for building up state from which to create a test case + private val nameParts: List = listOf("[${ION_CONFORMANCE_DIR.toPath().relativize(file.toPath())}]"), + private val fragments: List = listOf(), +) { + + /** + * Helper class that provides runtime support to the test cases. + */ + class TestCaseSupport(private val testBuilder: ConformanceTestBuilder, private val readerBuilder: IonReaderBuilder) { + + private val data: ByteArray by lazy { readFragments(testBuilder.fragments) } + + /** Creates a new reader for this test case */ + fun createFragmentReader(): IonReader = readerBuilder.build(data) + + /** Logs a lazily-evaluated message, if debug is enabled. 
*/ + fun debug(message: () -> String) = testBuilder.debug(message) + + /** Throws an exception for a syntax error in the tests */ + fun reportSyntaxError(element: IonElement, details: String? = null): Nothing = + testBuilder.reportSyntaxError(element, details) + + /** Creates a file URI for the given IonElement */ + fun locationOf(element: IonElement) = "file://${testBuilder.file.absolutePath}:${element.metas.location}" + + /** Creates a failure message that includes a file link to [element] */ + fun createFailureMessage(element: IonElement, details: String? = null): String = + "${details ?: "Assertion failed"} at ${locationOf(element)}; $element" + + /** Throws an [AssertionFailedError] to fail a test case */ + fun fail(expectation: IonElement, details: String, t: Throwable? = null): Nothing = + throw AssertionFailedError(createFailureMessage(expectation, details), t) + } + + // Leaf nodes need a full name or else the HTML report is incomprehensible. + private val fullName: String + get() = nameParts.joinToString(" ") + + // TODO: this could be fullName or nameParts.last() + // Both have drawbacks, but it only affects the display of the interior nodes of the test tree + val containerName: String + get() = fullName // nameParts.last() + + /** Prints a debug message, if debug messages are enabled in the config. 
*/ + fun debug(message: () -> String) { + if (config.debugEnabled) println("[TEST: $fullName] ${message()}") + } + + // Copy-on-write setters + fun plusName(name: String): ConformanceTestBuilder = copy(nameParts = nameParts + name) + fun plusFragment(fragment: SeqElement): ConformanceTestBuilder = copy(fragments = fragments + fragment) + fun plusFragments(newFragments: List): ConformanceTestBuilder = copy(fragments = fragments + newFragments) + fun plus(name: String, fragment: SeqElement): ConformanceTestBuilder = copy(nameParts = nameParts + name, fragments = fragments + fragment) + fun plus(name: String, newFragments: List): ConformanceTestBuilder = copy(nameParts = nameParts + name, fragments = fragments + newFragments) + + fun build(executable: TestCaseSupport.() -> Unit): DynamicNode { + val readerBuilder = config.readerBuilder + val testName = fullName + val testCaseSupport = TestCaseSupport(this, readerBuilder) + return dynamicTest(testName) { + if (!config.testFilter(file, testName)) throw TestAbortedException(testName) + try { + executable(testCaseSupport) + } catch (e: NotImplementedError) { + if (config.failUnimplemented) throw e + debug { "Ignored because ${e.message}" } + throw TestAbortedException("$e") + } + } + } + + /** Builds a [DynamicNode] container with the correct name */ + fun buildContainer(children: Iterable): DynamicNode = dynamicContainer(containerName, children) + + /** Builds a [DynamicNode] container with the correct name */ + fun buildContainer(vararg children: DynamicNode): DynamicNode = dynamicContainer(containerName, children.toList()) + + /** Signals to the test builder that there is a syntax error */ + fun reportSyntaxError(element: IonElement, details: String? 
= null): Nothing = + throw ConformanceTestInvalidSyntaxException(file, element, details) +} diff --git a/src/test/java/com/amazon/ion/conformance/ConformanceTestDslInterpreterTest.kt b/src/test/java/com/amazon/ion/conformance/ConformanceTestDslInterpreterTest.kt new file mode 100644 index 0000000000..770de5c351 --- /dev/null +++ b/src/test/java/com/amazon/ion/conformance/ConformanceTestDslInterpreterTest.kt @@ -0,0 +1,106 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.conformance + +import com.amazon.ion.system.* +import java.io.File +import kotlin.streams.toList +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.DynamicContainer +import org.junit.jupiter.api.DynamicNode +import org.junit.jupiter.api.DynamicTest +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.MethodSource + +/** Some minimal tests for the DSL interpreter. 
*/ +object ConformanceTestDslInterpreterTest { + + private val CONFIG = Config( + debugEnabled = true, + failUnimplemented = false, + readerBuilder = IonReaderBuilder.standard(), + ) + + @JvmStatic + fun data(): Iterable> = listOf( + """ + (document "a test using 'produces'" + (produces)) + """ to 1, + """ + (ion_1_0 "a test using 'text'" + (text ''' {a:1, b:2} "two" ''') + (produces {b:2, a:1} "two")) + """ to 1, + """ + (ion_1_0 "a test using 'signals'" + (text ''' {a:1, b:2 "two" ''') + (signals "struct missing closing delimiter")) + """ to 1, + """ + (ion_1_1 "a test that uses binary" + (bytes "6F 6E 60") + (produces false true 0)) + """ to 1, + """ + (ion_1_0 "a test that uses denotes" + (text "${'$'}4") + (denotes (Symbol "name"))) + """ to 1, + """ + (ion_1_0 "a test using 'then'" + (text ''' 1 ''') + (then (text "2") + (produces 1 2))) + """ to 1, + """ + (ion_1_0 "a test using 'then' to create more than one test case" + (text ''' 1 ''') + (then "then 2" + (text "2") + (produces 1 2)) + (then "then 3" + (text "3") + (produces 1 3))) + """ to 2, + """ + (ion_1_0 "a test using 'each' to create more than one test case" + (text " 1 ") + (each "unclosed container" + (text " { ") + (text " [ ") + (text " ( ") + "invalid timestamp" + (text "2022-99-99T") + (signals "something bad"))) + """ to 4, + """ + (ion_1_x "a test using 'ion_1_x' to create more than one test case" + (text " 1 ") + (produces 1)) + """ to 2, + // TODO: Tests to check the demangling behavior, use different types of fragments + ) + + @MethodSource("data") + @ParameterizedTest + fun interpreterTests(testInput: Pair) { + val (dsl, expectedNumberOfTestCases) = testInput + + val testBuilder = CONFIG.newCaseBuilder(File("fake-file")) + val testCases = testBuilder.readAllTests(ION.newReader(dsl)).flatten() + + // It should have the correct number of test cases + assertEquals(expectedNumberOfTestCases, testCases.size) + // All the test case executables should run without throwing any exceptions (i.e. 
pass) + testCases.forEach { it.executable.execute() } + } + + private fun DynamicNode.flatten(): List { + return when (this@flatten) { + is DynamicContainer -> children.toList().flatMap { it.flatten() } + is DynamicTest -> listOf(this) + else -> TODO("Unreachable") + } + } +} diff --git a/src/test/java/com/amazon/ion/conformance/ConformanceTestInvalidSyntaxException.kt b/src/test/java/com/amazon/ion/conformance/ConformanceTestInvalidSyntaxException.kt new file mode 100644 index 0000000000..b19fe05376 --- /dev/null +++ b/src/test/java/com/amazon/ion/conformance/ConformanceTestInvalidSyntaxException.kt @@ -0,0 +1,21 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.conformance + +import com.amazon.ionelement.api.IonElement +import com.amazon.ionelement.api.location +import java.io.File + +/** Exception for signalling invalid syntax in the conformance tests. */ +class ConformanceTestInvalidSyntaxException( + file: File, + element: IonElement, + description: String? = null, + cause: Throwable? = null +) : Error(cause) { + override val message: String = """ + Invalid conformance dsl syntax${ description?.let { "; $it" } ?: ""} + - at file://${file.absolutePath}:${element.metas.location} + - invalid clause was: $element + """.trimIndent() +} diff --git a/src/test/java/com/amazon/ion/conformance/ConformanceTestRunner.kt b/src/test/java/com/amazon/ion/conformance/ConformanceTestRunner.kt new file mode 100644 index 0000000000..a96f8f8f03 --- /dev/null +++ b/src/test/java/com/amazon/ion/conformance/ConformanceTestRunner.kt @@ -0,0 +1,142 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.conformance + +import com.amazon.ion.system.* +import java.io.File +import org.junit.jupiter.api.DynamicNode +import org.junit.jupiter.api.TestFactory + +object DefaultReaderConformanceTests : ConformanceTestRunner( + IonReaderBuilder.standard() + .withCatalog(ION_CONFORMANCE_TEST_CATALOG) +) + +object IncrementalReaderConformanceTests : ConformanceTestRunner( + IonReaderBuilder.standard() + .withCatalog(ION_CONFORMANCE_TEST_CATALOG) + .withIncrementalReadingEnabled(true), + additionalSkipFilter = { _, testName -> "Incomplete floats signal an error for unexpected EOF" in testName } +) + +abstract class ConformanceTestRunner( + readerBuilder: IonReaderBuilder, + /** A predicate that returns `true` iff the test case should be skipped. */ + additionalSkipFilter: (File, String) -> Boolean = { _, _ -> false } +) { + + private val DEFAULT_SKIP_FILTER: (File, String) -> Boolean = { file, completeTestName -> + // `completeTestName` is the complete name of the test — that is all the test descriptions in a particular + // execution path, joined by " ". (This is how it appears in the JUnit report.) + when { + // IonElement can't load $0. TODO: Use IonValue for `produces`, I guess. + "$0" in completeTestName -> false + // For some reason, $ion_symbol_table::null.struct is not handled as expected + "IST structs are elided from app view" in completeTestName -> false + // IonWriter is making it difficult to write invalid data + "If no max_id, lack of exact-match must raise an error «then»" in completeTestName -> false + // IonCatalog's "best choice" logic is not spec compliant + "When max_id is valid, pad/truncate mismatched or absent SSTs" in completeTestName -> false + // No support for reading `$ion_encoding` directives yet. 
+ "conformance/ion_encoding/" in file.absolutePath -> false + file.endsWith("local_symtab_imports.ion") -> when { + // FIXME: The writer seems to remove "imports" field if the value is `$ion_symbol_table`. This should be + // legal as per https://amazon-ion.github.io/ion-docs/docs/symbols.html#imports + "Importing the current symbol table" in completeTestName -> false + + // WON'T FIX: + + // If you inspect the debug output, the serialized data does not include the repeated fields. + // This implies that the writer is attempting to clean a user-supplied symbol table. + "Repeated fields" in completeTestName -> false + // For these tests, the writer is validating the max_id field, and failing before + // we have a chance to test the reader. + "If no max_id, lack of exact-match must raise an error" in completeTestName -> false + "If max_id not non-negative int, lack of exact-match must raise an error" in completeTestName -> false + else -> true + } + + // FIXME: Contains test cases that are out of date, lack descriptions to have more specific exclusions + "eexp/basic_system_macros.ion" in file.absolutePath -> false + "eexp/arg_inlining.ion" in file.absolutePath -> false + + // FIXME: + // 1. Test cases expect a zero-or-one-valued expression group to be valid for ? parameters, implementation disagrees + // 2. One-to-many parameters are not raising an error for an empty expression group. This may need to be + // fixed in the macro evaluator and/or in the reader. + // 3. All other failures for tagless type cases are due to "Encountered an unknown macro address: N" where + // N is the first byte of the macro argument (after any AEB and/or expression group prefixes). 
+ "eexp/binary/argument_encoding.ion" in file.absolutePath -> false + + // FIXME: All failing for reason #3 for argument_encoding.ion + "eexp/binary/tagless_types.ion" in file.absolutePath -> false + + // FIXME: Fails because the encoding context isn't properly populated with the system module/macros + "conformance/system_macros/" in file.absolutePath && + "in binary with a user macro address" in completeTestName -> false + + // FIXME: Timestamp should not allow an offset of +/-1440 + "the offset argument must be less than 1440" in completeTestName -> false + "the offset argument must be greater than -1440" in completeTestName -> false + + // FIXME: Ensure Ion 1.1 symbol tables are properly validated + "add_symbols does not accept null.symbol" in completeTestName -> false + "add_symbols does not accept null.string" in completeTestName -> false + "add_symbols does not accept annotated arguments" in completeTestName -> false + "set_symbols does not accept null.symbol" in completeTestName -> false + "set_symbols does not accept null.string" in completeTestName -> false + "set_symbols does not accept annotated arguments" in completeTestName -> false + + // FIXME: Ensure that the text reader throws if unexpected extra args are encountered + "sum arguments may not be more than two integers" in completeTestName -> false + "none signals an error when argument is" in completeTestName -> false + + // TODO: support continuable parsing of macro arguments + "make_decimal can be invoked in binary using system macro address 6" in completeTestName -> false + + // TODO: Macro-shaped parameters not implemented yet + "macro-shape" in completeTestName -> false + + // TODO: Not implemented yet + "subnormal f16" in completeTestName -> false + "conformance/system_macros/" in file.absolutePath -> when { + file.endsWith("parse_ion.ion") || + file.endsWith("make_list.ion") || + file.endsWith("make_sexp.ion") || + file.endsWith("make_field.ion") || + file.endsWith("flatten.ion") || + 
file.endsWith("make_struct.ion") -> false + else -> true + } + // Some of these are failing because + // - Ion Java doesn't support the Ion 1.1 system symbol table yet + // - The tokens `$ion_1_0` and `'$ion_1_0'` are never user values. + // TODO: Add test names once they are added to this file + file.endsWith("system_symbols.ion") -> false + // $ion_literal not supported yet + file.endsWith("ion_literal.ion") -> false + else -> true + } + } + + private val CONFIG = Config( + debugEnabled = true, + failUnimplemented = false, + readerBuilder = readerBuilder, + testFilter = { file, name -> DEFAULT_SKIP_FILTER(file, name) && !additionalSkipFilter(file, name) }, + ) + + @TestFactory + fun `Conformance Tests`(): Iterable { + return ION_CONFORMANCE_DIR.walk() + .filter { it.isFile && it.extension == "ion" } + .map { file -> + with(CONFIG.newCaseBuilder(file)) { + file.inputStream() + .let(ION::newReader) + .use { reader -> readAllTests(reader) } + } + } + .asIterable() + } +} diff --git a/src/test/java/com/amazon/ion/conformance/expectations.kt b/src/test/java/com/amazon/ion/conformance/expectations.kt new file mode 100644 index 0000000000..a9ecae7a7c --- /dev/null +++ b/src/test/java/com/amazon/ion/conformance/expectations.kt @@ -0,0 +1,398 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.conformance + +import com.amazon.ion.* +import com.amazon.ion.conformance.ConformanceTestBuilder.* +import com.amazon.ionelement.api.AnyElement +import com.amazon.ionelement.api.BoolElement +import com.amazon.ionelement.api.IntElement +import com.amazon.ionelement.api.IntElementSize +import com.amazon.ionelement.api.SeqElement +import com.amazon.ionelement.api.SexpElement +import com.amazon.ionelement.api.StringElement +import com.amazon.ionelement.api.TextElement +import java.lang.AssertionError +import kotlin.streams.toList +import org.junit.jupiter.api.Assertions.assertArrayEquals +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertFalse +import org.junit.jupiter.api.Assertions.assertNull +import org.junit.jupiter.api.Assertions.assertTrue + +/** + * Asserts that fully traversing the reader will result in an [IonException]. + * It's expected to be for the reason given in [sexp], but we don't have a way + * to check that right now because the `signals` message is non-normative. + */ +fun TestCaseSupport.assertSignals(sexp: SeqElement, r: IonReader) { + val signalDescription = sexp.tail.single().textValue + // The usual `assertThrows` doesn't give us the ability to add our own context to the failure message. + val events = try { + // Just walk the reader without materializing so that we can ensure that the error is raised + // specifically by the reader. + r.walk() + } catch (e: IonException) { + debug { "Expected an IonException because '$signalDescription'; found $e" } + // Test case passes + return + } catch (t: Throwable) { + fail(sexp, "Expected an IonException because '$signalDescription' but was ${t::class.simpleName}", t) + } + fail( + sexp, + "Expected an IonException because '$signalDescription'; " + + "successfully read: ${events.joinToString("\n")}" + ) +} + +/** + * Walks all data available from an IonReader. 
Records all data as a stream of events so that + * if an error is _not_ encountered, we have some useful information for debugging the test failure. + */ +private fun IonReader.walk(): List { + val events = mutableListOf() + fun recordEvent(eventType: String = type.toString(), value: Any? = "") { + events.add("[$eventType] $value") + } + recordEvent("START") + + while (true) { + next() + val currentType = type + if (currentType == null) { + if (depth > 0) { + stepOut() + recordEvent("STEP-OUT") + continue + } else { + recordEvent("END") + return events + } + } + + if (isInStruct) recordEvent("FIELD-NAME", fieldNameSymbol) + typeAnnotationSymbols.forEach { recordEvent("ANNOTATION", it) } + + if (isNullValue) { + recordEvent("NULL", currentType) + } else when (currentType) { + // TODO: See if we can exercise multiple APIs here. + // Since `walk()` is used for looking for errors, we might need to create + // multiple versions of walk that use different subsets of the APIs so we + // can ensure that all of them result in the expected error. + IonType.BOOL -> recordEvent(value = booleanValue()) + IonType.INT -> recordEvent(value = bigIntegerValue()) + IonType.FLOAT -> recordEvent(value = doubleValue()) + IonType.DECIMAL -> recordEvent(value = decimalValue()) + IonType.TIMESTAMP -> recordEvent(value = timestampValue()) + IonType.SYMBOL -> recordEvent(value = symbolValue()) + IonType.STRING -> recordEvent(value = stringValue()) + IonType.CLOB, + IonType.BLOB -> recordEvent(value = newBytes()) + IonType.LIST, + IonType.SEXP, + IonType.STRUCT -> { + recordEvent("STEP-IN", type) + stepIn() + } + IonType.NULL, + IonType.DATAGRAM -> TODO("Unreachable") + } + } +} + +/** + * Entry point into `denotes` evaluation. Asserts that each top-level value on the reader + * matches its respective model-value, and that there are no extra, unexpected values. 
+ * + * See https://github.com/amazon-ion/ion-tests/tree/master/conformance#modeling-outputs + */ +fun TestCaseSupport.assertDenotes(modelValues: List, reader: IonReader) { + modelValues.forEach { + reader.next() + denotesModelValue(it, reader) + } + // Assert no more elements in sequence + assertNull(reader.next(), "unexpected extra element(s) at end of stream") +} + +/** + * Assert that the data at the reader's current position matches a particular Ion value. + */ +private fun TestCaseSupport.denotesModelValue(expectation: AnyElement, reader: IonReader) { + if (reader.type == null) fail(expectation, "no more values; expected $expectation") + if (expectation is SexpElement && expectation.head == "annot") { + val actualAnnotations = reader.typeAnnotationSymbols + expectation.tailFrom(2) + .forEachIndexed { i, it -> denotesSymtok(it, actualAnnotations[i]) } + denotesModelContent(expectation.tail.first(), reader) + } else { + assertEquals(SymbolToken.EMPTY_ARRAY, reader.typeAnnotationSymbols, createFailureMessage(expectation, "expected no annotations")) + denotesModelContent(expectation, reader) + } +} + +private fun TestCaseSupport.denotesModelContent(modelContent: AnyElement, reader: IonReader) { + when (modelContent) { + is IntElement -> denotesInt(modelContent, reader) + is BoolElement -> denotesBool(modelContent, reader) + is StringElement -> { + val failureContext = createFailureMessage(modelContent) + assertEquals(IonType.STRING, reader.type, failureContext) + assertEquals(modelContent.stringValue, reader.stringValue(), failureContext) + } + is SeqElement -> when (modelContent.head) { + "Null" -> denotesNull(modelContent, reader) + "Bool" -> denotesBool(modelContent, reader) + "Int" -> denotesInt(modelContent, reader) + "Float" -> denotesFloat(modelContent, reader) + "Decimal" -> denotesDecimal(modelContent, reader) + "Timestamp" -> denotesTimestamp(modelContent, reader) + "Symbol" -> denotesSymtok(modelContent.tail.single(), reader.symbolValue()) + "String" 
-> denotesCodepoints(modelContent, reader.stringValue()) + "Blob" -> denotesLob(IonType.BLOB, modelContent, reader) + "Clob" -> denotesLob(IonType.CLOB, modelContent, reader) + "List" -> denotesSeq(IonType.LIST, modelContent, reader) + "Sexp" -> denotesSeq(IonType.SEXP, modelContent, reader) + "Struct" -> denotesStruct(modelContent, reader) + else -> reportSyntaxError(modelContent, "model-content") + } + else -> reportSyntaxError(modelContent, "model-content") + } +} + +private fun TestCaseSupport.denotesNull(expectation: SeqElement, reader: IonReader) { + val expectedType = expectation.tail.singleOrNull()?.textValue?.uppercase()?.let(IonType::valueOf) ?: IonType.NULL + val actualType = reader.type + assertTrue(reader.isNullValue, createFailureMessage(expectation)) + assertEquals(expectedType, actualType) +} + +private fun TestCaseSupport.denotesBool(modelBoolean: AnyElement, reader: IonReader) { + val expected = when (modelBoolean) { + is BoolElement -> modelBoolean.booleanValue + is SexpElement -> modelBoolean.tail.single().booleanValue + else -> reportSyntaxError(modelBoolean, "model-boolean") + } + assertEquals(IonType.BOOL, reader.type, createFailureMessage(modelBoolean)) + assertEquals(expected, reader.booleanValue(), createFailureMessage(modelBoolean)) +} + +private fun TestCaseSupport.denotesInt(expectation: AnyElement, reader: IonReader) { + val expectedValue = when (expectation) { + is SexpElement -> expectation.tail.single().asInt() + is IntElement -> expectation + else -> reportSyntaxError(expectation, "model-integer") + } + assertEquals(IonType.INT, reader.type, createFailureMessage(expectation)) + assertFalse(reader.isNullValue, createFailureMessage(expectation)) + when (expectedValue.integerSize) { + IntElementSize.LONG -> { + assertEquals(expectedValue.longValue, reader.longValue(), createFailureMessage(expectation)) + } + IntElementSize.BIG_INTEGER -> { + assertEquals(IntegerSize.BIG_INTEGER, reader.integerSize, createFailureMessage(expectation)) + 
}
+    }
+    assertEquals(expectedValue.bigIntegerValue, reader.bigIntegerValue(), createFailureMessage(expectation))
+}
+
+/**
+ * Asserts that [reader] is positioned on a non-null float matching the model float in [expectation].
+ *
+ * The single tail element of [expectation] is read as text: `nan`, `+inf` and `-inf` are matched specially;
+ * any other text is parsed with [String.toDouble] and compared to the reader's double value.
+ */
+private fun TestCaseSupport.denotesFloat(expectation: SeqElement, reader: IonReader) {
+    assertFalse(reader.isNullValue, createFailureMessage(expectation))
+    assertEquals(IonType.FLOAT, reader.type, createFailureMessage(expectation))
+
+    val actualValue = reader.doubleValue()
+
+    when (val floatValueAsString = expectation.tail.single().asString().textValue) {
+        "nan" -> assertTrue(actualValue.isNaN(), "expected 'nan'; was $actualValue")
+        "+inf" -> assertEquals(Double.POSITIVE_INFINITY, actualValue)
+        "-inf" -> assertEquals(Double.NEGATIVE_INFINITY, actualValue)
+        else -> {
+            val expected = floatValueAsString.toDouble()
+            assertEquals(expected, actualValue, createFailureMessage(expectation))
+        }
+    }
+}
+
+/**
+ * Asserts that [reader] is positioned on a non-null decimal matching the model decimal in [expectation].
+ *
+ * `expectation.values[1]` is the coefficient (an int, or the text `negative_0`) and `expectation.values[2]`
+ * is the exponent, which is compared against the negated scale of the value that was read.
+ */
+private fun TestCaseSupport.denotesDecimal(expectation: SeqElement, reader: IonReader) {
+    assertFalse(reader.isNullValue, createFailureMessage(expectation))
+    assertEquals(IonType.DECIMAL, reader.type, createFailureMessage(expectation))
+    val actualValue = reader.decimalValue()
+
+    val exponent = expectation.values[2].bigIntegerValue
+    // Compare BigInteger with BigInteger. Passing a BigInteger and an Int selects assertEquals(Object, Object),
+    // which never considers the two equal, even when they hold the same number.
+    val actualExponent = (actualValue.scale() * -1).toBigInteger()
+    assertEquals(exponent, actualExponent, createFailureMessage(expectation, "exponent not equal"))
+    when (val coefficient = expectation.values[1]) {
+        is IntElement -> assertEquals(
+            coefficient.bigIntegerValue,
+            actualValue.bigDecimalValue().unscaledValue(),
+            createFailureMessage(expectation, "coefficient not equal")
+        )
+        is TextElement -> {
+            if (coefficient.textValue != "negative_0") reportSyntaxError(coefficient, "model-decimal")
+            assertTrue(actualValue.isNegativeZero, createFailureMessage(expectation, "coefficient expected to be negative 0"))
+        }
+    }
+}
+
+/**
+ * Asserts that [reader] is positioned on a non-null timestamp matching the model timestamp in [expectation].
+ *
+ * The tail of [expectation] is read positionally: precision name, year, month, day, offset clause, hour,
+ * minute, second, sub-second coefficient, sub-second exponent. Fields are checked in that order and the
+ * function returns as soon as the field named by the precision has been checked. An unrecognised precision
+ * name is reported as a syntax error.
+ */
+private fun TestCaseSupport.denotesTimestamp(expectation: SeqElement, reader: IonReader) {
+    assertFalse(reader.isNullValue, createFailureMessage(expectation))
+    assertEquals(IonType.TIMESTAMP, reader.type, createFailureMessage(expectation))
+    val actualValue = reader.timestampValue()
+
+    val modelTimestamp = expectation.tail
+    val precision = modelTimestamp.first().textValue
+
+    // The model fields are Long and the Timestamp getters are Int. They are converted to Int before comparing
+    // because a Long and an Int are never equal under assertEquals(Object, Object).
+    assertEquals(modelTimestamp[1].longValue.toInt(), actualValue.year, createFailureMessage(expectation, "unexpected year"))
+    if (precision == "year") {
+        assertEquals(Timestamp.Precision.YEAR, actualValue.precision)
+        return
+    }
+
+    assertEquals(modelTimestamp[2].longValue.toInt(), actualValue.month, createFailureMessage(expectation, "unexpected month"))
+    if (precision == "month") {
+        assertEquals(Timestamp.Precision.MONTH, actualValue.precision)
+        return
+    }
+
+    assertEquals(modelTimestamp[3].longValue.toInt(), actualValue.day, createFailureMessage(expectation, "unexpected day"))
+    if (precision == "day") {
+        assertEquals(Timestamp.Precision.DAY, actualValue.precision)
+        return
+    }
+
+    // Both sides are nullable Ints so that an absent offset (null) on both sides still compares equal.
+    val expectedOffsetMinutes: Int? = modelTimestamp[4].seqValues[1].longValueOrNull?.toInt()
+    val actualOffsetMinutes: Int? = actualValue.localOffset
+    assertEquals(expectedOffsetMinutes, actualOffsetMinutes, createFailureMessage(expectation, "unexpected offset"))
+    assertEquals(modelTimestamp[5].longValue.toInt(), actualValue.hour, createFailureMessage(expectation, "unexpected hour"))
+    assertEquals(modelTimestamp[6].longValue.toInt(), actualValue.minute, createFailureMessage(expectation, "unexpected minute"))
+    if (precision == "minute") {
+        assertEquals(Timestamp.Precision.MINUTE, actualValue.precision, createFailureMessage(expectation))
+        return
+    }
+
+    // Kept as a Long because it is also used to build the expected decimal second below.
+    val expectedSecond = modelTimestamp[7].longValue
+    assertEquals(expectedSecond.toInt(), actualValue.second, createFailureMessage(expectation, "unexpected second"))
+    if (precision == "second") {
+        assertEquals(Timestamp.Precision.SECOND, actualValue.precision)
+        return
+    }
+
+    // Timestamps cannot have -0 as the fractional second coefficient.
+    val subsecondCoefficient = modelTimestamp[8].longValue
+    val subsecondScale = modelTimestamp[9].longValue.toInt() * -1
+
+    if (precision == "fraction") {
+        val expectedDecimalSecond = Decimal.valueOf(subsecondCoefficient, subsecondScale).add(Decimal.valueOf(expectedSecond))
+        assertEquals(expectedDecimalSecond, actualValue.decimalSecond, createFailureMessage(expectation, "unexpected seconds fraction"))
+        return
+    }
+
+    reportSyntaxError(expectation, "model-timestamp with unknown precision: $precision")
+}
+
+/**
+ * Asserts that [reader] is positioned on a non-null container of the given [type] whose children match, in
+ * order, the model values in the tail of [expectation], with no extra children after the last one.
+ * Steps into the container before checking and back out afterwards.
+ */
+private fun TestCaseSupport.denotesSeq(type: IonType, expectation: SeqElement, reader: IonReader) {
+    assertFalse(reader.isNullValue, createFailureMessage(expectation))
+    assertEquals(type, reader.type, createFailureMessage(expectation))
+    reader.stepIn()
+    expectation.tail.forEach {
+        reader.next()
+        denotesModelValue(it, reader)
+    }
+    // Assert no more elements in sequence
+    assertNull(reader.next(), createFailureMessage(expectation, "unexpected extra element(s) at end of sequence"))
+    reader.stepOut()
+}
+
+private fun TestCaseSupport.denotesStruct(expectation: SeqElement, reader: IonReader) {
+    assertFalse(reader.isNullValue, createFailureMessage(expectation))
+    assertEquals(IonType.STRUCT, reader.type, createFailureMessage(expectation))
+    reader.stepIn()
+
+    val expectedFields = expectation.tail
+    val hasSeenField = BooleanArray(expectedFields.size)
+
+    // FIXME: For structs with repeated field names, this will break because we can't rewind and replay from the
+    // reader, so we can't test the same nested stream multiple times from the reader. This issue is not
+    // caused by using exceptions for control flow.
+    while (reader.next() != null) {
+        // This is a low-effort solution. If the performance of these tests becomes a problem, rewrite to not
+        // use exceptions for control flow.
+ + // Find all field names that match + val matchingFieldNameIndices = expectedFields.mapIndexedNotNull { i, modelField -> + modelField as SeqElement + val modelFieldName = modelField.values[0] + try { + denotesSymtok(modelFieldName, reader.fieldNameSymbol) + i + } catch (e: AssertionError) { + null + } + } + + // Now check the field value, if needed. + when (matchingFieldNameIndices.size) { + 0 -> fail(expectation, "Found unexpected field name: ${reader.fieldNameSymbol}") + 1 -> { + val modelFieldIndex = matchingFieldNameIndices.single() + if (hasSeenField[modelFieldIndex]) { + fail(expectedFields[modelFieldIndex], "Found multiple matching fields") + } + val modelFieldValue = expectedFields[modelFieldIndex].seqValues[1] + denotesModelValue(modelFieldValue, reader) + hasSeenField[modelFieldIndex] = true + } + else -> TODO("Test runner implementation does not support repeated field names yet.") + } + } + + val firstUnseenField = hasSeenField.indexOfFirst { !it } + if (firstUnseenField != -1) { + fail(expectation, "Missing at least one expected field, including ${expectedFields[firstUnseenField]}") + } + reader.stepOut() +} + +private fun TestCaseSupport.denotesSymtok(expectation: AnyElement, actual: SymbolToken) { + when (expectation) { + is TextElement -> assertEquals(expectation.textValue, actual.text, createFailureMessage(expectation)) + is IntElement -> assertEquals(expectation.longValue.toInt(), actual.sid, createFailureMessage(expectation)) + is SeqElement -> when (expectation.head) { + "absent" -> { + if (actual.text != null) fail(expectation, "Expected unknown text; was '${actual.text}'") + // TODO: Calculate offset, Symtab name? 
+        }
+        "text" ->
+            actual.text
+                ?.let { denotesCodepoints(expectation, it) }
+                ?: fail(expectation, "Expected known text; none present in $actual")
+        else -> reportSyntaxError(expectation, "model-symtok")
+    }
+    else -> reportSyntaxError(expectation, "model-symtok")
+    }
+}
+
+/**
+ * Asserts that the Unicode code points of [actual] equal, in order, the integers in the tail of [expectation].
+ */
+private fun TestCaseSupport.denotesCodepoints(expectation: SeqElement, actual: String) {
+    val expectedCodePoints: List<Int> = expectation.tail.map { it.longValue.toInt() }
+    val actualCodePoints: List<Int> = actual.codePoints().toList()
+    assertEquals(expectedCodePoints, actualCodePoints, createFailureMessage(expectation))
+}
+
+/**
+ * Asserts that [reader] is positioned on a lob of the given [type] whose content equals the bytes described
+ * by [expectation].
+ *
+ * The content is checked twice: first in small chunks through `getBytes`, then all at once through `newBytes`.
+ */
+private fun TestCaseSupport.denotesLob(type: IonType, expectation: SeqElement, reader: IonReader) {
+    val expectedBytes = readBytes(expectation)
+    assertEquals(type, reader.type, createFailureMessage(expectation))
+    assertEquals(expectedBytes.size, reader.byteSize(), createFailureMessage(expectation))
+
+    // bufferSize is intentionally small but >1 so that we can test reading chunks of a lob.
+    val bufferSize = 3
+    val buffer = ByteArray(bufferSize)
+    expectedBytes.toList().chunked(bufferSize).forEach { chunk ->
+        // The offset argument is a position in `buffer`, not in the lob, so every chunk is written at index 0.
+        // NOTE(review): assumes successive getBytes calls continue where the previous call stopped -- confirm.
+        val bytesRead = reader.getBytes(buffer, 0, bufferSize)
+        if (bytesRead == bufferSize) {
+            assertArrayEquals(chunk.toByteArray(), buffer, createFailureMessage(expectation))
+        } else {
+            // Short read: only the first chunk.size bytes of the buffer are compared.
+            chunk.forEachIndexed { j, byte -> assertEquals(byte, buffer[j], createFailureMessage(expectation)) }
+        }
+    }
+    assertArrayEquals(expectedBytes, reader.newBytes(), createFailureMessage(expectation))
+}
diff --git a/src/test/java/com/amazon/ion/conformance/fragments.kt b/src/test/java/com/amazon/ion/conformance/fragments.kt
new file mode 100644
index 0000000000..ae6dc79943
--- /dev/null
+++ b/src/test/java/com/amazon/ion/conformance/fragments.kt
@@ -0,0 +1,383 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.conformance + +import com.amazon.ion.* +import com.amazon.ion.IonEncodingVersion.* +import com.amazon.ion.TestUtils.* +import com.amazon.ion.conformance.ConformanceTestBuilder.* +import com.amazon.ion.conformance.Encoding.* +import com.amazon.ion.impl.* +import com.amazon.ion.impl.bin.* +import com.amazon.ion.impl.macro.* +import com.amazon.ion.system.* +import com.amazon.ion.util.* +import com.amazon.ionelement.api.AnyElement +import com.amazon.ionelement.api.ElementType +import com.amazon.ionelement.api.IntElement +import com.amazon.ionelement.api.SeqElement +import com.amazon.ionelement.api.StringElement +import com.amazon.ionelement.api.SymbolElement +import com.amazon.ionelement.api.TextElement +import com.amazon.ionelement.api.ionInt +import com.amazon.ionelement.api.ionSexpOf +import com.amazon.ionelement.api.ionSymbol +import com.amazon.ionelement.api.loadSingleElement +import java.io.ByteArrayOutputStream +import kotlin.contracts.ExperimentalContracts +import kotlin.contracts.contract + +/** Helper function for creating ivm fragments for the `ion_1_*` keywords */ +fun ivm(sexp: SeqElement, major: Int, minor: Int): SeqElement { + return ionSexpOf(listOf(ionSymbol("ivm"), ionInt(major.toLong()), ionInt(minor.toLong())), metas = sexp.metas) +} + +@OptIn(ExperimentalContracts::class) +fun AnyElement.isFragment(): Boolean { + contract { returns(true) implies (this@isFragment is SeqElement) } + return this is SeqElement && this.head in FRAGMENT_KEYWORDS +} + +// All known fragment keywords +// TODO: When we update the ion-tests commit to include https://github.com/amazon-ion/ion-tests/pull/129 +// we need to remove "bytes" from this list +private val FRAGMENT_KEYWORDS = setOf("ivm", "text", "binary", "bytes", "toplevel", "encoding", "mactab") +// Insert this between every fragment when transcoding to text +val SERIALIZED_TEXT_FRAGMENT_SEPARATOR = "\n".toByteArray(Charsets.UTF_8) + +// TODO: Update 
these so that they provide raw writers. That will resolve some of the issues +// such as not being able to write system values to the Ion 1.1 managed writer and not +// being able to write invalid imports. +private sealed interface Encoding { + val writerBuilder: IonWriterBuilder + + sealed interface Binary : Encoding + sealed interface Text : Encoding + sealed interface `1,0` : Encoding + sealed interface `1,1` : Encoding + + object Binary10 : Binary, `1,0` { + override val writerBuilder: IonWriterBuilder = + ION_1_0.binaryWriterBuilder().withCatalog(ION_CONFORMANCE_TEST_CATALOG) + } + + object Binary11 : Binary, `1,1` { + override val writerBuilder: IonWriterBuilder = + ION_1_1.binaryWriterBuilder().withCatalog(ION_CONFORMANCE_TEST_CATALOG) as IonWriterBuilder + } + + object Text10 : Text, `1,0` { + override val writerBuilder: IonWriterBuilder = + (ION_1_0.textWriterBuilder() as _Private_IonTextWriterBuilder<*>) + .withInvalidSidsAllowed(true) + .withWriteTopLevelValuesOnNewLines(true) + .withCatalog(ION_CONFORMANCE_TEST_CATALOG) + .withInitialIvmHandling(IonWriterBuilder.InitialIvmHandling.SUPPRESS) + } + + object Text11 : Text, `1,1` { + override val writerBuilder: IonWriterBuilder = + (ION_1_1.textWriterBuilder() as _Private_IonTextWriterBuilder<*>) + .withInvalidSidsAllowed(true) + .withWriteTopLevelValuesOnNewLines(true) + .withInitialIvmHandling(IonWriterBuilder.InitialIvmHandling.SUPPRESS) + .withCatalog(ION_CONFORMANCE_TEST_CATALOG) + .also { + it as _Private_IonTextWriterBuilder_1_1 + it.withSymbolInliningStrategy(SymbolInliningStrategy.ALWAYS_INLINE) + } + } +} + +/** + * If we have an invalid version, don't complain here. Bad fragments should propagate through + * and be detected by the test expectations. + */ +private fun Encoding.getEncodingVersion(minor: Int): Encoding = when (minor) { + 0 -> if (this is Text) Text10 else Binary10 + 1 -> if (this is Text) Text11 else Binary11 + // Unknown version -- just return this. 
+ else -> this +} + +private val Encoding.ivmBytes: ByteArray + get() = when (this) { + Binary10 -> byteArrayOf(0xE0.toByte(), 1, 0, 0xEA.toByte()) + Binary11 -> byteArrayOf(0xE0.toByte(), 1, 1, 0xEA.toByte()) + Text10 -> "\$ion_1_0".toByteArray(Charsets.UTF_8) + Text11 -> "\$ion_1_1".toByteArray(Charsets.UTF_8) + } + +/** + * Read all fragments, transcoding and combining the data into Ion binary or Ion text UTF-8 encoded bytes. + */ +fun TestCaseSupport.readFragments(fragments: List): ByteArray { + debug { "Initializing Input Data..." } + // TODO: Detect versions and switch accordingly. + val encodeToBinary = 0 < fragments.count { + debug { "Inspecting (${it.head} ...) at ${locationOf(it)}" } + // TODO: When we update the ion-tests commit to include https://github.com/amazon-ion/ion-tests/pull/129 + // we need to remove "bytes" from this check + it.head == "bytes" || it.head == "binary" + } + + val encoding: Encoding = if (encodeToBinary) Binary10 else Text10 + + fun debugString(i: Int, bytes: ByteArray): String = + with(bytes) { if (encodeToBinary) toPrettyHexString() else toString(Charsets.UTF_8) } + .replaceIndent(" | ") + .let { "Fragment $i\n$it" } + + val serializedFragments = mutableListOf() + + // All documents start as Ion 1.0, but we must explicitly ensure that the IVM is present if + // transcoding fragments to binary. 
+ if (encodeToBinary) serializedFragments.add(encoding.ivmBytes) + + fragments.foldIndexed(encoding) { i, encodingVersion, fragment -> + val (bytes, continueWithVersion) = readFragment(fragment, encodingVersion) + serializedFragments.add(bytes) + debug { debugString(i, bytes) } + // If it's text, we need to ensure there is whitespace between fragments + if (encodingVersion is Text) serializedFragments.add(SERIALIZED_TEXT_FRAGMENT_SEPARATOR) + continueWithVersion + } + return serializedFragments.joinToByteArray() +} + +/** Reads a single fragment */ +private fun TestCaseSupport.readFragment(fragment: SeqElement, encoding: Encoding): Pair { + return when (fragment.head) { + "ivm" -> readIvmFragment(fragment, encoding) + "text" -> readTextFragment(fragment, encoding) + "binary" -> readBytesFragment(fragment, encoding) + // TODO: When we update the ion-tests commit to include https://github.com/amazon-ion/ion-tests/pull/129 + // we need to remove "bytes" from this when expression + "bytes" -> readBytesFragment(fragment, encoding) + "toplevel" -> readTopLevelFragment(fragment, encoding) + "mactab" -> readMactabFragment(fragment, encoding) + "encoding" -> TODO("encoding") + else -> reportSyntaxError(fragment, "not a valid fragment") + } +} + +/** Reads an `IVM` fragment and returns a byte array with an IVM for the given [encoding]. */ +private fun TestCaseSupport.readIvmFragment(fragment: SeqElement, encoding: Encoding): Pair { + val major = fragment.values[1].longValue + val minor = fragment.values[2].longValue + val ivmBytes = if (encoding is Text) { + "\$ion_${major}_$minor".toByteArray() + } else { + byteArrayOf(0xE0.toByte(), major.toByte(), minor.toByte(), 0xEA.toByte()) + } + // If the IVM is for an unknown version, then the ivmBytes will not match the returned Encoding. + // This is generally fine because the test should be expecting the invalid IVM. If there's something + // wrong with the test framework, it could manifest in strange ways. 
+ return ivmBytes to encoding.getEncodingVersion(minor.toInt()) +} + +/** + * Reads a `text` fragment. Does not transcode, but (to-do) keeps track of whether an IVM is encountered, + * and returns the text as a UTF-8 [ByteArray] along with the current encoding version at the end of the fragment. + */ +private fun TestCaseSupport.readTextFragment(fragment: SeqElement, encoding: Encoding): Pair { + if (encoding !is Text) { + TODO("Changing between binary and text is not supported.") + } + val text = fragment.tail.joinToString("\n") { + // TODO: Detect and update the encoding if there's an IVM midstream + (it as? StringElement)?.textValue + ?: reportSyntaxError(it, "text fragment may only contain strings") + } + return text.toByteArray(Charsets.UTF_8) to encoding +} + +/** + * Reads a `bytes` fragment. Does not transcode, but (to-do) keeps track of whether an IVM is encountered, + * and returns bytes and the current encoding version at the end of the fragment. + */ +private fun TestCaseSupport.readBytesFragment(fragment: SeqElement, encoding: Encoding): Pair { + require(encoding is Binary) + // TODO: Detect and update the encoding if there's an IVM midstream + return readBytes(fragment) to encoding +} + +/** + * Reads a `bytes` clause, returning a [ByteArray]. + */ +fun TestCaseSupport.readBytes(sexp: SeqElement): ByteArray { + val bytes = mutableListOf() + sexp.tail.forEach { + when (it) { + is StringElement -> hexStringToByteArray(cleanCommentedHexBytes(it.stringValue)) + is IntElement -> byteArrayOf(it.longValue.toByte()) + else -> reportSyntaxError(it, "Not a valid element in a binary clause") + }.let(bytes::add) + } + return bytes.joinToByteArray() +} + +/** + * Reads a `toplevel` clause, transcoding it to the requested [encoding]. 
+ */
+private fun TestCaseSupport.readTopLevelFragment(fragment: SeqElement, encoding: Encoding): Pair<ByteArray, Encoding> {
+    val baos = ByteArrayOutputStream()
+    // Neither of these is reassigned yet; see the TODO below about IVMs in the middle of a fragment.
+    val currentEncoding = encoding
+    val currentWriter = encoding.writerBuilder.build(baos)
+    // Matches an IVM-shaped symbol, optionally prefixed with `#`. Compiled once here instead of once per value.
+    val ivmSymbolPattern = Regex("#?\\\$ion_\\d+_\\d+")
+
+    fragment.tail.forEach {
+        // TODO: Check for IVMs and update `currentEncoding` and `currentWriter` accordingly
+        // Alternately, we could check for IVMs and split into multiple fragments so that
+        // each fragment can be written separately.
+        if (it is SymbolElement && it.textValue.matches(ivmSymbolPattern)) {
+            TODO("change Ion version while in in toplevel fragment")
+        }
+        it.asAnyElement().demangledWriteTo(currentWriter)
+    }
+    currentWriter.close()
+    val bytes = baos.toByteArray()
+        // Drop the initial IVM
+        .let { if (encoding is Binary) it.drop(4).toByteArray() else it }
+        .let { if (encoding is Text11) it.drop("\$ion_1_1".length).toByteArray() else it }
+    return bytes to currentEncoding
+}
+
+private fun TestCaseSupport.readMactabFragment(fragment: SeqElement, encoding: Encoding): Pair<ByteArray, Encoding> {
+    val baos = ByteArrayOutputStream()
+    var currentEncoding = encoding
+    var currentWriter = encoding.writerBuilder.build(baos)
+
+    // TODO: Consider replacing this to use literal values instead of the `set_macros` macro to
+    // minimize dependencies in tests.
+ + // Can't have a mactab for an Ion 1.0 segment, so this should be safe + currentWriter as MacroAwareIonWriter + currentWriter.startMacro(SystemMacro.SetMacros) + currentWriter.startExpressionGroup() + fragment.tail.forEach { + it.writeTo(currentWriter) + } + currentWriter.endExpressionGroup() + currentWriter.endMacro() + currentWriter.close() + val bytes = baos.toByteArray() + // Drop the initial IVM + .let { if (encoding is Binary) it.drop(4).toByteArray() else it } + .let { if (encoding is Text11) it.drop("\$ion_1_1".length).toByteArray() else it } + return bytes to currentEncoding +} + +/** + * Writes this [AnyElement] to an [IonWriter], applying the de-mangling logic described at + * [Conformance – Abstract Syntax Forms](https://github.com/amazon-ion/ion-tests/tree/master/conformance#abstract-syntax-forms). + */ +private fun AnyElement.demangledWriteTo(writer: IonWriter) { + writer.setTypeAnnotationSymbols(*annotations.map(::demangleSymbolToken).toTypedArray()) + if (isNull) { + writer.writeNull(type.toIonType()) + } else when (type) { + ElementType.BOOL -> writer.writeBool(booleanValue) + ElementType.INT -> writer.writeInt(bigIntegerValue) + ElementType.FLOAT -> writer.writeFloat(doubleValue) + ElementType.DECIMAL -> writer.writeDecimal(decimalValue) + ElementType.TIMESTAMP -> writer.writeTimestamp(timestampValue) + ElementType.SYMBOL -> writer.writeSymbolToken(demangleSymbolToken(symbolValue)) + ElementType.STRING -> writer.writeString(stringValue) + ElementType.CLOB -> writer.writeClob(bytesValue.copyOfBytes()) + ElementType.BLOB -> writer.writeBlob(bytesValue.copyOfBytes()) + ElementType.LIST -> { + writer.stepIn(IonType.LIST) + listValues.forEach { it.demangledWriteTo(writer) } + writer.stepOut() + } + ElementType.SEXP -> { + val head = sexpValues.firstOrNull() + if (head is TextElement && head.textValue.startsWith("#$:")) { + val tail = sexpValues.drop(1) + if (head.textValue == "#$::") { + // Write an expression group + writer as IonManagedWriter_1_1 
+ val rawWriter = writer.getRawUserWriter() + rawWriter.stepInExpressionGroup(usingLengthPrefix = true) + tail.forEach { it.demangledWriteTo(writer) } + rawWriter.stepOut() + } else { + // Write an e-expression + writer.writeDemangledEExpression(head, tail) + } + } else { + writer.stepIn(IonType.SEXP) + sexpValues.forEach { it.demangledWriteTo(writer) } + writer.stepOut() + } + } + ElementType.STRUCT -> { + writer.stepIn(IonType.STRUCT) + structFields.forEach { (k, v) -> + writer.setFieldNameSymbol(demangleSymbolToken(k)) + v.demangledWriteTo(writer) + } + writer.stepOut() + } + ElementType.NULL -> TODO("Unreachable") + } +} + +private fun IonWriter.writeDemangledEExpression(head: TextElement, tail: List) { + this as IonManagedWriter_1_1 + val rawWriter = this.getRawUserWriter() + + // Drop the first 3 characters (the `#$:`) and then parse as Ion + val macroReference = loadSingleElement(head.textValue.drop(3)) + val annotations = macroReference.annotations + if (annotations.isNotEmpty()) { + if (annotations.singleOrNull() == "\$ion") { + val systemMacro = when (macroReference) { + is SymbolElement -> SystemMacro[macroReference.textValue]!! + is IntElement -> SystemMacro[macroReference.longValue.toInt()]!! + else -> throw IllegalArgumentException("Not a valid macro reference: $head") + } + rawWriter.stepInEExp(systemMacro) + tail.forEach { it.demangledWriteTo(this) } + rawWriter.stepOut() + } else { + TODO("demangled, non-system, qualified e-expressions") + } + } else if (macroReference is IntElement) { + val macro = if (rawWriter is IonRawBinaryWriter_1_1) { + // For this to work in binary, we need to look up the signature. + TODO("For Ion binary, we need to look up the macro definition") + } else { + // For Ion Text, we can cheat and use a placeholder because the macro arg isn't used. 
+ SystemMacro.None + } + rawWriter.stepInEExp(macroReference.longValue.toInt(), usingLengthPrefix = false, macro) + tail.forEach { it.demangledWriteTo(this) } + rawWriter.stepOut() + } else if (macroReference is SymbolElement) { + if (rawWriter is IonRawBinaryWriter_1_1) { + TODO("For Ion binary, we need to look up the address for the macro and invoke by ID") + } + rawWriter.stepInEExp(macroReference.textValue) + tail.forEach { it.demangledWriteTo(this) } + rawWriter.stepOut() + } else { + throw IllegalArgumentException("Not a valid macro reference: $head") + } +} + +private fun demangleSymbolToken(text: String): SymbolToken { + return if (text.startsWith("#\$ion_")) { + // Escaped IVM or system symbol + FakeSymbolToken(text.drop(1), -1) + } else if (text.startsWith("#$:")) { + // E-Expression macro id -- Should be unreachable; handled elsewhere + TODO("Should be unreachable! demangled e-expressions - $text") + } else if (text.startsWith("#$")) { + // Escaped SID + val id = text.drop(2).toInt() + FakeSymbolToken(null, id) + } else { + FakeSymbolToken(text, -1) + } +} diff --git a/src/test/java/com/amazon/ion/conformance/structure.kt b/src/test/java/com/amazon/ion/conformance/structure.kt new file mode 100644 index 0000000000..07f595bf15 --- /dev/null +++ b/src/test/java/com/amazon/ion/conformance/structure.kt @@ -0,0 +1,193 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.conformance + +import com.amazon.ion.* +import com.amazon.ionelement.api.AnyElement +import com.amazon.ionelement.api.ElementType +import com.amazon.ionelement.api.SeqElement +import com.amazon.ionelement.api.StringElement +import com.amazon.ionelement.api.loadAllElements +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assumptions.assumeTrue +import org.junit.jupiter.api.DynamicContainer +import org.junit.jupiter.api.DynamicNode +import org.junit.jupiter.api.DynamicTest + +// There are three distinct parts to this DSL +// 1. The structure clauses (document, then, each, etc.) +// 2. The input (fragment) clauses +// 3. The expectation clauses +// +// The structure is eagerly evaluated. The other clauses are lazily evaluated in the actual test cases. + +/** + * Tuple of a [ConformanceTestBuilder], the current s-expression, and the current position in the s-expression. + * + * This is immutable. Branches in the `read` functions require creating a new updated copy of [ParserState]. + */ +private data class ParserState(val builder: ConformanceTestBuilder, val sexp: SeqElement, val pos: Int = 0) + +private fun ParserState.updateState(pos: Int = this.pos, builderUpdate: ConformanceTestBuilder.() -> ConformanceTestBuilder = { this }): ParserState = + copy(pos = pos, builder = builderUpdate(builder)) + +/** + * Entry point to reading the test structure. + */ +fun ConformanceTestBuilder.readAllTests(reader: IonReader): DynamicNode { + return loadAllElements(reader, ELEMENT_LOADER_OPTIONS) + .mapIndexed { i, it -> + try { + readTest(it) + } catch (e: ConformanceTestInvalidSyntaxException) { + // If there's a syntax error in this test tree, we'll create a test case to represent it + // and rethrow the error in there. This will allow other tests to run even if some malformed + // tests exist. 
+ DynamicTest.dynamicTest("$file[$i]") { throw e } + } catch (e: NotImplementedError) { + // Hack to report something useful if we can't read the test case because we + // haven't implemented something yet. This creates a test case that always skips. + DynamicTest.dynamicTest("$file[$i] - ${e.message}") { assumeTrue(false) } + } + } + .let { DynamicContainer.dynamicContainer(file.path, it) } +} + +/** Reads a top-level test clause. */ +fun ConformanceTestBuilder.readTest(element: AnyElement): DynamicNode { + val sexp = element as? SeqElement ?: reportSyntaxError(element, "test-case") + val parserState = ParserState(this, sexp, 1) + + return when (sexp.head) { + "document" -> + parserState.readDescription() + .readFragmentSequence() + .readContinuation() + + "ion_1_0" -> + parserState.updateState { plusFragment(ivm(sexp, 1, 0)) } + .readDescription() + .readFragmentSequence() + .readContinuation() + + "ion_1_1" -> + parserState.updateState { plusFragment(ivm(sexp, 1, 1)) } + .readDescription() + .readFragmentSequence() + .readContinuation() + + "ion_1_x" -> { + parserState.readDescription() + .let { p -> + val ion10Branch = p.updateState { plus("In Ion 1.0", ivm(sexp, 1, 0)) } + val ion11Branch = p.updateState { plus("In Ion 1.1", ivm(sexp, 1, 1)) } + p.builder.buildContainer( + ion10Branch + .readFragmentSequence() + .readContinuation(), + ion11Branch + .readFragmentSequence() + .readContinuation(), + ) + } + } + else -> reportSyntaxError(sexp) + } +} + +/** + * Reads 0 or more fragments from an s-expression starting from the position + * given in [ParserState]. Returns a [ParserState] with an updated position + * and a list of any fragment expressions that were found. 
+ */
+private fun ParserState.readFragmentSequence(): ParserState {
+    val fragments = sexp.tailFrom(pos)
+        .takeWhile { it.isFragment() } as List<SeqElement>
+    return this.updateState(pos = pos + fragments.size) { plusFragments(fragments) }
+}
+
+/**
+ * Reads an optional description, returning an updated [ParserState].
+ * This function always adds _some_ description to the [ParserState].
+ * If the clause contains no description, it uses the clause keyword as a description.
+ *
+ * A clause that ends right after its keyword is treated the same as a clause with no description; the
+ * position is left unchanged so that the missing continuation is reported by whatever reads it next.
+ */
+private fun ParserState.readDescription(): ParserState {
+    // getOrNull instead of indexing: `pos` may be one past the end when the clause has nothing after its keyword.
+    val candidate = sexp.values.getOrNull(pos)
+    // If it's a string (even null), update position
+    val newPos = pos + if (candidate?.type == ElementType.STRING) 1 else 0
+    // If there is no description, or the description is null, use the clause name instead.
+    val text = (candidate as? StringElement)?.textValue ?: "«${sexp.head}»"
+    return updateState(newPos) { plusName(text) }
+}
+
+/** Reads a `then` clause, starting _after_ the `then` keyword. */
+private fun ParserState.readThen(): List<DynamicNode> {
+    return readDescription()
+        .readFragmentSequence()
+        .readContinuation()
+        .let(::listOf)
+}
+
+/** Reads an `each` clause, starting _after_ the `each` keyword. */
+private fun ParserState.readEach(): List<DynamicNode> {
+    // TODO: Handle case where 0 fragments, if such a case ever gets introduced to the conformance suite
+    var currentDescription = "«${sexp.head}»"
+    var continuationPosition = pos
+    val nameFragmentPairs = mutableListOf<Pair<String, SeqElement>>()
+    for (element in sexp.tailFrom(pos)) {
+        when {
+            element is StringElement -> currentDescription = element.textValueOrNull ?: currentDescription
+            element.isFragment() -> nameFragmentPairs.add(currentDescription to element)
+            else -> break
+        }
+        continuationPosition++
+    }
+
+    return nameFragmentPairs.mapIndexed { i, (name, frag) ->
+        updateState(continuationPosition) { plus(name = "$name [$i]", frag) }.readContinuation()
+    }
+}
+
+/** Reads an extension, returning a list of test case nodes constructed from those extensions. */
+private fun ParserState.readExtension(): List<DynamicNode> {
+    return when (sexp.head) {
+        "each" -> updateState(pos = 1).readEach()
+        "then" -> updateState(pos = 1).readThen()
+        else -> builder.reportSyntaxError(sexp, "unknown extension")
+    }
+}
+
+/**
+ * Reads a continuation—a single expectation or one-to-many extensions.
+ *
+ * A clause with no continuation at all, or whose continuation contains something other than an s-expression,
+ * is reported through `reportSyntaxError`.
+ */
+private fun ParserState.readContinuation(): DynamicNode {
+    val continuation = sexp.tailFrom(pos)
+
+    // firstOrNull instead of first: an empty continuation is a syntax error in the test file, not a crash.
+    val firstExpression = continuation.firstOrNull()
+        ?: builder.reportSyntaxError(sexp, "continuation")
+    firstExpression as? SeqElement ?: builder.reportSyntaxError(firstExpression, "continuation")
+
+    return continuation.flatMap {
+        it as? SeqElement ?: builder.reportSyntaxError(it, "extension")
+        with(ParserState(builder, it)) {
+            // An expectation ends the continuation: it is returned directly from readContinuation.
+            readExpectation()?.let { expectation -> return expectation }
+            readExtension()
+        }
+    }.let(builder::buildContainer)
+}
+
+/**
+ * Reads an optional expectation clause. If the current clause is not an expectation,
+ * returns null.
+ */
+private fun ParserState.readExpectation(): DynamicNode? {
+    return when (sexp.head) {
+        "and" -> TODO("'and' not implemented")
+        "not" -> TODO("'not' not implemented")
+        "produces" -> builder.build {
+            val actual = loadAllElements(createFragmentReader()).toList()
+            assertEquals(sexp.tail, actual, createFailureMessage(sexp))
+        }
+        "signals" -> builder.build { assertSignals(sexp, createFragmentReader()) }
+        "denotes" -> builder.build { assertDenotes(sexp.tail, createFragmentReader()) }
+        else -> null
+    }
+}
diff --git a/src/test/java/com/amazon/ion/conformance/util.kt b/src/test/java/com/amazon/ion/conformance/util.kt
new file mode 100644
index 0000000000..a59adc9e7e
--- /dev/null
+++ b/src/test/java/com/amazon/ion/conformance/util.kt
@@ -0,0 +1,64 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.conformance + +import com.amazon.ion.* +import com.amazon.ion.system.* +import com.amazon.ionelement.api.AnyElement +import com.amazon.ionelement.api.IonElementLoaderOptions +import com.amazon.ionelement.api.SeqElement +import java.io.File + +val ION: IonSystem = IonSystemBuilder.standard().build() + +val ELEMENT_LOADER_OPTIONS = IonElementLoaderOptions(includeLocationMeta = true) + +val ION_CONFORMANCE_DIR = File("ion-tests/conformance") + +val TEST_CATALOG_DIR = File("ion-tests/catalog") + +/** + * Catalog for conformance tests. + */ +val ION_CONFORMANCE_TEST_CATALOG = SimpleCatalog().apply { + TEST_CATALOG_DIR.walk() + .filter { it.isFile && it.extension == "ion" } + .onEach { println(it.absolutePath) } + .forEach { file -> + file.inputStream() + .let(ION::newReader) + .use { r -> while (r.next() != null) putTable(ION.newSharedSymbolTable(r, true)) } + } +} + +/** + * Gets the first value of a [SeqElement]. + * Throws an exception if the first value is not text. + */ +val SeqElement.head: String + get() = values.first().textValue + +/** + * Gets all elements of a [SeqElement], except for [head]. + */ +val SeqElement.tail: List + get() = tailFrom(1) + +/** + * Gets the tail of a [SeqElement], starting with position [i]. 
+ */ +fun SeqElement.tailFrom(i: Int) = values.subList(i, size) + +/** + * Join a list of [ByteArray] into a single [ByteArray] + */ +fun List.joinToByteArray(): ByteArray { + val size = sumOf { it.size } + var offset = 0 + val combined = ByteArray(size) + forEach { + it.copyInto(combined, offset) + offset += it.size + } + return combined +} diff --git a/src/test/java/com/amazon/ion/impl/EncodingDirectiveCompilationTest.java b/src/test/java/com/amazon/ion/impl/EncodingDirectiveCompilationTest.java new file mode 100644 index 0000000000..cf79062d5a --- /dev/null +++ b/src/test/java/com/amazon/ion/impl/EncodingDirectiveCompilationTest.java @@ -0,0 +1,2035 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl; + +import com.amazon.ion.FakeSymbolToken; +import com.amazon.ion.IntegerSize; +import com.amazon.ion.IonDatagram; +import com.amazon.ion.IonEncodingVersion; +import com.amazon.ion.IonException; +import com.amazon.ion.IonLoader; +import com.amazon.ion.IonReader; +import com.amazon.ion.IonSystem; +import com.amazon.ion.IonText; +import com.amazon.ion.IonType; +import com.amazon.ion.MacroAwareIonReader; +import com.amazon.ion.MacroAwareIonWriter; +import com.amazon.ion.SystemSymbols; +import com.amazon.ion.impl.bin.IonRawBinaryWriter_1_1; +import com.amazon.ion.impl.bin.SymbolInliningStrategy; +import com.amazon.ion.impl.macro.EncodingContext; +import com.amazon.ion.impl.macro.Expression; +import com.amazon.ion.impl.macro.Macro; +import com.amazon.ion.impl.macro.MacroRef; +import com.amazon.ion.impl.macro.MacroTable; +import com.amazon.ion.impl.macro.ParameterFactory; +import com.amazon.ion.impl.macro.SystemMacro; +import com.amazon.ion.impl.macro.TemplateMacro; +import com.amazon.ion.system.IonReaderBuilder; +import com.amazon.ion.system.IonSystemBuilder; +import org.hamcrest.Description; +import org.hamcrest.Matcher; +import org.hamcrest.TypeSafeMatcher; +import 
org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.OutputStream; +import java.math.BigDecimal; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.function.Consumer; + +import static com.amazon.ion.BitUtils.bytes; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.allOf; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Tests that Ion 1.1 encoding directives are correctly compiled from streams of Ion data. 
+ */ +public class EncodingDirectiveCompilationTest { + + /** Symbol ID that initializeSymbolTable assigns to the first user symbol; the tests also write it as the SID of the first symbol they declare. */ private static final int FIRST_LOCAL_SYMBOL_ID = 1; + + /** Text whose occurrences the transcode tests count via substringCount. */ private static final String DEFAULT_MODULE_DIRECTIVE_PREFIX = "$ion::(module _"; + + /** Asserts that, for every key produced by streamType.newMacroTableByMacroRef(expected), the reader's macro table returns a macro equal to the expected one. */ private static void assertMacroTablesContainsExpectedMappings(IonReader reader, StreamType streamType, SortedMap expected) { + Map expectedByRef = streamType.newMacroTableByMacroRef(expected); + + MacroTable actual = streamType.getEncodingContext(reader).getMacroTable(); + // TODO: This assertion is weak, we don't know that the actual macro table contains *only* the expectations + expectedByRef.forEach((k,v) -> assertEquals(v, actual.get(k))); + } + + /** Writes the ION annotation, steps into an s-expression and writes the MODULE and DEFAULT_MODULE symbols; the s-expression is left open for the caller to fill and close. */ private static void startModuleDirectiveForDefaultModule(IonRawWriter_1_1 writer) { + writer.writeAnnotations(SystemSymbols_1_1.ION); + writer.stepInSExp(false); + writer.writeSymbol(SystemSymbols_1_1.MODULE); + writer.writeSymbol(SystemSymbols.DEFAULT_MODULE); + } + + /** Steps out of the writer's current container; callers pair it with startModuleDirectiveForDefaultModule. */ private static void endEncodingDirective(IonRawWriter_1_1 writer) { + writer.stepOut(); + } + + /** Writes an s-expression holding the SYMBOL_TABLE symbol, then the DEFAULT_MODULE symbol when append is true, then a list containing each user symbol as a string. */ private static void writeEncodingDirectiveSymbolTable(IonRawWriter_1_1 writer, boolean append, String... userSymbols) { + writer.stepInSExp(false); + writer.writeSymbol(SystemSymbols_1_1.SYMBOL_TABLE); + if (append) { + writer.writeSymbol(SystemSymbols.DEFAULT_MODULE); + } + writer.stepInList(false); + for (String userSymbol : userSymbols) { + writer.writeString(userSymbol); + } + writer.stepOut(); + writer.stepOut(); + } + + /** Convenience overload that calls the three-argument form with append = false. */ private static void writeEncodingDirectiveSymbolTable(IonRawWriter_1_1 writer, String... userSymbols) { + writeEncodingDirectiveSymbolTable(writer, false, userSymbols); + } + + /** Returns a map from each user symbol to consecutive IDs, starting at startId and following argument order. */ private static Map makeSymbolsMap(int startId, String... userSymbols) { + Map symbols = new HashMap<>(); + int localSymbolId = startId; + for (String userSymbol : userSymbols) { + symbols.put(userSymbol, localSymbolId++); + } + return symbols; + } + + private static Map initializeSymbolTable(IonRawWriter_1_1 writer, String...
userSymbols) { /* Writes one complete default-module directive that contains only the symbol table, then returns the text-to-ID map starting at FIRST_LOCAL_SYMBOL_ID. */ + startModuleDirectiveForDefaultModule(writer); + writeEncodingDirectiveSymbolTable(writer, userSymbols); + endEncodingDirective(writer); + return makeSymbolsMap(FIRST_LOCAL_SYMBOL_ID, userSymbols); + } + + /** Steps into an s-expression and writes the MACRO_TABLE symbol; the s-expression is left open. */ private static void startMacroTable(IonRawWriter_1_1 writer) { + writer.stepInSExp(false); + writer.writeSymbol(SystemSymbols_1_1.MACRO_TABLE); + } + + /** Steps out of the writer's current container; callers pair it with startMacroTable. */ private static void endMacroTable(IonRawWriter_1_1 writer) { + writer.stepOut(); + } + + /** Looks value up in symbols and hands the mapped ID to tokenSidWriter, or hands the text itself to tokenTextWriter when there is no mapping. */ private static void writeSymbolToken(Consumer tokenTextWriter, Consumer tokenSidWriter, Map symbols, String value) { + Integer sid = symbols.get(value); + if (sid == null) { + // There is no mapping; write as text + tokenTextWriter.accept(value); + } else { + tokenSidWriter.accept(sid); + } + } + + /** Writes value as a symbol value, by ID when symbols maps it and as text otherwise. */ private static void writeSymbol(IonRawWriter_1_1 writer, Map symbols, String value) { + writeSymbolToken(writer::writeSymbol, writer::writeSymbol, symbols, value); + } + + /** Writes name as a field name, by ID when symbols maps it and as text otherwise. */ private static void writeFieldName(IonRawWriter_1_1 writer, Map symbols, String name) { + writeSymbolToken(writer::writeFieldName, writer::writeFieldName, symbols, name); + } + + /** Steps into an s-expression and writes the MACRO symbol followed by the macro name; the s-expression is left open for the signature and body. */ private static void startMacro(IonRawWriter_1_1 writer, Map symbols, String name) { + writer.stepInSExp(false); + writer.writeSymbol(SystemSymbols_1_1.MACRO); + writeSymbol(writer, symbols, name); + } + + /** Steps out of the writer's current container; callers pair it with startMacro. */ private static void endMacro(IonRawWriter_1_1 writer) { + writer.stepOut(); + } + + /** Writes an s-expression containing each element of signature as a symbol, in order; with no elements an empty s-expression is written. */ private static void writeMacroSignature(IonRawWriter_1_1 writer, Map symbols, String... signature) { + writer.stepInSExp(false); + for (String parameter : signature) { + writeSymbol(writer, symbols, parameter); + } + writer.stepOut(); + } + + private static void writeMacroSignatureFromDatagram(IonRawWriter_1_1 writer, Map symbols, IonDatagram...
signature) { /* Each datagram describes one parameter: element 0 is the name (optionally carrying a single encoding annotation) and an optional element 1 is the cardinality. More than two elements or more than one annotation raises IllegalStateException. */ + writer.stepInSExp(false); + for (IonDatagram parameter : signature) { + if (parameter.size() > 2) { + throw new IllegalStateException("Parameters can only have two components: a name and a cardinality."); + } + IonText name = (IonText) parameter.get(0); + String[] encoding = name.getTypeAnnotations(); + if (encoding.length == 1) { + // The encoding, e.g. uint8 + writer.writeAnnotations(encoding); + } else if (encoding.length > 1) { + throw new IllegalStateException("Only one encoding annotation is allowed."); + } + // The name + writeSymbol(writer, symbols, name.stringValue()); + if (parameter.size() == 2) { + // The cardinality, e.g. * + writeSymbol(writer, symbols, ((IonText) parameter.get(1)).stringValue()); + } + } + writer.stepOut(); + } + + /** Writes a complete s-expression made of the symbol "%" followed by the variable name. */ private static void writeVariableExpansion(IonRawWriter_1_1 writer, Map symbols, String variableName) { + writer.stepInSExp(false); + writer.writeSymbol("%"); + writeSymbol(writer, symbols, variableName); + writer.stepOut(); + } + + /** Steps into an s-expression and writes the symbol "." followed by the macro address as an int; the s-expression is left open for the arguments. */ private static void stepInTdlMacroInvocation(IonRawWriter_1_1 writer, Integer macroAddress) { + writer.stepInSExp(false); + writer.writeSymbol("."); + writer.writeInt(macroAddress); + } + + /** Writes a field name followed by a variable expansion as that field's value. */ private static void writeVariableField(IonRawWriter_1_1 writer, Map symbols, String fieldName, String variableName) { + writeFieldName(writer, symbols, fieldName); + writeVariableExpansion(writer, symbols, variableName); + } + + /** Closes the writer and then returns the bytes collected in out. */ private static byte[] getBytes(IonRawWriter_1_1 writer, ByteArrayOutputStream out) { + writer.close(); + return out.toByteArray(); + } + + /** The two stream formats the tests run against; each constant supplies the format-specific writers, reader cast and macro addressing. */ public enum StreamType { + BINARY { + @Override + IonRawWriter_1_1 newWriter(OutputStream out) { + return IonRawBinaryWriter_1_1.from(out, 256, 0); + } + + @Override + EncodingContext getEncodingContext(IonReader reader) { + return ((IonReaderContinuableCoreBinary) reader).getEncodingContext(); + } + + @Override + Map newMacroTableByMacroRef(SortedMap macrosByName) { + int address = 0; + Map macroTable = new HashMap<>(); + for
(Macro macro : macrosByName.values()) { + macroTable.put(MacroRef.byId(address++), macro); + } + return macroTable; + } + + @Override + void startMacroInvocationByName(IonRawWriter_1_1 writer, String name, Map macrosByName) { + int id = 0; /* position of name in the map's iteration order; if name is absent the loop runs to the end and id equals the number of entries */ + for (Map.Entry nameAndMacro : macrosByName.entrySet()) { + if (nameAndMacro.getKey().equals(name)) { + break; + } + id++; + } + writer.stepInEExp(id, false, macrosByName.get(name)); + } + + @Override + MacroAwareIonWriter newMacroAwareWriter(OutputStream out) { + return (MacroAwareIonWriter) IonEncodingVersion.ION_1_1.binaryWriterBuilder().build(out); + } + }, + TEXT { + @Override + IonRawWriter_1_1 newWriter(OutputStream out) { + return IonRawTextWriter_1_1.from(out, 256, IonEncodingVersion.ION_1_1.textWriterBuilder()); + } + + @Override + EncodingContext getEncodingContext(IonReader reader) { + return ((IonReaderTextSystemX) reader).getEncodingContext(); + } + + @Override + Map newMacroTableByMacroRef(SortedMap macrosByName) { + Map macroTable = new HashMap<>(); + int id = 0; + for (Map.Entry nameAndMacro : macrosByName.entrySet()) { + Macro macro = nameAndMacro.getValue(); + macroTable.put(MacroRef.byId(id++), macro); + String name = nameAndMacro.getKey(); + if (name != null) { + macroTable.put(MacroRef.byName(name), macro); + } + } + return macroTable; + } + + @Override + void startMacroInvocationByName(IonRawWriter_1_1 writer, String name, Map macrosByName) { + writer.stepInEExp(name); + } + + @Override + MacroAwareIonWriter newMacroAwareWriter(OutputStream out) { + return (MacroAwareIonWriter) IonEncodingVersion.ION_1_1.textWriterBuilder().build(out); + } + }; + + /** Creates the raw Ion 1.1 writer for this format over out. */ abstract IonRawWriter_1_1 newWriter(OutputStream out); + /** Casts reader to this format's reader implementation and returns its encoding context. */ abstract EncodingContext getEncodingContext(IonReader reader); + /** Re-keys macrosByName by MacroRef: by id (position in iteration order) for both formats, and additionally by name for TEXT. */ abstract Map newMacroTableByMacroRef(SortedMap macrosByName); + /** Steps into an e-expression for the named macro: BINARY addresses it by its position in macrosByName, TEXT by the name itself. */ abstract void startMacroInvocationByName(IonRawWriter_1_1 writer, String name, Map macrosByName); + /** Builds this format's Ion 1.1 writer over out and casts it to MacroAwareIonWriter. */ abstract MacroAwareIonWriter newMacroAwareWriter(OutputStream out); + } + + 
/** How the encoded bytes are handed to the reader builder: wrapped in a ByteArrayInputStream or passed directly as a byte array. */ public enum InputType { + INPUT_STREAM { + @Override + IonReader newReader(byte[] input) { + return IonReaderBuilder.standard().build(new ByteArrayInputStream(input)); + } + + @Override + MacroAwareIonReader newMacroAwareReader(byte[] input) { + return ((_Private_IonReaderBuilder) IonReaderBuilder.standard()).buildMacroAware(new ByteArrayInputStream(input)); + } + }, + BYTE_ARRAY { + @Override + IonReader newReader(byte[] input) { + return IonReaderBuilder.standard().build(input); + } + + @Override + MacroAwareIonReader newMacroAwareReader(byte[] input) { + return ((_Private_IonReaderBuilder) IonReaderBuilder.standard()).buildMacroAware(input); + } + }; + + /** Builds a standard IonReader over input. */ abstract IonReader newReader(byte[] input); + /** Builds a MacroAwareIonReader over input through the private reader-builder interface. */ abstract MacroAwareIonReader newMacroAwareReader(byte[] input); + } + + /** Method source yielding every (InputType, StreamType) pair, with InputType varying slowest. */ public static Arguments[] allCombinations() { + InputType[] inputTypes = InputType.values(); + StreamType[] streamTypes = StreamType.values(); + Arguments[] combinations = new Arguments[inputTypes.length * streamTypes.length]; + int i = 0; + for (InputType inputType : inputTypes) { + for (StreamType streamType : streamTypes) { + combinations[i++] = Arguments.of(inputType, streamType); + } + } + return combinations; + } + + /** Method source yielding every (InputType, input StreamType, output StreamType) triple, with InputType varying slowest and the output format fastest. */ public static Arguments[] allInputFormatsInputTypesAndOutputFormats() { + InputType[] inputTypes = InputType.values(); + StreamType[] streamTypes = StreamType.values(); + Arguments[] combinations = new Arguments[inputTypes.length * streamTypes.length * streamTypes.length]; + int i = 0; + for (InputType inputType : inputTypes) { + for (StreamType inputFormat : streamTypes) { + for (StreamType outputFormat : streamTypes) { + combinations[i++] = Arguments.of(inputType, inputFormat, outputFormat); + } + } + } + return combinations; + } + + /** Returns the ID mapped to value in symbols, or -1 when there is no mapping. */ private static int getSymbolId(Map symbols, String value) { + Integer sid = symbols.get(value); + return sid == null ?
-1 : sid; + } + + /** Writes a directive that declares the symbols "foo" and "bar", then two symbol values by ID, and asserts the reader resolves them to that text in order. */ @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void symbolsOnly(InputType inputType, StreamType streamType) throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = streamType.newWriter(out); + writer.writeIVM(); + startModuleDirectiveForDefaultModule(writer); + writeEncodingDirectiveSymbolTable(writer, "foo", "bar"); + endEncodingDirective(writer); + writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID); + writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID + 1); + byte[] data = getBytes(writer, out); + try (IonReader reader = inputType.newReader(data)) { + assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("foo", reader.stringValue()); + assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("bar", reader.stringValue()); + assertNull(reader.next()); + } + } + + /** Writes a directive declaring "foo" and "bar", then a second directive written with append = true that adds "baz", and asserts that three consecutive IDs resolve to "foo", "bar" and "baz". */ @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void symbolAppendWithoutMacros(InputType inputType, StreamType streamType) throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = streamType.newWriter(out); + writer.writeIVM(); + startModuleDirectiveForDefaultModule(writer); + writeEncodingDirectiveSymbolTable(writer, "foo", "bar"); + endEncodingDirective(writer); + startModuleDirectiveForDefaultModule(writer); + writeEncodingDirectiveSymbolTable(writer, true, "baz"); + endEncodingDirective(writer); + writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID); + writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID + 1); + writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID + 2); + byte[] data = getBytes(writer, out); + try (IonReader reader = inputType.newReader(data)) { + assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("foo", reader.stringValue()); + assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("bar", reader.stringValue()); + assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("baz", reader.stringValue()); + assertNull(reader.next()); + }
+ } + + /** Defines a People macro whose struct body expands three variables (the third declared with "?"), writes a single int instead of an invocation, and checks the reader's macro table against the expected TemplateMacro. Only BINARY pre-declares symbols; for TEXT the map is empty, so every token is written as text. */ @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void structMacroWithOneOptional(InputType inputType, StreamType streamType) throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = streamType.newWriter(out); + writer.writeIVM(); + Map symbols; + if (streamType == StreamType.BINARY) { + symbols = initializeSymbolTable(writer, "People", "ID", "Name", "Bald", "$ID", "$Name", "$Bald", "?"); + } else { + symbols = Collections.emptyMap(); + } + startModuleDirectiveForDefaultModule(writer); + startMacroTable(writer); + startMacro(writer, symbols, "People"); + writeMacroSignature(writer, symbols, "$ID", "$Name", "$Bald", "?"); + // The macro body + writer.stepInStruct(false); + writeVariableField(writer, symbols, "ID", "$ID"); + writeVariableField(writer, symbols, "Name", "$Name"); + writeVariableField(writer, symbols, "Bald", "$Bald"); + writer.stepOut(); + endMacro(writer); + endMacroTable(writer); + endEncodingDirective(writer); + writer.writeInt(0); + byte[] data = getBytes(writer, out); + + SortedMap expectedMacroTable = new TreeMap<>(); + expectedMacroTable.put("People", new TemplateMacro( + Arrays.asList( + new Macro.Parameter("$ID", Macro.ParameterEncoding.Tagged, Macro.ParameterCardinality.ExactlyOne), + new Macro.Parameter("$Name", Macro.ParameterEncoding.Tagged, Macro.ParameterCardinality.ExactlyOne), + new Macro.Parameter("$Bald", Macro.ParameterEncoding.Tagged, Macro.ParameterCardinality.ZeroOrOne) + ), + Arrays.asList( + new Expression.StructValue(Collections.emptyList(), 0, 7, new HashMap>() {{ + put("ID", Collections.singletonList(2)); + put("Name", Collections.singletonList(4)); + put("Bald", Collections.singletonList(6)); + }}), + new Expression.FieldName(new FakeSymbolToken("ID", getSymbolId(symbols, "ID"))), + new Expression.VariableRef(0), + new Expression.FieldName(new FakeSymbolToken("Name", getSymbolId(symbols, "Name"))), + new Expression.VariableRef(1), + new
Expression.FieldName(new FakeSymbolToken("Bald", getSymbolId(symbols, "Bald"))), + new Expression.VariableRef(2) + ) + )); + + try (IonReader reader = inputType.newReader(data)) { + assertEquals(IonType.INT, reader.next()); + assertMacroTablesContainsExpectedMappings(reader, streamType, expectedMacroTable); + } + } + + /** Writes one directive that declares both a symbol table containing "foo" and a zero-parameter Pi macro with a decimal body, then asserts the macro table entry and that the first local symbol ID resolves to "foo". */ @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void constantMacroWithUserSymbol(InputType inputType, StreamType streamType) throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = streamType.newWriter(out); + writer.writeIVM(); + Map symbols = initializeSymbolTable(writer, "Pi"); + startModuleDirectiveForDefaultModule(writer); + writeEncodingDirectiveSymbolTable(writer, "foo"); + startMacroTable(writer); + startMacro(writer, symbols, "Pi"); + writeMacroSignature(writer, symbols); // Empty signature + writer.writeDecimal(new BigDecimal("3.14159")); // The body: a constant + endMacro(writer); + endMacroTable(writer); + endEncodingDirective(writer); + writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID); // foo + byte[] data = getBytes(writer, out); + + SortedMap expectedMacroTable = new TreeMap<>(); + expectedMacroTable.put("Pi", new TemplateMacro( + Collections.emptyList(), + Collections.singletonList(new Expression.DecimalValue(Collections.emptyList(), new BigDecimal("3.14159"))) + )); + + try (IonReader reader = inputType.newReader(data)) { + assertEquals(IonType.SYMBOL, reader.next()); + assertMacroTablesContainsExpectedMappings(reader, streamType, expectedMacroTable); + assertEquals("foo", reader.stringValue()); + } + } + + /** Defines the People struct macro and invokes it twice, once with all three arguments and once without the optional third, then asserts the two expanded structs and a trailing plain int. */ @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void structMacroWithOneOptionalInvoked(InputType inputType, StreamType streamType) throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = streamType.newWriter(out); + writer.writeIVM(); + Map symbols =
initializeSymbolTable(writer, "People", "ID", "Name", "Bald", "$ID", "$Name", "$Bald", "?"); + startModuleDirectiveForDefaultModule(writer); + startMacroTable(writer); + startMacro(writer, symbols, "People"); + writeMacroSignature(writer, symbols, "$ID", "$Name", "$Bald", "?"); + // The macro body + writer.stepInStruct(false); + writeVariableField(writer, symbols, "ID", "$ID"); + writeVariableField(writer, symbols, "Name", "$Name"); + writeVariableField(writer, symbols, "Bald", "$Bald"); + writer.stepOut(); + endMacro(writer); + endMacroTable(writer); + endEncodingDirective(writer); + + SortedMap expectedMacroTable = new TreeMap<>(); + expectedMacroTable.put("People", new TemplateMacro( + Arrays.asList( + new Macro.Parameter("$ID", Macro.ParameterEncoding.Tagged, Macro.ParameterCardinality.ExactlyOne), + new Macro.Parameter("$Name", Macro.ParameterEncoding.Tagged, Macro.ParameterCardinality.ExactlyOne), + new Macro.Parameter("$Bald", Macro.ParameterEncoding.Tagged, Macro.ParameterCardinality.ZeroOrOne) + ), + Arrays.asList( + new Expression.StructValue(Collections.emptyList(), 0, 7, new HashMap>() {{ + put("ID", Collections.singletonList(2)); + put("Name", Collections.singletonList(4)); + put("Bald", Collections.singletonList(6)); + }}), + new Expression.FieldName(new FakeSymbolToken("ID", symbols.get("ID"))), + new Expression.VariableRef(0), + new Expression.FieldName(new FakeSymbolToken("Name", symbols.get("Name"))), + new Expression.VariableRef(1), + new Expression.FieldName(new FakeSymbolToken("Bald", symbols.get("Bald"))), + new Expression.VariableRef(2) + ) + )); + /* First invocation: started through the stream type by name, with all three arguments supplied. */ streamType.startMacroInvocationByName(writer, "People", expectedMacroTable); + writer.writeInt(123); + writer.writeString("Bob"); + writer.writeBool(false); + writer.stepOut(); + /* Second invocation: addressed by macro id 0 for both stream types, with only two arguments written. */ writer.stepInEExp(0, false, expectedMacroTable.get("People")); + writer.writeInt(Long.MIN_VALUE); + writer.writeString("Sue"); + // The optional "Bald" is not included.
+ writer.stepOut(); + writer.writeInt(42); // Not a macro invocation + byte[] data = getBytes(writer, out); + + try (IonReader reader = inputType.newReader(data)) { + assertEquals(IonType.STRUCT, reader.next()); + assertMacroTablesContainsExpectedMappings(reader, streamType, expectedMacroTable); + reader.stepIn(); + assertEquals(1, reader.getDepth()); + assertEquals(IonType.INT, reader.next()); + assertEquals("ID", reader.getFieldName()); + assertEquals(123, reader.intValue()); + assertEquals(IonType.STRING, reader.next()); + assertEquals("Name", reader.getFieldName()); + assertEquals("Bob", reader.stringValue()); + assertEquals(IonType.BOOL, reader.next()); + assertEquals("Bald", reader.getFieldName()); + assertFalse(reader.booleanValue()); + assertNull(reader.next()); + reader.stepOut(); + + assertEquals(0, reader.getDepth()); + assertEquals(IonType.STRUCT, reader.next()); + reader.stepIn(); + assertEquals(IonType.INT, reader.next()); + assertEquals("ID", reader.getFieldName()); + assertEquals(Long.MIN_VALUE, reader.longValue()); + assertEquals(IonType.STRING, reader.next()); + assertEquals("Name", reader.getFieldName()); + assertEquals("Sue", reader.stringValue()); + assertNull(reader.next()); + reader.stepOut(); + + assertEquals(IonType.INT, reader.next()); + assertEquals(42, reader.intValue()); + + assertNull(reader.next()); + } + } + + /** Writes a stream that defines the People macro (with two parameters marked "?") and invokes it as a field value inside a struct, stores the expected macro in the caller-supplied expectedMacroTable, and returns the encoded bytes. */ private byte[] macroInvocationWithinStruct(StreamType streamType, SortedMap expectedMacroTable) { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = streamType.newWriter(out); + writer.writeIVM(); + Map symbols = initializeSymbolTable(writer, "People", "ID", "Name", "Bald", "$ID", "$Name", "$Bald", "?"); + startModuleDirectiveForDefaultModule(writer); + writeEncodingDirectiveSymbolTable(writer, "foo"); + startMacroTable(writer); + startMacro(writer, symbols, "People"); + writeMacroSignature(writer, symbols, "$ID", "$Name", "?", "$Bald", "?"); + // The macro body + writer.stepInStruct(false);
+ writeVariableField(writer, symbols, "ID", "$ID"); + writeVariableField(writer, symbols, "Name", "$Name"); + writeVariableField(writer, symbols, "Bald", "$Bald"); + writer.stepOut(); + endMacro(writer); + endMacroTable(writer); + endEncodingDirective(writer); + + /* Fills the caller-supplied map so the caller can assert against it afterwards. */ expectedMacroTable.put("People", new TemplateMacro( + Arrays.asList( + new Macro.Parameter("$ID", Macro.ParameterEncoding.Tagged, Macro.ParameterCardinality.ExactlyOne), + new Macro.Parameter("$Name", Macro.ParameterEncoding.Tagged, Macro.ParameterCardinality.ZeroOrOne), + new Macro.Parameter("$Bald", Macro.ParameterEncoding.Tagged, Macro.ParameterCardinality.ZeroOrOne) + ), + Arrays.asList( + new Expression.StructValue(Collections.emptyList(), 0, 7, new HashMap>() {{ + put("ID", Collections.singletonList(2)); + put("Name", Collections.singletonList(4)); + put("Bald", Collections.singletonList(6)); + }}), + new Expression.FieldName(new FakeSymbolToken("ID", symbols.get("ID"))), + new Expression.VariableRef(0), + new Expression.FieldName(new FakeSymbolToken("Name", symbols.get("Name"))), + new Expression.VariableRef(1), + new Expression.FieldName(new FakeSymbolToken("Bald", symbols.get("Bald"))), + new Expression.VariableRef(2) + ) + )); + + /* The outer field name and the single macro argument both use FIRST_LOCAL_SYMBOL_ID; the test that reads this stream expects both to resolve to "foo". */ writer.stepInStruct(true); + writer.writeFieldName(FIRST_LOCAL_SYMBOL_ID); + streamType.startMacroInvocationByName(writer, "People", expectedMacroTable); + writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID); + // Two trailing optionals are elided.
+ writer.stepOut(); + writer.stepOut(); + + return getBytes(writer, out); + } + + /** Reads the stream produced by macroInvocationWithinStruct(StreamType, SortedMap) and asserts the macro table plus an outer struct whose field "foo" holds a struct with the single field ID = foo. */ @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void macroInvocationWithinStruct(InputType inputType, StreamType streamType) throws Exception { + SortedMap expectedMacroTable = new TreeMap<>(); + byte[] data = macroInvocationWithinStruct(streamType, expectedMacroTable); + + try (IonReader reader = inputType.newReader(data)) { + assertEquals(IonType.STRUCT, reader.next()); + assertMacroTablesContainsExpectedMappings(reader, streamType, expectedMacroTable); + reader.stepIn(); + assertEquals(IonType.STRUCT, reader.next()); + assertEquals("foo", reader.getFieldName()); + reader.stepIn(); + assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("ID", reader.getFieldName()); + assertEquals("foo", reader.stringValue()); + assertNull(reader.next()); + reader.stepOut(); + // TODO future fix: currently this next() is needed, otherwise the reader thinks it's still evaluating a + // macro on the next stepOut. + assertNull(reader.next()); + reader.stepOut(); + assertNull(reader.next()); + } + } + + /** + * Performs a macro-aware transcode by repeatedly calling {@link MacroAwareIonReader#transcodeNext()}. + * @param data the data to transcode. + * @param inputType the input type for the data to transcode. + * @param outputFormat the output format for the transcoded data. + * @param numberOfValues the number of values to transcode. + * @param assertEnd true if, after transcoding the requested number of values, this method should assert that + * calling {@code transcodeNext()} one more time would result in stream end (i.e., return {@code false}). + * @return a stream containing the transcoded data. + * @throws Exception if thrown during transcoding.
+ */ + private ByteArrayOutputStream macroAwareTranscodeValueByValue( + byte[] data, + InputType inputType, + StreamType outputFormat, + int numberOfValues, + boolean assertEnd + ) throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + /* try-with-resources closes both the reader and the rewriter before out is returned */ try ( + MacroAwareIonReader reader = inputType.newMacroAwareReader(data); + MacroAwareIonWriter rewriter = outputFormat.newMacroAwareWriter(out) + ) { + reader.prepareTranscodeTo(rewriter); + for (int i = 0; i < numberOfValues; i++) { + assertTrue(reader.transcodeNext()); + } + if (assertEnd) { + assertFalse(reader.transcodeNext()); + } + } + return out; + } + + /** Transcodes one value of the stream built by macroInvocationWithinStruct(StreamType, SortedMap), without asserting stream end, and checks the output through verifyStream with the expected substring counts. */ @ParameterizedTest(name = "{0},{1},{2}") + @MethodSource("allInputFormatsInputTypesAndOutputFormats") + public void nestedInvocationMacroAwareTranscode( + InputType inputType, + StreamType inputFormat, + StreamType outputFormat + ) throws Exception { + byte[] data = macroInvocationWithinStruct(inputFormat, new TreeMap<>()); + + ByteArrayOutputStream out = macroAwareTranscodeValueByValue(data, inputType, outputFormat, 1, false); + + verifyStream(data, out, outputFormat, + substringCount("$ion_1_1", 1), + substringCount(DEFAULT_MODULE_DIRECTIVE_PREFIX, 2), + substringCount(SystemSymbols_1_1.SYMBOL_TABLE, 2), + substringCount(SystemSymbols_1_1.MACRO_TABLE, 1), + substringCount(SystemSymbols_1_1.ADD_SYMBOLS, 0), + substringCount(SystemSymbols_1_1.ADD_MACROS, 0), + substringCount(SystemSymbols_1_1.SET_SYMBOLS, 0), + substringCount(SystemSymbols_1_1.SET_MACROS, 0), + substringCount("(:People", 1) + ); + } + + /** Writes a struct whose first two fields are invocations of SystemMacro.Values followed by a top-level int, transcodes both values and asserts the stream then ends, and checks the output through verifyStream. */ @ParameterizedTest(name = "{0},{1},{2}") + @MethodSource("allInputFormatsInputTypesAndOutputFormats") + public void multipleNestedInvocationMacroAwareTranscode( + InputType inputType, + StreamType inputFormat, + StreamType outputFormat + ) throws Exception { + ByteArrayOutputStream source = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = inputFormat.newWriter(source); + writer.writeIVM(); + + writeSymbolTableEExpression(false, writer, "foo",
"bar", "baz", "zar"); + + writer.stepInStruct(true); + writer.writeFieldName(FIRST_LOCAL_SYMBOL_ID); // foo + writer.stepInEExp(SystemMacro.Values); + writer.stepInExpressionGroup(false); + writer.writeInt(1); + writer.writeInt(2); + writer.stepOut(); + writer.stepOut(); + writer.writeFieldName(FIRST_LOCAL_SYMBOL_ID + 1); // bar + writer.stepInEExp(SystemMacro.Values); + writer.stepInExpressionGroup(false); + writer.writeInt(3); + writer.writeInt(4); + writer.stepOut(); + writer.stepOut(); + writer.writeFieldName(FIRST_LOCAL_SYMBOL_ID + 2); // baz + writer.writeAnnotations(FIRST_LOCAL_SYMBOL_ID); // foo + writer.writeInt(5); + writer.writeFieldName(FIRST_LOCAL_SYMBOL_ID + 3); // zar + writer.writeAnnotations(FIRST_LOCAL_SYMBOL_ID + 1); // bar + writer.stepInStruct(true); + writer.stepOut(); + writer.stepOut(); + writer.writeInt(123); + + byte[] data = getBytes(writer, source); + ByteArrayOutputStream out = macroAwareTranscodeValueByValue(data, inputType, outputFormat, 2, true); + + verifyStream(data, out, outputFormat, + substringCount("$ion_1_1", 1), + substringCount(DEFAULT_MODULE_DIRECTIVE_PREFIX, 0), + substringCount(SystemSymbols_1_1.SYMBOL_TABLE, 0), + substringCount(SystemSymbols_1_1.MACRO_TABLE, 0), + substringCount(SystemSymbols_1_1.ADD_SYMBOLS, 0), + substringCount(SystemSymbols_1_1.ADD_MACROS, 0), + substringCount(SystemSymbols_1_1.SET_SYMBOLS, 1), + substringCount(SystemSymbols_1_1.SET_MACROS, 0), + substringCount(SystemSymbols_1_1.VALUES, 2) + ); + } + + /** Builds a stream that defines the zero-parameter macro abcdef, whose body is itself a default-module directive with a two-entry symbol table, invokes that macro once, and then writes two symbol values by ID; returns the encoded bytes. */ private byte[] zeroArgMacroThatExpandsToEncodingDirective(StreamType outputFormat) { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = outputFormat.newWriter(out); + writer.writeIVM(); + + Map symbols = initializeSymbolTable(writer, "foo", "bar"); + + startModuleDirectiveForDefaultModule(writer); + startMacroTable(writer); + startMacro(writer, symbols, "abcdef"); + writeMacroSignature(writer, symbols); // empty signature + // The body: an encoding directive that
sets the symbol table to ["abc", "def"] + startModuleDirectiveForDefaultModule(writer); + writeEncodingDirectiveSymbolTable(writer, "abc", "def"); + endEncodingDirective(writer); + endMacro(writer); + endMacroTable(writer); + endEncodingDirective(writer); + + /* This table is only used to start the invocation below; the helper neither returns nor asserts it. */ SortedMap expectedMacroTable = new TreeMap<>(); + expectedMacroTable.put("abcdef", new TemplateMacro( + Collections.emptyList(), + Arrays.asList( + new Expression.SExpValue(Collections.singletonList(new FakeSymbolToken(SystemSymbols_1_1.ION.name(), SystemSymbols_1_1.ION.getId())), 0, 5), + new Expression.SExpValue(Collections.emptyList(), 1, 5), + new Expression.SymbolValue(Collections.emptyList(), new FakeSymbolToken(SystemSymbols_1_1.SYMBOL_TABLE.name(), SystemSymbols_1_1.SYMBOL_TABLE.getId())), + new Expression.ListValue(Collections.emptyList(), 3, 5), + new Expression.StringValue(Collections.emptyList(), "abc"), + new Expression.StringValue(Collections.emptyList(), "def") + ) + )); + + outputFormat.startMacroInvocationByName(writer, "abcdef", expectedMacroTable); + writer.stepOut(); + writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID + 1); // def + writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID); // abc + + return getBytes(writer, out); + } + + /** Reads the stream from zeroArgMacroThatExpandsToEncodingDirective(StreamType) and asserts that the two symbol values written after the invocation resolve to "def" and then "abc". */ @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void zeroArgMacroThatExpandsToEncodingDirective(InputType inputType, StreamType streamType) throws Exception { + byte[] data = zeroArgMacroThatExpandsToEncodingDirective(streamType); + try (IonReader reader = inputType.newReader(data)) { + assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("def", reader.stringValue()); + assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("abc", reader.stringValue()); + assertNull(reader.next()); + } + } + + /** Transcodes the same stream value by value (two values, then asserts stream end) and checks the output through verifyStream against the text "def abc". */ @ParameterizedTest(name = "{0},{1},{2}") + @MethodSource("allInputFormatsInputTypesAndOutputFormats") + public void zeroArgMacroThatExpandsToEncodingDirectiveMacroAwareTranscode( + InputType inputType, + StreamType inputFormat, + StreamType
outputFormat + ) throws Exception { + byte[] data = zeroArgMacroThatExpandsToEncodingDirective(inputFormat); + ByteArrayOutputStream out = macroAwareTranscodeValueByValue(data, inputType, outputFormat, 2, true); + + verifyStream("def abc".getBytes(StandardCharsets.UTF_8), out, outputFormat, + substringCount("$ion_1_1", 1), + substringCount(DEFAULT_MODULE_DIRECTIVE_PREFIX, 3), // Initial symbols, directive with macro, macro body with encoding directive + substringCount("(:abcdef)", 1) + ); + } + + /** Invokes People inside a struct with the optional $Name argument passed as an empty expression group and $Bald supplied, and asserts that the expanded struct contains only the ID and Bald fields. */ @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void macroInvocationWithOptionalSuppressedBeforeEndWithinStruct(InputType inputType, StreamType streamType) throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = streamType.newWriter(out); + writer.writeIVM(); + Map symbols = initializeSymbolTable(writer, "People", "ID", "Name", "Bald", "$ID", "$Name", "$Bald", "?"); + startModuleDirectiveForDefaultModule(writer); + writeEncodingDirectiveSymbolTable(writer, "foo"); + startMacroTable(writer); + startMacro(writer, symbols, "People"); + writeMacroSignature(writer, symbols, "$ID", "$Name", "?", "$Bald", "?"); + // The macro body + writer.stepInStruct(false); + writeVariableField(writer, symbols, "ID", "$ID"); + writeVariableField(writer, symbols, "Name", "$Name"); + writeVariableField(writer, symbols, "Bald", "$Bald"); + writer.stepOut(); + endMacro(writer); + endMacroTable(writer); + endEncodingDirective(writer); + + SortedMap expectedMacroTable = new TreeMap<>(); + expectedMacroTable.put("People", new TemplateMacro( + Arrays.asList( + new Macro.Parameter("$ID", Macro.ParameterEncoding.Tagged, Macro.ParameterCardinality.ExactlyOne), + new Macro.Parameter("$Name", Macro.ParameterEncoding.Tagged, Macro.ParameterCardinality.ZeroOrOne), + new Macro.Parameter("$Bald", Macro.ParameterEncoding.Tagged, Macro.ParameterCardinality.ZeroOrOne) + ), + Arrays.asList( + new
Expression.StructValue(Collections.emptyList(), 0, 7, new HashMap>() {{ + put("ID", Collections.singletonList(2)); + put("Name", Collections.singletonList(4)); + put("Bald", Collections.singletonList(6)); + }}), + new Expression.FieldName(new FakeSymbolToken("ID", symbols.get("ID"))), + new Expression.VariableRef(0), + new Expression.FieldName(new FakeSymbolToken("Name", symbols.get("Name"))), + new Expression.VariableRef(1), + new Expression.FieldName(new FakeSymbolToken("Bald", symbols.get("Bald"))), + new Expression.VariableRef(2) + ) + )); + + /* The invocation is addressed by macro id 0 for both stream types and sits inside a struct as the value of the field whose name is FIRST_LOCAL_SYMBOL_ID. */ writer.stepInStruct(true); + writer.writeFieldName(FIRST_LOCAL_SYMBOL_ID); + writer.stepInEExp(0, false, expectedMacroTable.get("People")); + writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID); + // Explicitly elide the optional "Name" + writer.stepInExpressionGroup(false); + writer.stepOut(); + writer.writeBool(true); + writer.stepOut(); + writer.stepOut(); + + byte[] data = getBytes(writer, out); + + try (IonReader reader = inputType.newReader(data)) { + assertEquals(IonType.STRUCT, reader.next()); + assertMacroTablesContainsExpectedMappings(reader, streamType, expectedMacroTable); + reader.stepIn(); + assertEquals(IonType.STRUCT, reader.next()); + assertEquals("foo", reader.getFieldName()); + reader.stepIn(); + assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("ID", reader.getFieldName()); + assertEquals("foo", reader.stringValue()); + assertEquals(IonType.BOOL, reader.next()); + assertEquals("Bald", reader.getFieldName()); + assertTrue(reader.booleanValue()); + assertNull(reader.next()); + reader.stepOut(); + assertNull(reader.next()); + reader.stepOut(); + assertNull(reader.next()); + } + } + + /** Defines the zero-parameter Pi macro, invokes it twice (once by id 0 and once through the stream type by name), and asserts that both invocations read back as the decimal 3.14159. */ @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void constantMacroInvoked(InputType inputType, StreamType streamType) throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = streamType.newWriter(out); + writer.writeIVM(); + Map symbols =
initializeSymbolTable(writer, "Pi"); + startModuleDirectiveForDefaultModule(writer); + writeEncodingDirectiveSymbolTable(writer, "foo"); + startMacroTable(writer); + startMacro(writer, symbols, "Pi"); + writeMacroSignature(writer, symbols); // Empty signature + writer.writeDecimal(new BigDecimal("3.14159")); // The body: a constant + endMacro(writer); + endMacroTable(writer); + endEncodingDirective(writer); + + SortedMap expectedMacroTable = new TreeMap<>(); + expectedMacroTable.put("Pi", new TemplateMacro( + Collections.emptyList(), + Collections.singletonList(new Expression.DecimalValue(Collections.emptyList(), new BigDecimal("3.14159"))) + )); + + writer.stepInEExp(0, false, expectedMacroTable.get("Pi")); + writer.stepOut(); + streamType.startMacroInvocationByName(writer, "Pi", expectedMacroTable); + writer.stepOut(); + + byte[] data = getBytes(writer, out); + + try (IonReader reader = inputType.newReader(data)) { + assertEquals(IonType.DECIMAL, reader.next()); + assertMacroTablesContainsExpectedMappings(reader, streamType, expectedMacroTable); + assertEquals(new BigDecimal("3.14159"), reader.decimalValue()); + assertEquals(IonType.DECIMAL, reader.next()); + assertEquals(new BigDecimal("3.14159"), reader.decimalValue()); + } + } + + private Macro writeSimonSaysMacro(IonRawWriter_1_1 writer) { + writer.writeIVM(); + Map symbols = initializeSymbolTable(writer, "SimonSays", "anything"); + startModuleDirectiveForDefaultModule(writer); + writeEncodingDirectiveSymbolTable(writer, "foo"); + startMacroTable(writer); + startMacro(writer, symbols, "SimonSays"); + writeMacroSignature(writer, symbols, "anything"); + // The body + writeVariableExpansion(writer, symbols, "anything"); + endMacro(writer); + endMacroTable(writer); + endEncodingDirective(writer); + + return new TemplateMacro( + Collections.singletonList(new Macro.Parameter("anything", Macro.ParameterEncoding.Tagged, Macro.ParameterCardinality.ExactlyOne)), + Collections.singletonList(new 
Expression.VariableRef(0)) + ); + } + + @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void structAsParameter(InputType inputType, StreamType streamType) throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = streamType.newWriter(out); + SortedMap expectedMacroTable = new TreeMap() {{ + put("SimonSays", writeSimonSaysMacro(writer)); + }}; + + streamType.startMacroInvocationByName(writer, "SimonSays", expectedMacroTable); + writer.stepInStruct(true); + writer.writeFieldName(FIRST_LOCAL_SYMBOL_ID); + writer.writeInt(123); + writer.stepOut(); + writer.stepOut(); + + byte[] data = getBytes(writer, out); + + try (IonReader reader = inputType.newReader(data)) { + assertEquals(IonType.STRUCT, reader.next()); + assertMacroTablesContainsExpectedMappings(reader, streamType, expectedMacroTable); + reader.stepIn(); + assertEquals(IonType.INT, reader.next()); + assertEquals("foo", reader.getFieldName()); + assertEquals(123, reader.intValue()); + reader.stepOut(); + assertNull(reader.next()); + } + } + + @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void macroInvocationAsParameter(InputType inputType, StreamType streamType) throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = streamType.newWriter(out); + Macro expectedMacro = writeSimonSaysMacro(writer); + + writer.stepInEExp(0, false, expectedMacro); + writer.stepInEExp(0, false, expectedMacro); + writer.writeFloat(1.23); + writer.stepOut(); + writer.stepOut(); + + byte[] data = getBytes(writer, out); + + try (IonReader reader = inputType.newReader(data)) { + assertEquals(IonType.FLOAT, reader.next()); + assertEquals(1.23, reader.doubleValue(), 1e-9); + assertNull(reader.next()); + } + } + + @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void macroInvocationNestedWithinParameter(InputType inputType, StreamType streamType) 
throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = streamType.newWriter(out); + Macro expectedMacro = writeSimonSaysMacro(writer); + + writer.stepInEExp(0, false, expectedMacro); + writer.stepInList(true); + writer.stepInEExp(0, false, expectedMacro); + writer.writeFloat(1.23); + writer.stepOut(); + writer.stepOut(); + writer.stepOut(); + + byte[] data = getBytes(writer, out); + + try (IonReader reader = inputType.newReader(data)) { + assertEquals(IonType.LIST, reader.next()); + reader.stepIn(); + assertEquals(IonType.FLOAT, reader.next()); + assertEquals(1.23, reader.doubleValue(), 1e-9); + assertNull(reader.next()); + reader.stepOut(); + assertNull(reader.next()); + } + } + + private byte[] macroInvocationsNestedWithinParameter(StreamType streamType) { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = streamType.newWriter(out); + Macro expectedMacro = writeSimonSaysMacro(writer); + + writer.stepInEExp(0, false, expectedMacro); + writer.stepInList(true); + writer.stepInEExp(0, false, expectedMacro); + writer.stepInStruct(true); + writer.writeFieldName(FIRST_LOCAL_SYMBOL_ID); + writer.writeFloat(1.23); + writer.stepOut(); + writer.stepOut(); + writer.stepInEExp(0, false, expectedMacro); + writer.writeInt(123); + writer.stepOut(); + writer.writeString("abc"); + writer.stepOut(); + writer.stepOut(); + writer.stepInList(true); + writer.stepOut(); + + return getBytes(writer, out); + } + + @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void macroInvocationsNestedWithinParameter(InputType inputType, StreamType streamType) throws Exception { + byte[] data = macroInvocationsNestedWithinParameter(streamType); + try (IonReader reader = inputType.newReader(data)) { + assertEquals(IonType.LIST, reader.next()); + reader.stepIn(); + assertEquals(IonType.STRUCT, reader.next()); + reader.stepIn(); + assertEquals(IonType.FLOAT, reader.next()); + 
assertEquals("foo", reader.getFieldName()); + assertEquals(1.23, reader.doubleValue(), 1e-9); + assertNull(reader.next()); + reader.stepOut(); + assertEquals(IonType.INT, reader.next()); + assertEquals(123, reader.intValue()); + assertEquals(IonType.STRING, reader.next()); + assertEquals("abc", reader.stringValue()); + assertNull(reader.next()); + reader.stepOut(); + assertEquals(IonType.LIST, reader.next()); + assertNull(reader.next()); + } + } + + public static class SubstringCountMatcher extends TypeSafeMatcher { + int expectedCount; + String substring; + + private SubstringCountMatcher(String substring, int expectedCount) { + this.expectedCount = expectedCount; + this.substring = substring; + } + + @Override + protected boolean matchesSafely(String s) { + return countOccurrencesOfSubstring(s, substring) == expectedCount; + } + + @Override + public void describeTo(Description description) { + description.appendText("a String including " + expectedCount + " occurrences of " + substring); + } + + /** + * Counts the number of times the given substring occurs in the given string (non-overlapping). + * @param string the string. + * @param substring the substring. + * @return the number of occurrences. + */ + private static int countOccurrencesOfSubstring(String string, String substring) { + int lastMatchIndex = 0; + int count = 0; + while (lastMatchIndex >= 0) { + lastMatchIndex = string.indexOf(substring, lastMatchIndex); + if (lastMatchIndex >= 0) { + lastMatchIndex += substring.length(); + count++; + } + } + return count; + } + } + + static SubstringCountMatcher substringCount(String sub, int count) { + return new SubstringCountMatcher(sub, count); + } + + static SubstringCountMatcher substringCount(SystemSymbols_1_1 sub, int count) { + return new SubstringCountMatcher(sub.getText(), count); + } + + /** + * Verifies a stream has the characteristics described by the arguments to this method and that it is data-model + * equivalent to the expected output. 
+ * @param expectedOutput the expected output. + * @param actualOutput the actual output. + * @param streamType the StreamType to which the source data will be transcoded. + * @param expectations a list of expectations for the text representation of the transcoded data. + */ + @SafeVarargs + private static void verifyStream( + byte[] expectedOutput, + ByteArrayOutputStream actualOutput, + StreamType streamType, + Matcher... expectations + ) throws Exception { + if (streamType == StreamType.TEXT) { + String rewritten = actualOutput.toString(StandardCharsets.UTF_8.name()); + assertThat(rewritten, allOf(expectations)); + } + IonSystem system = IonSystemBuilder.standard().build(); + IonDatagram actual = system.getLoader().load(actualOutput.toByteArray()); + IonDatagram expected = system.getLoader().load(expectedOutput); + assertEquals(expected, actual); + } + + /** + * Performs a macro-aware transcode of the given data, verifying that the resulting stream has the + * characteristics described by the arguments to this method and that it is data-model equivalent + * to the source data. + * @param data the source data. + * @param inputType the InputType to test. + * @param streamType the StreamType to which the source data will be transcoded. + * @param expectations a list of expectations for the text representation of the transcoded data. + */ + @SafeVarargs + private static void verifyMacroAwareTranscode( + byte[] data, + InputType inputType, + StreamType streamType, + Matcher... 
expectations + ) throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + try ( + MacroAwareIonReader reader = inputType.newMacroAwareReader(data); + MacroAwareIonWriter rewriter = streamType.newMacroAwareWriter(out); + ) { + reader.transcodeAllTo(rewriter); + } + verifyStream(data, out, streamType, expectations); + } + + @ParameterizedTest(name = "{0},{1},{2}") + @MethodSource("allInputFormatsInputTypesAndOutputFormats") + public void macroInvocationsNestedWithinParameterMacroAwareTranscode( + InputType inputType, + StreamType inputFormat, + StreamType outputFormat + ) throws Exception { + byte[] data = macroInvocationsNestedWithinParameter(inputFormat); + verifyMacroAwareTranscode(data, inputType, outputFormat, + substringCount("$ion_1_1", 1), + substringCount(SystemSymbols_1_1.ADD_SYMBOLS, 0), + substringCount(SystemSymbols_1_1.ADD_MACROS, 0), + substringCount(SystemSymbols_1_1.SET_SYMBOLS, 0), + substringCount(SystemSymbols_1_1.SET_MACROS, 0), + substringCount(DEFAULT_MODULE_DIRECTIVE_PREFIX, 2) + ); + } + + @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void annotationInParameter(InputType inputType, StreamType streamType) throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = streamType.newWriter(out); + Macro expectedMacro = writeSimonSaysMacro(writer); + + writer.stepInEExp(0, false, expectedMacro); + writer.writeAnnotations(FIRST_LOCAL_SYMBOL_ID); + writer.writeNull(IonType.TIMESTAMP); + writer.stepOut(); + + byte[] data = getBytes(writer, out); + + try (IonReader reader = inputType.newReader(data)) { + assertEquals(IonType.TIMESTAMP, reader.next()); + assertTrue(reader.isNullValue()); + String[] annotation = reader.getTypeAnnotations(); + assertEquals(1, annotation.length); + assertEquals("foo", annotation[0]); + assertNull(reader.next()); + } + } + + @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void 
twoArgumentGroups(InputType inputType, StreamType streamType) throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = streamType.newWriter(out); + + writer.writeIVM(); + Map symbols = initializeSymbolTable(writer, "Groups", "these", "those", "*", "+"); + startModuleDirectiveForDefaultModule(writer); + writeEncodingDirectiveSymbolTable(writer, "foo"); + startMacroTable(writer); + startMacro(writer, symbols, "Groups"); + writeMacroSignature(writer, symbols, "these", "*", "those", "+"); + writer.stepInList(true); + writer.stepInList(true); + writeVariableExpansion(writer, symbols, "those"); + writer.stepOut(); + writer.stepInList(true); + writeVariableExpansion(writer, symbols, "these"); + writer.stepOut(); + writer.stepOut(); + endMacro(writer); + endMacroTable(writer); + endEncodingDirective(writer); + + Macro expectedMacro = new TemplateMacro( + Arrays.asList( + new Macro.Parameter("these", Macro.ParameterEncoding.Tagged, Macro.ParameterCardinality.ZeroOrMore), + new Macro.Parameter("those", Macro.ParameterEncoding.Tagged, Macro.ParameterCardinality.OneOrMore) + ), + Arrays.asList( + new Expression.ListValue(Collections.emptyList(), 0, 2), + new Expression.VariableRef(1), // those + new Expression.SExpValue(Collections.emptyList(), 2, 4), + new Expression.VariableRef(0) // these + ) + ); + + writer.stepInEExp(0, false, expectedMacro); + writer.stepInExpressionGroup(false); // TODO add a test for length-prefixed argument groups + writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID); + writer.writeString("bar"); + writer.stepOut(); + writer.stepInExpressionGroup(false); + writer.writeBool(true); + writer.stepOut(); + writer.stepOut(); + + byte[] data = getBytes(writer, out); + + try (IonReader reader = inputType.newReader(data)) { + assertEquals(IonType.LIST, reader.next()); + reader.stepIn(); + assertEquals(IonType.LIST, reader.next()); + reader.stepIn(); + assertEquals(2, reader.getDepth()); + assertEquals(IonType.BOOL, 
reader.next()); + assertTrue(reader.booleanValue()); + reader.stepOut(); + assertEquals(1, reader.getDepth()); + assertEquals(IonType.LIST, reader.next()); + reader.stepIn(); + assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("foo", reader.symbolValue().assumeText()); + assertEquals(IonType.STRING, reader.next()); + assertEquals("bar", reader.stringValue()); + assertNull(reader.next()); + reader.stepOut(); + reader.stepOut(); + assertNull(reader.next()); + } + } + + private byte[] macroInvocationInMacroDefinition(StreamType streamType) { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = streamType.newWriter(out); + + writer.writeIVM(); + Map symbols = initializeSymbolTable(writer, "SimonSays", "anything", "Echo"); + startModuleDirectiveForDefaultModule(writer); + writeEncodingDirectiveSymbolTable(writer, "foo"); + startMacroTable(writer); + startMacro(writer, symbols, "SimonSays"); + writeMacroSignature(writer, symbols, "anything"); + writeVariableExpansion(writer, symbols, "anything"); // The body: a variable + endMacro(writer); + startMacro(writer, symbols, "Echo"); + writeMacroSignature(writer, symbols); // empty signature + stepInTdlMacroInvocation(writer, 0); // Macro ID 0 ("SimonSays") + writer.writeInt(123); // The argument to SimonSays + writer.stepOut(); + endMacro(writer); + endMacroTable(writer); + endEncodingDirective(writer); + + Macro simonSaysMacro = new TemplateMacro( + Collections.singletonList( + ParameterFactory.exactlyOneTagged("anything") + ), + Collections.singletonList( + new Expression.VariableRef(0) + ) + ); + + Macro expectedMacro = new TemplateMacro( + Collections.emptyList(), + Arrays.asList( + new Expression.MacroInvocation(simonSaysMacro, 0, 2), + new Expression.LongIntValue(Collections.emptyList(), 123) + ) + ); + + writer.stepInEExp(1, false, expectedMacro); + writer.stepOut(); + + return getBytes(writer, out); + } + + @ParameterizedTest(name = "{0},{1}") + 
@MethodSource("allCombinations") + public void macroInvocationInMacroDefinition(InputType inputType, StreamType streamType) throws Exception { + byte[] data = macroInvocationInMacroDefinition(streamType); + try (IonReader reader = inputType.newReader(data)) { + assertEquals(IonType.INT, reader.next()); + assertEquals(IntegerSize.INT, reader.getIntegerSize()); + assertEquals(123, reader.intValue()); + assertNull(reader.next()); + } + } + + @ParameterizedTest(name = "{0},{1},{2}") + @MethodSource("allInputFormatsInputTypesAndOutputFormats") + public void macroInvocationInMacroDefinitionMacroAwareTranscode( + InputType inputType, + StreamType inputFormat, + StreamType outputFormat + ) throws Exception { + byte[] data = macroInvocationInMacroDefinition(inputFormat); + verifyMacroAwareTranscode(data, inputType, outputFormat, + substringCount("$ion_1_1", 1), + substringCount(SystemSymbols_1_1.ADD_SYMBOLS, 0), + substringCount(SystemSymbols_1_1.ADD_MACROS, 0), + substringCount(SystemSymbols_1_1.SET_SYMBOLS, 0), + substringCount(SystemSymbols_1_1.SET_MACROS, 0), + substringCount(DEFAULT_MODULE_DIRECTIVE_PREFIX, 2) + ); + } + + @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void blobsAndClobs(InputType inputType, StreamType streamType) throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = streamType.newWriter(out); + + byte[] blobContents = new byte[] {1, 2}; + byte[] clobContents = new byte[] {3}; + writer.writeIVM(); + Map symbols = initializeSymbolTable(writer, "lobs", "a"); + startModuleDirectiveForDefaultModule(writer); + startMacroTable(writer); + startMacro(writer, symbols, "lobs"); + writeMacroSignature(writer, symbols, "a"); + writer.stepInSExp(true); + writer.writeBlob(blobContents); + writeVariableExpansion(writer, symbols, "a"); + writer.stepOut(); + endMacro(writer); + endMacroTable(writer); + endEncodingDirective(writer); + + Macro expectedMacro = new TemplateMacro( + 
Collections.singletonList(new Macro.Parameter("a", Macro.ParameterEncoding.Tagged, Macro.ParameterCardinality.ExactlyOne)), + Arrays.asList( + new Expression.SExpValue(Collections.emptyList(), 0, 3), + new Expression.BlobValue(Collections.emptyList(), blobContents), + new Expression.VariableRef(0) + ) + ); + + writer.stepInEExp(0, false, expectedMacro); + writer.writeClob(clobContents); + writer.stepOut(); + + byte[] data = getBytes(writer, out); + + try (IonReader reader = inputType.newReader(data)) { + assertEquals(IonType.SEXP, reader.next()); + reader.stepIn(); + assertEquals(IonType.BLOB, reader.next()); + assertArrayEquals(blobContents, reader.newBytes()); + assertEquals(IonType.CLOB, reader.next()); + assertArrayEquals(clobContents, reader.newBytes()); + reader.stepOut(); + assertNull(reader.next()); + } + } + + @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void macroInvocationInTaggedExpressionGroup(InputType inputType, StreamType streamType) throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = streamType.newWriter(out); + + writer.writeIVM(); + Map symbols = initializeSymbolTable(writer, "foo", "value"); + startModuleDirectiveForDefaultModule(writer); + startMacroTable(writer); + startMacro(writer, symbols, "foo"); + writeMacroSignature(writer, symbols, "value", "*"); + writeVariableExpansion(writer, symbols, "value"); + endMacro(writer); + endMacroTable(writer); + endEncodingDirective(writer); + + Macro expectedMacro = new TemplateMacro( + Collections.singletonList(new Macro.Parameter("value", Macro.ParameterEncoding.Tagged, Macro.ParameterCardinality.ZeroOrMore)), + Collections.singletonList(new Expression.VariableRef(0)) + ); + + writer.stepInEExp(0, false, expectedMacro); { + writer.stepInExpressionGroup(true); { + writer.stepInEExp(SystemMacro.Values); { + writer.writeString("bar"); + } writer.stepOut(); + } writer.stepOut(); + } writer.stepOut(); + + byte[] data = 
getBytes(writer, out);
+
+        try (IonReader reader = inputType.newReader(data)) {
+            assertEquals(IonType.STRING, reader.next());
+            assertEquals("bar", reader.stringValue());
+            assertNull(reader.next());
+        }
+    }
+
+    // Shared loader, used below to parse a macro signature from Ion text.
+    private static final IonLoader LOADER = IonSystemBuilder.standard().build().getLoader();
+
+    /**
+     * Defines a macro "foo" whose single parameter "value" is declared as {@code uint8::value '*'}
+     * (Uint8 encoding, zero-or-more cardinality), invokes it with an expression group containing 1 and 2,
+     * and verifies that the reader yields the ints 1 and 2 followed by end of stream.
+     */
+    @ParameterizedTest(name = "{0},{1}")
+    @MethodSource("allCombinations")
+    public void taglessExpressionGroup(InputType inputType, StreamType streamType) throws Exception {
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        IonRawWriter_1_1 writer = streamType.newWriter(out);
+
+        writer.writeIVM();
+        Map symbols = initializeSymbolTable(writer, "foo", "value");
+        startModuleDirectiveForDefaultModule(writer);
+        startMacroTable(writer);
+        startMacro(writer, symbols, "foo");
+        writeMacroSignatureFromDatagram(writer, symbols, LOADER.load("uint8::value '*'"));
+        writeVariableExpansion(writer, symbols, "value");
+        endMacro(writer);
+        endMacroTable(writer);
+        endEncodingDirective(writer);
+
+        Macro expectedMacro = new TemplateMacro(
+            Collections.singletonList(new Macro.Parameter("value", Macro.ParameterEncoding.Uint8, Macro.ParameterCardinality.ZeroOrMore)),
+            Collections.singletonList(new Expression.VariableRef(0))
+        );
+
+        // The bare braces only mirror the nesting of the step-in/step-out calls.
+        writer.stepInEExp(0, false, expectedMacro); {
+            writer.stepInExpressionGroup(true); {
+                writer.writeInt(1);
+                writer.writeInt(2);
+            } writer.stepOut();
+        } writer.stepOut();
+
+        byte[] data = getBytes(writer, out);
+
+        try (IonReader reader = inputType.newReader(data)) {
+            assertEquals(IonType.INT, reader.next());
+            assertEquals(1, reader.intValue());
+            assertEquals(IonType.INT, reader.next());
+            assertEquals(2, reader.intValue());
+            assertNull(reader.next());
+        }
+    }
+
+    /**
+     * Writes an e-expression invoking {@code SystemMacro.AddSymbols} or {@code SystemMacro.SetSymbols},
+     * passing the given symbols as strings inside a single expression group.
+     * @param isAppend true to invoke AddSymbols; false to invoke SetSymbols.
+     * @param writer the raw writer to write to.
+     * @param symbols the symbol texts to write, in order.
+     */
+    private static void writeSymbolTableEExpression(boolean isAppend, IonRawWriter_1_1 writer, String... symbols) {
+        writer.stepInEExp(isAppend ? SystemMacro.AddSymbols : SystemMacro.SetSymbols);
+        writer.stepInExpressionGroup(false);
+        for (String symbol : symbols) {
+            writer.writeString(symbol);
+        }
+        // First step-out closes the expression group, the second closes the e-expression.
+        writer.stepOut();
+        writer.stepOut();
+    }
+
+    /**
+     * Writes a SetSymbols e-expression for the given symbols.
+     * @param writer the raw writer to write to.
+     * @param symbols the symbol texts to write, in order.
+     * @return the result of {@code makeSymbolsMap(FIRST_LOCAL_SYMBOL_ID, symbols)}; presumably a map from
+     *  symbol text to symbol ID starting at FIRST_LOCAL_SYMBOL_ID (makeSymbolsMap is defined elsewhere).
+     */
+    private static Map writeSymbolTableSetEExpression(IonRawWriter_1_1 writer, String... symbols) {
+        writeSymbolTableEExpression(false, writer, symbols);
+        return makeSymbolsMap(FIRST_LOCAL_SYMBOL_ID, symbols);
+    }
+
+    /**
+     * Writes an AddSymbols e-expression for the given new symbols and records them in {@code existingSymbols}.
+     * @param writer the raw writer to write to.
+     * @param existingSymbols the symbols already declared; modified in place.
+     * @param newSymbols the symbol texts to append, in order.
+     */
+    private static void writeSymbolTableAppendEExpression(IonRawWriter_1_1 writer, Map existingSymbols, String... newSymbols) {
+        writeSymbolTableEExpression(true, writer, newSymbols);
+        // New IDs continue after the existing entries.
+        int localSymbolId = FIRST_LOCAL_SYMBOL_ID + existingSymbols.size();
+        for (String newSymbol : newSymbols) {
+            // A text that is already mapped keeps its earlier ID, but the counter still advances.
+            existingSymbols.putIfAbsent(newSymbol, localSymbolId++);
+        }
+    }
+
+    /**
+     * Builds a stream that sets the symbol table to ("foo", "bar"), appends "baz", then replaces the table
+     * with ("abc", "def"), writing two symbol values by symbol ID after each change.
+     * @param streamType the StreamType of the raw writer used to produce the stream.
+     * @return the encoded stream.
+     */
+    private static byte[] macroInvocationsProduceEncodingDirectivesThatModifySymbolTable(StreamType streamType) {
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        IonRawWriter_1_1 writer = streamType.newWriter(out);
+        writer.writeIVM();
+
+        Map symbols = writeSymbolTableSetEExpression(writer, "foo", "bar");
+        writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID);
+        writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID + 1);
+
+        writeSymbolTableAppendEExpression(writer, symbols, "baz");
+        writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID);
+        writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID + 2);
+
+        writeSymbolTableSetEExpression(writer, "abc", "def");
+        writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID);
+        writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID + 1);
+
+        return getBytes(writer, out);
+    }
+
+    @ParameterizedTest(name = "{0},{1}")
+    @MethodSource("allCombinations")
+    public void macroInvocationsProduceEncodingDirectivesThatModifySymbolTable(InputType inputType, StreamType streamType) throws Exception {
+        byte[] data = macroInvocationsProduceEncodingDirectivesThatModifySymbolTable(streamType);
+        try (IonReader reader = inputType.newReader(data)) {
+            assertEquals(IonType.SYMBOL, reader.next());
+            assertEquals("foo", reader.stringValue()); +
assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("bar", reader.stringValue()); + // Symbol "baz" added + assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("foo", reader.stringValue()); + assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("baz", reader.stringValue()); + // Symbol table replaced with "abc", "def" + assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("abc", reader.stringValue()); + assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("def", reader.stringValue()); + } + } + + @ParameterizedTest(name = "{0},{1},{2}") + @MethodSource("allInputFormatsInputTypesAndOutputFormats") + public void macroInvocationsProduceEncodingDirectivesThatModifySymbolTableMacroAwareTranscode( + InputType inputType, + StreamType inputFormat, + StreamType outputFormat + ) throws Exception { + byte[] data = macroInvocationsProduceEncodingDirectivesThatModifySymbolTable(inputFormat); + verifyMacroAwareTranscode(data, inputType, outputFormat, + substringCount("$ion_1_1", 1), + substringCount(SystemSymbols_1_1.ADD_SYMBOLS, 1), + substringCount(SystemSymbols_1_1.ADD_MACROS, 0), + substringCount(SystemSymbols_1_1.SET_SYMBOLS, 2), + substringCount(SystemSymbols_1_1.SET_MACROS, 0), + substringCount(DEFAULT_MODULE_DIRECTIVE_PREFIX, 0), + // Symbol tokens are written using inline text, not symbol identifiers. 
+ substringCount("$1", 0), + substringCount("$2", 0), + substringCount("$3", 0) + ); + } + + @ParameterizedTest(name = "{0},{1},{2}") + @MethodSource("allInputFormatsInputTypesAndOutputFormats") + public void macroInvocationsProduceEncodingDirectivesThatModifySymbolTableMacroAwareTranscodeWithoutInlining( + InputType inputType, + StreamType inputFormat, + StreamType outputFormat + ) throws Exception { + byte[] data = macroInvocationsProduceEncodingDirectivesThatModifySymbolTable(inputFormat); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + try ( + MacroAwareIonReader reader = inputType.newMacroAwareReader(data); + MacroAwareIonWriter rewriter = (MacroAwareIonWriter) (outputFormat == StreamType.TEXT + ? IonEncodingVersion.ION_1_1.textWriterBuilder().withSymbolInliningStrategy(SymbolInliningStrategy.NEVER_INLINE).build(out) + : IonEncodingVersion.ION_1_1.binaryWriterBuilder().withSymbolInliningStrategy(SymbolInliningStrategy.NEVER_INLINE).build(out)) + ) { + reader.transcodeAllTo(rewriter); + } + verifyStream(data, out, outputFormat, + substringCount("$ion_1_1", 1), + substringCount(SystemSymbols_1_1.ADD_SYMBOLS, 1), + substringCount(SystemSymbols_1_1.ADD_MACROS, 0), + substringCount(SystemSymbols_1_1.SET_SYMBOLS, 2), + substringCount(SystemSymbols_1_1.SET_MACROS, 0), + substringCount(DEFAULT_MODULE_DIRECTIVE_PREFIX, 0), + // Symbol tokens are written using symbol identifiers, not inline text. 
+ substringCount("$1", 3), + substringCount("$2", 2), + substringCount("$3", 1) + ); + } + + private static Map systemSymbols() { + return makeSymbolsMap(FIRST_LOCAL_SYMBOL_ID, SystemSymbols_1_1.allSymbolTexts().toArray(new String[0])); + } + + private static byte[] macroInvocationsProduceEncodingDirectivesThatModifyMacroTable(StreamType streamType) { + BigDecimal pi = new BigDecimal("3.14159"); + SortedMap macroTable = new TreeMap<>(); + macroTable.put("Pi", new TemplateMacro( + Collections.emptyList(), + Collections.singletonList(new Expression.DecimalValue(Collections.emptyList(), pi)) + )); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = streamType.newWriter(out); + writer.writeIVM(); + + Map symbols = systemSymbols(); + writeSymbolTableAppendEExpression(writer, symbols, "Pi"); // appends Pi after the system symbols. + + writer.stepInEExp(SystemMacro.AddMacros); + startMacro(writer, symbols, "Pi"); + writeMacroSignature(writer, symbols); // Empty signature + writer.writeDecimal(pi); + endMacro(writer); + writer.stepOut(); + + writer.writeSymbol(SystemSymbols_1_1.size() + FIRST_LOCAL_SYMBOL_ID); // Pi + streamType.startMacroInvocationByName(writer, "Pi", macroTable); + writer.stepOut(); + + symbols = writeSymbolTableSetEExpression(writer, "Pi", "foo"); + + macroTable.put("foo", new TemplateMacro( + Collections.emptyList(), + Collections.singletonList(new Expression.StringValue(Collections.emptyList(), "bar")) + )); + + writer.stepInEExp(SystemMacro.AddMacros); + startMacro(writer, symbols, "foo"); + writeMacroSignature(writer, symbols); // Empty signature + writer.writeString("bar"); + endMacro(writer); + writer.stepOut(); + + writer.stepInEExp(1, false, macroTable.get("foo")); // ID 1 because Pi (ID 0) is still in the table. + writer.stepOut(); + writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID); // Now "Pi" because SetSymbols was used, replacing the system symbols. 
+ + writer.stepInEExp(SystemMacro.SetMacros); + startMacro(writer, symbols, "foo"); + writeMacroSignature(writer, symbols); // Empty signature + writer.writeString("baz"); + endMacro(writer); + writer.stepOut(); + + writer.stepInEExp(0, false, macroTable.get("foo")); // ID 0 now because SetMacros was used. + writer.stepOut(); + writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID + 1); // Still foo because AddMacros/SetMacros does not mutate the symbol table. + + return getBytes(writer, out); + } + + @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void macroInvocationsProduceEncodingDirectivesThatModifyMacroTable(InputType inputType, StreamType streamType) throws Exception { + byte[] data = macroInvocationsProduceEncodingDirectivesThatModifyMacroTable(streamType); + try (IonReader reader = inputType.newReader(data)) { + assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("Pi", reader.stringValue()); + assertEquals(IonType.DECIMAL, reader.next()); + assertEquals(new BigDecimal("3.14159"), reader.bigDecimalValue()); + + assertEquals(IonType.STRING, reader.next()); + assertEquals("bar", reader.stringValue()); + assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("Pi", reader.stringValue()); + + assertEquals(IonType.STRING, reader.next()); + assertEquals("baz", reader.stringValue()); + assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("foo", reader.stringValue()); + + assertNull(reader.next()); + } + } + + @ParameterizedTest(name = "{0},{1},{2}") + @MethodSource("allInputFormatsInputTypesAndOutputFormats") + public void macroInvocationsProduceEncodingDirectivesThatModifyMacroTableMacroAwareTranscode( + InputType inputType, + StreamType inputFormat, + StreamType outputFormat + ) throws Exception { + byte[] data = macroInvocationsProduceEncodingDirectivesThatModifyMacroTable(inputFormat); + verifyMacroAwareTranscode(data, inputType, outputFormat, + substringCount("$ion_1_1", 1), + 
substringCount(SystemSymbols_1_1.ADD_SYMBOLS, 1), + substringCount(SystemSymbols_1_1.ADD_MACROS, 2), + substringCount(SystemSymbols_1_1.SET_SYMBOLS, 1), + substringCount(SystemSymbols_1_1.SET_MACROS, 1), + substringCount(DEFAULT_MODULE_DIRECTIVE_PREFIX, 0) + ); + } + + @ParameterizedTest(name = "{0},{1},{2}") + @MethodSource("allInputFormatsInputTypesAndOutputFormats") + public void multiValuePartialMacroAwareTranscode( + InputType inputType, + StreamType inputFormat, + StreamType outputFormat + ) throws Exception { + byte[] data = macroInvocationsProduceEncodingDirectivesThatModifyMacroTable(inputFormat); + ByteArrayOutputStream out = macroAwareTranscodeValueByValue(data, inputType, outputFormat, 2, false); + + verifyStream("Pi 3.14159".getBytes(StandardCharsets.UTF_8), out, outputFormat, + substringCount("$ion_1_1", 1), + substringCount(SystemSymbols_1_1.ADD_SYMBOLS, 1), + substringCount(SystemSymbols_1_1.ADD_MACROS, 1), + substringCount(SystemSymbols_1_1.SET_SYMBOLS, 0), + substringCount(SystemSymbols_1_1.SET_MACROS, 0), + substringCount(DEFAULT_MODULE_DIRECTIVE_PREFIX, 0), + substringCount("(:Pi)", 1), + substringCount("(:foo)", 0) + ); + } + + @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void multipleListsWithinSymbolTableDeclaration(InputType inputType, StreamType streamType) throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = streamType.newWriter(out); + writer.writeIVM(); + + startModuleDirectiveForDefaultModule(writer); + writer.stepInSExp(false); + writer.writeSymbol(SystemSymbols_1_1.SYMBOL_TABLE); + writer.stepInList(false); + writer.writeString("foo"); + writer.stepOut(); + writer.stepInList(true); + writer.writeString("bar"); + writer.stepOut(); + writer.stepOut(); + endEncodingDirective(writer); + + writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID + 1); + writer.writeSymbol(FIRST_LOCAL_SYMBOL_ID); + + byte[] data = getBytes(writer, out); + try (IonReader reader = 
inputType.newReader(data)) { + assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("bar", reader.stringValue()); + assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("foo", reader.stringValue()); + + assertNull(reader.next()); + } + } + + private byte[] emptyMacroAppendToEmptyTable(StreamType streamType) { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = streamType.newWriter(out); + writer.writeIVM(); + + startModuleDirectiveForDefaultModule(writer); + startMacroTable(writer); + writer.writeSymbol(SystemSymbols.DEFAULT_MODULE); + endMacroTable(writer); + endEncodingDirective(writer); + + return getBytes(writer, out); + } + + @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void emptyMacroAppendToEmptyTable(InputType inputType, StreamType streamType) throws Exception { + byte[] data = emptyMacroAppendToEmptyTable(streamType); + try (IonReader reader = inputType.newReader(data)) { + assertNull(reader.next()); + } + } + + @ParameterizedTest(name = "{0},{1},{2}") + @MethodSource("allInputFormatsInputTypesAndOutputFormats") + public void emptyMacroAppendToEmptyTableMacroAwareTranscode( + InputType inputType, + StreamType inputFormat, + StreamType outputFormat + ) throws Exception { + byte[] data = emptyMacroAppendToEmptyTable(inputFormat); + verifyMacroAwareTranscode(data, inputType, outputFormat, + substringCount("$ion_1_1", 1), + substringCount(SystemSymbols_1_1.ADD_SYMBOLS, 0), + substringCount(SystemSymbols_1_1.ADD_MACROS, 0), + substringCount(SystemSymbols_1_1.SET_SYMBOLS, 0), + substringCount(SystemSymbols_1_1.SET_MACROS, 0), + substringCount(DEFAULT_MODULE_DIRECTIVE_PREFIX, 0) // The empty append to an empty table has no effect, and it is not transcoded. This is a known limitation. 
+ ); + } + + private byte[] emptyMacroAppendToNonEmptyTable(StreamType streamType) { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = streamType.newWriter(out); + writer.writeIVM(); + + SortedMap macroTable = new TreeMap<>(); + macroTable.put("foo", new TemplateMacro( + Collections.singletonList(new Macro.Parameter("foo", Macro.ParameterEncoding.Tagged, Macro.ParameterCardinality.ExactlyOne)), + Collections.singletonList(new Expression.VariableRef(0)) + )); + Map symbols = Collections.emptyMap(); + + startModuleDirectiveForDefaultModule(writer); { + startMacroTable(writer); { + startMacro(writer, symbols, "foo"); { + writeMacroSignature(writer, symbols, "x"); + writeVariableExpansion(writer, symbols, "x"); + } endMacro(writer); + } endMacroTable(writer); + } endEncodingDirective(writer); + + + startModuleDirectiveForDefaultModule(writer); { + startMacroTable(writer); { + writer.writeSymbol(SystemSymbols.DEFAULT_MODULE); + } endMacroTable(writer); + writeEncodingDirectiveSymbolTable(writer, true, "bar"); + } endEncodingDirective(writer); + + writer.stepInEExp(0, true, macroTable.get("foo")); { + writer.writeSymbol(1); + } writer.stepOut(); + + return getBytes(writer, out); + } + + @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void emptyMacroAppendToNonEmptyTable(InputType inputType, StreamType streamType) throws Exception { + byte[] data = emptyMacroAppendToNonEmptyTable(streamType); + try (IonReader reader = inputType.newReader(data)) { + assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("bar", reader.stringValue()); + } + } + + @ParameterizedTest(name = "{0},{1},{2}") + @MethodSource("allInputFormatsInputTypesAndOutputFormats") + public void emptyMacroAppendToNonEmptyTableMacroAwareTranscode( + InputType inputType, + StreamType inputFormat, + StreamType outputFormat + ) throws Exception { + byte[] data = emptyMacroAppendToNonEmptyTable(inputFormat); + verifyMacroAwareTranscode(data, 
inputType, outputFormat, + substringCount("$ion_1_1", 1), + substringCount(SystemSymbols_1_1.ADD_SYMBOLS, 0), + substringCount(SystemSymbols_1_1.ADD_MACROS, 0), + substringCount(SystemSymbols_1_1.SET_SYMBOLS, 0), + substringCount(SystemSymbols_1_1.SET_MACROS, 0), + substringCount(DEFAULT_MODULE_DIRECTIVE_PREFIX, 2) // Two encoding directives + ); + } + + private byte[] invokeUnqualifiedSystemMacroInTDL(StreamType streamType) { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = streamType.newWriter(out); + writer.writeIVM(); + + SortedMap macroTable = new TreeMap<>(); + macroTable.put("foo", new TemplateMacro( + Collections.singletonList(new Macro.Parameter("x", Macro.ParameterEncoding.Tagged, Macro.ParameterCardinality.ZeroOrMore)), + Arrays.asList( + new Expression.MacroInvocation(SystemMacro.Default, 0, 3), + new Expression.VariableRef(0), + new Expression.StringValue(Collections.emptyList(), "hello world") + ) + )); + Map symbols = Collections.emptyMap(); + + + startModuleDirectiveForDefaultModule(writer); { + startMacroTable(writer); { + // Define our macro (macro foo (x) (.default (%x) "hello world")) + startMacro(writer, symbols, "foo"); { + writeMacroSignatureFromDatagram(writer, symbols, LOADER.load("x '*'")); + stepInTdlMacroInvocation(writer, (int) SystemMacro.Default.getId()); { + writeVariableExpansion(writer, symbols, "x"); + writer.writeString("hello world"); + } endMacro(writer); // (.default + } endMacro(writer); // (macro foo + } endMacroTable(writer); + } endEncodingDirective(writer); + + // Invoke (:foo) with no parameter + writer.stepInEExp(0, true, macroTable.get("foo")); { + } writer.stepOut(); + + return getBytes(writer, out); + } + + @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void invokeUnqualifiedSystemMacroInTDL(InputType inputType, StreamType streamType) throws Exception { + byte[] data = invokeUnqualifiedSystemMacroInTDL(streamType); + try (IonReader reader = 
inputType.newReader(data)) { + assertEquals(IonType.STRING, reader.next()); + assertEquals("hello world", reader.stringValue()); + } + } + + @ParameterizedTest(name = "{0},{1},{2}") + @MethodSource("allInputFormatsInputTypesAndOutputFormats") + public void invokeUnqualifiedSystemMacroInTDLMacroAwareTranscode( + InputType inputType, + StreamType inputFormat, + StreamType outputFormat + ) throws Exception { + byte[] data = invokeUnqualifiedSystemMacroInTDL(inputFormat); + verifyMacroAwareTranscode(data, inputType, outputFormat, + substringCount("$ion_1_1", 1), + substringCount(SystemSymbols_1_1.ADD_SYMBOLS, 0), + substringCount(SystemSymbols_1_1.ADD_MACROS, 0), + substringCount(SystemSymbols_1_1.SET_SYMBOLS, 0), + substringCount(SystemSymbols_1_1.SET_MACROS, 0), + substringCount(DEFAULT_MODULE_DIRECTIVE_PREFIX, 1) + ); + } + + @ParameterizedTest(name = "{0},{1},{2}") + @MethodSource("allInputFormatsInputTypesAndOutputFormats") + public void multipleIonVersionMarkersMacroAwareTranscode( + InputType inputType, + StreamType inputFormat, + StreamType outputFormat + ) throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = inputFormat.newWriter(out); + Map symbols = new HashMap<>(); + writer.writeIVM(); + writeSymbolTableAppendEExpression(writer, symbols, "foo"); + writer.writeSymbol(SystemSymbols_1_1.size() + FIRST_LOCAL_SYMBOL_ID); // foo + writer.writeIVM(); + writeSymbolTableAppendEExpression(writer, symbols, "bar"); // bar + writer.writeSymbol(SystemSymbols_1_1.size() + FIRST_LOCAL_SYMBOL_ID); + byte[] data = getBytes(writer, out); + verifyMacroAwareTranscode(data, inputType, outputFormat, + substringCount("$ion_1_1", 2), + substringCount(SystemSymbols_1_1.ADD_SYMBOLS, 2), + substringCount(SystemSymbols_1_1.ADD_MACROS, 0), + substringCount(SystemSymbols_1_1.SET_SYMBOLS, 0), + substringCount(SystemSymbols_1_1.SET_MACROS, 0), + substringCount(DEFAULT_MODULE_DIRECTIVE_PREFIX, 0) + ); + } + + // TODO finalize handling of 
Ion 1.0-style symbol tables in Ion 1.1: https://github.com/amazon-ion/ion-java/issues/1002 + @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void ion10SymbolTableMacroAwareTranscode(InputType inputType, StreamType outputFormat) throws Exception { + byte[] data = bytes( + 0xE0, 0x01, 0x01, 0xEA, // Ion 1.1 IVM + 0xE4, 0x07, // $ion_symbol_table:: + 0xD4, // { + 0x0F, // symbols: + 0xB2, // [ + 0x91, 'a', // "a" + // ]} + 0xE1, 0x01 + ); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + try ( + MacroAwareIonReader reader = inputType.newMacroAwareReader(data); + MacroAwareIonWriter rewriter = outputFormat.newMacroAwareWriter(out); + ) { + // This may at some point be supported. + assertThrows(IonException.class, () -> reader.transcodeAllTo(rewriter)); + } + } + + // TODO cover every Ion type + // TODO annotations in macro definition (using 'annotate' system macro) + // TODO test error conditions + // TODO support continuable and lazy evaluation + // TODO early step-out of evaluation; skipping evaluation. + // TODO ZeroOrOne and ExactlyOne cardinality parameter with single-element group (legal?) 
+} diff --git a/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java b/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java index 4708b49ba1..c9dc13093e 100644 --- a/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java +++ b/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java @@ -5,18 +5,31 @@ import com.amazon.ion.IonBufferConfiguration; import com.amazon.ion.IonCursor; import com.amazon.ion.IonException; +import com.amazon.ion.IonType; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; import org.junit.jupiter.params.provider.ValueSource; import java.io.ByteArrayInputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Consumer; +import java.util.function.Function; +import java.util.function.Supplier; import static com.amazon.ion.BitUtils.bytes; import static com.amazon.ion.IonCursor.Event.NEEDS_DATA; +import static com.amazon.ion.IonCursor.Event.NEEDS_INSTRUCTION; import static com.amazon.ion.IonCursor.Event.VALUE_READY; import static com.amazon.ion.IonCursor.Event.START_CONTAINER; import static com.amazon.ion.IonCursor.Event.START_SCALAR; +import static com.amazon.ion.TestUtils.cleanCommentedHexBytes; +import static com.amazon.ion.TestUtils.hexStringToByteArray; +import static com.amazon.ion.TestUtils.withIvm; import static com.amazon.ion.impl.IonCursorTestUtilities.STANDARD_BUFFER_CONFIGURATION; import static com.amazon.ion.impl.IonCursorTestUtilities.Expectation; import static com.amazon.ion.impl.IonCursorTestUtilities.ExpectationProvider; @@ -33,26 +46,73 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; 
public class IonCursorBinaryTest { - private static IonCursorBinary initializeCursor(boolean constructFromBytes, int... data) { + private static IonCursorBinary initializeCursor(IonBufferConfiguration configuration, boolean constructFromBytes, byte[] data) { IonCursorBinary cursor; if (constructFromBytes) { - cursor = new IonCursorBinary(STANDARD_BUFFER_CONFIGURATION, bytes(data), 0, data.length); + cursor = new IonCursorBinary(configuration, data, 0, data.length); } else { cursor = new IonCursorBinary( - STANDARD_BUFFER_CONFIGURATION, - new ByteArrayInputStream(bytes(data)), + configuration, + new ByteArrayInputStream(data), null, 0, 0 ); } - cursor.registerOversizedValueHandler(STANDARD_BUFFER_CONFIGURATION.getOversizedValueHandler()); + cursor.registerOversizedValueHandler(configuration.getOversizedValueHandler()); return cursor; } + private static IonCursorBinary initializeCursor(boolean constructFromBytes, int... data) { + return initializeCursor(STANDARD_BUFFER_CONFIGURATION, constructFromBytes, bytes(data)); + } + + public enum InputType { + + /** + * The cursor will be constructed from a fixed byte array. + */ + FIXED_BYTES { + @Override + IonCursorBinary initializeCursor(byte[] data) { + return IonCursorBinaryTest.initializeCursor(STANDARD_BUFFER_CONFIGURATION, true, data); + } + }, + + /** + * The cursor will be constructed from an InputStream with all bytes available up front. + */ + FIXED_STREAM { + @Override + IonCursorBinary initializeCursor(byte[] data) { + return IonCursorBinaryTest.initializeCursor(STANDARD_BUFFER_CONFIGURATION, false, data); + } + }, + + /** + * The cursor will be constructed from an InputStream that is fed bytes one by one, expecting NEEDS_DATA + * after each byte except the final one. 
+ */ + INCREMENTAL { + @Override + IonCursorBinary initializeCursor(byte[] data) { + ResizingPipedInputStream pipe = new ResizingPipedInputStream(data.length); + IonCursorBinary cursor = new IonCursorBinary(STANDARD_BUFFER_CONFIGURATION, pipe, null, 0, 0); + for (byte b : data) { + assertEquals(NEEDS_DATA, cursor.nextValue()); + pipe.receive(b); + } + return cursor; + } + }; + + abstract IonCursorBinary initializeCursor(byte[] data); + } + /** * Provides Expectations that verify that advancing the cursor to the next value results in the given event, and * filling that value results in a Marker with the given start and end indices. @@ -70,6 +130,33 @@ private static ExpectationProvider fill(IonCursor.Event expecte )); } + /** + * Provides Expectations that verify that advancing the cursor to the next value results in the given event, and + * attempting to fill that value results in NEEDS_INSTRUCTION, indicating that the value could not be filled due + * to being oversize. + */ + private static ExpectationProvider fillIsOversize(IonCursor.Event expectedEvent, Supplier oversizeCounter) { + return consumer -> consumer.accept(new Expectation<>( + String.format("fillOversized(%s)", expectedEvent), + cursor -> { + assertEquals(expectedEvent, cursor.nextValue()); + assertEquals(NEEDS_INSTRUCTION, cursor.fillValue()); + assertEquals(1, oversizeCounter.get()); + } + )); + } + + /** + * Provides an Expectation that verifies that the value on which the cursor is currently positioned has the given + * type. + */ + static ExpectationProvider type(IonType expectedType) { + return consumer -> consumer.accept(new Expectation<>( + String.format("type(%s)", expectedType), + cursor -> assertEquals(expectedType, cursor.getValueMarker().typeId.type)) + ); + } + /** * Provides Expectations that verify that advancing the cursor positions it on a scalar, and filling that scalar * results in a Marker with the given start and end indices. 
@@ -257,6 +344,36 @@ public void fillContainerAtDepth0(boolean constructFromBytes) {
         );
     }
 
+    @ParameterizedTest(name = "constructFromBytes={0}")
+    @ValueSource(booleans = {true, false})
+    public void fillDelimitedContainerAtDepth0(boolean constructFromBytes) {
+        IonCursorBinary cursor = initializeCursor(
+            constructFromBytes,
+            0xE0, 0x01, 0x01, 0xEA,
+            0xF3, // Delimited struct
+            0x07, // Field SID 3
+            0xF1, // Delimited list, contents start at index 7
+            0x6A, // Float length 0
+            0xF0, // End delimited list
+            0x09, // Field SID 4
+            0x61, 0x01, // Int length 1, starting at byte index 11
+            0x01, 0xF0 // End delimited struct
+        );
+        assertSequence(
+            cursor,
+            // When reading from a fixed-size input source, the cursor does not need to peek ahead to find the end
+            // of the delimited container during fill, so the end index remains -1 in that case. Otherwise, fill
+            // looks ahead to find the end index and stores it so that it does not need to be calculated repeatedly.
+            fillContainer(5, constructFromBytes ? -1 : 14,
+                container(
+                    scalar()
+                ),
+                fillScalar(11, 12)
+            ),
+            endStream()
+        );
+    }
+
     @ParameterizedTest(name = "constructFromBytes={0}")
     @ValueSource(booleans = {true, false})
     public void fillContainerAtDepth1(boolean constructFromBytes) {
@@ -281,6 +398,78 @@ public void fillContainerAtDepth1(boolean constructFromBytes) {
         );
     }
 
+    @ParameterizedTest(name = "constructFromBytes={0}")
+    @ValueSource(booleans = {true, false})
+    public void fillDelimitedContainerAtDepth1(boolean constructFromBytes) {
+        IonCursorBinary cursor = initializeCursor(
+            constructFromBytes,
+            0xE0, 0x01, 0x01, 0xEA,
+            0xF3, // Delimited struct
+            0x07, // Field SID 3
+            0xF1, // Delimited list, contents start at index 7
+            0x6A, // Float length 0
+            0xF0, // End delimited list
+            0x09, // Field SID 4
+            0x61, 0x01, // Int length 1, starting at byte index 11
+            0x01, 0xF0 // End delimited struct
+        );
+        assertSequence(
+            cursor,
+            container(
+                // When reading from a fixed-size input source, the cursor does not need to peek ahead to find the end
+                // of the delimited container during fill, so the end index remains -1 in that case. Otherwise, fill
+                // looks ahead to find the end index and stores it so that it does not need to be calculated repeatedly.
+                fillContainer(7, constructFromBytes ? -1 : 9,
+                    scalar(),
+                    endContainer()
+                )
+            )
+        );
+    }
+
+    @Test
+    public void skipOversizeDelimitedContainerAtDepth1() {
+        AtomicInteger oversizeValueCounter = new AtomicInteger(0);
+        AtomicInteger oversizeSymbolTableCounter = new AtomicInteger(0);
+        AtomicInteger byteCounter = new AtomicInteger(0);
+        byte[] data = bytes(
+            0xE0, 0x01, 0x01, 0xEA,
+            0xF3, // Delimited struct
+            0x07, // Field SID 3
+            0xF1, // Delimited list, contents start at index 7
+            0x6A, 0x6A, 0x6A, 0x6A, 0x6A, 0x6A, // Six floats 0e0
+            0xF0, // End delimited list
+            0x09, // Field SID 4
+            0x61, 0x01, // Int length 1, starting at byte index 16
+            0x01, 0xF0 // End delimited struct
+        );
+        IonCursorBinary cursor = initializeCursor(
+            IonBufferConfiguration.Builder.standard()
+                .withInitialBufferSize(5)
+                .withMaximumBufferSize(5)
+                .onData(byteCounter::addAndGet)
+                .onOversizedValue(oversizeValueCounter::incrementAndGet)
+                .onOversizedSymbolTable(oversizeSymbolTableCounter::incrementAndGet)
+                .build(),
+            false,
+            data
+        );
+        assertSequence(
+            cursor,
+            container(
+                // The delimited list cannot fit in the 5-byte maximum buffer, so it is reported oversize and skipped.
+                fillIsOversize(START_CONTAINER, oversizeValueCounter::get),
+                scalar(), type(IonType.INT),
+                endContainer()
+            ),
+            endStream()
+        );
+        cursor.close();
+        assertEquals(1, oversizeValueCounter.get());
+        assertEquals(0, oversizeSymbolTableCounter.get());
+        assertEquals(data.length, byteCounter.get());
+    }
+
     @ParameterizedTest(name = "constructFromBytes={0}")
     @ValueSource(booleans = {true, false})
     public void fillContainerThenSkip(boolean constructFromBytes) {
@@ -307,6 +496,38 @@ public void fillContainerThenSkip(boolean constructFromBytes) {
         );
     }
 
+    @ParameterizedTest(name = "constructFromBytes={0}")
+    @ValueSource(booleans = {true, false})
+    public void fillDelimitedContainerThenSkip(boolean constructFromBytes) {
+        IonCursorBinary cursor = initializeCursor(
+            constructFromBytes,
+            0xE0, 0x01, 0x01, 0xEA,
+            0xF3, // Delimited struct
+            0x07, // Field SID 3
+            0xF1, // Delimited list, contents start at index 7
+            0x6A, // Float length 0
+            0xF0, // End delimited list
+            0x09, // Field SID 4
+            0x61, 0x01, // Int length 1, starting at byte index 11
+            0x01, 0xF0, // End delimited struct
+            0xF3, // Delimited struct
+            0x09, // Field SID 4
+            0x60, // Int length 0, at byte index 17
+            0x01, 0xF0 // End delimited struct
+        );
+        assertSequence(
+            cursor,
+            // When reading from a fixed-size input source, the cursor does not need to peek ahead to find the end
+            // of the delimited container during fill, so the end index remains -1 in that case. Otherwise, fill
+            // looks ahead to find the end index and stores it so that it does not need to be calculated repeatedly.
+            fill(START_CONTAINER, 5, constructFromBytes ? -1 : 14),
+            container(
+                fillScalar(17, 17)
+            ),
+            endStream()
+        );
+    }
+
     @Test
     public void expectMalformedListHeaderToFailCleanly() {
         // The following test is expected to fail because the VarUInt length would extend beyond the end of the buffer.
@@ -548,4 +769,1024 @@ public void annotationWrapperLengthZeroFailsCleanly(boolean constructFromBytes) assertThrows(IonException.class, cursor::nextValue); } } + + /** + * Asserts that the given data contains macro invocation that matches the given attributes. + * @param input the data (without IVM) to test. + * @param inputType the type of input to provide to the cursor. + * @param expectedStartIndex the expected start index of the invocation's body. + * @param expectedEndIndex the expected end index of the invocation's body, or -1 if the end index cannot be + * computed from the encoding alone. + * @param expectedId the ID of the macro being invoked. + * @param isSystemInvocation whether the invocation is of a system macro. + */ + private static void testMacroInvocation( + byte[] input, + InputType inputType, + int expectedStartIndex, + int expectedEndIndex, + int expectedId, + boolean isSystemInvocation + ) throws Exception { + try (IonCursorBinary cursor = inputType.initializeCursor(withIvm(1, input))) { + assertEquals(NEEDS_INSTRUCTION, cursor.nextValue()); + Marker invocationMarker = cursor.getValueMarker(); + assertTrue(invocationMarker.typeId.isMacroInvocation); + assertEquals(expectedStartIndex, invocationMarker.startIndex); + assertEquals(expectedEndIndex, invocationMarker.endIndex); + assertEquals(expectedId, cursor.getMacroInvocationId()); + assertEquals(isSystemInvocation, cursor.isSystemInvocation()); + } + } + + @ParameterizedTest(name = "inputType={0}") + @EnumSource(InputType.class) + public void macroInvocationWithIdInOpcode(InputType inputType) throws Exception { + // Opcode 0x13 -> macro ID 0x13 + testMacroInvocation(bytes(0x13), inputType, 5, -1, 0x13, false); + } + + @ParameterizedTest(name = "inputType={0}") + @EnumSource(InputType.class) + public void macroInvocationWithOneByteFixedUIntId(InputType inputType) throws Exception { + // Opcode 0x43; 1-byte FixedUInt 0x09 follows + testMacroInvocation(bytes(0x43, 0x09), inputType, 6, -1, 841, 
false); + } + + @ParameterizedTest(name = "inputType={0}") + @EnumSource(InputType.class) + public void macroInvocationWithTwoByteFixedUIntId(InputType inputType) throws Exception { + // Opcode 0x52; 2-byte FixedUInt 0x06, 0x1E follows + testMacroInvocation(bytes(0x52, 0x06, 0x1E), inputType, 7, -1, 142918, false); + } + + @ParameterizedTest(name = "inputType={0}") + @EnumSource(InputType.class) + public void macroInvocationWithFlexUIntId(InputType inputType) throws Exception { + // Opcode 0xF4; 3-byte FlexUInt 0xFC, 0xFF, 0xFF follows + testMacroInvocation(bytes(0xF4, 0xFC, 0xFF, 0xFF), inputType, 8, -1, 2097151, false); + } + + @ParameterizedTest(name = "inputType={0}") + @EnumSource(InputType.class) + public void macroInvocationLengthPrefixed(InputType inputType) throws Exception { + // Opcode 0xF5; FlexUInt ID 2 followed by FlexUInt length 0 + testMacroInvocation(bytes(0xF5, 0x05, 0x01), inputType, 7, 7, 2, false); + } + + @ParameterizedTest(name = "inputType={0}") + @EnumSource(InputType.class) + public void systemMacroInvocation(InputType inputType) throws Exception { + // Opcode 0xEF; 1-byte FixedInt follows. Positive 4 indicates system macro ID 4. + testMacroInvocation(bytes(0xEF, 0x04), inputType, 6, -1, 4, true); + } + + /** + * Asserts that the given cursor's current value marker has the given attributes. + */ + private static void assertValueMarker(IonCursorBinary cursor, IonType expectedType, int expectedStartIndex, int expectedEndIndex) { + Marker marker = cursor.getValueMarker(); + if (expectedType == null) { + assertTrue(marker.typeId == null || marker.typeId.type == null); + } else { + assertEquals(expectedType, marker.typeId.type); + } + assertEquals(expectedStartIndex, marker.startIndex); + assertEquals(expectedEndIndex, marker.endIndex); + } + + /** + * Provides Expectations that verify that the cursor is currently positioned at a value marker with the given attributes. 
+ */ + private static ExpectationProvider valueMarker(IonType expectedType, int expectedStartIndex, int expectedEndIndex) { + return consumer -> consumer.accept(new Expectation<>( + String.format("%s[%d,%d]", expectedType, expectedStartIndex, expectedEndIndex), + cursor -> assertValueMarker(cursor, expectedType, expectedStartIndex, expectedEndIndex) + )); + } + + /** + * Provides Expectations that verify that the cursor is currently positioned at a value with a field text marker + * that matches the given attributes. + */ + static ExpectationProvider fieldNameText(int expectedStartIndex, int expectedEndIndex) { + return consumer -> consumer.accept(new Expectation<>( + String.format("field(%d, %d)", expectedStartIndex, expectedEndIndex), + cursor -> { + assertEquals(expectedStartIndex, cursor.fieldTextMarker.startIndex); + assertEquals(expectedEndIndex, cursor.fieldTextMarker.endIndex); + } + )); + } + + /** + * Provides Expectations that verify that the cursor is currently positioned at a value with annotation token + * markers that match the start and end indices of the given markers. + */ + static ExpectationProvider annotationTokens(Marker... expectedMarkers) { + return consumer -> consumer.accept(new Expectation<>( + String.format("annotations%s", Arrays.toString(expectedMarkers)), + cursor -> { + assertTrue(cursor.hasAnnotations()); + for (int i = 0; i < cursor.annotationTokenMarkers.size(); i++) { + Marker annotationTokenMarker = cursor.annotationTokenMarkers.get(i); + assertEquals(expectedMarkers[i].startIndex, annotationTokenMarker.startIndex); + assertEquals(expectedMarkers[i].endIndex, annotationTokenMarker.endIndex); + } + } + )); + } + + /** + * Provides Expectations that verify that the cursor is currently at a value with the given attributes that has been + * successfully filled. 
+ */ + private static ExpectationProvider valueReady(IonType expectedType, int expectedStartIndex, int expectedEndIndex) { + return consumer -> consumer.accept(new Expectation<>( + String.format("ready: %s[%d,%d]", expectedType, expectedStartIndex, expectedEndIndex), + cursor -> { + assertEquals(VALUE_READY, cursor.getCurrentEvent()); + assertValueMarker(cursor, expectedType, expectedStartIndex, expectedEndIndex); + } + )); + } + + /** + * Provides Expectations that verify that the cursor's current event matches the expected event. + */ + private static ExpectationProvider event(IonCursor.Event expectedEvent) { + return consumer -> consumer.accept(new Expectation<>( + String.format("%s", expectedEvent), + cursor -> { + assertEquals(expectedEvent, cursor.getCurrentEvent()); + } + )); + } + + /** + * Provides Expectations that verify that the cursor is currently positioned at macro invocation with the given ID. + */ + private static ExpectationProvider macroInvocation(int id) { + return consumer -> consumer.accept(new Expectation<>( + String.format("macro invocation %d", id), + cursor -> { + assertEquals(NEEDS_INSTRUCTION, cursor.getCurrentEvent()); + assertTrue(cursor.getValueMarker().typeId.isMacroInvocation); + assertEquals(id, cursor.getMacroInvocationId()); + } + )); + } + + /** + * Provides Expectations that advance the reader to the next value and verify that it is a macro invocation with + * the given ID. + */ + static ExpectationProvider nextMacroInvocation(int id) { + return consumer -> consumer.accept(new Expectation<>( + String.format("next macro invocation %d", id), + cursor -> { + assertEquals(NEEDS_INSTRUCTION, cursor.nextValue()); + assertTrue(cursor.getValueMarker().typeId.isMacroInvocation); + assertEquals(id, cursor.getMacroInvocationId()); + } + )); + } + + /** + * Provides Expectations that advance the reader to the next tagless value and verify that it has the given + * attributes. 
+ */ + private static ExpectationProvider nextTaglessValue(TaglessEncoding taglessEncoding, IonType expectedType, int expectedStartIndex, int expectedEndIndex) { + return consumer -> consumer.accept(new Expectation<>( + String.format("next tagless %s", taglessEncoding.name()), + cursor -> { + assertEquals(START_SCALAR, cursor.nextTaglessValue(taglessEncoding)); + assertValueMarker(cursor, expectedType, expectedStartIndex, expectedEndIndex); + } + )); + } + + /** + * Provides Expectations that fill the argument encoding bitmap (AEB) at the cursor's current index and verify that + * the AEB has the given start and end indices. + */ + private static ExpectationProvider fillArgumentEncodingBitmap(int numberOfBytes, int expectedStartIndex, int expectedEndIndex) { + return consumer -> consumer.accept(new Expectation<>( + String.format("next %d-byte AEB", numberOfBytes), + cursor -> { + assertEquals(NEEDS_INSTRUCTION, cursor.fillArgumentEncodingBitmap(numberOfBytes)); + assertValueMarker(cursor, null, expectedStartIndex, expectedEndIndex); + } + )); + } + + /** + * Provides Expectations that advance the reader to the next tagless value, fill the value, and verify that it has + * the given attributes. + */ + private static ExpectationProvider fillNextTaglessValue(TaglessEncoding taglessEncoding, IonType expectedType, int expectedStartIndex, int expectedEndIndex) { + return consumer -> consumer.accept(new Expectation<>( + String.format("fill tagless %s", taglessEncoding.name()), + cursor -> { + assertEquals(START_SCALAR, cursor.nextTaglessValue(taglessEncoding)); + assertEquals(VALUE_READY, cursor.fillValue()); + assertValueMarker(cursor, expectedType, expectedStartIndex, expectedEndIndex); + } + )); + } + + /** + * Provides Expectations that verify the cursor is positioned on a container with the given attributes, then step + * into the container. 
+ */ + private static ExpectationProvider stepInToContainer(IonType expectedType, int expectedStartIndex, int expectedEndIndex) { + return consumer -> consumer.accept(new Expectation<>( + String.format("stepIn: %s[%d,%d]", expectedType, expectedStartIndex, expectedEndIndex), + cursor -> { + assertEquals(START_CONTAINER, cursor.getCurrentEvent()); + assertValueMarker(cursor, expectedType, expectedStartIndex, expectedEndIndex); + assertEquals(NEEDS_INSTRUCTION, cursor.stepIntoContainer()); + } + )); + } + + /** + * Provides Expectations that step out of a container and verify that the resulting event is NEEDS_INSTRUCTION. + */ + private static ExpectationProvider stepOutOfContainer() { + return consumer -> consumer.accept(new Expectation<>( + "stepOut", + cursor -> assertEquals(NEEDS_INSTRUCTION, cursor.stepOutOfContainer()) + )); + } + + /** + * Provides a single Expectation that performs the function of each expectation in the given sequence. + * @param expectationProviders the expectations to perform in one step. + * @return the composite ExpectationProvider. + */ + @SafeVarargs + private static ExpectationProvider allOf(ExpectationProvider... expectationProviders) { + List> expectations = new ArrayList<>(expectationProviders.length); + Arrays.stream(expectationProviders).forEach(provider -> provider.accept(expectations::add)); + return consumer -> consumer.accept(new Expectation<>( + String.format("%s", expectations), + cursor -> { + for (Expectation expectation : expectations) { + expectation.test(cursor); + } + } + )); + } + + /** + * Provides Expectations that advance the reader to the next tagged value and verify that it has the given + * attributes. 
+ */ + private static ExpectationProvider nextTaggedValue(IonType expectedType, int expectedStartIndex, int expectedEndIndex) { + return scalar(new Expectation<>( + String.format("next tagged %s", expectedType), + cursor -> assertValueMarker(cursor, expectedType, expectedStartIndex, expectedEndIndex) + )); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void taglessInts(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(1, bytes( + 0x00, // User macro ID 0 + 0xFF, // Interpreted as uint8 + 0xFF, 0xFF, // Interpreted as int16 + 0xFF, 0xFF, 0xFF, 0xFF, // Interpreted as uint32 + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // Interpreted as int64 + 0xFC, 0xFF, 0xFF, // Interpreted as flex_uint + 0xFC, 0xFF, 0xFF // Interpreted as flex_int + )); + try (IonCursorBinary cursor = initializeCursor(STANDARD_BUFFER_CONFIGURATION, constructFromBytes, data)) { + assertSequence( + cursor, + nextMacroInvocation(0), + nextTaglessValue(TaglessEncoding.UINT8, IonType.INT, 5, 6), + nextTaglessValue(TaglessEncoding.INT16, IonType.INT, 6, 8), + nextTaglessValue(TaglessEncoding.UINT32, IonType.INT, 8, 12), + nextTaglessValue(TaglessEncoding.INT64, IonType.INT, 12, 20), + nextTaglessValue(TaglessEncoding.FLEX_UINT, IonType.INT, 20, 23), + nextTaglessValue(TaglessEncoding.FLEX_INT, IonType.INT, 23, 26), + endStream() + ); + } + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void taglessFloats(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(1, bytes( + 0x00, // User macro ID 0 + 0x00, 0x3C, // Interpreted as float16 (1.0) + 0x00, 0x00, 0x80, 0x3F, // Interpreted as float32 (1.0) + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x3F // Interpreted as float64 (1.0) + )); + try (IonCursorBinary cursor = initializeCursor(STANDARD_BUFFER_CONFIGURATION, constructFromBytes, data)) { + assertSequence( + cursor, + 
nextMacroInvocation(0), + nextTaglessValue(TaglessEncoding.FLOAT16, IonType.FLOAT, 5, 7), + nextTaglessValue(TaglessEncoding.FLOAT32, IonType.FLOAT, 7, 11), + nextTaglessValue(TaglessEncoding.FLOAT64, IonType.FLOAT, 11, 19), + endStream() + ); + } + } + + @Disabled("Until we fix the 'FIXME' in IonReaderContinuableCore") + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void taglessCompactSymbols(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(1, bytes( + 0x00, // User macro ID 0 + 0xF9, 0x6E, 0x61, 0x6D, 0x65, // interpreted as compact symbol (FlexSym with inline text "name") + 0x09, // interpreted as compact symbol (FlexSym with SID 4) + 0x01, 0x75 // interpreted as compact symbol (special FlexSym) + )); + try (IonCursorBinary cursor = initializeCursor(STANDARD_BUFFER_CONFIGURATION, constructFromBytes, data)) { + assertSequence( + cursor, + nextMacroInvocation(0), + nextTaglessValue(TaglessEncoding.FLEX_SYM, IonType.SYMBOL, 6, 10), + nextTaglessValue(TaglessEncoding.FLEX_SYM, IonType.SYMBOL, 10, 11), + nextTaglessValue(TaglessEncoding.FLEX_SYM, IonType.SYMBOL, 13, 13), + endStream() + ); + } + } + + private static byte[] taggedAndTaglessValues() throws Exception { + return withIvm(1, bytes( + 0x00, // User macro ID 0 + 0xFF, // Interpreted as uint8 + 0x60, // Tagged int 0 + 0x00, 0x00, 0x80, 0x3F, // Interpreted as float32 (1.0) + 0x6C, 0x00, 0x00, 0x80, 0x3F, // Tagged float32 (1.0) + 0xF9, 0x6E, 0x61, 0x6D, 0x65, // interpreted as compact symbol (FlexSym with inline text "name") + 0xA4, 0x6E, 0x61, 0x6D, 0x65 // Inline symbol value "name" + )); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void taglessValuesInterspersedWithTaggedValues(boolean constructFromBytes) throws Exception { + byte[] data = taggedAndTaglessValues(); + try (IonCursorBinary cursor = initializeCursor(STANDARD_BUFFER_CONFIGURATION, constructFromBytes, 
data)) { + assertSequence( + cursor, + nextMacroInvocation(0), + nextTaglessValue(TaglessEncoding.UINT8, IonType.INT, 5, 6), + nextTaggedValue(IonType.INT, 7, 7), + nextTaglessValue(TaglessEncoding.FLOAT32, IonType.FLOAT, 7, 11), + nextTaggedValue(IonType.FLOAT, 12, 16), + nextTaglessValue(TaglessEncoding.FLEX_SYM, IonType.SYMBOL, 17, 21), + nextTaggedValue(IonType.SYMBOL, 22, 26), + endStream() + ); + } + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void fillTaglessValuesInterspersedWithTaggedValues(boolean constructFromBytes) throws Exception { + byte[] data = taggedAndTaglessValues(); + try (IonCursorBinary cursor = initializeCursor(STANDARD_BUFFER_CONFIGURATION, constructFromBytes, data)) { + assertSequence( + cursor, + nextMacroInvocation(0), + fillNextTaglessValue(TaglessEncoding.UINT8, IonType.INT, 5, 6), + fillScalar(7, 7), type(IonType.INT), + fillNextTaglessValue(TaglessEncoding.FLOAT32, IonType.FLOAT, 7, 11), + fillScalar(12, 16), type(IonType.FLOAT), + fillNextTaglessValue(TaglessEncoding.FLEX_SYM, IonType.SYMBOL, 17, 21), + fillScalar(22, 26), type(IonType.SYMBOL), + endStream() + ); + } + } + + /** + * Provides a consumer that executes its expectation immediately. + */ + private static Consumer> evaluateImmediately(IonCursorBinary cursor) { + return e -> e.test(cursor); + } + + /** + * A request to be applied to an IonCursorBinary, and the expected response. + */ + private static class Instruction { + final Function request; + final ExpectationProvider response; + + private Instruction(Function request, ExpectationProvider response) { + this.request = request; + this.response = response; + } + + /** + * Executes the request. If the response was not `NEEDS_DATA`, compares it to the expected response. + * @param cursor the cursor to which the request is to be applied. + * @return false if the response was `NEEDS_DATA`, true if the response matched the expectation. 
+ */ + boolean executeAndValidate(IonCursorBinary cursor) { + if (request.apply(cursor) != NEEDS_DATA) { + response.accept(evaluateImmediately(cursor)); + return true; + } + return false; + } + } + + /** + * Creates a new Instruction. + */ + private static Instruction instruction( + Function request, + ExpectationProvider response + ) { + return new Instruction(request, response); + } + + /** + * Feeds bytes from the given data one-by-one to a new cursor, attempting to execute the next instruction + * in the given list after each byte. Each time an instruction is successfully executed, the next one in the + * list will be attempted after the next byte is fed. After all bytes have been fed, the cursor is expected to + * convey the end of the stream. + * @param data the data to read incrementally. + * @param instructions the instructions to execute. + */ + private static void executeIncrementally(byte[] data, List instructions) { + ResizingPipedInputStream pipe = new ResizingPipedInputStream(data.length); + int valuesEncountered = 0; + try (IonCursorBinary cursor = new IonCursorBinary(STANDARD_BUFFER_CONFIGURATION, pipe, null, 0, 0)) { + for (byte b : data) { + pipe.receive(b & 0xFF); + if (instructions.get(valuesEncountered).executeAndValidate(cursor)) { + valuesEncountered++; + } + } + endStream().accept(evaluateImmediately(cursor)); + } + // All instructions should result in a value except the final one, which results in stream end. 
+ assertEquals(instructions.size() - 1, valuesEncountered); + } + + @Test + public void fillTaglessValuesInterspersedWithTaggedValuesIncremental() throws Exception { + byte[] data = taggedAndTaglessValues(); + List instructions = Arrays.asList( + instruction(IonCursorBinary::nextValue, macroInvocation(0)), + instruction( + cursor -> cursor.nextTaglessValue(TaglessEncoding.UINT8), + valueMarker(IonType.INT, 5, 6) + ), + instruction( + IonCursorBinary::fillValue, + valueReady(IonType.INT, 5, 6) + ), + instruction( + IonCursorBinary::nextValue, + valueMarker(IonType.INT,7, 7) + ), + instruction( + IonCursorBinary::fillValue, + valueReady(IonType.INT, 7, 7) + ), + instruction( + cursor -> cursor.nextTaglessValue(TaglessEncoding.FLOAT32), + valueMarker(IonType.FLOAT, 7, 11) + ), + instruction( + IonCursorBinary::fillValue, + valueReady(IonType.FLOAT, 7, 11) + ), + instruction( + IonCursorBinary::nextValue, + valueMarker(IonType.FLOAT, 12, 16) + ), + instruction( + IonCursorBinary::fillValue, + valueReady(IonType.FLOAT, 12, 16) + ), + instruction( + cursor -> cursor.nextTaglessValue(TaglessEncoding.FLEX_SYM), + valueMarker(IonType.SYMBOL, 17, 21) + ), + instruction( + IonCursorBinary::fillValue, + valueReady(IonType.SYMBOL, 17, 21) + ), + instruction( + IonCursorBinary::nextValue, + valueMarker(IonType.SYMBOL, 22, 26) + ), + instruction( + IonCursorBinary::fillValue, + valueReady(IonType.SYMBOL, 22, 26) + ), + // This is the end of the stream, so the response is not used. + instruction(IonCursorBinary::nextValue, null) + ); + executeIncrementally(data, instructions); + } + + @Test + public void skipTaglessValuesInterspersedWithTaggedValuesIncremental() throws Exception { + byte[] data = taggedAndTaglessValues(); + List instructions = Arrays.asList( + instruction(IonCursorBinary::nextValue, macroInvocation(0)), + instruction( + cursor -> cursor.nextTaglessValue(TaglessEncoding.UINT8), + // 0xFF is skipped. 
+ valueMarker(IonType.INT, 5, 6) + ), + instruction( + IonCursorBinary::nextValue, + // 0x60 is buffered because it's a type ID; everything else is skipped. + valueMarker(IonType.INT,6, 6) + ), + instruction( + cursor -> cursor.nextTaglessValue(TaglessEncoding.FLOAT32), + // All four bytes are skipped. + valueMarker(IonType.FLOAT, 6, 10) + ), + instruction( + IonCursorBinary::nextValue, + // 0x6C is buffered because it's a type ID; everything else is skipped. + valueMarker(IonType.FLOAT, 7, 11) + ), + instruction( + cursor -> cursor.nextTaglessValue(TaglessEncoding.FLEX_SYM), + // All four bytes are skipped. + valueMarker(IonType.SYMBOL, 8, 12) + ), + instruction( + IonCursorBinary::nextValue, + // 0xF9 is buffered to determine the length and 0xA4 is buffered because it's a type ID; + // everything else is skipped. + valueMarker(IonType.SYMBOL, 9, 13) + ), + // This is the end of the stream, so the response is not used. + instruction(IonCursorBinary::nextValue, null) + ); + executeIncrementally(data, instructions); + } + + /** + * Creates a new Marker. + */ + private Marker marker(int startIndex, int endIndex) { + return new Marker(startIndex, endIndex); + } + + @Test + public void readFlexSymsIncrementally() throws Exception { + byte[] data = withIvm(1, hexStringToByteArray(cleanCommentedHexBytes( + "DB | Struct length 11 \n" + + "01 | Switch to FlexSym field names \n" + + "FF 61 | FlexSym a \n" + + "E8 0F F9 6E 61 6D 65 60 | Two annotation FlexSyms SID = 7 (symbols), text = name; value int 0 \n" + ))); + List instructions = Arrays.asList( + instruction(IonCursorBinary::nextValue, stepInToContainer(IonType.STRUCT, 5, 16)), + instruction( + IonCursorBinary::nextValue, + allOf( + fieldNameText(7, 8), + annotationTokens(marker(-1, 7), marker(11, 15)), + valueMarker(IonType.INT, 16, 16), + stepOutOfContainer() + ) + ), + // This is the end of the stream, so the response is not used. 
+ instruction(IonCursorBinary::nextValue, null) + ); + executeIncrementally(data, instructions); + } + + private static byte[] macroWithOneByteAEBThenIntZero() throws Exception { + return withIvm(1, hexStringToByteArray(cleanCommentedHexBytes( + "13 | Opcode 0x13 -> macro ID 0x13 \n" + + "00 | AEB 0x00 \n" + + "60 | int 0 \n" + ))); + } + + private static byte[] macroWithThreeByteAEBThenIntZero() throws Exception { + return withIvm(1, hexStringToByteArray(cleanCommentedHexBytes( + "13 | Opcode 0x13 -> macro ID 0x13 \n" + + "01 00 00 | AEB 0x01 0x00 0x00 \n" + + "60 | int 0 \n" + ))); + } + + private static void assertAEBThenIntZero(byte[] data, boolean constructFromBytes, int numberOfBytesInAEB) { + // The given data will always have a four-byte IVM followed by a 1-byte macro invocation opcode. Therefore, + // the AEB starts at index 5. + int expectedAEBEndIndex = 5 + numberOfBytesInAEB; + try (IonCursorBinary cursor = initializeCursor(STANDARD_BUFFER_CONFIGURATION, constructFromBytes, data)) { + assertSequence( + cursor, + nextMacroInvocation(0x13), valueMarker(null, 5, -1), + fillArgumentEncodingBitmap(numberOfBytesInAEB, 5, expectedAEBEndIndex), + nextTaggedValue(IonType.INT, expectedAEBEndIndex + 1, expectedAEBEndIndex + 1), + endStream() + ); + } + } + + private static void assertAEBThenIntZeroIncremental(byte[] data, int numberOfBytesInAEB) { + // The given data will always have a four-byte IVM followed by a 1-byte macro invocation opcode. Therefore, + // the AEB starts at index 5. + int expectedAEBEndIndex = 5 + numberOfBytesInAEB; + List instructions = Arrays.asList( + instruction(IonCursorBinary::nextValue, macroInvocation(0x13)), + instruction(cursor -> cursor.fillArgumentEncodingBitmap(numberOfBytesInAEB), valueMarker(null, 5, expectedAEBEndIndex)), + instruction(IonCursorBinary::nextValue, valueMarker(IonType.INT, expectedAEBEndIndex + 1, expectedAEBEndIndex + 1)), + // This is the end of the stream, so the response is not used. 
+ instruction(IonCursorBinary::nextValue, null) + ); + executeIncrementally(data, instructions); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void macroInvocationWithIdInOpcodeAndOneByteAEB(boolean constructFromBytes) throws Exception { + assertAEBThenIntZero(macroWithOneByteAEBThenIntZero(), constructFromBytes, 1); + } + + @Test + public void macroInvocationWithIdInOpcodeAndOneByteAEBIncremental() throws Exception { + assertAEBThenIntZeroIncremental(macroWithOneByteAEBThenIntZero(), 1); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void macroInvocationWithIdInOpcodeAndMultiByteAEB(boolean constructFromBytes) throws Exception { + assertAEBThenIntZero(macroWithThreeByteAEBThenIntZero(), constructFromBytes, 3); + } + + @Test + public void macroInvocationWithIdInOpcodeAndMultiByteAEBIncremental() throws Exception { + assertAEBThenIntZeroIncremental(macroWithThreeByteAEBThenIntZero(), 3); + } + + + private static ExpectationProvider enterTaglessArgumentGroup(TaglessEncoding type) { + return consumer -> consumer.accept(new Expectation<>( + String.format("enter tagless %s group", type.name()), + cursor -> assertEquals(NEEDS_INSTRUCTION, cursor.enterTaglessArgumentGroup(type)) + )); + } + + private static ExpectationProvider enterTaggedArgumentGroup() { + return consumer -> consumer.accept(new Expectation<>( + "enter tagged group", + cursor -> assertEquals(NEEDS_INSTRUCTION, cursor.enterTaggedArgumentGroup()) + )); + } + + private static ExpectationProvider nextGroupedValue(IonType expectedType, int expectedStartIndex, int expectedEndIndex) { + return consumer -> consumer.accept(new Expectation<>( + String.format("grouped value %s[%d, %d]", expectedType, expectedStartIndex, expectedEndIndex), + cursor -> { + assertEquals(IonType.isContainer(expectedType) ? 
START_CONTAINER : START_SCALAR, cursor.nextGroupedValue()); + assertValueMarker(cursor, expectedType, expectedStartIndex, expectedEndIndex); + } + )); + } + + private static ExpectationProvider endOfGroup() { + return consumer -> consumer.accept(new Expectation<>( + "end of group", + cursor -> assertEquals(NEEDS_INSTRUCTION, cursor.nextGroupedValue()) + )); + } + + private static ExpectationProvider exitArgumentGroup() { + return consumer -> consumer.accept(new Expectation<>( + "exit group", + cursor -> assertEquals(NEEDS_INSTRUCTION, cursor.exitArgumentGroup()) + )); + } + + private static byte[] taglessArgumentGroup() throws Exception { + return withIvm(1, hexStringToByteArray(cleanCommentedHexBytes( + "13 | Opcode 0x13 -> macro ID 0x13 \n" + + "05 | AEB 0b00000010 -- one grouped argument \n" + + "03 | FlexUInt 1 - page length 1 byte \n" + + "0A | int 10 \n" + + "03 | FlexUInt 1 - page length 1 byte \n" + + "0B | int 11 \n" + + "01 | FlexUInt 0 - end of argument group \n" + ))); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void fullyTraverseTaglessArgumentGroup(boolean constructFromBytes) throws Exception { + byte[] data = taglessArgumentGroup(); + try (IonCursorBinary cursor = initializeCursor(STANDARD_BUFFER_CONFIGURATION, constructFromBytes, data)) { + assertSequence( + cursor, + nextMacroInvocation(0x13), valueMarker(null, 5, -1), + fillArgumentEncodingBitmap(1, 5, 6), + enterTaglessArgumentGroup(TaglessEncoding.UINT8), + nextGroupedValue(IonType.INT, 7, 8), + nextGroupedValue(IonType.INT, 9, 10), + endOfGroup(), + exitArgumentGroup(), + endStream() + ); + } + } + + private static byte[] taggedPrefixedArgumentGroup() throws Exception { + return withIvm(1, hexStringToByteArray(cleanCommentedHexBytes( + "13 | Opcode 0x13 -> macro ID 0x13 \n" + + "05 | AEB 0b00000010 -- one grouped argument \n" + + "0B | FlexUInt 1 - group length 5 bytes \n" + + "60 | int 0 \n" + + "B3 | List length 3 \n" + + "91 | 
String length 1 \n" + + "61 | 'a' \n" + + "6A | Float 0 \n" + ))); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void fullyTraverseTaggedPrefixedArgumentGroup(boolean constructFromBytes) throws Exception { + byte[] data = taggedPrefixedArgumentGroup(); + try (IonCursorBinary cursor = initializeCursor(STANDARD_BUFFER_CONFIGURATION, constructFromBytes, data)) { + assertSequence( + cursor, + nextMacroInvocation(0x13), valueMarker(null, 5, -1), + fillArgumentEncodingBitmap(1, 5, 6), + enterTaggedArgumentGroup(), + nextGroupedValue(IonType.INT, 8, 8), + nextGroupedValue(IonType.LIST, 9, 12), + stepInToContainer(IonType.LIST, 9, 12), + nextTaggedValue(IonType.STRING, 10, 11), + nextTaggedValue(IonType.FLOAT, 12, 12), + stepOutOfContainer(), + endOfGroup(), + exitArgumentGroup(), + endStream() + ); + } + } + + private static byte[] taggedDelimitedArgumentGroup() throws Exception { + return withIvm(1, hexStringToByteArray(cleanCommentedHexBytes( + "13 | Opcode 0x13 -> macro ID 0x13 \n" + + "05 | AEB 0b00000010 -- one grouped argument \n" + + "01 | FlexUInt 0 - delimited group \n" + + "60 | int 0 \n" + + "B3 | List length 3 \n" + + "91 | String length 1 \n" + + "61 | 'a' \n" + + "6A | Float 0 \n" + + "F0 | End of delimited group \n" + ))); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void fullyTraverseTaggedDelimitedArgumentGroup(boolean constructFromBytes) throws Exception { + byte[] data = taggedDelimitedArgumentGroup(); + try (IonCursorBinary cursor = initializeCursor(STANDARD_BUFFER_CONFIGURATION, constructFromBytes, data)) { + assertSequence( + cursor, + nextMacroInvocation(0x13), valueMarker(null, 5, -1), + fillArgumentEncodingBitmap(1, 5, 6), + enterTaggedArgumentGroup(), + nextGroupedValue(IonType.INT, 8, 8), + nextGroupedValue(IonType.LIST, 9, 12), + stepInToContainer(IonType.LIST, 9, 12), + nextTaggedValue(IonType.STRING, 10, 11), + 
nextTaggedValue(IonType.FLOAT, 12, 12), + stepOutOfContainer(), + endOfGroup(), + exitArgumentGroup(), + endStream() + ); + } + } + + private static byte[] emptyArgumentGroups() throws Exception { + return withIvm(1, hexStringToByteArray(cleanCommentedHexBytes( + "13 | Opcode 0x13 -> macro ID 0x13 \n" + + "05 | AEB 0b00001010 -- two grouped arguments \n" + + // First group: interpreted as tagged + "01 | FlexUInt 0 - delimited group \n" + + "F0 | End of delimited group \n" + + // Second group: interpreted as tagless + "01 | FlexUInt 0 - end of argument group \n" + ))); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void emptyArgumentGroups(boolean constructFromBytes) throws Exception { + byte[] data = emptyArgumentGroups(); + try (IonCursorBinary cursor = initializeCursor(STANDARD_BUFFER_CONFIGURATION, constructFromBytes, data)) { + assertSequence( + cursor, + nextMacroInvocation(0x13), valueMarker(null, 5, -1), + fillArgumentEncodingBitmap(1, 5, 6), + enterTaggedArgumentGroup(), + endOfGroup(), + exitArgumentGroup(), + enterTaglessArgumentGroup(TaglessEncoding.UINT8), + endOfGroup(), + exitArgumentGroup(), + endStream() + ); + } + } + + private static byte[] twoArgumentGroupsFollowedBySingleValue() throws Exception { + return withIvm(1, hexStringToByteArray(cleanCommentedHexBytes( + "13 | Opcode 0x13 -> macro ID 0x13 \n" + + "05 | AEB 0b00001010 -- two grouped arguments \n" + + // First group: interpreted as tagged + "01 | FlexUInt 0 - delimited group \n" + + "60 | int 0 \n" + + "B3 | List length 3 \n" + + "91 | String length 1 \n" + + "61 | 'a' \n" + + "6A | Float 0 \n" + + "F0 | End of delimited group \n" + + // Second group: interpreted as tagless + "03 | FlexUInt 1 - page length 1 byte \n" + + "0A | int 10 \n" + + "03 | FlexUInt 1 - page length 1 byte \n" + + "0B | int 11 \n" + + "01 | FlexUInt 0 - end of argument group \n" + + "B1 | List length 1 \n" + + "60 | int 0 \n" + ))); + } + + 
@ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void twoArgumentGroupsFollowedBySingleValue(boolean constructFromBytes) throws Exception { + byte[] data = twoArgumentGroupsFollowedBySingleValue(); + try (IonCursorBinary cursor = initializeCursor(STANDARD_BUFFER_CONFIGURATION, constructFromBytes, data)) { + assertSequence( + cursor, + nextMacroInvocation(0x13), valueMarker(null, 5, -1), + fillArgumentEncodingBitmap(1, 5, 6), + enterTaggedArgumentGroup(), + nextGroupedValue(IonType.INT, 8, 8), + nextGroupedValue(IonType.LIST, 9, 12), + stepInToContainer(IonType.LIST, 9, 12), + nextTaggedValue(IonType.STRING, 10, 11), + nextTaggedValue(IonType.FLOAT, 12, 12), + stepOutOfContainer(), + endOfGroup(), + exitArgumentGroup(), + enterTaglessArgumentGroup(TaglessEncoding.UINT8), + nextGroupedValue(IonType.INT, 14, 15), + nextGroupedValue(IonType.INT, 16, 17), + endOfGroup(), + exitArgumentGroup(), + container( + scalar(), valueMarker(IonType.INT, 20, 20) + ), + endStream() + ); + } + } + + @Test + public void twoArgumentGroupsFollowedBySingleValueIncremental() throws Exception { + byte[] data = twoArgumentGroupsFollowedBySingleValue(); + List instructions = Arrays.asList( + instruction(IonCursorBinary::nextValue, macroInvocation(0x13)), + instruction(cursor -> cursor.fillArgumentEncodingBitmap(1), valueMarker(null, 5, 6)), + instruction(IonCursorBinary::enterTaggedArgumentGroup, event(NEEDS_INSTRUCTION)), + instruction(IonCursorBinary::nextGroupedValue, valueMarker(IonType.INT, 8, 8)), + instruction( + cursor -> { + if (cursor.nextGroupedValue() == NEEDS_DATA) { + return NEEDS_DATA; + } + assertValueMarker(cursor, IonType.LIST, 9, 12); + return cursor.stepIntoContainer(); + }, + event(NEEDS_INSTRUCTION) + ), + instruction(IonCursorBinary::nextValue, valueMarker(IonType.STRING, 10, 11)), + instruction( + cursor -> { + if (cursor.nextValue() == NEEDS_DATA) { + return NEEDS_DATA; + } + // Note: the value byte of the string 
is skipped, not buffered. + assertValueMarker(cursor, IonType.FLOAT, 11, 11); + return cursor.stepOutOfContainer(); + }, + event(NEEDS_INSTRUCTION) + ), + instruction( + cursor -> { + if (cursor.nextGroupedValue() == NEEDS_DATA) { + return NEEDS_DATA; + } + return cursor.exitArgumentGroup(); + }, + event(NEEDS_INSTRUCTION) + ), + instruction(cursor -> cursor.enterTaglessArgumentGroup(TaglessEncoding.UINT8), event(NEEDS_INSTRUCTION)), + instruction(IonCursorBinary::nextGroupedValue, valueMarker(IonType.INT, 13, 14)), + instruction(IonCursorBinary::nextGroupedValue, valueMarker(IonType.INT, 15, 16)), + instruction( + cursor -> { + if (cursor.nextGroupedValue() == NEEDS_DATA) { + return NEEDS_DATA; + } + return cursor.exitArgumentGroup(); + }, + event(NEEDS_INSTRUCTION) + ), + instruction( + cursor -> { + if (cursor.nextValue() == NEEDS_DATA) { + return NEEDS_DATA; + } + assertValueMarker(cursor, IonType.LIST, 18, 19); + return cursor.stepIntoContainer(); + }, + event(NEEDS_INSTRUCTION) + ), + instruction( + cursor -> { + if (cursor.nextValue() == NEEDS_DATA) { + return NEEDS_DATA; + } + assertValueMarker(cursor, IonType.INT, 19, 19); + return cursor.stepOutOfContainer(); + }, + event(NEEDS_INSTRUCTION) + ), + // This is the end of the stream, so the response is not used. 
+ instruction(IonCursorBinary::nextValue, null) + ); + executeIncrementally(data, instructions); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void skipOverArgumentGroups(boolean constructFromBytes) throws Exception { + byte[] data = twoArgumentGroupsFollowedBySingleValue(); + try (IonCursorBinary cursor = initializeCursor(STANDARD_BUFFER_CONFIGURATION, constructFromBytes, data)) { + assertSequence( + cursor, + nextMacroInvocation(0x13), valueMarker(null, 5, -1), + fillArgumentEncodingBitmap(1, 5, 6), + enterTaggedArgumentGroup(), + nextGroupedValue(IonType.INT, 8, 8), + exitArgumentGroup(), // Early exit + enterTaglessArgumentGroup(TaglessEncoding.UINT8), + exitArgumentGroup(), // Skip over group + container( + scalar(), valueMarker(IonType.INT, 20, 20) + ), + endStream() + ); + } + } + + @Test + public void skipOverArgumentGroupsIncremental() throws Exception { + byte[] data = twoArgumentGroupsFollowedBySingleValue(); + List instructions = Arrays.asList( + instruction(IonCursorBinary::nextValue, macroInvocation(0x13)), + instruction(cursor -> cursor.fillArgumentEncodingBitmap(1), valueMarker(null, 5, 6)), + instruction(IonCursorBinary::enterTaggedArgumentGroup, event(NEEDS_INSTRUCTION)), + instruction(IonCursorBinary::nextGroupedValue, valueMarker(IonType.INT, 8, 8)), + // Skip the list argument + instruction(IonCursorBinary::exitArgumentGroup, event(NEEDS_INSTRUCTION)), + instruction(cursor -> cursor.enterTaglessArgumentGroup(TaglessEncoding.UINT8), event(NEEDS_INSTRUCTION)), + // Skip all arguments in the group + instruction(IonCursorBinary::exitArgumentGroup, event(NEEDS_INSTRUCTION)), + instruction(IonCursorBinary::nextValue, valueMarker(IonType.LIST, 16, 17)), + // This is the end of the stream, so the response is not used. 
+ instruction(IonCursorBinary::nextValue, null) + ); + executeIncrementally(data, instructions); + } + + // TODO Nest argument groups >8 deep, exercising argument group stack growth. + // TODO Add more incremental tests for various argument group combinations, improving coverage of NEEDS_DATA cases. + // TODO Extend a tagged prefixed argument group page beyond the current buffer limit. In slow mode, this should + // cause the whole page to be filled. In unchecked mode, this should be an error for unexpected EOF. } diff --git a/src/test/java/com/amazon/ion/impl/IonCursorTestUtilities.java b/src/test/java/com/amazon/ion/impl/IonCursorTestUtilities.java index 69c009b913..159a1675fc 100644 --- a/src/test/java/com/amazon/ion/impl/IonCursorTestUtilities.java +++ b/src/test/java/com/amazon/ion/impl/IonCursorTestUtilities.java @@ -1,12 +1,15 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 - package com.amazon.ion.impl; +import com.amazon.ion.IntegerSize; import com.amazon.ion.IonBufferConfiguration; import com.amazon.ion.IonType; +import com.amazon.ion.SymbolToken; +import java.math.BigInteger; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.function.Consumer; @@ -17,6 +20,7 @@ import static com.amazon.ion.IonCursor.Event.START_SCALAR; import static com.amazon.ion.IonCursor.Event.VALUE_READY; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; public class IonCursorTestUtilities { @@ -209,6 +213,64 @@ static ExpectationProvider )); } + /** + * Provides Expectations that verify that advancing the cursor to the next value positions the cursor on a scalar + * with type symbol and the given expected value. 
+ */ + static ExpectationProvider fillSymbolValue(int expectedValue) { + return consumer -> consumer.accept(new Expectation<>( + String.format("symbol($%s)", expectedValue), + reader -> { + assertEquals(VALUE_READY, reader.fillValue()); + assertEquals(IonType.SYMBOL, reader.getType()); + assertEquals(expectedValue, reader.symbolValueId()); + } + )); + } + + static ExpectationProvider integerSize(IntegerSize expectedSize) { + return consumer -> consumer.accept(new Expectation<>( + String.format("integerSize(%s)", expectedSize), + reader -> { + assertEquals(expectedSize, reader.getIntegerSize()); + } + )); + } + + static ExpectationProvider intValue(int expectedValue) { + return consumer -> consumer.accept(new Expectation<>( + String.format("int(%d)", expectedValue), + reader -> { + assertEquals(IntegerSize.INT, reader.getIntegerSize()); + assertEquals(expectedValue, reader.intValue()); + } + )); + } + + static ExpectationProvider longValue(long expectedValue) { + return consumer -> consumer.accept(new Expectation<>( + String.format("long(%d)", expectedValue), + reader -> { + assertTrue(reader.getIntegerSize().ordinal() <= IntegerSize.LONG.ordinal()); + assertEquals(expectedValue, reader.longValue()); + } + )); + } + + static ExpectationProvider bigIntegerValue(BigInteger expectedValue) { + return consumer -> consumer.accept(new Expectation<>( + String.format("bigInteger(%s)", expectedValue), + reader -> assertEquals(expectedValue, reader.bigIntegerValue()) + )); + } + + static ExpectationProvider doubleValue(double expectedValue) { + return consumer -> consumer.accept(new Expectation<>( + String.format("double(%f)", expectedValue), + reader -> assertEquals(expectedValue, reader.doubleValue(), 1e-9) + )); + } + /** * Provides an Expectation that verifies that advancing the cursor positions it on a container value, without * filling that container. 
@@ -253,6 +315,32 @@ static ExpectationProvider fillContainer(IonType + }; + } + + + static ExpectationProvider fieldName(String expectedValue) { + return consumer -> consumer.accept(new Expectation<>( + String.format("fieldName(%s)", expectedValue), + reader -> { + assertEquals(expectedValue, reader.getFieldText()); + } + )); + } + + static ExpectationProvider annotations(String... expectedAnnotations) { + return consumer -> consumer.accept(new Expectation<>( + String.format("annotations(%s)", Arrays.toString(expectedAnnotations)), + reader -> { + reader.nextValue(); + assertTrue(reader.hasAnnotations(), "Expected there to be annotations"); + List<SymbolToken> tokens = new ArrayList<>(); + reader.consumeAnnotationTokens(tokens::add); + for (int i = 0; i < Math.min(tokens.size(), expectedAnnotations.length); i++) { + assertEquals(expectedAnnotations[i], tokens.get(i).getText()); + } + assertEquals(expectedAnnotations.length, tokens.size()); + } + )); + } + /** * Provides an Expectation that verifies that advancing the cursor positions it at the current end of the stream. */ diff --git a/src/test/java/com/amazon/ion/impl/IonRawTextReaderTest_1_1.java b/src/test/java/com/amazon/ion/impl/IonRawTextReaderTest_1_1.java new file mode 100644 index 0000000000..d164951064 --- /dev/null +++ b/src/test/java/com/amazon/ion/impl/IonRawTextReaderTest_1_1.java @@ -0,0 +1,158 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl; + +import com.amazon.ion.IonReader; +import com.amazon.ion.IonType; +import com.amazon.ion.system.SimpleCatalog; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.CsvSource; +import org.junit.jupiter.params.provider.MethodSource; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class IonRawTextReaderTest_1_1 { + + public enum ExpressionType { + E_EXPRESSION, + EXPRESSION_GROUP, + NONE; + + void verifyExpressionType(IonReaderTextRawX rawReader) { + switch (this) { + case E_EXPRESSION: + assertTrue(rawReader._container_is_e_expression); + assertFalse(rawReader._container_is_expression_group); + break; + case EXPRESSION_GROUP: + assertFalse(rawReader._container_is_e_expression); + assertTrue(rawReader._container_is_expression_group); + break; + case NONE: + assertFalse(rawReader._container_is_e_expression); + assertFalse(rawReader._container_is_expression_group); + break; + } + } + } + + static Arguments[] validSyntax() { + return new Arguments[] { + Arguments.of(1, "(:foo)", "foo", null, ExpressionType.E_EXPRESSION), + Arguments.of(1, "(:foo bar)", "foo", "bar", ExpressionType.E_EXPRESSION), + Arguments.of(1, "(::foo)", "foo", null, ExpressionType.EXPRESSION_GROUP), // TODO do we want to require whitespace after ::? + Arguments.of(1, "(:: foo bar)", "foo", "bar", ExpressionType.EXPRESSION_GROUP), + Arguments.of(1, "(:: foo::bar)", "bar", null, ExpressionType.EXPRESSION_GROUP), + Arguments.of(1, "(::)", null, null, ExpressionType.EXPRESSION_GROUP), + Arguments.of(1, "(.foo)", ".", "foo", ExpressionType.NONE), + Arguments.of(1, "(.. 
foo)", "..", "foo", ExpressionType.NONE), + Arguments.of(1, "(.+ foo)", ".+", "foo", ExpressionType.NONE), + Arguments.of(1, "(..+ foo)", "..+", "foo", ExpressionType.NONE), + Arguments.of(1, "(.+ foo)", ".+", "foo", ExpressionType.NONE), + Arguments.of(1, "(..+ foo)", "..+", "foo", ExpressionType.NONE), + Arguments.of(0, "{ foo: bar }", "bar", null, ExpressionType.NONE), + Arguments.of(1, "{ foo: bar }", "bar", null, ExpressionType.NONE), + Arguments.of(1, "(foo::bar)", "bar", null, ExpressionType.NONE), + }; + } + + private static IonReaderTextRawX newTextReader(String input) { + return new IonReaderTextUserX( + new SimpleCatalog(), + LocalSymbolTable.DEFAULT_LST_FACTORY, + UnifiedInputStreamX.makeStream(input) + ); + } + + @ParameterizedTest(name = "v={0}:{1}") + @MethodSource("validSyntax") + public void validExpressionSyntax(int minorVersion, String input, String firstSymbol, String secondSymbol, ExpressionType expressionType) throws Exception { + try (IonReaderTextRawX reader = newTextReader(input)) { + reader.setMinorVersion(minorVersion); + reader.nextRaw(); + expressionType.verifyExpressionType(reader); + reader.stepIn(); + if (firstSymbol == null) { + assertNull(reader.nextRaw()); + } else { + assertEquals(IonType.SYMBOL, reader.nextRaw()); + assertEquals(firstSymbol, reader.stringValue()); + } + if (secondSymbol == null) { + assertNull(reader.nextRaw()); + } else { + assertEquals(IonType.SYMBOL, reader.nextRaw()); + assertEquals(secondSymbol, reader.stringValue()); + assertNull(reader.nextRaw()); + } + reader.stepOut(); + assertNull(reader.nextRaw()); + } + } + + static Arguments[] invalidSyntax() { + return new Arguments[] { + // Colon is not a valid operator in Ion 1.0. + Arguments.of(0, "(:foo)", null), + Arguments.of(0, "(::foo)", null), + // Colon is not a valid operator in Ion 1.1 except at the beginning of an s-expression. 
+ Arguments.of(1, "(:foo :)", "foo"), + // The following fails on the first next() because the second double-colon does not have a value to follow. + Arguments.of(1, "(::foo ::)", null), + // The following fails on the first next() because the double-colon does not have a value to follow. + Arguments.of(1, "(foo ::)", null), + Arguments.of(1, "(foo :)", "foo"), + Arguments.of(1, "{:foo}", null), + Arguments.of(1, "{::foo}", null), + Arguments.of(1, "[:foo]", null), + Arguments.of(1, "[::foo]", null), + }; + } + + @ParameterizedTest(name = "v={0}:{1}") + @MethodSource("invalidSyntax") + public void invalidExpressionSyntax(int minorVersion, String input, String firstSymbol) throws Exception { + try (IonReaderTextRawX reader = newTextReader(input)) { + reader.setMinorVersion(minorVersion); + reader.nextRaw(); + reader.stepIn(); + if (firstSymbol != null) { + assertEquals(IonType.SYMBOL, reader.nextRaw()); + assertEquals(firstSymbol, reader.stringValue()); + } + assertThrows(IonReaderTextRawX.IonReaderTextParsingException.class, reader::nextRaw); + } + } + + @ParameterizedTest + @CsvSource({ + "(:values 0) (:values 1)", + "(:values (: values 0)) 1", + "(:values) 0 (:values) 1", + "(:values) (:values 0) 1", + "(:values) (:values) (:values 0) 1", + "(:values (:: ) ) 0 1", + "(:values (:: 0 1))", + "(:values 0 1)", + // "(:values (:: (:: 0) (:values (:: 1))))", // TODO make this illegal: https://github.com/amazon-ion/ion-java/issues/1009 + // "(:values (:: 0) (:values (:: 1)))", // TODO make this illegal: https://github.com/amazon-ion/ion-java/issues/1009 + "(:values (:values (:: 0 1)))", + "(:values (:values 0 1))", + "(:1 (:1 0 1))", + // "(:1 (:: (:: 0) (:1 (:: 1))))" // TODO make this illegal: https://github.com/amazon-ion/ion-java/issues/1009 + }) + public void validValuesInvocations(String text) throws Exception { + try (IonReader reader = newTextReader("$ion_1_1 " + text)) { + assertEquals(IonType.INT, reader.next()); + assertEquals(0, reader.intValue()); + 
assertEquals(IonType.INT, reader.next()); + assertEquals(1, reader.intValue()); + assertNull(reader.next()); + } + } +} diff --git a/src/test/java/com/amazon/ion/impl/IonRawTextWriterTest_1_1.kt b/src/test/java/com/amazon/ion/impl/IonRawTextWriterTest_1_1.kt new file mode 100644 index 0000000000..66b15cbd65 --- /dev/null +++ b/src/test/java/com/amazon/ion/impl/IonRawTextWriterTest_1_1.kt @@ -0,0 +1,1111 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl + +import com.amazon.ion.* +import com.amazon.ion.impl.macro.* +import com.amazon.ion.system.* +import java.math.BigDecimal +import java.math.BigInteger +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertFalse +import org.junit.jupiter.api.Assertions.assertTrue +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.assertThrows +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.CsvSource +import org.junit.jupiter.params.provider.EnumSource + +class IonRawTextWriterTest_1_1 { + + private fun IonRawTextWriter_1_1.stepInEExp(id: Int) = stepInEExp(id, false, SystemMacro.Values) + + private fun standardBuilder(): _Private_IonTextWriterBuilder_1_1 { + return _Private_IonTextWriterBuilder_1_1.standard() + } + + private inline fun ionWriter( + out: StringBuilder = StringBuilder(), + builderConfigurator: IonTextWriterBuilder_1_1.() -> Unit = { /* noop */ }, + block: IonRawTextWriter_1_1.() -> Unit = {}, + ): IonRawTextWriter_1_1 { + val b = standardBuilder() + .apply(builderConfigurator) + // Always use LF because the tests' expected data uses LF. 
+ .withNewLineType(IonTextWriterBuilder.NewLineType.LF) + + val rawWriter = IonRawTextWriter_1_1( + options = b as _Private_IonTextWriterBuilder_1_1, + output = _Private_IonTextAppender.forAppendable(out) + ) + block.invoke(rawWriter) + return rawWriter + } + + private inline fun writeAsString( + builderConfigurator: IonTextWriterBuilder_1_1.() -> Unit = { /* noop */ }, + autoClose: Boolean = true, + block: IonRawTextWriter_1_1.() -> Unit, + ): String { + val out = StringBuilder() + val rawWriter = ionWriter(out, builderConfigurator, block) + if (autoClose) rawWriter.close() + return out.toString() + } + + private inline fun assertWriterOutputEquals( + text: String, + builderConfigurator: IonTextWriterBuilder_1_1.() -> Unit = { /* noop */ }, + autoClose: Boolean = true, + block: IonRawTextWriter_1_1.() -> Unit, + ) { + // Trim whitespace since the IonRawTextWriter_1_1 eagerly writes top-level separators. + assertEquals(text, writeAsString(builderConfigurator, autoClose, block).trim()) + } + + @Test + fun `calling close while in a container should throw IonException`() { + ionWriter { + stepInList(false) + assertThrows<IonException> { close() } + } + } + + @Test + fun `calling finish while in a container should throw IonException`() { + ionWriter { + stepInList(true) + assertThrows<IonException> { flush() } + } + } + + @Test + fun `calling finish with a dangling annotation should throw IonException`() { + ionWriter { + writeAnnotations(10) + assertThrows<IonException> { flush() } + } + } + + @Test + fun `calling stepOut while not in a container should throw IonException`() { + ionWriter { + assertThrows<IonException> { stepOut() } + } + } + + @Test + fun `calling stepOut with a dangling annotation should throw IonException`() { + ionWriter { + stepInList(true) + writeAnnotations(10) + assertThrows<IonException> { stepOut() } + } + } + + @Test + fun `calling writeIVM when in a container should throw IonException`() { + ionWriter { + stepInList(false) + assertThrows<IonException> { writeIVM() } + } + } + + @Test + fun `calling writeIVM with a 
dangling annotation should throw IonException`() { + ionWriter { + writeAnnotations(10) + assertThrows<IonException> { writeIVM() } + } + } + + @Test + fun `calling finish should cause the buffered data to be written to the output stream`() { + val actual = writeAsString(autoClose = false) { + writeIVM() + flush() + } + // Just checking that data is written, not asserting the content. + assertTrue(actual.isNotBlank()) + } + + @Test + fun `after calling finish, it should still be possible to write more data`() { + val actual = writeAsString { + flush() + writeIVM() + } + // Just checking that data is written, not asserting the content. + assertTrue(actual.isNotBlank()) + } + + @Test + fun `calling close should cause the buffered data to be written to the output stream`() { + val actual = writeAsString(autoClose = false) { + writeIVM() + close() + } + // Just checking that data is written, not asserting the content. + assertTrue(actual.isNotBlank()) + } + + @Test + fun `calling close or finish multiple times should not throw any exceptions`() { + val actual = writeAsString { + writeIVM() + flush() + close() + flush() + close() + flush() + } + // Just checking that data is written, not asserting the content. + assertTrue(actual.isNotBlank()) + } + + @Test + fun `write the IVM`() { + assertWriterOutputEquals("\$ion_1_1") { + writeIVM() + } + } + + @Test + fun `write nothing`() { + assertWriterOutputEquals("") { + } + } + + @Test + fun `write a null`() { + assertWriterOutputEquals("null") { + writeNull() + } + } + + @Test + fun `write a null with a specific type`() { + // Just checking one type. 
The full range of types are checked in IonEncoder_1_1Test + assertWriterOutputEquals("null.bool") { + writeNull(IonType.BOOL) + } + } + + @ParameterizedTest + @CsvSource("true, true", "false, false") + fun `write a boolean`(value: Boolean, expected: String) { + assertWriterOutputEquals(expected) { + writeBool(value) + } + } + + @Test + fun `write a delimited list`() { + assertWriterOutputEquals("[true,false]") { + stepInList(true) + writeBool(true) + writeBool(false) + stepOut() + } + } + + @Test + fun `write a prefixed list`() { + assertWriterOutputEquals("[true,false]") { + stepInList(false) + writeBool(true) + writeBool(false) + stepOut() + } + } + + @Test + fun `write multiple nested prefixed lists`() { + assertWriterOutputEquals("[[[[[]]]]]") { + repeat(5) { stepInList(false) } + repeat(5) { stepOut() } + } + } + + @Test + fun `write multiple nested delimited lists`() { + assertWriterOutputEquals("[[[[]]]]") { + repeat(4) { stepInList(true) } + repeat(4) { stepOut() } + } + } + + @Test + fun `write multiple nested delimited and prefixed lists`() { + assertWriterOutputEquals("[[[[[[[[]]]]]]]]") { + repeat(4) { + stepInList(true) + stepInList(false) + } + repeat(8) { stepOut() } + } + } + + @Test + fun `write a sexp`() { + assertWriterOutputEquals("(true false)") { + stepInSExp(usingLengthPrefix = false) + writeBool(true) + writeBool(false) + stepOut() + } + assertWriterOutputEquals("(true false)") { + stepInSExp(usingLengthPrefix = true) + writeBool(true) + writeBool(false) + stepOut() + } + } + + @Test + fun `write multiple nested sexps`() { + assertWriterOutputEquals("(((((((())))))))") { + repeat(4) { + stepInSExp(usingLengthPrefix = false) + stepInSExp(usingLengthPrefix = true) + } + repeat(8) { stepOut() } + } + } + + @Test + fun `write a struct`() { + assertWriterOutputEquals( + """{$11:true,$12:false}""" + ) { + stepInStruct(usingLengthPrefix = true) + writeFieldName(11) + writeBool(true) + writeFieldName(12) + writeBool(false) + stepOut() + } + 
assertWriterOutputEquals( + """{$11:true,$12:false}""" + ) { + stepInStruct(usingLengthPrefix = false) + writeFieldName(11) + writeBool(true) + writeFieldName(12) + writeBool(false) + stepOut() + } + } + + @Test + fun `write multiple nested structs`() { + assertWriterOutputEquals( + "{a:{b:{a:{b:{a:{b:{a:{b:{}}}}}}}}}" + ) { + stepInStruct(usingLengthPrefix = true) + repeat(4) { + writeFieldName("a") + stepInStruct(usingLengthPrefix = false) + writeFieldName("b") + stepInStruct(usingLengthPrefix = true) + } + repeat(9) { + stepOut() + } + } + } + + @Test + fun `write empty struct`() { + assertWriterOutputEquals("{}") { + stepInStruct(usingLengthPrefix = true) + stepOut() + } + assertWriterOutputEquals("{}") { + stepInStruct(usingLengthPrefix = false) + stepOut() + } + } + + @Test + fun `write prefixed struct with a single text field name`() { + assertWriterOutputEquals( + """{foo:true}""" + ) { + stepInStruct(false) + writeFieldName("foo") + writeBool(true) + stepOut() + } + } + + @Test + fun `write a struct with sid 0`() { + assertWriterOutputEquals( + "{\$0:true}" + ) { + stepInStruct(usingLengthPrefix = true) + writeFieldName(0) + writeBool(true) + stepOut() + } + assertWriterOutputEquals( + "{\$0:true}" + ) { + stepInStruct(usingLengthPrefix = false) + writeFieldName(0) + writeBool(true) + stepOut() + } + } + + @Test + fun `writing a value in a struct with no field name should throw an exception`() { + ionWriter { + stepInStruct(true) + assertThrows { writeBool(true) } + } + ionWriter { + stepInStruct(false) + assertThrows { writeBool(true) } + } + } + + @Test + fun `calling writeFieldName outside of a struct should throw an exception`() { + ionWriter { + assertThrows { writeFieldName(12) } + } + ionWriter { + assertThrows { writeFieldName("foo") } + } + } + + @Test + fun `calling stepOut with a dangling field name should throw an exception`() { + ionWriter { + stepInStruct(false) + writeFieldName(12) + assertThrows { stepOut() } + } + ionWriter { + 
stepInStruct(true) + writeFieldName("foo") + assertThrows { stepOut() } + } + } + + @Test + fun `writeAnnotations with empty int array should write no annotations`() { + assertWriterOutputEquals("true") { + writeAnnotations(intArrayOf()) + writeBool(true) + } + } + + @Test + fun `write one sid annotation`() { + val expectedBytes = "\$3::true" + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(3) + writeBool(true) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(3) + writeAnnotations(intArrayOf()) + writeAnnotations(arrayOf()) + writeBool(true) + } + } + + @Test + fun `write two sid annotations`() { + val expectedBytes = "\$3::\$4::true" + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(3) + writeAnnotations(4) + writeBool(true) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(3, 4) + writeBool(true) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(intArrayOf(3, 4)) + writeBool(true) + } + } + + @Test + fun `write three sid annotations`() { + val expectedBytes = "\$3::\$4::\$256::true" + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(3) + writeAnnotations(4) + writeAnnotations(256) + writeBool(true) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(3) + writeAnnotations(4, 256) + writeBool(true) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(intArrayOf(3, 4)) + writeAnnotations(256) + writeBool(true) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(intArrayOf(3, 4, 256)) + writeBool(true) + } + } + + @Test + fun `write sid 0 annotation`() { + assertWriterOutputEquals("\$0::true") { + writeAnnotations(0) + writeBool(true) + } + } + + @Test + fun `write one text annotation`() { + val expectedBytes = "foo::false" + assertWriterOutputEquals(expectedBytes) { + writeAnnotations("foo") + writeBool(false) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations("foo") + writeAnnotations(intArrayOf()) + writeBool(false) 
+ } + } + + @Test + fun `write two text annotations`() { + val expectedBytes = "foo::bar::false" + assertWriterOutputEquals(expectedBytes) { + writeAnnotations("foo") + writeAnnotations("bar") + writeBool(false) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(arrayOf("foo", "bar")) + writeBool(false) + } + } + + @Test + fun `write three text annotations`() { + val expectedBytes = "foo::bar::baz::false" + assertWriterOutputEquals(expectedBytes) { + writeAnnotations("foo") + writeAnnotations("bar") + writeAnnotations("baz") + writeBool(false) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations("foo") + writeAnnotations("bar", "baz") + writeBool(false) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(arrayOf("foo", "bar")) + writeAnnotations("baz") + writeBool(false) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(arrayOf("foo", "bar", "baz")) + writeBool(false) + } + } + + @Test + fun `write empty text and sid 0 annotations`() { + assertWriterOutputEquals("\$0::''::true") { + writeAnnotations(0) + writeAnnotations("") + writeBool(true) + } + } + + @Test + fun `write two mixed sid and text annotations`() { + val expectedBytes = "\$10::foo::false" + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(10) + writeAnnotations("foo") + writeBool(false) + } + } + + @Test + fun `write three mixed sid and inline annotations`() { + val expectedBytes = "\$10::foo::bar::false" + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(10) + writeAnnotations("foo") + writeAnnotations("bar") + writeBool(false) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(10) + writeAnnotations(arrayOf("foo", "bar")) + writeBool(false) + } + } + + @Test + fun `_private_hasFirstAnnotation() should return false when there are no annotations`() { + val rawWriter = ionWriter() + assertFalse(rawWriter._private_hasFirstAnnotation(SystemSymbols.ION_SID, SystemSymbols.ION)) + } + + @Test + fun 
`_private_hasFirstAnnotation() should return true if only the sid matches`() { + val rawWriter = ionWriter() + rawWriter.writeAnnotations(SystemSymbols.ION_SID) + assertTrue(rawWriter._private_hasFirstAnnotation(SystemSymbols.ION_SID, null)) + } + + @Test + fun `_private_hasFirstAnnotation() should return true if only the text matches`() { + val rawWriter = ionWriter() + rawWriter.writeAnnotations(SystemSymbols.ION) + assertTrue(rawWriter._private_hasFirstAnnotation(-1, SystemSymbols.ION)) + } + + @Test + fun `_private_hasFirstAnnotation() should return false if the first annotation does not match the sid or text`() { + val rawWriter = ionWriter() + rawWriter.writeAnnotations(SystemSymbols.IMPORTS_SID) + rawWriter.writeAnnotations(SystemSymbols.ION) + rawWriter.writeAnnotations(SystemSymbols.ION_SID) + // Matches the second and third annotations, but not the first one. + assertFalse(rawWriter._private_hasFirstAnnotation(SystemSymbols.ION_SID, SystemSymbols.ION)) + } + + @Test + fun `write int`() { + assertWriterOutputEquals( + """1 10""" + ) { + writeInt(1) + writeInt(BigInteger.TEN) + } + } + + @Test + fun `write float`() { + assertWriterOutputEquals( + """0e0 3.140000104904175e0 3.14e0""" + ) { + writeFloat(0.0) + writeFloat(3.14f) + writeFloat(3.14) + } + } + + @Test + fun `write decimal`() { + assertWriterOutputEquals( + """0. 
-0.""" + ) { + writeDecimal(BigDecimal.ZERO) + writeDecimal(Decimal.NEGATIVE_ZERO) + } + } + + @Test + fun `write timestamp`() { + assertWriterOutputEquals( + """2023-12-08T15:37:23.190583253Z 2123T""" + ) { + writeTimestamp(Timestamp.valueOf("2023-12-08T15:37:23.190583253Z")) + writeTimestamp(Timestamp.valueOf("2123T")) + } + } + + @Test + fun `write symbol`() { + assertWriterOutputEquals( + "\$0 \$1 \$12345 foo 'null' 'null.int' 'bat\\'leth' '$99' 'true' 'false' 'nan' \$ion_1_1 '+' '==' '.'" + ) { + writeSymbol(0) + writeSymbol(1) + writeSymbol(12345) + writeSymbol("foo") + writeSymbol("null") + writeSymbol("null.int") + writeSymbol("bat'leth") + writeSymbol("$99") + writeSymbol("true") + writeSymbol("false") + writeSymbol("nan") + writeSymbol("\$ion_1_1") + writeSymbol("+") + writeSymbol("==") + writeSymbol(".") + } + } + + @Test + fun `write symbols in a sexp`() { + assertWriterOutputEquals( + "(\$0 \$1 \$12345 foo 'null' 'null.int' 'bat\\'leth' '$99' 'true' 'false' 'nan' \$ion_1_1 + == .)" + ) { + writeSexp { + writeSymbol(0) + writeSymbol(1) + writeSymbol(12345) + writeSymbol("foo") + writeSymbol("null") + writeSymbol("null.int") + writeSymbol("bat'leth") + writeSymbol("$99") + writeSymbol("true") + writeSymbol("false") + writeSymbol("nan") + writeSymbol("\$ion_1_1") + writeSymbol("+") + writeSymbol("==") + writeSymbol(".") + } + } + } + + @Test + fun `write string`() { + assertWriterOutputEquals("\"foo\"") { + writeString("foo") + } + } + + @Test + fun `write blob`() { + assertWriterOutputEquals("{{AQID}}") { + writeBlob(byteArrayOf(1, 2, 3), 0, 3) + } + } + + @Test + fun `write clob`() { + assertWriterOutputEquals("{{\"abc\"}}") { + writeClob(byteArrayOf(0x61, 0x62, 0x63), 0, 3) + } + } + + @Test + fun `write E-expression by name`() { + assertWriterOutputEquals("(:foo)") { + stepInEExp("foo") + stepOut() + } + assertWriterOutputEquals("(:'1A')") { + stepInEExp("1A") + stepOut() + } + } + + @ParameterizedTest + @EnumSource(SystemMacro::class) + fun `write 
system macro E-expression by name`(systemMacro: SystemMacro) { + assertWriterOutputEquals("(:\$ion::${systemMacro.macroName})") { + stepInEExp(systemMacro) + stepOut() + } + } + + @Test + fun `write E-expression by id`() { + assertWriterOutputEquals("(:1)") { + stepInEExp(1) + stepOut() + } + } + + @Test + fun `write E-Expression with one arg`() { + assertWriterOutputEquals("(:foo true)") { + stepInEExp("foo") + writeBool(true) + stepOut() + } + } + + @Test + fun `write an expression group`() { + assertWriterOutputEquals("(:foo (:: true true) (:: false false))") { + writeEExp("foo") { + writeExpressionGroup { + writeBool(true) + writeBool(true) + } + // Can't use writeExpressionGroup for this because it sets usingLengthPrefix = false + stepInExpressionGroup(usingLengthPrefix = true) + writeBool(false) + writeBool(false) + stepOut() + } + } + } + + @Test + fun `write an empty expression group`() { + assertWriterOutputEquals("(:foo (::))") { + writeEExp("foo") { + stepInExpressionGroup(false) + stepOut() + } + } + } + + @Test + fun `calling stepInExpressionGroup with an annotation should throw IonException`() { + ionWriter { + stepInEExp(1) + writeAnnotations("foo") + assertThrows<IonException> { stepInExpressionGroup(false) } + } + } + + @Test + fun `calling stepInExpressionGroup while not directly in a Macro container should throw IonException`() { + ionWriter { + assertThrows<IonException> { stepInExpressionGroup(true) } + } + ionWriter { + writeList { + assertThrows<IonException> { stepInExpressionGroup(true) } + } + } + ionWriter { + writeSexp { + assertThrows<IonException> { stepInExpressionGroup(true) } + } + } + ionWriter { + writeStruct { + assertThrows<IonException> { stepInExpressionGroup(true) } + } + } + ionWriter { + writeEExp(123) { + writeExpressionGroup { + assertThrows<IonException> { stepInExpressionGroup(true) } + } + } + } + } + + /** + * Writes this Ion, taken from https://amazon-ion.github.io/ion-docs/ + * ``` + * { + * name: "Fido", + * age: years::4, + * birthday: 2012-03-01T, + * toys: [ball, rope], + * weight: 
pounds::41.2, + * buzz: {{VG8gaW5maW5pdHkuLi4gYW5kIGJleW9uZCE=}}, + * } + * ``` + */ + @Test + fun `write something complex with symtab`() { + assertWriterOutputEquals( + """${'$'}ion_1_1 $3::{$7:["name","age","years","birthday","toys","ball","weight","buzz"]} {$10:"Fido",$11:$12::4,$13:2012-03-01,$14:[$15,rope],$16:pounds::41.2,$17:{{VG8gaW5maW5pdHkuLi4gYW5kIGJleW9uZCE=}}}""" + ) { + writeIVM() + writeAnnotations(3) + writeStruct { + writeFieldName(7) + writeList { + writeString("name") + writeString("age") + writeString("years") + writeString("birthday") + writeString("toys") + writeString("ball") + writeString("weight") + writeString("buzz") + } + } + writeStruct { + writeFieldName(10) + writeString("Fido") + writeFieldName(11) + writeAnnotations(12) + writeInt(4) + writeFieldName(13) + writeTimestamp(Timestamp.valueOf("2012-03-01T")) + writeFieldName(14) + writeList { + writeSymbol(15) + writeSymbol("rope") + } + writeFieldName(16) + writeAnnotations("pounds") + writeDecimal(BigDecimal.valueOf(41.2)) + writeFieldName(17) + writeBlob( + byteArrayOf( + 84, 111, 32, 105, 110, 102, 105, 110, 105, + 116, 121, 46, 46, 46, 32, 97, 110, 100, + 32, 98, 101, 121, 111, 110, 100, 33 + ) + ) + } + } + } + + @Test + fun `write something complex`() { + assertWriterOutputEquals( + """${'$'}ion_1_1 {name:"Fido",age:years::4,birthday:2012-03-01,toys:[ball,rope],weight:pounds::41.2,buzz:{{VG8gaW5maW5pdHkuLi4gYW5kIGJleW9uZCE=}}}""" + ) { + writeIVM() + writeStruct { + writeFieldName("name") + writeString("Fido") + writeFieldName("age") + writeAnnotations("years") + writeInt(4) + writeFieldName("birthday") + writeTimestamp(Timestamp.valueOf("2012-03-01T")) + writeFieldName("toys") + writeList { + writeSymbol("ball") + writeSymbol("rope") + } + writeFieldName("weight") + writeAnnotations("pounds") + writeDecimal(BigDecimal.valueOf(41.2)) + writeFieldName("buzz") + writeBlob( + byteArrayOf( + 84, 111, 32, 105, 110, 102, 105, 110, 105, + 116, 121, 46, 46, 46, 32, 97, 110, 100, + 32, 
98, 101, 121, 111, 110, 100, 33 + ) + ) + } + } + } + + @Test + fun `write something complex and pretty`() { + val expected = """ + ${'$'}ion_1_1 + { + name: "Fido", + age: years::4, + birthday: 2012-03-01, + toys: [ + ball, + rope + ], + weight: pounds::41.2, + buzz: {{ VG8gaW5maW5pdHkuLi4gYW5kIGJleW9uZCE= }} + } + """.trimIndent() + assertWriterOutputEquals( + text = expected, + builderConfigurator = { withPrettyPrinting() } + ) { + writeIVM() + writeStruct { + writeFieldName("name") + writeString("Fido") + writeFieldName("age") + writeAnnotations("years") + writeInt(4) + writeFieldName("birthday") + writeTimestamp(Timestamp.valueOf("2012-03-01T")) + writeFieldName("toys") + writeList { + writeSymbol("ball") + writeSymbol("rope") + } + writeFieldName("weight") + writeAnnotations("pounds") + writeDecimal(BigDecimal.valueOf(41.2)) + writeFieldName("buzz") + writeBlob( + byteArrayOf( + 84, 111, 32, 105, 110, 102, 105, 110, 105, + 116, 121, 46, 46, 46, 32, 97, 110, 100, + 32, 98, 101, 121, 111, 110, 100, 33 + ) + ) + } + } + } + + @Test + fun `write something complex and compact`() { + val expected = """ + ${'$'}ion_1_1 + {name:"Fido",age:years::4,birthday:2012-03-01,toys:[ball,rope],weight:pounds::41.2,buzz:{{VG8gaW5maW5pdHkuLi4gYW5kIGJleW9uZCE=}}} + {name:"Rufus",age:years::5,birthday:2012-03-02,toys:[textbook],weight:pounds::98.5} + """.trimIndent() + assertWriterOutputEquals( + text = expected, + builderConfigurator = { withWriteTopLevelValuesOnNewLines(true) } + ) { + writeIVM() + writeStruct { + writeFieldName("name") + writeString("Fido") + writeFieldName("age") + writeAnnotations("years") + writeInt(4) + writeFieldName("birthday") + writeTimestamp(Timestamp.valueOf("2012-03-01T")) + writeFieldName("toys") + writeList { + writeSymbol("ball") + writeSymbol("rope") + } + writeFieldName("weight") + writeAnnotations("pounds") + writeDecimal(BigDecimal.valueOf(41.2)) + writeFieldName("buzz") + writeBlob( + byteArrayOf( + 84, 111, 32, 105, 110, 102, 105, 110, 105, + 
116, 121, 46, 46, 46, 32, 97, 110, 100, + 32, 98, 101, 121, 111, 110, 100, 33 + ) + ) + } + writeStruct { + writeFieldName("name") + writeString("Rufus") + writeFieldName("age") + writeAnnotations("years") + writeInt(5) + writeFieldName("birthday") + writeTimestamp(Timestamp.valueOf("2012-03-02T")) + writeFieldName("toys") + writeList { + writeSymbol("textbook") + } + writeFieldName("weight") + writeAnnotations("pounds") + writeDecimal(BigDecimal.valueOf(98.5)) + } + } + } + + @Test + fun `write something pretty with a macro`() { + val expected = """ + ${'$'}ion_1_1 + { + name: (:make_string + "F" + "ido" + ) + } + """.trimIndent() + assertWriterOutputEquals( + text = expected, + builderConfigurator = { withPrettyPrinting() } + ) { + writeIVM() + writeStruct { + writeFieldName("name") + stepInEExp("make_string") + writeString("F") + writeString("ido") + stepOut() + } + } + } + + @Test + fun `when pretty printing, empty containers should be on one line`() { + val expected = """ + ${'$'}ion_1_1 + { + a: {} + } + [ + [] + ] + ( + () + ) + (:foo + (:foo) + ) + (:1 + (:1) + ) + (:1 + (::) + ) + """.trimIndent() + assertWriterOutputEquals( + text = expected, + builderConfigurator = { withPrettyPrinting() } + ) { + writeIVM() + writeStruct { + writeFieldName("a") + stepInStruct(false); stepOut() + } + writeList { writeList { } } + writeSexp { writeSexp { } } + writeEExp("foo") { writeEExp("foo") { } } + writeEExp(1) { writeEExp(1) { } } + writeEExp(1) { writeExpressionGroup { } } + } + } + + /* + * Helper functions that steps into a container, applies the contents of [block] to + * the writer, and then steps out of that container. + * Using these functions makes it easy for the indentation of the writer code to + * match the indentation of the equivalent pretty-printed Ion. 
+ */ + + private inline fun IonRawWriter_1_1.writeStruct(block: IonRawWriter_1_1.() -> Unit) { + stepInStruct(true) + block() + stepOut() + } + + private inline fun IonRawWriter_1_1.writeList(block: IonRawWriter_1_1.() -> Unit) { + stepInList(true) + block() + stepOut() + } + + private inline fun IonRawWriter_1_1.writeSexp(block: IonRawWriter_1_1.() -> Unit) { + stepInSExp(true) + block() + stepOut() + } + + private inline fun IonRawWriter_1_1.writeEExp(name: String, block: IonRawWriter_1_1.() -> Unit) { + stepInEExp(name) + block() + stepOut() + } + + private inline fun IonRawTextWriter_1_1.writeEExp(id: Int, block: IonRawWriter_1_1.() -> Unit) { + stepInEExp(id) + block() + stepOut() + } + + private inline fun IonRawWriter_1_1.writeExpressionGroup(block: IonRawWriter_1_1.() -> Unit) { + stepInExpressionGroup(usingLengthPrefix = false) /* tests that need usingLengthPrefix = true step in manually */ + block() + stepOut() + } +} diff --git a/src/test/java/com/amazon/ion/impl/IonReaderBinaryLargeStreamTest.java b/src/test/java/com/amazon/ion/impl/IonReaderBinaryLargeStreamTest.java index 50123ac277..0feeb76db2 100644 --- a/src/test/java/com/amazon/ion/impl/IonReaderBinaryLargeStreamTest.java +++ b/src/test/java/com/amazon/ion/impl/IonReaderBinaryLargeStreamTest.java @@ -1,6 +1,5 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 - package com.amazon.ion.impl; import com.amazon.ion.BufferConfiguration; @@ -13,9 +12,8 @@ import com.amazon.ion.system.IonBinaryWriterBuilder; import com.amazon.ion.system.IonReaderBuilder; import com.amazon.ion.util.RepeatInputStream; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -23,12 +21,18 @@ import java.io.InputStream; import java.io.SequenceInputStream; import java.math.BigDecimal; +import java.nio.charset.StandardCharsets; import static com.amazon.ion.impl._Private_IonConstants.BINARY_VERSION_MARKER_1_0; -import static junit.framework.TestCase.assertNull; -import static org.junit.Assert.assertEquals; - -// NOTE: these tests each take several seconds to complete. +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; + +@EnabledIfEnvironmentVariable( + // GitHub actions and other CI systems will set CI=true + named = "CI", matches = "true", + disabledReason = "It regularly takes about 1 minute to run all the tests in this class." +) public class IonReaderBinaryLargeStreamTest { private byte[] testData(Timestamp timestamp) throws IOException { @@ -295,11 +299,8 @@ public void skipLargeAnnotatedContainer() throws Exception { // when they *do* fail due to limitations of the current implementation, they fail by throwing an IonException // and not something unexpected and ugly. 
- @Rule - public ExpectedException thrown = ExpectedException.none(); - private void cleanlyFailsOnLargeScalar(IonReaderBuilder readerBuilder) throws Exception { - byte[] data = "foobarbaz".getBytes("UTF-8"); + byte[] data = "foobarbaz".getBytes(StandardCharsets.UTF_8); final int totalNumberOfBatches = (Integer.MAX_VALUE / data.length) + 123; // 123 makes the value exceed Integer.MAX_VALUE by an arbitrary amount. ByteArrayOutputStream header = new ByteArrayOutputStream(); header.write(BINARY_VERSION_MARKER_1_0); @@ -313,12 +314,10 @@ private void cleanlyFailsOnLargeScalar(IonReaderBuilder readerBuilder) throws Ex // If support for large scalars is added, the following will be deleted and the rest of the test // completed to assert the correctness of the value. if (readerBuilder.isIncrementalReadingEnabled()) { - thrown.expect(IonException.class); - reader.next(); + assertThrows(IonException.class, reader::next); } else { assertEquals(IonType.STRING, reader.next()); - thrown.expect(IonException.class); - reader.stringValue(); + assertThrows(IonException.class, reader::stringValue); } } @@ -333,7 +332,7 @@ public void cleanlyFailsOnLargeScalarIncremental() throws Exception { } private void cleanlyFailsOnLargeAnnotatedScalar(IonReaderBuilder readerBuilder) throws Exception { - byte[] data = "foobarbaz".getBytes("UTF-8"); + byte[] data = "foobarbaz".getBytes(StandardCharsets.UTF_8); final int totalNumberOfBatches = (Integer.MAX_VALUE / data.length) + 9999; // 9999 makes the value exceed Integer.MAX_VALUE by an arbitrary amount. final long stringLength = (long) totalNumberOfBatches * data.length; ByteArrayOutputStream header = new ByteArrayOutputStream(); @@ -352,12 +351,10 @@ private void cleanlyFailsOnLargeAnnotatedScalar(IonReaderBuilder readerBuilder) // If support for large scalars is added, the following will be deleted and the rest of the test // completed to assert the correctness of the value. 
if (readerBuilder.isIncrementalReadingEnabled()) { - thrown.expect(IonException.class); - reader.next(); + assertThrows(IonException.class, reader::next); } else { assertEquals(IonType.STRING, reader.next()); - thrown.expect(IonException.class); - reader.stringValue(); + assertThrows(IonException.class, reader::stringValue); } } @@ -388,8 +385,7 @@ public void cleanlyFailsOnLargeContainerIncremental() throws Exception { ); IonReader reader = newReaderBuilderThatThrowsOnOversizedValues(true).build(inputStream); - thrown.expect(IonException.class); - reader.next(); + assertThrows(IonException.class, reader::next); } } diff --git a/src/test/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinaryTest.java b/src/test/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinaryTest.java index efaf7a1586..7343768cf5 100644 --- a/src/test/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinaryTest.java +++ b/src/test/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinaryTest.java @@ -1,6 +1,5 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 - package com.amazon.ion.impl; import com.amazon.ion.system.IonReaderBuilder; @@ -20,7 +19,6 @@ import static com.amazon.ion.impl.IonCursorTestUtilities.endStream; import static com.amazon.ion.impl.IonCursorTestUtilities.fillIntValue; import static com.amazon.ion.impl.IonCursorTestUtilities.scalar; -import static com.amazon.ion.impl.IonCursorTestUtilities.scalar; import static com.amazon.ion.impl.IonCursorTestUtilities.fillSymbolValue; public class IonReaderContinuableApplicationBinaryTest { @@ -102,6 +100,22 @@ public void basicSystemSymbols(boolean constructFromBytes) { ); } + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void basicSystemSymbols_1_1(boolean constructFromBytes) { + IonReaderContinuableApplicationBinary reader = initializeReader( + constructFromBytes, + 0xE0, 0x01, 0x01, 0xEA, + 0xEE, 0x04, // Symbol value SID 4 ("name") + 0xEE, 0x05 // Symbol value SID 5 ("version") + ); + assertSequence( + reader, + scalar(), fillSymbolValue("name"), + scalar(), fillSymbolValue("version") + ); + } + @ParameterizedTest(name = "constructFromBytes={0}") @ValueSource(booleans = {true, false}) public void basicLocalSymbols(boolean constructFromBytes) { @@ -126,6 +140,22 @@ public void basicLocalSymbols(boolean constructFromBytes) { ); } + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void basicInlineSymbols(boolean constructFromBytes) { + IonReaderContinuableApplicationBinary reader = initializeReader( + constructFromBytes, + 0xE0, 0x01, 0x01, 0xEA, + 0xA0, // Empty inline symbol + 0xA3, 0x61, 0x62, 0x63 // Inline symbol 'abc' + ); + assertSequence( + reader, + scalar(), fillSymbolValue(""), + scalar(), fillSymbolValue("abc") + ); + } + @ParameterizedTest(name = "constructFromBytes={0}") @ValueSource(booleans = {true, false}) public void basicNoFill(boolean constructFromBytes) { @@ -164,4 +194,33 @@ public void 
basicStepOutEarly(boolean constructFromBytes) { endStream() ); } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void systemSymbolsEncodedUsingUserIdsAndInlineText_1_1(boolean constructFromBytes) { + IonReaderContinuableApplicationBinary reader = initializeReader( + constructFromBytes, + 0xE0, 0x01, 0x01, 0xEA, + 0xE7, 0xF9, '$', 'i', 'o', 'n', // $ion:: + 0xCD, // s-expression, length 13 + 0xEE, 0x10, // 'module' (encoded as system symbol ID 16) + 0xA1, '_', // Inline symbol '_' + 0xC8, // s-expression, length 8 + 0xEE, 0x0F, // 'symbol_table' (encoded as system symbol ID 15) + 0xB5, 0x94, '$', 'i', 'o', 'n', // ["$ion"] + 0xE4, 0x03, // $1::, where $1 is a local SID that points to the text "$ion" + 0xCA, // s-expression, length 10 + 0xEE, 0x10, // 'module' (encoded as system symbol ID 16) + 0xA1, '_', // Inline symbol '_' + 0xC5, // s-expression, length 5 + 0xEE, 0x0F, // 'symbol_table' (encoded as system symbol ID 15) + 0xB2, 0x91, 'a', // ["a"] + 0xE1, 0x01 // $1, which now points to "a" + ); + assertSequence( + reader, + scalar(), fillSymbolValue("a"), + endStream() + ); + } } diff --git a/src/test/java/com/amazon/ion/impl/IonReaderContinuableCoreBinaryTest.java b/src/test/java/com/amazon/ion/impl/IonReaderContinuableCoreBinaryTest.java index 4b5c92fa2e..a44feeb3ae 100644 --- a/src/test/java/com/amazon/ion/impl/IonReaderContinuableCoreBinaryTest.java +++ b/src/test/java/com/amazon/ion/impl/IonReaderContinuableCoreBinaryTest.java @@ -2,45 +2,75 @@ // SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl; +import com.amazon.ion.IntegerSize; import com.amazon.ion.IonCursor; +import com.amazon.ion.IonDatagram; +import com.amazon.ion.IonEncodingVersion; import com.amazon.ion.IonException; +import com.amazon.ion.IonReader; +import com.amazon.ion.IonSystem; import com.amazon.ion.IonType; +import com.amazon.ion.MacroAwareIonWriter; +import com.amazon.ion.TestUtils; +import 
com.amazon.ion.system.IonSystemBuilder; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.junit.jupiter.params.provider.EnumSource; import org.junit.jupiter.params.provider.ValueSource; import java.io.ByteArrayInputStream; +import java.math.BigInteger; import static com.amazon.ion.BitUtils.bytes; import static com.amazon.ion.IonCursor.Event.START_SCALAR; +import static com.amazon.ion.IonCursor.Event.VALUE_READY; +import static com.amazon.ion.TestUtils.withIvm; +import static com.amazon.ion.impl.IonCursorTestUtilities.annotations; +import static com.amazon.ion.impl.IonCursorTestUtilities.fieldName; +import static com.amazon.ion.impl.TaglessEncoding.FLEX_INT; +import static com.amazon.ion.impl.TaglessEncoding.FLEX_UINT; +import static com.amazon.ion.impl.TaglessEncoding.INT16; +import static com.amazon.ion.impl.TaglessEncoding.INT32; +import static com.amazon.ion.impl.TaglessEncoding.INT64; +import static com.amazon.ion.impl.TaglessEncoding.UINT32; +import static com.amazon.ion.impl.TaglessEncoding.UINT64; +import static com.amazon.ion.impl.TaglessEncoding.UINT8; +import static com.amazon.ion.impl.IonCursorBinaryTest.nextMacroInvocation; import static com.amazon.ion.impl.IonCursorTestUtilities.STANDARD_BUFFER_CONFIGURATION; import static com.amazon.ion.impl.IonCursorTestUtilities.Expectation; import static com.amazon.ion.impl.IonCursorTestUtilities.ExpectationProvider; import static com.amazon.ion.impl.IonCursorTestUtilities.assertSequence; import static com.amazon.ion.impl.IonCursorTestUtilities.container; -import static com.amazon.ion.impl.IonCursorTestUtilities.container; +import static com.amazon.ion.impl.IonCursorTestUtilities.doubleValue; import static com.amazon.ion.impl.IonCursorTestUtilities.endContainer; import static com.amazon.ion.impl.IonCursorTestUtilities.endStream; import static com.amazon.ion.impl.IonCursorTestUtilities.fillContainer; import static 
com.amazon.ion.impl.IonCursorTestUtilities.fillIntValue; -import static com.amazon.ion.impl.IonCursorTestUtilities.scalar; +import static com.amazon.ion.impl.IonCursorTestUtilities.fillStringValue; +import static com.amazon.ion.impl.IonCursorTestUtilities.fillSymbolValue; import static com.amazon.ion.impl.IonCursorTestUtilities.scalar; import static com.amazon.ion.impl.IonCursorTestUtilities.startContainer; -import static com.amazon.ion.impl.IonCursorTestUtilities.fillStringValue; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; public class IonReaderContinuableCoreBinaryTest { private IonReaderContinuableCoreBinary initializeReader(boolean constructFromBytes, int... data) { + return initializeReader(constructFromBytes, bytes(data)); + } + + private IonReaderContinuableCoreBinary initializeReader(boolean constructFromBytes, byte[] data) { IonReaderContinuableCoreBinary reader; if (constructFromBytes) { - reader = new IonReaderContinuableCoreBinary(STANDARD_BUFFER_CONFIGURATION, bytes(data), 0, data.length); + reader = new IonReaderContinuableCoreBinary(STANDARD_BUFFER_CONFIGURATION, data, 0, data.length); } else { reader = new IonReaderContinuableCoreBinary( STANDARD_BUFFER_CONFIGURATION, - new ByteArrayInputStream(bytes(data)), + new ByteArrayInputStream(data), null, 0, 0 @@ -119,6 +149,104 @@ public void basicStrings(boolean constructFromBytes) { ); } + @ParameterizedTest(name = "constructFromBytes={0}") + @CsvSource({ + "0, E1 00 60", + "1, E1 01 60", + "255, E1 FF 60", + "256, E2 00 00 60", + "257, E2 01 00 60", + "512, E2 00 01 60", + "513, E2 01 01 60", + "65535, E2 FF FE 60", + "65791, E2 FF FF 60", + "65792, E3 01 60", + "65793, E3 03 60", + "65919, E3 FF 60", + "65920, E3 02 02 60", + "2147483647 , E3 F0 DF 
DF FF 0F 60", + }) + public void sidSymbols_1_1(int sid, String bytes) { + sidSymbols_1_1_helper(sid, bytes, true); + sidSymbols_1_1_helper(sid, bytes, false); + } + void sidSymbols_1_1_helper(int sid, String bytes, boolean constructFromBytes) { + IonReaderContinuableCoreBinary reader = initializeReader( + constructFromBytes, + TestUtils.hexStringToByteArray("E0 01 01 EA " + bytes) + ); + assertSequence( + reader, + scalar(), fillSymbolValue(sid), + scalar(), fillIntValue(0), + endStream() + ); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @EnumSource(SystemSymbols_1_1.class) + public void systemSymbols_1_1(SystemSymbols_1_1 systemSymbol) { + systemSymbols_1_1_helper(systemSymbol, true); + systemSymbols_1_1_helper(systemSymbol, false); + } + void systemSymbols_1_1_helper(SystemSymbols_1_1 systemSymbol, boolean constructFromBytes) { + String systemSidBytes = Integer.toHexString(systemSymbol.getId()); + IonReaderContinuableCoreBinary reader = initializeReader( + constructFromBytes, + TestUtils.hexStringToByteArray("E0 01 01 EA EE " + systemSidBytes + " 60") + ); + assertSequence( + reader, + scalar(), + symbolValue(systemSymbol.getText()), + scalar(), + fillIntValue(0), + endStream() + ); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @EnumSource(SystemSymbols_1_1.class) + public void systemSymbols_1_1_fieldNames(SystemSymbols_1_1 systemSymbol) { + systemSymbols_1_1_fieldNamesHelper(systemSymbol, true); + systemSymbols_1_1_fieldNamesHelper(systemSymbol, false); + } + void systemSymbols_1_1_fieldNamesHelper(SystemSymbols_1_1 systemSymbol, boolean constructFromBytes) { + String systemSidBytes = Integer.toHexString(0x60 + systemSymbol.getId()); + IonReaderContinuableCoreBinary reader = initializeReader( + constructFromBytes, + TestUtils.hexStringToByteArray("E0 01 01 EA F3 01 " + systemSidBytes + " 60 01 F0") + ); + assertSequence( + reader, + fillContainer(IonType.STRUCT, + scalar(), + fieldName(systemSymbol.getText()), + fillIntValue(0) 
+ ), + endStream() + ); + } + + @ParameterizedTest(name = "symbol={0}") + @EnumSource(SystemSymbols_1_1.class) + public void systemSymbols_1_1_annotations(SystemSymbols_1_1 systemSymbol) { + systemSymbols_1_1_annotationsHelper(systemSymbol, true); + systemSymbols_1_1_annotationsHelper(systemSymbol, false); + } + void systemSymbols_1_1_annotationsHelper(SystemSymbols_1_1 systemSymbol, boolean constructFromBytes) { + String systemSidBytes = Integer.toHexString(0x60 + systemSymbol.getId()); + IonReaderContinuableCoreBinary reader = initializeReader( + constructFromBytes, + TestUtils.hexStringToByteArray("E0 01 01 EA E7 01 " + systemSidBytes + " 60") + ); + assertSequence( + reader, + annotations(systemSymbol.getText()), + fillIntValue(0), + endStream() + ); + } @ParameterizedTest(name = "constructFromBytes={0}") @ValueSource(booleans = {true, false}) @@ -664,4 +792,420 @@ public void timestampLengthZeroFailsCleanly(boolean constructFromBytes) { assertThrows(IonException.class, reader::timestampValue); } } + + /** + * Provides Expectations that advance the reader to the next tagless value, fill the value, and verify that it has + * the given attributes. + */ + private static ExpectationProvider fillNextTaglessValue(TaglessEncoding taglessEncoding, IonType expectedType) { + return consumer -> consumer.accept(new Expectation<>( + String.format("fill tagless %s", taglessEncoding.name()), + reader -> { + assertEquals(START_SCALAR, reader.nextTaglessValue(taglessEncoding)); + assertEquals(VALUE_READY, reader.fillValue()); + assertEquals(expectedType, reader.getType()); + } + )); + } + + /** + * Provides Expectations that advance the reader to the next tagless value, fill the value, and verify that it is + * an integer that fits in a Java int with the expected value. 
+ */ + private static ExpectationProvider nextTaglessIntValue(TaglessEncoding taglessEncoding, int expectedValue) { + return consumer -> consumer.accept(new Expectation<>( + String.format("fill tagless int from %s", taglessEncoding.name()), + reader -> { + assertEquals(START_SCALAR, reader.nextTaglessValue(taglessEncoding)); + assertEquals(VALUE_READY, reader.fillValue()); + assertEquals(IonType.INT, reader.getType()); + assertEquals(IntegerSize.INT, reader.getIntegerSize()); + assertEquals(expectedValue, reader.intValue()); + } + )); + } + + /** + * Provides Expectations that advance the reader to the next tagless value, fill the value, and verify that it is + * an integer that fits in a Java long with the expected value. + */ + private static ExpectationProvider nextTaglessLongValue(TaglessEncoding taglessEncoding, long expectedValue) { + return consumer -> consumer.accept(new Expectation<>( + String.format("fill tagless long from %s", taglessEncoding.name()), + reader -> { + assertEquals(START_SCALAR, reader.nextTaglessValue(taglessEncoding)); + assertEquals(VALUE_READY, reader.fillValue()); + assertEquals(IonType.INT, reader.getType()); + assertEquals(IntegerSize.LONG, reader.getIntegerSize()); + assertEquals(expectedValue, reader.longValue()); + } + )); + } + + /** + * Provides Expectations that advance the reader to the next tagless value, fill the value, and verify that it is + * an integer that fits in a BigInteger with the expected value. 
+ */ + private static ExpectationProvider nextTaglessBigIntegerValue(TaglessEncoding taglessEncoding, BigInteger expectedValue) { + return consumer -> consumer.accept(new Expectation<>( + String.format("fill tagless BigInteger from %s", taglessEncoding.name()), + reader -> { + assertEquals(START_SCALAR, reader.nextTaglessValue(taglessEncoding)); + assertEquals(VALUE_READY, reader.fillValue()); + assertEquals(IonType.INT, reader.getType()); + assertEquals(IntegerSize.BIG_INTEGER, reader.getIntegerSize()); + assertEquals(expectedValue, reader.bigIntegerValue()); + } + )); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void taglessInts(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(1, bytes( + 0x00, // User macro ID 0 + 0xFF, // Interpreted as uint8 + 0xFF, 0xFF, // Interpreted as int16 + 0xFF, 0xFF, 0xFF, 0xFF, // Interpreted as uint32 + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // Interpreted as int64 + 0xFC, 0xFF, 0xFF, // Interpreted as flex_uint + 0xFC, 0xFF, 0xFF // Interpreted as flex_int + )); + try (IonReaderContinuableCoreBinary reader = initializeReader(constructFromBytes, data)) { + assertSequence( + reader, + nextMacroInvocation(0), + nextTaglessIntValue(UINT8, 0xFF), + nextTaglessIntValue(INT16, -1), + nextTaglessLongValue(UINT32, 0xFFFFFFFFL), + nextTaglessLongValue(INT64, -1), + nextTaglessIntValue(FLEX_UINT, 0x1FFFFF), + nextTaglessIntValue(FLEX_INT, -1), + endStream() + ); + } + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void taglessFixedIntBoundaries(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(1, bytes( + 0x00, // User macro ID 0 + 0xFF, 0xFF, 0xFF, 0x7F, // Interpreted as uint32 -- this is Integer.MAX_VALUE + 0xFF, 0xFF, 0xFF, 0x7F, // Interpreted as int32 -- this is Integer.MAX_VALUE + 0x00, 0x00, 0x00, 0x80, // Interpreted as uint32 -- this won't fit in a Java int, 
which is signed + 0x00, 0x00, 0x00, 0x80, // Interpreted as int32 -- this is Integer.MIN_VALUE + 0xFF, 0xFF, 0xFF, 0xFF, // Interpreted as uint32 -- this won't fit in a Java int + 0xFF, 0xFF, 0xFF, 0xFF // Interpreted as int32 -- this is -1 + )); + try (IonReaderContinuableCoreBinary reader = initializeReader(constructFromBytes, data)) { + assertSequence( + reader, + nextMacroInvocation(0), + nextTaglessIntValue(UINT32, Integer.MAX_VALUE), + nextTaglessIntValue(INT32, Integer.MAX_VALUE), + nextTaglessLongValue(UINT32, 0x80000000L), + nextTaglessIntValue(INT32, Integer.MIN_VALUE), + nextTaglessLongValue(UINT32, 0xFFFFFFFFL), + nextTaglessIntValue(INT32, -1), + endStream() + ); + } + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void taglessFixedLongBoundaries(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(1, bytes( + 0x00, // User macro ID 0 + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, // Interpreted as uint64 -- this is Long.MAX_VALUE + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, // Interpreted as int64 -- this is Long.MAX_VALUE + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, // Interpreted as uint64 -- this won't fit in a Java long + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, // Interpreted as int64 -- this is Long.MIN_VALUE + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // Interpreted as uint64 -- this won't fit in a Java long + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF // Interpreted as int64 -- this is -1 + )); + try (IonReaderContinuableCoreBinary reader = initializeReader(constructFromBytes, data)) { + assertSequence( + reader, + nextMacroInvocation(0), + nextTaglessLongValue(UINT64, Long.MAX_VALUE), + nextTaglessLongValue(INT64, Long.MAX_VALUE), + nextTaglessBigIntegerValue(UINT64, BigInteger.valueOf(Long.MAX_VALUE).add(BigInteger.ONE)), + nextTaglessLongValue(INT64, Long.MIN_VALUE), + nextTaglessBigIntegerValue(UINT64, 
BigInteger.valueOf(Long.MAX_VALUE).shiftLeft(1).add(BigInteger.ONE)), + nextTaglessLongValue(INT64, -1), + endStream() + ); + } + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void taglessFlexIntBoundaries(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(1, bytes( + 0x00, // User macro ID 0 + 0xF0, 0xFF, 0xFF, 0xFF, 0x0F, // 31 set bits. As flex_uint this is Integer.MAX_VALUE + 0xF0, 0xFF, 0xFF, 0xFF, 0x0F, // 31 set bits. As flex_int this is Integer.MAX_VALUE + 0x10, 0x00, 0x00, 0x00, 0x10, // Bit 31 set. As a flex_uint this is Integer.MAX_VALUE + 1 + 0x10, 0x00, 0x00, 0x00, 0x10, // Bit 31 set (sign not extended). As flex_int this is Integer.MAX_VALUE + 1 + 0xF0, 0xFF, 0xFF, 0xFF, 0x1F, // 32 set bits. As flex_uint this is (Integer.MAX_VALUE << 1) + 1 + 0xF0, 0xFF, 0xFF, 0xFF, 0x1F, // 32 set bits. As flex_int this is (Integer.MAX_VALUE << 1) + 1 + 0x10, 0x00, 0x00, 0x00, 0xF0, // Bits 31+ set. As flex_uint this won't fit in an int + 0x10, 0x00, 0x00, 0x00, 0xF0, // Bits 31+ set (sign extended). As flex_int this is Integer.MIN_VALUE + 0xF0, 0xFF, 0xFF, 0xFF, 0xEF, // All bits except bit 31 set. As flex_uint this won't fit in an int + 0xF0, 0xFF, 0xFF, 0xFF, 0xEF, // All bits except bit 31 set (sign extended). As flex_int this is Integer.MIN_VALUE - 1 + 0xF0, 0xFF, 0xFF, 0xFF, 0xFF, // All bits set. As flex_uint this won't fit in a Java int + 0xF0, 0xFF, 0xFF, 0xFF, 0xFF // All bits set. 
As flex_int this is -1 + )); + try (IonReaderContinuableCoreBinary reader = initializeReader(constructFromBytes, data)) { + assertSequence( + reader, + nextMacroInvocation(0), + nextTaglessIntValue(FLEX_UINT, Integer.MAX_VALUE), + nextTaglessIntValue(FLEX_INT, Integer.MAX_VALUE), + nextTaglessLongValue(FLEX_UINT, Integer.MAX_VALUE + 1L), + nextTaglessLongValue(FLEX_INT, Integer.MAX_VALUE + 1L), + nextTaglessLongValue(FLEX_UINT, 0xFFFFFFFFL), + nextTaglessLongValue(FLEX_INT, 0xFFFFFFFFL), + nextTaglessLongValue(FLEX_UINT, 0x780000000L), // 0xF000... >> 5 == 0x780... + nextTaglessIntValue(FLEX_INT, Integer.MIN_VALUE), + nextTaglessLongValue(FLEX_UINT, 0x77FFFFFFFL), // 0xEFFF... >> 5 == 0x77F... + nextTaglessLongValue(FLEX_INT, Integer.MIN_VALUE - 1L), + nextTaglessLongValue(FLEX_UINT, 0x7FFFFFFFFL), // 0xFFFF... >> 5 == 0x7FF... + nextTaglessIntValue(FLEX_INT, -1), + endStream() + ); + } + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void taglessFlexLongBoundaries(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(1, bytes( + 0x00, // User macro ID 0 + 0x00, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, // 63 set bits. As flex_uint this is Long.MAX_VALUE + 0x00, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, // 63 set bits. As flex_int this is Long.MAX_VALUE + 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, // Bit 63 set. As a flex_uint this is Long.MAX_VALUE + 1 + 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, // Bit 63 set (sign not extended). As flex_int this is Long.MAX_VALUE + 1 + 0x00, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x03, // 64 set bits. As flex_uint this is (Long.MAX_VALUE << 1) + 1 + 0x00, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x03, // 64 set bits. As flex_int this is (Long.MAX_VALUE << 1) + 1 + 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFE, // Bits 63+ set. 
As flex_uint this won't fit in a long + 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFE, // Bits 63+ set (sign extended). As flex_int this is Long.MIN_VALUE + 0x00, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFD, // All bits except bit 63 set. As flex_uint this won't fit in a long + 0x00, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFD, // All bits except bit 63 set (sign extended). As flex_int this is Long.MIN_VALUE - 1 + 0x00, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // All bits set. As flex_uint this won't fit in a Java long + 0x00, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF // All bits set. As flex_int this is -1 + )); + try (IonReaderContinuableCoreBinary reader = initializeReader(constructFromBytes, data)) { + assertSequence( + reader, + nextMacroInvocation(0), + nextTaglessLongValue(FLEX_UINT, Long.MAX_VALUE), + nextTaglessLongValue(FLEX_INT, Long.MAX_VALUE), + nextTaglessBigIntegerValue(FLEX_UINT, BigInteger.valueOf(Long.MAX_VALUE).add(BigInteger.ONE)), + nextTaglessBigIntegerValue(FLEX_INT, BigInteger.valueOf(Long.MAX_VALUE).add(BigInteger.ONE)), + nextTaglessBigIntegerValue(FLEX_UINT, BigInteger.valueOf(Long.MAX_VALUE).shiftLeft(1).add(BigInteger.ONE)), + nextTaglessBigIntegerValue(FLEX_INT, BigInteger.valueOf(Long.MAX_VALUE).shiftLeft(1).add(BigInteger.ONE)), + nextTaglessBigIntegerValue(FLEX_UINT, BigInteger.valueOf(0x3F80000000000000L).shiftLeft(8)), // 0xFE00... >>> 2 == 0x3F80... + nextTaglessLongValue(FLEX_INT, Long.MIN_VALUE), + nextTaglessBigIntegerValue(FLEX_UINT, BigInteger.valueOf(0x3F7FFFFFFFFFFFFFL).shiftLeft(8).or(BigInteger.valueOf(0xFF))), // 0xFDFF... >>> 2 == 0x3F7F... + nextTaglessBigIntegerValue(FLEX_INT, BigInteger.valueOf(Long.MIN_VALUE).subtract(BigInteger.ONE)), + nextTaglessBigIntegerValue(FLEX_UINT, BigInteger.valueOf(0x3FFFFFFFFFFFFFFFL).shiftLeft(8).or(BigInteger.valueOf(0xFF))), // 0xFF... >>> 2 == 0x3F... 
+ nextTaglessLongValue(FLEX_INT, -1), + endStream() + ); + } + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void taglessFloats(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(1, bytes( + 0x00, // User macro ID 0 + 0x00, 0x3C, // Interpreted as float16 (1.0) + 0x00, 0x00, 0x80, 0x3F, // Interpreted as float32 (1.0) + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x3F // Interpreted as float64 (1.0) + )); + try (IonReaderContinuableCoreBinary reader = initializeReader(constructFromBytes, data)) { + assertSequence( + reader, + nextMacroInvocation(0), + fillNextTaglessValue(TaglessEncoding.FLOAT16, IonType.FLOAT), + doubleValue(1.0), + fillNextTaglessValue(TaglessEncoding.FLOAT32, IonType.FLOAT), + doubleValue(1.0), + fillNextTaglessValue(TaglessEncoding.FLOAT64, IonType.FLOAT), + doubleValue(1.0), + endStream() + ); + } + } + + static ExpectationProvider symbolValue(String expectedText) { + return consumer -> consumer.accept(new Expectation<>( + String.format("symbol(%s)", expectedText), + reader -> { + assertTrue(reader.hasSymbolText()); + assertEquals(expectedText, reader.getSymbolText()); + } + )); + } + + static ExpectationProvider symbolValue(int expectedSid) { + return consumer -> consumer.accept(new Expectation<>( + String.format("symbol(%d)", expectedSid), + reader -> { + assertFalse(reader.hasSymbolText()); + assertEquals(expectedSid, reader.symbolValueId()); + } + )); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void taglessCompactSymbols(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(1, bytes( + 0x00, // User macro ID 0 + 0xF9, 0x6E, 0x61, 0x6D, 0x65, // interpreted as compact symbol (FlexSym with inline text "name") + 0x09, // interpreted as compact symbol (FlexSym with SID 4) + 0x01, 0x81 // interpreted as compact symbol (special FlexSym) + )); + try (IonReaderContinuableCoreBinary 
reader = initializeReader(constructFromBytes, data)) { + assertSequence( + reader, + nextMacroInvocation(0), + fillNextTaglessValue(TaglessEncoding.FLEX_SYM, IonType.SYMBOL), + symbolValue("name"), + fillNextTaglessValue(TaglessEncoding.FLEX_SYM, IonType.SYMBOL), + symbolValue(4), + fillNextTaglessValue(TaglessEncoding.FLEX_SYM, IonType.SYMBOL), + symbolValue(""), + endStream() + ); + } + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void addSymbolsSystemMacro(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(1, bytes( + 0xEF, 0x14, // system macro add_symbols + 0x02, // AEB: 0b------aa; a=10, expression group + 0x01, // FlexInt 0, a delimited expression group + 0x93, 0x61, 0x62, 0x63, // 3-byte string, utf-8 "abc" + 0xF0, // delimited end... of expression group + 0xE1, // SID single byte + 0x42 // SID $66 + )); + try (IonReaderContinuableCoreBinary reader = initializeReader(constructFromBytes, data)) { + assertEquals(START_SCALAR, reader.nextValue()); + assertEquals(66, reader.symbolValueId()); + } + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void systemReaderWrapperReadsEncodingDirective(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(1, bytes( + 0xE7, 0x01, 0x61, // One FlexSym annotation, with opcode, opcode 61 = system symbol 1 = $ion + 0xCA, // ( + 0xEE, 0x10, // module + 0xA1, '_', // _ + 0xC5, 0xEE, 0x0F, // S-exp, system symbol 0xF = symbol_table + 0xB2, 0x91, 'a', // ["a"] + 0xE1, 0x01 // $1 = a + )); + try (IonReader systemReader = new IonReaderNonContinuableSystem(initializeReader(constructFromBytes, data))) { + // Note: Ion 1.1 has a level below "system", which is invisible to IonReaderNonContinuableSystem. This is + // because IonReaderContinuableCore must interpret the encoding context so that it can evaluate macro + // invocations, which may expand to system values. 
Accordingly, IonReaderNonContinuableSystem does not + // surface Ion 1.1 encoding directives as it does Ion 1.0 symbol tables. Surfacing Ion 1.1 encoding + // directives will need to be done by a reader one level below IonReaderNonContinuableSystem. + assertEquals(IonType.SYMBOL, systemReader.next()); + assertEquals("$ion_1_1", systemReader.stringValue()); + assertEquals(IonType.SYMBOL, systemReader.next()); + assertEquals(1, systemReader.symbolValue().getSid()); + assertEquals("a", systemReader.symbolValue().assumeText()); + assertEquals("a", systemReader.stringValue()); + assertNull(systemReader.next()); + } + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void systemReaderWrapperReadsEncodingDirectiveWithAppend(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(1, bytes( + 0xE7, 0x01, 0x61, // One FlexSym annotation, with opcode, opcode 61 = system symbol 1 = $ion + 0xCC, // ( + 0xEE, 0x10, // module + 0xA1, '_', // Inline symbol '_' + 0xC7, 0xEE, 0x0F, // S-exp, system symbol 0xF = symbol_table + 0xA1, '_', // Inline symbol '_' + 0xB2, 0x91, 'a', // ["a"] + 0xE1, SystemSymbols_1_1.size() + 1 // first local symbol = a + )); + try (IonReader systemReader = new IonReaderNonContinuableSystem(initializeReader(constructFromBytes, data))) { + assertEquals(IonType.SYMBOL, systemReader.next()); + assertEquals("$ion_1_1", systemReader.stringValue()); + assertEquals(IonType.SYMBOL, systemReader.next()); + assertEquals(SystemSymbols_1_1.size() + 1, systemReader.symbolValue().getSid()); + assertEquals("a", systemReader.symbolValue().assumeText()); + assertEquals("a", systemReader.stringValue()); + assertNull(systemReader.next()); + } + } + + /** + * Performs a macro-aware transcode of the given data, verifying that the resulting stream is data-model equivalent + * to the source data. + * @param data the source data. 
+ * @param constructFromBytes true if the reader is to be backed by a byte array; otherwise, the reader will be + * backed by an InputStream. + */ + private void assertMacroAwareTranscribeProducesEquivalentStream(byte[] data, boolean constructFromBytes) throws Exception { + StringBuilder sb = new StringBuilder(); + try ( + IonReaderContinuableCoreBinary reader = initializeReader(constructFromBytes, data); + MacroAwareIonWriter writer = (MacroAwareIonWriter) IonEncodingVersion.ION_1_1.textWriterBuilder().build(sb); + ) { + reader.transcodeAllTo(writer); + } + IonSystem system = IonSystemBuilder.standard().build(); + IonDatagram actual = system.getLoader().load(sb.toString()); + IonDatagram expected = system.getLoader().load(data); + assertEquals(expected, actual); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void encodingLevelTranscodeOfSystemMacroInvocation(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(1, bytes( + 0xEF, 0x14, // system macro add_symbols + 0x02, // AEB: 0b------aa; a=10, expression group + 0x01, // FlexInt 0, a delimited expression group + 0x93, 0x61, 0x62, 0x63, // 3-byte string, utf-8 "abc" + 0xF0, // delimited end... 
of expression group + 0xE1, // SID single byte + 0x40 // SID $64 + )); + assertMacroAwareTranscribeProducesEquivalentStream(data, constructFromBytes); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void encodingLevelTranscodeOfIon10SymbolTable(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(0, bytes( + 0xEA, 0x81, 0x83, // $ion_symbol_table + 0xD7, // { + 0x87, // symbols: + 0xB5, // [ + 0x84, 'a', 'b', 'c', 'd', // "abcd" -> $10 + // ]} + 0xC4, // ( + 0xE3, 0x81, 0x8A, // abcd:: + 0x20 // 0 + )); + assertMacroAwareTranscribeProducesEquivalentStream(data, constructFromBytes); + } } diff --git a/src/test/java/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java b/src/test/java/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java index 1080dc3962..a1585efba1 100644 --- a/src/test/java/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java +++ b/src/test/java/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java @@ -20,6 +20,7 @@ import com.amazon.ion.SymbolToken; import com.amazon.ion.SystemSymbols; import com.amazon.ion.TestUtils; +import com.amazon.ion.Timestamp; import com.amazon.ion.UnknownSymbolException; import com.amazon.ion.impl.bin._Private_IonManagedBinaryWriterBuilder; import com.amazon.ion.impl.bin._Private_IonManagedWriter; @@ -29,8 +30,10 @@ import com.amazon.ion.system.IonSystemBuilder; import com.amazon.ion.system.SimpleCatalog; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.converter.ConvertWith; import org.junit.jupiter.params.provider.CsvSource; import org.junit.jupiter.params.provider.ValueSource; @@ -40,6 +43,7 @@ import java.io.IOException; import java.io.InputStream; import java.math.BigDecimal; +import java.math.BigInteger; import java.nio.charset.StandardCharsets; 
import java.util.ArrayList; import java.util.Arrays; @@ -50,12 +54,17 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.function.Consumer; import java.util.function.Function; +import java.util.function.Supplier; import java.util.zip.GZIPInputStream; import static com.amazon.ion.BitUtils.bytes; -import static com.amazon.ion.TestUtils.gzippedBytes; +import static com.amazon.ion.TestUtils.*; import static com.amazon.ion.impl.IonCursorTestUtilities.Expectation; import static com.amazon.ion.impl.IonCursorTestUtilities.ExpectationProvider; +import static com.amazon.ion.impl.IonCursorTestUtilities.bigIntegerValue; +import static com.amazon.ion.impl.IonCursorTestUtilities.doubleValue; +import static com.amazon.ion.impl.IonCursorTestUtilities.intValue; +import static com.amazon.ion.impl.IonCursorTestUtilities.longValue; import static com.amazon.ion.impl.IonCursorTestUtilities.type; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -161,7 +170,7 @@ public void onData(int numberOfBytes) { .onOversizedValue(byteCountingHandler) .onData(byteCountingHandler) .build(); - + @BeforeEach public void setup() { byteCounter = new AtomicLong(); @@ -341,17 +350,37 @@ static ExpectationProvider container(IonType }; } - static ExpectationProvider intValue(int expectedValue) { + /** + * Steps in, asserts something about the content of a container, and then steps out. + * + * This enables us to use `next()` to move to a container value and check the annotations + * and/or field name of the value before checking the content of the container value. + */ + @SafeVarargs + static ExpectationProvider inContainer(ExpectationProvider... 
expectations) { + return consumer -> { + STEP_IN.accept(consumer); + for (Consumer>> expectation : expectations) { + expectation.accept(consumer); + } + STEP_OUT.accept(consumer); + }; + } + + static ExpectationProvider nullValue(IonType expectedType) { return consumer -> consumer.accept(new Expectation<>( - String.format("int(%d)", expectedValue), - reader -> assertEquals(expectedValue, reader.intValue()) + String.format("null(%s)", expectedType), + reader -> { + assertTrue(reader.isNullValue()); + assertEquals(expectedType, reader.getType()); + } )); } - static ExpectationProvider doubleValue(double expectedValue) { + static ExpectationProvider booleanValue(boolean expectedValue) { return consumer -> consumer.accept(new Expectation<>( - String.format("double(%f)", expectedValue), - reader -> assertEquals(expectedValue, reader.doubleValue(), 1e-9) + String.format("boolean(%s)", expectedValue), + reader -> assertEquals(expectedValue, reader.booleanValue()) )); } @@ -369,6 +398,13 @@ static ExpectationProvider bigDecimalValue(B )); } + static ExpectationProvider timestampValue(Timestamp expectedValue) { + return consumer -> consumer.accept(new Expectation<>( + String.format("timestamp(%s)", expectedValue), + reader -> assertEquals(expectedValue, reader.timestampValue()) + )); + } + static ExpectationProvider stringValue(String expectedValue) { return consumer -> consumer.accept(new Expectation<>( String.format("string(%s)", expectedValue), @@ -415,6 +451,20 @@ static ExpectationProvider annotations(Strin )); } + static ExpectationProvider annotationsIterator(String... 
annotations) { + return consumer -> consumer.accept(new Expectation<>( + String.format("annotations(%s)", Arrays.toString(annotations)), + reader -> { + Iterator annotationIterator = reader.iterateTypeAnnotations(); + int numberOfAnnotations = 0; + while (annotationIterator.hasNext()) { + assertEquals(annotations[numberOfAnnotations++], annotationIterator.next()); + } + assertEquals(annotations.length, numberOfAnnotations); + } + )); + } + private static void assertSymbolEquals( String expectedText, SymbolToken actual @@ -435,6 +485,29 @@ static ExpectationProvider annotationSymbols )); } + private static void assertSymbolEquals( + SymbolToken expected, + SymbolToken actual + ) { + assertEquals(expected.getText(), actual.getText()); + if (expected.getText() == null) { + assertEquals(expected.getSid(), actual.getSid()); + } + } + + static ExpectationProvider annotationSymbols(SymbolToken... annotations) { + return consumer -> consumer.accept(new Expectation<>( + String.format("annotations(%s)", Arrays.toString(annotations)), + reader -> { + SymbolToken[] actualAnnotations = reader.getTypeAnnotationSymbols(); + assertEquals(annotations.length, actualAnnotations.length); + for (int i = 0; i < annotations.length; i++) { + assertSymbolEquals(annotations[i], actualAnnotations[i]); + } + } + )); + } + static ExpectationProvider fieldName(String fieldName) { return consumer -> consumer.accept(new Expectation<>( String.format("field(%s)", fieldName), @@ -449,6 +522,17 @@ static ExpectationProvider fieldNameSymbol(S )); } + static ExpectationProvider fieldNameSymbolZero() { + return consumer -> consumer.accept(new Expectation<>( + "fieldSidZero", + reader -> { + SymbolToken field = reader.getFieldNameSymbol(); + assertNull(field.getText()); + assertEquals(0, field.getSid()); + } + )); + } + @SafeVarargs private final void assertSequence(ExpectationProvider... 
providers) { IonCursorTestUtilities.assertSequence((IonReaderContinuableTopLevelBinary) reader, providers); @@ -1710,7 +1794,7 @@ private final void readIntsIntoOtherType( public void doubleValueOnInt(boolean constructFromBytes) throws Exception { readIntsIntoOtherType( constructFromBytes, - IonReaderContinuableTopLevelBinaryTest::doubleValue, + IonCursorTestUtilities::doubleValue, 0.0, 1.0, -1.0, @@ -1994,7 +2078,7 @@ public void multipleSymbolTableSymbolsFieldsFails(boolean constructFromBytes) th }, constructFromBytes ); - + assertThrows(IonException.class, () -> reader.next()); reader.close(); } @@ -2166,7 +2250,7 @@ private IonReader boundedReaderFor(InputStream stream, int initialBufferSize, in @ParameterizedTest(name = "constructFromBytes={0}") @ValueSource(booleans = {true, false}) public void singleValueExceedsInitialBufferSize(boolean constructFromBytes) throws Exception { - reader = boundedReaderFor(constructFromBytes, + reader = boundedReaderFor(constructFromBytes, toBinary("\"abcdefghijklmnopqrstuvwxyz\""), 8, Integer.MAX_VALUE, @@ -2199,7 +2283,7 @@ public void maximumBufferSizeWithoutHandlerFails() { .withInitialBufferSize(9); assertThrows(IllegalArgumentException.class, builder::build); } - + private void expectOversized(int numberOfValues) { assertEquals(numberOfValues, oversizedCounter.get()); } @@ -3204,6 +3288,15 @@ public void multiByteSymbolTokens(boolean constructFromBytes) throws Exception { closeAndCount(); } + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void systemSymbolAnnotations(boolean constructFromBytes) throws Exception { + reader = readerFor(constructFromBytes,0xE0, 0x01, 0x01, 0xEA, 0xE7, 0x01, 0x61, 0x60); + assertSequence( + next(IonType.INT), annotationSymbols("$ion") + ); + } + @ParameterizedTest(name = "constructFromBytes={0}") @ValueSource(booleans = {true, false}) public void symbolTableWithOpenContentImportsListField(boolean constructFromBytes) throws Exception { @@ 
-4027,4 +4120,1996 @@ public void expectUseAfterCloseToHaveNoEffect(boolean constructFromBytes) throws assertNull(reader.getType()); reader.close(); } + + /** + * Creates an IonReader over the given data, which will be prepended with a binary Ion 1.1 IVM. + * @param data the data to read. + * @param constructFromBytes whether to construct the reader from bytes or an InputStream. + * @return a new reader. + */ + private IonReader readerForIon11(byte[] data, boolean constructFromBytes) throws Exception { + reader = readerFor(readerBuilder, constructFromBytes, withIvm(1, data)); + byteCounter.set(0); + return reader; + } + + /** + * Checks that the reader reads a null value of the expected type from the given input bytes. + */ + private void assertNullCorrectlyParsed(boolean constructFromBytes, IonType expectedType, String inputBytes) throws Exception { + reader = readerForIon11(hexStringToByteArray(inputBytes), constructFromBytes); + assertSequence( + next(expectedType), nullValue(expectedType), + next(null) + ); + closeAndCount(); + } + + // TODO byte-by-byte incremental mode testing for all Ion 1.1 tests + + @ParameterizedTest + @CsvSource({ + " NULL, EA", + " BOOL, EB 00", + " INT, EB 01", + " FLOAT, EB 02", + " DECIMAL, EB 03", + "TIMESTAMP, EB 04", + " STRING, EB 05", + " SYMBOL, EB 06", + " BLOB, EB 07", + " CLOB, EB 08", + " LIST, EB 09", + " SEXP, EB 0A", + " STRUCT, EB 0B", + }) + public void readNullValue(IonType expectedType, String inputBytes) throws Exception { + assertNullCorrectlyParsed(true, expectedType, inputBytes); + assertNullCorrectlyParsed(false, expectedType, inputBytes); + } + + /** + * Checks that the reader reads the expected boolean from the given input bits. 
+ */ + private void assertBooleanCorrectlyParsed(boolean constructFromBytes, boolean expectedValue, String inputBytes) throws Exception { + reader = readerForIon11(hexStringToByteArray(inputBytes), constructFromBytes); + assertSequence( + next(IonType.BOOL), booleanValue(expectedValue), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest + @CsvSource({ + "true, 6E", + "false, 6F", + }) + public void readBooleanValue(Boolean expectedValue, String inputBytes) throws Exception { + assertBooleanCorrectlyParsed(true, expectedValue, inputBytes); + assertBooleanCorrectlyParsed(false, expectedValue, inputBytes); + } + + /** + * Checks that the reader reads the expected int from the given input bits. + */ + private void assertIntCorrectlyParsed(boolean constructFromBytes, int expectedValue, String inputBytes) throws Exception { + reader = readerForIon11(hexStringToByteArray(inputBytes), constructFromBytes); + assertSequence( + next(IonType.INT), intValue(expectedValue), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest + @CsvSource({ + " 0, 60", + " 1, 61 01", + " 17, 61 11", + " 127, 61 7F", + " 128, 62 80 00", + " 5555, 62 B3 15", + " 32767, 62 FF 7F", + " 32768, 63 00 80 00", + " 292037, 63 C5 74 04", + " 321672342, 64 96 54 2C 13", + " 2147483647, 64 FF FF FF 7F", // Integer.MAX_VALUE + " -1, 61 FF", + " -2, 61 FE", + " -14, 61 F2", + " -128, 61 80", + " -129, 62 7F FF", + " -944, 62 50 FC", + " -32768, 62 00 80", + " -32769, 63 FF 7F FF", + " -8388608, 63 00 00 80", + " -8388609, 64 FF FF 7F FF", + " -2147483648, 64 00 00 00 80", // Integer.MIN_VALUE + }) + public void readIntValue(int expectedValue, String inputBytes) throws Exception { + assertIntCorrectlyParsed(true, expectedValue, inputBytes); + assertIntCorrectlyParsed(false, expectedValue, inputBytes); + } + + /** + * Checks that the reader reads the expected long from the given input bits. 
+ */ + private void assertLongCorrectlyParsed(boolean constructFromBytes, long expectedValue, String inputBytes) throws Exception { + reader = readerForIon11(hexStringToByteArray(inputBytes), constructFromBytes); + assertSequence( + next(IonType.INT), longValue(expectedValue), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest + @CsvSource({ + " 0, 60", + " 1, 61 01", + " 17, 61 11", + " 127, 61 7F", + " 127, 62 7F 00", + " 127, 64 7F 00 00 00", + " 127, 68 7F 00 00 00 00 00 00 00", + " 128, 62 80 00", + " 5555, 62 B3 15", + " 32767, 62 FF 7F", + " 32768, 63 00 80 00", + " 292037, 63 C5 74 04", + " 321672342, 64 96 54 2C 13", + " 2147483647, 64 FF FF FF 7F", // Integer.MAX_VALUE + " 64121672342, 65 96 12 F3 ED 0E", + " 1274120283167, 66 1F A4 7C A7 28 01", + " 851274120283167, 67 1F C4 8B B3 3A 06 03", + " 72624976668147840, 68 80 40 20 10 08 04 02 01", + " 9223372036854775807, 68 FF FF FF FF FF FF FF 7F", // Long.MAX_VALUE + " -1, 61 FF", + " -2, 61 FE", + " -14, 61 F2", + " -128, 61 80", + " -129, 62 7F FF", + " -944, 62 50 FC", + " -32768, 62 00 80", + " -32769, 63 FF 7F FF", + " -8388608, 63 00 00 80", + " -8388609, 64 FF FF 7F FF", + " -2147483648, 64 00 00 00 80", // Integer.MIN_VALUE + " -72624976668147841, 68 7F BF DF EF F7 FB FD FE", + " -9223372036854775808, 68 00 00 00 00 00 00 00 80", // Long.MIN_VALUE + }) + public void readLongValue(long expectedValue, String inputBytes) throws Exception { + assertLongCorrectlyParsed(true, expectedValue, inputBytes); + assertLongCorrectlyParsed(false, expectedValue, inputBytes); + } + + @ParameterizedTest + @CsvSource({ + " 0, F6 01", + " 1, F6 03 01", + " 17, F6 03 11", + " 127, F6 03 7F", + " 128, F6 05 80 00", + " 2147483647, F6 09 FF FF FF 7F", // Integer.MAX_VALUE + " 72624976668147840, F6 11 80 40 20 10 08 04 02 01", + " 9223372036854775807, F6 11 FF FF FF FF FF FF FF 7F", // Long.MAX_VALUE + " -1, F6 03 FF", + " -2, F6 03 FE", + " -14, F6 03 F2", + " -128, F6 03 80", + " -129, F6 05 7F FF", + " 
-2147483648, F6 09 00 00 00 80", // Integer.MIN_VALUE + " -72624976668147841, F6 11 7F BF DF EF F7 FB FD FE", + " -9223372036854775808, F6 11 00 00 00 00 00 00 00 80", // Long.MIN_VALUE + }) + public void readLongValueFromVariableLengthEncoding(long expectedValue, String inputBytes) throws Exception { + assertLongCorrectlyParsed(true, expectedValue, inputBytes); + assertLongCorrectlyParsed(false, expectedValue, inputBytes); + } + + /** + * Checks that the reader reads the expected BigInteger from the given input bits. + */ + private void assertBigIntegerCorrectlyParsed(boolean constructFromBytes, BigInteger expectedValue, String inputBytes) throws Exception { + reader = readerForIon11(hexStringToByteArray(inputBytes), constructFromBytes); + assertSequence( + next(IonType.INT), bigIntegerValue(expectedValue), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest + @CsvSource({ + " 0, 60", + " 1, 61 01", + " 17, 61 11", + " 127, 61 7F", + " 127, 62 7F 00", + " 127, 64 7F 00 00 00", + " 127, 68 7F 00 00 00 00 00 00 00", + " 128, 62 80 00", + " 5555, 62 B3 15", + " 32767, 62 FF 7F", + " 32768, 63 00 80 00", + " 292037, 63 C5 74 04", + " 321672342, 64 96 54 2C 13", + " 2147483647, 64 FF FF FF 7F", // Integer.MAX_VALUE + " 64121672342, 65 96 12 F3 ED 0E", + " 1274120283167, 66 1F A4 7C A7 28 01", + " 851274120283167, 67 1F C4 8B B3 3A 06 03", + " 72624976668147840, 68 80 40 20 10 08 04 02 01", + " 9223372036854775807, 68 FF FF FF FF FF FF FF 7F", // Long.MAX_VALUE + " 9223372036854775808, F6 13 00 00 00 00 00 00 00 80 00", + "999999999999999999999999999999, F6 1B FF FF FF 3F EA ED 74 46 D0 9C 2C 9F 0C", + " -1, 61 FF", + " -2, 61 FE", + " -14, 61 F2", + " -128, 61 80", + " -129, 62 7F FF", + " -944, 62 50 FC", + " -32768, 62 00 80", + " -32769, 63 FF 7F FF", + " -8388608, 63 00 00 80", + " -8388609, 64 FF FF 7F FF", + " -2147483648, 64 00 00 00 80", // Integer.MIN_VALUE + " -72624976668147841, 68 7F BF DF EF F7 FB FD FE", + " -9223372036854775808, 68 00 00 00 
00 00 00 00 80", // Long.MIN_VALUE + " -9223372036854775809, F6 13 FF FF FF FF FF FF FF 7F FF", + "-99999999999999999999999999999, F6 1B 01 00 00 60 35 E8 8D 92 51 F0 E1 BC FE", + }) + public void readBigIntegerValue(BigInteger expectedValue, String inputBytes) throws Exception { + assertBigIntegerCorrectlyParsed(true, expectedValue, inputBytes); + assertBigIntegerCorrectlyParsed(false, expectedValue, inputBytes); + } + + /** + * Checks that the reader reads the expected double from the given input bits. + */ + private void assertDoubleCorrectlyParsed(boolean constructFromBytes, double expectedValue, String inputBytes) throws Exception { + reader = readerForIon11(hexStringToByteArray(inputBytes), constructFromBytes); + assertSequence( + next(IonType.FLOAT), doubleValue(expectedValue), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest + @CsvSource({ + " 0.0, 6A", + " 0.0, 6B 00 00", + " 0.0, 6C 00 00 00 00", + " 0.0, 6D 00 00 00 00 00 00 00 00", + " -0.0, 6B 00 80", + " -0.0, 6C 00 00 00 80", + " -0.0, 6D 00 00 00 00 00 00 00 80", + " 1.0, 6B 00 3C", + " 1.0, 6C 00 00 80 3F", + " 1.0, 6D 00 00 00 00 00 00 F0 3F", + " 1.5, 6C 00 00 C0 3F", + " 0.00006103515625, 6B 00 04", // Smallest positive normal half-precision float + " 0.333251953125, 6B 55 35", // Nearest half-precision representation of one third + " 3.141592653589793, 6D 18 2D 44 54 FB 21 09 40", + " 4.00537109375, 6C 00 2C 80 40", + " 4.11111111111, 6D 39 C2 71 1C C7 71 10 40", + " 65504, 6B FF 7B", // Largest normal half-precision float + " 423542.09375, 6C C3 CE CE 48", + " 8236423542.09375, 6D 00 80 61 97 DD AE FE 41", + " 1.79769313486231570e+308, 6D FF FF FF FF FF FF EF 7F", // Double.MAX_VALUE + " -1.0, 6C 00 00 80 BF", + " -1.5, 6C 00 00 C0 BF", + " -2, 6B 00 C0", + " -3.141592653589793, 6D 18 2D 44 54 FB 21 09 C0", + " -4.00537109375, 6C 00 2C 80 C0", + " -4.11111111111, 6D 39 C2 71 1C C7 71 10 C0", + " -65504, 6B FF FB", // Smallest normal half-precision float + " -423542.09375, 6C C3 
CE CE C8", + " -8236423542.09375, 6D 00 80 61 97 DD AE FE C1", + "-1.79769313486231570e+308, 6D FF FF FF FF FF FF EF FF", // -Double.MAX_VALUE + " NaN, 6B 01 7C", + " Infinity, 6B 00 7C", + " -Infinity, 6B 00 FC", + " NaN, 6C 00 00 C0 7F", + " Infinity, 6C 00 00 80 7F", + " -Infinity, 6C 00 00 80 FF", + " NaN, 6D 01 00 00 00 00 00 F0 7F", + " Infinity, 6D 00 00 00 00 00 00 F0 7F", + " -Infinity, 6D 00 00 00 00 00 00 F0 FF", + }) + public void readDoubleValue(double expectedValue, String inputBytes) throws Exception { + assertDoubleCorrectlyParsed(true, expectedValue, inputBytes); + assertDoubleCorrectlyParsed(false, expectedValue, inputBytes); + } + + /** + * Checks that the reader reads the expected Decimal or BigDecimal from the given input bits. + */ + private void assertDecimalCorrectlyParsed( + boolean constructFromBytes, + BigDecimal expectedValue, + String inputBytes, + Function> expectationProviderFunction + ) throws Exception { + reader = readerForIon11(hexStringToByteArray(inputBytes), constructFromBytes); + assertSequence( + next(IonType.DECIMAL), expectationProviderFunction.apply(expectedValue), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest + @CsvSource({ + " 0., 70", + " 0e1, 71 03", + " 0e63, 71 7F", + " 0e64, 72 02 01", + " 0e99, 72 8E 01", + " 0.0, 71 FF", + " 0.00, 71 FD", + " 0.000, 71 FB", + " 0e-64, 71 81", + " 0e-99, 72 76 FE", + " -0., 72 01 00", + " -0e1, 72 03 00", + " -0e3, 72 07 00", + " -0e63, 72 7F 00", + " -0e199, 73 1E 03 00", + " -0e-1, 72 FF 00", + " -0e-2, 72 FD 00", + " -0e-3, 72 FB 00", + " -0e-63, 72 83 00", + " -0e-64, 72 81 00", + " -0e-65, 73 FE FE 00", + " -0e-199, 73 E6 FC 00", + " 0.01, 72 FD 01", + " 0.1, 72 FF 01", + " 1, 72 01 01", + " 1e1, 72 03 01", + " 1e2, 72 05 01", + " 1e63, 72 7F 01", + " 1e64, 73 02 01 01", + " 1e65536, 74 04 00 08 01", + " 2, 72 01 02", + " 7, 72 01 07", + " 14, 72 01 0E", + " 14, 73 02 00 0E", // overpadded exponent + " 14, 74 01 0E 00 00", // Overpadded coefficient + " 14, 75 
02 00 0E 00 00", // Overpadded coefficient and exponent + " 1.0, 72 FF 0A", + " 1.00, 72 FD 64", + " 1.27, 72 FD 7F", + " 1.28, 73 FD 80 00", + " 3.142, 73 FB 46 0C", + " 3.14159, 74 F7 2F CB 04", + " 3.1415927, 75 F3 77 5E DF 01", + " 3.141592653, 76 EF 4D E6 40 BB 00", + " 3.141592653590, 77 E9 16 9F 83 75 DB 02", + " 3.14159265358979323, 79 DF FB A0 9E F6 2F 1E 5C 04", + " 3.1415926535897932384626, 7B D5 72 49 64 CC AF EF 8F 0F A7 06", + " 3.141592653589793238462643383, 7D CB B7 3C 92 86 40 9F 1B 01 1F AA 26 0A", + " 3.14159265358979323846264338327950, 7F C1 8E 29 E5 E3 56 D5 DF C5 10 8F 55 3F 7D 0F", + "3.141592653589793238462643383279503, F7 21 BF 8F 9F F3 E6 64 55 BE BA A7 96 57 79 E4 9A 00", + }) + public void readDecimalValue(@ConvertWith(TestUtils.StringToDecimal.class) Decimal expectedValue, String inputBytes) throws Exception { + assertDecimalCorrectlyParsed(true, expectedValue, inputBytes, IonReaderContinuableTopLevelBinaryTest::decimalValue); + assertDecimalCorrectlyParsed(false, expectedValue, inputBytes, IonReaderContinuableTopLevelBinaryTest::decimalValue); + assertDecimalCorrectlyParsed(true, expectedValue, inputBytes, IonReaderContinuableTopLevelBinaryTest::bigDecimalValue); + assertDecimalCorrectlyParsed(false, expectedValue, inputBytes, IonReaderContinuableTopLevelBinaryTest::bigDecimalValue); + } + + @ParameterizedTest + @CsvSource({ + " 0., F7 01", + " 0e99, F7 05 8E 01", + " 0.0, F7 03 FF", + " 0.00, F7 03 FD", + " 0e-99, F7 05 76 FE", + " -0., F7 05 01 00", + " -0e199, F7 07 1E 03 00", + " -0e-1, F7 05 FF 00", + " -0e-65, F7 07 FE FE 00", + " 0.01, F7 05 FD 01", + " 1, F7 05 01 01", + " 1e65536, F7 09 04 00 08 01", + " 1.0, F7 05 FF 0A", + " 1.28, F7 07 FD 80 00", + " 3.141592653590, F7 0F E9 16 9F 83 75 DB 02", + " 3.14159265358979323, F7 13 DF FB A0 9E F6 2F 1E 5C 04", + " 3.1415926535897932384626, F7 17 D5 72 49 64 CC AF EF 8F 0F A7 06", + " 3.141592653589793238462643383, F7 1B CB B7 3C 92 86 40 9F 1B 01 1F AA 26 0A", + " 
3.14159265358979323846264338327950, F7 1F C1 8E 29 E5 E3 56 D5 DF C5 10 8F 55 3F 7D 0F", + }) + public void readDecimalValueFromVariableLengthEncoding(@ConvertWith(TestUtils.StringToDecimal.class) Decimal expectedValue, String inputBytes) throws Exception { + assertDecimalCorrectlyParsed(true, expectedValue, inputBytes, IonReaderContinuableTopLevelBinaryTest::decimalValue); + assertDecimalCorrectlyParsed(false, expectedValue, inputBytes, IonReaderContinuableTopLevelBinaryTest::decimalValue); + assertDecimalCorrectlyParsed(true, expectedValue, inputBytes, IonReaderContinuableTopLevelBinaryTest::bigDecimalValue); + assertDecimalCorrectlyParsed(false, expectedValue, inputBytes, IonReaderContinuableTopLevelBinaryTest::bigDecimalValue); + } + + /** + * Checks that the reader reads the expected timestamp value from the given input bits. + */ + private void assertIonTimestampCorrectlyParsed(boolean constructFromBytes, Timestamp expected, String inputBits) throws Exception { + reader = readerForIon11(bitStringToByteArray(inputBits), constructFromBytes); + assertSequence( + next(IonType.TIMESTAMP), timestampValue(expected), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest + @CsvSource({ + // OpCode MYYYYYYY DDDDDMMM mmmHHHHH ssssUmmm ffffffss ffffffff ffffffff ffffffff + "2023-10-15T01:00Z, 10000011 00110101 01111101 00000001 00001000", + "2023-10-15T01:59Z, 10000011 00110101 01111101 01100001 00001111", + "2023-10-15T11:22Z, 10000011 00110101 01111101 11001011 00001010", + "2023-10-15T23:00Z, 10000011 00110101 01111101 00010111 00001000", + "2023-10-15T23:59Z, 10000011 00110101 01111101 01110111 00001111", + "2023-10-15T11:22:00Z, 10000100 00110101 01111101 11001011 00001010 00000000", + "2023-10-15T11:22:33Z, 10000100 00110101 01111101 11001011 00011010 00000010", + "2023-10-15T11:22:59Z, 10000100 00110101 01111101 11001011 10111010 00000011", + "2023-10-15T11:22:33.000Z, 10000101 00110101 01111101 11001011 00011010 00000010 00000000", + 
"2023-10-15T11:22:33.444Z, 10000101 00110101 01111101 11001011 00011010 11110010 00000110", + "2023-10-15T11:22:33.999Z, 10000101 00110101 01111101 11001011 00011010 10011110 00001111", + "2023-10-15T11:22:33.000000Z, 10000110 00110101 01111101 11001011 00011010 00000010 00000000 00000000", + "2023-10-15T11:22:33.444555Z, 10000110 00110101 01111101 11001011 00011010 00101110 00100010 00011011", + "2023-10-15T11:22:33.999999Z, 10000110 00110101 01111101 11001011 00011010 11111110 00001000 00111101", + "2023-10-15T11:22:33.000000000Z, 10000111 00110101 01111101 11001011 00011010 00000010 00000000 00000000 00000000", + "2023-10-15T11:22:33.444555666Z, 10000111 00110101 01111101 11001011 00011010 01001010 10000110 11111101 01101001", + "2023-10-15T11:22:33.999999999Z, 10000111 00110101 01111101 11001011 00011010 11111110 00100111 01101011 11101110", + }) + public void readTimestampValueWithUtcShortForm(@ConvertWith(StringToTimestamp.class) Timestamp expectedValue, String inputBits) throws Exception { + assertIonTimestampCorrectlyParsed(true, expectedValue, inputBits); + assertIonTimestampCorrectlyParsed(false, expectedValue, inputBits); + } + + @ParameterizedTest + @CsvSource({ + // OpCode MYYYYYYY DDDDDMMM mmmHHHHH ssssUmmm ffffffss ffffffff ffffffff ffffffff + "1970T, 10000000 00000000", + "2023T, 10000000 00110101", + "2097T, 10000000 01111111", + "2023-01T, 10000001 10110101 00000000", + "2023-10T, 10000001 00110101 00000101", + "2023-12T, 10000001 00110101 00000110", + "2023-10-01T, 10000010 00110101 00001101", + "2023-10-15T, 10000010 00110101 01111101", + "2023-10-31T, 10000010 00110101 11111101", + "2023-10-15T01:00-00:00, 10000011 00110101 01111101 00000001 00000000", + "2023-10-15T01:59-00:00, 10000011 00110101 01111101 01100001 00000111", + "2023-10-15T11:22-00:00, 10000011 00110101 01111101 11001011 00000010", + "2023-10-15T23:00-00:00, 10000011 00110101 01111101 00010111 00000000", + "2023-10-15T23:59-00:00, 10000011 00110101 01111101 01110111 00000111", + 
"2023-10-15T11:22:00-00:00, 10000100 00110101 01111101 11001011 00000010 00000000", + "2023-10-15T11:22:33-00:00, 10000100 00110101 01111101 11001011 00010010 00000010", + "2023-10-15T11:22:59-00:00, 10000100 00110101 01111101 11001011 10110010 00000011", + "2023-10-15T11:22:33.000-00:00, 10000101 00110101 01111101 11001011 00010010 00000010 00000000", + "2023-10-15T11:22:33.444-00:00, 10000101 00110101 01111101 11001011 00010010 11110010 00000110", + "2023-10-15T11:22:33.999-00:00, 10000101 00110101 01111101 11001011 00010010 10011110 00001111", + "2023-10-15T11:22:33.000000-00:00, 10000110 00110101 01111101 11001011 00010010 00000010 00000000 00000000", + "2023-10-15T11:22:33.444555-00:00, 10000110 00110101 01111101 11001011 00010010 00101110 00100010 00011011", + "2023-10-15T11:22:33.999999-00:00, 10000110 00110101 01111101 11001011 00010010 11111110 00001000 00111101", + "2023-10-15T11:22:33.000000000-00:00, 10000111 00110101 01111101 11001011 00010010 00000010 00000000 00000000 00000000", + "2023-10-15T11:22:33.444555666-00:00, 10000111 00110101 01111101 11001011 00010010 01001010 10000110 11111101 01101001", + "2023-10-15T11:22:33.999999999-00:00, 10000111 00110101 01111101 11001011 00010010 11111110 00100111 01101011 11101110", + }) + public void readTimestampValueWithUnknownOffsetShortForm(@ConvertWith(StringToTimestamp.class) Timestamp expectedValue, String inputBits) throws Exception { + assertIonTimestampCorrectlyParsed(true, expectedValue, inputBits); + assertIonTimestampCorrectlyParsed(false, expectedValue, inputBits); + } + + @ParameterizedTest + @CsvSource({ + // OpCode MYYYYYYY DDDDDMMM mmmHHHHH ooooommm ssssssoo ffffffff ffffffff ffffffff ..ffffff + "2023-10-15T01:00-14:00, 10001000 00110101 01111101 00000001 00000000 00000000", + "2023-10-15T01:00+14:00, 10001000 00110101 01111101 00000001 10000000 00000011", + "2023-10-15T01:00-01:15, 10001000 00110101 01111101 00000001 10011000 00000001", + "2023-10-15T01:00+01:15, 10001000 00110101 01111101 
00000001 11101000 00000001", + "2023-10-15T01:59+01:15, 10001000 00110101 01111101 01100001 11101111 00000001", + "2023-10-15T11:22+01:15, 10001000 00110101 01111101 11001011 11101010 00000001", + "2023-10-15T23:00+01:15, 10001000 00110101 01111101 00010111 11101000 00000001", + "2023-10-15T23:59+01:15, 10001000 00110101 01111101 01110111 11101111 00000001", + "2023-10-15T11:22:00+01:15, 10001001 00110101 01111101 11001011 11101010 00000001", + "2023-10-15T11:22:33+01:15, 10001001 00110101 01111101 11001011 11101010 10000101", + "2023-10-15T11:22:59+01:15, 10001001 00110101 01111101 11001011 11101010 11101101", + "2023-10-15T11:22:33.000+01:15, 10001010 00110101 01111101 11001011 11101010 10000101 00000000 00000000", + "2023-10-15T11:22:33.444+01:15, 10001010 00110101 01111101 11001011 11101010 10000101 10111100 00000001", + "2023-10-15T11:22:33.999+01:15, 10001010 00110101 01111101 11001011 11101010 10000101 11100111 00000011", + "2023-10-15T11:22:33.000000+01:15, 10001011 00110101 01111101 11001011 11101010 10000101 00000000 00000000 00000000", + "2023-10-15T11:22:33.444555+01:15, 10001011 00110101 01111101 11001011 11101010 10000101 10001011 11001000 00000110", + "2023-10-15T11:22:33.999999+01:15, 10001011 00110101 01111101 11001011 11101010 10000101 00111111 01000010 00001111", + "2023-10-15T11:22:33.000000000+01:15, 10001100 00110101 01111101 11001011 11101010 10000101 00000000 00000000 00000000 00000000", + "2023-10-15T11:22:33.444555666+01:15, 10001100 00110101 01111101 11001011 11101010 10000101 10010010 01100001 01111111 00011010", + "2023-10-15T11:22:33.999999999+01:15, 10001100 00110101 01111101 11001011 11101010 10000101 11111111 11001001 10011010 00111011", + + }) + public void readTimestampValueWithKnownOffsetShortForm(@ConvertWith(StringToTimestamp.class) Timestamp expectedValue, String inputBits) throws Exception { + assertIonTimestampCorrectlyParsed(true, expectedValue, inputBits); + assertIonTimestampCorrectlyParsed(false, expectedValue, 
inputBits); + } + + @ParameterizedTest + @CsvSource({ + // OpCode Length YYYYYYYY MMYYYYYY HDDDDDMM mmmmHHHH oooooomm ssoooooo ....ssss Scale+ Coefficient + "0001T, 11111000 00000101 00000001 00000000", + "1947T, 11111000 00000101 10011011 00000111", + "9999T, 11111000 00000101 00001111 00100111", + "1947-01T, 11111000 00000111 10011011 01000111 00000000", + "1947-12T, 11111000 00000111 10011011 00000111 00000011", + "1947-01-01T, 11111000 00000111 10011011 01000111 00000100", + "1947-12-23T, 11111000 00000111 10011011 00000111 01011111", + "1947-12-31T, 11111000 00000111 10011011 00000111 01111111", + "1947-12-23T00:00Z, 11111000 00001101 10011011 00000111 01011111 00000000 10000000 00010110", + "1947-12-23T23:59Z, 11111000 00001101 10011011 00000111 11011111 10111011 10000011 00010110", + "1947-12-23T23:59:00Z, 11111000 00001111 10011011 00000111 11011111 10111011 10000011 00010110 00000000", + "1947-12-23T23:59:59Z, 11111000 00001111 10011011 00000111 11011111 10111011 10000011 11010110 00001110", + "1947-12-23T23:59:00.0Z, 11111000 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000011", + "1947-12-23T23:59:00.00Z, 11111000 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000101", + "1947-12-23T23:59:00.000Z, 11111000 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000111", + "1947-12-23T23:59:00.0000Z, 11111000 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001001", + "1947-12-23T23:59:00.00000Z, 11111000 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001011", + "1947-12-23T23:59:00.000000Z, 11111000 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001101", + "1947-12-23T23:59:00.0000000Z, 11111000 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001111", + "1947-12-23T23:59:00.00000000Z, 11111000 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00010001", + 
"1947-12-23T23:59:00.9Z, 11111000 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000011 00001001", + "1947-12-23T23:59:00.99Z, 11111000 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000101 01100011", + "1947-12-23T23:59:00.999Z, 11111000 00010101 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000111 11100111 00000011", + "1947-12-23T23:59:00.9999Z, 11111000 00010101 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001001 00001111 00100111", + "1947-12-23T23:59:00.99999Z, 11111000 00010111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001011 10011111 10000110 00000001", + "1947-12-23T23:59:00.999999Z, 11111000 00010111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001101 00111111 01000010 00001111", + "1947-12-23T23:59:00.9999999Z, 11111000 00011001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001111 01111111 10010110 10011000 00000000", + "1947-12-23T23:59:00.99999999Z, 11111000 00011001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00010001 11111111 11100000 11110101 00000101", + + "1947-12-23T23:59:00.36028797018963968Z, " + // 7-byte coefficient, most-significant bit set + "11111000 00011111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00100011 00000000 00000000 00000000 00000000 00000000 00000000 10000000", + + "1947-12-23T23:59:00.36028797018963967Z, " + // 7-byte coefficient, most-significant bit set + "11111000 00011111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00100011 11111111 11111111 11111111 11111111 11111111 11111111 01111111", + + "1947-12-23T23:59:00.72057594037927935Z, " + // 7-byte coefficient, all bits set + "11111000 00011111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00100011 11111111 11111111 11111111 11111111 11111111 11111111 11111111", + + "1947-12-23T23:59:00.9223372036854775807Z, " + // Long.MAX_VALUE + "11111000 
00100001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00100111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 01111111", + + "1947-12-23T23:59:00.9223372036854775808Z, " + // Long.MAX_VALUE + 1 (unsigned) + "11111000 00100001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00100111 00000000 00000000 00000000 00000000 00000000 00000000 00000000 10000000", + + "1947-12-23T23:59:00.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000Z, " + + "11111000 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00110110 00000010", + + "1947-12-23T23:59:00.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" + + "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" + + "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000Z, " + + "11111000 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 10100010 00000101", + + "1947-12-23T23:59:00.999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999Z, " + + "11111000 10001001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00110110 00000010 11111111 11111111 11111111 11111111 11111111 11111111 " + + "11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 10011111 00110010 00110001 10001111 11001101 00011001 " + + "01001111 00011110 10101000 11001111 11110100 00011000 11010101 00101000 00101011 10101110 00001001 10100100 11011110 01001101 10001111 00100001 11100001 " + + "11111101 01101111 11011110 10000011 11100010 00011010 11101101 10001110 10010101 11001101 01010001 11100110 01010110 01010000 11011110 00000110 
01001101 " + + "11111110 00010100", + + // Offsets + "2048-01-01T01:01-23:59, 11111000 00001101 00000000 01001000 10000100 00010000 00000100 00000000", + "2048-01-01T01:01-00:02, 11111000 00001101 00000000 01001000 10000100 00010000 01111000 00010110", + "2048-01-01T01:01-00:01, 11111000 00001101 00000000 01001000 10000100 00010000 01111100 00010110", + "2048-01-01T01:01-00:00, 11111000 00001101 00000000 01001000 10000100 00010000 11111100 00111111", + "2048-01-01T01:01+00:00, 11111000 00001101 00000000 01001000 10000100 00010000 10000000 00010110", + "2048-01-01T01:01+00:01, 11111000 00001101 00000000 01001000 10000100 00010000 10000100 00010110", + "2048-01-01T01:01+00:02, 11111000 00001101 00000000 01001000 10000100 00010000 10001000 00010110", + "2048-01-01T01:01+23:59, 11111000 00001101 00000000 01001000 10000100 00010000 11111100 00101100", + }) + public void readTimestampValueLongForm(@ConvertWith(StringToTimestamp.class) Timestamp expectedValue, String inputBits) throws Exception { + assertIonTimestampCorrectlyParsed(true, expectedValue, inputBits); + assertIonTimestampCorrectlyParsed(false, expectedValue, inputBits); + } + + /** + * Verifies that the reader fails to parse a value as a timestamp. 
+ * The reader is created with {@code readerForIon11} from the bytes that {@code bitStringToByteArray} produces for {@code inputBits}; the next value + * must be typed as a timestamp and {@code reader.timestampValue()} must throw {@link IllegalArgumentException}. + */ + private void failOnInvalidTimestamp(String inputBits, boolean constructFromBytes) throws Exception { + reader = readerForIon11(bitStringToByteArray(inputBits), constructFromBytes); + assertSequence(next(IonType.TIMESTAMP)); + assertThrows(IllegalArgumentException.class, () -> reader.timestampValue()); + } + + /** + * Runs {@code failOnInvalidTimestamp} on every row, once with {@code constructFromBytes} true and once with it false. + */ + @ParameterizedTest + @CsvSource({ + // YYYYYYYY MMYYYYYY HDDDDDMM mmmmHHHH oooooomm ssoooooo ....ssss Scale+ Coefficient + // 1947-12-23T23:59:00.128d-2 (fraction greater than 1) + "11111000 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000101 10000000", + + }) + public void failOnInvalidLongFormTimestamp(String inputBits) throws Exception { + failOnInvalidTimestamp(inputBits, true); + failOnInvalidTimestamp(inputBits, false); + } + + /** + * Checks that the reader reads the expected string value from the given input, which is text holding hex bytes + * (it is decoded with {@code hexStringToByteArray}, not as a bit string). + */ + private void assertIonStringCorrectlyParsed(boolean constructFromBytes, String expected, String inputBytes) throws Exception { + reader = readerForIon11(hexStringToByteArray(inputBytes), constructFromBytes); + assertSequence( + next(IonType.STRING), stringValue(expected), + next(null) + ); + closeAndCount(); + } + + /** + * Each row pairs the expected text with a hex encoding of it; every row is checked with {@code constructFromBytes} both true and false. + */ + @ParameterizedTest + @CsvSource({ + "'', 90", + "'', F9 01", + "'a', 91 61", + "'a', F9 03 61", + "'ab', 92 61 62", + "'abc', 93 61 62 63", + "'fourteen bytes', 9E 66 6F 75 72 74 65 65 6E 20 62 79 74 65 73", + "'fourteen bytes', F9 1D 66 6F 75 72 74 65 65 6E 20 62 79 74 65 73", + "'this has sixteen', F9 21 74 68 69 73 20 68 61 73 20 73 69 78 74 65 65 6E", + "'variable length encoding', F9 31 76 61 72 69 61 62 6C 65 20 6C 65 6E 67 74 68 20 65 6E 63 6F 64 69 6E 67", + }) + public void readStringValue(String expectedValue, String inputBytes) throws Exception { + assertIonStringCorrectlyParsed(true, expectedValue, inputBytes); + assertIonStringCorrectlyParsed(false, expectedValue, inputBytes); + } + + /** + * Checks that the reader reads the expected symbol text from the given input, which is text holding hex bytes. 
+ * Note that {@code inputBytes} is supplied as hex byte text and decoded with {@code hexStringToByteArray}; the symbol text is asserted + * through the {@code stringValue} expectation. + */ + private void assertIonSymbolCorrectlyParsed(boolean constructFromBytes, String expected, String inputBytes) throws Exception { + reader = readerForIon11(hexStringToByteArray(inputBytes), constructFromBytes); + assertSequence( + next(IonType.SYMBOL), stringValue(expected), + next(null) + ); + closeAndCount(); + } + + /** + * Each row pairs the expected symbol text with a hex encoding of it; every row is checked with {@code constructFromBytes} both true and false. + */ + @ParameterizedTest + @CsvSource({ + "'', A0", + "'', FA 01", + "'a', A1 61", + "'a', FA 03 61", + "'ab', A2 61 62", + "'abc', A3 61 62 63", + "'fourteen bytes', AE 66 6F 75 72 74 65 65 6E 20 62 79 74 65 73", + "'fourteen bytes', FA 1D 66 6F 75 72 74 65 65 6E 20 62 79 74 65 73", + "'this has sixteen', FA 21 74 68 69 73 20 68 61 73 20 73 69 78 74 65 65 6E", + "'variable length encoding', FA 31 76 61 72 69 61 62 6C 65 20 6C 65 6E 67 74 68 20 65 6E 63 6F 64 69 6E 67", + }) + public void readSymbolValueWithInlineText(String expectedValue, String inputBytes) throws Exception { + assertIonSymbolCorrectlyParsed(true, expectedValue, inputBytes); + assertIonSymbolCorrectlyParsed(false, expectedValue, inputBytes); + } + + /** + * Checks that the reader reads the expected clob or blob value from the given input (text holding hex bytes), using IonReader.newBytes. 
+ * Note that {@code inputBytes} is hex byte text (decoded with {@code hexStringToByteArray}); {@code lobType} is the type the value must report + * and {@code expected} is the payload asserted through the {@code newBytesValue} expectation. + */ + private void assertIonLobCorrectlyParsedViaNewBytes( + boolean constructFromBytes, + IonType lobType, + byte[] expected, + String inputBytes + ) throws Exception { + reader = readerForIon11(hexStringToByteArray(inputBytes), constructFromBytes); + assertSequence( + next(lobType), newBytesValue(expected), + next(null) + ); + closeAndCount(); + } + + /** + * Blob rows: the expected payload as hex (converted by {@code TestUtils.HexStringToByteArray}) followed by the hex input. + */ + @ParameterizedTest + @CsvSource({ + "'', FE 01", + "20, FE 03 20", + "49 20 61 70 70 6C 61 75 64 20 79 6F 75 72 20 63 75 72 69 6F 73 69 74 79, " + + "FE 31 49 20 61 70 70 6C 61 75 64 20 79 6F 75 72 20 63 75 72 69 6F 73 69 74 79" + }) + public void readBlobValueViaNewBytes(@ConvertWith(TestUtils.HexStringToByteArray.class) byte[] expectedBytes, String inputBytes) throws Exception { + assertIonLobCorrectlyParsedViaNewBytes(true, IonType.BLOB, expectedBytes, inputBytes); + assertIonLobCorrectlyParsedViaNewBytes(false, IonType.BLOB, expectedBytes, inputBytes); + } + + /** + * Clob rows: the expected payload as hex (converted by {@code TestUtils.HexStringToByteArray}) followed by the hex input. + */ + @ParameterizedTest + @CsvSource({ + "'', FF 01", + "20, FF 03 20", + "49 20 61 70 70 6C 61 75 64 20 79 6F 75 72 20 63 75 72 69 6F 73 69 74 79, " + + "FF 31 49 20 61 70 70 6C 61 75 64 20 79 6F 75 72 20 63 75 72 69 6F 73 69 74 79" + }) + public void readClobValueViaNewBytes(@ConvertWith(TestUtils.HexStringToByteArray.class) byte[] expectedBytes, String inputBytes) throws Exception { + assertIonLobCorrectlyParsedViaNewBytes(true, IonType.CLOB, expectedBytes, inputBytes); + assertIonLobCorrectlyParsedViaNewBytes(false, IonType.CLOB, expectedBytes, inputBytes); + } + + /** + * Checks that the reader reads the expected clob or blob value from the given input (text holding hex bytes), using IonReader.getBytes. 
+ * Note that {@code inputBytes} is hex byte text (decoded with {@code hexStringToByteArray}). The expected payload is copied to offset 3 of a + * buffer that is 7 bytes longer than the payload, and that buffer, the offset and the payload length are handed to {@code getBytesValue}. + */ + private void assertIonLobCorrectlyParsedViaGetBytes( + boolean constructFromBytes, + IonType lobType, + byte[] expected, + String inputBytes + ) throws Exception { + reader = readerForIon11(hexStringToByteArray(inputBytes), constructFromBytes); + byte[] shiftedBytes = new byte[expected.length + 7]; + System.arraycopy(expected, 0, shiftedBytes, 3, expected.length); + assertSequence( + next(lobType), getBytesValue(shiftedBytes, 3, expected.length, expected.length), + next(null) + ); + closeAndCount(); + } + + /** + * Blob rows: the expected payload as hex (converted by {@code TestUtils.HexStringToByteArray}) followed by the hex input. + */ + @ParameterizedTest + @CsvSource({ + "'', FE 01", + "20, FE 03 20", + "49 20 61 70 70 6C 61 75 64 20 79 6F 75 72 20 63 75 72 69 6F 73 69 74 79, " + + "FE 31 49 20 61 70 70 6C 61 75 64 20 79 6F 75 72 20 63 75 72 69 6F 73 69 74 79" + }) + public void readBlobValueViaGetBytes(@ConvertWith(TestUtils.HexStringToByteArray.class) byte[] expectedBytes, String inputBytes) throws Exception { + assertIonLobCorrectlyParsedViaGetBytes(true, IonType.BLOB, expectedBytes, inputBytes); + assertIonLobCorrectlyParsedViaGetBytes(false, IonType.BLOB, expectedBytes, inputBytes); + } + + /** + * Clob rows: the expected payload as hex (converted by {@code TestUtils.HexStringToByteArray}) followed by the hex input. + */ + @ParameterizedTest + @CsvSource({ + "'', FF 01", + "20, FF 03 20", + "49 20 61 70 70 6C 61 75 64 20 79 6F 75 72 20 63 75 72 69 6F 73 69 74 79, " + + "FF 31 49 20 61 70 70 6C 61 75 64 20 79 6F 75 72 20 63 75 72 69 6F 73 69 74 79" + }) + public void readClobValueViaGetBytes(@ConvertWith(TestUtils.HexStringToByteArray.class) byte[] expectedBytes, String inputBytes) throws Exception { + assertIonLobCorrectlyParsedViaGetBytes(true, IonType.CLOB, expectedBytes, inputBytes); + assertIonLobCorrectlyParsedViaGetBytes(false, IonType.CLOB, expectedBytes, inputBytes); + } + + /** + * Checks that the reader reads the expected annotations and values, using the provided method for retrieving the + * annotations. 
+ * The same five int values are read once for every initial buffer size from 5 up to the input length, each time with a new reader from + * {@code boundedReaderFor}; {@code expectation} is applied to the expected annotation text of each value (an empty array for the unannotated one). + */ + private void assertAnnotationsCorrectlyParsed( + boolean constructFromBytes, + Function> expectation, + byte[] inputBytes + ) throws Exception { + for (int initialBufferSize = 5; initialBufferSize <= Math.max(5, inputBytes.length); initialBufferSize++) { + reader = boundedReaderFor(constructFromBytes, inputBytes, initialBufferSize, Integer.MAX_VALUE, byteCountingHandler); + assertSequence( + next(IonType.INT), expectation.apply(new String[]{"name"}), intValue(0), + next(IonType.INT), expectation.apply(new String[]{"symbols", "name"}), intValue(0), + next(IonType.INT), expectation.apply(new String[]{"name", "symbols", "imports"}), intValue(0), + next(IonType.INT), expectation.apply(new String[]{}), intValue(0), + next(IonType.INT), expectation.apply(new String[]{"symbols", "name"}), intValue(0), + next(null) + ); + closeAndCount(); + } + } + + /** + * Checks the annotated values produced by {@code toBinary} with each of the three annotation expectations, for both values of {@code constructFromBytes}. + */ + @Test + public void readAnnotations_1_0() throws Exception { + byte[] inputBytes = toBinary("name::0 symbols::name::0 name::symbols::imports::0 0 symbols::name::0"); + assertAnnotationsCorrectlyParsed(true, IonReaderContinuableTopLevelBinaryTest::annotations, inputBytes); + assertAnnotationsCorrectlyParsed(true, IonReaderContinuableTopLevelBinaryTest::annotationSymbols, inputBytes); + assertAnnotationsCorrectlyParsed(true, IonReaderContinuableTopLevelBinaryTest::annotationsIterator, inputBytes); + assertAnnotationsCorrectlyParsed(false, IonReaderContinuableTopLevelBinaryTest::annotations, inputBytes); + assertAnnotationsCorrectlyParsed(false, IonReaderContinuableTopLevelBinaryTest::annotationSymbols, inputBytes); + assertAnnotationsCorrectlyParsed(false, IonReaderContinuableTopLevelBinaryTest::annotationsIterator, inputBytes); + } + + @ParameterizedTest + @ValueSource(strings = { + // SIDs + "E4 09 60 | One annotation SID = 4 (name); value int 0 \n" + + "E5 0F 09 60 | Two annotation SIDs = 7 (symbols), 4 (name); value int 0 \n " + + "E6 07 09 0F 0D 60 | Variable length = 3 SIDs = 4 (name), 7 (symbols), 6 (imports); 
value int 0 \n" + + "60 | Unannotated value int 0 \n" + + "E5 0F 09 60 | Two annotation SIDs = 7 (symbols), 4 (name); value int 0 \n", + // FlexSyms + "E7 F9 6E 61 6D 65 60 | One annotation FlexSym text = name; value int 0 \n" + + "E8 0F F9 6E 61 6D 65 60 | Two annotation FlexSyms SID = 7 (symbols), text = name; value int 0 \n" + + "E9 1D F9 6E 61 6D 65 0F F3 69 6D 70 6F 72 74 73 60 | Variable length = 14 FlexSyms text = name, SID = 7 (symbols), text = imports; value int 0 \n" + + "60 | Unannotated value int 0 \n" + + "E8 0F F9 6E 61 6D 65 60 | Two annotation FlexSyms SID = 7 (symbols), text = name; value int 0 \n", + // SIDs (multi-byte FlexUInts) + "E4 12 00 60 | One annotation overpadded SID = 4 (name); value int 0 \n" + + "E5 0F 24 00 00 60 | Two annotation SID = 7 (symbols), overpadded SID = 4 (name); value int 0 \n " + + "E6 0E 00 09 0F 0D 60 | Variable overpadded length = 3 SIDs = 4 (name), 7 (symbols), 6 (imports); value int 0 \n" + + "60 | Unannotated value int 0 \n" + + "E5 0F 24 00 00 60 | Two annotation SID = 7 (symbols), overpadded SID = 4 (name); value int 0 \n ", + // Multi-byte FlexSyms + "E7 F2 FF 6E 61 6D 65 60 | One annotation overpadded FlexSym text = name; value int 0 \n" + + "E8 3C 00 00 F9 6E 61 6D 65 60 | Two annotation FlexSyms = overpadded SID 7 (symbols), text = name; value int 0 \n " + + "E9 F8 00 00 00 F9 6E 61 6D 65 0F E6 FF 69 6D 70 6F 72 74 73 60 | Variable overpadded length = 15 FlexSyms text = name, SID = 7 (symbols), overpadded text = imports; value int 0 \n" + + "60 | Unannotated value int 0 \n" + + "E8 3C 00 00 F9 6E 61 6D 65 60 | Two annotation FlexSyms = overpadded SID 7 (symbols), text = name; value int 0 \n ", + }) + public void readAnnotations_1_1(String inputBytesAsText) throws Exception { + byte[] inputBytes = withIvm(1, hexStringToByteArray(cleanCommentedHexBytes(inputBytesAsText))); + assertAnnotationsCorrectlyParsed(true, IonReaderContinuableTopLevelBinaryTest::annotations, inputBytes); + 
assertAnnotationsCorrectlyParsed(true, IonReaderContinuableTopLevelBinaryTest::annotationSymbols, inputBytes); + assertAnnotationsCorrectlyParsed(true, IonReaderContinuableTopLevelBinaryTest::annotationsIterator, inputBytes); + assertAnnotationsCorrectlyParsed(false, IonReaderContinuableTopLevelBinaryTest::annotations, inputBytes); + assertAnnotationsCorrectlyParsed(false, IonReaderContinuableTopLevelBinaryTest::annotationSymbols, inputBytes); + assertAnnotationsCorrectlyParsed(false, IonReaderContinuableTopLevelBinaryTest::annotationsIterator, inputBytes); + } + + /** + * Reads a single annotated value of type {@code valueType} with a reader from {@code boundedReaderFor} that starts with the given + * initial buffer size, asserting the supplied annotation and value expectations. The expectations are passed as suppliers and + * obtained inside this method, after the reader has been created. + */ + private void readAnnotationsThatForceBufferShift_1_1( + boolean constructFromBytes, + byte[] inputBytes, + int initialBufferSize, + Supplier> annotationExpectation, + IonType valueType, + Supplier> valueExpectation + ) throws Exception { + reader = boundedReaderFor(constructFromBytes, inputBytes, initialBufferSize, Integer.MAX_VALUE, byteCountingHandler); + assertSequence( + next(valueType), annotationExpectation.get(), valueExpectation.get(), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest + @ValueSource(ints={5, 6, 7, 8, 9, 10}) + public void readOneAnnotationFlexSymThatForcesBufferShift_1_1(int initialBufferSize) throws Exception { + byte[] data = withIvm(1, hexStringToByteArray("E7 F1 61 62 63 64 65 66 67 68 A8 69 6A 6B 6C 6D 6E 6F 70")); + Supplier> annotationExpectation = () -> annotations("abcdefgh"); + Supplier> valueExpectation = () -> symbolValue("ijklmnop"); + readAnnotationsThatForceBufferShift_1_1(true, data, initialBufferSize, annotationExpectation, IonType.SYMBOL, valueExpectation); + readAnnotationsThatForceBufferShift_1_1(false, data, initialBufferSize, annotationExpectation, IonType.SYMBOL, valueExpectation); + } + + @ParameterizedTest + @ValueSource(ints={5, 6, 7, 8, 9, 10}) + public void readTwoAnnotationFlexSymsThatForceBufferShift_1_1(int initialBufferSize) throws Exception { + byte[] data = withIvm(1, hexStringToByteArray("E8 F1 61 62 63 64 65 66 67 68 01 67 60")); 
+ Supplier> annotationExpectation = () -> annotations("abcdefgh", "symbols"); + Supplier> valueExpectation = () -> intValue(0); + readAnnotationsThatForceBufferShift_1_1(true, data, initialBufferSize, annotationExpectation, IonType.INT, valueExpectation); + readAnnotationsThatForceBufferShift_1_1(false, data, initialBufferSize, annotationExpectation, IonType.INT, valueExpectation); + } + + /** + * Reads one struct holding a single int 0 with field name "ab" and annotations "abcdefgh" and "symbols", using a reader from + * {@code boundedReaderFor} that starts with the given initial buffer size. + */ + private void readAnnotationsThatForceBufferShiftInDelimitedStruct_1_1( + boolean constructFromBytes, + byte[] inputBytes, + int initialBufferSize + ) throws Exception { + reader = boundedReaderFor(constructFromBytes, inputBytes, initialBufferSize, Integer.MAX_VALUE, byteCountingHandler); + assertSequence( + container(IonType.STRUCT, + next(IonType.INT), fieldName("ab"), annotations("abcdefgh", "symbols"), intValue(0), + next(null) + ), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest + @ValueSource(ints={5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}) + public void readTwoAnnotationFlexSymsThatForceBufferShiftInDelimitedStruct_1_1(int initialBufferSize) throws Exception { + byte[] data = withIvm(1, hexStringToByteArray("F3 FD 61 62 E8 F1 61 62 63 64 65 66 67 68 01 67 60 01 F0")); + readAnnotationsThatForceBufferShiftInDelimitedStruct_1_1(true, data, initialBufferSize); + readAnnotationsThatForceBufferShiftInDelimitedStruct_1_1(false, data, initialBufferSize); + } + + /** + * Reads one struct holding a single int 0 with field name "abcdefgh", using a reader from {@code boundedReaderFor} that starts with + * the given initial buffer size. + */ + private void readFieldSymFlexSymThatForcesBufferShift_1_1( + boolean constructFromBytes, + byte[] inputBytes, + int initialBufferSize + ) throws Exception { + reader = boundedReaderFor(constructFromBytes, inputBytes, initialBufferSize, Integer.MAX_VALUE, byteCountingHandler); + assertSequence( + container(IonType.STRUCT, + next(IonType.INT), fieldName("abcdefgh"), intValue(0), + next(null) + ), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest + @ValueSource(ints={5, 6, 7, 8, 9, 10}) + public void readFieldNameFlexSymThatForcesBufferShift_1_1(int initialBufferSize) throws 
Exception { + byte[] data = withIvm(1, hexStringToByteArray("DB 01 F1 61 62 63 64 65 66 67 68 60")); + // Disable incremental reading so that the reader does not attempt to buffer the struct before stepping in. + readerBuilder = readerBuilder.withIncrementalReadingEnabled(false); + readFieldSymFlexSymThatForcesBufferShift_1_1(true, data, initialBufferSize); + readFieldSymFlexSymThatForcesBufferShift_1_1(false, data, initialBufferSize); + } + + @ParameterizedTest + @ValueSource(ints={5, 6, 7, 8, 9, 10}) + public void readFieldNameFlexSymThatForcesBufferShiftDelimited_1_1(int initialBufferSize) throws Exception { + byte[] data = withIvm(1, hexStringToByteArray("F3 F1 61 62 63 64 65 66 67 68 60 01 F0")); + // Disable incremental reading so that the reader does not attempt to buffer the struct before stepping in. + readerBuilder = readerBuilder.withIncrementalReadingEnabled(false); + readFieldSymFlexSymThatForcesBufferShift_1_1(true, data, initialBufferSize); + readFieldSymFlexSymThatForcesBufferShift_1_1(false, data, initialBufferSize); + } + + /** + * Checks that the reader correctly reads as annotations symbol 0 and inline empty text, which require special + * FlexSyms. 
+ * The input is commented hex text: it is passed through {@code cleanCommentedHexBytes} and {@code hexStringToByteArray} before being read. + * Four annotated int 0 values are expected: empty text; SID 0; empty text then SID 0; SID 0 then empty text. + */ + private void readAnnotationsWithSpecialFlexSyms_1_1(boolean constructFromBytes, String inputBytes) throws Exception { + reader = readerForIon11(hexStringToByteArray(cleanCommentedHexBytes(inputBytes)), constructFromBytes); + SymbolToken emptyText = new SymbolTokenImpl("", -1); + SymbolToken symbolZero = new SymbolTokenImpl(null, 0); + assertSequence( + next(IonType.INT), annotations(""), intValue(0), + next(IonType.INT), annotationSymbols(symbolZero), intValue(0), + next(IonType.INT), annotationSymbols(emptyText, symbolZero), intValue(0), + next(IonType.INT), annotationSymbols(symbolZero, emptyText), intValue(0), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest + @ValueSource(strings = { + // Minimal representations + "E7 01 81 60 | One empty-text annotation; value int 0 \n" + + "E7 01 60 60 | One SID 0 annotation; value int 0 \n" + + "E8 01 81 01 60 60 | Two annotations: empty text, SID 0; value int 0 \n" + + "E9 09 01 60 01 81 60 | Variable length = 4 annotations: SID 0, empty text; value int 0 \n", + // Overpadded representations + "E7 02 00 81 60 | One overpadded empty-text annotation; value int 0 \n" + + "E7 04 00 00 60 60 | One overpadded SID 0 annotation; value int 0 \n" + + "E8 08 00 00 00 81 02 00 60 60 | Two overpadded annotations: empty text, SID 0; value int 0 \n" + + "E9 90 00 00 00 00 01 60 01 81 60 | Variable overpadded length = 4 annotations: SID 0, empty text; value int 0 \n" + }) + public void readAnnotationsWithSpecialFlexSyms_1_1(String inputBytes) throws Exception { + readAnnotationsWithSpecialFlexSyms_1_1(true, inputBytes); + readAnnotationsWithSpecialFlexSyms_1_1(false, inputBytes); + } + + /** + * The input is an int 0 carrying a single SID 0 annotation; the String-based annotation accessors are expected to throw + * {@link IonException} for it, while the value itself and the end of the stream are still read normally. + */ + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void getAnnotationsAsStringFailsWhenTextIsUndefined(boolean constructFromBytes) throws Exception { + reader = readerForIon11(hexStringToByteArray("E7 01 60 60"), constructFromBytes); + assertSequence(next(IonType.INT), intValue(0)); + assertThrows(IonException.class, 
() -> reader.getTypeAnnotations()); + assertThrows(IonException.class, () -> reader.iterateTypeAnnotations().next()); + assertSequence(next(null)); + closeAndCount(); + } + + // TODO add tests for incrementally reading Ion 1.1 annotations. + + /** + * Checks that the reader correctly reads a struct with two fields: "name" (true) and "imports" (false). + * The input is commented hex text; it is cleaned, decoded, passed through {@code withIvm(1, ...)} and then read once for every + * initial buffer size from 5 up to the data length. + */ + private void assertSimpleStructCorrectlyParsed(boolean constructFromBytes, String inputBytes) throws Exception { + byte[] data = withIvm(1, hexStringToByteArray(cleanCommentedHexBytes(inputBytes))); + for (int initialBufferSize = 5; initialBufferSize <= Math.max(5, data.length); initialBufferSize++) { + reader = boundedReaderFor(constructFromBytes, data, initialBufferSize, Integer.MAX_VALUE, byteCountingHandler); + assertSequence( + container(IonType.STRUCT, + next(IonType.BOOL), fieldName("name"), booleanValue(true), + next(IonType.BOOL), fieldName("imports"), booleanValue(false), + next(null) + ), + next(null) + ); + closeAndCount(); + } + } + + @ParameterizedTest + @ValueSource(strings = { + // Length-prefixed + "D4 | Struct Length = 4 \n" + + "09 | SID 4 \n" + + "6E | true \n" + + "0D | SID 6 \n " + + "6F | false", + // Delimited + "F3 | Delimited struct \n" + + "F9 6E 61 6D 65 | name \n" + + "6E | true \n" + + "0D | SID 6 \n" + + "6F | false \n" + + "01 F0 | End delimited struct", + // FlexSym field names using SID type ID + "FD | Variable Length SID struct \n" + + "21 | Length = 16 \n" + + "01 | Switch to FlexSyms \n" + + "F9 6E 61 6D 65 | name \n" + + "6E | true \n" + + "F3 69 6D 70 6F 72 74 73 | imports \n " + + "6F | false", + // SID then FlexSym + "DC | Struct Length = 12 \n" + + "09 | SID 4 \n" + + "6E | true \n" + + "01 | Switch to FlexSyms \n" + + "F3 69 6D 70 6F 72 74 73 | imports \n " + + "6F | false", + // FlexSym then SID + "D9 | Struct Length = 9 \n" + + "01 | Switch to FlexSyms \n" + + "F9 6E 61 6D 65 | name \n" + + "6E | true \n" + + "0D | SID 6 \n " + + "6F | false", + }) + public void 
readStruct_1_1(String inputBytes) throws Exception { + assertSimpleStructCorrectlyParsed(true, inputBytes); + assertSimpleStructCorrectlyParsed(false, inputBytes); + } + + @Test + public void ensureFieldNameStateDoesNotLeakIntoNestedStructs() throws Exception { + // This test case covers a very specific edge case where the field name was leaking from + // an outer struct to the first field of a nested struct, when the outer field name was + // an inline field name symbol, and the first inner field name was given by SID. + // For example, { a: { $4: b } } was incorrectly being read as { a: { a: b } } + String data = "FD 0F 01 FF 61 D3 09 A1 62"; + reader = readerForIon11(hexStringToByteArray(cleanCommentedHexBytes(data)), true); + assertSequence( + next(IonType.STRUCT), inContainer( + next(IonType.STRUCT), fieldName("a"), inContainer( + next(IonType.SYMBOL), fieldName("name"), symbolValue("b"), + next(null) + ), + next(null) + ), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void readMultipleNestedPrefixedStructs_1_1(boolean constructFromBytes) throws Exception { + byte[] input = hexStringToByteArray(cleanCommentedHexBytes( + "D8 | Struct Length = 8 \n" + + "17 | SID 11 \n" + + "D6 | Struct Length = 6 \n" + + "17 | SID 11 \n" + + "D4 | Struct Length = 4 \n" + + "17 | SID 11 \n" + + "D2 | Struct Length = 2 \n" + + "17 | SID 11 \n" + + "D0 | Struct Length = 0" + )); + reader = readerForIon11(input, constructFromBytes); + assertSequence( + container(IonType.STRUCT, + container(IonType.STRUCT, + container(IonType.STRUCT) + ) + ), + next(null) + ); + closeAndCount(); + } + + /** + * Checks that the reader correctly reads symbol zero, which requires a special FlexSym, as a field name. 
+ * The input is commented hex text; it is cleaned, decoded and passed through {@code withIvm(1, ...)}, then read once for every initial + * buffer size from 5 through 20. The struct is expected to hold four {@code true} fields named: symbol zero, symbol zero, "name", symbol zero. + */ + private void assertStructWithSymbolZeroFieldNamesCorrectlyParsed(boolean constructFromBytes, String inputBytes) throws Exception { + for (int initialBufferSize = 5; initialBufferSize <= 20; initialBufferSize++) { + reader = boundedReaderFor(constructFromBytes, withIvm(1, hexStringToByteArray(cleanCommentedHexBytes(inputBytes))), initialBufferSize, Integer.MAX_VALUE, byteCountingHandler); + assertSequence( + container(IonType.STRUCT, + next(IonType.BOOL), fieldNameSymbolZero(), booleanValue(true), + next(IonType.BOOL), fieldNameSymbolZero(), booleanValue(true), + next(IonType.BOOL), fieldName("name"), booleanValue(true), + next(IonType.BOOL), fieldNameSymbolZero(), booleanValue(true) + ), + next(null) + ); + closeAndCount(); + } + } + + @ParameterizedTest + @ValueSource(strings = { + // SID 0 in fixed-length SID struct that switches to FlexSyms + "DC | Struct Length = 12 \n" + + "01 | Switch to FlexSyms \n" + + "01 60 | FlexSym 0 \n" + + "6E | true \n" + + "01 60 | FlexSym SID 0 \n" + + "6E | true \n" + + "09 | FlexSym SID 4 (name) \n" + + "6E | true \n" + + "01 60 | FlexSym SID 0 \n" + + "6E | true", + // SID 0 in variable-length SID struct that switches to FlexSyms + "FD | Variable length SID struct \n" + + "19 | Length = FlexUInt 12 \n" + + "01 | Switch to FlexSyms \n" + + "01 60 | SID 0 \n" + + "6E | true \n" + + "01 60 | FlexSym SID 0 \n" + + "6E | true \n" + + "09 | FlexSym SID 4 (name) \n" + + "6E | true \n" + + "01 60 | FlexSym SID 0 \n" + + "6E | true", + // SID 0 in delimited struct + "F3 | Delimited struct \n" + + "01 60 | FlexSym SID 0 \n" + + "6E | true \n" + + "01 60 | FlexSym SID 0 \n" + + "6E | true \n" + + "09 | FlexSym SID 4 (name) \n" + + "6E | true \n" + + "01 60 | FlexSym SID 0 \n" + + "6E | true \n" + + "01 F0 | End delimited struct" + }) + public void readStructWithSymbolZeroFieldNames_1_1(String inputBytes) throws Exception { + assertStructWithSymbolZeroFieldNamesCorrectlyParsed(true, inputBytes); + assertStructWithSymbolZeroFieldNamesCorrectlyParsed(false, inputBytes); + } + + /** 
+ * Verifies that the reader considers the given input to be incomplete. + * Concretely, the {@code next(null)} expectation must hold and {@code reader.close()} must then throw {@link IonException}. + * The input is commented hex text that is cleaned with {@code cleanCommentedHexBytes} and decoded with {@code hexStringToByteArray}. + */ + private void assertIncompleteInput(boolean constructFromBytes, String inputBytes) throws Exception { + reader = readerForIon11(hexStringToByteArray(cleanCommentedHexBytes(inputBytes)), constructFromBytes); + // When closed without receiving input that completes an incomplete value, the reader throws. + assertSequence(next(null)); + assertThrows(IonException.class, () -> reader.close()); + } + + @ParameterizedTest + @ValueSource(strings = { + // FlexUInt 0 in a fixed-length SID struct + "D3 | Fixed-length 3 SID struct \n" + + "01 | FlexUInt 0 \n" + + "60 | This should not be parsed as int 0, but rather the first byte in a FlexSym field name", + // FlexUInt 0 in a variable-length SID struct + "FD | Variable-length SID struct \n" + + "07 | FlexUInt length = 3 \n" + + "01 | FlexUInt 0 \n" + + "60 | This should not be parsed as int 0, but rather the first byte in a FlexSym field name" + }) + public void symbolIdStructWithFlexUIntZeroInFieldNameIsNotTreatedAsSidZero(String inputBytes) throws Exception { + assertIncompleteInput(true, inputBytes); + assertIncompleteInput(false, inputBytes); + } + + /** + * Checks that the reader correctly reads empty inline text, which requires a special FlexSym, as a field name. 
+ */ + public void assertStructWithEmptyInlineFieldNamesCorrectlyParsed(boolean constructFromBytes, String inputBytes) throws Exception { + for (int initialBufferSize = 5; initialBufferSize <= 20; initialBufferSize++) { + reader = boundedReaderFor(constructFromBytes, withIvm(1, hexStringToByteArray(cleanCommentedHexBytes(inputBytes))), initialBufferSize, Integer.MAX_VALUE, byteCountingHandler); + assertSequence( + container(IonType.STRUCT, + next(IonType.BOOL), fieldName(""), booleanValue(false) + ), + next(null) + ); + closeAndCount(); + } + } + + @ParameterizedTest + @ValueSource(strings = { + // Empty field name in fixed-length SID struct + "D4 | Struct Length = 4 \n" + + "01 | switch to FlexSym encoding \n" + + "01 81 | FlexSym empty text \n" + + "6F | false", + // Empty field name in variable-length SID struct + "FD | Variable length SID struct \n" + + "09 | Length = 4 \n" + + "01 | switch to FlexSym encoding \n" + + "01 81 | FlexSym empty text \n" + + "6F | false", + // Empty field name in delimited struct + "F3 | Delimited struct \n" + + "01 81 | FlexSym empty text \n" + + "6F | false \n" + + "01 F0 | End delimited struct" + }) + public void readStructWithEmptyInlineFieldName_1_1(String inputBytes) throws Exception { + assertStructWithEmptyInlineFieldNamesCorrectlyParsed(true, inputBytes); + assertStructWithEmptyInlineFieldNamesCorrectlyParsed(false, inputBytes); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void readMultipleNestedListsAndSexps_1_1(boolean constructFromBytes) throws Exception { + byte[] input = hexStringToByteArray(cleanCommentedHexBytes( + "BA | List Length = 10 \n" + + "FB | Variable-length List \n" + + "11 | Length = 8 \n" + + "B7 | List Length = 7 \n" + + "B0 | List Length = 0 \n" + + "C5 | S-exp Length = 5 \n" + + "C4 | S-exp Length = 4 \n" + + "FC | Variable-length S-exp \n" + + "05 | Length = 2 \n" + + "C0 | S-exp Length = 0 \n" + + "6F | false" + )); + reader = readerForIon11(input, constructFromBytes); 
+ assertSequence( + container(IonType.LIST, + container(IonType.LIST, + container(IonType.LIST, + container(IonType.LIST), + container(IonType.SEXP, + container(IonType.SEXP, + container(IonType.SEXP, + container(IonType.SEXP), + next(IonType.BOOL), booleanValue(false) + ) + ) + ) + ) + ) + ), + next(null) + ); + closeAndCount(); + } + + // TODO oversized Ion 1.1 annotation wrappers + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void delimitedListNestedWithinDelimitedStruct(boolean constructFromBytes) throws Exception { + byte[] input = hexStringToByteArray(cleanCommentedHexBytes( + "F3 | Delimited struct\n" + + "09 | Field SID 4 (name)\n" + + "F1 | Delimited list\n" + + "F0 | Delimited end marker\n" + + "01 | Special FlexSym 0 in field name position\n" + + "F0 | Delimited end marker\n" + )); + reader = readerForIon11(input, constructFromBytes); + assertSequence( + container(IonType.STRUCT, + next("name", IonType.LIST), STEP_IN, + next(null), + STEP_OUT + ), + next(null) + ); + closeAndCount(); + } + + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void topLevelStepOverDelimitedListNestedWithinDelimitedStruct(boolean constructFromBytes) throws Exception { + byte[] input = hexStringToByteArray(cleanCommentedHexBytes( + "F3 | Delimited struct\n" + + "09 | Field SID 4 (name)\n" + + "F1 | Delimited list\n" + + "F0 | Delimited end marker\n" + + "01 | Special FlexSym 0 in field name position\n" + + "F0 | Delimited end marker\n" + + "60 | Int 0\n" + )); + reader = readerForIon11(input, constructFromBytes); + assertSequence( + next(IonType.STRUCT), + next(IonType.INT), intValue(0), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void topLevelStepOverDelimitedListNestedWithinDelimitedStructNonIncremental(boolean constructFromBytes) throws Exception { + byte[] 
input = withIvm(1, hexStringToByteArray(cleanCommentedHexBytes( + "F3 | Delimited struct\n" + + "09 | Field SID 4 (name)\n" + + "F1 | Delimited list\n" + + "F0 | Delimited end marker\n" + + "01 | Special FlexSym 0 in field name position\n" + + "F0 | Delimited end marker\n" + + "60 | Int 0\n" + ))); + reader = readerFor(readerBuilder.withIncrementalReadingEnabled(false), constructFromBytes, input); + assertSequence( + next(IonType.STRUCT), + next(IonType.INT), intValue(0), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void prefixedStructNestedWithinDelimitedSexp(boolean constructFromBytes) throws Exception { + byte[] input = hexStringToByteArray(cleanCommentedHexBytes( + "F2 | Delimited s-expression\n" + + "D2 | Prefixed struct, length 2\n" + + "09 | Field SID 4 (name)\n" + + "60 | Int 0\n" + + "F0 | Delimited end marker\n" + )); + reader = readerForIon11(input, constructFromBytes); + assertSequence( + container(IonType.SEXP, + container(IonType.STRUCT, + next("name", IonType.INT), intValue(0), + next(null) + ), + next(null) + ), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void stepOverPrefixedStructNestedWithinDelimitedSexp(boolean constructFromBytes) throws Exception { + byte[] input = hexStringToByteArray(cleanCommentedHexBytes( + "F2 | Delimited s-expression\n" + + "D2 | Prefixed struct, length 2\n" + + "09 | Field SID 4 (name)\n" + + "60 | Int 0\n" + + "F0 | Delimited end marker\n" + )); + reader = readerForIon11(input, constructFromBytes); + assertSequence( + container(IonType.SEXP, + next(IonType.STRUCT), + // The nested struct is skipped. 
+ next(null) + ), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void delimitedSexpNestedWithinPrefixedList(boolean constructFromBytes) throws Exception { + byte[] input = hexStringToByteArray(cleanCommentedHexBytes( + "B4 | Prefixed list, length 4\n" + + "F2 | Delimited s-expression\n" + + "60 | Int 0\n" + + "F0 | Delimited end marker\n" + + "60 | Int 0\n" + )); + reader = readerForIon11(input, constructFromBytes); + assertSequence( + container(IonType.LIST, + container(IonType.SEXP, + next(IonType.INT), intValue(0), + next(null) + ), + next(IonType.INT), intValue(0), + next(null) + ), + next(null) + ); + closeAndCount(); + } + + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void stepOverDelimitedSexpNestedWithinPrefixedList(boolean constructFromBytes) throws Exception { + byte[] input = hexStringToByteArray(cleanCommentedHexBytes( + "B4 | Prefixed list, length 4\n" + + "F2 | Delimited s-expression\n" + + "60 | Int 0\n" + + "F0 | Delimited end marker\n" + + "60 | Int 0\n" + )); + reader = readerForIon11(input, constructFromBytes); + assertSequence( + container(IonType.LIST, + next(IonType.SEXP), + // The nested s-expression is skipped. + next(IonType.INT), intValue(0), + next(null) + ), + next(null) + ); + closeAndCount(); + } + + @Test + public void oversizeDelimitedContainer() throws Exception { + // The outer struct is determined to be oversize after the nested delimited list is processed. 
+ byte[] input = withIvm(1, hexStringToByteArray(cleanCommentedHexBytes( + "F3 | Delimited struct\n" + + "09 | Field SID 4 (name)\n" + + "F1 | Delimited list\n" + + "60 | Int 0\n" + + "F0 | Delimited end marker\n" + + "01 | Special FlexSym 0 in field name position\n" + + "F0 | Delimited end marker\n" + + "60 | Int 0\n" + ))); + reader = boundedReaderFor(false, input, 5, 5, byteAndOversizedValueCountingHandler); + assertSequence( + // The oversize delimited struct is skipped. + next(IonType.INT), intValue(0), + next(null) + ); + expectOversized(1); + closeAndCount(); + } + + private byte[] delimitedListNestedWithinDelimitedStructFollowedByFloatZero() throws Exception { + byte[] input = withIvm(1, hexStringToByteArray(cleanCommentedHexBytes( + "F3 | Delimited struct\n" + + "09 | Field SID 4 (name)\n" + + "F1 | Delimited list\n" + + "60 | Int 0\n" + + "60 | Int 0\n" + + "60 | Int 0\n" + + "F0 | Delimited end marker\n" + + "01 | Special FlexSym 0 in field name position\n" + + "F0 | Delimited end marker\n" + + "6A | Float 0e0\n" + ))); + totalBytesInStream = input.length; + return input; + } + + @Test + public void oversizeNestedDelimitedContainer() throws Exception { + // This test differs from the previous one in that the outer struct is determined to be oversize in the + // middle of the nested delimited list. + byte[] input = delimitedListNestedWithinDelimitedStructFollowedByFloatZero(); + reader = boundedReaderFor(false, input, 5, 5, byteAndOversizedValueCountingHandler); + assertSequence( + // The oversize delimited struct is skipped. 
+ next(IonType.FLOAT), doubleValue(0e0), + next(null) + ); + expectOversized(1); + closeAndCount(); + } + + @Test + public void oversizeNestedDelimitedContainerIncremental() throws Exception { + byte[] input = delimitedListNestedWithinDelimitedStructFollowedByFloatZero(); + ResizingPipedInputStream pipe = new ResizingPipedInputStream((int) totalBytesInStream); + reader = boundedReaderFor(pipe, 5, 5, byteAndOversizedValueCountingHandler); + feedBytesOneByOne(input, pipe, reader); + assertSequence( + // The oversize delimited struct is skipped. + next(IonType.FLOAT), doubleValue(0e0), + next(null) + ); + expectOversized(1); + closeAndCount(); + } + + @Test + public void skipDelimitedContainerIncremental() throws Exception { + byte[] input = delimitedListNestedWithinDelimitedStructFollowedByFloatZero(); + ResizingPipedInputStream pipe = new ResizingPipedInputStream((int) totalBytesInStream); + reader = readerFor(pipe); + for (int i = 0; i < input.length - 1; i++) { + nextExpect(null); + pipe.receive(input[i]); + } + nextExpect(IonType.STRUCT); + pipe.receive(input[input.length - 1]); + assertSequence( + // The delimited struct is skipped. 
+ next(IonType.FLOAT), doubleValue(0e0), + next(null) + ); + closeAndCount(); + } + + @Test + public void skipNestedDelimitedContainerIncremental() throws Exception { + byte[] input = delimitedListNestedWithinDelimitedStructFollowedByFloatZero(); + ResizingPipedInputStream pipe = new ResizingPipedInputStream((int) totalBytesInStream); + reader = readerFor(pipe); + for (int i = 0; i < input.length - 1; i++) { + nextExpect(null); + pipe.receive(input[i]); + } + assertSequence( + container(IonType.STRUCT, + next("name", IonType.LIST), STEP_IN, + next(IonType.INT), intValue(0), + STEP_OUT // Skips the last two ints + ), + next(null) + ); + pipe.receive(input[input.length - 1]); + assertSequence( + next(IonType.FLOAT), doubleValue(0e0), + next(null) + ); + closeAndCount(); + } + + @Test + public void nestedDelimitedContainerIncremental() throws Exception { + byte[] input = delimitedListNestedWithinDelimitedStructFollowedByFloatZero(); + ResizingPipedInputStream pipe = new ResizingPipedInputStream((int) totalBytesInStream); + reader = readerFor(pipe); + for (int i = 0; i < input.length - 1; i++) { + nextExpect(null); + pipe.receive(input[i]); + } + assertSequence( + container(IonType.STRUCT, + next("name", IonType.LIST), STEP_IN, + next(IonType.INT), intValue(0), + next(IonType.INT), intValue(0), + next(IonType.INT), intValue(0), + next(null), + STEP_OUT + ), + next(null) + ); + pipe.receive(input[input.length - 1]); + assertSequence( + next(IonType.FLOAT), doubleValue(0e0), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest + @ValueSource(ints={5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}) + public void nestedDelimitedContainerInlineFieldNamesIncremental(int initialBufferSize) throws Exception { + byte[] input = withIvm(1, hexStringToByteArray(cleanCommentedHexBytes( + "F3 | Delimited struct\n" + + "F9 | Inline field name, length 4\n" + + "6E 61 6D 65 | name\n" + + "F1 | Delimited list\n" + + "60 | Int 0\n" + + "60 | Int 0\n" + + "60 | Int 
0\n" + + "F0 | Delimited end marker\n" + + "01 | Special FlexSym 0 in field name position\n" + + "F0 | Delimited end marker\n" + + "6A | Float 0e0\n" + ))); + totalBytesInStream = input.length; + ResizingPipedInputStream pipe = new ResizingPipedInputStream((int) totalBytesInStream); + reader = boundedReaderFor(pipe, initialBufferSize, Integer.MAX_VALUE, byteCountingHandler); + for (int i = 0; i < input.length - 1; i++) { + nextExpect(null); + pipe.receive(input[i]); + } + assertSequence( + container(IonType.STRUCT, + next("name", IonType.LIST), STEP_IN, + next(IonType.INT), intValue(0), + next(IonType.INT), intValue(0), + next(IonType.INT), intValue(0), + next(null), + STEP_OUT + ), + next(null) + ); + pipe.receive(input[input.length - 1]); + assertSequence( + next(IonType.FLOAT), doubleValue(0e0), + next(null) + ); + closeAndCount(); + } + + private byte[] delimitedSymbolTable() throws Exception { + byte[] input = withIvm(1, hexStringToByteArray(cleanCommentedHexBytes( + "E7 01 63 | Annotation System SID 3 ($ion_symbol_table)\n" + + "F3 | Delimited struct\n" + + "01 67 | FlexSym System SID 7 (symbols)\n" + + "F1 | Delimited list\n" + + "96 66 6F 6F 62 61 72 | string foobar\n" + + "F0 | End delimited list\n" + + "01 F0 | End delimited struct\n" + + "E1 01 | Symbol ID 1" + ))); + totalBytesInStream = input.length; + return input; + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void delimitedSymbolTable(boolean constructFromBytes) throws Exception { + for (int initialBufferSize = 5; initialBufferSize <= 20; initialBufferSize++) { + reader = boundedReaderFor(constructFromBytes, delimitedSymbolTable(), initialBufferSize, Integer.MAX_VALUE, byteCountingHandler); + assertSequence( + next(IonType.SYMBOL), symbolValue("foobar"), + next(null) + ); + closeAndCount(); + } + } + + @Test + public void oversizeDelimitedSymbolTableFailsCleanly() throws Exception { + reader = boundedReaderFor(false, 
delimitedSymbolTable(), 5, 5, byteAndOversizedSymbolTableCountingHandler); + assertNull(reader.next()); + expectOversized(1); + reader.close(); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void readIon10then11(boolean constructFromBytes) throws Exception { + reader = readerFor( + constructFromBytes, + // Ion 1.0 + 0xE0, 0x01, 0x00, 0xEA, + 0xD3, // Struct length 3 + 0x84, // Field SID 4 ("name") + 0x21, 0x01, // Int 1 + + // Ion 1.1 + 0xE0, 0x01, 0x01, 0xEA, + 0xF3, // Delimited struct + 0xF7, 0x68, 0x65, 0x6C, 0x6C, 0x6F, // Inline field name "hello" + 0x60, // Int 0 + 0x01, 0xF0 // End delimited struct + ); + assertSequence( + container(IonType.STRUCT, + next(IonType.INT), + fieldName("name"), + intValue(1), + next(null) + ), + container(IonType.STRUCT, + next(IonType.INT), + fieldName("hello"), + intValue(0), + next(null) + ), + next(null) + ); + closeAndCount(); + } + + @Disabled("Currently failing. It seems like there is a problem switching from 1.1 to 1.0") + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void readIon11then10(boolean constructFromBytes) throws Exception { + reader = readerFor( + constructFromBytes, + // Ion 1.1 + 0xE0, 0x01, 0x01, 0xEA, + 0xF3, // Delimited struct + 0xF7, 0x68, 0x65, 0x6C, 0x6C, 0x6F, // Inline field name "hello" + 0x60, // Int 0 + 0x01, 0xF0, // End delimited struct + + // Ion 1.0 + 0xE0, 0x01, 0x00, 0xEA, + 0xD3, // Struct length 3 + 0x84, // Field SID 4 ("name") + 0x21, 0x01 // Int 1 + ); + assertSequence( + container(IonType.STRUCT, + next(IonType.INT), + fieldName("hello"), + intValue(0), + next(null) + ), + container(IonType.STRUCT, + next(IonType.INT), + fieldName("name"), + intValue(1), + next(null) + ), + next(null) + ); + closeAndCount(); + } + + + @Disabled("Currently failing. 
See readIon11then10()") + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void readDataThatSwitchesVersionsMultipleTimes(boolean constructFromBytes) throws Exception { + reader = readerFor( + constructFromBytes, + // Ion 1.0 + 0xE0, 0x01, 0x00, 0xEA, + 0xD3, // Struct length 3 + 0x84, // Field SID 4 ("name") + 0x21, 0x01, // Int 1 + + // Ion 1.1 + 0xE0, 0x01, 0x01, 0xEA, + 0xF3, // Delimited struct + 0xF7, 0x68, 0x65, 0x6C, 0x6C, 0x6F, // Inline field name "hello" + 0x60, // Int 0 + 0x01, 0xF0, // End delimited struct + + // Ion 1.0 + 0xE0, 0x01, 0x00, 0xEA, + 0xB3, // List length 3 + 0x1F, // null.bool (0x01 is a NOP-pad header in Ion 1.0, not a bool) + 0x21, 0x01, // Int 1 + + // Ion 1.1 + 0xE0, 0x01, 0x01, 0xEA, + 0xF1, // Delimited list + 0xE5, 0x0F, 0x09, // Two annotation SIDs: $7 ("symbols"), $4 ("name") + 0xEB, 0x00, // null.bool + 0xF0 // End delimited list + ); + assertSequence( + container(IonType.STRUCT, + next(IonType.INT), + fieldName("name"), + intValue(1), + next(null) + ), + container(IonType.STRUCT, + next(IonType.INT), + fieldName("hello"), + intValue(0), + next(null) + ), + container(IonType.LIST, + next(IonType.BOOL), + nullValue(IonType.BOOL), + next(IonType.INT), + intValue(1), + next(null) + ), + container(IonType.LIST, + next(IonType.BOOL), + annotations("symbols", "name"), + nullValue(IonType.BOOL), + next(null) + ), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void prefixedAnnotatedContainerInsideDelimitedAnnotatedContainerPreservesSidAnnotations(boolean constructFromBytes) throws Exception { + byte[] input = withIvm(1, hexStringToByteArray(cleanCommentedHexBytes( + "E4 09 | Annotation symbol 4 (name) \n" + + "F1 | Delimited list start \n" + + "E5 0B 0D | Annotation symbol 5 (version), annotation symbol 6 (imports) \n" + + "C7 | Prefixed s-exp length 7 \n" + + "E4 0F | Annotation symbol 7 (symbols) \n" + + "60 | Int 0 \n" + + "E5 11
13 | Annotation symbol 8 (max_id), annotation symbol 9 ($ion_shared_symbol_table) \n" + + "60 | Int 0 \n" + + "F0 | End of delimited list" + ))); + reader = readerFor(readerBuilder, constructFromBytes, input); + assertSequence( + next(IonType.LIST), annotations("name"), STEP_IN, + next(IonType.SEXP), annotations("version", "imports"), + STEP_OUT, + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void prefixedAnnotatedContainerInsideDelimitedAnnotatedContainerPreservesFlexSymAnnotations(boolean constructFromBytes) throws Exception { + byte[] input = withIvm(1, hexStringToByteArray(cleanCommentedHexBytes( + "E7 FF 61 | Annotation FlexSym 'a' \n" + + "F1 | Delimited list start \n" + + "E8 FF 62 09 | Annotation FlexSym 'b', annotation FlexSym SID 4 (name) \n" + + "C8 | Prefixed s-exp length 8 \n" + + "E4 0F | Annotation symbol 7 (symbols) \n" + + "60 | Int 0 \n" + + "E8 0B FF 63 | Annotation FlexSym SID 5 (version), annotation FlexSym 'c' \n" + + "60 | Int 0 \n" + + "F0 | End of delimited list" + ))); + readerBuilder = readerBuilder.withIncrementalReadingEnabled(false); + reader = readerFor(readerBuilder, constructFromBytes, input); + assertSequence( + next(IonType.LIST), annotations("a"), STEP_IN, + next(IonType.SEXP), annotations("b", "name"), + STEP_OUT, + next(null) + ); + closeAndCount(); + } + + + @ParameterizedTest(name = "[{index}] {0}") + @CsvSource({ + // (:values 0) (:values 1) + "EF 01 01 60 EF 01 01 61 01", + // (:values (: values 0)) 1 + "EF 01 01 EF 01 01 60 61 01", + // (:values) 0 (:values) 1 + "EF 01 00 60 EF 01 00 61 01", + // (:values) (:values 0) 1 + "EF 01 00 EF 01 01 60 61 01", + // (:values) (:values) (:values 0) 1 + "EF 01 00 EF 01 00 EF 01 01 60 61 01", + }) + public void invokeValuesUsingSystemMacroOpcode(String bytes) throws Exception { + invokeValuesUsingSystemMacroOpcodeHelper(true, bytes); + invokeValuesUsingSystemMacroOpcodeHelper(false, 
bytes); + } + + @ParameterizedTest(name = "[{index}] {0}") + @CsvSource({ + // (:values (:: ) ) 0 1 + "EF 01 02 01 F0 60 61 01", + // (:values 0 1) // using length-prefixed expression group + "EF 01 02 07 60 61 01", + // (:values 0 1) // using delimited expression group + "EF 01 02 01 60 61 01 F0", + // (:values (:: 0) (:values (:: 1))) + "EF 01 02 03 60 EF 01 02 05 61 01", + // (:values (:values 0 1)) + "EF 01 01 EF 01 02 07 60 61 01", + }) + public void invokeValuesWithExpressionGroupsUsingSystemMacroOpcode(String bytes) throws Exception { + invokeValuesUsingSystemMacroOpcodeHelper(true, bytes); + invokeValuesUsingSystemMacroOpcodeHelper(false, bytes); + } + + private void invokeValuesUsingSystemMacroOpcodeHelper(boolean constructFromBytes, String bytes) throws Exception { + // Reset the byte counter between runs since this test utility method is called multiple times per test. + byteCounter.set(0); + byte[] input = withIvm(1, hexStringToByteArray(cleanCommentedHexBytes(bytes))); + readerBuilder = readerBuilder.withIncrementalReadingEnabled(false); + reader = readerFor(readerBuilder, constructFromBytes, input); + assertSequence( + next(IonType.INT), intValue(0), + next(IonType.INT), intValue(1), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void addSymbolsSystemMacro(boolean constructFromBytes) throws Exception { + int[] data = new int[] { + 0xE0, 0x01, 0x01, 0xEA, // Ion 1.1 IVM + 0xEF, 0x14, // system macro add_symbols + 0x02, // AEB: 0b------aa; a=10, expression group + 0x01, // FlexInt 0, a delimited expression group + 0x93, 0x61, 0x62, 0x63, // 3-byte string, utf-8 "abc" + 0xF0, // delimited end... 
of expression group + 0xE1, // SID single byte + 0x40 // SID $64 + }; + try (IonReader reader = readerFor(constructFromBytes,data)) { + assertEquals(IonType.SYMBOL, reader.next()); + assertEquals("abc", reader.stringValue()); + } + } + + @Test + public void addSymbolsSystemMacroWhenNotEntirelyBuffered() throws Exception { + int[] data = new int[] { + /* 0 - 3 */ // 0xE0, 0x01, 0x01, 0xEA, // implicitly provided by BinaryIonAppender + /* 4 - 5 */ 0xEF, 0x14, // system macro add_symbols + /* 6 - 7 */ 0x02, 0x01, // AEB: 0b------aa; a=10, FlexInt 0, a delimited expression group + /* 8 - 11 */ 0x93, 0x66, 0x6F, 0x72, // "for" + /* 12 - 15 */ 0x93, 0x66, 0x75, 0x72, // "fur" + /* 16 - 20 */ 0x94, 0x66, 0x6F, 0x75, 0x72, // "four" + /* 21 - 21 */ 0xF0, // delimited end... of expression group + /* 22 - 23 */ 0xE1, 0x3F + 3, // SID single byte ${usid 1} => "four" + }; + byte[] bytes = new TestUtils.BinaryIonAppender(1).append(data).toByteArray(); + totalBytesInStream = bytes.length; + readerBuilder = IonReaderBuilder.standard().withIncrementalReadingEnabled(true); + ByteArrayInputStream inputStream = new ByteArrayInputStream(bytes); + reader = boundedReaderFor(inputStream, 16, Integer.MAX_VALUE, byteCountingHandler); + assertSequence( + next(IonType.SYMBOL), + stringValue("four"), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void readIon11SystemSymbolFromUserId(boolean constructFromBytes) throws Exception { + reader = readerForIon11(bytes(0xE1, 0x07), constructFromBytes); + assertSequence( + next(IonType.SYMBOL), + stringValue("symbols"), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void readIon11EncodingDirectiveUsingSystemSymbolAnnotation(boolean constructFromBytes) throws Exception { + reader = readerForIon11( + bytes( + 0xE7, 0x01, 0x61, // One FlexSym annotation, with
opcode, opcode 61 = system symbol 1 = $ion + 0xCA, // ( + 0xEE, 0x10, // module + 0xA1, '_', // _ + 0xC5, 0xEE, 0x0F, // S-exp, system symbol 0xF = symbol_table + 0xB2, 0x91, 'a', // ["a"] + 0xE1, 0x01 // $1 = a + ), + constructFromBytes + ); + assertSequence( + next(IonType.SYMBOL), + stringValue("a"), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void readIon11SymbolTableAppendUsingSystemSymbolValue(boolean constructFromBytes) throws Exception { + reader = readerForIon11( + bytes( + 0xE7, 0x01, 0x63, // One FlexSym annotation, with opcode, opcode 63 = system symbol 3 = $ion_symbol_table + 0xDA, // { + 0x01, // Switch to FlexSym field names + 0x01, 0x66, // FlexSym with opcode 66 = system symbol 6 = imports + 0xEE, 0x03, // System symbol value 3 = $ion_symbol_table (denoting symbol table append) + 0x01, 0x67, // FlexSym with opcode 67 = system symbol 7 = symbols + 0xB2, 0x91, 'a', // ["a"] + 0xE1, SystemSymbols_1_1.size() + 1 // first user symbol = a + ), + constructFromBytes + ); + assertSequence( + next(IonType.SYMBOL), + stringValue("a"), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void readIon11SymbolTableWithFlexUIntFieldNames(boolean constructFromBytes) throws Exception { + reader = readerForIon11( + bytes( + 0xE7, 0x01, 0x63, // One FlexSym annotation, with opcode, opcode 63 = system symbol 3 = $ion_symbol_table + 0xD7, // { + 0x0D, // FlexUInt 6 = imports + 0xEE, 0x03, // System symbol value 3 = $ion_symbol_table (denoting symbol table append) + 0x0F, // FlexUInt 7 = symbols + 0xB2, 0x91, 'a', // ["a"] + 0xE1, SystemSymbols_1_1.size() + 1 // first user symbol = a + ), + constructFromBytes + ); + assertSequence( + next(IonType.SYMBOL), + stringValue("a"), + next(null) + ); + closeAndCount(); + } + + // TODO Ion 1.1 symbol tables with all kinds of annotation 
encodings (opcodes E4 - E9, inline and SID) } diff --git a/src/test/java/com/amazon/ion/impl/LocalSymbolTableImportsTest.kt b/src/test/java/com/amazon/ion/impl/LocalSymbolTableImportsTest.kt new file mode 100644 index 0000000000..18ca3ca19c --- /dev/null +++ b/src/test/java/com/amazon/ion/impl/LocalSymbolTableImportsTest.kt @@ -0,0 +1,14 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl + +import org.hamcrest.MatcherAssert.assertThat +import org.hamcrest.Matchers +import org.junit.jupiter.api.Test + +internal class LocalSymbolTableImportsTest { + @Test + fun `EMPTY#getImportedTables should be empty`() { + assertThat(LocalSymbolTableImports.EMPTY.importedTables, Matchers.emptyArray()) + } +} diff --git a/src/test/java/com/amazon/ion/impl/bin/FlexIntTest.kt b/src/test/java/com/amazon/ion/impl/bin/FlexIntTest.kt new file mode 100644 index 0000000000..2e8f646d0a --- /dev/null +++ b/src/test/java/com/amazon/ion/impl/bin/FlexIntTest.kt @@ -0,0 +1,215 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.bin + +import com.amazon.ion.TestUtils.* +import java.math.BigInteger +import org.junit.jupiter.api.Assertions +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.CsvSource + +/** + * This does not test individual methods because the individual methods of [FlexInt] are not meant to be + * called in isolation. 
+ */ +class FlexIntTest { + + @ParameterizedTest + @CsvSource( + " 0, 00000001", + " 1, 00000011", + " 2, 00000101", + " 3, 00000111", + " 4, 00001001", + " 5, 00001011", + " 14, 00011101", + " 63, 01111111", + " 64, 00000010 00000001", + " 729, 01100110 00001011", + " 8191, 11111110 01111111", + " 8192, 00000100 00000000 00000001", + " 1048575, 11111100 11111111 01111111", + " 1048576, 00001000 00000000 00000000 00000001", + " 134217727, 11111000 11111111 11111111 01111111", + " 134217728, 00010000 00000000 00000000 00000000 00000001", + " ${Int.MAX_VALUE}, 11110000 11111111 11111111 11111111 00001111", + " 17179869184, 00100000 00000000 00000000 00000000 00000000 00000001", + " 2199023255552, 01000000 00000000 00000000 00000000 00000000 00000000 00000001", + " 281474976710655, 11000000 11111111 11111111 11111111 11111111 11111111 01111111", + " 281474976710656, 10000000 00000000 00000000 00000000 00000000 00000000 00000000 00000001", + " 36028797018963967, 10000000 11111111 11111111 11111111 11111111 11111111 11111111 01111111", + " 36028797018963968, 00000000 00000001 00000000 00000000 00000000 00000000 00000000 00000000 00000001", + " 72624976668147840, 00000000 00000001 10000001 01000000 00100000 00010000 00001000 00000100 00000010", + " 4611686018427387903, 00000000 11111111 11111111 11111111 11111111 11111111 11111111 11111111 01111111", + " 4611686018427387904, 00000000 00000010 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000001", + " ${Long.MAX_VALUE}, 00000000 11111110 11111111 11111111 11111111 11111111 11111111 11111111 11111111 00000001", + " -1, 11111111", + " -2, 11111101", + " -3, 11111011", + " -14, 11100101", + " -64, 10000001", + " -65, 11111110 11111110", + " -729, 10011110 11110100", + " -8192, 00000010 10000000", + " -8193, 11111100 11111111 11111110", + " -1048576, 00000100 00000000 10000000", + " -1048577, 11111000 11111111 11111111 11111110", + " -134217728, 00001000 00000000 00000000 10000000", + " -134217729, 11110000 
11111111 11111111 11111111 11111110", + " -17179869184, 00010000 00000000 00000000 00000000 10000000", + " -17179869185, 11100000 11111111 11111111 11111111 11111111 11111110", + " -281474976710656, 01000000 00000000 00000000 00000000 00000000 00000000 10000000", + " -281474976710657, 10000000 11111111 11111111 11111111 11111111 11111111 11111111 11111110", + " -36028797018963968, 10000000 00000000 00000000 00000000 00000000 00000000 00000000 10000000", + " -36028797018963969, 00000000 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111110", + " -72624976668147841, 00000000 11111111 01111110 10111111 11011111 11101111 11110111 11111011 11111101", + "-4611686018427387904, 00000000 00000001 00000000 00000000 00000000 00000000 00000000 00000000 10000000", + "-4611686018427387905, 00000000 11111110 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111110", + " ${Long.MIN_VALUE}, 00000000 00000010 00000000 00000000 00000000 00000000 00000000 00000000 00000000 11111110" + ) + fun testWriteFlexInt(value: Long, expectedBits: String) { + val numBytes: Int = FlexInt.flexIntLength(value) + val bytes = ByteArray(numBytes) + FlexInt.writeFlexIntOrUIntInto(bytes, 0, value, numBytes) + Assertions.assertEquals(expectedBits, byteArrayToBitString(bytes)) + Assertions.assertEquals((expectedBits.length + 1) / 9, numBytes) + } + + @ParameterizedTest + @CsvSource( + " 0, 00000001", + " 1, 00000011", + " 2, 00000101", + " 3, 00000111", + " 4, 00001001", + " 5, 00001011", + " 14, 00011101", + " 63, 01111111", + " 64, 10000001", + " 127, 11111111", + " 128, 00000010 00000010", + " 729, 01100110 00001011", + " 16383, 11111110 11111111", + " 16384, 00000100 00000000 00000010", + " 2097151, 11111100 11111111 11111111", + " 2097152, 00001000 00000000 00000000 00000010", + " 268435455, 11111000 11111111 11111111 11111111", + " 268435456, 00010000 00000000 00000000 00000000 00000010", + " ${Int.MAX_VALUE}, 11110000 11111111 11111111 11111111 00001111", + " 
34359738368, 00100000 00000000 00000000 00000000 00000000 00000010", + " 4398046511104, 01000000 00000000 00000000 00000000 00000000 00000000 00000010", + " 562949953421311, 11000000 11111111 11111111 11111111 11111111 11111111 11111111", + " 562949953421312, 10000000 00000000 00000000 00000000 00000000 00000000 00000000 00000010", + " 72057594037927935, 10000000 11111111 11111111 11111111 11111111 11111111 11111111 11111111", + " 72057594037927936, 00000000 00000001 00000000 00000000 00000000 00000000 00000000 00000000 00000010", + " 72624976668147840, 00000000 00000001 10000001 01000000 00100000 00010000 00001000 00000100 00000010", + " ${Long.MAX_VALUE}, 00000000 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111" + ) + fun testWriteFlexUInt(value: Long, expectedBits: String) { + val numBytes: Int = FlexInt.flexUIntLength(value) + val bytes = ByteArray(numBytes) + FlexInt.writeFlexIntOrUIntInto(bytes, 0, value, numBytes) + Assertions.assertEquals(expectedBits, byteArrayToBitString(bytes)) + Assertions.assertEquals((expectedBits.length + 1) / 9, numBytes) + } + + @ParameterizedTest + @CsvSource( + " 0, 00000001", + " 1, 00000011", + " 2, 00000101", + " 3, 00000111", + " 4, 00001001", + " 5, 00001011", + " 14, 00011101", + " 63, 01111111", + " 64, 00000010 00000001", + " 729, 01100110 00001011", + " 8191, 11111110 01111111", + " 8192, 00000100 00000000 00000001", + " 1048575, 11111100 11111111 01111111", + " 1048576, 00001000 00000000 00000000 00000001", + " 134217727, 11111000 11111111 11111111 01111111", + " 134217728, 00010000 00000000 00000000 00000000 00000001", + " 17179869184, 00100000 00000000 00000000 00000000 00000000 00000001", + " 2199023255552, 01000000 00000000 00000000 00000000 00000000 00000000 00000001", + " 281474976710655, 11000000 11111111 11111111 11111111 11111111 11111111 01111111", + " 281474976710656, 10000000 00000000 00000000 00000000 00000000 00000000 00000000 00000001", + " 36028797018963967, 10000000 11111111 
11111111 11111111 11111111 11111111 11111111 01111111", + " 36028797018963968, 00000000 00000001 00000000 00000000 00000000 00000000 00000000 00000000 00000001", + " 72624976668147840, 00000000 00000001 10000001 01000000 00100000 00010000 00001000 00000100 00000010", + " 4611686018427387903, 00000000 11111111 11111111 11111111 11111111 11111111 11111111 11111111 01111111", + " 4611686018427387904, 00000000 00000010 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000001", + " ${Long.MAX_VALUE}, 00000000 11111110 11111111 11111111 11111111 11111111 11111111 11111111 11111111 00000001", + " 9223372036854775808, 00000000 00000010 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000010", + " -1, 11111111", + " -2, 11111101", + " -3, 11111011", + " -14, 11100101", + " -64, 10000001", + " -65, 11111110 11111110", + " -729, 10011110 11110100", + " -8192, 00000010 10000000", + " -8193, 11111100 11111111 11111110", + " -1048576, 00000100 00000000 10000000", + " -1048577, 11111000 11111111 11111111 11111110", + " -134217728, 00001000 00000000 00000000 10000000", + " -134217729, 11110000 11111111 11111111 11111111 11111110", + " -17179869184, 00010000 00000000 00000000 00000000 10000000", + " -17179869185, 11100000 11111111 11111111 11111111 11111111 11111110", + " -281474976710656, 01000000 00000000 00000000 00000000 00000000 00000000 10000000", + " -281474976710657, 10000000 11111111 11111111 11111111 11111111 11111111 11111111 11111110", + " -36028797018963968, 10000000 00000000 00000000 00000000 00000000 00000000 00000000 10000000", + " -36028797018963969, 00000000 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111110", + " -72624976668147841, 00000000 11111111 01111110 10111111 11011111 11101111 11110111 11111011 11111101", + "-4611686018427387904, 00000000 00000001 00000000 00000000 00000000 00000000 00000000 00000000 10000000", + "-4611686018427387905, 00000000 11111110 11111111 11111111 11111111 11111111 11111111 
11111111 11111111 11111110", + " ${Long.MIN_VALUE}, 00000000 00000010 00000000 00000000 00000000 00000000 00000000 00000000 00000000 11111110", + "-9223372036854775809, 00000000 11111110 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111101" + ) + fun testWriteFlexIntForBigInteger(value: BigInteger, expectedBits: String) { + val numBytes: Int = FlexInt.flexIntLength(value) + val bytes = ByteArray(numBytes) + FlexInt.writeFlexIntOrUIntInto(bytes, 0, value, numBytes) + Assertions.assertEquals(expectedBits, byteArrayToBitString(bytes)) + Assertions.assertEquals((expectedBits.length + 1) / 9, numBytes) + } + + @ParameterizedTest + @CsvSource( + " 0, 00000001", + " 1, 00000011", + " 2, 00000101", + " 3, 00000111", + " 4, 00001001", + " 5, 00001011", + " 14, 00011101", + " 63, 01111111", + " 64, 10000001", + " 127, 11111111", + " 128, 00000010 00000010", + " 729, 01100110 00001011", + " 16383, 11111110 11111111", + " 16384, 00000100 00000000 00000010", + " 2097151, 11111100 11111111 11111111", + " 2097152, 00001000 00000000 00000000 00000010", + " 268435455, 11111000 11111111 11111111 11111111", + " 268435456, 00010000 00000000 00000000 00000000 00000010", + " 34359738368, 00100000 00000000 00000000 00000000 00000000 00000010", + " 4398046511104, 01000000 00000000 00000000 00000000 00000000 00000000 00000010", + " 562949953421311, 11000000 11111111 11111111 11111111 11111111 11111111 11111111", + " 562949953421312, 10000000 00000000 00000000 00000000 00000000 00000000 00000000 00000010", + " 72057594037927935, 10000000 11111111 11111111 11111111 11111111 11111111 11111111 11111111", + " 72057594037927936, 00000000 00000001 00000000 00000000 00000000 00000000 00000000 00000000 00000010", + " 72624976668147840, 00000000 00000001 10000001 01000000 00100000 00010000 00001000 00000100 00000010", + " ${Long.MAX_VALUE}, 00000000 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111", + "9223372036854775808, 00000000 00000010 00000000 00000000 
00000000 00000000 00000000 00000000 00000000 00000010" + ) + fun testWriteFlexUIntForBigInteger(value: BigInteger, expectedBits: String) { + val numBytes: Int = FlexInt.flexUIntLength(value) + val bytes = ByteArray(numBytes) + FlexInt.writeFlexIntOrUIntInto(bytes, 0, value, numBytes) + Assertions.assertEquals(expectedBits, byteArrayToBitString(bytes)) + Assertions.assertEquals((expectedBits.length + 1) / 9, numBytes) + } +} diff --git a/src/test/java/com/amazon/ion/impl/bin/IonEncoder_1_1Test.java b/src/test/java/com/amazon/ion/impl/bin/IonEncoder_1_1Test.java index 0e316447e7..231ae83a16 100644 --- a/src/test/java/com/amazon/ion/impl/bin/IonEncoder_1_1Test.java +++ b/src/test/java/com/amazon/ion/impl/bin/IonEncoder_1_1Test.java @@ -1,27 +1,33 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl.bin; -import com.amazon.ion.BitUtils; import com.amazon.ion.Decimal; import com.amazon.ion.IonType; import com.amazon.ion.Timestamp; +import com.amazon.ion.impl.bin.utf8.Utf8StringEncoder; +import com.amazon.ion.impl.bin.utf8.Utf8StringEncoderPool; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.converter.ArgumentConversionException; import org.junit.jupiter.params.converter.ConvertWith; -import org.junit.jupiter.params.converter.TypedArgumentConverter; import org.junit.jupiter.params.provider.CsvSource; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.math.BigInteger; -import java.nio.charset.CharsetEncoder; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.List; import java.util.function.BiFunction; +import static com.amazon.ion.TestUtils.HexStringToByteArray; +import static com.amazon.ion.TestUtils.StringToDecimal; +import static 
com.amazon.ion.TestUtils.StringToTimestamp; +import static com.amazon.ion.TestUtils.SymbolIdsToLongArray; +import static com.amazon.ion.TestUtils.byteArrayToBitString; +import static com.amazon.ion.TestUtils.byteArrayToHex; +import static com.amazon.ion.TestUtils.byteLengthFromBitString; +import static com.amazon.ion.TestUtils.byteLengthFromHexString; + public class IonEncoder_1_1Test { private static BlockAllocator ALLOCATOR = BlockAllocatorProviders.basicProvider().vendAllocator(11); @@ -99,8 +105,8 @@ public void testWriteNullValueForDatagram() { @ParameterizedTest @CsvSource({ - "true, 5E", - "false, 5F", + "true, 6E", + "false, 6F", }) public void testWriteBooleanValue(boolean value, String expectedBytes) { assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeBoolValue); @@ -108,33 +114,33 @@ public void testWriteBooleanValue(boolean value, String expectedBytes) { @ParameterizedTest @CsvSource({ - " 0, 50", - " 1, 51 01", - " 17, 51 11", - " 127, 51 7F", - " 128, 52 80 00", - " 5555, 52 B3 15", - " 32767, 52 FF 7F", - " 32768, 53 00 80 00", - " 292037, 53 C5 74 04", - " 321672342, 54 96 54 2C 13", - " 64121672342, 55 96 12 F3 ED 0E", - " 1274120283167, 56 1F A4 7C A7 28 01", - " 851274120283167, 57 1F C4 8B B3 3A 06 03", - " 72624976668147840, 58 80 40 20 10 08 04 02 01", - " 9223372036854775807, 58 FF FF FF FF FF FF FF 7F", // Long.MAX_VALUE - " -1, 51 FF", - " -2, 51 FE", - " -14, 51 F2", - " -128, 51 80", - " -129, 52 7F FF", - " -944, 52 50 FC", - " -32768, 52 00 80", - " -32769, 53 FF 7F FF", - " -8388608, 53 00 00 80", - " -8388609, 54 FF FF 7F FF", - " -72624976668147841, 58 7F BF DF EF F7 FB FD FE", - "-9223372036854775808, 58 00 00 00 00 00 00 00 80", // Long.MIN_VALUE + " 0, 60", + " 1, 61 01", + " 17, 61 11", + " 127, 61 7F", + " 128, 62 80 00", + " 5555, 62 B3 15", + " 32767, 62 FF 7F", + " 32768, 63 00 80 00", + " 292037, 63 C5 74 04", + " 321672342, 64 96 54 2C 13", + " 64121672342, 65 96 12 F3 ED 0E", + " 1274120283167, 66 1F A4 7C A7 
28 01", + " 851274120283167, 67 1F C4 8B B3 3A 06 03", + " 72624976668147840, 68 80 40 20 10 08 04 02 01", + " 9223372036854775807, 68 FF FF FF FF FF FF FF 7F", // Long.MAX_VALUE + " -1, 61 FF", + " -2, 61 FE", + " -14, 61 F2", + " -128, 61 80", + " -129, 62 7F FF", + " -944, 62 50 FC", + " -32768, 62 00 80", + " -32769, 63 FF 7F FF", + " -8388608, 63 00 00 80", + " -8388609, 64 FF FF 7F FF", + " -72624976668147841, 68 7F BF DF EF F7 FB FD FE", + "-9223372036854775808, 68 00 00 00 00 00 00 00 80", // Long.MIN_VALUE }) public void testWriteIntegerValue(long value, String expectedBytes) { assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeIntValue); @@ -142,37 +148,37 @@ public void testWriteIntegerValue(long value, String expectedBytes) { @ParameterizedTest @CsvSource({ - " 0, 50", - " 1, 51 01", - " 17, 51 11", - " 127, 51 7F", - " 128, 52 80 00", - " 5555, 52 B3 15", - " 32767, 52 FF 7F", - " 32768, 53 00 80 00", - " 292037, 53 C5 74 04", - " 321672342, 54 96 54 2C 13", - " 64121672342, 55 96 12 F3 ED 0E", - " 1274120283167, 56 1F A4 7C A7 28 01", - " 851274120283167, 57 1F C4 8B B3 3A 06 03", - " 72624976668147840, 58 80 40 20 10 08 04 02 01", - " 9223372036854775807, 58 FF FF FF FF FF FF FF 7F", // Long.MAX_VALUE - " 9223372036854775808, F5 13 00 00 00 00 00 00 00 80 00", - "999999999999999999999999999999, F5 1B FF FF FF 3F EA ED 74 46 D0 9C 2C 9F 0C", - " -1, 51 FF", - " -2, 51 FE", - " -14, 51 F2", - " -128, 51 80", - " -129, 52 7F FF", - " -944, 52 50 FC", - " -32768, 52 00 80", - " -32769, 53 FF 7F FF", - " -8388608, 53 00 00 80", - " -8388609, 54 FF FF 7F FF", - " -72624976668147841, 58 7F BF DF EF F7 FB FD FE", - " -9223372036854775808, 58 00 00 00 00 00 00 00 80", // Long.MIN_VALUE - " -9223372036854775809, F5 13 FF FF FF FF FF FF FF 7F FF", - "-99999999999999999999999999999, F5 1B 01 00 00 60 35 E8 8D 92 51 F0 E1 BC FE", + " 0, 60", + " 1, 61 01", + " 17, 61 11", + " 127, 61 7F", + " 128, 62 80 00", + " 5555, 62 B3 15", + " 32767, 62 FF 7F", + 
" 32768, 63 00 80 00", + " 292037, 63 C5 74 04", + " 321672342, 64 96 54 2C 13", + " 64121672342, 65 96 12 F3 ED 0E", + " 1274120283167, 66 1F A4 7C A7 28 01", + " 851274120283167, 67 1F C4 8B B3 3A 06 03", + " 72624976668147840, 68 80 40 20 10 08 04 02 01", + " 9223372036854775807, 68 FF FF FF FF FF FF FF 7F", // Long.MAX_VALUE + " 9223372036854775808, F6 13 00 00 00 00 00 00 00 80 00", + "999999999999999999999999999999, F6 1B FF FF FF 3F EA ED 74 46 D0 9C 2C 9F 0C", + " -1, 61 FF", + " -2, 61 FE", + " -14, 61 F2", + " -128, 61 80", + " -129, 62 7F FF", + " -944, 62 50 FC", + " -32768, 62 00 80", + " -32769, 63 FF 7F FF", + " -8388608, 63 00 00 80", + " -8388609, 64 FF FF 7F FF", + " -72624976668147841, 68 7F BF DF EF F7 FB FD FE", + " -9223372036854775808, 68 00 00 00 00 00 00 00 80", // Long.MIN_VALUE + " -9223372036854775809, F6 13 FF FF FF FF FF FF FF 7F FF", + "-99999999999999999999999999999, F6 1B 01 00 00 60 35 E8 8D 92 51 F0 E1 BC FE", }) public void testWriteIntegerValueForBigInteger(BigInteger value, String expectedBytes) { assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeIntValue); @@ -187,97 +193,104 @@ public void testWriteIntegerValueForNullBigInteger() { @ParameterizedTest @CsvSource({ - " 0.0, 5A", - " 1.0, 5C 3F 80 00 00", - " 1.5, 5C 3F C0 00 00", - " 3.1415927, 5C 40 49 0F DB", - " 4.00537109375, 5C 40 80 2C 00", - " 423542.09375, 5C 48 CE CE C3", - " 3.40282347E+38, 5C 7F 7F FF FF", // Float.MAX_VALUE - " -1.0, 5C BF 80 00 00", - " -1.5, 5C BF C0 00 00", - " -3.1415927, 5C C0 49 0F DB", - " -4.00537109375, 5C C0 80 2C 00", - " -423542.09375, 5C C8 CE CE C3", - "-3.40282347E+38, 5C FF 7F FF FF", // Float.MIN_VALUE - " NaN, 5C 7F C0 00 00", - " Infinity, 5C 7F 80 00 00", - " -Infinity, 5C FF 80 00 00", + " 0.0, 6A", + " 1.0, 6C 00 00 80 3F", + " 1.5, 6C 00 00 C0 3F", + " 3.1415927, 6C DB 0F 49 40", + " 4.00537109375, 6C 00 2C 80 40", + " 423542.09375, 6C C3 CE CE 48", + " 3.40282347E+38, 6C FF FF 7F 7F", // Float.MAX_VALUE + " -1.0, 
6C 00 00 80 BF", + " -1.5, 6C 00 00 C0 BF", + " -3.1415927, 6C DB 0F 49 C0", + " -4.00537109375, 6C 00 2C 80 C0", + " -423542.09375, 6C C3 CE CE C8", + "-3.40282347E+38, 6C FF FF 7F FF", // Float.MIN_VALUE + " NaN, 6C 00 00 C0 7F", + " Infinity, 6C 00 00 80 7F", + " -Infinity, 6C 00 00 80 FF", }) public void testWriteFloatValue(float value, String expectedBytes) { - assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeFloat); + assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeFloatValue); } @ParameterizedTest @CsvSource({ - " 0.0, 5A", - " 1.0, 5C 3F 80 00 00", - " 1.5, 5C 3F C0 00 00", - " 3.141592653589793, 5D 40 09 21 FB 54 44 2D 18", - " 4.00537109375, 5C 40 80 2C 00", - " 4.11111111111, 5D 40 10 71 C7 1C 71 C2 39", - " 423542.09375, 5C 48 CE CE C3", - " 8236423542.09375, 5D 41 FE AE DD 97 61 80 00", - " 1.79769313486231570e+308, 5D 7F EF FF FF FF FF FF FF", // Double.MAX_VALUE - " -1.0, 5C BF 80 00 00", - " -1.5, 5C BF C0 00 00", - " -3.141592653589793, 5D C0 09 21 FB 54 44 2D 18", - " -4.00537109375, 5C C0 80 2C 00", - " -4.11111111111, 5D C0 10 71 C7 1C 71 C2 39", - " -423542.09375, 5C C8 CE CE C3", - " -8236423542.09375, 5D C1 FE AE DD 97 61 80 00", - "-1.79769313486231570e+308, 5D FF EF FF FF FF FF FF FF", // Double.MIN_VALUE - " NaN, 5C 7F C0 00 00", - " Infinity, 5C 7F 80 00 00", - " -Infinity, 5C FF 80 00 00", + " 0.0, 6A", + " -0.0, 6B 00 80", + " 1.0, 6C 00 00 80 3F", + " 1.5, 6C 00 00 C0 3F", + " 3.141592653589793, 6D 18 2D 44 54 FB 21 09 40", + " 4.00537109375, 6C 00 2C 80 40", + " 4.11111111111, 6D 39 C2 71 1C C7 71 10 40", + " 423542.09375, 6C C3 CE CE 48", + " 8236423542.09375, 6D 00 80 61 97 DD AE FE 41", + " 1.79769313486231570e+308, 6D FF FF FF FF FF FF EF 7F", // Double.MAX_VALUE + " -1.0, 6C 00 00 80 BF", + " -1.5, 6C 00 00 C0 BF", + " -3.141592653589793, 6D 18 2D 44 54 FB 21 09 C0", + " -4.00537109375, 6C 00 2C 80 C0", + " -4.11111111111, 6D 39 C2 71 1C C7 71 10 C0", + " -423542.09375, 6C C3 CE CE C8", + " 
-8236423542.09375, 6D 00 80 61 97 DD AE FE C1", + "-1.79769313486231570e+308, 6D FF FF FF FF FF FF EF FF", // Double.MIN_VALUE + " NaN, 6C 00 00 C0 7F", + " Infinity, 6C 00 00 80 7F", + " -Infinity, 6C 00 00 80 FF", }) public void testWriteFloatValueForDouble(double value, String expectedBytes) { - assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeFloat); + assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeFloatValue); } @ParameterizedTest @CsvSource({ - " 0., 60", - " 0e1, 6F 03", - " 0e63, 6F 7F", - " 0e99, 6F 8E 01", - " 0.0, 6F FF", - " 0.00, 6F FD", - " 0.000, 6F FB", - " 0e-64, 6F 81", - " 0e-99, 6F 76 FE", - " -0., 61 01", - " -0e1, 62 01 01", - " -0e3, 62 01 03", - " -0e127, 62 01 7F", - " -0e199, 63 01 C7 00", - " -0e-1, 62 01 FF", - " -0e-2, 62 01 FE", - " -0e-3, 62 01 FD", - " -0e-127, 62 01 81", - " -0e-199, 63 01 39 FF", - " 0.01, 62 03 FE", - " 0.1, 62 03 FF", - " 1, 61 03", - " 1e1, 62 03 01", - " 1e2, 62 03 02", - " 1e127, 62 03 7F", - " 1e128, 63 03 80 00", - " 1e65536, 64 03 00 00 01", - " 2, 61 05", - " 7, 61 0F", - " 14, 61 1D", - " 1.0, 62 15 FF", - " 1.00, 63 92 01 FE", - " 1.27, 63 FE 01 FE", - " 3.142, 63 1A 31 FD", - " 3.14159, 64 7C 59 26 FB", - " 3.141593, 65 98 FD FE 02 FA", - " 3.141592653, 66 B0 C9 1C 68 17 F7", - " 3.14159265359, 67 E0 93 7D 56 49 12 F5", - " 3.1415926535897932, 69 80 4C 43 76 65 9E 9C 6F F0", - " 3.1415926535897932384626434, 6E 00 50 E0 DC F7 CC D6 08 48 99 92 3F 03 E7", - "3.141592653589793238462643383, F6 1F 00 E0 2D 8F A4 21 D0 E7 46 C0 87 AA 89 02 E5", + " 0., 70", + " 0e1, 71 03", + " 0e63, 71 7F", + " 0e64, 72 02 01", + " 0e99, 72 8E 01", + " 0.0, 71 FF", + " 0.00, 71 FD", + " 0.000, 71 FB", + " 0e-64, 71 81", + " 0e-99, 72 76 FE", + " -0., 72 01 00", + " -0e1, 72 03 00", + " -0e3, 72 07 00", + " -0e63, 72 7F 00", + " -0e199, 73 1E 03 00", + " -0e-1, 72 FF 00", + " -0e-2, 72 FD 00", + " -0e-3, 72 FB 00", + " -0e-63, 72 83 00", + " -0e-64, 72 81 00", + " -0e-65, 73 FE FE 00", + " -0e-199, 
73 E6 FC 00", + " 0.01, 72 FD 01", + " 0.1, 72 FF 01", + " 1, 72 01 01", + " 1e1, 72 03 01", + " 1e2, 72 05 01", + " 1e63, 72 7F 01", + " 1e64, 73 02 01 01", + " 1e65536, 74 04 00 08 01", + " 2, 72 01 02", + " 7, 72 01 07", + " 14, 72 01 0E", + " 1.0, 72 FF 0A", + " 1.00, 72 FD 64", + " 1.27, 72 FD 7F", + " 1.28, 73 FD 80 00", + " 3.142, 73 FB 46 0C", + " 3.14159, 74 F7 2F CB 04", + " 3.1415927, 75 F3 77 5E DF 01", + " 3.141592653, 76 EF 4D E6 40 BB 00", + " 3.141592653590, 77 E9 16 9F 83 75 DB 02", + " 3.14159265358979323, 79 DF FB A0 9E F6 2F 1E 5C 04", + " 3.1415926535897932384626, 7B D5 72 49 64 CC AF EF 8F 0F A7 06", + " 3.141592653589793238462643383, 7D CB B7 3C 92 86 40 9F 1B 01 1F AA 26 0A", + " 3.14159265358979323846264338327950, 7F C1 8E 29 E5 E3 56 D5 DF C5 10 8F 55 3F 7D 0F", + "3.141592653589793238462643383279503, F7 21 BF 8F 9F F3 E6 64 55 BE BA A7 96 57 79 E4 9A 00", }) public void testWriteDecimalValue(@ConvertWith(StringToDecimal.class) Decimal value, String expectedBytes) { assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeDecimalValue); @@ -295,23 +308,23 @@ public void testWriteDecimalValueForNull() { @ParameterizedTest @CsvSource({ // OpCode MYYYYYYY DDDDDMMM mmmHHHHH ssssUmmm ffffffss ffffffff ffffffff ffffffff - "2023-10-15T01:00Z, 01110011 00110101 01111101 00000001 00001000", - "2023-10-15T01:59Z, 01110011 00110101 01111101 01100001 00001111", - "2023-10-15T11:22Z, 01110011 00110101 01111101 11001011 00001010", - "2023-10-15T23:00Z, 01110011 00110101 01111101 00010111 00001000", - "2023-10-15T23:59Z, 01110011 00110101 01111101 01110111 00001111", - "2023-10-15T11:22:00Z, 01110100 00110101 01111101 11001011 00001010 00000000", - "2023-10-15T11:22:33Z, 01110100 00110101 01111101 11001011 00011010 00000010", - "2023-10-15T11:22:59Z, 01110100 00110101 01111101 11001011 10111010 00000011", - "2023-10-15T11:22:33.000Z, 01110101 00110101 01111101 11001011 00011010 00000010 00000000", - "2023-10-15T11:22:33.444Z, 01110101 00110101 
01111101 11001011 00011010 11110010 00000110", - "2023-10-15T11:22:33.999Z, 01110101 00110101 01111101 11001011 00011010 10011110 00001111", - "2023-10-15T11:22:33.000000Z, 01110110 00110101 01111101 11001011 00011010 00000010 00000000 00000000", - "2023-10-15T11:22:33.444555Z, 01110110 00110101 01111101 11001011 00011010 00101110 00100010 00011011", - "2023-10-15T11:22:33.999999Z, 01110110 00110101 01111101 11001011 00011010 11111110 00001000 00111101", - "2023-10-15T11:22:33.000000000Z, 01110111 00110101 01111101 11001011 00011010 00000010 00000000 00000000 00000000", - "2023-10-15T11:22:33.444555666Z, 01110111 00110101 01111101 11001011 00011010 01001010 10000110 11111101 01101001", - "2023-10-15T11:22:33.999999999Z, 01110111 00110101 01111101 11001011 00011010 11111110 00100111 01101011 11101110", + "2023-10-15T01:00Z, 10000011 00110101 01111101 00000001 00001000", + "2023-10-15T01:59Z, 10000011 00110101 01111101 01100001 00001111", + "2023-10-15T11:22Z, 10000011 00110101 01111101 11001011 00001010", + "2023-10-15T23:00Z, 10000011 00110101 01111101 00010111 00001000", + "2023-10-15T23:59Z, 10000011 00110101 01111101 01110111 00001111", + "2023-10-15T11:22:00Z, 10000100 00110101 01111101 11001011 00001010 00000000", + "2023-10-15T11:22:33Z, 10000100 00110101 01111101 11001011 00011010 00000010", + "2023-10-15T11:22:59Z, 10000100 00110101 01111101 11001011 10111010 00000011", + "2023-10-15T11:22:33.000Z, 10000101 00110101 01111101 11001011 00011010 00000010 00000000", + "2023-10-15T11:22:33.444Z, 10000101 00110101 01111101 11001011 00011010 11110010 00000110", + "2023-10-15T11:22:33.999Z, 10000101 00110101 01111101 11001011 00011010 10011110 00001111", + "2023-10-15T11:22:33.000000Z, 10000110 00110101 01111101 11001011 00011010 00000010 00000000 00000000", + "2023-10-15T11:22:33.444555Z, 10000110 00110101 01111101 11001011 00011010 00101110 00100010 00011011", + "2023-10-15T11:22:33.999999Z, 10000110 00110101 01111101 11001011 00011010 11111110 00001000 
00111101", + "2023-10-15T11:22:33.000000000Z, 10000111 00110101 01111101 11001011 00011010 00000010 00000000 00000000 00000000", + "2023-10-15T11:22:33.444555666Z, 10000111 00110101 01111101 11001011 00011010 01001010 10000110 11111101 01101001", + "2023-10-15T11:22:33.999999999Z, 10000111 00110101 01111101 11001011 00011010 11111110 00100111 01101011 11101110", }) public void testWriteTimestampValueWithUtcShortForm(@ConvertWith(StringToTimestamp.class) Timestamp value, String expectedBytes) { assertWritingValueWithBinary(expectedBytes, value, IonEncoder_1_1::writeTimestampValue); @@ -321,32 +334,32 @@ public void testWriteTimestampValueWithUtcShortForm(@ConvertWith(StringToTimesta @ParameterizedTest @CsvSource({ // OpCode MYYYYYYY DDDDDMMM mmmHHHHH ssssUmmm ffffffss ffffffff ffffffff ffffffff - "1970T, 01110000 00000000", - "2023T, 01110000 00110101", - "2097T, 01110000 01111111", - "2023-01T, 01110001 10110101 00000000", - "2023-10T, 01110001 00110101 00000101", - "2023-12T, 01110001 00110101 00000110", - "2023-10-01T, 01110010 00110101 00001101", - "2023-10-15T, 01110010 00110101 01111101", - "2023-10-31T, 01110010 00110101 11111101", - "2023-10-15T01:00-00:00, 01110011 00110101 01111101 00000001 00000000", - "2023-10-15T01:59-00:00, 01110011 00110101 01111101 01100001 00000111", - "2023-10-15T11:22-00:00, 01110011 00110101 01111101 11001011 00000010", - "2023-10-15T23:00-00:00, 01110011 00110101 01111101 00010111 00000000", - "2023-10-15T23:59-00:00, 01110011 00110101 01111101 01110111 00000111", - "2023-10-15T11:22:00-00:00, 01110100 00110101 01111101 11001011 00000010 00000000", - "2023-10-15T11:22:33-00:00, 01110100 00110101 01111101 11001011 00010010 00000010", - "2023-10-15T11:22:59-00:00, 01110100 00110101 01111101 11001011 10110010 00000011", - "2023-10-15T11:22:33.000-00:00, 01110101 00110101 01111101 11001011 00010010 00000010 00000000", - "2023-10-15T11:22:33.444-00:00, 01110101 00110101 01111101 11001011 00010010 11110010 00000110", - 
"2023-10-15T11:22:33.999-00:00, 01110101 00110101 01111101 11001011 00010010 10011110 00001111", - "2023-10-15T11:22:33.000000-00:00, 01110110 00110101 01111101 11001011 00010010 00000010 00000000 00000000", - "2023-10-15T11:22:33.444555-00:00, 01110110 00110101 01111101 11001011 00010010 00101110 00100010 00011011", - "2023-10-15T11:22:33.999999-00:00, 01110110 00110101 01111101 11001011 00010010 11111110 00001000 00111101", - "2023-10-15T11:22:33.000000000-00:00, 01110111 00110101 01111101 11001011 00010010 00000010 00000000 00000000 00000000", - "2023-10-15T11:22:33.444555666-00:00, 01110111 00110101 01111101 11001011 00010010 01001010 10000110 11111101 01101001", - "2023-10-15T11:22:33.999999999-00:00, 01110111 00110101 01111101 11001011 00010010 11111110 00100111 01101011 11101110", + "1970T, 10000000 00000000", + "2023T, 10000000 00110101", + "2097T, 10000000 01111111", + "2023-01T, 10000001 10110101 00000000", + "2023-10T, 10000001 00110101 00000101", + "2023-12T, 10000001 00110101 00000110", + "2023-10-01T, 10000010 00110101 00001101", + "2023-10-15T, 10000010 00110101 01111101", + "2023-10-31T, 10000010 00110101 11111101", + "2023-10-15T01:00-00:00, 10000011 00110101 01111101 00000001 00000000", + "2023-10-15T01:59-00:00, 10000011 00110101 01111101 01100001 00000111", + "2023-10-15T11:22-00:00, 10000011 00110101 01111101 11001011 00000010", + "2023-10-15T23:00-00:00, 10000011 00110101 01111101 00010111 00000000", + "2023-10-15T23:59-00:00, 10000011 00110101 01111101 01110111 00000111", + "2023-10-15T11:22:00-00:00, 10000100 00110101 01111101 11001011 00000010 00000000", + "2023-10-15T11:22:33-00:00, 10000100 00110101 01111101 11001011 00010010 00000010", + "2023-10-15T11:22:59-00:00, 10000100 00110101 01111101 11001011 10110010 00000011", + "2023-10-15T11:22:33.000-00:00, 10000101 00110101 01111101 11001011 00010010 00000010 00000000", + "2023-10-15T11:22:33.444-00:00, 10000101 00110101 01111101 11001011 00010010 11110010 00000110", + 
"2023-10-15T11:22:33.999-00:00, 10000101 00110101 01111101 11001011 00010010 10011110 00001111", + "2023-10-15T11:22:33.000000-00:00, 10000110 00110101 01111101 11001011 00010010 00000010 00000000 00000000", + "2023-10-15T11:22:33.444555-00:00, 10000110 00110101 01111101 11001011 00010010 00101110 00100010 00011011", + "2023-10-15T11:22:33.999999-00:00, 10000110 00110101 01111101 11001011 00010010 11111110 00001000 00111101", + "2023-10-15T11:22:33.000000000-00:00, 10000111 00110101 01111101 11001011 00010010 00000010 00000000 00000000 00000000", + "2023-10-15T11:22:33.444555666-00:00, 10000111 00110101 01111101 11001011 00010010 01001010 10000110 11111101 01101001", + "2023-10-15T11:22:33.999999999-00:00, 10000111 00110101 01111101 11001011 00010010 11111110 00100111 01101011 11101110", }) public void testWriteTimestampValueWithUnknownOffsetShortForm(@ConvertWith(StringToTimestamp.class) Timestamp value, String expectedBytes) { assertWritingValueWithBinary(expectedBytes, value, IonEncoder_1_1::writeTimestampValue); @@ -355,26 +368,26 @@ public void testWriteTimestampValueWithUnknownOffsetShortForm(@ConvertWith(Strin @ParameterizedTest @CsvSource({ // OpCode MYYYYYYY DDDDDMMM mmmHHHHH ooooommm ssssssoo ffffffff ffffffff ffffffff ..ffffff - "2023-10-15T01:00-14:00, 01111000 00110101 01111101 00000001 00000000 00000000", - "2023-10-15T01:00+14:00, 01111000 00110101 01111101 00000001 10000000 00000011", - "2023-10-15T01:00-01:15, 01111000 00110101 01111101 00000001 10011000 00000001", - "2023-10-15T01:00+01:15, 01111000 00110101 01111101 00000001 11101000 00000001", - "2023-10-15T01:59+01:15, 01111000 00110101 01111101 01100001 11101111 00000001", - "2023-10-15T11:22+01:15, 01111000 00110101 01111101 11001011 11101010 00000001", - "2023-10-15T23:00+01:15, 01111000 00110101 01111101 00010111 11101000 00000001", - "2023-10-15T23:59+01:15, 01111000 00110101 01111101 01110111 11101111 00000001", - "2023-10-15T11:22:00+01:15, 01111001 00110101 01111101 11001011 11101010 
00000001", - "2023-10-15T11:22:33+01:15, 01111001 00110101 01111101 11001011 11101010 10000101", - "2023-10-15T11:22:59+01:15, 01111001 00110101 01111101 11001011 11101010 11101101", - "2023-10-15T11:22:33.000+01:15, 01111010 00110101 01111101 11001011 11101010 10000101 00000000 00000000", - "2023-10-15T11:22:33.444+01:15, 01111010 00110101 01111101 11001011 11101010 10000101 10111100 00000001", - "2023-10-15T11:22:33.999+01:15, 01111010 00110101 01111101 11001011 11101010 10000101 11100111 00000011", - "2023-10-15T11:22:33.000000+01:15, 01111011 00110101 01111101 11001011 11101010 10000101 00000000 00000000 00000000", - "2023-10-15T11:22:33.444555+01:15, 01111011 00110101 01111101 11001011 11101010 10000101 10001011 11001000 00000110", - "2023-10-15T11:22:33.999999+01:15, 01111011 00110101 01111101 11001011 11101010 10000101 00111111 01000010 00001111", - "2023-10-15T11:22:33.000000000+01:15, 01111100 00110101 01111101 11001011 11101010 10000101 00000000 00000000 00000000 00000000", - "2023-10-15T11:22:33.444555666+01:15, 01111100 00110101 01111101 11001011 11101010 10000101 10010010 01100001 01111111 00011010", - "2023-10-15T11:22:33.999999999+01:15, 01111100 00110101 01111101 11001011 11101010 10000101 11111111 11001001 10011010 00111011", + "2023-10-15T01:00-14:00, 10001000 00110101 01111101 00000001 00000000 00000000", + "2023-10-15T01:00+14:00, 10001000 00110101 01111101 00000001 10000000 00000011", + "2023-10-15T01:00-01:15, 10001000 00110101 01111101 00000001 10011000 00000001", + "2023-10-15T01:00+01:15, 10001000 00110101 01111101 00000001 11101000 00000001", + "2023-10-15T01:59+01:15, 10001000 00110101 01111101 01100001 11101111 00000001", + "2023-10-15T11:22+01:15, 10001000 00110101 01111101 11001011 11101010 00000001", + "2023-10-15T23:00+01:15, 10001000 00110101 01111101 00010111 11101000 00000001", + "2023-10-15T23:59+01:15, 10001000 00110101 01111101 01110111 11101111 00000001", + "2023-10-15T11:22:00+01:15, 10001001 00110101 01111101 11001011 
11101010 00000001", + "2023-10-15T11:22:33+01:15, 10001001 00110101 01111101 11001011 11101010 10000101", + "2023-10-15T11:22:59+01:15, 10001001 00110101 01111101 11001011 11101010 11101101", + "2023-10-15T11:22:33.000+01:15, 10001010 00110101 01111101 11001011 11101010 10000101 00000000 00000000", + "2023-10-15T11:22:33.444+01:15, 10001010 00110101 01111101 11001011 11101010 10000101 10111100 00000001", + "2023-10-15T11:22:33.999+01:15, 10001010 00110101 01111101 11001011 11101010 10000101 11100111 00000011", + "2023-10-15T11:22:33.000000+01:15, 10001011 00110101 01111101 11001011 11101010 10000101 00000000 00000000 00000000", + "2023-10-15T11:22:33.444555+01:15, 10001011 00110101 01111101 11001011 11101010 10000101 10001011 11001000 00000110", + "2023-10-15T11:22:33.999999+01:15, 10001011 00110101 01111101 11001011 11101010 10000101 00111111 01000010 00001111", + "2023-10-15T11:22:33.000000000+01:15, 10001100 00110101 01111101 11001011 11101010 10000101 00000000 00000000 00000000 00000000", + "2023-10-15T11:22:33.444555666+01:15, 10001100 00110101 01111101 11001011 11101010 10000101 10010010 01100001 01111111 00011010", + "2023-10-15T11:22:33.999999999+01:15, 10001100 00110101 01111101 11001011 11101010 10000101 11111111 11001001 10011010 00111011", }) public void testWriteTimestampValueWithKnownOffsetShortForm(@ConvertWith(StringToTimestamp.class) Timestamp value, String expectedBytes) { @@ -383,61 +396,60 @@ public void testWriteTimestampValueWithKnownOffsetShortForm(@ConvertWith(StringT @ParameterizedTest @CsvSource({ - // OpCode Length YYYYYYYY MMYYYYYY HDDDDDMM mmmmHHHH oooooomm ssoooooo ....ssss Coefficient+ Scale - "0001T, 11110111 00000101 00000001 00000000", - "1947T, 11110111 00000101 10011011 00000111", - "9999T, 11110111 00000101 00001111 00100111", - "1947-01T, 11110111 00000111 10011011 01000111 00000000", - "1947-12T, 11110111 00000111 10011011 00000111 00000011", - "1947-01-01T, 11110111 00000111 10011011 01000111 00000100", - "1947-12-23T, 
11110111 00000111 10011011 00000111 01011111", - "1947-12-31T, 11110111 00000111 10011011 00000111 01111111", - "1947-12-23T00:00Z, 11110111 00001101 10011011 00000111 01011111 00000000 10000000 00010110", - "1947-12-23T23:59Z, 11110111 00001101 10011011 00000111 11011111 10111011 10000011 00010110", - "1947-12-23T23:59:00Z, 11110111 00001111 10011011 00000111 11011111 10111011 10000011 00010110 00000000", - "1947-12-23T23:59:59Z, 11110111 00001111 10011011 00000111 11011111 10111011 10000011 11010110 00001110", - "1947-12-23T23:59:00.0Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000001", - "1947-12-23T23:59:00.00Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000010", - "1947-12-23T23:59:00.000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000011", - "1947-12-23T23:59:00.0000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000100", - "1947-12-23T23:59:00.00000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000101", - "1947-12-23T23:59:00.000000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000110", - "1947-12-23T23:59:00.0000000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00000111", - "1947-12-23T23:59:00.00000000Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 00001000", - "1947-12-23T23:59:00.9Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00010011 00000001", - "1947-12-23T23:59:00.99Z, 11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11000111 00000010", - "1947-12-23T23:59:00.999Z, 11110111 00010101 10011011 00000111 11011111 10111011 10000011 00010110 00000000 10011110 00001111 00000011", - "1947-12-23T23:59:00.9999Z, 
11110111 00010101 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00111110 10011100 00000100", - "1947-12-23T23:59:00.99999Z, 11110111 00010111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11111100 00110100 00001100 00000101", - "1947-12-23T23:59:00.999999Z, 11110111 00010111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11111100 00010001 01111010 00000110", - "1947-12-23T23:59:00.9999999Z, 11110111 00011001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11111000 01100111 10001001 00001001 00000111", - "1947-12-23T23:59:00.99999999Z, 11110111 00011001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 11111000 00001111 01011110 01011111 00001000", + // OpCode Length YYYYYYYY MMYYYYYY HDDDDDMM mmmmHHHH oooooomm ssoooooo ....ssss Scale+ Coefficient + "0001T, 11111000 00000101 00000001 00000000", + "1947T, 11111000 00000101 10011011 00000111", + "9999T, 11111000 00000101 00001111 00100111", + "1947-01T, 11111000 00000111 10011011 01000111 00000000", + "1947-12T, 11111000 00000111 10011011 00000111 00000011", + "1947-01-01T, 11111000 00000111 10011011 01000111 00000100", + "1947-12-23T, 11111000 00000111 10011011 00000111 01011111", + "1947-12-31T, 11111000 00000111 10011011 00000111 01111111", + "1947-12-23T00:00Z, 11111000 00001101 10011011 00000111 01011111 00000000 10000000 00010110", + "1947-12-23T23:59Z, 11111000 00001101 10011011 00000111 11011111 10111011 10000011 00010110", + "1947-12-23T23:59:00Z, 11111000 00001111 10011011 00000111 11011111 10111011 10000011 00010110 00000000", + "1947-12-23T23:59:59Z, 11111000 00001111 10011011 00000111 11011111 10111011 10000011 11010110 00001110", + "1947-12-23T23:59:00.0Z, 11111000 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000011", + "1947-12-23T23:59:00.00Z, 11111000 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000101", + "1947-12-23T23:59:00.000Z, 11111000 00010001 10011011 00000111 
11011111 10111011 10000011 00010110 00000000 00000111", + "1947-12-23T23:59:00.0000Z, 11111000 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001001", + "1947-12-23T23:59:00.00000Z, 11111000 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001011", + "1947-12-23T23:59:00.000000Z, 11111000 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001101", + "1947-12-23T23:59:00.0000000Z, 11111000 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001111", + "1947-12-23T23:59:00.00000000Z, 11111000 00010001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00010001", + "1947-12-23T23:59:00.9Z, 11111000 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000011 00001001", + "1947-12-23T23:59:00.99Z, 11111000 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000101 01100011", + "1947-12-23T23:59:00.999Z, 11111000 00010101 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000111 11100111 00000011", + "1947-12-23T23:59:00.9999Z, 11111000 00010101 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001001 00001111 00100111", + "1947-12-23T23:59:00.99999Z, 11111000 00010111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001011 10011111 10000110 00000001", + "1947-12-23T23:59:00.999999Z, 11111000 00010111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001101 00111111 01000010 00001111", + "1947-12-23T23:59:00.9999999Z, 11111000 00011001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00001111 01111111 10010110 10011000 00000000", + "1947-12-23T23:59:00.99999999Z, 11111000 00011001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00010001 11111111 11100000 11110101 00000101", 
"1947-12-23T23:59:00.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000Z, " + - "11110111 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 10001101", + "11111000 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00110110 00000010", "1947-12-23T23:59:00.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000Z, " + - "11110111 00010101 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000001 01101000 00000001", + "11111000 00010011 10011011 00000111 11011111 10111011 10000011 00010110 00000000 10100010 00000101", "1947-12-23T23:59:00.999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999Z, " + - "11110111 10010111 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 " + - "11111100 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 " + - "11111111 10010100 10001001 01111001 01101100 11001110 01111000 11110010 01000000 01111101 10100110 11000111 10101000 01000110 01011001 01110001 01001101 " + - "00100000 11110101 01101110 01111010 00001100 00001001 11101111 01111111 11110011 00011110 00010100 11010111 01101000 01110111 10101100 01101100 10001110 " + - "00110010 10110111 10000010 11110010 00110110 01101000 11110010 10100111 10001101", - + "11111000 10001001 10011011 00000111 11011111 10111011 10000011 00010110 00000000 00110110 00000010 11111111 
11111111 11111111 11111111 11111111 11111111 " + + "11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111 10011111 00110010 00110001 10001111 11001101 00011001 " + + "01001111 00011110 10101000 11001111 11110100 00011000 11010101 00101000 00101011 10101110 00001001 10100100 11011110 01001101 10001111 00100001 11100001 " + + "11111101 01101111 11011110 10000011 11100010 00011010 11101101 10001110 10010101 11001101 01010001 11100110 01010110 01010000 11011110 00000110 01001101 " + + "11111110 00010100", // Offsets - "2048-01-01T01:01-23:59, 11110111 00001101 00000000 01001000 10000100 00010000 00000100 00000000", - "2048-01-01T01:01-00:02, 11110111 00001101 00000000 01001000 10000100 00010000 01111000 00010110", - "2048-01-01T01:01-00:01, 11110111 00001101 00000000 01001000 10000100 00010000 01111100 00010110", - "2048-01-01T01:01-00:00, 11110111 00001101 00000000 01001000 10000100 00010000 11111100 00111111", - "2048-01-01T01:01+00:00, 11110111 00001101 00000000 01001000 10000100 00010000 10000000 00010110", - "2048-01-01T01:01+00:01, 11110111 00001101 00000000 01001000 10000100 00010000 10000100 00010110", - "2048-01-01T01:01+00:02, 11110111 00001101 00000000 01001000 10000100 00010000 10001000 00010110", - "2048-01-01T01:01+23:59, 11110111 00001101 00000000 01001000 10000100 00010000 11111100 00101100", + "2048-01-01T01:01-23:59, 11111000 00001101 00000000 01001000 10000100 00010000 00000100 00000000", + "2048-01-01T01:01-00:02, 11111000 00001101 00000000 01001000 10000100 00010000 01111000 00010110", + "2048-01-01T01:01-00:01, 11111000 00001101 00000000 01001000 10000100 00010000 01111100 00010110", + "2048-01-01T01:01-00:00, 11111000 00001101 00000000 01001000 10000100 00010000 11111100 00111111", + "2048-01-01T01:01+00:00, 11111000 00001101 00000000 01001000 10000100 00010000 10000000 00010110", + "2048-01-01T01:01+00:01, 11111000 00001101 00000000 01001000 10000100 00010000 10000100 00010110", + 
"2048-01-01T01:01+00:02, 11111000 00001101 00000000 01001000 10000100 00010000 10001000 00010110", + "2048-01-01T01:01+23:59, 11111000 00001101 00000000 01001000 10000100 00010000 11111100 00101100", }) public void testWriteTimestampValueLongForm(@ConvertWith(StringToTimestamp.class) Timestamp value, String expectedBytes) { assertWritingValueWithBinary(expectedBytes, value, IonEncoder_1_1::writeLongFormTimestampValue); @@ -446,18 +458,18 @@ public void testWriteTimestampValueLongForm(@ConvertWith(StringToTimestamp.class @ParameterizedTest @CsvSource({ // Long form because it's out of the year range - "0001T, 11110111 00000101 00000001 00000000", - "9999T, 11110111 00000101 00001111 00100111", + "0001T, 11111000 00000101 00000001 00000000", + "9999T, 11111000 00000101 00001111 00100111", // Long form because the offset is too high/low - "2048-01-01T01:01+14:15, 11110111 00001101 00000000 01001000 10000100 00010000 11011100 00100011", - "2048-01-01T01:01-14:15, 11110111 00001101 00000000 01001000 10000100 00010000 00100100 00001001", + "2048-01-01T01:01+14:15, 11111000 00001101 00000000 01001000 10000100 00010000 11011100 00100011", + "2048-01-01T01:01-14:15, 11111000 00001101 00000000 01001000 10000100 00010000 00100100 00001001", // Long form because the offset is not a multiple of 15 - "2048-01-01T01:01+00:01, 11110111 00001101 00000000 01001000 10000100 00010000 10000100 00010110", + "2048-01-01T01:01+00:01, 11111000 00001101 00000000 01001000 10000100 00010000 10000100 00010110", - // Long form because the fractional seconds are millis, micros, or nanos - "2023-12-31T23:59:00.0Z, 11110111 00010011 11100111 00000111 11111111 10111011 10000011 00010110 00000000 00000001 00000001", + // Long form because the fractional seconds are not millis, micros, or nanos + "2023-12-31T23:59:00.0Z, 11111000 00010001 11100111 00000111 11111111 10111011 10000011 00010110 00000000 00000011", }) public void 
testWriteTimestampDelegatesCorrectlyToLongForm(@ConvertWith(StringToTimestamp.class) Timestamp value, String expectedBytes) { assertWritingValueWithBinary(expectedBytes, value, IonEncoder_1_1::writeTimestampValue); @@ -472,16 +484,17 @@ public void testWriteTimestampValueForNullTimestamp() { @ParameterizedTest @CsvSource({ - "'', 80", - "'a', 81 61", - "'ab', 82 61 62", - "'abc', 83 61 62 63", - "'fourteen bytes', 8E 66 6F 75 72 74 65 65 6E 20 62 79 74 65 73", - "'this has sixteen', F8 21 74 68 69 73 20 68 61 73 20 73 69 78 74 65 65 6E", - "'variable length encoding', F8 31 76 61 72 69 61 62 6C 65 20 6C 65 6E 67 74 68 20 65 6E 63 6F 64 69 6E 67", + "'', 90", + "'a', 91 61", + "'ab', 92 61 62", + "'abc', 93 61 62 63", + "'fourteen bytes', 9E 66 6F 75 72 74 65 65 6E 20 62 79 74 65 73", + "'this has sixteen', F9 21 74 68 69 73 20 68 61 73 20 73 69 78 74 65 65 6E", + "'variable length encoding', F9 31 76 61 72 69 61 62 6C 65 20 6C 65 6E 67 74 68 20 65 6E 63 6F 64 69 6E 67", }) public void testWriteStringValue(String value, String expectedBytes) { - assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeStringValue); + Utf8StringEncoder.Result result = Utf8StringEncoderPool.getInstance().getOrCreate().encode(value); + assertWritingValue(expectedBytes, result, IonEncoder_1_1::writeStringValue); } @Test @@ -493,16 +506,17 @@ public void testWriteStringValueForNull() { @ParameterizedTest @CsvSource({ - "'', 90", - "'a', 91 61", - "'ab', 92 61 62", - "'abc', 93 61 62 63", - "'fourteen bytes', 9E 66 6F 75 72 74 65 65 6E 20 62 79 74 65 73", - "'this has sixteen', F9 21 74 68 69 73 20 68 61 73 20 73 69 78 74 65 65 6E", - "'variable length encoding', F9 31 76 61 72 69 61 62 6C 65 20 6C 65 6E 67 74 68 20 65 6E 63 6F 64 69 6E 67", + "'', A0", + "'a', A1 61", + "'ab', A2 61 62", + "'abc', A3 61 62 63", + "'fourteen bytes', AE 66 6F 75 72 74 65 65 6E 20 62 79 74 65 73", + "'this has sixteen', FA 21 74 68 69 73 20 68 61 73 20 73 69 78 74 65 65 6E", + "'variable length 
encoding', FA 31 76 61 72 69 61 62 6C 65 20 6C 65 6E 67 74 68 20 65 6E 63 6F 64 69 6E 67", }) public void testWriteSymbolValue(String value, String expectedBytes) { - assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeSymbolValue); + Utf8StringEncoder.Result result = Utf8StringEncoderPool.getInstance().getOrCreate().encode(value); + assertWritingValue(expectedBytes, result, IonEncoder_1_1::writeSymbolValue); } @ParameterizedTest @@ -520,9 +534,9 @@ public void testWriteSymbolValue(String value, String expectedBytes) { "65793, E3 03", "65919, E3 FF", "65920, E3 02 02", - "9223372036854775807, E3 00 FF FD FD FF FF FF FF FF" + "2147483647 , E3 F0 DF DF FF 0F" }) - public void testWriteSymbolValue(long value, String expectedBytes) { + public void testWriteSymbolValue(int value, String expectedBytes) { assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeSymbolValue); } @@ -541,12 +555,12 @@ public void testWriteSymbolValueForNull() { "FE 31 49 20 61 70 70 6C 61 75 64 20 79 6F 75 72 20 63 75 72 69 6F 73 69 74 79" }) public void testWriteBlobValue(@ConvertWith(HexStringToByteArray.class) byte[] value, String expectedBytes) { - assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeBlobValue); + assertWritingValue(expectedBytes, value, (buffer, bytes) -> IonEncoder_1_1.writeBlobValue(buffer, bytes, 0, bytes.length)); } @Test public void testWriteBlobValueForNull() { - int numBytes = IonEncoder_1_1.writeBlobValue(buf, null); + int numBytes = IonEncoder_1_1.writeBlobValue(buf, null, 0, 0); Assertions.assertEquals("EB 07", byteArrayToHex(bytes())); Assertions.assertEquals(2, numBytes); } @@ -559,12 +573,12 @@ public void testWriteBlobValueForNull() { "FF 31 49 20 61 70 70 6C 61 75 64 20 79 6F 75 72 20 63 75 72 69 6F 73 69 74 79" }) public void testWriteClobValue(@ConvertWith(HexStringToByteArray.class) byte[] value, String expectedBytes) { - assertWritingValue(expectedBytes, value, IonEncoder_1_1::writeClobValue); + 
assertWritingValue(expectedBytes, value, (buffer, bytes) -> IonEncoder_1_1.writeClobValue(buffer, bytes, 0, bytes.length)); } @Test public void testWriteClobValueForNull() { - int numBytes = IonEncoder_1_1.writeClobValue(buf, null); + int numBytes = IonEncoder_1_1.writeClobValue(buf, null, 0, 0); Assertions.assertEquals("EB 08", byteArrayToHex(bytes())); Assertions.assertEquals(2, numBytes); } @@ -591,133 +605,4 @@ public void testWriteAnnotationsForNull() { Assertions.assertEquals(0, numBytes); } - /** - * Utility method to make it easier to write test cases that assert specific sequences of bytes. - */ - private static String byteArrayToHex(byte[] bytes) { - StringBuilder sb = new StringBuilder(); - for (byte b : bytes) { - sb.append(String.format("%02X ", b)); - } - return sb.toString().trim(); - } - - /** - * Determines the number of bytes needed to represent a series of hexadecimal digits. - */ - private static int byteLengthFromHexString(String hexString) { - return (hexString.replaceAll("[^\\dA-F]", "").length()) / 2; - } - - /** - * Converts a byte array to a string of bits, such as "00110110 10001001". - * The purpose of this method is to make it easier to read and write test assertions. - */ - private static String byteArrayToBitString(byte[] bytes) { - StringBuilder s = new StringBuilder(); - for (byte aByte : bytes) { - for (int bit = 7; bit >= 0; bit--) { - if (((0x01 << bit) & aByte) != 0) { - s.append("1"); - } else { - s.append("0"); - } - } - s.append(" "); - } - return s.toString().trim(); - } - - /** - * Determines the number of bytes needed to represent a series of hexadecimal digits. 
- */ - private static int byteLengthFromBitString(String bitString) { - return (bitString.replaceAll("[^01]", "").length()) / 8; - } - - /** - * Converts a String to a Timestamp for a @Parameterized test - */ - static class StringToTimestamp extends TypedArgumentConverter { - protected StringToTimestamp() { - super(String.class, Timestamp.class); - } - - @Override - protected Timestamp convert(String source) throws ArgumentConversionException { - if (source == null) return null; - return Timestamp.valueOf(source); - } - } - - /** - * Converts a String to a Decimal for a @Parameterized test - */ - static class StringToDecimal extends TypedArgumentConverter { - protected StringToDecimal() { - super(String.class, Decimal.class); - } - - @Override - protected Decimal convert(String source) throws ArgumentConversionException { - if (source == null) return null; - return Decimal.valueOf(source); - } - } - - /** - * Converts a Hex String to a Byte Array for a @Parameterized test - */ - static class HexStringToByteArray extends TypedArgumentConverter { - - private static final CharsetEncoder ASCII_ENCODER = StandardCharsets.US_ASCII.newEncoder(); - - protected HexStringToByteArray() { - super(String.class, byte[].class); - } - - @Override - protected byte[] convert(String source) throws ArgumentConversionException { - if (source == null) return null; - if (source.trim().isEmpty()) return new byte[0]; - String[] octets = source.split(" "); - byte[] result = new byte[octets.length]; - for (int i = 0; i < octets.length; i++) { - if (octets[i].length() == 1) { - char c = octets[i].charAt(0); - if (!ASCII_ENCODER.canEncode(c)) { - throw new IllegalArgumentException("Cannot convert non-ascii character: " + c); - } - result[i] = (byte) c; - } else { - result[i] = (byte) Integer.parseInt(octets[i], 16); - } - } - return result; - } - } - - /** - * Converts a String of symbol ids to a long[] for a @Parameterized test - */ - static class SymbolIdsToLongArray extends 
TypedArgumentConverter { - protected SymbolIdsToLongArray() { - super(String.class, long[].class); - } - - @Override - protected long[] convert(String source) throws ArgumentConversionException { - if (source == null) return null; - int size = (int) source.chars().filter(i -> i == '$').count(); - String[] sids = source.split("\\$"); - long[] result = new long[size]; - int i = 0; - for (String sid : sids) { - if (sid.isEmpty()) continue; - result[i] = Long.parseLong(sid.trim()); - i++; - } - return result; - } - } } diff --git a/src/test/java/com/amazon/ion/impl/bin/IonManagedBinaryWriterTest.java b/src/test/java/com/amazon/ion/impl/bin/IonManagedBinaryWriterTest.java index bfa1e196d2..293d8fed4c 100644 --- a/src/test/java/com/amazon/ion/impl/bin/IonManagedBinaryWriterTest.java +++ b/src/test/java/com/amazon/ion/impl/bin/IonManagedBinaryWriterTest.java @@ -1,20 +1,8 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl.bin; +import com.amazon.ion.FakeSymbolToken; import com.amazon.ion.IonDatagram; import com.amazon.ion.IonInt; import com.amazon.ion.IonLoader; @@ -26,7 +14,9 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.nio.charset.StandardCharsets; +import com.amazon.ion.TestUtils; import com.amazon.ion.system.IonBinaryWriterBuilder; import com.amazon.ion.system.IonSystemBuilder; import org.junit.Before; @@ -282,10 +272,14 @@ public void testAutoFlush_67K() throws Exception{ @Test public void testAutoFlush_twiceBlockSize() throws IOException { - IonReader reader = system().newReader(singleTopLevelValue_13B.toByteArray()); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonWriter writer = IonBinaryWriterBuilder.standard().build(out); + writer.writeString("abcdefghijklmnopqrstuvwxyz"); // Write a 27-byte IonString. + writer.close(); + IonReader reader = system().newReader(out.toByteArray()); ByteArrayOutputStream actual = new ByteArrayOutputStream(); - // Set the actual writer block size as 5 bytes. The test data is a 13-byte IonString "taco_burrito". - IonBinaryWriterBuilder builder = IonBinaryWriterBuilder.standard().withAutoFlushEnabled(autoFlushMode.isEnabled()).withBlockSize(5); + // Set the actual writer block size as 10 bytes. The test data is a 27-byte IonString "abcdefghijklmnopqrstuvwxyz". + IonBinaryWriterBuilder builder = IonBinaryWriterBuilder.standard().withAutoFlushEnabled(autoFlushMode.isEnabled()).withBlockSize(10); IonWriter actualWriter = builder.build(actual); while (reader.next() != null) { actualWriter.writeValue(reader); @@ -293,9 +287,9 @@ public void testAutoFlush_twiceBlockSize() throws IOException { actualWriter.close(); if (lstAppendMode.isEnabled() && autoFlushMode.isEnabled()) { // When auto-flush is enabled, no flush is expected since this is a single top-level value and should continue encoding until this value is completed. 
- assertArrayEquals(actual.toByteArray(), singleTopLevelValue_13B.toByteArray()); + assertArrayEquals(actual.toByteArray(), out.toByteArray()); } - assertEquivalentDataModel(actual, singleTopLevelValue_13B); + assertEquivalentDataModel(actual, out); } @Test @@ -466,15 +460,41 @@ public void testNestedEmptyAnnotatedContainer() throws Exception assertValue("{bar: foo::[]}"); } + @Test + public void testSymbolWithKnownTextAndSid2IsNotConsideredIvm() throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonWriter writer = IonBinaryWriterBuilder.standard().build(out); + writer.writeSymbol("foo"); + // Should not be an IVM even though SID 2 is present because known text always takes precedence. + writer.writeSymbolToken(new FakeSymbolToken("abc", 2)); + // If the previous symbol were interpreted as an IVM, then the following symbol IDs would be out of range. + writer.writeSymbolToken(new FakeSymbolToken(null, 10)); + writer.writeSymbolToken(new FakeSymbolToken(null, 11)); + writer.close(); + assertEquivalentDataModel( + out.toByteArray(), + TestUtils.ensureBinary(system(), "foo abc foo abc".getBytes(StandardCharsets.UTF_8)) + ); + } + /** * Asserts equivalence of ion data model between two provided data streams. - * @param actual represents the serialized data streams when auto-flush is enabled. - * @param expected represents the expected data streams. + * @param actual represents the actual data stream. + * @param expected represents the expected data stream. */ private void assertEquivalentDataModel(ByteArrayOutputStream actual, ByteArrayOutputStream expected) { + assertEquivalentDataModel(actual.toByteArray(), expected.toByteArray()); + } + + /** + * Asserts equivalence of ion data model between two provided data streams. + * @param actual represents the actual data stream. + * @param expected represents the expected data stream. 
+ */ + private void assertEquivalentDataModel(byte[] actual, byte[] expected) { IonLoader loader = IonSystemBuilder.standard().build().newLoader(); - IonDatagram actualDatagram = loader.load(actual.toByteArray()); - IonDatagram expectedDatagram = loader.load(expected.toByteArray()); + IonDatagram actualDatagram = loader.load(actual); + IonDatagram expectedDatagram = loader.load(expected); assertEquals(expectedDatagram, actualDatagram); } } diff --git a/src/test/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1_Test.kt b/src/test/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1_Test.kt new file mode 100644 index 0000000000..8df7407f47 --- /dev/null +++ b/src/test/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1_Test.kt @@ -0,0 +1,1045 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.bin + +import com.amazon.ion.* +import com.amazon.ion.IonEncodingVersion.* +import com.amazon.ion.impl.* +import com.amazon.ion.impl.macro.* +import com.amazon.ion.impl.macro.ExpressionBuilderDsl.Companion.templateBody +import com.amazon.ion.impl.macro.Macro.* +import com.amazon.ion.impl.macro.ParameterFactory.exactlyOneTagged +import com.amazon.ion.impl.macro.ParameterFactory.zeroToManyTagged +import com.amazon.ion.system.* +import java.io.ByteArrayOutputStream +import java.math.BigInteger +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertNull +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.assertThrows +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.Arguments +import org.junit.jupiter.params.provider.Arguments.arguments +import org.junit.jupiter.params.provider.MethodSource + +internal class IonManagedWriter_1_1_Test { + + companion object { + // Some symbols that are annoying to use with Kotlin's string substitution. 
+ val ion = "\$ion" + val ion_1_1 = "\$ion_1_1" + val ion_encoding = "\$ion_encoding" + + // Some symbol tokens so that we don't have to keep declaring them + private val fooSymbolToken = FakeSymbolToken("foo", -1) + private val barSymbolToken = FakeSymbolToken("bar", -1) + + private val fooMacro = constantMacro { string("foo") } + private val barMacro = constantMacro { string("bar") } + + // Helper function that writes to a writer and returns the text Ion + private fun write( + topLevelValuesOnNewLines: Boolean = true, + closeWriter: Boolean = true, + pretty: Boolean = false, + symbolInliningStrategy: SymbolInliningStrategy = SymbolInliningStrategy.ALWAYS_INLINE, + block: IonManagedWriter_1_1.() -> Unit + ): String { + val appendable = StringBuilder() + val writer = ION_1_1.textWriterBuilder() + .withWriteTopLevelValuesOnNewLines(topLevelValuesOnNewLines) + .withSymbolInliningStrategy(symbolInliningStrategy) + .apply { if (pretty) withPrettyPrinting() } + .build(appendable) as IonManagedWriter_1_1 + writer.apply(block) + if (closeWriter) writer.close() + return appendable.toString().trim() + } + + // Helper function that writes to a writer and returns the binary Ion + private fun writeBinary( + closeWriter: Boolean = true, + symbolInliningStrategy: SymbolInliningStrategy = SymbolInliningStrategy.ALWAYS_INLINE, + block: IonManagedWriter_1_1.() -> Unit + ): ByteArray { + val out = ByteArrayOutputStream() + val writer = ION_1_1.binaryWriterBuilder() + .withSymbolInliningStrategy(symbolInliningStrategy) + .build(out) as IonManagedWriter_1_1 + writer.apply(block) + if (closeWriter) writer.close() + return out.toByteArray() + } + + /** Helper function to create a constant (zero arg) template macro */ + fun constantMacro(body: TemplateDsl.() -> Unit) = TemplateMacro(emptyList(), templateBody(body)) + } + + @Test + fun `attempting to manually write a symbol table throws an exception`() { + write(closeWriter = false) { + addTypeAnnotation(SystemSymbols.ION_SYMBOL_TABLE) + 
assertThrows { stepIn(IonType.STRUCT) } + } + } + + @Test + fun `attempting to step into a scalar type throws an exception`() { + write { + assertThrows { stepIn(IonType.NULL) } + } + } + + @Test + fun `write an IVM`() { + assertEquals( + """ + $ion_1_1 + $ion_1_1 + """.trimIndent(), + write { writeIonVersionMarker() } + ) + } + + @Test + fun `write an IVM in a container should write a symbol`() { + assertEquals( + """ + $ion_1_1 + [$ion_1_1] + """.trimIndent(), + write { + stepIn(IonType.LIST) + writeIonVersionMarker() + stepOut() + } + ) + } + + private fun newSystemReader(input: ByteArray): IonReader { + val system = IonSystemBuilder.standard().build() as _Private_IonSystem + return system.newSystemReader(input) + } + + private fun `transform symbol IDS`(writeValuesFn: _Private_IonWriter.(IonReader) -> Unit) { + // Craft the input data: {a: b::c}, encoded as {$10: $11::$12} + val input = ByteArrayOutputStream() + ION_1_0.binaryWriterBuilder().build(input).use { + it.stepIn(IonType.STRUCT) + it.setFieldName("a") + it.addTypeAnnotation("b") + it.writeSymbol("c") + it.stepOut() + } + // Do a system-level transcode of the Ion 1.0 data to Ion 1.1, adding 32 to each local symbol ID. + val output = ByteArrayOutputStream() + newSystemReader(input.toByteArray()).use { reader -> + (ION_1_1.binaryWriterBuilder().build(output) as _Private_IonWriter).use { + it.writeValuesFn(reader) + } + } + // Verify the transformed symbol IDs using another system read. 
+ newSystemReader(output.toByteArray()).use { + while (it.next() == IonType.SYMBOL) { + assertEquals("\$ion_1_1", it.stringValue()) + } + assertEquals(IonType.STRUCT, it.next()) + it.stepIn() + assertEquals(IonType.SYMBOL, it.next()) + assertEquals(42, it.fieldNameSymbol.sid) + assertEquals(43, it.typeAnnotationSymbols[0].sid) + assertEquals(44, it.symbolValue().sid) + assertNull(it.next()) + it.stepOut() + } + } + + @Test + fun `use writeValues to transform symbol IDS`() { + `transform symbol IDS` { reader -> + writeValues(reader) { sid -> sid + 32 } + } + } + + @Test + fun `use writeValue to transform symbol IDS`() { + `transform symbol IDS` { reader -> + while (reader.next() != null) { + writeValue(reader) { sid -> sid + 32 } + } + } + } + + @Test + fun `write a symbol value using a system symbol ID in binary`() { + val actual = writeBinary(symbolInliningStrategy = SymbolInliningStrategy.NEVER_INLINE) { + writeSymbol(SystemSymbols_1_1.SYMBOLS.text) + } + val reader = newSystemReader(actual) + assertEquals(IonType.SYMBOL, reader.next()) + assertEquals(ion_1_1, reader.stringValue()) + assertEquals(IonType.SYMBOL, reader.next()) + assertEquals(SystemSymbols_1_1.SYMBOLS.text, reader.stringValue()) + assertNull(reader.next()) + reader.close() + } + + @Test + fun `write an annotation using a system symbol ID in binary`() { + val actual = writeBinary(symbolInliningStrategy = SymbolInliningStrategy.NEVER_INLINE) { + addTypeAnnotation(SystemSymbols_1_1.SYMBOLS.text) + writeInt(123) + } + val reader = newSystemReader(actual) + assertEquals(IonType.SYMBOL, reader.next()) + assertEquals(ion_1_1, reader.stringValue()) + assertEquals(IonType.INT, reader.next()) + assertEquals(SystemSymbols_1_1.SYMBOLS.text, reader.typeAnnotations[0]) + assertEquals(123, reader.intValue()) + assertNull(reader.next()) + reader.close() + } + + @Test + fun `write a field name using a system symbol ID in binary`() { + val actual = writeBinary(symbolInliningStrategy = 
SymbolInliningStrategy.NEVER_INLINE) { + stepIn(IonType.STRUCT) + setFieldName(SystemSymbols_1_1.SYMBOLS.text) + writeInt(123) + stepOut() + } + val reader = newSystemReader(actual) + assertEquals(IonType.SYMBOL, reader.next()) + assertEquals(ion_1_1, reader.stringValue()) + assertEquals(IonType.STRUCT, reader.next()) + reader.stepIn() + assertEquals(IonType.INT, reader.next()) + assertEquals(SystemSymbols_1_1.SYMBOLS.text, reader.fieldName) + assertEquals(123, reader.intValue()) + assertNull(reader.next()) + reader.stepOut() + reader.close() + } + + @Test + fun `re-write a binary Ion 1-1 stream using a system reader`() { + val binary = TestUtils.hexStringToByteArray( + TestUtils.cleanCommentedHexBytes( + """ + E0 01 01 EA | IVM + E7 01 61 | $ion:: + CA | ( + EE 10 | module + A1 5F | _ + C5 | ( + EE 0F | symbol_table + B2 91 61 | ["a"] + | ) + | ) + E1 01 | Symbol value 1 = "a" + """.trimIndent() + ) + ) + val systemReader = newSystemReader(binary) + val actual = writeBinary(symbolInliningStrategy = SymbolInliningStrategy.NEVER_INLINE) { + writeValues(systemReader) + } + systemReader.close() + + val reader = IonReaderBuilder.standard().build(actual) + assertEquals(IonType.SYMBOL, reader.next()) + assertEquals("a", reader.stringValue()) + assertNull(reader.next()) + reader.close() + } + + @Test + fun `write an encoding directive with a non-empty macro table`() { + val expected = """ + $ion_1_1 + (:$ion::set_macros (:: (macro null () "foo"))) + """.trimIndent() + + val actual = write { + getOrAssignMacroAddress(constantMacro { string("foo") }) + } + + assertEquals(expected, actual) + } + + @Test + fun `write an e-expression by name`() { + val expected = """ + $ion_1_1 + (:$ion::set_macros (:: (macro a () "foo"))) + (:a) + """.trimIndent() + + val actual = write { + startMacro("a", constantMacro { string("foo") }) + endMacro() + } + + assertEquals(expected, actual) + } + + @Test + fun `write an e-expression by address`() { + val expected = """ + $ion_1_1 + 
(:$ion::set_macros (:: (macro null () "foo"))) + (:0) + """.trimIndent() + + val actual = write { + startMacro(constantMacro { string("foo") }) + endMacro() + } + + assertEquals(expected, actual) + } + + @Test + fun `write an e-expression with a expression group argument`() { + val macro = TemplateMacro( + signature = listOf( + zeroToManyTagged("a"), + zeroToManyTagged("b"), + ), + body = templateBody { string("foo") } + ) + + val expected = """ + $ion_1_1 + (:$ion::set_macros (:: (macro null (a* b*) "foo"))) + (:0 (::) (:: 1 2 3)) + """.trimIndent() + + val actual = write { + startMacro(macro) + + startExpressionGroup() + endExpressionGroup() + + startExpressionGroup() + writeInt(1) + writeInt(2) + writeInt(3) + endExpressionGroup() + + endMacro() + } + + assertEquals(expected, actual) + } + + @Test + fun `getOrAssignMacroAddress can add a system macro to the macro table`() { + val expected = """ + $ion_1_1 + (:$ion::set_macros (:: (export $ion::make_string))) + """.trimIndent() + + val actual = write { + getOrAssignMacroAddress(SystemMacro.MakeString) + } + + assertEquals(expected, actual) + } + + @Test + fun `when a system macro is shadowed, it should be written using the system e-exp syntax`() { + val expected = """ + $ion_1_1 + (:$ion::set_macros (:: (macro make_string () "make"))) + (:make_string) + (:$ion::make_string (:: "a" b)) + """.trimIndent() + + // Makes the word "make" as a string + val makeStringShadow = constantMacro { + string("make") + } + + val actual = write { + startMacro("make_string", makeStringShadow) + endMacro() + startMacro(SystemMacro.MakeString) + startExpressionGroup() + writeString("a") + writeSymbol("b") + endExpressionGroup() + endMacro() + } + + assertEquals(expected, actual) + } + + @Test + fun `it is possible to invoke a system macro using an alias`() { + val expected = """ + $ion_1_1 + (:$ion::set_macros (:: (export $ion::make_string foo))) + (:foo (:: "a" b)) + """.trimIndent() + + val actual = write { + startMacro("foo", 
SystemMacro.MakeString) + startExpressionGroup() + writeString("a") + writeSymbol("b") + endExpressionGroup() + endMacro() + } + assertEquals(expected, actual) + } + + @Test + fun `write an encoding directive with a non-empty symbol table`() { + val expected = """ + $ion_1_1 + (:$ion::set_symbols (:: "foo")) + $1 + """.trimIndent() + + val actual = write(symbolInliningStrategy = SymbolInliningStrategy.NEVER_INLINE) { + writeSymbol("foo") + } + + assertEquals(expected, actual) + } + + @Test + fun `calling flush() causes the next encoding directive to append to a macro table`() { + val expected = """ + $ion_1_1 + (:$ion::set_macros (:: (macro null () "foo"))) + (:0) + (:$ion::add_macros (:: (macro null () "bar"))) + (:0) + (:1) + """.trimIndent() + + val actual = write { + val fooMacro = constantMacro { string("foo") } + startMacro(fooMacro) + endMacro() + flush() + startMacro(fooMacro) + endMacro() + startMacro(constantMacro { string("bar") }) + endMacro() + } + + assertEquals(expected, actual) + } + + @Test + fun `calling flush() causes the next encoding directive to append to the symbol table`() { + val expected = """ + $ion_1_1 + (:$ion::set_symbols (:: "foo")) + $1 + (:$ion::add_symbols (:: "bar")) + $2 + """.trimIndent() + + val actual = write(symbolInliningStrategy = SymbolInliningStrategy.NEVER_INLINE) { + writeSymbol("foo") + flush() + writeSymbol("bar") + } + + assertEquals(expected, actual) + } + + @Test + fun `calling finish() causes the next encoding directive to NOT append to a macro table`() { + val expected = """ + $ion_1_1 + (:$ion::set_macros (:: (macro null () "foo"))) + (:0) + $ion_1_1 + (:$ion::set_macros (:: (macro null () "bar"))) + (:0) + """.trimIndent() + + val actual = write { + startMacro(constantMacro { string("foo") }) + endMacro() + finish() + startMacro(constantMacro { string("bar") }) + endMacro() + } + + assertEquals(expected, actual) + } + + @Test + fun `calling finish() causes the next encoding directive to NOT append to the symbol 
table`() { + val expected = """ + $ion_1_1 + (:$ion::set_symbols (:: "foo")) + $1 + $ion_1_1 + (:$ion::set_symbols (:: "bar")) + $1 + """.trimIndent() + + val actual = write(symbolInliningStrategy = SymbolInliningStrategy.NEVER_INLINE) { + writeSymbol("foo") + finish() + writeSymbol("bar") + } + + assertEquals(expected, actual) + } + + @Test + fun `adding to the macro table should preserve existing symbols`() { + val expected = """ + $ion_1_1 + (:$ion::set_symbols (:: "foo")) + $1 + (:$ion::set_macros (:: (macro null () "foo"))) + """.trimIndent() + + val actual = write(symbolInliningStrategy = SymbolInliningStrategy.NEVER_INLINE) { + writeSymbol("foo") + flush() + getOrAssignMacroAddress(constantMacro { string("foo") }) + } + + assertEquals(expected, actual) + } + + @Test + fun `adding to the symbol table should preserve existing macros`() { + val expected = """ + $ion_1_1 + (:$ion::set_macros (:: (macro null () "foo"))) + (:$ion::set_symbols (:: "foo")) + $1 + (:0) + """.trimIndent() + + val actual = write(symbolInliningStrategy = SymbolInliningStrategy.NEVER_INLINE) { + val theMacro = constantMacro { string("foo") } + getOrAssignMacroAddress(theMacro) + flush() + writeSymbol("foo") + startMacro(theMacro) + endMacro() + } + + assertEquals(expected, actual) + } + + /** Holds a static factory method with the test cases for [testWritingMacroDefinitions]. 
*/ + object TestWritingMacroDefinitions { + const val THE_METHOD = "com.amazon.ion.impl.bin.IonManagedWriter_1_1_Test\$TestWritingMacroDefinitions#cases" + + @JvmStatic + fun cases(): List { + fun case( + name: String, + signature: List = emptyList(), + body: TemplateDsl.() -> Unit = { nullValue() }, + expectedSignature: String = "()", + expectedBody: String = "null" + ) = arguments(name, TemplateMacro(signature, templateBody(body)), "$expectedSignature $expectedBody") + + return listOf( + case( + "single required parameter", + signature = listOf(exactlyOneTagged("x")), + expectedSignature = "(x)" + ), + case( + "multiple required parameters", + signature = listOf( + exactlyOneTagged("x"), + exactlyOneTagged("y") + ), + expectedSignature = "(x y)" + ), + case( + "optional parameter", + signature = listOf(Parameter("x", ParameterEncoding.Tagged, ParameterCardinality.ZeroOrOne)), + expectedSignature = "(x?)" + ), + case( + "zero-to-many parameter", + signature = listOf(Parameter("x", ParameterEncoding.Tagged, ParameterCardinality.ZeroOrMore)), + expectedSignature = "(x*)" + ), + case( + "one-to-many parameter", + signature = listOf(Parameter("x", ParameterEncoding.Tagged, ParameterCardinality.OneOrMore)), + expectedSignature = "(x+)" + ), + case( + "tagless parameter", + signature = listOf(Parameter("x", ParameterEncoding.Int32, ParameterCardinality.ExactlyOne)), + expectedSignature = "(int32::x)" + ), + case( + "variety of parameters", + signature = listOf( + Parameter("a", ParameterEncoding.Int32, ParameterCardinality.ExactlyOne), + Parameter("b", ParameterEncoding.Tagged, ParameterCardinality.OneOrMore), + Parameter("c", ParameterEncoding.FlexSym, ParameterCardinality.ZeroOrMore), + Parameter("d", ParameterEncoding.Float64, ParameterCardinality.ZeroOrOne), + ), + expectedSignature = "(int32::a b+ flex_sym::c* float64::d?)" + ), + case( + "null", + body = { nullValue() }, + expectedBody = "null" + ), + // Annotations on `null` are representative for all types that 
don't have special annotation logic + case( + "annotated null", + body = { + annotated(listOf(fooSymbolToken), ::nullValue, IonType.NULL) + }, + expectedBody = "foo::null" + ), + case( + "null annotated with $0", + body = { + annotated(listOf(FakeSymbolToken(null, 0)), ::nullValue, IonType.NULL) + }, + expectedBody = "$0::null" + ), + case( + "bool", + body = { bool(true) }, + expectedBody = "true" + ), + case( + "int", + body = { int(1) }, + expectedBody = "1" + ), + case( + "(big) int", + body = { int(BigInteger.ONE) }, + expectedBody = "1" + ), + case( + "float", + body = { float(Double.POSITIVE_INFINITY) }, + expectedBody = "+inf" + ), + case( + "decimal", + body = { decimal(Decimal.valueOf(1.1)) }, + expectedBody = "1.1" + ), + case( + "timestamp", + body = { timestamp(Timestamp.valueOf("2024T")) }, + expectedBody = "2024T" + ), + case( + "symbol", + body = { symbol(FakeSymbolToken("foo", -1)) }, + expectedBody = "foo" + ), + case( + "unknown symbol", + body = { symbol(FakeSymbolToken(null, 0)) }, + expectedBody = "$0" + ), + case( + "annotated symbol", + body = { + annotated(listOf(fooSymbolToken), ::symbol, barSymbolToken) + }, + expectedBody = "foo::bar" + ), + case( + "symbol annotated with $0", + body = { + annotated(listOf(FakeSymbolToken(null, 0)), ::symbol, barSymbolToken) + }, + expectedBody = "$0::bar" + ), + case( + "string", + body = { string("abc") }, + expectedBody = "\"abc\"" + ), + case( + "blob", + body = { blob(byteArrayOf()) }, + expectedBody = "{{}}" + ), + case( + "clob", + body = { clob(byteArrayOf()) }, + expectedBody = "{{\"\"}}" + ), + case( + "list", + body = { list { int(1) } }, + expectedBody = "[1]" + ), + case( + "sexp", + body = { sexp { int(1) } }, + expectedBody = "(1)" + ), + case( + "empty sexp", + body = { sexp { } }, + expectedBody = "()" + ), + case( + "annotated sexp", + body = { annotated(listOf(fooSymbolToken), ::sexp) { int(1) } }, + expectedBody = "foo::(1)" + ), + case( + "sexp with $0 annotation", + body = { 
annotated(listOf(FakeSymbolToken(null, 0)), ::sexp) { int(1) } }, + expectedBody = "$0::(1)" + ), + case( + "struct", + body = { struct { fieldName("foo"); int(1) } }, + expectedBody = "{foo:1}" + ), + case( + "struct with $0 field name", + body = { struct { fieldName(FakeSymbolToken(null, 0)); int(1) } }, + expectedBody = "{$0:1}" + ), + case( + "macro invoked by id", + body = { macro(barMacro) {} }, + expectedBody = "(.1)" + ), + case( + "macro invoked by name", + body = { macro(fooMacro) {} }, + expectedBody = "(.foo)" + ), + case( + "macro with an argument", + body = { macro(fooMacro) { int(1) } }, + expectedBody = "(.foo 1)" + ), + case( + "macro with an empty argument group", + body = { macro(fooMacro) { expressionGroup { } } }, + expectedBody = "(.foo (..))" + ), + case( + "macro with a non-empty argument group", + body = { + macro(fooMacro) { + expressionGroup { + int(1) + int(2) + int(3) + } + } + }, + expectedBody = "(.foo (.. 1 2 3))" + ), + case( + "variable", + signature = listOf(exactlyOneTagged("x")), + expectedSignature = "(x)", + body = { + variable(0) + }, + expectedBody = "(%x)" + ), + case( + "multiple variables", + signature = listOf("x", "y", "z").map(::exactlyOneTagged), + expectedSignature = "(x y z)", + body = { + list { + variable(0) + variable(1) + variable(2) + } + }, + expectedBody = "[(%x),(%y),(%z)]" + ), + case( + "nested expressions in body", + body = { + list { + sexp { int(1) } + struct { + fieldName("foo") + int(2) + } + } + }, + expectedBody = "[(1),{foo:2}]" + ), + + ) + } + } + + @MethodSource(TestWritingMacroDefinitions.THE_METHOD) + @ParameterizedTest(name = "a macro definition with {0}") + fun testWritingMacroDefinitions(description: String, macro: Macro, expectedSignatureAndBody: String) { + val expected = """ + $ion_1_1 + (:$ion::set_macros (:: (macro foo () "foo") (macro null () "bar") (macro null $expectedSignatureAndBody))) + """.trimIndent() + + val actual = write { + getOrAssignMacroAddressAndName("foo", fooMacro) + 
getOrAssignMacroAddress(barMacro) + getOrAssignMacroAddress(macro) + } + + assertEquals(expected, actual) + } + + @Test + fun `when pretty printing, system s-expressions should have the clause name on the first line`() { + // ...and look reasonably pleasant. + // However, this should be held loosely. + val expected = """ + $ion_1_1 + (:$ion::set_symbols + (:: "foo" "bar" "baz")) + (:$ion::set_macros + (:: + (macro null () "foo") + (macro null (x) (.0 (%x) "bar" (..) (.. "baz"))) + ) + ) + $1 + $2 + $3 + (:0) + (:1) + (:$ion::add_symbols + (:: "a" "b" "c")) + (:$ion::add_macros + (:: + (macro null () "abc") + ) + ) + $4 + $5 + $6 + (:2) + """.trimIndent() + + val fooMacro = constantMacro { string("foo") } + + val actual = write(symbolInliningStrategy = SymbolInliningStrategy.NEVER_INLINE, pretty = true) { + writeSymbol("foo") + writeSymbol("bar") + writeSymbol("baz") + startMacro(fooMacro) + endMacro() + startMacro( + TemplateMacro( + listOf(exactlyOneTagged("x")), + templateBody { + macro(fooMacro) { + variable(0) + string("bar") + expressionGroup { } + expressionGroup { + string("baz") + } + } + } + ) + ) + endMacro() + flush() + writeSymbol("a") + writeSymbol("b") + writeSymbol("c") + startMacro(constantMacro { string("abc") }) + endMacro() + } + + assertEquals(expected, actual) + } + + @Test + fun `writeObject() should write something with a macro representation`() { + val expected = """ + $ion_1_1 + (:$ion::set_macros (:: (macro Point2D (x y) {x:(%x),y:(%y)}))) + (:Point2D 2 4) + """.trimIndent() + + val actual = write { + writeObject(Point2D(2, 4)) + } + + assertEquals(expected, actual) + } + + @Test + fun `writeObject() should write something without a macro representation`() { + val expected = """ + $ion_1_1 + Red + Yellow + Green + Blue + """.trimIndent() + + val actual = write { + Colors.entries.forEach { + color -> + writeObject(color) + } + } + + assertEquals(expected, actual) + } + + @Test + fun `writeObject() should write something with nested macro 
representation`() { + val expected = """ + $ion_1_1 + (:$ion::set_macros (:: (macro null (x*) (%x)) (macro Polygon (vertices+ flex_sym::fill?) {vertices:[(%vertices)],fill:(.0 (%fill))}) (macro Point2D (x y) {x:(%x),y:(%y)}))) + (:Polygon (:: (:Point2D 0 0) (:Point2D 0 1) (:Point2D 1 1) (:Point2D 1 0)) Blue) + """.trimIndent() + + val data = Polygon( + listOf( + Point2D(0, 0), + Point2D(0, 1), + Point2D(1, 1), + Point2D(1, 0), + ), + Colors.Blue, + ) + + val actual = write { + writeObject(data) + } + + assertEquals(expected, actual) + } + + private data class Polygon(val vertices: List, val fill: Colors?) : WriteAsIon { + init { require(vertices.size >= 3) { "A polygon must have at least 3 edges and 3 vertices" } } + + companion object { + // Using the qualified class name would be verbose, but may be safer for general + // use so that there is almost no risk of having a name conflict with another macro. + private val MACRO_NAME = Polygon::class.simpleName!!.replace(".", "_") + private val IDENTITY = TemplateMacro(listOf(zeroToManyTagged("x")), templateBody { variable(0) }) + private val MACRO = TemplateMacro( + signature = listOf( + // TODO: Change this to a macro shape when they are supported + Parameter("vertices", ParameterEncoding.Tagged, ParameterCardinality.OneOrMore), + Parameter("fill", ParameterEncoding.FlexSym, ParameterCardinality.ZeroOrOne), + ), + templateBody { + struct { + fieldName("vertices") + list { + variable(0) + } + fieldName("fill") + macro(IDENTITY) { + variable(1) + } + } + } + ) + } + + override fun writeTo(writer: IonWriter) { + with(writer) { + stepIn(IonType.STRUCT) + setFieldName("vertices") + stepIn(IonType.LIST) + vertices.forEach { writeObject(it) } + stepOut() + if (fill != null) { + setFieldName("fill") + writeObject(fill) + } + stepOut() + } + } + + override fun writeToMacroAware(writer: MacroAwareIonWriter) { + with(writer) { + startMacro(MACRO_NAME, MACRO) + startExpressionGroup() + vertices.forEach { writer.writeObject(it) } 
+                endExpressionGroup()
+                fill?.let { writeObject(it) }
+                endMacro()
+            }
+        }
+    }
+
+    /**
+     * Test fixture: a 2D point that can be written either as a plain struct with fields `x` and `y`
+     * (see [writeTo]) or as an invocation of a two-parameter template macro (see [writeToMacroAware]).
+     */
+    private data class Point2D(val x: Long, val y: Long) : WriteAsIon {
+        companion object {
+            // The simple class name keeps the macro name (and the expected test output) short.
+            // A qualified class name would lower the risk of a name conflict with another macro.
+            // NOTE(review): a simple name should not contain '.', so the replace() below looks like a no-op — confirm.
+            private val MACRO_NAME = Point2D::class.simpleName!!.replace(".", "_")
+            private val MACRO = TemplateMacro(
+                signature = listOf(
+                    exactlyOneTagged("x"),
+                    exactlyOneTagged("y"),
+                ),
+                templateBody {
+                    struct {
+                        fieldName("x")
+                        variable(0)
+                        fieldName("y")
+                        variable(1)
+                    }
+                }
+            )
+        }
+
+        /** Writes this point as an invocation of [MACRO], passing `x` and then `y` as the arguments. */
+        override fun writeToMacroAware(writer: MacroAwareIonWriter) {
+            with(writer) {
+                startMacro(MACRO_NAME, MACRO)
+                writeInt(x)
+                writeInt(y)
+                endMacro()
+            }
+        }
+
+        /** Writes this point as a struct with the same field names that the body of [MACRO] uses. */
+        override fun writeTo(writer: IonWriter) {
+            with(writer) {
+                stepIn(IonType.STRUCT)
+                setFieldName("x")
+                writeInt(x)
+                // Was "x" a second time, which wrote the y coordinate under a duplicate `x` field.
+                setFieldName("y")
+                writeInt(y)
+                stepOut()
+            }
+        }
+    }
+
+    /** Test fixture that only overrides [writeTo]; each constant is written as a symbol holding its own name. */
+    private enum class Colors : WriteAsIon {
+        Red,
+        Yellow,
+        Green,
+        Blue,
+        ;
+        override fun writeTo(writer: IonWriter) {
+            writer.writeSymbol(this.name)
+        }
+    }
+}
diff --git a/src/test/java/com/amazon/ion/impl/bin/IonRawBinaryWriterTest_1_1.kt b/src/test/java/com/amazon/ion/impl/bin/IonRawBinaryWriterTest_1_1.kt
new file mode 100644
index 0000000000..4d8b8fa9d8
--- /dev/null
+++ b/src/test/java/com/amazon/ion/impl/bin/IonRawBinaryWriterTest_1_1.kt
@@ -0,0 +1,2104 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.bin + +import com.amazon.ion.* +import com.amazon.ion.TestUtils.* +import com.amazon.ion.impl.* +import com.amazon.ion.impl.macro.* +import com.amazon.ion.impl.macro.Macro.* +import java.io.ByteArrayOutputStream +import java.math.BigDecimal +import java.math.BigInteger +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertFalse +import org.junit.jupiter.api.Assertions.assertTrue +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.assertThrows +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.CsvSource +import org.junit.jupiter.params.provider.EnumSource + +class IonRawBinaryWriterTest_1_1 { + + private fun ionWriter( + baos: ByteArrayOutputStream = ByteArrayOutputStream() + ) = IonRawBinaryWriter_1_1( + out = baos, + buffer = WriteBuffer(BlockAllocatorProviders.basicProvider().vendAllocator(32)) {}, + lengthPrefixPreallocation = 1, + ) + + private inline fun writeAsHexString(autoClose: Boolean = true, block: IonRawBinaryWriter_1_1.() -> Unit): String { + val baos = ByteArrayOutputStream() + val rawWriter = ionWriter(baos) + block.invoke(rawWriter) + if (autoClose) rawWriter.close() + @OptIn(ExperimentalStdlibApi::class) + return baos.toByteArray().joinToString(" ") { it.toHexString(HexFormat.UpperCase) } + } + + /** + * @param hexBytes a string containing white-space delimited pairs of hex digits representing the expected output. + * The string may contain multiple lines. Anything after a `|` character on a line is ignored, so + * you can use `|` to add comments. 
+     */
+    @OptIn(ExperimentalStdlibApi::class)
+    private inline fun assertWriterOutputEquals(hexBytes: String, autoClose: Boolean = true, block: IonRawBinaryWriter_1_1.() -> Unit) {
+        // Strip the `|` comments once and reuse the result for both checks below.
+        val cleanedHexBytes = cleanCommentedHexBytes(hexBytes)
+        assertEquals(cleanedHexBytes, writeAsHexString(autoClose, block))
+
+        // Also check to see that the correct number of bytes are being reported to an enclosing container
+        val expectedLength = if (cleanedHexBytes.isBlank()) 0 else cleanedHexBytes.split(' ').size
+        val actualByteString = writeAsHexString(autoClose) {
+            try {
+                stepInList(usingLengthPrefix = true)
+                block()
+                stepOut()
+            } catch (t: Throwable) {
+                // It's illegal to wrap `block()` in a list, so we'll just skip this check.
+                // (Non-local return: this leaves assertWriterOutputEquals itself.)
+                return
+            }
+        }
+        if (expectedLength > 0xF) {
+            // Rather than try to parse the flexuint in the output, we'll just compare them as flexuint hex strings
+            // If this fails, it could be confusing. It's possible that if the length is underreported as being less
+            // than 16, then the "actualLengthBytes" could be an empty string.
+            val flexUIntLen = FlexInt.flexUIntLength(expectedLength.toLong())
+            val flexUIntBytes = ByteArray(flexUIntLen)
+            FlexInt.writeFlexIntOrUIntInto(flexUIntBytes, 0, expectedLength.toLong(), flexUIntLen)
+            val byteString = flexUIntBytes.joinToString(" ") { it.toHexString(HexFormat.UpperCase) }
+            // Drop the leading opcode ("XX ") and the trailing payload (" XX" per expected byte).
+            val actualLengthBytes = actualByteString.drop(3).dropLast(expectedLength * 3)
+            assertEquals(byteString, actualLengthBytes)
+        } else {
+            // Take the length from the opcode and compare with the length we calculated
+            val actualLen = "${actualByteString[1]}".toInt(radix = 0x10) // Fun fact! Every radix is 10 unless you write it in another base.
+            assertEquals(expectedLength, actualLen)
+        }
+    }
+
+    /**
+     * Runs [block] against a fresh raw writer built by [ionWriter] and asserts that it throws.
+     *
+     * NOTE(review): the expected exception type is taken from the names of the tests that use this
+     * helper ("... should throw IonException") — confirm against the upstream file.
+     */
+    private inline fun assertWriterThrows(block: IonRawBinaryWriter_1_1.() -> Unit) {
+        val rawWriter = ionWriter()
+        assertThrows<IonException> {
+            block.invoke(rawWriter)
+        }
+    }
+
+    @Test
+    fun `calling close while in a container should throw IonException`() {
+        assertWriterThrows {
+            stepInList(usingLengthPrefix = true)
+            close()
+        }
+    }
+
+    @Test
+    fun `calling finish while in a container should throw IonException`() {
+        assertWriterThrows {
+            stepInList(usingLengthPrefix = false)
+            flush()
+        }
+    }
+
+    @Test
+    fun `calling finish with a dangling annotation should throw IonException`() {
+        assertWriterThrows {
+            writeAnnotations(10)
+            flush()
+        }
+    }
+
+    @Test
+    fun `calling stepOut while not in a container should throw IonException`() {
+        assertWriterThrows {
+            stepOut()
+        }
+    }
+
+    @Test
+    fun `calling stepOut with a dangling annotation should throw IonException`() {
+        assertWriterThrows {
+            stepInList(usingLengthPrefix = false)
+            writeAnnotations(10)
+            stepOut()
+        }
+    }
+
+    @Test
+    fun `calling writeIVM when in a container should throw IonException`() {
+        assertWriterThrows {
+            stepInList(usingLengthPrefix = true)
+            writeIVM()
+        }
+    }
+
+    @Test
+    fun `calling writeIVM with a dangling annotation should throw IonException`() {
+        assertWriterThrows {
+            writeAnnotations(10)
+            writeIVM()
+        }
+    }
+
+    @Test
+    fun `calling finish should cause the buffered data to be written to the output stream`() {
+        val actual = writeAsHexString(autoClose = false) {
+            writeIVM()
+            flush()
+        }
+        // Just checking that data is written, not asserting the content.
+ assertTrue(actual.isNotBlank()) + } + + @Test + fun `after calling finish, it should still be possible to write more data`() { + val actual = writeAsHexString { + flush() + writeIVM() + } + // Just checking that data is written, not asserting the content. + assertTrue(actual.isNotBlank()) + } + + @Test + fun `calling close should cause the buffered data to be written to the output stream`() { + val actual = writeAsHexString(autoClose = false) { + writeIVM() + close() + } + // Just checking that data is written, not asserting the content. + assertTrue(actual.isNotBlank()) + } + + @Test + fun `calling close or finish multiple times should not throw any exceptions`() { + val actual = writeAsHexString { + writeIVM() + flush() + close() + flush() + close() + flush() + } + // Just checking that data is written, not asserting the content. + assertTrue(actual.isNotBlank()) + } + + @Test + fun `write the IVM`() { + assertWriterOutputEquals("E0 01 01 EA") { + writeIVM() + } + } + + @Test + fun `write nothing`() { + assertWriterOutputEquals("") { + } + } + + @Test + fun `write a null`() { + assertWriterOutputEquals("EA") { + writeNull() + } + } + + @Test + fun `write a null with a specific type`() { + // Just checking one type. 
The full range of types are checked in IonEncoder_1_1Test + assertWriterOutputEquals("EB 00") { + writeNull(IonType.BOOL) + } + } + + @ParameterizedTest + @CsvSource("true, 6E", "false, 6F") + fun `write a boolean`(value: Boolean, hexBytes: String) { + assertWriterOutputEquals(hexBytes) { + writeBool(value) + } + } + + @Test + fun `write a delimited list`() { + assertWriterOutputEquals("F1 6E 6F F0") { + stepInList(usingLengthPrefix = false) + writeBool(true) + writeBool(false) + stepOut() + } + } + + @Test + fun `write a prefixed list`() { + assertWriterOutputEquals("B2 6E 6F") { + stepInList(usingLengthPrefix = true) + writeBool(true) + writeBool(false) + stepOut() + } + } + + @Test + fun `write a variable-length prefixed list`() { + assertWriterOutputEquals("FB 21 ${" 6E".repeat(16)}") { + stepInList(usingLengthPrefix = true) + repeat(16) { writeBool(true) } + stepOut() + flush() + } + } + + @Test + fun `write a prefixed list that is so long it requires patch points`() { + assertWriterOutputEquals("FB 02 02 ${" 6E".repeat(128)}") { + stepInList(usingLengthPrefix = true) + repeat(128) { writeBool(true) } + stepOut() + } + } + + @Test + fun `write multiple nested prefixed lists`() { + assertWriterOutputEquals("B4 B3 B2 B1 B0") { + repeat(5) { stepInList(usingLengthPrefix = true) } + repeat(5) { stepOut() } + } + } + + @Test + fun `write multiple nested delimited lists`() { + assertWriterOutputEquals("F1 F1 F1 F1 F0 F0 F0 F0") { + repeat(4) { stepInList(usingLengthPrefix = false) } + repeat(4) { stepOut() } + } + } + + @Test + fun `write multiple nested delimited and prefixed lists`() { + assertWriterOutputEquals("F1 B9 F1 B6 F1 B3 F1 B0 F0 F0 F0 F0") { + repeat(4) { + stepInList(usingLengthPrefix = false) + stepInList(usingLengthPrefix = true) + } + repeat(8) { stepOut() } + } + } + + @Test + fun `write a delimited sexp`() { + assertWriterOutputEquals("F2 6E 6F F0") { + stepInSExp(usingLengthPrefix = false) + writeBool(true) + writeBool(false) + stepOut() + } + } 
+ + @Test + fun `write a prefixed sexp`() { + assertWriterOutputEquals("C2 6E 6F") { + stepInSExp(usingLengthPrefix = true) + writeBool(true) + writeBool(false) + stepOut() + } + } + + @Test + fun `write a variable-length prefixed sexp`() { + assertWriterOutputEquals("FC 21 ${" 6E".repeat(16)}") { + stepInSExp(usingLengthPrefix = true) + repeat(16) { writeBool(true) } + stepOut() + flush() + } + } + + @Test + fun `write a prefixed sexp that is so long it requires patch points`() { + assertWriterOutputEquals("FC 02 02 ${" 6E".repeat(128)}") { + stepInSExp(usingLengthPrefix = true) + repeat(128) { writeBool(true) } + stepOut() + } + } + + @Test + fun `write multiple nested prefixed sexps`() { + assertWriterOutputEquals("C4 C3 C2 C1 C0") { + repeat(5) { stepInSExp(usingLengthPrefix = true) } + repeat(5) { stepOut() } + } + } + + @Test + fun `write multiple nested delimited sexps`() { + assertWriterOutputEquals("F2 F2 F2 F2 F0 F0 F0 F0") { + repeat(4) { stepInSExp(usingLengthPrefix = false) } + repeat(4) { stepOut() } + } + } + + @Test + fun `write multiple nested delimited and prefixed sexps`() { + assertWriterOutputEquals("F2 C9 F2 C6 F2 C3 F2 C0 F0 F0 F0 F0") { + repeat(4) { + stepInSExp(usingLengthPrefix = false) + stepInSExp(usingLengthPrefix = true) + } + repeat(8) { stepOut() } + } + } + + @Test + fun `write a prefixed struct`() { + assertWriterOutputEquals( + """ + D4 | Struct Length = 4 + 17 | SID 11 + 6E | true + 19 | SID 12 + 6F | false + """ + ) { + stepInStruct(usingLengthPrefix = true) + writeFieldName(11) + writeBool(true) + writeFieldName(12) + writeBool(false) + stepOut() + } + } + + @Test + fun `write a variable length prefixed struct`() { + assertWriterOutputEquals( + """ + FD | Variable Length SID Struct + 21 | Length = 16 + ${"17 6E ".repeat(8)} + """ + ) { + stepInStruct(usingLengthPrefix = true) + repeat(8) { + writeFieldName(11) + writeBool(true) + } + stepOut() + } + } + + @Test + fun `write a struct so long it requires patch points`() { + 
assertWriterOutputEquals( + """ + FD | Variable Length SID Struct + 02 02 | Length = 128 + ${"17 6E ".repeat(64)} + """ + ) { + stepInStruct(usingLengthPrefix = true) + repeat(64) { + writeFieldName(11) + writeBool(true) + } + stepOut() + } + } + + @Test + fun `write multiple nested prefixed structs`() { + assertWriterOutputEquals( + """ + D8 | Struct Length = 8 + 17 | SID 11 + D6 | Struct Length = 6 + 17 | SID 11 + D4 | Struct Length = 4 + 17 | SID 11 + D2 | Struct Length = 2 + 17 | SID 11 + D0 | Struct Length = 0 + """ + ) { + stepInStruct(usingLengthPrefix = true) + repeat(4) { + writeFieldName(11) + stepInStruct(usingLengthPrefix = true) + } + repeat(5) { + stepOut() + } + } + } + + @Test + fun `write multiple nested delimited structs`() { + assertWriterOutputEquals( + """ + F3 | Begin delimited struct + 17 | FlexSym SID 11 + F3 | Begin delimited struct + 17 F3 17 F3 17 F3 | etc. + 01 F0 | End delimited struct + 01 F0 01 F0 01 F0 01 F0 | etc. + """ + ) { + stepInStruct(usingLengthPrefix = false) + repeat(4) { + writeFieldName(11) + stepInStruct(usingLengthPrefix = false) + } + repeat(5) { + stepOut() + } + } + } + + @Test + fun `write empty prefixed struct`() { + assertWriterOutputEquals("D0") { + stepInStruct(usingLengthPrefix = true) + stepOut() + } + } + + @Test + fun `write delimited struct`() { + assertWriterOutputEquals( + """ + F3 | Begin delimited struct + 17 | SID 11 + 6E | true + FB 66 6F 6F | FlexSym 'foo' + 6E | true + 02 01 | FlexSym SID 64 + 6E | true + 01 6F | System Symbol symbol_table + 6E | true + 01 F0 | End delimited struct + """ + ) { + stepInStruct(usingLengthPrefix = false) + writeFieldName(11) + writeBool(true) + writeFieldName("foo") + writeBool(true) + writeFieldName(64) + writeBool(true) + writeFieldName(SystemSymbols_1_1.SYMBOL_TABLE) + writeBool(true) + stepOut() + } + } + + @Test + fun `write empty delimited struct`() { + assertWriterOutputEquals( + """ + F3 | Begin delimited struct + 01 F0 | End delimited struct + """ + ) { + 
stepInStruct(usingLengthPrefix = false) + stepOut() + } + } + + @Test + fun `write prefixed struct with a single flex sym field`() { + assertWriterOutputEquals( + """ + FD | Variable length Struct + 0D | Length = 6 + 01 | switch to FlexSym encoding + FB 66 6F 6F | FlexSym 'foo' + 6E | true + """ + ) { + stepInStruct(usingLengthPrefix = true) + writeFieldName("foo") + writeBool(true) + stepOut() + } + } + + @Test + fun `write prefixed struct with multiple fields and flex syms`() { + assertWriterOutputEquals( + """ + FD | Variable length Struct + 21 | Length = 16 + 01 | switch to FlexSym encoding + FB 66 6F 6F | FlexSym 'foo' + 6E | true + FB 62 61 72 | FlexSym 'bar' + 6E | true + FB 62 61 7A | FlexSym 'baz' + 6E | true + """ + ) { + stepInStruct(usingLengthPrefix = true) + writeFieldName("foo") + writeBool(true) + writeFieldName("bar") + writeBool(true) + writeFieldName("baz") + writeBool(true) + stepOut() + } + } + + @Test + fun `write prefixed struct that starts with sids and switches partway through to use flex syms`() { + assertWriterOutputEquals( + """ + FD | Variable length Struct + 17 | Length = 11 + 81 | SID 64 + 6E | true + 01 | switch to FlexSym encoding + FB 66 6F 6F | FlexSym 'foo' + 6E | true + 02 01 | FlexSym SID 64 + 6E | true + """ + ) { + stepInStruct(usingLengthPrefix = true) + writeFieldName(64) + writeBool(true) + writeFieldName("foo") + writeBool(true) + writeFieldName(64) + writeBool(true) + stepOut() + } + } + + @Test + fun `write prefixed struct with sid 0`() { + assertWriterOutputEquals( + """ + FD | Variable length Struct + 09 | Length = 4 + 01 | switch to FlexSym encoding + 01 60 | FlexSym SID 0 + 6E | true + """ + ) { + stepInStruct(usingLengthPrefix = true) + writeFieldName(0) + writeBool(true) + stepOut() + } + } + + @Test + fun `write prefixed struct with sid 0 after another value`() { + assertWriterOutputEquals( + """ + FD | Variable length struct + 17 | Length = FlexUInt 11 + 03 | SID 1 + 6E | true + 01 | switch to FlexSym encoding + 
01 60 | FlexSym SID 0 + 6E | true + 05 | FlexSym SID 2 + 6E | true + 01 60 | FlexSym SID 0 + 6E | true + """ + ) { + stepInStruct(usingLengthPrefix = true) + writeFieldName(1) + writeBool(true) + writeFieldName(0) + writeBool(true) + writeFieldName(2) + writeBool(true) + writeFieldName(0) + writeBool(true) + stepOut() + } + } + + @Test + fun `write prefixed struct with a system symbol as a field name`() { + assertWriterOutputEquals( + """ + FD | Variable length Struct + 09 | Length = 4 + 01 | switch to FlexSym encoding + 01 6F | FlexSym System Symbol 'symbol_table' + 6E | true + """ + ) { + stepInStruct(usingLengthPrefix = true) + writeFieldName(SystemSymbols_1_1.SYMBOL_TABLE) + writeBool(true) + stepOut() + } + } + + @Test + fun `writing a value in a struct with no field name should throw an exception`() { + assertWriterThrows { + stepInStruct(usingLengthPrefix = false) + writeBool(true) + } + assertWriterThrows { + stepInStruct(usingLengthPrefix = true) + writeBool(true) + } + } + + @Test + fun `calling writeFieldName outside of a struct should throw an exception`() { + assertWriterThrows { + writeFieldName(12) + } + assertWriterThrows { + writeFieldName("foo") + } + } + + @Test + fun `calling stepOut with a dangling field name should throw an exception`() { + assertWriterThrows { + stepInStruct(usingLengthPrefix = true) + writeFieldName(12) + stepOut() + } + assertWriterThrows { + stepInStruct(usingLengthPrefix = false) + writeFieldName("foo") + stepOut() + } + } + + @Test + fun `writeAnnotations with empty int array should write no annotations`() { + assertWriterOutputEquals("6E") { + writeAnnotations(intArrayOf()) + writeBool(true) + } + } + + @Test + fun `write one sid annotation`() { + val expectedBytes = "E4 07 6E" + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(3) + writeBool(true) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(3) + writeAnnotations(intArrayOf()) + writeAnnotations(arrayOf()) + writeBool(true) + } + } + + 
@Test + fun `write two sid annotations`() { + val expectedBytes = "E5 07 09 6E" + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(3) + writeAnnotations(4) + writeBool(true) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(3, 4) + writeBool(true) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(intArrayOf(3, 4)) + writeBool(true) + } + } + + @Test + fun `write three sid annotations`() { + val expectedBytes = "E6 09 07 09 02 04 6E" + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(3) + writeAnnotations(4) + writeAnnotations(256) + writeBool(true) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(3) + writeAnnotations(4, 256) + writeBool(true) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(intArrayOf(3, 4)) + writeAnnotations(256) + writeBool(true) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(intArrayOf(3, 4, 256)) + writeBool(true) + } + } + + @Test + fun `write sid 0 annotation`() { + assertWriterOutputEquals("E4 01 6E") { + writeAnnotations(0) + writeBool(true) + } + } + + @Test + fun `attempting to write negative SID annotations should throw exception`() { + assertWriterThrows { writeAnnotations(-1) } + assertWriterThrows { writeAnnotations(-1, 2) } + assertWriterThrows { writeAnnotations(1, -2) } + assertWriterThrows { writeAnnotations(intArrayOf(-1, 2, 3)) } + assertWriterThrows { writeAnnotations(intArrayOf(1, -2, 3)) } + assertWriterThrows { writeAnnotations(intArrayOf(1, 2, -3)) } + } + + @Test + fun `write one inline annotation`() { + val expectedBytes = "E7 FB 66 6F 6F 6F" + assertWriterOutputEquals(expectedBytes) { + writeAnnotations("foo") + writeBool(false) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations("foo") + writeAnnotations(intArrayOf()) + writeBool(false) + } + } + + @Test + fun `write two inline annotations`() { + val expectedBytes = "E8 FB 66 6F 6F FB 62 61 72 6F" + assertWriterOutputEquals(expectedBytes) { 
+ writeAnnotations("foo") + writeAnnotations("bar") + writeBool(false) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(arrayOf("foo", "bar")) + writeBool(false) + } + } + + @Test + fun `write three inline annotations`() { + val expectedBytes = "E9 19 FB 66 6F 6F FB 62 61 72 FB 62 61 7A 6F" + assertWriterOutputEquals(expectedBytes) { + writeAnnotations("foo") + writeAnnotations("bar") + writeAnnotations("baz") + writeBool(false) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations("foo") + writeAnnotations("bar", "baz") + writeBool(false) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(arrayOf("foo", "bar")) + writeAnnotations("baz") + writeBool(false) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(arrayOf("foo", "bar", "baz")) + writeBool(false) + } + } + + @Test + fun `write empty text and sid 0 annotations`() { + // Empty text is a system symbol + assertWriterOutputEquals("E8 01 60 01 81 6E") { + writeAnnotations(0) + writeAnnotations("") + writeBool(true) + } + } + + @Test + fun `write two mixed sid and inline annotations`() { + val expectedBytes = "E8 15 FB 66 6F 6F 6F" + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(10) + writeAnnotations("foo") + writeBool(false) + } + } + + @Test + fun `write three mixed sid and inline annotations`() { + val expectedBytes = "E9 13 15 FB 66 6F 6F FB 62 61 72 6F" + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(10) + writeAnnotations("foo") + writeAnnotations("bar") + writeBool(false) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(10) + writeAnnotations(arrayOf("foo", "bar")) + writeBool(false) + } + } + + @Test + fun `write one system symbol annotation`() { + val expectedBytes = "E7 01 64 6E" + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(SystemSymbols_1_1.NAME) + writeBool(true) + } + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(SystemSymbols_1_1.NAME) + 
writeAnnotations(intArrayOf()) + writeAnnotations(arrayOf()) + writeBool(true) + } + } + + @Test + fun `write two mixed sid and system annotations`() { + val expectedBytes = "E8 15 01 6A 6F" + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(10) + writeAnnotations(SystemSymbols_1_1.ENCODING) + writeBool(false) + } + } + + @Test + fun `write two mixed inline and system annotations`() { + val expectedBytes = "E8 FB 66 6F 6F 01 6A 6F" + assertWriterOutputEquals(expectedBytes) { + writeAnnotations("foo") + writeAnnotations(SystemSymbols_1_1.ENCODING) + writeBool(false) + } + } + + @Test + fun `write three mixed sid, inline, and system annotations`() { + val expectedBytes = "E9 0F 15 FB 66 6F 6F 01 6A 6F" + assertWriterOutputEquals(expectedBytes) { + writeAnnotations(10) + writeAnnotations("foo") + writeAnnotations(SystemSymbols_1_1.ENCODING) + writeBool(false) + } + } + + @Test + fun `write annotations that are long enough to need a patch point`() { + val opCode = "E7" + val length = "C6 FD" + val text = "41 6D 61 7A 6F 6E 20 49 6F 6E 20 69 73 20 61 20 72 69 63 68 6C 79 2D 74 79 70 65 64 2C 20 73 65 " + + "6C 66 2D 64 65 73 63 72 69 62 69 6E 67 2C 20 68 69 65 72 61 72 63 68 69 63 61 6C 20 64 61 74 61 20 " + + "73 65 72 69 61 6C 69 7A 61 74 69 6F 6E 20 66 6F 72 6D 61 74 20 6F 66 66 65 72 69 6E 67 20 69 6E 74 " + + "65 72 63 68 61 6E 67 65 61 62 6C 65 20 62 69 6E 61 72 79 20 61 6E 64 20 74 65 78 74 20 72 65 70 72 " + + "65 73 65 6E 74 61 74 69 6F 6E 73 2E" + val falseOpCode = "6F" + assertWriterOutputEquals("$opCode $length $text $falseOpCode") { + writeAnnotations( + "Amazon Ion is a richly-typed, self-describing, hierarchical data serialization " + + "format offering interchangeable binary and text representations." 
+ ) + writeBool(false) + } + } + + @Test + fun `write enough annotations for one value to require resizing the internal annotation buffers`() { + val expectedBytes = """ + E9 + 3D + 07 07 07 07 07 07 07 07 07 07 | 10x SID 3 + FF 20 FF 20 FF 20 FF 20 FF 20 | 5x " " + FF 20 FF 20 FF 20 FF 20 FF 20 | 5x " " + 6E + """ + assertWriterOutputEquals(expectedBytes) { + repeat(10) { writeAnnotations(3) } + repeat(10) { writeAnnotations(" ") } + writeBool(true) + } + } + + @Test + fun `_private_hasFirstAnnotation() should return false when there are no annotations`() { + val rawWriter = ionWriter() + assertFalse(rawWriter._private_hasFirstAnnotation(SystemSymbols.ION_SID, SystemSymbols.ION)) + } + + @Test + fun `_private_hasFirstAnnotation() should return true if only the sid matches`() { + val rawWriter = ionWriter() + rawWriter.writeAnnotations(SystemSymbols.ION_SID) + assertTrue(rawWriter._private_hasFirstAnnotation(SystemSymbols.ION_SID, null)) + } + + @Test + fun `_private_hasFirstAnnotation() should return true if only the text matches`() { + val rawWriter = ionWriter() + rawWriter.writeAnnotations(SystemSymbols.ION) + assertTrue(rawWriter._private_hasFirstAnnotation(-1, SystemSymbols.ION)) + } + + @Test + fun `_private_hasFirstAnnotation() should return false if the first annotation does not match the sid or text`() { + val rawWriter = ionWriter() + rawWriter.writeAnnotations(SystemSymbols.IMPORTS_SID) + rawWriter.writeAnnotations(SystemSymbols.ION) + rawWriter.writeAnnotations(SystemSymbols.ION_SID) + // Matches the second and third annotations, but not the first one. 
+ assertFalse(rawWriter._private_hasFirstAnnotation(SystemSymbols.ION_SID, SystemSymbols.ION)) + } + + @Test + fun `_private_clearAnnotations() should clear text annotations`() { + assertWriterOutputEquals(""" 6E """) { + repeat(5) { writeAnnotations(" ") } + _private_clearAnnotations() + assertFalse(_private_hasFirstAnnotation(-1, " ")) + writeBool(true) + } + assertWriterOutputEquals(""" E4 07 6E """) { + repeat(5) { writeAnnotations(" ") } + _private_clearAnnotations() + writeAnnotations(3) + assertFalse(_private_hasFirstAnnotation(-1, " ")) + writeBool(true) + } + assertWriterOutputEquals(""" E5 07 09 6E """) { + repeat(5) { writeAnnotations("a") } + _private_clearAnnotations() + writeAnnotations(3) + writeAnnotations(4) + writeBool(true) + } + } + + @Test + fun `_private_clearAnnotations() should clear sid annotations`() { + assertWriterOutputEquals(""" 6E """) { + repeat(5) { writeAnnotations(3) } + _private_clearAnnotations() + assertFalse(_private_hasFirstAnnotation(3, null)) + writeBool(true) + } + assertWriterOutputEquals(""" E7 FF 20 6E """) { + repeat(5) { writeAnnotations(3) } + _private_clearAnnotations() + writeAnnotations(" ") + assertFalse(_private_hasFirstAnnotation(3, null)) + writeBool(true) + } + assertWriterOutputEquals(""" E8 FF 61 FF 62 6E """) { + repeat(5) { writeAnnotations(3) } + _private_clearAnnotations() + writeAnnotations("a") + writeAnnotations("b") + writeBool(true) + } + } + + @Test + fun `write int`() { + assertWriterOutputEquals( + """ + 61 01 + 61 0A + """ + ) { + writeInt(1) + writeInt(BigInteger.TEN) + } + } + + @Test + fun `write float`() { + assertWriterOutputEquals( + """ + 6A + 6C C3 F5 48 40 + 6D 1F 85 EB 51 B8 1E 09 40 + """ + ) { + writeFloat(0.0) + writeFloat(3.14f) + writeFloat(3.14) + } + } + + @Test + fun `write decimal`() { + assertWriterOutputEquals( + """ + 70 + 72 01 00 + """ + ) { + writeDecimal(BigDecimal.ZERO) + writeDecimal(Decimal.NEGATIVE_ZERO) + } + } + + @Test + fun `write timestamp`() { + 
assertWriterOutputEquals( + """ + 87 35 46 AF 7C 55 47 70 2D + F8 05 4B 08 + """ + ) { + writeTimestamp(Timestamp.valueOf("2023-12-08T15:37:23.190583253Z")) + writeTimestamp(Timestamp.valueOf("2123T")) + } + } + + @Test + fun `write symbol`() { + assertWriterOutputEquals( + """ + E1 00 + E1 01 + E2 39 2F + A3 66 6F 6F + EE 0B + EE 21 + """ + ) { + writeSymbol(0) + writeSymbol(1) + writeSymbol(12345) + writeSymbol("foo") + writeSymbol(SystemSymbols_1_1.ION_LITERAL) + writeSymbol(SystemSymbols_1_1.EMPTY_TEXT) + } + } + + @Test + fun `attempting to write a negative SID should throw exception`() { + assertWriterThrows { + writeSymbol(-1) + } + } + + @Test + fun `write string`() { + assertWriterOutputEquals("93 66 6F 6F") { + writeString("foo") + } + } + + @Test + fun `write blob`() { + assertWriterOutputEquals("FE 07 01 02 03") { + writeBlob(byteArrayOf(1, 2, 3), 0, 3) + } + } + + @Test + fun `write clob`() { + assertWriterOutputEquals("FF 07 04 05 06") { + writeClob(byteArrayOf(4, 5, 6), 0, 3) + } + } + + @ParameterizedTest + @EnumSource(SystemMacro::class) + fun `write a system macro e-expression`(systemMacro: SystemMacro) { + val numVariadicParameters = systemMacro.signature.count { it.cardinality != ParameterCardinality.ExactlyOne } + val signatureBytes = when (numVariadicParameters) { + 0 -> "" + 1, 2, 3, 4 -> "00" + 5, 6, 7, 8 -> "00 00" + else -> TODO("There are definitely no system macros with more than 8 variadic parameters") + } + assertWriterOutputEquals(String.format("EF %02X $signatureBytes", systemMacro.id)) { + stepInEExp(systemMacro) + stepOut() + } + } + + @Test + fun `write a delimited e-expression`() { + assertWriterOutputEquals("00") { + stepInEExp(0, false, dummyMacro(nArgs = 0)) + stepOut() + } + assertWriterOutputEquals("3F") { + stepInEExp(63, false, dummyMacro(nArgs = 0)) + stepOut() + } + } + + @ParameterizedTest + @CsvSource( + " 64, 40 00", + " 65, 40 01", + " 319, 40 FF", + " 320, 41 00", + " 1211, 44 7B", + " 4159, 4F FF", + " 4160, 50 00 
00", + " 4161, 50 01 00", + " 4415, 50 FF 00", + " 4416, 50 00 01", + " 69695, 50 FF FF", + " 69696, 51 00 00", + " 1052735, 5F FF FF", + " 1052736, F4 04 82 80", + "${Int.MAX_VALUE}, F4 F0 FF FF FF 0F" + ) + fun `write a delimited e-expression with a multi-byte biased id`(id: Int, expectedBytes: String) { + assertWriterOutputEquals(expectedBytes) { + stepInEExp(id, usingLengthPrefix = false, dummyMacro(nArgs = 0)) + stepOut() + } + } + + @Test + fun `write a delimited e-expression that requires a presence bitmap`() { + assertWriterOutputEquals( + """ + 3F | Opcode/MacroID 63 + 55 | PresenceBitmap (4 single expressions) + 61 01 | Int 1 + 61 02 | Int 2 + 61 03 | Int 3 + 61 04 | Int 4 + """ + ) { + stepInEExp(63, usingLengthPrefix = false, dummyMacro(nArgs = 4, variadicParam(ParameterEncoding.Tagged))) + writeInt(1) + writeInt(2) + writeInt(3) + writeInt(4) + stepOut() + } + } + + @Test + fun `write a delimited e-expression with presence bitmap where many args are implicitly void`() { + assertWriterOutputEquals( + """ + 3F | MacroID 63 + 00 00 00 00 | PresenceBitmap (16 void) + """ + ) { + stepInEExp(63, usingLengthPrefix = false, dummyMacro(nArgs = 16, variadicParam(ParameterEncoding.Tagged))) + // Don't write any trailing void args (which is all of them in this case) + stepOut() + } + } + + @ParameterizedTest + @CsvSource( + // Macro Id; Op Address Length=0 + " 0, F5 01 01", + " 64, F5 81 01", + " 65, F5 83 01", + " 127, F5 FF 01", + " 128, F5 02 02 01", + " 729, F5 66 0B 01", + " 16383, F5 FE FF 01", + " 16384, F5 04 00 02 01", + " 1052736, F5 04 82 80 01", + " 2097151, F5 FC FF FF 01", + " 2097152, F5 08 00 00 02 01", + "${Int.MAX_VALUE}, F5 F0 FF FF FF 0F 01", + ) + fun `write a length-prefixed e-expression with no args`(id: Int, expectedBytes: String) { + // This test ensures that the macro address is written correctly + assertWriterOutputEquals(expectedBytes) { + stepInEExp(id, usingLengthPrefix = true, dummyMacro(nArgs = 0)) + stepOut() + } + } + + @Test + 
fun `write a length-prefixed e-expression with many args`() { + // This test ensures that the macro length is written correctly + assertWriterOutputEquals("F5 03 15 60 60 60 60 60 60 60 60 60 60") { + stepInEExp(1, usingLengthPrefix = true, dummyMacro(nArgs = 10)) + repeat(10) { writeInt(0L) } + stepOut() + } + } + + @Test + fun `write a length-prefixed e-expression that requires a presence bitmap`() { + assertWriterOutputEquals( + """ + F5 | Length-prefixed macro + 81 | MacroID 64 + 13 | Length = 9 + 55 | PresenceBitmap (4 single expressions) + 61 01 | Int 1 + 61 02 | Int 2 + 61 03 | Int 3 + 61 04 | Int 4 + """ + ) { + stepInEExp(64, usingLengthPrefix = true, dummyMacro(nArgs = 4, variadicParam(ParameterEncoding.Tagged))) + writeInt(1) + writeInt(2) + writeInt(3) + writeInt(4) + stepOut() + } + } + + @Test + fun `write a length-prefixed e-expression that requires a multi-byte presence bitmap`() { + assertWriterOutputEquals( + """ + F5 | Length-prefixed macro + 81 | MacroID 64 + 1D | Length = 14 + 55 05 | PresenceBitmap (6 single expressions) + 61 01 | Int 1 + 61 02 | Int 2 + 61 03 | Int 3 + 61 04 | Int 4 + 61 05 | Int 5 + 61 06 | Int 6 + """ + ) { + stepInEExp(64, usingLengthPrefix = true, dummyMacro(nArgs = 6, variadicParam(ParameterEncoding.Tagged))) + writeInt(1) + writeInt(2) + writeInt(3) + writeInt(4) + writeInt(5) + writeInt(6) + stepOut() + } + } + + @Test + fun `write a length-prefixed e-expression with presence bitmap where many args are implicitly void`() { + assertWriterOutputEquals( + """ + F5 | Length-prefixed macro + 81 | MacroID 64 + 09 | Length = 4 + 00 00 00 00 | PresenceBitmap (16 void) + """ + ) { + stepInEExp(64, usingLengthPrefix = true, dummyMacro(nArgs = 16, variadicParam(ParameterEncoding.Tagged))) + // Don't write any trailing void args (which is all of them in this case) + stepOut() + } + } + + @Test + fun `write nested e-expressions`() { + // E-Expressions don't have length prefixes, so we're putting them inside lists + // so that we 
can check that the length gets propagated correctly to the parent + assertWriterOutputEquals( + """ + BB | List Length 11 + 1F | Macro 31 + B9 | List Length 9 + 40 00 | Macro 64 + B6 | List Length 6 + 40 13 | Macro 83 + B3 | List Length 3 + 50 00 00 | Macro 4160 + """ + ) { + stepInList(usingLengthPrefix = true) + stepInEExp(31, usingLengthPrefix = false, dummyMacro(nArgs = 1)) + stepInList(usingLengthPrefix = true) + stepInEExp(64, usingLengthPrefix = false, dummyMacro(nArgs = 1)) + stepInList(usingLengthPrefix = true) + stepInEExp(83, usingLengthPrefix = false, dummyMacro(nArgs = 1)) + stepInList(usingLengthPrefix = true) + stepInEExp(4160, usingLengthPrefix = false, dummyMacro(nArgs = 0)) + repeat(8) { stepOut() } + } + } + + @Test + fun `write an e-expression in the field name position of a variable-length struct`() { + assertWriterOutputEquals( + """ + FD | Variable Length Struct + 11 | Length = 8 + 15 | SID 10 + 6E | true + 01 | switch to FlexSym encoding + 01 | FlexSym Escape Byte + 1F | Macro 31 (0x1F) + 01 | FlexSym Escape Byte + 40 00 | Macro 64 + """ + ) { + stepInStruct(usingLengthPrefix = true) + writeFieldName(10) + writeBool(true) + stepInEExp(31, usingLengthPrefix = false, dummyMacro(nArgs = 0)) + stepOut() + stepInEExp(64, usingLengthPrefix = false, dummyMacro(nArgs = 0)) + stepOut() + stepOut() + } + } + + @Test + fun `write an e-expression in the field name position of a delimited struct`() { + assertWriterOutputEquals( + """ + F3 | Begin Delimited Struct + 01 | FlexSym Escape Byte + 1F | Macro 31 (0x1F) + 01 | FlexSym Escape Byte + F0 | End Delimiter + """ + ) { + stepInStruct(usingLengthPrefix = false) + stepInEExp(31, usingLengthPrefix = false, dummyMacro(nArgs = 0)) + stepOut() + stepOut() + } + } + + @Test + fun `write an e-expression in the value position of a struct`() { + assertWriterOutputEquals( + """ + D2 | Struct length 2 + 03 | SID 1 + 01 | Macro 1 + """ + ) { + stepInStruct(usingLengthPrefix = true) + writeFieldName(1) + 
stepInEExp(1, usingLengthPrefix = false, dummyMacro(nArgs = 0)) + stepOut() + stepOut() + } + } + + @Test + fun `calling stepInEExp(String) should throw NotImplementedError`() { + assertThrows { + writeAsHexString { + stepInEExp("foo") + } + } + } + + @Test + fun `calling stepInEExp with an annotation should throw IonException`() { + assertWriterThrows { + writeAnnotations("foo") + stepInEExp(1, usingLengthPrefix = false, dummyMacro(nArgs = 0)) + } + } + + @Test + fun `write a prefixed, tagged expression group with zero values`() { + assertWriterOutputEquals(""" 3D 01 """) { + stepInEExp(0x3D, usingLengthPrefix = false, dummyMacro(nArgs = 1)) + stepInExpressionGroup(usingLengthPrefix = true) + stepOut() + stepOut() + } + } + + @Test + fun `write a prefixed, tagged expression group with one value`() { + assertWriterOutputEquals( + """ + 3D | Macro 61 + 03 | Expression Group, Length = 1 + 6E | true + """ + ) { + stepInEExp(0x3D, usingLengthPrefix = false, dummyMacro(nArgs = 1)) + stepInExpressionGroup(usingLengthPrefix = true) + writeBool(true) + stepOut() + stepOut() + } + } + + @Test + fun `write a prefixed, tagged expression group with multiple values`() { + assertWriterOutputEquals( + """ + 3D | Macro 77 + 0B | Expression Group, Length = 5 + 60 61 01 61 02 | Ints 0, 1, 2 + """ + ) { + stepInEExp(0x3D, usingLengthPrefix = false, dummyMacro(nArgs = 1)) + stepInExpressionGroup(usingLengthPrefix = true) + writeInt(0) + writeInt(1) + continueExpressionGroup() // Should have no effect + writeInt(2) + stepOut() + stepOut() + } + } + + @Test + fun `write a prefixed, tagged expression group so long that it requires a patch point`() { + assertWriterOutputEquals( + """ + 3D | Macro 0 + FE 03 | Expression Group, Length = 255 + ${"6E ".repeat(255)} + """ + ) { + stepInEExp(0x3D, usingLengthPrefix = false, dummyMacro(nArgs = 1)) + stepInExpressionGroup(usingLengthPrefix = true) + repeat(255) { writeBool(true) } + stepOut() + stepOut() + } + } + + @Test + fun `write a 
delimited, tagged expression group with zero values`() { + assertWriterOutputEquals("3D 01 F0") { + stepInEExp(0x3D, usingLengthPrefix = false, dummyMacro(nArgs = 1)) + stepInExpressionGroup(usingLengthPrefix = false) + stepOut() + stepOut() + } + } + + @Test + fun `write a delimited, tagged expression group with one value`() { + assertWriterOutputEquals("3D 01 60 F0") { + stepInEExp(0x3D, usingLengthPrefix = false, dummyMacro(nArgs = 1)) + stepInExpressionGroup(usingLengthPrefix = false) + writeInt(0) + stepOut() + stepOut() + } + } + + @Test + fun `write a delimited, tagged expression group with multiple values`() { + assertWriterOutputEquals("3D 01 60 61 01 61 02 F0") { + stepInEExp(0x3D, usingLengthPrefix = false, dummyMacro(nArgs = 1)) + stepInExpressionGroup(usingLengthPrefix = false) + writeInt(0) + writeInt(1) + continueExpressionGroup() // Should have no effect + writeInt(2) + stepOut() + stepOut() + } + } + + @Test + fun `write a tagless expression group with zero values`() { + // Empty expression group is elided to be void, so we just have `00` presence bitmap + assertWriterOutputEquals("3D 00") { + stepInEExp(0x3D, usingLengthPrefix = false, dummyMacro(nArgs = 1, variadicParam(ParameterEncoding.Uint8))) + stepInExpressionGroup(usingLengthPrefix = false) + stepOut() + stepOut() + } + } + + @Test + fun `write a tagless expression group with one value`() { + assertWriterOutputEquals("3D 02 03 1A 01") { + stepInEExp(0x3D, usingLengthPrefix = false, dummyMacro(nArgs = 1, variadicParam(ParameterEncoding.Uint8))) + stepInExpressionGroup(usingLengthPrefix = false) + writeInt(0x1A) + stepOut() + stepOut() + } + } + + @Test + fun `write a tagless expression group with multiple values`() { + assertWriterOutputEquals("3D 02 07 1A 2B 3C 01") { + stepInEExp(0x3D, usingLengthPrefix = false, dummyMacro(nArgs = 1, variadicParam(ParameterEncoding.Uint8))) + stepInExpressionGroup(usingLengthPrefix = false) + writeInt(0x1A) + writeInt(0x2B) + writeInt(0x3C) + stepOut() + 
stepOut() + } + } + + @Test + fun `write a tagless expression group with multiple segments using continueExpressionGroup()`() { + assertWriterOutputEquals( + """ + 3D 02 + 07 1A 2B 3C | 3 ints + 05 4D 5E | 2 more ints + 01 | End of expression group + """ + ) { + stepInEExp(0x3D, usingLengthPrefix = false, dummyMacro(nArgs = 1, variadicParam(ParameterEncoding.Uint8))) + stepInExpressionGroup(usingLengthPrefix = false) + writeInt(0x1A) + writeInt(0x2B) + writeInt(0x3C) + continueExpressionGroup() + writeInt(0x4D) + writeInt(0x5E) + stepOut() + stepOut() + } + } + + @Test + fun `calling stepOut() immediately after continueExpressionGroup() should be handled correctly`() { + assertWriterOutputEquals( + """ + 3D 02 + 07 1A 2B 3C | 3 ints + 01 | End of expression group + """ + ) { + stepInEExp(0x3D, usingLengthPrefix = false, dummyMacro(nArgs = 1, variadicParam(ParameterEncoding.Uint8))) + stepInExpressionGroup(usingLengthPrefix = false) + writeInt(0x1A) + writeInt(0x2B) + writeInt(0x3C) + continueExpressionGroup() + stepOut() + stepOut() + } + } + + @Test + fun `calling stepOut() with too many parameters in a length-prefixed e-expression throws IllegalArgumentException`() { + val rawWriter = ionWriter() + + assertThrows { + rawWriter.stepInEExp(64, usingLengthPrefix = true, dummyMacro(nArgs = 0)) + rawWriter.stepInEExp(SystemMacro.None) + rawWriter.stepOut() + rawWriter.stepOut() + } + } + + @Test + fun `calling continueExpressionGroup() has no effect when there are no expressions in the current segment`() { + assertWriterOutputEquals( + """ + 3D 02 + 05 1A 2B | 2 ints + 03 3C | 1 int + 01 | End of expression group + """ + ) { + stepInEExp(0x3D, usingLengthPrefix = false, dummyMacro(nArgs = 1, variadicParam(ParameterEncoding.Uint8))) + stepInExpressionGroup(usingLengthPrefix = false) + repeat(10) { continueExpressionGroup() } + writeInt(0x1A) + writeInt(0x2B) + repeat(10) { continueExpressionGroup() } + writeInt(0x3C) + stepOut() + stepOut() + } + } + + @Test + fun 
`calling continueExpressionGroup() throws an exception if not in an expression group`() { + assertWriterThrows { continueExpressionGroup() } + assertWriterThrows { writeList { continueExpressionGroup() } } + assertWriterThrows { writeSExp { continueExpressionGroup() } } + assertWriterThrows { writeStruct { continueExpressionGroup() } } + assertWriterThrows { + stepInEExp(0x3D, usingLengthPrefix = false, dummyMacro(nArgs = 1, variadicParam(ParameterEncoding.Uint8))) + continueExpressionGroup() + } + } + + @Test + fun `calling stepInExpressionGroup with an annotation should throw IonException`() { + assertWriterThrows { + stepInEExp(1, usingLengthPrefix = false, dummyMacro(nArgs = 1)) + writeAnnotations("foo") + stepInExpressionGroup(usingLengthPrefix = true) + } + } + + @Test + fun `calling stepInExpressionGroup while not directly in a Macro container should throw IonException`() { + assertWriterThrows { + stepInExpressionGroup(usingLengthPrefix = true) + } + assertWriterThrows { + stepInList(usingLengthPrefix = true) + stepInExpressionGroup(usingLengthPrefix = true) + } + assertWriterThrows { + stepInSExp(usingLengthPrefix = true) + stepInExpressionGroup(usingLengthPrefix = true) + } + assertWriterThrows { + stepInStruct(usingLengthPrefix = true) + stepInExpressionGroup(usingLengthPrefix = true) + } + assertWriterThrows { + stepInEExp(123, usingLengthPrefix = false, dummyMacro(nArgs = 1)) + stepInExpressionGroup(usingLengthPrefix = true) + stepInExpressionGroup(usingLengthPrefix = true) + } + } + + @ParameterizedTest + @CsvSource( + // These tests are intentionally limited. 
Full testing of int logic is in `IonEncoder_1_1Test` and `WriteBufferTest` + " Uint8, 0, 00", + " Uint8, 1, 01", + " Uint16, 0, 00 00", + " Uint16, 1, 01 00", + " Uint32, 0, 00 00 00 00", + " Uint32, 1, 01 00 00 00", + " Uint64, 0, 00 00 00 00 00 00 00 00", + " Uint64, 1, 01 00 00 00 00 00 00 00", + " FlexUint, 0, 01", + " FlexUint, 1, 03", + " Int8, 0, 00", + " Int8, 1, 01", + " Int8, -1, FF", + " Int16, 0, 00 00", + " Int16, 1, 01 00", + " Int16, -1, FF FF", + " Int32, 0, 00 00 00 00", + " Int32, 1, 01 00 00 00", + " Int32, -1, FF FF FF FF", + " Int64, 0, 00 00 00 00 00 00 00 00", + " Int64, 1, 01 00 00 00 00 00 00 00", + " Int64, -1, FF FF FF FF FF FF FF FF", + " FlexInt, 0, 01", + " FlexInt, 1, 03", + " FlexInt, -1, FF", + ) + fun `write a tagless int`(encoding: ParameterEncoding, value: Long, expectedBytes: String) { + val macro = dummyMacro(nArgs = 1, variadicParam(encoding)) + // Write the value as single expression + assertWriterOutputEquals("3D 01 $expectedBytes") { + stepInEExp(0x3D, usingLengthPrefix = false, macro) + writeInt(value) + stepOut() + } + // ...and again using writeInt(BigInteger) + assertWriterOutputEquals("3D 01 $expectedBytes") { + stepInEExp(0x3D, usingLengthPrefix = false, macro) + writeInt(value.toBigInteger()) + stepOut() + } + } + + @ParameterizedTest + @CsvSource( + // These tests are intentionally limited. 
Full testing of int logic is in `IonEncoder_1_1Test` and `WriteBufferTest` + // Primitive, Ints to write, expression group bytes + " Uint8, 0 1, 05 00 01 01", + " Uint16, 0 1, 09 00 00 01 00 01", + " Uint32, 0 1, 11 00 00 00 00 01 00 00 00 01", + " Uint64, 0 1, 21 00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 01", + " FlexUint, 0 1 256, 09 01 03 02 04 01", + " Int8, -1 0 1, 07 FF 00 01 01", + " Int16, -1 0 1, 0D FF FF 00 00 01 00 01", + " Int32, -1 0 1, 19 FF FF FF FF 00 00 00 00 01 00 00 00 01", + " Int64, -1 0 1, 31 FF FF FF FF FF FF FF FF 00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 01", + " FlexInt, -1 0 1 256, 0B FF 01 03 02 04 01", + ) + fun `write a tagless int in an expression group`(encoding: ParameterEncoding, values: String, expressionGroupBytes: String) { + val longValues = values.split(" ").map { it.toLong() } + val macro = dummyMacro(nArgs = 1, variadicParam(encoding)) + // Write the value in expression group + assertWriterOutputEquals("3D 02 $expressionGroupBytes") { + stepInEExp(0x3D, usingLengthPrefix = false, macro) + stepInExpressionGroup(usingLengthPrefix = false) + longValues.forEach { writeInt(it) } + stepOut() + stepOut() + } + // ...and again using writeInt(BigInteger) + assertWriterOutputEquals("3D 02 $expressionGroupBytes") { + stepInEExp(0x3D, usingLengthPrefix = false, macro) + stepInExpressionGroup(usingLengthPrefix = false) + longValues.forEach { writeInt(it.toBigInteger()) } + stepOut() + stepOut() + } + } + + @ParameterizedTest + @CsvSource( + " Uint8, ${UByte.MIN_VALUE}", + " Uint8, ${UByte.MAX_VALUE}", + " Uint16, ${UShort.MIN_VALUE}", + " Uint16, ${UShort.MAX_VALUE}", + " Uint32, ${UInt.MIN_VALUE}", + " Uint32, ${UInt.MAX_VALUE}", + " Uint64, ${ULong.MIN_VALUE}", + " Uint64, ${ULong.MAX_VALUE}", + " Int8, ${Byte.MIN_VALUE}", + " Int8, ${Byte.MAX_VALUE}", + " Int16, ${Short.MIN_VALUE}", + " Int16, ${Short.MAX_VALUE}", + " Int32, ${Int.MIN_VALUE}", + " Int32, ${Int.MAX_VALUE}", + " Int64, ${Long.MIN_VALUE}", + " Int64, 
${Long.MAX_VALUE}", + " FlexUint, 0", + // There is no upper bound for FlexUInt, and no bounds at all for FlexInt + ) + fun `attempting to write a tagless int that is out of bounds for its encoding primitive should throw exception`( + encoding: ParameterEncoding, + // The min or max value of that particular parameter encoding. + goodValue: BigInteger, + ) { + val badValue = if (goodValue > BigInteger.ZERO) goodValue + BigInteger.ONE else goodValue - BigInteger.ONE + val macro = dummyMacro(nArgs = 2, variadicParam(encoding)) + assertWriterThrows { + stepInEExp(0x3D, usingLengthPrefix = false, macro) + writeInt(badValue) + stepOut() + } + + if (badValue.bitLength() < Long.SIZE_BITS) { + // If this bad value fits in a long, test it on the long API as well. + assertWriterThrows { + stepInEExp(0x3D, usingLengthPrefix = false, macro) + writeInt(badValue.longValueExact()) + stepOut() + } + } + } + + @Test + fun `attempting to write an int when another tagless type is expected should throw exception`() { + val macro = dummyMacro(nArgs = 1, variadicParam(ParameterEncoding.Float64)) + assertWriterThrows { + stepInEExp(0x3D, usingLengthPrefix = false, macro) + writeInt(0L) + stepOut() + } + assertWriterThrows { + stepInEExp(0x3D, usingLengthPrefix = false, macro) + writeInt(0.toBigInteger()) + stepOut() + } + } + + @ParameterizedTest + @CsvSource( + // These tests are intentionally limited. 
Full testing of float logic is in `IonEncoder_1_1Test` + // TODO: Float16 cases, once Float16 is supported + "Float32, 0.0, 00 00 00 00", + "Float32, 1.0, 00 00 80 3F", + "Float32, NaN, 00 00 C0 7F", + "Float32, Infinity, 00 00 80 7F", + "Float32, -Infinity, 00 00 80 FF", + "Float64, 0.0, 00 00 00 00 00 00 00 00", + "Float64, 1.0, 00 00 00 00 00 00 F0 3F", + "Float64, NaN, 00 00 00 00 00 00 F8 7F", + "Float64, Infinity, 00 00 00 00 00 00 F0 7F", + "Float64, -Infinity, 00 00 00 00 00 00 F0 FF", + ) + fun `write a tagless float`(encoding: ParameterEncoding, value: Float, expectedBytes: String) { + val macro = dummyMacro(nArgs = 1, variadicParam(encoding)) + // Write the value as single expression + assertWriterOutputEquals("3D 01 $expectedBytes") { + stepInEExp(0x3D, usingLengthPrefix = false, macro) + writeFloat(value) + stepOut() + } + // ...and again using writeFloat(Double) + assertWriterOutputEquals("3D 01 $expectedBytes") { + stepInEExp(0x3D, usingLengthPrefix = false, macro) + writeFloat(value.toDouble()) + stepOut() + } + } + + @OptIn(ExperimentalStdlibApi::class) + @ParameterizedTest + @CsvSource( + // These tests are intentionally limited. 
Full testing of float logic is in `IonEncoder_1_1Test` + // TODO: Float16 cases, once Float16 is supported + "Float32, 0.0, 00 00 00 00", + "Float32, 1.0, 00 00 80 3F", + "Float32, NaN, 00 00 C0 7F", + "Float32, Infinity, 00 00 80 7F", + "Float32, -Infinity, 00 00 80 FF", + "Float64, 0.0, 00 00 00 00 00 00 00 00", + "Float64, 1.0, 00 00 00 00 00 00 F0 3F", + "Float64, NaN, 00 00 00 00 00 00 F8 7F", + "Float64, Infinity, 00 00 00 00 00 00 F0 7F", + "Float64, -Infinity, 00 00 00 00 00 00 F0 FF", + ) + fun `write a tagless float in an expression group`(encoding: ParameterEncoding, value: Float, expectedBytes: String) { + val taglessTypeByteSize = when (encoding) { + ParameterEncoding.Float16 -> 2 + ParameterEncoding.Float32 -> 4 + ParameterEncoding.Float64 -> 8 + else -> TODO("Other types not supported in this test.") + } + val macro = dummyMacro(nArgs = 1, variadicParam(encoding)) + // For small numbers, we can use x*2+1 to calculate the FlexUInt encoding + val lengthByte = ((taglessTypeByteSize + taglessTypeByteSize) * 2 + 1).toByte().toHexString(HexFormat.UpperCase) + // Write the value twice in expression group + assertWriterOutputEquals("3D 02 $lengthByte $expectedBytes $expectedBytes 01") { + stepInEExp(0x3D, usingLengthPrefix = false, macro) + stepInExpressionGroup(usingLengthPrefix = false) + writeFloat(value) + writeFloat(value) + stepOut() + stepOut() + } + // ...and again using writeFloat(Double) + assertWriterOutputEquals("3D 02 $lengthByte $expectedBytes $expectedBytes 01") { + stepInEExp(0x3D, usingLengthPrefix = false, macro) + stepInExpressionGroup(usingLengthPrefix = false) + writeFloat(value.toDouble()) + writeFloat(value.toDouble()) + stepOut() + stepOut() + } + } + + @Test + fun `attempting to write a float when another tagless type is expected should throw exception`() { + val macro = dummyMacro(nArgs = 1, variadicParam(ParameterEncoding.Uint8)) + assertWriterThrows { + stepInEExp(0x3D, usingLengthPrefix = false, macro) + writeFloat(0.0) // double 
+ stepOut() + } + assertWriterThrows { + stepInEExp(0x3D, usingLengthPrefix = false, macro) + writeFloat(0.0f) // float + stepOut() + } + } + + @OptIn(ExperimentalStdlibApi::class) + @ParameterizedTest + @CsvSource( + // SID + " 0, 01 60", + " 4, 09", + " 246, DA 03", + // Text + " a, FF 61", + " abc, FB 61 62 63", + " '', 01 81", + ) + fun `write a tagless symbol`(value: String, expectedBytes: String) { + val macro = dummyMacro(nArgs = 1, variadicParam(ParameterEncoding.FlexSym)) + // If it's an int, write as SID, else write as text + val writeTheValue: IonRawBinaryWriter_1_1.() -> Unit = value.toIntOrNull() + ?.let { { writeSymbol(it) } } + ?: { writeSymbol(value) } + // Write the value as single expression + assertWriterOutputEquals("3D 01 $expectedBytes") { + stepInEExp(0x3D, usingLengthPrefix = false, macro) + writeTheValue() + stepOut() + } + // Write the value twice in expression group + // For small numbers, we can use x*2+1 to calculate the FlexUInt encoding + // Also, it conveniently happens that once the white-space is removed, the number of characters is + // equal to the number of bytes to write the values twice. 
+ val lengthByte = ((expectedBytes.replace(" ", "").length) * 2 + 1).toByte().toHexString(HexFormat.UpperCase) + assertWriterOutputEquals("3D 02 $lengthByte $expectedBytes $expectedBytes 01") { + stepInEExp(0x3D, usingLengthPrefix = false, macro) + stepInExpressionGroup(usingLengthPrefix = false) + writeTheValue() + writeTheValue() + stepOut() + stepOut() + } + } + + @Test + fun `attempting to write a symbol when another tagless type is expected should throw exception`() { + val macro = dummyMacro(nArgs = 1, variadicParam(ParameterEncoding.Uint8)) + assertWriterThrows { + stepInEExp(0x3D, usingLengthPrefix = false, macro) + writeSymbol(4) + stepOut() + } + assertWriterThrows { + stepInEExp(0x3D, usingLengthPrefix = false, macro) + writeSymbol("foo") + stepOut() + } + } + + @Test + fun `attempting to write a tagless value with annotations should throw exception`() { + assertWriterThrows { + stepInEExp(0x3D, usingLengthPrefix = false, dummyMacro(nArgs = 1, variadicParam(ParameterEncoding.Uint8))) + writeAnnotations("foo") + writeInt(0) + stepOut() + } + } + + /** + * Writes this Ion, taken from https://amazon-ion.github.io/ion-docs/ + * ``` + * { + * name: "Fido", + * age: years::4, + * birthday: 2012-03-01T, + * toys: [ball, rope], + * weight: pounds::41.2, + * buzz: {{VG8gaW5maW5pdHkuLi4gYW5kIGJleW9uZCE=}}, + * } + * ``` + */ + @Test + fun `write something complex`() { + assertWriterOutputEquals( + """ + E0 01 01 EA | IVM + E4 07 FD 63 | $3::{ // length=49 + 0F FB 5D | $7: [ // length=46 + 94 6E 61 6D 65 | "name", + 93 61 67 65 | "age", + 95 79 65 61 72 73 | "years", + 98 62 69 72 74 68 64 61 79 | "birthday", + 94 74 6F 79 73 | "toys", + 94 62 61 6C 6C | "ball", + 96 77 65 69 67 68 74 | "weight", + 94 62 75 7A 7A | "buzz", + | ] + | } + FD 85 | { // length=66 + 15 94 46 69 64 6F | $10: "Fido", + 17 E4 19 61 04 | $11: $12::4, + 1B 82 AA 09 | $13: 2012-03-01T + 1D B7 | $14: [ // length=7 + E1 0F | $15, + A4 72 6F 70 65 | rope + | ], + 21 | $16: + E7 F5 70 6F 75 6E 64 
73 | pounds:: + 73 FF 9C 01 | 41.2 + 23 FE 35 | $17: {{ // length=26 + 54 6F 20 69 6E 66 69 6E 69 | VG8gaW5maW5p + 74 79 2E 2E 2E 20 61 6E 64 | dHkuLi4gYW5k + 20 62 65 79 6F 6E 64 21 | IGJleW9uZCE= + | }} + | } + """ + ) { + writeIVM() + writeAnnotations(3) + writeStruct { + writeFieldName(7) + writeList { + writeString("name") + writeString("age") + writeString("years") + writeString("birthday") + writeString("toys") + writeString("ball") + writeString("weight") + writeString("buzz") + } + } + writeStruct { + writeFieldName(10) + writeString("Fido") + writeFieldName(11) + writeAnnotations(12) + writeInt(4) + writeFieldName(13) + writeTimestamp(Timestamp.valueOf("2012-03-01T")) + writeFieldName(14) + writeList { + writeSymbol(15) + writeSymbol("rope") + } + writeFieldName(16) + writeAnnotations("pounds") + writeDecimal(BigDecimal.valueOf(41.2)) + writeFieldName(17) + writeBlob( + byteArrayOf( + 84, 111, 32, 105, 110, 102, 105, 110, 105, + 116, 121, 46, 46, 46, 32, 97, 110, 100, + 32, 98, 101, 121, 111, 110, 100, 33 + ) + ) + } + } + } + + /** + * Helper function that steps into a struct, applies the contents of [block] to + * the writer, and then steps out of the struct. + * Using this function makes it easy for the indentation of the writer code to + * match the indentation of the equivalent pretty-printed Ion. + */ + private inline fun IonRawWriter_1_1.writeStruct(block: IonRawWriter_1_1.() -> Unit) { + stepInStruct(usingLengthPrefix = true) + block() + stepOut() + } + + /** + * Helper function that steps into a list, applies the contents of [block] to + * the writer, and then steps out of the list. + * Using this function makes it easy for the indentation of the writer code to + * match the indentation of the equivalent pretty-printed Ion. 
+ */ + private inline fun IonRawWriter_1_1.writeList(block: IonRawWriter_1_1.() -> Unit) { + stepInList(usingLengthPrefix = true) + block() + stepOut() + } + + /** + * Helper function that steps into a sexp, applies the contents of [block] to + * the writer, and then steps out of the sexp. + * Using this function makes it easy for the indentation of the writer code to + * match the indentation of the equivalent pretty-printed Ion. + */ + private inline fun IonRawWriter_1_1.writeSExp(block: IonRawWriter_1_1.() -> Unit) { + stepInSExp(usingLengthPrefix = true) + block() + stepOut() + } + + /** + * Helper function that creates a dummy macro with the given number of arguments. + */ + private fun dummyMacro(nArgs: Int, param: Parameter = Parameter("arg", ParameterEncoding.Tagged, ParameterCardinality.ExactlyOne)) = + TemplateMacro(List(nArgs) { param.copy("arg_$it") }, listOf()) + + private fun variadicParam(encoding: ParameterEncoding) = Parameter("arg", encoding, ParameterCardinality.ZeroOrMore) +} diff --git a/src/test/java/com/amazon/ion/impl/bin/PresenceBitmapTest.kt b/src/test/java/com/amazon/ion/impl/bin/PresenceBitmapTest.kt new file mode 100644 index 0000000000..9c1e4d9de5 --- /dev/null +++ b/src/test/java/com/amazon/ion/impl/bin/PresenceBitmapTest.kt @@ -0,0 +1,333 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.bin + +import com.amazon.ion.IonException +import com.amazon.ion.TestUtils.* +import com.amazon.ion.impl.macro.Macro.* +import java.io.ByteArrayOutputStream +import org.junit.Test +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.assertThrows +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.CsvSource + +class PresenceBitmapTest { + + companion object { + val taggedZeroToMany = Parameter("a", ParameterEncoding.Tagged, ParameterCardinality.ZeroOrMore) + val taggedExactlyOne = Parameter("b", ParameterEncoding.Tagged, ParameterCardinality.ExactlyOne) + val taggedZeroOrOne = Parameter("c", ParameterEncoding.Tagged, ParameterCardinality.ZeroOrOne) + val taggedOneToMany = Parameter("d", ParameterEncoding.Tagged, ParameterCardinality.OneOrMore) + val taglessZeroToMany = Parameter("e", ParameterEncoding.Uint8, ParameterCardinality.ZeroOrMore) + val taglessExactlyOne = Parameter("f", ParameterEncoding.Uint8, ParameterCardinality.ExactlyOne) + val taglessZeroOrOne = Parameter("g", ParameterEncoding.Uint8, ParameterCardinality.ZeroOrOne) + val taglessOneToMany = Parameter("h", ParameterEncoding.Uint8, ParameterCardinality.OneOrMore) + } + + @Test + fun `initialize should ensure that values are cleared`() { + val signature = listOf( + taggedExactlyOne, taggedZeroToMany, taggedZeroOrOne, taggedOneToMany, + taglessExactlyOne, taglessZeroToMany, taglessZeroOrOne, taglessOneToMany, + ) + val pb = PresenceBitmap() + pb.initialize(signature) + for (i in 0..7) pb[i] = PresenceBitmap.EXPRESSION + pb.initialize(signature) + for (i in 0..7) assertEquals(PresenceBitmap.VOID, pb[i]) + assertThrows { pb[8] } + } + + @Test + fun `when initializing with a too-large signature, should throw exception`() { + val pb = PresenceBitmap() + val signature = List(PresenceBitmap.MAX_SUPPORTED_PARAMETERS + 1) { taggedZeroToMany } + assertThrows { 
pb.initialize(signature) } + } + + @Test + fun `when calling set with an invalid index, should throw exception`() { + val pb = PresenceBitmap() + val signature = listOf(taggedZeroOrOne, taggedOneToMany, taggedExactlyOne) + pb.initialize(signature) + assertThrows { pb.set(-1, PresenceBitmap.EXPRESSION) } + assertThrows { pb.set(3, PresenceBitmap.EXPRESSION) } + } + + @Test + fun `when calling get with an invalid index, should throw exception`() { + val pb = PresenceBitmap() + val signature = listOf(taggedZeroOrOne, taggedOneToMany, taggedExactlyOne) + pb.initialize(signature) + assertThrows { pb.get(-1) } + assertThrows { pb.get(3) } + } + + @Test + fun `when calling set, the presence bits value for that parameter is _not_ validated`() { + val signature = listOf(taggedZeroOrOne, taggedOneToMany, taggedExactlyOne) + with(PresenceBitmap()) { + initialize(signature) + // PresenceBits is an internal only class, so we rely on callers to do the correct thing. + // There should not be an exception thrown for any of these. + set(0, value = PresenceBitmap.GROUP) + set(1, value = PresenceBitmap.VOID) + set(2, value = PresenceBitmap.GROUP) + set(2, value = PresenceBitmap.VOID) + } + } + + @ParameterizedTest + @CsvSource( + // For some reason `Long.decode()` doesn't support binary, so + // we're just using decimal for the presence values here. 
+ "ExactlyOne, 0, false", + "ExactlyOne, 1, true", + "ExactlyOne, 2, false", + "ExactlyOne, 3, false", + "ZeroOrMore, 0, true", + "ZeroOrMore, 1, true", + "ZeroOrMore, 2, true", + "ZeroOrMore, 3, false", + "ZeroOrOne, 0, true", + "ZeroOrOne, 1, true", + "ZeroOrOne, 2, false", + "ZeroOrOne, 3, false", + "OneOrMore, 0, false", + "OneOrMore, 1, true", + "OneOrMore, 2, true", + "OneOrMore, 3, false", + ) + fun `validate() correctly throws exception when presence bits are invalid for signature`(cardinality: ParameterCardinality, presenceValue: Long, isValid: Boolean) { + val signature = listOf(Parameter("a", ParameterEncoding.Uint8, cardinality)) + with(PresenceBitmap()) { + initialize(signature) + set(0, presenceValue) + if (isValid) { + validate() + } else { + assertThrows { validate() } + } + } + } + + @Test + fun `when all parameters are tagged and exactly-one, no presence bits are needed or written`() { + (0..128).forEach { n -> assertExpectedPresenceBitSizes(expectedByteSize = 0, signature = List(n) { taggedExactlyOne }) } + } + + @Test + fun `when all parameters are tagless and exactly-one, no presence bits are needed or written`() { + (0..128).forEach { n -> assertExpectedPresenceBitSizes(expectedByteSize = 0, signature = List(n) { taglessExactlyOne }) } + } + + @Test + fun `when all parameters are tagged and not exactly-one, should write expected number of presence bits`() { + // Index of an element in this list is the number of parameters in the signature + listOf(0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3) + .forEachIndexed { numParameters, expectedByteSize -> + assertExpectedPresenceBitSizes(expectedByteSize, signature = List(numParameters) { taggedZeroToMany }) + } + } + + @Test + fun `when all parameters are tagless and not exactly-one, should write expected number of presence bits`() { + // Index of an element in this list is the number of parameters in the signature + listOf(0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3) + .forEachIndexed { numParameters, 
expectedByteSize -> + assertExpectedPresenceBitSizes(expectedByteSize, signature = List(numParameters) { taglessZeroToMany }) + } + } + + private fun assertExpectedPresenceBitSizes(expectedByteSize: Int, signature: List) { + val result = writePresenceBits { pb -> + pb.initialize(signature) + assertEquals(expectedByteSize, pb.byteSize) + assertEquals(signature.size, pb.totalParameterCount) + } + assertEquals(expectedByteSize, result.size) + } + + @Test + fun `read 4 parameters`() { + val signature = listOf(taggedZeroToMany, taggedZeroOrOne, taggedExactlyOne, taggedZeroToMany) + // Bits are read in pairs from right to left + // There are only three pairs of presence bits because "exactly-one" parameters do not get presence bits. + val bytes = bitStringToByteArray("100010") + + val pb = PresenceBitmap() + pb.initialize(signature) + pb.readFrom(bytes, 0) + + assertEquals(PresenceBitmap.GROUP, pb[0]) + assertEquals(PresenceBitmap.VOID, pb[1]) + // Should automatically populate the value for the exactly-one parameter + assertEquals(PresenceBitmap.EXPRESSION, pb[2]) + assertEquals(PresenceBitmap.GROUP, pb[3]) + } + + @Test + fun `write 4 parameters`() { + val signature = listOf(taggedZeroToMany, taggedZeroOrOne, taggedExactlyOne, taggedZeroToMany) + + val result = writePresenceBits { pb -> + pb.initialize(signature) + pb[0] = PresenceBitmap.EXPRESSION + pb[1] = PresenceBitmap.GROUP + pb[2] = PresenceBitmap.EXPRESSION + pb[3] = PresenceBitmap.GROUP + } + + assertEquals("00101001", result.toBitString()) + } + + @Test + fun `write presence bitmap`() { + // Ensures that the bits are written in the correct order for all possible sizes + (PresenceBitmap.MAX_SUPPORTED_PARAMETERS downTo 0).forEach { signatureSize -> + val signature = List(signatureSize) { taglessZeroToMany } + (0 until signatureSize).forEach { i -> + val parameterPresences = List(signatureSize) { j -> if (i == j) PresenceBitmap.GROUP else PresenceBitmap.EXPRESSION } + val expected = 
createBitStringFromParameterPresences(parameterPresences) + val actual = writePresenceBits { pb -> + pb.initialize(signature) + parameterPresences.forEachIndexed(pb::set) + } + assertEquals(expected, actual.toBitString()) + } + } + } + + @Test + fun `read presence bitmap`() { + // Ensures that the bits are read using the correct order + (PresenceBitmap.MAX_SUPPORTED_PARAMETERS downTo 0).forEach { signatureSize -> + val signature = List(signatureSize) { taglessZeroToMany } + (0 until signatureSize).forEach { i -> + val parameterPresences = List(signatureSize) { j -> if (i == j) PresenceBitmap.GROUP else PresenceBitmap.EXPRESSION } + val inputBits = bitStringToByteArray(createBitStringFromParameterPresences(parameterPresences)) + + val pb = PresenceBitmap() + pb.initialize(signature) + pb.readFrom(inputBits, 0) + + parameterPresences.forEachIndexed { l, expected -> assertEquals(expected, pb[l]) } + } + } + } + + @Test + fun `write presence bitmap with a required parameter`() { + // Ensures that the bits are read using the correct order + (PresenceBitmap.MAX_SUPPORTED_PARAMETERS downTo 0).forEach { signatureSize -> + (0 until signatureSize).forEach { i -> + val signature = List(signatureSize) { j -> if (j == i) taglessExactlyOne else taglessZeroToMany } + val parameterPresences = List(signatureSize) { j -> + when { + j < i -> PresenceBitmap.RESERVED + j == i -> PresenceBitmap.EXPRESSION + j > i -> PresenceBitmap.GROUP + else -> TODO("Unreachable") + } + } + val expected = createBitStringFromParameterPresences(parameterPresences.filter { it != PresenceBitmap.EXPRESSION }) + + val actual = writePresenceBits { pb -> + pb.initialize(signature) + parameterPresences.forEachIndexed(pb::set) + } + + assertEquals(expected, actual.toBitString()) + } + } + } + + @Test + fun `read presence bitmap with a required parameter`() { + // Ensures that the bits are read using the correct order + (PresenceBitmap.MAX_SUPPORTED_PARAMETERS downTo 0).forEach { signatureSize -> + (0 until 
signatureSize).forEach { i -> + val signature = List(signatureSize) { j -> if (j == i) taglessExactlyOne else taglessZeroToMany } + val parameterPresences = List(signatureSize) { j -> + when { + j < i -> PresenceBitmap.RESERVED + j == i -> PresenceBitmap.EXPRESSION + j > i -> PresenceBitmap.GROUP + else -> TODO("Unreachable") + } + } + val inputBitString = createBitStringFromParameterPresences(parameterPresences.filter { it != PresenceBitmap.EXPRESSION }) + val inputBits = bitStringToByteArray(inputBitString) + + val pb = PresenceBitmap() + pb.initialize(signature) + pb.readFrom(inputBits, 0) + + parameterPresences.forEachIndexed { l, expected -> assertEquals(expected, pb[l]) } + } + } + } + + private fun writePresenceBits(action: (PresenceBitmap) -> Unit): ByteArray { + val pb = PresenceBitmap() + action(pb) + val buffer = WriteBuffer(BlockAllocatorProviders.basicProvider().vendAllocator(32)) {} + buffer.reserve(pb.byteSize) + pb.writeTo(buffer, 0) + return buffer.toByteArray() + } + + private fun WriteBuffer.toByteArray() = ByteArrayOutputStream().also { writeTo(it) }.toByteArray() + private fun ByteArray.toBitString(): String = byteArrayToBitString(this) + + @ParameterizedTest + @CsvSource( + " '', '' ", + " 0, 00000000", + " 0 0, 00000000", + " 0 0 0, 00000000", + " 0 0 0 0, 00000000", + "0 0 0 0 0, 00000000 00000000", + "1 0 0 0 0, 00000001 00000000", + "0 1 0 0 0, 00000100 00000000", + "0 0 1 0 0, 00010000 00000000", + "0 0 0 1 0, 01000000 00000000", + "0 0 0 0 1, 00000000 00000001", + "2 0 0 0 0, 00000010 00000000", + "0 2 0 0 0, 00001000 00000000", + "0 0 2 0 0, 00100000 00000000", + "0 0 0 2 0, 10000000 00000000", + "0 0 0 0 2, 00000000 00000010", + ) + fun testCreateBitStringFromParameterPresences(presences: String, expectedBitString: String) { + val presenceList = presences.takeIf { it.isNotBlank() }?.split(" ")?.map { it.toLong() } ?: emptyList() + assertEquals(expectedBitString, createBitStringFromParameterPresences(presenceList)) + } + + /** + * The 
purpose of this utility function is to create a bit string containing a whole number + * of little endian bytes that represents a list of presence bit pairs. + */ + private fun createBitStringFromParameterPresences(parameterPresences: List): String { + val sb = StringBuilder() + // Calculate the number of bit-pairs needed to have a whole number of bytes. + val n = (((parameterPresences.size + 3) / 4) * 4) + for (i in 0 until n) { + // Calculate the little-endian position + val ii = i - 2 * (i % 4) + 3 + // If `getOrNull` returns null, we've gone past the end of the presence values, so pad with zeros + val parameterPresence = parameterPresences.getOrNull(ii) ?: 0 + val bits = when (parameterPresence) { + 0L -> "00" + 1L -> "01" + 2L -> "10" + 3L -> "11" + else -> TODO("Unreachable") + } + sb.append(bits) + if (i % 4 == 3) sb.append(' ') + } + return sb.toString().trim() + } +} diff --git a/src/test/java/com/amazon/ion/impl/bin/WriteBufferTest.java b/src/test/java/com/amazon/ion/impl/bin/WriteBufferTest.java index e71c4fb7c4..36af9f8521 100644 --- a/src/test/java/com/amazon/ion/impl/bin/WriteBufferTest.java +++ b/src/test/java/com/amazon/ion/impl/bin/WriteBufferTest.java @@ -1,36 +1,22 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.impl.bin; import static com.amazon.ion.TestUtils.hexDump; import static com.amazon.ion.impl.bin.WriteBuffer.varUIntLength; import static com.amazon.ion.impl.bin.WriteBuffer.writeVarUIntTo; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.*; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.math.BigInteger; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.concurrent.atomic.AtomicBoolean; +import com.amazon.ion.impl.bin.utf8.Utf8StringEncoder; + import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; @@ -79,11 +65,17 @@ private void assertBuffer(final byte[] expected) final byte[] actual = bytes(); assertArrayEquals( - "Bytes don't match!\nEXPECTED:\n" + hexDump(expected) + "\nACTUAL:\n" + hexDump(actual) + "\n", - expected, actual + expected, actual, + "Bytes don't match!\nEXPECTED:\n" + hexDump(expected) + "\nACTUAL:\n" + hexDump(actual) + "\n" ); } + @Test + public void testConstructorThrowsWhenBlockSizeTooSmall() { + BlockAllocator ba = BlockAllocatorProviders.basicProvider().vendAllocator(9); + assertThrows(IllegalArgumentException.class, () -> new WriteBuffer(ba, () -> {})); + } + @Test public void testInt8Positive() { @@ -944,9 +936,25 @@ public void testBytes() throws IOException @Test public void testTruncate() throws IOException { - buf.writeBytes("ARGLEFOOBARGLEDOO".getBytes("UTF-8")); + buf.writeBytes("ARGLE".getBytes("UTF-8")); buf.truncate(3); + // Check that the expected bytes are present assertBuffer("ARG".getBytes("UTF-8")); + // ...and check that we can resume writing without any issues + 
buf.writeBytes("LEFOOBARGLEDOO".getBytes("UTF-8")); + assertBuffer("ARGLEFOOBARGLEDOO".getBytes("UTF-8")); + } + + @Test + public void testTruncateAcrossBlocks() throws IOException + { + buf.writeBytes("ABCDEFGHIJKLMNOPQRSTUVWXYZ".getBytes("UTF-8")); + buf.truncate(3); + // Check that the expected bytes are present + assertBuffer("ABC".getBytes("UTF-8")); + // ...and check that we can resume writing without any issues + buf.writeBytes("DEFGHIJKLMNOPQRSTUVWXYZ".getBytes("UTF-8")); + assertBuffer("ABCDEFGHIJKLMNOPQRSTUVWXYZ".getBytes("UTF-8")); } @Test @@ -1121,6 +1129,32 @@ public void shiftBytesLeftWithLengthZeroAcrossBlocks() { assertBuffer("0123456789".getBytes()); } + @Test + public void reserveShouldSkipTheRequestedNumberOfBytes() { + buf.reserve(5); + buf.writeBytes("A".getBytes()); + // WARNING: In testing, the reserved bytes do happen to be 0, but you cannot assume that is true in the general case. + assertBuffer("\0\0\0\0\0A".getBytes()); + } + + @Test + public void reserveShouldSkipTheRequestedNumberOfBytesAcrossOneBlock() { + assertEquals(11, ALLOCATOR.getBlockSize()); + buf.reserve(15); + buf.writeBytes("A".getBytes()); + // WARNING: In testing, the reserved bytes do happen to be 0, but you cannot assume that is true in the general case. + assertBuffer("\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0A".getBytes()); + } + + @Test + public void reserveShouldSkipTheRequestedNumberOfBytesAcrossManyBlock() { + assertEquals(11, ALLOCATOR.getBlockSize()); + buf.reserve(40); + buf.writeBytes("A".getBytes()); + // WARNING: In testing, the reserved bytes do happen to be 0, but you cannot assume that is true in the general case. + assertBuffer("\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0A".getBytes()); + } + /** * Test if the method 'writeVarUIntTo' writes the expected bytes to the output stream. * @throws Exception if there is an error occurred while writing data to the output stream. 
@@ -1386,6 +1420,25 @@ public void testWriteFlexInt(long value, String expectedBits) { Assertions.assertEquals((expectedBits.length() + 1)/9, numBytes); } + @Test + public void testWriteFlexIntAcrossBlocks() { + long value = Long.MIN_VALUE; + String expectedNumberBits = "00000000 00000010 00000000 00000000 00000000 00000000 00000000 00000000 00000000 11111110"; + + for (int i = 0; i < ALLOCATOR.getBlockSize(); i++) { + buf.reset(); + StringBuilder expectedBits = new StringBuilder(); + for (int j = 0; j < i; j++) { + buf.writeByte((byte) 0x55); + expectedBits.append("01010101 "); + } + expectedBits.append(expectedNumberBits); + buf.writeFlexInt(value); + String actualBits = byteArrayToBitString(bytes()); + Assertions.assertEquals(expectedBits.toString(), actualBits); + } + } + @ParameterizedTest @CsvSource({ " 0, 00000001", @@ -1424,6 +1477,25 @@ public void testWriteFlexUInt(long value, String expectedBits) { Assertions.assertEquals((expectedBits.length() + 1)/9, numBytes); } + @Test + public void testWriteFlexUIntAcrossBlocks() { + long value = Long.MAX_VALUE; + String expectedNumberBits = "00000000 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111111"; + + for (int i = 0; i < ALLOCATOR.getBlockSize(); i++) { + buf.reset(); + StringBuilder expectedBits = new StringBuilder(); + for (int j = 0; j < i; j++) { + buf.writeByte((byte) 0x55); + expectedBits.append("01010101 "); + } + expectedBits.append(expectedNumberBits); + buf.writeFlexUInt(value); + String actualBits = byteArrayToBitString(bytes()); + Assertions.assertEquals(expectedBits.toString(), actualBits); + } + } + @Test public void testWriteFlexUIntForNegativeNumber() { Assertions.assertThrows(IllegalArgumentException.class, () -> buf.writeFlexUInt(-1)); @@ -1486,7 +1558,6 @@ public void testWriteFlexUIntForNegativeNumber() { // Long.MIN_VALUE "-9223372036854775808, 00000000 00000010 00000000 00000000 00000000 00000000 00000000 00000000 00000000 11111110", "-9223372036854775809, 00000000 
11111110 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111101", - }) public void testWriteFlexIntForBigInteger(String value, String expectedBits) { int numBytes = buf.writeFlexInt(new BigInteger(value)); @@ -1495,6 +1566,25 @@ public void testWriteFlexIntForBigInteger(String value, String expectedBits) { Assertions.assertEquals((expectedBits.length() + 1)/9, numBytes); } + @Test + public void testWriteFlexIntForBigIntegerAcrossBlocks() { + BigInteger value = new BigInteger("-9223372036854775809"); + String expectedNumberBits = "00000000 11111110 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111101"; + + for (int i = 0; i < ALLOCATOR.getBlockSize(); i++) { + buf.reset(); + StringBuilder expectedBits = new StringBuilder(); + for (int j = 0; j < i; j++) { + buf.writeByte((byte) 0x55); + expectedBits.append("01010101 "); + } + expectedBits.append(expectedNumberBits); + buf.writeFlexInt(value); + String actualBits = byteArrayToBitString(bytes()); + Assertions.assertEquals(expectedBits.toString(), actualBits); + } + } + @ParameterizedTest @CsvSource({ " 0, 00000001", @@ -1534,6 +1624,25 @@ public void testWriteFlexUIntForBigInteger(String value, String expectedBits) { Assertions.assertEquals((expectedBits.length() + 1)/9, numBytes); } + @Test + public void testWriteFlexUIntForBigIntegerAcrossBlocks() { + BigInteger value = new BigInteger("9223372036854775808"); + String expectedNumberBits = "00000000 00000010 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000010"; + + for (int i = 0; i < ALLOCATOR.getBlockSize(); i++) { + buf.reset(); + StringBuilder expectedBits = new StringBuilder(); + for (int j = 0; j < i; j++) { + buf.writeByte((byte) 0x55); + expectedBits.append("01010101 "); + } + expectedBits.append(expectedNumberBits); + buf.writeFlexUInt(value); + String actualBits = byteArrayToBitString(bytes()); + Assertions.assertEquals(expectedBits.toString(), actualBits); + } + } + @Test public void 
testWriteFlexUIntForNegativeBigInteger() { Assertions.assertThrows(IllegalArgumentException.class, () -> buf.writeFlexUInt(BigInteger.ONE.negate())); @@ -1667,6 +1776,42 @@ public void testWriteFixedIntOrUIntThrowsExceptionWhenNumBytesIsOutOfBounds() { Assertions.assertThrows(IllegalArgumentException.class, () -> buf.writeFixedIntOrUInt(0, 9)); } + @ParameterizedTest + @CsvSource({ + " 0, 00000001 01100000", + " 1, 00000011", + " 2, 00000101", + "63, 01111111", + "64, 00000010 00000001", + }) + public void testWriteSidFlexSym(int value, String expectedBits) { + int numBytes = buf.writeFlexSym(value); + String actualBits = byteArrayToBitString(bytes()); + Assertions.assertEquals(expectedBits, actualBits); + Assertions.assertEquals((expectedBits.length() + 1)/9, numBytes); + } + + @ParameterizedTest + @CsvSource({ + "'', 00000001 10000001", // 10000001 == SystemSymbols_1_1.EMPTY_TEXT.getId() converted to binary + "a, 11111111 01100001", + "abc, 11111011 01100001 01100010 01100011", + "this is a very very very very very long symbol, " + + "10100101 01110100 01101000 01101001 01110011 00100000 01101001 01110011 00100000 01100001 00100000 " + + "01110110 01100101 01110010 01111001 00100000 01110110 01100101 01110010 01111001 00100000 01110110 " + + "01100101 01110010 01111001 00100000 01110110 01100101 01110010 01111001 00100000 01110110 01100101 " + + "01110010 01111001 00100000 01101100 01101111 01101110 01100111 00100000 01110011 01111001 01101101 " + + "01100010 01101111 01101100", + }) + public void testWriteTextFlexSym(String value, String expectedBits) { + // This is a sloppy way to construct a Result, but it works for this test because we only have ascii characters. 
+ Utf8StringEncoder.Result encoded = new Utf8StringEncoder.Result(value.length(), value.getBytes(StandardCharsets.US_ASCII)); + int numBytes = buf.writeFlexSym(encoded); + String actualBits = byteArrayToBitString(bytes()); + Assertions.assertEquals(expectedBits, actualBits); + Assertions.assertEquals((expectedBits.length() + 1)/9, numBytes); + } + /** * Converts a byte array to a string of bits, such as "00110110 10001001". * The purpose of this method is to make it easier to read and write test assertions. diff --git a/src/test/java/com/amazon/ion/impl/macro/MacroCompilerTest.kt b/src/test/java/com/amazon/ion/impl/macro/MacroCompilerTest.kt new file mode 100644 index 0000000000..59a46a5826 --- /dev/null +++ b/src/test/java/com/amazon/ion/impl/macro/MacroCompilerTest.kt @@ -0,0 +1,318 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.macro + +import com.amazon.ion.* +import com.amazon.ion.impl.* +import com.amazon.ion.impl.macro.Expression.* +import com.amazon.ion.impl.macro.Macro.* +import com.amazon.ion.impl.macro.Macro.ParameterEncoding.* +import com.amazon.ion.impl.macro.MacroRef.* +import com.amazon.ion.system.IonReaderBuilder +import com.amazon.ion.system.IonSystemBuilder +import java.math.BigDecimal +import java.math.BigInteger +import java.nio.charset.StandardCharsets +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertNull +import org.junit.jupiter.api.TestInstance +import org.junit.jupiter.api.assertThrows +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.Arguments +import org.junit.jupiter.params.provider.EnumSource +import org.junit.jupiter.params.provider.MethodSource + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +class MacroCompilerTest { + + val ion: IonSystem = IonSystemBuilder.standard().build() + + private val fakeMacroTable: (MacroRef) -> Macro? 
= { + when (it) { + ById(12) -> SystemMacro.Values + ByName("values") -> SystemMacro.Values + else -> null + } + } + + private data class MacroSourceAndTemplate(val source: String, val template: TemplateMacro) : Arguments { + override fun get(): Array = arrayOf(source, template.signature, template.body) + } + + private fun annotations(vararg a: String): List = a.map { FakeSymbolToken(it, -1) } + + private infix fun String.shouldCompileTo(macro: TemplateMacro) = MacroSourceAndTemplate(this, macro) + + private fun testCases() = listOf( + "(macro identity (x) (%x))" shouldCompileTo TemplateMacro( + listOf(Parameter("x", Tagged, ParameterCardinality.ExactlyOne)), + listOf(VariableRef(0)), + ), + "(macro identity (any::x) (%x))" shouldCompileTo TemplateMacro( + listOf(Parameter("x", Tagged, ParameterCardinality.ExactlyOne)), + listOf(VariableRef(0)), + ), + "(macro pi () 3.141592653589793)" shouldCompileTo TemplateMacro( + signature = emptyList(), + body = listOf(DecimalValue(emptyList(), BigDecimal("3.141592653589793"))) + ), + "(macro cardinality_test (x?) (%x))" shouldCompileTo TemplateMacro( + signature = listOf(Parameter("x", Tagged, ParameterCardinality.ZeroOrOne)), + body = listOf(VariableRef(0)) + ), + "(macro cardinality_test (x!) (%x))" shouldCompileTo TemplateMacro( + signature = listOf(Parameter("x", Tagged, ParameterCardinality.ExactlyOne)), + body = listOf(VariableRef(0)) + ), + "(macro cardinality_test (x+) (%x))" shouldCompileTo TemplateMacro( + signature = listOf(Parameter("x", Tagged, ParameterCardinality.OneOrMore)), + body = listOf(VariableRef(0)) + ), + "(macro cardinality_test (x*) (%x))" shouldCompileTo TemplateMacro( + signature = listOf(Parameter("x", Tagged, ParameterCardinality.ZeroOrMore)), + body = listOf(VariableRef(0)) + ), + // Outer '.values' call allows multiple expressions in the body + // The second `.values` is a macro call that has a single argument: the variable `x` + // The third `(values x)` is an uninterpreted s-expression. 
+ """(macro literal_test (x) (.values (.values (%x)) (values x)))""" shouldCompileTo TemplateMacro( + signature = listOf(Parameter("x", Tagged, ParameterCardinality.ExactlyOne)), + body = listOf( + MacroInvocation(SystemMacro.Values, selfIndex = 0, endExclusive = 6), + MacroInvocation(SystemMacro.Values, selfIndex = 1, endExclusive = 3), + VariableRef(0), + SExpValue(emptyList(), selfIndex = 3, endExclusive = 6), + SymbolValue(emptyList(), FakeSymbolToken("values", -1)), + SymbolValue(emptyList(), FakeSymbolToken("x", -1)), + ), + ), + "(macro each_type () (.values null true 1 ${"9".repeat(50)} 1e0 1d0 2024-01-16T \"foo\" bar [] () {} {{}} {{\"\"}} ))" shouldCompileTo TemplateMacro( + signature = emptyList(), + body = listOf( + MacroInvocation(SystemMacro.Values, 0, 15), + NullValue(emptyList(), IonType.NULL), + BoolValue(emptyList(), true), + LongIntValue(emptyList(), 1), + BigIntValue(emptyList(), BigInteger("9".repeat(50))), + FloatValue(emptyList(), 1.0), + DecimalValue(emptyList(), Decimal.ONE), + TimestampValue(emptyList(), Timestamp.valueOf("2024-01-16T")), + StringValue(emptyList(), "foo"), + SymbolValue(emptyList(), FakeSymbolToken("bar", -1)), + ListValue(emptyList(), selfIndex = 10, endExclusive = 11), + SExpValue(emptyList(), selfIndex = 11, endExclusive = 12), + StructValue(emptyList(), selfIndex = 12, endExclusive = 13, templateStructIndex = emptyMap()), + BlobValue(emptyList(), ByteArray(0)), + ClobValue(emptyList(), ByteArray(0)) + ) + ), + """(macro foo () (.values 42 "hello" false))""" shouldCompileTo TemplateMacro( + signature = emptyList(), + body = listOf( + MacroInvocation(SystemMacro.Values, selfIndex = 0, endExclusive = 4), + LongIntValue(emptyList(), 42), + StringValue(emptyList(), "hello"), + BoolValue(emptyList(), false), + ) + ), + """(macro using_expr_group () (.values (.. 
42 "hello" false)))""" shouldCompileTo TemplateMacro( + signature = emptyList(), + body = listOf( + MacroInvocation(SystemMacro.Values, selfIndex = 0, endExclusive = 5), + ExpressionGroup(selfIndex = 1, endExclusive = 5), + LongIntValue(emptyList(), 42), + StringValue(emptyList(), "hello"), + BoolValue(emptyList(), false), + ) + ), + """(macro invoke_by_id () (.12 true false))""" shouldCompileTo TemplateMacro( + signature = emptyList(), + body = listOf( + MacroInvocation(SystemMacro.Values, selfIndex = 0, endExclusive = 3), + BoolValue(emptyList(), true), + BoolValue(emptyList(), false), + ) + ), + "(macro null () \"abc\")" shouldCompileTo TemplateMacro( + signature = emptyList(), + body = listOf(StringValue(emptyList(), "abc")) + ), + "(macro foo (x y z) [100, [200, a::b::300], (%x), {y: [true, false, (%z)]}])" shouldCompileTo TemplateMacro( + signature = listOf( + Parameter("x", Tagged, ParameterCardinality.ExactlyOne), + Parameter("y", Tagged, ParameterCardinality.ExactlyOne), + Parameter("z", Tagged, ParameterCardinality.ExactlyOne) + ), + body = listOf( + ListValue(emptyList(), selfIndex = 0, endExclusive = 12), + LongIntValue(emptyList(), 100), + ListValue(emptyList(), selfIndex = 2, endExclusive = 5), + LongIntValue(emptyList(), 200), + LongIntValue(annotations("a", "b"), 300), + VariableRef(0), + StructValue(emptyList(), selfIndex = 6, endExclusive = 12, templateStructIndex = mapOf("y" to listOf(8))), + FieldName(FakeSymbolToken("y", -1)), + ListValue(emptyList(), selfIndex = 8, endExclusive = 12), + BoolValue(emptyList(), true), + BoolValue(emptyList(), false), + VariableRef(2), + ) + ) + ) + + enum class ReaderType { + ION_READER { + override fun newMacroCompiler(reader: IonReader, macros: ((MacroRef) -> Macro?)): MacroCompiler { + return MacroCompiler(macros, ReaderAdapterIonReader(reader)) + } + }, + CONTINUABLE { + override fun newMacroCompiler(reader: IonReader, macros: ((MacroRef) -> Macro?)): MacroCompiler { + return MacroCompiler(macros, 
ReaderAdapterContinuable(reader as IonReaderContinuableCore)) + } + }; + + internal abstract fun newMacroCompiler(reader: IonReader, macros: ((MacroRef) -> Macro?)): MacroCompiler + } + + private fun newReader(source: String): IonReader { + // TODO these tests should be parameterized to exercise both text and binary input. + return IonReaderBuilder.standard().build(TestUtils.ensureBinary(ion, source.toByteArray(StandardCharsets.UTF_8))) + } + + private fun assertMacroCompilation(readerType: ReaderType, source: String, signature: List, body: List) { + val reader = newReader(source) + val compiler = readerType.newMacroCompiler(reader, fakeMacroTable) + reader.next() + val macroDef = compiler.compileMacro() + val expectedDef = TemplateMacro(signature, body) + assertEquals(expectedDef, macroDef) + } + + @ParameterizedTest(name = "{0}") + @MethodSource("testCases") + fun assertMacroCompilationContinuable(source: String, signature: List, body: List) { + assertMacroCompilation(ReaderType.CONTINUABLE, source, signature, body) + } + + @ParameterizedTest(name = "{0}") + @MethodSource("testCases") + fun assertMacroCompilationIonReader(source: String, signature: List, body: List) { + assertMacroCompilation(ReaderType.ION_READER, source, signature, body) + } + + @ParameterizedTest + @EnumSource(ReaderType::class) + fun `test reading a list of macros`(readerType: ReaderType) { + // This test case is essentially the same as the last one, except that it puts all the macro definitions into + // one Ion list, and then compiles them sequentially from that list. + // If this test fails, do not bother trying to fix it until all cases in the parameterized test are passing. 
+ val source = "[${testCases().joinToString(",") { it.source }}]" + val templates = testCases().map { it.template }.iterator() + + val reader = newReader(source) + val compiler = readerType.newMacroCompiler(reader, fakeMacroTable) + // Advance and step into list + reader.next(); reader.stepIn() + while (reader.next() != null) { + val macroDef = compiler.compileMacro() + val expectedDef = templates.next() + assertEquals(expectedDef, macroDef) + } + reader.stepOut() + reader.close() + } + + @ParameterizedTest + @EnumSource(ReaderType::class) + fun `macro compiler should return the correct name`(readerType: ReaderType) { + val reader = newReader( + """ + (macro foo (x) 1) + (macro bar (y) 2) + (macro null (z) 3) + """ + ) + val compiler = readerType.newMacroCompiler(reader, fakeMacroTable) + assertNull(compiler.macroName) + reader.next() + compiler.compileMacro() + assertEquals("foo", compiler.macroName) + reader.next() + compiler.compileMacro() + assertEquals("bar", compiler.macroName) + reader.next() + compiler.compileMacro() + assertNull(compiler.macroName) + } + + // macro with invalid variable + // try compiling something that is not a sexp + // macro missing keyword + // macro has invalid name + // macro has annotations + + private fun badMacros() = listOf( + // There should be exactly one thing wrong in each of these samples. + + // Problems up to and including the macro name + "[macro, pi, (), 3.141592653589793]", // Macro def must be a sexp + "foo::(macro pi () 3.141592653589793)", // Macros cannot be annotated + """("macro" pi () 3.141592653589793)""", // 'macro' must be a symbol + "(pi () 3.141592653589793)", // doesn't start with 'macro' + "(macaroon pi () 3.141592653589793)", // doesn't start with 'macro' + "(macroeconomics pi () 3.141592653589793)", // will the demand for digits of pi ever match the supply? 
+ "(macro pi::pi () 3.141592653589793)", // Illegal annotation on macro name + "(macro () 3.141592653589793)", // No macro name + "(macro 2.5 () 3.141592653589793)", // Macro name is not a symbol + """(macro "pi"() 3.141592653589793)""", // Macro name is not a symbol + "(macro \$0 () 3.141592653589793)", // Macro name must have known text + "(macro + () 123)", // Macro name cannot be an operator symbol + "(macro 'a.b' () 123)", // Macro name must be a symbol that can be unquoted (i.e. an identifier symbol) + "(macro 'false' () 123)", // Macro name must be a symbol that can be unquoted (i.e. an identifier symbol) + + // Problems in the signature + "(macro identity x x)", // Missing sexp around signature + "(macro identity [x] x)", // Using list instead of sexp for signature + "(macro identity any::(x) x)", // Signature sexp should not be annotated + "(macro identity (foo::x) x)", // Unknown type in signature + "(macro identity (x any::*) x)", // Annotation should be on parameter name, not the cardinality + "(macro identity (x! !) 
x)", // Dangling cardinality modifier + "(macro identity (x%) x)", // Not a real cardinality sigil + "(macro identity (x x) x)", // Repeated parameter name + """(macro identity ("x") x)""", // Parameter name must be a symbol, not a string + + // Problems in the body + "(macro empty ())", // No body expression + "(macro transform (x) (%y))", // Unknown variable + "(macro transform (x) foo::(%x))", // Variable expansion cannot be annotated + "(macro transform (x) (foo::%x))", // Variable expansion operator cannot be annotated + "(macro transform (x) (%foo::x))", // Variable name cannot be annotated + "(macro transform (x) foo::(.values x))", // Macro invocation cannot be annotated + "(macro transform (x) (foo::.values x))", // Macro invocation operator cannot be annotated + "(macro transform (x) (.))", // Macro invocation operator must be followed by macro reference + """(macro transform (x) (."values" x))""", // Macro invocation must start with a symbol or integer id + """(macro transform (x) (.values foo::(..)))""", // Expression group may not be annotated + """(macro transform (x) (.values (foo::..)))""", // Expression group operator may not be annotated + "(macro transform (x) 1 2)", // Template body must be one expression + ) + + @ParameterizedTest + @MethodSource("badMacros") + fun assertCompilationFailsContinuable(source: String) { + assertCompilationFails(ReaderType.CONTINUABLE, source) + } + + @ParameterizedTest + @MethodSource("badMacros") + fun assertCompilationFailsIonReader(source: String) { + assertCompilationFails(ReaderType.ION_READER, source) + } + + private fun assertCompilationFails(readerType: ReaderType, source: String) { + val reader = newReader(source) + reader.next() + val compiler = readerType.newMacroCompiler(reader, fakeMacroTable) + assertThrows { compiler.compileMacro() } + } +} diff --git a/src/test/java/com/amazon/ion/impl/macro/MacroEvaluatorTest.kt b/src/test/java/com/amazon/ion/impl/macro/MacroEvaluatorTest.kt new file mode 100644 
index 0000000000..4e7c071120 --- /dev/null +++ b/src/test/java/com/amazon/ion/impl/macro/MacroEvaluatorTest.kt @@ -0,0 +1,1265 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.impl.macro + +import com.amazon.ion.* +import com.amazon.ion.impl.* +import com.amazon.ion.impl.SystemSymbols_1_1.* +import com.amazon.ion.impl._Private_Utils.newSymbolToken +import com.amazon.ion.impl.bin.IonManagedWriter_1_1_Test.Companion.ion +import com.amazon.ion.impl.macro.Expression.* +import com.amazon.ion.impl.macro.ExpressionBuilderDsl.Companion.eExpBody +import com.amazon.ion.impl.macro.ExpressionBuilderDsl.Companion.templateBody +import com.amazon.ion.impl.macro.SystemMacro.* +import com.amazon.ion.system.IonSystemBuilder +import java.math.BigDecimal +import java.math.BigInteger +import java.util.Base64 +import kotlin.contracts.ExperimentalContracts +import kotlin.contracts.contract +import org.junit.jupiter.api.Assertions +import org.junit.jupiter.api.Assertions.assertArrayEquals +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertNull +import org.junit.jupiter.api.Assertions.assertTrue +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.assertThrows +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.Arguments.arguments +import org.junit.jupiter.params.provider.MethodSource + +class MacroEvaluatorTest { + + val IDENTITY_MACRO = template("x!") { + variable(0) + } + + val PI_MACRO = template() { + float(3.14159) + } + + val FOO_STRUCT_MACRO = template("x*") { + struct { + fieldName("foo") + variable(0) + } + } + + val ABCs_LIST_MACRO = template { + list { + string("a") + string("b") + string("c") + } + } + + val evaluator = MacroEvaluator() + + @Test + fun `the 'none' system macro`() { + // Given: + // When: + // (:none) + // Then: + // + + evaluator.initExpansion { + eexp(None) {} + } + + 
assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `the 'none' system macro, invoked in TDL`() { + // Given: + // (macro blackhole (any*) (.none)) + // When: + // (:blackhole "abc" 123 true) + // Then: + // + + val blackholeMacro = template("any*") { + macro(None) {} + } + + evaluator.initExpansion { + eexp(blackholeMacro) { + expressionGroup { + string("abc") + int(123) + bool(true) + } + } + } + + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `a trivial constant macro evaluation`() { + // Given: + // (macro pi () 3.14159) + // When: + // (:pi) + // Then: + // 3.14159 + + evaluator.initExpansion { + eexp(PI_MACRO) {} + } + + assertEquals(FloatValue(emptyList(), 3.14159), evaluator.expandNext()) + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `a nested constant macro evaluation`() { + // Given: + // (macro pi () 3.14159) + // (macro special_number () (.pi)) + // When: + // (:special_number) + // Then: + // 3.14159 + + val specialNumberMacro = template() { + macro(PI_MACRO) {} + } + + evaluator.initExpansion { + eexp(specialNumberMacro) {} + } + + assertEquals(FloatValue(emptyList(), 3.14159), evaluator.expandNext()) + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `constant macro with empty list`() { + // Given: + // (macro foo () []) + // When: + // (:foo) + // Then: + // [] + + val fooMacro = template() { + list { } + } + + evaluator.initExpansion { + eexp(fooMacro) {} + } + + assertIsInstance(evaluator.expandNext()) + evaluator.stepIn() + assertEquals(null, evaluator.expandNext()) + evaluator.stepOut() + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `constant macro with single element list`() { + // Given: + // (macro foo () ["a"]) + // When: + // (:foo) + // Then: + // ["a"] + + val fooMacro = template() { + list { + string("a") + } + } + + evaluator.initExpansion { + eexp(fooMacro) {} + } + + assertIsInstance(evaluator.expandNext()) + evaluator.stepIn() + 
assertEquals(StringValue(value = "a"), evaluator.expandNext()) + assertEquals(null, evaluator.expandNext()) + evaluator.stepOut() + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `constant macro with multi element list`() { + // Given: + // (macro ABCs () ["a", "b", "c"]) + // When: + // (:ABCs) + // Then: + // [ "a", "b", "c" ] + + evaluator.initExpansion { + eexp(ABCs_LIST_MACRO) {} + } + + assertIsInstance(evaluator.expandNext()) + evaluator.stepIn() + assertEquals(StringValue(value = "a"), evaluator.expandNext()) + assertEquals(StringValue(value = "b"), evaluator.expandNext()) + assertEquals(StringValue(value = "c"), evaluator.expandNext()) + assertEquals(null, evaluator.expandNext()) + evaluator.stepOut() + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `it should be possible to step out of a container before the end is reached`() { + // Given: + // (macro ABCs () ["a", "b", "c"]) + // When: + // (:ABCs) + // Then: + // [ "a", "b", "c" ] + + evaluator.initExpansion { + eexp(ABCs_LIST_MACRO) {} + } + + assertIsInstance(evaluator.expandNext()) + evaluator.stepIn() + assertEquals(StringValue(value = "a"), evaluator.expandNext()) + evaluator.stepOut() + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `a trivial variable substitution`() { + // Given: + // (macro identity (x!) (%x)) + // When: + // (:identity true) + // Then: + // true + + evaluator.initExpansion { + eexp(IDENTITY_MACRO) { + bool(true) + } + } + + assertEquals(BoolValue(emptyList(), true), evaluator.expandNext()) + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `a trivial variable substitution with empty list`() { + // Given: + // (macro identity (x!) 
(%x)) + // When: + // (:identity []) + // Then: + // [] + + evaluator.initExpansion { + eexp(IDENTITY_MACRO) { + list { } + } + } + + assertIsInstance(evaluator.expandNext()) + evaluator.stepIn() + assertEquals(null, evaluator.expandNext()) + evaluator.stepOut() + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `a trivial variable substitution with single element list`() { + // Given: + // (macro identity (x!) (%x)) + // When: + // (:identity ["a"]) + // Then: + // ["a"] + + evaluator.initExpansion { + eexp(IDENTITY_MACRO) { + list { + string("a") + } + } + } + + assertIsInstance(evaluator.expandNext()) + evaluator.stepIn() + assertEquals(StringValue(value = "a"), evaluator.expandNext()) + assertEquals(null, evaluator.expandNext()) + evaluator.stepOut() + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `a variable that gets used twice`() { + // Given: + // (macro double_identity (x!) [(%x), (%x)]) + // When: + // (:double_identity "a") + // Then: + // ["a", "a"] + + val doubleIdentity = template("x!") { + list { + variable(0) + variable(0) + } + } + + evaluator.initExpansion { + eexp(doubleIdentity) { string("a") } + } + + assertIsInstance(evaluator.expandNext()) + evaluator.stepIn() + assertEquals(StringValue(value = "a"), evaluator.expandNext()) + assertEquals(StringValue(value = "a"), evaluator.expandNext()) + assertEquals(null, evaluator.expandNext()) + evaluator.stepOut() + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `invoke values with scalars`() { + // Given: + // When: + // (:values 1 "a") + // Then: + // 1 "a" + + evaluator.initExpansion { + eexp(Values) { + expressionGroup { + int(1) + string("a") + } + } + } + + assertEquals(LongIntValue(emptyList(), 1), evaluator.expandNext()) + assertEquals(StringValue(emptyList(), "a"), evaluator.expandNext()) + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `a trivial nested variable substitution`() { + // Given: + // (macro identity (x!) 
(%x)) + // (macro nested_identity (x!) (.identity (%x))) + // When: + // (:nested_identity true) + // Then: + // true + + val nestedIdentity = template("x!") { + macro(IDENTITY_MACRO) { + variable(0) + } + } + + evaluator.initExpansion { + eexp(nestedIdentity) { + bool(true) + } + } + + assertEquals(BoolValue(emptyList(), true), evaluator.expandNext()) + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `a trivial void variable substitution`() { + // Given: + // (macro voidable_identity (x?) (%x)) + // When: + // (:voidable_identity (:)) + // Then: + // + + val voidableIdentityMacro = template("x?") { + variable(0) + } + + evaluator.initExpansion { + eexp(voidableIdentityMacro) {} + } + + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `simple make_string`() { + // Given: + // When: + // (:make_string "a" "b" "c") + // Then: + // "abc" + + evaluator.initExpansion { + eexp(MakeString) { + expressionGroup { + string("a") + string("b") + string("c") + } + } + } + + assertEquals(StringValue(emptyList(), "abc"), evaluator.expandNext()) + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `nested make_string`() { + // Given: + // When: + // (:make_string "a" (:make_string "b" "c" "d")) + // Then: + // "abcd" + + evaluator.initExpansion { + eexp(MakeString) { + expressionGroup { + string("a") + eexp(MakeString) { + expressionGroup { + string("b") + string("c") + string("d") + } + } + } + } + } + + assertEquals(StringValue(emptyList(), "abcd"), evaluator.expandNext()) + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `simple make_symbol`() { + // Given: + // When: + // (:make_symbol "a" "b" "c") + // Then: + // abc + + evaluator.initExpansion { + eexp(MakeSymbol) { + expressionGroup { + string("a") + string("b") + string("c") + } + } + } + + val expr = evaluator.expandNext() + assertIsInstance(expr) + assertEquals("abc", expr.value.text) + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `simple 
make_blob`() { + // Given: + // When: + // (:make_blob {{"abc"}} {{ 4AEB6g== }}) + // Then: + // {{ YWJj4AEB6g== }} + + evaluator.initExpansion { + eexp(MakeBlob) { + expressionGroup { + clob("abc".toByteArray()) + blob(Base64.getDecoder().decode("4AEB6g==")) + } + } + } + + val expr = evaluator.expandNext() + assertIsInstance(expr) + assertArrayEquals(Base64.getDecoder().decode("YWJj4AEB6g=="), expr.value) + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `simple make_decimal`() { + // Given: + // When: + // (:make_decimal 2 4) + // Then: + // 2d4 + + evaluator.initExpansion { + eexp(MakeDecimal) { + int(2) + int(4) + } + } + + val expr = evaluator.expandNext() + assertIsInstance(expr) + assertTrue(BigDecimal.valueOf(20000).compareTo(expr.value) == 0) + assertEquals(BigInteger.valueOf(2), expr.value.unscaledValue()) + assertEquals(-4, expr.value.scale()) + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `make_decimal from nested expressions`() { + // Given: + // (macro fixed_point (x) (.make_decimal (%x) (.values -2))) + // When: + // (:fixed_point (:identity 123)) + // Then: + // 1.23 + + val fixedPointMacro = template("x") { + macro(MakeDecimal) { + variable(0) + macro(Values) { + expressionGroup { + int(-2) + } + } + } + } + + evaluator.initExpansion { + eexp(fixedPointMacro) { + eexp(IDENTITY_MACRO) { + int(123) + } + } + } + + val expr = evaluator.expandNext() + assertIsInstance(expr) + assertEquals(BigDecimal.valueOf(123, 2), expr.value) + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `simple make_field`() { + // Given: + // When: + // (:make_field foo 1) + // Then: + // { foo: 1 } + + evaluator.initExpansion { + eexp(MakeField) { + symbol(newSymbolToken("foo")) + int(1) + } + } + + assertEquals(FieldName(value = newSymbolToken("foo")), evaluator.expandNext()) + assertEquals(LongIntValue(value = 1), evaluator.expandNext()) + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `simple annotate`() { + 
// Given: + // When: + // (:annotate (:: "a" "b" "c") 1) + // Then: + // a::b::c::1 + + evaluator.initExpansion { + eexp(Annotate) { + expressionGroup { + string("a") + string("b") + string("c") + } + int(1) + } + } + + val expr = evaluator.expandNext() + assertIsInstance(expr) + assertEquals(listOf("a", "b", "c"), expr.annotations.map { it.text }) + assertEquals(1, expr.value) + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `annotate a value that already has some annotations`() { + // Given: + // When: + // (:annotate (:: "a" "b") c::1) + // Then: + // a::b::c::1 + + evaluator.initExpansion { + eexp(Annotate) { + expressionGroup { + string("a") + string("b") + } + annotated(listOf(newSymbolToken("c")), ::int, 1) + } + } + + val expr = evaluator.expandNext() + assertIsInstance(expr) + assertEquals(listOf("a", "b", "c"), expr.annotations.map { it.text }) + assertEquals(1, expr.value) + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `annotate a container`() { + // Given: + // When: + // (:annotate (:: "a" "b" "c") [1]) + // Then: + // a::b::c::[1] + + evaluator.initExpansion { + eexp(Annotate) { + expressionGroup { + string("a") + string("b") + string("c") + } + list { + int(1) + } + } + } + + val expr = evaluator.expandNext() + assertIsInstance(expr) + assertEquals(listOf("a", "b", "c"), expr.annotations.map { it.text }) + evaluator.stepIn() + assertEquals(LongIntValue(emptyList(), 1), evaluator.expandNext()) + assertEquals(null, evaluator.expandNext()) + evaluator.stepOut() + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `annotate with nested make_string`() { + // Given: + // When: + // (:annotate (:make_string (:: "a" "b" "c")) 1) + // Then: + // abc::1 + + evaluator.initExpansion { + eexp(Annotate) { + eexp(MakeString) { + expressionGroup { + string("a") + string("b") + string("c") + } + } + int(1) + } + } + + val expr = evaluator.expandNext() + assertIsInstance(expr) + assertEquals(listOf("abc"), 
expr.annotations.map { it.text }) + assertEquals(1, expr.value) + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `annotate an e-expression result`() { + // Given: + // When: + // (:annotate (:: "a" "b" "c") (:make_string "d" "e" "f")) + // Then: + // a::b::c::"def" + + evaluator.initExpansion { + eexp(Annotate) { + expressionGroup { + string("a") + string("b") + string("c") + } + + eexp(MakeString) { + expressionGroup { + string("d") + string("e") + string("f") + } + } + } + } + + val expr = evaluator.expandNext() + assertIsInstance(expr) + assertEquals(listOf("a", "b", "c"), expr.annotations.map { it.text }) + assertEquals("def", expr.value) + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `annotate a TDL macro invocation result`() { + // Given: + // (macro pi () 3.14159) + // (macro annotate_pi (x) (.annotate (..(%x)) (.pi))) + // When: + // (:annotate_pi "foo") + // Then: + // foo::3.14159 + + val annotatePi = template("x") { + macro(Annotate) { + expressionGroup { + variable(0) + } + macro(PI_MACRO) {} + } + } + + evaluator.initExpansion { + eexp(annotatePi) { + string("foo") + } + } + + val expr = evaluator.expandNext() + assertIsInstance(expr) + assertEquals(listOf("foo"), expr.annotations.map { it.text }) + assertEquals(3.14159, expr.value) + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `macro with a variable substitution in struct field position`() { + // Given: + // (macro foo_struct (x*) {foo: (%x)}) + // When: + // (:foo_struct bar) + // Then: + // {foo: bar} + + evaluator.initExpansion { + eexp(FOO_STRUCT_MACRO) { + string("bar") + } + } + + assertIsInstance(evaluator.expandNext()) + evaluator.stepIn() + assertEquals(FieldName(FakeSymbolToken("foo", -1)), evaluator.expandNext()) + assertEquals(StringValue(value = "bar"), evaluator.expandNext()) + assertEquals(null, evaluator.expandNext()) + evaluator.stepOut() + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `macro with a variable 
substitution in struct field position with multiple arguments`() { + // Given: + // (macro foo_struct (x*) {foo: (%x)}) + // When: + // (:foo_struct (: bar baz)) + // Then: + // {foo: bar, foo: baz} + + evaluator.initExpansion { + eexp(FOO_STRUCT_MACRO) { + expressionGroup { + string("bar") + string("baz") + } + } + } + + assertIsInstance(evaluator.expandNext()) + evaluator.stepIn() + // Yes, the field name should be here only once. The Ion reader that wraps the evaluator + // is responsible for carrying the field name over to any values that follow. + assertEquals(FieldName(FakeSymbolToken("foo", -1)), evaluator.expandNext()) + assertEquals(StringValue(value = "bar"), evaluator.expandNext()) + assertEquals(StringValue(value = "baz"), evaluator.expandNext()) + assertEquals(null, evaluator.expandNext()) + evaluator.stepOut() + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `macro with a variable substitution in struct field position with void argument`() { + // Given: + // (macro foo_struct (x*) {foo: (%x)}) + // When: + // (:foo_struct (:)) + // Then: + // {} + + evaluator.initExpansion { + eexp(FOO_STRUCT_MACRO) { + expressionGroup { } + } + } + + assertEquals(IonType.STRUCT, (evaluator.expandNext() as? DataModelValue)?.type) + evaluator.stepIn() + // Yes, the field name should be here. The Ion reader that wraps the evaluator + // is responsible for discarding the field name if no values follow. 
+ assertEquals(FieldName(FakeSymbolToken("foo", -1)), evaluator.expandNext()) + assertEquals(null, evaluator.expandNext()) + evaluator.stepOut() + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `e-expression with another e-expression as one of the arguments`() { + // Given: + // (macro pi () 3.14159) + // (macro identity (x) (%x)) + // When: + // (:identity (:pi)) + // Then: + // 3.14159 + + evaluator.initExpansion { + eexp(IDENTITY_MACRO) { + eexp(PI_MACRO) {} + } + } + + assertEquals(FloatValue(emptyList(), 3.14159), evaluator.expandNext()) + assertEquals(null, evaluator.expandNext()) + } + + object IfExpanderTestParameters { + val SINGLE_VALUE = template { int(1) } + val SINGLE_VALUE_STREAM = template { + macro(Values) { + expressionGroup { + int(2) + } + } + } + val TWO_VALUE_STREAM = template { + macro(Values) { + expressionGroup { + int(3) + int(4) + } + } + } + + @JvmStatic + fun parameters() = listOf( + arguments(IfNone, None, true), + arguments(IfNone, SINGLE_VALUE, false), + arguments(IfNone, SINGLE_VALUE_STREAM, false), + arguments(IfNone, TWO_VALUE_STREAM, false), + + arguments(IfSome, None, false), + arguments(IfSome, SINGLE_VALUE, true), + arguments(IfSome, SINGLE_VALUE_STREAM, true), + arguments(IfSome, TWO_VALUE_STREAM, true), + + arguments(IfSingle, None, false), + arguments(IfSingle, SINGLE_VALUE, true), + arguments(IfSingle, SINGLE_VALUE_STREAM, true), + arguments(IfSingle, TWO_VALUE_STREAM, false), + + arguments(IfMulti, None, false), + arguments(IfMulti, SINGLE_VALUE, false), + arguments(IfMulti, SINGLE_VALUE_STREAM, false), + arguments(IfMulti, TWO_VALUE_STREAM, true), + ) + } + + @ParameterizedTest + @MethodSource("com.amazon.ion.impl.macro.MacroEvaluatorTest\$IfExpanderTestParameters#parameters") + fun `check 'if' expansion logic`(ifSpecialForm: SystemMacro, expressionToTest: Macro, expectMatches: Boolean) { + // Given: + // (macro test_if (x*) ( (%x) "a" "b")) + // When: + // (:test_if ) + // Then: + // "a" or "b" depending 
on whether we expect it to match. + + val theMacro = template("x*") { + macro(ifSpecialForm) { + variable(0) + string("a") + string("b") + } + } + + evaluator.initExpansion { eexp(theMacro) { eexp(expressionToTest) {} } } + + val expectedString = if (expectMatches) "a" else "b" + assertEquals(StringValue(value = expectedString), evaluator.expandNext()) + assertEquals(null, evaluator.expandNext()) + } + + companion object { + /** Helper function to create template macros */ + internal fun template(vararg parameters: String, body: TemplateDsl.() -> Unit): Macro { + val signature = parameters.map { + val cardinality = Macro.ParameterCardinality.fromSigil("${it.last()}") + if (cardinality == null) { + Macro.Parameter(it, Macro.ParameterEncoding.Tagged, Macro.ParameterCardinality.ExactlyOne) + } else { + Macro.Parameter(it.dropLast(1), Macro.ParameterEncoding.Tagged, cardinality) + } + } + return TemplateMacro(signature, templateBody(body)) + } + + /** Helper function to use Expression DSL for evaluator inputs */ + internal fun MacroEvaluator.initExpansion(eExpression: EExpDsl.() -> Unit) = initExpansion(eExpBody(eExpression)) + + @OptIn(ExperimentalContracts::class) + private inline fun assertIsInstance(value: Any?) { + contract { returns() implies (value is T) } + if (value !is T) { + val message = if (value == null) { + "Expected instance of ${T::class.qualifiedName}; was null" + } else if (null is T) { + "Expected instance of ${T::class.qualifiedName}?; was instance of ${value::class.qualifiedName}" + } else { + "Expected instance of ${T::class.qualifiedName}; was instance of ${value::class.qualifiedName}" + } + Assertions.fail(message) + } + } + + /** + * Helper function for testing the output of macro invocations. 
+ */ + fun MacroEvaluator.assertExpansion(expectedOutput: String) { + val ion = IonSystemBuilder.standard().build() as _Private_IonSystem + val actual = mutableListOf() + ion.systemIterate(MacroEvaluatorAsIonReader(this)).forEachRemaining(actual::add) + val expected = mutableListOf() + ion.systemIterate(expectedOutput).forEachRemaining(expected::add) + assertEquals(expected, actual) + } + } + + @Test + fun `invoke repeat with n=1 and value is single expression`() { + // Given: + // When: + // (:repeat 1 0) + // Then: + // 0 + + evaluator.initExpansion { + eexp(Repeat) { + int(1) + int(0) + } + } + + assertEquals(LongIntValue(value = 0), evaluator.expandNext()) + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `invoke repeat with n=1 and value is multiple expressions`() { + // Given: + // When: + // (:repeat 1 (:: "a" "b")) + // Then: + // "a" "b" + + evaluator.initExpansion { + eexp(Repeat) { + int(1) + expressionGroup { + string("a") + string("b") + } + } + } + + assertEquals(StringValue(value = "a"), evaluator.expandNext()) + assertEquals(StringValue(value = "b"), evaluator.expandNext()) + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `invoke repeat with n=2 and value is single expression`() { + // Given: + // When: + // (:repeat 2 0) + // Then: + // 0 0 + + evaluator.initExpansion { + eexp(Repeat) { + int(2) + int(0) + } + } + + assertEquals(LongIntValue(value = 0), evaluator.expandNext()) + assertEquals(LongIntValue(value = 0), evaluator.expandNext()) + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `invoke repeat with n=2 and value is multiple expressions`() { + // Given: + // When: + // (:repeat 2 (:: "a" "b")) + // Then: + // "a" "b" "a" "b" + + evaluator.initExpansion { + eexp(Repeat) { + int(2) + expressionGroup { + string("a") + string("b") + } + } + } + + assertEquals(StringValue(value = "a"), evaluator.expandNext()) + assertEquals(StringValue(value = "b"), evaluator.expandNext()) + 
assertEquals(StringValue(value = "a"), evaluator.expandNext()) + assertEquals(StringValue(value = "b"), evaluator.expandNext()) + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `invoke repeat macro with a variable for n`() { + // Given: + // (macro (x) (.make_string (.repeat (%x) "na ") "batman!")) + // When: + // (:batman 16) + // Then: + // "na na na na na na na na na na na na na na na na batman!" + + val theMacro = template("x") { + macro(MakeString) { + expressionGroup { + macro(Repeat) { + int(16) + string("na ") + } + string("batman!") + } + } + } + + evaluator.initExpansion { + eexp(theMacro) { int(16) } + } + + assertEquals(StringValue(value = "na na na na na na na na na na na na na na na na batman!"), evaluator.expandNext()) + assertEquals(null, evaluator.expandNext()) + } + + @Test + fun `invoke repeat with invalid 'n' argument`() { + // Given: + // + // When: + // (:repeat -1 "a") + // Then: + // + + evaluator.initExpansion { + eexp(Repeat) { + int(-1) + string("a") + } + } + + assertThrows { evaluator.expandNext() } + } + + @Test + fun `invoke repeat with empty 'value' argument`() { + // Given: + // + // When: + // (:repeat 3 (:values)) + // Then: + // + + evaluator.initExpansion { + eexp(Repeat) { + int(3) + eexp(Values) { expressionGroup { } } + } + } + + assertNull(evaluator.expandNext()) + } + + @Test + fun `the meta macro expands to nothing`() { + // Given: + // When: + // (:meta 1 2 3) + // Then: + // + + evaluator.initExpansion { + eexp(Meta) { + expressionGroup { + int(1) + int(2) + int(3) + } + } + } + + assertNull(evaluator.expandNext()) + } + + @Test + fun `set_symbols expands to an encoding directive that replaces the symbol table and preserves macros`() { + evaluator.initExpansion { + // (:set_symbols a b c) + eexp(SetSymbols) { + expressionGroup { + symbol("a") + symbol("b") + symbol("c") + } + } + } + evaluator.assertExpansion( + """ + $ion::(module _ + (symbol_table [a, b, c]) + (macro_table _) + ) + """ + ) + } + + @Test 
+ fun `add_symbols expands to an encoding directive that appends to the symbol table and preserves macros`() { + evaluator.initExpansion { + // (:add_symbols a b c) + eexp(AddSymbols) { + expressionGroup { + symbol("a") + symbol("b") + symbol("c") + } + } + } + evaluator.assertExpansion( + """ + $ion::(module _ + (symbol_table _ [a, b, c]) + (macro_table _) + ) + """ + ) + } + + @Test + fun `set_macros expands to an encoding directive that preserves symbols and replaces the macro table`() { + evaluator.initExpansion { + // (:set_macros (macro answer () 42)) + eexp(SetMacros) { + expressionGroup { + sexp { + symbol(MACRO) + symbol("answer") + sexp { } + int(42) + } + } + } + } + evaluator.assertExpansion( + """ + $ion::(module _ + (symbol_table _) + (macro_table + (macro answer () 42)) + ) + """ + ) + } + + @Test + fun `add_macros expands to an encoding directive that preserves symbols and appends to the macro table`() { + evaluator.initExpansion { + // (:add_macros (macro answer () 42)) + eexp(AddMacros) { + expressionGroup { + sexp { + symbol(MACRO) + symbol("answer") + sexp { } + int(42) + } + } + } + } + evaluator.assertExpansion( + """ + $ion::(module _ + (symbol_table _) + (macro_table + _ + (macro answer () 42)) + ) + """ + ) + } + + @Test + fun `use expands to an encoding directive that imports a module and appends it to the symbol and macro tables`() { + evaluator.initExpansion { + // (:use "com.amazon.Foo" 2) + eexp(Use) { + string("com.amazon.Foo") + int(2) + } + } + evaluator.assertExpansion( + """ + $ion::(module _ + (import the_module "com.amazon.Foo" 2) + (symbol_table _ the_module) + (macro_table _ the_module) + ) + """ + ) + } + + @Test + fun `use defaults to version 1 if no version is provided`() { + evaluator.initExpansion { + // (:use "com.amazon.Foo") + eexp(Use) { + string("com.amazon.Foo") + expressionGroup { } + } + } + evaluator.assertExpansion( + """ + $ion::(module _ + (import the_module "com.amazon.Foo" 1) + (symbol_table _ the_module) + 
(macro_table _ the_module) + ) + """ + ) + } +} diff --git a/src/test/java/com/amazon/ion/system/IonBinaryWriterBuilderTest.java b/src/test/java/com/amazon/ion/system/IonBinaryWriterBuilderTest.java index f27c6eb381..3bb69a958c 100644 --- a/src/test/java/com/amazon/ion/system/IonBinaryWriterBuilderTest.java +++ b/src/test/java/com/amazon/ion/system/IonBinaryWriterBuilderTest.java @@ -1,29 +1,13 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.system; import static com.amazon.ion.TestUtils.symbolTableEquals; -import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotSame; import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; -import com.amazon.ion.IonCatalog; import com.amazon.ion.IonReader; import com.amazon.ion.IonSystem; import com.amazon.ion.IonType; @@ -39,86 +23,17 @@ import java.io.OutputStream; import java.nio.charset.StandardCharsets; -import org.junit.Assert; import org.junit.Test; -public class IonBinaryWriterBuilderTest +public class IonBinaryWriterBuilderTest extends IonWriterBuilderTestBase { - public void testBuildNull(IonBinaryWriterBuilder b) - { - try { - b.build((OutputStream)null); - fail("Expected exception"); - } - catch (NullPointerException e) { } - } - - @Test - public void testStandard() - { - IonBinaryWriterBuilder b = IonBinaryWriterBuilder.standard(); - Assert.assertNotNull(b); - testBuildNull(b); - - OutputStream out = new ByteArrayOutputStream(); - IonWriter writer = b.build(out); - Assert.assertNotNull(writer); - - assertNotSame(b, IonBinaryWriterBuilder.standard()); - } - - - //------------------------------------------------------------------------- - - @Test - public void testCustomCatalog() - { - IonCatalog catalog = new SimpleCatalog(); - - IonBinaryWriterBuilder b = IonBinaryWriterBuilder.standard(); - b.setCatalog(catalog); - assertSame(catalog, b.getCatalog()); - - OutputStream out = new ByteArrayOutputStream(); - IonWriter writer = b.build(out); - assertSame(catalog, ((_Private_IonWriter)writer).getCatalog()); - - IonCatalog catalog2 = new SimpleCatalog(); - b.setCatalog(catalog2); - assertSame(catalog2, b.getCatalog()); - - // Test with...() on mutable builder - - IonBinaryWriterBuilder b2 = b.withCatalog(catalog); - assertSame(b, b2); - assertSame(catalog, 
b2.getCatalog()); - - // Test with...() on immutable builder - - b2 = b.immutable(); - assertSame(catalog, b2.getCatalog()); - IonBinaryWriterBuilder b3 = b2.withCatalog(catalog2); - assertNotSame(b2, b3); - assertSame(catalog, b2.getCatalog()); - assertSame(catalog2, b3.getCatalog()); - } - - @Test(expected = UnsupportedOperationException.class) - public void testCatalogImmutability() - { - IonCatalog catalog = new SimpleCatalog(); - - IonBinaryWriterBuilder b = IonBinaryWriterBuilder.standard(); - b.setCatalog(catalog); - - IonBinaryWriterBuilder b2 = b.immutable(); - assertSame(catalog, b2.getCatalog()); - b2.setCatalog(null); + @Override + IonBinaryWriterBuilder standard() { + return IonBinaryWriterBuilder.standard(); } - //------------------------------------------------------------------------- @Test @@ -348,82 +263,4 @@ public void testInitialSymtabImmutability() _Private_IonBinaryWriterBuilder b2 = b.immutable(); b2.setInitialSymbolTable(null); } - - - //------------------------------------------------------------------------- - - @Test - public void testImports() - { - SymbolTable f = Symtabs.CATALOG.getTable("fred", 1); - SymbolTable g = Symtabs.CATALOG.getTable("ginger", 1); - - SymbolTable[] symtabsF = new SymbolTable[] { f }; - SymbolTable[] symtabsG = new SymbolTable[] { g }; - - IonBinaryWriterBuilder b = IonBinaryWriterBuilder.standard(); - b.setImports(f); - - OutputStream out = new ByteArrayOutputStream(); - IonWriter writer = b.build(out); - SymbolTable st = writer.getSymbolTable(); - assertArrayEquals(symtabsF, st.getImportedTables()); - - // Test with...() on mutable builder - - IonBinaryWriterBuilder b2 = b.withImports(g); - assertSame(b, b2); - assertArrayEquals(symtabsG, b2.getImports()); - - // Test with...() on immutable builder - - b2 = b.immutable(); - assertArrayEquals(symtabsG, b2.getImports()); - IonBinaryWriterBuilder b3 = b2.withImports(f); - assertNotSame(b2, b3); - assertArrayEquals(symtabsG, b2.getImports()); - 
assertArrayEquals(symtabsF, b3.getImports()); - - // Test cloning of array - - SymbolTable[] symtabs = new SymbolTable[] { f }; - b3.setImports(symtabs); - assertNotSame(symtabs, b3.getImports()); - assertArrayEquals(symtabsF, b3.getImports()); - - symtabs[0] = g; - assertArrayEquals(symtabsF, b3.getImports()); - - b3.getImports()[0] = g; - assertArrayEquals(symtabsF, b3.getImports()); - } - - @Test(expected = UnsupportedOperationException.class) - public void testImportsImmutability() - { - SymbolTable f = Symtabs.CATALOG.getTable("fred", 1); - SymbolTable[] symtabs = new SymbolTable[] { f }; - - IonBinaryWriterBuilder b = IonBinaryWriterBuilder.standard(); - b.setImports(f); - - IonBinaryWriterBuilder b2 = b.immutable(); - assertArrayEquals(symtabs, b2.getImports()); - b2.setImports(); - } - - @Test - public void testImportsNull() - { - SymbolTable f = Symtabs.CATALOG.getTable("fred", 1); - SymbolTable[] symtabs = new SymbolTable[] { f }; - - IonBinaryWriterBuilder b = IonBinaryWriterBuilder.standard(); - b.setImports(symtabs); - b.setImports((SymbolTable[])null); - assertSame(null, b.getImports()); - - b.setImports(new SymbolTable[0]); - assertArrayEquals(new SymbolTable[0], b.getImports()); - } } diff --git a/src/test/java/com/amazon/ion/system/IonBinaryWriterBuilder_1_1_Test.java b/src/test/java/com/amazon/ion/system/IonBinaryWriterBuilder_1_1_Test.java new file mode 100644 index 0000000000..b7551e9e21 --- /dev/null +++ b/src/test/java/com/amazon/ion/system/IonBinaryWriterBuilder_1_1_Test.java @@ -0,0 +1,59 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.system; + +import org.junit.Ignore; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotSame; +import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertThrows; + +public class IonBinaryWriterBuilder_1_1_Test extends IonWriterBuilderTestBase<_Private_IonBinaryWriterBuilder_1_1> { + + @Override + _Private_IonBinaryWriterBuilder_1_1 standard() { + return _Private_IonBinaryWriterBuilder_1_1.standard(); + } + + @Test + public void testBlockSize() { + IonBinaryWriterBuilder_1_1 b = standard(); + assertEquals(_Private_IonBinaryWriterBuilder_1_1.DEFAULT_BLOCK_SIZE, b.getBlockSize()); + + b.setBlockSize(42); + assertEquals(42, b.getBlockSize()); + + assertSame(b, b.withBlockSize(4096)); + assertEquals(4096, b.getBlockSize()); + + assertThrows(IllegalArgumentException.class, () -> b.setBlockSize(-1)); + assertThrows(IllegalArgumentException.class, () -> b.withBlockSize(Integer.MAX_VALUE)); + assertEquals(4096, b.getBlockSize()); + + IonBinaryWriterBuilder_1_1 immutable = b.immutable(); + + assertThrows(UnsupportedOperationException.class, () -> immutable.setBlockSize(512)); + + IonBinaryWriterBuilder_1_1 mutable = immutable.withBlockSize(16); + assertNotSame(immutable, mutable); + assertEquals(16, mutable.getBlockSize()); + assertEquals(4096, immutable.getBlockSize()); + } + + @Override + @Test + @Ignore + public void testImports() { + // Note: skipped because IonManagedWriter_1_1 does not implement _Private_IonWriter.getSymbolTable. + } + + @Override + @Test + @Ignore + public void testCustomCatalog() { + // Note: skipped because IonManagedWriter_1_1 does not implement _Private_IonWriter.getCatalog. 
+ } + +} diff --git a/src/test/java/com/amazon/ion/system/IonTextWriterBuilderTest.java b/src/test/java/com/amazon/ion/system/IonTextWriterBuilderTestBase.java similarity index 68% rename from src/test/java/com/amazon/ion/system/IonTextWriterBuilderTest.java rename to src/test/java/com/amazon/ion/system/IonTextWriterBuilderTestBase.java index 5c819debdf..e8d4b75792 100644 --- a/src/test/java/com/amazon/ion/system/IonTextWriterBuilderTest.java +++ b/src/test/java/com/amazon/ion/system/IonTextWriterBuilderTestBase.java @@ -1,26 +1,11 @@ -/* - * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 package com.amazon.ion.system; -import static com.amazon.ion.SystemSymbols.ION_1_0; import static com.amazon.ion.system.IonTextWriterBuilder.ASCII; import static com.amazon.ion.system.IonTextWriterBuilder.UTF8; import static com.amazon.ion.system.IonTextWriterBuilder.LstMinimizing.EVERYTHING; import static com.amazon.ion.system.IonTextWriterBuilder.LstMinimizing.LOCALS; -import static com.amazon.ion.system.IonWriterBuilder.InitialIvmHandling.SUPPRESS; import static com.amazon.ion.system.IonWriterBuilder.IvmMinimizing.ADJACENT; import static com.amazon.ion.system.IonWriterBuilder.IvmMinimizing.DISTANT; import static org.junit.Assert.assertArrayEquals; @@ -40,9 +25,15 @@ import org.junit.Assert; import org.junit.Test; - -public class IonTextWriterBuilderTest +/** + * Base tests for classes that inherit from {@link IonTextWriterBuilder}. + */ +public abstract class IonTextWriterBuilderTestBase { + + abstract IonTextWriterBuilder standard(); + abstract String ivm(); + public void testBuildNull(IonTextWriterBuilder b) { try { @@ -61,7 +52,7 @@ public void testBuildNull(IonTextWriterBuilder b) @Test public void testStandard() { - IonTextWriterBuilder b = IonTextWriterBuilder.standard(); + IonTextWriterBuilder b = standard(); Assert.assertNotNull(b); testBuildNull(b); @@ -70,7 +61,7 @@ public void testStandard() IonWriter writer = b.build(out); Assert.assertNotNull(writer); - assertNotSame(b, IonTextWriterBuilder.standard()); + assertNotSame(b, standard()); } @@ -81,7 +72,7 @@ public void testCustomCatalog() { IonCatalog catalog = new SimpleCatalog(); - IonTextWriterBuilder b = IonTextWriterBuilder.standard(); + IonTextWriterBuilder b = standard(); b.setCatalog(catalog); assertSame(catalog, b.getCatalog()); @@ -114,7 +105,7 @@ public void testCatalogImmutability() { IonCatalog catalog = new SimpleCatalog(); - IonTextWriterBuilder b = IonTextWriterBuilder.standard(); + IonTextWriterBuilder b = standard(); b.setCatalog(catalog); 
IonTextWriterBuilder b2 = b.immutable(); @@ -124,71 +115,10 @@ public void testCatalogImmutability() //------------------------------------------------------------------------- - @Test - public void testInitialIvmHandling() - { - IonTextWriterBuilder b = IonTextWriterBuilder.standard(); - b.setInitialIvmHandling(SUPPRESS); - assertSame(SUPPRESS, b.getInitialIvmHandling()); - - // Test with...() on mutable builder - - IonTextWriterBuilder b2 = b.withInitialIvmHandling(null); - assertSame(b, b2); - assertSame(null, b.getInitialIvmHandling()); - - // Test with...() on immutable builder - - b2 = b.immutable(); - assertSame(null, b2.getInitialIvmHandling()); - IonTextWriterBuilder b3 = b2.withInitialIvmHandling(SUPPRESS); - assertNotSame(b2, b3); - assertSame(null, b2.getInitialIvmHandling()); - assertSame(SUPPRESS, b3.getInitialIvmHandling()); - } - - @Test(expected = UnsupportedOperationException.class) - public void testInitialIvmHandlingImmutability() - { - IonTextWriterBuilder b = IonTextWriterBuilder.standard(); - b.setInitialIvmHandling(SUPPRESS); - - IonTextWriterBuilder b2 = b.immutable(); - assertSame(SUPPRESS, b2.getInitialIvmHandling()); - b2.setInitialIvmHandling(null); - } - - @Test - public void testInitialIvmSuppression() - throws IOException - { - IonTextWriterBuilder b = IonTextWriterBuilder.standard(); - - StringBuilder out = new StringBuilder(); - IonWriter writer = b.build(out); - writer.writeSymbol(ION_1_0); - writer.writeNull(); - writer.writeSymbol(ION_1_0); - writer.close(); - assertEquals(ION_1_0 + " null " + ION_1_0, out.toString()); - - b.withInitialIvmHandling(SUPPRESS); - out.setLength(0); - writer = b.build(out); - writer.writeSymbol(ION_1_0); - writer.writeSymbol(ION_1_0); - writer.writeNull(); - writer.writeSymbol(ION_1_0); - writer.close(); - assertEquals("null " + ION_1_0, out.toString()); - } - - //------------------------------------------------------------------------- - @Test public void testIvmMinimizing() { - IonTextWriterBuilder 
b = IonTextWriterBuilder.standard(); + IonTextWriterBuilder b = standard(); assertEquals(null, b.getIvmMinimizing()); b.setIvmMinimizing(ADJACENT); assertSame(ADJACENT, b.getIvmMinimizing()); @@ -212,7 +142,7 @@ public void testIvmMinimizing() @Test(expected = UnsupportedOperationException.class) public void testIvmMinimizingImmutability() { - IonTextWriterBuilder b = IonTextWriterBuilder.standard(); + IonTextWriterBuilder b = standard(); b.setIvmMinimizing(ADJACENT); IonTextWriterBuilder b2 = b.immutable(); @@ -224,36 +154,36 @@ public void testIvmMinimizingImmutability() public void testIvmMinimization() throws IOException { - IonTextWriterBuilder b = IonTextWriterBuilder.standard(); + IonTextWriterBuilder b = standard(); StringBuilder out = new StringBuilder(); IonWriter writer = b.build(out); - writer.writeSymbol(ION_1_0); - writer.writeSymbol(ION_1_0); + writer.writeSymbol(ivm()); + writer.writeSymbol(ivm()); writer.close(); - assertEquals(ION_1_0 + " " + ION_1_0, out.toString()); + assertEquals(ivm() + " " + ivm(), out.toString()); b.withIvmMinimizing(ADJACENT); out.setLength(0); writer = b.build(out); - writer.writeSymbol(ION_1_0); - writer.writeSymbol(ION_1_0); + writer.writeSymbol(ivm()); + writer.writeSymbol(ivm()); writer.writeNull(); - writer.writeSymbol(ION_1_0); - writer.writeSymbol(ION_1_0); + writer.writeSymbol(ivm()); + writer.writeSymbol(ivm()); writer.close(); - assertEquals(ION_1_0 + " null " + ION_1_0, out.toString()); + assertEquals(ivm() + " null " + ivm(), out.toString()); b.withIvmMinimizing(DISTANT); out.setLength(0); writer = b.build(out); - writer.writeSymbol(ION_1_0); - writer.writeSymbol(ION_1_0); + writer.writeSymbol(ivm()); + writer.writeSymbol(ivm()); writer.writeNull(); - writer.writeSymbol(ION_1_0); - writer.writeSymbol(ION_1_0); + writer.writeSymbol(ivm()); + writer.writeSymbol(ivm()); writer.close(); - assertEquals(ION_1_0 + " null", out.toString()); + assertEquals(ivm() + " null", out.toString()); } 
//------------------------------------------------------------------------- @@ -261,7 +191,7 @@ public void testIvmMinimization() @Test public void testLstMinimizing() { - IonTextWriterBuilder b = IonTextWriterBuilder.standard(); + IonTextWriterBuilder b = standard(); b.setLstMinimizing(EVERYTHING); assertSame(EVERYTHING, b.getLstMinimizing()); @@ -284,7 +214,7 @@ public void testLstMinimizing() @Test(expected = UnsupportedOperationException.class) public void testLstMinimizingImmutability() { - IonTextWriterBuilder b = IonTextWriterBuilder.standard(); + IonTextWriterBuilder b = standard(); b.setLstMinimizing(EVERYTHING); IonTextWriterBuilder b2 = b.immutable(); @@ -297,7 +227,7 @@ public void testLstMinimizingImmutability() @Test public void testCharset() { - IonTextWriterBuilder b = IonTextWriterBuilder.standard(); + IonTextWriterBuilder b = standard(); b.setCharset(ASCII); assertSame(ASCII, b.getCharset()); b.setCharset(null); @@ -326,7 +256,7 @@ public void testCharset() @Test(expected = UnsupportedOperationException.class) public void testCharsetImmutability() { - IonTextWriterBuilder b = IonTextWriterBuilder.standard(); + IonTextWriterBuilder b = standard(); b.setCharset(ASCII); assertSame(ASCII, b.getCharset()); @@ -340,7 +270,7 @@ public void testCharsetImmutability() public void testCharsetValidation() { Charset iso = Charset.forName("ISO-8859-1"); - IonTextWriterBuilder b = IonTextWriterBuilder.standard(); + IonTextWriterBuilder b = standard(); b.setCharset(iso); } @@ -355,7 +285,7 @@ public void testImports() SymbolTable[] symtabsF = new SymbolTable[] { f }; SymbolTable[] symtabsG = new SymbolTable[] { g }; - IonTextWriterBuilder b = IonTextWriterBuilder.standard(); + IonTextWriterBuilder b = standard(); b.setImports(f); StringBuilder out = new StringBuilder(); @@ -398,7 +328,7 @@ public void testImportsImmutability() SymbolTable f = Symtabs.CATALOG.getTable("fred", 1); SymbolTable[] symtabs = new SymbolTable[] { f }; - IonTextWriterBuilder b = 
IonTextWriterBuilder.standard(); + IonTextWriterBuilder b = standard(); b.setImports(f); IonTextWriterBuilder b2 = b.immutable(); @@ -412,7 +342,7 @@ public void testImportsNull() SymbolTable f = Symtabs.CATALOG.getTable("fred", 1); SymbolTable[] symtabs = new SymbolTable[] { f }; - IonTextWriterBuilder b = IonTextWriterBuilder.standard(); + IonTextWriterBuilder b = standard(); b.setImports(symtabs); b.setImports((SymbolTable[])null); assertSame(null, b.getImports()); @@ -427,7 +357,7 @@ public void testImportsNull() @Test(expected = UnsupportedOperationException.class) public void testLongStringThresholdImmutability() { - IonTextWriterBuilder b = IonTextWriterBuilder.standard(); + IonTextWriterBuilder b = standard(); b.setLongStringThreshold(99); IonTextWriterBuilder b2 = b.immutable(); diff --git a/src/test/java/com/amazon/ion/system/IonTextWriterBuilder_1_0_Test.java b/src/test/java/com/amazon/ion/system/IonTextWriterBuilder_1_0_Test.java new file mode 100644 index 0000000000..b104ac57b6 --- /dev/null +++ b/src/test/java/com/amazon/ion/system/IonTextWriterBuilder_1_0_Test.java @@ -0,0 +1,86 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.system; + +import com.amazon.ion.IonWriter; +import org.junit.Test; + +import java.io.IOException; + +import static com.amazon.ion.SystemSymbols.ION_1_0; +import static com.amazon.ion.system.IonWriterBuilder.InitialIvmHandling.SUPPRESS; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotSame; +import static org.junit.Assert.assertSame; + +public class IonTextWriterBuilder_1_0_Test extends IonTextWriterBuilderTestBase { + + @Override + IonTextWriterBuilder standard() { + return IonTextWriterBuilder.standard(); + } + + @Override + String ivm() { + return ION_1_0; + } + + @Test + public void testInitialIvmHandling() + { + IonTextWriterBuilder b = standard(); + b.setInitialIvmHandling(SUPPRESS); + assertSame(SUPPRESS, b.getInitialIvmHandling()); + + // Test with...() on mutable builder + + IonTextWriterBuilder b2 = b.withInitialIvmHandling(null); + assertSame(b, b2); + assertSame(null, b.getInitialIvmHandling()); + + // Test with...() on immutable builder + + b2 = b.immutable(); + assertSame(null, b2.getInitialIvmHandling()); + IonTextWriterBuilder b3 = b2.withInitialIvmHandling(SUPPRESS); + assertNotSame(b2, b3); + assertSame(null, b2.getInitialIvmHandling()); + assertSame(SUPPRESS, b3.getInitialIvmHandling()); + } + + @Test(expected = UnsupportedOperationException.class) + public void testInitialIvmHandlingImmutability() + { + IonTextWriterBuilder b = standard(); + b.setInitialIvmHandling(SUPPRESS); + + IonTextWriterBuilder b2 = b.immutable(); + assertSame(SUPPRESS, b2.getInitialIvmHandling()); + b2.setInitialIvmHandling(null); + } + + @Test + public void testInitialIvmSuppression() + throws IOException + { + IonTextWriterBuilder b = standard(); + + StringBuilder out = new StringBuilder(); + IonWriter writer = b.build(out); + writer.writeSymbol(ivm()); + writer.writeNull(); + writer.writeSymbol(ivm()); + writer.close(); + assertEquals(ivm() + " null " + ivm(), 
out.toString()); + + b.withInitialIvmHandling(SUPPRESS); + out.setLength(0); + writer = b.build(out); + writer.writeSymbol(ivm()); + writer.writeSymbol(ivm()); + writer.writeNull(); + writer.writeSymbol(ivm()); + writer.close(); + assertEquals("null " + ivm(), out.toString()); + } +} diff --git a/src/test/java/com/amazon/ion/system/IonTextWriterBuilder_1_1_Test.java b/src/test/java/com/amazon/ion/system/IonTextWriterBuilder_1_1_Test.java new file mode 100644 index 0000000000..0c06b95487 --- /dev/null +++ b/src/test/java/com/amazon/ion/system/IonTextWriterBuilder_1_1_Test.java @@ -0,0 +1,40 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.system; + +import com.amazon.ion.impl._Private_IonTextWriterBuilder_1_1; +import org.junit.Ignore; +import org.junit.Test; + +public class IonTextWriterBuilder_1_1_Test extends IonTextWriterBuilderTestBase { + @Override + IonTextWriterBuilder standard() { + return _Private_IonTextWriterBuilder_1_1.standard(); + } + + @Override + String ivm() { + return "$ion_1_1"; + } + + @Override + @Test + @Ignore + public void testImports() { + // TODO: skipped because IonManagedWriter_1_1 does not implement _Private_IonWriter.getSymbolTable. + } + + @Override + @Test + @Ignore + public void testCustomCatalog() { + // TODO: skipped because IonManagedWriter_1_1 does not implement _Private_IonWriter.getCatalog. + } + + @Override + @Test + @Ignore + public void testIvmMinimization() { + // TODO: skipped because the Ion 1.1 text writer does not yet support Ivm minimization. + } +} diff --git a/src/test/java/com/amazon/ion/system/IonWriterBuilderTestBase.java b/src/test/java/com/amazon/ion/system/IonWriterBuilderTestBase.java new file mode 100644 index 0000000000..d173fda4f2 --- /dev/null +++ b/src/test/java/com/amazon/ion/system/IonWriterBuilderTestBase.java @@ -0,0 +1,176 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 +package com.amazon.ion.system; + +import com.amazon.ion.IonCatalog; +import com.amazon.ion.IonWriter; +import com.amazon.ion.SymbolTable; +import com.amazon.ion.impl.Symtabs; +import com.amazon.ion.impl._Private_IonWriter; +import org.junit.Assert; +import org.junit.Test; + +import java.io.ByteArrayOutputStream; +import java.io.OutputStream; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertNotSame; +import static org.junit.Assert.assertSame; +import static org.junit.Assert.fail; + +/** + * Base tests for classes that inherit from {@link IonWriterBuilderBase}. + * @param the concrete type of the class under test. + */ +abstract class IonWriterBuilderTestBase> { + + /** + * @return a new, standard builder of the relevant type. + */ + abstract Builder standard(); + + public void testBuildNull(Builder b) + { + try { + b.build((OutputStream)null); + fail("Expected exception"); + } + catch (RuntimeException e) { } + } + + @Test + public void testStandard() + { + Builder b = standard(); + Assert.assertNotNull(b); + + testBuildNull(b); + + OutputStream out = new ByteArrayOutputStream(); + IonWriter writer = b.build(out); + Assert.assertNotNull(writer); + + assertNotSame(b, standard()); + } + + @Test + public void testCustomCatalog() + { + IonCatalog catalog = new SimpleCatalog(); + + Builder b = standard(); + b.setCatalog(catalog); + assertSame(catalog, b.getCatalog()); + + OutputStream out = new ByteArrayOutputStream(); + IonWriter writer = b.build(out); + assertSame(catalog, ((_Private_IonWriter)writer).getCatalog()); + + IonCatalog catalog2 = new SimpleCatalog(); + b.setCatalog(catalog2); + assertSame(catalog2, b.getCatalog()); + + // Test with...() on mutable builder + + Builder b2 = b.withCatalog(catalog); + assertSame(b, b2); + assertSame(catalog, b2.getCatalog()); + + // Test with...() on immutable builder + + b2 = b.immutable(); + assertSame(catalog, b2.getCatalog()); + Builder b3 = 
b2.withCatalog(catalog2); + assertNotSame(b2, b3); + assertSame(catalog, b2.getCatalog()); + assertSame(catalog2, b3.getCatalog()); + } + + @Test(expected = UnsupportedOperationException.class) + public void testCatalogImmutability() + { + IonCatalog catalog = new SimpleCatalog(); + + Builder b = standard(); + b.setCatalog(catalog); + + Builder b2 = b.immutable(); + assertSame(catalog, b2.getCatalog()); + b2.setCatalog(null); + } + + @Test + public void testImports() + { + SymbolTable f = Symtabs.CATALOG.getTable("fred", 1); + SymbolTable g = Symtabs.CATALOG.getTable("ginger", 1); + + SymbolTable[] symtabsF = new SymbolTable[] { f }; + SymbolTable[] symtabsG = new SymbolTable[] { g }; + + Builder b = standard(); + b.setImports(f); + + OutputStream out = new ByteArrayOutputStream(); + IonWriter writer = b.build(out); + SymbolTable st = writer.getSymbolTable(); + assertArrayEquals(symtabsF, st.getImportedTables()); + + // Test with...() on mutable builder + + Builder b2 = b.withImports(g); + assertSame(b, b2); + assertArrayEquals(symtabsG, b2.getImports()); + + // Test with...() on immutable builder + + b2 = b.immutable(); + assertArrayEquals(symtabsG, b2.getImports()); + Builder b3 = b2.withImports(f); + assertNotSame(b2, b3); + assertArrayEquals(symtabsG, b2.getImports()); + assertArrayEquals(symtabsF, b3.getImports()); + + // Test cloning of array + + SymbolTable[] symtabs = new SymbolTable[] { f }; + b3.setImports(symtabs); + assertNotSame(symtabs, b3.getImports()); + assertArrayEquals(symtabsF, b3.getImports()); + + symtabs[0] = g; + assertArrayEquals(symtabsF, b3.getImports()); + + b3.getImports()[0] = g; + assertArrayEquals(symtabsF, b3.getImports()); + } + + @Test(expected = UnsupportedOperationException.class) + public void testImportsImmutability() + { + SymbolTable f = Symtabs.CATALOG.getTable("fred", 1); + SymbolTable[] symtabs = new SymbolTable[] { f }; + + Builder b = standard(); + b.setImports(f); + + Builder b2 = b.immutable(); + 
assertArrayEquals(symtabs, b2.getImports()); + b2.setImports(); + } + + @Test + public void testImportsNull() + { + SymbolTable f = Symtabs.CATALOG.getTable("fred", 1); + SymbolTable[] symtabs = new SymbolTable[] { f }; + + Builder b = standard(); + b.setImports(symtabs); + b.setImports((SymbolTable[])null); + assertSame(null, b.getImports()); + + b.setImports(new SymbolTable[0]); + assertArrayEquals(new SymbolTable[0], b.getImports()); + } +} diff --git a/src/test/java/com/amazon/ion/util/formatting.kt b/src/test/java/com/amazon/ion/util/formatting.kt new file mode 100644 index 0000000000..7a5df69ef1 --- /dev/null +++ b/src/test/java/com/amazon/ion/util/formatting.kt @@ -0,0 +1,12 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +@file:JvmName("Formatting") +package com.amazon.ion.util + +@OptIn(ExperimentalStdlibApi::class) +fun ByteArray.toPrettyHexString(bytesPerWord: Int = 4, wordsPerLine: Int = 8): String { + return map { it.toHexString(HexFormat.UpperCase) } + .windowed(bytesPerWord, bytesPerWord, partialWindows = true) + .windowed(wordsPerLine, wordsPerLine, partialWindows = true) + .joinToString("\n") { it.joinToString(" ") { it.joinToString(" ") } } +}