diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 913320e6b5..f6aff8a146 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,2 +1,2 @@ # This should match the owning team set up in https://github.com/orgs/opensearch-project/teams -* @pjfitzgibbons @ps48 @kavithacm @derek-ho @joshuali925 @dai-chen @YANG-DB @rupal-bq @mengweieric @vamsi-amazon @swiddis @penghuo @seankao-az @MaxKsyunz @Yury-Fridlyand @anirudha @forestmvey @acarbonetto @GumpacG @ykmr1224 +* @pjfitzgibbons @ps48 @kavithacm @derek-ho @joshuali925 @dai-chen @YANG-DB @rupal-bq @mengweieric @vamsi-amazon @swiddis @penghuo @seankao-az @MaxKsyunz @Yury-Fridlyand @anirudha @forestmvey @acarbonetto @GumpacG @ykmr1224 @LantaoJin @noCharger \ No newline at end of file diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 2e325678fe..c84ed5b13a 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,16 +1,18 @@ ### Description [Describe what this change achieves] - -### Issues Resolved -[List any issues this PR will resolve] - + +### Related Issues +Resolves #[Issue number to be closed when this PR is merged] + + ### Check List - [ ] New functionality includes testing. - - [ ] All tests pass, including unit test, integration test and doctest - [ ] New functionality has been documented. - - [ ] New functionality has javadoc added - - [ ] New functionality has user manual doc added -- [ ] Commits are signed per the DCO using --signoff + - [ ] New functionality has javadoc added. + - [ ] New functionality has a user manual doc added. +- [ ] API changes companion pull request [created](https://github.com/opensearch-project/opensearch-api-specification/blob/main/DEVELOPER_GUIDE.md). +- [ ] Commits are signed per the DCO using `--signoff`. +- [ ] Public documentation issue/PR [created](https://github.com/opensearch-project/documentation-website/issues/new/choose). By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. -For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/OpenSearch/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). \ No newline at end of file +For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/sql/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 13c4ef0f60..38fca306bf 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -27,7 +27,11 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v3 - + - name: Set up JDK 21 + uses: actions/setup-java@v3 + with: + distribution: temurin + java-version: 21 - name: Initialize CodeQL uses: github/codeql-action/init@v2 with: diff --git a/.github/workflows/integ-tests-with-security.yml b/.github/workflows/integ-tests-with-security.yml index 8ca5a5c1d5..e7c52f4231 100644 --- a/.github/workflows/integ-tests-with-security.yml +++ b/.github/workflows/integ-tests-with-security.yml @@ -20,21 +20,22 @@ jobs: strategy: fail-fast: false matrix: - java: [ 11, 17, 21 ] - + java: [21] runs-on: ubuntu-latest container: # using the same image which is used by opensearch-build team to build the OpenSearch Distribution # this image tag is subject to change as more dependencies and updates will arrive over time image: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-version-linux }} - # need to switch to root so that github actions can install runner binary on container without permission issues. - options: --user root + options: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-options }} steps: - - uses: actions/checkout@v3 + - name: Run start commands + run: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-command }} + + - uses: actions/checkout@v4 - name: Set up JDK ${{ matrix.java }} - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: 'temurin' java-version: ${{ matrix.java }} @@ -46,7 +47,7 @@ jobs: - name: Upload test reports if: ${{ always() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 continue-on-error: true with: name: test-reports-${{ matrix.os }}-${{ matrix.java }} @@ -59,16 +60,16 @@ jobs: strategy: fail-fast: false matrix: - os: [ windows-latest ] - java: [ 11, 17, 21 ] + os: [ windows-latest, macos-13 ] + java: [21] runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up JDK ${{ matrix.java }} - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: 'temurin' java-version: ${{ matrix.java }} @@ -78,7 +79,7 @@ jobs: - name: Upload test reports if: ${{ always() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 continue-on-error: true with: name: test-reports-${{ matrix.os }}-${{ matrix.java }} diff --git a/.github/workflows/maven-publish.yml b/.github/workflows/maven-publish.yml index 8adf7ae52c..0dd07404bb 100644 --- a/.github/workflows/maven-publish.yml +++ b/.github/workflows/maven-publish.yml @@ -23,7 +23,7 @@ jobs: - uses: actions/setup-java@v3 with: distribution: temurin # Temurin is a distribution of adoptium - java-version: 11 + java-version: 21 - uses: actions/checkout@v3 - uses: aws-actions/configure-aws-credentials@v1.7.0 with: diff --git a/.github/workflows/sql-pitest.yml b/.github/workflows/sql-pitest.yml index bc751daefa..fed98e4926 100644 --- a/.github/workflows/sql-pitest.yml +++ b/.github/workflows/sql-pitest.yml @@ -21,21 +21,23 @@ jobs: strategy: matrix: java: - - 11 - - 17 + - 21 runs-on: ubuntu-latest container: # using the same image which is used by opensearch-build team to build the OpenSearch Distribution # this image tag is subject to change as more dependencies and updates will arrive over time image: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-version-linux }} - # need to 
switch to root so that github actions can install runner binary on container without permission issues. - options: --user root + options: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-options }} + steps: - - uses: actions/checkout@v3 + - name: Run start commands + run: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-command }} + + - uses: actions/checkout@v4 - name: Set up JDK ${{ matrix.java }} - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: 'temurin' java-version: ${{ matrix.java }} @@ -47,7 +49,7 @@ jobs: - name: Upload test reports if: always() - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: test-reports-${{ matrix.entry.java }} path: | diff --git a/.github/workflows/sql-test-and-build-workflow.yml b/.github/workflows/sql-test-and-build-workflow.yml index ded974bfcb..a13d5ccf6d 100644 --- a/.github/workflows/sql-test-and-build-workflow.yml +++ b/.github/workflows/sql-test-and-build-workflow.yml @@ -28,23 +28,22 @@ jobs: # Run all jobs fail-fast: false matrix: - java: - - 11 - - 17 - - 21 + java: [21] runs-on: ubuntu-latest container: # using the same image which is used by opensearch-build team to build the OpenSearch Distribution # this image tag is subject to change as more dependencies and updates will arrive over time image: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-version-linux }} - # need to switch to root so that github actions can install runner binary on container without permission issues. - options: --user root + options: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-options }} steps: - - uses: actions/checkout@v3 + - name: Run start commands + run: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-command }} + + - uses: actions/checkout@v4 - name: Set up JDK ${{ matrix.java }} - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: 'temurin' java-version: ${{ matrix.java }} @@ -54,11 +53,6 @@ jobs: chown -R 1000:1000 `pwd` su `id -un 1000` -c "./gradlew --continue build" - - name: Run backward compatibility tests - run: | - chown -R 1000:1000 `pwd` - su `id -un 1000` -c "./scripts/bwctest.sh" - - name: Create Artifact Path run: | mkdir -p opensearch-sql-builds @@ -67,14 +61,14 @@ jobs: # This step uses the codecov-action Github action: https://github.com/codecov/codecov-action - name: Upload SQL Coverage Report if: ${{ always() }} - uses: codecov/codecov-action@v3 + uses: codecov/codecov-action@v4 continue-on-error: true with: flags: sql-engine token: ${{ secrets.CODECOV_TOKEN }} - name: Upload Artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 continue-on-error: true with: name: opensearch-sql-ubuntu-latest-${{ matrix.java }} @@ -82,7 +76,7 @@ jobs: - name: Upload test reports if: ${{ always() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 continue-on-error: true with: name: test-reports-ubuntu-latest-${{ matrix.java }} @@ -105,16 +99,15 @@ jobs: fail-fast: false matrix: entry: - - { os: windows-latest, java: 11, os_build_args: -x doctest -PbuildPlatform=windows } - - { os: windows-latest, java: 17, os_build_args: -x doctest -PbuildPlatform=windows } - { os: windows-latest, java: 21, os_build_args: -x doctest -PbuildPlatform=windows } + - { os: macos-13, java: 21 } runs-on: ${{ matrix.entry.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up JDK ${{ matrix.java }} - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: 'temurin' java-version: ${{ 
matrix.entry.java }} @@ -130,14 +123,14 @@ jobs: # This step uses the codecov-action Github action: https://github.com/codecov/codecov-action - name: Upload SQL Coverage Report if: ${{ always() && matrix.entry.os == 'ubuntu-latest' }} - uses: codecov/codecov-action@v3 + uses: codecov/codecov-action@v4 continue-on-error: true with: flags: sql-engine token: ${{ secrets.CODECOV_TOKEN }} - name: Upload Artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 continue-on-error: true with: name: opensearch-sql-${{ matrix.entry.os }}-${{ matrix.entry.java }} @@ -145,7 +138,7 @@ jobs: - name: Upload test reports if: ${{ always() && matrix.entry.os == 'ubuntu-latest' }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 continue-on-error: true with: name: test-reports-${{ matrix.entry.os }}-${{ matrix.entry.java }} @@ -161,3 +154,49 @@ jobs: plugin/build/reports/** doctest/build/testclusters/docTestCluster-0/logs/* integ-test/build/testclusters/*/logs/* + + bwc-tests: + needs: Get-CI-Image-Tag + runs-on: ubuntu-latest + strategy: + matrix: + java: [21] + container: + image: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-version-linux }} + options: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-options }} + + steps: + - name: Run start commands + run: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-command }} + + - uses: actions/checkout@v4 + + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: ${{ matrix.java }} + + - name: Run backward compatibility tests + run: | + chown -R 1000:1000 `pwd` + su `id -un 1000` -c "./scripts/bwctest.sh" + + - name: Upload test reports + if: ${{ always() }} + uses: actions/upload-artifact@v4 + continue-on-error: true + with: + name: test-reports-ubuntu-latest-${{ matrix.java }}-bwc + path: | + sql/build/reports/** + ppl/build/reports/** + core/build/reports/** + common/build/reports/** + opensearch/build/reports/** + integ-test/build/reports/** + protocol/build/reports/** + legacy/build/reports/** + plugin/build/reports/** + doctest/build/testclusters/docTestCluster-0/logs/* + integ-test/build/testclusters/*/logs/* diff --git a/.github/workflows/sql-test-workflow.yml b/.github/workflows/sql-test-workflow.yml index 46c1930cc8..9cbec80037 100644 --- a/.github/workflows/sql-test-workflow.yml +++ b/.github/workflows/sql-test-workflow.yml @@ -21,21 +21,22 @@ jobs: strategy: matrix: java: - - 11 - - 17 + - 21 runs-on: ubuntu-latest container: # using the same image which is used by opensearch-build team to build the OpenSearch Distribution # this image tag is subject to change as more dependencies and updates will arrive over time image: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-version-linux }} - # need to switch to root so that github actions can install runner binary on container without permission issues. 
- options: --user root + options: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-options }} steps: - - uses: actions/checkout@v3 + - name: Run start commands + run: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-command }} + + - uses: actions/checkout@v4 - name: Set up JDK ${{ matrix.java }} - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: 'temurin' java-version: ${{ matrix.java }} @@ -85,7 +86,7 @@ jobs: - name: Upload test reports if: always() - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: test-reports-${{ matrix.entry.java }} path: | diff --git a/.github/workflows/stalled.yml b/.github/workflows/stalled.yml new file mode 100644 index 0000000000..2b10140bbd --- /dev/null +++ b/.github/workflows/stalled.yml @@ -0,0 +1,28 @@ +name: Label Stalled PRs +on: + schedule: + - cron: '15 15 * * *' # Run every day at 15:15 UTC / 7:15 PST / 8:15 PDT +permissions: + pull-requests: write +jobs: + stale: + if: github.repository == 'opensearch-project/sql' + runs-on: ubuntu-latest + steps: + - name: GitHub App token + id: github_app_token + uses: tibdex/github-app-token@v2.1.0 + with: + app_id: ${{ secrets.APP_ID }} + private_key: ${{ secrets.APP_PRIVATE_KEY }} + installation_id: 22958780 + - name: Stale PRs + uses: actions/stale@v9 + with: + repo-token: ${{ steps.github_app_token.outputs.token }} + stale-pr-label: 'stalled' + stale-pr-message: 'This PR is stalled because it has been open for 30 days with no activity.' + days-before-pr-stale: 30 + days-before-issue-stale: -1 + days-before-pr-close: -1 + days-before-issue-close: -1 diff --git a/.gitignore b/.gitignore index 1b892036dd..b9775dea04 100644 --- a/.gitignore +++ b/.gitignore @@ -49,4 +49,5 @@ gen .worktrees http-client.env.json /doctest/sql-cli/ +/doctest/opensearch-job-scheduler/ .factorypath diff --git a/DEVELOPER_GUIDE.rst b/DEVELOPER_GUIDE.rst index c0d2f85668..ec00c587a6 100644 --- a/DEVELOPER_GUIDE.rst +++ b/DEVELOPER_GUIDE.rst @@ -405,7 +405,7 @@ Sample test class: Doctest >>>>>>> -Python doctest library makes our document executable which keeps it up-to-date to source code. The doc generator aforementioned served as scaffolding and generated many docs in short time. Now the examples inside is changed to doctest gradually. For more details please read `Doctest <./dev/Doctest.md>`_. +Python doctest library makes our document executable which keeps it up-to-date to source code. The doc generator aforementioned served as scaffolding and generated many docs in short time. Now the examples inside is changed to doctest gradually. For more details please read `testing-doctest <./docs/dev/testing-doctest.md>`_. Backports diff --git a/MAINTAINERS.md b/MAINTAINERS.md index 0ee07757c6..8012755450 100644 --- a/MAINTAINERS.md +++ b/MAINTAINERS.md @@ -5,7 +5,7 @@ This document contains a list of maintainers in this repo. See [opensearch-proje ## Current Maintainers | Maintainer | GitHub ID | Affiliation | -| ----------------- | ------------------------------------------------- | ----------- | +| ----------------- |-----------------------------------------------------| ----------- | | Eric Wei | [mengweieric](https://github.com/mengweieric) | Amazon | | Joshua Li | [joshuali925](https://github.com/joshuali925) | Amazon | | Shenoy Pratik | [ps48](https://github.com/ps48) | Amazon | @@ -16,11 +16,13 @@ This document contains a list of maintainers in this repo. 
See [opensearch-proje | Peter Fitzgibbons | [pjfitzgibbons](https://github.com/pjfitzgibbons) | Amazon | | Simeon Widdis | [swiddis](https://github.com/swiddis) | Amazon | | Chen Dai | [dai-chen](https://github.com/dai-chen) | Amazon | -| Vamsi Manohar | [vamsi-amazon](https://github.com/vamsi-amazon) | Amazon | +| Vamsi Manohar | [vmmusings](https://github.com/vmmusings) | Amazon | | Peng Huo | [penghuo](https://github.com/penghuo) | Amazon | | Sean Kao | [seankao-az](https://github.com/seankao-az) | Amazon | | Anirudha Jadhav | [anirudha](https://github.com/anirudha) | Amazon | | Tomoyuki Morita | [ykmr1224](https://github.com/ykmr1224) | Amazon | +| Lantao Jin | [LantaoJin](https://github.com/LantaoJin) | Amazon | +| Louis Chu | [noCharger](https://github.com/noCharger) | Amazon | | Max Ksyunz | [MaxKsyunz](https://github.com/MaxKsyunz) | Improving | | Yury Fridlyand | [Yury-Fridlyand](https://github.com/Yury-Fridlyand) | Improving | | Andrew Carbonetto | [acarbonetto](https://github.com/acarbonetto) | Improving | diff --git a/async-query-core/README.md b/async-query-core/README.md new file mode 100644 index 0000000000..08301c024d --- /dev/null +++ b/async-query-core/README.md @@ -0,0 +1,42 @@ +# async-query-core library + +This directory contains the async-query-core library, which implements the core logic of async-query and provides extension points for plugging in different implementations of data storage and other dependencies. +The `async-query` module provides an OpenSearch-index-based implementation of these extension points. + +## Types of queries +There are the following types of queries; the type is identified automatically by analyzing the query. +- BatchQuery: Executes a single query in Spark +- InteractiveQuery: Establishes a session and executes queries within a single Spark session +- IndexDMLQuery: Handles DROP/ALTER/VACUUM operations for Flint indices +- RefreshQuery: A one-time query request to refresh (update) a Flint index +- StreamingQuery: Continuously updates a Flint index within a single Spark session + +## Extension points +The following is the list of extension points for which the consumer of the library needs to provide its own implementation (an illustrative sketch of this pattern follows at the end of this README).
+ +- Data store interface + - [AsyncQueryJobMetadataStorageService](src/main/java/org/opensearch/sql/spark/asyncquery/AsyncQueryJobMetadataStorageService.java) + - [SessionStorageService](src/main/java/org/opensearch/sql/spark/execution/statestore/SessionStorageService.java) + - [StatementStorageService](src/main/java/org/opensearch/sql/spark/execution/statestore/StatementStorageService.java) + - [FlintIndexMetadataService](src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataService.java) + - [FlintIndexStateModelService](src/main/java/org/opensearch/sql/spark/flint/FlintIndexStateModelService.java) + - [IndexDMLResultStorageService](src/main/java/org/opensearch/sql/spark/flint/IndexDMLResultStorageService.java) +- Other + - [LeaseManager](src/main/java/org/opensearch/sql/spark/leasemanager/LeaseManager.java) + - [JobExecutionResponseReader](src/main/java/org/opensearch/sql/spark/response/JobExecutionResponseReader.java) + - [QueryIdProvider](src/main/java/org/opensearch/sql/spark/dispatcher/QueryIdProvider.java) + - [SessionIdProvider](src/main/java/org/opensearch/sql/spark/execution/session/SessionIdProvider.java) + - [SessionConfigSupplier](src/main/java/org/opensearch/sql/spark/execution/session/SessionConfigSupplier.java) + - [EMRServerlessClientFactory](src/main/java/org/opensearch/sql/spark/client/EMRServerlessClientFactory.java) + - [SparkExecutionEngineConfigSupplier](src/main/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigSupplier.java) + - [DataSourceSparkParameterComposer](src/main/java/org/opensearch/sql/spark/parameter/DataSourceSparkParameterComposer.java) + - [GeneralSparkParameterComposer](src/main/java/org/opensearch/sql/spark/parameter/GeneralSparkParameterComposer.java) + - [SparkSubmitParameterModifier](src/main/java/org/opensearch/sql/spark/config/SparkSubmitParameterModifier.java) (to be deprecated in favor of GeneralSparkParameterComposer) + +## Update Grammar files +This package uses ANTLR grammar files from the `opensearch-spark` and `Spark` repositories. +To update the grammar files, update the `build.gradle` file (the `downloadG4Files` task) as needed and run: +``` +./gradlew async-query-core:downloadG4Files +``` +This will overwrite the files under `src/main/antlr`.
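As a complement to the grammar-download step above, here is a minimal, self-contained sketch (not part of this PR) of how the parser classes ANTLR generates from these grammars could be driven to check whether a PPL statement parses. The package name follows the `-package` argument configured in `generateGrammarSource` in `async-query-core/build.gradle`; the library itself identifies and validates queries through its own utilities (for example `SQLQueryUtils` and the validation visitors mentioned in the coverage exclusions), so treat this purely as an illustration of the generated API. The sample statement uses upper-case keywords and identifiers because the lexer literals in `OpenSearchPPLLexer.g4` are upper case and this sketch does not add any case-insensitive stream wrapping.

```java
import org.antlr.v4.runtime.BailErrorStrategy;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.misc.ParseCancellationException;

// Classes generated by the antlr plugin from the downloaded grammars; the package matches the
// '-package' argument passed to generateGrammarSource in async-query-core/build.gradle.
import org.opensearch.sql.spark.antlr.parser.OpenSearchPPLLexer;
import org.opensearch.sql.spark.antlr.parser.OpenSearchPPLParser;

public class PplParseSketch {

  /** Returns true when the statement matches the grammar's entry rule `root : pplStatement? EOF ;`. */
  static boolean parsesAsPpl(String statement) {
    OpenSearchPPLLexer lexer = new OpenSearchPPLLexer(CharStreams.fromString(statement));
    OpenSearchPPLParser parser = new OpenSearchPPLParser(new CommonTokenStream(lexer));
    // Fail fast on the first syntax error instead of attempting recovery.
    parser.setErrorHandler(new BailErrorStrategy());
    try {
      parser.root();
      return true;
    } catch (ParseCancellationException e) {
      return false;
    }
  }

  public static void main(String[] args) {
    // Exercises the newly added TRENDLINE command from OpenSearchPPLParser.g4.
    System.out.println(parsesAsPpl("SOURCE=MY_INDEX | TRENDLINE SMA(5, CPU_USAGE) AS CPU_SMA"));
  }
}
```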
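Returning to the extension points listed earlier, the sketch below illustrates the intended pattern: the consumer supplies its own storage implementation and the library stays agnostic about where state lives. To keep the example self-contained it uses an invented stand-in interface; the real contracts are the linked interfaces such as `AsyncQueryJobMetadataStorageService`, whose method signatures differ from this simplified shape.

```java
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap;

// Stand-in for one of the storage extension points above. This interface is invented for the
// example only; the real AsyncQueryJobMetadataStorageService in async-query-core defines the
// authoritative (and richer) signatures.
interface JobMetadataStore {
  void store(String queryId, String metadataJson);

  Optional<String> load(String queryId);
}

// A consumer-provided implementation backed by an in-memory map. The `async-query` module plays
// this role for OpenSearch by persisting the same kind of state to system indices instead.
class InMemoryJobMetadataStore implements JobMetadataStore {
  private final Map<String, String> metadataByQueryId = new ConcurrentHashMap<>();

  @Override
  public void store(String queryId, String metadataJson) {
    metadataByQueryId.put(queryId, metadataJson);
  }

  @Override
  public Optional<String> load(String queryId) {
    return Optional.ofNullable(metadataByQueryId.get(queryId));
  }
}
```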
\ No newline at end of file diff --git a/async-query-core/build.gradle b/async-query-core/build.gradle index 3673872988..37bf6748c9 100644 --- a/async-query-core/build.gradle +++ b/async-query-core/build.gradle @@ -21,14 +21,18 @@ tasks.register('downloadG4Files', Exec) { executable 'curl' - args '-o', 'src/main/antlr/FlintSparkSqlExtensions.g4', 'https://raw.githubusercontent.com/opensearch-project/opensearch-spark/main/flint-spark-integration/src/main/antlr4/FlintSparkSqlExtensions.g4' - args '-o', 'src/main/antlr/SparkSqlBase.g4', 'https://raw.githubusercontent.com/opensearch-project/opensearch-spark/main/flint-spark-integration/src/main/antlr4/SparkSqlBase.g4' - args '-o', 'src/main/antlr/SqlBaseParser.g4', 'https://raw.githubusercontent.com/apache/spark/master/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4' - args '-o', 'src/main/antlr/SqlBaseLexer.g4', 'https://raw.githubusercontent.com/apache/spark/master/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4' + def opensearchSparkBranch = "0.6" + def apacheSparkVersionTag = "v3.5.1" + args '-o', 'src/main/antlr/FlintSparkSqlExtensions.g4', "https://raw.githubusercontent.com/opensearch-project/opensearch-spark/${opensearchSparkBranch}/flint-spark-integration/src/main/antlr4/FlintSparkSqlExtensions.g4" + args '-o', 'src/main/antlr/SparkSqlBase.g4', "https://raw.githubusercontent.com/opensearch-project/opensearch-spark/${opensearchSparkBranch}/flint-spark-integration/src/main/antlr4/SparkSqlBase.g4" + args '-o', 'src/main/antlr/SqlBaseParser.g4', "https://raw.githubusercontent.com/apache/spark/${apacheSparkVersionTag}/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4" + args '-o', 'src/main/antlr/SqlBaseLexer.g4', "https://raw.githubusercontent.com/apache/spark/${apacheSparkVersionTag}/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4" + args '-o', 'src/main/antlr/OpenSearchPPLParser.g4', "https://raw.githubusercontent.com/opensearch-project/opensearch-spark/${opensearchSparkBranch}/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4" + args '-o', 'src/main/antlr/OpenSearchPPLLexer.g4', "https://raw.githubusercontent.com/opensearch-project/opensearch-spark/${opensearchSparkBranch}/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4" } generateGrammarSource { - arguments += ['-visitor', '-package', 'org.opensearch.sql.asyncquery.antlr.parser'] + arguments += ['-visitor', '-package', 'org.opensearch.sql.spark.antlr.parser'] source = sourceSets.main.antlr outputDirectory = file("build/generated-src/antlr/main/org/opensearch/sql/asyncquery/antlr/parser") } @@ -38,23 +42,20 @@ configurations { } } -// Make sure the downloadG4File task runs before the generateGrammarSource task -generateGrammarSource.dependsOn downloadG4Files - dependencies { antlr "org.antlr:antlr4:4.7.1" - implementation group: 'com.google.guava', name: 'guava', version: '32.0.1-jre' - implementation group: 'com.fasterxml.jackson.core', name: 'jackson-core', version: "${versions.jackson}" - implementation group: 'com.fasterxml.jackson.core', name: 'jackson-databind', version: "${versions.jackson_databind}" - implementation group: 'com.fasterxml.jackson.dataformat', name: 'jackson-dataformat-cbor', version: "${versions.jackson}" + implementation project(':core') + implementation project(':spark') // TODO: dependency to spark should be eliminated + implementation project(':datasources') // TODO: dependency to datasources should be eliminated + 
implementation 'org.json:json:20231013' implementation 'com.google.code.gson:gson:2.8.9' testImplementation(platform("org.junit:junit-bom:5.9.3")) testCompileOnly('org.junit.jupiter:junit-jupiter') - testImplementation group: 'org.mockito', name: 'mockito-core', version: '5.7.0' - testImplementation group: 'org.mockito', name: 'mockito-junit-jupiter', version: '5.7.0' + testImplementation 'org.mockito:mockito-core:5.7.0' + testImplementation 'org.mockito:mockito-junit-jupiter:5.7.0' testCompileOnly('junit:junit:4.13.1') { exclude group: 'org.hamcrest', module: 'hamcrest-core' @@ -108,7 +109,20 @@ jacocoTestCoverageVerification { violationRules { rule { element = 'CLASS' - excludes = [] + // TODO: Add unit tests in async-query-core and remove exclusions + excludes = [ + 'org.opensearch.sql.spark.asyncquery.model.*', + 'org.opensearch.sql.spark.data.constants.*', + 'org.opensearch.sql.spark.dispatcher.model.*', + 'org.opensearch.sql.spark.dispatcher.*', + 'org.opensearch.sql.spark.execution.session.*', + 'org.opensearch.sql.spark.execution.statement.*', + 'org.opensearch.sql.spark.flint.*', + 'org.opensearch.sql.spark.flint.operation.*', + 'org.opensearch.sql.spark.rest.*', + 'org.opensearch.sql.spark.utils.SQLQueryUtils.*', + 'org.opensearch.sql.spark.validator.SQLQueryValidationVisitor' + ] limit { counter = 'LINE' minimum = 1.0 diff --git a/async-query-core/src/main/antlr/.gitkeep b/async-query-core/src/main/antlr/.gitkeep deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/spark/src/main/antlr/FlintSparkSqlExtensions.g4 b/async-query-core/src/main/antlr/FlintSparkSqlExtensions.g4 similarity index 93% rename from spark/src/main/antlr/FlintSparkSqlExtensions.g4 rename to async-query-core/src/main/antlr/FlintSparkSqlExtensions.g4 index dc097d596d..46e814e9f5 100644 --- a/spark/src/main/antlr/FlintSparkSqlExtensions.g4 +++ b/async-query-core/src/main/antlr/FlintSparkSqlExtensions.g4 @@ -156,7 +156,10 @@ indexManagementStatement ; showFlintIndexStatement - : SHOW FLINT (INDEX | INDEXES) IN catalogDb=multipartIdentifier + : SHOW FLINT (INDEX | INDEXES) + IN catalogDb=multipartIdentifier #showFlintIndex + | SHOW FLINT (INDEX | INDEXES) EXTENDED + IN catalogDb=multipartIdentifier #showFlintIndexExtended ; indexJobManagementStatement @@ -188,7 +191,7 @@ indexColTypeList ; indexColType - : identifier skipType=(PARTITION | VALUE_SET | MIN_MAX | BLOOM_FILTER) + : multipartIdentifier skipType=(PARTITION | VALUE_SET | MIN_MAX | BLOOM_FILTER) (LEFT_PAREN skipParams RIGHT_PAREN)? 
; diff --git a/async-query-core/src/main/antlr/OpenSearchPPLLexer.g4 b/async-query-core/src/main/antlr/OpenSearchPPLLexer.g4 new file mode 100644 index 0000000000..cb323f7942 --- /dev/null +++ b/async-query-core/src/main/antlr/OpenSearchPPLLexer.g4 @@ -0,0 +1,496 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + + +lexer grammar OpenSearchPPLLexer; + +channels { WHITESPACE, ERRORCHANNEL } + + +// COMMAND KEYWORDS +SEARCH: 'SEARCH'; +DESCRIBE: 'DESCRIBE'; +SHOW: 'SHOW'; +FROM: 'FROM'; +WHERE: 'WHERE'; +FIELDS: 'FIELDS'; +RENAME: 'RENAME'; +STATS: 'STATS'; +EVENTSTATS: 'EVENTSTATS'; +DEDUP: 'DEDUP'; +SORT: 'SORT'; +EVAL: 'EVAL'; +HEAD: 'HEAD'; +TOP: 'TOP'; +RARE: 'RARE'; +PARSE: 'PARSE'; +METHOD: 'METHOD'; +REGEX: 'REGEX'; +PUNCT: 'PUNCT'; +GROK: 'GROK'; +PATTERN: 'PATTERN'; +PATTERNS: 'PATTERNS'; +NEW_FIELD: 'NEW_FIELD'; +KMEANS: 'KMEANS'; +AD: 'AD'; +ML: 'ML'; +FILLNULL: 'FILLNULL'; +EXPAND: 'EXPAND'; +FLATTEN: 'FLATTEN'; +TRENDLINE: 'TRENDLINE'; + +//Native JOIN KEYWORDS +JOIN: 'JOIN'; +ON: 'ON'; +INNER: 'INNER'; +OUTER: 'OUTER'; +FULL: 'FULL'; +SEMI: 'SEMI'; +ANTI: 'ANTI'; +CROSS: 'CROSS'; +LEFT_HINT: 'HINT.LEFT'; +RIGHT_HINT: 'HINT.RIGHT'; + +//CORRELATION KEYWORDS +CORRELATE: 'CORRELATE'; +SELF: 'SELF'; +EXACT: 'EXACT'; +APPROXIMATE: 'APPROXIMATE'; +SCOPE: 'SCOPE'; +MAPPING: 'MAPPING'; + +//EXPLAIN KEYWORDS +EXPLAIN: 'EXPLAIN'; +FORMATTED: 'FORMATTED'; +COST: 'COST'; +CODEGEN: 'CODEGEN'; +EXTENDED: 'EXTENDED'; +SIMPLE: 'SIMPLE'; + +// COMMAND ASSIST KEYWORDS +AS: 'AS'; +BY: 'BY'; +SOURCE: 'SOURCE'; +INDEX: 'INDEX'; +D: 'D'; +DESC: 'DESC'; +DATASOURCES: 'DATASOURCES'; +USING: 'USING'; +WITH: 'WITH'; + +// FIELD KEYWORDS +AUTO: 'AUTO'; +STR: 'STR'; +IP: 'IP'; +NUM: 'NUM'; + + +// FIELDSUMMARY keywords +FIELDSUMMARY: 'FIELDSUMMARY'; +INCLUDEFIELDS: 'INCLUDEFIELDS'; +NULLS: 'NULLS'; + +//TRENDLINE KEYWORDS +SMA: 'SMA'; +WMA: 'WMA'; + +// ARGUMENT KEYWORDS +KEEPEMPTY: 'KEEPEMPTY'; +CONSECUTIVE: 'CONSECUTIVE'; +DEDUP_SPLITVALUES: 'DEDUP_SPLITVALUES'; +PARTITIONS: 'PARTITIONS'; +ALLNUM: 'ALLNUM'; +DELIM: 'DELIM'; +CENTROIDS: 'CENTROIDS'; +ITERATIONS: 'ITERATIONS'; +DISTANCE_TYPE: 'DISTANCE_TYPE'; +NUMBER_OF_TREES: 'NUMBER_OF_TREES'; +SHINGLE_SIZE: 'SHINGLE_SIZE'; +SAMPLE_SIZE: 'SAMPLE_SIZE'; +OUTPUT_AFTER: 'OUTPUT_AFTER'; +TIME_DECAY: 'TIME_DECAY'; +ANOMALY_RATE: 'ANOMALY_RATE'; +CATEGORY_FIELD: 'CATEGORY_FIELD'; +TIME_FIELD: 'TIME_FIELD'; +TIME_ZONE: 'TIME_ZONE'; +TRAINING_DATA_SIZE: 'TRAINING_DATA_SIZE'; +ANOMALY_SCORE_THRESHOLD: 'ANOMALY_SCORE_THRESHOLD'; +APPEND: 'APPEND'; + +// COMPARISON FUNCTION KEYWORDS +CASE: 'CASE'; +ELSE: 'ELSE'; +IN: 'IN'; +EXISTS: 'EXISTS'; + +// LOGICAL KEYWORDS +NOT: 'NOT'; +OR: 'OR'; +AND: 'AND'; +XOR: 'XOR'; +TRUE: 'TRUE'; +FALSE: 'FALSE'; +REGEXP: 'REGEXP'; + +// DATETIME, INTERVAL AND UNIT KEYWORDS +CONVERT_TZ: 'CONVERT_TZ'; +DATETIME: 'DATETIME'; +DAY: 'DAY'; +DAY_HOUR: 'DAY_HOUR'; +DAY_MICROSECOND: 'DAY_MICROSECOND'; +DAY_MINUTE: 'DAY_MINUTE'; +DAY_OF_YEAR: 'DAY_OF_YEAR'; +DAY_SECOND: 'DAY_SECOND'; +HOUR: 'HOUR'; +HOUR_MICROSECOND: 'HOUR_MICROSECOND'; +HOUR_MINUTE: 'HOUR_MINUTE'; +HOUR_OF_DAY: 'HOUR_OF_DAY'; +HOUR_SECOND: 'HOUR_SECOND'; +INTERVAL: 'INTERVAL'; +MICROSECOND: 'MICROSECOND'; +MILLISECOND: 'MILLISECOND'; +MINUTE: 'MINUTE'; +MINUTE_MICROSECOND: 'MINUTE_MICROSECOND'; +MINUTE_OF_DAY: 'MINUTE_OF_DAY'; +MINUTE_OF_HOUR: 'MINUTE_OF_HOUR'; +MINUTE_SECOND: 'MINUTE_SECOND'; +MONTH: 'MONTH'; +MONTH_OF_YEAR: 'MONTH_OF_YEAR'; +QUARTER: 'QUARTER'; +SECOND: 'SECOND'; +SECOND_MICROSECOND: 'SECOND_MICROSECOND'; 
+SECOND_OF_MINUTE: 'SECOND_OF_MINUTE'; +WEEK: 'WEEK'; +WEEK_OF_YEAR: 'WEEK_OF_YEAR'; +YEAR: 'YEAR'; +YEAR_MONTH: 'YEAR_MONTH'; + +// DATASET TYPES +DATAMODEL: 'DATAMODEL'; +LOOKUP: 'LOOKUP'; +SAVEDSEARCH: 'SAVEDSEARCH'; + +// CONVERTED DATA TYPES +INT: 'INT'; +INTEGER: 'INTEGER'; +DOUBLE: 'DOUBLE'; +LONG: 'LONG'; +FLOAT: 'FLOAT'; +STRING: 'STRING'; +BOOLEAN: 'BOOLEAN'; + +// SPECIAL CHARACTERS AND OPERATORS +PIPE: '|'; +COMMA: ','; +DOT: '.'; +EQUAL: '='; +GREATER: '>'; +LESS: '<'; +NOT_GREATER: '<' '='; +NOT_LESS: '>' '='; +NOT_EQUAL: '!' '='; +PLUS: '+'; +MINUS: '-'; +STAR: '*'; +DIVIDE: '/'; +MODULE: '%'; +EXCLAMATION_SYMBOL: '!'; +COLON: ':'; +LT_PRTHS: '('; +RT_PRTHS: ')'; +LT_SQR_PRTHS: '['; +RT_SQR_PRTHS: ']'; +SINGLE_QUOTE: '\''; +DOUBLE_QUOTE: '"'; +BACKTICK: '`'; +ARROW: '->'; + +// Operators. Bit + +BIT_NOT_OP: '~'; +BIT_AND_OP: '&'; +BIT_XOR_OP: '^'; + +// AGGREGATIONS +AVG: 'AVG'; +COUNT: 'COUNT'; +DISTINCT_COUNT: 'DISTINCT_COUNT'; +ESTDC: 'ESTDC'; +ESTDC_ERROR: 'ESTDC_ERROR'; +MAX: 'MAX'; +MEAN: 'MEAN'; +MEDIAN: 'MEDIAN'; +MIN: 'MIN'; +MODE: 'MODE'; +RANGE: 'RANGE'; +STDEV: 'STDEV'; +STDEVP: 'STDEVP'; +SUM: 'SUM'; +SUMSQ: 'SUMSQ'; +VAR_SAMP: 'VAR_SAMP'; +VAR_POP: 'VAR_POP'; +STDDEV_SAMP: 'STDDEV_SAMP'; +STDDEV_POP: 'STDDEV_POP'; +PERCENTILE: 'PERCENTILE'; +PERCENTILE_APPROX: 'PERCENTILE_APPROX'; +TAKE: 'TAKE'; +FIRST: 'FIRST'; +LAST: 'LAST'; +LIST: 'LIST'; +VALUES: 'VALUES'; +EARLIEST: 'EARLIEST'; +EARLIEST_TIME: 'EARLIEST_TIME'; +LATEST: 'LATEST'; +LATEST_TIME: 'LATEST_TIME'; +PER_DAY: 'PER_DAY'; +PER_HOUR: 'PER_HOUR'; +PER_MINUTE: 'PER_MINUTE'; +PER_SECOND: 'PER_SECOND'; +RATE: 'RATE'; +SPARKLINE: 'SPARKLINE'; +C: 'C'; +DC: 'DC'; + +// BASIC FUNCTIONS +ABS: 'ABS'; +CBRT: 'CBRT'; +CEIL: 'CEIL'; +CEILING: 'CEILING'; +CONV: 'CONV'; +CRC32: 'CRC32'; +E: 'E'; +EXP: 'EXP'; +FLOOR: 'FLOOR'; +LN: 'LN'; +LOG: 'LOG'; +LOG10: 'LOG10'; +LOG2: 'LOG2'; +MOD: 'MOD'; +PI: 'PI'; +POSITION: 'POSITION'; +POW: 'POW'; +POWER: 'POWER'; +RAND: 'RAND'; +ROUND: 'ROUND'; +SIGN: 'SIGN'; +SIGNUM: 'SIGNUM'; +SQRT: 'SQRT'; +TRUNCATE: 'TRUNCATE'; + +// TRIGONOMETRIC FUNCTIONS +ACOS: 'ACOS'; +ASIN: 'ASIN'; +ATAN: 'ATAN'; +ATAN2: 'ATAN2'; +COS: 'COS'; +COT: 'COT'; +DEGREES: 'DEGREES'; +RADIANS: 'RADIANS'; +SIN: 'SIN'; +TAN: 'TAN'; + +// CRYPTOGRAPHIC FUNCTIONS +MD5: 'MD5'; +SHA1: 'SHA1'; +SHA2: 'SHA2'; + +// DATE AND TIME FUNCTIONS +ADDDATE: 'ADDDATE'; +ADDTIME: 'ADDTIME'; +CURDATE: 'CURDATE'; +CURRENT_DATE: 'CURRENT_DATE'; +CURRENT_TIME: 'CURRENT_TIME'; +CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'; +CURRENT_TIMEZONE: 'CURRENT_TIMEZONE'; +CURTIME: 'CURTIME'; +DATE: 'DATE'; +DATEDIFF: 'DATEDIFF'; +DATE_ADD: 'DATE_ADD'; +DATE_FORMAT: 'DATE_FORMAT'; +DATE_SUB: 'DATE_SUB'; +DAYNAME: 'DAYNAME'; +DAYOFMONTH: 'DAYOFMONTH'; +DAYOFWEEK: 'DAYOFWEEK'; +DAYOFYEAR: 'DAYOFYEAR'; +DAY_OF_MONTH: 'DAY_OF_MONTH'; +DAY_OF_WEEK: 'DAY_OF_WEEK'; +DURATION: 'DURATION'; +EXTRACT: 'EXTRACT'; +FROM_DAYS: 'FROM_DAYS'; +FROM_UNIXTIME: 'FROM_UNIXTIME'; +GET_FORMAT: 'GET_FORMAT'; +LAST_DAY: 'LAST_DAY'; +LOCALTIME: 'LOCALTIME'; +LOCALTIMESTAMP: 'LOCALTIMESTAMP'; +MAKEDATE: 'MAKEDATE'; +MAKE_DATE: 'MAKE_DATE'; +MAKETIME: 'MAKETIME'; +MONTHNAME: 'MONTHNAME'; +NOW: 'NOW'; +PERIOD_ADD: 'PERIOD_ADD'; +PERIOD_DIFF: 'PERIOD_DIFF'; +SEC_TO_TIME: 'SEC_TO_TIME'; +STR_TO_DATE: 'STR_TO_DATE'; +SUBDATE: 'SUBDATE'; +SUBTIME: 'SUBTIME'; +SYSDATE: 'SYSDATE'; +TIME: 'TIME'; +TIMEDIFF: 'TIMEDIFF'; +TIMESTAMP: 'TIMESTAMP'; +TIMESTAMPADD: 'TIMESTAMPADD'; +TIMESTAMPDIFF: 'TIMESTAMPDIFF'; +TIME_FORMAT: 'TIME_FORMAT'; +TIME_TO_SEC: 'TIME_TO_SEC'; +TO_DAYS: 
'TO_DAYS'; +TO_SECONDS: 'TO_SECONDS'; +UNIX_TIMESTAMP: 'UNIX_TIMESTAMP'; +UTC_DATE: 'UTC_DATE'; +UTC_TIME: 'UTC_TIME'; +UTC_TIMESTAMP: 'UTC_TIMESTAMP'; +WEEKDAY: 'WEEKDAY'; +YEARWEEK: 'YEARWEEK'; + +// TEXT FUNCTIONS +SUBSTR: 'SUBSTR'; +SUBSTRING: 'SUBSTRING'; +LTRIM: 'LTRIM'; +RTRIM: 'RTRIM'; +TRIM: 'TRIM'; +TO: 'TO'; +LOWER: 'LOWER'; +UPPER: 'UPPER'; +CONCAT: 'CONCAT'; +CONCAT_WS: 'CONCAT_WS'; +LENGTH: 'LENGTH'; +STRCMP: 'STRCMP'; +RIGHT: 'RIGHT'; +LEFT: 'LEFT'; +ASCII: 'ASCII'; +LOCATE: 'LOCATE'; +REPLACE: 'REPLACE'; +REVERSE: 'REVERSE'; +CAST: 'CAST'; +ISEMPTY: 'ISEMPTY'; +ISBLANK: 'ISBLANK'; + +// JSON TEXT FUNCTIONS +JSON: 'JSON'; +JSON_OBJECT: 'JSON_OBJECT'; +JSON_ARRAY: 'JSON_ARRAY'; +JSON_ARRAY_LENGTH: 'JSON_ARRAY_LENGTH'; +TO_JSON_STRING: 'TO_JSON_STRING'; +JSON_EXTRACT: 'JSON_EXTRACT'; +JSON_KEYS: 'JSON_KEYS'; +JSON_VALID: 'JSON_VALID'; +//JSON_APPEND: 'JSON_APPEND'; +//JSON_DELETE: 'JSON_DELETE'; +//JSON_EXTEND: 'JSON_EXTEND'; +//JSON_SET: 'JSON_SET'; +//JSON_ARRAY_ALL_MATCH: 'JSON_ARRAY_ALL_MATCH'; +//JSON_ARRAY_ANY_MATCH: 'JSON_ARRAY_ANY_MATCH'; +//JSON_ARRAY_FILTER: 'JSON_ARRAY_FILTER'; +//JSON_ARRAY_MAP: 'JSON_ARRAY_MAP'; +//JSON_ARRAY_REDUCE: 'JSON_ARRAY_REDUCE'; + +// COLLECTION FUNCTIONS +ARRAY: 'ARRAY'; +ARRAY_LENGTH: 'ARRAY_LENGTH'; + +// LAMBDA FUNCTIONS +//EXISTS: 'EXISTS'; +FORALL: 'FORALL'; +FILTER: 'FILTER'; +TRANSFORM: 'TRANSFORM'; +REDUCE: 'REDUCE'; + +// BOOL FUNCTIONS +LIKE: 'LIKE'; +ISNULL: 'ISNULL'; +ISNOTNULL: 'ISNOTNULL'; +ISPRESENT: 'ISPRESENT'; +BETWEEN: 'BETWEEN'; +CIDRMATCH: 'CIDRMATCH'; + +// FLOWCONTROL FUNCTIONS +IFNULL: 'IFNULL'; +NULLIF: 'NULLIF'; +IF: 'IF'; +TYPEOF: 'TYPEOF'; + +//OTHER CONDITIONAL EXPRESSIONS +COALESCE: 'COALESCE'; + +// RELEVANCE FUNCTIONS AND PARAMETERS +MATCH: 'MATCH'; +MATCH_PHRASE: 'MATCH_PHRASE'; +MATCH_PHRASE_PREFIX: 'MATCH_PHRASE_PREFIX'; +MATCH_BOOL_PREFIX: 'MATCH_BOOL_PREFIX'; +SIMPLE_QUERY_STRING: 'SIMPLE_QUERY_STRING'; +MULTI_MATCH: 'MULTI_MATCH'; +QUERY_STRING: 'QUERY_STRING'; + +ALLOW_LEADING_WILDCARD: 'ALLOW_LEADING_WILDCARD'; +ANALYZE_WILDCARD: 'ANALYZE_WILDCARD'; +ANALYZER: 'ANALYZER'; +AUTO_GENERATE_SYNONYMS_PHRASE_QUERY:'AUTO_GENERATE_SYNONYMS_PHRASE_QUERY'; +BOOST: 'BOOST'; +CUTOFF_FREQUENCY: 'CUTOFF_FREQUENCY'; +DEFAULT_FIELD: 'DEFAULT_FIELD'; +DEFAULT_OPERATOR: 'DEFAULT_OPERATOR'; +ENABLE_POSITION_INCREMENTS: 'ENABLE_POSITION_INCREMENTS'; +ESCAPE: 'ESCAPE'; +FLAGS: 'FLAGS'; +FUZZY_MAX_EXPANSIONS: 'FUZZY_MAX_EXPANSIONS'; +FUZZY_PREFIX_LENGTH: 'FUZZY_PREFIX_LENGTH'; +FUZZY_TRANSPOSITIONS: 'FUZZY_TRANSPOSITIONS'; +FUZZY_REWRITE: 'FUZZY_REWRITE'; +FUZZINESS: 'FUZZINESS'; +LENIENT: 'LENIENT'; +LOW_FREQ_OPERATOR: 'LOW_FREQ_OPERATOR'; +MAX_DETERMINIZED_STATES: 'MAX_DETERMINIZED_STATES'; +MAX_EXPANSIONS: 'MAX_EXPANSIONS'; +MINIMUM_SHOULD_MATCH: 'MINIMUM_SHOULD_MATCH'; +OPERATOR: 'OPERATOR'; +PHRASE_SLOP: 'PHRASE_SLOP'; +PREFIX_LENGTH: 'PREFIX_LENGTH'; +QUOTE_ANALYZER: 'QUOTE_ANALYZER'; +QUOTE_FIELD_SUFFIX: 'QUOTE_FIELD_SUFFIX'; +REWRITE: 'REWRITE'; +SLOP: 'SLOP'; +TIE_BREAKER: 'TIE_BREAKER'; +TYPE: 'TYPE'; +ZERO_TERMS_QUERY: 'ZERO_TERMS_QUERY'; + +// SPAN KEYWORDS +SPAN: 'SPAN'; +MS: 'MS'; +S: 'S'; +M: 'M'; +H: 'H'; +W: 'W'; +Q: 'Q'; +Y: 'Y'; + + +// LITERALS AND VALUES +//STRING_LITERAL: DQUOTA_STRING | SQUOTA_STRING | BQUOTA_STRING; +ID: ID_LITERAL; +CLUSTER: CLUSTER_PREFIX_LITERAL; +INTEGER_LITERAL: DEC_DIGIT+; +DECIMAL_LITERAL: (DEC_DIGIT+)? '.' 
DEC_DIGIT+; + +fragment DATE_SUFFIX: ([\-.][*0-9]+)+; +fragment ID_LITERAL: [@*A-Z]+?[*A-Z_\-0-9]*; +fragment CLUSTER_PREFIX_LITERAL: [*A-Z]+?[*A-Z_\-0-9]* COLON; +ID_DATE_SUFFIX: CLUSTER_PREFIX_LITERAL? ID_LITERAL DATE_SUFFIX; +DQUOTA_STRING: '"' ( '\\'. | '""' | ~('"'| '\\') )* '"'; +SQUOTA_STRING: '\'' ('\\'. | '\'\'' | ~('\'' | '\\'))* '\''; +BQUOTA_STRING: '`' ( '\\'. | '``' | ~('`'|'\\'))* '`'; +fragment DEC_DIGIT: [0-9]; + +LINE_COMMENT: '//' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN); +BLOCK_COMMENT: '/*' .*? '*/' -> channel(HIDDEN); + +ERROR_RECOGNITION: . -> channel(ERRORCHANNEL); diff --git a/async-query-core/src/main/antlr/OpenSearchPPLParser.g4 b/async-query-core/src/main/antlr/OpenSearchPPLParser.g4 new file mode 100644 index 0000000000..133cf64be5 --- /dev/null +++ b/async-query-core/src/main/antlr/OpenSearchPPLParser.g4 @@ -0,0 +1,1174 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +parser grammar OpenSearchPPLParser; + + +options { tokenVocab = OpenSearchPPLLexer; } +root + : pplStatement? EOF + ; + +// statement +pplStatement + : dmlStatement + ; + +dmlStatement + : (explainCommand PIPE)? queryStatement + ; + +queryStatement + : pplCommands (PIPE commands)* + ; + +subSearch + : searchCommand (PIPE commands)* + ; + +// commands +pplCommands + : searchCommand + | describeCommand + ; + +commands + : whereCommand + | correlateCommand + | joinCommand + | fieldsCommand + | statsCommand + | dedupCommand + | sortCommand + | headCommand + | topCommand + | rareCommand + | evalCommand + | grokCommand + | parseCommand + | patternsCommand + | lookupCommand + | renameCommand + | fillnullCommand + | fieldsummaryCommand + | flattenCommand + | expandCommand + | trendlineCommand + ; + +commandName + : SEARCH + | DESCRIBE + | SHOW + | AD + | ML + | KMEANS + | WHERE + | CORRELATE + | JOIN + | FIELDS + | STATS + | EVENTSTATS + | DEDUP + | EXPLAIN + | SORT + | HEAD + | TOP + | RARE + | EVAL + | GROK + | PARSE + | PATTERNS + | LOOKUP + | RENAME + | EXPAND + | FILLNULL + | FIELDSUMMARY + | FLATTEN + | TRENDLINE + ; + +searchCommand + : (SEARCH)? fromClause # searchFrom + | (SEARCH)? fromClause logicalExpression # searchFromFilter + | (SEARCH)? logicalExpression fromClause # searchFilterFrom + ; + +fieldsummaryCommand + : FIELDSUMMARY (fieldsummaryParameter)* + ; + +fieldsummaryParameter + : INCLUDEFIELDS EQUAL fieldList # fieldsummaryIncludeFields + | NULLS EQUAL booleanLiteral # fieldsummaryNulls + ; + +describeCommand + : DESCRIBE tableSourceClause + ; + +explainCommand + : EXPLAIN explainMode + ; + +explainMode + : FORMATTED + | COST + | CODEGEN + | EXTENDED + | SIMPLE + ; + +showDataSourcesCommand + : SHOW DATASOURCES + ; + +whereCommand + : WHERE logicalExpression + ; + +correlateCommand + : CORRELATE correlationType FIELDS LT_PRTHS fieldList RT_PRTHS (scopeClause)? mappingList + ; + +correlationType + : SELF + | EXACT + | APPROXIMATE + ; + +scopeClause + : SCOPE LT_PRTHS fieldExpression COMMA value = literalValue (unit = timespanUnit)? RT_PRTHS + ; + +mappingList + : MAPPING LT_PRTHS ( mappingClause (COMMA mappingClause)* ) RT_PRTHS + ; + +mappingClause + : left = qualifiedName comparisonOperator right = qualifiedName # mappingCompareExpr + ; + +fieldsCommand + : FIELDS (PLUS | MINUS)? fieldList + ; + +renameCommand + : RENAME renameClasue (COMMA renameClasue)* + ; + +statsCommand + : (STATS | EVENTSTATS) (PARTITIONS EQUAL partitions = integerLiteral)? (ALLNUM EQUAL allnum = booleanLiteral)? (DELIM EQUAL delim = stringLiteral)? 
statsAggTerm (COMMA statsAggTerm)* (statsByClause)? (DEDUP_SPLITVALUES EQUAL dedupsplit = booleanLiteral)? + ; + +dedupCommand + : DEDUP (number = integerLiteral)? fieldList (KEEPEMPTY EQUAL keepempty = booleanLiteral)? (CONSECUTIVE EQUAL consecutive = booleanLiteral)? + ; + +sortCommand + : SORT sortbyClause + ; + +evalCommand + : EVAL evalClause (COMMA evalClause)* + ; + +headCommand + : HEAD (number = integerLiteral)? (FROM from = integerLiteral)? + ; + +topCommand + : TOP (number = integerLiteral)? fieldList (byClause)? + ; + +rareCommand + : RARE fieldList (byClause)? + ; + +grokCommand + : GROK (source_field = expression) (pattern = stringLiteral) + ; + +parseCommand + : PARSE (source_field = expression) (pattern = stringLiteral) + ; + +patternsCommand + : PATTERNS (patternsParameter)* (source_field = expression) + ; + +patternsParameter + : (NEW_FIELD EQUAL new_field = stringLiteral) + | (PATTERN EQUAL pattern = stringLiteral) + ; + +patternsMethod + : PUNCT + | REGEX + ; + +// lookup +lookupCommand + : LOOKUP tableSource lookupMappingList ((APPEND | REPLACE) outputCandidateList)? + ; + +lookupMappingList + : lookupPair (COMMA lookupPair)* + ; + +outputCandidateList + : lookupPair (COMMA lookupPair)* + ; + + // The lookup pair will generate a K-V pair. + // The format is Key -> Alias(outputFieldName, inputField), Value -> outputField. For example: + // 1. When lookupPair is "name AS cName", the key will be Alias(cName, Field(name)), the value will be Field(cName) + // 2. When lookupPair is "dept", the key is Alias(dept, Field(dept)), value is Field(dept) +lookupPair + : inputField = fieldExpression (AS outputField = fieldExpression)? + ; + +fillnullCommand + : FILLNULL (fillNullWithTheSameValue + | fillNullWithFieldVariousValues) + ; + + fillNullWithTheSameValue + : WITH nullReplacement IN nullableField (COMMA nullableField)* + ; + + fillNullWithFieldVariousValues + : USING nullableField EQUAL nullReplacement (COMMA nullableField EQUAL nullReplacement)* + ; + + + nullableField + : fieldExpression + ; + + nullReplacement + : expression + ; + +expandCommand + : EXPAND fieldExpression (AS alias = qualifiedName)? + ; + +flattenCommand + : FLATTEN fieldExpression (AS alias = identifierSeq)? + ; + +trendlineCommand + : TRENDLINE (SORT sortField)? trendlineClause (trendlineClause)* + ; + +trendlineClause + : trendlineType LT_PRTHS numberOfDataPoints = INTEGER_LITERAL COMMA field = fieldExpression RT_PRTHS (AS alias = qualifiedName)? 
+ ; + +trendlineType + : SMA + | WMA + ; + +kmeansCommand + : KMEANS (kmeansParameter)* + ; + +kmeansParameter + : (CENTROIDS EQUAL centroids = integerLiteral) + | (ITERATIONS EQUAL iterations = integerLiteral) + | (DISTANCE_TYPE EQUAL distance_type = stringLiteral) + ; + +adCommand + : AD (adParameter)* + ; + +adParameter + : (NUMBER_OF_TREES EQUAL number_of_trees = integerLiteral) + | (SHINGLE_SIZE EQUAL shingle_size = integerLiteral) + | (SAMPLE_SIZE EQUAL sample_size = integerLiteral) + | (OUTPUT_AFTER EQUAL output_after = integerLiteral) + | (TIME_DECAY EQUAL time_decay = decimalLiteral) + | (ANOMALY_RATE EQUAL anomaly_rate = decimalLiteral) + | (CATEGORY_FIELD EQUAL category_field = stringLiteral) + | (TIME_FIELD EQUAL time_field = stringLiteral) + | (DATE_FORMAT EQUAL date_format = stringLiteral) + | (TIME_ZONE EQUAL time_zone = stringLiteral) + | (TRAINING_DATA_SIZE EQUAL training_data_size = integerLiteral) + | (ANOMALY_SCORE_THRESHOLD EQUAL anomaly_score_threshold = decimalLiteral) + ; + +mlCommand + : ML (mlArg)* + ; + +mlArg + : (argName = ident EQUAL argValue = literalValue) + ; + +// clauses +fromClause + : SOURCE EQUAL tableOrSubqueryClause + | INDEX EQUAL tableOrSubqueryClause + ; + +tableOrSubqueryClause + : LT_SQR_PRTHS subSearch RT_SQR_PRTHS (AS alias = qualifiedName)? + | tableSourceClause + ; + +// One tableSourceClause will generate one Relation node with/without one alias +// even if the relation contains more than one table sources. +// These table sources in one relation will be readed one by one in OpenSearch. +// But it may have different behaivours in different execution backends. +// For example, a Spark UnresovledRelation node only accepts one data source. +tableSourceClause + : tableSource (COMMA tableSource)* (AS alias = qualifiedName)? + ; + +// join +joinCommand + : (joinType) JOIN sideAlias joinHintList? joinCriteria? right = tableOrSubqueryClause + ; + +joinType + : INNER? + | CROSS + | LEFT OUTER? + | RIGHT OUTER? + | FULL OUTER? + | LEFT? SEMI + | LEFT? ANTI + ; + +sideAlias + : (LEFT EQUAL leftAlias = ident)? COMMA? (RIGHT EQUAL rightAlias = ident)? + ; + +joinCriteria + : ON logicalExpression + ; + +joinHintList + : hintPair (COMMA? hintPair)* + ; + +hintPair + : leftHintKey = LEFT_HINT DOT ID EQUAL leftHintValue = ident #leftHint + | rightHintKey = RIGHT_HINT DOT ID EQUAL rightHintValue = ident #rightHint + ; + +renameClasue + : orignalField = wcFieldExpression AS renamedField = wcFieldExpression + ; + +byClause + : BY fieldList + ; + +statsByClause + : BY fieldList + | BY bySpanClause + | BY bySpanClause COMMA fieldList + ; + +bySpanClause + : spanClause (AS alias = qualifiedName)? + ; + +spanClause + : SPAN LT_PRTHS fieldExpression COMMA value = literalValue (unit = timespanUnit)? RT_PRTHS + ; + +sortbyClause + : sortField (COMMA sortField)* + ; + +evalClause + : fieldExpression EQUAL expression + ; + +// aggregation terms +statsAggTerm + : statsFunction (AS alias = wcFieldExpression)? 
+ ; + +// aggregation functions +statsFunction + : statsFunctionName LT_PRTHS valueExpression RT_PRTHS # statsFunctionCall + | COUNT LT_PRTHS RT_PRTHS # countAllFunctionCall + | (DISTINCT_COUNT | DC) LT_PRTHS valueExpression RT_PRTHS # distinctCountFunctionCall + | percentileFunctionName = (PERCENTILE | PERCENTILE_APPROX) LT_PRTHS valueExpression COMMA percent = integerLiteral RT_PRTHS # percentileFunctionCall + ; + +statsFunctionName + : AVG + | COUNT + | SUM + | MIN + | MAX + | STDDEV_SAMP + | STDDEV_POP + ; + +// expressions +expression + : logicalExpression + | valueExpression + ; + +logicalExpression + : NOT logicalExpression # logicalNot + | LT_PRTHS logicalExpression RT_PRTHS # parentheticLogicalExpr + | comparisonExpression # comparsion + | left = logicalExpression (AND)? right = logicalExpression # logicalAnd + | left = logicalExpression OR right = logicalExpression # logicalOr + | left = logicalExpression XOR right = logicalExpression # logicalXor + | booleanExpression # booleanExpr + ; + +comparisonExpression + : left = valueExpression comparisonOperator right = valueExpression # compareExpr + | valueExpression NOT? IN valueList # inExpr + | expr1 = functionArg NOT? BETWEEN expr2 = functionArg AND expr3 = functionArg # between + ; + +valueExpressionList + : valueExpression + | LT_PRTHS valueExpression (COMMA valueExpression)* RT_PRTHS + ; + +valueExpression + : left = valueExpression binaryOperator = (STAR | DIVIDE | MODULE) right = valueExpression # binaryArithmetic + | left = valueExpression binaryOperator = (PLUS | MINUS) right = valueExpression # binaryArithmetic + | primaryExpression # valueExpressionDefault + | positionFunction # positionFunctionCall + | caseFunction # caseExpr + | timestampFunction # timestampFunctionCall + | LT_PRTHS valueExpression RT_PRTHS # parentheticValueExpr + | LT_SQR_PRTHS subSearch RT_SQR_PRTHS # scalarSubqueryExpr + | ident ARROW expression # lambda + | LT_PRTHS ident (COMMA ident)+ RT_PRTHS ARROW expression # lambda + ; + +primaryExpression + : evalFunctionCall + | fieldExpression + | literalValue + ; + +positionFunction + : positionFunctionName LT_PRTHS functionArg IN functionArg RT_PRTHS + ; + +booleanExpression + : booleanFunctionCall # booleanFunctionCallExpr + | isEmptyExpression # isEmptyExpr + | valueExpressionList NOT? IN LT_SQR_PRTHS subSearch RT_SQR_PRTHS # inSubqueryExpr + | EXISTS LT_SQR_PRTHS subSearch RT_SQR_PRTHS # existsSubqueryExpr + | cidrMatchFunctionCall # cidrFunctionCallExpr + ; + + isEmptyExpression + : (ISEMPTY | ISBLANK) LT_PRTHS functionArg RT_PRTHS + ; + + caseFunction + : CASE LT_PRTHS logicalExpression COMMA valueExpression (COMMA logicalExpression COMMA valueExpression)* (ELSE valueExpression)? 
RT_PRTHS + ; + +relevanceExpression + : singleFieldRelevanceFunction + | multiFieldRelevanceFunction + ; + +// Field is a single column +singleFieldRelevanceFunction + : singleFieldRelevanceFunctionName LT_PRTHS field = relevanceField COMMA query = relevanceQuery (COMMA relevanceArg)* RT_PRTHS + ; + +// Field is a list of columns +multiFieldRelevanceFunction + : multiFieldRelevanceFunctionName LT_PRTHS LT_SQR_PRTHS field = relevanceFieldAndWeight (COMMA field = relevanceFieldAndWeight)* RT_SQR_PRTHS COMMA query = relevanceQuery (COMMA relevanceArg)* RT_PRTHS + ; + +// tables +tableSource + : tableQualifiedName + | ID_DATE_SUFFIX + ; + +tableFunction + : qualifiedName LT_PRTHS functionArgs RT_PRTHS + ; + +// fields +fieldList + : fieldExpression (COMMA fieldExpression)* + ; + +wcFieldList + : wcFieldExpression (COMMA wcFieldExpression)* + ; + +sortField + : (PLUS | MINUS)? sortFieldExpression + ; + +sortFieldExpression + : fieldExpression + | AUTO LT_PRTHS fieldExpression RT_PRTHS + | STR LT_PRTHS fieldExpression RT_PRTHS + | IP LT_PRTHS fieldExpression RT_PRTHS + | NUM LT_PRTHS fieldExpression RT_PRTHS + ; + +fieldExpression + : qualifiedName + ; + +wcFieldExpression + : wcQualifiedName + ; + +// functions +evalFunctionCall + : evalFunctionName LT_PRTHS functionArgs RT_PRTHS + ; + +// cast function +dataTypeFunctionCall + : CAST LT_PRTHS expression AS convertedDataType RT_PRTHS + ; + +// boolean functions +booleanFunctionCall + : conditionFunctionBase LT_PRTHS functionArgs RT_PRTHS + ; + +cidrMatchFunctionCall + : CIDRMATCH LT_PRTHS ipAddress = functionArg COMMA cidrBlock = functionArg RT_PRTHS + ; + +convertedDataType + : typeName = DATE + | typeName = TIME + | typeName = TIMESTAMP + | typeName = INT + | typeName = INTEGER + | typeName = DOUBLE + | typeName = LONG + | typeName = FLOAT + | typeName = STRING + | typeName = BOOLEAN + ; + +evalFunctionName + : mathematicalFunctionName + | dateTimeFunctionName + | textFunctionName + | conditionFunctionBase + | systemFunctionName + | positionFunctionName + | coalesceFunctionName + | cryptographicFunctionName + | jsonFunctionName + | collectionFunctionName + | lambdaFunctionName + ; + +functionArgs + : (functionArg (COMMA functionArg)*)? + ; + +functionArg + : (ident EQUAL)? 
valueExpression + ; + +relevanceArg + : relevanceArgName EQUAL relevanceArgValue + ; + +relevanceArgName + : ALLOW_LEADING_WILDCARD + | ANALYZER + | ANALYZE_WILDCARD + | AUTO_GENERATE_SYNONYMS_PHRASE_QUERY + | BOOST + | CUTOFF_FREQUENCY + | DEFAULT_FIELD + | DEFAULT_OPERATOR + | ENABLE_POSITION_INCREMENTS + | ESCAPE + | FIELDS + | FLAGS + | FUZZINESS + | FUZZY_MAX_EXPANSIONS + | FUZZY_PREFIX_LENGTH + | FUZZY_REWRITE + | FUZZY_TRANSPOSITIONS + | LENIENT + | LOW_FREQ_OPERATOR + | MAX_DETERMINIZED_STATES + | MAX_EXPANSIONS + | MINIMUM_SHOULD_MATCH + | OPERATOR + | PHRASE_SLOP + | PREFIX_LENGTH + | QUOTE_ANALYZER + | QUOTE_FIELD_SUFFIX + | REWRITE + | SLOP + | TIE_BREAKER + | TIME_ZONE + | TYPE + | ZERO_TERMS_QUERY + ; + +relevanceFieldAndWeight + : field = relevanceField + | field = relevanceField weight = relevanceFieldWeight + | field = relevanceField BIT_XOR_OP weight = relevanceFieldWeight + ; + +relevanceFieldWeight + : integerLiteral + | decimalLiteral + ; + +relevanceField + : qualifiedName + | stringLiteral + ; + +relevanceQuery + : relevanceArgValue + ; + +relevanceArgValue + : qualifiedName + | literalValue + ; + +mathematicalFunctionName + : ABS + | CBRT + | CEIL + | CEILING + | CONV + | CRC32 + | E + | EXP + | FLOOR + | LN + | LOG + | LOG10 + | LOG2 + | MOD + | PI + | POW + | POWER + | RAND + | ROUND + | SIGN + | SIGNUM + | SQRT + | TRUNCATE + | trigonometricFunctionName + ; + +trigonometricFunctionName + : ACOS + | ASIN + | ATAN + | ATAN2 + | COS + | COT + | DEGREES + | RADIANS + | SIN + | TAN + ; + +cryptographicFunctionName + : MD5 + | SHA1 + | SHA2 + ; + +dateTimeFunctionName + : ADDDATE + | ADDTIME + | CONVERT_TZ + | CURDATE + | CURRENT_DATE + | CURRENT_TIME + | CURRENT_TIMESTAMP + | CURRENT_TIMEZONE + | CURTIME + | DATE + | DATEDIFF + | DATETIME + | DATE_ADD + | DATE_FORMAT + | DATE_SUB + | DAY + | DAYNAME + | DAYOFMONTH + | DAYOFWEEK + | DAYOFYEAR + | DAY_OF_MONTH + | DAY_OF_WEEK + | DAY_OF_YEAR + | FROM_DAYS + | FROM_UNIXTIME + | HOUR + | HOUR_OF_DAY + | LAST_DAY + | LOCALTIME + | LOCALTIMESTAMP + | MAKEDATE + | MAKE_DATE + | MAKETIME + | MICROSECOND + | MINUTE + | MINUTE_OF_DAY + | MINUTE_OF_HOUR + | MONTH + | MONTHNAME + | MONTH_OF_YEAR + | NOW + | PERIOD_ADD + | PERIOD_DIFF + | QUARTER + | SECOND + | SECOND_OF_MINUTE + | SEC_TO_TIME + | STR_TO_DATE + | SUBDATE + | SUBTIME + | SYSDATE + | TIME + | TIMEDIFF + | TIMESTAMP + | TIME_FORMAT + | TIME_TO_SEC + | TO_DAYS + | TO_SECONDS + | UNIX_TIMESTAMP + | UTC_DATE + | UTC_TIME + | UTC_TIMESTAMP + | WEEK + | WEEKDAY + | WEEK_OF_YEAR + | YEAR + | YEARWEEK + ; + +getFormatFunction + : GET_FORMAT LT_PRTHS getFormatType COMMA functionArg RT_PRTHS + ; + +getFormatType + : DATE + | DATETIME + | TIME + | TIMESTAMP + ; + +extractFunction + : EXTRACT LT_PRTHS datetimePart FROM functionArg RT_PRTHS + ; + +simpleDateTimePart + : MICROSECOND + | SECOND + | MINUTE + | HOUR + | DAY + | WEEK + | MONTH + | QUARTER + | YEAR + ; + +complexDateTimePart + : SECOND_MICROSECOND + | MINUTE_MICROSECOND + | MINUTE_SECOND + | HOUR_MICROSECOND + | HOUR_SECOND + | HOUR_MINUTE + | DAY_MICROSECOND + | DAY_SECOND + | DAY_MINUTE + | DAY_HOUR + | YEAR_MONTH + ; + +datetimePart + : simpleDateTimePart + | complexDateTimePart + ; + +timestampFunction + : timestampFunctionName LT_PRTHS simpleDateTimePart COMMA firstArg = functionArg COMMA secondArg = functionArg RT_PRTHS + ; + +timestampFunctionName + : TIMESTAMPADD + | TIMESTAMPDIFF + ; + +// condition function return boolean value +conditionFunctionBase + : LIKE + | IF + | ISNULL + | ISNOTNULL + | IFNULL + | 
NULLIF + | ISPRESENT + | JSON_VALID + ; + +systemFunctionName + : TYPEOF + ; + +textFunctionName + : SUBSTR + | SUBSTRING + | TRIM + | LTRIM + | RTRIM + | LOWER + | UPPER + | CONCAT + | CONCAT_WS + | LENGTH + | STRCMP + | RIGHT + | LEFT + | ASCII + | LOCATE + | REPLACE + | REVERSE + | ISEMPTY + | ISBLANK + ; + +jsonFunctionName + : JSON + | JSON_OBJECT + | JSON_ARRAY + | JSON_ARRAY_LENGTH + | TO_JSON_STRING + | JSON_EXTRACT + | JSON_KEYS + | JSON_VALID +// | JSON_APPEND +// | JSON_DELETE +// | JSON_EXTEND +// | JSON_SET +// | JSON_ARRAY_ALL_MATCH +// | JSON_ARRAY_ANY_MATCH +// | JSON_ARRAY_FILTER +// | JSON_ARRAY_MAP +// | JSON_ARRAY_REDUCE + ; + +collectionFunctionName + : ARRAY + | ARRAY_LENGTH + ; + +lambdaFunctionName + : FORALL + | EXISTS + | FILTER + | TRANSFORM + | REDUCE + ; + +positionFunctionName + : POSITION + ; + +coalesceFunctionName + : COALESCE + ; + +// operators + comparisonOperator + : EQUAL + | NOT_EQUAL + | LESS + | NOT_LESS + | GREATER + | NOT_GREATER + | REGEXP + ; + +singleFieldRelevanceFunctionName + : MATCH + | MATCH_PHRASE + | MATCH_BOOL_PREFIX + | MATCH_PHRASE_PREFIX + ; + +multiFieldRelevanceFunctionName + : SIMPLE_QUERY_STRING + | MULTI_MATCH + | QUERY_STRING + ; + +// literals and values +literalValue + : stringLiteral + | integerLiteral + | decimalLiteral + | booleanLiteral + | datetimeLiteral //#datetime + | intervalLiteral + ; + +intervalLiteral + : INTERVAL valueExpression intervalUnit + ; + +stringLiteral + : DQUOTA_STRING + | SQUOTA_STRING + ; + +integerLiteral + : (PLUS | MINUS)? INTEGER_LITERAL + ; + +decimalLiteral + : (PLUS | MINUS)? DECIMAL_LITERAL + ; + +booleanLiteral + : TRUE + | FALSE + ; + +// Date and Time Literal, follow ANSI 92 +datetimeLiteral + : dateLiteral + | timeLiteral + | timestampLiteral + ; + +dateLiteral + : DATE date = stringLiteral + ; + +timeLiteral + : TIME time = stringLiteral + ; + +timestampLiteral + : TIMESTAMP timestamp = stringLiteral + ; + +intervalUnit + : MICROSECOND + | SECOND + | MINUTE + | HOUR + | DAY + | WEEK + | MONTH + | QUARTER + | YEAR + | SECOND_MICROSECOND + | MINUTE_MICROSECOND + | MINUTE_SECOND + | HOUR_MICROSECOND + | HOUR_SECOND + | HOUR_MINUTE + | DAY_MICROSECOND + | DAY_SECOND + | DAY_MINUTE + | DAY_HOUR + | YEAR_MONTH + ; + +timespanUnit + : MS + | S + | M + | H + | D + | W + | Q + | Y + | MILLISECOND + | SECOND + | MINUTE + | HOUR + | DAY + | WEEK + | MONTH + | QUARTER + | YEAR + ; + +valueList + : LT_PRTHS literalValue (COMMA literalValue)* RT_PRTHS + ; + +qualifiedName + : ident (DOT ident)* # identsAsQualifiedName + ; + +identifierSeq + : qualifiedName (COMMA qualifiedName)* # identsAsQualifiedNameSeq + | LT_PRTHS qualifiedName (COMMA qualifiedName)* RT_PRTHS # identsAsQualifiedNameSeq + ; + +tableQualifiedName + : tableIdent (DOT ident)* # identsAsTableQualifiedName + ; + +wcQualifiedName + : wildcard (DOT wildcard)* # identsAsWildcardQualifiedName + ; + +ident + : (DOT)? ID + | BACKTICK ident BACKTICK + | BQUOTA_STRING + | keywordsCanBeId + ; + +tableIdent + : (CLUSTER)? ident + ; + +wildcard + : ident (MODULE ident)* (MODULE)? 
+ | SINGLE_QUOTE wildcard SINGLE_QUOTE + | DOUBLE_QUOTE wildcard DOUBLE_QUOTE + | BACKTICK wildcard BACKTICK + ; + +keywordsCanBeId + : D // OD SQL and ODBC special + | timespanUnit + | SPAN + | evalFunctionName + | relevanceArgName + | intervalUnit + | dateTimeFunctionName + | textFunctionName + | jsonFunctionName + | mathematicalFunctionName + | positionFunctionName + | cryptographicFunctionName + | singleFieldRelevanceFunctionName + | multiFieldRelevanceFunctionName + | commandName + | comparisonOperator + | explainMode + | correlationType + // commands assist keywords + | IN + | SOURCE + | INDEX + | DESC + | DATASOURCES + | AUTO + | STR + | IP + | NUM + | FROM + | PATTERN + | NEW_FIELD + | SCOPE + | MAPPING + | WITH + | USING + | CAST + | GET_FORMAT + | EXTRACT + | INTERVAL + | PLUS + | MINUS + | INCLUDEFIELDS + | NULLS + // ARGUMENT KEYWORDS + | KEEPEMPTY + | CONSECUTIVE + | DEDUP_SPLITVALUES + | PARTITIONS + | ALLNUM + | DELIM + | CENTROIDS + | ITERATIONS + | DISTANCE_TYPE + | NUMBER_OF_TREES + | SHINGLE_SIZE + | SAMPLE_SIZE + | OUTPUT_AFTER + | TIME_DECAY + | ANOMALY_RATE + | CATEGORY_FIELD + | TIME_FIELD + | TIME_ZONE + | TRAINING_DATA_SIZE + | ANOMALY_SCORE_THRESHOLD + // AGGREGATIONS + | statsFunctionName + | DISTINCT_COUNT + | PERCENTILE + | PERCENTILE_APPROX + | ESTDC + | ESTDC_ERROR + | MEAN + | MEDIAN + | MODE + | RANGE + | STDEV + | STDEVP + | SUMSQ + | VAR_SAMP + | VAR_POP + | TAKE + | FIRST + | LAST + | LIST + | VALUES + | EARLIEST + | EARLIEST_TIME + | LATEST + | LATEST_TIME + | PER_DAY + | PER_HOUR + | PER_MINUTE + | PER_SECOND + | RATE + | SPARKLINE + | C + | DC + // JOIN TYPE + | OUTER + | INNER + | CROSS + | LEFT + | RIGHT + | FULL + | SEMI + | ANTI + | BETWEEN + | CIDRMATCH + | trendlineType + ; diff --git a/spark/src/main/antlr/SparkSqlBase.g4 b/async-query-core/src/main/antlr/SparkSqlBase.g4 similarity index 99% rename from spark/src/main/antlr/SparkSqlBase.g4 rename to async-query-core/src/main/antlr/SparkSqlBase.g4 index 283981e471..c53c61adfd 100644 --- a/spark/src/main/antlr/SparkSqlBase.g4 +++ b/async-query-core/src/main/antlr/SparkSqlBase.g4 @@ -163,6 +163,7 @@ DESC: 'DESC'; DESCRIBE: 'DESCRIBE'; DROP: 'DROP'; EXISTS: 'EXISTS'; +EXTENDED: 'EXTENDED'; FALSE: 'FALSE'; FLINT: 'FLINT'; IF: 'IF'; diff --git a/spark/src/main/antlr/SqlBaseLexer.g4 b/async-query-core/src/main/antlr/SqlBaseLexer.g4 similarity index 84% rename from spark/src/main/antlr/SqlBaseLexer.g4 rename to async-query-core/src/main/antlr/SqlBaseLexer.g4 index a9705c1733..fb440ef8d3 100644 --- a/spark/src/main/antlr/SqlBaseLexer.g4 +++ b/async-query-core/src/main/antlr/SqlBaseLexer.g4 @@ -69,35 +69,6 @@ lexer grammar SqlBaseLexer; public void markUnclosedComment() { has_unclosed_bracketed_comment = true; } - - /** - * When greater than zero, it's in the middle of parsing ARRAY/MAP/STRUCT type. - */ - public int complex_type_level_counter = 0; - - /** - * Increase the counter by one when hits KEYWORD 'ARRAY', 'MAP', 'STRUCT'. - */ - public void incComplexTypeLevelCounter() { - complex_type_level_counter++; - } - - /** - * Decrease the counter by one when hits close tag '>' && the counter greater than zero - * which means we are in the middle of complex type parsing. Otherwise, it's a dangling - * GT token and we do nothing. - */ - public void decComplexTypeLevelCounter() { - if (complex_type_level_counter > 0) complex_type_level_counter--; - } - - /** - * If the counter is zero, it's a shift right operator. It can be closing tags of an complex - * type definition, such as MAP>. 
- */ - public boolean isShiftRightOperator() { - return complex_type_level_counter == 0 ? true : false; - } } SEMICOLON: ';'; @@ -108,7 +79,6 @@ COMMA: ','; DOT: '.'; LEFT_BRACKET: '['; RIGHT_BRACKET: ']'; -BANG: '!'; // NOTE: If you add a new token in the list below, you should update the list of keywords // and reserved tag in `docs/sql-ref-ansi-compliance.md#sql-keywords`, and @@ -129,7 +99,7 @@ ANTI: 'ANTI'; ANY: 'ANY'; ANY_VALUE: 'ANY_VALUE'; ARCHIVE: 'ARCHIVE'; -ARRAY: 'ARRAY' {incComplexTypeLevelCounter();}; +ARRAY: 'ARRAY'; AS: 'AS'; ASC: 'ASC'; AT: 'AT'; @@ -137,7 +107,6 @@ AUTHORIZATION: 'AUTHORIZATION'; BETWEEN: 'BETWEEN'; BIGINT: 'BIGINT'; BINARY: 'BINARY'; -BINDING: 'BINDING'; BOOLEAN: 'BOOLEAN'; BOTH: 'BOTH'; BUCKET: 'BUCKET'; @@ -159,7 +128,6 @@ CLUSTER: 'CLUSTER'; CLUSTERED: 'CLUSTERED'; CODEGEN: 'CODEGEN'; COLLATE: 'COLLATE'; -COLLATION: 'COLLATION'; COLLECTION: 'COLLECTION'; COLUMN: 'COLUMN'; COLUMNS: 'COLUMNS'; @@ -167,7 +135,6 @@ COMMENT: 'COMMENT'; COMMIT: 'COMMIT'; COMPACT: 'COMPACT'; COMPACTIONS: 'COMPACTIONS'; -COMPENSATION: 'COMPENSATION'; COMPUTE: 'COMPUTE'; CONCATENATE: 'CONCATENATE'; CONSTRAINT: 'CONSTRAINT'; @@ -194,7 +161,6 @@ DATE_DIFF: 'DATE_DIFF'; DBPROPERTIES: 'DBPROPERTIES'; DEC: 'DEC'; DECIMAL: 'DECIMAL'; -DECLARE: 'DECLARE'; DEFAULT: 'DEFAULT'; DEFINED: 'DEFINED'; DELETE: 'DELETE'; @@ -213,7 +179,6 @@ ELSE: 'ELSE'; END: 'END'; ESCAPE: 'ESCAPE'; ESCAPED: 'ESCAPED'; -EVOLUTION: 'EVOLUTION'; EXCEPT: 'EXCEPT'; EXCHANGE: 'EXCHANGE'; EXCLUDE: 'EXCLUDE'; @@ -251,7 +216,6 @@ HOURS: 'HOURS'; IDENTIFIER_KW: 'IDENTIFIER'; IF: 'IF'; IGNORE: 'IGNORE'; -IMMEDIATE: 'IMMEDIATE'; IMPORT: 'IMPORT'; IN: 'IN'; INCLUDE: 'INCLUDE'; @@ -288,7 +252,7 @@ LOCKS: 'LOCKS'; LOGICAL: 'LOGICAL'; LONG: 'LONG'; MACRO: 'MACRO'; -MAP: 'MAP' {incComplexTypeLevelCounter();}; +MAP: 'MAP'; MATCHED: 'MATCHED'; MERGE: 'MERGE'; MICROSECOND: 'MICROSECOND'; @@ -307,7 +271,7 @@ NANOSECOND: 'NANOSECOND'; NANOSECONDS: 'NANOSECONDS'; NATURAL: 'NATURAL'; NO: 'NO'; -NOT: 'NOT'; +NOT: 'NOT' | '!'; NULL: 'NULL'; NULLS: 'NULLS'; NUMERIC: 'NUMERIC'; @@ -329,6 +293,8 @@ OVERWRITE: 'OVERWRITE'; PARTITION: 'PARTITION'; PARTITIONED: 'PARTITIONED'; PARTITIONS: 'PARTITIONS'; +PERCENTILE_CONT: 'PERCENTILE_CONT'; +PERCENTILE_DISC: 'PERCENTILE_DISC'; PERCENTLIT: 'PERCENT'; PIVOT: 'PIVOT'; PLACING: 'PLACING'; @@ -379,7 +345,6 @@ SETMINUS: 'MINUS'; SETS: 'SETS'; SHORT: 'SHORT'; SHOW: 'SHOW'; -SINGLE: 'SINGLE'; SKEWED: 'SKEWED'; SMALLINT: 'SMALLINT'; SOME: 'SOME'; @@ -391,7 +356,7 @@ STATISTICS: 'STATISTICS'; STORED: 'STORED'; STRATIFY: 'STRATIFY'; STRING: 'STRING'; -STRUCT: 'STRUCT' {incComplexTypeLevelCounter();}; +STRUCT: 'STRUCT'; SUBSTR: 'SUBSTR'; SUBSTRING: 'SUBSTRING'; SYNC: 'SYNC'; @@ -406,7 +371,6 @@ TEMPORARY: 'TEMPORARY' | 'TEMP'; TERMINATED: 'TERMINATED'; THEN: 'THEN'; TIME: 'TIME'; -TIMEDIFF: 'TIMEDIFF'; TIMESTAMP: 'TIMESTAMP'; TIMESTAMP_LTZ: 'TIMESTAMP_LTZ'; TIMESTAMP_NTZ: 'TIMESTAMP_NTZ'; @@ -414,7 +378,6 @@ TIMESTAMPADD: 'TIMESTAMPADD'; TIMESTAMPDIFF: 'TIMESTAMPDIFF'; TINYINT: 'TINYINT'; TO: 'TO'; -EXECUTE: 'EXECUTE'; TOUCH: 'TOUCH'; TRAILING: 'TRAILING'; TRANSACTION: 'TRANSACTION'; @@ -440,9 +403,6 @@ USER: 'USER'; USING: 'USING'; VALUES: 'VALUES'; VARCHAR: 'VARCHAR'; -VAR: 'VAR'; -VARIABLE: 'VARIABLE'; -VARIANT: 'VARIANT'; VERSION: 'VERSION'; VIEW: 'VIEW'; VIEWS: 'VIEWS'; @@ -468,11 +428,8 @@ NEQ : '<>'; NEQJ: '!='; LT : '<'; LTE : '<=' | '!>'; -GT : '>' {decComplexTypeLevelCounter();}; +GT : '>'; GTE : '>=' | '!<'; -SHIFT_LEFT: '<<'; -SHIFT_RIGHT: '>>' {isShiftRightOperator()}?; 
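Editor's note (not part of the patch): once the SqlBaseLexer.g4/SqlBaseParser.g4 grammars above are generated into async-query-core, they are driven through the standard ANTLR 4 runtime. The sketch below shows a minimal syntax check against such a generated pair; the class names SqlBaseLexer/SqlBaseParser and their package are assumptions (they depend on the Gradle ANTLR configuration), while the `statement` parser rule is the one visible in the SqlBaseParser.g4 hunks below.
import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Recognizer;

// Minimal smoke test against the generated grammar classes (names assumed).
public class SqlGrammarSmokeTest {
  public static void main(String[] args) {
    // Keywords and identifiers are upper-cased here because the raw grammar's LETTER
    // fragment only covers A-Z; a case-insensitive character stream would normally handle this.
    String statement = "DROP TABLE IF EXISTS MY_TABLE";
    SqlBaseLexer lexer = new SqlBaseLexer(CharStreams.fromString(statement));
    SqlBaseParser parser = new SqlBaseParser(new CommonTokenStream(lexer));
    // Fail fast on syntax errors instead of printing to stderr.
    parser.removeErrorListeners();
    parser.addErrorListener(
        new BaseErrorListener() {
          @Override
          public void syntaxError(
              Recognizer<?, ?> recognizer,
              Object offendingSymbol,
              int line,
              int charPositionInLine,
              String msg,
              RecognitionException e) {
            throw new IllegalArgumentException(
                "Syntax error at " + line + ":" + charPositionInLine + ": " + msg);
          }
        });
    parser.statement(); // 'statement' is the parser rule shown in the SqlBaseParser.g4 hunks
  }
}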
-SHIFT_RIGHT_UNSIGNED: '>>>' {isShiftRightOperator()}?; PLUS: '+'; MINUS: '-'; @@ -485,7 +442,6 @@ PIPE: '|'; CONCAT_PIPE: '||'; HAT: '^'; COLON: ':'; -DOUBLE_COLON: '::'; ARROW: '->'; FAT_ARROW : '=>'; HENT_START: '/*+'; @@ -545,13 +501,8 @@ BIGDECIMAL_LITERAL | DECIMAL_DIGITS EXPONENT? 'BD' {isValidDecimal()}? ; -// Generalize the identifier to give a sensible INVALID_IDENTIFIER error message: -// * Unicode letters rather than a-z and A-Z only -// * URI paths for table references using paths -// We then narrow down to ANSI rules in exitUnquotedIdentifier() in the parser. IDENTIFIER - : (UNICODE_LETTER | DIGIT | '_')+ - | UNICODE_LETTER+ '://' (UNICODE_LETTER | DIGIT | '_' | '/' | '-' | '.' | '?' | '=' | '&' | '#' | '%')+ + : (LETTER | DIGIT | '_')+ ; BACKQUOTED_IDENTIFIER @@ -575,10 +526,6 @@ fragment LETTER : [A-Z] ; -fragment UNICODE_LETTER - : [\p{L}] - ; - SIMPLE_COMMENT : '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN) ; @@ -588,7 +535,7 @@ BRACKETED_COMMENT ; WS - : [ \t\n\f\r\u000B\u00A0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u2028\u202F\u205F\u3000]+ -> channel(HIDDEN) + : [ \r\n\t]+ -> channel(HIDDEN) ; // Catch-all for anything we can't recognize. diff --git a/spark/src/main/antlr/SqlBaseParser.g4 b/async-query-core/src/main/antlr/SqlBaseParser.g4 similarity index 88% rename from spark/src/main/antlr/SqlBaseParser.g4 rename to async-query-core/src/main/antlr/SqlBaseParser.g4 index 4552c17e0c..04128216be 100644 --- a/spark/src/main/antlr/SqlBaseParser.g4 +++ b/async-query-core/src/main/antlr/SqlBaseParser.g4 @@ -72,12 +72,11 @@ singleTableSchema statement : query #statementDefault - | executeImmediate #visitExecuteImmediate | ctes? dmlStatementNoWith #dmlStatement | USE identifierReference #use | USE namespace identifierReference #useNamespace - | SET CATALOG (errorCapturingIdentifier | stringLit) #setCatalog - | CREATE namespace (IF errorCapturingNot EXISTS)? identifierReference + | SET CATALOG (identifier | stringLit) #setCatalog + | CREATE namespace (IF NOT EXISTS)? identifierReference (commentSpec | locationSpec | (WITH (DBPROPERTIES | PROPERTIES) propertyList))* #createNamespace @@ -92,7 +91,7 @@ statement | createTableHeader (LEFT_PAREN createOrReplaceTableColTypeList RIGHT_PAREN)? tableProvider? createTableClauses (AS? query)? #createTable - | CREATE TABLE (IF errorCapturingNot EXISTS)? target=tableIdentifier + | CREATE TABLE (IF NOT EXISTS)? target=tableIdentifier LIKE source=tableIdentifier (tableProvider | rowFormat | @@ -141,7 +140,7 @@ statement SET SERDE stringLit (WITH SERDEPROPERTIES propertyList)? #setTableSerDe | ALTER TABLE identifierReference (partitionSpec)? SET SERDEPROPERTIES propertyList #setTableSerDe - | ALTER (TABLE | VIEW) identifierReference ADD (IF errorCapturingNot EXISTS)? + | ALTER (TABLE | VIEW) identifierReference ADD (IF NOT EXISTS)? partitionSpecLocation+ #addTablePartition | ALTER TABLE identifierReference from=partitionSpec RENAME TO to=partitionSpec #renameTablePartition @@ -153,10 +152,9 @@ statement | DROP TABLE (IF EXISTS)? identifierReference PURGE? #dropTable | DROP VIEW (IF EXISTS)? identifierReference #dropView | CREATE (OR REPLACE)? (GLOBAL? TEMPORARY)? - VIEW (IF errorCapturingNot EXISTS)? identifierReference + VIEW (IF NOT EXISTS)? identifierReference identifierCommentList? (commentSpec | - schemaBinding | (PARTITIONED ON identifierList) | (TBLPROPERTIES propertyList))* AS query #createView @@ -164,14 +162,10 @@ statement tableIdentifier (LEFT_PAREN colTypeList RIGHT_PAREN)? 
tableProvider (OPTIONS propertyList)? #createTempViewUsing | ALTER VIEW identifierReference AS? query #alterViewQuery - | ALTER VIEW identifierReference schemaBinding #alterViewSchemaBinding - | CREATE (OR REPLACE)? TEMPORARY? FUNCTION (IF errorCapturingNot EXISTS)? + | CREATE (OR REPLACE)? TEMPORARY? FUNCTION (IF NOT EXISTS)? identifierReference AS className=stringLit (USING resource (COMMA resource)*)? #createFunction - | DROP TEMPORARY? FUNCTION (IF EXISTS)? identifierReference #dropFunction - | DECLARE (OR REPLACE)? VARIABLE? - identifierReference dataType? variableDefaultExpression? #createVariable - | DROP TEMPORARY VARIABLE (IF EXISTS)? identifierReference #dropVariable + | DROP TEMPORARY? FUNCTION (IF EXISTS)? identifierReference #dropFunction | EXPLAIN (LOGICAL | FORMATTED | EXTENDED | CODEGEN | COST)? statement #explain | SHOW TABLES ((FROM | IN) identifierReference)? @@ -212,21 +206,17 @@ statement | (MSCK)? REPAIR TABLE identifierReference (option=(ADD|DROP|SYNC) PARTITIONS)? #repairTable | op=(ADD | LIST) identifier .*? #manageResource - | SET COLLATION collationName=identifier #setCollation | SET ROLE .*? #failNativeCommand | SET TIME ZONE interval #setTimeZone | SET TIME ZONE timezone #setTimeZone | SET TIME ZONE .*? #setTimeZone - | SET (VARIABLE | VAR) assignmentList #setVariable - | SET (VARIABLE | VAR) LEFT_PAREN multipartIdentifierList RIGHT_PAREN EQ - LEFT_PAREN query RIGHT_PAREN #setVariable | SET configKey EQ configValue #setQuotedConfiguration | SET configKey (EQ .*?)? #setConfiguration | SET .*? EQ configValue #setQuotedConfiguration | SET .*? #setConfiguration | RESET configKey #resetQuotedConfiguration | RESET .*? #resetConfiguration - | CREATE INDEX (IF errorCapturingNot EXISTS)? identifier ON TABLE? + | CREATE INDEX (IF NOT EXISTS)? identifier ON TABLE? identifierReference (USING indexType=identifier)? LEFT_PAREN columns=multipartIdentifierPropertyList RIGHT_PAREN (OPTIONS options=propertyList)? #createIndex @@ -234,28 +224,6 @@ statement | unsupportedHiveNativeCommands .*? #failNativeCommand ; -executeImmediate - : EXECUTE IMMEDIATE queryParam=executeImmediateQueryParam (INTO targetVariable=multipartIdentifierList)? executeImmediateUsing? - ; - -executeImmediateUsing - : USING LEFT_PAREN params=namedExpressionSeq RIGHT_PAREN - | USING params=namedExpressionSeq - ; - -executeImmediateQueryParam - : stringLit - | multipartIdentifier - ; - -executeImmediateArgument - : (constant|multipartIdentifier) (AS name=errorCapturingIdentifier)? - ; - -executeImmediateArgumentSeq - : executeImmediateArgument (COMMA executeImmediateArgument)* - ; - timezone : stringLit | LOCAL @@ -317,17 +285,13 @@ unsupportedHiveNativeCommands ; createTableHeader - : CREATE TEMPORARY? EXTERNAL? TABLE (IF errorCapturingNot EXISTS)? identifierReference + : CREATE TEMPORARY? EXTERNAL? TABLE (IF NOT EXISTS)? identifierReference ; replaceTableHeader : (CREATE OR)? REPLACE TABLE identifierReference ; -clusterBySpec - : CLUSTER BY LEFT_PAREN multipartIdentifierList RIGHT_PAREN - ; - bucketSpec : CLUSTERED BY identifierList (SORTED BY orderedIdentifierList)? @@ -344,10 +308,6 @@ locationSpec : LOCATION stringLit ; -schemaBinding - : WITH SCHEMA (BINDING | COMPENSATION | EVOLUTION | TYPE EVOLUTION) - ; - commentSpec : COMMENT stringLit ; @@ -357,8 +317,8 @@ query ; insertInto - : INSERT OVERWRITE TABLE? identifierReference (partitionSpec (IF errorCapturingNot EXISTS)?)? ((BY NAME) | identifierList)? #insertOverwriteTable - | INSERT INTO TABLE? identifierReference partitionSpec? 
(IF errorCapturingNot EXISTS)? ((BY NAME) | identifierList)? #insertIntoTable + : INSERT OVERWRITE TABLE? identifierReference (partitionSpec (IF NOT EXISTS)?)? ((BY NAME) | identifierList)? #insertOverwriteTable + | INSERT INTO TABLE? identifierReference partitionSpec? (IF NOT EXISTS)? ((BY NAME) | identifierList)? #insertIntoTable | INSERT INTO TABLE? identifierReference REPLACE whereClause #insertIntoReplaceWhere | INSERT OVERWRITE LOCAL? DIRECTORY path=stringLit rowFormat? createFileFormat? #insertOverwriteHiveDir | INSERT OVERWRITE LOCAL? DIRECTORY (path=stringLit)? tableProvider (OPTIONS options=propertyList)? #insertOverwriteDir @@ -395,12 +355,10 @@ describeFuncName | comparisonOperator | arithmeticOperator | predicateOperator - | shiftOperator - | BANG ; describeColName - : nameParts+=errorCapturingIdentifier (DOT nameParts+=errorCapturingIdentifier)* + : nameParts+=identifier (DOT nameParts+=identifier)* ; ctes @@ -419,7 +377,6 @@ createTableClauses :((OPTIONS options=expressionPropertyList) | (PARTITIONED BY partitioning=partitionFieldList) | skewSpec | - clusterBySpec | bucketSpec | rowFormat | createFileFormat | @@ -437,7 +394,7 @@ property ; propertyKey - : errorCapturingIdentifier (DOT errorCapturingIdentifier)* + : identifier (DOT identifier)* | stringLit ; @@ -487,7 +444,7 @@ dmlStatementNoWith | fromClause multiInsertQueryBody+ #multiInsertQuery | DELETE FROM identifierReference tableAlias whereClause? #deleteFromTable | UPDATE identifierReference tableAlias setClause whereClause? #updateTable - | MERGE (WITH SCHEMA EVOLUTION)? INTO target=identifierReference targetAlias=tableAlias + | MERGE INTO target=identifierReference targetAlias=tableAlias USING (source=identifierReference | LEFT_PAREN sourceQuery=query RIGHT_PAREN) sourceAlias=tableAlias ON mergeCondition=booleanExpression @@ -595,11 +552,11 @@ matchedClause : WHEN MATCHED (AND matchedCond=booleanExpression)? THEN matchedAction ; notMatchedClause - : WHEN errorCapturingNot MATCHED (BY TARGET)? (AND notMatchedCond=booleanExpression)? THEN notMatchedAction + : WHEN NOT MATCHED (BY TARGET)? (AND notMatchedCond=booleanExpression)? THEN notMatchedAction ; notMatchedBySourceClause - : WHEN errorCapturingNot MATCHED BY SOURCE (AND notMatchedBySourceCond=booleanExpression)? THEN notMatchedBySourceAction + : WHEN NOT MATCHED BY SOURCE (AND notMatchedBySourceCond=booleanExpression)? THEN notMatchedBySourceAction ; matchedAction @@ -619,10 +576,6 @@ notMatchedBySourceAction | UPDATE SET assignmentList ; -exceptClause - : EXCEPT LEFT_PAREN exceptCols=multipartIdentifierList RIGHT_PAREN - ; - assignmentList : assignment (COMMA assignment)* ; @@ -691,18 +644,18 @@ pivotClause ; pivotColumn - : identifiers+=errorCapturingIdentifier - | LEFT_PAREN identifiers+=errorCapturingIdentifier (COMMA identifiers+=errorCapturingIdentifier)* RIGHT_PAREN + : identifiers+=identifier + | LEFT_PAREN identifiers+=identifier (COMMA identifiers+=identifier)* RIGHT_PAREN ; pivotValue - : expression (AS? errorCapturingIdentifier)? + : expression (AS? identifier)? ; unpivotClause : UNPIVOT nullOperator=unpivotNullClause? LEFT_PAREN operator=unpivotOperator - RIGHT_PAREN (AS? errorCapturingIdentifier)? + RIGHT_PAREN (AS? identifier)? ; unpivotNullClause @@ -744,7 +697,7 @@ unpivotColumn ; unpivotAlias - : AS? errorCapturingIdentifier + : AS? identifier ; lateralView @@ -836,21 +789,9 @@ inlineTable ; functionTableSubqueryArgument - : TABLE identifierReference tableArgumentPartitioning? 
- | TABLE LEFT_PAREN identifierReference RIGHT_PAREN tableArgumentPartitioning? - | TABLE LEFT_PAREN query RIGHT_PAREN tableArgumentPartitioning? - ; - -tableArgumentPartitioning - : ((WITH SINGLE PARTITION) - | ((PARTITION | DISTRIBUTE) BY - (((LEFT_PAREN partition+=expression (COMMA partition+=expression)* RIGHT_PAREN)) - | (expression (COMMA invalidMultiPartitionExpression=expression)+) - | partition+=expression))) - ((ORDER | SORT) BY - (((LEFT_PAREN sortItem (COMMA sortItem)* RIGHT_PAREN) - | (sortItem (COMMA invalidMultiSortItem=sortItem)+) - | sortItem)))? + : TABLE identifierReference + | TABLE LEFT_PAREN identifierReference RIGHT_PAREN + | TABLE LEFT_PAREN query RIGHT_PAREN ; functionTableNamedArgumentExpression @@ -957,7 +898,7 @@ expressionSeq ; booleanExpression - : (NOT | BANG) booleanExpression #logicalNot + : NOT booleanExpression #logicalNot | EXISTS LEFT_PAREN query RIGHT_PAREN #exists | valueExpression predicate? #predicated | left=booleanExpression operator=AND right=booleanExpression #logicalBinary @@ -965,20 +906,15 @@ booleanExpression ; predicate - : errorCapturingNot? kind=BETWEEN lower=valueExpression AND upper=valueExpression - | errorCapturingNot? kind=IN LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN - | errorCapturingNot? kind=IN LEFT_PAREN query RIGHT_PAREN - | errorCapturingNot? kind=RLIKE pattern=valueExpression - | errorCapturingNot? kind=(LIKE | ILIKE) quantifier=(ANY | SOME | ALL) (LEFT_PAREN RIGHT_PAREN | LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN) - | errorCapturingNot? kind=(LIKE | ILIKE) pattern=valueExpression (ESCAPE escapeChar=stringLit)? - | IS errorCapturingNot? kind=NULL - | IS errorCapturingNot? kind=(TRUE | FALSE | UNKNOWN) - | IS errorCapturingNot? kind=DISTINCT FROM right=valueExpression - ; - -errorCapturingNot - : NOT - | BANG + : NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression + | NOT? kind=IN LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN + | NOT? kind=IN LEFT_PAREN query RIGHT_PAREN + | NOT? kind=RLIKE pattern=valueExpression + | NOT? kind=(LIKE | ILIKE) quantifier=(ANY | SOME | ALL) (LEFT_PAREN RIGHT_PAREN | LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN) + | NOT? kind=(LIKE | ILIKE) pattern=valueExpression (ESCAPE escapeChar=stringLit)? + | IS NOT? kind=NULL + | IS NOT? kind=(TRUE | FALSE | UNKNOWN) + | IS NOT? 
kind=DISTINCT FROM right=valueExpression ; valueExpression @@ -986,19 +922,12 @@ valueExpression | operator=(MINUS | PLUS | TILDE) valueExpression #arithmeticUnary | left=valueExpression operator=(ASTERISK | SLASH | PERCENT | DIV) right=valueExpression #arithmeticBinary | left=valueExpression operator=(PLUS | MINUS | CONCAT_PIPE) right=valueExpression #arithmeticBinary - | left=valueExpression shiftOperator right=valueExpression #shiftExpression | left=valueExpression operator=AMPERSAND right=valueExpression #arithmeticBinary | left=valueExpression operator=HAT right=valueExpression #arithmeticBinary | left=valueExpression operator=PIPE right=valueExpression #arithmeticBinary | left=valueExpression comparisonOperator right=valueExpression #comparison ; -shiftOperator - : SHIFT_LEFT - | SHIFT_RIGHT - | SHIFT_RIGHT_UNSIGNED - ; - datetimeUnit : YEAR | QUARTER | MONTH | WEEK | DAY | DAYOFYEAR @@ -1006,27 +935,24 @@ datetimeUnit ; primaryExpression - : name=(CURRENT_DATE | CURRENT_TIMESTAMP | CURRENT_USER | USER | SESSION_USER) #currentLike + : name=(CURRENT_DATE | CURRENT_TIMESTAMP | CURRENT_USER | USER) #currentLike | name=(TIMESTAMPADD | DATEADD | DATE_ADD) LEFT_PAREN (unit=datetimeUnit | invalidUnit=stringLit) COMMA unitsAmount=valueExpression COMMA timestamp=valueExpression RIGHT_PAREN #timestampadd - | name=(TIMESTAMPDIFF | DATEDIFF | DATE_DIFF | TIMEDIFF) LEFT_PAREN (unit=datetimeUnit | invalidUnit=stringLit) COMMA startTimestamp=valueExpression COMMA endTimestamp=valueExpression RIGHT_PAREN #timestampdiff + | name=(TIMESTAMPDIFF | DATEDIFF | DATE_DIFF) LEFT_PAREN (unit=datetimeUnit | invalidUnit=stringLit) COMMA startTimestamp=valueExpression COMMA endTimestamp=valueExpression RIGHT_PAREN #timestampdiff | CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase | CASE value=expression whenClause+ (ELSE elseExpression=expression)? END #simpleCase | name=(CAST | TRY_CAST) LEFT_PAREN expression AS dataType RIGHT_PAREN #cast - | primaryExpression collateClause #collate - | primaryExpression DOUBLE_COLON dataType #castByColon | STRUCT LEFT_PAREN (argument+=namedExpression (COMMA argument+=namedExpression)*)? RIGHT_PAREN #struct | FIRST LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN #first | ANY_VALUE LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN #any_value | LAST LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN #last | POSITION LEFT_PAREN substr=valueExpression IN str=valueExpression RIGHT_PAREN #position | constant #constantDefault - | ASTERISK exceptClause? #star - | qualifiedName DOT ASTERISK exceptClause? #star + | ASTERISK #star + | qualifiedName DOT ASTERISK #star | LEFT_PAREN namedExpression (COMMA namedExpression)+ RIGHT_PAREN #rowConstructor | LEFT_PAREN query RIGHT_PAREN #subqueryExpression | functionName LEFT_PAREN (setQuantifier? argument+=functionArgument (COMMA argument+=functionArgument)*)? RIGHT_PAREN - (WITHIN GROUP LEFT_PAREN ORDER BY sortItem (COMMA sortItem)* RIGHT_PAREN)? (FILTER LEFT_PAREN WHERE where=booleanExpression RIGHT_PAREN)? (nullsOption=(IGNORE | RESPECT) NULLS)? ( OVER windowSpec)? #functionCall | identifier ARROW expression #lambda @@ -1042,6 +968,9 @@ primaryExpression FROM srcStr=valueExpression RIGHT_PAREN #trim | OVERLAY LEFT_PAREN input=valueExpression PLACING replace=valueExpression FROM position=valueExpression (FOR length=valueExpression)? 
RIGHT_PAREN #overlay + | name=(PERCENTILE_CONT | PERCENTILE_DISC) LEFT_PAREN percentage=valueExpression RIGHT_PAREN + WITHIN GROUP LEFT_PAREN ORDER BY sortItem RIGHT_PAREN + (FILTER LEFT_PAREN WHERE where=booleanExpression RIGHT_PAREN)? ( OVER windowSpec)? #percentile ; literalType @@ -1118,10 +1047,6 @@ colPosition : position=FIRST | position=AFTER afterCol=errorCapturingIdentifier ; -collateClause - : COLLATE collationName=identifier - ; - type : BOOLEAN | TINYINT | BYTE @@ -1132,14 +1057,13 @@ type | DOUBLE | DATE | TIMESTAMP | TIMESTAMP_NTZ | TIMESTAMP_LTZ - | STRING collateClause? + | STRING | CHARACTER | CHAR | VARCHAR | BINARY | DECIMAL | DEC | NUMERIC | VOID | INTERVAL - | VARIANT | ARRAY | STRUCT | MAP | unsupportedType=identifier ; @@ -1164,7 +1088,7 @@ qualifiedColTypeWithPosition ; colDefinitionDescriptorWithPosition - : errorCapturingNot NULL + : NOT NULL | defaultExpression | commentSpec | colPosition @@ -1174,16 +1098,12 @@ defaultExpression : DEFAULT expression ; -variableDefaultExpression - : (DEFAULT | EQ) expression - ; - colTypeList : colType (COMMA colType)* ; colType - : colName=errorCapturingIdentifier dataType (errorCapturingNot NULL)? commentSpec? + : colName=errorCapturingIdentifier dataType (NOT NULL)? commentSpec? ; createOrReplaceTableColTypeList @@ -1195,7 +1115,7 @@ createOrReplaceTableColType ; colDefinitionOption - : errorCapturingNot NULL + : NOT NULL | defaultExpression | generationExpression | commentSpec @@ -1210,7 +1130,7 @@ complexColTypeList ; complexColType - : errorCapturingIdentifier COLON? dataType (errorCapturingNot NULL)? commentSpec? + : identifier COLON? dataType (NOT NULL)? commentSpec? ; whenClause @@ -1317,7 +1237,7 @@ alterColumnAction : TYPE dataType | commentSpec | colPosition - | setOrDrop=(SET | DROP) errorCapturingNot NULL + | setOrDrop=(SET | DROP) NOT NULL | SET defaultExpression | dropDefault=DROP DEFAULT ; @@ -1364,7 +1284,6 @@ ansiNonReserved | BIGINT | BINARY | BINARY_HEX - | BINDING | BOOLEAN | BUCKET | BUCKETS @@ -1387,7 +1306,6 @@ ansiNonReserved | COMMIT | COMPACT | COMPACTIONS - | COMPENSATION | COMPUTE | CONCATENATE | COST @@ -1407,7 +1325,6 @@ ansiNonReserved | DBPROPERTIES | DEC | DECIMAL - | DECLARE | DEFAULT | DEFINED | DELETE @@ -1422,7 +1339,6 @@ ansiNonReserved | DOUBLE | DROP | ESCAPED - | EVOLUTION | EXCHANGE | EXCLUDE | EXISTS @@ -1448,7 +1364,6 @@ ansiNonReserved | IDENTIFIER_KW | IF | IGNORE - | IMMEDIATE | IMPORT | INCLUDE | INDEX @@ -1552,7 +1467,6 @@ ansiNonReserved | SETS | SHORT | SHOW - | SINGLE | SKEWED | SMALLINT | SORT @@ -1575,7 +1489,6 @@ ansiNonReserved | TBLPROPERTIES | TEMPORARY | TERMINATED - | TIMEDIFF | TIMESTAMP | TIMESTAMP_LTZ | TIMESTAMP_NTZ @@ -1601,9 +1514,6 @@ ansiNonReserved | USE | VALUES | VARCHAR - | VAR - | VARIABLE - | VARIANT | VERSION | VIEW | VIEWS @@ -1666,7 +1576,6 @@ nonReserved | BIGINT | BINARY | BINARY_HEX - | BINDING | BOOLEAN | BOTH | BUCKET @@ -1688,7 +1597,6 @@ nonReserved | CLUSTERED | CODEGEN | COLLATE - | COLLATION | COLLECTION | COLUMN | COLUMNS @@ -1696,7 +1604,6 @@ nonReserved | COMMIT | COMPACT | COMPACTIONS - | COMPENSATION | COMPUTE | CONCATENATE | CONSTRAINT @@ -1722,7 +1629,6 @@ nonReserved | DBPROPERTIES | DEC | DECIMAL - | DECLARE | DEFAULT | DEFINED | DELETE @@ -1741,10 +1647,8 @@ nonReserved | END | ESCAPE | ESCAPED - | EVOLUTION | EXCHANGE | EXCLUDE - | EXECUTE | EXISTS | EXPLAIN | EXPORT @@ -1777,7 +1681,6 @@ nonReserved | IDENTIFIER_KW | IF | IGNORE - | IMMEDIATE | IMPORT | IN | INCLUDE @@ -1849,6 +1752,8 @@ nonReserved | PARTITION | PARTITIONED | 
PARTITIONS + | PERCENTILE_CONT + | PERCENTILE_DISC | PERCENTLIT | PIVOT | PLACING @@ -1896,7 +1801,6 @@ nonReserved | SETS | SHORT | SHOW - | SINGLE | SKEWED | SMALLINT | SOME @@ -1923,7 +1827,6 @@ nonReserved | TERMINATED | THEN | TIME - | TIMEDIFF | TIMESTAMP | TIMESTAMP_LTZ | TIMESTAMP_NTZ @@ -1954,9 +1857,6 @@ nonReserved | USER | VALUES | VARCHAR - | VAR - | VARIABLE - | VARIANT | VERSION | VIEW | VIEWS diff --git a/async-query-core/src/main/java/org/opensearch/sql/asyncquery/Dummy.java b/async-query-core/src/main/java/org/opensearch/sql/asyncquery/Dummy.java deleted file mode 100644 index b7ab572f2a..0000000000 --- a/async-query-core/src/main/java/org/opensearch/sql/asyncquery/Dummy.java +++ /dev/null @@ -1,13 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.asyncquery; - -// This is a dummy class for scaffolding and should be deleted later -public class Dummy { - public String hello() { - return "Hello!"; - } -} diff --git a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorService.java b/async-query-core/src/main/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorService.java similarity index 85% rename from spark/src/main/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorService.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorService.java index d38c8554ae..1240545acd 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorService.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorService.java @@ -31,7 +31,8 @@ CreateAsyncQueryResponse createAsyncQuery( * @param queryId queryId. * @return {@link AsyncQueryExecutionResponse} */ - AsyncQueryExecutionResponse getAsyncQueryResults(String queryId); + AsyncQueryExecutionResponse getAsyncQueryResults( + String queryId, AsyncQueryRequestContext asyncQueryRequestContext); /** * Cancels running async query and returns the cancelled queryId. @@ -39,5 +40,5 @@ CreateAsyncQueryResponse createAsyncQuery( * @param queryId queryId. * @return {@link String} cancelledQueryId. 
*/ - String cancelQuery(String queryId); + String cancelQuery(String queryId, AsyncQueryRequestContext asyncQueryRequestContext); } diff --git a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceImpl.java b/async-query-core/src/main/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceImpl.java similarity index 85% rename from spark/src/main/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceImpl.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceImpl.java index 6d3d5b6765..0639768354 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceImpl.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceImpl.java @@ -19,6 +19,7 @@ import org.opensearch.sql.spark.asyncquery.model.AsyncQueryExecutionResponse; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryJobMetadata; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; +import org.opensearch.sql.spark.asyncquery.model.QueryState; import org.opensearch.sql.spark.config.SparkExecutionEngineConfig; import org.opensearch.sql.spark.config.SparkExecutionEngineConfigSupplier; import org.opensearch.sql.spark.dispatcher.SparkQueryDispatcher; @@ -67,6 +68,10 @@ public CreateAsyncQueryResponse createAsyncQuery( .datasourceName(dispatchQueryResponse.getDatasourceName()) .jobType(dispatchQueryResponse.getJobType()) .indexName(dispatchQueryResponse.getIndexName()) + .query(createAsyncQueryRequest.getQuery()) + .langType(createAsyncQueryRequest.getLang()) + .state(dispatchQueryResponse.getStatus()) + .error(dispatchQueryResponse.getError()) .build(), asyncQueryRequestContext); return new CreateAsyncQueryResponse( @@ -74,12 +79,14 @@ public CreateAsyncQueryResponse createAsyncQuery( } @Override - public AsyncQueryExecutionResponse getAsyncQueryResults(String queryId) { + public AsyncQueryExecutionResponse getAsyncQueryResults( + String queryId, AsyncQueryRequestContext asyncQueryRequestContext) { Optional jobMetadata = asyncQueryJobMetadataStorageService.getJobMetadata(queryId); if (jobMetadata.isPresent()) { String sessionId = jobMetadata.get().getSessionId(); - JSONObject jsonObject = sparkQueryDispatcher.getQueryResponse(jobMetadata.get()); + JSONObject jsonObject = + sparkQueryDispatcher.getQueryResponse(jobMetadata.get(), asyncQueryRequestContext); if (JobRunState.SUCCESS.toString().equals(jsonObject.getString(STATUS_FIELD))) { DefaultSparkSqlFunctionResponseHandle sparkSqlFunctionResponseHandle = new DefaultSparkSqlFunctionResponseHandle(jsonObject); @@ -106,11 +113,15 @@ public AsyncQueryExecutionResponse getAsyncQueryResults(String queryId) { } @Override - public String cancelQuery(String queryId) { + public String cancelQuery(String queryId, AsyncQueryRequestContext asyncQueryRequestContext) { Optional asyncQueryJobMetadata = asyncQueryJobMetadataStorageService.getJobMetadata(queryId); if (asyncQueryJobMetadata.isPresent()) { - return sparkQueryDispatcher.cancelJob(asyncQueryJobMetadata.get()); + String result = + sparkQueryDispatcher.cancelJob(asyncQueryJobMetadata.get(), asyncQueryRequestContext); + asyncQueryJobMetadataStorageService.updateState( + asyncQueryJobMetadata.get(), QueryState.CANCELLED, asyncQueryRequestContext); + return result; } throw new AsyncQueryNotFoundException(String.format("QueryId: %s not found", queryId)); } diff --git 
a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/AsyncQueryJobMetadataStorageService.java b/async-query-core/src/main/java/org/opensearch/sql/spark/asyncquery/AsyncQueryJobMetadataStorageService.java similarity index 72% rename from spark/src/main/java/org/opensearch/sql/spark/asyncquery/AsyncQueryJobMetadataStorageService.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/asyncquery/AsyncQueryJobMetadataStorageService.java index b4e94c984d..86e925f58f 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/AsyncQueryJobMetadataStorageService.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/asyncquery/AsyncQueryJobMetadataStorageService.java @@ -10,6 +10,7 @@ import java.util.Optional; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryJobMetadata; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; +import org.opensearch.sql.spark.asyncquery.model.QueryState; public interface AsyncQueryJobMetadataStorageService { @@ -17,5 +18,10 @@ void storeJobMetadata( AsyncQueryJobMetadata asyncQueryJobMetadata, AsyncQueryRequestContext asyncQueryRequestContext); + void updateState( + AsyncQueryJobMetadata asyncQueryJobMetadata, + QueryState newState, + AsyncQueryRequestContext asyncQueryRequestContext); + Optional getJobMetadata(String jobId); } diff --git a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/exceptions/AsyncQueryNotFoundException.java b/async-query-core/src/main/java/org/opensearch/sql/spark/asyncquery/exceptions/AsyncQueryNotFoundException.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/asyncquery/exceptions/AsyncQueryNotFoundException.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/asyncquery/exceptions/AsyncQueryNotFoundException.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/AsyncQueryExecutionResponse.java b/async-query-core/src/main/java/org/opensearch/sql/spark/asyncquery/model/AsyncQueryExecutionResponse.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/AsyncQueryExecutionResponse.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/asyncquery/model/AsyncQueryExecutionResponse.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/AsyncQueryJobMetadata.java b/async-query-core/src/main/java/org/opensearch/sql/spark/asyncquery/model/AsyncQueryJobMetadata.java similarity index 87% rename from spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/AsyncQueryJobMetadata.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/asyncquery/model/AsyncQueryJobMetadata.java index 1cfab4832d..46aa8ac898 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/AsyncQueryJobMetadata.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/asyncquery/model/AsyncQueryJobMetadata.java @@ -12,6 +12,7 @@ import lombok.experimental.SuperBuilder; import org.opensearch.sql.spark.dispatcher.model.JobType; import org.opensearch.sql.spark.execution.statestore.StateModel; +import org.opensearch.sql.spark.rest.model.LangType; import org.opensearch.sql.utils.SerializeUtils; /** This class models all the metadata required for a job. 
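Editor's note (not part of the patch): the AsyncQueryExecutorService and AsyncQueryExecutorServiceImpl hunks above now thread an AsyncQueryRequestContext through submission, result retrieval, and cancellation. The caller-side sketch below is illustrative only; the facade class and its orchestration are invented for the example, and the import locations of CreateAsyncQueryRequest/CreateAsyncQueryResponse are assumed. Only the three service methods and NullAsyncQueryRequestContext come from the patch.
import org.opensearch.sql.spark.asyncquery.AsyncQueryExecutorService;
import org.opensearch.sql.spark.asyncquery.model.AsyncQueryExecutionResponse;
import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext;
import org.opensearch.sql.spark.asyncquery.model.NullAsyncQueryRequestContext;
import org.opensearch.sql.spark.rest.model.CreateAsyncQueryRequest;
import org.opensearch.sql.spark.rest.model.CreateAsyncQueryResponse;

// Illustrative facade showing how a transport layer might call the reworked interface.
class AsyncQueryFacade {
  private final AsyncQueryExecutorService service;

  AsyncQueryFacade(AsyncQueryExecutorService service) {
    this.service = service;
  }

  CreateAsyncQueryResponse submit(CreateAsyncQueryRequest request, AsyncQueryRequestContext context) {
    // The request-scoped context is passed alongside the query itself.
    return service.createAsyncQuery(request, context);
  }

  AsyncQueryExecutionResponse poll(String queryId) {
    // Callers without request-scoped state can fall back to the null-object context.
    return service.getAsyncQueryResults(queryId, new NullAsyncQueryRequestContext());
  }

  String cancel(String queryId) {
    return service.cancelQuery(queryId, new NullAsyncQueryRequestContext());
  }
}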
*/ @@ -35,6 +36,10 @@ public class AsyncQueryJobMetadata extends StateModel { private final String datasourceName; // null if JobType is INTERACTIVE or null private final String indexName; + private final String query; + private final LangType langType; + private final QueryState state; + private final String error; @Override public String toString() { @@ -54,6 +59,10 @@ public static AsyncQueryJobMetadata copy( .datasourceName(copy.datasourceName) .jobType(copy.jobType) .indexName(copy.indexName) + .query(copy.query) + .langType(copy.langType) + .state(copy.state) + .error(copy.error) .metadata(metadata) .build(); } diff --git a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/AsyncQueryRequestContext.java b/async-query-core/src/main/java/org/opensearch/sql/spark/asyncquery/model/AsyncQueryRequestContext.java similarity index 63% rename from spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/AsyncQueryRequestContext.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/asyncquery/model/AsyncQueryRequestContext.java index 56176faefb..d5a478d592 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/AsyncQueryRequestContext.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/asyncquery/model/AsyncQueryRequestContext.java @@ -5,7 +5,7 @@ package org.opensearch.sql.spark.asyncquery.model; +import org.opensearch.sql.datasource.RequestContext; + /** Context interface to provide additional request related information */ -public interface AsyncQueryRequestContext { - Object getAttribute(String name); -} +public interface AsyncQueryRequestContext extends RequestContext {} diff --git a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/NullAsyncQueryRequestContext.java b/async-query-core/src/main/java/org/opensearch/sql/spark/asyncquery/model/NullAsyncQueryRequestContext.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/NullAsyncQueryRequestContext.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/asyncquery/model/NullAsyncQueryRequestContext.java diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/asyncquery/model/QueryState.java b/async-query-core/src/main/java/org/opensearch/sql/spark/asyncquery/model/QueryState.java new file mode 100644 index 0000000000..62bceb6637 --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/asyncquery/model/QueryState.java @@ -0,0 +1,41 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.asyncquery.model; + +import java.util.Arrays; +import java.util.Locale; +import java.util.Map; +import java.util.stream.Collectors; +import lombok.Getter; + +@Getter +public enum QueryState { + WAITING("waiting"), + RUNNING("running"), + SUCCESS("success"), + FAILED("failed"), + TIMEOUT("timeout"), + CANCELLED("cancelled"); + + private final String state; + + QueryState(String state) { + this.state = state; + } + + private static final Map STATES = + Arrays.stream(QueryState.values()) + .collect(Collectors.toMap(t -> t.name().toLowerCase(), t -> t)); + + public static QueryState fromString(String key) { + for (QueryState ss : QueryState.values()) { + if (ss.getState().toLowerCase(Locale.ROOT).equals(key)) { + return ss; + } + } + throw new IllegalArgumentException("Invalid query state: " + key); + } +} diff --git a/spark/src/main/java/org/opensearch/sql/spark/client/EMRServerlessClient.java 
b/async-query-core/src/main/java/org/opensearch/sql/spark/client/EMRServerlessClient.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/client/EMRServerlessClient.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/client/EMRServerlessClient.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/client/EMRServerlessClientFactory.java b/async-query-core/src/main/java/org/opensearch/sql/spark/client/EMRServerlessClientFactory.java similarity index 72% rename from spark/src/main/java/org/opensearch/sql/spark/client/EMRServerlessClientFactory.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/client/EMRServerlessClientFactory.java index 2c05dc865d..c5305ba445 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/client/EMRServerlessClientFactory.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/client/EMRServerlessClientFactory.java @@ -11,7 +11,8 @@ public interface EMRServerlessClientFactory { /** * Gets an instance of {@link EMRServerlessClient}. * + * @param accountId Account ID of the requester. It will be used to decide the cluster. * @return An {@link EMRServerlessClient} instance. */ - EMRServerlessClient getClient(); + EMRServerlessClient getClient(String accountId); } diff --git a/spark/src/main/java/org/opensearch/sql/spark/client/EMRServerlessClientFactoryImpl.java b/async-query-core/src/main/java/org/opensearch/sql/spark/client/EMRServerlessClientFactoryImpl.java similarity index 92% rename from spark/src/main/java/org/opensearch/sql/spark/client/EMRServerlessClientFactoryImpl.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/client/EMRServerlessClientFactoryImpl.java index 9af9878577..72973b3bbb 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/client/EMRServerlessClientFactoryImpl.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/client/EMRServerlessClientFactoryImpl.java @@ -16,22 +16,18 @@ import org.opensearch.sql.spark.asyncquery.model.NullAsyncQueryRequestContext; import org.opensearch.sql.spark.config.SparkExecutionEngineConfig; import org.opensearch.sql.spark.config.SparkExecutionEngineConfigSupplier; +import org.opensearch.sql.spark.metrics.MetricsService; -/** Implementation of {@link EMRServerlessClientFactory}. */ @RequiredArgsConstructor public class EMRServerlessClientFactoryImpl implements EMRServerlessClientFactory { private final SparkExecutionEngineConfigSupplier sparkExecutionEngineConfigSupplier; + private final MetricsService metricsService; private EMRServerlessClient emrServerlessClient; private String region; - /** - * Gets an instance of {@link EMRServerlessClient}. - * - * @return An {@link EMRServerlessClient} instance. 
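Editor's note (not part of the patch): the new QueryState enum introduced above resolves states from their lowercase string form and rejects anything unrecognized. A small usage sketch follows; the class name is illustrative.
import org.opensearch.sql.spark.asyncquery.model.QueryState;

public class QueryStateExample {
  public static void main(String[] args) {
    // fromString matches the lowercase 'state' value of each constant.
    QueryState state = QueryState.fromString("cancelled");
    System.out.println(state);            // CANCELLED
    System.out.println(state.getState()); // "cancelled" (Lombok @Getter on the state field)

    // Unknown values fail fast instead of silently defaulting.
    try {
      QueryState.fromString("paused");
    } catch (IllegalArgumentException e) {
      System.out.println(e.getMessage()); // Invalid query state: paused
    }
  }
}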
- */ @Override - public EMRServerlessClient getClient() { + public EMRServerlessClient getClient(String accountId) { SparkExecutionEngineConfig sparkExecutionEngineConfig = this.sparkExecutionEngineConfigSupplier.getSparkExecutionEngineConfig( new NullAsyncQueryRequestContext()); @@ -68,7 +64,7 @@ private EMRServerlessClient createEMRServerlessClient(String awsRegion) { .withRegion(awsRegion) .withCredentials(new DefaultAWSCredentialsProviderChain()) .build(); - return new EmrServerlessClientImpl(awsemrServerless); + return new EmrServerlessClientImpl(awsemrServerless, metricsService); }); } } diff --git a/spark/src/main/java/org/opensearch/sql/spark/client/EmrServerlessClientImpl.java b/async-query-core/src/main/java/org/opensearch/sql/spark/client/EmrServerlessClientImpl.java similarity index 80% rename from spark/src/main/java/org/opensearch/sql/spark/client/EmrServerlessClientImpl.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/client/EmrServerlessClientImpl.java index 0ceb269d1d..f6f3633bc7 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/client/EmrServerlessClientImpl.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/client/EmrServerlessClientImpl.java @@ -7,6 +7,9 @@ import static org.opensearch.sql.datasource.model.DataSourceMetadata.DEFAULT_RESULT_INDEX; import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_SQL_APPLICATION_JAR; +import static org.opensearch.sql.spark.metrics.EmrMetrics.EMR_CANCEL_JOB_REQUEST_FAILURE_COUNT; +import static org.opensearch.sql.spark.metrics.EmrMetrics.EMR_GET_JOB_RESULT_FAILURE_COUNT; +import static org.opensearch.sql.spark.metrics.EmrMetrics.EMR_START_JOB_REQUEST_FAILURE_COUNT; import com.amazonaws.services.emrserverless.AWSEMRServerless; import com.amazonaws.services.emrserverless.model.CancelJobRunRequest; @@ -20,25 +23,23 @@ import com.amazonaws.services.emrserverless.model.ValidationException; import java.security.AccessController; import java.security.PrivilegedAction; +import lombok.RequiredArgsConstructor; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.opensearch.sql.legacy.metrics.MetricName; -import org.opensearch.sql.legacy.utils.MetricUtils; +import org.opensearch.sql.spark.metrics.MetricsService; +@RequiredArgsConstructor public class EmrServerlessClientImpl implements EMRServerlessClient { private final AWSEMRServerless emrServerless; + private final MetricsService metricsService; private static final Logger logger = LogManager.getLogger(EmrServerlessClientImpl.class); private static final int MAX_JOB_NAME_LENGTH = 255; public static final String GENERIC_INTERNAL_SERVER_ERROR_MESSAGE = "Internal Server Error."; - public EmrServerlessClientImpl(AWSEMRServerless emrServerless) { - this.emrServerless = emrServerless; - } - @Override public String startJobRun(StartJobRequest startJobRequest) { String resultIndex = @@ -68,8 +69,7 @@ public String startJobRun(StartJobRequest startJobRequest) { return emrServerless.startJobRun(request); } catch (Throwable t) { logger.error("Error while making start job request to emr:", t); - MetricUtils.incrementNumericalMetric( - MetricName.EMR_START_JOB_REQUEST_FAILURE_COUNT); + metricsService.incrementNumericalMetric(EMR_START_JOB_REQUEST_FAILURE_COUNT); if (t instanceof ValidationException) { throw new IllegalArgumentException( "The input fails to satisfy the constraints specified by AWS EMR" @@ -94,8 +94,7 @@ public GetJobRunResult 
getJobRunResult(String applicationId, String jobId) { return emrServerless.getJobRun(request); } catch (Throwable t) { logger.error("Error while making get job run request to emr:", t); - MetricUtils.incrementNumericalMetric( - MetricName.EMR_GET_JOB_RESULT_FAILURE_COUNT); + metricsService.incrementNumericalMetric(EMR_GET_JOB_RESULT_FAILURE_COUNT); throw new RuntimeException(GENERIC_INTERNAL_SERVER_ERROR_MESSAGE); } }); @@ -117,12 +116,16 @@ public CancelJobRunResult cancelJobRun( } catch (Throwable t) { if (allowExceptionPropagation) { throw t; - } else { - logger.error("Error while making cancel job request to emr:", t); - MetricUtils.incrementNumericalMetric( - MetricName.EMR_CANCEL_JOB_REQUEST_FAILURE_COUNT); - throw new RuntimeException(GENERIC_INTERNAL_SERVER_ERROR_MESSAGE); } + + logger.error("Error while making cancel job request to emr: jobId=" + jobId, t); + metricsService.incrementNumericalMetric(EMR_CANCEL_JOB_REQUEST_FAILURE_COUNT); + if (t instanceof ValidationException) { + throw new IllegalArgumentException( + "The input fails to satisfy the constraints specified by AWS EMR" + + " Serverless."); + } + throw new RuntimeException(GENERIC_INTERNAL_SERVER_ERROR_MESSAGE); } }); logger.info(String.format("Job : %s cancelled", cancelJobRunResult.getJobRunId())); diff --git a/spark/src/main/java/org/opensearch/sql/spark/client/StartJobRequest.java b/async-query-core/src/main/java/org/opensearch/sql/spark/client/StartJobRequest.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/client/StartJobRequest.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/client/StartJobRequest.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfig.java b/async-query-core/src/main/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfig.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfig.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfig.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigSupplier.java b/async-query-core/src/main/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigSupplier.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigSupplier.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigSupplier.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/config/SparkSubmitParameterModifier.java b/async-query-core/src/main/java/org/opensearch/sql/spark/config/SparkSubmitParameterModifier.java similarity index 50% rename from spark/src/main/java/org/opensearch/sql/spark/config/SparkSubmitParameterModifier.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/config/SparkSubmitParameterModifier.java index 1c6ce5952a..a50491078c 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/config/SparkSubmitParameterModifier.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/config/SparkSubmitParameterModifier.java @@ -1,11 +1,12 @@ package org.opensearch.sql.spark.config; -import org.opensearch.sql.spark.asyncquery.model.SparkSubmitParameters; +import org.opensearch.sql.spark.parameter.SparkSubmitParametersBuilder; /** * Interface for extension point to allow modification of spark submit parameter. 
modifyParameter - * method is called after the default spark submit parameter is build. + * method is called after the default spark submit parameter is build. To be deprecated in favor of + * {@link org.opensearch.sql.spark.parameter.GeneralSparkParameterComposer} */ public interface SparkSubmitParameterModifier { - void modifyParameters(SparkSubmitParameters parameters); + void modifyParameters(SparkSubmitParametersBuilder parametersBuilder); } diff --git a/spark/src/main/java/org/opensearch/sql/spark/data/constants/SparkConstants.java b/async-query-core/src/main/java/org/opensearch/sql/spark/data/constants/SparkConstants.java similarity index 76% rename from spark/src/main/java/org/opensearch/sql/spark/data/constants/SparkConstants.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/data/constants/SparkConstants.java index b9436b0801..43815a9904 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/data/constants/SparkConstants.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/data/constants/SparkConstants.java @@ -6,8 +6,6 @@ package org.opensearch.sql.spark.data.constants; public class SparkConstants { - public static final String EMR = "emr"; - public static final String STEP_ID_FIELD = "stepId.keyword"; public static final String JOB_ID_FIELD = "jobRunId"; @@ -21,16 +19,11 @@ public class SparkConstants { public static final String SPARK_SQL_APPLICATION_JAR = "file:///home/hadoop/.ivy2/jars/org.opensearch_opensearch-spark-sql-application_2.12-0.3.0-SNAPSHOT.jar"; public static final String SPARK_REQUEST_BUFFER_INDEX_NAME = ".query_execution_request"; - // TODO should be replaced with mvn jar. - public static final String FLINT_INTEGRATION_JAR = - "s3://spark-datasource/flint-spark-integration-assembly-0.3.0-SNAPSHOT.jar"; - // TODO should be replaced with mvn jar. 
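Editor's note (not part of the patch): SparkSubmitParameterModifier, shown a few hunks above, now receives a SparkSubmitParametersBuilder rather than the parameters object itself, and its javadoc flags it for eventual replacement by GeneralSparkParameterComposer. Below is a sketch of a custom modifier under the new signature; the builder call extraParameters(...) and the modifier class are assumptions for illustration, so check the actual SparkSubmitParametersBuilder API before relying on them.
import org.opensearch.sql.spark.config.SparkSubmitParameterModifier;
import org.opensearch.sql.spark.parameter.SparkSubmitParametersBuilder;

// Illustrative modifier that appends an executor-memory override to the spark-submit parameters.
public class ExecutorMemoryModifier implements SparkSubmitParameterModifier {
  private final String executorMemory;

  public ExecutorMemoryModifier(String executorMemory) {
    this.executorMemory = executorMemory;
  }

  @Override
  public void modifyParameters(SparkSubmitParametersBuilder parametersBuilder) {
    // Hypothetical builder method; the real SparkSubmitParametersBuilder API may differ.
    parametersBuilder.extraParameters("--conf spark.executor.memory=" + executorMemory);
  }
}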
public static final String FLINT_DEFAULT_CLUSTER_NAME = "opensearch-cluster"; public static final String FLINT_DEFAULT_HOST = "localhost"; public static final String FLINT_DEFAULT_PORT = "9200"; public static final String FLINT_DEFAULT_SCHEME = "http"; public static final String FLINT_DEFAULT_AUTH = "noauth"; - public static final String FLINT_DEFAULT_REGION = "us-west-2"; public static final String DEFAULT_CLASS_NAME = "org.apache.spark.sql.FlintJob"; public static final String S3_AWS_CREDENTIALS_PROVIDER_KEY = "spark.hadoop.fs.s3.customAWSCredentialsProvider"; @@ -93,24 +86,46 @@ public class SparkConstants { "com.amazonaws.emr.AssumeRoleAWSCredentialsProvider"; public static final String JAVA_HOME_LOCATION = "/usr/lib/jvm/java-17-amazon-corretto.x86_64/"; public static final String FLINT_JOB_QUERY = "spark.flint.job.query"; + public static final String FLINT_JOB_QUERY_ID = "spark.flint.job.queryId"; + public static final String FLINT_JOB_EXTERNAL_SCHEDULER_ENABLED = + "spark.flint.job.externalScheduler.enabled"; + public static final String FLINT_JOB_EXTERNAL_SCHEDULER_INTERVAL = + "spark.flint.job.externalScheduler.interval"; public static final String FLINT_JOB_REQUEST_INDEX = "spark.flint.job.requestIndex"; public static final String FLINT_JOB_SESSION_ID = "spark.flint.job.sessionId"; public static final String FLINT_SESSION_CLASS_NAME = "org.apache.spark.sql.FlintREPL"; public static final String SPARK_CATALOG = "spark.sql.catalog.spark_catalog"; + public static final String SPARK_CATALOG_CATALOG_IMPL = SPARK_CATALOG + ".catalog-impl"; + public static final String SPARK_CATALOG_CLIENT_REGION = SPARK_CATALOG + ".client.region"; + public static final String SPARK_CATALOG_CLIENT_FACTORY = SPARK_CATALOG + ".client.factory"; + public static final String SPARK_CATALOG_CLIENT_ASSUME_ROLE_ARN = + SPARK_CATALOG + ".client.assume-role.arn"; + public static final String SPARK_CATALOG_CLIENT_ASSUME_ROLE_REGION = + SPARK_CATALOG + ".client.assume-role.region"; + public static final String SPARK_CATALOG_LF_SESSION_TAG_KEY = + SPARK_CATALOG + ".client.assume-role.tags.LakeFormationAuthorizedCaller"; + public static final String SPARK_CATALOG_GLUE_ACCOUNT_ID = SPARK_CATALOG + ".glue.account-id"; + public static final String SPARK_CATALOG_GLUE_LF_ENABLED = + SPARK_CATALOG + ".glue.lakeformation-enabled"; + public static final String ICEBERG_SESSION_CATALOG = "org.apache.iceberg.spark.SparkSessionCatalog"; public static final String ICEBERG_SPARK_EXTENSION = "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions"; - public static final String ICEBERG_SPARK_RUNTIME_PACKAGE = - "/usr/share/aws/iceberg/lib/iceberg-spark3-runtime.jar"; - public static final String SPARK_CATALOG_CATALOG_IMPL = - "spark.sql.catalog.spark_catalog.catalog-impl"; + public static final String ICEBERG_SPARK_JARS = + "org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.5.0,software.amazon.awssdk:bundle:2.26.30"; public static final String ICEBERG_GLUE_CATALOG = "org.apache.iceberg.aws.glue.GlueCatalog"; + public static final String ICEBERG_ASSUME_ROLE_CLIENT_FACTORY = + "org.apache.iceberg.aws.AssumeRoleAwsClientFactory"; + public static final String ICEBERG_LF_CLIENT_FACTORY = + "org.apache.iceberg.aws.lakeformation.LakeFormationAwsClientFactory"; + // The following option is needed in Iceberg 1.5 when reading timestamp types that do not + // contain timezone in parquet files. The timezone is assumed to be GMT. 
+ public static final String ICEBERG_TS_WO_TZ = + "spark.sql.iceberg.handle-timestamp-without-timezone"; - public static final String EMR_LAKEFORMATION_OPTION = - "spark.emr-serverless.lakeformation.enabled"; public static final String FLINT_ACCELERATE_USING_COVERING_INDEX = "spark.flint.optimizer.covering.enabled"; } diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/AsyncQueryHandler.java b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/AsyncQueryHandler.java similarity index 74% rename from spark/src/main/java/org/opensearch/sql/spark/dispatcher/AsyncQueryHandler.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/AsyncQueryHandler.java index d61ac17aa3..441846d678 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/AsyncQueryHandler.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/AsyncQueryHandler.java @@ -12,6 +12,7 @@ import com.amazonaws.services.emrserverless.model.JobRunState; import org.json.JSONObject; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryJobMetadata; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; import org.opensearch.sql.spark.dispatcher.model.DispatchQueryContext; import org.opensearch.sql.spark.dispatcher.model.DispatchQueryRequest; import org.opensearch.sql.spark.dispatcher.model.DispatchQueryResponse; @@ -20,8 +21,10 @@ /** Process async query request. */ public abstract class AsyncQueryHandler { - public JSONObject getQueryResponse(AsyncQueryJobMetadata asyncQueryJobMetadata) { - JSONObject result = getResponseFromResultIndex(asyncQueryJobMetadata); + public JSONObject getQueryResponse( + AsyncQueryJobMetadata asyncQueryJobMetadata, + AsyncQueryRequestContext asyncQueryRequestContext) { + JSONObject result = getResponseFromResultIndex(asyncQueryJobMetadata, asyncQueryRequestContext); if (result.has(DATA_FIELD)) { JSONObject items = result.getJSONObject(DATA_FIELD); @@ -34,7 +37,8 @@ public JSONObject getQueryResponse(AsyncQueryJobMetadata asyncQueryJobMetadata) result.put(ERROR_FIELD, error); return result; } else { - JSONObject statement = getResponseFromExecutor(asyncQueryJobMetadata); + JSONObject statement = + getResponseFromExecutor(asyncQueryJobMetadata, asyncQueryRequestContext); // Consider statement still running if state is success but query result unavailable if (isSuccessState(statement)) { @@ -49,12 +53,16 @@ private boolean isSuccessState(JSONObject statement) { } protected abstract JSONObject getResponseFromResultIndex( - AsyncQueryJobMetadata asyncQueryJobMetadata); + AsyncQueryJobMetadata asyncQueryJobMetadata, + AsyncQueryRequestContext asyncQueryRequestContext); protected abstract JSONObject getResponseFromExecutor( - AsyncQueryJobMetadata asyncQueryJobMetadata); + AsyncQueryJobMetadata asyncQueryJobMetadata, + AsyncQueryRequestContext asyncQueryRequestContext); - public abstract String cancelJob(AsyncQueryJobMetadata asyncQueryJobMetadata); + public abstract String cancelJob( + AsyncQueryJobMetadata asyncQueryJobMetadata, + AsyncQueryRequestContext asyncQueryRequestContext); public abstract DispatchQueryResponse submit( DispatchQueryRequest request, DispatchQueryContext context); diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/BatchQueryHandler.java b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/BatchQueryHandler.java similarity index 62% rename from spark/src/main/java/org/opensearch/sql/spark/dispatcher/BatchQueryHandler.java rename to 
async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/BatchQueryHandler.java index 09d2dbd6c6..dbd6411998 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/BatchQueryHandler.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/BatchQueryHandler.java @@ -8,16 +8,16 @@ import static org.opensearch.sql.spark.data.constants.SparkConstants.ERROR_FIELD; import static org.opensearch.sql.spark.data.constants.SparkConstants.STATUS_FIELD; import static org.opensearch.sql.spark.dispatcher.SparkQueryDispatcher.JOB_TYPE_TAG_KEY; +import static org.opensearch.sql.spark.metrics.EmrMetrics.EMR_BATCH_QUERY_JOBS_CREATION_COUNT; import com.amazonaws.services.emrserverless.model.GetJobRunResult; import java.util.Map; import lombok.RequiredArgsConstructor; import org.json.JSONObject; import org.opensearch.sql.datasource.model.DataSourceMetadata; -import org.opensearch.sql.legacy.metrics.MetricName; -import org.opensearch.sql.legacy.utils.MetricUtils; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryJobMetadata; -import org.opensearch.sql.spark.asyncquery.model.SparkSubmitParameters; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; +import org.opensearch.sql.spark.asyncquery.model.QueryState; import org.opensearch.sql.spark.client.EMRServerlessClient; import org.opensearch.sql.spark.client.StartJobRequest; import org.opensearch.sql.spark.dispatcher.model.DispatchQueryContext; @@ -25,6 +25,9 @@ import org.opensearch.sql.spark.dispatcher.model.DispatchQueryResponse; import org.opensearch.sql.spark.dispatcher.model.JobType; import org.opensearch.sql.spark.leasemanager.LeaseManager; +import org.opensearch.sql.spark.leasemanager.model.LeaseRequest; +import org.opensearch.sql.spark.metrics.MetricsService; +import org.opensearch.sql.spark.parameter.SparkSubmitParametersBuilderProvider; import org.opensearch.sql.spark.response.JobExecutionResponseReader; /** @@ -36,17 +39,23 @@ public class BatchQueryHandler extends AsyncQueryHandler { protected final EMRServerlessClient emrServerlessClient; protected final JobExecutionResponseReader jobExecutionResponseReader; protected final LeaseManager leaseManager; + protected final MetricsService metricsService; + protected final SparkSubmitParametersBuilderProvider sparkSubmitParametersBuilderProvider; @Override - protected JSONObject getResponseFromResultIndex(AsyncQueryJobMetadata asyncQueryJobMetadata) { + protected JSONObject getResponseFromResultIndex( + AsyncQueryJobMetadata asyncQueryJobMetadata, + AsyncQueryRequestContext asyncQueryRequestContext) { // either empty json when the result is not available or data with status // Fetch from Result Index - return jobExecutionResponseReader.getResultWithJobId( - asyncQueryJobMetadata.getJobId(), asyncQueryJobMetadata.getResultIndex()); + return jobExecutionResponseReader.getResultFromResultIndex( + asyncQueryJobMetadata, asyncQueryRequestContext); } @Override - protected JSONObject getResponseFromExecutor(AsyncQueryJobMetadata asyncQueryJobMetadata) { + protected JSONObject getResponseFromExecutor( + AsyncQueryJobMetadata asyncQueryJobMetadata, + AsyncQueryRequestContext asyncQueryRequestContext) { JSONObject result = new JSONObject(); // make call to EMR Serverless when related result index documents are not available GetJobRunResult getJobRunResult = @@ -59,12 +68,22 @@ protected JSONObject getResponseFromExecutor(AsyncQueryJobMetadata asyncQueryJob } @Override - public String cancelJob(AsyncQueryJobMetadata asyncQueryJobMetadata) { 
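// Minimal sketch (stand-in types, not the plugin classes) of the template-method flow that
// AsyncQueryHandler.getQueryResponse follows with the extra AsyncQueryRequestContext argument:
// the handler first looks for a completed document in the result index and only falls back to
// the execution engine when no data is present. RequestContext here is a placeholder.
import java.util.Map;
import java.util.Optional;

abstract class QueryResponseFlowSketch {
  interface RequestContext {}

  Map<String, Object> getQueryResponse(String queryId, RequestContext context) {
    // Completed results are persisted in the result index; prefer them when available.
    return readResultIndex(queryId, context)
        // Otherwise report live status from the executor (EMR Serverless job or session statement).
        .orElseGet(() -> readExecutorStatus(queryId, context));
  }

  abstract Optional<Map<String, Object>> readResultIndex(String queryId, RequestContext context);

  abstract Map<String, Object> readExecutorStatus(String queryId, RequestContext context);

  public static void main(String[] args) {
    QueryResponseFlowSketch sketch =
        new QueryResponseFlowSketch() {
          @Override
          Optional<Map<String, Object>> readResultIndex(String queryId, RequestContext context) {
            return Optional.empty(); // pretend the result document is not written yet
          }

          @Override
          Map<String, Object> readExecutorStatus(String queryId, RequestContext context) {
            return Map.of("status", "running");
          }
        };
    System.out.println(sketch.getQueryResponse("q-1", new RequestContext() {})); // {status=running}
  }
}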
+ public String cancelJob( + AsyncQueryJobMetadata asyncQueryJobMetadata, + AsyncQueryRequestContext asyncQueryRequestContext) { emrServerlessClient.cancelJobRun( asyncQueryJobMetadata.getApplicationId(), asyncQueryJobMetadata.getJobId(), false); return asyncQueryJobMetadata.getQueryId(); } + /** + * This method allows RefreshQueryHandler to override the job type when calling + * leaseManager.borrow. + */ + protected void borrow(String datasource) { + leaseManager.borrow(new LeaseRequest(JobType.BATCH, datasource)); + } + @Override public DispatchQueryResponse submit( DispatchQueryRequest dispatchQueryRequest, DispatchQueryContext context) { @@ -72,6 +91,8 @@ public DispatchQueryResponse submit( Map tags = context.getTags(); DataSourceMetadata dataSourceMetadata = context.getDataSourceMetadata(); + this.borrow(dispatchQueryRequest.getDatasource()); + tags.put(JOB_TYPE_TAG_KEY, JobType.BATCH.getText()); StartJobRequest startJobRequest = new StartJobRequest( @@ -79,24 +100,37 @@ public DispatchQueryResponse submit( dispatchQueryRequest.getAccountId(), dispatchQueryRequest.getApplicationId(), dispatchQueryRequest.getExecutionRoleARN(), - SparkSubmitParameters.builder() + sparkSubmitParametersBuilderProvider + .getSparkSubmitParametersBuilder() .clusterName(clusterName) - .dataSource(context.getDataSourceMetadata()) + .queryId(context.getQueryId()) .query(dispatchQueryRequest.getQuery()) - .build() + .dataSource( + context.getDataSourceMetadata(), + dispatchQueryRequest, + context.getAsyncQueryRequestContext()) .acceptModifier(dispatchQueryRequest.getSparkSubmitParameterModifier()) + .acceptComposers(dispatchQueryRequest, context.getAsyncQueryRequestContext()) .toString(), tags, false, dataSourceMetadata.getResultIndex()); String jobId = emrServerlessClient.startJobRun(startJobRequest); - MetricUtils.incrementNumericalMetric(MetricName.EMR_BATCH_QUERY_JOBS_CREATION_COUNT); + metricsService.incrementNumericalMetric(EMR_BATCH_QUERY_JOBS_CREATION_COUNT); return DispatchQueryResponse.builder() .queryId(context.getQueryId()) .jobId(jobId) .resultIndex(dataSourceMetadata.getResultIndex()) .datasourceName(dataSourceMetadata.getName()) - .jobType(JobType.INTERACTIVE) + .jobType(JobType.BATCH) + .status(QueryState.WAITING) + .indexName(getIndexName(context)) .build(); } + + private static String getIndexName(DispatchQueryContext context) { + return context.getIndexQueryDetails() != null + ? 
context.getIndexQueryDetails().openSearchIndexName() + : null; + } } diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/DatasourceEmbeddedQueryIdProvider.java b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/DatasourceEmbeddedQueryIdProvider.java similarity index 69% rename from spark/src/main/java/org/opensearch/sql/spark/dispatcher/DatasourceEmbeddedQueryIdProvider.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/DatasourceEmbeddedQueryIdProvider.java index c170040718..3564fa9552 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/DatasourceEmbeddedQueryIdProvider.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/DatasourceEmbeddedQueryIdProvider.java @@ -5,6 +5,7 @@ package org.opensearch.sql.spark.dispatcher; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; import org.opensearch.sql.spark.dispatcher.model.DispatchQueryRequest; import org.opensearch.sql.spark.utils.IDUtils; @@ -12,7 +13,9 @@ public class DatasourceEmbeddedQueryIdProvider implements QueryIdProvider { @Override - public String getQueryId(DispatchQueryRequest dispatchQueryRequest) { + public String getQueryId( + DispatchQueryRequest dispatchQueryRequest, + AsyncQueryRequestContext asyncQueryRequestContext) { return IDUtils.encode(dispatchQueryRequest.getDatasource()); } } diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/IndexDMLHandler.java b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/IndexDMLHandler.java similarity index 83% rename from spark/src/main/java/org/opensearch/sql/spark/dispatcher/IndexDMLHandler.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/IndexDMLHandler.java index e8413f469c..71b20b4311 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/IndexDMLHandler.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/IndexDMLHandler.java @@ -18,6 +18,7 @@ import org.opensearch.sql.datasource.model.DataSourceMetadata; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryJobMetadata; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; +import org.opensearch.sql.spark.asyncquery.model.QueryState; import org.opensearch.sql.spark.dispatcher.model.DispatchQueryContext; import org.opensearch.sql.spark.dispatcher.model.DispatchQueryRequest; import org.opensearch.sql.spark.dispatcher.model.DispatchQueryResponse; @@ -62,9 +63,11 @@ public DispatchQueryResponse submit( long startTime = System.currentTimeMillis(); try { IndexQueryDetails indexDetails = context.getIndexQueryDetails(); - FlintIndexMetadata indexMetadata = getFlintIndexMetadata(indexDetails); + FlintIndexMetadata indexMetadata = + getFlintIndexMetadata(indexDetails, context.getAsyncQueryRequestContext()); - getIndexOp(dispatchQueryRequest, indexDetails).apply(indexMetadata); + getIndexOp(dispatchQueryRequest, indexDetails) + .apply(indexMetadata, context.getAsyncQueryRequestContext()); String asyncQueryId = storeIndexDMLResult( @@ -80,7 +83,8 @@ public DispatchQueryResponse submit( .jobId(DML_QUERY_JOB_ID) .resultIndex(dataSourceMetadata.getResultIndex()) .datasourceName(dataSourceMetadata.getName()) - .jobType(JobType.INTERACTIVE) + .jobType(JobType.BATCH) + .status(QueryState.SUCCESS) .build(); } catch (Exception e) { LOG.error(e.getMessage()); @@ -98,7 +102,9 @@ public DispatchQueryResponse submit( .jobId(DML_QUERY_JOB_ID) .resultIndex(dataSourceMetadata.getResultIndex()) 
.datasourceName(dataSourceMetadata.getName()) - .jobType(JobType.INTERACTIVE) + .jobType(JobType.BATCH) + .status(QueryState.FAILED) + .error(e.getMessage()) .build(); } } @@ -136,8 +142,6 @@ private FlintIndexOp getIndexOp( case ALTER: return flintIndexOpFactory.getAlter( indexQueryDetails.getFlintIndexOptions(), dispatchQueryRequest.getDatasource()); - case VACUUM: - return flintIndexOpFactory.getVacuum(dispatchQueryRequest.getDatasource()); default: throw new IllegalStateException( String.format( @@ -146,9 +150,11 @@ private FlintIndexOp getIndexOp( } } - private FlintIndexMetadata getFlintIndexMetadata(IndexQueryDetails indexDetails) { + private FlintIndexMetadata getFlintIndexMetadata( + IndexQueryDetails indexDetails, AsyncQueryRequestContext asyncQueryRequestContext) { Map indexMetadataMap = - flintIndexMetadataService.getFlintIndexMetadata(indexDetails.openSearchIndexName()); + flintIndexMetadataService.getFlintIndexMetadata( + indexDetails.openSearchIndexName(), asyncQueryRequestContext); if (!indexMetadataMap.containsKey(indexDetails.openSearchIndexName())) { throw new IllegalStateException( String.format( @@ -158,14 +164,18 @@ private FlintIndexMetadata getFlintIndexMetadata(IndexQueryDetails indexDetails) } @Override - protected JSONObject getResponseFromResultIndex(AsyncQueryJobMetadata asyncQueryJobMetadata) { + protected JSONObject getResponseFromResultIndex( + AsyncQueryJobMetadata asyncQueryJobMetadata, + AsyncQueryRequestContext asyncQueryRequestContext) { String queryId = asyncQueryJobMetadata.getQueryId(); return jobExecutionResponseReader.getResultWithQueryId( - queryId, asyncQueryJobMetadata.getResultIndex()); + queryId, asyncQueryJobMetadata.getResultIndex(), asyncQueryRequestContext); } @Override - protected JSONObject getResponseFromExecutor(AsyncQueryJobMetadata asyncQueryJobMetadata) { + protected JSONObject getResponseFromExecutor( + AsyncQueryJobMetadata asyncQueryJobMetadata, + AsyncQueryRequestContext asyncQueryRequestContext) { // Consider statement still running if result doc created in submit() is not available yet JSONObject result = new JSONObject(); result.put(STATUS_FIELD, StatementState.RUNNING.getState()); @@ -174,7 +184,9 @@ protected JSONObject getResponseFromExecutor(AsyncQueryJobMetadata asyncQueryJob } @Override - public String cancelJob(AsyncQueryJobMetadata asyncQueryJobMetadata) { + public String cancelJob( + AsyncQueryJobMetadata asyncQueryJobMetadata, + AsyncQueryRequestContext asyncQueryRequestContext) { throw new IllegalArgumentException("can't cancel index DML query"); } } diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/InteractiveQueryHandler.java b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/InteractiveQueryHandler.java similarity index 75% rename from spark/src/main/java/org/opensearch/sql/spark/dispatcher/InteractiveQueryHandler.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/InteractiveQueryHandler.java index e47f439d9d..1eaad1ca9d 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/InteractiveQueryHandler.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/InteractiveQueryHandler.java @@ -15,10 +15,9 @@ import lombok.RequiredArgsConstructor; import org.json.JSONObject; import org.opensearch.sql.datasource.model.DataSourceMetadata; -import org.opensearch.sql.legacy.metrics.MetricName; -import org.opensearch.sql.legacy.utils.MetricUtils; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryJobMetadata; 
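// Stand-in sketch of the IndexDMLHandler pattern above: DROP/ALTER index statements run
// synchronously inside submit(), so the dispatcher response is built with a terminal
// QueryState right away and failures carry the error message instead of a job id to poll.
// "dml-job-id" is a placeholder for the real DML_QUERY_JOB_ID constant.
class IndexDmlResponseSketch {
  enum QueryState { WAITING, SUCCESS, FAILED }

  record Response(String queryId, String jobId, QueryState status, String error) {}

  static Response runDmlOperation(String queryId, Runnable indexOperation) {
    try {
      indexOperation.run();
      return new Response(queryId, "dml-job-id", QueryState.SUCCESS, null);
    } catch (RuntimeException e) {
      return new Response(queryId, "dml-job-id", QueryState.FAILED, e.getMessage());
    }
  }

  public static void main(String[] args) {
    System.out.println(runDmlOperation("q1", () -> {}));
    System.out.println(
        runDmlOperation("q2", () -> { throw new IllegalStateException("Flint index not found"); }));
  }
}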
-import org.opensearch.sql.spark.asyncquery.model.SparkSubmitParameters; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; +import org.opensearch.sql.spark.asyncquery.model.QueryState; import org.opensearch.sql.spark.dispatcher.model.DispatchQueryContext; import org.opensearch.sql.spark.dispatcher.model.DispatchQueryRequest; import org.opensearch.sql.spark.dispatcher.model.DispatchQueryResponse; @@ -32,6 +31,9 @@ import org.opensearch.sql.spark.execution.statement.StatementState; import org.opensearch.sql.spark.leasemanager.LeaseManager; import org.opensearch.sql.spark.leasemanager.model.LeaseRequest; +import org.opensearch.sql.spark.metrics.EmrMetrics; +import org.opensearch.sql.spark.metrics.MetricsService; +import org.opensearch.sql.spark.parameter.SparkSubmitParametersBuilderProvider; import org.opensearch.sql.spark.response.JobExecutionResponseReader; /** @@ -45,23 +47,30 @@ public class InteractiveQueryHandler extends AsyncQueryHandler { private final SessionManager sessionManager; private final JobExecutionResponseReader jobExecutionResponseReader; private final LeaseManager leaseManager; + private final MetricsService metricsService; + protected final SparkSubmitParametersBuilderProvider sparkSubmitParametersBuilderProvider; @Override - protected JSONObject getResponseFromResultIndex(AsyncQueryJobMetadata asyncQueryJobMetadata) { + protected JSONObject getResponseFromResultIndex( + AsyncQueryJobMetadata asyncQueryJobMetadata, + AsyncQueryRequestContext asyncQueryRequestContext) { String queryId = asyncQueryJobMetadata.getQueryId(); return jobExecutionResponseReader.getResultWithQueryId( - queryId, asyncQueryJobMetadata.getResultIndex()); + queryId, asyncQueryJobMetadata.getResultIndex(), asyncQueryRequestContext); } @Override - protected JSONObject getResponseFromExecutor(AsyncQueryJobMetadata asyncQueryJobMetadata) { + protected JSONObject getResponseFromExecutor( + AsyncQueryJobMetadata asyncQueryJobMetadata, + AsyncQueryRequestContext asyncQueryRequestContext) { JSONObject result = new JSONObject(); String queryId = asyncQueryJobMetadata.getQueryId(); Statement statement = getStatementByQueryId( asyncQueryJobMetadata.getSessionId(), queryId, - asyncQueryJobMetadata.getDatasourceName()); + asyncQueryJobMetadata.getDatasourceName(), + asyncQueryRequestContext); StatementState statementState = statement.getStatementState(); result.put(STATUS_FIELD, statementState.getState()); result.put(ERROR_FIELD, Optional.of(statement.getStatementModel().getError()).orElse("")); @@ -69,12 +78,15 @@ protected JSONObject getResponseFromExecutor(AsyncQueryJobMetadata asyncQueryJob } @Override - public String cancelJob(AsyncQueryJobMetadata asyncQueryJobMetadata) { + public String cancelJob( + AsyncQueryJobMetadata asyncQueryJobMetadata, + AsyncQueryRequestContext asyncQueryRequestContext) { String queryId = asyncQueryJobMetadata.getQueryId(); getStatementByQueryId( asyncQueryJobMetadata.getSessionId(), queryId, - asyncQueryJobMetadata.getDatasourceName()) + asyncQueryJobMetadata.getDatasourceName(), + asyncQueryRequestContext) .cancel(); return queryId; } @@ -111,17 +123,21 @@ public DispatchQueryResponse submit( dispatchQueryRequest.getAccountId(), dispatchQueryRequest.getApplicationId(), dispatchQueryRequest.getExecutionRoleARN(), - SparkSubmitParameters.builder() + sparkSubmitParametersBuilderProvider + .getSparkSubmitParametersBuilder() .className(FLINT_SESSION_CLASS_NAME) .clusterName(clusterName) - .dataSource(dataSourceMetadata) - .build() - 
.acceptModifier(dispatchQueryRequest.getSparkSubmitParameterModifier()), + .dataSource( + dataSourceMetadata, + dispatchQueryRequest, + context.getAsyncQueryRequestContext()) + .acceptModifier(dispatchQueryRequest.getSparkSubmitParameterModifier()) + .acceptComposers(dispatchQueryRequest, context.getAsyncQueryRequestContext()), tags, dataSourceMetadata.getResultIndex(), dataSourceMetadata.getName()), context.getAsyncQueryRequestContext()); - MetricUtils.incrementNumericalMetric(MetricName.EMR_INTERACTIVE_QUERY_JOBS_CREATION_COUNT); + metricsService.incrementNumericalMetric(EmrMetrics.EMR_INTERACTIVE_QUERY_JOBS_CREATION_COUNT); } session.submit( new QueryRequest( @@ -136,15 +152,20 @@ public DispatchQueryResponse submit( .sessionId(session.getSessionId()) .datasourceName(dataSourceMetadata.getName()) .jobType(JobType.INTERACTIVE) + .status(QueryState.WAITING) .build(); } - private Statement getStatementByQueryId(String sessionId, String queryId, String datasourceName) { + private Statement getStatementByQueryId( + String sessionId, + String queryId, + String datasourceName, + AsyncQueryRequestContext asyncQueryRequestContext) { Optional session = sessionManager.getSession(sessionId, datasourceName); if (session.isPresent()) { // todo, statementId == jobId if statement running in session. StatementId statementId = new StatementId(queryId); - Optional statement = session.get().get(statementId); + Optional statement = session.get().get(statementId, asyncQueryRequestContext); if (statement.isPresent()) { return statement.get(); } else { diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/QueryHandlerFactory.java b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/QueryHandlerFactory.java similarity index 57% rename from spark/src/main/java/org/opensearch/sql/spark/dispatcher/QueryHandlerFactory.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/QueryHandlerFactory.java index f994d9c728..d6e70a9d86 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/QueryHandlerFactory.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/QueryHandlerFactory.java @@ -12,6 +12,8 @@ import org.opensearch.sql.spark.flint.IndexDMLResultStorageService; import org.opensearch.sql.spark.flint.operation.FlintIndexOpFactory; import org.opensearch.sql.spark.leasemanager.LeaseManager; +import org.opensearch.sql.spark.metrics.MetricsService; +import org.opensearch.sql.spark.parameter.SparkSubmitParametersBuilderProvider; import org.opensearch.sql.spark.response.JobExecutionResponseReader; @RequiredArgsConstructor @@ -24,28 +26,45 @@ public class QueryHandlerFactory { private final IndexDMLResultStorageService indexDMLResultStorageService; private final FlintIndexOpFactory flintIndexOpFactory; private final EMRServerlessClientFactory emrServerlessClientFactory; + private final MetricsService metricsService; + protected final SparkSubmitParametersBuilderProvider sparkSubmitParametersBuilderProvider; - public RefreshQueryHandler getRefreshQueryHandler() { + public RefreshQueryHandler getRefreshQueryHandler(String accountId) { return new RefreshQueryHandler( - emrServerlessClientFactory.getClient(), + emrServerlessClientFactory.getClient(accountId), jobExecutionResponseReader, flintIndexMetadataService, leaseManager, - flintIndexOpFactory); + flintIndexOpFactory, + metricsService, + sparkSubmitParametersBuilderProvider); } - public StreamingQueryHandler getStreamingQueryHandler() { + public StreamingQueryHandler 
getStreamingQueryHandler(String accountId) { return new StreamingQueryHandler( - emrServerlessClientFactory.getClient(), jobExecutionResponseReader, leaseManager); + emrServerlessClientFactory.getClient(accountId), + jobExecutionResponseReader, + leaseManager, + metricsService, + sparkSubmitParametersBuilderProvider); } - public BatchQueryHandler getBatchQueryHandler() { + public BatchQueryHandler getBatchQueryHandler(String accountId) { return new BatchQueryHandler( - emrServerlessClientFactory.getClient(), jobExecutionResponseReader, leaseManager); + emrServerlessClientFactory.getClient(accountId), + jobExecutionResponseReader, + leaseManager, + metricsService, + sparkSubmitParametersBuilderProvider); } public InteractiveQueryHandler getInteractiveQueryHandler() { - return new InteractiveQueryHandler(sessionManager, jobExecutionResponseReader, leaseManager); + return new InteractiveQueryHandler( + sessionManager, + jobExecutionResponseReader, + leaseManager, + metricsService, + sparkSubmitParametersBuilderProvider); } public IndexDMLHandler getIndexDMLHandler() { diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/QueryIdProvider.java b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/QueryIdProvider.java similarity index 62% rename from spark/src/main/java/org/opensearch/sql/spark/dispatcher/QueryIdProvider.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/QueryIdProvider.java index 2167eb6b7a..a108ca1209 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/QueryIdProvider.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/QueryIdProvider.java @@ -5,9 +5,11 @@ package org.opensearch.sql.spark.dispatcher; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; import org.opensearch.sql.spark.dispatcher.model.DispatchQueryRequest; /** Interface for extension point to specify queryId. Called when new query is executed. 
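// Sketch of the account-scoped client lookup that the factory changes above rely on
// (illustrative names, not the plugin API): handler factory methods now receive an accountId
// and forward it to EMRServerlessClientFactory.getClient(accountId), so each account can get
// its own client instead of one shared instance.
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

class AccountScopedClientSketch {
  interface EmrServerlessClient {
    String startJobRun(String jobName);
  }

  static class ClientFactory {
    private final Map<String, EmrServerlessClient> cache = new ConcurrentHashMap<>();

    EmrServerlessClient getClient(String accountId) {
      // One client per account; a missing accountId falls back to a default entry.
      String key = (accountId == null || accountId.isBlank()) ? "<default>" : accountId;
      return cache.computeIfAbsent(key, id -> jobName -> "started " + jobName + " for " + id);
    }
  }

  public static void main(String[] args) {
    ClientFactory factory = new ClientFactory();
    System.out.println(factory.getClient("123456789012").startJobRun("batch-query"));
    System.out.println(factory.getClient(null).startJobRun("interactive-session"));
  }
}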
*/ public interface QueryIdProvider { - String getQueryId(DispatchQueryRequest dispatchQueryRequest); + String getQueryId( + DispatchQueryRequest dispatchQueryRequest, AsyncQueryRequestContext asyncQueryRequestContext); } diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/RefreshQueryHandler.java b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/RefreshQueryHandler.java similarity index 72% rename from spark/src/main/java/org/opensearch/sql/spark/dispatcher/RefreshQueryHandler.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/RefreshQueryHandler.java index 78a2651317..659166e8a6 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/RefreshQueryHandler.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/RefreshQueryHandler.java @@ -8,6 +8,8 @@ import java.util.Map; import org.opensearch.sql.datasource.model.DataSourceMetadata; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryJobMetadata; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; +import org.opensearch.sql.spark.asyncquery.model.QueryState; import org.opensearch.sql.spark.client.EMRServerlessClient; import org.opensearch.sql.spark.dispatcher.model.DispatchQueryContext; import org.opensearch.sql.spark.dispatcher.model.DispatchQueryRequest; @@ -19,6 +21,8 @@ import org.opensearch.sql.spark.flint.operation.FlintIndexOpFactory; import org.opensearch.sql.spark.leasemanager.LeaseManager; import org.opensearch.sql.spark.leasemanager.model.LeaseRequest; +import org.opensearch.sql.spark.metrics.MetricsService; +import org.opensearch.sql.spark.parameter.SparkSubmitParametersBuilderProvider; import org.opensearch.sql.spark.response.JobExecutionResponseReader; /** @@ -35,17 +39,27 @@ public RefreshQueryHandler( JobExecutionResponseReader jobExecutionResponseReader, FlintIndexMetadataService flintIndexMetadataService, LeaseManager leaseManager, - FlintIndexOpFactory flintIndexOpFactory) { - super(emrServerlessClient, jobExecutionResponseReader, leaseManager); + FlintIndexOpFactory flintIndexOpFactory, + MetricsService metricsService, + SparkSubmitParametersBuilderProvider sparkSubmitParametersBuilderProvider) { + super( + emrServerlessClient, + jobExecutionResponseReader, + leaseManager, + metricsService, + sparkSubmitParametersBuilderProvider); this.flintIndexMetadataService = flintIndexMetadataService; this.flintIndexOpFactory = flintIndexOpFactory; } @Override - public String cancelJob(AsyncQueryJobMetadata asyncQueryJobMetadata) { + public String cancelJob( + AsyncQueryJobMetadata asyncQueryJobMetadata, + AsyncQueryRequestContext asyncQueryRequestContext) { String datasourceName = asyncQueryJobMetadata.getDatasourceName(); Map indexMetadataMap = - flintIndexMetadataService.getFlintIndexMetadata(asyncQueryJobMetadata.getIndexName()); + flintIndexMetadataService.getFlintIndexMetadata( + asyncQueryJobMetadata.getIndexName(), asyncQueryRequestContext); if (!indexMetadataMap.containsKey(asyncQueryJobMetadata.getIndexName())) { throw new IllegalStateException( String.format( @@ -53,14 +67,18 @@ public String cancelJob(AsyncQueryJobMetadata asyncQueryJobMetadata) { } FlintIndexMetadata indexMetadata = indexMetadataMap.get(asyncQueryJobMetadata.getIndexName()); FlintIndexOp jobCancelOp = flintIndexOpFactory.getCancel(datasourceName); - jobCancelOp.apply(indexMetadata); + jobCancelOp.apply(indexMetadata, asyncQueryRequestContext); return asyncQueryJobMetadata.getQueryId(); } + @Override + protected void 
borrow(String datasource) { + leaseManager.borrow(new LeaseRequest(JobType.REFRESH, datasource)); + } + @Override public DispatchQueryResponse submit( DispatchQueryRequest dispatchQueryRequest, DispatchQueryContext context) { - leaseManager.borrow(new LeaseRequest(JobType.BATCH, dispatchQueryRequest.getDatasource())); DispatchQueryResponse resp = super.submit(dispatchQueryRequest, context); DataSourceMetadata dataSourceMetadata = context.getDataSourceMetadata(); @@ -70,8 +88,9 @@ public DispatchQueryResponse submit( .resultIndex(resp.getResultIndex()) .sessionId(resp.getSessionId()) .datasourceName(dataSourceMetadata.getName()) - .jobType(JobType.BATCH) + .jobType(JobType.REFRESH) .indexName(context.getIndexQueryDetails().openSearchIndexName()) + .status(QueryState.WAITING) .build(); } } diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcher.java b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcher.java similarity index 59% rename from spark/src/main/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcher.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcher.java index 5facdee567..a390924e85 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcher.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcher.java @@ -23,6 +23,8 @@ import org.opensearch.sql.spark.execution.session.SessionManager; import org.opensearch.sql.spark.rest.model.LangType; import org.opensearch.sql.spark.utils.SQLQueryUtils; +import org.opensearch.sql.spark.validator.PPLQueryValidator; +import org.opensearch.sql.spark.validator.SQLQueryValidator; /** This class takes care of understanding query and dispatching job query to emr serverless. 
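// Sketch of the lease-borrow hook introduced above (stand-in types): BatchQueryHandler borrows
// a BATCH lease inside submit(), and RefreshQueryHandler only overrides borrow() to request a
// REFRESH lease instead of duplicating the whole submit() flow.
class LeaseBorrowHookSketch {
  enum JobType { INTERACTIVE, STREAMING, REFRESH, BATCH }

  interface LeaseManager {
    void borrow(JobType jobType, String datasource);
  }

  static class BatchHandler {
    protected final LeaseManager leaseManager;

    BatchHandler(LeaseManager leaseManager) {
      this.leaseManager = leaseManager;
    }

    void submit(String datasource) {
      borrow(datasource); // hook point; job submission itself is omitted in this sketch
    }

    protected void borrow(String datasource) {
      leaseManager.borrow(JobType.BATCH, datasource);
    }
  }

  static class RefreshHandler extends BatchHandler {
    RefreshHandler(LeaseManager leaseManager) {
      super(leaseManager);
    }

    @Override
    protected void borrow(String datasource) {
      leaseManager.borrow(JobType.REFRESH, datasource);
    }
  }

  public static void main(String[] args) {
    LeaseManager printing = (type, ds) -> System.out.println("borrow " + type + " lease for " + ds);
    new BatchHandler(printing).submit("my_glue");
    new RefreshHandler(printing).submit("my_glue");
  }
}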
*/ @AllArgsConstructor @@ -37,65 +39,98 @@ public class SparkQueryDispatcher { private final SessionManager sessionManager; private final QueryHandlerFactory queryHandlerFactory; private final QueryIdProvider queryIdProvider; + private final SQLQueryValidator sqlQueryValidator; + private final PPLQueryValidator pplQueryValidator; public DispatchQueryResponse dispatch( DispatchQueryRequest dispatchQueryRequest, AsyncQueryRequestContext asyncQueryRequestContext) { DataSourceMetadata dataSourceMetadata = this.dataSourceService.verifyDataSourceAccessAndGetRawMetadata( - dispatchQueryRequest.getDatasource()); - - if (LangType.SQL.equals(dispatchQueryRequest.getLangType()) - && SQLQueryUtils.isFlintExtensionQuery(dispatchQueryRequest.getQuery())) { - IndexQueryDetails indexQueryDetails = getIndexQueryDetails(dispatchQueryRequest); - DispatchQueryContext context = - getDefaultDispatchContextBuilder(dispatchQueryRequest, dataSourceMetadata) - .indexQueryDetails(indexQueryDetails) - .asyncQueryRequestContext(asyncQueryRequestContext) - .build(); - - return getQueryHandlerForFlintExtensionQuery(indexQueryDetails) - .submit(dispatchQueryRequest, context); - } else { - DispatchQueryContext context = - getDefaultDispatchContextBuilder(dispatchQueryRequest, dataSourceMetadata) - .asyncQueryRequestContext(asyncQueryRequestContext) - .build(); - return getDefaultAsyncQueryHandler().submit(dispatchQueryRequest, context); + dispatchQueryRequest.getDatasource(), asyncQueryRequestContext); + + String query = dispatchQueryRequest.getQuery(); + if (LangType.SQL.equals(dispatchQueryRequest.getLangType())) { + if (SQLQueryUtils.isFlintExtensionQuery(query)) { + sqlQueryValidator.validateFlintExtensionQuery(query, dataSourceMetadata.getConnector()); + return handleFlintExtensionQuery( + dispatchQueryRequest, asyncQueryRequestContext, dataSourceMetadata); + } + + sqlQueryValidator.validate(query, dataSourceMetadata.getConnector()); + } else if (LangType.PPL.equals(dispatchQueryRequest.getLangType())) { + pplQueryValidator.validate(query, dataSourceMetadata.getConnector()); } + return handleDefaultQuery(dispatchQueryRequest, asyncQueryRequestContext, dataSourceMetadata); + } + + private DispatchQueryResponse handleFlintExtensionQuery( + DispatchQueryRequest dispatchQueryRequest, + AsyncQueryRequestContext asyncQueryRequestContext, + DataSourceMetadata dataSourceMetadata) { + IndexQueryDetails indexQueryDetails = getIndexQueryDetails(dispatchQueryRequest); + DispatchQueryContext context = + getDefaultDispatchContextBuilder( + dispatchQueryRequest, dataSourceMetadata, asyncQueryRequestContext) + .indexQueryDetails(indexQueryDetails) + .asyncQueryRequestContext(asyncQueryRequestContext) + .build(); + + return getQueryHandlerForFlintExtensionQuery(dispatchQueryRequest, indexQueryDetails) + .submit(dispatchQueryRequest, context); + } + + private DispatchQueryResponse handleDefaultQuery( + DispatchQueryRequest dispatchQueryRequest, + AsyncQueryRequestContext asyncQueryRequestContext, + DataSourceMetadata dataSourceMetadata) { + + DispatchQueryContext context = + getDefaultDispatchContextBuilder( + dispatchQueryRequest, dataSourceMetadata, asyncQueryRequestContext) + .asyncQueryRequestContext(asyncQueryRequestContext) + .build(); + + return getDefaultAsyncQueryHandler(dispatchQueryRequest.getAccountId()) + .submit(dispatchQueryRequest, context); } private DispatchQueryContext.DispatchQueryContextBuilder getDefaultDispatchContextBuilder( - DispatchQueryRequest dispatchQueryRequest, DataSourceMetadata dataSourceMetadata) { + 
DispatchQueryRequest dispatchQueryRequest, + DataSourceMetadata dataSourceMetadata, + AsyncQueryRequestContext asyncQueryRequestContext) { return DispatchQueryContext.builder() .dataSourceMetadata(dataSourceMetadata) .tags(getDefaultTagsForJobSubmission(dispatchQueryRequest)) - .queryId(queryIdProvider.getQueryId(dispatchQueryRequest)); + .queryId(queryIdProvider.getQueryId(dispatchQueryRequest, asyncQueryRequestContext)); } private AsyncQueryHandler getQueryHandlerForFlintExtensionQuery( - IndexQueryDetails indexQueryDetails) { + DispatchQueryRequest dispatchQueryRequest, IndexQueryDetails indexQueryDetails) { if (isEligibleForIndexDMLHandling(indexQueryDetails)) { return queryHandlerFactory.getIndexDMLHandler(); } else if (isEligibleForStreamingQuery(indexQueryDetails)) { - return queryHandlerFactory.getStreamingQueryHandler(); + return queryHandlerFactory.getStreamingQueryHandler(dispatchQueryRequest.getAccountId()); } else if (IndexQueryActionType.CREATE.equals(indexQueryDetails.getIndexQueryActionType())) { // Create should be handled by batch handler. This is to avoid DROP index incorrectly cancel // an interactive job. - return queryHandlerFactory.getBatchQueryHandler(); + return queryHandlerFactory.getBatchQueryHandler(dispatchQueryRequest.getAccountId()); } else if (IndexQueryActionType.REFRESH.equals(indexQueryDetails.getIndexQueryActionType())) { // Manual refresh should be handled by batch handler - return queryHandlerFactory.getRefreshQueryHandler(); + return queryHandlerFactory.getRefreshQueryHandler(dispatchQueryRequest.getAccountId()); + } else if (IndexQueryActionType.RECOVER.equals(indexQueryDetails.getIndexQueryActionType())) { + // RECOVER INDEX JOB should not be executed from async-query-core + throw new IllegalArgumentException("RECOVER INDEX JOB is not allowed."); } else { - return getDefaultAsyncQueryHandler(); + return getDefaultAsyncQueryHandler(dispatchQueryRequest.getAccountId()); } } @NotNull - private AsyncQueryHandler getDefaultAsyncQueryHandler() { + private AsyncQueryHandler getDefaultAsyncQueryHandler(String accountId) { return sessionManager.isEnabled() ? 
queryHandlerFactory.getInteractiveQueryHandler() - : queryHandlerFactory.getBatchQueryHandler(); + : queryHandlerFactory.getBatchQueryHandler(accountId); } @NotNull @@ -117,7 +152,6 @@ private boolean isEligibleForStreamingQuery(IndexQueryDetails indexQueryDetails) private boolean isEligibleForIndexDMLHandling(IndexQueryDetails indexQueryDetails) { return IndexQueryActionType.DROP.equals(indexQueryDetails.getIndexQueryActionType()) - || IndexQueryActionType.VACUUM.equals(indexQueryDetails.getIndexQueryActionType()) || (IndexQueryActionType.ALTER.equals(indexQueryDetails.getIndexQueryActionType()) && (indexQueryDetails .getFlintIndexOptions() @@ -126,14 +160,18 @@ private boolean isEligibleForIndexDMLHandling(IndexQueryDetails indexQueryDetail && !indexQueryDetails.getFlintIndexOptions().autoRefresh())); } - public JSONObject getQueryResponse(AsyncQueryJobMetadata asyncQueryJobMetadata) { + public JSONObject getQueryResponse( + AsyncQueryJobMetadata asyncQueryJobMetadata, + AsyncQueryRequestContext asyncQueryRequestContext) { return getAsyncQueryHandlerForExistingQuery(asyncQueryJobMetadata) - .getQueryResponse(asyncQueryJobMetadata); + .getQueryResponse(asyncQueryJobMetadata, asyncQueryRequestContext); } - public String cancelJob(AsyncQueryJobMetadata asyncQueryJobMetadata) { + public String cancelJob( + AsyncQueryJobMetadata asyncQueryJobMetadata, + AsyncQueryRequestContext asyncQueryRequestContext) { return getAsyncQueryHandlerForExistingQuery(asyncQueryJobMetadata) - .cancelJob(asyncQueryJobMetadata); + .cancelJob(asyncQueryJobMetadata, asyncQueryRequestContext); } private AsyncQueryHandler getAsyncQueryHandlerForExistingQuery( @@ -142,12 +180,12 @@ private AsyncQueryHandler getAsyncQueryHandlerForExistingQuery( return queryHandlerFactory.getInteractiveQueryHandler(); } else if (IndexDMLHandler.isIndexDMLQuery(asyncQueryJobMetadata.getJobId())) { return queryHandlerFactory.getIndexDMLHandler(); - } else if (asyncQueryJobMetadata.getJobType() == JobType.BATCH) { - return queryHandlerFactory.getRefreshQueryHandler(); + } else if (asyncQueryJobMetadata.getJobType() == JobType.REFRESH) { + return queryHandlerFactory.getRefreshQueryHandler(asyncQueryJobMetadata.getAccountId()); } else if (asyncQueryJobMetadata.getJobType() == JobType.STREAMING) { - return queryHandlerFactory.getStreamingQueryHandler(); + return queryHandlerFactory.getStreamingQueryHandler(asyncQueryJobMetadata.getAccountId()); } else { - return queryHandlerFactory.getBatchQueryHandler(); + return queryHandlerFactory.getBatchQueryHandler(asyncQueryJobMetadata.getAccountId()); } } diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/StreamingQueryHandler.java b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/StreamingQueryHandler.java similarity index 72% rename from spark/src/main/java/org/opensearch/sql/spark/dispatcher/StreamingQueryHandler.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/StreamingQueryHandler.java index 7b317d2218..58fb5244b4 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/StreamingQueryHandler.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/StreamingQueryHandler.java @@ -7,13 +7,13 @@ import static org.opensearch.sql.spark.dispatcher.SparkQueryDispatcher.INDEX_TAG_KEY; import static org.opensearch.sql.spark.dispatcher.SparkQueryDispatcher.JOB_TYPE_TAG_KEY; +import static org.opensearch.sql.spark.metrics.EmrMetrics.EMR_STREAMING_QUERY_JOBS_CREATION_COUNT; import java.util.Map; import 
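// Stand-in sketch of how the dispatcher above selects a handler for an already submitted
// query: a session id means interactive, the special DML job id means index DML, and otherwise
// the stored JobType (REFRESH / STREAMING / default BATCH) decides which handler receives the
// stored accountId. "dml-job-id" is a placeholder for DML_QUERY_JOB_ID.
class ExistingQueryRoutingSketch {
  enum JobType { INTERACTIVE, STREAMING, REFRESH, BATCH }

  static String pickHandler(String sessionId, String jobId, JobType jobType) {
    if (sessionId != null) {
      return "InteractiveQueryHandler";
    }
    if ("dml-job-id".equals(jobId)) {
      return "IndexDMLHandler";
    }
    switch (jobType) {
      case REFRESH:
        return "RefreshQueryHandler";
      case STREAMING:
        return "StreamingQueryHandler";
      default:
        return "BatchQueryHandler";
    }
  }

  public static void main(String[] args) {
    System.out.println(pickHandler(null, "job-1", JobType.REFRESH)); // RefreshQueryHandler
    System.out.println(pickHandler("s-1", "job-2", JobType.BATCH));  // InteractiveQueryHandler
  }
}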
org.opensearch.sql.datasource.model.DataSourceMetadata; -import org.opensearch.sql.legacy.metrics.MetricName; -import org.opensearch.sql.legacy.utils.MetricUtils; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryJobMetadata; -import org.opensearch.sql.spark.asyncquery.model.SparkSubmitParameters; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; +import org.opensearch.sql.spark.asyncquery.model.QueryState; import org.opensearch.sql.spark.client.EMRServerlessClient; import org.opensearch.sql.spark.client.StartJobRequest; import org.opensearch.sql.spark.dispatcher.model.DispatchQueryContext; @@ -23,6 +23,8 @@ import org.opensearch.sql.spark.dispatcher.model.JobType; import org.opensearch.sql.spark.leasemanager.LeaseManager; import org.opensearch.sql.spark.leasemanager.model.LeaseRequest; +import org.opensearch.sql.spark.metrics.MetricsService; +import org.opensearch.sql.spark.parameter.SparkSubmitParametersBuilderProvider; import org.opensearch.sql.spark.response.JobExecutionResponseReader; /** @@ -34,12 +36,21 @@ public class StreamingQueryHandler extends BatchQueryHandler { public StreamingQueryHandler( EMRServerlessClient emrServerlessClient, JobExecutionResponseReader jobExecutionResponseReader, - LeaseManager leaseManager) { - super(emrServerlessClient, jobExecutionResponseReader, leaseManager); + LeaseManager leaseManager, + MetricsService metricsService, + SparkSubmitParametersBuilderProvider sparkSubmitParametersBuilderProvider) { + super( + emrServerlessClient, + jobExecutionResponseReader, + leaseManager, + metricsService, + sparkSubmitParametersBuilderProvider); } @Override - public String cancelJob(AsyncQueryJobMetadata asyncQueryJobMetadata) { + public String cancelJob( + AsyncQueryJobMetadata asyncQueryJobMetadata, + AsyncQueryRequestContext asyncQueryRequestContext) { throw new IllegalArgumentException( "can't cancel index DML query, using ALTER auto_refresh=off statement to stop job, using" + " VACUUM statement to stop job and delete data"); @@ -69,19 +80,22 @@ public DispatchQueryResponse submit( dispatchQueryRequest.getAccountId(), dispatchQueryRequest.getApplicationId(), dispatchQueryRequest.getExecutionRoleARN(), - SparkSubmitParameters.builder() + sparkSubmitParametersBuilderProvider + .getSparkSubmitParametersBuilder() .clusterName(clusterName) - .dataSource(dataSourceMetadata) + .queryId(context.getQueryId()) .query(dispatchQueryRequest.getQuery()) .structuredStreaming(true) - .build() + .dataSource( + dataSourceMetadata, dispatchQueryRequest, context.getAsyncQueryRequestContext()) .acceptModifier(dispatchQueryRequest.getSparkSubmitParameterModifier()) + .acceptComposers(dispatchQueryRequest, context.getAsyncQueryRequestContext()) .toString(), tags, indexQueryDetails.getFlintIndexOptions().autoRefresh(), dataSourceMetadata.getResultIndex()); String jobId = emrServerlessClient.startJobRun(startJobRequest); - MetricUtils.incrementNumericalMetric(MetricName.EMR_STREAMING_QUERY_JOBS_CREATION_COUNT); + metricsService.incrementNumericalMetric(EMR_STREAMING_QUERY_JOBS_CREATION_COUNT); return DispatchQueryResponse.builder() .queryId(context.getQueryId()) .jobId(jobId) @@ -89,6 +103,7 @@ public DispatchQueryResponse submit( .datasourceName(dataSourceMetadata.getName()) .jobType(JobType.STREAMING) .indexName(indexQueryDetails.openSearchIndexName()) + .status(QueryState.WAITING) .build(); } } diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/DispatchQueryContext.java 
b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/model/DispatchQueryContext.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/DispatchQueryContext.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/model/DispatchQueryContext.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/DispatchQueryRequest.java b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/model/DispatchQueryRequest.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/DispatchQueryRequest.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/model/DispatchQueryRequest.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/DispatchQueryResponse.java b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/model/DispatchQueryResponse.java similarity index 75% rename from spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/DispatchQueryResponse.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/model/DispatchQueryResponse.java index b97d9fd7b0..c484236d6e 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/DispatchQueryResponse.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/model/DispatchQueryResponse.java @@ -2,6 +2,7 @@ import lombok.Builder; import lombok.Getter; +import org.opensearch.sql.spark.asyncquery.model.QueryState; @Getter @Builder @@ -13,4 +14,6 @@ public class DispatchQueryResponse { private final String datasourceName; private final JobType jobType; private final String indexName; + private final QueryState status; + private final String error; } diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/FlintIndexOptions.java b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/model/FlintIndexOptions.java similarity index 79% rename from spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/FlintIndexOptions.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/model/FlintIndexOptions.java index 79af1c91ab..6c7cc7c5fb 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/FlintIndexOptions.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/model/FlintIndexOptions.java @@ -19,6 +19,7 @@ public class FlintIndexOptions { public static final String INCREMENTAL_REFRESH = "incremental_refresh"; public static final String CHECKPOINT_LOCATION = "checkpoint_location"; public static final String WATERMARK_DELAY = "watermark_delay"; + public static final String SCHEDULER_MODE = "scheduler_mode"; private final Map options = new HashMap<>(); public void setOption(String key, String value) { @@ -33,6 +34,11 @@ public boolean autoRefresh() { return Boolean.parseBoolean(getOption(AUTO_REFRESH).orElse("false")); } + public boolean isExternalScheduler() { + // Default is false, which means using internal scheduler to refresh the index. 
+ return getOption(SCHEDULER_MODE).map(mode -> "external".equals(mode)).orElse(false); + } + public Map getProvidedOptions() { return new HashMap<>(options); } diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/FullyQualifiedTableName.java b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/model/FullyQualifiedTableName.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/FullyQualifiedTableName.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/model/FullyQualifiedTableName.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/IndexDMLResult.java b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/model/IndexDMLResult.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/IndexDMLResult.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/model/IndexDMLResult.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/IndexQueryActionType.java b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/model/IndexQueryActionType.java similarity index 93% rename from spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/IndexQueryActionType.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/model/IndexQueryActionType.java index 96e7d159af..51e0832217 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/IndexQueryActionType.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/model/IndexQueryActionType.java @@ -13,5 +13,6 @@ public enum IndexQueryActionType { SHOW, DROP, VACUUM, - ALTER + ALTER, + RECOVER } diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/IndexQueryDetails.java b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/model/IndexQueryDetails.java similarity index 80% rename from spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/IndexQueryDetails.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/model/IndexQueryDetails.java index 5596d1b425..50ce95ffe0 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/IndexQueryDetails.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/model/IndexQueryDetails.java @@ -31,6 +31,7 @@ public class IndexQueryDetails { // materialized view special case where // table name and mv name are combined. 
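// Minimal stand-alone sketch of the scheduler_mode option added above (not the real
// FlintIndexOptions class): only the literal value "external" switches index refresh to the
// external scheduler; a missing or different value keeps the internal scheduler.
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;

class SchedulerModeSketch {
  private final Map<String, String> options = new HashMap<>();

  void setOption(String key, String value) {
    options.put(key, value);
  }

  boolean isExternalScheduler() {
    return Optional.ofNullable(options.get("scheduler_mode")).map("external"::equals).orElse(false);
  }

  public static void main(String[] args) {
    SchedulerModeSketch opts = new SchedulerModeSketch();
    System.out.println(opts.isExternalScheduler()); // false: internal scheduler by default
    opts.setOption("scheduler_mode", "external");
    System.out.println(opts.isExternalScheduler()); // true
  }
}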
private String mvName; + private String mvQuery; private FlintIndexType indexType; private IndexQueryDetails() {} @@ -73,6 +74,11 @@ public IndexQueryDetailsBuilder mvName(String mvName) { return this; } + public IndexQueryDetailsBuilder mvQuery(String mvQuery) { + indexQueryDetails.mvQuery = mvQuery; + return this; + } + public IndexQueryDetailsBuilder indexType(FlintIndexType indexType) { indexQueryDetails.indexType = indexType; return this; @@ -87,24 +93,35 @@ public IndexQueryDetails build() { } public String openSearchIndexName() { + if (getIndexType() == null) { + return null; + } FullyQualifiedTableName fullyQualifiedTableName = getFullyQualifiedTableName(); String indexName = StringUtils.EMPTY; switch (getIndexType()) { case COVERING: - indexName = - "flint_" - + fullyQualifiedTableName.toFlintName() - + "_" - + strip(getIndexName(), STRIP_CHARS) - + "_" - + getIndexType().getSuffix(); + if (getIndexName() != null) { // getIndexName will be null for SHOW INDEX query + indexName = + "flint_" + + fullyQualifiedTableName.toFlintName() + + "_" + + strip(getIndexName(), STRIP_CHARS) + + "_" + + getIndexType().getSuffix(); + } else { + return null; + } break; case SKIPPING: indexName = "flint_" + fullyQualifiedTableName.toFlintName() + "_" + getIndexType().getSuffix(); break; case MATERIALIZED_VIEW: - indexName = "flint_" + new FullyQualifiedTableName(mvName).toFlintName(); + if (mvName != null) { // mvName is not available for SHOW MATERIALIZED VIEW query + indexName = "flint_" + new FullyQualifiedTableName(mvName).toFlintName(); + } else { + return null; + } break; } return percentEncode(indexName).toLowerCase(); diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/JobType.java b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/model/JobType.java similarity index 93% rename from spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/JobType.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/model/JobType.java index 01f5f422e9..26b391933b 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/model/JobType.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/dispatcher/model/JobType.java @@ -8,9 +8,10 @@ public enum JobType { INTERACTIVE("interactive"), STREAMING("streaming"), + REFRESH("refresh"), BATCH("batch"); - private String text; + private final String text; JobType(String text) { this.text = text; diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/session/CreateSessionRequest.java b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/session/CreateSessionRequest.java similarity index 88% rename from spark/src/main/java/org/opensearch/sql/spark/execution/session/CreateSessionRequest.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/execution/session/CreateSessionRequest.java index 4170f0c2d6..6398dd224f 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/execution/session/CreateSessionRequest.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/session/CreateSessionRequest.java @@ -7,9 +7,9 @@ import java.util.Map; import lombok.Data; -import org.opensearch.sql.spark.asyncquery.model.SparkSubmitParameters; import org.opensearch.sql.spark.client.StartJobRequest; import org.opensearch.sql.spark.dispatcher.model.JobType; +import org.opensearch.sql.spark.parameter.SparkSubmitParametersBuilder; @Data public class CreateSessionRequest { @@ -17,7 +17,7 @@ public class CreateSessionRequest { private 
final String accountId; private final String applicationId; private final String executionRoleArn; - private final SparkSubmitParameters sparkSubmitParameters; + private final SparkSubmitParametersBuilder sparkSubmitParametersBuilder; private final Map tags; private final String resultIndex; private final String datasourceName; @@ -28,7 +28,7 @@ public StartJobRequest getStartJobRequest(String sessionId) { accountId, applicationId, executionRoleArn, - sparkSubmitParameters.toString(), + sparkSubmitParametersBuilder.toString(), tags, resultIndex); } diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/session/DatasourceEmbeddedSessionIdProvider.java b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/session/DatasourceEmbeddedSessionIdProvider.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/execution/session/DatasourceEmbeddedSessionIdProvider.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/execution/session/DatasourceEmbeddedSessionIdProvider.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/session/InteractiveSession.java b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/session/InteractiveSession.java similarity index 80% rename from spark/src/main/java/org/opensearch/sql/spark/execution/session/InteractiveSession.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/execution/session/InteractiveSession.java index 4a8d6a8f58..2915e2a3e1 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/execution/session/InteractiveSession.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/session/InteractiveSession.java @@ -16,7 +16,6 @@ import lombok.Getter; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.opensearch.index.engine.VersionConflictEngineException; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; import org.opensearch.sql.spark.client.EMRServerlessClient; import org.opensearch.sql.spark.client.StartJobRequest; @@ -25,7 +24,6 @@ import org.opensearch.sql.spark.execution.statement.StatementId; import org.opensearch.sql.spark.execution.statestore.SessionStorageService; import org.opensearch.sql.spark.execution.statestore.StatementStorageService; -import org.opensearch.sql.spark.rest.model.LangType; import org.opensearch.sql.spark.utils.TimeProvider; /** @@ -53,29 +51,20 @@ public class InteractiveSession implements Session { public void open( CreateSessionRequest createSessionRequest, AsyncQueryRequestContext asyncQueryRequestContext) { - try { - // append session id; - createSessionRequest - .getSparkSubmitParameters() - .acceptModifier( - (parameters) -> { - parameters.sessionExecution(sessionId, createSessionRequest.getDatasourceName()); - }); - createSessionRequest.getTags().put(SESSION_ID_TAG_KEY, sessionId); - StartJobRequest startJobRequest = createSessionRequest.getStartJobRequest(sessionId); - String jobID = serverlessClient.startJobRun(startJobRequest); - String applicationId = startJobRequest.getApplicationId(); - String accountId = createSessionRequest.getAccountId(); + // append session id; + createSessionRequest + .getSparkSubmitParametersBuilder() + .sessionExecution(sessionId, createSessionRequest.getDatasourceName()); + createSessionRequest.getTags().put(SESSION_ID_TAG_KEY, sessionId); + StartJobRequest startJobRequest = createSessionRequest.getStartJobRequest(sessionId); + String jobID = 
serverlessClient.startJobRun(startJobRequest); + String applicationId = startJobRequest.getApplicationId(); + String accountId = createSessionRequest.getAccountId(); - sessionModel = - initInteractiveSession( - accountId, applicationId, jobID, sessionId, createSessionRequest.getDatasourceName()); - sessionStorageService.createSession(sessionModel, asyncQueryRequestContext); - } catch (VersionConflictEngineException e) { - String errorMsg = "session already exist. " + sessionId; - LOG.error(errorMsg); - throw new IllegalStateException(errorMsg); - } + sessionModel = + initInteractiveSession( + accountId, applicationId, jobID, sessionId, createSessionRequest.getDatasourceName()); + sessionStorageService.createSession(sessionModel, asyncQueryRequestContext); } /** todo. StatementSweeper will delete doc. */ @@ -111,7 +100,7 @@ public StatementId submit( .jobId(sessionModel.getJobId()) .statementStorageService(statementStorageService) .statementId(statementId) - .langType(LangType.SQL) + .langType(request.getLangType()) .datasourceName(sessionModel.getDatasourceName()) .query(request.getQuery()) .queryId(qid) @@ -132,9 +121,10 @@ public StatementId submit( } @Override - public Optional get(StatementId stID) { + public Optional get( + StatementId stID, AsyncQueryRequestContext asyncQueryRequestContext) { return statementStorageService - .getStatement(stID.getId(), sessionModel.getDatasourceName()) + .getStatement(stID.getId(), sessionModel.getDatasourceName(), asyncQueryRequestContext) .map( model -> Statement.builder() @@ -148,6 +138,7 @@ public Optional get(StatementId stID) { .queryId(model.getQueryId()) .statementStorageService(statementStorageService) .statementModel(model) + .asyncQueryRequestContext(asyncQueryRequestContext) .build()); } diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/session/Session.java b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/session/Session.java similarity index 93% rename from spark/src/main/java/org/opensearch/sql/spark/execution/session/Session.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/execution/session/Session.java index fad097ca1b..4c083d79c4 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/execution/session/Session.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/session/Session.java @@ -35,7 +35,7 @@ void open( * @param stID {@link StatementId} * @return {@link Statement} */ - Optional get(StatementId stID); + Optional get(StatementId stID, AsyncQueryRequestContext asyncQueryRequestContext); SessionModel getSessionModel(); diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/session/SessionConfigSupplier.java b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/session/SessionConfigSupplier.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/execution/session/SessionConfigSupplier.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/execution/session/SessionConfigSupplier.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/session/SessionIdProvider.java b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/session/SessionIdProvider.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/execution/session/SessionIdProvider.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/execution/session/SessionIdProvider.java diff --git 
a/spark/src/main/java/org/opensearch/sql/spark/execution/session/SessionManager.java b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/session/SessionManager.java similarity index 98% rename from spark/src/main/java/org/opensearch/sql/spark/execution/session/SessionManager.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/execution/session/SessionManager.java index f838e89572..0c0727294b 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/execution/session/SessionManager.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/session/SessionManager.java @@ -33,7 +33,7 @@ public Session createSession( .sessionId(sessionIdProvider.getSessionId(request)) .sessionStorageService(sessionStorageService) .statementStorageService(statementStorageService) - .serverlessClient(emrServerlessClientFactory.getClient()) + .serverlessClient(emrServerlessClientFactory.getClient(request.getAccountId())) .build(); session.open(request, asyncQueryRequestContext); return session; @@ -65,7 +65,7 @@ public Optional getSession(String sessionId, String dataSourceName) { .sessionId(sessionId) .sessionStorageService(sessionStorageService) .statementStorageService(statementStorageService) - .serverlessClient(emrServerlessClientFactory.getClient()) + .serverlessClient(emrServerlessClientFactory.getClient(model.get().getAccountId())) .sessionModel(model.get()) .sessionInactivityTimeoutMilli( sessionConfigSupplier.getSessionInactivityTimeoutMillis()) diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/session/SessionModel.java b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/session/SessionModel.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/execution/session/SessionModel.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/execution/session/SessionModel.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/session/SessionState.java b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/session/SessionState.java similarity index 92% rename from spark/src/main/java/org/opensearch/sql/spark/execution/session/SessionState.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/execution/session/SessionState.java index bd5d14c603..fc15308e05 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/execution/session/SessionState.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/session/SessionState.java @@ -20,7 +20,7 @@ public enum SessionState { DEAD("dead"), FAIL("fail"); - public static List END_STATE = ImmutableList.of(DEAD, FAIL); + public static final List END_STATE = ImmutableList.of(DEAD, FAIL); private final String sessionState; diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/session/SessionType.java b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/session/SessionType.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/execution/session/SessionType.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/execution/session/SessionType.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/statement/QueryRequest.java b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/statement/QueryRequest.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/execution/statement/QueryRequest.java rename to 
async-query-core/src/main/java/org/opensearch/sql/spark/execution/statement/QueryRequest.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/statement/Statement.java b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/statement/Statement.java similarity index 55% rename from spark/src/main/java/org/opensearch/sql/spark/execution/statement/Statement.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/execution/statement/Statement.java index b5edad0996..272f0edf4a 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/execution/statement/Statement.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/statement/Statement.java @@ -12,8 +12,6 @@ import lombok.Setter; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.opensearch.index.engine.DocumentMissingException; -import org.opensearch.index.engine.VersionConflictEngineException; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; import org.opensearch.sql.spark.execution.statestore.StatementStorageService; import org.opensearch.sql.spark.rest.model.LangType; @@ -41,25 +39,19 @@ public class Statement { /** Open a statement. */ public void open() { - try { - statementModel = - submitStatement( - sessionId, - accountId, - applicationId, - jobId, - statementId, - langType, - datasourceName, - query, - queryId); - statementModel = - statementStorageService.createStatement(statementModel, asyncQueryRequestContext); - } catch (VersionConflictEngineException e) { - String errorMsg = "statement already exist. " + statementId; - LOG.error(errorMsg); - throw new IllegalStateException(errorMsg); - } + statementModel = + submitStatement( + sessionId, + accountId, + applicationId, + jobId, + statementId, + langType, + datasourceName, + query, + queryId); + statementModel = + statementStorageService.createStatement(statementModel, asyncQueryRequestContext); } /** Cancel a statement. */ @@ -77,26 +69,9 @@ public void cancel() { LOG.error(errorMsg); throw new IllegalStateException(errorMsg); } - try { - this.statementModel = - statementStorageService.updateStatementState(statementModel, StatementState.CANCELLED); - } catch (DocumentMissingException e) { - String errorMsg = - String.format("cancel statement failed. no statement found. statement: %s.", statementId); - LOG.error(errorMsg); - throw new IllegalStateException(errorMsg); - } catch (VersionConflictEngineException e) { - this.statementModel = - statementStorageService - .getStatement(statementModel.getId(), statementModel.getDatasourceName()) - .orElse(this.statementModel); - String errorMsg = - String.format( - "cancel statement failed. 
current statementState: %s " + "statement: %s.", - this.statementModel.getStatementState(), statementId); - LOG.error(errorMsg); - throw new IllegalStateException(errorMsg); - } + this.statementModel = + statementStorageService.updateStatementState( + statementModel, StatementState.CANCELLED, asyncQueryRequestContext); } public StatementState getStatementState() { diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/statement/StatementId.java b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/statement/StatementId.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/execution/statement/StatementId.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/execution/statement/StatementId.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/statement/StatementModel.java b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/statement/StatementModel.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/execution/statement/StatementModel.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/execution/statement/StatementModel.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/statement/StatementState.java b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/statement/StatementState.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/execution/statement/StatementState.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/execution/statement/StatementState.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/statestore/CopyBuilder.java b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/statestore/CopyBuilder.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/execution/statestore/CopyBuilder.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/execution/statestore/CopyBuilder.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/statestore/OpenSearchStateStoreUtil.java b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/statestore/OpenSearchStateStoreUtil.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/execution/statestore/OpenSearchStateStoreUtil.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/execution/statestore/OpenSearchStateStoreUtil.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/statestore/SessionStorageService.java b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/statestore/SessionStorageService.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/execution/statestore/SessionStorageService.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/execution/statestore/SessionStorageService.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/statestore/StateCopyBuilder.java b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/statestore/StateCopyBuilder.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/execution/statestore/StateCopyBuilder.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/execution/statestore/StateCopyBuilder.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/statestore/StateModel.java 
b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/statestore/StateModel.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/execution/statestore/StateModel.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/execution/statestore/StateModel.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/statestore/StatementStorageService.java b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/statestore/StatementStorageService.java similarity index 73% rename from spark/src/main/java/org/opensearch/sql/spark/execution/statestore/StatementStorageService.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/execution/statestore/StatementStorageService.java index 39f1ecf704..b9446809fb 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/execution/statestore/StatementStorageService.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/statestore/StatementStorageService.java @@ -20,7 +20,10 @@ StatementModel createStatement( StatementModel statementModel, AsyncQueryRequestContext asyncQueryRequestContext); StatementModel updateStatementState( - StatementModel oldStatementModel, StatementState statementState); + StatementModel oldStatementModel, + StatementState statementState, + AsyncQueryRequestContext asyncQueryRequestContext); - Optional getStatement(String id, String datasourceName); + Optional getStatement( + String id, String datasourceName, AsyncQueryRequestContext asyncQueryRequestContext); } diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/xcontent/XContentSerializerUtil.java b/async-query-core/src/main/java/org/opensearch/sql/spark/execution/xcontent/XContentSerializerUtil.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/execution/xcontent/XContentSerializerUtil.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/execution/xcontent/XContentSerializerUtil.java diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/flint/FlintIndexClient.java b/async-query-core/src/main/java/org/opensearch/sql/spark/flint/FlintIndexClient.java new file mode 100644 index 0000000000..af1a23d8d1 --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/flint/FlintIndexClient.java @@ -0,0 +1,11 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.flint; + +/** Interface to abstract access to the FlintIndex */ +public interface FlintIndexClient { + void deleteIndex(String indexName); +} diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadata.java b/async-query-core/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadata.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadata.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadata.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataService.java b/async-query-core/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataService.java similarity index 60% rename from spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataService.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataService.java index ad274e429e..ece14c2a7b 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataService.java +++ 
b/async-query-core/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataService.java @@ -6,6 +6,7 @@ package org.opensearch.sql.spark.flint; import java.util.Map; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; import org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions; /** Interface for FlintIndexMetadataReader */ @@ -15,16 +16,22 @@ public interface FlintIndexMetadataService { * Retrieves a map of {@link FlintIndexMetadata} instances matching the specified index pattern. * * @param indexPattern indexPattern. + * @param asyncQueryRequestContext request context passed to AsyncQueryExecutorService * @return A map of {@link FlintIndexMetadata} instances against indexName, each providing * metadata access for a matched index. Returns an empty list if no indices match the pattern. */ - Map getFlintIndexMetadata(String indexPattern); + Map getFlintIndexMetadata( + String indexPattern, AsyncQueryRequestContext asyncQueryRequestContext); /** * Performs validation and updates flint index to manual refresh. * * @param indexName indexName. * @param flintIndexOptions flintIndexOptions. + * @param asyncQueryRequestContext request context passed to AsyncQueryExecutorService */ - void updateIndexToManualRefresh(String indexName, FlintIndexOptions flintIndexOptions); + void updateIndexToManualRefresh( + String indexName, + FlintIndexOptions flintIndexOptions, + AsyncQueryRequestContext asyncQueryRequestContext); } diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataValidator.java b/async-query-core/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataValidator.java new file mode 100644 index 0000000000..68ba34c476 --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataValidator.java @@ -0,0 +1,88 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.flint; + +import static org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions.AUTO_REFRESH; +import static org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions.CHECKPOINT_LOCATION; +import static org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions.INCREMENTAL_REFRESH; +import static org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions.WATERMARK_DELAY; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.commons.lang3.StringUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +public class FlintIndexMetadataValidator { + private static final Logger LOGGER = LogManager.getLogger(FlintIndexMetadataValidator.class); + + public static final Set ALTER_TO_FULL_REFRESH_ALLOWED_OPTIONS = + new LinkedHashSet<>(Arrays.asList(AUTO_REFRESH, INCREMENTAL_REFRESH)); + public static final Set ALTER_TO_INCREMENTAL_REFRESH_ALLOWED_OPTIONS = + new LinkedHashSet<>( + Arrays.asList(AUTO_REFRESH, INCREMENTAL_REFRESH, WATERMARK_DELAY, CHECKPOINT_LOCATION)); + + /** + * Validate if the flint index options contain valid key/value pairs. Throws + * IllegalArgumentException with description about invalid options. 
+ */ + public static void validateFlintIndexOptions( + String kind, Map existingOptions, Map newOptions) { + if ((newOptions.containsKey(INCREMENTAL_REFRESH) + && Boolean.parseBoolean(newOptions.get(INCREMENTAL_REFRESH))) + || ((!newOptions.containsKey(INCREMENTAL_REFRESH) + && Boolean.parseBoolean((String) existingOptions.get(INCREMENTAL_REFRESH))))) { + validateConversionToIncrementalRefresh(kind, existingOptions, newOptions); + } else { + validateConversionToFullRefresh(newOptions); + } + } + + private static void validateConversionToFullRefresh(Map newOptions) { + if (!ALTER_TO_FULL_REFRESH_ALLOWED_OPTIONS.containsAll(newOptions.keySet())) { + throw new IllegalArgumentException( + String.format( + "Altering to full refresh only allows: %s options", + ALTER_TO_FULL_REFRESH_ALLOWED_OPTIONS)); + } + } + + private static void validateConversionToIncrementalRefresh( + String kind, Map existingOptions, Map newOptions) { + if (!ALTER_TO_INCREMENTAL_REFRESH_ALLOWED_OPTIONS.containsAll(newOptions.keySet())) { + throw new IllegalArgumentException( + String.format( + "Altering to incremental refresh only allows: %s options", + ALTER_TO_INCREMENTAL_REFRESH_ALLOWED_OPTIONS)); + } + HashMap mergedOptions = new HashMap<>(); + mergedOptions.putAll(existingOptions); + mergedOptions.putAll(newOptions); + List missingAttributes = new ArrayList<>(); + if (!mergedOptions.containsKey(CHECKPOINT_LOCATION) + || StringUtils.isEmpty((String) mergedOptions.get(CHECKPOINT_LOCATION))) { + missingAttributes.add(CHECKPOINT_LOCATION); + } + if (kind.equals("mv") + && (!mergedOptions.containsKey(WATERMARK_DELAY) + || StringUtils.isEmpty((String) mergedOptions.get(WATERMARK_DELAY)))) { + missingAttributes.add(WATERMARK_DELAY); + } + if (missingAttributes.size() > 0) { + String errorMessage = + "Conversion to incremental refresh index cannot proceed due to missing attributes: " + + String.join(", ", missingAttributes) + + "."; + LOGGER.error(errorMessage); + throw new IllegalArgumentException(errorMessage); + } + } +} diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexState.java b/async-query-core/src/main/java/org/opensearch/sql/spark/flint/FlintIndexState.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexState.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/flint/FlintIndexState.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexStateModel.java b/async-query-core/src/main/java/org/opensearch/sql/spark/flint/FlintIndexStateModel.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexStateModel.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/flint/FlintIndexStateModel.java diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/flint/FlintIndexStateModelService.java b/async-query-core/src/main/java/org/opensearch/sql/spark/flint/FlintIndexStateModelService.java new file mode 100644 index 0000000000..3872f2d5a0 --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/flint/FlintIndexStateModelService.java @@ -0,0 +1,63 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.flint; + +import java.util.Optional; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; + +/** + * Abstraction over flint index state storage. Flint index state will maintain the status of each + * flint index. 
+ */ +public interface FlintIndexStateModelService { + + /** + * Create Flint index state record + * + * @param flintIndexStateModel the model to be saved + * @param asyncQueryRequestContext the request context passed to AsyncQueryExecutorService + * @return saved model + */ + FlintIndexStateModel createFlintIndexStateModel( + FlintIndexStateModel flintIndexStateModel, AsyncQueryRequestContext asyncQueryRequestContext); + + /** + * Get Flint index state record + * + * @param id ID(latestId) of the Flint index state record + * @param datasourceName datasource name + * @param asyncQueryRequestContext the request context passed to AsyncQueryExecutorService + * @return retrieved model + */ + Optional getFlintIndexStateModel( + String id, String datasourceName, AsyncQueryRequestContext asyncQueryRequestContext); + + /** + * Update Flint index state record + * + * @param flintIndexStateModel the model to be updated + * @param flintIndexState new state + * @param datasourceName Datasource name + * @param asyncQueryRequestContext the request context passed to AsyncQueryExecutorService + * @return Updated model + */ + FlintIndexStateModel updateFlintIndexState( + FlintIndexStateModel flintIndexStateModel, + FlintIndexState flintIndexState, + String datasourceName, + AsyncQueryRequestContext asyncQueryRequestContext); + + /** + * Delete Flint index state record + * + * @param id ID(latestId) of the Flint index state record + * @param datasourceName datasource name + * @param asyncQueryRequestContext the request context passed to AsyncQueryExecutorService + * @return true if deleted, otherwise false + */ + boolean deleteFlintIndexStateModel( + String id, String datasourceName, AsyncQueryRequestContext asyncQueryRequestContext); +} diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexType.java b/async-query-core/src/main/java/org/opensearch/sql/spark/flint/FlintIndexType.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexType.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/flint/FlintIndexType.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/IndexDMLResultStorageService.java b/async-query-core/src/main/java/org/opensearch/sql/spark/flint/IndexDMLResultStorageService.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/flint/IndexDMLResultStorageService.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/flint/IndexDMLResultStorageService.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOp.java b/async-query-core/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOp.java similarity index 76% rename from spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOp.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOp.java index 97ddccaf8f..78d217b8dc 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOp.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOp.java @@ -16,6 +16,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.NotNull; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; import org.opensearch.sql.spark.client.EMRServerlessClient; import org.opensearch.sql.spark.client.EMRServerlessClientFactory; import org.opensearch.sql.spark.flint.FlintIndexMetadata; 
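The FlintIndexMetadataValidator added above checks whether the options on an ALTER request are allowed before the index is switched between full and incremental refresh. A minimal sketch of exercising it, assuming the map shapes implied by the validator's casts (existing options as Map<String, Object>, new options as Map<String, String>); the wrapper class and the literal option values are illustrative only, not part of this change:

package org.opensearch.sql.spark.flint;

import static org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions.AUTO_REFRESH;
import static org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions.INCREMENTAL_REFRESH;

import java.util.Map;

/** Illustrative only: exercises the validator with two ALTER option sets. */
public class FlintIndexMetadataValidatorExample {
  public static void main(String[] args) {
    // Options currently stored on the index (value types follow the validator's casts).
    Map<String, Object> existing = Map.of(AUTO_REFRESH, "true", INCREMENTAL_REFRESH, "false");

    // ALTER ... WITH (auto_refresh = false): conversion to full refresh, which passes.
    Map<String, String> toFullRefresh = Map.of(AUTO_REFRESH, "false");
    FlintIndexMetadataValidator.validateFlintIndexOptions("mv", existing, toFullRefresh);

    // Conversion to incremental refresh without a checkpoint location or watermark delay
    // throws IllegalArgumentException listing the missing attributes.
    Map<String, String> toIncremental = Map.of(INCREMENTAL_REFRESH, "true");
    try {
      FlintIndexMetadataValidator.validateFlintIndexOptions("mv", existing, toIncremental);
    } catch (IllegalArgumentException e) {
      System.out.println(e.getMessage());
    }
  }
}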
@@ -33,30 +34,33 @@ public abstract class FlintIndexOp { private final EMRServerlessClientFactory emrServerlessClientFactory; /** Apply operation on {@link FlintIndexMetadata} */ - public void apply(FlintIndexMetadata metadata) { + public void apply( + FlintIndexMetadata metadata, AsyncQueryRequestContext asyncQueryRequestContext) { // todo, remove this logic after IndexState feature is enabled in Flint. Optional latestId = metadata.getLatestId(); if (latestId.isEmpty()) { - takeActionWithoutOCC(metadata); + takeActionWithoutOCC(metadata, asyncQueryRequestContext); } else { - FlintIndexStateModel initialFlintIndexStateModel = getFlintIndexStateModel(latestId.get()); + FlintIndexStateModel initialFlintIndexStateModel = + getFlintIndexStateModel(latestId.get(), asyncQueryRequestContext); // 1.validate state. validFlintIndexInitialState(initialFlintIndexStateModel); // 2.begin, move to transitioning state FlintIndexStateModel transitionedFlintIndexStateModel = - moveToTransitioningState(initialFlintIndexStateModel); + moveToTransitioningState(initialFlintIndexStateModel, asyncQueryRequestContext); // 3.runOp try { - runOp(metadata, transitionedFlintIndexStateModel); - commit(transitionedFlintIndexStateModel); + runOp(metadata, transitionedFlintIndexStateModel, asyncQueryRequestContext); + commit(transitionedFlintIndexStateModel, asyncQueryRequestContext); } catch (Throwable e) { LOG.error("Rolling back transient log due to transaction operation failure", e); try { flintIndexStateModelService.updateFlintIndexState( transitionedFlintIndexStateModel, initialFlintIndexStateModel.getIndexState(), - datasourceName); + datasourceName, + asyncQueryRequestContext); } catch (Exception ex) { LOG.error("Failed to rollback transient log", ex); } @@ -66,9 +70,11 @@ public void apply(FlintIndexMetadata metadata) { } @NotNull - private FlintIndexStateModel getFlintIndexStateModel(String latestId) { + private FlintIndexStateModel getFlintIndexStateModel( + String latestId, AsyncQueryRequestContext asyncQueryRequestContext) { Optional flintIndexOptional = - flintIndexStateModelService.getFlintIndexStateModel(latestId, datasourceName); + flintIndexStateModelService.getFlintIndexStateModel( + latestId, datasourceName, asyncQueryRequestContext); if (flintIndexOptional.isEmpty()) { String errorMsg = String.format(Locale.ROOT, "no state found. docId: %s", latestId); LOG.error(errorMsg); @@ -77,7 +83,8 @@ private FlintIndexStateModel getFlintIndexStateModel(String latestId) { return flintIndexOptional.get(); } - private void takeActionWithoutOCC(FlintIndexMetadata metadata) { + private void takeActionWithoutOCC( + FlintIndexMetadata metadata, AsyncQueryRequestContext asyncQueryRequestContext) { // take action without occ. 
FlintIndexStateModel fakeModel = FlintIndexStateModel.builder() @@ -89,7 +96,7 @@ private void takeActionWithoutOCC(FlintIndexMetadata metadata) { .lastUpdateTime(System.currentTimeMillis()) .error("") .build(); - runOp(metadata, fakeModel); + runOp(metadata, fakeModel, asyncQueryRequestContext); } private void validFlintIndexInitialState(FlintIndexStateModel flintIndex) { @@ -103,13 +110,14 @@ private void validFlintIndexInitialState(FlintIndexStateModel flintIndex) { } } - private FlintIndexStateModel moveToTransitioningState(FlintIndexStateModel flintIndex) { + private FlintIndexStateModel moveToTransitioningState( + FlintIndexStateModel flintIndex, AsyncQueryRequestContext asyncQueryRequestContext) { LOG.debug("Moving to transitioning state before committing."); FlintIndexState transitioningState = transitioningState(); try { flintIndex = flintIndexStateModelService.updateFlintIndexState( - flintIndex, transitioningState(), datasourceName); + flintIndex, transitioningState(), datasourceName, asyncQueryRequestContext); } catch (Exception e) { String errorMsg = String.format(Locale.ROOT, "Moving to transition state:%s failed.", transitioningState); @@ -119,16 +127,18 @@ private FlintIndexStateModel moveToTransitioningState(FlintIndexStateModel flint return flintIndex; } - private void commit(FlintIndexStateModel flintIndex) { + private void commit( + FlintIndexStateModel flintIndex, AsyncQueryRequestContext asyncQueryRequestContext) { LOG.debug("Committing the transaction and moving to stable state."); FlintIndexState stableState = stableState(); try { if (stableState == FlintIndexState.NONE) { LOG.info("Deleting index state with docId: " + flintIndex.getLatestId()); flintIndexStateModelService.deleteFlintIndexStateModel( - flintIndex.getLatestId(), datasourceName); + flintIndex.getLatestId(), datasourceName, asyncQueryRequestContext); } else { - flintIndexStateModelService.updateFlintIndexState(flintIndex, stableState, datasourceName); + flintIndexStateModelService.updateFlintIndexState( + flintIndex, stableState, datasourceName, asyncQueryRequestContext); } } catch (Exception e) { String errorMsg = @@ -145,7 +155,8 @@ public void cancelStreamingJob(FlintIndexStateModel flintIndexStateModel) throws InterruptedException, TimeoutException { String applicationId = flintIndexStateModel.getApplicationId(); String jobId = flintIndexStateModel.getJobId(); - EMRServerlessClient emrServerlessClient = emrServerlessClientFactory.getClient(); + EMRServerlessClient emrServerlessClient = + emrServerlessClientFactory.getClient(flintIndexStateModel.getAccountId()); try { emrServerlessClient.cancelJobRun( flintIndexStateModel.getApplicationId(), flintIndexStateModel.getJobId(), true); @@ -191,7 +202,10 @@ public void cancelStreamingJob(FlintIndexStateModel flintIndexStateModel) /** get transitioningState */ abstract FlintIndexState transitioningState(); - abstract void runOp(FlintIndexMetadata flintIndexMetadata, FlintIndexStateModel flintIndex); + abstract void runOp( + FlintIndexMetadata flintIndexMetadata, + FlintIndexStateModel flintIndex, + AsyncQueryRequestContext asyncQueryRequestContext); /** get stableState */ abstract FlintIndexState stableState(); diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpAlter.java b/async-query-core/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpAlter.java similarity index 71% rename from spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpAlter.java rename to 
async-query-core/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpAlter.java index 9955320253..596d76c24b 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpAlter.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpAlter.java @@ -8,6 +8,7 @@ import lombok.SneakyThrows; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; import org.opensearch.sql.spark.client.EMRServerlessClientFactory; import org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions; import org.opensearch.sql.spark.flint.FlintIndexMetadata; @@ -15,6 +16,7 @@ import org.opensearch.sql.spark.flint.FlintIndexState; import org.opensearch.sql.spark.flint.FlintIndexStateModel; import org.opensearch.sql.spark.flint.FlintIndexStateModelService; +import org.opensearch.sql.spark.scheduler.AsyncQueryScheduler; /** * Index Operation for Altering the flint index. Only handles alter operation when @@ -24,16 +26,19 @@ public class FlintIndexOpAlter extends FlintIndexOp { private static final Logger LOG = LogManager.getLogger(FlintIndexOpAlter.class); private final FlintIndexMetadataService flintIndexMetadataService; private final FlintIndexOptions flintIndexOptions; + private final AsyncQueryScheduler asyncQueryScheduler; public FlintIndexOpAlter( FlintIndexOptions flintIndexOptions, FlintIndexStateModelService flintIndexStateModelService, String datasourceName, EMRServerlessClientFactory emrServerlessClientFactory, - FlintIndexMetadataService flintIndexMetadataService) { + FlintIndexMetadataService flintIndexMetadataService, + AsyncQueryScheduler asyncQueryScheduler) { super(flintIndexStateModelService, datasourceName, emrServerlessClientFactory); this.flintIndexMetadataService = flintIndexMetadataService; this.flintIndexOptions = flintIndexOptions; + this.asyncQueryScheduler = asyncQueryScheduler; } @Override @@ -48,12 +53,20 @@ FlintIndexState transitioningState() { @SneakyThrows @Override - void runOp(FlintIndexMetadata flintIndexMetadata, FlintIndexStateModel flintIndexStateModel) { + void runOp( + FlintIndexMetadata flintIndexMetadata, + FlintIndexStateModel flintIndexStateModel, + AsyncQueryRequestContext asyncQueryRequestContext) { LOG.debug( "Running alter index operation for index: {}", flintIndexMetadata.getOpensearchIndexName()); this.flintIndexMetadataService.updateIndexToManualRefresh( - flintIndexMetadata.getOpensearchIndexName(), flintIndexOptions); - cancelStreamingJob(flintIndexStateModel); + flintIndexMetadata.getOpensearchIndexName(), flintIndexOptions, asyncQueryRequestContext); + if (flintIndexMetadata.getFlintIndexOptions().isExternalScheduler()) { + asyncQueryScheduler.unscheduleJob( + flintIndexMetadata.getOpensearchIndexName(), asyncQueryRequestContext); + } else { + cancelStreamingJob(flintIndexStateModel); + } } @Override diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpCancel.java b/async-query-core/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpCancel.java similarity index 87% rename from spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpCancel.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpCancel.java index 02c8e39c66..504a8f93c9 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpCancel.java +++ 
b/async-query-core/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpCancel.java @@ -8,6 +8,7 @@ import lombok.SneakyThrows; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; import org.opensearch.sql.spark.client.EMRServerlessClientFactory; import org.opensearch.sql.spark.flint.FlintIndexMetadata; import org.opensearch.sql.spark.flint.FlintIndexState; @@ -38,7 +39,10 @@ FlintIndexState transitioningState() { /** cancel EMR-S job, wait cancelled state upto 15s. */ @SneakyThrows @Override - void runOp(FlintIndexMetadata flintIndexMetadata, FlintIndexStateModel flintIndexStateModel) { + void runOp( + FlintIndexMetadata flintIndexMetadata, + FlintIndexStateModel flintIndexStateModel, + AsyncQueryRequestContext asyncQueryRequestContext) { LOG.debug( "Performing drop index operation for index: {}", flintIndexMetadata.getOpensearchIndexName()); diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpDrop.java b/async-query-core/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpDrop.java similarity index 66% rename from spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpDrop.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpDrop.java index 6613c29870..88aca66fef 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpDrop.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpDrop.java @@ -8,21 +8,27 @@ import lombok.SneakyThrows; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; import org.opensearch.sql.spark.client.EMRServerlessClientFactory; import org.opensearch.sql.spark.flint.FlintIndexMetadata; import org.opensearch.sql.spark.flint.FlintIndexState; import org.opensearch.sql.spark.flint.FlintIndexStateModel; import org.opensearch.sql.spark.flint.FlintIndexStateModelService; +import org.opensearch.sql.spark.scheduler.AsyncQueryScheduler; /** Operation to drop Flint index */ public class FlintIndexOpDrop extends FlintIndexOp { private static final Logger LOG = LogManager.getLogger(); + private final AsyncQueryScheduler asyncQueryScheduler; + public FlintIndexOpDrop( FlintIndexStateModelService flintIndexStateModelService, String datasourceName, - EMRServerlessClientFactory emrServerlessClientFactory) { + EMRServerlessClientFactory emrServerlessClientFactory, + AsyncQueryScheduler asyncQueryScheduler) { super(flintIndexStateModelService, datasourceName, emrServerlessClientFactory); + this.asyncQueryScheduler = asyncQueryScheduler; } public boolean validate(FlintIndexState state) { @@ -40,11 +46,19 @@ FlintIndexState transitioningState() { /** cancel EMR-S job, wait cancelled state upto 15s. 
*/ @SneakyThrows @Override - void runOp(FlintIndexMetadata flintIndexMetadata, FlintIndexStateModel flintIndexStateModel) { + void runOp( + FlintIndexMetadata flintIndexMetadata, + FlintIndexStateModel flintIndexStateModel, + AsyncQueryRequestContext asyncQueryRequestContext) { LOG.debug( "Performing drop index operation for index: {}", flintIndexMetadata.getOpensearchIndexName()); - cancelStreamingJob(flintIndexStateModel); + if (flintIndexMetadata.getFlintIndexOptions().isExternalScheduler()) { + asyncQueryScheduler.unscheduleJob( + flintIndexMetadata.getOpensearchIndexName(), asyncQueryRequestContext); + } else { + cancelStreamingJob(flintIndexStateModel); + } } @Override diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpFactory.java b/async-query-core/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpFactory.java similarity index 79% rename from spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpFactory.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpFactory.java index b102e43d59..d82b29e928 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpFactory.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpFactory.java @@ -6,22 +6,24 @@ package org.opensearch.sql.spark.flint.operation; import lombok.RequiredArgsConstructor; -import org.opensearch.client.Client; import org.opensearch.sql.spark.client.EMRServerlessClientFactory; import org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions; +import org.opensearch.sql.spark.flint.FlintIndexClient; import org.opensearch.sql.spark.flint.FlintIndexMetadataService; import org.opensearch.sql.spark.flint.FlintIndexStateModelService; +import org.opensearch.sql.spark.scheduler.AsyncQueryScheduler; @RequiredArgsConstructor public class FlintIndexOpFactory { private final FlintIndexStateModelService flintIndexStateModelService; - private final Client client; + private final FlintIndexClient flintIndexClient; private final FlintIndexMetadataService flintIndexMetadataService; private final EMRServerlessClientFactory emrServerlessClientFactory; + private final AsyncQueryScheduler asyncQueryScheduler; public FlintIndexOpDrop getDrop(String datasource) { return new FlintIndexOpDrop( - flintIndexStateModelService, datasource, emrServerlessClientFactory); + flintIndexStateModelService, datasource, emrServerlessClientFactory, asyncQueryScheduler); } public FlintIndexOpAlter getAlter(FlintIndexOptions flintIndexOptions, String datasource) { @@ -30,12 +32,8 @@ public FlintIndexOpAlter getAlter(FlintIndexOptions flintIndexOptions, String da flintIndexStateModelService, datasource, emrServerlessClientFactory, - flintIndexMetadataService); - } - - public FlintIndexOpVacuum getVacuum(String datasource) { - return new FlintIndexOpVacuum( - flintIndexStateModelService, datasource, client, emrServerlessClientFactory); + flintIndexMetadataService, + asyncQueryScheduler); } public FlintIndexOpCancel getCancel(String datasource) { diff --git a/spark/src/main/java/org/opensearch/sql/spark/leasemanager/ConcurrencyLimitExceededException.java b/async-query-core/src/main/java/org/opensearch/sql/spark/leasemanager/ConcurrencyLimitExceededException.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/leasemanager/ConcurrencyLimitExceededException.java rename to 
async-query-core/src/main/java/org/opensearch/sql/spark/leasemanager/ConcurrencyLimitExceededException.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/leasemanager/LeaseManager.java b/async-query-core/src/main/java/org/opensearch/sql/spark/leasemanager/LeaseManager.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/leasemanager/LeaseManager.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/leasemanager/LeaseManager.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/leasemanager/model/LeaseRequest.java b/async-query-core/src/main/java/org/opensearch/sql/spark/leasemanager/model/LeaseRequest.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/leasemanager/model/LeaseRequest.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/leasemanager/model/LeaseRequest.java diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/metrics/EmrMetrics.java b/async-query-core/src/main/java/org/opensearch/sql/spark/metrics/EmrMetrics.java new file mode 100644 index 0000000000..2ec587bcc7 --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/metrics/EmrMetrics.java @@ -0,0 +1,15 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.metrics; + +public enum EmrMetrics { + EMR_CANCEL_JOB_REQUEST_FAILURE_COUNT, + EMR_GET_JOB_RESULT_FAILURE_COUNT, + EMR_START_JOB_REQUEST_FAILURE_COUNT, + EMR_INTERACTIVE_QUERY_JOBS_CREATION_COUNT, + EMR_STREAMING_QUERY_JOBS_CREATION_COUNT, + EMR_BATCH_QUERY_JOBS_CREATION_COUNT; +} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/metrics/MetricsService.java b/async-query-core/src/main/java/org/opensearch/sql/spark/metrics/MetricsService.java new file mode 100644 index 0000000000..ca9cb9db4e --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/metrics/MetricsService.java @@ -0,0 +1,11 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.metrics; + +/** Interface to abstract the emit of metrics */ +public interface MetricsService { + void incrementNumericalMetric(EmrMetrics emrMetrics); +} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/parameter/DataSourceSparkParameterComposer.java b/async-query-core/src/main/java/org/opensearch/sql/spark/parameter/DataSourceSparkParameterComposer.java new file mode 100644 index 0000000000..324889b6e0 --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/parameter/DataSourceSparkParameterComposer.java @@ -0,0 +1,24 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.parameter; + +import org.opensearch.sql.datasource.model.DataSourceMetadata; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; +import org.opensearch.sql.spark.dispatcher.model.DispatchQueryRequest; + +/** + * Compose Spark parameters specific to the {@link + * org.opensearch.sql.datasource.model.DataSourceType} based on the {@link DataSourceMetadata}. For + * the parameters not specific to {@link org.opensearch.sql.datasource.model.DataSourceType}, please + * use {@link GeneralSparkParameterComposer}. 
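Before the Spark parameter composers that follow, note that the MetricsService contract above is deliberately tiny so that async-query-core stays free of OpenSearch metrics plumbing; a throwaway in-memory implementation (a hypothetical class, for example for unit tests, not part of this change) could look like this:

package org.opensearch.sql.spark.metrics;

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.LongAdder;

/** Hypothetical in-memory MetricsService; counts increments per metric. */
public class InMemoryMetricsService implements MetricsService {
  private final Map<EmrMetrics, LongAdder> counters = new ConcurrentHashMap<>();

  @Override
  public void incrementNumericalMetric(EmrMetrics emrMetrics) {
    counters.computeIfAbsent(emrMetrics, key -> new LongAdder()).increment();
  }

  /** Helper for assertions in tests. */
  public long count(EmrMetrics emrMetrics) {
    return counters.getOrDefault(emrMetrics, new LongAdder()).sum();
  }
}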
+ */ +public interface DataSourceSparkParameterComposer { + void compose( + DataSourceMetadata dataSourceMetadata, + SparkSubmitParameters sparkSubmitParameters, + DispatchQueryRequest dispatchQueryRequest, + AsyncQueryRequestContext context); +} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/parameter/GeneralSparkParameterComposer.java b/async-query-core/src/main/java/org/opensearch/sql/spark/parameter/GeneralSparkParameterComposer.java new file mode 100644 index 0000000000..c3d46ba5c6 --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/parameter/GeneralSparkParameterComposer.java @@ -0,0 +1,31 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.parameter; + +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; +import org.opensearch.sql.spark.dispatcher.model.DispatchQueryRequest; + +/** + * Compose spark submit parameters based on the request and context. For {@link + * org.opensearch.sql.datasource.model.DataSourceType} specific parameters, please use {@link + * DataSourceSparkParameterComposer}. See {@link SparkParameterComposerCollection}. + */ +public interface GeneralSparkParameterComposer { + + /** + * Modify sparkSubmitParameters based on dispatchQueryRequest and context. + * + * @param sparkSubmitParameters Implementation of this method will modify this. + * @param dispatchQueryRequest Request. Implementation can refer it to compose + * sparkSubmitParameters. + * @param context Context of the request. Implementation can refer it to compose + * sparkSubmitParameters. + */ + void compose( + SparkSubmitParameters sparkSubmitParameters, + DispatchQueryRequest dispatchQueryRequest, + AsyncQueryRequestContext context); +} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/parameter/SparkParameterComposerCollection.java b/async-query-core/src/main/java/org/opensearch/sql/spark/parameter/SparkParameterComposerCollection.java new file mode 100644 index 0000000000..c4d9509bd2 --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/parameter/SparkParameterComposerCollection.java @@ -0,0 +1,76 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.parameter; + +import com.google.common.collect.ImmutableList; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.Map; +import org.opensearch.sql.datasource.model.DataSourceMetadata; +import org.opensearch.sql.datasource.model.DataSourceType; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; +import org.opensearch.sql.spark.dispatcher.model.DispatchQueryRequest; + +/** + * Stores Spark parameter composers and dispatch compose request to each composer. Composers should + * be registered during initialization such as in Guice Module. + */ +public class SparkParameterComposerCollection { + private final Collection generalComposers = new ArrayList<>(); + private final Map> + datasourceComposers = new HashMap<>(); + + /** + * Register composers for specific DataSourceType. The registered composer is called only if the + * request is for the dataSourceType. 
+ */ + public void register(DataSourceType dataSourceType, DataSourceSparkParameterComposer composer) { + if (!datasourceComposers.containsKey(dataSourceType)) { + datasourceComposers.put(dataSourceType, new LinkedList<>()); + } + datasourceComposers.get(dataSourceType).add(composer); + } + + /** + * Register general composer. The composer is called when spark parameter is generated regardless + * of datasource type. + */ + public void register(GeneralSparkParameterComposer composer) { + generalComposers.add(composer); + } + + /** Execute composers associated with the datasource type */ + public void composeByDataSource( + DataSourceMetadata dataSourceMetadata, + SparkSubmitParameters sparkSubmitParameters, + DispatchQueryRequest dispatchQueryRequest, + AsyncQueryRequestContext context) { + for (DataSourceSparkParameterComposer composer : + getComposersFor(dataSourceMetadata.getConnector())) { + composer.compose(dataSourceMetadata, sparkSubmitParameters, dispatchQueryRequest, context); + } + } + + /** Execute all the registered generic composers */ + public void compose( + SparkSubmitParameters sparkSubmitParameters, + DispatchQueryRequest dispatchQueryRequest, + AsyncQueryRequestContext context) { + for (GeneralSparkParameterComposer composer : generalComposers) { + composer.compose(sparkSubmitParameters, dispatchQueryRequest, context); + } + } + + private Collection getComposersFor(DataSourceType type) { + return datasourceComposers.getOrDefault(type, ImmutableList.of()); + } + + public boolean isComposerRegistered(DataSourceType type) { + return datasourceComposers.containsKey(type); + } +} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/parameter/SparkSubmitParameters.java b/async-query-core/src/main/java/org/opensearch/sql/spark/parameter/SparkSubmitParameters.java new file mode 100644 index 0000000000..a496ad7503 --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/parameter/SparkSubmitParameters.java @@ -0,0 +1,56 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.parameter; + +import static org.opensearch.sql.spark.data.constants.SparkConstants.DEFAULT_CLASS_NAME; + +import java.util.LinkedHashMap; +import java.util.Map; +import lombok.Setter; + +/** Define Spark Submit Parameters. 
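The SparkParameterComposerCollection above is populated by wiring code such as a Guice module. The sketch below registers a hypothetical GeneralSparkParameterComposer; the composer class name, config key, and default value are illustrative and not part of this change, and the SparkSubmitParameters class whose definition continues just below supplies getConfigItem/setConfigItem:

package org.opensearch.sql.spark.parameter;

import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext;
import org.opensearch.sql.spark.dispatcher.model.DispatchQueryRequest;

/** Hypothetical composer that pins a default only when the caller has not set one. */
public class ShufflePartitionsComposer implements GeneralSparkParameterComposer {
  @Override
  public void compose(
      SparkSubmitParameters sparkSubmitParameters,
      DispatchQueryRequest dispatchQueryRequest,
      AsyncQueryRequestContext context) {
    if (sparkSubmitParameters.getConfigItem("spark.sql.shuffle.partitions") == null) {
      sparkSubmitParameters.setConfigItem("spark.sql.shuffle.partitions", "200");
    }
  }
}

// Registration during initialization (e.g. in a Guice module):
//   SparkParameterComposerCollection collection = new SparkParameterComposerCollection();
//   collection.register(new ShufflePartitionsComposer());
//   // datasource-specific composers register via collection.register(dataSourceType, composer)
// compose(...) and composeByDataSource(...) then invoke the registered composers whenever
// spark-submit parameters are generated for a query.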
*/ +public class SparkSubmitParameters { + public static final String SPACE = " "; + public static final String EQUALS = "="; + + @Setter private String className = DEFAULT_CLASS_NAME; + private final Map config = new LinkedHashMap<>(); + + /** Extra parameters to append finally */ + @Setter private String extraParameters; + + public void setConfigItem(String key, String value) { + config.put(key, value); + } + + public void deleteConfigItem(String key) { + config.remove(key); + } + + public String getConfigItem(String key) { + return config.get(key); + } + + @Override + public String toString() { + StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.append(" --class "); + stringBuilder.append(this.className); + stringBuilder.append(SPACE); + for (String key : config.keySet()) { + stringBuilder.append(" --conf "); + stringBuilder.append(key); + stringBuilder.append(EQUALS); + stringBuilder.append(config.get(key)); + stringBuilder.append(SPACE); + } + + if (extraParameters != null) { + stringBuilder.append(extraParameters); + } + return stringBuilder.toString(); + } +} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/parameter/SparkSubmitParametersBuilder.java b/async-query-core/src/main/java/org/opensearch/sql/spark/parameter/SparkSubmitParametersBuilder.java new file mode 100644 index 0000000000..db74d0a5a7 --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/parameter/SparkSubmitParametersBuilder.java @@ -0,0 +1,167 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.parameter; + +import static org.opensearch.sql.spark.data.constants.SparkConstants.AWS_SNAPSHOT_REPOSITORY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.DEFAULT_GLUE_CATALOG_CREDENTIALS_PROVIDER_FACTORY_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.DEFAULT_S3_AWS_CREDENTIALS_PROVIDER_VALUE; +import static org.opensearch.sql.spark.data.constants.SparkConstants.EMR_ASSUME_ROLE_CREDENTIALS_PROVIDER; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_CREDENTIALS_PROVIDER_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_DEFAULT_AUTH; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_DEFAULT_CLUSTER_NAME; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_DEFAULT_HOST; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_DEFAULT_PORT; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_DEFAULT_SCHEME; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_INDEX_STORE_AUTH_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_INDEX_STORE_HOST_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_INDEX_STORE_PORT_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_INDEX_STORE_SCHEME_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_JOB_QUERY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_JOB_QUERY_ID; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_JOB_REQUEST_INDEX; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_JOB_SESSION_ID; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_PPL_EXTENSION; +import static 
org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_SQL_EXTENSION; +import static org.opensearch.sql.spark.data.constants.SparkConstants.GLUE_HIVE_CATALOG_FACTORY_CLASS; +import static org.opensearch.sql.spark.data.constants.SparkConstants.HADOOP_CATALOG_CREDENTIALS_PROVIDER_FACTORY_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.HIVE_METASTORE_CLASS_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.JAVA_HOME_LOCATION; +import static org.opensearch.sql.spark.data.constants.SparkConstants.PPL_STANDALONE_PACKAGE; +import static org.opensearch.sql.spark.data.constants.SparkConstants.S3_AWS_CREDENTIALS_PROVIDER_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_DRIVER_ENV_FLINT_CLUSTER_NAME_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_DRIVER_ENV_JAVA_HOME_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_EXECUTOR_ENV_FLINT_CLUSTER_NAME_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_EXECUTOR_ENV_JAVA_HOME_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_JAR_PACKAGES_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_JAR_REPOSITORIES_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_LAUNCHER_PACKAGE; +import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_SQL_EXTENSIONS_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_STANDALONE_PACKAGE; + +import lombok.Getter; +import org.apache.commons.text.StringEscapeUtils; +import org.opensearch.sql.datasource.model.DataSourceMetadata; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; +import org.opensearch.sql.spark.config.SparkSubmitParameterModifier; +import org.opensearch.sql.spark.dispatcher.model.DispatchQueryRequest; +import org.opensearch.sql.spark.execution.statestore.OpenSearchStateStoreUtil; + +public class SparkSubmitParametersBuilder { + private final SparkParameterComposerCollection sparkParameterComposerCollection; + @Getter private final SparkSubmitParameters sparkSubmitParameters; + + public SparkSubmitParametersBuilder( + SparkParameterComposerCollection sparkParameterComposerCollection) { + this.sparkParameterComposerCollection = sparkParameterComposerCollection; + sparkSubmitParameters = new SparkSubmitParameters(); + setDefaultConfigs(); + } + + private void setDefaultConfigs() { + setConfigItem(S3_AWS_CREDENTIALS_PROVIDER_KEY, DEFAULT_S3_AWS_CREDENTIALS_PROVIDER_VALUE); + setConfigItem( + HADOOP_CATALOG_CREDENTIALS_PROVIDER_FACTORY_KEY, + DEFAULT_GLUE_CATALOG_CREDENTIALS_PROVIDER_FACTORY_KEY); + setConfigItem( + SPARK_JAR_PACKAGES_KEY, + SPARK_STANDALONE_PACKAGE + "," + SPARK_LAUNCHER_PACKAGE + "," + PPL_STANDALONE_PACKAGE); + setConfigItem(SPARK_JAR_REPOSITORIES_KEY, AWS_SNAPSHOT_REPOSITORY); + setConfigItem(SPARK_DRIVER_ENV_JAVA_HOME_KEY, JAVA_HOME_LOCATION); + setConfigItem(SPARK_EXECUTOR_ENV_JAVA_HOME_KEY, JAVA_HOME_LOCATION); + setConfigItem(SPARK_DRIVER_ENV_FLINT_CLUSTER_NAME_KEY, FLINT_DEFAULT_CLUSTER_NAME); + setConfigItem(SPARK_EXECUTOR_ENV_FLINT_CLUSTER_NAME_KEY, FLINT_DEFAULT_CLUSTER_NAME); + setConfigItem(FLINT_INDEX_STORE_HOST_KEY, FLINT_DEFAULT_HOST); + setConfigItem(FLINT_INDEX_STORE_PORT_KEY, FLINT_DEFAULT_PORT); + setConfigItem(FLINT_INDEX_STORE_SCHEME_KEY, FLINT_DEFAULT_SCHEME); + setConfigItem(FLINT_INDEX_STORE_AUTH_KEY, FLINT_DEFAULT_AUTH); + 
setConfigItem(FLINT_CREDENTIALS_PROVIDER_KEY, EMR_ASSUME_ROLE_CREDENTIALS_PROVIDER); + setConfigItem(SPARK_SQL_EXTENSIONS_KEY, FLINT_SQL_EXTENSION + "," + FLINT_PPL_EXTENSION); + setConfigItem(HIVE_METASTORE_CLASS_KEY, GLUE_HIVE_CATALOG_FACTORY_CLASS); + } + + private void setConfigItem(String key, String value) { + sparkSubmitParameters.setConfigItem(key, value); + } + + public SparkSubmitParametersBuilder className(String className) { + sparkSubmitParameters.setClassName(className); + return this; + } + + /** clusterName will be used for logging and metrics in Spark */ + public SparkSubmitParametersBuilder clusterName(String clusterName) { + setConfigItem(SPARK_DRIVER_ENV_FLINT_CLUSTER_NAME_KEY, clusterName); + setConfigItem(SPARK_EXECUTOR_ENV_FLINT_CLUSTER_NAME_KEY, clusterName); + return this; + } + + /** + * For query in spark submit parameters to be parsed correctly, escape the characters in the + * query, then wrap the query with double quotes. + */ + public SparkSubmitParametersBuilder query(String query) { + String escapedQuery = StringEscapeUtils.escapeJava(query); + String wrappedQuery = "\"" + escapedQuery + "\""; + setConfigItem(FLINT_JOB_QUERY, wrappedQuery); + return this; + } + + public SparkSubmitParametersBuilder queryId(String queryId) { + setConfigItem(FLINT_JOB_QUERY_ID, queryId); + return this; + } + + public SparkSubmitParametersBuilder dataSource( + DataSourceMetadata metadata, + DispatchQueryRequest dispatchQueryRequest, + AsyncQueryRequestContext context) { + if (sparkParameterComposerCollection.isComposerRegistered(metadata.getConnector())) { + sparkParameterComposerCollection.composeByDataSource( + metadata, sparkSubmitParameters, dispatchQueryRequest, context); + return this; + } else { + throw new UnsupportedOperationException( + String.format( + "UnSupported datasource type for async queries:: %s", metadata.getConnector())); + } + } + + public SparkSubmitParametersBuilder structuredStreaming(Boolean isStructuredStreaming) { + if (isStructuredStreaming) { + setConfigItem("spark.flint.job.type", "streaming"); + } + return this; + } + + public SparkSubmitParametersBuilder extraParameters(String params) { + sparkSubmitParameters.setExtraParameters(params); + return this; + } + + public SparkSubmitParametersBuilder sessionExecution(String sessionId, String datasourceName) { + setConfigItem(FLINT_JOB_REQUEST_INDEX, OpenSearchStateStoreUtil.getIndexName(datasourceName)); + setConfigItem(FLINT_JOB_SESSION_ID, sessionId); + return this; + } + + public SparkSubmitParametersBuilder acceptModifier(SparkSubmitParameterModifier modifier) { + if (modifier != null) { + modifier.modifyParameters(this); + } + return this; + } + + public SparkSubmitParametersBuilder acceptComposers( + DispatchQueryRequest dispatchQueryRequest, AsyncQueryRequestContext context) { + sparkParameterComposerCollection.compose(sparkSubmitParameters, dispatchQueryRequest, context); + return this; + } + + @Override + public String toString() { + return sparkSubmitParameters.toString(); + } +} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/parameter/SparkSubmitParametersBuilderProvider.java b/async-query-core/src/main/java/org/opensearch/sql/spark/parameter/SparkSubmitParametersBuilderProvider.java new file mode 100644 index 0000000000..ccc9ffb680 --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/parameter/SparkSubmitParametersBuilderProvider.java @@ -0,0 +1,18 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + 
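Putting the pieces together, here is a minimal sketch of the parameter string SparkSubmitParametersBuilder assembles. The values are illustrative, no composers are registered, and the datasource-specific dataSource(...) step is skipped so nothing needs to be mocked:

package org.opensearch.sql.spark.parameter;

/** Illustrative only: prints the spark-submit arguments the builder produces. */
public class SparkSubmitParametersBuilderExample {
  public static void main(String[] args) {
    SparkSubmitParametersBuilder builder =
        new SparkSubmitParametersBuilder(new SparkParameterComposerCollection());

    String params =
        builder
            .clusterName("my-domain:my-cluster") // illustrative cluster name
            .query("SELECT 1")                   // query() escapes the text and wraps it in double quotes
            .queryId("q-00001")                  // illustrative query id
            .structuredStreaming(false)          // leaves spark.flint.job.type unset
            .toString();

    // Expect " --class <default class>" followed by one " --conf key=value" pair per entry
    // from setDefaultConfigs(), plus the query and query id configured above.
    System.out.println(params);
  }
}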
+package org.opensearch.sql.spark.parameter; + +import lombok.RequiredArgsConstructor; + +/** Provide SparkSubmitParametersBuilder instance with SparkParameterComposerCollection injected */ +@RequiredArgsConstructor +public class SparkSubmitParametersBuilderProvider { + private final SparkParameterComposerCollection sparkParameterComposerCollection; + + public SparkSubmitParametersBuilder getSparkSubmitParametersBuilder() { + return new SparkSubmitParametersBuilder(sparkParameterComposerCollection); + } +} diff --git a/spark/src/main/java/org/opensearch/sql/spark/response/JobExecutionResponseReader.java b/async-query-core/src/main/java/org/opensearch/sql/spark/response/JobExecutionResponseReader.java similarity index 50% rename from spark/src/main/java/org/opensearch/sql/spark/response/JobExecutionResponseReader.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/response/JobExecutionResponseReader.java index e3184b7326..237ce9c7f6 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/response/JobExecutionResponseReader.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/response/JobExecutionResponseReader.java @@ -6,17 +6,22 @@ package org.opensearch.sql.spark.response; import org.json.JSONObject; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryJobMetadata; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; /** Interface for reading job execution result */ public interface JobExecutionResponseReader { /** * Retrieves the job execution result based on the job ID. * - * @param jobId The job ID. - * @param resultLocation The location identifier where the result is stored (optional). + * @param asyncQueryJobMetadata metadata will have jobId and resultLocation and other required + * params. + * @param asyncQueryRequestContext request context passed to AsyncQueryExecutorService * @return A JSONObject containing the result data. */ - JSONObject getResultWithJobId(String jobId, String resultLocation); + JSONObject getResultFromResultIndex( + AsyncQueryJobMetadata asyncQueryJobMetadata, + AsyncQueryRequestContext asyncQueryRequestContext); /** * Retrieves the job execution result based on the query ID. @@ -25,5 +30,6 @@ public interface JobExecutionResponseReader { * @param resultLocation The location identifier where the result is stored (optional). * @return A JSONObject containing the result data. 
*/ - JSONObject getResultWithQueryId(String queryId, String resultLocation); + JSONObject getResultWithQueryId( + String queryId, String resultLocation, AsyncQueryRequestContext asyncQueryRequestContext); } diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/rest/model/CreateAsyncQueryRequest.java b/async-query-core/src/main/java/org/opensearch/sql/spark/rest/model/CreateAsyncQueryRequest.java new file mode 100644 index 0000000000..e3250c7a58 --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/rest/model/CreateAsyncQueryRequest.java @@ -0,0 +1,31 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.rest.model; + +import lombok.Data; +import org.apache.commons.lang3.Validate; + +@Data +public class CreateAsyncQueryRequest { + private String query; + private String datasource; + private LangType lang; + // optional sessionId + private String sessionId; + + public CreateAsyncQueryRequest(String query, String datasource, LangType lang) { + this.query = Validate.notNull(query, "Query can't be null"); + this.datasource = Validate.notNull(datasource, "Datasource can't be null"); + this.lang = Validate.notNull(lang, "lang can't be null"); + } + + public CreateAsyncQueryRequest(String query, String datasource, LangType lang, String sessionId) { + this.query = Validate.notNull(query, "Query can't be null"); + this.datasource = Validate.notNull(datasource, "Datasource can't be null"); + this.lang = Validate.notNull(lang, "lang can't be null"); + this.sessionId = sessionId; + } +} diff --git a/spark/src/main/java/org/opensearch/sql/spark/rest/model/CreateAsyncQueryResponse.java b/async-query-core/src/main/java/org/opensearch/sql/spark/rest/model/CreateAsyncQueryResponse.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/rest/model/CreateAsyncQueryResponse.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/rest/model/CreateAsyncQueryResponse.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/rest/model/LangType.java b/async-query-core/src/main/java/org/opensearch/sql/spark/rest/model/LangType.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/rest/model/LangType.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/rest/model/LangType.java diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/scheduler/AsyncQueryScheduler.java b/async-query-core/src/main/java/org/opensearch/sql/spark/scheduler/AsyncQueryScheduler.java new file mode 100644 index 0000000000..6d5350821b --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/scheduler/AsyncQueryScheduler.java @@ -0,0 +1,75 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.scheduler; + +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; +import org.opensearch.sql.spark.scheduler.model.AsyncQuerySchedulerRequest; + +/** Scheduler interface for scheduling asynchronous query jobs. */ +public interface AsyncQueryScheduler { + + /** + * Schedules a new job in the system. This method creates a new job entry based on the provided + * request parameters. + * + *
Use cases: - Creating a new periodic query execution - Setting up a scheduled data refresh + * task + * + * @param asyncQuerySchedulerRequest The request containing job configuration details + * @param asyncQueryRequestContext The request context passed to AsyncQueryExecutorService + * @throws IllegalArgumentException if a job with the same name already exists + * @throws RuntimeException if there's an error during job creation + */ + void scheduleJob( + AsyncQuerySchedulerRequest asyncQuerySchedulerRequest, + AsyncQueryRequestContext asyncQueryRequestContext); + + /** + * Updates an existing job with new parameters. This method modifies the configuration of an + * already scheduled job. + * + *
Use cases: - Changing the schedule of an existing job - Modifying query parameters of a + * scheduled job - Updating resource allocations for a job + * + * @param asyncQuerySchedulerRequest The request containing updated job configuration + * @param asyncQueryRequestContext The request context passed to AsyncQueryExecutorService + * @throws IllegalArgumentException if the job to be updated doesn't exist + * @throws RuntimeException if there's an error during the update process + */ + void updateJob( + AsyncQuerySchedulerRequest asyncQuerySchedulerRequest, + AsyncQueryRequestContext asyncQueryRequestContext); + + /** + * Unschedules a job by marking it as disabled and updating its last update time. This method is + * used when you want to temporarily stop a job from running but keep its configuration and + * history in the system. + * + *
Use cases: - Pausing a job that's causing issues without losing its configuration - + * Temporarily disabling a job during maintenance or high-load periods - Allowing for easy + * re-enabling of the job in the future + * + * @param jobId The unique identifier of the job to unschedule + * @param asyncQueryRequestContext The request context passed to AsyncQueryExecutorService + * @throws IllegalArgumentException if the job to be unscheduled doesn't exist + * @throws RuntimeException if there's an error during the unschedule process + */ + void unscheduleJob(String jobId, AsyncQueryRequestContext asyncQueryRequestContext); + + /** + * Removes a job completely from the scheduler. This method permanently deletes the job and all + * its associated data from the system. + * + *
Use cases: - Cleaning up jobs that are no longer needed - Removing obsolete or erroneously + * created jobs - Freeing up resources by deleting unused job configurations + * + * @param jobId The unique identifier of the job to remove + * @param asyncQueryRequestContext The request context passed to AsyncQueryExecutorService + * @throws IllegalArgumentException if the job to be removed doesn't exist + * @throws RuntimeException if there's an error during the remove process + */ + void removeJob(String jobId, AsyncQueryRequestContext asyncQueryRequestContext); +} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/scheduler/model/AsyncQuerySchedulerRequest.java b/async-query-core/src/main/java/org/opensearch/sql/spark/scheduler/model/AsyncQuerySchedulerRequest.java new file mode 100644 index 0000000000..c38d92365a --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/scheduler/model/AsyncQuerySchedulerRequest.java @@ -0,0 +1,33 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.scheduler.model; + +import java.time.Instant; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; +import org.opensearch.sql.spark.rest.model.LangType; + +/** Represents a job request for a scheduled task. */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class AsyncQuerySchedulerRequest { + protected String accountId; + // Scheduler jobid is the opensearch index name until we support multiple jobs per index + protected String jobId; + protected String dataSource; + protected String scheduledQuery; + protected LangType queryLang; + protected Object schedule; + protected boolean enabled; + protected Instant lastUpdateTime; + protected Instant enabledTime; + protected Long lockDurationSeconds; + protected Double jitter; +} diff --git a/spark/src/main/java/org/opensearch/sql/spark/utils/IDUtils.java b/async-query-core/src/main/java/org/opensearch/sql/spark/utils/IDUtils.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/utils/IDUtils.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/utils/IDUtils.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/utils/RealTimeProvider.java b/async-query-core/src/main/java/org/opensearch/sql/spark/utils/RealTimeProvider.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/utils/RealTimeProvider.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/utils/RealTimeProvider.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/utils/SQLQueryUtils.java b/async-query-core/src/main/java/org/opensearch/sql/spark/utils/SQLQueryUtils.java similarity index 85% rename from spark/src/main/java/org/opensearch/sql/spark/utils/SQLQueryUtils.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/utils/SQLQueryUtils.java index 9dfe30b4b5..bd7060b776 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/utils/SQLQueryUtils.java +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/utils/SQLQueryUtils.java @@ -5,19 +5,28 @@ package org.opensearch.sql.spark.utils; +import java.util.LinkedList; +import java.util.List; import java.util.Locale; import lombok.Getter; import lombok.experimental.UtilityClass; import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.misc.Interval; import org.antlr.v4.runtime.tree.ParseTree; +import 
org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.opensearch.sql.common.antlr.CaseInsensitiveCharStream; import org.opensearch.sql.common.antlr.SyntaxAnalysisErrorListener; import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.spark.antlr.parser.FlintSparkSqlExtensionsBaseVisitor; import org.opensearch.sql.spark.antlr.parser.FlintSparkSqlExtensionsLexer; import org.opensearch.sql.spark.antlr.parser.FlintSparkSqlExtensionsParser; +import org.opensearch.sql.spark.antlr.parser.FlintSparkSqlExtensionsParser.MaterializedViewQueryContext; +import org.opensearch.sql.spark.antlr.parser.FlintSparkSqlExtensionsParser.RecoverIndexJobStatementContext; import org.opensearch.sql.spark.antlr.parser.SqlBaseLexer; import org.opensearch.sql.spark.antlr.parser.SqlBaseParser; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.IdentifierReferenceContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.StatementContext; import org.opensearch.sql.spark.antlr.parser.SqlBaseParserBaseVisitor; import org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions; import org.opensearch.sql.spark.dispatcher.model.FullyQualifiedTableName; @@ -31,17 +40,17 @@ */ @UtilityClass public class SQLQueryUtils { + private static final Logger logger = LogManager.getLogger(SQLQueryUtils.class); - // TODO Handle cases where the query has multiple table Names. - public static FullyQualifiedTableName extractFullyQualifiedTableName(String sqlQuery) { + public static List extractFullyQualifiedTableNames(String sqlQuery) { SqlBaseParser sqlBaseParser = new SqlBaseParser( new CommonTokenStream(new SqlBaseLexer(new CaseInsensitiveCharStream(sqlQuery)))); sqlBaseParser.addErrorListener(new SyntaxAnalysisErrorListener()); - SqlBaseParser.StatementContext statement = sqlBaseParser.statement(); + StatementContext statement = sqlBaseParser.statement(); SparkSqlTableNameVisitor sparkSqlTableNameVisitor = new SparkSqlTableNameVisitor(); statement.accept(sparkSqlTableNameVisitor); - return sparkSqlTableNameVisitor.getFullyQualifiedTableName(); + return sparkSqlTableNameVisitor.getFullyQualifiedTableNames(); } public static IndexQueryDetails extractIndexDetails(String sqlQuery) { @@ -71,25 +80,32 @@ public static boolean isFlintExtensionQuery(String sqlQuery) { } } + public static SqlBaseParser getBaseParser(String sqlQuery) { + SqlBaseParser sqlBaseParser = + new SqlBaseParser( + new CommonTokenStream(new SqlBaseLexer(new CaseInsensitiveCharStream(sqlQuery)))); + sqlBaseParser.addErrorListener(new SyntaxAnalysisErrorListener()); + return sqlBaseParser; + } + public static class SparkSqlTableNameVisitor extends SqlBaseParserBaseVisitor { - @Getter private FullyQualifiedTableName fullyQualifiedTableName; + @Getter + private final List fullyQualifiedTableNames = new LinkedList<>(); - public SparkSqlTableNameVisitor() { - this.fullyQualifiedTableName = new FullyQualifiedTableName(); - } + public SparkSqlTableNameVisitor() {} @Override - public Void visitTableName(SqlBaseParser.TableNameContext ctx) { - fullyQualifiedTableName = new FullyQualifiedTableName(ctx.getText()); - return super.visitTableName(ctx); + public Void visitIdentifierReference(IdentifierReferenceContext ctx) { + fullyQualifiedTableNames.add(new FullyQualifiedTableName(ctx.getText())); + return super.visitIdentifierReference(ctx); } @Override public Void visitDropTable(SqlBaseParser.DropTableContext ctx) { for (ParseTree parseTree : ctx.children) { if (parseTree instanceof 
SqlBaseParser.IdentifierReferenceContext) { - fullyQualifiedTableName = new FullyQualifiedTableName(parseTree.getText()); + fullyQualifiedTableNames.add(new FullyQualifiedTableName(parseTree.getText())); } } return super.visitDropTable(ctx); @@ -99,7 +115,7 @@ public Void visitDropTable(SqlBaseParser.DropTableContext ctx) { public Void visitDescribeRelation(SqlBaseParser.DescribeRelationContext ctx) { for (ParseTree parseTree : ctx.children) { if (parseTree instanceof SqlBaseParser.IdentifierReferenceContext) { - fullyQualifiedTableName = new FullyQualifiedTableName(parseTree.getText()); + fullyQualifiedTableNames.add(new FullyQualifiedTableName(parseTree.getText())); } } return super.visitDescribeRelation(ctx); @@ -110,7 +126,7 @@ public Void visitDescribeRelation(SqlBaseParser.DescribeRelationContext ctx) { public Void visitCreateTableHeader(SqlBaseParser.CreateTableHeaderContext ctx) { for (ParseTree parseTree : ctx.children) { if (parseTree instanceof SqlBaseParser.IdentifierReferenceContext) { - fullyQualifiedTableName = new FullyQualifiedTableName(parseTree.getText()); + fullyQualifiedTableNames.add(new FullyQualifiedTableName(parseTree.getText())); } } return super.visitCreateTableHeader(ctx); @@ -190,31 +206,6 @@ public Void visitDropMaterializedViewStatement( return super.visitDropMaterializedViewStatement(ctx); } - @Override - public Void visitVacuumSkippingIndexStatement( - FlintSparkSqlExtensionsParser.VacuumSkippingIndexStatementContext ctx) { - indexQueryDetailsBuilder.indexQueryActionType(IndexQueryActionType.VACUUM); - indexQueryDetailsBuilder.indexType(FlintIndexType.SKIPPING); - return super.visitVacuumSkippingIndexStatement(ctx); - } - - @Override - public Void visitVacuumCoveringIndexStatement( - FlintSparkSqlExtensionsParser.VacuumCoveringIndexStatementContext ctx) { - indexQueryDetailsBuilder.indexQueryActionType(IndexQueryActionType.VACUUM); - indexQueryDetailsBuilder.indexType(FlintIndexType.COVERING); - return super.visitVacuumCoveringIndexStatement(ctx); - } - - @Override - public Void visitVacuumMaterializedViewStatement( - FlintSparkSqlExtensionsParser.VacuumMaterializedViewStatementContext ctx) { - indexQueryDetailsBuilder.indexQueryActionType(IndexQueryActionType.VACUUM); - indexQueryDetailsBuilder.indexType(FlintIndexType.MATERIALIZED_VIEW); - indexQueryDetailsBuilder.mvName(ctx.mvName.getText()); - return super.visitVacuumMaterializedViewStatement(ctx); - } - @Override public Void visitDescribeCoveringIndexStatement( FlintSparkSqlExtensionsParser.DescribeCoveringIndexStatementContext ctx) { @@ -325,6 +316,21 @@ public Void visitAlterMaterializedViewStatement( return super.visitAlterMaterializedViewStatement(ctx); } + @Override + public Void visitMaterializedViewQuery(MaterializedViewQueryContext ctx) { + int a = ctx.start.getStartIndex(); + int b = ctx.stop.getStopIndex(); + String query = ctx.start.getInputStream().getText(new Interval(a, b)); + indexQueryDetailsBuilder.mvQuery(query); + return super.visitMaterializedViewQuery(ctx); + } + + @Override + public Void visitRecoverIndexJobStatement(RecoverIndexJobStatementContext ctx) { + indexQueryDetailsBuilder.indexQueryActionType(IndexQueryActionType.RECOVER); + return super.visitRecoverIndexJobStatement(ctx); + } + private String propertyKey(FlintSparkSqlExtensionsParser.PropertyKeyContext key) { if (key.STRING() != null) { return key.STRING().getText(); diff --git a/spark/src/main/java/org/opensearch/sql/spark/utils/TimeProvider.java 
b/async-query-core/src/main/java/org/opensearch/sql/spark/utils/TimeProvider.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/utils/TimeProvider.java rename to async-query-core/src/main/java/org/opensearch/sql/spark/utils/TimeProvider.java diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/DefaultGrammarElementValidator.java b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/DefaultGrammarElementValidator.java new file mode 100644 index 0000000000..ddd0a1d094 --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/DefaultGrammarElementValidator.java @@ -0,0 +1,13 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.validator; + +public class DefaultGrammarElementValidator implements GrammarElementValidator { + @Override + public boolean isValid(GrammarElement element) { + return true; + } +} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/DenyListGrammarElementValidator.java b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/DenyListGrammarElementValidator.java new file mode 100644 index 0000000000..514e2c8ad8 --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/DenyListGrammarElementValidator.java @@ -0,0 +1,19 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.validator; + +import java.util.Set; +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +public class DenyListGrammarElementValidator implements GrammarElementValidator { + private final Set denyList; + + @Override + public boolean isValid(GrammarElement element) { + return !denyList.contains(element); + } +} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/FunctionType.java b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/FunctionType.java new file mode 100644 index 0000000000..da3760efd6 --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/FunctionType.java @@ -0,0 +1,436 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.validator; + +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import lombok.AllArgsConstructor; + +/** + * Enum for defining and looking up SQL function type based on its name. 
Unknown one will be + * considered as UDF (User Defined Function) + */ +@AllArgsConstructor +public enum FunctionType { + AGGREGATE("Aggregate"), + WINDOW("Window"), + ARRAY("Array"), + MAP("Map"), + DATE_TIMESTAMP("Date and Timestamp"), + JSON("JSON"), + MATH("Math"), + STRING("String"), + CONDITIONAL("Conditional"), + BITWISE("Bitwise"), + CONVERSION("Conversion"), + PREDICATE("Predicate"), + CSV("CSV"), + MISC("Misc"), + GENERATOR("Generator"), + UDF("User Defined Function"); + + private final String name; + + private static final Map> FUNCTION_TYPE_TO_FUNCTION_NAMES_MAP = + ImmutableMap.>builder() + .put( + AGGREGATE, + Set.of( + "any", + "any_value", + "approx_count_distinct", + "approx_percentile", + "array_agg", + "avg", + "bit_and", + "bit_or", + "bit_xor", + "bitmap_construct_agg", + "bitmap_or_agg", + "bool_and", + "bool_or", + "collect_list", + "collect_set", + "corr", + "count", + "count_if", + "count_min_sketch", + "covar_pop", + "covar_samp", + "every", + "first", + "first_value", + "grouping", + "grouping_id", + "histogram_numeric", + "hll_sketch_agg", + "hll_union_agg", + "kurtosis", + "last", + "last_value", + "max", + "max_by", + "mean", + "median", + "min", + "min_by", + "mode", + "percentile", + "percentile_approx", + "regr_avgx", + "regr_avgy", + "regr_count", + "regr_intercept", + "regr_r2", + "regr_slope", + "regr_sxx", + "regr_sxy", + "regr_syy", + "skewness", + "some", + "std", + "stddev", + "stddev_pop", + "stddev_samp", + "sum", + "try_avg", + "try_sum", + "var_pop", + "var_samp", + "variance")) + .put( + WINDOW, + Set.of( + "cume_dist", + "dense_rank", + "lag", + "lead", + "nth_value", + "ntile", + "percent_rank", + "rank", + "row_number")) + .put( + ARRAY, + Set.of( + "array", + "array_append", + "array_compact", + "array_contains", + "array_distinct", + "array_except", + "array_insert", + "array_intersect", + "array_join", + "array_max", + "array_min", + "array_position", + "array_prepend", + "array_remove", + "array_repeat", + "array_union", + "arrays_overlap", + "arrays_zip", + "flatten", + "get", + "sequence", + "shuffle", + "slice", + "sort_array")) + .put( + MAP, + Set.of( + "element_at", + "map", + "map_concat", + "map_contains_key", + "map_entries", + "map_from_arrays", + "map_from_entries", + "map_keys", + "map_values", + "str_to_map", + "try_element_at")) + .put( + DATE_TIMESTAMP, + Set.of( + "add_months", + "convert_timezone", + "curdate", + "current_date", + "current_timestamp", + "current_timezone", + "date_add", + "date_diff", + "date_format", + "date_from_unix_date", + "date_part", + "date_sub", + "date_trunc", + "dateadd", + "datediff", + "datepart", + "day", + "dayofmonth", + "dayofweek", + "dayofyear", + "extract", + "from_unixtime", + "from_utc_timestamp", + "hour", + "last_day", + "localtimestamp", + "make_date", + "make_dt_interval", + "make_interval", + "make_timestamp", + "make_timestamp_ltz", + "make_timestamp_ntz", + "make_ym_interval", + "minute", + "month", + "months_between", + "next_day", + "now", + "quarter", + "second", + "session_window", + "timestamp_micros", + "timestamp_millis", + "timestamp_seconds", + "to_date", + "to_timestamp", + "to_timestamp_ltz", + "to_timestamp_ntz", + "to_unix_timestamp", + "to_utc_timestamp", + "trunc", + "try_to_timestamp", + "unix_date", + "unix_micros", + "unix_millis", + "unix_seconds", + "unix_timestamp", + "weekday", + "weekofyear", + "window", + "window_time", + "year")) + .put( + JSON, + Set.of( + "from_json", + "get_json_object", + "json_array_length", + "json_object_keys", + "json_tuple", + 
"schema_of_json", + "to_json")) + .put( + MATH, + Set.of( + "abs", + "acos", + "acosh", + "asin", + "asinh", + "atan", + "atan2", + "atanh", + "bin", + "bround", + "cbrt", + "ceil", + "ceiling", + "conv", + "cos", + "cosh", + "cot", + "csc", + "degrees", + "e", + "exp", + "expm1", + "factorial", + "floor", + "greatest", + "hex", + "hypot", + "least", + "ln", + "log", + "log10", + "log1p", + "log2", + "negative", + "pi", + "pmod", + "positive", + "pow", + "power", + "radians", + "rand", + "randn", + "random", + "rint", + "round", + "sec", + "shiftleft", + "sign", + "signum", + "sin", + "sinh", + "sqrt", + "tan", + "tanh", + "try_add", + "try_divide", + "try_multiply", + "try_subtract", + "unhex", + "width_bucket")) + .put( + STRING, + Set.of( + "ascii", + "base64", + "bit_length", + "btrim", + "char", + "char_length", + "character_length", + "chr", + "concat", + "concat_ws", + "contains", + "decode", + "elt", + "encode", + "endswith", + "find_in_set", + "format_number", + "format_string", + "initcap", + "instr", + "lcase", + "left", + "len", + "length", + "levenshtein", + "locate", + "lower", + "lpad", + "ltrim", + "luhn_check", + "mask", + "octet_length", + "overlay", + "position", + "printf", + "regexp_count", + "regexp_extract", + "regexp_extract_all", + "regexp_instr", + "regexp_replace", + "regexp_substr", + "repeat", + "replace", + "right", + "rpad", + "rtrim", + "sentences", + "soundex", + "space", + "split", + "split_part", + "startswith", + "substr", + "substring", + "substring_index", + "to_binary", + "to_char", + "to_number", + "to_varchar", + "translate", + "trim", + "try_to_binary", + "try_to_number", + "ucase", + "unbase64", + "upper")) + .put(CONDITIONAL, Set.of("coalesce", "if", "ifnull", "nanvl", "nullif", "nvl", "nvl2")) + .put( + BITWISE, Set.of("bit_count", "bit_get", "getbit", "shiftright", "shiftrightunsigned")) + .put( + CONVERSION, + Set.of( + "bigint", + "binary", + "boolean", + "cast", + "date", + "decimal", + "double", + "float", + "int", + "smallint", + "string", + "timestamp", + "tinyint")) + .put(PREDICATE, Set.of("isnan", "isnotnull", "isnull", "regexp", "regexp_like", "rlike")) + .put(CSV, Set.of("from_csv", "schema_of_csv", "to_csv")) + .put( + MISC, + Set.of( + "aes_decrypt", + "aes_encrypt", + "assert_true", + "bitmap_bit_position", + "bitmap_bucket_number", + "bitmap_count", + "current_catalog", + "current_database", + "current_schema", + "current_user", + "equal_null", + "hll_sketch_estimate", + "hll_union", + "input_file_block_length", + "input_file_block_start", + "input_file_name", + "java_method", + "monotonically_increasing_id", + "reflect", + "spark_partition_id", + "try_aes_decrypt", + "typeof", + "user", + "uuid", + "version")) + .put( + GENERATOR, + Set.of( + "explode", + "explode_outer", + "inline", + "inline_outer", + "posexplode", + "posexplode_outer", + "stack")) + .build(); + + private static final Map FUNCTION_NAME_TO_FUNCTION_TYPE_MAP = + FUNCTION_TYPE_TO_FUNCTION_NAMES_MAP.entrySet().stream() + .flatMap( + entry -> entry.getValue().stream().map(value -> Map.entry(value, entry.getKey()))) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + public static FunctionType fromFunctionName(String functionName) { + return FUNCTION_NAME_TO_FUNCTION_TYPE_MAP.getOrDefault(functionName.toLowerCase(), UDF); + } +} diff --git a/spark-sql-application/project/plugins.sbt b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/GrammarElement.java similarity index 51% rename from spark-sql-application/project/plugins.sbt 
rename to async-query-core/src/main/java/org/opensearch/sql/spark/validator/GrammarElement.java index 4d14ba6c10..be2394393c 100644 --- a/spark-sql-application/project/plugins.sbt +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/GrammarElement.java @@ -3,4 +3,6 @@ * SPDX-License-Identifier: Apache-2.0 */ - addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0") \ No newline at end of file +package org.opensearch.sql.spark.validator; + +public interface GrammarElement {} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/GrammarElementValidator.java b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/GrammarElementValidator.java new file mode 100644 index 0000000000..cc49643772 --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/GrammarElementValidator.java @@ -0,0 +1,15 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.validator; + +/** Interface for validator to decide if each GrammarElement is valid or not. */ +public interface GrammarElementValidator { + + /** + * @return true if element is valid (accepted) + */ + boolean isValid(GrammarElement element); +} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/GrammarElementValidatorProvider.java b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/GrammarElementValidatorProvider.java new file mode 100644 index 0000000000..9755a1c0b6 --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/GrammarElementValidatorProvider.java @@ -0,0 +1,22 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.validator; + +import java.util.Map; +import lombok.AllArgsConstructor; +import org.opensearch.sql.datasource.model.DataSourceType; + +/** Provides GrammarElementValidator based on DataSourceType. 
*/ +@AllArgsConstructor +public class GrammarElementValidatorProvider { + + private final Map validatorMap; + private final GrammarElementValidator defaultValidator; + + public GrammarElementValidator getValidatorForDatasource(DataSourceType dataSourceType) { + return validatorMap.getOrDefault(dataSourceType, defaultValidator); + } +} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/PPLGrammarElement.java b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/PPLGrammarElement.java new file mode 100644 index 0000000000..9e426f39fb --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/PPLGrammarElement.java @@ -0,0 +1,30 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.validator; + +import lombok.AllArgsConstructor; + +@AllArgsConstructor +public enum PPLGrammarElement implements GrammarElement { + PATTERNS_COMMAND("patterns command"), + JOIN_COMMAND("join command"), + LOOKUP_COMMAND("lookup command"), + SUBQUERY_COMMAND("subquery command"), + FLATTEN_COMMAND("flatten command"), + FILLNULL_COMMAND("fillnull command"), + EXPAND_COMMAND("expand command"), + DESCRIBE_COMMAND("describe command"), + IPADDRESS_FUNCTIONS("IP address functions"), + JSON_FUNCTIONS("JSON functions"), + LAMBDA_FUNCTIONS("Lambda functions"); + + String description; + + @Override + public String toString() { + return description; + } +} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/PPLQueryValidationVisitor.java b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/PPLQueryValidationVisitor.java new file mode 100644 index 0000000000..d829dd17a5 --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/PPLQueryValidationVisitor.java @@ -0,0 +1,87 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.validator; + +import lombok.AllArgsConstructor; +import org.opensearch.sql.spark.antlr.parser.OpenSearchPPLParser.*; + +@AllArgsConstructor +public class PPLQueryValidationVisitor + extends org.opensearch.sql.spark.antlr.parser.OpenSearchPPLParserBaseVisitor { + private final GrammarElementValidator grammarElementValidator; + + @Override + public Void visitPatternsCommand(PatternsCommandContext ctx) { + validateAllowed(PPLGrammarElement.PATTERNS_COMMAND); + return super.visitPatternsCommand(ctx); + } + + @Override + public Void visitJoinCommand(JoinCommandContext ctx) { + validateAllowed(PPLGrammarElement.JOIN_COMMAND); + return super.visitJoinCommand(ctx); + } + + @Override + public Void visitLookupCommand(LookupCommandContext ctx) { + validateAllowed(PPLGrammarElement.LOOKUP_COMMAND); + return super.visitLookupCommand(ctx); + } + + @Override + public Void visitSubSearch(SubSearchContext ctx) { + validateAllowed(PPLGrammarElement.SUBQUERY_COMMAND); + return super.visitSubSearch(ctx); + } + + @Override + public Void visitFlattenCommand(FlattenCommandContext ctx) { + validateAllowed(PPLGrammarElement.FLATTEN_COMMAND); + return super.visitFlattenCommand(ctx); + } + + @Override + public Void visitFillnullCommand(FillnullCommandContext ctx) { + validateAllowed(PPLGrammarElement.FILLNULL_COMMAND); + return super.visitFillnullCommand(ctx); + } + + @Override + public Void visitExpandCommand(ExpandCommandContext ctx) { + validateAllowed(PPLGrammarElement.EXPAND_COMMAND); + return super.visitExpandCommand(ctx); + } + + @Override + public 
Void visitDescribeCommand(DescribeCommandContext ctx) { + validateAllowed(PPLGrammarElement.DESCRIBE_COMMAND); + return super.visitDescribeCommand(ctx); + } + + @Override + public Void visitCidrMatchFunctionCall(CidrMatchFunctionCallContext ctx) { + validateAllowed(PPLGrammarElement.IPADDRESS_FUNCTIONS); + return super.visitCidrMatchFunctionCall(ctx); + } + + @Override + public Void visitJsonFunctionName(JsonFunctionNameContext ctx) { + validateAllowed(PPLGrammarElement.JSON_FUNCTIONS); + return super.visitJsonFunctionName(ctx); + } + + @Override + public Void visitLambdaFunctionName(LambdaFunctionNameContext ctx) { + validateAllowed(PPLGrammarElement.LAMBDA_FUNCTIONS); + return super.visitLambdaFunctionName(ctx); + } + + private void validateAllowed(PPLGrammarElement element) { + if (!grammarElementValidator.isValid(element)) { + throw new IllegalArgumentException(element + " is not allowed."); + } + } +} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/PPLQueryValidator.java b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/PPLQueryValidator.java new file mode 100644 index 0000000000..e630ffc45f --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/PPLQueryValidator.java @@ -0,0 +1,50 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.validator; + +import lombok.AllArgsConstructor; +import org.antlr.v4.runtime.CommonTokenStream; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.sql.common.antlr.CaseInsensitiveCharStream; +import org.opensearch.sql.common.antlr.SyntaxAnalysisErrorListener; +import org.opensearch.sql.datasource.model.DataSourceType; +import org.opensearch.sql.spark.antlr.parser.OpenSearchPPLLexer; +import org.opensearch.sql.spark.antlr.parser.OpenSearchPPLParser; + +@AllArgsConstructor +public class PPLQueryValidator { + private static final Logger log = LogManager.getLogger(SQLQueryValidator.class); + + private final GrammarElementValidatorProvider grammarElementValidatorProvider; + + /** + * It will look up validator associated with the DataSourceType, and throw + * IllegalArgumentException if invalid grammar element is found. + * + * @param pplQuery The query to be validated + * @param datasourceType + */ + public void validate(String pplQuery, DataSourceType datasourceType) { + GrammarElementValidator grammarElementValidator = + grammarElementValidatorProvider.getValidatorForDatasource(datasourceType); + PPLQueryValidationVisitor visitor = new PPLQueryValidationVisitor(grammarElementValidator); + try { + visitor.visit(getPplParser(pplQuery).root()); + } catch (IllegalArgumentException e) { + log.error("Query validation failed. 
DataSourceType=" + datasourceType, e); + throw e; + } + } + + public static OpenSearchPPLParser getPplParser(String pplQuery) { + OpenSearchPPLParser sqlBaseParser = + new OpenSearchPPLParser( + new CommonTokenStream(new OpenSearchPPLLexer(new CaseInsensitiveCharStream(pplQuery)))); + sqlBaseParser.addErrorListener(new SyntaxAnalysisErrorListener()); + return sqlBaseParser; + } +} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/S3GlueSQLGrammarElementValidator.java b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/S3GlueSQLGrammarElementValidator.java new file mode 100644 index 0000000000..870fb9412d --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/S3GlueSQLGrammarElementValidator.java @@ -0,0 +1,83 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.validator; + +import static org.opensearch.sql.spark.validator.SQLGrammarElement.ALTER_VIEW; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.BITWISE_FUNCTIONS; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.CLUSTER_BY; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.CREATE_FUNCTION; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.CREATE_VIEW; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.CROSS_JOIN; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.DESCRIBE_FUNCTION; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.DISTRIBUTE_BY; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.DROP_FUNCTION; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.DROP_VIEW; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.FILE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.FULL_OUTER_JOIN; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.HINTS; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.INLINE_TABLE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.INSERT; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.LEFT_ANTI_JOIN; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.LEFT_SEMI_JOIN; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.LOAD; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.MANAGE_RESOURCE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.MISC_FUNCTIONS; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.REFRESH_FUNCTION; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.REFRESH_RESOURCE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.RESET; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.RIGHT_OUTER_JOIN; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.SET; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.SHOW_FUNCTIONS; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.SHOW_VIEWS; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.TABLESAMPLE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.TABLE_VALUED_FUNCTION; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.TRANSFORM; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.UDF; + +import com.google.common.collect.ImmutableSet; +import java.util.Set; + +public class 
S3GlueSQLGrammarElementValidator extends DenyListGrammarElementValidator { + private static final Set S3GLUE_DENY_LIST = + ImmutableSet.builder() + .add( + ALTER_VIEW, + CREATE_FUNCTION, + CREATE_VIEW, + DROP_FUNCTION, + DROP_VIEW, + INSERT, + LOAD, + CLUSTER_BY, + DISTRIBUTE_BY, + HINTS, + INLINE_TABLE, + FILE, + CROSS_JOIN, + LEFT_SEMI_JOIN, + RIGHT_OUTER_JOIN, + FULL_OUTER_JOIN, + LEFT_ANTI_JOIN, + TABLESAMPLE, + TABLE_VALUED_FUNCTION, + TRANSFORM, + MANAGE_RESOURCE, + DESCRIBE_FUNCTION, + REFRESH_RESOURCE, + REFRESH_FUNCTION, + RESET, + SET, + SHOW_FUNCTIONS, + SHOW_VIEWS, + BITWISE_FUNCTIONS, + MISC_FUNCTIONS, + UDF) + .build(); + + public S3GlueSQLGrammarElementValidator() { + super(S3GLUE_DENY_LIST); + } +} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/SQLGrammarElement.java b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/SQLGrammarElement.java new file mode 100644 index 0000000000..ef3e1f2c8c --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/SQLGrammarElement.java @@ -0,0 +1,91 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.validator; + +import lombok.AllArgsConstructor; + +@AllArgsConstructor +public enum SQLGrammarElement implements GrammarElement { + ALTER_NAMESPACE("ALTER (DATABASE|TABLE|NAMESPACE)"), + ALTER_VIEW("ALTER VIEW"), + CREATE_NAMESPACE("CREATE (DATABASE|TABLE|NAMESPACE)"), + CREATE_FUNCTION("CREATE FUNCTION"), + CREATE_VIEW("CREATE VIEW"), + DROP_NAMESPACE("DROP (DATABASE|TABLE|NAMESPACE)"), + DROP_FUNCTION("DROP FUNCTION"), + DROP_VIEW("DROP VIEW"), + DROP_TABLE("DROP TABLE"), + REPAIR_TABLE("REPAIR TABLE"), + TRUNCATE_TABLE("TRUNCATE TABLE"), + // DML Statements + INSERT("INSERT"), + LOAD("LOAD"), + + // Data Retrieval Statements + EXPLAIN("EXPLAIN"), + WITH("WITH"), + CLUSTER_BY("CLUSTER BY"), + DISTRIBUTE_BY("DISTRIBUTE BY"), + // GROUP_BY("GROUP BY"), + // HAVING("HAVING"), + HINTS("HINTS"), + INLINE_TABLE("Inline Table(VALUES)"), + FILE("File"), + INNER_JOIN("INNER JOIN"), + CROSS_JOIN("CROSS JOIN"), + LEFT_OUTER_JOIN("LEFT OUTER JOIN"), + LEFT_SEMI_JOIN("LEFT SEMI JOIN"), + RIGHT_OUTER_JOIN("RIGHT OUTER JOIN"), + FULL_OUTER_JOIN("FULL OUTER JOIN"), + LEFT_ANTI_JOIN("LEFT ANTI JOIN"), + TABLESAMPLE("TABLESAMPLE"), + TABLE_VALUED_FUNCTION("Table-valued function"), + LATERAL_VIEW("LATERAL VIEW"), + LATERAL_SUBQUERY("LATERAL SUBQUERY"), + TRANSFORM("TRANSFORM"), + + // Auxiliary Statements + MANAGE_RESOURCE("Resource management statements"), + ANALYZE_TABLE("ANALYZE TABLE(S)"), + CACHE_TABLE("CACHE TABLE"), + CLEAR_CACHE("CLEAR CACHE"), + DESCRIBE_NAMESPACE("DESCRIBE (NAMESPACE|DATABASE|SCHEMA)"), + DESCRIBE_FUNCTION("DESCRIBE FUNCTION"), + DESCRIBE_QUERY("DESCRIBE QUERY"), + DESCRIBE_TABLE("DESCRIBE TABLE"), + REFRESH_RESOURCE("REFRESH"), + REFRESH_TABLE("REFRESH TABLE"), + REFRESH_FUNCTION("REFRESH FUNCTION"), + RESET("RESET"), + SET("SET"), + SHOW_COLUMNS("SHOW COLUMNS"), + SHOW_CREATE_TABLE("SHOW CREATE TABLE"), + SHOW_NAMESPACES("SHOW (DATABASES|SCHEMAS)"), + SHOW_FUNCTIONS("SHOW FUNCTIONS"), + SHOW_PARTITIONS("SHOW PARTITIONS"), + SHOW_TABLE_EXTENDED("SHOW TABLE EXTENDED"), + SHOW_TABLES("SHOW TABLES"), + SHOW_TBLPROPERTIES("SHOW TBLPROPERTIES"), + SHOW_VIEWS("SHOW VIEWS"), + UNCACHE_TABLE("UNCACHE TABLE"), + + // Functions + MAP_FUNCTIONS("Map functions"), + BITWISE_FUNCTIONS("Bitwise functions"), + CSV_FUNCTIONS("CSV functions"), + GENERATOR_FUNCTIONS("Generator functions"), + 
MISC_FUNCTIONS("Misc functions"), + + // UDF + UDF("User Defined functions"); + + String description; + + @Override + public String toString() { + return description; + } +} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/SQLQueryValidationVisitor.java b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/SQLQueryValidationVisitor.java new file mode 100644 index 0000000000..10fc48727a --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/SQLQueryValidationVisitor.java @@ -0,0 +1,622 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.validator; + +import lombok.AllArgsConstructor; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.AddTableColumnsContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.AddTablePartitionContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.AlterTableAlterColumnContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.AlterViewQueryContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.AnalyzeContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.AnalyzeTablesContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.CacheTableContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.ClearCacheContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.CreateNamespaceContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.CreateTableContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.CreateTableLikeContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.CreateViewContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.CtesContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.DescribeFunctionContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.DescribeNamespaceContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.DescribeQueryContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.DescribeRelationContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.DropFunctionContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.DropNamespaceContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.DropTableColumnsContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.DropTableContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.DropTablePartitionsContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.DropViewContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.ErrorCapturingIdentifierContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.ErrorCapturingIdentifierExtraContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.ExplainContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.FunctionNameContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.HintContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.HiveReplaceColumnsContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.InlineTableContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.InsertIntoReplaceWhereContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.InsertIntoTableContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.InsertOverwriteDirContext; +import 
org.opensearch.sql.spark.antlr.parser.SqlBaseParser.InsertOverwriteHiveDirContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.InsertOverwriteTableContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.JoinRelationContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.JoinTypeContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.LateralViewContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.LiteralTypeContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.LoadDataContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.ManageResourceContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.QueryOrganizationContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.RecoverPartitionsContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.RefreshFunctionContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.RefreshResourceContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.RefreshTableContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.RelationContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.RenameTableColumnContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.RenameTableContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.RenameTablePartitionContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.RepairTableContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.ReplaceTableContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.ResetConfigurationContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.ResetQuotedConfigurationContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.SampleContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.SetConfigurationContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.SetNamespaceLocationContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.SetNamespacePropertiesContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.SetTableLocationContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.SetTableSerDeContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.ShowColumnsContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.ShowCreateTableContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.ShowFunctionsContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.ShowNamespacesContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.ShowPartitionsContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.ShowTableExtendedContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.ShowTablesContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.ShowTblPropertiesContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.ShowViewsContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.TableNameContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.TableValuedFunctionContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.TransformClauseContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.TruncateTableContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.TypeContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.UncacheTableContext; +import 
org.opensearch.sql.spark.antlr.parser.SqlBaseParser.UnsupportedHiveNativeCommandsContext; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParserBaseVisitor; + +/** This visitor validate grammar using GrammarElementValidator */ +@AllArgsConstructor +public class SQLQueryValidationVisitor extends SqlBaseParserBaseVisitor { + private final GrammarElementValidator grammarElementValidator; + + @Override + public Void visitCreateFunction(SqlBaseParser.CreateFunctionContext ctx) { + validateAllowed(SQLGrammarElement.CREATE_FUNCTION); + return super.visitCreateFunction(ctx); + } + + @Override + public Void visitSetNamespaceProperties(SetNamespacePropertiesContext ctx) { + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); + return super.visitSetNamespaceProperties(ctx); + } + + @Override + public Void visitAddTableColumns(AddTableColumnsContext ctx) { + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); + return super.visitAddTableColumns(ctx); + } + + @Override + public Void visitAddTablePartition(AddTablePartitionContext ctx) { + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); + return super.visitAddTablePartition(ctx); + } + + @Override + public Void visitRenameTableColumn(RenameTableColumnContext ctx) { + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); + return super.visitRenameTableColumn(ctx); + } + + @Override + public Void visitDropTableColumns(DropTableColumnsContext ctx) { + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); + return super.visitDropTableColumns(ctx); + } + + @Override + public Void visitAlterTableAlterColumn(AlterTableAlterColumnContext ctx) { + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); + return super.visitAlterTableAlterColumn(ctx); + } + + @Override + public Void visitHiveReplaceColumns(HiveReplaceColumnsContext ctx) { + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); + return super.visitHiveReplaceColumns(ctx); + } + + @Override + public Void visitSetTableSerDe(SetTableSerDeContext ctx) { + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); + return super.visitSetTableSerDe(ctx); + } + + @Override + public Void visitRenameTablePartition(RenameTablePartitionContext ctx) { + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); + return super.visitRenameTablePartition(ctx); + } + + @Override + public Void visitDropTablePartitions(DropTablePartitionsContext ctx) { + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); + return super.visitDropTablePartitions(ctx); + } + + @Override + public Void visitSetTableLocation(SetTableLocationContext ctx) { + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); + return super.visitSetTableLocation(ctx); + } + + @Override + public Void visitRecoverPartitions(RecoverPartitionsContext ctx) { + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); + return super.visitRecoverPartitions(ctx); + } + + @Override + public Void visitSetNamespaceLocation(SetNamespaceLocationContext ctx) { + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); + return super.visitSetNamespaceLocation(ctx); + } + + @Override + public Void visitAlterViewQuery(AlterViewQueryContext ctx) { + validateAllowed(SQLGrammarElement.ALTER_VIEW); + return super.visitAlterViewQuery(ctx); + } + + @Override + public Void visitRenameTable(RenameTableContext ctx) { + if (ctx.VIEW() != null) { + validateAllowed(SQLGrammarElement.ALTER_VIEW); + } else { + validateAllowed(SQLGrammarElement.ALTER_NAMESPACE); + } + + return super.visitRenameTable(ctx); + } + + @Override + public Void visitCreateNamespace(CreateNamespaceContext ctx) { + 
validateAllowed(SQLGrammarElement.CREATE_NAMESPACE); + return super.visitCreateNamespace(ctx); + } + + @Override + public Void visitCreateTable(CreateTableContext ctx) { + validateAllowed(SQLGrammarElement.CREATE_NAMESPACE); + return super.visitCreateTable(ctx); + } + + @Override + public Void visitCreateTableLike(CreateTableLikeContext ctx) { + validateAllowed(SQLGrammarElement.CREATE_NAMESPACE); + return super.visitCreateTableLike(ctx); + } + + @Override + public Void visitReplaceTable(ReplaceTableContext ctx) { + validateAllowed(SQLGrammarElement.CREATE_NAMESPACE); + return super.visitReplaceTable(ctx); + } + + @Override + public Void visitDropNamespace(DropNamespaceContext ctx) { + validateAllowed(SQLGrammarElement.DROP_NAMESPACE); + return super.visitDropNamespace(ctx); + } + + @Override + public Void visitDropTable(DropTableContext ctx) { + validateAllowed(SQLGrammarElement.DROP_NAMESPACE); + return super.visitDropTable(ctx); + } + + @Override + public Void visitCreateView(CreateViewContext ctx) { + validateAllowed(SQLGrammarElement.CREATE_VIEW); + return super.visitCreateView(ctx); + } + + @Override + public Void visitDropView(DropViewContext ctx) { + validateAllowed(SQLGrammarElement.DROP_VIEW); + return super.visitDropView(ctx); + } + + @Override + public Void visitDropFunction(DropFunctionContext ctx) { + validateAllowed(SQLGrammarElement.DROP_FUNCTION); + return super.visitDropFunction(ctx); + } + + @Override + public Void visitRepairTable(RepairTableContext ctx) { + validateAllowed(SQLGrammarElement.REPAIR_TABLE); + return super.visitRepairTable(ctx); + } + + @Override + public Void visitTruncateTable(TruncateTableContext ctx) { + validateAllowed(SQLGrammarElement.TRUNCATE_TABLE); + return super.visitTruncateTable(ctx); + } + + @Override + public Void visitInsertOverwriteTable(InsertOverwriteTableContext ctx) { + validateAllowed(SQLGrammarElement.INSERT); + return super.visitInsertOverwriteTable(ctx); + } + + @Override + public Void visitInsertIntoReplaceWhere(InsertIntoReplaceWhereContext ctx) { + validateAllowed(SQLGrammarElement.INSERT); + return super.visitInsertIntoReplaceWhere(ctx); + } + + @Override + public Void visitInsertIntoTable(InsertIntoTableContext ctx) { + validateAllowed(SQLGrammarElement.INSERT); + return super.visitInsertIntoTable(ctx); + } + + @Override + public Void visitInsertOverwriteDir(InsertOverwriteDirContext ctx) { + validateAllowed(SQLGrammarElement.INSERT); + return super.visitInsertOverwriteDir(ctx); + } + + @Override + public Void visitInsertOverwriteHiveDir(InsertOverwriteHiveDirContext ctx) { + validateAllowed(SQLGrammarElement.INSERT); + return super.visitInsertOverwriteHiveDir(ctx); + } + + @Override + public Void visitLoadData(LoadDataContext ctx) { + validateAllowed(SQLGrammarElement.LOAD); + return super.visitLoadData(ctx); + } + + @Override + public Void visitExplain(ExplainContext ctx) { + validateAllowed(SQLGrammarElement.EXPLAIN); + return super.visitExplain(ctx); + } + + @Override + public Void visitTableName(TableNameContext ctx) { + String reference = ctx.identifierReference().getText(); + if (isFileReference(reference)) { + validateAllowed(SQLGrammarElement.FILE); + } + return super.visitTableName(ctx); + } + + private static final String FILE_REFERENCE_PATTERN = "^[a-zA-Z]+\\.`[^`]+`$"; + + private boolean isFileReference(String reference) { + return reference.matches(FILE_REFERENCE_PATTERN); + } + + @Override + public Void visitCtes(CtesContext ctx) { + validateAllowed(SQLGrammarElement.WITH); + return super.visitCtes(ctx); + } + 
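A hedged wiring sketch of how the validator classes in this patch fit together; the S3GLUE mapping and the sample queries are illustrative assumptions rather than code taken from the change (it also assumes java.util.Map, DataSourceType, and the validator classes above are imported).

    // Hypothetical wiring sketch (not part of the diff)
    GrammarElementValidatorProvider validatorProvider =
        new GrammarElementValidatorProvider(
            Map.of(DataSourceType.S3GLUE, new S3GlueSQLGrammarElementValidator()),
            new DefaultGrammarElementValidator());
    SQLQueryValidator sqlValidator = new SQLQueryValidator(validatorProvider);

    // Accepted: a plain SELECT uses no denied grammar element.
    sqlValidator.validate("SELECT * FROM my_table", DataSourceType.S3GLUE);

    // Rejected: CREATE VIEW is on the S3 Glue deny list, so SQLQueryValidationVisitor
    // throws IllegalArgumentException("CREATE VIEW is not allowed.").
    sqlValidator.validate("CREATE VIEW v AS SELECT 1 AS c", DataSourceType.S3GLUE);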
+ @Override + public Void visitQueryOrganization(QueryOrganizationContext ctx) { + if (ctx.CLUSTER() != null) { + validateAllowed(SQLGrammarElement.CLUSTER_BY); + } else if (ctx.DISTRIBUTE() != null) { + validateAllowed(SQLGrammarElement.DISTRIBUTE_BY); + } + return super.visitQueryOrganization(ctx); + } + + @Override + public Void visitHint(HintContext ctx) { + validateAllowed(SQLGrammarElement.HINTS); + return super.visitHint(ctx); + } + + @Override + public Void visitInlineTable(InlineTableContext ctx) { + validateAllowed(SQLGrammarElement.INLINE_TABLE); + return super.visitInlineTable(ctx); + } + + @Override + public Void visitJoinType(JoinTypeContext ctx) { + if (ctx.CROSS() != null) { + validateAllowed(SQLGrammarElement.CROSS_JOIN); + } else if (ctx.LEFT() != null && ctx.SEMI() != null) { + validateAllowed(SQLGrammarElement.LEFT_SEMI_JOIN); + } else if (ctx.ANTI() != null) { + validateAllowed(SQLGrammarElement.LEFT_ANTI_JOIN); + } else if (ctx.LEFT() != null) { + validateAllowed(SQLGrammarElement.LEFT_OUTER_JOIN); + } else if (ctx.RIGHT() != null) { + validateAllowed(SQLGrammarElement.RIGHT_OUTER_JOIN); + } else if (ctx.FULL() != null) { + validateAllowed(SQLGrammarElement.FULL_OUTER_JOIN); + } else { + validateAllowed(SQLGrammarElement.INNER_JOIN); + } + return super.visitJoinType(ctx); + } + + @Override + public Void visitSample(SampleContext ctx) { + validateAllowed(SQLGrammarElement.TABLESAMPLE); + return super.visitSample(ctx); + } + + @Override + public Void visitTableValuedFunction(TableValuedFunctionContext ctx) { + validateAllowed(SQLGrammarElement.TABLE_VALUED_FUNCTION); + return super.visitTableValuedFunction(ctx); + } + + @Override + public Void visitLateralView(LateralViewContext ctx) { + validateAllowed(SQLGrammarElement.LATERAL_VIEW); + return super.visitLateralView(ctx); + } + + @Override + public Void visitRelation(RelationContext ctx) { + if (ctx.LATERAL() != null) { + validateAllowed(SQLGrammarElement.LATERAL_SUBQUERY); + } + return super.visitRelation(ctx); + } + + @Override + public Void visitJoinRelation(JoinRelationContext ctx) { + if (ctx.LATERAL() != null) { + validateAllowed(SQLGrammarElement.LATERAL_SUBQUERY); + } + return super.visitJoinRelation(ctx); + } + + @Override + public Void visitTransformClause(TransformClauseContext ctx) { + if (ctx.TRANSFORM() != null) { + validateAllowed(SQLGrammarElement.TRANSFORM); + } + return super.visitTransformClause(ctx); + } + + @Override + public Void visitManageResource(ManageResourceContext ctx) { + validateAllowed(SQLGrammarElement.MANAGE_RESOURCE); + return super.visitManageResource(ctx); + } + + @Override + public Void visitAnalyze(AnalyzeContext ctx) { + validateAllowed(SQLGrammarElement.ANALYZE_TABLE); + return super.visitAnalyze(ctx); + } + + @Override + public Void visitAnalyzeTables(AnalyzeTablesContext ctx) { + validateAllowed(SQLGrammarElement.ANALYZE_TABLE); + return super.visitAnalyzeTables(ctx); + } + + @Override + public Void visitCacheTable(CacheTableContext ctx) { + validateAllowed(SQLGrammarElement.CACHE_TABLE); + return super.visitCacheTable(ctx); + } + + @Override + public Void visitClearCache(ClearCacheContext ctx) { + validateAllowed(SQLGrammarElement.CLEAR_CACHE); + return super.visitClearCache(ctx); + } + + @Override + public Void visitDescribeNamespace(DescribeNamespaceContext ctx) { + validateAllowed(SQLGrammarElement.DESCRIBE_NAMESPACE); + return super.visitDescribeNamespace(ctx); + } + + @Override + public Void visitDescribeFunction(DescribeFunctionContext ctx) { + 
validateAllowed(SQLGrammarElement.DESCRIBE_FUNCTION); + return super.visitDescribeFunction(ctx); + } + + @Override + public Void visitDescribeRelation(DescribeRelationContext ctx) { + validateAllowed(SQLGrammarElement.DESCRIBE_TABLE); + return super.visitDescribeRelation(ctx); + } + + @Override + public Void visitDescribeQuery(DescribeQueryContext ctx) { + validateAllowed(SQLGrammarElement.DESCRIBE_QUERY); + return super.visitDescribeQuery(ctx); + } + + @Override + public Void visitRefreshResource(RefreshResourceContext ctx) { + validateAllowed(SQLGrammarElement.REFRESH_RESOURCE); + return super.visitRefreshResource(ctx); + } + + @Override + public Void visitRefreshTable(RefreshTableContext ctx) { + validateAllowed(SQLGrammarElement.REFRESH_TABLE); + return super.visitRefreshTable(ctx); + } + + @Override + public Void visitRefreshFunction(RefreshFunctionContext ctx) { + validateAllowed(SQLGrammarElement.REFRESH_FUNCTION); + return super.visitRefreshFunction(ctx); + } + + @Override + public Void visitResetConfiguration(ResetConfigurationContext ctx) { + validateAllowed(SQLGrammarElement.RESET); + return super.visitResetConfiguration(ctx); + } + + @Override + public Void visitResetQuotedConfiguration(ResetQuotedConfigurationContext ctx) { + validateAllowed(SQLGrammarElement.RESET); + return super.visitResetQuotedConfiguration(ctx); + } + + @Override + public Void visitSetConfiguration(SetConfigurationContext ctx) { + validateAllowed(SQLGrammarElement.SET); + return super.visitSetConfiguration(ctx); + } + + @Override + public Void visitShowColumns(ShowColumnsContext ctx) { + validateAllowed(SQLGrammarElement.SHOW_COLUMNS); + return super.visitShowColumns(ctx); + } + + @Override + public Void visitShowCreateTable(ShowCreateTableContext ctx) { + validateAllowed(SQLGrammarElement.SHOW_CREATE_TABLE); + return super.visitShowCreateTable(ctx); + } + + @Override + public Void visitShowNamespaces(ShowNamespacesContext ctx) { + validateAllowed(SQLGrammarElement.SHOW_NAMESPACES); + return super.visitShowNamespaces(ctx); + } + + @Override + public Void visitShowFunctions(ShowFunctionsContext ctx) { + validateAllowed(SQLGrammarElement.SHOW_FUNCTIONS); + return super.visitShowFunctions(ctx); + } + + @Override + public Void visitShowPartitions(ShowPartitionsContext ctx) { + validateAllowed(SQLGrammarElement.SHOW_PARTITIONS); + return super.visitShowPartitions(ctx); + } + + @Override + public Void visitShowTableExtended(ShowTableExtendedContext ctx) { + validateAllowed(SQLGrammarElement.SHOW_TABLE_EXTENDED); + return super.visitShowTableExtended(ctx); + } + + @Override + public Void visitShowTables(ShowTablesContext ctx) { + validateAllowed(SQLGrammarElement.SHOW_TABLES); + return super.visitShowTables(ctx); + } + + @Override + public Void visitShowTblProperties(ShowTblPropertiesContext ctx) { + validateAllowed(SQLGrammarElement.SHOW_TBLPROPERTIES); + return super.visitShowTblProperties(ctx); + } + + @Override + public Void visitShowViews(ShowViewsContext ctx) { + validateAllowed(SQLGrammarElement.SHOW_VIEWS); + return super.visitShowViews(ctx); + } + + @Override + public Void visitUncacheTable(UncacheTableContext ctx) { + validateAllowed(SQLGrammarElement.UNCACHE_TABLE); + return super.visitUncacheTable(ctx); + } + + @Override + public Void visitFunctionName(FunctionNameContext ctx) { + validateFunctionAllowed(ctx.qualifiedName().getText()); + return super.visitFunctionName(ctx); + } + + private void validateFunctionAllowed(String function) { + FunctionType type = 
FunctionType.fromFunctionName(function.toLowerCase()); + switch (type) { + case MAP: + validateAllowed(SQLGrammarElement.MAP_FUNCTIONS); + break; + case BITWISE: + validateAllowed(SQLGrammarElement.BITWISE_FUNCTIONS); + break; + case CSV: + validateAllowed(SQLGrammarElement.CSV_FUNCTIONS); + break; + case MISC: + validateAllowed(SQLGrammarElement.MISC_FUNCTIONS); + break; + case GENERATOR: + validateAllowed(SQLGrammarElement.GENERATOR_FUNCTIONS); + break; + case UDF: + validateAllowed(SQLGrammarElement.UDF); + break; + } + } + + private void validateAllowed(SQLGrammarElement element) { + if (!grammarElementValidator.isValid(element)) { + throw new IllegalArgumentException(element + " is not allowed."); + } + } + + @Override + public Void visitErrorCapturingIdentifier(ErrorCapturingIdentifierContext ctx) { + ErrorCapturingIdentifierExtraContext extra = ctx.errorCapturingIdentifierExtra(); + if (extra.children != null) { + throw new IllegalArgumentException("Invalid identifier: " + ctx.getText()); + } + return super.visitErrorCapturingIdentifier(ctx); + } + + @Override + public Void visitLiteralType(LiteralTypeContext ctx) { + if (ctx.unsupportedType != null) { + throw new IllegalArgumentException("Unsupported typed literal: " + ctx.getText()); + } + return super.visitLiteralType(ctx); + } + + @Override + public Void visitType(TypeContext ctx) { + if (ctx.unsupportedType != null) { + throw new IllegalArgumentException("Unsupported data type: " + ctx.getText()); + } + return super.visitType(ctx); + } + + @Override + public Void visitUnsupportedHiveNativeCommands(UnsupportedHiveNativeCommandsContext ctx) { + throw new IllegalArgumentException("Unsupported command."); + } +} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/SQLQueryValidator.java b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/SQLQueryValidator.java new file mode 100644 index 0000000000..5d934411bf --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/SQLQueryValidator.java @@ -0,0 +1,47 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.validator; + +import lombok.AllArgsConstructor; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.sql.datasource.model.DataSourceType; +import org.opensearch.sql.spark.utils.SQLQueryUtils; + +/** Validate input SQL query based on the DataSourceType. */ +@AllArgsConstructor +public class SQLQueryValidator { + private static final Logger log = LogManager.getLogger(SQLQueryValidator.class); + + private final GrammarElementValidatorProvider grammarElementValidatorProvider; + + /** + * Looks up the validator associated with the DataSourceType and throws + * IllegalArgumentException if an invalid grammar element is found. + * + * @param sqlQuery The query to be validated + * @param datasourceType The type of the datasource the query is being run on + */ + public void validate(String sqlQuery, DataSourceType datasourceType) { + GrammarElementValidator grammarElementValidator = + grammarElementValidatorProvider.getValidatorForDatasource(datasourceType); + SQLQueryValidationVisitor visitor = new SQLQueryValidationVisitor(grammarElementValidator); + try { + visitor.visit(SQLQueryUtils.getBaseParser(sqlQuery).singleStatement()); + } catch (IllegalArgumentException e) { + log.error("Query validation failed. DataSourceType=" + datasourceType, e); + throw e; + } + } + + /** + * Validates a query from the Flint extension grammar.
The method is currently a no-op. + * + * @param sqlQuery The Flint extension query to be validated + * @param dataSourceType The type of the datasource the query is being run on + */ + public void validateFlintExtensionQuery(String sqlQuery, DataSourceType dataSourceType) {} +} diff --git a/async-query-core/src/main/java/org/opensearch/sql/spark/validator/SecurityLakeSQLGrammarElementValidator.java b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/SecurityLakeSQLGrammarElementValidator.java new file mode 100644 index 0000000000..89af6f31a4 --- /dev/null +++ b/async-query-core/src/main/java/org/opensearch/sql/spark/validator/SecurityLakeSQLGrammarElementValidator.java @@ -0,0 +1,123 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.validator; + +import static org.opensearch.sql.spark.validator.SQLGrammarElement.ALTER_NAMESPACE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.ALTER_VIEW; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.ANALYZE_TABLE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.CACHE_TABLE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.CLEAR_CACHE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.CLUSTER_BY; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.CREATE_FUNCTION; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.CREATE_NAMESPACE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.CREATE_VIEW; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.CROSS_JOIN; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.CSV_FUNCTIONS; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.DESCRIBE_FUNCTION; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.DESCRIBE_NAMESPACE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.DESCRIBE_QUERY; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.DESCRIBE_TABLE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.DISTRIBUTE_BY; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.DROP_FUNCTION; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.DROP_NAMESPACE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.DROP_VIEW; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.FILE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.FULL_OUTER_JOIN; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.HINTS; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.INLINE_TABLE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.INSERT; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.LEFT_ANTI_JOIN; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.LEFT_SEMI_JOIN; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.LOAD; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.MANAGE_RESOURCE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.MISC_FUNCTIONS; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.REFRESH_FUNCTION; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.REFRESH_RESOURCE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.REFRESH_TABLE; +import static 
org.opensearch.sql.spark.validator.SQLGrammarElement.REPAIR_TABLE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.RESET; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.RIGHT_OUTER_JOIN; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.SET; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.SHOW_COLUMNS; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.SHOW_CREATE_TABLE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.SHOW_FUNCTIONS; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.SHOW_NAMESPACES; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.SHOW_PARTITIONS; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.SHOW_TABLES; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.SHOW_TABLE_EXTENDED; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.SHOW_TBLPROPERTIES; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.SHOW_VIEWS; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.TABLESAMPLE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.TABLE_VALUED_FUNCTION; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.TRANSFORM; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.TRUNCATE_TABLE; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.UDF; +import static org.opensearch.sql.spark.validator.SQLGrammarElement.UNCACHE_TABLE; + +import com.google.common.collect.ImmutableSet; +import java.util.Set; + +public class SecurityLakeSQLGrammarElementValidator extends DenyListGrammarElementValidator { + private static final Set SECURITY_LAKE_DENY_LIST = + ImmutableSet.builder() + .add( + ALTER_NAMESPACE, + ALTER_VIEW, + CREATE_NAMESPACE, + CREATE_FUNCTION, + CREATE_VIEW, + DROP_FUNCTION, + DROP_NAMESPACE, + DROP_VIEW, + REPAIR_TABLE, + TRUNCATE_TABLE, + INSERT, + LOAD, + CLUSTER_BY, + DISTRIBUTE_BY, + HINTS, + INLINE_TABLE, + FILE, + CROSS_JOIN, + LEFT_SEMI_JOIN, + RIGHT_OUTER_JOIN, + FULL_OUTER_JOIN, + LEFT_ANTI_JOIN, + TABLESAMPLE, + TABLE_VALUED_FUNCTION, + TRANSFORM, + MANAGE_RESOURCE, + ANALYZE_TABLE, + CACHE_TABLE, + CLEAR_CACHE, + DESCRIBE_NAMESPACE, + DESCRIBE_FUNCTION, + DESCRIBE_QUERY, + DESCRIBE_TABLE, + REFRESH_RESOURCE, + REFRESH_TABLE, + REFRESH_FUNCTION, + RESET, + SET, + SHOW_COLUMNS, + SHOW_CREATE_TABLE, + SHOW_NAMESPACES, + SHOW_FUNCTIONS, + SHOW_PARTITIONS, + SHOW_TABLE_EXTENDED, + SHOW_TABLES, + SHOW_TBLPROPERTIES, + SHOW_VIEWS, + UNCACHE_TABLE, + CSV_FUNCTIONS, + MISC_FUNCTIONS, + UDF) + .build(); + + public SecurityLakeSQLGrammarElementValidator() { + super(SECURITY_LAKE_DENY_LIST); + } +} diff --git a/async-query-core/src/test/java/org/opensearch/sql/asyncquery/DummyTest.java b/async-query-core/src/test/java/org/opensearch/sql/asyncquery/DummyTest.java deleted file mode 100644 index 8fa1cf49ec..0000000000 --- a/async-query-core/src/test/java/org/opensearch/sql/asyncquery/DummyTest.java +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.asyncquery; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import org.junit.jupiter.api.Test; - -public class DummyTest { - @Test - public void test() { - Dummy dummy = new Dummy(); - assertEquals("Hello!", dummy.hello()); - } -} diff --git 
a/async-query-core/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryCoreIntegTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryCoreIntegTest.java new file mode 100644 index 0000000000..382b560727 --- /dev/null +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryCoreIntegTest.java @@ -0,0 +1,805 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.asyncquery; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; +import static org.opensearch.sql.datasources.glue.GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_AUTH; +import static org.opensearch.sql.datasources.glue.GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_URI; +import static org.opensearch.sql.spark.dispatcher.IndexDMLHandler.DML_QUERY_JOB_ID; +import static org.opensearch.sql.spark.dispatcher.IndexDMLHandler.DROP_INDEX_JOB_ID; + +import com.amazonaws.services.emrserverless.AWSEMRServerless; +import com.amazonaws.services.emrserverless.model.CancelJobRunRequest; +import com.amazonaws.services.emrserverless.model.CancelJobRunResult; +import com.amazonaws.services.emrserverless.model.GetJobRunRequest; +import com.amazonaws.services.emrserverless.model.GetJobRunResult; +import com.amazonaws.services.emrserverless.model.JobRun; +import com.amazonaws.services.emrserverless.model.StartJobRunRequest; +import com.amazonaws.services.emrserverless.model.StartJobRunResult; +import com.google.common.collect.ImmutableMap; +import java.util.Optional; +import org.json.JSONArray; +import org.json.JSONObject; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.ArgumentCaptor; +import org.mockito.Captor; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.datasource.DataSourceService; +import org.opensearch.sql.datasource.model.DataSourceMetadata; +import org.opensearch.sql.datasource.model.DataSourceType; +import org.opensearch.sql.datasources.auth.AuthenticationType; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryExecutionResponse; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryJobMetadata; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryJobMetadata.AsyncQueryJobMetadataBuilder; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; +import org.opensearch.sql.spark.asyncquery.model.QueryState; +import org.opensearch.sql.spark.client.EMRServerlessClientFactory; +import org.opensearch.sql.spark.client.EmrServerlessClientImpl; +import org.opensearch.sql.spark.config.SparkExecutionEngineConfig; +import org.opensearch.sql.spark.config.SparkExecutionEngineConfigSupplier; +import org.opensearch.sql.spark.config.SparkSubmitParameterModifier; +import org.opensearch.sql.spark.dispatcher.QueryHandlerFactory; +import org.opensearch.sql.spark.dispatcher.QueryIdProvider; +import org.opensearch.sql.spark.dispatcher.SparkQueryDispatcher; +import 
org.opensearch.sql.spark.dispatcher.model.DispatchQueryRequest; +import org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions; +import org.opensearch.sql.spark.dispatcher.model.IndexDMLResult; +import org.opensearch.sql.spark.dispatcher.model.JobType; +import org.opensearch.sql.spark.execution.session.CreateSessionRequest; +import org.opensearch.sql.spark.execution.session.SessionConfigSupplier; +import org.opensearch.sql.spark.execution.session.SessionIdProvider; +import org.opensearch.sql.spark.execution.session.SessionManager; +import org.opensearch.sql.spark.execution.session.SessionModel; +import org.opensearch.sql.spark.execution.session.SessionState; +import org.opensearch.sql.spark.execution.statement.StatementId; +import org.opensearch.sql.spark.execution.statement.StatementModel; +import org.opensearch.sql.spark.execution.statement.StatementState; +import org.opensearch.sql.spark.execution.statestore.SessionStorageService; +import org.opensearch.sql.spark.execution.statestore.StatementStorageService; +import org.opensearch.sql.spark.flint.FlintIndexClient; +import org.opensearch.sql.spark.flint.FlintIndexMetadata; +import org.opensearch.sql.spark.flint.FlintIndexMetadataService; +import org.opensearch.sql.spark.flint.FlintIndexStateModelService; +import org.opensearch.sql.spark.flint.IndexDMLResultStorageService; +import org.opensearch.sql.spark.flint.operation.FlintIndexOpFactory; +import org.opensearch.sql.spark.leasemanager.LeaseManager; +import org.opensearch.sql.spark.leasemanager.model.LeaseRequest; +import org.opensearch.sql.spark.metrics.MetricsService; +import org.opensearch.sql.spark.parameter.SparkParameterComposerCollection; +import org.opensearch.sql.spark.parameter.SparkSubmitParametersBuilderProvider; +import org.opensearch.sql.spark.response.JobExecutionResponseReader; +import org.opensearch.sql.spark.rest.model.CreateAsyncQueryRequest; +import org.opensearch.sql.spark.rest.model.CreateAsyncQueryResponse; +import org.opensearch.sql.spark.rest.model.LangType; +import org.opensearch.sql.spark.scheduler.AsyncQueryScheduler; +import org.opensearch.sql.spark.validator.DefaultGrammarElementValidator; +import org.opensearch.sql.spark.validator.GrammarElementValidatorProvider; +import org.opensearch.sql.spark.validator.PPLQueryValidator; +import org.opensearch.sql.spark.validator.S3GlueSQLGrammarElementValidator; +import org.opensearch.sql.spark.validator.SQLQueryValidator; + +/** + * This tests async-query-core library end-to-end using mocked implementation of extension points. + * It intends to cover major happy cases. 
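+ * Extension points are supplied as Mockito mocks (the job metadata, session, statement, and Flint
+ * index storage services, the AWS EMR Serverless SDK client, the lease manager, response reader,
+ * and scheduler), while the dispatcher, session manager, and query handler factory are wired with
+ * real implementations in setUp().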
+ */ +@ExtendWith(MockitoExtension.class) +public class AsyncQueryCoreIntegTest { + + public static final String QUERY_ID = "QUERY_ID"; + public static final String SESSION_ID = "SESSION_ID"; + public static final String DATASOURCE_NAME = "DATASOURCE_NAME"; + public static final String INDEX_NAME = "INDEX_NAME"; + public static final String APPLICATION_ID = "APPLICATION_ID"; + public static final String JOB_ID = "JOB_ID"; + public static final String ACCOUNT_ID = "ACCOUNT_ID"; + public static final String RESULT_INDEX = "RESULT_INDEX"; + @Mock SparkSubmitParameterModifier sparkSubmitParameterModifier; + @Mock SparkExecutionEngineConfigSupplier sparkExecutionEngineConfigSupplier; + @Mock SessionConfigSupplier sessionConfigSupplier; + @Mock LeaseManager leaseManager; + @Mock JobExecutionResponseReader jobExecutionResponseReader; + @Mock DataSourceService dataSourceService; + EMRServerlessClientFactory emrServerlessClientFactory; + @Mock AWSEMRServerless awsemrServerless; + @Mock SessionIdProvider sessionIdProvider; + @Mock QueryIdProvider queryIdProvider; + @Mock FlintIndexClient flintIndexClient; + @Mock AsyncQueryRequestContext asyncQueryRequestContext; + @Mock MetricsService metricsService; + @Mock AsyncQueryScheduler asyncQueryScheduler; + @Mock SparkSubmitParametersBuilderProvider sparkSubmitParametersBuilderProvider; + + // storage services + @Mock AsyncQueryJobMetadataStorageService asyncQueryJobMetadataStorageService; + @Mock SessionStorageService sessionStorageService; + @Mock StatementStorageService statementStorageService; + @Mock FlintIndexMetadataService flintIndexMetadataService; + @Mock FlintIndexStateModelService flintIndexStateModelService; + @Mock IndexDMLResultStorageService indexDMLResultStorageService; + + @Captor ArgumentCaptor dispatchQueryRequestArgumentCaptor; + @Captor ArgumentCaptor cancelJobRunRequestArgumentCaptor; + @Captor ArgumentCaptor getJobRunRequestArgumentCaptor; + @Captor ArgumentCaptor indexDMLResultArgumentCaptor; + @Captor ArgumentCaptor asyncQueryJobMetadataArgumentCaptor; + @Captor ArgumentCaptor flintIndexOptionsArgumentCaptor; + @Captor ArgumentCaptor startJobRunRequestArgumentCaptor; + @Captor ArgumentCaptor createSessionRequestArgumentCaptor; + @Captor ArgumentCaptor leaseRequestArgumentCaptor; + + AsyncQueryExecutorService asyncQueryExecutorService; + + @BeforeEach + public void setUp() { + emrServerlessClientFactory = + (accountId) -> new EmrServerlessClientImpl(awsemrServerless, metricsService); + SparkParameterComposerCollection collection = new SparkParameterComposerCollection(); + collection.register( + DataSourceType.S3GLUE, + (dataSourceMetadata, sparkSubmitParameters, dispatchQueryRequest, context) -> + sparkSubmitParameters.setConfigItem( + "key.from.datasource.composer", "value.from.datasource.composer")); + collection.register( + (sparkSubmitParameters, dispatchQueryRequest, context) -> + sparkSubmitParameters.setConfigItem( + "key.from.generic.composer", "value.from.generic.composer")); + SessionManager sessionManager = + new SessionManager( + sessionStorageService, + statementStorageService, + emrServerlessClientFactory, + sessionConfigSupplier, + sessionIdProvider); + FlintIndexOpFactory flintIndexOpFactory = + new FlintIndexOpFactory( + flintIndexStateModelService, + flintIndexClient, + flintIndexMetadataService, + emrServerlessClientFactory, + asyncQueryScheduler); + QueryHandlerFactory queryHandlerFactory = + new QueryHandlerFactory( + jobExecutionResponseReader, + flintIndexMetadataService, + sessionManager, + leaseManager, + 
indexDMLResultStorageService, + flintIndexOpFactory, + emrServerlessClientFactory, + metricsService, + new SparkSubmitParametersBuilderProvider(collection)); + SQLQueryValidator sqlQueryValidator = + new SQLQueryValidator( + new GrammarElementValidatorProvider( + ImmutableMap.of(DataSourceType.S3GLUE, new S3GlueSQLGrammarElementValidator()), + new DefaultGrammarElementValidator())); + PPLQueryValidator pplQueryValidator = + new PPLQueryValidator( + new GrammarElementValidatorProvider( + ImmutableMap.of(), new DefaultGrammarElementValidator())); + SparkQueryDispatcher sparkQueryDispatcher = + new SparkQueryDispatcher( + dataSourceService, + sessionManager, + queryHandlerFactory, + queryIdProvider, + sqlQueryValidator, + pplQueryValidator); + asyncQueryExecutorService = + new AsyncQueryExecutorServiceImpl( + asyncQueryJobMetadataStorageService, + sparkQueryDispatcher, + sparkExecutionEngineConfigSupplier); + } + + @Test + public void createDropIndexQuery() { + givenSparkExecutionEngineConfigIsSupplied(); + givenValidDataSourceMetadataExist(); + when(queryIdProvider.getQueryId(any(), eq(asyncQueryRequestContext))).thenReturn(QUERY_ID); + String indexName = "flint_datasource_name_table_name_index_name_index"; + givenFlintIndexMetadataExists(indexName); + givenCancelJobRunSucceed(); + givenGetJobRunReturnJobRunWithState("Cancelled"); + + CreateAsyncQueryResponse response = + asyncQueryExecutorService.createAsyncQuery( + new CreateAsyncQueryRequest( + "DROP INDEX index_name ON table_name", DATASOURCE_NAME, LangType.SQL), + asyncQueryRequestContext); + + assertEquals(QUERY_ID, response.getQueryId()); + assertNull(response.getSessionId()); + verifyGetQueryIdCalled(); + verifyCancelJobRunCalled(); + verifyCreateIndexDMLResultCalled(); + verifyStoreJobMetadataCalled(DML_QUERY_JOB_ID, QueryState.SUCCESS, JobType.BATCH); + } + + @Test + public void createDropIndexQueryWithScheduler() { + givenSparkExecutionEngineConfigIsSupplied(); + givenValidDataSourceMetadataExist(); + when(queryIdProvider.getQueryId(any(), eq(asyncQueryRequestContext))).thenReturn(QUERY_ID); + + String indexName = "flint_datasource_name_table_name_index_name_index"; + givenFlintIndexMetadataExistsWithExternalScheduler(indexName); + + CreateAsyncQueryResponse response = + asyncQueryExecutorService.createAsyncQuery( + new CreateAsyncQueryRequest( + "DROP INDEX index_name ON table_name", DATASOURCE_NAME, LangType.SQL), + asyncQueryRequestContext); + + assertEquals(QUERY_ID, response.getQueryId()); + assertNull(response.getSessionId()); + verifyGetQueryIdCalled(); + verifyCreateIndexDMLResultCalled(); + verifyStoreJobMetadataCalled(DML_QUERY_JOB_ID, QueryState.SUCCESS, JobType.BATCH); + + verify(asyncQueryScheduler).unscheduleJob(indexName, asyncQueryRequestContext); + } + + @Test + public void createVacuumIndexQuery() { + givenSparkExecutionEngineConfigIsSupplied(); + givenValidDataSourceMetadataExist(); + givenSessionExists(); + when(queryIdProvider.getQueryId(any(), eq(asyncQueryRequestContext))).thenReturn(QUERY_ID); + when(sessionIdProvider.getSessionId(any())).thenReturn(SESSION_ID); + givenSessionExists(); // called twice + when(awsemrServerless.startJobRun(any())) + .thenReturn(new StartJobRunResult().withApplicationId(APPLICATION_ID).withJobRunId(JOB_ID)); + + CreateAsyncQueryResponse response = + asyncQueryExecutorService.createAsyncQuery( + new CreateAsyncQueryRequest( + "VACUUM INDEX index_name ON table_name", DATASOURCE_NAME, LangType.SQL), + asyncQueryRequestContext); + + assertEquals(QUERY_ID, response.getQueryId()); + 
assertEquals(SESSION_ID, response.getSessionId()); + verifyGetQueryIdCalled(); + verifyGetSessionIdCalled(); + verify(leaseManager).borrow(leaseRequestArgumentCaptor.capture()); + assertEquals(JobType.INTERACTIVE, leaseRequestArgumentCaptor.getValue().getJobType()); + verifyStartJobRunCalled(); + verifyStoreJobMetadataCalled(JOB_ID, QueryState.WAITING, JobType.INTERACTIVE); + } + + @Test + public void createAlterIndexQuery() { + givenSparkExecutionEngineConfigIsSupplied(); + givenValidDataSourceMetadataExist(); + when(queryIdProvider.getQueryId(any(), eq(asyncQueryRequestContext))).thenReturn(QUERY_ID); + String indexName = "flint_datasource_name_table_name_index_name_index"; + givenFlintIndexMetadataExists(indexName); + givenCancelJobRunSucceed(); + givenGetJobRunReturnJobRunWithState("Cancelled"); + + CreateAsyncQueryResponse response = + asyncQueryExecutorService.createAsyncQuery( + new CreateAsyncQueryRequest( + "ALTER INDEX index_name ON table_name WITH (auto_refresh = false)", + DATASOURCE_NAME, + LangType.SQL), + asyncQueryRequestContext); + + assertEquals(QUERY_ID, response.getQueryId()); + assertNull(response.getSessionId()); + verifyGetQueryIdCalled(); + verify(flintIndexMetadataService) + .updateIndexToManualRefresh( + eq(indexName), flintIndexOptionsArgumentCaptor.capture(), eq(asyncQueryRequestContext)); + FlintIndexOptions flintIndexOptions = flintIndexOptionsArgumentCaptor.getValue(); + assertFalse(flintIndexOptions.autoRefresh()); + verifyCancelJobRunCalled(); + verifyCreateIndexDMLResultCalled(); + verifyStoreJobMetadataCalled(DML_QUERY_JOB_ID, QueryState.SUCCESS, JobType.BATCH); + } + + @Test + public void createAlterIndexQueryWithScheduler() { + givenSparkExecutionEngineConfigIsSupplied(); + givenValidDataSourceMetadataExist(); + when(queryIdProvider.getQueryId(any(), eq(asyncQueryRequestContext))).thenReturn(QUERY_ID); + + String indexName = "flint_datasource_name_table_name_index_name_index"; + givenFlintIndexMetadataExistsWithExternalScheduler(indexName); + + CreateAsyncQueryResponse response = + asyncQueryExecutorService.createAsyncQuery( + new CreateAsyncQueryRequest( + "ALTER INDEX index_name ON table_name WITH (auto_refresh = false)", + DATASOURCE_NAME, + LangType.SQL), + asyncQueryRequestContext); + + assertEquals(QUERY_ID, response.getQueryId()); + assertNull(response.getSessionId()); + verifyGetQueryIdCalled(); + + verify(flintIndexMetadataService) + .updateIndexToManualRefresh( + eq(indexName), flintIndexOptionsArgumentCaptor.capture(), eq(asyncQueryRequestContext)); + + FlintIndexOptions flintIndexOptions = flintIndexOptionsArgumentCaptor.getValue(); + assertFalse(flintIndexOptions.autoRefresh()); + + verify(asyncQueryScheduler).unscheduleJob(indexName, asyncQueryRequestContext); + verifyCreateIndexDMLResultCalled(); + verifyStoreJobMetadataCalled(DML_QUERY_JOB_ID, QueryState.SUCCESS, JobType.BATCH); + } + + @Test + public void createStreamingQuery() { + givenSparkExecutionEngineConfigIsSupplied(); + givenValidDataSourceMetadataExist(); + when(queryIdProvider.getQueryId(any(), eq(asyncQueryRequestContext))).thenReturn(QUERY_ID); + when(awsemrServerless.startJobRun(any())) + .thenReturn(new StartJobRunResult().withApplicationId(APPLICATION_ID).withJobRunId(JOB_ID)); + + CreateAsyncQueryResponse response = + asyncQueryExecutorService.createAsyncQuery( + new CreateAsyncQueryRequest( + "CREATE INDEX index_name ON table_name(l_orderkey, l_quantity)" + + " WITH (auto_refresh = true)", + DATASOURCE_NAME, + LangType.SQL), + asyncQueryRequestContext); + + 
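+    // The index is created WITH (auto_refresh = true), so the assertions below expect the query
+    // to be dispatched as a STREAMING job; createBatchQuery uses auto_refresh = false and expects
+    // JobType.BATCH instead.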
assertEquals(QUERY_ID, response.getQueryId()); + assertNull(response.getSessionId()); + verifyGetQueryIdCalled(); + verify(leaseManager).borrow(leaseRequestArgumentCaptor.capture()); + assertEquals(JobType.STREAMING, leaseRequestArgumentCaptor.getValue().getJobType()); + verifyStartJobRunCalled(); + verifyStoreJobMetadataCalled(JOB_ID, QueryState.WAITING, JobType.STREAMING); + } + + @Test + public void createBatchQuery() { + givenSparkExecutionEngineConfigIsSupplied(); + givenValidDataSourceMetadataExist(); + when(queryIdProvider.getQueryId(any(), eq(asyncQueryRequestContext))).thenReturn(QUERY_ID); + when(awsemrServerless.startJobRun(any())) + .thenReturn(new StartJobRunResult().withApplicationId(APPLICATION_ID).withJobRunId(JOB_ID)); + + CreateAsyncQueryResponse response = + asyncQueryExecutorService.createAsyncQuery( + new CreateAsyncQueryRequest( + "CREATE INDEX index_name ON table_name(l_orderkey, l_quantity)" + + " WITH (auto_refresh = false)", + DATASOURCE_NAME, + LangType.SQL), + asyncQueryRequestContext); + + assertEquals(QUERY_ID, response.getQueryId()); + assertNull(response.getSessionId()); + verifyGetQueryIdCalled(); + verify(leaseManager).borrow(leaseRequestArgumentCaptor.capture()); + assertEquals(JobType.BATCH, leaseRequestArgumentCaptor.getValue().getJobType()); + verifyStartJobRunCalled(); + verifyStoreJobMetadataCalled(JOB_ID, QueryState.WAITING, JobType.BATCH); + } + + private void verifyStartJobRunCalled() { + verify(awsemrServerless).startJobRun(startJobRunRequestArgumentCaptor.capture()); + StartJobRunRequest startJobRunRequest = startJobRunRequestArgumentCaptor.getValue(); + assertEquals(APPLICATION_ID, startJobRunRequest.getApplicationId()); + String submitParameters = + startJobRunRequest.getJobDriver().getSparkSubmit().getSparkSubmitParameters(); + assertTrue( + submitParameters.contains("key.from.datasource.composer=value.from.datasource.composer")); + assertTrue(submitParameters.contains("key.from.generic.composer=value.from.generic.composer")); + } + + @Test + public void createCreateIndexQuery() { + givenSparkExecutionEngineConfigIsSupplied(); + givenValidDataSourceMetadataExist(); + when(queryIdProvider.getQueryId(any(), eq(asyncQueryRequestContext))).thenReturn(QUERY_ID); + when(awsemrServerless.startJobRun(any())) + .thenReturn(new StartJobRunResult().withApplicationId(APPLICATION_ID).withJobRunId(JOB_ID)); + + CreateAsyncQueryResponse response = + asyncQueryExecutorService.createAsyncQuery( + new CreateAsyncQueryRequest( + "CREATE INDEX index_name ON table_name(l_orderkey, l_quantity)" + + " WITH (auto_refresh = false)", + DATASOURCE_NAME, + LangType.SQL), + asyncQueryRequestContext); + + assertEquals(QUERY_ID, response.getQueryId()); + assertNull(response.getSessionId()); + verifyGetQueryIdCalled(); + verifyStartJobRunCalled(); + verifyStoreJobMetadataCalled(JOB_ID, QueryState.WAITING, JobType.BATCH); + } + + @Test + public void createRefreshQuery() { + givenSparkExecutionEngineConfigIsSupplied(); + givenValidDataSourceMetadataExist(); + when(queryIdProvider.getQueryId(any(), eq(asyncQueryRequestContext))).thenReturn(QUERY_ID); + when(awsemrServerless.startJobRun(any())) + .thenReturn(new StartJobRunResult().withApplicationId(APPLICATION_ID).withJobRunId(JOB_ID)); + + CreateAsyncQueryResponse response = + asyncQueryExecutorService.createAsyncQuery( + new CreateAsyncQueryRequest( + "REFRESH INDEX index_name ON table_name", DATASOURCE_NAME, LangType.SQL), + asyncQueryRequestContext); + + assertEquals(QUERY_ID, response.getQueryId()); + 
assertNull(response.getSessionId()); + verifyGetQueryIdCalled(); + verify(leaseManager).borrow(leaseRequestArgumentCaptor.capture()); + assertEquals(JobType.REFRESH, leaseRequestArgumentCaptor.getValue().getJobType()); + verifyStartJobRunCalled(); + verifyStoreJobMetadataCalled(JOB_ID, QueryState.WAITING, JobType.REFRESH); + } + + @Test + public void createInteractiveQuery() { + givenSparkExecutionEngineConfigIsSupplied(); + givenValidDataSourceMetadataExist(); + givenSessionExists(); + when(queryIdProvider.getQueryId(any(), eq(asyncQueryRequestContext))).thenReturn(QUERY_ID); + when(sessionIdProvider.getSessionId(any())).thenReturn(SESSION_ID); + givenSessionExists(); // called twice + when(awsemrServerless.startJobRun(any())) + .thenReturn(new StartJobRunResult().withApplicationId(APPLICATION_ID).withJobRunId(JOB_ID)); + + CreateAsyncQueryResponse response = + asyncQueryExecutorService.createAsyncQuery( + new CreateAsyncQueryRequest( + "SELECT * FROM table_name", DATASOURCE_NAME, LangType.SQL, SESSION_ID), + asyncQueryRequestContext); + + assertEquals(QUERY_ID, response.getQueryId()); + assertEquals(SESSION_ID, response.getSessionId()); + verifyGetQueryIdCalled(); + verifyGetSessionIdCalled(); + verify(leaseManager).borrow(leaseRequestArgumentCaptor.capture()); + assertEquals(JobType.INTERACTIVE, leaseRequestArgumentCaptor.getValue().getJobType()); + verifyStartJobRunCalled(); + verifyStoreJobMetadataCalled(JOB_ID, QueryState.WAITING, JobType.INTERACTIVE); + } + + @Test + public void getResultOfInteractiveQuery() { + givenJobMetadataExists( + getBaseAsyncQueryJobMetadataBuilder() + .queryId(QUERY_ID) + .sessionId(SESSION_ID) + .resultIndex(RESULT_INDEX)); + JSONObject result = getValidExecutionResponse(); + when(jobExecutionResponseReader.getResultWithQueryId( + QUERY_ID, RESULT_INDEX, asyncQueryRequestContext)) + .thenReturn(result); + + AsyncQueryExecutionResponse response = + asyncQueryExecutorService.getAsyncQueryResults(QUERY_ID, asyncQueryRequestContext); + + assertEquals("SUCCESS", response.getStatus()); + assertEquals(SESSION_ID, response.getSessionId()); + assertEquals("{col1:\"value\"}", response.getResults().get(0).toString()); + } + + @Test + public void getResultOfIndexDMLQuery() { + givenJobMetadataExists( + getBaseAsyncQueryJobMetadataBuilder() + .queryId(QUERY_ID) + .jobId(DROP_INDEX_JOB_ID) + .resultIndex(RESULT_INDEX)); + JSONObject result = getValidExecutionResponse(); + when(jobExecutionResponseReader.getResultWithQueryId( + QUERY_ID, RESULT_INDEX, asyncQueryRequestContext)) + .thenReturn(result); + + AsyncQueryExecutionResponse response = + asyncQueryExecutorService.getAsyncQueryResults(QUERY_ID, asyncQueryRequestContext); + + assertEquals("SUCCESS", response.getStatus()); + assertNull(response.getSessionId()); + assertEquals("{col1:\"value\"}", response.getResults().get(0).toString()); + } + + @Test + public void getResultOfRefreshQuery() { + givenJobMetadataExists( + getBaseAsyncQueryJobMetadataBuilder() + .queryId(QUERY_ID) + .jobId(JOB_ID) + .jobType(JobType.BATCH) + .resultIndex(RESULT_INDEX)); + JSONObject result = getValidExecutionResponse(); + when(jobExecutionResponseReader.getResultFromResultIndex( + AsyncQueryJobMetadata.builder() + .applicationId(APPLICATION_ID) + .queryId(QUERY_ID) + .jobId(JOB_ID) + .datasourceName(DATASOURCE_NAME) + .resultIndex(RESULT_INDEX) + .jobType(JobType.BATCH) + .metadata(ImmutableMap.of()) + .build(), + asyncQueryRequestContext)) + .thenReturn(result); + + AsyncQueryExecutionResponse response = + 
asyncQueryExecutorService.getAsyncQueryResults(QUERY_ID, asyncQueryRequestContext); + + assertEquals("SUCCESS", response.getStatus()); + assertNull(response.getSessionId()); + assertEquals("{col1:\"value\"}", response.getResults().get(0).toString()); + } + + @Test + public void cancelInteractiveQuery() { + givenJobMetadataExists(getBaseAsyncQueryJobMetadataBuilder().sessionId(SESSION_ID)); + givenSessionExists(); + when(sessionConfigSupplier.getSessionInactivityTimeoutMillis()).thenReturn(100000L); + final StatementModel statementModel = givenStatementExists(); + StatementModel canceledStatementModel = + StatementModel.copyWithState(statementModel, StatementState.CANCELLED, ImmutableMap.of()); + when(statementStorageService.updateStatementState( + statementModel, StatementState.CANCELLED, asyncQueryRequestContext)) + .thenReturn(canceledStatementModel); + + String result = asyncQueryExecutorService.cancelQuery(QUERY_ID, asyncQueryRequestContext); + + assertEquals(QUERY_ID, result); + verify(statementStorageService) + .updateStatementState(statementModel, StatementState.CANCELLED, asyncQueryRequestContext); + } + + @Test + public void cancelIndexDMLQuery() { + givenJobMetadataExists(getBaseAsyncQueryJobMetadataBuilder().jobId(DROP_INDEX_JOB_ID)); + + assertThrows( + IllegalArgumentException.class, + () -> asyncQueryExecutorService.cancelQuery(QUERY_ID, asyncQueryRequestContext)); + } + + @Test + public void cancelRefreshQuery() { + givenJobMetadataExists( + getBaseAsyncQueryJobMetadataBuilder().jobType(JobType.REFRESH).indexName(INDEX_NAME)); + when(flintIndexMetadataService.getFlintIndexMetadata(INDEX_NAME, asyncQueryRequestContext)) + .thenReturn( + ImmutableMap.of( + INDEX_NAME, + FlintIndexMetadata.builder() + .latestId(null) + .appId(APPLICATION_ID) + .jobId(JOB_ID) + .build())); + givenCancelJobRunSucceed(); + when(awsemrServerless.getJobRun(any())) + .thenReturn( + new GetJobRunResult() + .withJobRun(new JobRun().withJobRunId(JOB_ID).withState("Cancelled"))); + + String result = asyncQueryExecutorService.cancelQuery(QUERY_ID, asyncQueryRequestContext); + + assertEquals(QUERY_ID, result); + verifyCancelJobRunCalled(); + verifyGetJobRunRequest(); + } + + @Test + public void cancelStreamingQuery() { + givenJobMetadataExists(getBaseAsyncQueryJobMetadataBuilder().jobType(JobType.STREAMING)); + + assertThrows( + IllegalArgumentException.class, + () -> asyncQueryExecutorService.cancelQuery(QUERY_ID, asyncQueryRequestContext)); + } + + @Test + public void cancelBatchQuery() { + givenJobMetadataExists(getBaseAsyncQueryJobMetadataBuilder().jobId(JOB_ID)); + givenCancelJobRunSucceed(); + + String result = asyncQueryExecutorService.cancelQuery(QUERY_ID, asyncQueryRequestContext); + + assertEquals(QUERY_ID, result); + verifyCancelJobRunCalled(); + } + + private void givenSparkExecutionEngineConfigIsSupplied() { + when(sparkExecutionEngineConfigSupplier.getSparkExecutionEngineConfig(asyncQueryRequestContext)) + .thenReturn( + SparkExecutionEngineConfig.builder() + .applicationId(APPLICATION_ID) + .accountId(ACCOUNT_ID) + .sparkSubmitParameterModifier(sparkSubmitParameterModifier) + .build()); + } + + private void givenFlintIndexMetadataExists( + String indexName, FlintIndexOptions flintIndexOptions) { + when(flintIndexMetadataService.getFlintIndexMetadata(indexName, asyncQueryRequestContext)) + .thenReturn( + ImmutableMap.of( + indexName, + FlintIndexMetadata.builder() + .appId(APPLICATION_ID) + .jobId(JOB_ID) + .opensearchIndexName(indexName) + .flintIndexOptions(flintIndexOptions) + .build())); 
+ } + + // Overload method for default FlintIndexOptions usage + private void givenFlintIndexMetadataExists(String indexName) { + givenFlintIndexMetadataExists(indexName, new FlintIndexOptions()); + } + + // Method to set up FlintIndexMetadata with external scheduler + private void givenFlintIndexMetadataExistsWithExternalScheduler(String indexName) { + givenFlintIndexMetadataExists(indexName, createExternalSchedulerFlintIndexOptions()); + } + + // Helper method for creating FlintIndexOptions with external scheduler + private FlintIndexOptions createExternalSchedulerFlintIndexOptions() { + FlintIndexOptions options = new FlintIndexOptions(); + options.setOption(FlintIndexOptions.SCHEDULER_MODE, "external"); + return options; + } + + private void givenValidDataSourceMetadataExist() { + when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata( + DATASOURCE_NAME, asyncQueryRequestContext)) + .thenReturn( + new DataSourceMetadata.Builder() + .setName(DATASOURCE_NAME) + .setConnector(DataSourceType.S3GLUE) + .setProperties( + ImmutableMap.builder() + .put(GLUE_INDEX_STORE_OPENSEARCH_URI, "https://open.search.cluster:9200/") + .put(GLUE_INDEX_STORE_OPENSEARCH_AUTH, AuthenticationType.NOAUTH.getName()) + .build()) + .build()); + } + + private void givenGetJobRunReturnJobRunWithState(String state) { + when(awsemrServerless.getJobRun(any())) + .thenReturn( + new GetJobRunResult() + .withJobRun( + new JobRun() + .withJobRunId(JOB_ID) + .withApplicationId(APPLICATION_ID) + .withState(state))); + } + + private void verifyGetQueryIdCalled() { + verify(queryIdProvider) + .getQueryId(dispatchQueryRequestArgumentCaptor.capture(), eq(asyncQueryRequestContext)); + DispatchQueryRequest dispatchQueryRequest = dispatchQueryRequestArgumentCaptor.getValue(); + assertEquals(ACCOUNT_ID, dispatchQueryRequest.getAccountId()); + assertEquals(APPLICATION_ID, dispatchQueryRequest.getApplicationId()); + } + + private void verifyGetSessionIdCalled() { + verify(sessionIdProvider).getSessionId(createSessionRequestArgumentCaptor.capture()); + CreateSessionRequest createSessionRequest = createSessionRequestArgumentCaptor.getValue(); + assertEquals(ACCOUNT_ID, createSessionRequest.getAccountId()); + assertEquals(APPLICATION_ID, createSessionRequest.getApplicationId()); + } + + private void verifyStoreJobMetadataCalled(String jobId, QueryState state, JobType jobType) { + verify(asyncQueryJobMetadataStorageService) + .storeJobMetadata( + asyncQueryJobMetadataArgumentCaptor.capture(), eq(asyncQueryRequestContext)); + AsyncQueryJobMetadata asyncQueryJobMetadata = asyncQueryJobMetadataArgumentCaptor.getValue(); + assertEquals(QUERY_ID, asyncQueryJobMetadata.getQueryId()); + assertEquals(jobId, asyncQueryJobMetadata.getJobId()); + assertEquals(DATASOURCE_NAME, asyncQueryJobMetadata.getDatasourceName()); + assertNull(asyncQueryJobMetadata.getError()); + assertEquals(LangType.SQL, asyncQueryJobMetadata.getLangType()); + assertEquals(state, asyncQueryJobMetadata.getState()); + assertEquals(jobType, asyncQueryJobMetadata.getJobType()); + } + + private void verifyCreateIndexDMLResultCalled() { + verify(indexDMLResultStorageService) + .createIndexDMLResult(indexDMLResultArgumentCaptor.capture(), eq(asyncQueryRequestContext)); + IndexDMLResult indexDMLResult = indexDMLResultArgumentCaptor.getValue(); + assertEquals(QUERY_ID, indexDMLResult.getQueryId()); + assertEquals(DATASOURCE_NAME, indexDMLResult.getDatasourceName()); + assertEquals("SUCCESS", indexDMLResult.getStatus()); + assertEquals("", indexDMLResult.getError()); + } + + 
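The givenFlintIndexMetadataExists helpers above distinguish the default refresh path from the external-scheduler path exercised by the DROP/ALTER "WithScheduler" tests. A compressed sketch of that distinction, using only names that appear in this test class:

    // External scheduler mode is signalled through the Flint index options.
    FlintIndexOptions options = new FlintIndexOptions();
    options.setOption(FlintIndexOptions.SCHEDULER_MODE, "external");
    // With these options on the index metadata, the scheduler tests verify
    //   verify(asyncQueryScheduler).unscheduleJob(indexName, asyncQueryRequestContext);
    // instead of the EMR Serverless cancellation checked by verifyCancelJobRunCalled().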
private void verifyCancelJobRunCalled() { + verify(awsemrServerless).cancelJobRun(cancelJobRunRequestArgumentCaptor.capture()); + CancelJobRunRequest cancelJobRunRequest = cancelJobRunRequestArgumentCaptor.getValue(); + assertEquals(JOB_ID, cancelJobRunRequest.getJobRunId()); + assertEquals(APPLICATION_ID, cancelJobRunRequest.getApplicationId()); + } + + private void verifyGetJobRunRequest() { + verify(awsemrServerless).getJobRun(getJobRunRequestArgumentCaptor.capture()); + GetJobRunRequest getJobRunRequest = getJobRunRequestArgumentCaptor.getValue(); + assertEquals(APPLICATION_ID, getJobRunRequest.getApplicationId()); + assertEquals(JOB_ID, getJobRunRequest.getJobRunId()); + } + + private StatementModel givenStatementExists() { + StatementModel statementModel = + StatementModel.builder() + .queryId(QUERY_ID) + .statementId(new StatementId(QUERY_ID)) + .statementState(StatementState.RUNNING) + .build(); + when(statementStorageService.getStatement(QUERY_ID, DATASOURCE_NAME, asyncQueryRequestContext)) + .thenReturn(Optional.of(statementModel)); + return statementModel; + } + + private void givenSessionExists() { + when(sessionStorageService.getSession(SESSION_ID, DATASOURCE_NAME)) + .thenReturn( + Optional.of( + SessionModel.builder() + .sessionId(SESSION_ID) + .datasourceName(DATASOURCE_NAME) + .jobId(JOB_ID) + .sessionState(SessionState.RUNNING) + .build())); + } + + private AsyncQueryJobMetadataBuilder getBaseAsyncQueryJobMetadataBuilder() { + return AsyncQueryJobMetadata.builder() + .applicationId(APPLICATION_ID) + .queryId(QUERY_ID) + .datasourceName(DATASOURCE_NAME); + } + + private void givenJobMetadataExists(AsyncQueryJobMetadataBuilder metadataBuilder) { + AsyncQueryJobMetadata metadata = metadataBuilder.build(); + when(asyncQueryJobMetadataStorageService.getJobMetadata(metadata.getQueryId())) + .thenReturn(Optional.of(metadata)); + } + + private void givenCancelJobRunSucceed() { + when(awsemrServerless.cancelJobRun(any())) + .thenReturn( + new CancelJobRunResult().withJobRunId(JOB_ID).withApplicationId(APPLICATION_ID)); + } + + private static JSONObject getValidExecutionResponse() { + return new JSONObject() + .put( + "data", + new JSONObject() + .put("status", "SUCCESS") + .put( + "schema", + new JSONArray() + .put( + new JSONObject().put("column_name", "col1").put("data_type", "string"))) + .put("result", new JSONArray().put("{'col1': 'value'}"))); + } +} diff --git a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceImplTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceImplTest.java similarity index 79% rename from spark/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceImplTest.java rename to async-query-core/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceImplTest.java index b87fb0dad7..3177c335d9 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceImplTest.java +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceImplTest.java @@ -5,6 +5,7 @@ package org.opensearch.sql.spark.asyncquery; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.argThat; import static org.mockito.ArgumentMatchers.eq; @@ -33,7 +34,7 @@ import org.opensearch.sql.spark.asyncquery.model.AsyncQueryExecutionResponse; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryJobMetadata; 
import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; -import org.opensearch.sql.spark.config.OpenSearchSparkSubmitParameterModifier; +import org.opensearch.sql.spark.asyncquery.model.QueryState; import org.opensearch.sql.spark.config.SparkExecutionEngineConfig; import org.opensearch.sql.spark.config.SparkExecutionEngineConfigSupplier; import org.opensearch.sql.spark.config.SparkSubmitParameterModifier; @@ -48,6 +49,9 @@ @ExtendWith(MockitoExtension.class) public class AsyncQueryExecutorServiceImplTest { + private static final String QUERY = "select * from my_glue.default.http_logs"; + private static final String QUERY_ID = "QUERY_ID"; + @Mock private SparkQueryDispatcher sparkQueryDispatcher; @Mock private AsyncQueryJobMetadataStorageService asyncQueryJobMetadataStorageService; private AsyncQueryExecutorService jobExecutorService; @@ -55,7 +59,6 @@ public class AsyncQueryExecutorServiceImplTest { @Mock private SparkExecutionEngineConfigSupplier sparkExecutionEngineConfigSupplier; @Mock private SparkSubmitParameterModifier sparkSubmitParameterModifier; @Mock private AsyncQueryRequestContext asyncQueryRequestContext; - private final String QUERY_ID = "QUERY_ID"; @BeforeEach void setUp() { @@ -69,8 +72,7 @@ void setUp() { @Test void testCreateAsyncQuery() { CreateAsyncQueryRequest createAsyncQueryRequest = - new CreateAsyncQueryRequest( - "select * from my_glue.default.http_logs", "my_glue", LangType.SQL); + new CreateAsyncQueryRequest(QUERY, "my_glue", LangType.SQL); when(sparkExecutionEngineConfigSupplier.getSparkExecutionEngineConfig(any())) .thenReturn( SparkExecutionEngineConfig.builder() @@ -83,7 +85,7 @@ void testCreateAsyncQuery() { DispatchQueryRequest expectedDispatchQueryRequest = DispatchQueryRequest.builder() .applicationId(EMRS_APPLICATION_ID) - .query("select * from my_glue.default.http_logs") + .query(QUERY) .datasource("my_glue") .langType(LangType.SQL) .executionRoleARN(EMRS_EXECUTION_ROLE) @@ -109,13 +111,13 @@ void testCreateAsyncQuery() { .getSparkExecutionEngineConfig(asyncQueryRequestContext); verify(sparkQueryDispatcher, times(1)) .dispatch(expectedDispatchQueryRequest, asyncQueryRequestContext); - Assertions.assertEquals(QUERY_ID, createAsyncQueryResponse.getQueryId()); + assertEquals(QUERY_ID, createAsyncQueryResponse.getQueryId()); } @Test void testCreateAsyncQueryWithExtraSparkSubmitParameter() { - OpenSearchSparkSubmitParameterModifier modifier = - new OpenSearchSparkSubmitParameterModifier("--conf spark.dynamicAllocation.enabled=false"); + SparkSubmitParameterModifier modifier = + (builder) -> builder.extraParameters("--conf spark.dynamicAllocation.enabled=false"); when(sparkExecutionEngineConfigSupplier.getSparkExecutionEngineConfig(any())) .thenReturn( SparkExecutionEngineConfig.builder() @@ -135,9 +137,7 @@ void testCreateAsyncQueryWithExtraSparkSubmitParameter() { .build()); jobExecutorService.createAsyncQuery( - new CreateAsyncQueryRequest( - "select * from my_glue.default.http_logs", "my_glue", LangType.SQL), - asyncQueryRequestContext); + new CreateAsyncQueryRequest(QUERY, "my_glue", LangType.SQL), asyncQueryRequestContext); verify(sparkQueryDispatcher, times(1)) .dispatch( @@ -153,10 +153,9 @@ void testGetAsyncQueryResultsWithJobNotFoundException() { AsyncQueryNotFoundException asyncQueryNotFoundException = Assertions.assertThrows( AsyncQueryNotFoundException.class, - () -> jobExecutorService.getAsyncQueryResults(EMR_JOB_ID)); + () -> jobExecutorService.getAsyncQueryResults(EMR_JOB_ID, asyncQueryRequestContext)); - 
Assertions.assertEquals( - "QueryId: " + EMR_JOB_ID + " not found", asyncQueryNotFoundException.getMessage()); + assertEquals("QueryId: " + EMR_JOB_ID + " not found", asyncQueryNotFoundException.getMessage()); verifyNoInteractions(sparkQueryDispatcher); verifyNoInteractions(sparkExecutionEngineConfigSupplier); } @@ -167,14 +166,16 @@ void testGetAsyncQueryResultsWithInProgressJob() { .thenReturn(Optional.of(getAsyncQueryJobMetadata())); JSONObject jobResult = new JSONObject(); jobResult.put("status", JobRunState.PENDING.toString()); - when(sparkQueryDispatcher.getQueryResponse(getAsyncQueryJobMetadata())).thenReturn(jobResult); + when(sparkQueryDispatcher.getQueryResponse( + getAsyncQueryJobMetadata(), asyncQueryRequestContext)) + .thenReturn(jobResult); AsyncQueryExecutionResponse asyncQueryExecutionResponse = - jobExecutorService.getAsyncQueryResults(EMR_JOB_ID); + jobExecutorService.getAsyncQueryResults(EMR_JOB_ID, asyncQueryRequestContext); Assertions.assertNull(asyncQueryExecutionResponse.getResults()); Assertions.assertNull(asyncQueryExecutionResponse.getSchema()); - Assertions.assertEquals("PENDING", asyncQueryExecutionResponse.getStatus()); + assertEquals("PENDING", asyncQueryExecutionResponse.getStatus()); verifyNoInteractions(sparkExecutionEngineConfigSupplier); } @@ -184,16 +185,17 @@ void testGetAsyncQueryResultsWithSuccessJob() throws IOException { .thenReturn(Optional.of(getAsyncQueryJobMetadata())); JSONObject jobResult = new JSONObject(getJson("select_query_response.json")); jobResult.put("status", JobRunState.SUCCESS.toString()); - when(sparkQueryDispatcher.getQueryResponse(getAsyncQueryJobMetadata())).thenReturn(jobResult); + when(sparkQueryDispatcher.getQueryResponse( + getAsyncQueryJobMetadata(), asyncQueryRequestContext)) + .thenReturn(jobResult); AsyncQueryExecutionResponse asyncQueryExecutionResponse = - jobExecutorService.getAsyncQueryResults(EMR_JOB_ID); + jobExecutorService.getAsyncQueryResults(EMR_JOB_ID, asyncQueryRequestContext); - Assertions.assertEquals("SUCCESS", asyncQueryExecutionResponse.getStatus()); - Assertions.assertEquals(1, asyncQueryExecutionResponse.getSchema().getColumns().size()); - Assertions.assertEquals( - "1", asyncQueryExecutionResponse.getSchema().getColumns().get(0).getName()); - Assertions.assertEquals( + assertEquals("SUCCESS", asyncQueryExecutionResponse.getStatus()); + assertEquals(1, asyncQueryExecutionResponse.getSchema().getColumns().size()); + assertEquals("1", asyncQueryExecutionResponse.getSchema().getColumns().get(0).getName()); + assertEquals( 1, ((HashMap) asyncQueryExecutionResponse.getResults().get(0).value()) .get("1")); @@ -207,10 +209,10 @@ void testCancelJobWithJobNotFound() { AsyncQueryNotFoundException asyncQueryNotFoundException = Assertions.assertThrows( - AsyncQueryNotFoundException.class, () -> jobExecutorService.cancelQuery(EMR_JOB_ID)); + AsyncQueryNotFoundException.class, + () -> jobExecutorService.cancelQuery(EMR_JOB_ID, asyncQueryRequestContext)); - Assertions.assertEquals( - "QueryId: " + EMR_JOB_ID + " not found", asyncQueryNotFoundException.getMessage()); + assertEquals("QueryId: " + EMR_JOB_ID + " not found", asyncQueryNotFoundException.getMessage()); verifyNoInteractions(sparkQueryDispatcher); verifyNoInteractions(sparkExecutionEngineConfigSupplier); } @@ -219,11 +221,14 @@ void testCancelJobWithJobNotFound() { void testCancelJob() { when(asyncQueryJobMetadataStorageService.getJobMetadata(EMR_JOB_ID)) .thenReturn(Optional.of(getAsyncQueryJobMetadata())); - 
when(sparkQueryDispatcher.cancelJob(getAsyncQueryJobMetadata())).thenReturn(EMR_JOB_ID); + when(sparkQueryDispatcher.cancelJob(getAsyncQueryJobMetadata(), asyncQueryRequestContext)) + .thenReturn(EMR_JOB_ID); - String jobId = jobExecutorService.cancelQuery(EMR_JOB_ID); + String jobId = jobExecutorService.cancelQuery(EMR_JOB_ID, asyncQueryRequestContext); - Assertions.assertEquals(EMR_JOB_ID, jobId); + assertEquals(EMR_JOB_ID, jobId); + verify(asyncQueryJobMetadataStorageService) + .updateState(any(), eq(QueryState.CANCELLED), eq(asyncQueryRequestContext)); verifyNoInteractions(sparkExecutionEngineConfigSupplier); } @@ -232,6 +237,8 @@ private AsyncQueryJobMetadata getAsyncQueryJobMetadata() { .queryId(QUERY_ID) .applicationId(EMRS_APPLICATION_ID) .jobId(EMR_JOB_ID) + .query(QUERY) + .langType(LangType.SQL) .build(); } } diff --git a/async-query-core/src/test/java/org/opensearch/sql/spark/asyncquery/model/QueryStateTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/asyncquery/model/QueryStateTest.java new file mode 100644 index 0000000000..8e86e3b176 --- /dev/null +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/asyncquery/model/QueryStateTest.java @@ -0,0 +1,28 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.asyncquery.model; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import org.junit.jupiter.api.Test; + +class QueryStateTest { + @Test + public void testFromString() { + assertEquals(QueryState.WAITING, QueryState.fromString("waiting")); + assertEquals(QueryState.RUNNING, QueryState.fromString("running")); + assertEquals(QueryState.SUCCESS, QueryState.fromString("success")); + assertEquals(QueryState.FAILED, QueryState.fromString("failed")); + assertEquals(QueryState.CANCELLED, QueryState.fromString("cancelled")); + assertEquals(QueryState.TIMEOUT, QueryState.fromString("timeout")); + } + + @Test + public void testFromStringWithUnknownState() { + assertThrows(IllegalArgumentException.class, () -> QueryState.fromString("UNKNOWN_STATE")); + } +} diff --git a/spark/src/test/java/org/opensearch/sql/spark/client/EMRServerlessClientFactoryImplTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/client/EMRServerlessClientFactoryImplTest.java similarity index 86% rename from spark/src/test/java/org/opensearch/sql/spark/client/EMRServerlessClientFactoryImplTest.java rename to async-query-core/src/test/java/org/opensearch/sql/spark/client/EMRServerlessClientFactoryImplTest.java index 562fc84eca..309d29c600 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/client/EMRServerlessClientFactoryImplTest.java +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/client/EMRServerlessClientFactoryImplTest.java @@ -16,19 +16,24 @@ import org.opensearch.sql.spark.config.SparkExecutionEngineConfig; import org.opensearch.sql.spark.config.SparkExecutionEngineConfigSupplier; import org.opensearch.sql.spark.constants.TestConstants; +import org.opensearch.sql.spark.metrics.MetricsService; @ExtendWith(MockitoExtension.class) public class EMRServerlessClientFactoryImplTest { + public static final String ACCOUNT_ID = "accountId"; @Mock private SparkExecutionEngineConfigSupplier sparkExecutionEngineConfigSupplier; + @Mock private MetricsService metricsService; @Test public void testGetClient() { when(sparkExecutionEngineConfigSupplier.getSparkExecutionEngineConfig(any())) 
.thenReturn(createSparkExecutionEngineConfig()); EMRServerlessClientFactory emrServerlessClientFactory = - new EMRServerlessClientFactoryImpl(sparkExecutionEngineConfigSupplier); - EMRServerlessClient emrserverlessClient = emrServerlessClientFactory.getClient(); + new EMRServerlessClientFactoryImpl(sparkExecutionEngineConfigSupplier, metricsService); + + EMRServerlessClient emrserverlessClient = emrServerlessClientFactory.getClient(ACCOUNT_ID); + Assertions.assertNotNull(emrserverlessClient); } @@ -38,17 +43,17 @@ public void testGetClientWithChangeInSetting() { when(sparkExecutionEngineConfigSupplier.getSparkExecutionEngineConfig(any())) .thenReturn(sparkExecutionEngineConfig); EMRServerlessClientFactory emrServerlessClientFactory = - new EMRServerlessClientFactoryImpl(sparkExecutionEngineConfigSupplier); - EMRServerlessClient emrserverlessClient = emrServerlessClientFactory.getClient(); + new EMRServerlessClientFactoryImpl(sparkExecutionEngineConfigSupplier, metricsService); + EMRServerlessClient emrserverlessClient = emrServerlessClientFactory.getClient(ACCOUNT_ID); Assertions.assertNotNull(emrserverlessClient); - EMRServerlessClient emrServerlessClient1 = emrServerlessClientFactory.getClient(); + EMRServerlessClient emrServerlessClient1 = emrServerlessClientFactory.getClient(ACCOUNT_ID); Assertions.assertEquals(emrServerlessClient1, emrserverlessClient); sparkExecutionEngineConfig.setRegion(TestConstants.US_WEST_REGION); when(sparkExecutionEngineConfigSupplier.getSparkExecutionEngineConfig(any())) .thenReturn(sparkExecutionEngineConfig); - EMRServerlessClient emrServerlessClient2 = emrServerlessClientFactory.getClient(); + EMRServerlessClient emrServerlessClient2 = emrServerlessClientFactory.getClient(ACCOUNT_ID); Assertions.assertNotEquals(emrServerlessClient2, emrserverlessClient); Assertions.assertNotEquals(emrServerlessClient2, emrServerlessClient1); } @@ -57,10 +62,12 @@ public void testGetClientWithChangeInSetting() { public void testGetClientWithException() { when(sparkExecutionEngineConfigSupplier.getSparkExecutionEngineConfig(any())).thenReturn(null); EMRServerlessClientFactory emrServerlessClientFactory = - new EMRServerlessClientFactoryImpl(sparkExecutionEngineConfigSupplier); + new EMRServerlessClientFactoryImpl(sparkExecutionEngineConfigSupplier, metricsService); + IllegalArgumentException illegalArgumentException = Assertions.assertThrows( - IllegalArgumentException.class, emrServerlessClientFactory::getClient); + IllegalArgumentException.class, () -> emrServerlessClientFactory.getClient(ACCOUNT_ID)); + Assertions.assertEquals( "Async Query APIs are disabled. Please configure plugins.query.executionengine.spark.config" + " in cluster settings to enable them.", @@ -74,10 +81,12 @@ public void testGetClientWithExceptionWithNullRegion() { when(sparkExecutionEngineConfigSupplier.getSparkExecutionEngineConfig(any())) .thenReturn(sparkExecutionEngineConfig); EMRServerlessClientFactory emrServerlessClientFactory = - new EMRServerlessClientFactoryImpl(sparkExecutionEngineConfigSupplier); + new EMRServerlessClientFactoryImpl(sparkExecutionEngineConfigSupplier, metricsService); + IllegalArgumentException illegalArgumentException = Assertions.assertThrows( - IllegalArgumentException.class, emrServerlessClientFactory::getClient); + IllegalArgumentException.class, () -> emrServerlessClientFactory.getClient(ACCOUNT_ID)); + Assertions.assertEquals( "Async Query APIs are disabled. 
Please configure plugins.query.executionengine.spark.config" + " in cluster settings to enable them.", diff --git a/spark/src/test/java/org/opensearch/sql/spark/client/EmrServerlessClientImplTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/client/EmrServerlessClientImplTest.java similarity index 80% rename from spark/src/test/java/org/opensearch/sql/spark/client/EmrServerlessClientImplTest.java rename to async-query-core/src/test/java/org/opensearch/sql/spark/client/EmrServerlessClientImplTest.java index 9ea7e91c54..b3a2bda36a 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/client/EmrServerlessClientImplTest.java +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/client/EmrServerlessClientImplTest.java @@ -4,9 +4,7 @@ package org.opensearch.sql.spark.client; -import static java.util.Collections.emptyList; import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -32,43 +30,35 @@ import java.util.List; import org.apache.commons.lang3.RandomStringUtils; import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.ArgumentCaptor; import org.mockito.Captor; +import org.mockito.InjectMocks; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; -import org.opensearch.sql.common.setting.Settings; -import org.opensearch.sql.legacy.esdomain.LocalClusterState; -import org.opensearch.sql.legacy.metrics.Metrics; -import org.opensearch.sql.opensearch.setting.OpenSearchSettings; -import org.opensearch.sql.spark.asyncquery.model.SparkSubmitParameters; +import org.opensearch.sql.spark.metrics.MetricsService; +import org.opensearch.sql.spark.parameter.SparkParameterComposerCollection; +import org.opensearch.sql.spark.parameter.SparkSubmitParametersBuilder; @ExtendWith(MockitoExtension.class) public class EmrServerlessClientImplTest { @Mock private AWSEMRServerless emrServerless; - - @Mock private OpenSearchSettings settings; + @Mock private MetricsService metricsService; @Captor private ArgumentCaptor startJobRunRequestArgumentCaptor; - @BeforeEach - public void setUp() { - doReturn(emptyList()).when(settings).getSettings(); - when(settings.getSettingValue(Settings.Key.METRICS_ROLLING_INTERVAL)).thenReturn(3600L); - when(settings.getSettingValue(Settings.Key.METRICS_ROLLING_WINDOW)).thenReturn(600L); - LocalClusterState.state().setPluginSettings(settings); - Metrics.getInstance().registerDefaultMetrics(); - } + @InjectMocks EmrServerlessClientImpl emrServerlessClient; @Test void testStartJobRun() { StartJobRunResult response = new StartJobRunResult(); when(emrServerless.startJobRun(any())).thenReturn(response); - EmrServerlessClientImpl emrServerlessClient = new EmrServerlessClientImpl(emrServerless); - String parameters = SparkSubmitParameters.builder().query(QUERY).build().toString(); + String parameters = + new SparkSubmitParametersBuilder(new SparkParameterComposerCollection()) + .query(QUERY) + .toString(); emrServerlessClient.startJobRun( new StartJobRequest( @@ -102,7 +92,6 @@ void testStartJobRunWithErrorMetric() { doThrow(new AWSEMRServerlessException("Couldn't start job")) .when(emrServerless) .startJobRun(any()); - EmrServerlessClientImpl emrServerlessClient = new EmrServerlessClientImpl(emrServerless); RuntimeException runtimeException = 
Assertions.assertThrows( RuntimeException.class, @@ -125,7 +114,6 @@ void testStartJobRunResultIndex() { StartJobRunResult response = new StartJobRunResult(); when(emrServerless.startJobRun(any())).thenReturn(response); - EmrServerlessClientImpl emrServerlessClient = new EmrServerlessClientImpl(emrServerless); emrServerlessClient.startJobRun( new StartJobRequest( EMRS_JOB_NAME, @@ -145,14 +133,12 @@ void testGetJobRunState() { GetJobRunResult response = new GetJobRunResult(); response.setJobRun(jobRun); when(emrServerless.getJobRun(any())).thenReturn(response); - EmrServerlessClientImpl emrServerlessClient = new EmrServerlessClientImpl(emrServerless); emrServerlessClient.getJobRunResult(EMRS_APPLICATION_ID, "123"); } @Test void testGetJobRunStateWithErrorMetric() { doThrow(new ValidationException("Not a good job")).when(emrServerless).getJobRun(any()); - EmrServerlessClientImpl emrServerlessClient = new EmrServerlessClientImpl(emrServerless); RuntimeException runtimeException = Assertions.assertThrows( RuntimeException.class, @@ -164,16 +150,17 @@ void testGetJobRunStateWithErrorMetric() { void testCancelJobRun() { when(emrServerless.cancelJobRun(any())) .thenReturn(new CancelJobRunResult().withJobRunId(EMR_JOB_ID)); - EmrServerlessClientImpl emrServerlessClient = new EmrServerlessClientImpl(emrServerless); + CancelJobRunResult cancelJobRunResult = emrServerlessClient.cancelJobRun(EMRS_APPLICATION_ID, EMR_JOB_ID, false); + Assertions.assertEquals(EMR_JOB_ID, cancelJobRunResult.getJobRunId()); } @Test void testCancelJobRunWithErrorMetric() { doThrow(new RuntimeException()).when(emrServerless).cancelJobRun(any()); - EmrServerlessClientImpl emrServerlessClient = new EmrServerlessClientImpl(emrServerless); + Assertions.assertThrows( RuntimeException.class, () -> emrServerlessClient.cancelJobRun(EMRS_APPLICATION_ID, "123", false)); @@ -182,22 +169,26 @@ void testCancelJobRunWithErrorMetric() { @Test void testCancelJobRunWithValidationException() { doThrow(new ValidationException("Error")).when(emrServerless).cancelJobRun(any()); - EmrServerlessClientImpl emrServerlessClient = new EmrServerlessClientImpl(emrServerless); + RuntimeException runtimeException = Assertions.assertThrows( - RuntimeException.class, + IllegalArgumentException.class, () -> emrServerlessClient.cancelJobRun(EMRS_APPLICATION_ID, EMR_JOB_ID, false)); - Assertions.assertEquals("Internal Server Error.", runtimeException.getMessage()); + + Assertions.assertEquals( + "The input fails to satisfy the constraints specified by AWS EMR Serverless.", + runtimeException.getMessage()); } @Test void testCancelJobRunWithNativeEMRExceptionWithValidationException() { doThrow(new ValidationException("Error")).when(emrServerless).cancelJobRun(any()); - EmrServerlessClientImpl emrServerlessClient = new EmrServerlessClientImpl(emrServerless); + ValidationException validationException = Assertions.assertThrows( ValidationException.class, () -> emrServerlessClient.cancelJobRun(EMRS_APPLICATION_ID, EMR_JOB_ID, true)); + Assertions.assertTrue(validationException.getMessage().contains("Error")); } @@ -205,9 +196,10 @@ void testCancelJobRunWithNativeEMRExceptionWithValidationException() { void testCancelJobRunWithNativeEMRException() { when(emrServerless.cancelJobRun(any())) .thenReturn(new CancelJobRunResult().withJobRunId(EMR_JOB_ID)); - EmrServerlessClientImpl emrServerlessClient = new EmrServerlessClientImpl(emrServerless); + CancelJobRunResult cancelJobRunResult = emrServerlessClient.cancelJobRun(EMRS_APPLICATION_ID, EMR_JOB_ID, true); + 
Assertions.assertEquals(EMR_JOB_ID, cancelJobRunResult.getJobRunId()); } @@ -216,7 +208,6 @@ void testStartJobRunWithLongJobName() { StartJobRunResult response = new StartJobRunResult(); when(emrServerless.startJobRun(any())).thenReturn(response); - EmrServerlessClientImpl emrServerlessClient = new EmrServerlessClientImpl(emrServerless); emrServerlessClient.startJobRun( new StartJobRequest( RandomStringUtils.random(300), @@ -227,6 +218,7 @@ void testStartJobRunWithLongJobName() { new HashMap<>(), false, DEFAULT_RESULT_INDEX)); + verify(emrServerless, times(1)).startJobRun(startJobRunRequestArgumentCaptor.capture()); StartJobRunRequest startJobRunRequest = startJobRunRequestArgumentCaptor.getValue(); Assertions.assertEquals(255, startJobRunRequest.getName().length()); @@ -235,7 +227,6 @@ void testStartJobRunWithLongJobName() { @Test void testStartJobRunThrowsValidationException() { when(emrServerless.startJobRun(any())).thenThrow(new ValidationException("Unmatched quote")); - EmrServerlessClientImpl emrServerlessClient = new EmrServerlessClientImpl(emrServerless); IllegalArgumentException exception = Assertions.assertThrows( diff --git a/spark/src/test/java/org/opensearch/sql/spark/client/StartJobRequestTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/client/StartJobRequestTest.java similarity index 100% rename from spark/src/test/java/org/opensearch/sql/spark/client/StartJobRequestTest.java rename to async-query-core/src/test/java/org/opensearch/sql/spark/client/StartJobRequestTest.java diff --git a/async-query-core/src/test/java/org/opensearch/sql/spark/constants/TestConstants.java b/async-query-core/src/test/java/org/opensearch/sql/spark/constants/TestConstants.java new file mode 100644 index 0000000000..295c74dcee --- /dev/null +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/constants/TestConstants.java @@ -0,0 +1,23 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.constants; + +public class TestConstants { + public static final String QUERY = "select 1"; + public static final String EMR_JOB_ID = "job-123xxx"; + public static final String EMRS_APPLICATION_ID = "app-xxxxx"; + public static final String EMRS_EXECUTION_ROLE = "execution_role"; + public static final String EMRS_JOB_NAME = "job_name"; + public static final String SPARK_SUBMIT_PARAMETERS = "--conf org.flint.sql.SQLJob"; + public static final String TEST_CLUSTER_NAME = "TEST_CLUSTER"; + public static final String MOCK_SESSION_ID = "s-0123456"; + public static final String MOCK_STATEMENT_ID = "st-0123456"; + public static final String ENTRY_POINT_START_JAR = + "file:///home/hadoop/.ivy2/jars/org.opensearch_opensearch-spark-sql-application_2.12-0.3.0-SNAPSHOT.jar"; + public static final String DEFAULT_RESULT_INDEX = "query_execution_result_ds1"; + public static final String US_EAST_REGION = "us-east-1"; + public static final String US_WEST_REGION = "us-west-1"; +} diff --git a/async-query-core/src/test/java/org/opensearch/sql/spark/dispatcher/DatasourceEmbeddedQueryIdProviderTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/dispatcher/DatasourceEmbeddedQueryIdProviderTest.java new file mode 100644 index 0000000000..2cd50d755d --- /dev/null +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/dispatcher/DatasourceEmbeddedQueryIdProviderTest.java @@ -0,0 +1,35 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.dispatcher; 
+ +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.mockito.Mockito.verifyNoInteractions; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; +import org.opensearch.sql.spark.dispatcher.model.DispatchQueryRequest; + +@ExtendWith(MockitoExtension.class) +class DatasourceEmbeddedQueryIdProviderTest { + @Mock AsyncQueryRequestContext asyncQueryRequestContext; + + final DatasourceEmbeddedQueryIdProvider datasourceEmbeddedQueryIdProvider = + new DatasourceEmbeddedQueryIdProvider(); + + @Test + public void test() { + String queryId = + datasourceEmbeddedQueryIdProvider.getQueryId( + DispatchQueryRequest.builder().datasource("DATASOURCE").build(), + asyncQueryRequestContext); + + assertNotNull(queryId); + verifyNoInteractions(asyncQueryRequestContext); + } +} diff --git a/spark/src/test/java/org/opensearch/sql/spark/dispatcher/IndexDMLHandlerTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/dispatcher/IndexDMLHandlerTest.java similarity index 88% rename from spark/src/test/java/org/opensearch/sql/spark/dispatcher/IndexDMLHandlerTest.java rename to async-query-core/src/test/java/org/opensearch/sql/spark/dispatcher/IndexDMLHandlerTest.java index 877d6ec32b..570a7cab7d 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/dispatcher/IndexDMLHandlerTest.java +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/dispatcher/IndexDMLHandlerTest.java @@ -7,6 +7,7 @@ import static org.junit.jupiter.api.Assertions.*; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import static org.opensearch.sql.datasource.model.DataSourceStatus.ACTIVE; @@ -27,6 +28,7 @@ import org.mockito.junit.jupiter.MockitoExtension; import org.opensearch.sql.datasource.model.DataSourceMetadata; import org.opensearch.sql.datasource.model.DataSourceType; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; import org.opensearch.sql.spark.config.SparkSubmitParameterModifier; import org.opensearch.sql.spark.dispatcher.model.DispatchQueryContext; import org.opensearch.sql.spark.dispatcher.model.DispatchQueryRequest; @@ -50,6 +52,7 @@ class IndexDMLHandlerTest { @Mock private IndexDMLResultStorageService indexDMLResultStorageService; @Mock private FlintIndexOpFactory flintIndexOpFactory; @Mock private SparkSubmitParameterModifier sparkSubmitParameterModifier; + @Mock private AsyncQueryRequestContext asyncQueryRequestContext; @InjectMocks IndexDMLHandler indexDMLHandler; @@ -63,7 +66,9 @@ class IndexDMLHandlerTest { @Test public void getResponseFromExecutor() { - JSONObject result = new IndexDMLHandler(null, null, null, null).getResponseFromExecutor(null); + JSONObject result = + new IndexDMLHandler(null, null, null, null) + .getResponseFromExecutor(null, asyncQueryRequestContext); assertEquals("running", result.getString(STATUS_FIELD)); assertEquals("", result.getString(ERROR_FIELD)); @@ -82,8 +87,10 @@ public void testWhenIndexDetailsAreNotFound() { .queryId(QUERY_ID) .dataSourceMetadata(metadata) .indexQueryDetails(indexQueryDetails) + .asyncQueryRequestContext(asyncQueryRequestContext) .build(); - Mockito.when(flintIndexMetadataService.getFlintIndexMetadata(any())) + Mockito.when( + flintIndexMetadataService.getFlintIndexMetadata(any(), 
eq(asyncQueryRequestContext))) .thenReturn(new HashMap<>()); DispatchQueryResponse dispatchQueryResponse = @@ -107,10 +114,12 @@ public void testWhenIndexDetailsWithInvalidQueryActionType() { .queryId(QUERY_ID) .dataSourceMetadata(metadata) .indexQueryDetails(indexQueryDetails) + .asyncQueryRequestContext(asyncQueryRequestContext) .build(); HashMap flintMetadataMap = new HashMap<>(); flintMetadataMap.put(indexQueryDetails.openSearchIndexName(), flintIndexMetadata); - when(flintIndexMetadataService.getFlintIndexMetadata(indexQueryDetails.openSearchIndexName())) + when(flintIndexMetadataService.getFlintIndexMetadata( + indexQueryDetails.openSearchIndexName(), asyncQueryRequestContext)) .thenReturn(flintMetadataMap); indexDMLHandler.submit(dispatchQueryRequest, dispatchQueryContext); diff --git a/spark/src/test/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcherTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcherTest.java similarity index 55% rename from spark/src/test/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcherTest.java rename to async-query-core/src/test/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcherTest.java index 199582dde7..b5ea349045 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcherTest.java +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/dispatcher/SparkQueryDispatcherTest.java @@ -5,6 +5,7 @@ package org.opensearch.sql.spark.dispatcher; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.mockito.Answers.RETURNS_DEEP_STUBS; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.argThat; @@ -27,9 +28,10 @@ import static org.opensearch.sql.spark.constants.TestConstants.TEST_CLUSTER_NAME; import static org.opensearch.sql.spark.data.constants.SparkConstants.DATA_FIELD; import static org.opensearch.sql.spark.data.constants.SparkConstants.ERROR_FIELD; -import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_INDEX_STORE_AUTH_PASSWORD; -import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_INDEX_STORE_AUTH_USERNAME; -import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_INDEX_STORE_AWSREGION_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_INDEX_STORE_AUTH_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_INDEX_STORE_HOST_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_INDEX_STORE_PORT_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_INDEX_STORE_SCHEME_KEY; import static org.opensearch.sql.spark.data.constants.SparkConstants.STATUS_FIELD; import static org.opensearch.sql.spark.dispatcher.SparkQueryDispatcher.CLUSTER_NAME_TAG_KEY; import static org.opensearch.sql.spark.dispatcher.SparkQueryDispatcher.DATASOURCE_TAG_KEY; @@ -40,11 +42,15 @@ import com.amazonaws.services.emrserverless.model.GetJobRunResult; import com.amazonaws.services.emrserverless.model.JobRun; import com.amazonaws.services.emrserverless.model.JobRunState; +import com.google.common.collect.ImmutableMap; +import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.stream.Collectors; import org.json.JSONObject; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; @@ -75,13 
+81,28 @@ import org.opensearch.sql.spark.flint.IndexDMLResultStorageService; import org.opensearch.sql.spark.flint.operation.FlintIndexOpFactory; import org.opensearch.sql.spark.leasemanager.LeaseManager; +import org.opensearch.sql.spark.metrics.MetricsService; +import org.opensearch.sql.spark.parameter.DataSourceSparkParameterComposer; +import org.opensearch.sql.spark.parameter.GeneralSparkParameterComposer; +import org.opensearch.sql.spark.parameter.SparkParameterComposerCollection; +import org.opensearch.sql.spark.parameter.SparkSubmitParametersBuilderProvider; import org.opensearch.sql.spark.response.JobExecutionResponseReader; import org.opensearch.sql.spark.rest.model.LangType; +import org.opensearch.sql.spark.scheduler.AsyncQueryScheduler; +import org.opensearch.sql.spark.validator.DefaultGrammarElementValidator; +import org.opensearch.sql.spark.validator.GrammarElementValidatorProvider; +import org.opensearch.sql.spark.validator.PPLQueryValidator; +import org.opensearch.sql.spark.validator.S3GlueSQLGrammarElementValidator; +import org.opensearch.sql.spark.validator.SQLQueryValidator; @ExtendWith(MockitoExtension.class) public class SparkQueryDispatcherTest { public static final String MY_GLUE = "my_glue"; + public static final String KEY_FROM_COMPOSER = "key.from.composer"; + public static final String VALUE_FROM_COMPOSER = "value.from.composer"; + public static final String KEY_FROM_DATASOURCE_COMPOSER = "key.from.datasource.composer"; + public static final String VALUE_FROM_DATASOURCE_COMPOSER = "value.from.datasource.composer"; @Mock private EMRServerlessClient emrServerlessClient; @Mock private EMRServerlessClientFactory emrServerlessClientFactory; @Mock private DataSourceService dataSourceService; @@ -94,6 +115,36 @@ public class SparkQueryDispatcherTest { @Mock private SparkSubmitParameterModifier sparkSubmitParameterModifier; @Mock private QueryIdProvider queryIdProvider; @Mock private AsyncQueryRequestContext asyncQueryRequestContext; + @Mock private MetricsService metricsService; + @Mock private AsyncQueryScheduler asyncQueryScheduler; + + private final SQLQueryValidator sqlQueryValidator = + new SQLQueryValidator( + new GrammarElementValidatorProvider( + ImmutableMap.of(DataSourceType.S3GLUE, new S3GlueSQLGrammarElementValidator()), + new DefaultGrammarElementValidator())); + + private final PPLQueryValidator pplQueryValidator = + new PPLQueryValidator( + new GrammarElementValidatorProvider( + ImmutableMap.of(), new DefaultGrammarElementValidator())); + + private final DataSourceSparkParameterComposer dataSourceSparkParameterComposer = + (datasourceMetadata, sparkSubmitParameters, dispatchQueryRequest, context) -> { + sparkSubmitParameters.setConfigItem(FLINT_INDEX_STORE_AUTH_KEY, "basic"); + sparkSubmitParameters.setConfigItem(FLINT_INDEX_STORE_HOST_KEY, "HOST"); + sparkSubmitParameters.setConfigItem(FLINT_INDEX_STORE_PORT_KEY, "PORT"); + sparkSubmitParameters.setConfigItem(FLINT_INDEX_STORE_SCHEME_KEY, "SCHEMA"); + sparkSubmitParameters.setConfigItem( + KEY_FROM_DATASOURCE_COMPOSER, VALUE_FROM_DATASOURCE_COMPOSER); + }; + + private final GeneralSparkParameterComposer generalSparkParameterComposer = + (sparkSubmitParameters, dispatchQueryRequest, context) -> { + sparkSubmitParameters.setConfigItem(KEY_FROM_COMPOSER, VALUE_FROM_COMPOSER); + }; + + private SparkSubmitParametersBuilderProvider sparkSubmitParametersBuilderProvider; @Mock(answer = RETURNS_DEEP_STUBS) private Session session; @@ -109,6 +160,10 @@ public class SparkQueryDispatcherTest { @BeforeEach void 
setUp() { + SparkParameterComposerCollection collection = new SparkParameterComposerCollection(); + collection.register(DataSourceType.S3GLUE, dataSourceSparkParameterComposer); + collection.register(generalSparkParameterComposer); + sparkSubmitParametersBuilderProvider = new SparkSubmitParametersBuilderProvider(collection); QueryHandlerFactory queryHandlerFactory = new QueryHandlerFactory( jobExecutionResponseReader, @@ -117,183 +172,27 @@ void setUp() { leaseManager, indexDMLResultStorageService, flintIndexOpFactory, - emrServerlessClientFactory); + emrServerlessClientFactory, + metricsService, + sparkSubmitParametersBuilderProvider); sparkQueryDispatcher = new SparkQueryDispatcher( - dataSourceService, sessionManager, queryHandlerFactory, queryIdProvider); + dataSourceService, + sessionManager, + queryHandlerFactory, + queryIdProvider, + sqlQueryValidator, + pplQueryValidator); } @Test void testDispatchSelectQuery() { - when(emrServerlessClientFactory.getClient()).thenReturn(emrServerlessClient); - HashMap tags = new HashMap<>(); - tags.put(DATASOURCE_TAG_KEY, MY_GLUE); - tags.put(CLUSTER_NAME_TAG_KEY, TEST_CLUSTER_NAME); - tags.put(JOB_TYPE_TAG_KEY, JobType.BATCH.getText()); - String query = "select * from my_glue.default.http_logs"; - String sparkSubmitParameters = - constructExpectedSparkSubmitParameterString( - "sigv4", - new HashMap<>() { - { - put(FLINT_INDEX_STORE_AWSREGION_KEY, "eu-west-1"); - } - }, - query); - StartJobRequest expected = - new StartJobRequest( - "TEST_CLUSTER:batch", - null, - EMRS_APPLICATION_ID, - EMRS_EXECUTION_ROLE, - sparkSubmitParameters, - tags, - false, - "query_execution_result_my_glue"); - when(emrServerlessClient.startJobRun(expected)).thenReturn(EMR_JOB_ID); - DataSourceMetadata dataSourceMetadata = constructMyGlueDataSourceMetadata(); - when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata(MY_GLUE)) - .thenReturn(dataSourceMetadata); - - DispatchQueryResponse dispatchQueryResponse = - sparkQueryDispatcher.dispatch( - DispatchQueryRequest.builder() - .applicationId(EMRS_APPLICATION_ID) - .query(query) - .datasource(MY_GLUE) - .langType(LangType.SQL) - .executionRoleARN(EMRS_EXECUTION_ROLE) - .clusterName(TEST_CLUSTER_NAME) - .sparkSubmitParameterModifier(sparkSubmitParameterModifier) - .build(), - asyncQueryRequestContext); - - verify(emrServerlessClient, times(1)).startJobRun(startJobRequestArgumentCaptor.capture()); - Assertions.assertEquals(expected, startJobRequestArgumentCaptor.getValue()); - Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); - verifyNoInteractions(flintIndexMetadataService); - } - - @Test - void testDispatchSelectQueryWithLakeFormation() { - when(emrServerlessClientFactory.getClient()).thenReturn(emrServerlessClient); - HashMap tags = new HashMap<>(); - tags.put(DATASOURCE_TAG_KEY, MY_GLUE); - tags.put(CLUSTER_NAME_TAG_KEY, TEST_CLUSTER_NAME); - tags.put(JOB_TYPE_TAG_KEY, JobType.BATCH.getText()); - String query = "select * from my_glue.default.http_logs"; - String sparkSubmitParameters = - constructExpectedSparkSubmitParameterString( - "sigv4", - new HashMap<>() { - { - put(FLINT_INDEX_STORE_AWSREGION_KEY, "eu-west-1"); - } - }, - query, - true); - StartJobRequest expected = - new StartJobRequest( - "TEST_CLUSTER:batch", - null, - EMRS_APPLICATION_ID, - EMRS_EXECUTION_ROLE, - sparkSubmitParameters, - tags, - false, - "query_execution_result_my_glue"); - when(emrServerlessClient.startJobRun(expected)).thenReturn(EMR_JOB_ID); - DataSourceMetadata dataSourceMetadata = 
constructMyGlueDataSourceMetadataWithLakeFormation(); - when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata(MY_GLUE)) - .thenReturn(dataSourceMetadata); - - DispatchQueryResponse dispatchQueryResponse = - sparkQueryDispatcher.dispatch(getBaseDispatchQueryRequest(query), asyncQueryRequestContext); - verify(emrServerlessClient, times(1)).startJobRun(startJobRequestArgumentCaptor.capture()); - Assertions.assertEquals(expected, startJobRequestArgumentCaptor.getValue()); - Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); - verifyNoInteractions(flintIndexMetadataService); + testDispatchBatchQuery("select * from my_glue.default.http_logs"); } @Test void testDispatchSelectQueryWithBasicAuthIndexStoreDatasource() { - when(emrServerlessClientFactory.getClient()).thenReturn(emrServerlessClient); - HashMap tags = new HashMap<>(); - tags.put(DATASOURCE_TAG_KEY, MY_GLUE); - tags.put(CLUSTER_NAME_TAG_KEY, TEST_CLUSTER_NAME); - tags.put(JOB_TYPE_TAG_KEY, JobType.BATCH.getText()); - String query = "select * from my_glue.default.http_logs"; - String sparkSubmitParameters = - constructExpectedSparkSubmitParameterString( - "basic", - new HashMap<>() { - { - put(FLINT_INDEX_STORE_AUTH_USERNAME, "username"); - put(FLINT_INDEX_STORE_AUTH_PASSWORD, "password"); - } - }, - query); - StartJobRequest expected = - new StartJobRequest( - "TEST_CLUSTER:batch", - null, - EMRS_APPLICATION_ID, - EMRS_EXECUTION_ROLE, - sparkSubmitParameters, - tags, - false, - "query_execution_result_my_glue"); - when(emrServerlessClient.startJobRun(expected)).thenReturn(EMR_JOB_ID); - DataSourceMetadata dataSourceMetadata = constructMyGlueDataSourceMetadataWithBasicAuth(); - when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata(MY_GLUE)) - .thenReturn(dataSourceMetadata); - - DispatchQueryResponse dispatchQueryResponse = - sparkQueryDispatcher.dispatch(getBaseDispatchQueryRequest(query), asyncQueryRequestContext); - - verify(emrServerlessClient, times(1)).startJobRun(startJobRequestArgumentCaptor.capture()); - Assertions.assertEquals(expected, startJobRequestArgumentCaptor.getValue()); - Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); - verifyNoInteractions(flintIndexMetadataService); - } - - @Test - void testDispatchSelectQueryWithNoAuthIndexStoreDatasource() { - when(emrServerlessClientFactory.getClient()).thenReturn(emrServerlessClient); - HashMap tags = new HashMap<>(); - tags.put(DATASOURCE_TAG_KEY, MY_GLUE); - tags.put(CLUSTER_NAME_TAG_KEY, TEST_CLUSTER_NAME); - tags.put(JOB_TYPE_TAG_KEY, JobType.BATCH.getText()); - String query = "select * from my_glue.default.http_logs"; - String sparkSubmitParameters = - constructExpectedSparkSubmitParameterString( - "noauth", - new HashMap<>() { - { - } - }, - query); - StartJobRequest expected = - new StartJobRequest( - "TEST_CLUSTER:batch", - null, - EMRS_APPLICATION_ID, - EMRS_EXECUTION_ROLE, - sparkSubmitParameters, - tags, - false, - "query_execution_result_my_glue"); - when(emrServerlessClient.startJobRun(expected)).thenReturn(EMR_JOB_ID); - DataSourceMetadata dataSourceMetadata = constructMyGlueDataSourceMetadataWithNoAuth(); - when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata(MY_GLUE)) - .thenReturn(dataSourceMetadata); - - DispatchQueryResponse dispatchQueryResponse = - sparkQueryDispatcher.dispatch(getBaseDispatchQueryRequest(query), asyncQueryRequestContext); - verify(emrServerlessClient, times(1)).startJobRun(startJobRequestArgumentCaptor.capture()); - Assertions.assertEquals(expected, 
startJobRequestArgumentCaptor.getValue()); - Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); - verifyNoInteractions(flintIndexMetadataService); + testDispatchBatchQuery("select * from my_glue.default.http_logs"); } @Test @@ -307,7 +206,8 @@ void testDispatchSelectQueryCreateNewSession() { doReturn(new StatementId(MOCK_STATEMENT_ID)).when(session).submit(any(), any()); when(session.getSessionModel().getJobId()).thenReturn(EMR_JOB_ID); DataSourceMetadata dataSourceMetadata = constructMyGlueDataSourceMetadata(); - when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata(MY_GLUE)) + when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata( + MY_GLUE, asyncQueryRequestContext)) .thenReturn(dataSourceMetadata); DispatchQueryResponse dispatchQueryResponse = @@ -315,8 +215,8 @@ void testDispatchSelectQueryCreateNewSession() { verifyNoInteractions(emrServerlessClient); verify(sessionManager, never()).getSession(any(), any()); - Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); - Assertions.assertEquals(MOCK_SESSION_ID, dispatchQueryResponse.getSessionId()); + assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); + assertEquals(MOCK_SESSION_ID, dispatchQueryResponse.getSessionId()); } @Test @@ -333,7 +233,8 @@ void testDispatchSelectQueryReuseSession() { when(session.getSessionModel().getJobId()).thenReturn(EMR_JOB_ID); when(session.isOperationalForDataSource(any())).thenReturn(true); DataSourceMetadata dataSourceMetadata = constructMyGlueDataSourceMetadata(); - when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata(MY_GLUE)) + when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata( + MY_GLUE, asyncQueryRequestContext)) .thenReturn(dataSourceMetadata); DispatchQueryResponse dispatchQueryResponse = @@ -341,8 +242,8 @@ void testDispatchSelectQueryReuseSession() { verifyNoInteractions(emrServerlessClient); verify(sessionManager, never()).createSession(any(), any()); - Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); - Assertions.assertEquals(MOCK_SESSION_ID, dispatchQueryResponse.getSessionId()); + assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); + assertEquals(MOCK_SESSION_ID, dispatchQueryResponse.getSessionId()); } @Test @@ -353,7 +254,8 @@ void testDispatchSelectQueryFailedCreateSession() { doReturn(true).when(sessionManager).isEnabled(); doThrow(RuntimeException.class).when(sessionManager).createSession(any(), any()); DataSourceMetadata dataSourceMetadata = constructMyGlueDataSourceMetadata(); - when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata(MY_GLUE)) + when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata( + MY_GLUE, asyncQueryRequestContext)) .thenReturn(dataSourceMetadata); Assertions.assertThrows( @@ -365,7 +267,8 @@ void testDispatchSelectQueryFailedCreateSession() { @Test void testDispatchCreateAutoRefreshIndexQuery() { - when(emrServerlessClientFactory.getClient()).thenReturn(emrServerlessClient); + when(emrServerlessClientFactory.getClient(any())).thenReturn(emrServerlessClient); + when(queryIdProvider.getQueryId(any(), any())).thenReturn(QUERY_ID); HashMap tags = new HashMap<>(); tags.put(DATASOURCE_TAG_KEY, MY_GLUE); tags.put(INDEX_TAG_KEY, "flint_my_glue_default_http_logs_elb_and_requesturi_index"); @@ -375,15 +278,7 @@ void testDispatchCreateAutoRefreshIndexQuery() { "CREATE INDEX elb_and_requestUri ON my_glue.default.http_logs(l_orderkey, l_quantity) WITH" + " (auto_refresh = true)"; String sparkSubmitParameters = - withStructuredStreaming( - 
constructExpectedSparkSubmitParameterString( - "sigv4", - new HashMap<>() { - { - put(FLINT_INDEX_STORE_AWSREGION_KEY, "eu-west-1"); - } - }, - query)); + constructExpectedSparkSubmitParameterString(query, "streaming", QUERY_ID); StartJobRequest expected = new StartJobRequest( "TEST_CLUSTER:streaming:flint_my_glue_default_http_logs_elb_and_requesturi_index", @@ -396,78 +291,37 @@ void testDispatchCreateAutoRefreshIndexQuery() { "query_execution_result_my_glue"); when(emrServerlessClient.startJobRun(expected)).thenReturn(EMR_JOB_ID); DataSourceMetadata dataSourceMetadata = constructMyGlueDataSourceMetadata(); - when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata(MY_GLUE)) + when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata( + MY_GLUE, asyncQueryRequestContext)) .thenReturn(dataSourceMetadata); DispatchQueryResponse dispatchQueryResponse = sparkQueryDispatcher.dispatch(getBaseDispatchQueryRequest(query), asyncQueryRequestContext); verify(emrServerlessClient, times(1)).startJobRun(startJobRequestArgumentCaptor.capture()); - Assertions.assertEquals(expected, startJobRequestArgumentCaptor.getValue()); - Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); + assertEquals(expected, startJobRequestArgumentCaptor.getValue()); + assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); verifyNoInteractions(flintIndexMetadataService); } @Test void testDispatchCreateManualRefreshIndexQuery() { - when(emrServerlessClientFactory.getClient()).thenReturn(emrServerlessClient); - HashMap tags = new HashMap<>(); - tags.put(DATASOURCE_TAG_KEY, "my_glue"); - tags.put(CLUSTER_NAME_TAG_KEY, TEST_CLUSTER_NAME); - tags.put(JOB_TYPE_TAG_KEY, JobType.BATCH.getText()); - String query = + testDispatchBatchQuery( "CREATE INDEX elb_and_requestUri ON my_glue.default.http_logs(l_orderkey, l_quantity) WITH" - + " (auto_refresh = false)"; - String sparkSubmitParameters = - constructExpectedSparkSubmitParameterString( - "sigv4", - new HashMap<>() { - { - put(FLINT_INDEX_STORE_AWSREGION_KEY, "eu-west-1"); - } - }, - query); - StartJobRequest expected = - new StartJobRequest( - "TEST_CLUSTER:batch", - null, - EMRS_APPLICATION_ID, - EMRS_EXECUTION_ROLE, - sparkSubmitParameters, - tags, - false, - "query_execution_result_my_glue"); - when(emrServerlessClient.startJobRun(expected)).thenReturn(EMR_JOB_ID); - DataSourceMetadata dataSourceMetadata = constructMyGlueDataSourceMetadata(); - when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata("my_glue")) - .thenReturn(dataSourceMetadata); - - DispatchQueryResponse dispatchQueryResponse = - sparkQueryDispatcher.dispatch(getBaseDispatchQueryRequest(query), asyncQueryRequestContext); - - verify(emrServerlessClient, times(1)).startJobRun(startJobRequestArgumentCaptor.capture()); - Assertions.assertEquals(expected, startJobRequestArgumentCaptor.getValue()); - Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); - verifyNoInteractions(flintIndexMetadataService); + + " (auto_refresh = false)"); } @Test void testDispatchWithPPLQuery() { - when(emrServerlessClientFactory.getClient()).thenReturn(emrServerlessClient); + when(emrServerlessClientFactory.getClient(any())).thenReturn(emrServerlessClient); + when(queryIdProvider.getQueryId(any(), any())).thenReturn(QUERY_ID); HashMap tags = new HashMap<>(); tags.put(DATASOURCE_TAG_KEY, MY_GLUE); tags.put(CLUSTER_NAME_TAG_KEY, TEST_CLUSTER_NAME); tags.put(JOB_TYPE_TAG_KEY, JobType.BATCH.getText()); String query = "source = my_glue.default.http_logs"; String sparkSubmitParameters = 
- constructExpectedSparkSubmitParameterString( - "sigv4", - new HashMap<>() { - { - put(FLINT_INDEX_STORE_AWSREGION_KEY, "eu-west-1"); - } - }, - query); + constructExpectedSparkSubmitParameterString(query, null, QUERY_ID); StartJobRequest expected = new StartJobRequest( "TEST_CLUSTER:batch", @@ -480,7 +334,8 @@ void testDispatchWithPPLQuery() { "query_execution_result_my_glue"); when(emrServerlessClient.startJobRun(expected)).thenReturn(EMR_JOB_ID); DataSourceMetadata dataSourceMetadata = constructMyGlueDataSourceMetadata(); - when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata(MY_GLUE)) + when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata( + MY_GLUE, asyncQueryRequestContext)) .thenReturn(dataSourceMetadata); DispatchQueryResponse dispatchQueryResponse = @@ -489,55 +344,47 @@ void testDispatchWithPPLQuery() { asyncQueryRequestContext); verify(emrServerlessClient, times(1)).startJobRun(startJobRequestArgumentCaptor.capture()); - Assertions.assertEquals(expected, startJobRequestArgumentCaptor.getValue()); - Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); + assertEquals(expected, startJobRequestArgumentCaptor.getValue()); + assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); verifyNoInteractions(flintIndexMetadataService); } @Test - void testDispatchQueryWithoutATableAndDataSourceName() { - when(emrServerlessClientFactory.getClient()).thenReturn(emrServerlessClient); - HashMap tags = new HashMap<>(); - tags.put(DATASOURCE_TAG_KEY, MY_GLUE); - tags.put(CLUSTER_NAME_TAG_KEY, TEST_CLUSTER_NAME); - tags.put(JOB_TYPE_TAG_KEY, JobType.BATCH.getText()); - String query = "show tables"; - String sparkSubmitParameters = - constructExpectedSparkSubmitParameterString( - "sigv4", - new HashMap<>() { - { - put(FLINT_INDEX_STORE_AWSREGION_KEY, "eu-west-1"); - } - }, - query); - StartJobRequest expected = - new StartJobRequest( - "TEST_CLUSTER:batch", - null, - EMRS_APPLICATION_ID, - EMRS_EXECUTION_ROLE, - sparkSubmitParameters, - tags, - false, - "query_execution_result_my_glue"); - when(emrServerlessClient.startJobRun(expected)).thenReturn(EMR_JOB_ID); - DataSourceMetadata dataSourceMetadata = constructMyGlueDataSourceMetadata(); - when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata(MY_GLUE)) - .thenReturn(dataSourceMetadata); - - DispatchQueryResponse dispatchQueryResponse = - sparkQueryDispatcher.dispatch(getBaseDispatchQueryRequest(query), asyncQueryRequestContext); + void testDispatchWithSparkUDFQuery() { + List udfQueries = new ArrayList<>(); + udfQueries.add( + "CREATE FUNCTION celsius_to_fahrenheit AS 'org.apache.spark.sql.functions.expr(\"(celsius *" + + " 9/5) + 32\")'"); + udfQueries.add( + "CREATE TEMPORARY FUNCTION square AS 'org.apache.spark.sql.functions.expr(\"num * num\")'"); + for (String query : udfQueries) { + DataSourceMetadata dataSourceMetadata = constructMyGlueDataSourceMetadata(); + when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata( + MY_GLUE, asyncQueryRequestContext)) + .thenReturn(dataSourceMetadata); + + IllegalArgumentException illegalArgumentException = + Assertions.assertThrows( + IllegalArgumentException.class, + () -> + sparkQueryDispatcher.dispatch( + getBaseDispatchQueryRequestBuilder(query).langType(LangType.SQL).build(), + asyncQueryRequestContext)); + assertEquals("CREATE FUNCTION is not allowed.", illegalArgumentException.getMessage()); + verifyNoInteractions(emrServerlessClient); + verifyNoInteractions(flintIndexMetadataService); + } + } - verify(emrServerlessClient, 
times(1)).startJobRun(startJobRequestArgumentCaptor.capture()); - Assertions.assertEquals(expected, startJobRequestArgumentCaptor.getValue()); - Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); - verifyNoInteractions(flintIndexMetadataService); + @Test + void testDispatchQueryWithoutATableAndDataSourceName() { + testDispatchBatchQuery("show tables"); } @Test void testDispatchIndexQueryWithoutADatasourceName() { - when(emrServerlessClientFactory.getClient()).thenReturn(emrServerlessClient); + when(emrServerlessClientFactory.getClient(any())).thenReturn(emrServerlessClient); + when(queryIdProvider.getQueryId(any(), any())).thenReturn(QUERY_ID); HashMap tags = new HashMap<>(); tags.put(DATASOURCE_TAG_KEY, MY_GLUE); tags.put(INDEX_TAG_KEY, "flint_my_glue_default_http_logs_elb_and_requesturi_index"); @@ -547,15 +394,7 @@ void testDispatchIndexQueryWithoutADatasourceName() { "CREATE INDEX elb_and_requestUri ON default.http_logs(l_orderkey, l_quantity) WITH" + " (auto_refresh = true)"; String sparkSubmitParameters = - withStructuredStreaming( - constructExpectedSparkSubmitParameterString( - "sigv4", - new HashMap<>() { - { - put(FLINT_INDEX_STORE_AWSREGION_KEY, "eu-west-1"); - } - }, - query)); + constructExpectedSparkSubmitParameterString(query, "streaming", QUERY_ID); StartJobRequest expected = new StartJobRequest( "TEST_CLUSTER:streaming:flint_my_glue_default_http_logs_elb_and_requesturi_index", @@ -568,39 +407,32 @@ void testDispatchIndexQueryWithoutADatasourceName() { "query_execution_result_my_glue"); when(emrServerlessClient.startJobRun(expected)).thenReturn(EMR_JOB_ID); DataSourceMetadata dataSourceMetadata = constructMyGlueDataSourceMetadata(); - when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata(MY_GLUE)) + when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata( + MY_GLUE, asyncQueryRequestContext)) .thenReturn(dataSourceMetadata); DispatchQueryResponse dispatchQueryResponse = sparkQueryDispatcher.dispatch(getBaseDispatchQueryRequest(query), asyncQueryRequestContext); verify(emrServerlessClient, times(1)).startJobRun(startJobRequestArgumentCaptor.capture()); - Assertions.assertEquals(expected, startJobRequestArgumentCaptor.getValue()); - Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); + assertEquals(expected, startJobRequestArgumentCaptor.getValue()); + assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); verifyNoInteractions(flintIndexMetadataService); } @Test void testDispatchMaterializedViewQuery() { - when(emrServerlessClientFactory.getClient()).thenReturn(emrServerlessClient); + when(emrServerlessClientFactory.getClient(any())).thenReturn(emrServerlessClient); + when(queryIdProvider.getQueryId(any(), any())).thenReturn(QUERY_ID); HashMap tags = new HashMap<>(); tags.put(DATASOURCE_TAG_KEY, MY_GLUE); tags.put(INDEX_TAG_KEY, "flint_mv_1"); tags.put(CLUSTER_NAME_TAG_KEY, TEST_CLUSTER_NAME); tags.put(JOB_TYPE_TAG_KEY, JobType.STREAMING.getText()); String query = - "CREATE MATERIALIZED VIEW mv_1 AS query=select * from my_glue.default.logs WITH" - + " (auto_refresh = true)"; + "CREATE MATERIALIZED VIEW mv_1 AS select * from logs WITH" + " (auto_refresh = true)"; String sparkSubmitParameters = - withStructuredStreaming( - constructExpectedSparkSubmitParameterString( - "sigv4", - new HashMap<>() { - { - put(FLINT_INDEX_STORE_AWSREGION_KEY, "eu-west-1"); - } - }, - query)); + constructExpectedSparkSubmitParameterString(query, "streaming", QUERY_ID); StartJobRequest expected = new StartJobRequest( 
"TEST_CLUSTER:streaming:flint_mv_1", @@ -613,35 +445,31 @@ void testDispatchMaterializedViewQuery() { "query_execution_result_my_glue"); when(emrServerlessClient.startJobRun(expected)).thenReturn(EMR_JOB_ID); DataSourceMetadata dataSourceMetadata = constructMyGlueDataSourceMetadata(); - when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata(MY_GLUE)) + when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata( + MY_GLUE, asyncQueryRequestContext)) .thenReturn(dataSourceMetadata); DispatchQueryResponse dispatchQueryResponse = sparkQueryDispatcher.dispatch(getBaseDispatchQueryRequest(query), asyncQueryRequestContext); verify(emrServerlessClient, times(1)).startJobRun(startJobRequestArgumentCaptor.capture()); - Assertions.assertEquals(expected, startJobRequestArgumentCaptor.getValue()); - Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); + assertEquals(expected, startJobRequestArgumentCaptor.getValue()); + assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); verifyNoInteractions(flintIndexMetadataService); } @Test - void testDispatchShowMVQuery() { - when(emrServerlessClientFactory.getClient()).thenReturn(emrServerlessClient); + void testManualRefreshMaterializedViewQuery() { + when(emrServerlessClientFactory.getClient(any())).thenReturn(emrServerlessClient); + when(queryIdProvider.getQueryId(any(), any())).thenReturn(QUERY_ID); HashMap tags = new HashMap<>(); tags.put(DATASOURCE_TAG_KEY, MY_GLUE); tags.put(CLUSTER_NAME_TAG_KEY, TEST_CLUSTER_NAME); tags.put(JOB_TYPE_TAG_KEY, JobType.BATCH.getText()); - String query = "SHOW MATERIALIZED VIEW IN mys3.default"; + String query = + "CREATE MATERIALIZED VIEW mv_1 AS select * from logs WITH" + " (auto_refresh = false)"; String sparkSubmitParameters = - constructExpectedSparkSubmitParameterString( - "sigv4", - new HashMap<>() { - { - put(FLINT_INDEX_STORE_AWSREGION_KEY, "eu-west-1"); - } - }, - query); + constructExpectedSparkSubmitParameterString(query, null, QUERY_ID); StartJobRequest expected = new StartJobRequest( "TEST_CLUSTER:batch", @@ -654,35 +482,36 @@ void testDispatchShowMVQuery() { "query_execution_result_my_glue"); when(emrServerlessClient.startJobRun(expected)).thenReturn(EMR_JOB_ID); DataSourceMetadata dataSourceMetadata = constructMyGlueDataSourceMetadata(); - when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata(MY_GLUE)) + when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata( + MY_GLUE, asyncQueryRequestContext)) .thenReturn(dataSourceMetadata); DispatchQueryResponse dispatchQueryResponse = sparkQueryDispatcher.dispatch(getBaseDispatchQueryRequest(query), asyncQueryRequestContext); verify(emrServerlessClient, times(1)).startJobRun(startJobRequestArgumentCaptor.capture()); - Assertions.assertEquals(expected, startJobRequestArgumentCaptor.getValue()); - Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); + assertEquals(expected, startJobRequestArgumentCaptor.getValue()); + assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); + assertEquals("flint_mv_1", dispatchQueryResponse.getIndexName()); verifyNoInteractions(flintIndexMetadataService); } + @Test + void testDispatchShowMVQuery() { + testDispatchBatchQuery("SHOW MATERIALIZED VIEW IN mys3.default"); + } + @Test void testRefreshIndexQuery() { - when(emrServerlessClientFactory.getClient()).thenReturn(emrServerlessClient); + when(emrServerlessClientFactory.getClient(any())).thenReturn(emrServerlessClient); + when(queryIdProvider.getQueryId(any(), any())).thenReturn(QUERY_ID); HashMap tags = new 
HashMap<>(); tags.put(DATASOURCE_TAG_KEY, MY_GLUE); tags.put(CLUSTER_NAME_TAG_KEY, TEST_CLUSTER_NAME); tags.put(JOB_TYPE_TAG_KEY, JobType.BATCH.getText()); String query = "REFRESH SKIPPING INDEX ON my_glue.default.http_logs"; String sparkSubmitParameters = - constructExpectedSparkSubmitParameterString( - "sigv4", - new HashMap<>() { - { - put(FLINT_INDEX_STORE_AWSREGION_KEY, "eu-west-1"); - } - }, - query); + constructExpectedSparkSubmitParameterString(query, null, QUERY_ID); StartJobRequest expected = new StartJobRequest( "TEST_CLUSTER:batch", @@ -695,62 +524,29 @@ void testRefreshIndexQuery() { "query_execution_result_my_glue"); when(emrServerlessClient.startJobRun(expected)).thenReturn(EMR_JOB_ID); DataSourceMetadata dataSourceMetadata = constructMyGlueDataSourceMetadata(); - when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata(MY_GLUE)) + when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata( + MY_GLUE, asyncQueryRequestContext)) .thenReturn(dataSourceMetadata); DispatchQueryResponse dispatchQueryResponse = sparkQueryDispatcher.dispatch(getBaseDispatchQueryRequest(query), asyncQueryRequestContext); verify(emrServerlessClient, times(1)).startJobRun(startJobRequestArgumentCaptor.capture()); - Assertions.assertEquals(expected, startJobRequestArgumentCaptor.getValue()); - Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); + assertEquals(expected, startJobRequestArgumentCaptor.getValue()); + assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); + Assertions.assertEquals(JobType.REFRESH, dispatchQueryResponse.getJobType()); verifyNoInteractions(flintIndexMetadataService); } @Test void testDispatchDescribeIndexQuery() { - when(emrServerlessClientFactory.getClient()).thenReturn(emrServerlessClient); - HashMap tags = new HashMap<>(); - tags.put(DATASOURCE_TAG_KEY, MY_GLUE); - tags.put(CLUSTER_NAME_TAG_KEY, TEST_CLUSTER_NAME); - tags.put(JOB_TYPE_TAG_KEY, JobType.BATCH.getText()); - String query = "DESCRIBE SKIPPING INDEX ON mys3.default.http_logs"; - String sparkSubmitParameters = - constructExpectedSparkSubmitParameterString( - "sigv4", - new HashMap<>() { - { - put(FLINT_INDEX_STORE_AWSREGION_KEY, "eu-west-1"); - } - }, - query); - StartJobRequest expected = - new StartJobRequest( - "TEST_CLUSTER:batch", - null, - EMRS_APPLICATION_ID, - EMRS_EXECUTION_ROLE, - sparkSubmitParameters, - tags, - false, - "query_execution_result_my_glue"); - when(emrServerlessClient.startJobRun(expected)).thenReturn(EMR_JOB_ID); - DataSourceMetadata dataSourceMetadata = constructMyGlueDataSourceMetadata(); - when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata(MY_GLUE)) - .thenReturn(dataSourceMetadata); - - DispatchQueryResponse dispatchQueryResponse = - sparkQueryDispatcher.dispatch(getBaseDispatchQueryRequest(query), asyncQueryRequestContext); - - verify(emrServerlessClient, times(1)).startJobRun(startJobRequestArgumentCaptor.capture()); - Assertions.assertEquals(expected, startJobRequestArgumentCaptor.getValue()); - Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); - verifyNoInteractions(flintIndexMetadataService); + testDispatchBatchQuery("DESCRIBE SKIPPING INDEX ON mys3.default.http_logs"); } @Test void testDispatchAlterToAutoRefreshIndexQuery() { - when(emrServerlessClientFactory.getClient()).thenReturn(emrServerlessClient); + when(emrServerlessClientFactory.getClient(any())).thenReturn(emrServerlessClient); + when(queryIdProvider.getQueryId(any(), any())).thenReturn(QUERY_ID); HashMap tags = new HashMap<>(); 
tags.put(DATASOURCE_TAG_KEY, "my_glue"); tags.put(INDEX_TAG_KEY, "flint_my_glue_default_http_logs_elb_and_requesturi_index"); @@ -760,15 +556,7 @@ void testDispatchAlterToAutoRefreshIndexQuery() { "ALTER INDEX elb_and_requestUri ON my_glue.default.http_logs WITH" + " (auto_refresh = true)"; String sparkSubmitParameters = - withStructuredStreaming( - constructExpectedSparkSubmitParameterString( - "sigv4", - new HashMap<>() { - { - put(FLINT_INDEX_STORE_AWSREGION_KEY, "eu-west-1"); - } - }, - query)); + constructExpectedSparkSubmitParameterString(query, "streaming", QUERY_ID); StartJobRequest expected = new StartJobRequest( "TEST_CLUSTER:streaming:flint_my_glue_default_http_logs_elb_and_requesturi_index", @@ -781,15 +569,16 @@ void testDispatchAlterToAutoRefreshIndexQuery() { "query_execution_result_my_glue"); when(emrServerlessClient.startJobRun(expected)).thenReturn(EMR_JOB_ID); DataSourceMetadata dataSourceMetadata = constructMyGlueDataSourceMetadata(); - when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata("my_glue")) + when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata( + "my_glue", asyncQueryRequestContext)) .thenReturn(dataSourceMetadata); DispatchQueryResponse dispatchQueryResponse = sparkQueryDispatcher.dispatch(getBaseDispatchQueryRequest(query), asyncQueryRequestContext); verify(emrServerlessClient, times(1)).startJobRun(startJobRequestArgumentCaptor.capture()); - Assertions.assertEquals(expected, startJobRequestArgumentCaptor.getValue()); - Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); + assertEquals(expected, startJobRequestArgumentCaptor.getValue()); + assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); verifyNoInteractions(flintIndexMetadataService); } @@ -798,13 +587,18 @@ void testDispatchAlterToManualRefreshIndexQuery() { QueryHandlerFactory queryHandlerFactory = mock(QueryHandlerFactory.class); sparkQueryDispatcher = new SparkQueryDispatcher( - dataSourceService, sessionManager, queryHandlerFactory, queryIdProvider); - + dataSourceService, + sessionManager, + queryHandlerFactory, + queryIdProvider, + sqlQueryValidator, + pplQueryValidator); String query = "ALTER INDEX elb_and_requestUri ON my_glue.default.http_logs WITH" + " (auto_refresh = false)"; DataSourceMetadata dataSourceMetadata = constructMyGlueDataSourceMetadata(); - when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata("my_glue")) + when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata( + "my_glue", asyncQueryRequestContext)) .thenReturn(dataSourceMetadata); when(queryHandlerFactory.getIndexDMLHandler()) .thenReturn( @@ -815,6 +609,7 @@ void testDispatchAlterToManualRefreshIndexQuery() { flintIndexOpFactory)); sparkQueryDispatcher.dispatch(getBaseDispatchQueryRequest(query), asyncQueryRequestContext); + verify(queryHandlerFactory, times(1)).getIndexDMLHandler(); } @@ -823,11 +618,16 @@ void testDispatchDropIndexQuery() { QueryHandlerFactory queryHandlerFactory = mock(QueryHandlerFactory.class); sparkQueryDispatcher = new SparkQueryDispatcher( - dataSourceService, sessionManager, queryHandlerFactory, queryIdProvider); - + dataSourceService, + sessionManager, + queryHandlerFactory, + queryIdProvider, + sqlQueryValidator, + pplQueryValidator); String query = "DROP INDEX elb_and_requestUri ON my_glue.default.http_logs"; DataSourceMetadata dataSourceMetadata = constructMyGlueDataSourceMetadata(); - when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata("my_glue")) + when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata( + 
"my_glue", asyncQueryRequestContext)) .thenReturn(dataSourceMetadata); when(queryHandlerFactory.getIndexDMLHandler()) .thenReturn( @@ -837,54 +637,36 @@ void testDispatchDropIndexQuery() { indexDMLResultStorageService, flintIndexOpFactory)); - sparkQueryDispatcher.dispatch(getBaseDispatchQueryRequest(query), asyncQueryRequestContext); + DispatchQueryResponse response = + sparkQueryDispatcher.dispatch(getBaseDispatchQueryRequest(query), asyncQueryRequestContext); + verify(queryHandlerFactory, times(1)).getIndexDMLHandler(); } @Test void testDispatchVacuumIndexQuery() { - QueryHandlerFactory queryHandlerFactory = mock(QueryHandlerFactory.class); - sparkQueryDispatcher = - new SparkQueryDispatcher( - dataSourceService, sessionManager, queryHandlerFactory, queryIdProvider); - - String query = "VACUUM INDEX elb_and_requestUri ON my_glue.default.http_logs"; - DataSourceMetadata dataSourceMetadata = constructMyGlueDataSourceMetadata(); - when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata("my_glue")) - .thenReturn(dataSourceMetadata); - when(queryHandlerFactory.getIndexDMLHandler()) - .thenReturn( - new IndexDMLHandler( - jobExecutionResponseReader, - flintIndexMetadataService, - indexDMLResultStorageService, - flintIndexOpFactory)); - - sparkQueryDispatcher.dispatch(getBaseDispatchQueryRequest(query), asyncQueryRequestContext); - verify(queryHandlerFactory, times(1)).getIndexDMLHandler(); + testDispatchBatchQuery("VACUUM INDEX elb_and_requestUri ON my_glue.default.http_logs"); } @Test - void testDispatchWithWrongURI() { - when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata(MY_GLUE)) - .thenReturn(constructMyGlueDataSourceMetadataWithBadURISyntax()); - String query = "select * from my_glue.default.http_logs"; - - IllegalArgumentException illegalArgumentException = - Assertions.assertThrows( - IllegalArgumentException.class, - () -> - sparkQueryDispatcher.dispatch( - getBaseDispatchQueryRequest(query), asyncQueryRequestContext)); + void testDispatchRecoverIndexQuery() { + DataSourceMetadata dataSourceMetadata = constructMyGlueDataSourceMetadata(); + when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata( + MY_GLUE, asyncQueryRequestContext)) + .thenReturn(dataSourceMetadata); - Assertions.assertEquals( - "Bad URI in indexstore configuration of the : my_glue datasoure.", - illegalArgumentException.getMessage()); + String query = "RECOVER INDEX JOB `flint_spark_catalog_default_test_skipping_index`"; + Assertions.assertThrows( + IllegalArgumentException.class, + () -> + sparkQueryDispatcher.dispatch( + getBaseDispatchQueryRequest(query), asyncQueryRequestContext)); } @Test void testDispatchWithUnSupportedDataSourceType() { - when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata("my_prometheus")) + when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata( + "my_prometheus", asyncQueryRequestContext)) .thenReturn(constructPrometheusDataSourceType()); String query = "select * from my_prometheus.default.http_logs"; @@ -896,38 +678,35 @@ void testDispatchWithUnSupportedDataSourceType() { getBaseDispatchQueryRequestBuilder(query).datasource("my_prometheus").build(), asyncQueryRequestContext)); - Assertions.assertEquals( + assertEquals( "UnSupported datasource type for async queries:: PROMETHEUS", unsupportedOperationException.getMessage()); } @Test void testCancelJob() { - when(emrServerlessClientFactory.getClient()).thenReturn(emrServerlessClient); - when(emrServerlessClient.cancelJobRun(EMRS_APPLICATION_ID, EMR_JOB_ID, false)) - .thenReturn( - new 
CancelJobRunResult() - .withJobRunId(EMR_JOB_ID) - .withApplicationId(EMRS_APPLICATION_ID)); + givenCancelJobRunSucceed(); - String queryId = sparkQueryDispatcher.cancelJob(asyncQueryJobMetadata()); + String queryId = + sparkQueryDispatcher.cancelJob(asyncQueryJobMetadata(), asyncQueryRequestContext); - Assertions.assertEquals(QUERY_ID, queryId); + assertEquals(QUERY_ID, queryId); } @Test void testCancelQueryWithSession() { doReturn(Optional.of(session)).when(sessionManager).getSession(MOCK_SESSION_ID, MY_GLUE); - doReturn(Optional.of(statement)).when(session).get(any()); + doReturn(Optional.of(statement)).when(session).get(any(), eq(asyncQueryRequestContext)); doNothing().when(statement).cancel(); String queryId = sparkQueryDispatcher.cancelJob( - asyncQueryJobMetadataWithSessionId(MOCK_STATEMENT_ID, MOCK_SESSION_ID)); + asyncQueryJobMetadataWithSessionId(MOCK_STATEMENT_ID, MOCK_SESSION_ID), + asyncQueryRequestContext); verifyNoInteractions(emrServerlessClient); verify(statement, times(1)).cancel(); - Assertions.assertEquals(MOCK_STATEMENT_ID, queryId); + assertEquals(MOCK_STATEMENT_ID, queryId); } @Test @@ -939,11 +718,12 @@ void testCancelQueryWithInvalidSession() { IllegalArgumentException.class, () -> sparkQueryDispatcher.cancelJob( - asyncQueryJobMetadataWithSessionId(MOCK_STATEMENT_ID, "invalid"))); + asyncQueryJobMetadataWithSessionId(MOCK_STATEMENT_ID, "invalid"), + asyncQueryRequestContext)); verifyNoInteractions(emrServerlessClient); verifyNoInteractions(session); - Assertions.assertEquals("no session found. invalid", exception.getMessage()); + assertEquals("no session found. invalid", exception.getMessage()); } @Test @@ -955,58 +735,86 @@ void testCancelQueryWithInvalidStatementId() { IllegalArgumentException.class, () -> sparkQueryDispatcher.cancelJob( - asyncQueryJobMetadataWithSessionId("invalid", MOCK_SESSION_ID))); + asyncQueryJobMetadataWithSessionId("invalid", MOCK_SESSION_ID), + asyncQueryRequestContext)); verifyNoInteractions(emrServerlessClient); verifyNoInteractions(statement); - Assertions.assertEquals( - "no statement found. " + new StatementId("invalid"), exception.getMessage()); + assertEquals("no statement found. 
" + new StatementId("invalid"), exception.getMessage()); } @Test void testCancelQueryWithNoSessionId() { - when(emrServerlessClientFactory.getClient()).thenReturn(emrServerlessClient); + givenCancelJobRunSucceed(); + + String queryId = + sparkQueryDispatcher.cancelJob(asyncQueryJobMetadata(), asyncQueryRequestContext); + + Assertions.assertEquals(QUERY_ID, queryId); + } + + @Test + void testCancelBatchJob() { + givenCancelJobRunSucceed(); + + String queryId = + sparkQueryDispatcher.cancelJob( + asyncQueryJobMetadata(JobType.BATCH), asyncQueryRequestContext); + + Assertions.assertEquals(QUERY_ID, queryId); + } + + private void givenCancelJobRunSucceed() { + when(emrServerlessClientFactory.getClient(any())).thenReturn(emrServerlessClient); when(emrServerlessClient.cancelJobRun(EMRS_APPLICATION_ID, EMR_JOB_ID, false)) .thenReturn( new CancelJobRunResult() .withJobRunId(EMR_JOB_ID) .withApplicationId(EMRS_APPLICATION_ID)); - - String queryId = sparkQueryDispatcher.cancelJob(asyncQueryJobMetadata()); - - Assertions.assertEquals(QUERY_ID, queryId); } @Test void testGetQueryResponse() { - when(emrServerlessClientFactory.getClient()).thenReturn(emrServerlessClient); + when(emrServerlessClientFactory.getClient(any())).thenReturn(emrServerlessClient); when(emrServerlessClient.getJobRunResult(EMRS_APPLICATION_ID, EMR_JOB_ID)) .thenReturn(new GetJobRunResult().withJobRun(new JobRun().withState(JobRunState.PENDING))); // simulate result index is not created yet - when(jobExecutionResponseReader.getResultWithJobId(EMR_JOB_ID, null)) + when(jobExecutionResponseReader.getResultFromResultIndex( + AsyncQueryJobMetadata.builder() + .jobId(EMR_JOB_ID) + .queryId(QUERY_ID) + .applicationId(EMRS_APPLICATION_ID) + .jobId(EMR_JOB_ID) + .jobType(JobType.INTERACTIVE) + .datasourceName(MY_GLUE) + .metadata(ImmutableMap.of()) + .build(), + asyncQueryRequestContext)) .thenReturn(new JSONObject()); - JSONObject result = sparkQueryDispatcher.getQueryResponse(asyncQueryJobMetadata()); + JSONObject result = + sparkQueryDispatcher.getQueryResponse(asyncQueryJobMetadata(), asyncQueryRequestContext); - Assertions.assertEquals("PENDING", result.get("status")); + assertEquals("PENDING", result.get("status")); } @Test void testGetQueryResponseWithSession() { doReturn(Optional.of(session)).when(sessionManager).getSession(MOCK_SESSION_ID, MY_GLUE); - doReturn(Optional.of(statement)).when(session).get(any()); + doReturn(Optional.of(statement)).when(session).get(any(), eq(asyncQueryRequestContext)); when(statement.getStatementModel().getError()).thenReturn("mock error"); doReturn(StatementState.WAITING).when(statement).getStatementState(); doReturn(new JSONObject()) .when(jobExecutionResponseReader) - .getResultWithQueryId(eq(MOCK_STATEMENT_ID), any()); + .getResultWithQueryId(eq(MOCK_STATEMENT_ID), any(), eq(asyncQueryRequestContext)); JSONObject result = sparkQueryDispatcher.getQueryResponse( - asyncQueryJobMetadataWithSessionId(MOCK_STATEMENT_ID, MOCK_SESSION_ID)); + asyncQueryJobMetadataWithSessionId(MOCK_STATEMENT_ID, MOCK_SESSION_ID), + asyncQueryRequestContext); verifyNoInteractions(emrServerlessClient); - Assertions.assertEquals("waiting", result.get("status")); + assertEquals("waiting", result.get("status")); } @Test @@ -1014,36 +822,38 @@ void testGetQueryResponseWithInvalidSession() { doReturn(Optional.empty()).when(sessionManager).getSession(MOCK_SESSION_ID, MY_GLUE); doReturn(new JSONObject()) .when(jobExecutionResponseReader) - .getResultWithQueryId(eq(MOCK_STATEMENT_ID), any()); + 
.getResultWithQueryId(eq(MOCK_STATEMENT_ID), any(), eq(asyncQueryRequestContext)); IllegalArgumentException exception = Assertions.assertThrows( IllegalArgumentException.class, () -> sparkQueryDispatcher.getQueryResponse( - asyncQueryJobMetadataWithSessionId(MOCK_STATEMENT_ID, MOCK_SESSION_ID))); + asyncQueryJobMetadataWithSessionId(MOCK_STATEMENT_ID, MOCK_SESSION_ID), + asyncQueryRequestContext)); verifyNoInteractions(emrServerlessClient); - Assertions.assertEquals("no session found. " + MOCK_SESSION_ID, exception.getMessage()); + assertEquals("no session found. " + MOCK_SESSION_ID, exception.getMessage()); } @Test void testGetQueryResponseWithStatementNotExist() { doReturn(Optional.of(session)).when(sessionManager).getSession(MOCK_SESSION_ID, MY_GLUE); - doReturn(Optional.empty()).when(session).get(any()); + doReturn(Optional.empty()).when(session).get(any(), eq(asyncQueryRequestContext)); doReturn(new JSONObject()) .when(jobExecutionResponseReader) - .getResultWithQueryId(eq(MOCK_STATEMENT_ID), any()); + .getResultWithQueryId(eq(MOCK_STATEMENT_ID), any(), eq(asyncQueryRequestContext)); IllegalArgumentException exception = Assertions.assertThrows( IllegalArgumentException.class, () -> sparkQueryDispatcher.getQueryResponse( - asyncQueryJobMetadataWithSessionId(MOCK_STATEMENT_ID, MOCK_SESSION_ID))); + asyncQueryJobMetadataWithSessionId(MOCK_STATEMENT_ID, MOCK_SESSION_ID), + asyncQueryRequestContext)); verifyNoInteractions(emrServerlessClient); - Assertions.assertEquals( + assertEquals( "no statement found. " + new StatementId(MOCK_STATEMENT_ID), exception.getMessage()); } @@ -1054,12 +864,26 @@ void testGetQueryResponseWithSuccess() { resultMap.put(STATUS_FIELD, "SUCCESS"); resultMap.put(ERROR_FIELD, ""); queryResult.put(DATA_FIELD, resultMap); - when(jobExecutionResponseReader.getResultWithJobId(EMR_JOB_ID, null)).thenReturn(queryResult); + AsyncQueryJobMetadata asyncQueryJobMetadata = + AsyncQueryJobMetadata.builder() + .queryId(QUERY_ID) + .applicationId(EMRS_APPLICATION_ID) + .jobId(EMR_JOB_ID) + .jobType(JobType.INTERACTIVE) + .datasourceName(MY_GLUE) + .metadata(ImmutableMap.of()) + .jobId(EMR_JOB_ID) + .build(); + when(jobExecutionResponseReader.getResultFromResultIndex( + asyncQueryJobMetadata, asyncQueryRequestContext)) + .thenReturn(queryResult); - JSONObject result = sparkQueryDispatcher.getQueryResponse(asyncQueryJobMetadata()); + JSONObject result = + sparkQueryDispatcher.getQueryResponse(asyncQueryJobMetadata(), asyncQueryRequestContext); - verify(jobExecutionResponseReader, times(1)).getResultWithJobId(EMR_JOB_ID, null); - Assertions.assertEquals( + verify(jobExecutionResponseReader, times(1)) + .getResultFromResultIndex(asyncQueryJobMetadata, asyncQueryRequestContext); + assertEquals( new HashSet<>(Arrays.asList(DATA_FIELD, STATUS_FIELD, ERROR_FIELD)), result.keySet()); JSONObject dataJson = new JSONObject(); dataJson.put(ERROR_FIELD, ""); @@ -1070,15 +894,16 @@ void testGetQueryResponseWithSuccess() { // the same order. // We need similar. 
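[Illustrative aside, not part of the patch.] The assertion that follows compares JSON content with JSONObject.similar() rather than equals(). A minimal standalone sketch of why, using the org.json library these tests already depend on (class name here is made up for illustration):

import org.json.JSONObject;

public class JsonSimilarSketch {
  public static void main(String[] args) {
    // Two JSONObjects with identical content, built in different key order.
    JSONObject a = new JSONObject().put("status", "SUCCESS").put("error", "");
    JSONObject b = new JSONObject().put("error", "").put("status", "SUCCESS");

    // JSONObject does not override equals(), so this is an identity comparison.
    System.out.println(a.equals(b));  // false
    // similar() compares field by field, which is what the test needs.
    System.out.println(a.similar(b)); // true
  }
}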
Assertions.assertTrue(dataJson.similar(result.get(DATA_FIELD))); - Assertions.assertEquals("SUCCESS", result.get(STATUS_FIELD)); + assertEquals("SUCCESS", result.get(STATUS_FIELD)); verifyNoInteractions(emrServerlessClient); } @Test void testDispatchQueryWithExtraSparkSubmitParameters() { - when(emrServerlessClientFactory.getClient()).thenReturn(emrServerlessClient); + when(emrServerlessClientFactory.getClient(any())).thenReturn(emrServerlessClient); DataSourceMetadata dataSourceMetadata = constructMyGlueDataSourceMetadata(); - when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata(MY_GLUE)) + when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata( + MY_GLUE, asyncQueryRequestContext)) .thenReturn(dataSourceMetadata); String extraParameters = "--conf spark.dynamicAllocation.enabled=false"; @@ -1108,71 +933,81 @@ void testDispatchQueryWithExtraSparkSubmitParameters() { } } - private String constructExpectedSparkSubmitParameterString( - String auth, Map authParams, String query) { - return constructExpectedSparkSubmitParameterString(auth, authParams, query, false); + private void testDispatchBatchQuery(String query) { + when(emrServerlessClientFactory.getClient(any())).thenReturn(emrServerlessClient); + when(queryIdProvider.getQueryId(any(), any())).thenReturn(QUERY_ID); + HashMap tags = new HashMap<>(); + tags.put(DATASOURCE_TAG_KEY, MY_GLUE); + tags.put(CLUSTER_NAME_TAG_KEY, TEST_CLUSTER_NAME); + tags.put(JOB_TYPE_TAG_KEY, JobType.BATCH.getText()); + + String sparkSubmitParameters = + constructExpectedSparkSubmitParameterString(query, null, QUERY_ID); + StartJobRequest expected = + new StartJobRequest( + "TEST_CLUSTER:batch", + null, + EMRS_APPLICATION_ID, + EMRS_EXECUTION_ROLE, + sparkSubmitParameters, + tags, + false, + "query_execution_result_my_glue"); + when(emrServerlessClient.startJobRun(expected)).thenReturn(EMR_JOB_ID); + DataSourceMetadata dataSourceMetadata = constructMyGlueDataSourceMetadata(); + when(dataSourceService.verifyDataSourceAccessAndGetRawMetadata( + MY_GLUE, asyncQueryRequestContext)) + .thenReturn(dataSourceMetadata); + + DispatchQueryResponse dispatchQueryResponse = + sparkQueryDispatcher.dispatch(getBaseDispatchQueryRequest(query), asyncQueryRequestContext); + + verify(emrServerlessClient, times(1)).startJobRun(startJobRequestArgumentCaptor.capture()); + Assertions.assertEquals(expected, startJobRequestArgumentCaptor.getValue()); + Assertions.assertEquals(EMR_JOB_ID, dispatchQueryResponse.getJobId()); + Assertions.assertEquals(JobType.BATCH, dispatchQueryResponse.getJobType()); + verifyNoInteractions(flintIndexMetadataService); + } + + private String constructExpectedSparkSubmitParameterString(String query) { + return constructExpectedSparkSubmitParameterString(query, null, null); } private String constructExpectedSparkSubmitParameterString( - String auth, Map authParams, String query, boolean lakeFormationEnabled) { - StringBuilder authParamConfigBuilder = new StringBuilder(); - for (String key : authParams.keySet()) { - authParamConfigBuilder.append(" --conf "); - authParamConfigBuilder.append(key); - authParamConfigBuilder.append("="); - authParamConfigBuilder.append(authParams.get(key)); - } + String query, String jobType, String queryId) { query = "\"" + query + "\""; - return " --class org.apache.spark.sql.FlintJob --conf" - + " spark.hadoop.fs.s3.customAWSCredentialsProvider=com.amazonaws.emr.AssumeRoleAWSCredentialsProvider" - + " --conf" - + " 
spark.hadoop.aws.catalog.credentials.provider.factory.class=com.amazonaws.glue.catalog.metastore.STSAssumeRoleSessionCredentialsProviderFactory" - + " --conf spark.jars=/usr/share/aws/iceberg/lib/iceberg-spark3-runtime.jar --conf" - + " spark.jars.packages=org.opensearch:opensearch-spark-standalone_2.12:0.3.0-SNAPSHOT,org.opensearch:opensearch-spark-sql-application_2.12:0.3.0-SNAPSHOT,org.opensearch:opensearch-spark-ppl_2.12:0.3.0-SNAPSHOT" - + " --conf" - + " spark.jars.repositories=https://aws.oss.sonatype.org/content/repositories/snapshots" - + " --conf" - + " spark.emr-serverless.driverEnv.JAVA_HOME=/usr/lib/jvm/java-17-amazon-corretto.x86_64/" - + " --conf spark.executorEnv.JAVA_HOME=/usr/lib/jvm/java-17-amazon-corretto.x86_64/" - + " --conf spark.emr-serverless.driverEnv.FLINT_CLUSTER_NAME=TEST_CLUSTER --conf" - + " spark.executorEnv.FLINT_CLUSTER_NAME=TEST_CLUSTER --conf" - + " spark.datasource.flint.host=search-flint-dp-benchmark-cf5crj5mj2kfzvgwdeynkxnefy.eu-west-1.es.amazonaws.com" - + " --conf spark.datasource.flint.port=-1 --conf" - + " spark.datasource.flint.scheme=https --conf spark.datasource.flint.auth=" - + auth - + " --conf" - + " spark.datasource.flint.customAWSCredentialsProvider=com.amazonaws.emr.AssumeRoleAWSCredentialsProvider" - + " --conf" - + " spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,org.opensearch.flint.spark.FlintSparkExtensions,org.opensearch.flint.spark.FlintPPLSparkExtensions" - + " --conf" - + " spark.hadoop.hive.metastore.client.factory.class=com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory" - + " --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog " - + " --conf" - + " spark.sql.catalog.spark_catalog.catalog-impl=org.apache.iceberg.aws.glue.GlueCatalog " - + " --conf" - + " spark.emr-serverless.driverEnv.ASSUME_ROLE_CREDENTIALS_ROLE_ARN=arn:aws:iam::924196221507:role/FlintOpensearchServiceRole" - + " --conf" - + " spark.executorEnv.ASSUME_ROLE_CREDENTIALS_ROLE_ARN=arn:aws:iam::924196221507:role/FlintOpensearchServiceRole" - + " --conf" - + " spark.hive.metastore.glue.role.arn=arn:aws:iam::924196221507:role/FlintOpensearchServiceRole" - + " --conf spark.sql.catalog.my_glue=org.opensearch.sql.FlintDelegatingSessionCatalog " - + " --conf spark.flint.datasource.name=my_glue --conf" - + " spark.emr-serverless.lakeformation.enabled=" - + Boolean.toString(lakeFormationEnabled) - + " --conf spark.flint.optimizer.covering.enabled=" - + Boolean.toString(!lakeFormationEnabled) - + authParamConfigBuilder - + " --conf spark.flint.job.query=" - + query - + " "; + return " --class org.apache.spark.sql.FlintJob " + + getConfParam( + "spark.hadoop.fs.s3.customAWSCredentialsProvider=com.amazonaws.emr.AssumeRoleAWSCredentialsProvider", + "spark.hadoop.aws.catalog.credentials.provider.factory.class=com.amazonaws.glue.catalog.metastore.STSAssumeRoleSessionCredentialsProviderFactory", + "spark.jars.packages=org.opensearch:opensearch-spark-standalone_2.12:0.3.0-SNAPSHOT,org.opensearch:opensearch-spark-sql-application_2.12:0.3.0-SNAPSHOT,org.opensearch:opensearch-spark-ppl_2.12:0.3.0-SNAPSHOT", + "spark.jars.repositories=https://aws.oss.sonatype.org/content/repositories/snapshots", + "spark.emr-serverless.driverEnv.JAVA_HOME=/usr/lib/jvm/java-17-amazon-corretto.x86_64/", + "spark.executorEnv.JAVA_HOME=/usr/lib/jvm/java-17-amazon-corretto.x86_64/", + "spark.emr-serverless.driverEnv.FLINT_CLUSTER_NAME=TEST_CLUSTER", + "spark.executorEnv.FLINT_CLUSTER_NAME=TEST_CLUSTER", + 
"spark.datasource.flint.host=HOST", + "spark.datasource.flint.port=PORT", + "spark.datasource.flint.scheme=SCHEMA", + "spark.datasource.flint.auth=basic", + "spark.datasource.flint.customAWSCredentialsProvider=com.amazonaws.emr.AssumeRoleAWSCredentialsProvider", + "spark.sql.extensions=org.opensearch.flint.spark.FlintSparkExtensions,org.opensearch.flint.spark.FlintPPLSparkExtensions", + "spark.hadoop.hive.metastore.client.factory.class=com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory") + + (queryId != null ? getConfParam("spark.flint.job.queryId=" + queryId) : "") + + getConfParam("spark.flint.job.query=" + query) + + (jobType != null ? getConfParam("spark.flint.job.type=" + jobType) : "") + + getConfParam( + KEY_FROM_DATASOURCE_COMPOSER + "=" + VALUE_FROM_DATASOURCE_COMPOSER, + KEY_FROM_COMPOSER + "=" + VALUE_FROM_COMPOSER); } - private String withStructuredStreaming(String parameters) { - return parameters + " --conf spark.flint.job.type=streaming "; + private String getConfParam(String... params) { + return Arrays.stream(params) + .map(param -> String.format(" --conf %s ", param)) + .collect(Collectors.joining()); } private DataSourceMetadata constructMyGlueDataSourceMetadata() { - Map properties = new HashMap<>(); properties.put("glue.auth.type", "iam_role"); properties.put( @@ -1207,56 +1042,6 @@ private DataSourceMetadata constructMyGlueDataSourceMetadataWithBasicAuth() { .build(); } - private DataSourceMetadata constructMyGlueDataSourceMetadataWithNoAuth() { - Map properties = new HashMap<>(); - properties.put("glue.auth.type", "iam_role"); - properties.put( - "glue.auth.role_arn", "arn:aws:iam::924196221507:role/FlintOpensearchServiceRole"); - properties.put( - "glue.indexstore.opensearch.uri", - "https://search-flint-dp-benchmark-cf5crj5mj2kfzvgwdeynkxnefy.eu-west-1.es.amazonaws.com"); - properties.put("glue.indexstore.opensearch.auth", "noauth"); - return new DataSourceMetadata.Builder() - .setName(MY_GLUE) - .setConnector(DataSourceType.S3GLUE) - .setProperties(properties) - .build(); - } - - private DataSourceMetadata constructMyGlueDataSourceMetadataWithBadURISyntax() { - Map properties = new HashMap<>(); - properties.put("glue.auth.type", "iam_role"); - properties.put( - "glue.auth.role_arn", "arn:aws:iam::924196221507:role/FlintOpensearchServiceRole"); - properties.put("glue.indexstore.opensearch.uri", "http://localhost:9090? 
param"); - properties.put("glue.indexstore.opensearch.auth", "awssigv4"); - properties.put("glue.indexstore.opensearch.region", "eu-west-1"); - return new DataSourceMetadata.Builder() - .setName(MY_GLUE) - .setConnector(DataSourceType.S3GLUE) - .setProperties(properties) - .build(); - } - - private DataSourceMetadata constructMyGlueDataSourceMetadataWithLakeFormation() { - - Map properties = new HashMap<>(); - properties.put("glue.auth.type", "iam_role"); - properties.put( - "glue.auth.role_arn", "arn:aws:iam::924196221507:role/FlintOpensearchServiceRole"); - properties.put( - "glue.indexstore.opensearch.uri", - "https://search-flint-dp-benchmark-cf5crj5mj2kfzvgwdeynkxnefy.eu-west-1.es.amazonaws.com"); - properties.put("glue.indexstore.opensearch.auth", "awssigv4"); - properties.put("glue.indexstore.opensearch.region", "eu-west-1"); - properties.put("glue.lakeformation.enabled", "true"); - return new DataSourceMetadata.Builder() - .setName(MY_GLUE) - .setConnector(DataSourceType.S3GLUE) - .setProperties(properties) - .build(); - } - private DataSourceMetadata constructPrometheusDataSourceType() { return new DataSourceMetadata.Builder() .setName("my_prometheus") @@ -1284,8 +1069,7 @@ private DispatchQueryRequest constructDispatchQueryRequest( String query, LangType langType, String extraParameters) { return getBaseDispatchQueryRequestBuilder(query) .langType(langType) - .sparkSubmitParameterModifier( - (parameters) -> parameters.setExtraParameters(extraParameters)) + .sparkSubmitParameterModifier((builder) -> builder.extraParameters(extraParameters)) .build(); } @@ -1294,11 +1078,16 @@ private DispatchQueryRequest dispatchQueryRequestWithSessionId(String query, Str } private AsyncQueryJobMetadata asyncQueryJobMetadata() { + return asyncQueryJobMetadata(JobType.INTERACTIVE); + } + + private AsyncQueryJobMetadata asyncQueryJobMetadata(JobType jobType) { return AsyncQueryJobMetadata.builder() .queryId(QUERY_ID) .applicationId(EMRS_APPLICATION_ID) .jobId(EMR_JOB_ID) .datasourceName(MY_GLUE) + .jobType(jobType) .build(); } diff --git a/spark/src/test/java/org/opensearch/sql/spark/execution/session/SessionManagerTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/execution/session/SessionManagerTest.java similarity index 100% rename from spark/src/test/java/org/opensearch/sql/spark/execution/session/SessionManagerTest.java rename to async-query-core/src/test/java/org/opensearch/sql/spark/execution/session/SessionManagerTest.java diff --git a/spark/src/test/java/org/opensearch/sql/spark/execution/session/SessionStateTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/execution/session/SessionStateTest.java similarity index 100% rename from spark/src/test/java/org/opensearch/sql/spark/execution/session/SessionStateTest.java rename to async-query-core/src/test/java/org/opensearch/sql/spark/execution/session/SessionStateTest.java diff --git a/spark/src/test/java/org/opensearch/sql/spark/execution/session/SessionTypeTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/execution/session/SessionTypeTest.java similarity index 100% rename from spark/src/test/java/org/opensearch/sql/spark/execution/session/SessionTypeTest.java rename to async-query-core/src/test/java/org/opensearch/sql/spark/execution/session/SessionTypeTest.java diff --git a/spark/src/test/java/org/opensearch/sql/spark/execution/statement/StatementStateTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/execution/statement/StatementStateTest.java similarity index 100% rename 
from spark/src/test/java/org/opensearch/sql/spark/execution/statement/StatementStateTest.java rename to async-query-core/src/test/java/org/opensearch/sql/spark/execution/statement/StatementStateTest.java diff --git a/spark/src/test/java/org/opensearch/sql/spark/execution/statestore/OpenSearchStateStoreUtilTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/execution/statestore/OpenSearchStateStoreUtilTest.java similarity index 100% rename from spark/src/test/java/org/opensearch/sql/spark/execution/statestore/OpenSearchStateStoreUtilTest.java rename to async-query-core/src/test/java/org/opensearch/sql/spark/execution/statestore/OpenSearchStateStoreUtilTest.java diff --git a/spark/src/test/java/org/opensearch/sql/spark/execution/statestore/StateModelTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/execution/statestore/StateModelTest.java similarity index 97% rename from spark/src/test/java/org/opensearch/sql/spark/execution/statestore/StateModelTest.java rename to async-query-core/src/test/java/org/opensearch/sql/spark/execution/statestore/StateModelTest.java index 15d1ec2ecc..fdbbbc17e0 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/execution/statestore/StateModelTest.java +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/execution/statestore/StateModelTest.java @@ -25,7 +25,7 @@ public String getId() { } } - ConcreteStateModel model = + final ConcreteStateModel model = ConcreteStateModel.builder().metadata(ImmutableMap.of(METADATA_KEY, METADATA_VALUE)).build(); @Test diff --git a/spark/src/test/java/org/opensearch/sql/spark/execution/xcontent/XContentSerializerUtilTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/execution/xcontent/XContentSerializerUtilTest.java similarity index 100% rename from spark/src/test/java/org/opensearch/sql/spark/execution/xcontent/XContentSerializerUtilTest.java rename to async-query-core/src/test/java/org/opensearch/sql/spark/execution/xcontent/XContentSerializerUtilTest.java diff --git a/async-query-core/src/test/java/org/opensearch/sql/spark/flint/FlintIndexMetadataValidatorTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/flint/FlintIndexMetadataValidatorTest.java new file mode 100644 index 0000000000..7a1e718c05 --- /dev/null +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/flint/FlintIndexMetadataValidatorTest.java @@ -0,0 +1,90 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.flint; + +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions.AUTO_REFRESH; +import static org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions.CHECKPOINT_LOCATION; +import static org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions.INCREMENTAL_REFRESH; +import static org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions.WATERMARK_DELAY; + +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import org.junit.jupiter.api.Test; + +class FlintIndexMetadataValidatorTest { + @Test + public void conversionToIncrementalRefreshWithValidOption() { + Map existingOptions = + ImmutableMap.builder().put(INCREMENTAL_REFRESH, "false").build(); + Map newOptions = + ImmutableMap.builder() + .put(INCREMENTAL_REFRESH, "true") + .put(CHECKPOINT_LOCATION, "checkpoint_location") + .put(WATERMARK_DELAY, "1") + .build(); + + FlintIndexMetadataValidator.validateFlintIndexOptions("mv", existingOptions, 
newOptions); + } + + @Test + public void conversionToIncrementalRefreshWithMissingOptions() { + Map existingOptions = + ImmutableMap.builder().put(AUTO_REFRESH, "true").build(); + Map newOptions = + ImmutableMap.builder().put(INCREMENTAL_REFRESH, "true").build(); + + assertThrows( + IllegalArgumentException.class, + () -> + FlintIndexMetadataValidator.validateFlintIndexOptions( + "mv", existingOptions, newOptions)); + } + + @Test + public void conversionToIncrementalRefreshWithInvalidOption() { + Map existingOptions = + ImmutableMap.builder().put(INCREMENTAL_REFRESH, "false").build(); + Map newOptions = + ImmutableMap.builder() + .put(INCREMENTAL_REFRESH, "true") + .put("INVALID_OPTION", "1") + .build(); + + assertThrows( + IllegalArgumentException.class, + () -> + FlintIndexMetadataValidator.validateFlintIndexOptions( + "mv", existingOptions, newOptions)); + } + + @Test + public void conversionToFullRefreshWithValidOption() { + Map existingOptions = + ImmutableMap.builder().put(AUTO_REFRESH, "false").build(); + Map newOptions = + ImmutableMap.builder().put(AUTO_REFRESH, "true").build(); + + FlintIndexMetadataValidator.validateFlintIndexOptions("mv", existingOptions, newOptions); + } + + @Test + public void conversionToFullRefreshWithInvalidOption() { + Map existingOptions = + ImmutableMap.builder().put(AUTO_REFRESH, "false").build(); + Map newOptions = + ImmutableMap.builder() + .put(AUTO_REFRESH, "true") + .put(WATERMARK_DELAY, "1") + .build(); + + assertThrows( + IllegalArgumentException.class, + () -> + FlintIndexMetadataValidator.validateFlintIndexOptions( + "mv", existingOptions, newOptions)); + } +} diff --git a/spark/src/test/java/org/opensearch/sql/spark/flint/FlintIndexStateTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/flint/FlintIndexStateTest.java similarity index 100% rename from spark/src/test/java/org/opensearch/sql/spark/flint/FlintIndexStateTest.java rename to async-query-core/src/test/java/org/opensearch/sql/spark/flint/FlintIndexStateTest.java diff --git a/spark/src/test/java/org/opensearch/sql/spark/flint/IndexQueryDetailsTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/flint/IndexQueryDetailsTest.java similarity index 100% rename from spark/src/test/java/org/opensearch/sql/spark/flint/IndexQueryDetailsTest.java rename to async-query-core/src/test/java/org/opensearch/sql/spark/flint/IndexQueryDetailsTest.java diff --git a/async-query-core/src/test/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpFactoryTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpFactoryTest.java new file mode 100644 index 0000000000..e73c5614ae --- /dev/null +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpFactoryTest.java @@ -0,0 +1,48 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.flint.operation; + +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.spark.client.EMRServerlessClientFactory; +import org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions; +import org.opensearch.sql.spark.flint.FlintIndexClient; +import org.opensearch.sql.spark.flint.FlintIndexMetadataService; +import org.opensearch.sql.spark.flint.FlintIndexStateModelService; 
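[Illustrative aside, not part of the patch.] The new FlintIndexMetadataValidatorTest above exercises a rule along the following lines: converting an index to incremental refresh must supply checkpoint_location and watermark_delay and may not introduce unrecognized options, while a plain auto_refresh toggle accepts only auto_refresh. The sketch below is a hypothetical simplification that only mirrors the new-options checks in the test; it is not the project's actual FlintIndexMetadataValidator.

import java.util.Map;
import java.util.Set;

public class AlterOptionRuleSketch {
  // Assumed option names, taken from the constants referenced in the test.
  private static final Set<String> ALLOWED_FOR_INCREMENTAL =
      Set.of("incremental_refresh", "checkpoint_location", "watermark_delay");
  private static final Set<String> REQUIRED_FOR_INCREMENTAL =
      Set.of("checkpoint_location", "watermark_delay");

  static void validate(Map<String, String> newOptions) {
    boolean toIncremental = "true".equalsIgnoreCase(newOptions.get("incremental_refresh"));
    Set<String> allowed = toIncremental ? ALLOWED_FOR_INCREMENTAL : Set.of("auto_refresh");

    // Reject any option outside the allowed set for the requested refresh mode.
    for (String key : newOptions.keySet()) {
      if (!allowed.contains(key)) {
        throw new IllegalArgumentException("Altering index: option [" + key + "] is not allowed");
      }
    }
    // Incremental refresh needs a checkpoint location and a watermark delay.
    if (toIncremental) {
      for (String required : REQUIRED_FOR_INCREMENTAL) {
        if (!newOptions.containsKey(required)) {
          throw new IllegalArgumentException(
              "Altering index: option [" + required + "] must be specified");
        }
      }
    }
  }
}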
+import org.opensearch.sql.spark.scheduler.AsyncQueryScheduler; + +@ExtendWith(MockitoExtension.class) +class FlintIndexOpFactoryTest { + public static final String DATASOURCE_NAME = "DATASOURCE_NAME"; + + @Mock private FlintIndexStateModelService flintIndexStateModelService; + @Mock private FlintIndexClient flintIndexClient; + @Mock private FlintIndexMetadataService flintIndexMetadataService; + @Mock private EMRServerlessClientFactory emrServerlessClientFactory; + @Mock private AsyncQueryScheduler asyncQueryScheduler; + + @InjectMocks FlintIndexOpFactory flintIndexOpFactory; + + @Test + void getDrop() { + assertNotNull(flintIndexOpFactory.getDrop(DATASOURCE_NAME)); + } + + @Test + void getAlter() { + assertNotNull(flintIndexOpFactory.getAlter(new FlintIndexOptions(), DATASOURCE_NAME)); + } + + @Test + void getCancel() { + assertNotNull(flintIndexOpFactory.getDrop(DATASOURCE_NAME)); + } +} diff --git a/spark/src/test/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpTest.java similarity index 75% rename from spark/src/test/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpTest.java rename to async-query-core/src/test/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpTest.java index 0c82733ae6..8105629822 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpTest.java +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpTest.java @@ -16,6 +16,7 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; import org.opensearch.sql.spark.client.EMRServerlessClientFactory; import org.opensearch.sql.spark.execution.xcontent.XContentSerializerUtil; import org.opensearch.sql.spark.flint.FlintIndexMetadata; @@ -28,21 +29,26 @@ public class FlintIndexOpTest { @Mock private FlintIndexStateModelService flintIndexStateModelService; @Mock private EMRServerlessClientFactory mockEmrServerlessClientFactory; + @Mock private AsyncQueryRequestContext asyncQueryRequestContext; @Test public void testApplyWithTransitioningStateFailure() { FlintIndexMetadata metadata = mock(FlintIndexMetadata.class); when(metadata.getLatestId()).thenReturn(Optional.of("latestId")); FlintIndexStateModel fakeModel = getFlintIndexStateModel(metadata); - when(flintIndexStateModelService.getFlintIndexStateModel(eq("latestId"), any())) + when(flintIndexStateModelService.getFlintIndexStateModel( + eq("latestId"), any(), eq(asyncQueryRequestContext))) .thenReturn(Optional.of(fakeModel)); - when(flintIndexStateModelService.updateFlintIndexState(any(), any(), any())) + when(flintIndexStateModelService.updateFlintIndexState( + any(), any(), any(), eq(asyncQueryRequestContext))) .thenThrow(new RuntimeException("Transitioning state failed")); FlintIndexOp flintIndexOp = new TestFlintIndexOp(flintIndexStateModelService, "myS3", mockEmrServerlessClientFactory); IllegalStateException illegalStateException = - Assertions.assertThrows(IllegalStateException.class, () -> flintIndexOp.apply(metadata)); + Assertions.assertThrows( + IllegalStateException.class, + () -> flintIndexOp.apply(metadata, asyncQueryRequestContext)); Assertions.assertEquals( "Moving to transition state:DELETING failed.", illegalStateException.getMessage()); @@ -53,9 +59,11 @@ public void testApplyWithCommitFailure() { FlintIndexMetadata metadata = 
mock(FlintIndexMetadata.class); when(metadata.getLatestId()).thenReturn(Optional.of("latestId")); FlintIndexStateModel fakeModel = getFlintIndexStateModel(metadata); - when(flintIndexStateModelService.getFlintIndexStateModel(eq("latestId"), any())) + when(flintIndexStateModelService.getFlintIndexStateModel( + eq("latestId"), any(), eq(asyncQueryRequestContext))) .thenReturn(Optional.of(fakeModel)); - when(flintIndexStateModelService.updateFlintIndexState(any(), any(), any())) + when(flintIndexStateModelService.updateFlintIndexState( + any(), any(), any(), eq(asyncQueryRequestContext))) .thenReturn( FlintIndexStateModel.copy(fakeModel, XContentSerializerUtil.buildMetadata(1, 2))) .thenThrow(new RuntimeException("Commit state failed")) @@ -65,7 +73,9 @@ public void testApplyWithCommitFailure() { new TestFlintIndexOp(flintIndexStateModelService, "myS3", mockEmrServerlessClientFactory); IllegalStateException illegalStateException = - Assertions.assertThrows(IllegalStateException.class, () -> flintIndexOp.apply(metadata)); + Assertions.assertThrows( + IllegalStateException.class, + () -> flintIndexOp.apply(metadata, asyncQueryRequestContext)); Assertions.assertEquals( "commit failed. target stable state: [DELETED]", illegalStateException.getMessage()); @@ -76,9 +86,11 @@ public void testApplyWithRollBackFailure() { FlintIndexMetadata metadata = mock(FlintIndexMetadata.class); when(metadata.getLatestId()).thenReturn(Optional.of("latestId")); FlintIndexStateModel fakeModel = getFlintIndexStateModel(metadata); - when(flintIndexStateModelService.getFlintIndexStateModel(eq("latestId"), any())) + when(flintIndexStateModelService.getFlintIndexStateModel( + eq("latestId"), any(), eq(asyncQueryRequestContext))) .thenReturn(Optional.of(fakeModel)); - when(flintIndexStateModelService.updateFlintIndexState(any(), any(), any())) + when(flintIndexStateModelService.updateFlintIndexState( + any(), any(), any(), eq(asyncQueryRequestContext))) .thenReturn( FlintIndexStateModel.copy(fakeModel, XContentSerializerUtil.buildMetadata(1, 2))) .thenThrow(new RuntimeException("Commit state failed")) @@ -87,7 +99,9 @@ public void testApplyWithRollBackFailure() { new TestFlintIndexOp(flintIndexStateModelService, "myS3", mockEmrServerlessClientFactory); IllegalStateException illegalStateException = - Assertions.assertThrows(IllegalStateException.class, () -> flintIndexOp.apply(metadata)); + Assertions.assertThrows( + IllegalStateException.class, + () -> flintIndexOp.apply(metadata, asyncQueryRequestContext)); Assertions.assertEquals( "commit failed. 
target stable state: [DELETED]", illegalStateException.getMessage()); @@ -125,7 +139,10 @@ FlintIndexState transitioningState() { } @Override - void runOp(FlintIndexMetadata flintIndexMetadata, FlintIndexStateModel flintIndex) {} + void runOp( + FlintIndexMetadata flintIndexMetadata, + FlintIndexStateModel flintIndex, + AsyncQueryRequestContext asyncQueryRequestContext) {} @Override FlintIndexState stableState() { diff --git a/async-query-core/src/test/java/org/opensearch/sql/spark/leasemanager/ConcurrencyLimitExceededExceptionTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/leasemanager/ConcurrencyLimitExceededExceptionTest.java new file mode 100644 index 0000000000..c0591eaf66 --- /dev/null +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/leasemanager/ConcurrencyLimitExceededExceptionTest.java @@ -0,0 +1,19 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.leasemanager; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.junit.jupiter.api.Test; + +class ConcurrencyLimitExceededExceptionTest { + @Test + public void test() { + ConcurrencyLimitExceededException e = new ConcurrencyLimitExceededException("Test"); + + assertEquals("Test", e.getMessage()); + } +} diff --git a/async-query-core/src/test/java/org/opensearch/sql/spark/parameter/SparkParameterComposerCollectionTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/parameter/SparkParameterComposerCollectionTest.java new file mode 100644 index 0000000000..8cd1de8b27 --- /dev/null +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/parameter/SparkParameterComposerCollectionTest.java @@ -0,0 +1,93 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.parameter; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoInteractions; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.datasource.model.DataSourceMetadata; +import org.opensearch.sql.datasource.model.DataSourceType; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; +import org.opensearch.sql.spark.dispatcher.model.DispatchQueryRequest; + +@ExtendWith(MockitoExtension.class) +class SparkParameterComposerCollectionTest { + + @Mock DataSourceSparkParameterComposer composer1; + @Mock DataSourceSparkParameterComposer composer2; + @Mock DataSourceSparkParameterComposer composer3; + @Mock GeneralSparkParameterComposer generalComposer; + @Mock DispatchQueryRequest dispatchQueryRequest; + @Mock AsyncQueryRequestContext asyncQueryRequestContext; + + final DataSourceType type1 = new DataSourceType("TYPE1"); + final DataSourceType type2 = new DataSourceType("TYPE2"); + final DataSourceType type3 = new DataSourceType("TYPE3"); + + SparkParameterComposerCollection collection; + + @BeforeEach + void setUp() { + collection = new SparkParameterComposerCollection(); + collection.register(type1, composer1); + collection.register(type1, composer2); + collection.register(type2, composer3); + collection.register(generalComposer); + } + + @Test + void isComposerRegistered() { + assertTrue(collection.isComposerRegistered(type1)); + 
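[Illustrative aside, not part of the patch.] The SparkParameterComposerCollectionTest being added here verifies a registry pattern: composers are registered per datasource type (plus general composers), and every composer registered for a type is applied when parameters are composed. A hypothetical, simplified sketch of that idea, using made-up names rather than the project's actual SparkParameterComposerCollection API:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;

public class ComposerCollectionSketch {
  private final Map<String, List<Consumer<StringBuilder>>> byType = new HashMap<>();
  private final List<Consumer<StringBuilder>> general = new ArrayList<>();

  // Register a composer for a specific datasource type.
  void register(String type, Consumer<StringBuilder> composer) {
    byType.computeIfAbsent(type, k -> new ArrayList<>()).add(composer);
  }

  // Register a composer that applies regardless of datasource type.
  void register(Consumer<StringBuilder> composer) {
    general.add(composer);
  }

  boolean isComposerRegistered(String type) {
    return byType.containsKey(type);
  }

  // Apply every composer registered for the given type; unregistered types are a no-op.
  void composeByDataSource(String type, StringBuilder params) {
    byType.getOrDefault(type, List.of()).forEach(c -> c.accept(params));
  }

  // Apply the general composers only.
  void compose(StringBuilder params) {
    general.forEach(c -> c.accept(params));
  }
}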
assertTrue(collection.isComposerRegistered(type2)); + assertFalse(collection.isComposerRegistered(type3)); + } + + @Test + void composeByDataSourceWithRegisteredType() { + DataSourceMetadata metadata = + new DataSourceMetadata.Builder().setConnector(type1).setName("name").build(); + SparkSubmitParameters sparkSubmitParameters = new SparkSubmitParameters(); + + collection.composeByDataSource( + metadata, sparkSubmitParameters, dispatchQueryRequest, asyncQueryRequestContext); + + verify(composer1) + .compose(metadata, sparkSubmitParameters, dispatchQueryRequest, asyncQueryRequestContext); + verify(composer2) + .compose(metadata, sparkSubmitParameters, dispatchQueryRequest, asyncQueryRequestContext); + verifyNoInteractions(composer3); + } + + @Test + void composeByDataSourceWithUnregisteredType() { + DataSourceMetadata metadata = + new DataSourceMetadata.Builder().setConnector(type3).setName("name").build(); + SparkSubmitParameters sparkSubmitParameters = new SparkSubmitParameters(); + + collection.composeByDataSource( + metadata, sparkSubmitParameters, dispatchQueryRequest, asyncQueryRequestContext); + + verifyNoInteractions(composer1, composer2, composer3); + } + + @Test + void compose() { + SparkSubmitParameters sparkSubmitParameters = new SparkSubmitParameters(); + + collection.compose(sparkSubmitParameters, dispatchQueryRequest, asyncQueryRequestContext); + + verify(generalComposer) + .compose(sparkSubmitParameters, dispatchQueryRequest, asyncQueryRequestContext); + verifyNoInteractions(composer1, composer2, composer3); + } +} diff --git a/async-query-core/src/test/java/org/opensearch/sql/spark/parameter/SparkSubmitParametersBuilderTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/parameter/SparkSubmitParametersBuilderTest.java new file mode 100644 index 0000000000..8fb975d187 --- /dev/null +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/parameter/SparkSubmitParametersBuilderTest.java @@ -0,0 +1,208 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.parameter; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; +import static org.opensearch.sql.spark.data.constants.SparkConstants.HADOOP_CATALOG_CREDENTIALS_PROVIDER_FACTORY_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_JARS_KEY; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.datasource.model.DataSourceMetadata; +import org.opensearch.sql.datasource.model.DataSourceType; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; +import org.opensearch.sql.spark.config.SparkSubmitParameterModifier; +import org.opensearch.sql.spark.dispatcher.model.DispatchQueryRequest; + +@ExtendWith(MockitoExtension.class) +public class SparkSubmitParametersBuilderTest { + + @Mock SparkParameterComposerCollection sparkParameterComposerCollection; + @Mock SparkSubmitParameterModifier sparkSubmitParameterModifier; + @Mock AsyncQueryRequestContext asyncQueryRequestContext; + @Mock 
DispatchQueryRequest dispatchQueryRequest; + + @InjectMocks SparkSubmitParametersBuilder sparkSubmitParametersBuilder; + + @Test + public void testBuildWithoutExtraParameters() { + String params = sparkSubmitParametersBuilder.toString(); + + assertNotNull(params); + } + + @Test + public void testBuildWithExtraParameters() { + String params = sparkSubmitParametersBuilder.extraParameters("--conf A=1").toString(); + + // Assert the conf is included with a space + assertTrue(params.endsWith(" --conf A=1")); + } + + @Test + public void testBuildQueryString() { + String rawQuery = "SHOW tables LIKE \"%\";"; + String expectedQueryInParams = "\"SHOW tables LIKE \\\"%\\\";\""; + String params = sparkSubmitParametersBuilder.query(rawQuery).toString(); + assertTrue(params.contains(expectedQueryInParams)); + } + + @Test + public void testBuildQueryStringNestedQuote() { + String rawQuery = "SELECT '\"1\"'"; + String expectedQueryInParams = "\"SELECT '\\\"1\\\"'\""; + String params = sparkSubmitParametersBuilder.query(rawQuery).toString(); + assertTrue(params.contains(expectedQueryInParams)); + } + + @Test + public void testBuildQueryStringSpecialCharacter() { + String rawQuery = "SELECT '{\"test ,:+\\\"inner\\\"/\\|?#><\"}'"; + String expectedQueryInParams = "SELECT '{\\\"test ,:+\\\\\\\"inner\\\\\\\"/\\\\|?#><\\\"}'"; + String params = sparkSubmitParametersBuilder.query(rawQuery).toString(); + assertTrue(params.contains(expectedQueryInParams)); + } + + @Test + public void testClassName() { + String params = sparkSubmitParametersBuilder.className("CLASS_NAME").toString(); + assertTrue(params.contains("--class CLASS_NAME")); + } + + @Test + public void testClusterName() { + String params = sparkSubmitParametersBuilder.clusterName("CLUSTER_NAME").toString(); + assertTrue(params.contains("spark.emr-serverless.driverEnv.FLINT_CLUSTER_NAME=CLUSTER_NAME")); + assertTrue(params.contains("spark.executorEnv.FLINT_CLUSTER_NAME=CLUSTER_NAME")); + } + + @Test + public void testOverrideConfigItem() { + SparkSubmitParameters params = sparkSubmitParametersBuilder.getSparkSubmitParameters(); + params.setConfigItem(SPARK_JARS_KEY, "Overridden"); + String result = params.toString(); + + assertEquals("Overridden", params.getConfigItem(SPARK_JARS_KEY)); + assertTrue(result.contains(String.format("%s=Overridden", SPARK_JARS_KEY))); + } + + @Test + public void testDeleteConfigItem() { + SparkSubmitParameters params = sparkSubmitParametersBuilder.getSparkSubmitParameters(); + params.deleteConfigItem(HADOOP_CATALOG_CREDENTIALS_PROVIDER_FACTORY_KEY); + String result = params.toString(); + + assertFalse(result.contains(HADOOP_CATALOG_CREDENTIALS_PROVIDER_FACTORY_KEY)); + } + + @Test + public void testAddConfigItem() { + SparkSubmitParameters params = sparkSubmitParametersBuilder.getSparkSubmitParameters(); + params.setConfigItem("AdditionalKey", "Value"); + String result = params.toString(); + + assertTrue(result.contains("AdditionalKey=Value")); + } + + @Test + public void testStructuredStreaming() { + SparkSubmitParameters params = + sparkSubmitParametersBuilder.structuredStreaming(true).getSparkSubmitParameters(); + String result = params.toString(); + + assertTrue(result.contains("spark.flint.job.type=streaming")); + } + + @Test + public void testNonStructuredStreaming() { + SparkSubmitParameters params = + sparkSubmitParametersBuilder.structuredStreaming(false).getSparkSubmitParameters(); + String result = params.toString(); + + assertFalse(result.contains("spark.flint.job.type=streaming")); + } + + @Test + public void 
testSessionExecution() { + SparkSubmitParameters params = + sparkSubmitParametersBuilder + .sessionExecution("SESSION_ID", "DATASOURCE_NAME") + .getSparkSubmitParameters(); + String result = params.toString(); + + assertTrue( + result.contains("spark.flint.job.requestIndex=.query_execution_request_datasource_name")); + assertTrue(result.contains("spark.flint.job.sessionId=SESSION_ID")); + } + + @Test + public void testAcceptModifier() { + sparkSubmitParametersBuilder.acceptModifier(sparkSubmitParameterModifier); + + verify(sparkSubmitParameterModifier).modifyParameters(sparkSubmitParametersBuilder); + } + + @Test + public void testAcceptNullModifier() { + sparkSubmitParametersBuilder.acceptModifier(null); + } + + @Test + public void testDataSource() { + when(sparkParameterComposerCollection.isComposerRegistered(DataSourceType.S3GLUE)) + .thenReturn(true); + + DataSourceMetadata metadata = + new DataSourceMetadata.Builder() + .setConnector(DataSourceType.S3GLUE) + .setName("name") + .build(); + SparkSubmitParameters params = + sparkSubmitParametersBuilder + .dataSource(metadata, dispatchQueryRequest, asyncQueryRequestContext) + .getSparkSubmitParameters(); + + verify(sparkParameterComposerCollection) + .composeByDataSource(metadata, params, dispatchQueryRequest, asyncQueryRequestContext); + } + + @Test + public void testUnsupportedDataSource() { + when(sparkParameterComposerCollection.isComposerRegistered(DataSourceType.S3GLUE)) + .thenReturn(false); + + DataSourceMetadata metadata = + new DataSourceMetadata.Builder() + .setConnector(DataSourceType.S3GLUE) + .setName("name") + .build(); + assertThrows( + UnsupportedOperationException.class, + () -> + sparkSubmitParametersBuilder.dataSource( + metadata, dispatchQueryRequest, asyncQueryRequestContext)); + } + + @Test + public void testAcceptComposers() { + SparkSubmitParameters params = + sparkSubmitParametersBuilder + .acceptComposers(dispatchQueryRequest, asyncQueryRequestContext) + .getSparkSubmitParameters(); + + verify(sparkParameterComposerCollection) + .compose(params, dispatchQueryRequest, asyncQueryRequestContext); + } +} diff --git a/async-query-core/src/test/java/org/opensearch/sql/spark/utils/IDUtilsTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/utils/IDUtilsTest.java new file mode 100644 index 0000000000..1893256c39 --- /dev/null +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/utils/IDUtilsTest.java @@ -0,0 +1,33 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.utils; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.junit.jupiter.api.Test; + +class IDUtilsTest { + public static final String DATASOURCE_NAME = "DATASOURCE_NAME"; + + @Test + public void encodeAndDecode() { + String id = IDUtils.encode(DATASOURCE_NAME); + String decoded = IDUtils.decode(id); + + assertTrue(id.length() > IDUtils.PREFIX_LEN); + assertEquals(DATASOURCE_NAME, decoded); + } + + @Test + public void generateUniqueIds() { + String id1 = IDUtils.encode(DATASOURCE_NAME); + String id2 = IDUtils.encode(DATASOURCE_NAME); + + assertNotEquals(id1, id2); + } +} diff --git a/spark/src/test/java/org/opensearch/sql/spark/utils/MockTimeProvider.java b/async-query-core/src/test/java/org/opensearch/sql/spark/utils/MockTimeProvider.java similarity index 100% rename from 
spark/src/test/java/org/opensearch/sql/spark/utils/MockTimeProvider.java rename to async-query-core/src/test/java/org/opensearch/sql/spark/utils/MockTimeProvider.java diff --git a/async-query-core/src/test/java/org/opensearch/sql/spark/utils/RealTimeProviderTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/utils/RealTimeProviderTest.java new file mode 100644 index 0000000000..7eb5a56cfe --- /dev/null +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/utils/RealTimeProviderTest.java @@ -0,0 +1,19 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.utils; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.junit.jupiter.api.Test; + +class RealTimeProviderTest { + @Test + public void testCurrentEpochMillis() { + RealTimeProvider realTimeProvider = new RealTimeProvider(); + + assertTrue(realTimeProvider.currentEpochMillis() > 0); + } +} diff --git a/async-query-core/src/test/java/org/opensearch/sql/spark/utils/SQLQueryUtilsTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/utils/SQLQueryUtilsTest.java new file mode 100644 index 0000000000..881ad0e56a --- /dev/null +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/utils/SQLQueryUtilsTest.java @@ -0,0 +1,487 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.utils; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.opensearch.sql.spark.utils.SQLQueryUtilsTest.IndexQuery.index; +import static org.opensearch.sql.spark.utils.SQLQueryUtilsTest.IndexQuery.mv; +import static org.opensearch.sql.spark.utils.SQLQueryUtilsTest.IndexQuery.skippingIndex; + +import java.util.List; +import lombok.Getter; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.datasource.model.DataSource; +import org.opensearch.sql.spark.dispatcher.model.FullyQualifiedTableName; +import org.opensearch.sql.spark.dispatcher.model.IndexQueryActionType; +import org.opensearch.sql.spark.dispatcher.model.IndexQueryDetails; +import org.opensearch.sql.spark.flint.FlintIndexType; + +@ExtendWith(MockitoExtension.class) +public class SQLQueryUtilsTest { + + @Mock private DataSource dataSource; + + @Test + void testExtractionOfTableNameFromSQLQueries() { + String sqlQuery = "select * from my_glue.default.http_logs"; + FullyQualifiedTableName fullyQualifiedTableName = + SQLQueryUtils.extractFullyQualifiedTableNames(sqlQuery).get(0); + assertFalse(SQLQueryUtils.isFlintExtensionQuery(sqlQuery)); + assertFullyQualifiedTableName("my_glue", "default", "http_logs", fullyQualifiedTableName); + + sqlQuery = "select * from my_glue.db.http_logs"; + assertFalse(SQLQueryUtils.isFlintExtensionQuery(sqlQuery)); + fullyQualifiedTableName = SQLQueryUtils.extractFullyQualifiedTableNames(sqlQuery).get(0); + assertFullyQualifiedTableName("my_glue", "db", "http_logs", fullyQualifiedTableName); + + sqlQuery = "select * from my_glue.http_logs"; + fullyQualifiedTableName = SQLQueryUtils.extractFullyQualifiedTableNames(sqlQuery).get(0); + 
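[Illustrative aside, not part of the patch.] The table-name assertions in SQLQueryUtilsTest follow a simple convention: a one-part identifier is a bare table, two parts are schema.table, and three parts are datasource.schema.table. The sketch below illustrates that mapping only; it is not the actual SQLQueryUtils parser, which works on the SQL grammar rather than string splitting.

public class QualifiedNameSketch {
  record QualifiedName(String datasource, String schema, String table) {}

  static QualifiedName parse(String name) {
    String[] parts = name.split("\\.");
    switch (parts.length) {
      case 1: return new QualifiedName(null, null, parts[0]);
      case 2: return new QualifiedName(null, parts[0], parts[1]);
      case 3: return new QualifiedName(parts[0], parts[1], parts[2]);
      default: throw new IllegalArgumentException("Unexpected identifier: " + name);
    }
  }

  public static void main(String[] args) {
    // Mirrors the expectations asserted in testExtractionOfTableNameFromSQLQueries.
    System.out.println(parse("my_glue.default.http_logs")); // datasource=my_glue, schema=default, table=http_logs
    System.out.println(parse("my_glue.http_logs"));         // schema=my_glue, table=http_logs
    System.out.println(parse("http_logs"));                 // table only
  }
}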
assertFalse(SQLQueryUtils.isFlintExtensionQuery(sqlQuery)); + assertFullyQualifiedTableName(null, "my_glue", "http_logs", fullyQualifiedTableName); + + sqlQuery = "select * from http_logs"; + fullyQualifiedTableName = SQLQueryUtils.extractFullyQualifiedTableNames(sqlQuery).get(0); + assertFalse(SQLQueryUtils.isFlintExtensionQuery(sqlQuery)); + assertFullyQualifiedTableName(null, null, "http_logs", fullyQualifiedTableName); + + sqlQuery = "DROP TABLE myS3.default.alb_logs"; + fullyQualifiedTableName = SQLQueryUtils.extractFullyQualifiedTableNames(sqlQuery).get(0); + assertFalse(SQLQueryUtils.isFlintExtensionQuery(sqlQuery)); + assertFullyQualifiedTableName("myS3", "default", "alb_logs", fullyQualifiedTableName); + + sqlQuery = "DESCRIBE TABLE myS3.default.alb_logs"; + fullyQualifiedTableName = SQLQueryUtils.extractFullyQualifiedTableNames(sqlQuery).get(0); + assertFalse(SQLQueryUtils.isFlintExtensionQuery(sqlQuery)); + assertFullyQualifiedTableName("myS3", "default", "alb_logs", fullyQualifiedTableName); + + sqlQuery = + "CREATE EXTERNAL TABLE\n" + + "myS3.default.alb_logs\n" + + "[ PARTITIONED BY (col_name [, … ] ) ]\n" + + "[ ROW FORMAT DELIMITED row_format ]\n" + + "STORED AS file_format\n" + + "LOCATION { 's3://bucket/folder/' }"; + fullyQualifiedTableName = SQLQueryUtils.extractFullyQualifiedTableNames(sqlQuery).get(0); + assertFalse(SQLQueryUtils.isFlintExtensionQuery(sqlQuery)); + assertFullyQualifiedTableName("myS3", "default", "alb_logs", fullyQualifiedTableName); + } + + @Test + void testMultipleTables() { + String[] sqlQueries = { + "SELECT * FROM my_glue.default.http_logs, my_glue.default.access_logs", + "SELECT * FROM my_glue.default.http_logs LEFT JOIN my_glue.default.access_logs", + "SELECT table1.id, table2.id FROM my_glue.default.http_logs table1 LEFT OUTER JOIN" + + " (SELECT * FROM my_glue.default.access_logs) table2 ON table1.tag = table2.tag", + "SELECT table1.id, table2.id FROM my_glue.default.http_logs FOR VERSION AS OF 1 table1" + + " LEFT OUTER JOIN" + + " (SELECT * FROM my_glue.default.access_logs) table2" + + " ON table1.tag = table2.tag" + }; + + for (String sqlQuery : sqlQueries) { + List fullyQualifiedTableNames = + SQLQueryUtils.extractFullyQualifiedTableNames(sqlQuery); + + assertFalse(SQLQueryUtils.isFlintExtensionQuery(sqlQuery)); + assertEquals(2, fullyQualifiedTableNames.size()); + assertFullyQualifiedTableName( + "my_glue", "default", "http_logs", fullyQualifiedTableNames.get(0)); + assertFullyQualifiedTableName( + "my_glue", "default", "access_logs", fullyQualifiedTableNames.get(1)); + } + } + + @Test + void testMultipleTablesWithJoin() { + String sqlQuery = + "select * from my_glue.default.http_logs LEFT JOIN my_glue.default.access_logs"; + + List fullyQualifiedTableNames = + SQLQueryUtils.extractFullyQualifiedTableNames(sqlQuery); + + assertFalse(SQLQueryUtils.isFlintExtensionQuery(sqlQuery)); + assertFullyQualifiedTableName( + "my_glue", "default", "http_logs", fullyQualifiedTableNames.get(0)); + assertFullyQualifiedTableName( + "my_glue", "default", "access_logs", fullyQualifiedTableNames.get(1)); + } + + @Test + void testNoFullyQualifiedTableName() { + String sqlQuery = "SHOW tables"; + + List fullyQualifiedTableNames = + SQLQueryUtils.extractFullyQualifiedTableNames(sqlQuery); + + assertEquals(0, fullyQualifiedTableNames.size()); + } + + @Test + void testExtractionFromFlintSkippingIndexQueries() { + String[] createSkippingIndexQueries = { + "CREATE SKIPPING INDEX ON myS3.default.alb_logs (l_orderkey VALUE_SET)", + "CREATE SKIPPING INDEX IF NOT 
EXISTS" + + " ON myS3.default.alb_logs (l_orderkey VALUE_SET) " + + " WITH (auto_refresh = true)", + "CREATE SKIPPING INDEX ON myS3.default.alb_logs(l_orderkey VALUE_SET)" + + " WITH (auto_refresh = true)", + "CREATE SKIPPING INDEX ON myS3.default.alb_logs(l_orderkey VALUE_SET) " + + " WHERE elb_status_code = 500 " + + " WITH (auto_refresh = true)", + "DROP SKIPPING INDEX ON myS3.default.alb_logs", + "ALTER SKIPPING INDEX ON myS3.default.alb_logs WITH (auto_refresh = false)", + }; + + for (String query : createSkippingIndexQueries) { + assertTrue(SQLQueryUtils.isFlintExtensionQuery(query), "Failed query: " + query); + + IndexQueryDetails indexQueryDetails = SQLQueryUtils.extractIndexDetails(query); + FullyQualifiedTableName fullyQualifiedTableName = + indexQueryDetails.getFullyQualifiedTableName(); + + assertNull(indexQueryDetails.getIndexName()); + assertFullyQualifiedTableName("myS3", "default", "alb_logs", fullyQualifiedTableName); + assertEquals( + "flint_mys3_default_alb_logs_skipping_index", indexQueryDetails.openSearchIndexName()); + } + } + + @Test + void testExtractionFromFlintCoveringIndexQueries() { + String[] coveringIndexQueries = { + "CREATE INDEX elb_and_requestUri ON myS3.default.alb_logs(l_orderkey, l_quantity)", + "CREATE INDEX IF NOT EXISTS elb_and_requestUri " + + " ON myS3.default.alb_logs(l_orderkey, l_quantity) " + + " WITH (auto_refresh = true)", + "CREATE INDEX elb_and_requestUri ON myS3.default.alb_logs(l_orderkey, l_quantity)" + + " WITH (auto_refresh = true)", + "CREATE INDEX elb_and_requestUri ON myS3.default.alb_logs(l_orderkey, l_quantity) " + + " WHERE elb_status_code = 500 " + + " WITH (auto_refresh = true)", + "DROP INDEX elb_and_requestUri ON myS3.default.alb_logs", + "ALTER INDEX elb_and_requestUri ON myS3.default.alb_logs WITH (auto_refresh = false)" + }; + + for (String query : coveringIndexQueries) { + assertTrue(SQLQueryUtils.isFlintExtensionQuery(query), "Failed query: " + query); + + IndexQueryDetails indexQueryDetails = SQLQueryUtils.extractIndexDetails(query); + FullyQualifiedTableName fullyQualifiedTableName = + indexQueryDetails.getFullyQualifiedTableName(); + + assertEquals("elb_and_requestUri", indexQueryDetails.getIndexName()); + assertFullyQualifiedTableName("myS3", "default", "alb_logs", fullyQualifiedTableName); + assertEquals( + "flint_mys3_default_alb_logs_elb_and_requesturi_index", + indexQueryDetails.openSearchIndexName()); + } + } + + @Test + void testExtractionFromCreateMVQuery() { + String mvQuery = "select * from my_glue.default.logs"; + String query = "CREATE MATERIALIZED VIEW mv_1 AS " + mvQuery + " WITH (auto_refresh = true)"; + + assertTrue(SQLQueryUtils.isFlintExtensionQuery(query)); + IndexQueryDetails indexQueryDetails = SQLQueryUtils.extractIndexDetails(query); + assertNull(indexQueryDetails.getIndexName()); + assertNull(indexQueryDetails.getFullyQualifiedTableName()); + assertEquals(mvQuery, indexQueryDetails.getMvQuery()); + assertEquals("mv_1", indexQueryDetails.getMvName()); + assertEquals("flint_mv_1", indexQueryDetails.openSearchIndexName()); + } + + @Test + void testExtractionFromFlintMVQuery() { + String[] mvQueries = { + "DROP MATERIALIZED VIEW mv_1", "ALTER MATERIALIZED VIEW mv_1 WITH (auto_refresh = false)", + }; + + for (String query : mvQueries) { + assertTrue(SQLQueryUtils.isFlintExtensionQuery(query)); + + IndexQueryDetails indexQueryDetails = SQLQueryUtils.extractIndexDetails(query); + FullyQualifiedTableName fullyQualifiedTableName = + indexQueryDetails.getFullyQualifiedTableName(); + + 
assertNull(indexQueryDetails.getIndexName()); + assertNull(fullyQualifiedTableName); + assertNull(indexQueryDetails.getMvQuery()); + assertEquals("mv_1", indexQueryDetails.getMvName()); + assertEquals("flint_mv_1", indexQueryDetails.openSearchIndexName()); + } + } + + @Test + void testDescSkippingIndex() { + String descSkippingIndex = "DESC SKIPPING INDEX ON mys3.default.http_logs"; + + assertTrue(SQLQueryUtils.isFlintExtensionQuery(descSkippingIndex)); + IndexQueryDetails indexDetails = SQLQueryUtils.extractIndexDetails(descSkippingIndex); + FullyQualifiedTableName fullyQualifiedTableName = indexDetails.getFullyQualifiedTableName(); + + assertNull(indexDetails.getIndexName()); + assertNotNull(fullyQualifiedTableName); + assertEquals(FlintIndexType.SKIPPING, indexDetails.getIndexType()); + assertEquals(IndexQueryActionType.DESCRIBE, indexDetails.getIndexQueryActionType()); + assertEquals("flint_mys3_default_http_logs_skipping_index", indexDetails.openSearchIndexName()); + } + + @Test + void testDescCoveringIndex() { + String descCoveringIndex = "DESC INDEX cv1 ON mys3.default.http_logs"; + + assertTrue(SQLQueryUtils.isFlintExtensionQuery(descCoveringIndex)); + IndexQueryDetails indexDetails = SQLQueryUtils.extractIndexDetails(descCoveringIndex); + FullyQualifiedTableName fullyQualifiedTableName = indexDetails.getFullyQualifiedTableName(); + + assertEquals("cv1", indexDetails.getIndexName()); + assertNotNull(fullyQualifiedTableName); + assertEquals(FlintIndexType.COVERING, indexDetails.getIndexType()); + assertEquals(IndexQueryActionType.DESCRIBE, indexDetails.getIndexQueryActionType()); + assertEquals("flint_mys3_default_http_logs_cv1_index", indexDetails.openSearchIndexName()); + } + + @Test + void testDescMaterializedView() { + String descMv = "DESC MATERIALIZED VIEW mv1"; + + assertTrue(SQLQueryUtils.isFlintExtensionQuery(descMv)); + IndexQueryDetails indexDetails = SQLQueryUtils.extractIndexDetails(descMv); + FullyQualifiedTableName fullyQualifiedTableName = indexDetails.getFullyQualifiedTableName(); + + assertNull(indexDetails.getIndexName()); + assertEquals("mv1", indexDetails.getMvName()); + assertNull(fullyQualifiedTableName); + assertEquals(FlintIndexType.MATERIALIZED_VIEW, indexDetails.getIndexType()); + assertEquals(IndexQueryActionType.DESCRIBE, indexDetails.getIndexQueryActionType()); + assertEquals("flint_mv1", indexDetails.openSearchIndexName()); + } + + @Test + void testShowIndex() { + String showCoveringIndex = "SHOW INDEX ON myS3.default.http_logs"; + + assertTrue(SQLQueryUtils.isFlintExtensionQuery(showCoveringIndex)); + IndexQueryDetails indexDetails = SQLQueryUtils.extractIndexDetails(showCoveringIndex); + FullyQualifiedTableName fullyQualifiedTableName = indexDetails.getFullyQualifiedTableName(); + + assertNull(indexDetails.getIndexName()); + assertNull(indexDetails.getMvName()); + assertNotNull(fullyQualifiedTableName); + assertEquals(FlintIndexType.COVERING, indexDetails.getIndexType()); + assertEquals(IndexQueryActionType.SHOW, indexDetails.getIndexQueryActionType()); + assertNull(indexDetails.openSearchIndexName()); + } + + @Test + void testShowMaterializedView() { + String showMV = "SHOW MATERIALIZED VIEW IN my_glue.default"; + + assertTrue(SQLQueryUtils.isFlintExtensionQuery(showMV)); + IndexQueryDetails indexDetails = SQLQueryUtils.extractIndexDetails(showMV); + FullyQualifiedTableName fullyQualifiedTableName = indexDetails.getFullyQualifiedTableName(); + + assertNull(indexDetails.getIndexName()); + assertNull(indexDetails.getMvName()); + 
assertNull(fullyQualifiedTableName); + assertEquals(FlintIndexType.MATERIALIZED_VIEW, indexDetails.getIndexType()); + assertEquals(IndexQueryActionType.SHOW, indexDetails.getIndexQueryActionType()); + assertNull(indexDetails.openSearchIndexName()); + } + + @Test + void testRefreshIndex() { + String refreshSkippingIndex = "REFRESH SKIPPING INDEX ON mys3.default.http_logs"; + assertTrue(SQLQueryUtils.isFlintExtensionQuery(refreshSkippingIndex)); + IndexQueryDetails indexDetails = SQLQueryUtils.extractIndexDetails(refreshSkippingIndex); + FullyQualifiedTableName fullyQualifiedTableName = indexDetails.getFullyQualifiedTableName(); + assertNull(indexDetails.getIndexName()); + assertNotNull(fullyQualifiedTableName); + assertEquals(FlintIndexType.SKIPPING, indexDetails.getIndexType()); + assertEquals(IndexQueryActionType.REFRESH, indexDetails.getIndexQueryActionType()); + + String refreshCoveringIndex = "REFRESH INDEX cv1 ON mys3.default.http_logs"; + assertTrue(SQLQueryUtils.isFlintExtensionQuery(refreshCoveringIndex)); + indexDetails = SQLQueryUtils.extractIndexDetails(refreshCoveringIndex); + fullyQualifiedTableName = indexDetails.getFullyQualifiedTableName(); + assertEquals("cv1", indexDetails.getIndexName()); + assertNotNull(fullyQualifiedTableName); + assertEquals(FlintIndexType.COVERING, indexDetails.getIndexType()); + assertEquals(IndexQueryActionType.REFRESH, indexDetails.getIndexQueryActionType()); + + String refreshMV = "REFRESH MATERIALIZED VIEW mv1"; + assertTrue(SQLQueryUtils.isFlintExtensionQuery(refreshMV)); + indexDetails = SQLQueryUtils.extractIndexDetails(refreshMV); + fullyQualifiedTableName = indexDetails.getFullyQualifiedTableName(); + assertNull(indexDetails.getIndexName()); + assertEquals("mv1", indexDetails.getMvName()); + assertNull(fullyQualifiedTableName); + assertEquals(FlintIndexType.MATERIALIZED_VIEW, indexDetails.getIndexType()); + assertEquals(IndexQueryActionType.REFRESH, indexDetails.getIndexQueryActionType()); + } + + /** https://github.com/opensearch-project/sql/issues/2206 */ + @Test + void testAutoRefresh() { + assertFalse( + SQLQueryUtils.extractIndexDetails(skippingIndex().getQuery()) + .getFlintIndexOptions() + .autoRefresh()); + + assertFalse( + SQLQueryUtils.extractIndexDetails( + skippingIndex().withProperty("auto_refresh", "false").getQuery()) + .getFlintIndexOptions() + .autoRefresh()); + + assertTrue( + SQLQueryUtils.extractIndexDetails( + skippingIndex().withProperty("auto_refresh", "true").getQuery()) + .getFlintIndexOptions() + .autoRefresh()); + + assertTrue( + SQLQueryUtils.extractIndexDetails( + skippingIndex().withProperty("auto_refresh", "true").withSemicolon().getQuery()) + .getFlintIndexOptions() + .autoRefresh()); + + assertTrue( + SQLQueryUtils.extractIndexDetails( + skippingIndex().withProperty("\"auto_refresh\"", "true").getQuery()) + .getFlintIndexOptions() + .autoRefresh()); + + assertTrue( + SQLQueryUtils.extractIndexDetails( + skippingIndex().withProperty("\"auto_refresh\"", "true").withSemicolon().getQuery()) + .getFlintIndexOptions() + .autoRefresh()); + + assertTrue( + SQLQueryUtils.extractIndexDetails( + skippingIndex().withProperty("\"auto_refresh\"", "\"true\"").getQuery()) + .getFlintIndexOptions() + .autoRefresh()); + + assertTrue( + SQLQueryUtils.extractIndexDetails( + skippingIndex() + .withProperty("\"auto_refresh\"", "\"true\"") + .withSemicolon() + .getQuery()) + .getFlintIndexOptions() + .autoRefresh()); + + assertFalse( + SQLQueryUtils.extractIndexDetails( + skippingIndex().withProperty("auto_refresh", 
"1").getQuery()) + .getFlintIndexOptions() + .autoRefresh()); + + assertFalse( + SQLQueryUtils.extractIndexDetails(skippingIndex().withProperty("interval", "1").getQuery()) + .getFlintIndexOptions() + .autoRefresh()); + + assertFalse( + SQLQueryUtils.extractIndexDetails( + skippingIndex().withProperty("\"\"", "\"true\"").getQuery()) + .getFlintIndexOptions() + .autoRefresh()); + + assertFalse( + SQLQueryUtils.extractIndexDetails(index().getQuery()).getFlintIndexOptions().autoRefresh()); + + assertFalse( + SQLQueryUtils.extractIndexDetails(index().withProperty("auto_refresh", "false").getQuery()) + .getFlintIndexOptions() + .autoRefresh()); + + assertTrue( + SQLQueryUtils.extractIndexDetails(index().withProperty("auto_refresh", "true").getQuery()) + .getFlintIndexOptions() + .autoRefresh()); + + assertTrue( + SQLQueryUtils.extractIndexDetails( + index().withProperty("auto_refresh", "true").withSemicolon().getQuery()) + .getFlintIndexOptions() + .autoRefresh()); + + assertTrue( + SQLQueryUtils.extractIndexDetails(mv().withProperty("auto_refresh", "true").getQuery()) + .getFlintIndexOptions() + .autoRefresh()); + + assertTrue( + SQLQueryUtils.extractIndexDetails( + mv().withProperty("auto_refresh", "true").withSemicolon().getQuery()) + .getFlintIndexOptions() + .autoRefresh()); + } + + @Test + void testRecoverIndex() { + String refreshSkippingIndex = + "RECOVER INDEX JOB `flint_spark_catalog_default_test_skipping_index`"; + assertTrue(SQLQueryUtils.isFlintExtensionQuery(refreshSkippingIndex)); + IndexQueryDetails indexDetails = SQLQueryUtils.extractIndexDetails(refreshSkippingIndex); + assertEquals(IndexQueryActionType.RECOVER, indexDetails.getIndexQueryActionType()); + } + + @Getter + protected static class IndexQuery { + private String query; + + private IndexQuery(String query) { + this.query = query; + } + + public static IndexQuery skippingIndex() { + return new IndexQuery( + "CREATE SKIPPING INDEX ON myS3.default.alb_logs" + "(l_orderkey VALUE_SET)"); + } + + public static IndexQuery index() { + return new IndexQuery( + "CREATE INDEX elb_and_requestUri ON myS3.default.alb_logs(l_orderkey, " + "l_quantity)"); + } + + public static IndexQuery mv() { + return new IndexQuery("CREATE MATERIALIZED VIEW mv_1 AS select * from my_glue.default.logs"); + } + + public IndexQuery withProperty(String key, String value) { + query = String.format("%s with (%s = %s)", query, key, value); + return this; + } + + public IndexQuery withSemicolon() { + query += ";"; + return this; + } + } + + private void assertFullyQualifiedTableName( + String expectedDatasourceName, + String expectedSchemaName, + String expectedTableName, + FullyQualifiedTableName fullyQualifiedTableName) { + assertEquals(expectedDatasourceName, fullyQualifiedTableName.getDatasourceName()); + assertEquals(expectedSchemaName, fullyQualifiedTableName.getSchemaName()); + assertEquals(expectedTableName, fullyQualifiedTableName.getTableName()); + } +} diff --git a/async-query-core/src/test/java/org/opensearch/sql/spark/utils/TestUtils.java b/async-query-core/src/test/java/org/opensearch/sql/spark/utils/TestUtils.java new file mode 100644 index 0000000000..4336b13aa9 --- /dev/null +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/utils/TestUtils.java @@ -0,0 +1,17 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.utils; + +import java.io.IOException; +import java.util.Objects; + +public class TestUtils { + public static String getJson(String filename) throws 
IOException { + ClassLoader classLoader = TestUtils.class.getClassLoader(); + return new String( + Objects.requireNonNull(classLoader.getResourceAsStream(filename)).readAllBytes()); + } +} diff --git a/async-query-core/src/test/java/org/opensearch/sql/spark/validator/FunctionTypeTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/validator/FunctionTypeTest.java new file mode 100644 index 0000000000..a5f868421c --- /dev/null +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/validator/FunctionTypeTest.java @@ -0,0 +1,47 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.validator; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.junit.jupiter.api.Test; + +class FunctionTypeTest { + @Test + public void test() { + assertEquals(FunctionType.AGGREGATE, FunctionType.fromFunctionName("any")); + assertEquals(FunctionType.AGGREGATE, FunctionType.fromFunctionName("variance")); + assertEquals(FunctionType.WINDOW, FunctionType.fromFunctionName("cume_dist")); + assertEquals(FunctionType.WINDOW, FunctionType.fromFunctionName("row_number")); + assertEquals(FunctionType.ARRAY, FunctionType.fromFunctionName("array")); + assertEquals(FunctionType.ARRAY, FunctionType.fromFunctionName("sort_array")); + assertEquals(FunctionType.MAP, FunctionType.fromFunctionName("element_at")); + assertEquals(FunctionType.MAP, FunctionType.fromFunctionName("try_element_at")); + assertEquals(FunctionType.DATE_TIMESTAMP, FunctionType.fromFunctionName("add_months")); + assertEquals(FunctionType.DATE_TIMESTAMP, FunctionType.fromFunctionName("year")); + assertEquals(FunctionType.JSON, FunctionType.fromFunctionName("from_json")); + assertEquals(FunctionType.JSON, FunctionType.fromFunctionName("to_json")); + assertEquals(FunctionType.MATH, FunctionType.fromFunctionName("abs")); + assertEquals(FunctionType.MATH, FunctionType.fromFunctionName("width_bucket")); + assertEquals(FunctionType.STRING, FunctionType.fromFunctionName("ascii")); + assertEquals(FunctionType.STRING, FunctionType.fromFunctionName("upper")); + assertEquals(FunctionType.CONDITIONAL, FunctionType.fromFunctionName("coalesce")); + assertEquals(FunctionType.CONDITIONAL, FunctionType.fromFunctionName("nvl2")); + assertEquals(FunctionType.BITWISE, FunctionType.fromFunctionName("bit_count")); + assertEquals(FunctionType.BITWISE, FunctionType.fromFunctionName("shiftrightunsigned")); + assertEquals(FunctionType.CONVERSION, FunctionType.fromFunctionName("bigint")); + assertEquals(FunctionType.CONVERSION, FunctionType.fromFunctionName("tinyint")); + assertEquals(FunctionType.PREDICATE, FunctionType.fromFunctionName("isnan")); + assertEquals(FunctionType.PREDICATE, FunctionType.fromFunctionName("rlike")); + assertEquals(FunctionType.CSV, FunctionType.fromFunctionName("from_csv")); + assertEquals(FunctionType.CSV, FunctionType.fromFunctionName("to_csv")); + assertEquals(FunctionType.MISC, FunctionType.fromFunctionName("aes_decrypt")); + assertEquals(FunctionType.MISC, FunctionType.fromFunctionName("version")); + assertEquals(FunctionType.GENERATOR, FunctionType.fromFunctionName("explode")); + assertEquals(FunctionType.GENERATOR, FunctionType.fromFunctionName("stack")); + assertEquals(FunctionType.UDF, FunctionType.fromFunctionName("unknown")); + } +} diff --git a/async-query-core/src/test/java/org/opensearch/sql/spark/validator/GrammarElementValidatorProviderTest.java 
b/async-query-core/src/test/java/org/opensearch/sql/spark/validator/GrammarElementValidatorProviderTest.java new file mode 100644 index 0000000000..a42c9f7cd5 --- /dev/null +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/validator/GrammarElementValidatorProviderTest.java @@ -0,0 +1,40 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.validator; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import com.google.common.collect.ImmutableMap; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.datasource.model.DataSourceType; + +class GrammarElementValidatorProviderTest { + final S3GlueSQLGrammarElementValidator s3GlueSQLGrammarElementValidator = + new S3GlueSQLGrammarElementValidator(); + final SecurityLakeSQLGrammarElementValidator securityLakeSQLGrammarElementValidator = + new SecurityLakeSQLGrammarElementValidator(); + final DefaultGrammarElementValidator defaultGrammarElementValidator = + new DefaultGrammarElementValidator(); + final GrammarElementValidatorProvider grammarElementValidatorProvider = + new GrammarElementValidatorProvider( + ImmutableMap.of( + DataSourceType.S3GLUE, s3GlueSQLGrammarElementValidator, + DataSourceType.SECURITY_LAKE, securityLakeSQLGrammarElementValidator), + defaultGrammarElementValidator); + + @Test + public void test() { + assertEquals( + s3GlueSQLGrammarElementValidator, + grammarElementValidatorProvider.getValidatorForDatasource(DataSourceType.S3GLUE)); + assertEquals( + securityLakeSQLGrammarElementValidator, + grammarElementValidatorProvider.getValidatorForDatasource(DataSourceType.SECURITY_LAKE)); + assertEquals( + defaultGrammarElementValidator, + grammarElementValidatorProvider.getValidatorForDatasource(DataSourceType.PROMETHEUS)); + } +} diff --git a/async-query-core/src/test/java/org/opensearch/sql/spark/validator/PPLQueryValidatorTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/validator/PPLQueryValidatorTest.java new file mode 100644 index 0000000000..8d02bb3c72 --- /dev/null +++ b/async-query-core/src/test/java/org/opensearch/sql/spark/validator/PPLQueryValidatorTest.java @@ -0,0 +1,202 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.validator; + +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.when; +import static org.opensearch.sql.spark.validator.PPLGrammarElement.DESCRIBE_COMMAND; +import static org.opensearch.sql.spark.validator.PPLGrammarElement.EXPAND_COMMAND; +import static org.opensearch.sql.spark.validator.PPLGrammarElement.FILLNULL_COMMAND; +import static org.opensearch.sql.spark.validator.PPLGrammarElement.FLATTEN_COMMAND; +import static org.opensearch.sql.spark.validator.PPLGrammarElement.IPADDRESS_FUNCTIONS; +import static org.opensearch.sql.spark.validator.PPLGrammarElement.JOIN_COMMAND; +import static org.opensearch.sql.spark.validator.PPLGrammarElement.JSON_FUNCTIONS; +import static org.opensearch.sql.spark.validator.PPLGrammarElement.LAMBDA_FUNCTIONS; +import static org.opensearch.sql.spark.validator.PPLGrammarElement.LOOKUP_COMMAND; +import static org.opensearch.sql.spark.validator.PPLGrammarElement.PATTERNS_COMMAND; +import static org.opensearch.sql.spark.validator.PPLGrammarElement.SUBQUERY_COMMAND; + +import com.google.common.collect.ImmutableSet; +import java.util.Arrays; +import java.util.Set; +import 
lombok.AllArgsConstructor; +import lombok.Getter; +import org.antlr.v4.runtime.CommonTokenStream; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.common.antlr.CaseInsensitiveCharStream; +import org.opensearch.sql.datasource.model.DataSourceType; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.SingleStatementContext; + +@ExtendWith(MockitoExtension.class) +public class PPLQueryValidatorTest { + @Mock GrammarElementValidatorProvider mockedProvider; + + @InjectMocks PPLQueryValidator pplQueryValidator; + + private static final String SOURCE_PREFIX = "source = t | "; + + private enum TestElement { + FIELDS("fields field1, field1"), + WHERE("where field1=\"success\""), + STATS("stats count(), count(`field1`), min(`field1`), max(`field1`)"), + PARSE("parse `field1` \".*/(?[^/]+$)\""), + PATTERNS("patterns new_field='no_numbers' pattern='[0-9]' message"), + SORT("sort -field1Alias"), + EVAL("eval field2 = `field` * 2"), + RENAME("rename field2 as field1"), + HEAD("head 10"), + GROK("grok email '.+@%{HOSTNAME:host)'"), + TOP("top 2 Field1 by Field2"), + DEDUP("dedup field1"), + JOIN("join on c_custkey = o_custkey orders"), + LOOKUP("lookup account_list mkt_id AS mkt_code REPLACE amount, account_name AS name"), + SUBQUERY("where a > [ source = inner | stats min(c) ]"), + RARE("rare Field1 by Field2"), + TRENDLINE("trendline sma(2, field1) as Field1Alias"), + EVENTSTATS("eventstats sum(field1) by field2"), + FLATTEN("flatten field1"), + FIELD_SUMMARY("fieldsummary includefields=field1 nulls=true"), + FILLNULL("fillnull with 0 in field1"), + EXPAND("expand employee"), + DESCRIBE(false, "describe schema.table"), + STRING_FUNCTIONS("eval cl1Len = LENGTH(col1)"), + DATETIME_FUNCTIONS("eval newDate = ADDDATE(DATE('2020-08-26'), 1)"), + CONDITION_FUNCTIONS("eval field2 = isnull(col1)"), + MATH_FUNCTIONS("eval field2 = ACOS(col1)"), + EXPRESSIONS("where age > (25 + 5)"), + IPADDRESS_FUNCTIONS("where cidrmatch(ip, '192.168.0.1/24')"), + JSON_FUNCTIONS("where cidrmatch(ip, '192.168.0.1/24')"), + LAMBDA_FUNCTIONS("eval array = json_array(1, -1, 2), result = filter(array, x -> x > 0)"), + CRYPTO_FUNCTIONS("eval field1 = MD5('hello')"); + + @Getter private final String[] queries; + + TestElement(String... queries) { + this.queries = addPrefix(queries); + } + + // For describe + TestElement(boolean addPrefix, String... queries) { + this.queries = addPrefix ? addPrefix(queries) : queries; + } + + private String[] addPrefix(String... 
queries) { + return Arrays.stream(queries).map(query -> SOURCE_PREFIX + query).toArray(String[]::new); + } + } + + @Test + void testAllowAllByDefault() { + when(mockedProvider.getValidatorForDatasource(any())) + .thenReturn(new DefaultGrammarElementValidator()); + VerifyValidator v = new VerifyValidator(pplQueryValidator, DataSourceType.SPARK); + Arrays.stream(PPLQueryValidatorTest.TestElement.values()).forEach(v::ok); + } + + private static class TestPPLGrammarElementValidator extends DenyListGrammarElementValidator { + private static final Set DENY_LIST = + ImmutableSet.builder() + .add( + PATTERNS_COMMAND, + JOIN_COMMAND, + LOOKUP_COMMAND, + SUBQUERY_COMMAND, + FLATTEN_COMMAND, + FILLNULL_COMMAND, + EXPAND_COMMAND, + DESCRIBE_COMMAND, + IPADDRESS_FUNCTIONS, + JSON_FUNCTIONS, + LAMBDA_FUNCTIONS) + .build(); + + public TestPPLGrammarElementValidator() { + super(DENY_LIST); + } + } + + @Test + void testCwlValidator() { + when(mockedProvider.getValidatorForDatasource(any())) + .thenReturn(new TestPPLGrammarElementValidator()); + VerifyValidator v = new VerifyValidator(pplQueryValidator, DataSourceType.SPARK); + + v.ok(TestElement.FIELDS); + v.ok(TestElement.WHERE); + v.ok(TestElement.STATS); + v.ok(TestElement.PARSE); + v.ng(TestElement.PATTERNS); + v.ok(TestElement.SORT); + v.ok(TestElement.EVAL); + v.ok(TestElement.RENAME); + v.ok(TestElement.HEAD); + v.ok(TestElement.GROK); + v.ok(TestElement.TOP); + v.ok(TestElement.DEDUP); + v.ng(TestElement.JOIN); + v.ng(TestElement.LOOKUP); + v.ng(TestElement.SUBQUERY); + v.ok(TestElement.RARE); + v.ok(TestElement.TRENDLINE); + v.ok(TestElement.EVENTSTATS); + v.ng(TestElement.FLATTEN); + v.ok(TestElement.FIELD_SUMMARY); + v.ng(TestElement.FILLNULL); + v.ng(TestElement.EXPAND); + v.ng(TestElement.DESCRIBE); + v.ok(TestElement.STRING_FUNCTIONS); + v.ok(TestElement.DATETIME_FUNCTIONS); + v.ok(TestElement.CONDITION_FUNCTIONS); + v.ok(TestElement.MATH_FUNCTIONS); + v.ok(TestElement.EXPRESSIONS); + v.ng(TestElement.IPADDRESS_FUNCTIONS); + v.ng(TestElement.JSON_FUNCTIONS); + v.ng(TestElement.LAMBDA_FUNCTIONS); + v.ok(TestElement.CRYPTO_FUNCTIONS); + } + + @AllArgsConstructor + private static class VerifyValidator { + private final PPLQueryValidator validator; + private final DataSourceType dataSourceType; + + public void ok(PPLQueryValidatorTest.TestElement query) { + runValidate(query.getQueries()); + } + + public void ng(PPLQueryValidatorTest.TestElement element) { + Arrays.stream(element.queries) + .forEach( + query -> + assertThrows( + IllegalArgumentException.class, + () -> runValidate(query), + "The query should throw: query=`" + query.toString() + "`")); + } + + void runValidate(String[] queries) { + Arrays.stream(queries).forEach(query -> validator.validate(query, dataSourceType)); + } + + void runValidate(String query) { + validator.validate(query, dataSourceType); + } + + SingleStatementContext getParser(String query) { + org.opensearch.sql.spark.antlr.parser.SqlBaseParser sqlBaseParser = + new org.opensearch.sql.spark.antlr.parser.SqlBaseParser( + new CommonTokenStream( + new org.opensearch.sql.spark.antlr.parser.SqlBaseLexer( + new CaseInsensitiveCharStream(query)))); + return sqlBaseParser.singleStatement(); + } + } +} diff --git a/async-query-core/src/test/java/org/opensearch/sql/spark/validator/SQLQueryValidatorTest.java b/async-query-core/src/test/java/org/opensearch/sql/spark/validator/SQLQueryValidatorTest.java new file mode 100644 index 0000000000..ad73daa37f --- /dev/null +++ 
b/async-query-core/src/test/java/org/opensearch/sql/spark/validator/SQLQueryValidatorTest.java @@ -0,0 +1,663 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.validator; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.when; + +import java.util.Arrays; +import java.util.UUID; +import lombok.AllArgsConstructor; +import lombok.Getter; +import org.antlr.v4.runtime.CommonTokenStream; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.common.antlr.CaseInsensitiveCharStream; +import org.opensearch.sql.datasource.model.DataSourceType; +import org.opensearch.sql.spark.antlr.parser.SqlBaseLexer; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser; +import org.opensearch.sql.spark.antlr.parser.SqlBaseParser.SingleStatementContext; + +@ExtendWith(MockitoExtension.class) +class SQLQueryValidatorTest { + @Mock GrammarElementValidatorProvider mockedProvider; + + @InjectMocks SQLQueryValidator sqlQueryValidator; + + private enum TestElement { + // DDL Statements + ALTER_DATABASE( + "ALTER DATABASE inventory SET DBPROPERTIES ('Edit-date' = '01/01/2001');", + "ALTER DATABASE dbx.tab1 SET LOCATION '/path/to/part/ways';"), + ALTER_TABLE( + "ALTER TABLE default.StudentInfo PARTITION (age='10') RENAME TO PARTITION (age='15');", + "ALTER TABLE StudentInfo ADD columns (LastName string, DOB timestamp);", + "ALTER TABLE StudentInfo ADD IF NOT EXISTS PARTITION (age=18);", + "ALTER TABLE StudentInfo RENAME COLUMN name TO FirstName;", + "ALTER TABLE StudentInfo RENAME TO newName;", + "ALTER TABLE StudentInfo DROP columns (LastName, DOB);", + "ALTER TABLE StudentInfo ALTER COLUMN FirstName COMMENT \"new comment\";", + "ALTER TABLE StudentInfo REPLACE COLUMNS (name string, ID int COMMENT 'new comment');", + "ALTER TABLE test_tab SET SERDE 'org.apache.LazyBinaryColumnarSerDe';", + "ALTER TABLE StudentInfo DROP IF EXISTS PARTITION (age=18);", + "ALTER TABLE dbx.tab1 PARTITION (a='1', b='2') SET LOCATION '/path/to/part/ways';", + "ALTER TABLE dbx.tab1 RECOVER PARTITIONS;", + "ALTER TABLE dbx.tab1 SET LOCATION '/path/to/part/ways';"), + ALTER_VIEW( + "ALTER VIEW tempdb1.v1 RENAME TO tempdb1.v2;", + "ALTER VIEW tempdb1.v2 AS SELECT * FROM tempdb1.v1;"), + CREATE_DATABASE("CREATE DATABASE IF NOT EXISTS customer_db;\n"), + CREATE_FUNCTION("CREATE FUNCTION simple_udf AS 'SimpleUdf' USING JAR '/tmp/SimpleUdf.jar';"), + CREATE_TABLE( + "CREATE TABLE Student_Dupli like Student;", + "CREATE TABLE student (id INT, name STRING, age INT) USING CSV;", + "CREATE TABLE student_copy USING CSV AS SELECT * FROM student;", + "CREATE TABLE student (id INT, name STRING, age INT);", + "REPLACE TABLE student (id INT, name STRING, age INT) USING CSV;"), + CREATE_VIEW( + "CREATE OR REPLACE VIEW experienced_employee" + + " (ID COMMENT 'Unique identification number', Name)" + + " COMMENT 'View for experienced employees'" + + " AS SELECT id, name FROM all_employee" + + " WHERE working_years > 5;"), + DROP_DATABASE("DROP DATABASE inventory_db CASCADE;"), + DROP_FUNCTION("DROP FUNCTION test_avg;"), + DROP_TABLE("DROP TABLE employeetable;"), + DROP_VIEW("DROP VIEW employeeView;"), + REPAIR_TABLE("REPAIR TABLE t1;"), + 
TRUNCATE_TABLE("TRUNCATE TABLE Student partition(age=10);"), + + // DML Statements + INSERT_TABLE( + "INSERT INTO target_table SELECT * FROM source_table;", + "INSERT INTO persons REPLACE WHERE ssn = 123456789 SELECT * FROM persons2;", + "INSERT OVERWRITE students VALUES ('Ashua Hill', '456 Erica Ct, Cupertino', 111111);"), + INSERT_OVERWRITE_DIRECTORY( + "INSERT OVERWRITE DIRECTORY '/path/to/output' SELECT * FROM source_table;", + "INSERT OVERWRITE DIRECTORY USING myTable SELECT * FROM source_table;", + "INSERT OVERWRITE LOCAL DIRECTORY '/tmp/destination' STORED AS orc SELECT * FROM" + + " test_table;"), + LOAD("LOAD DATA INPATH '/path/to/data' INTO TABLE target_table;"), + + // Data Retrieval Statements + SELECT("SELECT 1;"), + EXPLAIN("EXPLAIN SELECT * FROM my_table;"), + COMMON_TABLE_EXPRESSION( + "WITH cte AS (SELECT * FROM my_table WHERE age > 30) SELECT * FROM cte;"), + CLUSTER_BY_CLAUSE("SELECT * FROM my_table CLUSTER BY age;"), + DISTRIBUTE_BY_CLAUSE("SELECT * FROM my_table DISTRIBUTE BY name;"), + GROUP_BY_CLAUSE("SELECT name, count(*) FROM my_table GROUP BY name;"), + HAVING_CLAUSE("SELECT name, count(*) FROM my_table GROUP BY name HAVING count(*) > 1;"), + HINTS("SELECT /*+ BROADCAST(my_table) */ * FROM my_table;"), + INLINE_TABLE("SELECT * FROM (VALUES (1, 'a'), (2, 'b')) AS inline_table(id, value);"), + FILE("SELECT * FROM text.`/path/to/file.txt`;"), + INNER_JOIN("SELECT t1.name, t2.age FROM table1 t1 INNER JOIN table2 t2 ON t1.id = t2.id;"), + CROSS_JOIN("SELECT t1.name, t2.age FROM table1 t1 CROSS JOIN table2 t2;"), + LEFT_OUTER_JOIN( + "SELECT t1.name, t2.age FROM table1 t1 LEFT OUTER JOIN table2 t2 ON t1.id = t2.id;"), + LEFT_SEMI_JOIN("SELECT t1.name FROM table1 t1 LEFT SEMI JOIN table2 t2 ON t1.id = t2.id;"), + RIGHT_OUTER_JOIN( + "SELECT t1.name, t2.age FROM table1 t1 RIGHT OUTER JOIN table2 t2 ON t1.id = t2.id;"), + FULL_OUTER_JOIN( + "SELECT t1.name, t2.age FROM table1 t1 FULL OUTER JOIN table2 t2 ON t1.id = t2.id;"), + LEFT_ANTI_JOIN("SELECT t1.name FROM table1 t1 LEFT ANTI JOIN table2 t2 ON t1.id = t2.id;"), + LIKE_PREDICATE("SELECT * FROM my_table WHERE name LIKE 'A%';"), + LIMIT_CLAUSE("SELECT * FROM my_table LIMIT 10;"), + OFFSET_CLAUSE("SELECT * FROM my_table OFFSET 5;"), + ORDER_BY_CLAUSE("SELECT * FROM my_table ORDER BY age DESC;"), + SET_OPERATORS("SELECT * FROM table1 UNION SELECT * FROM table2;"), + SORT_BY_CLAUSE("SELECT * FROM my_table SORT BY age DESC;"), + TABLESAMPLE("SELECT * FROM my_table TABLESAMPLE(10 PERCENT);"), + // TABLE_VALUED_FUNCTION("SELECT explode(array(10, 20));"), TODO: Need to handle this case + TABLE_VALUED_FUNCTION("SELECT * FROM explode(array(10, 20));"), + WHERE_CLAUSE("SELECT * FROM my_table WHERE age > 30;"), + AGGREGATE_FUNCTION("SELECT count(*) FROM my_table;"), + WINDOW_FUNCTION("SELECT name, age, rank() OVER (ORDER BY age DESC) FROM my_table;"), + CASE_CLAUSE("SELECT name, CASE WHEN age > 30 THEN 'Adult' ELSE 'Young' END FROM my_table;"), + PIVOT_CLAUSE( + "SELECT * FROM (SELECT name, age, gender FROM my_table) PIVOT (COUNT(*) FOR gender IN ('M'," + + " 'F'));"), + UNPIVOT_CLAUSE( + "SELECT name, value, category FROM (SELECT name, 'M' AS gender, age AS male_age, 0 AS" + + " female_age FROM my_table) UNPIVOT (value FOR category IN (male_age, female_age));"), + LATERAL_VIEW_CLAUSE( + "SELECT name, age, exploded_value FROM my_table LATERAL VIEW OUTER EXPLODE(split(comments," + + " ',')) exploded_table AS exploded_value;"), + LATERAL_SUBQUERY( + "SELECT * FROM t1, LATERAL (SELECT * FROM t2 WHERE t1.c1 = t2.c1);", + "SELECT * 
FROM t1 JOIN LATERAL (SELECT * FROM t2 WHERE t1.c1 = t2.c1);"), + TRANSFORM_CLAUSE( + "SELECT transform(zip_code, name, age) USING 'cat' AS (a, b, c) FROM my_table;"), + + // Auxiliary Statements + ADD_FILE("ADD FILE /tmp/test.txt;"), + ADD_JAR("ADD JAR /path/to/my.jar;"), + ANALYZE_TABLE( + "ANALYZE TABLE my_table COMPUTE STATISTICS;", + "ANALYZE TABLES IN school_db COMPUTE STATISTICS NOSCAN;"), + CACHE_TABLE("CACHE TABLE my_table;"), + CLEAR_CACHE("CLEAR CACHE;"), + DESCRIBE_DATABASE("DESCRIBE DATABASE my_db;"), + DESCRIBE_FUNCTION("DESCRIBE FUNCTION my_function;"), + DESCRIBE_QUERY("DESCRIBE QUERY SELECT * FROM my_table;"), + DESCRIBE_TABLE("DESCRIBE TABLE my_table;"), + LIST_FILE("LIST FILE '/path/to/files';"), + LIST_JAR("LIST JAR;"), + REFRESH("REFRESH;"), + REFRESH_TABLE("REFRESH TABLE my_table;"), + REFRESH_FUNCTION("REFRESH FUNCTION my_function;"), + RESET("RESET;", "RESET spark.abc;", "RESET `key`;"), + SET( + "SET spark.sql.shuffle.partitions=200;", + "SET -v;", + "SET;", + "SET spark.sql.variable.substitute;"), + SHOW_COLUMNS("SHOW COLUMNS FROM my_table;"), + SHOW_CREATE_TABLE("SHOW CREATE TABLE my_table;"), + SHOW_DATABASES("SHOW DATABASES;"), + SHOW_FUNCTIONS("SHOW FUNCTIONS;"), + SHOW_PARTITIONS("SHOW PARTITIONS my_table;"), + SHOW_TABLE_EXTENDED("SHOW TABLE EXTENDED LIKE 'my_table';"), + SHOW_TABLES("SHOW TABLES;"), + SHOW_TBLPROPERTIES("SHOW TBLPROPERTIES my_table;"), + SHOW_VIEWS("SHOW VIEWS;"), + UNCACHE_TABLE("UNCACHE TABLE my_table;"), + + // Functions + ARRAY_FUNCTIONS("SELECT array_contains(array(1, 2, 3), 2);"), + MAP_FUNCTIONS("SELECT map_keys(map('a', 1, 'b', 2));"), + DATE_AND_TIMESTAMP_FUNCTIONS("SELECT date_format(current_date(), 'yyyy-MM-dd');"), + JSON_FUNCTIONS("SELECT json_tuple('{\"a\":1, \"b\":2}', 'a', 'b');"), + MATHEMATICAL_FUNCTIONS("SELECT round(3.1415, 2);"), + STRING_FUNCTIONS("SELECT ascii('Hello');"), + BITWISE_FUNCTIONS("SELECT bit_count(42);"), + CONVERSION_FUNCTIONS("SELECT cast('2023-04-01' as date);"), + CONDITIONAL_FUNCTIONS("SELECT if(1 > 0, 'true', 'false');"), + PREDICATE_FUNCTIONS("SELECT isnotnull(1);"), + CSV_FUNCTIONS("SELECT from_csv(array('a', 'b', 'c'), ',');"), + MISC_FUNCTIONS("SELECT current_user();"), + + // Aggregate-like Functions + AGGREGATE_FUNCTIONS("SELECT count(*), max(age), min(age) FROM my_table;"), + WINDOW_FUNCTIONS("SELECT name, age, rank() OVER (ORDER BY age DESC) FROM my_table;"), + + // Generator Functions + GENERATOR_FUNCTIONS("SELECT explode(array(1, 2, 3));"), + + // UDFs (User-Defined Functions) + SCALAR_USER_DEFINED_FUNCTIONS("SELECT my_udf(name) FROM my_table;"), + USER_DEFINED_AGGREGATE_FUNCTIONS("SELECT my_udaf(age) FROM my_table GROUP BY name;"), + INTEGRATION_WITH_HIVE_UDFS_UDAFS_UDTFS("SELECT my_hive_udf(name) FROM my_table;"); + + @Getter private final String[] queries; + + TestElement(String... queries) { + this.queries = queries; + } + } + + @Test + void testAllowAllByDefault() { + when(mockedProvider.getValidatorForDatasource(any())) + .thenReturn(new DefaultGrammarElementValidator()); + VerifyValidator v = new VerifyValidator(sqlQueryValidator, DataSourceType.SPARK); + Arrays.stream(TestElement.values()).forEach(v::ok); + } + + @Test + void testDenyAllValidator() { + when(mockedProvider.getValidatorForDatasource(any())).thenReturn(element -> false); + VerifyValidator v = new VerifyValidator(sqlQueryValidator, DataSourceType.SPARK); + // The elements which doesn't have validation will be accepted. 
+ // That's why there are some ok case + + // DDL Statements + v.ng(TestElement.ALTER_DATABASE); + v.ng(TestElement.ALTER_TABLE); + v.ng(TestElement.ALTER_VIEW); + v.ng(TestElement.CREATE_DATABASE); + v.ng(TestElement.CREATE_FUNCTION); + v.ng(TestElement.CREATE_TABLE); + v.ng(TestElement.CREATE_VIEW); + v.ng(TestElement.DROP_DATABASE); + v.ng(TestElement.DROP_FUNCTION); + v.ng(TestElement.DROP_TABLE); + v.ng(TestElement.DROP_VIEW); + v.ng(TestElement.REPAIR_TABLE); + v.ng(TestElement.TRUNCATE_TABLE); + + // DML Statements + v.ng(TestElement.INSERT_TABLE); + v.ng(TestElement.INSERT_OVERWRITE_DIRECTORY); + v.ng(TestElement.LOAD); + + // Data Retrieval + v.ng(TestElement.EXPLAIN); + v.ng(TestElement.COMMON_TABLE_EXPRESSION); + v.ng(TestElement.CLUSTER_BY_CLAUSE); + v.ng(TestElement.DISTRIBUTE_BY_CLAUSE); + v.ok(TestElement.GROUP_BY_CLAUSE); + v.ok(TestElement.HAVING_CLAUSE); + v.ng(TestElement.HINTS); + v.ng(TestElement.INLINE_TABLE); + v.ng(TestElement.FILE); + v.ng(TestElement.INNER_JOIN); + v.ng(TestElement.CROSS_JOIN); + v.ng(TestElement.LEFT_OUTER_JOIN); + v.ng(TestElement.LEFT_SEMI_JOIN); + v.ng(TestElement.RIGHT_OUTER_JOIN); + v.ng(TestElement.FULL_OUTER_JOIN); + v.ng(TestElement.LEFT_ANTI_JOIN); + v.ok(TestElement.LIKE_PREDICATE); + v.ok(TestElement.LIMIT_CLAUSE); + v.ok(TestElement.OFFSET_CLAUSE); + v.ok(TestElement.ORDER_BY_CLAUSE); + v.ok(TestElement.SET_OPERATORS); + v.ok(TestElement.SORT_BY_CLAUSE); + v.ng(TestElement.TABLESAMPLE); + v.ng(TestElement.TABLE_VALUED_FUNCTION); + v.ok(TestElement.WHERE_CLAUSE); + v.ok(TestElement.AGGREGATE_FUNCTION); + v.ok(TestElement.WINDOW_FUNCTION); + v.ok(TestElement.CASE_CLAUSE); + v.ok(TestElement.PIVOT_CLAUSE); + v.ok(TestElement.UNPIVOT_CLAUSE); + v.ng(TestElement.LATERAL_VIEW_CLAUSE); + v.ng(TestElement.LATERAL_SUBQUERY); + v.ng(TestElement.TRANSFORM_CLAUSE); + + // Auxiliary Statements + v.ng(TestElement.ADD_FILE); + v.ng(TestElement.ADD_JAR); + v.ng(TestElement.ANALYZE_TABLE); + v.ng(TestElement.CACHE_TABLE); + v.ng(TestElement.CLEAR_CACHE); + v.ng(TestElement.DESCRIBE_DATABASE); + v.ng(TestElement.DESCRIBE_FUNCTION); + v.ng(TestElement.DESCRIBE_QUERY); + v.ng(TestElement.DESCRIBE_TABLE); + v.ng(TestElement.LIST_FILE); + v.ng(TestElement.LIST_JAR); + v.ng(TestElement.REFRESH); + v.ng(TestElement.REFRESH_TABLE); + v.ng(TestElement.REFRESH_FUNCTION); + v.ng(TestElement.RESET); + v.ng(TestElement.SET); + v.ng(TestElement.SHOW_COLUMNS); + v.ng(TestElement.SHOW_CREATE_TABLE); + v.ng(TestElement.SHOW_DATABASES); + v.ng(TestElement.SHOW_FUNCTIONS); + v.ng(TestElement.SHOW_PARTITIONS); + v.ng(TestElement.SHOW_TABLE_EXTENDED); + v.ng(TestElement.SHOW_TABLES); + v.ng(TestElement.SHOW_TBLPROPERTIES); + v.ng(TestElement.SHOW_VIEWS); + v.ng(TestElement.UNCACHE_TABLE); + + // Functions + v.ok(TestElement.ARRAY_FUNCTIONS); + v.ng(TestElement.MAP_FUNCTIONS); + v.ok(TestElement.DATE_AND_TIMESTAMP_FUNCTIONS); + v.ok(TestElement.JSON_FUNCTIONS); + v.ok(TestElement.MATHEMATICAL_FUNCTIONS); + v.ok(TestElement.STRING_FUNCTIONS); + v.ng(TestElement.BITWISE_FUNCTIONS); + v.ok(TestElement.CONVERSION_FUNCTIONS); + v.ok(TestElement.CONDITIONAL_FUNCTIONS); + v.ok(TestElement.PREDICATE_FUNCTIONS); + v.ng(TestElement.CSV_FUNCTIONS); + v.ng(TestElement.MISC_FUNCTIONS); + + // Aggregate-like Functions + v.ok(TestElement.AGGREGATE_FUNCTIONS); + v.ok(TestElement.WINDOW_FUNCTIONS); + + // Generator Functions + v.ng(TestElement.GENERATOR_FUNCTIONS); + + // UDFs + v.ng(TestElement.SCALAR_USER_DEFINED_FUNCTIONS); + v.ng(TestElement.USER_DEFINED_AGGREGATE_FUNCTIONS); + 
v.ng(TestElement.INTEGRATION_WITH_HIVE_UDFS_UDAFS_UDTFS); + } + + @Test + void testS3glueQueries() { + when(mockedProvider.getValidatorForDatasource(any())) + .thenReturn(new S3GlueSQLGrammarElementValidator()); + VerifyValidator v = new VerifyValidator(sqlQueryValidator, DataSourceType.S3GLUE); + + // DDL Statements + v.ok(TestElement.ALTER_DATABASE); + v.ok(TestElement.ALTER_TABLE); + v.ng(TestElement.ALTER_VIEW); + v.ok(TestElement.CREATE_DATABASE); + v.ng(TestElement.CREATE_FUNCTION); + v.ok(TestElement.CREATE_TABLE); + v.ng(TestElement.CREATE_VIEW); + v.ok(TestElement.DROP_DATABASE); + v.ng(TestElement.DROP_FUNCTION); + v.ok(TestElement.DROP_TABLE); + v.ng(TestElement.DROP_VIEW); + v.ok(TestElement.REPAIR_TABLE); + v.ok(TestElement.TRUNCATE_TABLE); + + // DML Statements + v.ng(TestElement.INSERT_TABLE); + v.ng(TestElement.INSERT_OVERWRITE_DIRECTORY); + v.ng(TestElement.LOAD); + + // Data Retrieval + v.ok(TestElement.SELECT); + v.ok(TestElement.EXPLAIN); + v.ok(TestElement.COMMON_TABLE_EXPRESSION); + v.ng(TestElement.CLUSTER_BY_CLAUSE); + v.ng(TestElement.DISTRIBUTE_BY_CLAUSE); + v.ok(TestElement.GROUP_BY_CLAUSE); + v.ok(TestElement.HAVING_CLAUSE); + v.ng(TestElement.HINTS); + v.ng(TestElement.INLINE_TABLE); + v.ng(TestElement.FILE); + v.ok(TestElement.INNER_JOIN); + v.ng(TestElement.CROSS_JOIN); + v.ok(TestElement.LEFT_OUTER_JOIN); + v.ng(TestElement.LEFT_SEMI_JOIN); + v.ng(TestElement.RIGHT_OUTER_JOIN); + v.ng(TestElement.FULL_OUTER_JOIN); + v.ng(TestElement.LEFT_ANTI_JOIN); + v.ok(TestElement.LIKE_PREDICATE); + v.ok(TestElement.LIMIT_CLAUSE); + v.ok(TestElement.OFFSET_CLAUSE); + v.ok(TestElement.ORDER_BY_CLAUSE); + v.ok(TestElement.SET_OPERATORS); + v.ok(TestElement.SORT_BY_CLAUSE); + v.ng(TestElement.TABLESAMPLE); + v.ng(TestElement.TABLE_VALUED_FUNCTION); + v.ok(TestElement.WHERE_CLAUSE); + v.ok(TestElement.AGGREGATE_FUNCTION); + v.ok(TestElement.WINDOW_FUNCTION); + v.ok(TestElement.CASE_CLAUSE); + v.ok(TestElement.PIVOT_CLAUSE); + v.ok(TestElement.UNPIVOT_CLAUSE); + v.ok(TestElement.LATERAL_VIEW_CLAUSE); + v.ok(TestElement.LATERAL_SUBQUERY); + v.ng(TestElement.TRANSFORM_CLAUSE); + + // Auxiliary Statements + v.ng(TestElement.ADD_FILE); + v.ng(TestElement.ADD_JAR); + v.ok(TestElement.ANALYZE_TABLE); + v.ok(TestElement.CACHE_TABLE); + v.ok(TestElement.CLEAR_CACHE); + v.ok(TestElement.DESCRIBE_DATABASE); + v.ng(TestElement.DESCRIBE_FUNCTION); + v.ok(TestElement.DESCRIBE_QUERY); + v.ok(TestElement.DESCRIBE_TABLE); + v.ng(TestElement.LIST_FILE); + v.ng(TestElement.LIST_JAR); + v.ng(TestElement.REFRESH); + v.ok(TestElement.REFRESH_TABLE); + v.ng(TestElement.REFRESH_FUNCTION); + v.ng(TestElement.RESET); + v.ng(TestElement.SET); + v.ok(TestElement.SHOW_COLUMNS); + v.ok(TestElement.SHOW_CREATE_TABLE); + v.ok(TestElement.SHOW_DATABASES); + v.ng(TestElement.SHOW_FUNCTIONS); + v.ok(TestElement.SHOW_PARTITIONS); + v.ok(TestElement.SHOW_TABLE_EXTENDED); + v.ok(TestElement.SHOW_TABLES); + v.ok(TestElement.SHOW_TBLPROPERTIES); + v.ng(TestElement.SHOW_VIEWS); + v.ok(TestElement.UNCACHE_TABLE); + + // Functions + v.ok(TestElement.ARRAY_FUNCTIONS); + v.ok(TestElement.MAP_FUNCTIONS); + v.ok(TestElement.DATE_AND_TIMESTAMP_FUNCTIONS); + v.ok(TestElement.JSON_FUNCTIONS); + v.ok(TestElement.MATHEMATICAL_FUNCTIONS); + v.ok(TestElement.STRING_FUNCTIONS); + v.ng(TestElement.BITWISE_FUNCTIONS); + v.ok(TestElement.CONVERSION_FUNCTIONS); + v.ok(TestElement.CONDITIONAL_FUNCTIONS); + v.ok(TestElement.PREDICATE_FUNCTIONS); + v.ok(TestElement.CSV_FUNCTIONS); + v.ng(TestElement.MISC_FUNCTIONS); + + // 
Aggregate-like Functions + v.ok(TestElement.AGGREGATE_FUNCTIONS); + v.ok(TestElement.WINDOW_FUNCTIONS); + + // Generator Functions + v.ok(TestElement.GENERATOR_FUNCTIONS); + + // UDFs + v.ng(TestElement.SCALAR_USER_DEFINED_FUNCTIONS); + v.ng(TestElement.USER_DEFINED_AGGREGATE_FUNCTIONS); + v.ng(TestElement.INTEGRATION_WITH_HIVE_UDFS_UDAFS_UDTFS); + } + + @Test + void testSecurityLakeQueries() { + when(mockedProvider.getValidatorForDatasource(any())) + .thenReturn(new SecurityLakeSQLGrammarElementValidator()); + VerifyValidator v = new VerifyValidator(sqlQueryValidator, DataSourceType.SECURITY_LAKE); + + // DDL Statements + v.ng(TestElement.ALTER_DATABASE); + v.ng(TestElement.ALTER_TABLE); + v.ng(TestElement.ALTER_VIEW); + v.ng(TestElement.CREATE_DATABASE); + v.ng(TestElement.CREATE_FUNCTION); + v.ng(TestElement.CREATE_TABLE); + v.ng(TestElement.CREATE_VIEW); + v.ng(TestElement.DROP_DATABASE); + v.ng(TestElement.DROP_FUNCTION); + v.ng(TestElement.DROP_TABLE); + v.ng(TestElement.DROP_VIEW); + v.ng(TestElement.REPAIR_TABLE); + v.ng(TestElement.TRUNCATE_TABLE); + + // DML Statements + v.ng(TestElement.INSERT_TABLE); + v.ng(TestElement.INSERT_OVERWRITE_DIRECTORY); + v.ng(TestElement.LOAD); + + // Data Retrieval + v.ok(TestElement.SELECT); + v.ok(TestElement.EXPLAIN); + v.ok(TestElement.COMMON_TABLE_EXPRESSION); + v.ng(TestElement.CLUSTER_BY_CLAUSE); + v.ng(TestElement.DISTRIBUTE_BY_CLAUSE); + v.ok(TestElement.GROUP_BY_CLAUSE); + v.ok(TestElement.HAVING_CLAUSE); + v.ng(TestElement.HINTS); + v.ng(TestElement.INLINE_TABLE); + v.ng(TestElement.FILE); + v.ok(TestElement.INNER_JOIN); + v.ng(TestElement.CROSS_JOIN); + v.ok(TestElement.LEFT_OUTER_JOIN); + v.ng(TestElement.LEFT_SEMI_JOIN); + v.ng(TestElement.RIGHT_OUTER_JOIN); + v.ng(TestElement.FULL_OUTER_JOIN); + v.ng(TestElement.LEFT_ANTI_JOIN); + v.ok(TestElement.LIKE_PREDICATE); + v.ok(TestElement.LIMIT_CLAUSE); + v.ok(TestElement.OFFSET_CLAUSE); + v.ok(TestElement.ORDER_BY_CLAUSE); + v.ok(TestElement.SET_OPERATORS); + v.ok(TestElement.SORT_BY_CLAUSE); + v.ng(TestElement.TABLESAMPLE); + v.ng(TestElement.TABLE_VALUED_FUNCTION); + v.ok(TestElement.WHERE_CLAUSE); + v.ok(TestElement.AGGREGATE_FUNCTION); + v.ok(TestElement.WINDOW_FUNCTION); + v.ok(TestElement.CASE_CLAUSE); + v.ok(TestElement.PIVOT_CLAUSE); + v.ok(TestElement.UNPIVOT_CLAUSE); + v.ok(TestElement.LATERAL_VIEW_CLAUSE); + v.ok(TestElement.LATERAL_SUBQUERY); + v.ng(TestElement.TRANSFORM_CLAUSE); + + // Auxiliary Statements + v.ng(TestElement.ADD_FILE); + v.ng(TestElement.ADD_JAR); + v.ng(TestElement.ANALYZE_TABLE); + v.ng(TestElement.CACHE_TABLE); + v.ng(TestElement.CLEAR_CACHE); + v.ng(TestElement.DESCRIBE_DATABASE); + v.ng(TestElement.DESCRIBE_FUNCTION); + v.ng(TestElement.DESCRIBE_QUERY); + v.ng(TestElement.DESCRIBE_TABLE); + v.ng(TestElement.LIST_FILE); + v.ng(TestElement.LIST_JAR); + v.ng(TestElement.REFRESH); + v.ng(TestElement.REFRESH_TABLE); + v.ng(TestElement.REFRESH_FUNCTION); + v.ng(TestElement.RESET); + v.ng(TestElement.SET); + v.ng(TestElement.SHOW_COLUMNS); + v.ng(TestElement.SHOW_CREATE_TABLE); + v.ng(TestElement.SHOW_DATABASES); + v.ng(TestElement.SHOW_FUNCTIONS); + v.ng(TestElement.SHOW_PARTITIONS); + v.ng(TestElement.SHOW_TABLE_EXTENDED); + v.ng(TestElement.SHOW_TABLES); + v.ng(TestElement.SHOW_TBLPROPERTIES); + v.ng(TestElement.SHOW_VIEWS); + v.ng(TestElement.UNCACHE_TABLE); + + // Functions + v.ok(TestElement.ARRAY_FUNCTIONS); + v.ok(TestElement.MAP_FUNCTIONS); + v.ok(TestElement.DATE_AND_TIMESTAMP_FUNCTIONS); + v.ok(TestElement.JSON_FUNCTIONS); + 
v.ok(TestElement.MATHEMATICAL_FUNCTIONS); + v.ok(TestElement.STRING_FUNCTIONS); + v.ok(TestElement.BITWISE_FUNCTIONS); + v.ok(TestElement.CONVERSION_FUNCTIONS); + v.ok(TestElement.CONDITIONAL_FUNCTIONS); + v.ok(TestElement.PREDICATE_FUNCTIONS); + v.ng(TestElement.CSV_FUNCTIONS); + v.ng(TestElement.MISC_FUNCTIONS); + + // Aggregate-like Functions + v.ok(TestElement.AGGREGATE_FUNCTIONS); + v.ok(TestElement.WINDOW_FUNCTIONS); + + // Generator Functions + v.ok(TestElement.GENERATOR_FUNCTIONS); + + // UDFs + v.ng(TestElement.SCALAR_USER_DEFINED_FUNCTIONS); + v.ng(TestElement.USER_DEFINED_AGGREGATE_FUNCTIONS); + v.ng(TestElement.INTEGRATION_WITH_HIVE_UDFS_UDAFS_UDTFS); + } + + @Test + void testValidateFlintExtensionQuery() { + assertDoesNotThrow( + () -> + sqlQueryValidator.validateFlintExtensionQuery( + UUID.randomUUID().toString(), DataSourceType.SECURITY_LAKE)); + } + + @Test + void testInvalidIdentifier() { + when(mockedProvider.getValidatorForDatasource(any())).thenReturn(element -> true); + VerifyValidator v = new VerifyValidator(sqlQueryValidator, DataSourceType.SPARK); + v.ng("SELECT a.b.c as a-b-c FROM abc"); + v.ok("SELECT a.b.c as `a-b-c` FROM abc"); + v.ok("SELECT a.b.c as a_b_c FROM abc"); + + v.ng("SELECT a.b.c FROM a-b-c"); + v.ng("SELECT a.b.c FROM a.b-c"); + v.ok("SELECT a.b.c FROM b.c.`a-b-c`"); + v.ok("SELECT a.b.c FROM `a-b-c`"); + } + + @Test + void testUnsupportedType() { + when(mockedProvider.getValidatorForDatasource(any())).thenReturn(element -> true); + VerifyValidator v = new VerifyValidator(sqlQueryValidator, DataSourceType.SPARK); + + v.ng("SELECT cast ( a as DateTime ) FROM tbl"); + v.ok("SELECT cast ( a as DATE ) FROM tbl"); + v.ok("SELECT cast ( a as Date ) FROM tbl"); + v.ok("SELECT cast ( a as Timestamp ) FROM tbl"); + } + + @Test + void testUnsupportedTypedLiteral() { + when(mockedProvider.getValidatorForDatasource(any())).thenReturn(element -> true); + VerifyValidator v = new VerifyValidator(sqlQueryValidator, DataSourceType.SPARK); + + v.ng("SELECT DATETIME '2024-10-11'"); + v.ok("SELECT DATE '2024-10-11'"); + v.ok("SELECT TIMESTAMP '2024-10-11'"); + } + + @Test + void testUnsupportedHiveNativeCommand() { + when(mockedProvider.getValidatorForDatasource(any())).thenReturn(element -> true); + VerifyValidator v = new VerifyValidator(sqlQueryValidator, DataSourceType.SPARK); + + v.ng("CREATE ROLE aaa"); + v.ng("SHOW GRANT"); + v.ng("EXPORT TABLE"); + v.ng("ALTER TABLE aaa NOT CLUSTERED"); + v.ng("START TRANSACTION"); + v.ng("COMMIT"); + v.ng("ROLLBACK"); + v.ng("DFS"); + } + + @AllArgsConstructor + private static class VerifyValidator { + private final SQLQueryValidator validator; + private final DataSourceType dataSourceType; + + public void ok(TestElement query) { + runValidate(query.getQueries()); + } + + public void ok(String query) { + runValidate(query); + } + + public void ng(TestElement query) { + Arrays.stream(query.getQueries()).forEach(this::ng); + } + + public void ng(String query) { + assertThrows( + IllegalArgumentException.class, + () -> runValidate(query), + "The query should throw: query=`" + query.toString() + "`"); + } + + void runValidate(String[] queries) { + Arrays.stream(queries).forEach(query -> validator.validate(query, dataSourceType)); + } + + void runValidate(String query) { + validator.validate(query, dataSourceType); + } + + SingleStatementContext getParser(String query) { + SqlBaseParser sqlBaseParser = + new SqlBaseParser( + new CommonTokenStream(new SqlBaseLexer(new CaseInsensitiveCharStream(query)))); + return 
sqlBaseParser.singleStatement(); + } + } +} diff --git a/spark/src/test/resources/invalid_response.json b/async-query-core/src/test/resources/invalid_response.json similarity index 100% rename from spark/src/test/resources/invalid_response.json rename to async-query-core/src/test/resources/invalid_response.json diff --git a/async-query-core/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker b/async-query-core/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker new file mode 100644 index 0000000000..ca6ee9cea8 --- /dev/null +++ b/async-query-core/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker @@ -0,0 +1 @@ +mock-maker-inline \ No newline at end of file diff --git a/async-query-core/src/test/resources/select_query_response.json b/async-query-core/src/test/resources/select_query_response.json new file mode 100644 index 0000000000..24cb06b49e --- /dev/null +++ b/async-query-core/src/test/resources/select_query_response.json @@ -0,0 +1,12 @@ +{ + "data": { + "result": [ + "{'1':1}" + ], + "schema": [ + "{'column_name':'1','data_type':'integer'}" + ], + "stepId": "s-123456789", + "applicationId": "application-abc" + } +} diff --git a/async-query/build.gradle b/async-query/build.gradle index ee40e5b366..fba74aa216 100644 --- a/async-query/build.gradle +++ b/async-query/build.gradle @@ -16,8 +16,10 @@ repositories { dependencies { + implementation "org.opensearch:opensearch-job-scheduler-spi:${opensearch_build}" + api project(':core') - implementation project(':async-query-core') + api project(':async-query-core') implementation project(':protocol') implementation project(':datasources') implementation project(':legacy') @@ -26,7 +28,7 @@ dependencies { implementation group: 'org.json', name: 'json', version: '20231013' api group: 'com.amazonaws', name: 'aws-java-sdk-emr', version: "${aws_java_sdk_version}" api group: 'com.amazonaws', name: 'aws-java-sdk-emrserverless', version: "${aws_java_sdk_version}" - implementation group: 'commons-io', name: 'commons-io', version: '2.8.0' + implementation group: 'commons-io', name: 'commons-io', version: '2.14.0' testImplementation(platform("org.junit:junit-bom:5.9.3")) @@ -91,22 +93,14 @@ jacocoTestCoverageVerification { rule { element = 'CLASS' excludes = [ - 'org.opensearch.sql.spark.data.constants.*', - 'org.opensearch.sql.spark.rest.*', - 'org.opensearch.sql.spark.transport.model.*', - 'org.opensearch.sql.spark.asyncquery.model.*', - 'org.opensearch.sql.spark.asyncquery.exceptions.*', - 'org.opensearch.sql.spark.dispatcher.model.*', - 'org.opensearch.sql.spark.flint.FlintIndexType', - // ignore because XContext IOException - 'org.opensearch.sql.spark.execution.statestore.StateStore', - 'org.opensearch.sql.spark.execution.session.SessionModel', - 'org.opensearch.sql.spark.execution.statement.StatementModel', - 'org.opensearch.sql.spark.flint.FlintIndexStateModel', - // TODO: add tests for purging flint indices 'org.opensearch.sql.spark.cluster.ClusterManagerEventListener*', 'org.opensearch.sql.spark.cluster.FlintIndexRetention', - 'org.opensearch.sql.spark.cluster.IndexCleanup' + 'org.opensearch.sql.spark.cluster.IndexCleanup', + // ignore because XContext IOException + 'org.opensearch.sql.spark.execution.statestore.StateStore', + 'org.opensearch.sql.spark.rest.*', + 'org.opensearch.sql.spark.scheduler.parser.OpenSearchScheduleQueryJobRequestParser', + 'org.opensearch.sql.spark.transport.model.*' ] limit { counter = 'LINE' diff --git 
a/async-query/src/main/java/org/opensearch/sql/asyncquery/DummyConsumer.java b/async-query/src/main/java/org/opensearch/sql/asyncquery/DummyConsumer.java deleted file mode 100644 index 9b1641e559..0000000000 --- a/async-query/src/main/java/org/opensearch/sql/asyncquery/DummyConsumer.java +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.asyncquery; - -import lombok.AllArgsConstructor; - -// This is a dummy class for scaffolding and should be deleted later -@AllArgsConstructor -public class DummyConsumer { - Dummy dummy; - - public String hello() { - return dummy.hello(); - } -} diff --git a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/OpenSearchAsyncQueryJobMetadataStorageService.java b/async-query/src/main/java/org/opensearch/sql/spark/asyncquery/OpenSearchAsyncQueryJobMetadataStorageService.java similarity index 87% rename from spark/src/main/java/org/opensearch/sql/spark/asyncquery/OpenSearchAsyncQueryJobMetadataStorageService.java rename to async-query/src/main/java/org/opensearch/sql/spark/asyncquery/OpenSearchAsyncQueryJobMetadataStorageService.java index 4847c8e00f..eb377a5cff 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/OpenSearchAsyncQueryJobMetadataStorageService.java +++ b/async-query/src/main/java/org/opensearch/sql/spark/asyncquery/OpenSearchAsyncQueryJobMetadataStorageService.java @@ -12,6 +12,7 @@ import org.opensearch.sql.spark.asyncquery.exceptions.AsyncQueryNotFoundException; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryJobMetadata; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; +import org.opensearch.sql.spark.asyncquery.model.QueryState; import org.opensearch.sql.spark.execution.statestore.OpenSearchStateStoreUtil; import org.opensearch.sql.spark.execution.statestore.StateStore; import org.opensearch.sql.spark.execution.xcontent.AsyncQueryJobMetadataXContentSerializer; @@ -39,6 +40,14 @@ public void storeJobMetadata( OpenSearchStateStoreUtil.getIndexName(asyncQueryJobMetadata.getDatasourceName())); } + @Override + public void updateState( + AsyncQueryJobMetadata asyncQueryJobMetadata, + QueryState newState, + AsyncQueryRequestContext asyncQueryRequestContext) { + // NoOp since AsyncQueryJobMetadata record does not store state now + } + private String mapIdToDocumentId(String id) { return "qid" + id; } diff --git a/spark/src/main/java/org/opensearch/sql/spark/cluster/ClusterManagerEventListener.java b/async-query/src/main/java/org/opensearch/sql/spark/cluster/ClusterManagerEventListener.java similarity index 95% rename from spark/src/main/java/org/opensearch/sql/spark/cluster/ClusterManagerEventListener.java rename to async-query/src/main/java/org/opensearch/sql/spark/cluster/ClusterManagerEventListener.java index 6c660f073c..52c829318a 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/cluster/ClusterManagerEventListener.java +++ b/async-query/src/main/java/org/opensearch/sql/spark/cluster/ClusterManagerEventListener.java @@ -30,13 +30,13 @@ public class ClusterManagerEventListener implements LocalNodeClusterManagerListe private Cancellable flintIndexRetentionCron; private Cancellable flintStreamingJobHouseKeeperCron; - private ClusterService clusterService; - private ThreadPool threadPool; - private Client client; - private Clock clock; - private DataSourceService dataSourceService; - private FlintIndexMetadataService flintIndexMetadataService; - private FlintIndexOpFactory 
flintIndexOpFactory; + private final ClusterService clusterService; + private final ThreadPool threadPool; + private final Client client; + private final Clock clock; + private final DataSourceService dataSourceService; + private final FlintIndexMetadataService flintIndexMetadataService; + private final FlintIndexOpFactory flintIndexOpFactory; private Duration sessionTtlDuration; private Duration resultTtlDuration; private TimeValue streamingJobHouseKeepingInterval; diff --git a/spark/src/main/java/org/opensearch/sql/spark/cluster/FlintIndexRetention.java b/async-query/src/main/java/org/opensearch/sql/spark/cluster/FlintIndexRetention.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/cluster/FlintIndexRetention.java rename to async-query/src/main/java/org/opensearch/sql/spark/cluster/FlintIndexRetention.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/cluster/FlintStreamingJobHouseKeeperTask.java b/async-query/src/main/java/org/opensearch/sql/spark/cluster/FlintStreamingJobHouseKeeperTask.java similarity index 91% rename from spark/src/main/java/org/opensearch/sql/spark/cluster/FlintStreamingJobHouseKeeperTask.java rename to async-query/src/main/java/org/opensearch/sql/spark/cluster/FlintStreamingJobHouseKeeperTask.java index 31b1ecb49c..2dd0a4a7cf 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/cluster/FlintStreamingJobHouseKeeperTask.java +++ b/async-query/src/main/java/org/opensearch/sql/spark/cluster/FlintStreamingJobHouseKeeperTask.java @@ -17,6 +17,7 @@ import org.opensearch.sql.datasources.exceptions.DataSourceNotFoundException; import org.opensearch.sql.legacy.metrics.MetricName; import org.opensearch.sql.legacy.metrics.Metrics; +import org.opensearch.sql.spark.asyncquery.model.NullAsyncQueryRequestContext; import org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions; import org.opensearch.sql.spark.flint.FlintIndexMetadata; import org.opensearch.sql.spark.flint.FlintIndexMetadataService; @@ -29,6 +30,8 @@ public class FlintStreamingJobHouseKeeperTask implements Runnable { private final DataSourceService dataSourceService; private final FlintIndexMetadataService flintIndexMetadataService; private final FlintIndexOpFactory flintIndexOpFactory; + private final NullAsyncQueryRequestContext nullAsyncQueryRequestContext = + new NullAsyncQueryRequestContext(); private static final Logger LOGGER = LogManager.getLogger(FlintStreamingJobHouseKeeperTask.class); protected static final AtomicBoolean isRunning = new AtomicBoolean(false); @@ -91,7 +94,9 @@ private void dropAutoRefreshIndex( String autoRefreshIndex, FlintIndexMetadata flintIndexMetadata, String datasourceName) { // When the datasource is deleted. Possibly Replace with VACUUM Operation. 
LOGGER.info("Attempting to drop auto refresh index: {}", autoRefreshIndex); - flintIndexOpFactory.getDrop(datasourceName).apply(flintIndexMetadata); + flintIndexOpFactory + .getDrop(datasourceName) + .apply(flintIndexMetadata, nullAsyncQueryRequestContext); LOGGER.info("Successfully dropped index: {}", autoRefreshIndex); } @@ -100,7 +105,9 @@ private void alterAutoRefreshIndex( LOGGER.info("Attempting to alter index: {}", autoRefreshIndex); FlintIndexOptions flintIndexOptions = new FlintIndexOptions(); flintIndexOptions.setOption(FlintIndexOptions.AUTO_REFRESH, "false"); - flintIndexOpFactory.getAlter(flintIndexOptions, datasourceName).apply(flintIndexMetadata); + flintIndexOpFactory + .getAlter(flintIndexOptions, datasourceName) + .apply(flintIndexMetadata, nullAsyncQueryRequestContext); LOGGER.info("Successfully altered index: {}", autoRefreshIndex); } @@ -119,7 +126,7 @@ private String getDataSourceName(FlintIndexMetadata flintIndexMetadata) { private Map getAllAutoRefreshIndices() { Map flintIndexMetadataHashMap = - flintIndexMetadataService.getFlintIndexMetadata("flint_*"); + flintIndexMetadataService.getFlintIndexMetadata("flint_*", nullAsyncQueryRequestContext); return flintIndexMetadataHashMap.entrySet().stream() .filter(entry -> entry.getValue().getFlintIndexOptions().autoRefresh()) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); diff --git a/spark/src/main/java/org/opensearch/sql/spark/cluster/IndexCleanup.java b/async-query/src/main/java/org/opensearch/sql/spark/cluster/IndexCleanup.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/cluster/IndexCleanup.java rename to async-query/src/main/java/org/opensearch/sql/spark/cluster/IndexCleanup.java diff --git a/async-query/src/main/java/org/opensearch/sql/spark/config/OpenSearchAsyncQuerySchedulerConfigComposer.java b/async-query/src/main/java/org/opensearch/sql/spark/config/OpenSearchAsyncQuerySchedulerConfigComposer.java new file mode 100644 index 0000000000..28fd4b1b58 --- /dev/null +++ b/async-query/src/main/java/org/opensearch/sql/spark/config/OpenSearchAsyncQuerySchedulerConfigComposer.java @@ -0,0 +1,41 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.config; + +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_JOB_EXTERNAL_SCHEDULER_ENABLED; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_JOB_EXTERNAL_SCHEDULER_INTERVAL; + +import lombok.RequiredArgsConstructor; +import org.opensearch.core.common.Strings; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; +import org.opensearch.sql.spark.dispatcher.model.DispatchQueryRequest; +import org.opensearch.sql.spark.parameter.GeneralSparkParameterComposer; +import org.opensearch.sql.spark.parameter.SparkSubmitParameters; + +@RequiredArgsConstructor +public class OpenSearchAsyncQuerySchedulerConfigComposer implements GeneralSparkParameterComposer { + private final Settings settings; + + @Override + public void compose( + SparkSubmitParameters sparkSubmitParameters, + DispatchQueryRequest dispatchQueryRequest, + AsyncQueryRequestContext context) { + Boolean externalSchedulerEnabled = + settings.getSettingValue(Settings.Key.ASYNC_QUERY_EXTERNAL_SCHEDULER_ENABLED); + String externalSchedulerInterval = + settings.getSettingValue(Settings.Key.ASYNC_QUERY_EXTERNAL_SCHEDULER_INTERVAL); + sparkSubmitParameters.setConfigItem( + 
FLINT_JOB_EXTERNAL_SCHEDULER_ENABLED, String.valueOf(externalSchedulerEnabled)); + if (!Strings.isNullOrEmpty(externalSchedulerInterval)) { + externalSchedulerInterval = + "\"" + externalSchedulerInterval + "\""; // Wrap the value with double quotes + sparkSubmitParameters.setConfigItem( + FLINT_JOB_EXTERNAL_SCHEDULER_INTERVAL, externalSchedulerInterval); + } + } +} diff --git a/async-query/src/main/java/org/opensearch/sql/spark/config/OpenSearchExtraParameterComposer.java b/async-query/src/main/java/org/opensearch/sql/spark/config/OpenSearchExtraParameterComposer.java new file mode 100644 index 0000000000..1925ada46e --- /dev/null +++ b/async-query/src/main/java/org/opensearch/sql/spark/config/OpenSearchExtraParameterComposer.java @@ -0,0 +1,30 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.config; + +import lombok.RequiredArgsConstructor; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; +import org.opensearch.sql.spark.dispatcher.model.DispatchQueryRequest; +import org.opensearch.sql.spark.parameter.GeneralSparkParameterComposer; +import org.opensearch.sql.spark.parameter.SparkSubmitParameters; + +/** Load extra parameters from settings and add to Spark submit parameters */ +@RequiredArgsConstructor +public class OpenSearchExtraParameterComposer implements GeneralSparkParameterComposer { + private final SparkExecutionEngineConfigClusterSettingLoader settingLoader; + + @Override + public void compose( + SparkSubmitParameters sparkSubmitParameters, + DispatchQueryRequest dispatchQueryRequest, + AsyncQueryRequestContext context) { + settingLoader + .load() + .ifPresent( + settings -> + sparkSubmitParameters.setExtraParameters(settings.getSparkSubmitParameters())); + } +} diff --git a/async-query/src/main/java/org/opensearch/sql/spark/config/OpenSearchSparkSubmitParameterModifier.java b/async-query/src/main/java/org/opensearch/sql/spark/config/OpenSearchSparkSubmitParameterModifier.java new file mode 100644 index 0000000000..117d161440 --- /dev/null +++ b/async-query/src/main/java/org/opensearch/sql/spark/config/OpenSearchSparkSubmitParameterModifier.java @@ -0,0 +1,20 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.config; + +import lombok.AllArgsConstructor; +import org.opensearch.sql.spark.parameter.SparkSubmitParametersBuilder; + +@AllArgsConstructor +public class OpenSearchSparkSubmitParameterModifier implements SparkSubmitParameterModifier { + + private String extraParameters; + + @Override + public void modifyParameters(SparkSubmitParametersBuilder builder) { + builder.extraParameters(this.extraParameters); + } +} diff --git a/spark/src/main/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigClusterSetting.java b/async-query/src/main/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigClusterSetting.java similarity index 75% rename from spark/src/main/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigClusterSetting.java rename to async-query/src/main/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigClusterSetting.java index 0347f5ffc1..f940680c06 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigClusterSetting.java +++ b/async-query/src/main/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigClusterSetting.java @@ -6,14 +6,20 @@ package org.opensearch.sql.spark.config; import 
com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.google.gson.Gson; +import lombok.AllArgsConstructor; +import lombok.Builder; import lombok.Data; -import org.opensearch.sql.utils.SerializeUtils; +import lombok.NoArgsConstructor; /** * This POJO is just for reading stringified json in `plugins.query.executionengine.spark.config` * setting. */ @Data +@Builder +@AllArgsConstructor +@NoArgsConstructor @JsonIgnoreProperties(ignoreUnknown = true) public class SparkExecutionEngineConfigClusterSetting { // optional @@ -27,7 +33,6 @@ public class SparkExecutionEngineConfigClusterSetting { public static SparkExecutionEngineConfigClusterSetting toSparkExecutionEngineConfig( String jsonString) { - return SerializeUtils.buildGson() - .fromJson(jsonString, SparkExecutionEngineConfigClusterSetting.class); + return new Gson().fromJson(jsonString, SparkExecutionEngineConfigClusterSetting.class); } } diff --git a/async-query/src/main/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigClusterSettingLoader.java b/async-query/src/main/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigClusterSettingLoader.java new file mode 100644 index 0000000000..73b057ca5c --- /dev/null +++ b/async-query/src/main/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigClusterSettingLoader.java @@ -0,0 +1,36 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.config; + +import static org.opensearch.sql.common.setting.Settings.Key.SPARK_EXECUTION_ENGINE_CONFIG; + +import java.security.AccessController; +import java.security.PrivilegedAction; +import java.util.Optional; +import lombok.RequiredArgsConstructor; +import org.apache.commons.lang3.StringUtils; +import org.opensearch.sql.common.setting.Settings; + +/** Load SparkExecutionEngineConfigClusterSetting from settings with privilege check. 
*/ +@RequiredArgsConstructor +public class SparkExecutionEngineConfigClusterSettingLoader { + private final Settings settings; + + public Optional load() { + String sparkExecutionEngineConfigSettingString = + this.settings.getSettingValue(SPARK_EXECUTION_ENGINE_CONFIG); + if (!StringUtils.isBlank(sparkExecutionEngineConfigSettingString)) { + return Optional.of( + AccessController.doPrivileged( + (PrivilegedAction) + () -> + SparkExecutionEngineConfigClusterSetting.toSparkExecutionEngineConfig( + sparkExecutionEngineConfigSettingString))); + } else { + return Optional.empty(); + } + } +} diff --git a/async-query/src/main/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigSupplierImpl.java b/async-query/src/main/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigSupplierImpl.java new file mode 100644 index 0000000000..66ad964ad1 --- /dev/null +++ b/async-query/src/main/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigSupplierImpl.java @@ -0,0 +1,40 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.config; + +import static org.opensearch.sql.common.setting.Settings.Key.CLUSTER_NAME; + +import lombok.AllArgsConstructor; +import org.opensearch.cluster.ClusterName; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; + +@AllArgsConstructor +public class SparkExecutionEngineConfigSupplierImpl implements SparkExecutionEngineConfigSupplier { + + private final Settings settings; + private final SparkExecutionEngineConfigClusterSettingLoader settingLoader; + + @Override + public SparkExecutionEngineConfig getSparkExecutionEngineConfig( + AsyncQueryRequestContext asyncQueryRequestContext) { + ClusterName clusterName = settings.getSettingValue(CLUSTER_NAME); + return getBuilderFromSettingsIfAvailable().clusterName(clusterName.value()).build(); + } + + private SparkExecutionEngineConfig.SparkExecutionEngineConfigBuilder + getBuilderFromSettingsIfAvailable() { + return settingLoader + .load() + .map( + setting -> + SparkExecutionEngineConfig.builder() + .applicationId(setting.getApplicationId()) + .executionRoleARN(setting.getExecutionRoleARN()) + .region(setting.getRegion())) + .orElse(SparkExecutionEngineConfig.builder()); + } +} diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/session/OpenSearchSessionConfigSupplier.java b/async-query/src/main/java/org/opensearch/sql/spark/execution/session/OpenSearchSessionConfigSupplier.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/execution/session/OpenSearchSessionConfigSupplier.java rename to async-query/src/main/java/org/opensearch/sql/spark/execution/session/OpenSearchSessionConfigSupplier.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/statestore/FromXContent.java b/async-query/src/main/java/org/opensearch/sql/spark/execution/statestore/FromXContent.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/execution/statestore/FromXContent.java rename to async-query/src/main/java/org/opensearch/sql/spark/execution/statestore/FromXContent.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/statestore/OpenSearchSessionStorageService.java b/async-query/src/main/java/org/opensearch/sql/spark/execution/statestore/OpenSearchSessionStorageService.java similarity index 68% rename from 
spark/src/main/java/org/opensearch/sql/spark/execution/statestore/OpenSearchSessionStorageService.java rename to async-query/src/main/java/org/opensearch/sql/spark/execution/statestore/OpenSearchSessionStorageService.java index eefc6a9b14..db5ded46b5 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/execution/statestore/OpenSearchSessionStorageService.java +++ b/async-query/src/main/java/org/opensearch/sql/spark/execution/statestore/OpenSearchSessionStorageService.java @@ -7,6 +7,9 @@ import java.util.Optional; import lombok.RequiredArgsConstructor; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.index.engine.VersionConflictEngineException; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; import org.opensearch.sql.spark.execution.session.SessionModel; import org.opensearch.sql.spark.execution.session.SessionState; @@ -14,6 +17,7 @@ @RequiredArgsConstructor public class OpenSearchSessionStorageService implements SessionStorageService { + private static final Logger LOG = LogManager.getLogger(); private final StateStore stateStore; private final SessionModelXContentSerializer serializer; @@ -21,11 +25,17 @@ public class OpenSearchSessionStorageService implements SessionStorageService { @Override public SessionModel createSession( SessionModel sessionModel, AsyncQueryRequestContext asyncQueryRequestContext) { - return stateStore.create( - sessionModel.getId(), - sessionModel, - SessionModel::of, - OpenSearchStateStoreUtil.getIndexName(sessionModel.getDatasourceName())); + try { + return stateStore.create( + sessionModel.getId(), + sessionModel, + SessionModel::of, + OpenSearchStateStoreUtil.getIndexName(sessionModel.getDatasourceName())); + } catch (VersionConflictEngineException e) { + String errorMsg = "session already exist. 
" + sessionModel.getSessionId(); + LOG.error(errorMsg); + throw new IllegalStateException(errorMsg); + } } @Override diff --git a/async-query/src/main/java/org/opensearch/sql/spark/execution/statestore/OpenSearchStatementStorageService.java b/async-query/src/main/java/org/opensearch/sql/spark/execution/statestore/OpenSearchStatementStorageService.java new file mode 100644 index 0000000000..527cd24bc8 --- /dev/null +++ b/async-query/src/main/java/org/opensearch/sql/spark/execution/statestore/OpenSearchStatementStorageService.java @@ -0,0 +1,82 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.execution.statestore; + +import java.util.Optional; +import lombok.RequiredArgsConstructor; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.index.engine.DocumentMissingException; +import org.opensearch.index.engine.VersionConflictEngineException; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; +import org.opensearch.sql.spark.execution.statement.StatementModel; +import org.opensearch.sql.spark.execution.statement.StatementState; +import org.opensearch.sql.spark.execution.xcontent.StatementModelXContentSerializer; + +@RequiredArgsConstructor +public class OpenSearchStatementStorageService implements StatementStorageService { + private static final Logger LOG = LogManager.getLogger(); + + private final StateStore stateStore; + private final StatementModelXContentSerializer serializer; + + @Override + public StatementModel createStatement( + StatementModel statementModel, AsyncQueryRequestContext asyncQueryRequestContext) { + try { + return stateStore.create( + statementModel.getId(), + statementModel, + StatementModel::copy, + OpenSearchStateStoreUtil.getIndexName(statementModel.getDatasourceName())); + } catch (VersionConflictEngineException e) { + String errorMsg = "statement already exist. " + statementModel.getStatementId(); + LOG.error(errorMsg); + throw new IllegalStateException(errorMsg); + } + } + + @Override + public Optional getStatement( + String id, String datasourceName, AsyncQueryRequestContext asyncQueryRequestContext) { + return stateStore.get( + id, serializer::fromXContent, OpenSearchStateStoreUtil.getIndexName(datasourceName)); + } + + @Override + public StatementModel updateStatementState( + StatementModel oldStatementModel, + StatementState statementState, + AsyncQueryRequestContext asyncQueryRequestContext) { + try { + return stateStore.updateState( + oldStatementModel, + statementState, + StatementModel::copyWithState, + OpenSearchStateStoreUtil.getIndexName(oldStatementModel.getDatasourceName())); + } catch (DocumentMissingException e) { + String errorMsg = + String.format( + "cancel statement failed. no statement found. statement: %s.", + oldStatementModel.getStatementId()); + LOG.error(errorMsg); + throw new IllegalStateException(errorMsg); + } catch (VersionConflictEngineException e) { + StatementModel statementModel = + getStatement( + oldStatementModel.getId(), + oldStatementModel.getDatasourceName(), + asyncQueryRequestContext) + .orElse(oldStatementModel); + String errorMsg = + String.format( + "cancel statement failed. 
current statementState: %s " + "statement: %s.", + statementModel.getStatementState(), statementModel.getStatementId()); + LOG.error(errorMsg); + throw new IllegalStateException(errorMsg); + } + } +} diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/statestore/StateStore.java b/async-query/src/main/java/org/opensearch/sql/spark/execution/statestore/StateStore.java similarity index 97% rename from spark/src/main/java/org/opensearch/sql/spark/execution/statestore/StateStore.java rename to async-query/src/main/java/org/opensearch/sql/spark/execution/statestore/StateStore.java index 8d57198277..552c646cbe 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/execution/statestore/StateStore.java +++ b/async-query/src/main/java/org/opensearch/sql/spark/execution/statestore/StateStore.java @@ -67,9 +67,9 @@ */ @RequiredArgsConstructor public class StateStore { - public static String SETTINGS_FILE_NAME = "query_execution_request_settings.yml"; - public static String MAPPING_FILE_NAME = "query_execution_request_mapping.yml"; - public static String ALL_DATASOURCE = "*"; + public static final String SETTINGS_FILE_NAME = "query_execution_request_settings.yml"; + public static final String MAPPING_FILE_NAME = "query_execution_request_mapping.yml"; + public static final String ALL_DATASOURCE = "*"; private static final Logger LOG = LogManager.getLogger(); @@ -237,7 +237,8 @@ private void createIndex(String indexName) { } } - private long count(String indexName, QueryBuilder query) { + @VisibleForTesting + public long count(String indexName, QueryBuilder query) { SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.query(query); searchSourceBuilder.size(0); diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/xcontent/AsyncQueryJobMetadataXContentSerializer.java b/async-query/src/main/java/org/opensearch/sql/spark/execution/xcontent/AsyncQueryJobMetadataXContentSerializer.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/execution/xcontent/AsyncQueryJobMetadataXContentSerializer.java rename to async-query/src/main/java/org/opensearch/sql/spark/execution/xcontent/AsyncQueryJobMetadataXContentSerializer.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/xcontent/FlintIndexStateModelXContentSerializer.java b/async-query/src/main/java/org/opensearch/sql/spark/execution/xcontent/FlintIndexStateModelXContentSerializer.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/execution/xcontent/FlintIndexStateModelXContentSerializer.java rename to async-query/src/main/java/org/opensearch/sql/spark/execution/xcontent/FlintIndexStateModelXContentSerializer.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/xcontent/IndexDMLResultXContentSerializer.java b/async-query/src/main/java/org/opensearch/sql/spark/execution/xcontent/IndexDMLResultXContentSerializer.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/execution/xcontent/IndexDMLResultXContentSerializer.java rename to async-query/src/main/java/org/opensearch/sql/spark/execution/xcontent/IndexDMLResultXContentSerializer.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/xcontent/SessionModelXContentSerializer.java b/async-query/src/main/java/org/opensearch/sql/spark/execution/xcontent/SessionModelXContentSerializer.java similarity index 100% rename from 
spark/src/main/java/org/opensearch/sql/spark/execution/xcontent/SessionModelXContentSerializer.java rename to async-query/src/main/java/org/opensearch/sql/spark/execution/xcontent/SessionModelXContentSerializer.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/xcontent/StatementModelXContentSerializer.java b/async-query/src/main/java/org/opensearch/sql/spark/execution/xcontent/StatementModelXContentSerializer.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/execution/xcontent/StatementModelXContentSerializer.java rename to async-query/src/main/java/org/opensearch/sql/spark/execution/xcontent/StatementModelXContentSerializer.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/xcontent/XContentCommonAttributes.java b/async-query/src/main/java/org/opensearch/sql/spark/execution/xcontent/XContentCommonAttributes.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/execution/xcontent/XContentCommonAttributes.java rename to async-query/src/main/java/org/opensearch/sql/spark/execution/xcontent/XContentCommonAttributes.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/xcontent/XContentSerializer.java b/async-query/src/main/java/org/opensearch/sql/spark/execution/xcontent/XContentSerializer.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/execution/xcontent/XContentSerializer.java rename to async-query/src/main/java/org/opensearch/sql/spark/execution/xcontent/XContentSerializer.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataServiceImpl.java b/async-query/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataServiceImpl.java similarity index 57% rename from spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataServiceImpl.java rename to async-query/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataServiceImpl.java index 893b33b39d..38789dd796 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataServiceImpl.java +++ b/async-query/src/main/java/org/opensearch/sql/spark/flint/FlintIndexMetadataServiceImpl.java @@ -5,10 +5,6 @@ package org.opensearch.sql.spark.flint; -import static org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions.AUTO_REFRESH; -import static org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions.CHECKPOINT_LOCATION; -import static org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions.INCREMENTAL_REFRESH; -import static org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions.WATERMARK_DELAY; import static org.opensearch.sql.spark.flint.FlintIndexMetadata.APP_ID; import static org.opensearch.sql.spark.flint.FlintIndexMetadata.ENV_KEY; import static org.opensearch.sql.spark.flint.FlintIndexMetadata.KIND_KEY; @@ -20,19 +16,14 @@ import static org.opensearch.sql.spark.flint.FlintIndexMetadata.SERVERLESS_EMR_JOB_ID; import static org.opensearch.sql.spark.flint.FlintIndexMetadata.SOURCE_KEY; -import java.util.ArrayList; -import java.util.Arrays; import java.util.HashMap; -import java.util.LinkedHashSet; -import java.util.List; import java.util.Map; -import java.util.Set; import lombok.AllArgsConstructor; -import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.action.admin.indices.mapping.get.GetMappingsResponse; import org.opensearch.client.Client; +import 
org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; import org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions; /** Implementation of {@link FlintIndexMetadataService} */ @@ -42,14 +33,10 @@ public class FlintIndexMetadataServiceImpl implements FlintIndexMetadataService private static final Logger LOGGER = LogManager.getLogger(FlintIndexMetadataServiceImpl.class); private final Client client; - public static final Set ALTER_TO_FULL_REFRESH_ALLOWED_OPTIONS = - new LinkedHashSet<>(Arrays.asList(AUTO_REFRESH, INCREMENTAL_REFRESH)); - public static final Set ALTER_TO_INCREMENTAL_REFRESH_ALLOWED_OPTIONS = - new LinkedHashSet<>( - Arrays.asList(AUTO_REFRESH, INCREMENTAL_REFRESH, WATERMARK_DELAY, CHECKPOINT_LOCATION)); @Override - public Map getFlintIndexMetadata(String indexPattern) { + public Map getFlintIndexMetadata( + String indexPattern, AsyncQueryRequestContext asyncQueryRequestContext) { GetMappingsResponse mappingsResponse = client.admin().indices().prepareGetMappings().setIndices(indexPattern).get(); Map indexMetadataMap = new HashMap<>(); @@ -73,7 +60,10 @@ public Map getFlintIndexMetadata(String indexPattern } @Override - public void updateIndexToManualRefresh(String indexName, FlintIndexOptions flintIndexOptions) { + public void updateIndexToManualRefresh( + String indexName, + FlintIndexOptions flintIndexOptions, + AsyncQueryRequestContext asyncQueryRequestContext) { GetMappingsResponse mappingsResponse = client.admin().indices().prepareGetMappings().setIndices(indexName).get(); Map flintMetadataMap = @@ -82,63 +72,11 @@ public void updateIndexToManualRefresh(String indexName, FlintIndexOptions flint String kind = (String) meta.get("kind"); Map options = (Map) meta.get("options"); Map newOptions = flintIndexOptions.getProvidedOptions(); - validateFlintIndexOptions(kind, options, newOptions); + FlintIndexMetadataValidator.validateFlintIndexOptions(kind, options, newOptions); options.putAll(newOptions); client.admin().indices().preparePutMapping(indexName).setSource(flintMetadataMap).get(); } - private void validateFlintIndexOptions( - String kind, Map existingOptions, Map newOptions) { - if ((newOptions.containsKey(INCREMENTAL_REFRESH) - && Boolean.parseBoolean(newOptions.get(INCREMENTAL_REFRESH))) - || ((!newOptions.containsKey(INCREMENTAL_REFRESH) - && Boolean.parseBoolean((String) existingOptions.get(INCREMENTAL_REFRESH))))) { - validateConversionToIncrementalRefresh(kind, existingOptions, newOptions); - } else { - validateConversionToFullRefresh(newOptions); - } - } - - private void validateConversionToFullRefresh(Map newOptions) { - if (!ALTER_TO_FULL_REFRESH_ALLOWED_OPTIONS.containsAll(newOptions.keySet())) { - throw new IllegalArgumentException( - String.format( - "Altering to full refresh only allows: %s options", - ALTER_TO_FULL_REFRESH_ALLOWED_OPTIONS)); - } - } - - private void validateConversionToIncrementalRefresh( - String kind, Map existingOptions, Map newOptions) { - if (!ALTER_TO_INCREMENTAL_REFRESH_ALLOWED_OPTIONS.containsAll(newOptions.keySet())) { - throw new IllegalArgumentException( - String.format( - "Altering to incremental refresh only allows: %s options", - ALTER_TO_INCREMENTAL_REFRESH_ALLOWED_OPTIONS)); - } - HashMap mergedOptions = new HashMap<>(); - mergedOptions.putAll(existingOptions); - mergedOptions.putAll(newOptions); - List missingAttributes = new ArrayList<>(); - if (!mergedOptions.containsKey(CHECKPOINT_LOCATION) - || StringUtils.isEmpty((String) mergedOptions.get(CHECKPOINT_LOCATION))) { - 
missingAttributes.add(CHECKPOINT_LOCATION); - } - if (kind.equals("mv") - && (!mergedOptions.containsKey(WATERMARK_DELAY) - || StringUtils.isEmpty((String) mergedOptions.get(WATERMARK_DELAY)))) { - missingAttributes.add(WATERMARK_DELAY); - } - if (missingAttributes.size() > 0) { - String errorMessage = - "Conversion to incremental refresh index cannot proceed due to missing attributes: " - + String.join(", ", missingAttributes) - + "."; - LOGGER.error(errorMessage); - throw new IllegalArgumentException(errorMessage); - } - } - private FlintIndexMetadata fromMetadata(String indexName, Map metaMap) { FlintIndexMetadata.FlintIndexMetadataBuilder flintIndexMetadataBuilder = FlintIndexMetadata.builder(); diff --git a/async-query/src/main/java/org/opensearch/sql/spark/flint/OpenSearchFlintIndexClient.java b/async-query/src/main/java/org/opensearch/sql/spark/flint/OpenSearchFlintIndexClient.java new file mode 100644 index 0000000000..7a655f0678 --- /dev/null +++ b/async-query/src/main/java/org/opensearch/sql/spark/flint/OpenSearchFlintIndexClient.java @@ -0,0 +1,27 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.flint; + +import lombok.RequiredArgsConstructor; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.action.admin.indices.delete.DeleteIndexRequest; +import org.opensearch.action.support.master.AcknowledgedResponse; +import org.opensearch.client.Client; + +@RequiredArgsConstructor +public class OpenSearchFlintIndexClient implements FlintIndexClient { + private static final Logger LOG = LogManager.getLogger(); + + private final Client client; + + @Override + public void deleteIndex(String indexName) { + DeleteIndexRequest request = new DeleteIndexRequest().indices(indexName); + AcknowledgedResponse response = client.admin().indices().delete(request).actionGet(); + LOG.info("OpenSearch index delete result: {}", response.isAcknowledged()); + } +} diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/OpenSearchFlintIndexStateModelService.java b/async-query/src/main/java/org/opensearch/sql/spark/flint/OpenSearchFlintIndexStateModelService.java similarity index 76% rename from spark/src/main/java/org/opensearch/sql/spark/flint/OpenSearchFlintIndexStateModelService.java rename to async-query/src/main/java/org/opensearch/sql/spark/flint/OpenSearchFlintIndexStateModelService.java index 5781c3e44b..eba338e912 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/flint/OpenSearchFlintIndexStateModelService.java +++ b/async-query/src/main/java/org/opensearch/sql/spark/flint/OpenSearchFlintIndexStateModelService.java @@ -7,6 +7,7 @@ import java.util.Optional; import lombok.RequiredArgsConstructor; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; import org.opensearch.sql.spark.execution.statestore.OpenSearchStateStoreUtil; import org.opensearch.sql.spark.execution.statestore.StateStore; import org.opensearch.sql.spark.execution.xcontent.FlintIndexStateModelXContentSerializer; @@ -20,7 +21,8 @@ public class OpenSearchFlintIndexStateModelService implements FlintIndexStateMod public FlintIndexStateModel updateFlintIndexState( FlintIndexStateModel flintIndexStateModel, FlintIndexState flintIndexState, - String datasourceName) { + String datasourceName, + AsyncQueryRequestContext asyncQueryRequestContext) { return stateStore.updateState( flintIndexStateModel, flintIndexState, @@ -29,14 +31,16 @@ public FlintIndexStateModel 
updateFlintIndexState( } @Override - public Optional getFlintIndexStateModel(String id, String datasourceName) { + public Optional getFlintIndexStateModel( + String id, String datasourceName, AsyncQueryRequestContext asyncQueryRequestContext) { return stateStore.get( id, serializer::fromXContent, OpenSearchStateStoreUtil.getIndexName(datasourceName)); } @Override public FlintIndexStateModel createFlintIndexStateModel( - FlintIndexStateModel flintIndexStateModel) { + FlintIndexStateModel flintIndexStateModel, + AsyncQueryRequestContext asyncQueryRequestContext) { return stateStore.create( flintIndexStateModel.getId(), flintIndexStateModel, @@ -45,7 +49,8 @@ public FlintIndexStateModel createFlintIndexStateModel( } @Override - public boolean deleteFlintIndexStateModel(String id, String datasourceName) { + public boolean deleteFlintIndexStateModel( + String id, String datasourceName, AsyncQueryRequestContext asyncQueryRequestContext) { return stateStore.delete(id, OpenSearchStateStoreUtil.getIndexName(datasourceName)); } } diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/OpenSearchIndexDMLResultStorageService.java b/async-query/src/main/java/org/opensearch/sql/spark/flint/OpenSearchIndexDMLResultStorageService.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/flint/OpenSearchIndexDMLResultStorageService.java rename to async-query/src/main/java/org/opensearch/sql/spark/flint/OpenSearchIndexDMLResultStorageService.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/leasemanager/DefaultLeaseManager.java b/async-query/src/main/java/org/opensearch/sql/spark/leasemanager/DefaultLeaseManager.java similarity index 96% rename from spark/src/main/java/org/opensearch/sql/spark/leasemanager/DefaultLeaseManager.java rename to async-query/src/main/java/org/opensearch/sql/spark/leasemanager/DefaultLeaseManager.java index 375fa7b11e..db8ca1ad2b 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/leasemanager/DefaultLeaseManager.java +++ b/async-query/src/main/java/org/opensearch/sql/spark/leasemanager/DefaultLeaseManager.java @@ -92,7 +92,8 @@ public String description() { @Override public boolean test(LeaseRequest leaseRequest) { - if (leaseRequest.getJobType() == JobType.INTERACTIVE) { + if (leaseRequest.getJobType() != JobType.REFRESH + && leaseRequest.getJobType() != JobType.STREAMING) { return true; } return activeRefreshJobCount(stateStore, ALL_DATASOURCE).get() < refreshJobLimit(); diff --git a/async-query/src/main/java/org/opensearch/sql/spark/metrics/OpenSearchMetricsService.java b/async-query/src/main/java/org/opensearch/sql/spark/metrics/OpenSearchMetricsService.java new file mode 100644 index 0000000000..316ab536bc --- /dev/null +++ b/async-query/src/main/java/org/opensearch/sql/spark/metrics/OpenSearchMetricsService.java @@ -0,0 +1,32 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.metrics; + +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import org.opensearch.sql.legacy.metrics.MetricName; +import org.opensearch.sql.legacy.utils.MetricUtils; + +public class OpenSearchMetricsService implements MetricsService { + private static final Map mapping = + ImmutableMap.of( + EmrMetrics.EMR_CANCEL_JOB_REQUEST_FAILURE_COUNT, + MetricName.EMR_CANCEL_JOB_REQUEST_FAILURE_COUNT, + EmrMetrics.EMR_GET_JOB_RESULT_FAILURE_COUNT, MetricName.EMR_GET_JOB_RESULT_FAILURE_COUNT, + EmrMetrics.EMR_START_JOB_REQUEST_FAILURE_COUNT, + 
MetricName.EMR_START_JOB_REQUEST_FAILURE_COUNT, + EmrMetrics.EMR_INTERACTIVE_QUERY_JOBS_CREATION_COUNT, + MetricName.EMR_INTERACTIVE_QUERY_JOBS_CREATION_COUNT, + EmrMetrics.EMR_STREAMING_QUERY_JOBS_CREATION_COUNT, + MetricName.EMR_STREAMING_QUERY_JOBS_CREATION_COUNT, + EmrMetrics.EMR_BATCH_QUERY_JOBS_CREATION_COUNT, + MetricName.EMR_BATCH_QUERY_JOBS_CREATION_COUNT); + + @Override + public void incrementNumericalMetric(EmrMetrics metricName) { + MetricUtils.incrementNumericalMetric(mapping.get(metricName)); + } +} diff --git a/async-query/src/main/java/org/opensearch/sql/spark/parameter/S3GlueDataSourceSparkParameterComposer.java b/async-query/src/main/java/org/opensearch/sql/spark/parameter/S3GlueDataSourceSparkParameterComposer.java new file mode 100644 index 0000000000..189e140416 --- /dev/null +++ b/async-query/src/main/java/org/opensearch/sql/spark/parameter/S3GlueDataSourceSparkParameterComposer.java @@ -0,0 +1,178 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.parameter; + +import static org.opensearch.sql.datasources.glue.GlueDataSourceFactory.GLUE_ICEBERG_ENABLED; +import static org.opensearch.sql.datasources.glue.GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_AUTH; +import static org.opensearch.sql.datasources.glue.GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_AUTH_PASSWORD; +import static org.opensearch.sql.datasources.glue.GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_AUTH_USERNAME; +import static org.opensearch.sql.datasources.glue.GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_REGION; +import static org.opensearch.sql.datasources.glue.GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_URI; +import static org.opensearch.sql.datasources.glue.GlueDataSourceFactory.GLUE_LAKEFORMATION_ENABLED; +import static org.opensearch.sql.datasources.glue.GlueDataSourceFactory.GLUE_LAKEFORMATION_SESSION_TAG; +import static org.opensearch.sql.datasources.glue.GlueDataSourceFactory.GLUE_ROLE_ARN; +import static org.opensearch.sql.spark.data.constants.SparkConstants.DRIVER_ENV_ASSUME_ROLE_ARN_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.EXECUTOR_ENV_ASSUME_ROLE_ARN_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_ACCELERATE_USING_COVERING_INDEX; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_DATA_SOURCE_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_DELEGATE_CATALOG; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_INDEX_STORE_AUTH_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_INDEX_STORE_AUTH_PASSWORD; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_INDEX_STORE_AUTH_USERNAME; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_INDEX_STORE_AWSREGION_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_INDEX_STORE_HOST_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_INDEX_STORE_PORT_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_INDEX_STORE_SCHEME_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_PPL_EXTENSION; +import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_SQL_EXTENSION; +import static org.opensearch.sql.spark.data.constants.SparkConstants.HIVE_METASTORE_GLUE_ARN_KEY; +import static 
org.opensearch.sql.spark.data.constants.SparkConstants.ICEBERG_ASSUME_ROLE_CLIENT_FACTORY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.ICEBERG_GLUE_CATALOG; +import static org.opensearch.sql.spark.data.constants.SparkConstants.ICEBERG_LF_CLIENT_FACTORY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.ICEBERG_SESSION_CATALOG; +import static org.opensearch.sql.spark.data.constants.SparkConstants.ICEBERG_SPARK_EXTENSION; +import static org.opensearch.sql.spark.data.constants.SparkConstants.ICEBERG_SPARK_JARS; +import static org.opensearch.sql.spark.data.constants.SparkConstants.ICEBERG_TS_WO_TZ; +import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_CATALOG; +import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_CATALOG_CATALOG_IMPL; +import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_CATALOG_CLIENT_ASSUME_ROLE_ARN; +import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_CATALOG_CLIENT_ASSUME_ROLE_REGION; +import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_CATALOG_CLIENT_FACTORY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_CATALOG_CLIENT_REGION; +import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_CATALOG_GLUE_ACCOUNT_ID; +import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_CATALOG_GLUE_LF_ENABLED; +import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_CATALOG_LF_SESSION_TAG_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_JAR_PACKAGES_KEY; +import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_SQL_EXTENSIONS_KEY; + +import com.amazonaws.arn.Arn; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.Optional; +import java.util.function.Supplier; +import lombok.RequiredArgsConstructor; +import org.apache.commons.lang3.BooleanUtils; +import org.apache.commons.lang3.StringUtils; +import org.opensearch.sql.datasource.model.DataSourceMetadata; +import org.opensearch.sql.datasources.auth.AuthenticationType; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; +import org.opensearch.sql.spark.config.SparkExecutionEngineConfigClusterSetting; +import org.opensearch.sql.spark.config.SparkExecutionEngineConfigClusterSettingLoader; +import org.opensearch.sql.spark.dispatcher.model.DispatchQueryRequest; + +@RequiredArgsConstructor +public class S3GlueDataSourceSparkParameterComposer implements DataSourceSparkParameterComposer { + public static final String FLINT_BASIC_AUTH = "basic"; + public static final String FALSE = "false"; + public static final String TRUE = "true"; + + private final SparkExecutionEngineConfigClusterSettingLoader settingLoader; + + @Override + public void compose( + DataSourceMetadata metadata, + SparkSubmitParameters params, + DispatchQueryRequest dispatchQueryRequest, + AsyncQueryRequestContext context) { + final Optional maybeClusterSettings = + settingLoader.load(); + if (!maybeClusterSettings.isPresent()) { + throw new RuntimeException("No cluster settings present"); + } + final SparkExecutionEngineConfigClusterSetting clusterSetting = maybeClusterSettings.get(); + final String region = clusterSetting.getRegion(); + + final String roleArn = metadata.getProperties().get(GLUE_ROLE_ARN); + final String accountId = Arn.fromString(roleArn).getAccountId(); + + params.setConfigItem(DRIVER_ENV_ASSUME_ROLE_ARN_KEY, roleArn); + 
params.setConfigItem(EXECUTOR_ENV_ASSUME_ROLE_ARN_KEY, roleArn); + params.setConfigItem(HIVE_METASTORE_GLUE_ARN_KEY, roleArn); + params.setConfigItem("spark.sql.catalog." + metadata.getName(), FLINT_DELEGATE_CATALOG); + params.setConfigItem(FLINT_DATA_SOURCE_KEY, metadata.getName()); + + final boolean icebergEnabled = + BooleanUtils.toBoolean(metadata.getProperties().get(GLUE_ICEBERG_ENABLED)); + if (icebergEnabled) { + params.setConfigItem( + SPARK_JAR_PACKAGES_KEY, + params.getConfigItem(SPARK_JAR_PACKAGES_KEY) + "," + ICEBERG_SPARK_JARS); + params.setConfigItem(SPARK_CATALOG, ICEBERG_SESSION_CATALOG); + params.setConfigItem(SPARK_CATALOG_CATALOG_IMPL, ICEBERG_GLUE_CATALOG); + params.setConfigItem( + SPARK_SQL_EXTENSIONS_KEY, + ICEBERG_SPARK_EXTENSION + "," + FLINT_SQL_EXTENSION + "," + FLINT_PPL_EXTENSION); + + params.setConfigItem(SPARK_CATALOG_CLIENT_REGION, region); + params.setConfigItem(SPARK_CATALOG_GLUE_ACCOUNT_ID, accountId); + params.setConfigItem(SPARK_CATALOG_CLIENT_ASSUME_ROLE_ARN, roleArn); + params.setConfigItem(SPARK_CATALOG_CLIENT_ASSUME_ROLE_REGION, region); + params.setConfigItem(ICEBERG_TS_WO_TZ, TRUE); + + final boolean lakeFormationEnabled = + BooleanUtils.toBoolean(metadata.getProperties().get(GLUE_LAKEFORMATION_ENABLED)); + if (lakeFormationEnabled) { + final String sessionTag = metadata.getProperties().get(GLUE_LAKEFORMATION_SESSION_TAG); + if (StringUtils.isBlank(sessionTag)) { + throw new IllegalArgumentException(GLUE_LAKEFORMATION_SESSION_TAG + " is required"); + } + + params.setConfigItem(FLINT_ACCELERATE_USING_COVERING_INDEX, FALSE); + params.setConfigItem(SPARK_CATALOG_GLUE_LF_ENABLED, TRUE); + params.setConfigItem(SPARK_CATALOG_CLIENT_FACTORY, ICEBERG_LF_CLIENT_FACTORY); + params.setConfigItem(SPARK_CATALOG_LF_SESSION_TAG_KEY, sessionTag); + } else { + params.setConfigItem(SPARK_CATALOG_CLIENT_FACTORY, ICEBERG_ASSUME_ROLE_CLIENT_FACTORY); + } + } + + setFlintIndexStoreHost( + params, + parseUri( + metadata.getProperties().get(GLUE_INDEX_STORE_OPENSEARCH_URI), metadata.getName())); + setFlintIndexStoreAuthProperties( + params, + metadata.getProperties().get(GLUE_INDEX_STORE_OPENSEARCH_AUTH), + () -> metadata.getProperties().get(GLUE_INDEX_STORE_OPENSEARCH_AUTH_USERNAME), + () -> metadata.getProperties().get(GLUE_INDEX_STORE_OPENSEARCH_AUTH_PASSWORD), + () -> metadata.getProperties().get(GLUE_INDEX_STORE_OPENSEARCH_REGION)); + params.setConfigItem("spark.flint.datasource.name", metadata.getName()); + } + + private void setFlintIndexStoreHost(SparkSubmitParameters params, URI uri) { + params.setConfigItem(FLINT_INDEX_STORE_HOST_KEY, uri.getHost()); + params.setConfigItem(FLINT_INDEX_STORE_PORT_KEY, String.valueOf(uri.getPort())); + params.setConfigItem(FLINT_INDEX_STORE_SCHEME_KEY, uri.getScheme()); + } + + private void setFlintIndexStoreAuthProperties( + SparkSubmitParameters params, + String authType, + Supplier userName, + Supplier password, + Supplier region) { + if (AuthenticationType.get(authType).equals(AuthenticationType.BASICAUTH)) { + params.setConfigItem(FLINT_INDEX_STORE_AUTH_KEY, FLINT_BASIC_AUTH); + params.setConfigItem(FLINT_INDEX_STORE_AUTH_USERNAME, userName.get()); + params.setConfigItem(FLINT_INDEX_STORE_AUTH_PASSWORD, password.get()); + } else if (AuthenticationType.get(authType).equals(AuthenticationType.AWSSIGV4AUTH)) { + params.setConfigItem(FLINT_INDEX_STORE_AUTH_KEY, "sigv4"); + params.setConfigItem(FLINT_INDEX_STORE_AWSREGION_KEY, region.get()); + } else { + params.setConfigItem(FLINT_INDEX_STORE_AUTH_KEY, authType); + } + } + + 
private URI parseUri(String opensearchUri, String datasourceName) { + try { + return new URI(opensearchUri); + } catch (URISyntaxException e) { + throw new IllegalArgumentException( + String.format( + "Bad URI in indexstore configuration of the : %s datasoure.", datasourceName)); + } + } +} diff --git a/spark/src/main/java/org/opensearch/sql/spark/response/OpenSearchJobExecutionResponseReader.java b/async-query/src/main/java/org/opensearch/sql/spark/response/OpenSearchJobExecutionResponseReader.java similarity index 82% rename from spark/src/main/java/org/opensearch/sql/spark/response/OpenSearchJobExecutionResponseReader.java rename to async-query/src/main/java/org/opensearch/sql/spark/response/OpenSearchJobExecutionResponseReader.java index 10113ece8d..c969a3a6dc 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/response/OpenSearchJobExecutionResponseReader.java +++ b/async-query/src/main/java/org/opensearch/sql/spark/response/OpenSearchJobExecutionResponseReader.java @@ -21,6 +21,8 @@ import org.opensearch.index.query.QueryBuilders; import org.opensearch.search.SearchHit; import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryJobMetadata; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; /** JobExecutionResponseReader implementation for reading response from OpenSearch index. */ public class OpenSearchJobExecutionResponseReader implements JobExecutionResponseReader { @@ -32,12 +34,17 @@ public OpenSearchJobExecutionResponseReader(Client client) { } @Override - public JSONObject getResultWithJobId(String jobId, String resultLocation) { - return searchInSparkIndex(QueryBuilders.termQuery(JOB_ID_FIELD, jobId), resultLocation); + public JSONObject getResultFromResultIndex( + AsyncQueryJobMetadata asyncQueryJobMetadata, + AsyncQueryRequestContext asyncQueryRequestContext) { + return searchInSparkIndex( + QueryBuilders.termQuery(JOB_ID_FIELD, asyncQueryJobMetadata.getJobId()), + asyncQueryJobMetadata.getResultIndex()); } @Override - public JSONObject getResultWithQueryId(String queryId, String resultLocation) { + public JSONObject getResultWithQueryId( + String queryId, String resultLocation, AsyncQueryRequestContext asyncQueryRequestContext) { return searchInSparkIndex(QueryBuilders.termQuery("queryId", queryId), resultLocation); } diff --git a/spark/src/main/java/org/opensearch/sql/spark/rest/RestAsyncQueryManagementAction.java b/async-query/src/main/java/org/opensearch/sql/spark/rest/RestAsyncQueryManagementAction.java similarity index 91% rename from spark/src/main/java/org/opensearch/sql/spark/rest/RestAsyncQueryManagementAction.java rename to async-query/src/main/java/org/opensearch/sql/spark/rest/RestAsyncQueryManagementAction.java index ced5609083..c188cf693f 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/rest/RestAsyncQueryManagementAction.java +++ b/async-query/src/main/java/org/opensearch/sql/spark/rest/RestAsyncQueryManagementAction.java @@ -16,6 +16,7 @@ import java.io.IOException; import java.util.List; import java.util.Locale; +import lombok.RequiredArgsConstructor; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.OpenSearchException; @@ -26,17 +27,21 @@ import org.opensearch.rest.BytesRestResponse; import org.opensearch.rest.RestChannel; import org.opensearch.rest.RestRequest; +import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.datasources.exceptions.DataSourceClientException; import 
org.opensearch.sql.datasources.exceptions.ErrorMessage; import org.opensearch.sql.datasources.utils.Scheduler; import org.opensearch.sql.legacy.metrics.MetricName; import org.opensearch.sql.legacy.utils.MetricUtils; +import org.opensearch.sql.opensearch.setting.OpenSearchSettings; +import org.opensearch.sql.opensearch.util.RestRequestUtil; import org.opensearch.sql.spark.asyncquery.exceptions.AsyncQueryNotFoundException; import org.opensearch.sql.spark.leasemanager.ConcurrencyLimitExceededException; import org.opensearch.sql.spark.rest.model.CreateAsyncQueryRequest; import org.opensearch.sql.spark.transport.TransportCancelAsyncQueryRequestAction; import org.opensearch.sql.spark.transport.TransportCreateAsyncQueryRequestAction; import org.opensearch.sql.spark.transport.TransportGetAsyncQueryResultAction; +import org.opensearch.sql.spark.transport.format.CreateAsyncQueryRequestConverter; import org.opensearch.sql.spark.transport.model.CancelAsyncQueryActionRequest; import org.opensearch.sql.spark.transport.model.CancelAsyncQueryActionResponse; import org.opensearch.sql.spark.transport.model.CreateAsyncQueryActionRequest; @@ -44,6 +49,7 @@ import org.opensearch.sql.spark.transport.model.GetAsyncQueryResultActionRequest; import org.opensearch.sql.spark.transport.model.GetAsyncQueryResultActionResponse; +@RequiredArgsConstructor public class RestAsyncQueryManagementAction extends BaseRestHandler { public static final String ASYNC_QUERY_ACTIONS = "async_query_actions"; @@ -51,6 +57,8 @@ public class RestAsyncQueryManagementAction extends BaseRestHandler { private static final Logger LOG = LogManager.getLogger(RestAsyncQueryManagementAction.class); + private final OpenSearchSettings settings; + @Override public String getName() { return ASYNC_QUERY_ACTIONS; @@ -99,6 +107,9 @@ public List routes() { @Override protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient nodeClient) throws IOException { + if (!dataSourcesEnabled()) { + return dataSourcesDisabledError(restRequest); + } switch (restRequest.method()) { case POST: return executePostRequest(restRequest, nodeClient); @@ -119,7 +130,7 @@ private RestChannelConsumer executePostRequest(RestRequest restRequest, NodeClie try { MetricUtils.incrementNumericalMetric(MetricName.ASYNC_QUERY_CREATE_API_REQUEST_COUNT); CreateAsyncQueryRequest submitJobRequest = - CreateAsyncQueryRequest.fromXContentParser(restRequest.contentParser()); + CreateAsyncQueryRequestConverter.fromXContentParser(restRequest.contentParser()); Scheduler.schedule( nodeClient, () -> @@ -271,4 +282,21 @@ private void addCustomerErrorMetric(RestRequest.Method requestMethod) { break; } } + + private boolean dataSourcesEnabled() { + return settings.getSettingValue(Settings.Key.DATASOURCES_ENABLED); + } + + private RestChannelConsumer dataSourcesDisabledError(RestRequest request) { + + RestRequestUtil.consumeAllRequestParameters(request); + + return channel -> { + reportError( + channel, + new IllegalAccessException( + String.format("%s setting is false", Settings.Key.DATASOURCES_ENABLED.getKeyValue())), + BAD_REQUEST); + }; + } } diff --git a/async-query/src/main/java/org/opensearch/sql/spark/scheduler/OpenSearchAsyncQueryScheduler.java b/async-query/src/main/java/org/opensearch/sql/spark/scheduler/OpenSearchAsyncQueryScheduler.java new file mode 100644 index 0000000000..59bad14320 --- /dev/null +++ b/async-query/src/main/java/org/opensearch/sql/spark/scheduler/OpenSearchAsyncQueryScheduler.java @@ -0,0 +1,213 @@ +/* + * Copyright OpenSearch Contributors + * 
SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.scheduler; + +import static org.opensearch.core.xcontent.ToXContent.EMPTY_PARAMS; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Strings; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.time.Instant; +import lombok.RequiredArgsConstructor; +import lombok.SneakyThrows; +import org.apache.commons.io.IOUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.action.DocWriteRequest; +import org.opensearch.action.DocWriteResponse; +import org.opensearch.action.admin.indices.create.CreateIndexRequest; +import org.opensearch.action.admin.indices.create.CreateIndexResponse; +import org.opensearch.action.delete.DeleteRequest; +import org.opensearch.action.delete.DeleteResponse; +import org.opensearch.action.index.IndexRequest; +import org.opensearch.action.index.IndexResponse; +import org.opensearch.action.support.WriteRequest; +import org.opensearch.action.update.UpdateRequest; +import org.opensearch.action.update.UpdateResponse; +import org.opensearch.client.Client; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.action.ActionFuture; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.common.xcontent.json.JsonXContent; +import org.opensearch.index.engine.DocumentMissingException; +import org.opensearch.index.engine.VersionConflictEngineException; +import org.opensearch.jobscheduler.spi.ScheduledJobRunner; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; +import org.opensearch.sql.spark.scheduler.job.ScheduledAsyncQueryJobRunner; +import org.opensearch.sql.spark.scheduler.model.AsyncQuerySchedulerRequest; +import org.opensearch.sql.spark.scheduler.model.ScheduledAsyncQueryJobRequest; + +/** Scheduler class for managing asynchronous query jobs. */ +@RequiredArgsConstructor +public class OpenSearchAsyncQueryScheduler implements AsyncQueryScheduler { + public static final String SCHEDULER_INDEX_NAME = ".async-query-scheduler"; + public static final String SCHEDULER_PLUGIN_JOB_TYPE = "async-query-scheduler"; + private static final String SCHEDULER_INDEX_MAPPING_FILE_NAME = + "async-query-scheduler-index-mapping.yml"; + private static final String SCHEDULER_INDEX_SETTINGS_FILE_NAME = + "async-query-scheduler-index-settings.yml"; + private static final Logger LOG = LogManager.getLogger(); + + private final Client client; + private final ClusterService clusterService; + + @Override + /** Schedules a new job by indexing it into the job index. 
*/ + public void scheduleJob( + AsyncQuerySchedulerRequest asyncQuerySchedulerRequest, + AsyncQueryRequestContext asyncQueryRequestContext) { + ScheduledAsyncQueryJobRequest request = + ScheduledAsyncQueryJobRequest.fromAsyncQuerySchedulerRequest(asyncQuerySchedulerRequest); + if (!this.clusterService.state().routingTable().hasIndex(SCHEDULER_INDEX_NAME)) { + createAsyncQuerySchedulerIndex(); + } + IndexRequest indexRequest = new IndexRequest(SCHEDULER_INDEX_NAME); + indexRequest.id(request.getName()); + indexRequest.opType(DocWriteRequest.OpType.CREATE); + indexRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); + IndexResponse indexResponse; + try { + indexRequest.source(request.toXContent(JsonXContent.contentBuilder(), EMPTY_PARAMS)); + ActionFuture indexResponseActionFuture = client.index(indexRequest); + indexResponse = indexResponseActionFuture.actionGet(); + } catch (VersionConflictEngineException exception) { + throw new IllegalArgumentException("A job already exists with name: " + request.getName()); + } catch (Throwable e) { + LOG.error("Failed to schedule job : {}", request.getName(), e); + throw new RuntimeException(e); + } + + if (indexResponse.getResult().equals(DocWriteResponse.Result.CREATED)) { + LOG.debug("Job : {} successfully created", request.getName()); + } else { + throw new RuntimeException( + "Schedule job failed with result : " + indexResponse.getResult().getLowercase()); + } + } + + /** Unschedules a job by marking it as disabled and updating its last update time. */ + @Override + public void unscheduleJob(String jobId, AsyncQueryRequestContext asyncQueryRequestContext) { + if (Strings.isNullOrEmpty(jobId)) { + throw new IllegalArgumentException("JobId cannot be null or empty"); + } + try { + AsyncQuerySchedulerRequest request = + ScheduledAsyncQueryJobRequest.builder() + .jobId(jobId) + .enabled(false) + .lastUpdateTime(Instant.now()) + .build(); + updateJob(request, asyncQueryRequestContext); + LOG.info("Unscheduled job for jobId: {}", jobId); + } catch (IllegalStateException | DocumentMissingException e) { + LOG.error("Failed to unschedule job: {}", jobId, e); + } + } + + /** Updates an existing job with new parameters. 
*/ + @Override + @SneakyThrows + public void updateJob( + AsyncQuerySchedulerRequest asyncQuerySchedulerRequest, + AsyncQueryRequestContext asyncQueryRequestContext) { + ScheduledAsyncQueryJobRequest request = + ScheduledAsyncQueryJobRequest.fromAsyncQuerySchedulerRequest(asyncQuerySchedulerRequest); + assertIndexExists(); + UpdateRequest updateRequest = new UpdateRequest(SCHEDULER_INDEX_NAME, request.getName()); + updateRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); + updateRequest.doc(request.toXContent(JsonXContent.contentBuilder(), EMPTY_PARAMS)); + UpdateResponse updateResponse; + try { + ActionFuture updateResponseActionFuture = client.update(updateRequest); + updateResponse = updateResponseActionFuture.actionGet(); + } catch (DocumentMissingException exception) { + throw new IllegalArgumentException("Job: " + request.getName() + " doesn't exist"); + } catch (Throwable e) { + LOG.error("Failed to update job : {}", request.getName(), e); + throw new RuntimeException(e); + } + + if (updateResponse.getResult().equals(DocWriteResponse.Result.UPDATED) + || updateResponse.getResult().equals(DocWriteResponse.Result.NOOP)) { + LOG.debug("Job : {} successfully updated", request.getName()); + } else { + throw new RuntimeException( + "Update job failed with result : " + updateResponse.getResult().getLowercase()); + } + } + + /** Removes a job by deleting its document from the index. */ + @Override + public void removeJob(String jobId, AsyncQueryRequestContext asyncQueryRequestContext) { + assertIndexExists(); + if (Strings.isNullOrEmpty(jobId)) { + throw new IllegalArgumentException("JobId cannot be null or empty"); + } + DeleteRequest deleteRequest = new DeleteRequest(SCHEDULER_INDEX_NAME, jobId); + deleteRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); + ActionFuture deleteResponseActionFuture = client.delete(deleteRequest); + DeleteResponse deleteResponse = deleteResponseActionFuture.actionGet(); + + if (deleteResponse.getResult().equals(DocWriteResponse.Result.DELETED)) { + LOG.debug("Job : {} successfully deleted", jobId); + } else if (deleteResponse.getResult().equals(DocWriteResponse.Result.NOT_FOUND)) { + throw new IllegalArgumentException("Job : " + jobId + " doesn't exist"); + } else { + throw new RuntimeException( + "Remove job failed with result : " + deleteResponse.getResult().getLowercase()); + } + } + + /** Creates the async query scheduler index with specified mappings and settings. 
*/ + @VisibleForTesting + void createAsyncQuerySchedulerIndex() { + try { + InputStream mappingFileStream = + OpenSearchAsyncQueryScheduler.class + .getClassLoader() + .getResourceAsStream(SCHEDULER_INDEX_MAPPING_FILE_NAME); + InputStream settingsFileStream = + OpenSearchAsyncQueryScheduler.class + .getClassLoader() + .getResourceAsStream(SCHEDULER_INDEX_SETTINGS_FILE_NAME); + CreateIndexRequest createIndexRequest = new CreateIndexRequest(SCHEDULER_INDEX_NAME); + createIndexRequest.mapping( + IOUtils.toString(mappingFileStream, StandardCharsets.UTF_8), XContentType.YAML); + createIndexRequest.settings( + IOUtils.toString(settingsFileStream, StandardCharsets.UTF_8), XContentType.YAML); + ActionFuture createIndexResponseActionFuture = + client.admin().indices().create(createIndexRequest); + CreateIndexResponse createIndexResponse = createIndexResponseActionFuture.actionGet(); + + if (createIndexResponse.isAcknowledged()) { + LOG.debug("Index: {} creation Acknowledged", SCHEDULER_INDEX_NAME); + } else { + throw new RuntimeException("Index creation is not acknowledged."); + } + } catch (Throwable e) { + LOG.error("Error creating index: {}", SCHEDULER_INDEX_NAME, e); + throw new RuntimeException( + "Internal server error while creating " + + SCHEDULER_INDEX_NAME + + " index: " + + e.getMessage(), + e); + } + } + + private void assertIndexExists() { + if (!this.clusterService.state().routingTable().hasIndex(SCHEDULER_INDEX_NAME)) { + throw new IllegalStateException("Job index does not exist."); + } + } + + /** Returns the job runner instance for the scheduler. */ + public static ScheduledJobRunner getJobRunner() { + return ScheduledAsyncQueryJobRunner.getJobRunnerInstance(); + } +} diff --git a/async-query/src/main/java/org/opensearch/sql/spark/scheduler/job/ScheduledAsyncQueryJobRunner.java b/async-query/src/main/java/org/opensearch/sql/spark/scheduler/job/ScheduledAsyncQueryJobRunner.java new file mode 100644 index 0000000000..a2abb8e944 --- /dev/null +++ b/async-query/src/main/java/org/opensearch/sql/spark/scheduler/job/ScheduledAsyncQueryJobRunner.java @@ -0,0 +1,116 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.scheduler.job; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.client.Client; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.jobscheduler.spi.JobExecutionContext; +import org.opensearch.jobscheduler.spi.ScheduledJobParameter; +import org.opensearch.jobscheduler.spi.ScheduledJobRunner; +import org.opensearch.plugins.Plugin; +import org.opensearch.sql.legacy.executor.AsyncRestExecutor; +import org.opensearch.sql.spark.asyncquery.AsyncQueryExecutorService; +import org.opensearch.sql.spark.asyncquery.model.NullAsyncQueryRequestContext; +import org.opensearch.sql.spark.rest.model.CreateAsyncQueryRequest; +import org.opensearch.sql.spark.rest.model.CreateAsyncQueryResponse; +import org.opensearch.sql.spark.scheduler.model.ScheduledAsyncQueryJobRequest; +import org.opensearch.threadpool.ThreadPool; + +/** + * The job runner class for scheduling async query. + * + *

<p>The job runner should be a singleton class if it uses the OpenSearch client or other objects + * passed in from OpenSearch, because when the job runner is registered with the JobScheduler plugin, + * OpenSearch has not yet invoked the plugins' createComponents() method. That is, the plugin is not + * completely initialized, and the OpenSearch {@link org.opensearch.client.Client}, {@link + * ClusterService} and other objects are not yet available to the plugin or to this job runner. + * + *

So we have to move this job runner initialization to {@link Plugin} createComponents() method, + * and using singleton job runner to ensure we register a usable job runner instance to JobScheduler + * plugin. + */ +public class ScheduledAsyncQueryJobRunner implements ScheduledJobRunner { + // Share SQL plugin thread pool + private static final String ASYNC_QUERY_THREAD_POOL_NAME = + AsyncRestExecutor.SQL_WORKER_THREAD_POOL_NAME; + private static final Logger LOGGER = LogManager.getLogger(ScheduledAsyncQueryJobRunner.class); + + private static final ScheduledAsyncQueryJobRunner INSTANCE = new ScheduledAsyncQueryJobRunner(); + + public static ScheduledAsyncQueryJobRunner getJobRunnerInstance() { + return INSTANCE; + } + + private ClusterService clusterService; + private ThreadPool threadPool; + private Client client; + private AsyncQueryExecutorService asyncQueryExecutorService; + + private ScheduledAsyncQueryJobRunner() { + // Singleton class, use getJobRunnerInstance method instead of constructor + } + + /** Loads job resources, setting up required services and job runner instance. */ + public void loadJobResource( + Client client, + ClusterService clusterService, + ThreadPool threadPool, + AsyncQueryExecutorService asyncQueryExecutorService) { + this.client = client; + this.clusterService = clusterService; + this.threadPool = threadPool; + this.asyncQueryExecutorService = asyncQueryExecutorService; + } + + @Override + public void runJob(ScheduledJobParameter jobParameter, JobExecutionContext context) { + // Parser will convert jobParameter to ScheduledAsyncQueryJobRequest + if (!(jobParameter instanceof ScheduledAsyncQueryJobRequest)) { + throw new IllegalStateException( + "Job parameter is not instance of ScheduledAsyncQueryJobRequest, type: " + + jobParameter.getClass().getCanonicalName()); + } + + if (this.clusterService == null) { + throw new IllegalStateException("ClusterService is not initialized."); + } + + if (this.threadPool == null) { + throw new IllegalStateException("ThreadPool is not initialized."); + } + + if (this.client == null) { + throw new IllegalStateException("Client is not initialized."); + } + + if (this.asyncQueryExecutorService == null) { + throw new IllegalStateException("AsyncQueryExecutorService is not initialized."); + } + + Runnable runnable = + () -> { + try { + doRefresh((ScheduledAsyncQueryJobRequest) jobParameter); + } catch (Throwable throwable) { + LOGGER.error(throwable); + } + }; + threadPool.executor(ASYNC_QUERY_THREAD_POOL_NAME).submit(runnable); + } + + void doRefresh(ScheduledAsyncQueryJobRequest request) { + LOGGER.info("Scheduled refresh index job on job: " + request.getName()); + CreateAsyncQueryRequest createAsyncQueryRequest = + new CreateAsyncQueryRequest( + request.getScheduledQuery(), request.getDataSource(), request.getQueryLang()); + CreateAsyncQueryResponse createAsyncQueryResponse = + asyncQueryExecutorService.createAsyncQuery( + createAsyncQueryRequest, new NullAsyncQueryRequestContext()); + LOGGER.info("Created async query with queryId: " + createAsyncQueryResponse.getQueryId()); + } +} diff --git a/async-query/src/main/java/org/opensearch/sql/spark/scheduler/model/ScheduledAsyncQueryJobRequest.java b/async-query/src/main/java/org/opensearch/sql/spark/scheduler/model/ScheduledAsyncQueryJobRequest.java new file mode 100644 index 0000000000..48aa52a3ce --- /dev/null +++ b/async-query/src/main/java/org/opensearch/sql/spark/scheduler/model/ScheduledAsyncQueryJobRequest.java @@ -0,0 +1,156 @@ +/* + * Copyright OpenSearch Contributors + * 
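To make the registration/initialization split described in the javadoc above concrete, a minimal sketch follows. It is an illustration only, not the plugin's actual wiring: the class and package names (SchedulerWiringSketch, org.example.sketch) and the two helper methods are invented for this sketch; only getJobRunner(), getJobRunnerInstance() and loadJobResource(...) are taken from the code introduced in this diff.

package org.example.sketch; // hypothetical package, for illustration only

import org.opensearch.client.Client;
import org.opensearch.cluster.service.ClusterService;
import org.opensearch.jobscheduler.spi.ScheduledJobRunner;
import org.opensearch.sql.spark.asyncquery.AsyncQueryExecutorService;
import org.opensearch.sql.spark.scheduler.OpenSearchAsyncQueryScheduler;
import org.opensearch.sql.spark.scheduler.job.ScheduledAsyncQueryJobRunner;
import org.opensearch.threadpool.ThreadPool;

/** Hypothetical helper showing the two touch points of the singleton job runner. */
final class SchedulerWiringSketch {

  /**
   * What the Job Scheduler extension point can be given at registration time,
   * before createComponents() has run: the singleton instance, not yet initialized.
   */
  static ScheduledJobRunner jobRunnerForRegistration() {
    return OpenSearchAsyncQueryScheduler.getJobRunner();
  }

  /**
   * What createComponents() can do once the node-level services exist: hand them
   * to the same singleton, so the previously registered instance becomes usable.
   */
  static void wireJobRunner(
      Client client,
      ClusterService clusterService,
      ThreadPool threadPool,
      AsyncQueryExecutorService asyncQueryExecutorService) {
    ScheduledAsyncQueryJobRunner.getJobRunnerInstance()
        .loadJobResource(client, clusterService, threadPool, asyncQueryExecutorService);
  }
}

Because both calls resolve to the same singleton, the runner handed to Job Scheduler at registration and the runner initialized later are the same object, which is the property the class comment relies on.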
SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.scheduler.model; + +import java.io.IOException; +import java.time.Instant; +import lombok.Builder; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.ToString; +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.jobscheduler.spi.ScheduledJobParameter; +import org.opensearch.jobscheduler.spi.schedule.Schedule; +import org.opensearch.sql.spark.rest.model.LangType; +import org.opensearch.sql.spark.scheduler.parser.IntervalScheduleParser; + +/** Represents a job request to refresh index. */ +@Data +@EqualsAndHashCode(callSuper = true) +@ToString(callSuper = true) +public class ScheduledAsyncQueryJobRequest extends AsyncQuerySchedulerRequest + implements ScheduledJobParameter { + // Constant fields for JSON serialization + public static final String ACCOUNT_ID_FIELD = "accountId"; + public static final String JOB_ID_FIELD = "jobId"; + public static final String DATA_SOURCE_NAME_FIELD = "dataSource"; + public static final String SCHEDULED_QUERY_FIELD = "scheduledQuery"; + public static final String QUERY_LANG_FIELD = "queryLang"; + public static final String LAST_UPDATE_TIME_FIELD = "lastUpdateTime"; + public static final String SCHEDULE_FIELD = "schedule"; + public static final String ENABLED_TIME_FIELD = "enabledTime"; + public static final String LOCK_DURATION_SECONDS = "lockDurationSeconds"; + public static final String JITTER = "jitter"; + public static final String ENABLED_FIELD = "enabled"; + private final Schedule schedule; + + @Builder(builderMethodName = "scheduledAsyncQueryJobRequestBuilder") + public ScheduledAsyncQueryJobRequest( + String accountId, + String jobId, + String dataSource, + String scheduledQuery, + LangType queryLang, + Schedule schedule, // Use the OpenSearch Schedule type + boolean enabled, + Instant lastUpdateTime, + Instant enabledTime, + Long lockDurationSeconds, + Double jitter) { + super( + accountId, + jobId, + dataSource, + scheduledQuery, + queryLang, + schedule, + enabled, + lastUpdateTime, + enabledTime, + lockDurationSeconds, + jitter); + this.schedule = schedule; + } + + @Override + public String getName() { + return getJobId(); + } + + @Override + public boolean isEnabled() { + return enabled; + } + + @Override + public Instant getLastUpdateTime() { + return lastUpdateTime; + } + + @Override + public Instant getEnabledTime() { + return enabledTime; + } + + @Override + public Schedule getSchedule() { + return schedule; + } + + @Override + public Long getLockDurationSeconds() { + return lockDurationSeconds; + } + + @Override + public Double getJitter() { + return jitter; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) + throws IOException { + builder.startObject(); + if (getAccountId() != null) { + builder.field(ACCOUNT_ID_FIELD, getAccountId()); + } + builder.field(JOB_ID_FIELD, getJobId()).field(ENABLED_FIELD, isEnabled()); + if (getDataSource() != null) { + builder.field(DATA_SOURCE_NAME_FIELD, getDataSource()); + } + if (getScheduledQuery() != null) { + builder.field(SCHEDULED_QUERY_FIELD, getScheduledQuery()); + } + if (getQueryLang() != null) { + builder.field(QUERY_LANG_FIELD, getQueryLang()); + } + if (getSchedule() != null) { + builder.field(SCHEDULE_FIELD, getSchedule()); + } + if (getEnabledTime() != null) { + builder.field(ENABLED_TIME_FIELD, getEnabledTime().toEpochMilli()); + } + builder.field(LAST_UPDATE_TIME_FIELD, 
getLastUpdateTime().toEpochMilli()); + if (this.lockDurationSeconds != null) { + builder.field(LOCK_DURATION_SECONDS, this.lockDurationSeconds); + } + if (this.jitter != null) { + builder.field(JITTER, this.jitter); + } + builder.endObject(); + return builder; + } + + public static ScheduledAsyncQueryJobRequest fromAsyncQuerySchedulerRequest( + AsyncQuerySchedulerRequest request) { + Instant updateTime = + request.getLastUpdateTime() != null ? request.getLastUpdateTime() : Instant.now(); + return ScheduledAsyncQueryJobRequest.scheduledAsyncQueryJobRequestBuilder() + .accountId(request.getAccountId()) + .jobId(request.getJobId()) + .dataSource(request.getDataSource()) + .scheduledQuery(request.getScheduledQuery()) + .queryLang(request.getQueryLang()) + .enabled(request.isEnabled()) + .lastUpdateTime(updateTime) + .enabledTime(request.getEnabledTime()) + .lockDurationSeconds(request.getLockDurationSeconds()) + .jitter(request.getJitter()) + .schedule(IntervalScheduleParser.parse(request.getSchedule(), updateTime)) + .build(); + } +} diff --git a/async-query/src/main/java/org/opensearch/sql/spark/scheduler/parser/IntervalScheduleParser.java b/async-query/src/main/java/org/opensearch/sql/spark/scheduler/parser/IntervalScheduleParser.java new file mode 100644 index 0000000000..47e652c570 --- /dev/null +++ b/async-query/src/main/java/org/opensearch/sql/spark/scheduler/parser/IntervalScheduleParser.java @@ -0,0 +1,99 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.scheduler.parser; + +import com.google.common.annotations.VisibleForTesting; +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.opensearch.jobscheduler.spi.schedule.IntervalSchedule; +import org.opensearch.jobscheduler.spi.schedule.Schedule; + +/** Parse string raw schedule into job scheduler IntervalSchedule */ +public class IntervalScheduleParser { + private static final Pattern DURATION_PATTERN = + Pattern.compile( + "^(\\d+)\\s*(years?|months?|weeks?|days?|hours?|minutes?|minute|mins?|seconds?|secs?|milliseconds?|millis?|microseconds?|microsecond|micros?|micros|nanoseconds?|nanos?)$", + Pattern.CASE_INSENSITIVE); + + public static Schedule parse(Object schedule, Instant startTime) { + if (schedule == null) { + return null; + } + + if (schedule instanceof Schedule) { + return (Schedule) schedule; + } + + if (!(schedule instanceof String)) { + throw new IllegalArgumentException("Schedule must be a String object for parsing."); + } + + String intervalStr = ((String) schedule).trim().toLowerCase(); + + Matcher matcher = DURATION_PATTERN.matcher(intervalStr); + if (!matcher.matches()) { + throw new IllegalArgumentException("Invalid interval format: " + intervalStr); + } + + long value = Long.parseLong(matcher.group(1)); + String unitStr = matcher.group(2).toLowerCase(); + + // Convert to a supported unit or directly return an IntervalSchedule + long intervalInMinutes = convertToSupportedUnit(value, unitStr); + + return new IntervalSchedule(startTime, (int) intervalInMinutes, ChronoUnit.MINUTES); + } + + @VisibleForTesting + protected static long convertToSupportedUnit(long value, String unitStr) { + switch (unitStr) { + case "years": + case "year": + throw new IllegalArgumentException("Years cannot be converted to minutes accurately."); + case "months": + case "month": + throw new IllegalArgumentException("Months cannot be converted to minutes accurately."); + case 
"weeks": + case "week": + return value * 7 * 24 * 60; // Convert weeks to minutes + case "days": + case "day": + return value * 24 * 60; // Convert days to minutes + case "hours": + case "hour": + return value * 60; // Convert hours to minutes + case "minutes": + case "minute": + case "mins": + case "min": + return value; // Already in minutes + case "seconds": + case "second": + case "secs": + case "sec": + return value / 60; // Convert seconds to minutes + case "milliseconds": + case "millisecond": + case "millis": + case "milli": + return value / (60 * 1000); // Convert milliseconds to minutes + case "microseconds": + case "microsecond": + case "micros": + case "micro": + return value / (60 * 1000 * 1000); // Convert microseconds to minutes + case "nanoseconds": + case "nanosecond": + case "nanos": + case "nano": + return value / (60 * 1000 * 1000 * 1000L); // Convert nanoseconds to minutes + default: + throw new IllegalArgumentException("Unsupported time unit: " + unitStr); + } + } +} diff --git a/async-query/src/main/java/org/opensearch/sql/spark/scheduler/parser/OpenSearchScheduleQueryJobRequestParser.java b/async-query/src/main/java/org/opensearch/sql/spark/scheduler/parser/OpenSearchScheduleQueryJobRequestParser.java new file mode 100644 index 0000000000..a824797066 --- /dev/null +++ b/async-query/src/main/java/org/opensearch/sql/spark/scheduler/parser/OpenSearchScheduleQueryJobRequestParser.java @@ -0,0 +1,81 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.scheduler.parser; + +import java.io.IOException; +import java.time.Instant; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.core.xcontent.XContentParserUtils; +import org.opensearch.jobscheduler.spi.ScheduledJobParser; +import org.opensearch.jobscheduler.spi.schedule.ScheduleParser; +import org.opensearch.sql.spark.rest.model.LangType; +import org.opensearch.sql.spark.scheduler.model.ScheduledAsyncQueryJobRequest; + +public class OpenSearchScheduleQueryJobRequestParser { + + private static Instant parseInstantValue(XContentParser parser) throws IOException { + if (XContentParser.Token.VALUE_NULL.equals(parser.currentToken())) { + return null; + } + if (parser.currentToken().isValue()) { + return Instant.ofEpochMilli(parser.longValue()); + } + XContentParserUtils.throwUnknownToken(parser.currentToken(), parser.getTokenLocation()); + return null; + } + + public static ScheduledJobParser getJobParser() { + return (parser, id, jobDocVersion) -> { + ScheduledAsyncQueryJobRequest.ScheduledAsyncQueryJobRequestBuilder builder = + ScheduledAsyncQueryJobRequest.scheduledAsyncQueryJobRequestBuilder(); + XContentParserUtils.ensureExpectedToken( + XContentParser.Token.START_OBJECT, parser.nextToken(), parser); + + while (!parser.nextToken().equals(XContentParser.Token.END_OBJECT)) { + String fieldName = parser.currentName(); + parser.nextToken(); + switch (fieldName) { + case ScheduledAsyncQueryJobRequest.ACCOUNT_ID_FIELD: + builder.accountId(parser.text()); + break; + case ScheduledAsyncQueryJobRequest.JOB_ID_FIELD: + builder.jobId(parser.text()); + break; + case ScheduledAsyncQueryJobRequest.DATA_SOURCE_NAME_FIELD: + builder.dataSource(parser.text()); + break; + case ScheduledAsyncQueryJobRequest.SCHEDULED_QUERY_FIELD: + builder.scheduledQuery(parser.text()); + break; + case ScheduledAsyncQueryJobRequest.QUERY_LANG_FIELD: + builder.queryLang(LangType.fromString(parser.text())); + break; + case 
ScheduledAsyncQueryJobRequest.ENABLED_FIELD: + builder.enabled(parser.booleanValue()); + break; + case ScheduledAsyncQueryJobRequest.ENABLED_TIME_FIELD: + builder.enabledTime(parseInstantValue(parser)); + break; + case ScheduledAsyncQueryJobRequest.LAST_UPDATE_TIME_FIELD: + builder.lastUpdateTime(parseInstantValue(parser)); + break; + case ScheduledAsyncQueryJobRequest.SCHEDULE_FIELD: + builder.schedule(ScheduleParser.parse(parser)); + break; + case ScheduledAsyncQueryJobRequest.LOCK_DURATION_SECONDS: + builder.lockDurationSeconds(parser.longValue()); + break; + case ScheduledAsyncQueryJobRequest.JITTER: + builder.jitter(parser.doubleValue()); + break; + default: + XContentParserUtils.throwUnknownToken(parser.currentToken(), parser.getTokenLocation()); + } + } + return builder.build(); + }; + } +} diff --git a/spark/src/main/java/org/opensearch/sql/spark/transport/TransportCancelAsyncQueryRequestAction.java b/async-query/src/main/java/org/opensearch/sql/spark/transport/TransportCancelAsyncQueryRequestAction.java similarity index 89% rename from spark/src/main/java/org/opensearch/sql/spark/transport/TransportCancelAsyncQueryRequestAction.java rename to async-query/src/main/java/org/opensearch/sql/spark/transport/TransportCancelAsyncQueryRequestAction.java index 232a280db5..ce80351f70 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/transport/TransportCancelAsyncQueryRequestAction.java +++ b/async-query/src/main/java/org/opensearch/sql/spark/transport/TransportCancelAsyncQueryRequestAction.java @@ -13,6 +13,7 @@ import org.opensearch.common.inject.Inject; import org.opensearch.core.action.ActionListener; import org.opensearch.sql.spark.asyncquery.AsyncQueryExecutorServiceImpl; +import org.opensearch.sql.spark.asyncquery.model.NullAsyncQueryRequestContext; import org.opensearch.sql.spark.transport.model.CancelAsyncQueryActionRequest; import org.opensearch.sql.spark.transport.model.CancelAsyncQueryActionResponse; import org.opensearch.tasks.Task; @@ -41,7 +42,9 @@ protected void doExecute( CancelAsyncQueryActionRequest request, ActionListener listener) { try { - String jobId = asyncQueryExecutorService.cancelQuery(request.getQueryId()); + String jobId = + asyncQueryExecutorService.cancelQuery( + request.getQueryId(), new NullAsyncQueryRequestContext()); listener.onResponse( new CancelAsyncQueryActionResponse( String.format("Deleted async query with id: %s", jobId))); diff --git a/spark/src/main/java/org/opensearch/sql/spark/transport/TransportCreateAsyncQueryRequestAction.java b/async-query/src/main/java/org/opensearch/sql/spark/transport/TransportCreateAsyncQueryRequestAction.java similarity index 100% rename from spark/src/main/java/org/opensearch/sql/spark/transport/TransportCreateAsyncQueryRequestAction.java rename to async-query/src/main/java/org/opensearch/sql/spark/transport/TransportCreateAsyncQueryRequestAction.java diff --git a/spark/src/main/java/org/opensearch/sql/spark/transport/TransportGetAsyncQueryResultAction.java b/async-query/src/main/java/org/opensearch/sql/spark/transport/TransportGetAsyncQueryResultAction.java similarity index 89% rename from spark/src/main/java/org/opensearch/sql/spark/transport/TransportGetAsyncQueryResultAction.java rename to async-query/src/main/java/org/opensearch/sql/spark/transport/TransportGetAsyncQueryResultAction.java index 5c784cf04c..250837e0cd 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/transport/TransportGetAsyncQueryResultAction.java +++ 
b/async-query/src/main/java/org/opensearch/sql/spark/transport/TransportGetAsyncQueryResultAction.java @@ -1,8 +1,6 @@ /* - * - * * Copyright OpenSearch Contributors - * * SPDX-License-Identifier: Apache-2.0 - * + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 */ package org.opensearch.sql.spark.transport; @@ -18,8 +16,9 @@ import org.opensearch.sql.spark.asyncquery.AsyncQueryExecutorService; import org.opensearch.sql.spark.asyncquery.AsyncQueryExecutorServiceImpl; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryExecutionResponse; -import org.opensearch.sql.spark.asyncquery.model.AsyncQueryResult; +import org.opensearch.sql.spark.asyncquery.model.NullAsyncQueryRequestContext; import org.opensearch.sql.spark.transport.format.AsyncQueryResultResponseFormatter; +import org.opensearch.sql.spark.transport.model.AsyncQueryResult; import org.opensearch.sql.spark.transport.model.GetAsyncQueryResultActionRequest; import org.opensearch.sql.spark.transport.model.GetAsyncQueryResultActionResponse; import org.opensearch.tasks.Task; @@ -52,7 +51,7 @@ protected void doExecute( try { String jobId = request.getQueryId(); AsyncQueryExecutionResponse asyncQueryExecutionResponse = - asyncQueryExecutorService.getAsyncQueryResults(jobId); + asyncQueryExecutorService.getAsyncQueryResults(jobId, new NullAsyncQueryRequestContext()); ResponseFormatter formatter = new AsyncQueryResultResponseFormatter(JsonResponseFormatter.Style.PRETTY); String responseContent = diff --git a/spark/src/main/java/org/opensearch/sql/spark/transport/config/AsyncExecutorServiceModule.java b/async-query/src/main/java/org/opensearch/sql/spark/transport/config/AsyncExecutorServiceModule.java similarity index 63% rename from spark/src/main/java/org/opensearch/sql/spark/transport/config/AsyncExecutorServiceModule.java rename to async-query/src/main/java/org/opensearch/sql/spark/transport/config/AsyncExecutorServiceModule.java index c4eaceb937..d21f8c7665 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/transport/config/AsyncExecutorServiceModule.java +++ b/async-query/src/main/java/org/opensearch/sql/spark/transport/config/AsyncExecutorServiceModule.java @@ -7,6 +7,7 @@ import static org.opensearch.sql.spark.execution.statestore.StateStore.ALL_DATASOURCE; +import com.google.common.collect.ImmutableMap; import lombok.RequiredArgsConstructor; import org.opensearch.client.node.NodeClient; import org.opensearch.cluster.service.ClusterService; @@ -15,6 +16,7 @@ import org.opensearch.common.inject.Singleton; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.datasource.DataSourceService; +import org.opensearch.sql.datasource.model.DataSourceType; import org.opensearch.sql.legacy.metrics.GaugeMetric; import org.opensearch.sql.legacy.metrics.Metrics; import org.opensearch.sql.spark.asyncquery.AsyncQueryExecutorService; @@ -23,6 +25,9 @@ import org.opensearch.sql.spark.asyncquery.OpenSearchAsyncQueryJobMetadataStorageService; import org.opensearch.sql.spark.client.EMRServerlessClientFactory; import org.opensearch.sql.spark.client.EMRServerlessClientFactoryImpl; +import org.opensearch.sql.spark.config.OpenSearchAsyncQuerySchedulerConfigComposer; +import org.opensearch.sql.spark.config.OpenSearchExtraParameterComposer; +import org.opensearch.sql.spark.config.SparkExecutionEngineConfigClusterSettingLoader; import org.opensearch.sql.spark.config.SparkExecutionEngineConfigSupplier; import org.opensearch.sql.spark.config.SparkExecutionEngineConfigSupplierImpl; import 
org.opensearch.sql.spark.dispatcher.DatasourceEmbeddedQueryIdProvider; @@ -42,15 +47,30 @@ import org.opensearch.sql.spark.execution.xcontent.FlintIndexStateModelXContentSerializer; import org.opensearch.sql.spark.execution.xcontent.SessionModelXContentSerializer; import org.opensearch.sql.spark.execution.xcontent.StatementModelXContentSerializer; +import org.opensearch.sql.spark.flint.FlintIndexClient; import org.opensearch.sql.spark.flint.FlintIndexMetadataServiceImpl; import org.opensearch.sql.spark.flint.FlintIndexStateModelService; import org.opensearch.sql.spark.flint.IndexDMLResultStorageService; +import org.opensearch.sql.spark.flint.OpenSearchFlintIndexClient; import org.opensearch.sql.spark.flint.OpenSearchFlintIndexStateModelService; import org.opensearch.sql.spark.flint.OpenSearchIndexDMLResultStorageService; import org.opensearch.sql.spark.flint.operation.FlintIndexOpFactory; import org.opensearch.sql.spark.leasemanager.DefaultLeaseManager; +import org.opensearch.sql.spark.metrics.MetricsService; +import org.opensearch.sql.spark.metrics.OpenSearchMetricsService; +import org.opensearch.sql.spark.parameter.S3GlueDataSourceSparkParameterComposer; +import org.opensearch.sql.spark.parameter.SparkParameterComposerCollection; +import org.opensearch.sql.spark.parameter.SparkSubmitParametersBuilderProvider; import org.opensearch.sql.spark.response.JobExecutionResponseReader; import org.opensearch.sql.spark.response.OpenSearchJobExecutionResponseReader; +import org.opensearch.sql.spark.scheduler.AsyncQueryScheduler; +import org.opensearch.sql.spark.scheduler.OpenSearchAsyncQueryScheduler; +import org.opensearch.sql.spark.validator.DefaultGrammarElementValidator; +import org.opensearch.sql.spark.validator.GrammarElementValidatorProvider; +import org.opensearch.sql.spark.validator.PPLQueryValidator; +import org.opensearch.sql.spark.validator.S3GlueSQLGrammarElementValidator; +import org.opensearch.sql.spark.validator.SQLQueryValidator; +import org.opensearch.sql.spark.validator.SecurityLakeSQLGrammarElementValidator; @RequiredArgsConstructor public class AsyncExecutorServiceModule extends AbstractModule { @@ -88,9 +108,16 @@ public SparkQueryDispatcher sparkQueryDispatcher( DataSourceService dataSourceService, SessionManager sessionManager, QueryHandlerFactory queryHandlerFactory, - QueryIdProvider queryIdProvider) { + QueryIdProvider queryIdProvider, + SQLQueryValidator sqlQueryValidator, + PPLQueryValidator pplQueryValidator) { return new SparkQueryDispatcher( - dataSourceService, sessionManager, queryHandlerFactory, queryIdProvider); + dataSourceService, + sessionManager, + queryHandlerFactory, + queryIdProvider, + sqlQueryValidator, + pplQueryValidator); } @Provides @@ -106,7 +133,9 @@ public QueryHandlerFactory queryhandlerFactory( DefaultLeaseManager defaultLeaseManager, IndexDMLResultStorageService indexDMLResultStorageService, FlintIndexOpFactory flintIndexOpFactory, - EMRServerlessClientFactory emrServerlessClientFactory) { + EMRServerlessClientFactory emrServerlessClientFactory, + MetricsService metricsService, + SparkSubmitParametersBuilderProvider sparkSubmitParametersBuilderProvider) { return new QueryHandlerFactory( openSearchJobExecutionResponseReader, flintIndexMetadataReader, @@ -114,17 +143,29 @@ public QueryHandlerFactory queryhandlerFactory( defaultLeaseManager, indexDMLResultStorageService, flintIndexOpFactory, - emrServerlessClientFactory); + emrServerlessClientFactory, + metricsService, + sparkSubmitParametersBuilderProvider); } @Provides public FlintIndexOpFactory 
flintIndexOpFactory( FlintIndexStateModelService flintIndexStateModelService, - NodeClient client, + FlintIndexClient flintIndexClient, FlintIndexMetadataServiceImpl flintIndexMetadataService, - EMRServerlessClientFactory emrServerlessClientFactory) { + EMRServerlessClientFactory emrServerlessClientFactory, + AsyncQueryScheduler asyncQueryScheduler) { return new FlintIndexOpFactory( - flintIndexStateModelService, client, flintIndexMetadataService, emrServerlessClientFactory); + flintIndexStateModelService, + flintIndexClient, + flintIndexMetadataService, + emrServerlessClientFactory, + asyncQueryScheduler); + } + + @Provides + public FlintIndexClient flintIndexClient(NodeClient nodeClient) { + return new OpenSearchFlintIndexClient(nodeClient); } @Provides @@ -133,6 +174,41 @@ public FlintIndexStateModelService flintIndexStateModelService( return new OpenSearchFlintIndexStateModelService(stateStore, serializer); } + @Provides + public SparkSubmitParametersBuilderProvider sparkSubmitParametersBuilderProvider( + Settings settings, SparkExecutionEngineConfigClusterSettingLoader clusterSettingLoader) { + SparkParameterComposerCollection collection = new SparkParameterComposerCollection(); + collection.register( + DataSourceType.S3GLUE, new S3GlueDataSourceSparkParameterComposer(clusterSettingLoader)); + collection.register( + DataSourceType.SECURITY_LAKE, + new S3GlueDataSourceSparkParameterComposer(clusterSettingLoader)); + collection.register(new OpenSearchAsyncQuerySchedulerConfigComposer(settings)); + collection.register(new OpenSearchExtraParameterComposer(clusterSettingLoader)); + return new SparkSubmitParametersBuilderProvider(collection); + } + + @Provides + public SQLQueryValidator sqlQueryValidator() { + GrammarElementValidatorProvider validatorProvider = + new GrammarElementValidatorProvider( + ImmutableMap.of( + DataSourceType.S3GLUE, + new S3GlueSQLGrammarElementValidator(), + DataSourceType.SECURITY_LAKE, + new SecurityLakeSQLGrammarElementValidator()), + new DefaultGrammarElementValidator()); + return new SQLQueryValidator(validatorProvider); + } + + @Provides + public PPLQueryValidator pplQueryValidator() { + GrammarElementValidatorProvider validatorProvider = + new GrammarElementValidatorProvider( + ImmutableMap.of(), new DefaultGrammarElementValidator()); + return new PPLQueryValidator(validatorProvider); + } + @Provides public IndexDMLResultStorageService indexDMLResultStorageService( DataSourceService dataSourceService, StateStore stateStore) { @@ -172,13 +248,26 @@ public DefaultLeaseManager defaultLeaseManager(Settings settings, StateStore sta @Provides public EMRServerlessClientFactory createEMRServerlessClientFactory( - SparkExecutionEngineConfigSupplier sparkExecutionEngineConfigSupplier) { - return new EMRServerlessClientFactoryImpl(sparkExecutionEngineConfigSupplier); + SparkExecutionEngineConfigSupplier sparkExecutionEngineConfigSupplier, + MetricsService metricsService) { + return new EMRServerlessClientFactoryImpl(sparkExecutionEngineConfigSupplier, metricsService); + } + + @Provides + public MetricsService metricsService() { + return new OpenSearchMetricsService(); } @Provides - public SparkExecutionEngineConfigSupplier sparkExecutionEngineConfigSupplier(Settings settings) { - return new SparkExecutionEngineConfigSupplierImpl(settings); + public SparkExecutionEngineConfigSupplier sparkExecutionEngineConfigSupplier( + Settings settings, SparkExecutionEngineConfigClusterSettingLoader clusterSettingLoader) { + return new 
SparkExecutionEngineConfigSupplierImpl(settings, clusterSettingLoader); + } + + @Provides + public SparkExecutionEngineConfigClusterSettingLoader + sparkExecutionEngineConfigClusterSettingLoader(Settings settings) { + return new SparkExecutionEngineConfigClusterSettingLoader(settings); } @Provides @@ -197,6 +286,14 @@ public SessionConfigSupplier sessionConfigSupplier(Settings settings) { return new OpenSearchSessionConfigSupplier(settings); } + @Provides + @Singleton + public AsyncQueryScheduler asyncQueryScheduler(NodeClient client, ClusterService clusterService) { + OpenSearchAsyncQueryScheduler scheduler = + new OpenSearchAsyncQueryScheduler(client, clusterService); + return scheduler; + } + private void registerStateStoreMetrics(StateStore stateStore) { GaugeMetric activeSessionMetric = new GaugeMetric<>( diff --git a/spark/src/main/java/org/opensearch/sql/spark/transport/format/AsyncQueryResultResponseFormatter.java b/async-query/src/main/java/org/opensearch/sql/spark/transport/format/AsyncQueryResultResponseFormatter.java similarity index 97% rename from spark/src/main/java/org/opensearch/sql/spark/transport/format/AsyncQueryResultResponseFormatter.java rename to async-query/src/main/java/org/opensearch/sql/spark/transport/format/AsyncQueryResultResponseFormatter.java index 3a2a5b110f..afa6797694 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/transport/format/AsyncQueryResultResponseFormatter.java +++ b/async-query/src/main/java/org/opensearch/sql/spark/transport/format/AsyncQueryResultResponseFormatter.java @@ -14,7 +14,7 @@ import org.opensearch.core.common.Strings; import org.opensearch.sql.protocol.response.QueryResult; import org.opensearch.sql.protocol.response.format.JsonResponseFormatter; -import org.opensearch.sql.spark.asyncquery.model.AsyncQueryResult; +import org.opensearch.sql.spark.transport.model.AsyncQueryResult; /** * JSON response format with schema header and data rows. 
For example, diff --git a/spark/src/main/java/org/opensearch/sql/spark/rest/model/CreateAsyncQueryRequest.java b/async-query/src/main/java/org/opensearch/sql/spark/transport/format/CreateAsyncQueryRequestConverter.java similarity index 58% rename from spark/src/main/java/org/opensearch/sql/spark/rest/model/CreateAsyncQueryRequest.java rename to async-query/src/main/java/org/opensearch/sql/spark/transport/format/CreateAsyncQueryRequestConverter.java index f3a9a198fb..c22c2da24d 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/rest/model/CreateAsyncQueryRequest.java +++ b/async-query/src/main/java/org/opensearch/sql/spark/transport/format/CreateAsyncQueryRequestConverter.java @@ -3,38 +3,18 @@ * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.sql.spark.rest.model; +package org.opensearch.sql.spark.transport.format; import static org.opensearch.core.xcontent.XContentParserUtils.ensureExpectedToken; -import java.io.IOException; -import lombok.Data; -import org.apache.commons.lang3.Validate; +import lombok.experimental.UtilityClass; import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.sql.spark.rest.model.CreateAsyncQueryRequest; +import org.opensearch.sql.spark.rest.model.LangType; -@Data -public class CreateAsyncQueryRequest { - private String query; - private String datasource; - private LangType lang; - // optional sessionId - private String sessionId; - - public CreateAsyncQueryRequest(String query, String datasource, LangType lang) { - this.query = Validate.notNull(query, "Query can't be null"); - this.datasource = Validate.notNull(datasource, "Datasource can't be null"); - this.lang = Validate.notNull(lang, "lang can't be null"); - } - - public CreateAsyncQueryRequest(String query, String datasource, LangType lang, String sessionId) { - this.query = Validate.notNull(query, "Query can't be null"); - this.datasource = Validate.notNull(datasource, "Datasource can't be null"); - this.lang = Validate.notNull(lang, "lang can't be null"); - this.sessionId = sessionId; - } - - public static CreateAsyncQueryRequest fromXContentParser(XContentParser parser) - throws IOException { +@UtilityClass +public class CreateAsyncQueryRequestConverter { + public static CreateAsyncQueryRequest fromXContentParser(XContentParser parser) { String query = null; LangType lang = null; String datasource = null; diff --git a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/AsyncQueryResult.java b/async-query/src/main/java/org/opensearch/sql/spark/transport/model/AsyncQueryResult.java similarity index 87% rename from spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/AsyncQueryResult.java rename to async-query/src/main/java/org/opensearch/sql/spark/transport/model/AsyncQueryResult.java index c229aa3920..712cebf7e1 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/AsyncQueryResult.java +++ b/async-query/src/main/java/org/opensearch/sql/spark/transport/model/AsyncQueryResult.java @@ -1,4 +1,9 @@ -package org.opensearch.sql.spark.asyncquery.model; +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.transport.model; import java.util.Collection; import lombok.Getter; diff --git a/spark/src/main/java/org/opensearch/sql/spark/transport/model/CancelAsyncQueryActionRequest.java b/async-query/src/main/java/org/opensearch/sql/spark/transport/model/CancelAsyncQueryActionRequest.java similarity index 88% rename from 
spark/src/main/java/org/opensearch/sql/spark/transport/model/CancelAsyncQueryActionRequest.java rename to async-query/src/main/java/org/opensearch/sql/spark/transport/model/CancelAsyncQueryActionRequest.java index 0065b575ed..8a5f31646f 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/transport/model/CancelAsyncQueryActionRequest.java +++ b/async-query/src/main/java/org/opensearch/sql/spark/transport/model/CancelAsyncQueryActionRequest.java @@ -1,8 +1,6 @@ /* - * - * * Copyright OpenSearch Contributors - * * SPDX-License-Identifier: Apache-2.0 - * + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 */ package org.opensearch.sql.spark.transport.model; diff --git a/spark/src/main/java/org/opensearch/sql/spark/transport/model/CancelAsyncQueryActionResponse.java b/async-query/src/main/java/org/opensearch/sql/spark/transport/model/CancelAsyncQueryActionResponse.java similarity index 88% rename from spark/src/main/java/org/opensearch/sql/spark/transport/model/CancelAsyncQueryActionResponse.java rename to async-query/src/main/java/org/opensearch/sql/spark/transport/model/CancelAsyncQueryActionResponse.java index af97140b49..a73430603f 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/transport/model/CancelAsyncQueryActionResponse.java +++ b/async-query/src/main/java/org/opensearch/sql/spark/transport/model/CancelAsyncQueryActionResponse.java @@ -1,8 +1,6 @@ /* - * - * * Copyright OpenSearch Contributors - * * SPDX-License-Identifier: Apache-2.0 - * + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 */ package org.opensearch.sql.spark.transport.model; diff --git a/spark/src/main/java/org/opensearch/sql/spark/transport/model/CreateAsyncQueryActionRequest.java b/async-query/src/main/java/org/opensearch/sql/spark/transport/model/CreateAsyncQueryActionRequest.java similarity index 90% rename from spark/src/main/java/org/opensearch/sql/spark/transport/model/CreateAsyncQueryActionRequest.java rename to async-query/src/main/java/org/opensearch/sql/spark/transport/model/CreateAsyncQueryActionRequest.java index bcb329b2dc..d003990311 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/transport/model/CreateAsyncQueryActionRequest.java +++ b/async-query/src/main/java/org/opensearch/sql/spark/transport/model/CreateAsyncQueryActionRequest.java @@ -1,8 +1,6 @@ /* - * - * * Copyright OpenSearch Contributors - * * SPDX-License-Identifier: Apache-2.0 - * + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 */ package org.opensearch.sql.spark.transport.model; diff --git a/spark/src/main/java/org/opensearch/sql/spark/transport/model/CreateAsyncQueryActionResponse.java b/async-query/src/main/java/org/opensearch/sql/spark/transport/model/CreateAsyncQueryActionResponse.java similarity index 88% rename from spark/src/main/java/org/opensearch/sql/spark/transport/model/CreateAsyncQueryActionResponse.java rename to async-query/src/main/java/org/opensearch/sql/spark/transport/model/CreateAsyncQueryActionResponse.java index de5acc2537..17a4a73ed7 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/transport/model/CreateAsyncQueryActionResponse.java +++ b/async-query/src/main/java/org/opensearch/sql/spark/transport/model/CreateAsyncQueryActionResponse.java @@ -1,8 +1,6 @@ /* - * - * * Copyright OpenSearch Contributors - * * SPDX-License-Identifier: Apache-2.0 - * + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 */ package org.opensearch.sql.spark.transport.model; diff --git 
a/spark/src/main/java/org/opensearch/sql/spark/transport/model/GetAsyncQueryResultActionRequest.java b/async-query/src/main/java/org/opensearch/sql/spark/transport/model/GetAsyncQueryResultActionRequest.java similarity index 88% rename from spark/src/main/java/org/opensearch/sql/spark/transport/model/GetAsyncQueryResultActionRequest.java rename to async-query/src/main/java/org/opensearch/sql/spark/transport/model/GetAsyncQueryResultActionRequest.java index 06faa75a26..f30decbb4d 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/transport/model/GetAsyncQueryResultActionRequest.java +++ b/async-query/src/main/java/org/opensearch/sql/spark/transport/model/GetAsyncQueryResultActionRequest.java @@ -1,8 +1,6 @@ /* - * - * * Copyright OpenSearch Contributors - * * SPDX-License-Identifier: Apache-2.0 - * + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 */ package org.opensearch.sql.spark.transport.model; diff --git a/spark/src/main/java/org/opensearch/sql/spark/transport/model/GetAsyncQueryResultActionResponse.java b/async-query/src/main/java/org/opensearch/sql/spark/transport/model/GetAsyncQueryResultActionResponse.java similarity index 88% rename from spark/src/main/java/org/opensearch/sql/spark/transport/model/GetAsyncQueryResultActionResponse.java rename to async-query/src/main/java/org/opensearch/sql/spark/transport/model/GetAsyncQueryResultActionResponse.java index bb77bb131a..b2bbedd9ef 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/transport/model/GetAsyncQueryResultActionResponse.java +++ b/async-query/src/main/java/org/opensearch/sql/spark/transport/model/GetAsyncQueryResultActionResponse.java @@ -1,8 +1,6 @@ /* - * - * * Copyright OpenSearch Contributors - * * SPDX-License-Identifier: Apache-2.0 - * + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 */ package org.opensearch.sql.spark.transport.model; diff --git a/async-query/src/main/resources/async-query-scheduler-index-mapping.yml b/async-query/src/main/resources/async-query-scheduler-index-mapping.yml new file mode 100644 index 0000000000..1aa90e8ed8 --- /dev/null +++ b/async-query/src/main/resources/async-query-scheduler-index-mapping.yml @@ -0,0 +1,47 @@ +--- +## +# Copyright OpenSearch Contributors +# SPDX-License-Identifier: Apache-2.0 +## + +# Schema file for the .async-query-scheduler index +# Also "dynamic" is set to "false" so that other fields cannot be added. 
+dynamic: false +properties: + accountId: + type: keyword + jobId: + type: keyword + dataSource: + type: keyword + scheduledQuery: + type: text + queryLang: + type: keyword + lastUpdateTime: + type: date + format: epoch_millis + enabledTime: + type: date + format: epoch_millis + schedule: + properties: + initialDelay: + type: long + interval: + properties: + start_time: + type: date + format: "strict_date_time||epoch_millis" + period: + type: integer + unit: + type: keyword + enabled: + type: boolean + lockDurationSeconds: + type: long + null_value: -1 + jitter: + type: double + null_value: 0.0 \ No newline at end of file diff --git a/async-query/src/main/resources/async-query-scheduler-index-settings.yml b/async-query/src/main/resources/async-query-scheduler-index-settings.yml new file mode 100644 index 0000000000..386f1f4f34 --- /dev/null +++ b/async-query/src/main/resources/async-query-scheduler-index-settings.yml @@ -0,0 +1,11 @@ +--- +## +# Copyright OpenSearch Contributors +# SPDX-License-Identifier: Apache-2.0 +## + +# Settings file for the .async-query-scheduler index +index: + number_of_shards: "1" + auto_expand_replicas: "0-2" + number_of_replicas: "0" \ No newline at end of file diff --git a/async-query/src/test/java/org/opensearch/sql/asyncquery/DummyConsumerTest.java b/async-query/src/test/java/org/opensearch/sql/asyncquery/DummyConsumerTest.java deleted file mode 100644 index a08dbae736..0000000000 --- a/async-query/src/test/java/org/opensearch/sql/asyncquery/DummyConsumerTest.java +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.asyncquery; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.mockito.Mockito.when; - -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.mockito.Mock; -import org.mockito.junit.jupiter.MockitoExtension; - -@ExtendWith(MockitoExtension.class) -class DummyConsumerTest { - - @Mock Dummy dummy; - - @Test - public void test() { - DummyConsumer dummyConsumer = new DummyConsumer(dummy); - when(dummy.hello()).thenReturn("Hello from mock"); - - assertEquals("Hello from mock", dummyConsumer.hello()); - } -} diff --git a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceImplSpecTest.java b/async-query/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceImplSpecTest.java similarity index 89% rename from spark/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceImplSpecTest.java rename to async-query/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceImplSpecTest.java index f8b61aee5a..e6459c752e 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceImplSpecTest.java +++ b/async-query/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceImplSpecTest.java @@ -44,12 +44,12 @@ import org.opensearch.sql.spark.utils.IDUtils; public class AsyncQueryExecutorServiceImplSpecTest extends AsyncQueryExecutorServiceSpec { - AsyncQueryRequestContext asyncQueryRequestContext = new NullAsyncQueryRequestContext(); + final AsyncQueryRequestContext asyncQueryRequestContext = new NullAsyncQueryRequestContext(); @Disabled("batch query is unsupported") public void withoutSessionCreateAsyncQueryThenGetResultThenCancel() { LocalEMRSClient emrsClient = new LocalEMRSClient(); - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; 
+ EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -66,12 +66,14 @@ public void withoutSessionCreateAsyncQueryThenGetResultThenCancel() { // 2. fetch async query result. AsyncQueryExecutionResponse asyncQueryResults = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals("RUNNING", asyncQueryResults.getStatus()); emrsClient.getJobRunResultCalled(1); // 3. cancel async query. - String cancelQueryId = asyncQueryExecutorService.cancelQuery(response.getQueryId()); + String cancelQueryId = + asyncQueryExecutorService.cancelQuery(response.getQueryId(), asyncQueryRequestContext); assertEquals(response.getQueryId(), cancelQueryId); emrsClient.cancelJobRunCalled(1); } @@ -79,7 +81,7 @@ public void withoutSessionCreateAsyncQueryThenGetResultThenCancel() { @Disabled("batch query is unsupported") public void sessionLimitNotImpactBatchQuery() { LocalEMRSClient emrsClient = new LocalEMRSClient(); - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -104,7 +106,7 @@ public void sessionLimitNotImpactBatchQuery() { @Disabled("batch query is unsupported") public void createAsyncQueryCreateJobWithCorrectParameters() { LocalEMRSClient emrsClient = new LocalEMRSClient(); - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -140,7 +142,7 @@ public void createAsyncQueryCreateJobWithCorrectParameters() { @Test public void withSessionCreateAsyncQueryThenGetResultThenCancel() { LocalEMRSClient emrsClient = new LocalEMRSClient(); - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -151,26 +153,29 @@ public void withSessionCreateAsyncQueryThenGetResultThenCancel() { asyncQueryRequestContext); assertNotNull(response.getSessionId()); Optional statementModel = - statementStorageService.getStatement(response.getQueryId(), MYS3_DATASOURCE); + statementStorageService.getStatement( + response.getQueryId(), MYS3_DATASOURCE, asyncQueryRequestContext); assertTrue(statementModel.isPresent()); assertEquals(StatementState.WAITING, statementModel.get().getStatementState()); // 2. fetch async query result. AsyncQueryExecutionResponse asyncQueryResults = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals("", asyncQueryResults.getError()); assertTrue(Strings.isEmpty(asyncQueryResults.getError())); assertEquals(StatementState.WAITING.getState(), asyncQueryResults.getStatus()); // 3. cancel async query. 
- String cancelQueryId = asyncQueryExecutorService.cancelQuery(response.getQueryId()); + String cancelQueryId = + asyncQueryExecutorService.cancelQuery(response.getQueryId(), asyncQueryRequestContext); assertEquals(response.getQueryId(), cancelQueryId); } @Test public void reuseSessionWhenCreateAsyncQuery() { LocalEMRSClient emrsClient = new LocalEMRSClient(); - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -209,13 +214,15 @@ public void reuseSessionWhenCreateAsyncQuery() { .must(QueryBuilders.termQuery(SESSION_ID, first.getSessionId())))); Optional firstModel = - statementStorageService.getStatement(first.getQueryId(), MYS3_DATASOURCE); + statementStorageService.getStatement( + first.getQueryId(), MYS3_DATASOURCE, asyncQueryRequestContext); assertTrue(firstModel.isPresent()); assertEquals(StatementState.WAITING, firstModel.get().getStatementState()); assertEquals(first.getQueryId(), firstModel.get().getStatementId().getId()); assertEquals(first.getQueryId(), firstModel.get().getQueryId()); Optional secondModel = - statementStorageService.getStatement(second.getQueryId(), MYS3_DATASOURCE); + statementStorageService.getStatement( + second.getQueryId(), MYS3_DATASOURCE, asyncQueryRequestContext); assertEquals(StatementState.WAITING, secondModel.get().getStatementState()); assertEquals(second.getQueryId(), secondModel.get().getStatementId().getId()); assertEquals(second.getQueryId(), secondModel.get().getQueryId()); @@ -224,7 +231,7 @@ public void reuseSessionWhenCreateAsyncQuery() { @Disabled("batch query is unsupported") public void batchQueryHasTimeout() { LocalEMRSClient emrsClient = new LocalEMRSClient(); - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -240,7 +247,7 @@ public void batchQueryHasTimeout() { @Test public void interactiveQueryNoTimeout() { LocalEMRSClient emrsClient = new LocalEMRSClient(); - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -274,7 +281,7 @@ public void datasourceWithBasicAuth() { .setProperties(properties) .build()); LocalEMRSClient emrsClient = new LocalEMRSClient(); - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -295,7 +302,7 @@ public void datasourceWithBasicAuth() { @Test public void withSessionCreateAsyncQueryFailed() { LocalEMRSClient emrsClient = new LocalEMRSClient(); - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -305,11 +312,12 @@ public void withSessionCreateAsyncQueryFailed() { // 1. create async query. 
CreateAsyncQueryResponse response = asyncQueryExecutorService.createAsyncQuery( - new CreateAsyncQueryRequest("myselect 1", MYS3_DATASOURCE, LangType.SQL, null), + new CreateAsyncQueryRequest("select 1", MYS3_DATASOURCE, LangType.SQL, null), asyncQueryRequestContext); assertNotNull(response.getSessionId()); Optional statementModel = - statementStorageService.getStatement(response.getQueryId(), MYS3_DATASOURCE); + statementStorageService.getStatement( + response.getQueryId(), MYS3_DATASOURCE, asyncQueryRequestContext); assertTrue(statementModel.isPresent()); assertEquals(StatementState.WAITING, statementModel.get().getStatementState()); @@ -332,10 +340,12 @@ public void withSessionCreateAsyncQueryFailed() { .error("mock error") .metadata(submitted.getMetadata()) .build(); - statementStorageService.updateStatementState(mocked, StatementState.FAILED); + statementStorageService.updateStatementState( + mocked, StatementState.FAILED, asyncQueryRequestContext); AsyncQueryExecutionResponse asyncQueryResults = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals(StatementState.FAILED.getState(), asyncQueryResults.getStatus()); assertEquals("mock error", asyncQueryResults.getError()); } @@ -344,7 +354,7 @@ public void withSessionCreateAsyncQueryFailed() { @Test public void createSessionMoreThanLimitFailed() { LocalEMRSClient emrsClient = new LocalEMRSClient(); - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -376,7 +386,7 @@ public void createSessionMoreThanLimitFailed() { @Test public void recreateSessionIfNotReady() { LocalEMRSClient emrsClient = new LocalEMRSClient(); - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -417,7 +427,7 @@ public void recreateSessionIfNotReady() { @Test public void submitQueryWithDifferentDataSourceSessionWillCreateNewSession() { LocalEMRSClient emrsClient = new LocalEMRSClient(); - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -465,7 +475,7 @@ public void submitQueryWithDifferentDataSourceSessionWillCreateNewSession() { @Test public void recreateSessionIfStale() { LocalEMRSClient emrsClient = new LocalEMRSClient(); - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -523,7 +533,7 @@ public void recreateSessionIfStale() { @Test public void submitQueryInInvalidSessionWillCreateNewSession() { LocalEMRSClient emrsClient = new LocalEMRSClient(); - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService 
asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -561,7 +571,7 @@ public void datasourceNameIncludeUppercase() { .build()); LocalEMRSClient emrsClient = new LocalEMRSClient(); - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -583,7 +593,7 @@ public void datasourceNameIncludeUppercase() { @Test public void concurrentSessionLimitIsDomainLevel() { LocalEMRSClient emrsClient = new LocalEMRSClient(); - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -612,7 +622,7 @@ public void concurrentSessionLimitIsDomainLevel() { @Test public void testDatasourceDisabled() { LocalEMRSClient emrsClient = new LocalEMRSClient(); - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); diff --git a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceSpec.java b/async-query/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceSpec.java similarity index 78% rename from spark/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceSpec.java rename to async-query/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceSpec.java index 9a94accd7d..9511359f86 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceSpec.java +++ b/async-query/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceSpec.java @@ -18,12 +18,17 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.io.Resources; +import com.google.gson.Gson; +import com.google.gson.JsonObject; import java.net.URL; import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Optional; +import java.util.stream.Collectors; import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.SneakyThrows; @@ -42,6 +47,7 @@ import org.opensearch.index.query.QueryBuilder; import org.opensearch.plugins.Plugin; import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.sql.common.setting.Settings.Key; import org.opensearch.sql.datasource.model.DataSourceMetadata; import org.opensearch.sql.datasource.model.DataSourceType; import org.opensearch.sql.datasources.encryptor.EncryptorImpl; @@ -58,6 +64,7 @@ import org.opensearch.sql.spark.client.StartJobRequest; import org.opensearch.sql.spark.config.OpenSearchSparkSubmitParameterModifier; import org.opensearch.sql.spark.config.SparkExecutionEngineConfig; +import org.opensearch.sql.spark.config.SparkExecutionEngineConfigClusterSettingLoader; import org.opensearch.sql.spark.dispatcher.DatasourceEmbeddedQueryIdProvider; import org.opensearch.sql.spark.dispatcher.QueryHandlerFactory; import org.opensearch.sql.spark.dispatcher.SparkQueryDispatcher; @@ -77,16 +84,29 @@ 
import org.opensearch.sql.spark.execution.xcontent.FlintIndexStateModelXContentSerializer; import org.opensearch.sql.spark.execution.xcontent.SessionModelXContentSerializer; import org.opensearch.sql.spark.execution.xcontent.StatementModelXContentSerializer; +import org.opensearch.sql.spark.flint.FlintIndexClient; import org.opensearch.sql.spark.flint.FlintIndexMetadataService; import org.opensearch.sql.spark.flint.FlintIndexMetadataServiceImpl; import org.opensearch.sql.spark.flint.FlintIndexStateModelService; import org.opensearch.sql.spark.flint.FlintIndexType; +import org.opensearch.sql.spark.flint.OpenSearchFlintIndexClient; import org.opensearch.sql.spark.flint.OpenSearchFlintIndexStateModelService; import org.opensearch.sql.spark.flint.OpenSearchIndexDMLResultStorageService; import org.opensearch.sql.spark.flint.operation.FlintIndexOpFactory; import org.opensearch.sql.spark.leasemanager.DefaultLeaseManager; +import org.opensearch.sql.spark.metrics.OpenSearchMetricsService; +import org.opensearch.sql.spark.parameter.S3GlueDataSourceSparkParameterComposer; +import org.opensearch.sql.spark.parameter.SparkParameterComposerCollection; +import org.opensearch.sql.spark.parameter.SparkSubmitParametersBuilderProvider; import org.opensearch.sql.spark.response.JobExecutionResponseReader; import org.opensearch.sql.spark.response.OpenSearchJobExecutionResponseReader; +import org.opensearch.sql.spark.scheduler.AsyncQueryScheduler; +import org.opensearch.sql.spark.scheduler.OpenSearchAsyncQueryScheduler; +import org.opensearch.sql.spark.validator.DefaultGrammarElementValidator; +import org.opensearch.sql.spark.validator.GrammarElementValidatorProvider; +import org.opensearch.sql.spark.validator.PPLQueryValidator; +import org.opensearch.sql.spark.validator.S3GlueSQLGrammarElementValidator; +import org.opensearch.sql.spark.validator.SQLQueryValidator; import org.opensearch.sql.storage.DataSourceFactory; import org.opensearch.test.OpenSearchIntegTestCase; @@ -94,11 +114,16 @@ public class AsyncQueryExecutorServiceSpec extends OpenSearchIntegTestCase { public static final String MYS3_DATASOURCE = "mys3"; public static final String MYGLUE_DATASOURCE = "my_glue"; + public static final String ACCOUNT_ID = "accountId"; + public static final String APPLICATION_ID = "appId"; + public static final String REGION = "us-west-2"; + public static final String ROLE_ARN = "roleArn"; protected ClusterService clusterService; protected org.opensearch.sql.common.setting.Settings pluginSettings; protected SessionConfigSupplier sessionConfigSupplier; protected NodeClient client; + protected FlintIndexClient flintIndexClient; protected DataSourceServiceImpl dataSourceService; protected ClusterSettings clusterSettings; protected FlintIndexMetadataService flintIndexMetadataService; @@ -106,8 +131,9 @@ public class AsyncQueryExecutorServiceSpec extends OpenSearchIntegTestCase { protected StateStore stateStore; protected SessionStorageService sessionStorageService; protected StatementStorageService statementStorageService; + protected AsyncQueryScheduler asyncQueryScheduler; protected AsyncQueryRequestContext asyncQueryRequestContext; - protected SessionIdProvider sessionIdProvider = new DatasourceEmbeddedSessionIdProvider(); + protected final SessionIdProvider sessionIdProvider = new DatasourceEmbeddedSessionIdProvider(); @Override protected Collection> nodePlugins() { @@ -141,6 +167,7 @@ public void setup() { .putList(DATASOURCE_URI_HOSTS_DENY_LIST.getKey(), Collections.emptyList()) .build()) .get(); + flintIndexClient = 
new OpenSearchFlintIndexClient(client); dataSourceService = createDataSourceService(); DataSourceMetadata dm = new DataSourceMetadata.Builder() @@ -185,12 +212,17 @@ public void setup() { new OpenSearchSessionStorageService(stateStore, new SessionModelXContentSerializer()); statementStorageService = new OpenSearchStatementStorageService(stateStore, new StatementModelXContentSerializer()); + asyncQueryScheduler = new OpenSearchAsyncQueryScheduler(client, clusterService); } protected FlintIndexOpFactory getFlintIndexOpFactory( EMRServerlessClientFactory emrServerlessClientFactory) { return new FlintIndexOpFactory( - flintIndexStateModelService, client, flintIndexMetadataService, emrServerlessClientFactory); + flintIndexStateModelService, + flintIndexClient, + flintIndexMetadataService, + emrServerlessClientFactory, + asyncQueryScheduler); } @After @@ -222,7 +254,10 @@ private DataSourceServiceImpl createDataSourceService() { String masterKey = "a57d991d9b573f75b9bba1df"; DataSourceMetadataStorage dataSourceMetadataStorage = new OpenSearchDataSourceMetadataStorage( - client, clusterService, new EncryptorImpl(masterKey)); + client, + clusterService, + new EncryptorImpl(masterKey), + (OpenSearchSettings) pluginSettings); return new DataSourceServiceImpl( new ImmutableSet.Builder() .add(new GlueDataSourceFactory(pluginSettings)) @@ -245,6 +280,18 @@ protected AsyncQueryExecutorService createAsyncQueryExecutorService( AsyncQueryJobMetadataStorageService asyncQueryJobMetadataStorageService = new OpenSearchAsyncQueryJobMetadataStorageService( stateStore, new AsyncQueryJobMetadataXContentSerializer()); + SparkParameterComposerCollection sparkParameterComposerCollection = + new SparkParameterComposerCollection(); + sparkParameterComposerCollection.register( + DataSourceType.S3GLUE, + new S3GlueDataSourceSparkParameterComposer( + getSparkExecutionEngineConfigClusterSettingLoader())); + sparkParameterComposerCollection.register( + DataSourceType.SECURITY_LAKE, + new S3GlueDataSourceSparkParameterComposer( + getSparkExecutionEngineConfigClusterSettingLoader())); + SparkSubmitParametersBuilderProvider sparkSubmitParametersBuilderProvider = + new SparkSubmitParametersBuilderProvider(sparkParameterComposerCollection); QueryHandlerFactory queryHandlerFactory = new QueryHandlerFactory( jobExecutionResponseReader, @@ -259,10 +306,22 @@ protected AsyncQueryExecutorService createAsyncQueryExecutorService( new OpenSearchIndexDMLResultStorageService(dataSourceService, stateStore), new FlintIndexOpFactory( flintIndexStateModelService, - client, + flintIndexClient, new FlintIndexMetadataServiceImpl(client), - emrServerlessClientFactory), - emrServerlessClientFactory); + emrServerlessClientFactory, + asyncQueryScheduler), + emrServerlessClientFactory, + new OpenSearchMetricsService(), + sparkSubmitParametersBuilderProvider); + SQLQueryValidator sqlQueryValidator = + new SQLQueryValidator( + new GrammarElementValidatorProvider( + ImmutableMap.of(DataSourceType.S3GLUE, new S3GlueSQLGrammarElementValidator()), + new DefaultGrammarElementValidator())); + PPLQueryValidator pplQueryValidator = + new PPLQueryValidator( + new GrammarElementValidatorProvider( + ImmutableMap.of(), new DefaultGrammarElementValidator())); SparkQueryDispatcher sparkQueryDispatcher = new SparkQueryDispatcher( this.dataSourceService, @@ -273,7 +332,9 @@ protected AsyncQueryExecutorService createAsyncQueryExecutorService( sessionConfigSupplier, sessionIdProvider), queryHandlerFactory, - new DatasourceEmbeddedQueryIdProvider()); + new 
DatasourceEmbeddedQueryIdProvider(), + sqlQueryValidator, + pplQueryValidator); return new AsyncQueryExecutorServiceImpl( asyncQueryJobMetadataStorageService, sparkQueryDispatcher, @@ -342,7 +403,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { public static class LocalEMRServerlessClientFactory implements EMRServerlessClientFactory { @Override - public EMRServerlessClient getClient() { + public EMRServerlessClient getClient(String accountId) { return new LocalEMRSClient(); } } @@ -350,14 +411,56 @@ public EMRServerlessClient getClient() { public SparkExecutionEngineConfig sparkExecutionEngineConfig( AsyncQueryRequestContext asyncQueryRequestContext) { return SparkExecutionEngineConfig.builder() - .applicationId("appId") - .region("us-west-2") - .executionRoleARN("roleArn") + .applicationId(APPLICATION_ID) + .region(REGION) + .executionRoleARN(ROLE_ARN) .sparkSubmitParameterModifier(new OpenSearchSparkSubmitParameterModifier("")) .clusterName("myCluster") .build(); } + public static class TestSettings extends org.opensearch.sql.common.setting.Settings { + final Map values; + + public TestSettings() { + values = new HashMap<>(); + } + + /** Get Setting Value. */ + @Override + public T getSettingValue(Key key) { + return (T) values.get(key); + } + + @Override + public List getSettings() { + return values.keySet().stream().map(Key::getKeyValue).collect(Collectors.toList()); + } + + public void putSettingValue(Key key, T value) { + values.put(key, value); + } + } + + public SparkExecutionEngineConfigClusterSettingLoader + getSparkExecutionEngineConfigClusterSettingLoader() { + Gson gson = new Gson(); + JsonObject jsonObject = new JsonObject(); + jsonObject.addProperty("accountId", ACCOUNT_ID); + jsonObject.addProperty("applicationId", APPLICATION_ID); + jsonObject.addProperty("region", REGION); + jsonObject.addProperty("executionRoleARN", ROLE_ARN); + jsonObject.addProperty("sparkSubmitParameters", ""); + + // Convert JsonObject to JSON string + final String jsonString = gson.toJson(jsonObject); + + final TestSettings settings = new TestSettings(); + settings.putSettingValue(Key.SPARK_EXECUTION_ENGINE_CONFIG, jsonString); + + return new SparkExecutionEngineConfigClusterSettingLoader(settings); + } + public void enableSession(boolean enabled) { // doNothing } diff --git a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryGetResultSpecTest.java b/async-query/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryGetResultSpecTest.java similarity index 94% rename from spark/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryGetResultSpecTest.java rename to async-query/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryGetResultSpecTest.java index 12fa8043ea..5f6d7190da 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryGetResultSpecTest.java +++ b/async-query/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryGetResultSpecTest.java @@ -24,8 +24,8 @@ import org.opensearch.sql.protocol.response.format.JsonResponseFormatter; import org.opensearch.sql.protocol.response.format.ResponseFormatter; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryExecutionResponse; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryJobMetadata; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; -import org.opensearch.sql.spark.asyncquery.model.AsyncQueryResult; import org.opensearch.sql.spark.asyncquery.model.MockFlintSparkJob; import 
org.opensearch.sql.spark.asyncquery.model.NullAsyncQueryRequestContext; import org.opensearch.sql.spark.client.EMRServerlessClientFactory; @@ -38,9 +38,10 @@ import org.opensearch.sql.spark.rest.model.CreateAsyncQueryResponse; import org.opensearch.sql.spark.rest.model.LangType; import org.opensearch.sql.spark.transport.format.AsyncQueryResultResponseFormatter; +import org.opensearch.sql.spark.transport.model.AsyncQueryResult; public class AsyncQueryGetResultSpecTest extends AsyncQueryExecutorServiceSpec { - AsyncQueryRequestContext asyncQueryRequestContext = new NullAsyncQueryRequestContext(); + final AsyncQueryRequestContext asyncQueryRequestContext = new NullAsyncQueryRequestContext(); /** Mock Flint index and index state */ private final FlintDatasetMock mockIndex = @@ -417,7 +418,7 @@ private class AssertionHelper { private Interaction interaction; AssertionHelper(String query, LocalEMRSClient emrClient) { - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrClient; this.queryService = createAsyncQueryExecutorService( emrServerlessClientFactory, @@ -428,12 +429,21 @@ private class AssertionHelper { */ new JobExecutionResponseReader() { @Override - public JSONObject getResultWithJobId(String jobId, String resultIndex) { - return interaction.interact(new InteractionStep(emrClient, jobId, resultIndex)); + public JSONObject getResultFromResultIndex( + AsyncQueryJobMetadata asyncQueryJobMetadata, + AsyncQueryRequestContext asyncQueryRequestContext) { + return interaction.interact( + new InteractionStep( + emrClient, + asyncQueryJobMetadata.getJobId(), + asyncQueryJobMetadata.getResultIndex())); } @Override - public JSONObject getResultWithQueryId(String queryId, String resultIndex) { + public JSONObject getResultWithQueryId( + String queryId, + String resultIndex, + AsyncQueryRequestContext asyncQueryRequestContext) { return interaction.interact(new InteractionStep(emrClient, queryId, resultIndex)); } }); @@ -450,7 +460,8 @@ AssertionHelper withInteraction(Interaction interaction) { AssertionHelper assertQueryResults(String status, List data) { AsyncQueryExecutionResponse results = - queryService.getAsyncQueryResults(createQueryResponse.getQueryId()); + queryService.getAsyncQueryResults( + createQueryResponse.getQueryId(), asyncQueryRequestContext); assertEquals(status, results.getStatus()); assertEquals(data, results.getResults()); return this; @@ -458,7 +469,8 @@ AssertionHelper assertQueryResults(String status, List data) { AssertionHelper assertFormattedQueryResults(String expected) { AsyncQueryExecutionResponse results = - queryService.getAsyncQueryResults(createQueryResponse.getQueryId()); + queryService.getAsyncQueryResults( + createQueryResponse.getQueryId(), asyncQueryRequestContext); ResponseFormatter formatter = new AsyncQueryResultResponseFormatter(JsonResponseFormatter.Style.COMPACT); @@ -499,7 +511,7 @@ private InteractionStep(LocalEMRSClient emrClient, String queryId, String result /** Simulate PPL plugin search query_execution_result */ JSONObject pluginSearchQueryResult() { return new OpenSearchJobExecutionResponseReader(client) - .getResultWithQueryId(queryId, resultIndex); + .getResultWithQueryId(queryId, resultIndex, null); } /** Simulate EMR-S bulk writes query_execution_result with refresh = wait_for */ @@ -515,8 +527,11 @@ void emrJobWriteResultDoc(Map resultDoc) { /** Simulate EMR-S updates query_execution_request with state */ void 
emrJobUpdateStatementState(StatementState newState) { - StatementModel stmt = statementStorageService.getStatement(queryId, MYS3_DATASOURCE).get(); - statementStorageService.updateStatementState(stmt, newState); + StatementModel stmt = + statementStorageService + .getStatement(queryId, MYS3_DATASOURCE, asyncQueryRequestContext) + .get(); + statementStorageService.updateStatementState(stmt, newState, asyncQueryRequestContext); } void emrJobUpdateJobState(JobRunState jobState) { diff --git a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/IndexQuerySpecAlterTest.java b/async-query/src/test/java/org/opensearch/sql/spark/asyncquery/IndexQuerySpecAlterTest.java similarity index 95% rename from spark/src/test/java/org/opensearch/sql/spark/asyncquery/IndexQuerySpecAlterTest.java rename to async-query/src/test/java/org/opensearch/sql/spark/asyncquery/IndexQuerySpecAlterTest.java index 801a24922f..d69c7d4864 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/IndexQuerySpecAlterTest.java +++ b/async-query/src/test/java/org/opensearch/sql/spark/asyncquery/IndexQuerySpecAlterTest.java @@ -1,3 +1,8 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + package org.opensearch.sql.spark.asyncquery; import com.amazonaws.services.emrserverless.model.CancelJobRunResult; @@ -58,7 +63,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return new GetJobRunResult().withJobRun(jobRun); } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); // Mock flint index @@ -81,7 +86,8 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { // 2. fetch result AsyncQueryExecutionResponse asyncQueryExecutionResponse = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals("SUCCESS", asyncQueryExecutionResponse.getStatus()); emrsClient.startJobRunCalled(0); emrsClient.cancelJobRunCalled(1); @@ -126,7 +132,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return new GetJobRunResult().withJobRun(jobRun); } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); // Mock flint index @@ -150,7 +156,8 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { // 2. 
fetch result AsyncQueryExecutionResponse asyncQueryExecutionResponse = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals("SUCCESS", asyncQueryExecutionResponse.getStatus()); emrsClient.startJobRunCalled(0); emrsClient.cancelJobRunCalled(1); @@ -209,7 +216,7 @@ public CancelJobRunResult cancelJobRun( throw new ValidationException("Job run is not in a cancellable state"); } }; - EMRServerlessClientFactory emrServerlessCientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessCientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessCientFactory); // Mock flint index @@ -232,7 +239,8 @@ public CancelJobRunResult cancelJobRun( // 2. fetch result AsyncQueryExecutionResponse asyncQueryExecutionResponse = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals("SUCCESS", asyncQueryExecutionResponse.getStatus()); emrsClient.startJobRunCalled(0); emrsClient.cancelJobRunCalled(1); @@ -271,8 +279,8 @@ public void testAlterIndexQueryConvertingToAutoRefresh() { ImmutableList.of(ALTER_SKIPPING, ALTER_COVERING, ALTER_MV) .forEach( mockDS -> { - LocalEMRSClient localEMRSClient = new LocalEMRSClient(); - EMRServerlessClientFactory clientFactory = () -> localEMRSClient; + LocalEMRSClient emrsClient = new LocalEMRSClient(); + EMRServerlessClientFactory clientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(clientFactory); @@ -298,13 +306,13 @@ public void testAlterIndexQueryConvertingToAutoRefresh() { assertEquals( "RUNNING", asyncQueryExecutorService - .getAsyncQueryResults(response.getQueryId()) + .getAsyncQueryResults(response.getQueryId(), asyncQueryRequestContext) .getStatus()); flintIndexJob.assertState(FlintIndexState.ACTIVE); - localEMRSClient.startJobRunCalled(1); - localEMRSClient.getJobRunResultCalled(1); - localEMRSClient.cancelJobRunCalled(0); + emrsClient.startJobRunCalled(1); + emrsClient.getJobRunResultCalled(1); + emrsClient.cancelJobRunCalled(0); Map mappings = mockDS.getIndexMappings(); Map meta = (HashMap) mappings.get("_meta"); Map options = (Map) meta.get("options"); @@ -337,8 +345,8 @@ public void testAlterIndexQueryWithOutAnyAutoRefresh() { ImmutableList.of(ALTER_SKIPPING, ALTER_COVERING, ALTER_MV) .forEach( mockDS -> { - LocalEMRSClient localEMRSClient = new LocalEMRSClient(); - EMRServerlessClientFactory clientFactory = () -> localEMRSClient; + LocalEMRSClient emrsClient = new LocalEMRSClient(); + EMRServerlessClientFactory clientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(clientFactory); @@ -364,13 +372,13 @@ public void testAlterIndexQueryWithOutAnyAutoRefresh() { assertEquals( "RUNNING", asyncQueryExecutorService - .getAsyncQueryResults(response.getQueryId()) + .getAsyncQueryResults(response.getQueryId(), asyncQueryRequestContext) .getStatus()); flintIndexJob.assertState(FlintIndexState.ACTIVE); - localEMRSClient.startJobRunCalled(1); - localEMRSClient.getJobRunResultCalled(1); - localEMRSClient.cancelJobRunCalled(0); + emrsClient.startJobRunCalled(1); + emrsClient.getJobRunResultCalled(1); + emrsClient.cancelJobRunCalled(0); Map mappings = mockDS.getIndexMappings(); 
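For orientation, the result-fetching assertions in these specs now thread the request context through every call. A minimal usage sketch, assuming the NullAsyncQueryRequestContext no-op context these tests already use:

    // Sketch: fetch results for a submitted async query with the no-op request context.
    AsyncQueryRequestContext asyncQueryRequestContext = new NullAsyncQueryRequestContext();
    AsyncQueryExecutionResponse results =
        asyncQueryExecutorService.getAsyncQueryResults(
            response.getQueryId(), asyncQueryRequestContext);
    assertEquals("SUCCESS", results.getStatus());
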
Map meta = (HashMap) mappings.get("_meta"); Map options = (Map) meta.get("options"); @@ -414,7 +422,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return new GetJobRunResult().withJobRun(jobRun); } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); // Mock flint index @@ -437,7 +445,8 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { // 2. fetch result AsyncQueryExecutionResponse asyncQueryExecutionResponse = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals("FAILED", asyncQueryExecutionResponse.getStatus()); assertEquals( "Altering to full refresh only allows: [auto_refresh, incremental_refresh]" @@ -489,7 +498,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return new GetJobRunResult().withJobRun(jobRun); } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); // Mock flint index @@ -512,7 +521,8 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { // 2. fetch result AsyncQueryExecutionResponse asyncQueryExecutionResponse = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals("FAILED", asyncQueryExecutionResponse.getStatus()); assertEquals( "Altering to incremental refresh only allows: [auto_refresh, incremental_refresh," @@ -557,7 +567,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return new GetJobRunResult().withJobRun(jobRun); } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); // Mock flint index @@ -581,7 +591,8 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { // 2. fetch result AsyncQueryExecutionResponse asyncQueryExecutionResponse = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals("FAILED", asyncQueryExecutionResponse.getStatus()); assertEquals( "Conversion to incremental refresh index cannot proceed due to missing" @@ -619,7 +630,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return new GetJobRunResult().withJobRun(jobRun); } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); // Mock flint index @@ -643,7 +654,8 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { // 2. 
fetch result AsyncQueryExecutionResponse asyncQueryExecutionResponse = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals("FAILED", asyncQueryExecutionResponse.getStatus()); assertEquals( "Conversion to incremental refresh index cannot proceed due to missing" @@ -681,7 +693,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return new GetJobRunResult().withJobRun(jobRun); } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); // Mock flint index @@ -707,7 +719,8 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { // 2. fetch result AsyncQueryExecutionResponse asyncQueryExecutionResponse = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals("FAILED", asyncQueryExecutionResponse.getStatus()); assertEquals( "Conversion to incremental refresh index cannot proceed due to missing" @@ -745,7 +758,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return new GetJobRunResult().withJobRun(jobRun); } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); // Mock flint index @@ -771,7 +784,8 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { // 2. fetch result AsyncQueryExecutionResponse asyncQueryExecutionResponse = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals("SUCCESS", asyncQueryExecutionResponse.getStatus()); emrsClient.startJobRunCalled(0); emrsClient.getJobRunResultCalled(1); @@ -806,7 +820,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return new GetJobRunResult().withJobRun(jobRun); } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); // Mock flint index @@ -832,7 +846,8 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { // 2. 
fetch result AsyncQueryExecutionResponse asyncQueryExecutionResponse = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals("SUCCESS", asyncQueryExecutionResponse.getStatus()); emrsClient.startJobRunCalled(0); emrsClient.getJobRunResultCalled(1); @@ -868,7 +883,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return new GetJobRunResult().withJobRun(jobRun); } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); // Mock flint index @@ -891,7 +906,8 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { // 2. fetch result AsyncQueryExecutionResponse asyncQueryExecutionResponse = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals("FAILED", asyncQueryExecutionResponse.getStatus()); assertEquals( "Transaction failed as flint index is not in a valid state.", @@ -935,7 +951,7 @@ public CancelJobRunResult cancelJobRun( throw new ValidationException("Job run is not in a cancellable state"); } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); // Mock flint index @@ -958,7 +974,8 @@ public CancelJobRunResult cancelJobRun( // 2. fetch result AsyncQueryExecutionResponse asyncQueryExecutionResponse = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals("SUCCESS", asyncQueryExecutionResponse.getStatus()); emrsClient.startJobRunCalled(0); emrsClient.cancelJobRunCalled(1); @@ -1000,7 +1017,7 @@ public CancelJobRunResult cancelJobRun( throw new ValidationException("Random validation exception"); } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); // Mock flint index @@ -1023,7 +1040,8 @@ public CancelJobRunResult cancelJobRun( // 2. 
fetch result AsyncQueryExecutionResponse asyncQueryExecutionResponse = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals("FAILED", asyncQueryExecutionResponse.getStatus()); assertEquals("Internal Server Error.", asyncQueryExecutionResponse.getError()); emrsClient.startJobRunCalled(0); @@ -1066,7 +1084,7 @@ public CancelJobRunResult cancelJobRun( throw new IllegalArgumentException("Unknown Error"); } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); // Mock flint index @@ -1089,7 +1107,8 @@ public CancelJobRunResult cancelJobRun( // 2. fetch result AsyncQueryExecutionResponse asyncQueryExecutionResponse = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals("FAILED", asyncQueryExecutionResponse.getStatus()); assertEquals("Internal Server Error.", asyncQueryExecutionResponse.getError()); emrsClient.startJobRunCalled(0); diff --git a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/IndexQuerySpecTest.java b/async-query/src/test/java/org/opensearch/sql/spark/asyncquery/IndexQuerySpecTest.java similarity index 94% rename from spark/src/test/java/org/opensearch/sql/spark/asyncquery/IndexQuerySpecTest.java rename to async-query/src/test/java/org/opensearch/sql/spark/asyncquery/IndexQuerySpecTest.java index 2b6b1d2ba0..920981abf1 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/IndexQuerySpecTest.java +++ b/async-query/src/test/java/org/opensearch/sql/spark/asyncquery/IndexQuerySpecTest.java @@ -124,7 +124,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return new GetJobRunResult().withJobRun(new JobRun().withState("Cancelled")); } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -143,7 +143,8 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { // 2.fetch result AsyncQueryExecutionResponse asyncQueryResults = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals("SUCCESS", asyncQueryResults.getStatus()); assertNull(asyncQueryResults.getError()); emrsClient.cancelJobRunCalled(1); @@ -152,7 +153,9 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { IllegalArgumentException exception = assertThrows( IllegalArgumentException.class, - () -> asyncQueryExecutorService.cancelQuery(response.getQueryId())); + () -> + asyncQueryExecutorService.cancelQuery( + response.getQueryId(), asyncQueryRequestContext)); assertEquals("can't cancel index DML query", exception.getMessage()); }); } @@ -175,7 +178,7 @@ public CancelJobRunResult cancelJobRun( throw new ValidationException("Job run is not in a cancellable state"); } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory 
emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -191,7 +194,8 @@ public CancelJobRunResult cancelJobRun( // 2.fetch result. AsyncQueryExecutionResponse asyncQueryResults = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals("SUCCESS", asyncQueryResults.getStatus()); assertNull(asyncQueryResults.getError()); }); @@ -215,7 +219,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return new GetJobRunResult().withJobRun(new JobRun().withState("Running")); } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -231,7 +235,8 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { // 2. fetch result AsyncQueryExecutionResponse asyncQueryResults = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals("FAILED", asyncQueryResults.getStatus()); assertEquals("Cancel job operation timed out.", asyncQueryResults.getError()); }); @@ -253,7 +258,7 @@ public CancelJobRunResult cancelJobRun( throw new ValidationException("Job run is not in a cancellable state"); } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -268,7 +273,8 @@ public CancelJobRunResult cancelJobRun( // 2.fetch result. 
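The cancellation tests around this point follow the same pattern; a short sketch of the two-argument cancelQuery call and the guard that still rejects index DML queries, with names taken from the surrounding assertions:

    // Sketch: cancelQuery now also takes the request context; cancelling an
    // index DML query is still rejected with an IllegalArgumentException.
    IllegalArgumentException e =
        assertThrows(
            IllegalArgumentException.class,
            () ->
                asyncQueryExecutorService.cancelQuery(
                    response.getQueryId(), asyncQueryRequestContext));
    assertEquals("can't cancel index DML query", e.getMessage());
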
AsyncQueryExecutionResponse asyncQueryResults = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals("SUCCESS", asyncQueryResults.getStatus()); assertNull(asyncQueryResults.getError()); } @@ -290,7 +296,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return new GetJobRunResult().withJobRun(new JobRun().withState("Cancelled")); } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -317,7 +323,8 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { // 2.fetch result AsyncQueryExecutionResponse asyncQueryResults = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals("SUCCESS", asyncQueryResults.getStatus()); assertNull(asyncQueryResults.getError()); emrsClient.cancelJobRunCalled(1); @@ -326,7 +333,9 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { IllegalArgumentException exception = assertThrows( IllegalArgumentException.class, - () -> asyncQueryExecutorService.cancelQuery(response.getQueryId())); + () -> + asyncQueryExecutorService.cancelQuery( + response.getQueryId(), asyncQueryRequestContext)); assertEquals("can't cancel index DML query", exception.getMessage()); }); } @@ -350,7 +359,7 @@ public CancelJobRunResult cancelJobRun( throw new ValidationException("Job run is not in a cancellable state"); } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -371,7 +380,8 @@ public CancelJobRunResult cancelJobRun( // 2.fetch result. AsyncQueryExecutionResponse asyncQueryResults = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals("SUCCESS", asyncQueryResults.getStatus()); assertNull(asyncQueryResults.getError()); @@ -397,7 +407,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return new GetJobRunResult().withJobRun(new JobRun().withState("Running")); } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -418,7 +428,8 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { // 2. 
fetch result AsyncQueryExecutionResponse asyncQueryResults = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals("FAILED", asyncQueryResults.getStatus()); assertEquals("Cancel job operation timed out.", asyncQueryResults.getError()); flintIndexJob.assertState(FlintIndexState.REFRESHING); @@ -443,7 +454,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return new GetJobRunResult().withJobRun(new JobRun().withState("Cancelled")); } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -466,7 +477,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { assertEquals( "SUCCESS", asyncQueryExecutorService - .getAsyncQueryResults(response.getQueryId()) + .getAsyncQueryResults(response.getQueryId(), asyncQueryRequestContext) .getStatus()); flintIndexJob.assertState(FlintIndexState.DELETED); @@ -494,7 +505,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return new GetJobRunResult().withJobRun(new JobRun().withState("Cancelled")); } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -515,7 +526,8 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { // 2. fetch result AsyncQueryExecutionResponse asyncQueryExecutionResponse = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals("SUCCESS", asyncQueryExecutionResponse.getStatus()); flintIndexJob.assertState(FlintIndexState.DELETED); emrsClient.startJobRunCalled(0); @@ -542,7 +554,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return new GetJobRunResult().withJobRun(new JobRun().withState("Cancelled")); } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -565,7 +577,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { assertEquals( "SUCCESS", asyncQueryExecutorService - .getAsyncQueryResults(response.getQueryId()) + .getAsyncQueryResults(response.getQueryId(), asyncQueryRequestContext) .getStatus()); flintIndexJob.assertState(FlintIndexState.DELETED); @@ -590,7 +602,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return new GetJobRunResult().withJobRun(new JobRun().withState("Cancelled")); } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -612,7 +624,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { assertEquals( "SUCCESS", 
asyncQueryExecutorService - .getAsyncQueryResults(response.getQueryId()) + .getAsyncQueryResults(response.getQueryId(), asyncQueryRequestContext) .getStatus()); flintIndexJob.assertState(FlintIndexState.DELETED); @@ -644,7 +656,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return null; } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -664,7 +676,8 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { asyncQueryRequestContext); AsyncQueryExecutionResponse asyncQueryExecutionResponse = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); // 2. fetch result assertEquals("FAILED", asyncQueryExecutionResponse.getStatus()); assertEquals( @@ -691,7 +704,7 @@ public CancelJobRunResult cancelJobRun( throw new IllegalArgumentException("Job run is not in a cancellable state"); } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -710,7 +723,8 @@ public CancelJobRunResult cancelJobRun( // 2.fetch result. AsyncQueryExecutionResponse asyncQueryResults = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals("FAILED", asyncQueryResults.getStatus()); assertEquals("Internal Server Error.", asyncQueryResults.getError()); @@ -742,7 +756,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return null; } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -758,7 +772,8 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { // 2. 
fetch result AsyncQueryExecutionResponse asyncQueryResults = - asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); + asyncQueryExecutorService.getAsyncQueryResults( + response.getQueryId(), asyncQueryRequestContext); assertEquals("FAILED", asyncQueryResults.getStatus()); assertTrue(asyncQueryResults.getError().contains("no state found")); }); @@ -887,7 +902,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { return null; } }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService(emrServerlessClientFactory); @@ -901,7 +916,9 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { IllegalArgumentException exception = assertThrows( IllegalArgumentException.class, - () -> asyncQueryExecutorService.cancelQuery(response.getQueryId())); + () -> + asyncQueryExecutorService.cancelQuery( + response.getQueryId(), asyncQueryRequestContext)); assertEquals( "can't cancel index DML query, using ALTER auto_refresh=off statement to stop" + " job, using VACUUM statement to stop job and delete data", @@ -917,7 +934,7 @@ public void cancelRefreshStatement() { mockDS -> { AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService( - () -> + (accountId) -> new LocalEMRSClient() { @Override public GetJobRunResult getJobRunResult( @@ -944,7 +961,9 @@ public GetJobRunResult getJobRunResult( flintIndexJob.refreshing(); // 2. Cancel query - String cancelResponse = asyncQueryExecutorService.cancelQuery(response.getQueryId()); + String cancelResponse = + asyncQueryExecutorService.cancelQuery( + response.getQueryId(), asyncQueryRequestContext); assertNotNull(cancelResponse); assertTrue(clusterService.state().routingTable().hasIndex(mockDS.indexName)); @@ -962,7 +981,7 @@ public void cancelRefreshStatementWithActiveState() { mockDS -> { AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService( - () -> + (accountId) -> new LocalEMRSClient() { @Override public GetJobRunResult getJobRunResult( @@ -992,7 +1011,9 @@ public GetJobRunResult getJobRunResult( IllegalStateException illegalStateException = Assertions.assertThrows( IllegalStateException.class, - () -> asyncQueryExecutorService.cancelQuery(response.getQueryId())); + () -> + asyncQueryExecutorService.cancelQuery( + response.getQueryId(), asyncQueryRequestContext)); Assertions.assertEquals( "Transaction failed as flint index is not in a valid state.", illegalStateException.getMessage()); @@ -1009,7 +1030,7 @@ public void cancelRefreshStatementWithFailureInFetchingIndexMetadata() { new MockFlintIndex(client(), indexName, FlintIndexType.COVERING, null); AsyncQueryExecutorService asyncQueryExecutorService = createAsyncQueryExecutorService( - () -> + (accountId) -> new LocalEMRSClient() { @Override public GetJobRunResult getJobRunResult(String applicationId, String jobId) { @@ -1038,6 +1059,7 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { // 2. 
Cancel query Assertions.assertThrows( IllegalStateException.class, - () -> asyncQueryExecutorService.cancelQuery(response.getQueryId())); + () -> + asyncQueryExecutorService.cancelQuery(response.getQueryId(), asyncQueryRequestContext)); } } diff --git a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/OpenSearchAsyncQueryJobMetadataStorageServiceTest.java b/async-query/src/test/java/org/opensearch/sql/spark/asyncquery/OpenSearchAsyncQueryJobMetadataStorageServiceTest.java similarity index 97% rename from spark/src/test/java/org/opensearch/sql/spark/asyncquery/OpenSearchAsyncQueryJobMetadataStorageServiceTest.java rename to async-query/src/test/java/org/opensearch/sql/spark/asyncquery/OpenSearchAsyncQueryJobMetadataStorageServiceTest.java index c84d68421d..d373f451a6 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/OpenSearchAsyncQueryJobMetadataStorageServiceTest.java +++ b/async-query/src/test/java/org/opensearch/sql/spark/asyncquery/OpenSearchAsyncQueryJobMetadataStorageServiceTest.java @@ -28,7 +28,8 @@ public class OpenSearchAsyncQueryJobMetadataStorageServiceTest extends OpenSearc private static final String MOCK_RESULT_INDEX = "resultIndex"; private static final String MOCK_QUERY_ID = "00fdo6u94n7abo0q"; private OpenSearchAsyncQueryJobMetadataStorageService openSearchJobMetadataStorageService; - private AsyncQueryRequestContext asyncQueryRequestContext = new NullAsyncQueryRequestContext(); + private final AsyncQueryRequestContext asyncQueryRequestContext = + new NullAsyncQueryRequestContext(); @Before public void setup() { diff --git a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/model/MockFlintIndex.java b/async-query/src/test/java/org/opensearch/sql/spark/asyncquery/model/MockFlintIndex.java similarity index 100% rename from spark/src/test/java/org/opensearch/sql/spark/asyncquery/model/MockFlintIndex.java rename to async-query/src/test/java/org/opensearch/sql/spark/asyncquery/model/MockFlintIndex.java diff --git a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/model/MockFlintSparkJob.java b/async-query/src/test/java/org/opensearch/sql/spark/asyncquery/model/MockFlintSparkJob.java similarity index 76% rename from spark/src/test/java/org/opensearch/sql/spark/asyncquery/model/MockFlintSparkJob.java rename to async-query/src/test/java/org/opensearch/sql/spark/asyncquery/model/MockFlintSparkJob.java index 6c82188ee6..e61d550d68 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/model/MockFlintSparkJob.java +++ b/async-query/src/test/java/org/opensearch/sql/spark/asyncquery/model/MockFlintSparkJob.java @@ -16,8 +16,10 @@ public class MockFlintSparkJob { private FlintIndexStateModel stateModel; - private FlintIndexStateModelService flintIndexStateModelService; - private String datasource; + private final FlintIndexStateModelService flintIndexStateModelService; + private final String datasource; + private final AsyncQueryRequestContext asyncQueryRequestContext = + new NullAsyncQueryRequestContext(); public MockFlintSparkJob( FlintIndexStateModelService flintIndexStateModelService, String latestId, String datasource) { @@ -34,12 +36,15 @@ public MockFlintSparkJob( .lastUpdateTime(System.currentTimeMillis()) .error("") .build(); - stateModel = flintIndexStateModelService.createFlintIndexStateModel(stateModel); + stateModel = + flintIndexStateModelService.createFlintIndexStateModel( + stateModel, asyncQueryRequestContext); } public void transition(FlintIndexState newState) { stateModel = - 
flintIndexStateModelService.updateFlintIndexState(stateModel, newState, datasource); + flintIndexStateModelService.updateFlintIndexState( + stateModel, newState, datasource, asyncQueryRequestContext); } public void refreshing() { @@ -68,7 +73,8 @@ public void deleted() { public void assertState(FlintIndexState expected) { Optional stateModelOpt = - flintIndexStateModelService.getFlintIndexStateModel(stateModel.getId(), datasource); + flintIndexStateModelService.getFlintIndexStateModel( + stateModel.getId(), datasource, asyncQueryRequestContext); assertTrue(stateModelOpt.isPresent()); assertEquals(expected, stateModelOpt.get().getIndexState()); } diff --git a/spark/src/test/java/org/opensearch/sql/spark/cluster/FlintStreamingJobHouseKeeperTaskTest.java b/async-query/src/test/java/org/opensearch/sql/spark/cluster/FlintStreamingJobHouseKeeperTaskTest.java similarity index 92% rename from spark/src/test/java/org/opensearch/sql/spark/cluster/FlintStreamingJobHouseKeeperTaskTest.java rename to async-query/src/test/java/org/opensearch/sql/spark/cluster/FlintStreamingJobHouseKeeperTaskTest.java index aa4684811f..0a3a180932 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/cluster/FlintStreamingJobHouseKeeperTaskTest.java +++ b/async-query/src/test/java/org/opensearch/sql/spark/cluster/FlintStreamingJobHouseKeeperTaskTest.java @@ -6,6 +6,7 @@ package org.opensearch.sql.spark.cluster; import static org.opensearch.sql.datasource.model.DataSourceStatus.DISABLED; +import static org.opensearch.sql.spark.asyncquery.AsyncQueryExecutorServiceSpec.MYGLUE_DATASOURCE; import com.amazonaws.services.emrserverless.model.GetJobRunResult; import com.amazonaws.services.emrserverless.model.JobRun; @@ -19,6 +20,7 @@ import org.opensearch.sql.legacy.metrics.MetricName; import org.opensearch.sql.legacy.metrics.Metrics; import org.opensearch.sql.spark.asyncquery.AsyncQueryExecutorServiceSpec; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; import org.opensearch.sql.spark.asyncquery.model.MockFlintIndex; import org.opensearch.sql.spark.asyncquery.model.MockFlintSparkJob; import org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions; @@ -53,7 +55,9 @@ public void testStreamingJobHouseKeeperWhenDataSourceDisabled() { FlintIndexMetadataService flintIndexMetadataService = new FlintIndexMetadataServiceImpl(client); FlintStreamingJobHouseKeeperTask flintStreamingJobHouseKeeperTask = new FlintStreamingJobHouseKeeperTask( - dataSourceService, flintIndexMetadataService, getFlintIndexOpFactory(() -> emrsClient)); + dataSourceService, + flintIndexMetadataService, + getFlintIndexOpFactory((accountId) -> emrsClient)); Thread thread = new Thread(flintStreamingJobHouseKeeperTask); thread.start(); @@ -132,7 +136,9 @@ public void testStreamingJobHouseKeeperWhenCancelJobGivesTimeout() { FlintIndexMetadataService flintIndexMetadataService = new FlintIndexMetadataServiceImpl(client); FlintStreamingJobHouseKeeperTask flintStreamingJobHouseKeeperTask = new FlintStreamingJobHouseKeeperTask( - dataSourceService, flintIndexMetadataService, getFlintIndexOpFactory(() -> emrsClient)); + dataSourceService, + flintIndexMetadataService, + getFlintIndexOpFactory((accountId) -> emrsClient)); Thread thread = new Thread(flintStreamingJobHouseKeeperTask); thread.start(); @@ -180,7 +186,9 @@ public void testSimulateConcurrentJobHouseKeeperExecution() { FlintIndexMetadataService flintIndexMetadataService = new FlintIndexMetadataServiceImpl(client); FlintStreamingJobHouseKeeperTask flintStreamingJobHouseKeeperTask = 
new FlintStreamingJobHouseKeeperTask( - dataSourceService, flintIndexMetadataService, getFlintIndexOpFactory(() -> emrsClient)); + dataSourceService, + flintIndexMetadataService, + getFlintIndexOpFactory((accountId) -> emrsClient)); FlintStreamingJobHouseKeeperTask.isRunning.compareAndSet(false, true); Thread thread = new Thread(flintStreamingJobHouseKeeperTask); @@ -230,7 +238,9 @@ public void testStreamingJobClearnerWhenDataSourceIsDeleted() { FlintIndexMetadataService flintIndexMetadataService = new FlintIndexMetadataServiceImpl(client); FlintStreamingJobHouseKeeperTask flintStreamingJobHouseKeeperTask = new FlintStreamingJobHouseKeeperTask( - dataSourceService, flintIndexMetadataService, getFlintIndexOpFactory(() -> emrsClient)); + dataSourceService, + flintIndexMetadataService, + getFlintIndexOpFactory((accountId) -> emrsClient)); Thread thread = new Thread(flintStreamingJobHouseKeeperTask); thread.start(); @@ -277,7 +287,9 @@ public void testStreamingJobHouseKeeperWhenDataSourceIsNeitherDisabledNorDeleted FlintIndexMetadataService flintIndexMetadataService = new FlintIndexMetadataServiceImpl(client); FlintStreamingJobHouseKeeperTask flintStreamingJobHouseKeeperTask = new FlintStreamingJobHouseKeeperTask( - dataSourceService, flintIndexMetadataService, getFlintIndexOpFactory(() -> emrsClient)); + dataSourceService, + flintIndexMetadataService, + getFlintIndexOpFactory((accountId) -> emrsClient)); Thread thread = new Thread(flintStreamingJobHouseKeeperTask); thread.start(); @@ -319,7 +331,9 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { FlintIndexMetadataService flintIndexMetadataService = new FlintIndexMetadataServiceImpl(client); FlintStreamingJobHouseKeeperTask flintStreamingJobHouseKeeperTask = new FlintStreamingJobHouseKeeperTask( - dataSourceService, flintIndexMetadataService, getFlintIndexOpFactory(() -> emrsClient)); + dataSourceService, + flintIndexMetadataService, + getFlintIndexOpFactory((accountId) -> emrsClient)); Thread thread = new Thread(flintStreamingJobHouseKeeperTask); thread.start(); @@ -346,7 +360,9 @@ public void testStreamingJobHouseKeeperWhenFlintIndexIsCorrupted() throws Interr FlintIndexMetadataService flintIndexMetadataService = new FlintIndexMetadataServiceImpl(client); FlintStreamingJobHouseKeeperTask flintStreamingJobHouseKeeperTask = new FlintStreamingJobHouseKeeperTask( - dataSourceService, flintIndexMetadataService, getFlintIndexOpFactory(() -> emrsClient)); + dataSourceService, + flintIndexMetadataService, + getFlintIndexOpFactory((accountId) -> emrsClient)); Thread thread = new Thread(flintStreamingJobHouseKeeperTask); thread.start(); @@ -378,17 +394,22 @@ public GetJobRunResult getJobRunResult(String applicationId, String jobId) { FlintIndexMetadataService flintIndexMetadataService = new FlintIndexMetadataService() { @Override - public Map getFlintIndexMetadata(String indexPattern) { + public Map getFlintIndexMetadata( + String indexPattern, AsyncQueryRequestContext asyncQueryRequestContext) { throw new RuntimeException("Couldn't fetch details from ElasticSearch"); } @Override public void updateIndexToManualRefresh( - String indexName, FlintIndexOptions flintIndexOptions) {} + String indexName, + FlintIndexOptions flintIndexOptions, + AsyncQueryRequestContext asyncQueryRequestContext) {} }; FlintStreamingJobHouseKeeperTask flintStreamingJobHouseKeeperTask = new FlintStreamingJobHouseKeeperTask( - dataSourceService, flintIndexMetadataService, getFlintIndexOpFactory(() -> emrsClient)); + dataSourceService, + 
flintIndexMetadataService, + getFlintIndexOpFactory((accountId) -> emrsClient)); Thread thread = new Thread(flintStreamingJobHouseKeeperTask); thread.start(); @@ -428,7 +449,9 @@ public void testStreamingJobHouseKeeperMultipleTimesWhenDataSourceDisabled() { FlintIndexMetadataService flintIndexMetadataService = new FlintIndexMetadataServiceImpl(client); FlintStreamingJobHouseKeeperTask flintStreamingJobHouseKeeperTask = new FlintStreamingJobHouseKeeperTask( - dataSourceService, flintIndexMetadataService, getFlintIndexOpFactory(() -> emrsClient)); + dataSourceService, + flintIndexMetadataService, + getFlintIndexOpFactory((accountId) -> emrsClient)); Thread thread = new Thread(flintStreamingJobHouseKeeperTask); thread.start(); @@ -500,7 +523,9 @@ public void testRunStreamingJobHouseKeeperWhenDataSourceIsDeleted() { FlintIndexMetadataService flintIndexMetadataService = new FlintIndexMetadataServiceImpl(client); FlintStreamingJobHouseKeeperTask flintStreamingJobHouseKeeperTask = new FlintStreamingJobHouseKeeperTask( - dataSourceService, flintIndexMetadataService, getFlintIndexOpFactory(() -> emrsClient)); + dataSourceService, + flintIndexMetadataService, + getFlintIndexOpFactory((accountId) -> emrsClient)); Thread thread = new Thread(flintStreamingJobHouseKeeperTask); thread.start(); diff --git a/async-query/src/test/java/org/opensearch/sql/spark/config/OpenSearchAsyncQuerySchedulerConfigComposerTest.java b/async-query/src/test/java/org/opensearch/sql/spark/config/OpenSearchAsyncQuerySchedulerConfigComposerTest.java new file mode 100644 index 0000000000..19ab091e25 --- /dev/null +++ b/async-query/src/test/java/org/opensearch/sql/spark/config/OpenSearchAsyncQuerySchedulerConfigComposerTest.java @@ -0,0 +1,69 @@ +package org.opensearch.sql.spark.config; + +import static org.junit.Assert.assertNull; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; +import org.opensearch.sql.spark.dispatcher.model.DispatchQueryRequest; +import org.opensearch.sql.spark.parameter.SparkSubmitParameters; + +@ExtendWith(MockitoExtension.class) +public class OpenSearchAsyncQuerySchedulerConfigComposerTest { + + @Mock private Settings settings; + @Mock private SparkSubmitParameters sparkSubmitParameters; + @Mock private DispatchQueryRequest dispatchQueryRequest; + @Mock private AsyncQueryRequestContext context; + + private OpenSearchAsyncQuerySchedulerConfigComposer composer; + + @BeforeEach + public void setUp() { + composer = new OpenSearchAsyncQuerySchedulerConfigComposer(settings); + } + + @Test + public void testCompose() { + when(settings.getSettingValue(Settings.Key.ASYNC_QUERY_EXTERNAL_SCHEDULER_ENABLED)) + .thenReturn(true); + when(settings.getSettingValue(Settings.Key.ASYNC_QUERY_EXTERNAL_SCHEDULER_INTERVAL)) + .thenReturn("10 minutes"); + + composer.compose(sparkSubmitParameters, dispatchQueryRequest, context); + + verify(sparkSubmitParameters) + .setConfigItem("spark.flint.job.externalScheduler.enabled", "true"); + verify(sparkSubmitParameters) + .setConfigItem("spark.flint.job.externalScheduler.interval", "\"10 minutes\""); + } + + @Test + public void testComposeWithDisabledScheduler() { + 
when(settings.getSettingValue(Settings.Key.ASYNC_QUERY_EXTERNAL_SCHEDULER_ENABLED)) + .thenReturn(false); + + composer.compose(sparkSubmitParameters, dispatchQueryRequest, context); + + verify(sparkSubmitParameters) + .setConfigItem("spark.flint.job.externalScheduler.enabled", "false"); + } + + @Test + public void testComposeWithMissingInterval() { + when(settings.getSettingValue(Settings.Key.ASYNC_QUERY_EXTERNAL_SCHEDULER_ENABLED)) + .thenReturn(true); + when(settings.getSettingValue(Settings.Key.ASYNC_QUERY_EXTERNAL_SCHEDULER_INTERVAL)) + .thenReturn(""); + + composer.compose(sparkSubmitParameters, dispatchQueryRequest, context); + + assertNull(sparkSubmitParameters.getConfigItem("spark.flint.job.externalScheduler.interval")); + } +} diff --git a/async-query/src/test/java/org/opensearch/sql/spark/config/OpenSearchExtraParameterComposerTest.java b/async-query/src/test/java/org/opensearch/sql/spark/config/OpenSearchExtraParameterComposerTest.java new file mode 100644 index 0000000000..d3b0b2727a --- /dev/null +++ b/async-query/src/test/java/org/opensearch/sql/spark/config/OpenSearchExtraParameterComposerTest.java @@ -0,0 +1,52 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.config; + +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoInteractions; +import static org.mockito.Mockito.when; + +import java.util.Optional; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; +import org.opensearch.sql.spark.dispatcher.model.DispatchQueryRequest; +import org.opensearch.sql.spark.parameter.SparkSubmitParameters; + +@ExtendWith(MockitoExtension.class) +class OpenSearchExtraParameterComposerTest { + + public static final String PARAMS = "PARAMS"; + @Mock SparkExecutionEngineConfigClusterSettingLoader settingsLoader; + @Mock SparkSubmitParameters sparkSubmitParameters; + @Mock DispatchQueryRequest dispatchQueryRequest; + @Mock AsyncQueryRequestContext context; + + @InjectMocks OpenSearchExtraParameterComposer openSearchExtraParameterComposer; + + @Test + public void paramExists_compose() { + SparkExecutionEngineConfigClusterSetting setting = + SparkExecutionEngineConfigClusterSetting.builder().sparkSubmitParameters(PARAMS).build(); + when(settingsLoader.load()).thenReturn(Optional.of(setting)); + + openSearchExtraParameterComposer.compose(sparkSubmitParameters, dispatchQueryRequest, context); + + verify(sparkSubmitParameters).setExtraParameters(PARAMS); + } + + @Test + public void paramNotExist_compose() { + when(settingsLoader.load()).thenReturn(Optional.empty()); + + openSearchExtraParameterComposer.compose(sparkSubmitParameters, dispatchQueryRequest, context); + + verifyNoInteractions(sparkSubmitParameters); + } +} diff --git a/async-query/src/test/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigClusterSettingLoaderTest.java b/async-query/src/test/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigClusterSettingLoaderTest.java new file mode 100644 index 0000000000..f9ccd93b00 --- /dev/null +++ b/async-query/src/test/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigClusterSettingLoaderTest.java @@ -0,0 +1,67 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package 
org.opensearch.sql.spark.config; + +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.when; +import static org.opensearch.sql.common.setting.Settings.Key.SPARK_EXECUTION_ENGINE_CONFIG; +import static org.opensearch.sql.spark.constants.TestConstants.ACCOUNT_ID; +import static org.opensearch.sql.spark.constants.TestConstants.EMRS_APPLICATION_ID; +import static org.opensearch.sql.spark.constants.TestConstants.EMRS_EXECUTION_ROLE; +import static org.opensearch.sql.spark.constants.TestConstants.SPARK_SUBMIT_PARAMETERS; +import static org.opensearch.sql.spark.constants.TestConstants.US_WEST_REGION; + +import java.util.Optional; +import org.json.JSONObject; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.common.setting.Settings; + +@ExtendWith(MockitoExtension.class) +class SparkExecutionEngineConfigClusterSettingLoaderTest { + @Mock Settings settings; + + @InjectMocks + SparkExecutionEngineConfigClusterSettingLoader sparkExecutionEngineConfigClusterSettingLoader; + + @Test + public void blankConfig() { + when(settings.getSettingValue(SPARK_EXECUTION_ENGINE_CONFIG)).thenReturn(""); + + Optional result = + sparkExecutionEngineConfigClusterSettingLoader.load(); + + assertTrue(result.isEmpty()); + } + + @Test + public void validConfig() { + when(settings.getSettingValue(SPARK_EXECUTION_ENGINE_CONFIG)).thenReturn(getConfigJson()); + + SparkExecutionEngineConfigClusterSetting result = + sparkExecutionEngineConfigClusterSettingLoader.load().get(); + + Assertions.assertEquals(ACCOUNT_ID, result.getAccountId()); + Assertions.assertEquals(EMRS_APPLICATION_ID, result.getApplicationId()); + Assertions.assertEquals(EMRS_EXECUTION_ROLE, result.getExecutionRoleARN()); + Assertions.assertEquals(US_WEST_REGION, result.getRegion()); + Assertions.assertEquals(SPARK_SUBMIT_PARAMETERS, result.getSparkSubmitParameters()); + } + + String getConfigJson() { + return new JSONObject() + .put("accountId", ACCOUNT_ID) + .put("applicationId", EMRS_APPLICATION_ID) + .put("executionRoleARN", EMRS_EXECUTION_ROLE) + .put("region", US_WEST_REGION) + .put("sparkSubmitParameters", SPARK_SUBMIT_PARAMETERS) + .toString(); + } +} diff --git a/spark/src/test/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigClusterSettingTest.java b/async-query/src/test/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigClusterSettingTest.java similarity index 100% rename from spark/src/test/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigClusterSettingTest.java rename to async-query/src/test/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigClusterSettingTest.java diff --git a/spark/src/test/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigSupplierImplTest.java b/async-query/src/test/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigSupplierImplTest.java similarity index 72% rename from spark/src/test/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigSupplierImplTest.java rename to async-query/src/test/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigSupplierImplTest.java index 2409d32726..124d8d0b6e 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigSupplierImplTest.java +++ 
b/async-query/src/test/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigSupplierImplTest.java @@ -1,3 +1,8 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + package org.opensearch.sql.spark.config; import static org.mockito.Mockito.when; @@ -7,7 +12,7 @@ import static org.opensearch.sql.spark.constants.TestConstants.TEST_CLUSTER_NAME; import static org.opensearch.sql.spark.constants.TestConstants.US_WEST_REGION; -import org.json.JSONObject; +import java.util.Optional; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -16,7 +21,6 @@ import org.opensearch.cluster.ClusterName; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; -import org.opensearch.sql.spark.asyncquery.model.SparkSubmitParameters; @ExtendWith(MockitoExtension.class) public class SparkExecutionEngineConfigSupplierImplTest { @@ -24,43 +28,46 @@ public class SparkExecutionEngineConfigSupplierImplTest { @Mock private Settings settings; @Mock private AsyncQueryRequestContext asyncQueryRequestContext; + @Mock + private SparkExecutionEngineConfigClusterSettingLoader + sparkExecutionEngineConfigClusterSettingLoader; + @Test void testGetSparkExecutionEngineConfig() { SparkExecutionEngineConfigSupplier sparkExecutionEngineConfigSupplier = - new SparkExecutionEngineConfigSupplierImpl(settings); - when(settings.getSettingValue(Settings.Key.SPARK_EXECUTION_ENGINE_CONFIG)) - .thenReturn(getConfigJson()); + new SparkExecutionEngineConfigSupplierImpl( + settings, sparkExecutionEngineConfigClusterSettingLoader); when(settings.getSettingValue(Settings.Key.CLUSTER_NAME)) .thenReturn(new ClusterName(TEST_CLUSTER_NAME)); + when(sparkExecutionEngineConfigClusterSettingLoader.load()) + .thenReturn(Optional.of(getClusterSetting())); SparkExecutionEngineConfig sparkExecutionEngineConfig = sparkExecutionEngineConfigSupplier.getSparkExecutionEngineConfig(asyncQueryRequestContext); - SparkSubmitParameters parameters = SparkSubmitParameters.builder().build(); - sparkExecutionEngineConfig.getSparkSubmitParameterModifier().modifyParameters(parameters); Assertions.assertEquals(EMRS_APPLICATION_ID, sparkExecutionEngineConfig.getApplicationId()); Assertions.assertEquals(EMRS_EXECUTION_ROLE, sparkExecutionEngineConfig.getExecutionRoleARN()); Assertions.assertEquals(US_WEST_REGION, sparkExecutionEngineConfig.getRegion()); Assertions.assertEquals(TEST_CLUSTER_NAME, sparkExecutionEngineConfig.getClusterName()); - Assertions.assertTrue(parameters.toString().contains(SPARK_SUBMIT_PARAMETERS)); } - String getConfigJson() { - return new JSONObject() - .put("applicationId", EMRS_APPLICATION_ID) - .put("executionRoleARN", EMRS_EXECUTION_ROLE) - .put("region", US_WEST_REGION) - .put("sparkSubmitParameters", SPARK_SUBMIT_PARAMETERS) - .toString(); + SparkExecutionEngineConfigClusterSetting getClusterSetting() { + return SparkExecutionEngineConfigClusterSetting.builder() + .applicationId(EMRS_APPLICATION_ID) + .executionRoleARN(EMRS_EXECUTION_ROLE) + .region(US_WEST_REGION) + .sparkSubmitParameters(SPARK_SUBMIT_PARAMETERS) + .build(); } @Test void testGetSparkExecutionEngineConfigWithNullSetting() { SparkExecutionEngineConfigSupplier sparkExecutionEngineConfigSupplier = - new SparkExecutionEngineConfigSupplierImpl(settings); - when(settings.getSettingValue(Settings.Key.SPARK_EXECUTION_ENGINE_CONFIG)).thenReturn(null); + new SparkExecutionEngineConfigSupplierImpl( 
+ settings, sparkExecutionEngineConfigClusterSettingLoader); when(settings.getSettingValue(Settings.Key.CLUSTER_NAME)) .thenReturn(new ClusterName(TEST_CLUSTER_NAME)); + when(sparkExecutionEngineConfigClusterSettingLoader.load()).thenReturn(Optional.empty()); SparkExecutionEngineConfig sparkExecutionEngineConfig = sparkExecutionEngineConfigSupplier.getSparkExecutionEngineConfig(asyncQueryRequestContext); diff --git a/async-query/src/test/java/org/opensearch/sql/spark/constants/TestConstants.java b/async-query/src/test/java/org/opensearch/sql/spark/constants/TestConstants.java new file mode 100644 index 0000000000..15871bf6b2 --- /dev/null +++ b/async-query/src/test/java/org/opensearch/sql/spark/constants/TestConstants.java @@ -0,0 +1,18 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.constants; + +public class TestConstants { + public static final String TEST_DATASOURCE_NAME = "test_datasource_name"; + public static final String EMR_JOB_ID = "job-123xxx"; + public static final String ACCOUNT_ID = "TEST_ACCOUNT_ID"; + public static final String EMRS_APPLICATION_ID = "app-xxxxx"; + public static final String EMRS_EXECUTION_ROLE = "execution_role"; + public static final String SPARK_SUBMIT_PARAMETERS = "--conf org.flint.sql.SQLJob"; + public static final String TEST_CLUSTER_NAME = "TEST_CLUSTER"; + public static final String MOCK_SESSION_ID = "s-0123456"; + public static final String US_WEST_REGION = "us-west-1"; +} diff --git a/spark/src/test/java/org/opensearch/sql/spark/execution/session/InteractiveSessionTest.java b/async-query/src/test/java/org/opensearch/sql/spark/execution/session/InteractiveSessionTest.java similarity index 95% rename from spark/src/test/java/org/opensearch/sql/spark/execution/session/InteractiveSessionTest.java rename to async-query/src/test/java/org/opensearch/sql/spark/execution/session/InteractiveSessionTest.java index e8aeb17505..30a71607b0 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/execution/session/InteractiveSessionTest.java +++ b/async-query/src/test/java/org/opensearch/sql/spark/execution/session/InteractiveSessionTest.java @@ -44,10 +44,11 @@ public class InteractiveSessionTest extends OpenSearchIntegTestCase { private StartJobRequest startJobRequest; private SessionStorageService sessionStorageService; private StatementStorageService statementStorageService; - private SessionConfigSupplier sessionConfigSupplier = () -> 600000L; + private final SessionConfigSupplier sessionConfigSupplier = () -> 600000L; private SessionManager sessionManager; - private AsyncQueryRequestContext asyncQueryRequestContext = new NullAsyncQueryRequestContext(); - private SessionIdProvider sessionIdProvider = new DatasourceEmbeddedSessionIdProvider(); + private final AsyncQueryRequestContext asyncQueryRequestContext = + new NullAsyncQueryRequestContext(); + private final SessionIdProvider sessionIdProvider = new DatasourceEmbeddedSessionIdProvider(); @Before public void setup() { @@ -58,7 +59,7 @@ public void setup() { new OpenSearchSessionStorageService(stateStore, new SessionModelXContentSerializer()); statementStorageService = new OpenSearchStatementStorageService(stateStore, new StatementModelXContentSerializer()); - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; sessionManager = new SessionManager( diff --git 
a/spark/src/test/java/org/opensearch/sql/spark/execution/session/SessionTestUtil.java b/async-query/src/test/java/org/opensearch/sql/spark/execution/session/SessionTestUtil.java similarity index 72% rename from spark/src/test/java/org/opensearch/sql/spark/execution/session/SessionTestUtil.java rename to async-query/src/test/java/org/opensearch/sql/spark/execution/session/SessionTestUtil.java index 06689a15d0..e5ca93e96e 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/execution/session/SessionTestUtil.java +++ b/async-query/src/test/java/org/opensearch/sql/spark/execution/session/SessionTestUtil.java @@ -9,7 +9,8 @@ import static org.opensearch.sql.spark.constants.TestConstants.TEST_DATASOURCE_NAME; import java.util.HashMap; -import org.opensearch.sql.spark.asyncquery.model.SparkSubmitParameters; +import org.opensearch.sql.spark.parameter.SparkParameterComposerCollection; +import org.opensearch.sql.spark.parameter.SparkSubmitParametersBuilder; public class SessionTestUtil { @@ -19,7 +20,7 @@ public static CreateSessionRequest createSessionRequest() { null, "appId", "arn", - SparkSubmitParameters.builder().build(), + new SparkSubmitParametersBuilder(new SparkParameterComposerCollection()), new HashMap<>(), "resultIndex", TEST_DATASOURCE_NAME); diff --git a/spark/src/test/java/org/opensearch/sql/spark/execution/session/TestEMRServerlessClient.java b/async-query/src/test/java/org/opensearch/sql/spark/execution/session/TestEMRServerlessClient.java similarity index 100% rename from spark/src/test/java/org/opensearch/sql/spark/execution/session/TestEMRServerlessClient.java rename to async-query/src/test/java/org/opensearch/sql/spark/execution/session/TestEMRServerlessClient.java diff --git a/spark/src/test/java/org/opensearch/sql/spark/execution/statement/StatementTest.java b/async-query/src/test/java/org/opensearch/sql/spark/execution/statement/StatementTest.java similarity index 89% rename from spark/src/test/java/org/opensearch/sql/spark/execution/statement/StatementTest.java rename to async-query/src/test/java/org/opensearch/sql/spark/execution/statement/StatementTest.java index 3c6517fdb2..49a4d69222 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/execution/statement/StatementTest.java +++ b/async-query/src/test/java/org/opensearch/sql/spark/execution/statement/StatementTest.java @@ -19,7 +19,6 @@ import org.junit.Test; import org.opensearch.action.admin.indices.delete.DeleteIndexRequest; import org.opensearch.action.delete.DeleteRequest; -import org.opensearch.sql.spark.asyncquery.model.AsyncQueryId; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; import org.opensearch.sql.spark.asyncquery.model.NullAsyncQueryRequestContext; import org.opensearch.sql.spark.client.EMRServerlessClientFactory; @@ -39,6 +38,7 @@ import org.opensearch.sql.spark.execution.xcontent.SessionModelXContentSerializer; import org.opensearch.sql.spark.execution.xcontent.StatementModelXContentSerializer; import org.opensearch.sql.spark.rest.model.LangType; +import org.opensearch.sql.spark.utils.IDUtils; import org.opensearch.test.OpenSearchIntegTestCase; public class StatementTest extends OpenSearchIntegTestCase { @@ -47,12 +47,13 @@ public class StatementTest extends OpenSearchIntegTestCase { private StatementStorageService statementStorageService; private SessionStorageService sessionStorageService; - private TestEMRServerlessClient emrsClient = new TestEMRServerlessClient(); - private SessionConfigSupplier sessionConfigSupplier = () -> 600000L; - private SessionIdProvider 
sessionIdProvider = new DatasourceEmbeddedSessionIdProvider(); + private final TestEMRServerlessClient emrsClient = new TestEMRServerlessClient(); + private final SessionConfigSupplier sessionConfigSupplier = () -> 600000L; + private final SessionIdProvider sessionIdProvider = new DatasourceEmbeddedSessionIdProvider(); private SessionManager sessionManager; - private AsyncQueryRequestContext asyncQueryRequestContext = new NullAsyncQueryRequestContext(); + private final AsyncQueryRequestContext asyncQueryRequestContext = + new NullAsyncQueryRequestContext(); @Before public void setup() { @@ -61,7 +62,7 @@ public void setup() { new OpenSearchStatementStorageService(stateStore, new StatementModelXContentSerializer()); sessionStorageService = new OpenSearchSessionStorageService(stateStore, new SessionModelXContentSerializer()); - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; sessionManager = new SessionManager( @@ -144,7 +145,8 @@ public void cancelFailedBecauseOfConflict() { st.open(); StatementModel running = - statementStorageService.updateStatementState(st.getStatementModel(), CANCELLED); + statementStorageService.updateStatementState( + st.getStatementModel(), CANCELLED, asyncQueryRequestContext); assertEquals(StatementState.CANCELLED, running.getStatementState()); IllegalStateException exception = assertThrows(IllegalStateException.class, st::cancel); @@ -265,7 +267,7 @@ public void newStatementFieldAssert() { Session session = sessionManager.createSession(createSessionRequest(), asyncQueryRequestContext); StatementId statementId = session.submit(queryRequest(), asyncQueryRequestContext); - Optional statement = session.get(statementId); + Optional statement = session.get(statementId, asyncQueryRequestContext); assertTrue(statement.isPresent()); assertEquals(session.getSessionId(), statement.get().getSessionId()); @@ -279,7 +281,7 @@ public void newStatementFieldAssert() { @Test public void failToSubmitStatementInDeletedSession() { - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; + EMRServerlessClientFactory emrServerlessClientFactory = (accountId) -> emrsClient; Session session = sessionManager.createSession(createSessionRequest(), asyncQueryRequestContext); @@ -301,7 +303,7 @@ public void getStatementSuccess() { sessionStorageService.updateSessionState(session.getSessionModel(), SessionState.RUNNING); StatementId statementId = session.submit(queryRequest(), asyncQueryRequestContext); - Optional statement = session.get(statementId); + Optional statement = session.get(statementId, asyncQueryRequestContext); assertTrue(statement.isPresent()); assertEquals(WAITING, statement.get().getStatementState()); assertEquals(statementId, statement.get().getStatementId()); @@ -314,7 +316,8 @@ public void getStatementNotExist() { // App change state to running sessionStorageService.updateSessionState(session.getSessionModel(), SessionState.RUNNING); - Optional statement = session.get(StatementId.newStatementId("not-exist-id")); + Optional statement = + session.get(StatementId.newStatementId("not-exist-id"), asyncQueryRequestContext); assertFalse(statement.isPresent()); } @@ -332,7 +335,8 @@ public TestStatement assertSessionState(StatementState expected) { assertEquals(expected, st.getStatementModel().getStatementState()); Optional model = - statementStorageService.getStatement(st.getStatementId().getId(), TEST_DATASOURCE_NAME); + 
statementStorageService.getStatement( + st.getStatementId().getId(), TEST_DATASOURCE_NAME, st.getAsyncQueryRequestContext()); assertTrue(model.isPresent()); assertEquals(expected, model.get().getStatementState()); @@ -343,7 +347,8 @@ public TestStatement assertStatementId(StatementId expected) { assertEquals(expected, st.getStatementModel().getStatementId()); Optional model = - statementStorageService.getStatement(st.getStatementId().getId(), TEST_DATASOURCE_NAME); + statementStorageService.getStatement( + st.getStatementId().getId(), TEST_DATASOURCE_NAME, st.getAsyncQueryRequestContext()); assertTrue(model.isPresent()); assertEquals(expected, model.get().getStatementId()); return this; @@ -361,15 +366,15 @@ public TestStatement cancel() { public TestStatement run() { StatementModel model = - statementStorageService.updateStatementState(st.getStatementModel(), RUNNING); + statementStorageService.updateStatementState( + st.getStatementModel(), RUNNING, st.getAsyncQueryRequestContext()); st.setStatementModel(model); return this; } } private QueryRequest queryRequest() { - return new QueryRequest( - AsyncQueryId.newAsyncQueryId(TEST_DATASOURCE_NAME).getId(), LangType.SQL, "select 1"); + return new QueryRequest(IDUtils.encode(TEST_DATASOURCE_NAME), LangType.SQL, "select 1"); } private Statement createStatement(StatementId stId) { diff --git a/spark/src/test/java/org/opensearch/sql/spark/execution/xcontent/AsyncQueryJobMetadataXContentSerializerTest.java b/async-query/src/test/java/org/opensearch/sql/spark/execution/xcontent/AsyncQueryJobMetadataXContentSerializerTest.java similarity index 100% rename from spark/src/test/java/org/opensearch/sql/spark/execution/xcontent/AsyncQueryJobMetadataXContentSerializerTest.java rename to async-query/src/test/java/org/opensearch/sql/spark/execution/xcontent/AsyncQueryJobMetadataXContentSerializerTest.java diff --git a/spark/src/test/java/org/opensearch/sql/spark/execution/xcontent/FlintIndexStateModelXContentSerializerTest.java b/async-query/src/test/java/org/opensearch/sql/spark/execution/xcontent/FlintIndexStateModelXContentSerializerTest.java similarity index 98% rename from spark/src/test/java/org/opensearch/sql/spark/execution/xcontent/FlintIndexStateModelXContentSerializerTest.java rename to async-query/src/test/java/org/opensearch/sql/spark/execution/xcontent/FlintIndexStateModelXContentSerializerTest.java index 0d6d5f3119..1840c52457 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/execution/xcontent/FlintIndexStateModelXContentSerializerTest.java +++ b/async-query/src/test/java/org/opensearch/sql/spark/execution/xcontent/FlintIndexStateModelXContentSerializerTest.java @@ -23,7 +23,7 @@ @ExtendWith(MockitoExtension.class) class FlintIndexStateModelXContentSerializerTest { - private FlintIndexStateModelXContentSerializer serializer = + private final FlintIndexStateModelXContentSerializer serializer = new FlintIndexStateModelXContentSerializer(); @Test diff --git a/spark/src/test/java/org/opensearch/sql/spark/execution/xcontent/IndexDMLResultXContentSerializerTest.java b/async-query/src/test/java/org/opensearch/sql/spark/execution/xcontent/IndexDMLResultXContentSerializerTest.java similarity index 100% rename from spark/src/test/java/org/opensearch/sql/spark/execution/xcontent/IndexDMLResultXContentSerializerTest.java rename to async-query/src/test/java/org/opensearch/sql/spark/execution/xcontent/IndexDMLResultXContentSerializerTest.java diff --git 
a/spark/src/test/java/org/opensearch/sql/spark/execution/xcontent/SessionModelXContentSerializerTest.java b/async-query/src/test/java/org/opensearch/sql/spark/execution/xcontent/SessionModelXContentSerializerTest.java similarity index 100% rename from spark/src/test/java/org/opensearch/sql/spark/execution/xcontent/SessionModelXContentSerializerTest.java rename to async-query/src/test/java/org/opensearch/sql/spark/execution/xcontent/SessionModelXContentSerializerTest.java diff --git a/spark/src/test/java/org/opensearch/sql/spark/execution/xcontent/StatementModelXContentSerializerTest.java b/async-query/src/test/java/org/opensearch/sql/spark/execution/xcontent/StatementModelXContentSerializerTest.java similarity index 100% rename from spark/src/test/java/org/opensearch/sql/spark/execution/xcontent/StatementModelXContentSerializerTest.java rename to async-query/src/test/java/org/opensearch/sql/spark/execution/xcontent/StatementModelXContentSerializerTest.java diff --git a/spark/src/test/java/org/opensearch/sql/spark/execution/xcontent/XContentSerializerTestUtil.java b/async-query/src/test/java/org/opensearch/sql/spark/execution/xcontent/XContentSerializerTestUtil.java similarity index 100% rename from spark/src/test/java/org/opensearch/sql/spark/execution/xcontent/XContentSerializerTestUtil.java rename to async-query/src/test/java/org/opensearch/sql/spark/execution/xcontent/XContentSerializerTestUtil.java diff --git a/spark/src/test/java/org/opensearch/sql/spark/flint/FlintIndexMetadataServiceImplTest.java b/async-query/src/test/java/org/opensearch/sql/spark/flint/FlintIndexMetadataServiceImplTest.java similarity index 91% rename from spark/src/test/java/org/opensearch/sql/spark/flint/FlintIndexMetadataServiceImplTest.java rename to async-query/src/test/java/org/opensearch/sql/spark/flint/FlintIndexMetadataServiceImplTest.java index f6baa82dd2..b1321cc132 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/flint/FlintIndexMetadataServiceImplTest.java +++ b/async-query/src/test/java/org/opensearch/sql/spark/flint/FlintIndexMetadataServiceImplTest.java @@ -29,6 +29,7 @@ import org.opensearch.core.xcontent.DeprecationHandler; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; import org.opensearch.sql.spark.dispatcher.model.FlintIndexOptions; import org.opensearch.sql.spark.dispatcher.model.FullyQualifiedTableName; import org.opensearch.sql.spark.dispatcher.model.IndexQueryActionType; @@ -39,6 +40,8 @@ public class FlintIndexMetadataServiceImplTest { @Mock(answer = RETURNS_DEEP_STUBS) private Client client; + @Mock private AsyncQueryRequestContext asyncQueryRequestContext; + @SneakyThrows @Test void testGetJobIdFromFlintSkippingIndexMetadata() { @@ -56,8 +59,11 @@ void testGetJobIdFromFlintSkippingIndexMetadata() { .indexQueryActionType(IndexQueryActionType.DROP) .indexType(FlintIndexType.SKIPPING) .build(); + Map indexMetadataMap = - flintIndexMetadataService.getFlintIndexMetadata(indexQueryDetails.openSearchIndexName()); + flintIndexMetadataService.getFlintIndexMetadata( + indexQueryDetails.openSearchIndexName(), asyncQueryRequestContext); + Assertions.assertEquals( "00fhelvq7peuao0", indexMetadataMap.get(indexQueryDetails.openSearchIndexName()).getJobId()); @@ -80,8 +86,11 @@ void testGetJobIdFromFlintSkippingIndexMetadataWithIndexState() { .indexQueryActionType(IndexQueryActionType.DROP) .indexType(FlintIndexType.SKIPPING) .build(); + Map 
indexMetadataMap = - flintIndexMetadataService.getFlintIndexMetadata(indexQueryDetails.openSearchIndexName()); + flintIndexMetadataService.getFlintIndexMetadata( + indexQueryDetails.openSearchIndexName(), asyncQueryRequestContext); + FlintIndexMetadata metadata = indexMetadataMap.get(indexQueryDetails.openSearchIndexName()); Assertions.assertEquals("00fhelvq7peuao0", metadata.getJobId()); } @@ -103,8 +112,11 @@ void testGetJobIdFromFlintCoveringIndexMetadata() { .indexType(FlintIndexType.COVERING) .build(); FlintIndexMetadataService flintIndexMetadataService = new FlintIndexMetadataServiceImpl(client); + Map indexMetadataMap = - flintIndexMetadataService.getFlintIndexMetadata(indexQueryDetails.openSearchIndexName()); + flintIndexMetadataService.getFlintIndexMetadata( + indexQueryDetails.openSearchIndexName(), asyncQueryRequestContext); + Assertions.assertEquals( "00fdmvv9hp8u0o0q", indexMetadataMap.get(indexQueryDetails.openSearchIndexName()).getJobId()); @@ -126,8 +138,11 @@ void testGetJobIDWithNPEException() { .indexQueryActionType(IndexQueryActionType.DROP) .indexType(FlintIndexType.COVERING) .build(); + Map flintIndexMetadataMap = - flintIndexMetadataService.getFlintIndexMetadata(indexQueryDetails.openSearchIndexName()); + flintIndexMetadataService.getFlintIndexMetadata( + indexQueryDetails.openSearchIndexName(), asyncQueryRequestContext); + Assertions.assertFalse( flintIndexMetadataMap.containsKey("flint_mys3_default_http_logs_cv1_index")); } @@ -148,8 +163,10 @@ void testGetJobIDWithNPEExceptionForMultipleIndices() { indexMappingsMap.put(indexName, mappings); mockNodeClientIndicesMappings("flint_mys3*", indexMappingsMap); FlintIndexMetadataService flintIndexMetadataService = new FlintIndexMetadataServiceImpl(client); + Map flintIndexMetadataMap = - flintIndexMetadataService.getFlintIndexMetadata("flint_mys3*"); + flintIndexMetadataService.getFlintIndexMetadata("flint_mys3*", asyncQueryRequestContext); + Assertions.assertFalse( flintIndexMetadataMap.containsKey("flint_mys3_default_http_logs_cv1_index")); Assertions.assertTrue( diff --git a/async-query/src/test/java/org/opensearch/sql/spark/flint/OpenSearchFlintIndexClientTest.java b/async-query/src/test/java/org/opensearch/sql/spark/flint/OpenSearchFlintIndexClientTest.java new file mode 100644 index 0000000000..d9f2e58dba --- /dev/null +++ b/async-query/src/test/java/org/opensearch/sql/spark/flint/OpenSearchFlintIndexClientTest.java @@ -0,0 +1,42 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.flint; + +import static org.mockito.Answers.RETURNS_DEEP_STUBS; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.action.admin.indices.delete.DeleteIndexRequest; +import org.opensearch.action.support.master.AcknowledgedResponse; +import org.opensearch.client.Client; + +@ExtendWith(MockitoExtension.class) +public class OpenSearchFlintIndexClientTest { + + @Mock(answer = RETURNS_DEEP_STUBS) + private Client client; + + @Mock private AcknowledgedResponse acknowledgedResponse; + + @InjectMocks private OpenSearchFlintIndexClient openSearchFlintIndexClient; + + @Test + public void testDeleteIndex() { + 
when(client.admin().indices().delete(any(DeleteIndexRequest.class)).actionGet()) + .thenReturn(acknowledgedResponse); + when(acknowledgedResponse.isAcknowledged()).thenReturn(true); + + openSearchFlintIndexClient.deleteIndex("test-index"); + verify(client.admin().indices()).delete(any(DeleteIndexRequest.class)); + verify(acknowledgedResponse).isAcknowledged(); + } +} diff --git a/spark/src/test/java/org/opensearch/sql/spark/flint/OpenSearchFlintIndexStateModelServiceTest.java b/async-query/src/test/java/org/opensearch/sql/spark/flint/OpenSearchFlintIndexStateModelServiceTest.java similarity index 85% rename from spark/src/test/java/org/opensearch/sql/spark/flint/OpenSearchFlintIndexStateModelServiceTest.java rename to async-query/src/test/java/org/opensearch/sql/spark/flint/OpenSearchFlintIndexStateModelServiceTest.java index 977f77b397..4faff41fe6 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/flint/OpenSearchFlintIndexStateModelServiceTest.java +++ b/async-query/src/test/java/org/opensearch/sql/spark/flint/OpenSearchFlintIndexStateModelServiceTest.java @@ -16,6 +16,7 @@ import org.mockito.InjectMocks; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; import org.opensearch.sql.spark.execution.statestore.StateStore; import org.opensearch.sql.spark.execution.xcontent.FlintIndexStateModelXContentSerializer; @@ -30,6 +31,7 @@ public class OpenSearchFlintIndexStateModelServiceTest { @Mock FlintIndexState flintIndexState; @Mock FlintIndexStateModel responseFlintIndexStateModel; @Mock FlintIndexStateModelXContentSerializer flintIndexStateModelXContentSerializer; + @Mock AsyncQueryRequestContext asyncQueryRequestContext; @InjectMocks OpenSearchFlintIndexStateModelService openSearchFlintIndexStateModelService; @@ -40,7 +42,7 @@ void updateFlintIndexState() { FlintIndexStateModel result = openSearchFlintIndexStateModelService.updateFlintIndexState( - flintIndexStateModel, flintIndexState, DATASOURCE); + flintIndexStateModel, flintIndexState, DATASOURCE, asyncQueryRequestContext); assertEquals(responseFlintIndexStateModel, result); } @@ -51,7 +53,8 @@ void getFlintIndexStateModel() { .thenReturn(Optional.of(responseFlintIndexStateModel)); Optional result = - openSearchFlintIndexStateModelService.getFlintIndexStateModel("ID", DATASOURCE); + openSearchFlintIndexStateModelService.getFlintIndexStateModel( + "ID", DATASOURCE, asyncQueryRequestContext); assertEquals(responseFlintIndexStateModel, result.get()); } @@ -63,7 +66,8 @@ void createFlintIndexStateModel() { when(flintIndexStateModel.getDatasourceName()).thenReturn(DATASOURCE); FlintIndexStateModel result = - openSearchFlintIndexStateModelService.createFlintIndexStateModel(flintIndexStateModel); + openSearchFlintIndexStateModelService.createFlintIndexStateModel( + flintIndexStateModel, asyncQueryRequestContext); assertEquals(responseFlintIndexStateModel, result); } @@ -73,7 +77,8 @@ void deleteFlintIndexStateModel() { when(mockStateStore.delete(any(), any())).thenReturn(true); boolean result = - openSearchFlintIndexStateModelService.deleteFlintIndexStateModel(ID, DATASOURCE); + openSearchFlintIndexStateModelService.deleteFlintIndexStateModel( + ID, DATASOURCE, asyncQueryRequestContext); assertTrue(result); } diff --git a/async-query/src/test/java/org/opensearch/sql/spark/leasemanager/DefaultLeaseManagerTest.java b/async-query/src/test/java/org/opensearch/sql/spark/leasemanager/DefaultLeaseManagerTest.java new file mode 100644 index 
0000000000..a7ea6aa22f --- /dev/null +++ b/async-query/src/test/java/org/opensearch/sql/spark/leasemanager/DefaultLeaseManagerTest.java @@ -0,0 +1,60 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.leasemanager; + +import static org.junit.Assert.assertThrows; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.when; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.spark.dispatcher.model.JobType; +import org.opensearch.sql.spark.execution.statestore.StateStore; +import org.opensearch.sql.spark.leasemanager.model.LeaseRequest; + +@ExtendWith(MockitoExtension.class) +class DefaultLeaseManagerTest { + @Mock private Settings settings; + + @Mock private StateStore stateStore; + + @Test + public void leaseManagerRejectsJobs() { + when(stateStore.count(any(), any())).thenReturn(3L); + when(settings.getSettingValue(any())).thenReturn(3); + DefaultLeaseManager defaultLeaseManager = new DefaultLeaseManager(settings, stateStore); + + defaultLeaseManager.borrow(getLeaseRequest(JobType.BATCH)); + assertThrows( + ConcurrencyLimitExceededException.class, + () -> defaultLeaseManager.borrow(getLeaseRequest(JobType.INTERACTIVE))); + assertThrows( + ConcurrencyLimitExceededException.class, + () -> defaultLeaseManager.borrow(getLeaseRequest(JobType.STREAMING))); + assertThrows( + ConcurrencyLimitExceededException.class, + () -> defaultLeaseManager.borrow(getLeaseRequest(JobType.REFRESH))); + } + + @Test + public void leaseManagerAcceptsJobs() { + when(stateStore.count(any(), any())).thenReturn(2L); + when(settings.getSettingValue(any())).thenReturn(3); + DefaultLeaseManager defaultLeaseManager = new DefaultLeaseManager(settings, stateStore); + + defaultLeaseManager.borrow(getLeaseRequest(JobType.BATCH)); + defaultLeaseManager.borrow(getLeaseRequest(JobType.INTERACTIVE)); + defaultLeaseManager.borrow(getLeaseRequest(JobType.STREAMING)); + defaultLeaseManager.borrow(getLeaseRequest(JobType.REFRESH)); + } + + private LeaseRequest getLeaseRequest(JobType jobType) { + return new LeaseRequest(jobType, "mys3"); + } +} diff --git a/async-query/src/test/java/org/opensearch/sql/spark/parameter/S3GlueDataSourceSparkParameterComposerTest.java b/async-query/src/test/java/org/opensearch/sql/spark/parameter/S3GlueDataSourceSparkParameterComposerTest.java new file mode 100644 index 0000000000..3e12aa78d0 --- /dev/null +++ b/async-query/src/test/java/org/opensearch/sql/spark/parameter/S3GlueDataSourceSparkParameterComposerTest.java @@ -0,0 +1,368 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.parameter; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_JAR_PACKAGES_KEY; + +import com.google.common.collect.ImmutableMap; +import com.google.gson.Gson; +import com.google.gson.JsonObject; +import java.util.Arrays; +import java.util.Map; +import java.util.stream.Collectors; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.junit.jupiter.MockitoExtension; +import 
org.opensearch.sql.common.setting.Settings.Key; +import org.opensearch.sql.datasource.model.DataSourceMetadata; +import org.opensearch.sql.datasource.model.DataSourceStatus; +import org.opensearch.sql.datasource.model.DataSourceType; +import org.opensearch.sql.datasources.auth.AuthenticationType; +import org.opensearch.sql.datasources.glue.GlueDataSourceFactory; +import org.opensearch.sql.opensearch.setting.OpenSearchSettings; +import org.opensearch.sql.spark.asyncquery.model.NullAsyncQueryRequestContext; +import org.opensearch.sql.spark.config.SparkExecutionEngineConfigClusterSettingLoader; +import org.opensearch.sql.spark.dispatcher.model.DispatchQueryRequest; + +@ExtendWith(MockitoExtension.class) +class S3GlueDataSourceSparkParameterComposerTest { + + public static final String VALID_URI = "https://test.host.com:9200"; + public static final String INVALID_URI = "http://test/\r\n"; + public static final String USERNAME = "USERNAME"; + public static final String PASSWORD = "PASSWORD"; + public static final String REGION = "REGION"; + public static final String TRUE = "true"; + public static final String ROLE_ARN = "arn:aws:iam::123456789012:role/ROLE_NAME"; + public static final String APP_ID = "APP_ID"; + public static final String CLUSTER_NAME = "CLUSTER_NAME"; + public static final String ACCOUNT_ID = "123456789012"; + public static final String SESSION_TAG = "SESSION_TAG"; + + private static final String COMMON_EXPECTED_PARAMS = + " --class org.apache.spark.sql.FlintJob " + + getConfList( + "spark.emr-serverless.driverEnv.ASSUME_ROLE_CREDENTIALS_ROLE_ARN=arn:aws:iam::123456789012:role/ROLE_NAME", + "spark.executorEnv.ASSUME_ROLE_CREDENTIALS_ROLE_ARN=arn:aws:iam::123456789012:role/ROLE_NAME", + "spark.hive.metastore.glue.role.arn=arn:aws:iam::123456789012:role/ROLE_NAME", + "spark.sql.catalog.DATASOURCE_NAME=org.opensearch.sql.FlintDelegatingSessionCatalog", + "spark.flint.datasource.name=DATASOURCE_NAME", + "spark.datasource.flint.host=test.host.com", + "spark.datasource.flint.port=9200", + "spark.datasource.flint.scheme=https"); + + @Mock DispatchQueryRequest dispatchQueryRequest; + + @Test + public void testBasicAuth() { + DataSourceMetadata dataSourceMetadata = + getDataSourceMetadata(AuthenticationType.BASICAUTH, VALID_URI); + SparkSubmitParameters sparkSubmitParameters = new SparkSubmitParameters(); + + new S3GlueDataSourceSparkParameterComposer(getSparkExecutionEngineConfigClusterSettingLoader()) + .compose( + dataSourceMetadata, + sparkSubmitParameters, + dispatchQueryRequest, + new NullAsyncQueryRequestContext()); + + assertEquals( + COMMON_EXPECTED_PARAMS + + getConfList( + "spark.datasource.flint.auth=basic", + "spark.datasource.flint.auth.username=USERNAME", + "spark.datasource.flint.auth.password=PASSWORD"), + sparkSubmitParameters.toString()); + } + + @Test + public void testComposeWithSigV4Auth() { + DataSourceMetadata dataSourceMetadata = + getDataSourceMetadata(AuthenticationType.AWSSIGV4AUTH, VALID_URI); + SparkSubmitParameters sparkSubmitParameters = new SparkSubmitParameters(); + + new S3GlueDataSourceSparkParameterComposer(getSparkExecutionEngineConfigClusterSettingLoader()) + .compose( + dataSourceMetadata, + sparkSubmitParameters, + dispatchQueryRequest, + new NullAsyncQueryRequestContext()); + + assertEquals( + COMMON_EXPECTED_PARAMS + + getConfList( + "spark.datasource.flint.auth=sigv4", "spark.datasource.flint.region=REGION"), + sparkSubmitParameters.toString()); + } + + @Test + public void testComposeWithNoAuth() { + DataSourceMetadata dataSourceMetadata = + 
getDataSourceMetadata(AuthenticationType.NOAUTH, VALID_URI); + SparkSubmitParameters sparkSubmitParameters = new SparkSubmitParameters(); + + new S3GlueDataSourceSparkParameterComposer(getSparkExecutionEngineConfigClusterSettingLoader()) + .compose( + dataSourceMetadata, + sparkSubmitParameters, + dispatchQueryRequest, + new NullAsyncQueryRequestContext()); + + assertEquals( + COMMON_EXPECTED_PARAMS + getConfList("spark.datasource.flint.auth=noauth"), + sparkSubmitParameters.toString()); + } + + @Test + public void testComposeWithBadUri() { + DataSourceMetadata dataSourceMetadata = + getDataSourceMetadata(AuthenticationType.NOAUTH, INVALID_URI); + SparkSubmitParameters sparkSubmitParameters = new SparkSubmitParameters(); + + assertThrows( + IllegalArgumentException.class, + () -> + new S3GlueDataSourceSparkParameterComposer( + getSparkExecutionEngineConfigClusterSettingLoader()) + .compose( + dataSourceMetadata, + sparkSubmitParameters, + dispatchQueryRequest, + new NullAsyncQueryRequestContext())); + } + + @Test + public void testIcebergEnabled() { + final Map properties = + ImmutableMap.builder() + .put(GlueDataSourceFactory.GLUE_ROLE_ARN, ROLE_ARN) + .put(GlueDataSourceFactory.GLUE_ICEBERG_ENABLED, TRUE) + .put(GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_URI, VALID_URI) + .put( + GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_AUTH, + AuthenticationType.BASICAUTH.getName()) + .put(GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_AUTH_USERNAME, USERNAME) + .put(GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_AUTH_PASSWORD, PASSWORD) + .put(GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_REGION, REGION) + .build(); + + final String expectedParams = + " --class org.apache.spark.sql.FlintJob " + + getConfList( + "spark.jars.packages=package,org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.5.0,software.amazon.awssdk:bundle:2.26.30", + "spark.emr-serverless.driverEnv.ASSUME_ROLE_CREDENTIALS_ROLE_ARN=arn:aws:iam::123456789012:role/ROLE_NAME", + "spark.executorEnv.ASSUME_ROLE_CREDENTIALS_ROLE_ARN=arn:aws:iam::123456789012:role/ROLE_NAME", + "spark.hive.metastore.glue.role.arn=arn:aws:iam::123456789012:role/ROLE_NAME", + "spark.sql.catalog.DATASOURCE_NAME=org.opensearch.sql.FlintDelegatingSessionCatalog", + "spark.flint.datasource.name=DATASOURCE_NAME", + "spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog", + "spark.sql.catalog.spark_catalog.catalog-impl=org.apache.iceberg.aws.glue.GlueCatalog", + "spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,org.opensearch.flint.spark.FlintSparkExtensions,org.opensearch.flint.spark.FlintPPLSparkExtensions", + "spark.sql.catalog.spark_catalog.client.region=REGION", + "spark.sql.catalog.spark_catalog.glue.account-id=123456789012", + "spark.sql.catalog.spark_catalog.client.assume-role.arn=arn:aws:iam::123456789012:role/ROLE_NAME", + "spark.sql.catalog.spark_catalog.client.assume-role.region=REGION", + "spark.sql.iceberg.handle-timestamp-without-timezone=true", + "spark.sql.catalog.spark_catalog.client.factory=org.apache.iceberg.aws.AssumeRoleAwsClientFactory", + "spark.datasource.flint.host=test.host.com", + "spark.datasource.flint.port=9200", + "spark.datasource.flint.scheme=https", + "spark.datasource.flint.auth=basic", + "spark.datasource.flint.auth.username=USERNAME", + "spark.datasource.flint.auth.password=PASSWORD"); + + DataSourceMetadata dataSourceMetadata = getDataSourceMetadata(properties); + SparkSubmitParameters sparkSubmitParameters = new SparkSubmitParameters(); + 
sparkSubmitParameters.setConfigItem(SPARK_JAR_PACKAGES_KEY, "package"); + + new S3GlueDataSourceSparkParameterComposer(getSparkExecutionEngineConfigClusterSettingLoader()) + .compose( + dataSourceMetadata, + sparkSubmitParameters, + dispatchQueryRequest, + new NullAsyncQueryRequestContext()); + + assertEquals(expectedParams, sparkSubmitParameters.toString()); + } + + @Test + public void testIcebergWithLakeFormationEnabled() { + final Map properties = + ImmutableMap.builder() + .put(GlueDataSourceFactory.GLUE_ROLE_ARN, ROLE_ARN) + .put(GlueDataSourceFactory.GLUE_ICEBERG_ENABLED, TRUE) + .put(GlueDataSourceFactory.GLUE_LAKEFORMATION_ENABLED, TRUE) + .put(GlueDataSourceFactory.GLUE_LAKEFORMATION_SESSION_TAG, SESSION_TAG) + .put(GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_URI, VALID_URI) + .put( + GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_AUTH, + AuthenticationType.BASICAUTH.getName()) + .put(GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_AUTH_USERNAME, USERNAME) + .put(GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_AUTH_PASSWORD, PASSWORD) + .put(GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_REGION, REGION) + .build(); + + final String expectedParams = + " --class org.apache.spark.sql.FlintJob " + + getConfList( + "spark.jars.packages=package,org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.5.0,software.amazon.awssdk:bundle:2.26.30", + "spark.emr-serverless.driverEnv.ASSUME_ROLE_CREDENTIALS_ROLE_ARN=arn:aws:iam::123456789012:role/ROLE_NAME", + "spark.executorEnv.ASSUME_ROLE_CREDENTIALS_ROLE_ARN=arn:aws:iam::123456789012:role/ROLE_NAME", + "spark.hive.metastore.glue.role.arn=arn:aws:iam::123456789012:role/ROLE_NAME", + "spark.sql.catalog.DATASOURCE_NAME=org.opensearch.sql.FlintDelegatingSessionCatalog", + "spark.flint.datasource.name=DATASOURCE_NAME", + "spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog", + "spark.sql.catalog.spark_catalog.catalog-impl=org.apache.iceberg.aws.glue.GlueCatalog", + "spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,org.opensearch.flint.spark.FlintSparkExtensions,org.opensearch.flint.spark.FlintPPLSparkExtensions", + "spark.sql.catalog.spark_catalog.client.region=REGION", + "spark.sql.catalog.spark_catalog.glue.account-id=123456789012", + "spark.sql.catalog.spark_catalog.client.assume-role.arn=arn:aws:iam::123456789012:role/ROLE_NAME", + "spark.sql.catalog.spark_catalog.client.assume-role.region=REGION", + "spark.sql.iceberg.handle-timestamp-without-timezone=true", + "spark.flint.optimizer.covering.enabled=false", + "spark.sql.catalog.spark_catalog.glue.lakeformation-enabled=true", + "spark.sql.catalog.spark_catalog.client.factory=org.apache.iceberg.aws.lakeformation.LakeFormationAwsClientFactory", + "spark.sql.catalog.spark_catalog.client.assume-role.tags.LakeFormationAuthorizedCaller=SESSION_TAG", + "spark.datasource.flint.host=test.host.com", + "spark.datasource.flint.port=9200", + "spark.datasource.flint.scheme=https", + "spark.datasource.flint.auth=basic", + "spark.datasource.flint.auth.username=USERNAME", + "spark.datasource.flint.auth.password=PASSWORD"); + + DataSourceMetadata dataSourceMetadata = getDataSourceMetadata(properties); + SparkSubmitParameters sparkSubmitParameters = new SparkSubmitParameters(); + sparkSubmitParameters.setConfigItem(SPARK_JAR_PACKAGES_KEY, "package"); + + new S3GlueDataSourceSparkParameterComposer(getSparkExecutionEngineConfigClusterSettingLoader()) + .compose( + dataSourceMetadata, + sparkSubmitParameters, + dispatchQueryRequest, + new 
NullAsyncQueryRequestContext()); + + assertEquals(expectedParams, sparkSubmitParameters.toString()); + } + + @Test + public void testIcebergWithLakeFormationEnabledNoSessionTag() { + final Map properties = + ImmutableMap.builder() + .put(GlueDataSourceFactory.GLUE_ROLE_ARN, ROLE_ARN) + .put(GlueDataSourceFactory.GLUE_ICEBERG_ENABLED, TRUE) + .put(GlueDataSourceFactory.GLUE_LAKEFORMATION_ENABLED, TRUE) + .put(GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_URI, VALID_URI) + .put( + GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_AUTH, + AuthenticationType.BASICAUTH.getName()) + .put(GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_AUTH_USERNAME, USERNAME) + .put(GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_AUTH_PASSWORD, PASSWORD) + .put(GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_REGION, REGION) + .build(); + + DataSourceMetadata dataSourceMetadata = getDataSourceMetadata(properties); + SparkSubmitParameters sparkSubmitParameters = new SparkSubmitParameters(); + + final S3GlueDataSourceSparkParameterComposer composer = + new S3GlueDataSourceSparkParameterComposer( + getSparkExecutionEngineConfigClusterSettingLoader()); + assertThrows( + IllegalArgumentException.class, + () -> + composer.compose( + dataSourceMetadata, + sparkSubmitParameters, + dispatchQueryRequest, + new NullAsyncQueryRequestContext())); + } + + @Test + public void testNoClusterConfigAvailable() { + DataSourceMetadata dataSourceMetadata = + getDataSourceMetadata(AuthenticationType.BASICAUTH, VALID_URI); + SparkSubmitParameters sparkSubmitParameters = new SparkSubmitParameters(); + + final OpenSearchSettings settings = Mockito.mock(OpenSearchSettings.class); + Mockito.when(settings.getSettingValue(Key.SPARK_EXECUTION_ENGINE_CONFIG)).thenReturn(null); + + final S3GlueDataSourceSparkParameterComposer composer = + new S3GlueDataSourceSparkParameterComposer( + new SparkExecutionEngineConfigClusterSettingLoader(settings)); + + assertThrows( + RuntimeException.class, + () -> + composer.compose( + dataSourceMetadata, + sparkSubmitParameters, + dispatchQueryRequest, + new NullAsyncQueryRequestContext())); + } + + private DataSourceMetadata getDataSourceMetadata( + AuthenticationType authenticationType, String uri) { + return new DataSourceMetadata.Builder() + .setConnector(DataSourceType.S3GLUE) + .setName("DATASOURCE_NAME") + .setDescription("DESCRIPTION") + .setResultIndex("RESULT_INDEX") + .setDataSourceStatus(DataSourceStatus.ACTIVE) + .setProperties(getProperties(authenticationType, uri)) + .build(); + } + + private DataSourceMetadata getDataSourceMetadata(Map properties) { + return new DataSourceMetadata.Builder() + .setConnector(DataSourceType.S3GLUE) + .setName("DATASOURCE_NAME") + .setDescription("DESCRIPTION") + .setResultIndex("RESULT_INDEX") + .setDataSourceStatus(DataSourceStatus.ACTIVE) + .setProperties(properties) + .build(); + } + + private Map getProperties(AuthenticationType authType, String uri) { + return ImmutableMap.builder() + .put(GlueDataSourceFactory.GLUE_ROLE_ARN, ROLE_ARN) + .put(GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_URI, uri) + .put(GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_AUTH, authType.getName()) + .put(GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_AUTH_USERNAME, USERNAME) + .put(GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_AUTH_PASSWORD, PASSWORD) + .put(GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_REGION, REGION) + .build(); + } + + private SparkExecutionEngineConfigClusterSettingLoader + getSparkExecutionEngineConfigClusterSettingLoader() { + Gson 
gson = new Gson(); + JsonObject jsonObject = new JsonObject(); + jsonObject.addProperty("accountId", ACCOUNT_ID); + jsonObject.addProperty("applicationId", APP_ID); + jsonObject.addProperty("region", REGION); + jsonObject.addProperty("executionRoleARN", ROLE_ARN); + jsonObject.addProperty("sparkSubmitParameters", ""); + + // Convert JsonObject to JSON string + final String jsonString = gson.toJson(jsonObject); + + final OpenSearchSettings settings = Mockito.mock(OpenSearchSettings.class); + Mockito.when(settings.getSettingValue(Key.SPARK_EXECUTION_ENGINE_CONFIG)) + .thenReturn(jsonString); + + return new SparkExecutionEngineConfigClusterSettingLoader(settings); + } + + private static String getConfList(String... params) { + return Arrays.stream(params) + .map(param -> String.format(" --conf %s ", param)) + .collect(Collectors.joining()); + } +} diff --git a/spark/src/test/java/org/opensearch/sql/spark/response/OpenSearchJobExecutionResponseReaderTest.java b/async-query/src/test/java/org/opensearch/sql/spark/response/OpenSearchJobExecutionResponseReaderTest.java similarity index 77% rename from spark/src/test/java/org/opensearch/sql/spark/response/OpenSearchJobExecutionResponseReaderTest.java rename to async-query/src/test/java/org/opensearch/sql/spark/response/OpenSearchJobExecutionResponseReaderTest.java index 66230464e5..4de3a56dd9 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/response/OpenSearchJobExecutionResponseReaderTest.java +++ b/async-query/src/test/java/org/opensearch/sql/spark/response/OpenSearchJobExecutionResponseReaderTest.java @@ -29,6 +29,7 @@ import org.opensearch.index.IndexNotFoundException; import org.opensearch.search.SearchHit; import org.opensearch.search.SearchHits; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryJobMetadata; @ExtendWith(MockitoExtension.class) public class OpenSearchJobExecutionResponseReaderTest { @@ -50,7 +51,11 @@ public void testGetResultFromOpensearchIndex() { new SearchHit[] {searchHit}, new TotalHits(1, TotalHits.Relation.EQUAL_TO), 1.0F)); Mockito.when(searchHit.getSourceAsMap()).thenReturn(Map.of("stepId", EMR_JOB_ID)); - assertFalse(jobExecutionResponseReader.getResultWithJobId(EMR_JOB_ID, null).isEmpty()); + assertFalse( + jobExecutionResponseReader + .getResultFromResultIndex( + AsyncQueryJobMetadata.builder().jobId(EMR_JOB_ID).build(), null) + .isEmpty()); } @Test @@ -64,7 +69,11 @@ public void testGetResultFromCustomIndex() { new SearchHit[] {searchHit}, new TotalHits(1, TotalHits.Relation.EQUAL_TO), 1.0F)); Mockito.when(searchHit.getSourceAsMap()).thenReturn(Map.of("stepId", EMR_JOB_ID)); - assertFalse(jobExecutionResponseReader.getResultWithJobId(EMR_JOB_ID, "foo").isEmpty()); + assertFalse( + jobExecutionResponseReader + .getResultFromResultIndex( + AsyncQueryJobMetadata.builder().jobId(EMR_JOB_ID).resultIndex("foo").build(), null) + .isEmpty()); } @Test @@ -76,7 +85,9 @@ public void testInvalidSearchResponse() { RuntimeException exception = assertThrows( RuntimeException.class, - () -> jobExecutionResponseReader.getResultWithJobId(EMR_JOB_ID, null)); + () -> + jobExecutionResponseReader.getResultFromResultIndex( + AsyncQueryJobMetadata.builder().jobId(EMR_JOB_ID).build(), null)); Assertions.assertEquals( "Fetching result from " @@ -92,13 +103,18 @@ public void testSearchFailure() { assertThrows( RuntimeException.class, - () -> jobExecutionResponseReader.getResultWithJobId(EMR_JOB_ID, null)); + () -> + jobExecutionResponseReader.getResultFromResultIndex( + 
AsyncQueryJobMetadata.builder().jobId(EMR_JOB_ID).build(), null)); } @Test public void testIndexNotFoundException() { when(client.search(any())).thenThrow(IndexNotFoundException.class); - - assertTrue(jobExecutionResponseReader.getResultWithJobId(EMR_JOB_ID, "foo").isEmpty()); + assertTrue( + jobExecutionResponseReader + .getResultFromResultIndex( + AsyncQueryJobMetadata.builder().jobId(EMR_JOB_ID).resultIndex("foo").build(), null) + .isEmpty()); } } diff --git a/async-query/src/test/java/org/opensearch/sql/spark/rest/RestAsyncQueryManagementActionTest.java b/async-query/src/test/java/org/opensearch/sql/spark/rest/RestAsyncQueryManagementActionTest.java new file mode 100644 index 0000000000..ccee3eb642 --- /dev/null +++ b/async-query/src/test/java/org/opensearch/sql/spark/rest/RestAsyncQueryManagementActionTest.java @@ -0,0 +1,83 @@ +package org.opensearch.sql.spark.rest; + +import com.google.gson.Gson; +import com.google.gson.JsonObject; +import lombok.SneakyThrows; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.ArgumentCaptor; +import org.mockito.ArgumentMatchers; +import org.mockito.Mockito; +import org.opensearch.client.node.NodeClient; +import org.opensearch.rest.RestChannel; +import org.opensearch.rest.RestRequest; +import org.opensearch.rest.RestResponse; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.opensearch.setting.OpenSearchSettings; +import org.opensearch.threadpool.ThreadPool; + +public class RestAsyncQueryManagementActionTest { + + private OpenSearchSettings settings; + private RestRequest request; + private RestChannel channel; + private NodeClient nodeClient; + private ThreadPool threadPool; + private RestAsyncQueryManagementAction unit; + + @BeforeEach + public void setup() { + settings = Mockito.mock(OpenSearchSettings.class); + request = Mockito.mock(RestRequest.class); + channel = Mockito.mock(RestChannel.class); + nodeClient = Mockito.mock(NodeClient.class); + threadPool = Mockito.mock(ThreadPool.class); + + Mockito.when(nodeClient.threadPool()).thenReturn(threadPool); + + unit = new RestAsyncQueryManagementAction(settings); + } + + @Test + @SneakyThrows + public void testWhenDataSourcesAreDisabled() { + setDataSourcesEnabled(false); + unit.handleRequest(request, channel, nodeClient); + Mockito.verifyNoInteractions(nodeClient); + ArgumentCaptor response = ArgumentCaptor.forClass(RestResponse.class); + Mockito.verify(channel, Mockito.times(1)).sendResponse(response.capture()); + Assertions.assertEquals(400, response.getValue().status().getStatus()); + JsonObject actualResponseJson = + new Gson().fromJson(response.getValue().content().utf8ToString(), JsonObject.class); + JsonObject expectedResponseJson = new JsonObject(); + expectedResponseJson.addProperty("status", 400); + expectedResponseJson.add("error", new JsonObject()); + expectedResponseJson.getAsJsonObject("error").addProperty("type", "IllegalAccessException"); + expectedResponseJson.getAsJsonObject("error").addProperty("reason", "Invalid Request"); + expectedResponseJson + .getAsJsonObject("error") + .addProperty("details", "plugins.query.datasources.enabled setting is false"); + Assertions.assertEquals(expectedResponseJson, actualResponseJson); + } + + @Test + @SneakyThrows + public void testWhenDataSourcesAreEnabled() { + setDataSourcesEnabled(true); + Mockito.when(request.method()).thenReturn(RestRequest.Method.GET); + unit.handleRequest(request, channel, nodeClient); + 
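+ // With data sources enabled, the request is expected to be scheduled on the thread pool rather than answered on the channel directly.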
Mockito.verify(threadPool, Mockito.times(1)) + .schedule(ArgumentMatchers.any(), ArgumentMatchers.any(), ArgumentMatchers.any()); + Mockito.verifyNoInteractions(channel); + } + + @Test + public void testGetName() { + Assertions.assertEquals("async_query_actions", unit.getName()); + } + + private void setDataSourcesEnabled(boolean value) { + Mockito.when(settings.getSettingValue(Settings.Key.DATASOURCES_ENABLED)).thenReturn(value); + } +} diff --git a/async-query/src/test/java/org/opensearch/sql/spark/scheduler/OpenSearchAsyncQuerySchedulerTest.java b/async-query/src/test/java/org/opensearch/sql/spark/scheduler/OpenSearchAsyncQuerySchedulerTest.java new file mode 100644 index 0000000000..d6e672f7a2 --- /dev/null +++ b/async-query/src/test/java/org/opensearch/sql/spark/scheduler/OpenSearchAsyncQuerySchedulerTest.java @@ -0,0 +1,458 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.scheduler; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; +import static org.opensearch.sql.spark.scheduler.OpenSearchAsyncQueryScheduler.SCHEDULER_INDEX_NAME; + +import java.time.Instant; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Answers; +import org.mockito.ArgumentCaptor; +import org.mockito.ArgumentMatchers; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.MockitoAnnotations; +import org.opensearch.action.DocWriteResponse; +import org.opensearch.action.admin.indices.create.CreateIndexRequest; +import org.opensearch.action.admin.indices.create.CreateIndexResponse; +import org.opensearch.action.delete.DeleteRequest; +import org.opensearch.action.delete.DeleteResponse; +import org.opensearch.action.index.IndexRequest; +import org.opensearch.action.index.IndexResponse; +import org.opensearch.action.support.WriteRequest; +import org.opensearch.action.update.UpdateRequest; +import org.opensearch.action.update.UpdateResponse; +import org.opensearch.client.Client; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.action.ActionFuture; +import org.opensearch.index.engine.DocumentMissingException; +import org.opensearch.index.engine.VersionConflictEngineException; +import org.opensearch.jobscheduler.spi.ScheduledJobRunner; +import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; +import org.opensearch.sql.spark.scheduler.model.AsyncQuerySchedulerRequest; +import org.opensearch.sql.spark.scheduler.model.ScheduledAsyncQueryJobRequest; + +public class OpenSearchAsyncQuerySchedulerTest { + + private static final String TEST_SCHEDULER_INDEX_NAME = "testQS"; + + private static final String TEST_JOB_ID = "testJob"; + + @Mock(answer = Answers.RETURNS_DEEP_STUBS) + private Client client; + + @Mock(answer = Answers.RETURNS_DEEP_STUBS) + private ClusterService clusterService; + + @Mock private AsyncQueryRequestContext context; + + @Mock private ActionFuture indexResponseActionFuture; + + @Mock private ActionFuture updateResponseActionFuture; + + @Mock private ActionFuture 
deleteResponseActionFuture; + + @Mock private ActionFuture createIndexResponseActionFuture; + + @Mock private IndexResponse indexResponse; + + @Mock private UpdateResponse updateResponse; + + private OpenSearchAsyncQueryScheduler scheduler; + + @BeforeEach + public void setup() { + MockitoAnnotations.openMocks(this); + scheduler = new OpenSearchAsyncQueryScheduler(client, clusterService); + } + + @Test + public void testScheduleJob() { + when(clusterService.state().routingTable().hasIndex(SCHEDULER_INDEX_NAME)) + .thenReturn(Boolean.FALSE); + when(client.admin().indices().create(any(CreateIndexRequest.class))) + .thenReturn(createIndexResponseActionFuture); + when(createIndexResponseActionFuture.actionGet()) + .thenReturn(new CreateIndexResponse(true, true, TEST_SCHEDULER_INDEX_NAME)); + when(client.index(any(IndexRequest.class))).thenReturn(indexResponseActionFuture); + + // Test the if case + when(indexResponseActionFuture.actionGet()).thenReturn(indexResponse); + when(indexResponse.getResult()).thenReturn(DocWriteResponse.Result.CREATED); + + ScheduledAsyncQueryJobRequest request = + ScheduledAsyncQueryJobRequest.scheduledAsyncQueryJobRequestBuilder() + .jobId(TEST_JOB_ID) + .lastUpdateTime(Instant.now()) + .build(); + + scheduler.scheduleJob(request, context); + + // Verify index created + verify(client.admin().indices(), times(1)).create(ArgumentMatchers.any()); + + // Verify doc indexed + ArgumentCaptor captor = ArgumentCaptor.forClass(IndexRequest.class); + verify(client, times(1)).index(captor.capture()); + IndexRequest capturedRequest = captor.getValue(); + assertEquals(request.getName(), capturedRequest.id()); + assertEquals(WriteRequest.RefreshPolicy.IMMEDIATE, capturedRequest.getRefreshPolicy()); + } + + @Test + public void testScheduleJobWithExistingJob() { + when(clusterService.state().routingTable().hasIndex(SCHEDULER_INDEX_NAME)) + .thenReturn(Boolean.TRUE); + + ScheduledAsyncQueryJobRequest request = + ScheduledAsyncQueryJobRequest.scheduledAsyncQueryJobRequestBuilder() + .jobId(TEST_JOB_ID) + .lastUpdateTime(Instant.now()) + .build(); + + when(client.index(any(IndexRequest.class))).thenThrow(VersionConflictEngineException.class); + + IllegalArgumentException exception = + assertThrows( + IllegalArgumentException.class, + () -> { + scheduler.scheduleJob(request, context); + }); + + verify(client, times(1)).index(ArgumentCaptor.forClass(IndexRequest.class).capture()); + assertEquals("A job already exists with name: testJob", exception.getMessage()); + } + + @Test + public void testScheduleJobWithExceptions() { + when(clusterService.state().routingTable().hasIndex(SCHEDULER_INDEX_NAME)) + .thenReturn(Boolean.FALSE); + when(client.admin().indices().create(any(CreateIndexRequest.class))) + .thenReturn(createIndexResponseActionFuture); + when(createIndexResponseActionFuture.actionGet()) + .thenReturn(new CreateIndexResponse(true, true, TEST_SCHEDULER_INDEX_NAME)); + when(client.index(any(IndexRequest.class))).thenThrow(new RuntimeException("Test exception")); + + ScheduledAsyncQueryJobRequest request = + ScheduledAsyncQueryJobRequest.scheduledAsyncQueryJobRequestBuilder() + .jobId(TEST_JOB_ID) + .lastUpdateTime(Instant.now()) + .build(); + + assertThrows(RuntimeException.class, () -> scheduler.scheduleJob(request, context)); + + when(client.index(any(IndexRequest.class))).thenReturn(indexResponseActionFuture); + when(indexResponseActionFuture.actionGet()).thenReturn(indexResponse); + when(indexResponse.getResult()).thenReturn(DocWriteResponse.Result.NOT_FOUND); + + 
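+ // A NOT_FOUND index result should also surface as a scheduling failure.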
RuntimeException exception = + assertThrows(RuntimeException.class, () -> scheduler.scheduleJob(request, context)); + assertEquals("Schedule job failed with result : not_found", exception.getMessage()); + } + + @Test + public void testUnscheduleJob() { + when(clusterService.state().routingTable().hasIndex(SCHEDULER_INDEX_NAME)).thenReturn(true); + + when(updateResponseActionFuture.actionGet()).thenReturn(updateResponse); + when(updateResponse.getResult()).thenReturn(DocWriteResponse.Result.UPDATED); + + when(client.update(any(UpdateRequest.class))).thenReturn(updateResponseActionFuture); + + scheduler.unscheduleJob(TEST_JOB_ID, context); + + ArgumentCaptor captor = ArgumentCaptor.forClass(UpdateRequest.class); + verify(client).update(captor.capture()); + + UpdateRequest capturedRequest = captor.getValue(); + assertEquals(TEST_JOB_ID, capturedRequest.id()); + assertEquals(WriteRequest.RefreshPolicy.IMMEDIATE, capturedRequest.getRefreshPolicy()); + + // Reset the captor for the next verification + captor = ArgumentCaptor.forClass(UpdateRequest.class); + + when(updateResponse.getResult()).thenReturn(DocWriteResponse.Result.NOOP); + scheduler.unscheduleJob(TEST_JOB_ID, context); + + verify(client, times(2)).update(captor.capture()); + capturedRequest = captor.getValue(); + assertEquals(TEST_JOB_ID, capturedRequest.id()); + assertEquals(WriteRequest.RefreshPolicy.IMMEDIATE, capturedRequest.getRefreshPolicy()); + } + + @Test + public void testUnscheduleJobInvalidJobId() { + when(clusterService.state().routingTable().hasIndex(SCHEDULER_INDEX_NAME)).thenReturn(true); + + IllegalArgumentException exception = + assertThrows(IllegalArgumentException.class, () -> scheduler.unscheduleJob("", context)); + assertEquals("JobId cannot be null or empty", exception.getMessage()); + } + + @Test + public void testUnscheduleJobWithIndexNotFound() { + when(clusterService.state().routingTable().hasIndex(SCHEDULER_INDEX_NAME)).thenReturn(false); + + scheduler.unscheduleJob(TEST_JOB_ID, context); + + // Verify that no update operation was performed + verify(client, never()).update(any(UpdateRequest.class)); + } + + @Test + public void testUpdateJob() { + ScheduledAsyncQueryJobRequest request = + ScheduledAsyncQueryJobRequest.scheduledAsyncQueryJobRequestBuilder() + .jobId(TEST_JOB_ID) + .lastUpdateTime(Instant.now()) + .build(); + + when(clusterService.state().routingTable().hasIndex(SCHEDULER_INDEX_NAME)).thenReturn(true); + + when(updateResponseActionFuture.actionGet()).thenReturn(updateResponse); + when(updateResponse.getResult()).thenReturn(DocWriteResponse.Result.UPDATED); + + when(client.update(any(UpdateRequest.class))).thenReturn(updateResponseActionFuture); + + scheduler.updateJob(request, context); + + ArgumentCaptor captor = ArgumentCaptor.forClass(UpdateRequest.class); + verify(client).update(captor.capture()); + + UpdateRequest capturedRequest = captor.getValue(); + assertEquals(request.getName(), capturedRequest.id()); + assertEquals(WriteRequest.RefreshPolicy.IMMEDIATE, capturedRequest.getRefreshPolicy()); + } + + @Test + public void testUpdateJobWithIndexNotFound() { + ScheduledAsyncQueryJobRequest request = + ScheduledAsyncQueryJobRequest.scheduledAsyncQueryJobRequestBuilder() + .jobId(TEST_JOB_ID) + .lastUpdateTime(Instant.now()) + .build(); + + when(clusterService.state().routingTable().hasIndex(SCHEDULER_INDEX_NAME)).thenReturn(false); + + assertThrows(IllegalStateException.class, () -> scheduler.updateJob(request, context)); + } + + @Test + public void testUpdateJobWithExceptions() { + 
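+ // Covers three failure paths: a missing document, an unexpected runtime error, and an update result of NOT_FOUND.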
ScheduledAsyncQueryJobRequest request = + ScheduledAsyncQueryJobRequest.scheduledAsyncQueryJobRequestBuilder() + .jobId(TEST_JOB_ID) + .lastUpdateTime(Instant.now()) + .build(); + + when(clusterService.state().routingTable().hasIndex(SCHEDULER_INDEX_NAME)).thenReturn(true); + when(client.update(any(UpdateRequest.class))) + .thenThrow(new DocumentMissingException(null, null)); + + IllegalArgumentException exception1 = + assertThrows( + IllegalArgumentException.class, + () -> { + scheduler.updateJob(request, context); + }); + + assertEquals("Job: testJob doesn't exist", exception1.getMessage()); + + when(client.update(any(UpdateRequest.class))).thenThrow(new RuntimeException("Test exception")); + + RuntimeException exception2 = + assertThrows( + RuntimeException.class, + () -> { + scheduler.updateJob(request, context); + }); + + assertEquals("java.lang.RuntimeException: Test exception", exception2.getMessage()); + + when(client.update(any(UpdateRequest.class))).thenReturn(updateResponseActionFuture); + when(updateResponseActionFuture.actionGet()).thenReturn(updateResponse); + when(updateResponse.getResult()).thenReturn(DocWriteResponse.Result.NOT_FOUND); + + RuntimeException exception = + assertThrows(RuntimeException.class, () -> scheduler.updateJob(request, context)); + assertEquals("Update job failed with result : not_found", exception.getMessage()); + } + + @Test + public void testRemoveJob() { + when(clusterService.state().routingTable().hasIndex(SCHEDULER_INDEX_NAME)).thenReturn(true); + + DeleteResponse deleteResponse = mock(DeleteResponse.class); + when(deleteResponseActionFuture.actionGet()).thenReturn(deleteResponse); + when(deleteResponse.getResult()).thenReturn(DocWriteResponse.Result.DELETED); + + when(client.delete(any(DeleteRequest.class))).thenReturn(deleteResponseActionFuture); + + scheduler.removeJob(TEST_JOB_ID, context); + + ArgumentCaptor captor = ArgumentCaptor.forClass(DeleteRequest.class); + verify(client).delete(captor.capture()); + + DeleteRequest capturedRequest = captor.getValue(); + assertEquals(TEST_JOB_ID, capturedRequest.id()); + assertEquals(WriteRequest.RefreshPolicy.IMMEDIATE, capturedRequest.getRefreshPolicy()); + } + + @Test + public void testRemoveJobWithIndexNotFound() { + when(clusterService.state().routingTable().hasIndex(SCHEDULER_INDEX_NAME)).thenReturn(false); + + AsyncQuerySchedulerRequest request = + AsyncQuerySchedulerRequest.builder().jobId(TEST_JOB_ID).build(); + assertThrows(IllegalStateException.class, () -> scheduler.removeJob(TEST_JOB_ID, context)); + } + + @Test + public void testRemoveJobInvalidJobId() { + when(clusterService.state().routingTable().hasIndex(SCHEDULER_INDEX_NAME)).thenReturn(true); + + IllegalArgumentException exception = + assertThrows(IllegalArgumentException.class, () -> scheduler.removeJob("", context)); + assertEquals("JobId cannot be null or empty", exception.getMessage()); + } + + @Test + public void testCreateAsyncQuerySchedulerIndex() { + when(clusterService.state().routingTable().hasIndex(SCHEDULER_INDEX_NAME)).thenReturn(false); + + CreateIndexResponse createIndexResponse = mock(CreateIndexResponse.class); + when(createIndexResponseActionFuture.actionGet()).thenReturn(createIndexResponse); + when(createIndexResponse.isAcknowledged()).thenReturn(true); + + when(client.admin().indices().create(any(CreateIndexRequest.class))) + .thenReturn(createIndexResponseActionFuture); + + scheduler.createAsyncQuerySchedulerIndex(); + + ArgumentCaptor captor = ArgumentCaptor.forClass(CreateIndexRequest.class); + 
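+ // Capture the create request to confirm the index name matches SCHEDULER_INDEX_NAME.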
verify(client.admin().indices()).create(captor.capture()); + + CreateIndexRequest capturedRequest = captor.getValue(); + assertEquals(SCHEDULER_INDEX_NAME, capturedRequest.index()); + } + + @Test + public void testCreateAsyncQuerySchedulerIndexFailure() { + when(clusterService.state().routingTable().hasIndex(SCHEDULER_INDEX_NAME)).thenReturn(false); + + when(client.admin().indices().create(any(CreateIndexRequest.class))) + .thenThrow(new RuntimeException("Error creating index")); + + RuntimeException exception = + assertThrows( + RuntimeException.class, + () -> { + scheduler.createAsyncQuerySchedulerIndex(); + }); + + assertEquals( + "Internal server error while creating .async-query-scheduler index: Error creating index", + exception.getMessage()); + + when(client.admin().indices().create(any(CreateIndexRequest.class))) + .thenReturn(createIndexResponseActionFuture); + Mockito.when(createIndexResponseActionFuture.actionGet()) + .thenReturn(new CreateIndexResponse(false, false, SCHEDULER_INDEX_NAME)); + + ScheduledAsyncQueryJobRequest request = + ScheduledAsyncQueryJobRequest.scheduledAsyncQueryJobRequestBuilder() + .jobId(TEST_JOB_ID) + .lastUpdateTime(Instant.now()) + .build(); + + RuntimeException runtimeException = + Assertions.assertThrows( + RuntimeException.class, () -> scheduler.scheduleJob(request, context)); + Assertions.assertEquals( + "Internal server error while creating .async-query-scheduler index: Index creation is not" + + " acknowledged.", + runtimeException.getMessage()); + } + + @Test + public void testUpdateJobNotFound() { + ScheduledAsyncQueryJobRequest request = + ScheduledAsyncQueryJobRequest.scheduledAsyncQueryJobRequestBuilder() + .jobId(TEST_JOB_ID) + .lastUpdateTime(Instant.now()) + .build(); + + when(clusterService.state().routingTable().hasIndex(SCHEDULER_INDEX_NAME)).thenReturn(true); + + when(client.update(any(UpdateRequest.class))) + .thenThrow(new DocumentMissingException(null, null)); + + IllegalArgumentException exception = + assertThrows( + IllegalArgumentException.class, + () -> { + scheduler.updateJob(request, context); + }); + + assertEquals("Job: testJob doesn't exist", exception.getMessage()); + } + + @Test + public void testRemoveJobNotFound() { + when(clusterService.state().routingTable().hasIndex(SCHEDULER_INDEX_NAME)).thenReturn(true); + + DeleteResponse deleteResponse = mock(DeleteResponse.class); + when(deleteResponseActionFuture.actionGet()).thenReturn(deleteResponse); + when(deleteResponse.getResult()).thenReturn(DocWriteResponse.Result.NOT_FOUND); + + when(client.delete(any(DeleteRequest.class))).thenReturn(deleteResponseActionFuture); + + IllegalArgumentException exception = + assertThrows( + IllegalArgumentException.class, + () -> { + scheduler.removeJob(TEST_JOB_ID, context); + }); + + assertEquals("Job : testJob doesn't exist", exception.getMessage()); + } + + @Test + public void testRemoveJobWithExceptions() { + when(clusterService.state().routingTable().hasIndex(SCHEDULER_INDEX_NAME)).thenReturn(true); + + when(client.delete(any(DeleteRequest.class))).thenThrow(new RuntimeException("Test exception")); + + assertThrows(RuntimeException.class, () -> scheduler.removeJob(TEST_JOB_ID, context)); + + DeleteResponse deleteResponse = mock(DeleteResponse.class); + when(client.delete(any(DeleteRequest.class))).thenReturn(deleteResponseActionFuture); + when(deleteResponseActionFuture.actionGet()).thenReturn(deleteResponse); + when(deleteResponse.getResult()).thenReturn(DocWriteResponse.Result.NOOP); + + RuntimeException runtimeException = + 
Assertions.assertThrows( + RuntimeException.class, () -> scheduler.removeJob(TEST_JOB_ID, context)); + Assertions.assertEquals("Remove job failed with result : noop", runtimeException.getMessage()); + } + + @Test + public void testGetJobRunner() { + ScheduledJobRunner jobRunner = OpenSearchAsyncQueryScheduler.getJobRunner(); + assertNotNull(jobRunner); + } +} diff --git a/async-query/src/test/java/org/opensearch/sql/spark/scheduler/job/ScheduledAsyncQueryJobRunnerTest.java b/async-query/src/test/java/org/opensearch/sql/spark/scheduler/job/ScheduledAsyncQueryJobRunnerTest.java new file mode 100644 index 0000000000..fdfb138ddb --- /dev/null +++ b/async-query/src/test/java/org/opensearch/sql/spark/scheduler/job/ScheduledAsyncQueryJobRunnerTest.java @@ -0,0 +1,210 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.scheduler.job; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.time.Instant; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.core.Appender; +import org.apache.logging.log4j.core.LogEvent; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Answers; +import org.mockito.ArgumentCaptor; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.opensearch.client.Client; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.jobscheduler.spi.JobExecutionContext; +import org.opensearch.jobscheduler.spi.ScheduledJobParameter; +import org.opensearch.sql.legacy.executor.AsyncRestExecutor; +import org.opensearch.sql.spark.asyncquery.AsyncQueryExecutorService; +import org.opensearch.sql.spark.asyncquery.model.NullAsyncQueryRequestContext; +import org.opensearch.sql.spark.rest.model.CreateAsyncQueryRequest; +import org.opensearch.sql.spark.rest.model.LangType; +import org.opensearch.sql.spark.scheduler.model.ScheduledAsyncQueryJobRequest; +import org.opensearch.threadpool.ThreadPool; + +public class ScheduledAsyncQueryJobRunnerTest { + + @Mock(answer = Answers.RETURNS_DEEP_STUBS) + private ClusterService clusterService; + + @Mock(answer = Answers.RETURNS_DEEP_STUBS) + private ThreadPool threadPool; + + @Mock(answer = Answers.RETURNS_DEEP_STUBS) + private Client client; + + @Mock(answer = Answers.RETURNS_DEEP_STUBS) + private AsyncQueryExecutorService asyncQueryExecutorService; + + @Mock private JobExecutionContext context; + + private ScheduledAsyncQueryJobRunner jobRunner; + + private ScheduledAsyncQueryJobRunner spyJobRunner; + + @BeforeEach + public void setup() { + MockitoAnnotations.openMocks(this); + jobRunner = ScheduledAsyncQueryJobRunner.getJobRunnerInstance(); + jobRunner.loadJobResource(null, null, null, null); + } + + @Test + public void testRunJobWithCorrectParameter() { + spyJobRunner = spy(jobRunner); + spyJobRunner.loadJobResource(client, clusterService, threadPool, asyncQueryExecutorService); + + ScheduledAsyncQueryJobRequest request = + 
ScheduledAsyncQueryJobRequest.scheduledAsyncQueryJobRequestBuilder() + .jobId("testJob") + .lastUpdateTime(Instant.now()) + .lockDurationSeconds(10L) + .scheduledQuery("REFRESH INDEX testIndex") + .dataSource("testDataSource") + .queryLang(LangType.SQL) + .build(); + + CreateAsyncQueryRequest createAsyncQueryRequest = + new CreateAsyncQueryRequest( + request.getScheduledQuery(), request.getDataSource(), request.getQueryLang()); + spyJobRunner.runJob(request, context); + + ArgumentCaptor captor = ArgumentCaptor.forClass(Runnable.class); + verify(threadPool.executor(AsyncRestExecutor.SQL_WORKER_THREAD_POOL_NAME)) + .submit(captor.capture()); + + Runnable runnable = captor.getValue(); + runnable.run(); + + verify(spyJobRunner).doRefresh(eq(request)); + verify(asyncQueryExecutorService) + .createAsyncQuery(eq(createAsyncQueryRequest), any(NullAsyncQueryRequestContext.class)); + } + + @Test + public void testRunJobWithIncorrectParameter() { + jobRunner = ScheduledAsyncQueryJobRunner.getJobRunnerInstance(); + jobRunner.loadJobResource(client, clusterService, threadPool, asyncQueryExecutorService); + + ScheduledJobParameter wrongParameter = mock(ScheduledJobParameter.class); + + IllegalStateException exception = + assertThrows( + IllegalStateException.class, + () -> jobRunner.runJob(wrongParameter, context), + "Expected IllegalStateException but no exception was thrown"); + + assertEquals( + "Job parameter is not instance of ScheduledAsyncQueryJobRequest, type: " + + wrongParameter.getClass().getCanonicalName(), + exception.getMessage()); + } + + @Test + public void testDoRefreshThrowsException() { + spyJobRunner = spy(jobRunner); + spyJobRunner.loadJobResource(client, clusterService, threadPool, asyncQueryExecutorService); + + ScheduledAsyncQueryJobRequest request = + ScheduledAsyncQueryJobRequest.scheduledAsyncQueryJobRequestBuilder() + .jobId("testJob") + .lastUpdateTime(Instant.now()) + .lockDurationSeconds(10L) + .scheduledQuery("REFRESH INDEX testIndex") + .dataSource("testDataSource") + .queryLang(LangType.SQL) + .build(); + + doThrow(new RuntimeException("Test exception")).when(spyJobRunner).doRefresh(request); + + Logger logger = LogManager.getLogger(ScheduledAsyncQueryJobRunner.class); + Appender mockAppender = mock(Appender.class); + when(mockAppender.getName()).thenReturn("MockAppender"); + when(mockAppender.isStarted()).thenReturn(true); + when(mockAppender.isStopped()).thenReturn(false); + ((org.apache.logging.log4j.core.Logger) logger) + .addAppender((org.apache.logging.log4j.core.Appender) mockAppender); + + spyJobRunner.runJob(request, context); + + ArgumentCaptor captor = ArgumentCaptor.forClass(Runnable.class); + verify(threadPool.executor(AsyncRestExecutor.SQL_WORKER_THREAD_POOL_NAME)) + .submit(captor.capture()); + + Runnable runnable = captor.getValue(); + runnable.run(); + + verify(spyJobRunner).doRefresh(eq(request)); + verify(mockAppender).append(any(LogEvent.class)); + } + + @Test + public void testRunJobWithUninitializedServices() { + ScheduledAsyncQueryJobRequest jobParameter = + ScheduledAsyncQueryJobRequest.scheduledAsyncQueryJobRequestBuilder() + .jobId("testJob") + .lastUpdateTime(Instant.now()) + .build(); + + IllegalStateException exception = + assertThrows( + IllegalStateException.class, + () -> jobRunner.runJob(jobParameter, context), + "Expected IllegalStateException but no exception was thrown"); + assertEquals("ClusterService is not initialized.", exception.getMessage()); + + jobRunner.loadJobResource(null, clusterService, null, null); + + exception = + 
assertThrows( + IllegalStateException.class, + () -> jobRunner.runJob(jobParameter, context), + "Expected IllegalStateException but no exception was thrown"); + assertEquals("ThreadPool is not initialized.", exception.getMessage()); + + jobRunner.loadJobResource(null, clusterService, threadPool, null); + + exception = + assertThrows( + IllegalStateException.class, + () -> jobRunner.runJob(jobParameter, context), + "Expected IllegalStateException but no exception was thrown"); + assertEquals("Client is not initialized.", exception.getMessage()); + + jobRunner.loadJobResource(client, clusterService, threadPool, null); + + exception = + assertThrows( + IllegalStateException.class, + () -> jobRunner.runJob(jobParameter, context), + "Expected IllegalStateException but no exception was thrown"); + assertEquals("AsyncQueryExecutorService is not initialized.", exception.getMessage()); + } + + @Test + public void testGetJobRunnerInstanceMultipleCalls() { + ScheduledAsyncQueryJobRunner instance1 = ScheduledAsyncQueryJobRunner.getJobRunnerInstance(); + ScheduledAsyncQueryJobRunner instance2 = ScheduledAsyncQueryJobRunner.getJobRunnerInstance(); + ScheduledAsyncQueryJobRunner instance3 = ScheduledAsyncQueryJobRunner.getJobRunnerInstance(); + + assertSame(instance1, instance2); + assertSame(instance2, instance3); + } +} diff --git a/async-query/src/test/java/org/opensearch/sql/spark/scheduler/model/ScheduledAsyncQueryJobRequestTest.java b/async-query/src/test/java/org/opensearch/sql/spark/scheduler/model/ScheduledAsyncQueryJobRequestTest.java new file mode 100644 index 0000000000..edf8379195 --- /dev/null +++ b/async-query/src/test/java/org/opensearch/sql/spark/scheduler/model/ScheduledAsyncQueryJobRequestTest.java @@ -0,0 +1,210 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.scheduler.model; + +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertThrows; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.opensearch.core.xcontent.ToXContent.EMPTY_PARAMS; + +import java.io.IOException; +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import org.junit.jupiter.api.Test; +import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.jobscheduler.spi.schedule.IntervalSchedule; +import org.opensearch.sql.spark.rest.model.LangType; + +public class ScheduledAsyncQueryJobRequestTest { + + @Test + public void testBuilderAndGetterMethods() { + Instant now = Instant.now(); + IntervalSchedule schedule = new IntervalSchedule(now, 1, ChronoUnit.MINUTES); + + ScheduledAsyncQueryJobRequest jobRequest = + ScheduledAsyncQueryJobRequest.scheduledAsyncQueryJobRequestBuilder() + .accountId("testAccount") + .jobId("testJob") + .dataSource("testDataSource") + .scheduledQuery("SELECT * FROM test") + .queryLang(LangType.SQL) + .schedule(schedule) + .enabled(true) + .lastUpdateTime(now) + .enabledTime(now) + .lockDurationSeconds(60L) + .jitter(0.1) + .build(); + + assertEquals("testAccount", jobRequest.getAccountId()); + assertEquals("testJob", jobRequest.getJobId()); + assertEquals("testJob", jobRequest.getName()); + assertEquals("testDataSource", jobRequest.getDataSource()); + assertEquals("SELECT * FROM test", jobRequest.getScheduledQuery()); + assertEquals(LangType.SQL, jobRequest.getQueryLang()); + assertEquals(schedule, 
jobRequest.getSchedule()); + assertTrue(jobRequest.isEnabled()); + assertEquals(now, jobRequest.getLastUpdateTime()); + assertEquals(now, jobRequest.getEnabledTime()); + assertEquals(60L, jobRequest.getLockDurationSeconds()); + assertEquals(0.1, jobRequest.getJitter()); + } + + @Test + public void testToXContent() throws IOException { + Instant now = Instant.now(); + IntervalSchedule schedule = new IntervalSchedule(now, 1, ChronoUnit.MINUTES); + + ScheduledAsyncQueryJobRequest request = + ScheduledAsyncQueryJobRequest.scheduledAsyncQueryJobRequestBuilder() + .accountId("testAccount") + .jobId("testJob") + .dataSource("testDataSource") + .scheduledQuery("SELECT * FROM test") + .queryLang(LangType.SQL) + .schedule(schedule) + .enabled(true) + .enabledTime(now) + .lastUpdateTime(now) + .lockDurationSeconds(60L) + .jitter(0.1) + .build(); + + XContentBuilder builder = XContentFactory.jsonBuilder().prettyPrint(); + request.toXContent(builder, EMPTY_PARAMS); + String jsonString = builder.toString(); + + assertTrue(jsonString.contains("\"accountId\" : \"testAccount\"")); + assertTrue(jsonString.contains("\"jobId\" : \"testJob\"")); + assertTrue(jsonString.contains("\"dataSource\" : \"testDataSource\"")); + assertTrue(jsonString.contains("\"scheduledQuery\" : \"SELECT * FROM test\"")); + assertTrue(jsonString.contains("\"queryLang\" : \"SQL\"")); + assertTrue(jsonString.contains("\"start_time\" : " + now.toEpochMilli())); + assertTrue(jsonString.contains("\"period\" : 1")); + assertTrue(jsonString.contains("\"unit\" : \"Minutes\"")); + assertTrue(jsonString.contains("\"enabled\" : true")); + assertTrue(jsonString.contains("\"lastUpdateTime\" : " + now.toEpochMilli())); + assertTrue(jsonString.contains("\"enabledTime\" : " + now.toEpochMilli())); + assertTrue(jsonString.contains("\"lockDurationSeconds\" : 60")); + assertTrue(jsonString.contains("\"jitter\" : 0.1")); + } + + @Test + public void testFromAsyncQuerySchedulerRequest() { + Instant now = Instant.now(); + AsyncQuerySchedulerRequest request = new AsyncQuerySchedulerRequest(); + request.setJobId("testJob"); + request.setAccountId("testAccount"); + request.setDataSource("testDataSource"); + request.setScheduledQuery("SELECT * FROM test"); + request.setQueryLang(LangType.SQL); + request.setSchedule("1 minutes"); + request.setEnabled(true); + request.setLastUpdateTime(now); + request.setLockDurationSeconds(60L); + request.setJitter(0.1); + + ScheduledAsyncQueryJobRequest jobRequest = + ScheduledAsyncQueryJobRequest.fromAsyncQuerySchedulerRequest(request); + + assertEquals("testJob", jobRequest.getJobId()); + assertEquals("testAccount", jobRequest.getAccountId()); + assertEquals("testDataSource", jobRequest.getDataSource()); + assertEquals("SELECT * FROM test", jobRequest.getScheduledQuery()); + assertEquals(LangType.SQL, jobRequest.getQueryLang()); + assertEquals(new IntervalSchedule(now, 1, ChronoUnit.MINUTES), jobRequest.getSchedule()); + assertTrue(jobRequest.isEnabled()); + assertEquals(60L, jobRequest.getLockDurationSeconds()); + assertEquals(0.1, jobRequest.getJitter()); + } + + @Test + public void testFromAsyncQuerySchedulerRequestWithInvalidSchedule() { + AsyncQuerySchedulerRequest request = new AsyncQuerySchedulerRequest(); + request.setJobId("testJob"); + request.setSchedule(new Object()); // Set schedule to a non-String object + + IllegalArgumentException exception = + assertThrows( + IllegalArgumentException.class, + () -> { + ScheduledAsyncQueryJobRequest.fromAsyncQuerySchedulerRequest(request); + }); + + assertEquals("Schedule 
must be a String object for parsing.", exception.getMessage()); + } + + @Test + public void testEqualsAndHashCode() { + Instant now = Instant.now(); + IntervalSchedule schedule = new IntervalSchedule(now, 1, ChronoUnit.MINUTES); + + ScheduledAsyncQueryJobRequest request1 = + ScheduledAsyncQueryJobRequest.scheduledAsyncQueryJobRequestBuilder() + .accountId("testAccount") + .jobId("testJob") + .dataSource("testDataSource") + .scheduledQuery("SELECT * FROM test") + .queryLang(LangType.SQL) + .schedule(schedule) + .enabled(true) + .enabledTime(now) + .lastUpdateTime(now) + .lockDurationSeconds(60L) + .jitter(0.1) + .build(); + + // Test toString + String toString = request1.toString(); + assertTrue(toString.contains("accountId=testAccount")); + assertTrue(toString.contains("jobId=testJob")); + assertTrue(toString.contains("dataSource=testDataSource")); + assertTrue(toString.contains("scheduledQuery=SELECT * FROM test")); + assertTrue(toString.contains("queryLang=SQL")); + assertTrue(toString.contains("enabled=true")); + assertTrue(toString.contains("lockDurationSeconds=60")); + assertTrue(toString.contains("jitter=0.1")); + + ScheduledAsyncQueryJobRequest request2 = + ScheduledAsyncQueryJobRequest.scheduledAsyncQueryJobRequestBuilder() + .accountId("testAccount") + .jobId("testJob") + .dataSource("testDataSource") + .scheduledQuery("SELECT * FROM test") + .queryLang(LangType.SQL) + .schedule(schedule) + .enabled(true) + .enabledTime(now) + .lastUpdateTime(now) + .lockDurationSeconds(60L) + .jitter(0.1) + .build(); + + assertEquals(request1, request2); + assertEquals(request1.hashCode(), request2.hashCode()); + + ScheduledAsyncQueryJobRequest request3 = + ScheduledAsyncQueryJobRequest.scheduledAsyncQueryJobRequestBuilder() + .accountId("differentAccount") + .jobId("testJob") + .dataSource("testDataSource") + .scheduledQuery("SELECT * FROM test") + .queryLang(LangType.SQL) + .schedule(schedule) + .enabled(true) + .enabledTime(now) + .lastUpdateTime(now) + .lockDurationSeconds(60L) + .jitter(0.1) + .build(); + + assertNotEquals(request1, request3); + assertNotEquals(request1.hashCode(), request3.hashCode()); + } +} diff --git a/async-query/src/test/java/org/opensearch/sql/spark/scheduler/parser/IntervalScheduleParserTest.java b/async-query/src/test/java/org/opensearch/sql/spark/scheduler/parser/IntervalScheduleParserTest.java new file mode 100644 index 0000000000..f211548c7c --- /dev/null +++ b/async-query/src/test/java/org/opensearch/sql/spark/scheduler/parser/IntervalScheduleParserTest.java @@ -0,0 +1,130 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.scheduler.parser; + +import static org.junit.Assert.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.opensearch.jobscheduler.spi.schedule.IntervalSchedule; +import org.opensearch.jobscheduler.spi.schedule.Schedule; + +public class IntervalScheduleParserTest { + + private Instant startTime; + + @BeforeEach + public void setup() { + startTime = Instant.now(); + } + + @Test + public void testConstructor() { + // Test that the constructor of IntervalScheduleParser can be invoked + IntervalScheduleParser parser = new IntervalScheduleParser(); + assertNotNull(parser); + } + 
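+ // For illustration only (not part of this change set): based on the cases below, IntervalScheduleParser.parse("2 hours", startTime) is expected to equal new IntervalSchedule(startTime, 120, ChronoUnit.MINUTES), since every supported unit is normalized to minutes.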
+ @Test + public void testParseValidScheduleString() { + verifyParseSchedule(5, "5 minutes"); + } + + @Test + public void testParseValidScheduleStringWithDifferentUnits() { + verifyParseSchedule(120, "2 hours"); + verifyParseSchedule(1440, "1 day"); + verifyParseSchedule(30240, "3 weeks"); + } + + @Test + public void testParseNullSchedule() { + Schedule schedule = IntervalScheduleParser.parse(null, startTime); + assertNull(schedule); + } + + @Test + public void testParseScheduleObject() { + IntervalSchedule expectedSchedule = new IntervalSchedule(startTime, 10, ChronoUnit.MINUTES); + Schedule schedule = IntervalScheduleParser.parse(expectedSchedule, startTime); + assertEquals(expectedSchedule, schedule); + } + + @Test + public void testParseInvalidScheduleString() { + String scheduleStr = "invalid schedule"; + IllegalArgumentException exception = + assertThrows( + IllegalArgumentException.class, + () -> IntervalScheduleParser.parse(scheduleStr, startTime), + "Expected IllegalArgumentException but no exception was thrown"); + + assertEquals("Invalid interval format: " + scheduleStr.toLowerCase(), exception.getMessage()); + } + + @Test + public void testParseUnsupportedUnits() { + assertThrows( + IllegalArgumentException.class, + () -> IntervalScheduleParser.parse("1 year", startTime), + "Expected IllegalArgumentException but no exception was thrown"); + + assertThrows( + IllegalArgumentException.class, + () -> IntervalScheduleParser.parse("1 month", startTime), + "Expected IllegalArgumentException but no exception was thrown"); + } + + @Test + public void testParseNonStringSchedule() { + Object nonStringSchedule = 12345; + IllegalArgumentException exception = + assertThrows( + IllegalArgumentException.class, + () -> IntervalScheduleParser.parse(nonStringSchedule, startTime), + "Expected IllegalArgumentException but no exception was thrown"); + + assertEquals("Schedule must be a String object for parsing.", exception.getMessage()); + } + + @Test + public void testParseScheduleWithNanoseconds() { + verifyParseSchedule(1, "60000000000 nanoseconds"); + } + + @Test + public void testParseScheduleWithMilliseconds() { + verifyParseSchedule(1, "60000 milliseconds"); + } + + @Test + public void testParseScheduleWithMicroseconds() { + verifyParseSchedule(1, "60000000 microseconds"); + } + + @Test + public void testUnsupportedTimeUnit() { + assertThrows( + IllegalArgumentException.class, + () -> IntervalScheduleParser.convertToSupportedUnit(10, "unsupportedunit"), + "Expected IllegalArgumentException but no exception was thrown"); + } + + @Test + public void testParseScheduleWithSeconds() { + verifyParseSchedule(2, "120 seconds"); + } + + private void verifyParseSchedule(int expectedMinutes, String scheduleStr) { + Schedule schedule = IntervalScheduleParser.parse(scheduleStr, startTime); + assertEquals(new IntervalSchedule(startTime, expectedMinutes, ChronoUnit.MINUTES), schedule); + } +} diff --git a/spark/src/test/java/org/opensearch/sql/spark/transport/TransportCancelAsyncQueryRequestActionTest.java b/async-query/src/test/java/org/opensearch/sql/spark/transport/TransportCancelAsyncQueryRequestActionTest.java similarity index 85% rename from spark/src/test/java/org/opensearch/sql/spark/transport/TransportCancelAsyncQueryRequestActionTest.java rename to async-query/src/test/java/org/opensearch/sql/spark/transport/TransportCancelAsyncQueryRequestActionTest.java index 2ff76b9b57..a2581fdea2 100644 --- 
a/spark/src/test/java/org/opensearch/sql/spark/transport/TransportCancelAsyncQueryRequestActionTest.java +++ b/async-query/src/test/java/org/opensearch/sql/spark/transport/TransportCancelAsyncQueryRequestActionTest.java @@ -7,6 +7,8 @@ package org.opensearch.sql.spark.transport; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.when; import static org.opensearch.sql.spark.constants.TestConstants.EMR_JOB_ID; @@ -24,6 +26,7 @@ import org.opensearch.action.support.ActionFilters; import org.opensearch.core.action.ActionListener; import org.opensearch.sql.spark.asyncquery.AsyncQueryExecutorServiceImpl; +import org.opensearch.sql.spark.asyncquery.model.NullAsyncQueryRequestContext; import org.opensearch.sql.spark.transport.model.CancelAsyncQueryActionRequest; import org.opensearch.sql.spark.transport.model.CancelAsyncQueryActionResponse; import org.opensearch.tasks.Task; @@ -36,7 +39,6 @@ public class TransportCancelAsyncQueryRequestActionTest { @Mock private TransportCancelAsyncQueryRequestAction action; @Mock private Task task; @Mock private ActionListener actionListener; - @Mock private AsyncQueryExecutorServiceImpl asyncQueryExecutorService; @Captor @@ -54,8 +56,12 @@ public void setUp() { @Test public void testDoExecute() { CancelAsyncQueryActionRequest request = new CancelAsyncQueryActionRequest(EMR_JOB_ID); - when(asyncQueryExecutorService.cancelQuery(EMR_JOB_ID)).thenReturn(EMR_JOB_ID); + when(asyncQueryExecutorService.cancelQuery( + eq(EMR_JOB_ID), any(NullAsyncQueryRequestContext.class))) + .thenReturn(EMR_JOB_ID); + action.doExecute(task, request, actionListener); + Mockito.verify(actionListener).onResponse(deleteJobActionResponseArgumentCaptor.capture()); CancelAsyncQueryActionResponse cancelAsyncQueryActionResponse = deleteJobActionResponseArgumentCaptor.getValue(); @@ -66,8 +72,12 @@ public void testDoExecute() { @Test public void testDoExecuteWithException() { CancelAsyncQueryActionRequest request = new CancelAsyncQueryActionRequest(EMR_JOB_ID); - doThrow(new RuntimeException("Error")).when(asyncQueryExecutorService).cancelQuery(EMR_JOB_ID); + doThrow(new RuntimeException("Error")) + .when(asyncQueryExecutorService) + .cancelQuery(eq(EMR_JOB_ID), any(NullAsyncQueryRequestContext.class)); + action.doExecute(task, request, actionListener); + Mockito.verify(actionListener).onFailure(exceptionArgumentCaptor.capture()); Exception exception = exceptionArgumentCaptor.getValue(); Assertions.assertTrue(exception instanceof RuntimeException); diff --git a/spark/src/test/java/org/opensearch/sql/spark/transport/TransportCreateAsyncQueryRequestActionTest.java b/async-query/src/test/java/org/opensearch/sql/spark/transport/TransportCreateAsyncQueryRequestActionTest.java similarity index 100% rename from spark/src/test/java/org/opensearch/sql/spark/transport/TransportCreateAsyncQueryRequestActionTest.java rename to async-query/src/test/java/org/opensearch/sql/spark/transport/TransportCreateAsyncQueryRequestActionTest.java diff --git a/spark/src/test/java/org/opensearch/sql/spark/transport/TransportGetAsyncQueryResultActionTest.java b/async-query/src/test/java/org/opensearch/sql/spark/transport/TransportGetAsyncQueryResultActionTest.java similarity index 90% rename from spark/src/test/java/org/opensearch/sql/spark/transport/TransportGetAsyncQueryResultActionTest.java rename to 
async-query/src/test/java/org/opensearch/sql/spark/transport/TransportGetAsyncQueryResultActionTest.java index 34f10b0083..475eceb37e 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/transport/TransportGetAsyncQueryResultActionTest.java +++ b/async-query/src/test/java/org/opensearch/sql/spark/transport/TransportGetAsyncQueryResultActionTest.java @@ -7,6 +7,8 @@ package org.opensearch.sql.spark.transport; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -33,6 +35,7 @@ import org.opensearch.sql.spark.asyncquery.AsyncQueryExecutorServiceImpl; import org.opensearch.sql.spark.asyncquery.exceptions.AsyncQueryNotFoundException; import org.opensearch.sql.spark.asyncquery.model.AsyncQueryExecutionResponse; +import org.opensearch.sql.spark.asyncquery.model.NullAsyncQueryRequestContext; import org.opensearch.sql.spark.transport.model.GetAsyncQueryResultActionRequest; import org.opensearch.sql.spark.transport.model.GetAsyncQueryResultActionResponse; import org.opensearch.tasks.Task; @@ -64,8 +67,11 @@ public void testDoExecute() { GetAsyncQueryResultActionRequest request = new GetAsyncQueryResultActionRequest("jobId"); AsyncQueryExecutionResponse asyncQueryExecutionResponse = new AsyncQueryExecutionResponse("IN_PROGRESS", null, null, null, null); - when(jobExecutorService.getAsyncQueryResults("jobId")).thenReturn(asyncQueryExecutionResponse); + when(jobExecutorService.getAsyncQueryResults(eq("jobId"), any())) + .thenReturn(asyncQueryExecutionResponse); + action.doExecute(task, request, actionListener); + verify(actionListener).onResponse(createJobActionResponseArgumentCaptor.capture()); GetAsyncQueryResultActionResponse getAsyncQueryResultActionResponse = createJobActionResponseArgumentCaptor.getValue(); @@ -91,8 +97,11 @@ public void testDoExecuteWithSuccessResponse() { tupleValue(ImmutableMap.of("name", "Smith", "age", 30))), null, null); - when(jobExecutorService.getAsyncQueryResults("jobId")).thenReturn(asyncQueryExecutionResponse); + when(jobExecutorService.getAsyncQueryResults(eq("jobId"), any())) + .thenReturn(asyncQueryExecutionResponse); + action.doExecute(task, request, actionListener); + verify(actionListener).onResponse(createJobActionResponseArgumentCaptor.capture()); GetAsyncQueryResultActionResponse getAsyncQueryResultActionResponse = createJobActionResponseArgumentCaptor.getValue(); @@ -130,9 +139,12 @@ public void testDoExecuteWithException() { GetAsyncQueryResultActionRequest request = new GetAsyncQueryResultActionRequest("123"); doThrow(new AsyncQueryNotFoundException("JobId 123 not found")) .when(jobExecutorService) - .getAsyncQueryResults("123"); + .getAsyncQueryResults(eq("123"), any()); + action.doExecute(task, request, actionListener); - verify(jobExecutorService, times(1)).getAsyncQueryResults("123"); + + verify(jobExecutorService, times(1)) + .getAsyncQueryResults(eq("123"), any(NullAsyncQueryRequestContext.class)); verify(actionListener).onFailure(exceptionArgumentCaptor.capture()); Exception exception = exceptionArgumentCaptor.getValue(); Assertions.assertTrue(exception instanceof RuntimeException); diff --git a/spark/src/test/java/org/opensearch/sql/spark/transport/config/AsyncExecutorServiceModuleTest.java b/async-query/src/test/java/org/opensearch/sql/spark/transport/config/AsyncExecutorServiceModuleTest.java similarity index 100% rename from 
spark/src/test/java/org/opensearch/sql/spark/transport/config/AsyncExecutorServiceModuleTest.java rename to async-query/src/test/java/org/opensearch/sql/spark/transport/config/AsyncExecutorServiceModuleTest.java diff --git a/spark/src/test/java/org/opensearch/sql/spark/transport/format/AsyncQueryResultResponseFormatterTest.java b/async-query/src/test/java/org/opensearch/sql/spark/transport/format/AsyncQueryResultResponseFormatterTest.java similarity index 93% rename from spark/src/test/java/org/opensearch/sql/spark/transport/format/AsyncQueryResultResponseFormatterTest.java rename to async-query/src/test/java/org/opensearch/sql/spark/transport/format/AsyncQueryResultResponseFormatterTest.java index 711db75efb..bb7d5f7893 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/transport/format/AsyncQueryResultResponseFormatterTest.java +++ b/async-query/src/test/java/org/opensearch/sql/spark/transport/format/AsyncQueryResultResponseFormatterTest.java @@ -1,3 +1,8 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + package org.opensearch.sql.spark.transport.format; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -11,7 +16,7 @@ import java.util.Arrays; import org.junit.jupiter.api.Test; import org.opensearch.sql.executor.ExecutionEngine; -import org.opensearch.sql.spark.asyncquery.model.AsyncQueryResult; +import org.opensearch.sql.spark.transport.model.AsyncQueryResult; public class AsyncQueryResultResponseFormatterTest { diff --git a/spark/src/test/java/org/opensearch/sql/spark/rest/model/CreateAsyncQueryRequestTest.java b/async-query/src/test/java/org/opensearch/sql/spark/transport/format/CreateAsyncQueryRequestConverterTest.java similarity index 83% rename from spark/src/test/java/org/opensearch/sql/spark/rest/model/CreateAsyncQueryRequestTest.java rename to async-query/src/test/java/org/opensearch/sql/spark/transport/format/CreateAsyncQueryRequestConverterTest.java index de38ca0e3c..d7f8046a1b 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/rest/model/CreateAsyncQueryRequestTest.java +++ b/async-query/src/test/java/org/opensearch/sql/spark/transport/format/CreateAsyncQueryRequestConverterTest.java @@ -3,7 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.sql.spark.rest.model; +package org.opensearch.sql.spark.transport.format; import java.io.IOException; import org.junit.jupiter.api.Assertions; @@ -12,8 +12,10 @@ import org.opensearch.common.xcontent.XContentType; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.sql.spark.rest.model.CreateAsyncQueryRequest; +import org.opensearch.sql.spark.rest.model.LangType; -public class CreateAsyncQueryRequestTest { +public class CreateAsyncQueryRequestConverterTest { @Test public void fromXContent() throws IOException { @@ -24,7 +26,7 @@ public void fromXContent() throws IOException { + " \"query\": \"select 1\"\n" + "}"; CreateAsyncQueryRequest queryRequest = - CreateAsyncQueryRequest.fromXContentParser(xContentParser(request)); + CreateAsyncQueryRequestConverter.fromXContentParser(xContentParser(request)); Assertions.assertEquals("my_glue", queryRequest.getDatasource()); Assertions.assertEquals(LangType.SQL, queryRequest.getLang()); Assertions.assertEquals("select 1", queryRequest.getQuery()); @@ -48,7 +50,7 @@ public void fromXContentWithDuplicateFields() throws IOException { IllegalArgumentException illegalArgumentException = Assertions.assertThrows( 
IllegalArgumentException.class, - () -> CreateAsyncQueryRequest.fromXContentParser(xContentParser(request))); + () -> CreateAsyncQueryRequestConverter.fromXContentParser(xContentParser(request))); Assertions.assertTrue( illegalArgumentException .getMessage() @@ -67,7 +69,7 @@ public void fromXContentWithUnknownField() throws IOException { IllegalArgumentException illegalArgumentException = Assertions.assertThrows( IllegalArgumentException.class, - () -> CreateAsyncQueryRequest.fromXContentParser(xContentParser(request))); + () -> CreateAsyncQueryRequestConverter.fromXContentParser(xContentParser(request))); Assertions.assertEquals( "Error while parsing the request body: Unknown field: random", illegalArgumentException.getMessage()); @@ -81,7 +83,7 @@ public void fromXContentWithWrongDatatype() throws IOException { IllegalArgumentException illegalArgumentException = Assertions.assertThrows( IllegalArgumentException.class, - () -> CreateAsyncQueryRequest.fromXContentParser(xContentParser(request))); + () -> CreateAsyncQueryRequestConverter.fromXContentParser(xContentParser(request))); Assertions.assertEquals( "Error while parsing the request body: Can't get text on a START_ARRAY at 1:16", illegalArgumentException.getMessage()); @@ -97,7 +99,7 @@ public void fromXContentWithSessionId() throws IOException { + " \"sessionId\": \"00fdjevgkf12s00q\"\n" + "}"; CreateAsyncQueryRequest queryRequest = - CreateAsyncQueryRequest.fromXContentParser(xContentParser(request)); + CreateAsyncQueryRequestConverter.fromXContentParser(xContentParser(request)); Assertions.assertEquals("00fdjevgkf12s00q", queryRequest.getSessionId()); } diff --git a/async-query/src/test/java/org/opensearch/sql/spark/utils/TestUtils.java b/async-query/src/test/java/org/opensearch/sql/spark/utils/TestUtils.java new file mode 100644 index 0000000000..24c10ebea9 --- /dev/null +++ b/async-query/src/test/java/org/opensearch/sql/spark/utils/TestUtils.java @@ -0,0 +1,29 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.utils; + +import com.google.common.base.Charsets; +import com.google.common.io.Resources; +import java.net.URL; +import lombok.SneakyThrows; +import org.opensearch.action.admin.indices.create.CreateIndexRequest; +import org.opensearch.client.Client; +import org.opensearch.common.xcontent.XContentType; + +public class TestUtils { + @SneakyThrows + public static String loadMappings(String path) { + URL url = Resources.getResource(path); + return Resources.toString(url, Charsets.UTF_8); + } + + public static void createIndexWithMappings( + Client client, String indexName, String metadataFileLocation) { + CreateIndexRequest request = new CreateIndexRequest(indexName); + request.mapping(loadMappings(metadataFileLocation), XContentType.JSON); + client.admin().indices().create(request).actionGet(); + } +} diff --git a/spark/src/test/resources/flint-index-mappings/0.1.1/flint_covering_index.json b/async-query/src/test/resources/flint-index-mappings/0.1.1/flint_covering_index.json similarity index 100% rename from spark/src/test/resources/flint-index-mappings/0.1.1/flint_covering_index.json rename to async-query/src/test/resources/flint-index-mappings/0.1.1/flint_covering_index.json diff --git a/spark/src/test/resources/flint-index-mappings/0.1.1/flint_mv.json b/async-query/src/test/resources/flint-index-mappings/0.1.1/flint_mv.json similarity index 100% rename from spark/src/test/resources/flint-index-mappings/0.1.1/flint_mv.json rename to 
async-query/src/test/resources/flint-index-mappings/0.1.1/flint_mv.json diff --git a/spark/src/test/resources/flint-index-mappings/0.1.1/flint_skipping_index.json b/async-query/src/test/resources/flint-index-mappings/0.1.1/flint_skipping_index.json similarity index 100% rename from spark/src/test/resources/flint-index-mappings/0.1.1/flint_skipping_index.json rename to async-query/src/test/resources/flint-index-mappings/0.1.1/flint_skipping_index.json diff --git a/spark/src/test/resources/flint-index-mappings/0.1.1/flint_special_character_index.json b/async-query/src/test/resources/flint-index-mappings/0.1.1/flint_special_character_index.json similarity index 100% rename from spark/src/test/resources/flint-index-mappings/0.1.1/flint_special_character_index.json rename to async-query/src/test/resources/flint-index-mappings/0.1.1/flint_special_character_index.json diff --git a/spark/src/test/resources/flint-index-mappings/flint_covering_index.json b/async-query/src/test/resources/flint-index-mappings/flint_covering_index.json similarity index 100% rename from spark/src/test/resources/flint-index-mappings/flint_covering_index.json rename to async-query/src/test/resources/flint-index-mappings/flint_covering_index.json diff --git a/spark/src/test/resources/flint-index-mappings/flint_mv.json b/async-query/src/test/resources/flint-index-mappings/flint_mv.json similarity index 100% rename from spark/src/test/resources/flint-index-mappings/flint_mv.json rename to async-query/src/test/resources/flint-index-mappings/flint_mv.json diff --git a/spark/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_covering_corrupted_index_mapping.json b/async-query/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_covering_corrupted_index_mapping.json similarity index 100% rename from spark/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_covering_corrupted_index_mapping.json rename to async-query/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_covering_corrupted_index_mapping.json diff --git a/spark/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_covering_error_index_mapping.json b/async-query/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_covering_error_index_mapping.json similarity index 100% rename from spark/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_covering_error_index_mapping.json rename to async-query/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_covering_error_index_mapping.json diff --git a/spark/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_covering_index_mapping.json b/async-query/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_covering_index_mapping.json similarity index 100% rename from spark/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_covering_index_mapping.json rename to async-query/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_covering_index_mapping.json diff --git a/spark/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_skipping_index_mapping.json b/async-query/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_skipping_index_mapping.json similarity index 100% rename from spark/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_skipping_index_mapping.json rename to async-query/src/test/resources/flint-index-mappings/flint_my_glue_mydb_http_logs_skipping_index_mapping.json diff --git 
a/spark/src/test/resources/flint-index-mappings/flint_my_glue_mydb_mv_mapping.json b/async-query/src/test/resources/flint-index-mappings/flint_my_glue_mydb_mv_mapping.json similarity index 100% rename from spark/src/test/resources/flint-index-mappings/flint_my_glue_mydb_mv_mapping.json rename to async-query/src/test/resources/flint-index-mappings/flint_my_glue_mydb_mv_mapping.json diff --git a/spark/src/test/resources/flint-index-mappings/flint_mys3_default_http_logs_cv1_index.json b/async-query/src/test/resources/flint-index-mappings/flint_mys3_default_http_logs_cv1_index.json similarity index 100% rename from spark/src/test/resources/flint-index-mappings/flint_mys3_default_http_logs_cv1_index.json rename to async-query/src/test/resources/flint-index-mappings/flint_mys3_default_http_logs_cv1_index.json diff --git a/spark/src/test/resources/flint-index-mappings/flint_mys3_default_http_logs_skipping_index.json b/async-query/src/test/resources/flint-index-mappings/flint_mys3_default_http_logs_skipping_index.json similarity index 100% rename from spark/src/test/resources/flint-index-mappings/flint_mys3_default_http_logs_skipping_index.json rename to async-query/src/test/resources/flint-index-mappings/flint_mys3_default_http_logs_skipping_index.json diff --git a/spark/src/test/resources/flint-index-mappings/flint_skipping_index.json b/async-query/src/test/resources/flint-index-mappings/flint_skipping_index.json similarity index 100% rename from spark/src/test/resources/flint-index-mappings/flint_skipping_index.json rename to async-query/src/test/resources/flint-index-mappings/flint_skipping_index.json diff --git a/spark/src/test/resources/flint-index-mappings/flint_special_character_index.json b/async-query/src/test/resources/flint-index-mappings/flint_special_character_index.json similarity index 100% rename from spark/src/test/resources/flint-index-mappings/flint_special_character_index.json rename to async-query/src/test/resources/flint-index-mappings/flint_special_character_index.json diff --git a/spark/src/test/resources/flint-index-mappings/npe_mapping.json b/async-query/src/test/resources/flint-index-mappings/npe_mapping.json similarity index 100% rename from spark/src/test/resources/flint-index-mappings/npe_mapping.json rename to async-query/src/test/resources/flint-index-mappings/npe_mapping.json diff --git a/async-query/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker b/async-query/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker new file mode 100644 index 0000000000..ca6ee9cea8 --- /dev/null +++ b/async-query/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker @@ -0,0 +1 @@ +mock-maker-inline \ No newline at end of file diff --git a/spark/src/test/resources/query_execution_result_mapping.json b/async-query/src/test/resources/query_execution_result_mapping.json similarity index 100% rename from spark/src/test/resources/query_execution_result_mapping.json rename to async-query/src/test/resources/query_execution_result_mapping.json diff --git a/build.gradle b/build.gradle index 3e180d4042..702d6f478a 100644 --- a/build.gradle +++ b/build.gradle @@ -50,6 +50,7 @@ buildscript { return "https://github.com/prometheus/prometheus/releases/download/v${prometheus_binary_version}/prometheus-${prometheus_binary_version}."+ getOSFamilyType() + "-" + getArchType() + ".tar.gz" } aws_java_sdk_version = "1.12.651" + guava_version = "32.1.3-jre" } repositories { @@ -112,7 +113,7 @@ allprojects { } plugins.withId('java') { - sourceCompatibility = 
targetCompatibility = "11" + sourceCompatibility = targetCompatibility = JavaVersion.VERSION_21 } configurations.all { resolutionStrategy.force "org.jetbrains.kotlin:kotlin-stdlib:1.9.10" @@ -192,7 +193,7 @@ configurations.all { exclude group: "commons-logging", module: "commons-logging" // enforce 1.1.3, https://www.whitesourcesoftware.com/vulnerability-database/WS-2019-0379 resolutionStrategy.force 'commons-codec:commons-codec:1.13' - resolutionStrategy.force 'com.google.guava:guava:32.0.1-jre' + resolutionStrategy.force "com.google.guava:guava:${guava_version}" } // updateVersion: Task to auto increment to the next development iteration diff --git a/common/build.gradle b/common/build.gradle index b4ee98a5b7..15c48dd6b3 100644 --- a/common/build.gradle +++ b/common/build.gradle @@ -34,7 +34,7 @@ repositories { dependencies { api "org.antlr:antlr4-runtime:4.7.1" - api group: 'com.google.guava', name: 'guava', version: '32.0.1-jre' + api group: 'com.google.guava', name: 'guava', version: "${guava_version}" api group: 'org.apache.logging.log4j', name: 'log4j-core', version:"${versions.log4j}" api group: 'org.apache.commons', name: 'commons-lang3', version: '3.12.0' api group: 'org.apache.commons', name: 'commons-text', version: '1.10.0' @@ -46,7 +46,7 @@ dependencies { testImplementation group: 'junit', name: 'junit', version: '4.13.2' testImplementation group: 'org.assertj', name: 'assertj-core', version: '3.9.1' - testImplementation group: 'com.google.guava', name: 'guava', version: '32.0.1-jre' + testImplementation group: 'com.google.guava', name: 'guava', version: "${guava_version}" testImplementation group: 'org.hamcrest', name: 'hamcrest-library', version: '2.1' testImplementation('org.junit.jupiter:junit-jupiter:5.9.3') testImplementation group: 'org.mockito', name: 'mockito-core', version: '5.7.0' diff --git a/common/src/main/java/org/opensearch/sql/common/grok/GrokCompiler.java b/common/src/main/java/org/opensearch/sql/common/grok/GrokCompiler.java index aba96ad4cb..05fdbd57ed 100644 --- a/common/src/main/java/org/opensearch/sql/common/grok/GrokCompiler.java +++ b/common/src/main/java/org/opensearch/sql/common/grok/GrokCompiler.java @@ -29,7 +29,7 @@ public class GrokCompiler implements Serializable { // We don't want \n and commented line - private static final Pattern patternLinePattern = Pattern.compile("^([A-z0-9_]+)\\s+(.*)$"); + private static final Pattern patternLinePattern = Pattern.compile("^([a-zA-Z0-9_]+)\\s+(.*)$"); /** {@code Grok} patterns definitions. */ private final Map grokPatternDefinitions = new HashMap<>(); diff --git a/common/src/main/java/org/opensearch/sql/common/grok/GrokUtils.java b/common/src/main/java/org/opensearch/sql/common/grok/GrokUtils.java index 4b145bbbe8..2a309bba8f 100644 --- a/common/src/main/java/org/opensearch/sql/common/grok/GrokUtils.java +++ b/common/src/main/java/org/opensearch/sql/common/grok/GrokUtils.java @@ -24,8 +24,8 @@ public class GrokUtils { Pattern.compile( "%\\{" + "(?" - + "(?[A-z0-9]+)" - + "(?::(?[A-z0-9_:;,\\-\\/\\s\\.']+))?" + + "(?[a-zA-Z0-9_]+)" + + "(?::(?[a-zA-Z0-9_:;,\\-\\/\\s\\.']+))?" + ")" + "(?:=(?" 
+ "(?:" diff --git a/common/src/main/java/org/opensearch/sql/common/interceptors/AwsSigningInterceptor.java b/common/src/main/java/org/opensearch/sql/common/interceptors/AwsSigningInterceptor.java index 16196544b5..1ab1bb8976 100644 --- a/common/src/main/java/org/opensearch/sql/common/interceptors/AwsSigningInterceptor.java +++ b/common/src/main/java/org/opensearch/sql/common/interceptors/AwsSigningInterceptor.java @@ -24,9 +24,9 @@ public class AwsSigningInterceptor implements Interceptor { - private OkHttpAwsV4Signer okHttpAwsV4Signer; + private final OkHttpAwsV4Signer okHttpAwsV4Signer; - private AWSCredentialsProvider awsCredentialsProvider; + private final AWSCredentialsProvider awsCredentialsProvider; private static final Logger LOG = LogManager.getLogger(); diff --git a/common/src/main/java/org/opensearch/sql/common/interceptors/BasicAuthenticationInterceptor.java b/common/src/main/java/org/opensearch/sql/common/interceptors/BasicAuthenticationInterceptor.java index 15e9a0fc12..0ade25520f 100644 --- a/common/src/main/java/org/opensearch/sql/common/interceptors/BasicAuthenticationInterceptor.java +++ b/common/src/main/java/org/opensearch/sql/common/interceptors/BasicAuthenticationInterceptor.java @@ -16,7 +16,7 @@ public class BasicAuthenticationInterceptor implements Interceptor { - private String credentials; + private final String credentials; public BasicAuthenticationInterceptor(@NonNull String username, @NonNull String password) { this.credentials = Credentials.basic(username, password); diff --git a/common/src/main/java/org/opensearch/sql/common/setting/Settings.java b/common/src/main/java/org/opensearch/sql/common/setting/Settings.java index e2b7ab2904..a9fa693a22 100644 --- a/common/src/main/java/org/opensearch/sql/common/setting/Settings.java +++ b/common/src/main/java/org/opensearch/sql/common/setting/Settings.java @@ -23,16 +23,21 @@ public enum Key { SQL_SLOWLOG("plugins.sql.slowlog"), SQL_CURSOR_KEEP_ALIVE("plugins.sql.cursor.keep_alive"), SQL_DELETE_ENABLED("plugins.sql.delete.enabled"), + SQL_PAGINATION_API_SEARCH_AFTER("plugins.sql.pagination.api"), /** PPL Settings. */ PPL_ENABLED("plugins.ppl.enabled"), + /** Query Settings. */ + FIELD_TYPE_TOLERANCE("plugins.query.field_type_tolerance"), + /** Common Settings for SQL and PPL. 
*/ QUERY_MEMORY_LIMIT("plugins.query.memory_limit"), QUERY_SIZE_LIMIT("plugins.query.size_limit"), ENCYRPTION_MASTER_KEY("plugins.query.datasources.encryption.masterkey"), DATASOURCES_URI_HOSTS_DENY_LIST("plugins.query.datasources.uri.hosts.denylist"), DATASOURCES_LIMIT("plugins.query.datasources.limit"), + DATASOURCES_ENABLED("plugins.query.datasources.enabled"), METRICS_ROLLING_WINDOW("plugins.query.metrics.rolling_window"), METRICS_ROLLING_INTERVAL("plugins.query.metrics.rolling_interval"), @@ -49,6 +54,10 @@ public enum Key { /** Async query Settings * */ ASYNC_QUERY_ENABLED("plugins.query.executionengine.async_query.enabled"), + ASYNC_QUERY_EXTERNAL_SCHEDULER_ENABLED( + "plugins.query.executionengine.async_query.external_scheduler.enabled"), + ASYNC_QUERY_EXTERNAL_SCHEDULER_INTERVAL( + "plugins.query.executionengine.async_query.external_scheduler.interval"), STREAMING_JOB_HOUSEKEEPER_INTERVAL( "plugins.query.executionengine.spark.streamingjobs.housekeeper.interval"); diff --git a/common/src/test/java/org/opensearch/sql/common/grok/BasicTest.java b/common/src/test/java/org/opensearch/sql/common/grok/BasicTest.java index c724b58f3e..748495bff6 100644 --- a/common/src/test/java/org/opensearch/sql/common/grok/BasicTest.java +++ b/common/src/test/java/org/opensearch/sql/common/grok/BasicTest.java @@ -33,7 +33,7 @@ @FixMethodOrder(MethodSorters.NAME_ASCENDING) public class BasicTest { - @Rule public TemporaryFolder tempFolder = new TemporaryFolder(); + @Rule public final TemporaryFolder tempFolder = new TemporaryFolder(); private GrokCompiler compiler; diff --git a/core/build.gradle b/core/build.gradle index 655e7d92c2..c596251342 100644 --- a/core/build.gradle +++ b/core/build.gradle @@ -46,7 +46,7 @@ pitest { } dependencies { - api group: 'com.google.guava', name: 'guava', version: '32.0.1-jre' + api group: 'com.google.guava', name: 'guava', version: "${guava_version}" api group: 'org.apache.commons', name: 'commons-lang3', version: '3.12.0' api group: 'org.apache.commons', name: 'commons-text', version: '1.10.0' api group: 'com.facebook.presto', name: 'presto-matching', version: '0.240' @@ -57,6 +57,7 @@ dependencies { api group: 'com.google.code.gson', name: 'gson', version: '2.8.9' api group: 'com.tdunning', name: 't-digest', version: '3.3' api project(':common') + implementation "com.github.seancfoley:ipaddress:5.4.2" testImplementation('org.junit.jupiter:junit-jupiter:5.9.3') testImplementation group: 'org.hamcrest', name: 'hamcrest-library', version: '2.1' diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index d5e8b93b13..d0051568c4 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -10,7 +10,10 @@ import static org.opensearch.sql.ast.tree.Sort.NullOrder.NULL_LAST; import static org.opensearch.sql.ast.tree.Sort.SortOrder.ASC; import static org.opensearch.sql.ast.tree.Sort.SortOrder.DESC; +import static org.opensearch.sql.data.type.ExprCoreType.DATE; import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; +import static org.opensearch.sql.data.type.ExprCoreType.TIME; +import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP; import static org.opensearch.sql.utils.MLCommonsConstants.RCF_ANOMALOUS; import static org.opensearch.sql.utils.MLCommonsConstants.RCF_ANOMALY_GRADE; import static org.opensearch.sql.utils.MLCommonsConstants.RCF_SCORE; @@ -22,6 +25,7 @@ import 
com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Objects; import java.util.Optional; @@ -46,6 +50,7 @@ import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; import org.opensearch.sql.ast.tree.FetchCursor; +import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Kmeans; @@ -61,6 +66,7 @@ import org.opensearch.sql.ast.tree.Sort; import org.opensearch.sql.ast.tree.Sort.SortOption; import org.opensearch.sql.ast.tree.TableFunction; +import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; import org.opensearch.sql.common.antlr.SyntaxCheckException; @@ -99,6 +105,7 @@ import org.opensearch.sql.planner.logical.LogicalRemove; import org.opensearch.sql.planner.logical.LogicalRename; import org.opensearch.sql.planner.logical.LogicalSort; +import org.opensearch.sql.planner.logical.LogicalTrendline; import org.opensearch.sql.planner.logical.LogicalValues; import org.opensearch.sql.planner.physical.datasource.DataSourceTable; import org.opensearch.sql.storage.Table; @@ -468,23 +475,7 @@ public LogicalPlan visitParse(Parse node, AnalysisContext context) { @Override public LogicalPlan visitSort(Sort node, AnalysisContext context) { LogicalPlan child = node.getChild().get(0).accept(this, context); - ExpressionReferenceOptimizer optimizer = - new ExpressionReferenceOptimizer(expressionAnalyzer.getRepository(), child); - - List> sortList = - node.getSortList().stream() - .map( - sortField -> { - var analyzed = expressionAnalyzer.analyze(sortField.getField(), context); - if (analyzed == null) { - throw new UnsupportedOperationException( - String.format("Invalid use of expression %s", sortField.getField())); - } - Expression expression = optimizer.optimize(analyzed, context); - return ImmutablePair.of(analyzeSortOption(sortField.getFieldArgs()), expression); - }) - .collect(Collectors.toList()); - return new LogicalSort(child, sortList); + return buildSort(child, context, node.getSortList()); } /** Build {@link LogicalDedupe}. */ @@ -558,6 +549,29 @@ public LogicalPlan visitAD(AD node, AnalysisContext context) { return new LogicalAD(child, options); } + /** Build {@link LogicalEval} for fillnull command. */ + @Override + public LogicalPlan visitFillNull(final FillNull node, final AnalysisContext context) { + LogicalPlan child = node.getChild().get(0).accept(this, context); + + ImmutableList.Builder> expressionsBuilder = + new Builder<>(); + for (FillNull.NullableFieldFill fieldFill : node.getNullableFieldFills()) { + Expression fieldExpr = + expressionAnalyzer.analyze(fieldFill.getNullableFieldReference(), context); + ReferenceExpression ref = + DSL.ref(fieldFill.getNullableFieldReference().getField().toString(), fieldExpr.type()); + FunctionExpression ifNullFunction = + DSL.ifnull(ref, expressionAnalyzer.analyze(fieldFill.getReplaceNullWithMe(), context)); + expressionsBuilder.add(new ImmutablePair<>(ref, ifNullFunction)); + TypeEnvironment typeEnvironment = context.peek(); + // define the new reference in type env. + typeEnvironment.define(ref); + } + + return new LogicalEval(child, expressionsBuilder.build()); + } + /** Build {@link LogicalML} for ml command. 
*/ @Override public LogicalPlan visitML(ML node, AnalysisContext context) { @@ -570,6 +584,55 @@ public LogicalPlan visitML(ML node, AnalysisContext context) { return new LogicalML(child, node.getArguments()); } + /** Build {@link LogicalTrendline} for Trendline command. */ + @Override + public LogicalPlan visitTrendline(Trendline node, AnalysisContext context) { + final LogicalPlan child = node.getChild().get(0).accept(this, context); + + final TypeEnvironment currEnv = context.peek(); + final List computations = node.getComputations(); + final ImmutableList.Builder> + computationsAndTypes = ImmutableList.builder(); + computations.forEach( + computation -> { + final Expression resolvedField = + expressionAnalyzer.analyze(computation.getDataField(), context); + final ExprCoreType averageType; + // Duplicate the semantics of AvgAggregator#create(): + // - All numerical types have the DOUBLE type for the moving average. + // - All datetime types have the same datetime type for the moving average. + if (ExprCoreType.numberTypes().contains(resolvedField.type())) { + averageType = ExprCoreType.DOUBLE; + } else { + switch (resolvedField.type()) { + case DATE: + case TIME: + case TIMESTAMP: + averageType = (ExprCoreType) resolvedField.type(); + break; + default: + throw new SemanticCheckException( + String.format( + "Invalid field used for trendline computation %s. Source field %s had type" + + " %s but must be a numerical or datetime field.", + computation.getAlias(), + computation.getDataField().getChild().get(0), + resolvedField.type().typeName())); + } + } + currEnv.define(new Symbol(Namespace.FIELD_NAME, computation.getAlias()), averageType); + computationsAndTypes.add(Pair.of(computation, averageType)); + }); + + if (node.getSortByField().isEmpty()) { + return new LogicalTrendline(child, computationsAndTypes.build()); + } + + return new LogicalTrendline( + buildSort(child, context, Collections.singletonList(node.getSortByField().get())), + computationsAndTypes.build()); + } + @Override public LogicalPlan visitPaginate(Paginate paginate, AnalysisContext context) { LogicalPlan child = paginate.getChild().get(0).accept(this, context); @@ -588,6 +651,27 @@ public LogicalPlan visitCloseCursor(CloseCursor closeCursor, AnalysisContext con return new LogicalCloseCursor(closeCursor.getChild().get(0).accept(this, context)); } + private LogicalSort buildSort( + LogicalPlan child, AnalysisContext context, List sortFields) { + ExpressionReferenceOptimizer optimizer = + new ExpressionReferenceOptimizer(expressionAnalyzer.getRepository(), child); + + List> sortList = + sortFields.stream() + .map( + sortField -> { + var analyzed = expressionAnalyzer.analyze(sortField.getField(), context); + if (analyzed == null) { + throw new UnsupportedOperationException( + String.format("Invalid use of expression %s", sortField.getField())); + } + Expression expression = optimizer.optimize(analyzed, context); + return ImmutablePair.of(analyzeSortOption(sortField.getFieldArgs()), expression); + }) + .collect(Collectors.toList()); + return new LogicalSort(child, sortList); + } + /** * The first argument is always "asc", others are optional. Given nullFirst argument, use its * value. Otherwise just use DEFAULT_ASC/DESC. 
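The visitTrendline logic added to Analyzer.java above resolves each computation's output type by mirroring AvgAggregator#create(): numeric source fields produce a DOUBLE moving average, DATE/TIME/TIMESTAMP fields keep their own type, and any other type raises a SemanticCheckException. A minimal standalone sketch of that resolution rule follows; the class, enum, and method names here are hypothetical illustrations, not the plugin's actual API (only ExprCoreType and SemanticCheckException appear in the real code).

public final class TrendlineAverageTypeSketch {

  /** Hypothetical stand-in for the engine's ExprCoreType values involved in the rule. */
  enum FieldType { INTEGER, LONG, FLOAT, DOUBLE, DATE, TIME, TIMESTAMP, STRING }

  /**
   * Numeric fields average as DOUBLE (same as the average aggregator); datetime fields keep
   * their own type; anything else is rejected, as in the SemanticCheckException branch above.
   */
  static FieldType resolveAverageType(FieldType sourceType) {
    switch (sourceType) {
      case INTEGER:
      case LONG:
      case FLOAT:
      case DOUBLE:
        return FieldType.DOUBLE;
      case DATE:
      case TIME:
      case TIMESTAMP:
        return sourceType;
      default:
        throw new IllegalArgumentException(
            "Trendline requires a numerical or datetime field, got " + sourceType);
    }
  }

  public static void main(String[] args) {
    System.out.println(resolveAverageType(FieldType.INTEGER));   // DOUBLE
    System.out.println(resolveAverageType(FieldType.TIMESTAMP)); // TIMESTAMP
  }
}

Under this rule, a trendline over an integer column yields DOUBLE values while one over a timestamp column stays a timestamp, matching the AvgAggregator semantics the in-diff comment refers to.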
diff --git a/core/src/main/java/org/opensearch/sql/analysis/DataSourceSchemaIdentifierNameResolver.java b/core/src/main/java/org/opensearch/sql/analysis/DataSourceSchemaIdentifierNameResolver.java index 01145dc7df..31719d2fe3 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/DataSourceSchemaIdentifierNameResolver.java +++ b/core/src/main/java/org/opensearch/sql/analysis/DataSourceSchemaIdentifierNameResolver.java @@ -18,8 +18,8 @@ public class DataSourceSchemaIdentifierNameResolver { private String dataSourceName = DEFAULT_DATASOURCE_NAME; private String schemaName = DEFAULT_SCHEMA_NAME; - private String identifierName; - private DataSourceService dataSourceService; + private final String identifierName; + private final DataSourceService dataSourceService; private static final String DOT = "."; diff --git a/core/src/main/java/org/opensearch/sql/analysis/symbol/SymbolTable.java b/core/src/main/java/org/opensearch/sql/analysis/symbol/SymbolTable.java index 8bb6824a63..64a4fc4e09 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/symbol/SymbolTable.java +++ b/core/src/main/java/org/opensearch/sql/analysis/symbol/SymbolTable.java @@ -20,14 +20,14 @@ public class SymbolTable { /** Two-dimension hash table to manage symbols with type in different namespace. */ - private Map> tableByNamespace = + private final Map> tableByNamespace = new EnumMap<>(Namespace.class); /** * Two-dimension hash table to manage symbols with type in different namespace. Comparing with * tableByNamespace, orderedTable use the LinkedHashMap to keep the order of symbol. */ - private Map> orderedTable = + private final Map> orderedTable = new EnumMap<>(Namespace.class); /** diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index 973b10310b..f27260dd5f 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -45,6 +45,7 @@ import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; import org.opensearch.sql.ast.tree.FetchCursor; +import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Kmeans; @@ -59,6 +60,7 @@ import org.opensearch.sql.ast.tree.Rename; import org.opensearch.sql.ast.tree.Sort; import org.opensearch.sql.ast.tree.TableFunction; +import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.ast.tree.Values; /** AST nodes visitor Defines the traverse path. 
*/ @@ -109,6 +111,14 @@ public T visitFilter(Filter node, C context) { return visitChildren(node, context); } + public T visitTrendline(Trendline node, C context) { + return visitChildren(node, context); + } + + public T visitTrendlineComputation(Trendline.TrendlineComputation node, C context) { + return visitChildren(node, context); + } + public T visitProject(Project node, C context) { return visitChildren(node, context); } @@ -312,4 +322,8 @@ public T visitFetchCursor(FetchCursor cursor, C context) { public T visitCloseCursor(CloseCursor closeCursor, C context) { return visitChildren(closeCursor, context); } + + public T visitFillNull(FillNull fillNull, C context) { + return visitChildren(fillNull, context); + } } diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index 4f3056b0f7..d9956609ec 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -5,10 +5,13 @@ package org.opensearch.sql.ast.dsl; +import com.google.common.collect.ImmutableList; import java.util.Arrays; import java.util.List; +import java.util.Optional; import java.util.stream.Collectors; import lombok.experimental.UtilityClass; +import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.opensearch.sql.ast.expression.AggregateFunction; import org.opensearch.sql.ast.expression.Alias; @@ -46,6 +49,7 @@ import org.opensearch.sql.ast.tree.Aggregation; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; +import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Limit; @@ -59,6 +63,7 @@ import org.opensearch.sql.ast.tree.Sort; import org.opensearch.sql.ast.tree.Sort.SortOption; import org.opensearch.sql.ast.tree.TableFunction; +import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; @@ -463,6 +468,18 @@ public static Limit limit(UnresolvedPlan input, Integer limit, Integer offset) { return new Limit(limit, offset).attach(input); } + public static Trendline trendline( + UnresolvedPlan input, + Optional sortField, + Trendline.TrendlineComputation... computations) { + return new Trendline(sortField, Arrays.asList(computations)).attach(input); + } + + public static Trendline.TrendlineComputation computation( + Integer numDataPoints, Field dataField, String alias, Trendline.TrendlineType type) { + return new Trendline.TrendlineComputation(numDataPoints, dataField, alias, type); + } + public static Parse parse( UnresolvedPlan input, ParseMethod parseMethod, @@ -471,4 +488,22 @@ public static Parse parse( java.util.Map arguments) { return new Parse(parseMethod, sourceField, pattern, arguments, input); } + + public static FillNull fillNull(UnresolvedExpression replaceNullWithMe, Field... 
fields) { + return new FillNull( + FillNull.ContainNullableFieldFill.ofSameValue( + replaceNullWithMe, ImmutableList.copyOf(fields))); + } + + public static FillNull fillNull( + List> fieldAndReplacements) { + ImmutableList.Builder replacementsBuilder = ImmutableList.builder(); + for (ImmutablePair fieldAndReplacement : fieldAndReplacements) { + replacementsBuilder.add( + new FillNull.NullableFieldFill( + fieldAndReplacement.getLeft(), fieldAndReplacement.getRight())); + } + return new FillNull( + FillNull.ContainNullableFieldFill.ofVariousValue(replacementsBuilder.build())); + } } diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/Cast.java b/core/src/main/java/org/opensearch/sql/ast/expression/Cast.java index 2019346fb5..541dbedead 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/Cast.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/Cast.java @@ -12,6 +12,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_DOUBLE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_FLOAT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_INT; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_IP; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_LONG; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_SHORT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_STRING; @@ -54,6 +55,7 @@ public class Cast extends UnresolvedExpression { .put("time", CAST_TO_TIME.getName()) .put("timestamp", CAST_TO_TIMESTAMP.getName()) .put("datetime", CAST_TO_DATETIME.getName()) + .put("ip", CAST_TO_IP.getName()) .build(); /** The source expression cast from. */ diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/FillNull.java b/core/src/main/java/org/opensearch/sql/ast/tree/FillNull.java new file mode 100644 index 0000000000..e1e56229b4 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/FillNull.java @@ -0,0 +1,89 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import java.util.List; +import java.util.Objects; +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.NonNull; +import lombok.RequiredArgsConstructor; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.Node; +import org.opensearch.sql.ast.expression.Field; +import org.opensearch.sql.ast.expression.UnresolvedExpression; + +/** AST node represent FillNull operation. 
*/ +@RequiredArgsConstructor +@AllArgsConstructor +public class FillNull extends UnresolvedPlan { + + @Getter + @RequiredArgsConstructor + public static class NullableFieldFill { + @NonNull private final Field nullableFieldReference; + @NonNull private final UnresolvedExpression replaceNullWithMe; + } + + public interface ContainNullableFieldFill { + List getNullFieldFill(); + + static ContainNullableFieldFill ofVariousValue(List replacements) { + return new VariousValueNullFill(replacements); + } + + static ContainNullableFieldFill ofSameValue( + UnresolvedExpression replaceNullWithMe, List nullableFieldReferences) { + return new SameValueNullFill(replaceNullWithMe, nullableFieldReferences); + } + } + + private static class SameValueNullFill implements ContainNullableFieldFill { + @Getter(onMethod_ = @Override) + private final List nullFieldFill; + + public SameValueNullFill( + UnresolvedExpression replaceNullWithMe, List nullableFieldReferences) { + Objects.requireNonNull(replaceNullWithMe, "Null replacement is required"); + this.nullFieldFill = + Objects.requireNonNull(nullableFieldReferences, "Nullable field reference is required") + .stream() + .map(nullableReference -> new NullableFieldFill(nullableReference, replaceNullWithMe)) + .toList(); + } + } + + @RequiredArgsConstructor + private static class VariousValueNullFill implements ContainNullableFieldFill { + @NonNull + @Getter(onMethod_ = @Override) + private final List nullFieldFill; + } + + private UnresolvedPlan child; + + @NonNull private final ContainNullableFieldFill containNullableFieldFill; + + public List getNullableFieldFills() { + return containNullableFieldFill.getNullFieldFill(); + } + + @Override + public UnresolvedPlan attach(UnresolvedPlan child) { + this.child = child; + return this; + } + + @Override + public List getChild() { + return child == null ? 
List.of() : List.of(child); + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitFillNull(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Filter.java b/core/src/main/java/org/opensearch/sql/ast/tree/Filter.java index 6c57275db9..da98fef7be 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Filter.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Filter.java @@ -18,7 +18,7 @@ @EqualsAndHashCode(callSuper = false) @Getter public class Filter extends UnresolvedPlan { - private UnresolvedExpression condition; + private final UnresolvedExpression condition; private UnresolvedPlan child; public Filter(UnresolvedExpression condition) { diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Project.java b/core/src/main/java/org/opensearch/sql/ast/tree/Project.java index cffb4dfdce..bbe4d0e2ce 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Project.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Project.java @@ -22,7 +22,7 @@ @EqualsAndHashCode(callSuper = false) public class Project extends UnresolvedPlan { @Setter private List projectList; - private List argExprList; + private final List argExprList; private UnresolvedPlan child; public Project(List projectList) { diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Trendline.java b/core/src/main/java/org/opensearch/sql/ast/tree/Trendline.java new file mode 100644 index 0000000000..aa4fcc200d --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Trendline.java @@ -0,0 +1,71 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import java.util.Optional; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.Node; +import org.opensearch.sql.ast.expression.Field; +import org.opensearch.sql.ast.expression.UnresolvedExpression; + +@ToString +@Getter +@RequiredArgsConstructor +@EqualsAndHashCode(callSuper = false) +public class Trendline extends UnresolvedPlan { + + private UnresolvedPlan child; + private final Optional sortByField; + private final List computations; + + @Override + public Trendline attach(UnresolvedPlan child) { + this.child = child; + return this; + } + + @Override + public List getChild() { + return ImmutableList.of(child); + } + + @Override + public T accept(AbstractNodeVisitor visitor, C context) { + return visitor.visitTrendline(this, context); + } + + @Getter + public static class TrendlineComputation extends UnresolvedExpression { + + private final Integer numberOfDataPoints; + private final Field dataField; + private final String alias; + private final TrendlineType computationType; + + public TrendlineComputation( + Integer numberOfDataPoints, Field dataField, String alias, TrendlineType computationType) { + this.numberOfDataPoints = numberOfDataPoints; + this.dataField = dataField; + this.alias = alias; + this.computationType = computationType; + } + + @Override + public R accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitTrendlineComputation(this, context); + } + } + + public enum TrendlineType { + SMA + } +} diff --git a/core/src/main/java/org/opensearch/sql/data/model/ExprIpValue.java 
b/core/src/main/java/org/opensearch/sql/data/model/ExprIpValue.java new file mode 100644 index 0000000000..8bdbec4bb5 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/data/model/ExprIpValue.java @@ -0,0 +1,50 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.data.model; + +import inet.ipaddr.IPAddress; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.data.type.ExprType; +import org.opensearch.sql.utils.IPUtils; + +/** Expression IP Address Value. */ +public class ExprIpValue extends AbstractExprValue { + private final IPAddress value; + + public ExprIpValue(String addressString) { + value = IPUtils.toAddress(addressString); + } + + @Override + public String value() { + return value.toCanonicalString(); + } + + @Override + public ExprType type() { + return ExprCoreType.IP; + } + + @Override + public int compare(ExprValue other) { + return IPUtils.compare(value, ((ExprIpValue) other).value); + } + + @Override + public boolean equal(ExprValue other) { + return compare(other) == 0; + } + + @Override + public String toString() { + return String.format("IP %s", value()); + } + + @Override + public IPAddress ipValue() { + return value; + } +} diff --git a/core/src/main/java/org/opensearch/sql/data/model/ExprValue.java b/core/src/main/java/org/opensearch/sql/data/model/ExprValue.java index 034ed22a75..da9c329f93 100644 --- a/core/src/main/java/org/opensearch/sql/data/model/ExprValue.java +++ b/core/src/main/java/org/opensearch/sql/data/model/ExprValue.java @@ -5,6 +5,7 @@ package org.opensearch.sql.data.model; +import inet.ipaddr.IPAddress; import java.io.Serializable; import java.time.Instant; import java.time.LocalDate; @@ -102,6 +103,11 @@ default Double doubleValue() { "invalid to get doubleValue from value of type " + type()); } + /** Get IP address value. */ + default IPAddress ipValue() { + throw new ExpressionEvaluationException("invalid to get ipValue from value of type " + type()); + } + /** Get string value. */ default String stringValue() { throw new ExpressionEvaluationException( diff --git a/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java b/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java index 20813045f2..890e0ef8d5 100644 --- a/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java +++ b/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java @@ -5,6 +5,7 @@ package org.opensearch.sql.data.model; +import inet.ipaddr.IPAddress; import java.time.Instant; import java.time.LocalDate; import java.time.LocalDateTime; @@ -75,6 +76,10 @@ public static ExprValue timestampValue(Instant value) { return new ExprTimestampValue(value); } + public static ExprValue ipValue(String value) { + return new ExprIpValue(value); + } + /** {@link ExprTupleValue} constructor. 
*/ public static ExprValue tupleValue(Map map) { LinkedHashMap valueMap = new LinkedHashMap<>(); @@ -188,6 +193,10 @@ public static Map getTupleValue(ExprValue exprValue) { return exprValue.tupleValue(); } + public static IPAddress getIpValue(ExprValue exprValue) { + return exprValue.ipValue(); + } + public static Boolean getBooleanValue(ExprValue exprValue) { return exprValue.booleanValue(); } diff --git a/core/src/main/java/org/opensearch/sql/data/type/ExprCoreType.java b/core/src/main/java/org/opensearch/sql/data/type/ExprCoreType.java index cbc0c98255..6df2ba6390 100644 --- a/core/src/main/java/org/opensearch/sql/data/type/ExprCoreType.java +++ b/core/src/main/java/org/opensearch/sql/data/type/ExprCoreType.java @@ -45,6 +45,9 @@ public enum ExprCoreType implements ExprType { TIMESTAMP(STRING, DATE, TIME), INTERVAL(UNDEFINED), + /** IP Address. */ + IP(STRING), + /** Struct. */ STRUCT(UNDEFINED), diff --git a/core/src/main/java/org/opensearch/sql/datasource/DataSourceService.java b/core/src/main/java/org/opensearch/sql/datasource/DataSourceService.java index 6af5d19e5c..a8caa4719a 100644 --- a/core/src/main/java/org/opensearch/sql/datasource/DataSourceService.java +++ b/core/src/main/java/org/opensearch/sql/datasource/DataSourceService.java @@ -82,6 +82,9 @@ public interface DataSourceService { * Specifically for addressing use cases in SparkQueryDispatcher. * * @param dataSourceName of the {@link DataSource} + * @param context request context used by the implementation. It is passed by async-query-core. + * refer {@link RequestContext} */ - DataSourceMetadata verifyDataSourceAccessAndGetRawMetadata(String dataSourceName); + DataSourceMetadata verifyDataSourceAccessAndGetRawMetadata( + String dataSourceName, RequestContext context); } diff --git a/core/src/main/java/org/opensearch/sql/datasource/RequestContext.java b/core/src/main/java/org/opensearch/sql/datasource/RequestContext.java new file mode 100644 index 0000000000..199930d340 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/datasource/RequestContext.java @@ -0,0 +1,15 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.datasource; + +/** + * Context interface to provide additional request related information. It is introduced to allow + * async-query-core library user to pass request context information to implementations of data + * accessors. 
+ */ +public interface RequestContext { + Object getAttribute(String name); +} diff --git a/core/src/main/java/org/opensearch/sql/datasource/model/DataSourceMetadata.java b/core/src/main/java/org/opensearch/sql/datasource/model/DataSourceMetadata.java index e3dd0e8ff7..2282bc694a 100644 --- a/core/src/main/java/org/opensearch/sql/datasource/model/DataSourceMetadata.java +++ b/core/src/main/java/org/opensearch/sql/datasource/model/DataSourceMetadata.java @@ -30,36 +30,36 @@ public class DataSourceMetadata { public static final String DEFAULT_RESULT_INDEX = "query_execution_result"; public static final int MAX_RESULT_INDEX_NAME_SIZE = 255; - private static String DATASOURCE_NAME_REGEX = "[@*A-Za-z]+?[*a-zA-Z_\\-0-9]*"; + private static final String DATASOURCE_NAME_REGEX = "[@*A-Za-z]+?[*a-zA-Z_\\-0-9]*"; // OS doesn’t allow uppercase: https://tinyurl.com/yse2xdbx public static final String RESULT_INDEX_NAME_PATTERN = "[a-z0-9_-]+"; - public static String INVALID_RESULT_INDEX_NAME_SIZE = + public static final String INVALID_RESULT_INDEX_NAME_SIZE = "Result index name size must contains less than " + MAX_RESULT_INDEX_NAME_SIZE + " characters."; - public static String INVALID_CHAR_IN_RESULT_INDEX_NAME = + public static final String INVALID_CHAR_IN_RESULT_INDEX_NAME = "Result index name has invalid character. Valid characters are a-z, 0-9, -(hyphen) and" + " _(underscore)."; - public static String INVALID_RESULT_INDEX_PREFIX = + public static final String INVALID_RESULT_INDEX_PREFIX = "Result index must start with " + DEFAULT_RESULT_INDEX; - @JsonProperty private String name; + @JsonProperty private final String name; - @JsonProperty private String description; + @JsonProperty private final String description; @JsonProperty @JsonFormat(with = JsonFormat.Feature.ACCEPT_CASE_INSENSITIVE_PROPERTIES) - private DataSourceType connector; + private final DataSourceType connector; - @JsonProperty private List allowedRoles; + @JsonProperty private final List allowedRoles; - @JsonProperty private Map properties; + @JsonProperty private final Map properties; - @JsonProperty private String resultIndex; + @JsonProperty private final String resultIndex; - @JsonProperty private DataSourceStatus status; + @JsonProperty private final DataSourceStatus status; - public static Function DATASOURCE_TO_RESULT_INDEX = + public static final Function DATASOURCE_TO_RESULT_INDEX = datasourceName -> String.format("%s_%s", DEFAULT_RESULT_INDEX, datasourceName); private DataSourceMetadata(Builder builder) { @@ -128,10 +128,14 @@ public Builder setDataSourceStatus(DataSourceStatus status) { return this; } - public DataSourceMetadata build() { + public DataSourceMetadata validateAndBuild() { validateMissingAttributes(); validateName(); validateCustomResultIndex(); + return build(); + } + + public DataSourceMetadata build() { fillNullAttributes(); return new DataSourceMetadata(this); } @@ -239,6 +243,6 @@ public static DataSourceMetadata defaultOpenSearchDataSourceMetadata() { .setConnector(DataSourceType.OPENSEARCH) .setAllowedRoles(Collections.emptyList()) .setProperties(ImmutableMap.of()) - .build(); + .validateAndBuild(); } } diff --git a/core/src/main/java/org/opensearch/sql/datasource/model/DataSourceStatus.java b/core/src/main/java/org/opensearch/sql/datasource/model/DataSourceStatus.java index bca47217c1..a4282eb419 100644 --- a/core/src/main/java/org/opensearch/sql/datasource/model/DataSourceStatus.java +++ b/core/src/main/java/org/opensearch/sql/datasource/model/DataSourceStatus.java @@ -10,7 +10,7 @@ public enum 
DataSourceStatus { ACTIVE("active"), DISABLED("disabled"); - private String text; + private final String text; DataSourceStatus(String text) { this.text = text; diff --git a/core/src/main/java/org/opensearch/sql/datasource/model/DataSourceType.java b/core/src/main/java/org/opensearch/sql/datasource/model/DataSourceType.java index c727c3c531..442497094b 100644 --- a/core/src/main/java/org/opensearch/sql/datasource/model/DataSourceType.java +++ b/core/src/main/java/org/opensearch/sql/datasource/model/DataSourceType.java @@ -7,20 +7,23 @@ import java.util.HashMap; import java.util.Map; +import lombok.EqualsAndHashCode; import lombok.RequiredArgsConstructor; @RequiredArgsConstructor +@EqualsAndHashCode public class DataSourceType { - public static DataSourceType PROMETHEUS = new DataSourceType("PROMETHEUS"); - public static DataSourceType OPENSEARCH = new DataSourceType("OPENSEARCH"); - public static DataSourceType SPARK = new DataSourceType("SPARK"); - public static DataSourceType S3GLUE = new DataSourceType("S3GLUE"); + public static final DataSourceType PROMETHEUS = new DataSourceType("PROMETHEUS"); + public static final DataSourceType OPENSEARCH = new DataSourceType("OPENSEARCH"); + public static final DataSourceType SPARK = new DataSourceType("SPARK"); + public static final DataSourceType S3GLUE = new DataSourceType("S3GLUE"); + public static final DataSourceType SECURITY_LAKE = new DataSourceType("SECURITY_LAKE"); // Map from uppercase DataSourceType name to DataSourceType object - private static Map knownValues = new HashMap<>(); + private static final Map knownValues = new HashMap<>(); static { - register(PROMETHEUS, OPENSEARCH, SPARK, S3GLUE); + register(PROMETHEUS, OPENSEARCH, SPARK, S3GLUE, SECURITY_LAKE); } private final String name; diff --git a/core/src/main/java/org/opensearch/sql/exception/QueryEngineException.java b/core/src/main/java/org/opensearch/sql/exception/QueryEngineException.java index b3d13bef71..122d4963fa 100644 --- a/core/src/main/java/org/opensearch/sql/exception/QueryEngineException.java +++ b/core/src/main/java/org/opensearch/sql/exception/QueryEngineException.java @@ -11,4 +11,8 @@ public class QueryEngineException extends RuntimeException { public QueryEngineException(String message) { super(message); } + + public QueryEngineException(String message, Throwable cause) { + super(message, cause); + } } diff --git a/core/src/main/java/org/opensearch/sql/exception/SemanticCheckException.java b/core/src/main/java/org/opensearch/sql/exception/SemanticCheckException.java index 6e0c184af8..c43dfdffc8 100644 --- a/core/src/main/java/org/opensearch/sql/exception/SemanticCheckException.java +++ b/core/src/main/java/org/opensearch/sql/exception/SemanticCheckException.java @@ -7,7 +7,12 @@ /** Semantic Check Exception. 
*/ public class SemanticCheckException extends QueryEngineException { + public SemanticCheckException(String message) { super(message); } + + public SemanticCheckException(String message, Throwable cause) { + super(message, cause); + } } diff --git a/core/src/main/java/org/opensearch/sql/executor/Explain.java b/core/src/main/java/org/opensearch/sql/executor/Explain.java index 0f05b99383..31890a8090 100644 --- a/core/src/main/java/org/opensearch/sql/executor/Explain.java +++ b/core/src/main/java/org/opensearch/sql/executor/Explain.java @@ -8,12 +8,14 @@ import com.google.common.collect.ImmutableMap; import java.util.ArrayList; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.function.Consumer; import java.util.function.Function; import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Pair; import org.opensearch.sql.ast.tree.Sort; +import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.executor.ExecutionEngine.ExplainResponse; import org.opensearch.sql.executor.ExecutionEngine.ExplainResponseNode; import org.opensearch.sql.expression.Expression; @@ -30,6 +32,8 @@ import org.opensearch.sql.planner.physical.RemoveOperator; import org.opensearch.sql.planner.physical.RenameOperator; import org.opensearch.sql.planner.physical.SortOperator; +import org.opensearch.sql.planner.physical.TakeOrderedOperator; +import org.opensearch.sql.planner.physical.TrendlineOperator; import org.opensearch.sql.planner.physical.ValuesOperator; import org.opensearch.sql.planner.physical.WindowOperator; import org.opensearch.sql.storage.TableScanOperator; @@ -73,6 +77,19 @@ public ExplainResponseNode visitSort(SortOperator node, Object context) { ImmutableMap.of("sortList", describeSortList(node.getSortList())))); } + @Override + public ExplainResponseNode visitTakeOrdered(TakeOrderedOperator node, Object context) { + return explain( + node, + context, + explainNode -> + explainNode.setDescription( + ImmutableMap.of( + "limit", node.getLimit(), + "offset", node.getOffset(), + "sortList", describeSortList(node.getSortList())))); + } + @Override public ExplainResponseNode visitTableScan(TableScanOperator node, Object context) { return explain( @@ -197,6 +214,21 @@ public ExplainResponseNode visitNested(NestedOperator node, Object context) { explanNode -> explanNode.setDescription(ImmutableMap.of("nested", node.getFields()))); } + @Override + public ExplainResponseNode visitTrendline(TrendlineOperator node, Object context) { + return explain( + node, + context, + explainNode -> + explainNode.setDescription( + ImmutableMap.of( + "computations", + describeTrendlineComputations( + node.getComputations().stream() + .map(Pair::getKey) + .collect(Collectors.toList()))))); + } + protected ExplainResponseNode explain( PhysicalPlan node, Object context, Consumer doExplain) { ExplainResponseNode explainNode = new ExplainResponseNode(getOperatorName(node)); @@ -231,4 +263,18 @@ private Map> describeSortList( "sortOrder", p.getLeft().getSortOrder().toString(), "nullOrder", p.getLeft().getNullOrder().toString()))); } + + private List> describeTrendlineComputations( + List computations) { + return computations.stream() + .map( + computation -> + ImmutableMap.of( + "computationType", + computation.getComputationType().name().toLowerCase(Locale.ROOT), + "numberOfDataPoints", computation.getNumberOfDataPoints().toString(), + "dataField", computation.getDataField().getChild().get(0).toString(), + "alias", computation.getAlias())) + .collect(Collectors.toList()); + } 
} diff --git a/core/src/main/java/org/opensearch/sql/executor/streaming/DefaultMetadataLog.java b/core/src/main/java/org/opensearch/sql/executor/streaming/DefaultMetadataLog.java index e439d93f6c..48975a5608 100644 --- a/core/src/main/java/org/opensearch/sql/executor/streaming/DefaultMetadataLog.java +++ b/core/src/main/java/org/opensearch/sql/executor/streaming/DefaultMetadataLog.java @@ -26,7 +26,7 @@ public class DefaultMetadataLog implements MetadataLog { private static final long MIN_ACCEPTABLE_ID = 0L; - private SortedMap metadataMap = new TreeMap<>(); + private final SortedMap metadataMap = new TreeMap<>(); @Override public boolean add(Long batchId, T metadata) { diff --git a/core/src/main/java/org/opensearch/sql/expression/DSL.java b/core/src/main/java/org/opensearch/sql/expression/DSL.java index 9975afac7f..44ecc2bc86 100644 --- a/core/src/main/java/org/opensearch/sql/expression/DSL.java +++ b/core/src/main/java/org/opensearch/sql/expression/DSL.java @@ -563,6 +563,10 @@ public static FunctionExpression regexp(Expression... expressions) { return compile(FunctionProperties.None, BuiltinFunctionName.REGEXP, expressions); } + public static FunctionExpression cidrmatch(Expression... expressions) { + return compile(FunctionProperties.None, BuiltinFunctionName.CIDRMATCH, expressions); + } + public static FunctionExpression concat(Expression... expressions) { return compile(FunctionProperties.None, BuiltinFunctionName.CONCAT, expressions); } @@ -831,6 +835,10 @@ public static FunctionExpression castTimestamp(Expression value) { return compile(FunctionProperties.None, BuiltinFunctionName.CAST_TO_TIMESTAMP, value); } + public static FunctionExpression castIp(Expression value) { + return compile(FunctionProperties.None, BuiltinFunctionName.CAST_TO_IP, value); + } + public static FunctionExpression typeof(Expression value) { return compile(FunctionProperties.None, BuiltinFunctionName.TYPEOF, value); } diff --git a/core/src/main/java/org/opensearch/sql/expression/aggregation/AggregatorFunction.java b/core/src/main/java/org/opensearch/sql/expression/aggregation/AggregatorFunctions.java similarity index 99% rename from core/src/main/java/org/opensearch/sql/expression/aggregation/AggregatorFunction.java rename to core/src/main/java/org/opensearch/sql/expression/aggregation/AggregatorFunctions.java index 631eb2e613..698fb20408 100644 --- a/core/src/main/java/org/opensearch/sql/expression/aggregation/AggregatorFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/aggregation/AggregatorFunctions.java @@ -40,7 +40,7 @@ * count accepts values of all types. */ @UtilityClass -public class AggregatorFunction { +public class AggregatorFunctions { /** * Register Aggregation Function. 
* diff --git a/core/src/main/java/org/opensearch/sql/expression/datetime/DateTimeFunction.java b/core/src/main/java/org/opensearch/sql/expression/datetime/DateTimeFunctions.java similarity index 86% rename from core/src/main/java/org/opensearch/sql/expression/datetime/DateTimeFunction.java rename to core/src/main/java/org/opensearch/sql/expression/datetime/DateTimeFunctions.java index a42a599ad8..411bd27993 100644 --- a/core/src/main/java/org/opensearch/sql/expression/datetime/DateTimeFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/datetime/DateTimeFunctions.java @@ -101,7 +101,7 @@ */ @UtilityClass @SuppressWarnings("unchecked") -public class DateTimeFunction { +public class DateTimeFunctions { // The number of seconds per day public static final long SECONDS_PER_DAY = 86400; @@ -357,8 +357,8 @@ private DefaultFunctionResolver adddate() { BuiltinFunctionName.ADDDATE.getName(), (SerializableFunction>[]) (Stream.concat( - get_date_add_date_sub_signatures(DateTimeFunction::exprAddDateInterval), - get_adddate_subdate_signatures(DateTimeFunction::exprAddDateDays)) + get_date_add_date_sub_signatures(DateTimeFunctions::exprAddDateInterval), + get_adddate_subdate_signatures(DateTimeFunctions::exprAddDateDays)) .toArray(SerializableFunction[]::new))); } @@ -375,41 +375,41 @@ private DefaultFunctionResolver addtime() { return define( BuiltinFunctionName.ADDTIME.getName(), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprAddTime), TIME, TIME, TIME), + nullMissingHandlingWithProperties(DateTimeFunctions::exprAddTime), TIME, TIME, TIME), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprAddTime), TIME, TIME, DATE), + nullMissingHandlingWithProperties(DateTimeFunctions::exprAddTime), TIME, TIME, DATE), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprAddTime), + nullMissingHandlingWithProperties(DateTimeFunctions::exprAddTime), TIME, TIME, TIMESTAMP), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprAddTime), + nullMissingHandlingWithProperties(DateTimeFunctions::exprAddTime), TIMESTAMP, DATE, TIME), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprAddTime), + nullMissingHandlingWithProperties(DateTimeFunctions::exprAddTime), TIMESTAMP, DATE, DATE), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprAddTime), + nullMissingHandlingWithProperties(DateTimeFunctions::exprAddTime), TIMESTAMP, DATE, TIMESTAMP), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprAddTime), + nullMissingHandlingWithProperties(DateTimeFunctions::exprAddTime), TIMESTAMP, TIMESTAMP, TIME), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprAddTime), + nullMissingHandlingWithProperties(DateTimeFunctions::exprAddTime), TIMESTAMP, TIMESTAMP, DATE), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprAddTime), + nullMissingHandlingWithProperties(DateTimeFunctions::exprAddTime), TIMESTAMP, TIMESTAMP, TIMESTAMP)); @@ -425,13 +425,13 @@ private DefaultFunctionResolver convert_tz() { return define( BuiltinFunctionName.CONVERT_TZ.getName(), impl( - nullMissingHandling(DateTimeFunction::exprConvertTZ), + nullMissingHandling(DateTimeFunctions::exprConvertTZ), TIMESTAMP, TIMESTAMP, STRING, STRING), impl( - nullMissingHandling(DateTimeFunction::exprConvertTZ), + nullMissingHandling(DateTimeFunctions::exprConvertTZ), TIMESTAMP, STRING, STRING, @@ -445,9 +445,9 @@ private 
DefaultFunctionResolver convert_tz() { private DefaultFunctionResolver date() { return define( BuiltinFunctionName.DATE.getName(), - impl(nullMissingHandling(DateTimeFunction::exprDate), DATE, STRING), - impl(nullMissingHandling(DateTimeFunction::exprDate), DATE, DATE), - impl(nullMissingHandling(DateTimeFunction::exprDate), DATE, TIMESTAMP)); + impl(nullMissingHandling(DateTimeFunctions::exprDate), DATE, STRING), + impl(nullMissingHandling(DateTimeFunctions::exprDate), DATE, DATE), + impl(nullMissingHandling(DateTimeFunctions::exprDate), DATE, TIMESTAMP)); } /** @@ -458,35 +458,35 @@ private DefaultFunctionResolver datediff() { return define( BuiltinFunctionName.DATEDIFF.getName(), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprDateDiff), LONG, DATE, DATE), + nullMissingHandlingWithProperties(DateTimeFunctions::exprDateDiff), LONG, DATE, DATE), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprDateDiff), LONG, DATE, TIME), + nullMissingHandlingWithProperties(DateTimeFunctions::exprDateDiff), LONG, DATE, TIME), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprDateDiff), LONG, TIME, DATE), + nullMissingHandlingWithProperties(DateTimeFunctions::exprDateDiff), LONG, TIME, DATE), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprDateDiff), LONG, TIME, TIME), + nullMissingHandlingWithProperties(DateTimeFunctions::exprDateDiff), LONG, TIME, TIME), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprDateDiff), + nullMissingHandlingWithProperties(DateTimeFunctions::exprDateDiff), LONG, TIMESTAMP, DATE), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprDateDiff), + nullMissingHandlingWithProperties(DateTimeFunctions::exprDateDiff), LONG, DATE, TIMESTAMP), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprDateDiff), + nullMissingHandlingWithProperties(DateTimeFunctions::exprDateDiff), LONG, TIMESTAMP, TIMESTAMP), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprDateDiff), + nullMissingHandlingWithProperties(DateTimeFunctions::exprDateDiff), LONG, TIMESTAMP, TIME), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprDateDiff), + nullMissingHandlingWithProperties(DateTimeFunctions::exprDateDiff), LONG, TIME, TIMESTAMP)); @@ -501,15 +501,15 @@ private DefaultFunctionResolver datediff() { private FunctionResolver datetime() { return define( BuiltinFunctionName.DATETIME.getName(), - impl(nullMissingHandling(DateTimeFunction::exprDateTime), TIMESTAMP, STRING, STRING), - impl(nullMissingHandling(DateTimeFunction::exprDateTimeNoTimezone), TIMESTAMP, STRING)); + impl(nullMissingHandling(DateTimeFunctions::exprDateTime), TIMESTAMP, STRING, STRING), + impl(nullMissingHandling(DateTimeFunctions::exprDateTimeNoTimezone), TIMESTAMP, STRING)); } private DefaultFunctionResolver date_add() { return define( BuiltinFunctionName.DATE_ADD.getName(), (SerializableFunction>[]) - get_date_add_date_sub_signatures(DateTimeFunction::exprAddDateInterval) + get_date_add_date_sub_signatures(DateTimeFunctions::exprAddDateInterval) .toArray(SerializableFunction[]::new)); } @@ -517,7 +517,7 @@ private DefaultFunctionResolver date_sub() { return define( BuiltinFunctionName.DATE_SUB.getName(), (SerializableFunction>[]) - get_date_add_date_sub_signatures(DateTimeFunction::exprSubDateInterval) + get_date_add_date_sub_signatures(DateTimeFunctions::exprSubDateInterval) 
.toArray(SerializableFunction[]::new)); } @@ -525,9 +525,9 @@ private DefaultFunctionResolver date_sub() { private DefaultFunctionResolver day() { return define( BuiltinFunctionName.DAY.getName(), - impl(nullMissingHandling(DateTimeFunction::exprDayOfMonth), INTEGER, DATE), - impl(nullMissingHandling(DateTimeFunction::exprDayOfMonth), INTEGER, TIMESTAMP), - impl(nullMissingHandling(DateTimeFunction::exprDayOfMonth), INTEGER, STRING)); + impl(nullMissingHandling(DateTimeFunctions::exprDayOfMonth), INTEGER, DATE), + impl(nullMissingHandling(DateTimeFunctions::exprDayOfMonth), INTEGER, TIMESTAMP), + impl(nullMissingHandling(DateTimeFunctions::exprDayOfMonth), INTEGER, STRING)); } /** @@ -537,9 +537,9 @@ private DefaultFunctionResolver day() { private DefaultFunctionResolver dayName() { return define( BuiltinFunctionName.DAYNAME.getName(), - impl(nullMissingHandling(DateTimeFunction::exprDayName), STRING, DATE), - impl(nullMissingHandling(DateTimeFunction::exprDayName), STRING, TIMESTAMP), - impl(nullMissingHandling(DateTimeFunction::exprDayName), STRING, STRING)); + impl(nullMissingHandling(DateTimeFunctions::exprDayName), STRING, DATE), + impl(nullMissingHandling(DateTimeFunctions::exprDayName), STRING, TIMESTAMP), + impl(nullMissingHandling(DateTimeFunctions::exprDayName), STRING, STRING)); } /** DAYOFMONTH(STRING/DATE/TIMESTAMP). return the day of the month (1-31). */ @@ -549,12 +549,12 @@ private DefaultFunctionResolver dayOfMonth(BuiltinFunctionName name) { implWithProperties( nullMissingHandlingWithProperties( (functionProperties, arg) -> - DateTimeFunction.dayOfMonthToday(functionProperties.getQueryStartClock())), + DateTimeFunctions.dayOfMonthToday(functionProperties.getQueryStartClock())), INTEGER, TIME), - impl(nullMissingHandling(DateTimeFunction::exprDayOfMonth), INTEGER, DATE), - impl(nullMissingHandling(DateTimeFunction::exprDayOfMonth), INTEGER, STRING), - impl(nullMissingHandling(DateTimeFunction::exprDayOfMonth), INTEGER, TIMESTAMP)); + impl(nullMissingHandling(DateTimeFunctions::exprDayOfMonth), INTEGER, DATE), + impl(nullMissingHandling(DateTimeFunctions::exprDayOfMonth), INTEGER, STRING), + impl(nullMissingHandling(DateTimeFunctions::exprDayOfMonth), INTEGER, TIMESTAMP)); } /** @@ -567,12 +567,12 @@ private DefaultFunctionResolver dayOfWeek(FunctionName name) { implWithProperties( nullMissingHandlingWithProperties( (functionProperties, arg) -> - DateTimeFunction.dayOfWeekToday(functionProperties.getQueryStartClock())), + DateTimeFunctions.dayOfWeekToday(functionProperties.getQueryStartClock())), INTEGER, TIME), - impl(nullMissingHandling(DateTimeFunction::exprDayOfWeek), INTEGER, DATE), - impl(nullMissingHandling(DateTimeFunction::exprDayOfWeek), INTEGER, TIMESTAMP), - impl(nullMissingHandling(DateTimeFunction::exprDayOfWeek), INTEGER, STRING)); + impl(nullMissingHandling(DateTimeFunctions::exprDayOfWeek), INTEGER, DATE), + impl(nullMissingHandling(DateTimeFunctions::exprDayOfWeek), INTEGER, TIMESTAMP), + impl(nullMissingHandling(DateTimeFunctions::exprDayOfWeek), INTEGER, STRING)); } /** DAYOFYEAR(STRING/DATE/TIMESTAMP). return the day of the year for date (1-366). 
*/ @@ -582,111 +582,114 @@ private DefaultFunctionResolver dayOfYear(BuiltinFunctionName dayOfYear) { implWithProperties( nullMissingHandlingWithProperties( (functionProperties, arg) -> - DateTimeFunction.dayOfYearToday(functionProperties.getQueryStartClock())), + DateTimeFunctions.dayOfYearToday(functionProperties.getQueryStartClock())), INTEGER, TIME), - impl(nullMissingHandling(DateTimeFunction::exprDayOfYear), INTEGER, DATE), - impl(nullMissingHandling(DateTimeFunction::exprDayOfYear), INTEGER, TIMESTAMP), - impl(nullMissingHandling(DateTimeFunction::exprDayOfYear), INTEGER, STRING)); + impl(nullMissingHandling(DateTimeFunctions::exprDayOfYear), INTEGER, DATE), + impl(nullMissingHandling(DateTimeFunctions::exprDayOfYear), INTEGER, TIMESTAMP), + impl(nullMissingHandling(DateTimeFunctions::exprDayOfYear), INTEGER, STRING)); } private DefaultFunctionResolver extract() { return define( BuiltinFunctionName.EXTRACT.getName(), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprExtractForTime), + nullMissingHandlingWithProperties(DateTimeFunctions::exprExtractForTime), LONG, STRING, TIME), - impl(nullMissingHandling(DateTimeFunction::exprExtract), LONG, STRING, DATE), - impl(nullMissingHandling(DateTimeFunction::exprExtract), LONG, STRING, TIMESTAMP), - impl(nullMissingHandling(DateTimeFunction::exprExtract), LONG, STRING, STRING)); + impl(nullMissingHandling(DateTimeFunctions::exprExtract), LONG, STRING, DATE), + impl(nullMissingHandling(DateTimeFunctions::exprExtract), LONG, STRING, TIMESTAMP), + impl(nullMissingHandling(DateTimeFunctions::exprExtract), LONG, STRING, STRING)); } /** FROM_DAYS(LONG). return the date value given the day number N. */ private DefaultFunctionResolver from_days() { return define( BuiltinFunctionName.FROM_DAYS.getName(), - impl(nullMissingHandling(DateTimeFunction::exprFromDays), DATE, LONG)); + impl(nullMissingHandling(DateTimeFunctions::exprFromDays), DATE, LONG)); } private FunctionResolver from_unixtime() { return define( BuiltinFunctionName.FROM_UNIXTIME.getName(), - impl(nullMissingHandling(DateTimeFunction::exprFromUnixTime), TIMESTAMP, DOUBLE), + impl(nullMissingHandling(DateTimeFunctions::exprFromUnixTime), TIMESTAMP, DOUBLE), impl( - nullMissingHandling(DateTimeFunction::exprFromUnixTimeFormat), STRING, DOUBLE, STRING)); + nullMissingHandling(DateTimeFunctions::exprFromUnixTimeFormat), + STRING, + DOUBLE, + STRING)); } private DefaultFunctionResolver get_format() { return define( BuiltinFunctionName.GET_FORMAT.getName(), - impl(nullMissingHandling(DateTimeFunction::exprGetFormat), STRING, STRING, STRING)); + impl(nullMissingHandling(DateTimeFunctions::exprGetFormat), STRING, STRING, STRING)); } /** HOUR(STRING/TIME/DATE/TIMESTAMP). return the hour value for time. 
*/ private DefaultFunctionResolver hour(BuiltinFunctionName name) { return define( name.getName(), - impl(nullMissingHandling(DateTimeFunction::exprHour), INTEGER, STRING), - impl(nullMissingHandling(DateTimeFunction::exprHour), INTEGER, TIME), - impl(nullMissingHandling(DateTimeFunction::exprHour), INTEGER, DATE), - impl(nullMissingHandling(DateTimeFunction::exprHour), INTEGER, TIMESTAMP)); + impl(nullMissingHandling(DateTimeFunctions::exprHour), INTEGER, STRING), + impl(nullMissingHandling(DateTimeFunctions::exprHour), INTEGER, TIME), + impl(nullMissingHandling(DateTimeFunctions::exprHour), INTEGER, DATE), + impl(nullMissingHandling(DateTimeFunctions::exprHour), INTEGER, TIMESTAMP)); } private DefaultFunctionResolver last_day() { return define( BuiltinFunctionName.LAST_DAY.getName(), - impl(nullMissingHandling(DateTimeFunction::exprLastDay), DATE, STRING), + impl(nullMissingHandling(DateTimeFunctions::exprLastDay), DATE, STRING), implWithProperties( nullMissingHandlingWithProperties( (functionProperties, arg) -> - DateTimeFunction.exprLastDayToday(functionProperties.getQueryStartClock())), + DateTimeFunctions.exprLastDayToday(functionProperties.getQueryStartClock())), DATE, TIME), - impl(nullMissingHandling(DateTimeFunction::exprLastDay), DATE, DATE), - impl(nullMissingHandling(DateTimeFunction::exprLastDay), DATE, TIMESTAMP)); + impl(nullMissingHandling(DateTimeFunctions::exprLastDay), DATE, DATE), + impl(nullMissingHandling(DateTimeFunctions::exprLastDay), DATE, TIMESTAMP)); } private FunctionResolver makedate() { return define( BuiltinFunctionName.MAKEDATE.getName(), - impl(nullMissingHandling(DateTimeFunction::exprMakeDate), DATE, DOUBLE, DOUBLE)); + impl(nullMissingHandling(DateTimeFunctions::exprMakeDate), DATE, DOUBLE, DOUBLE)); } private FunctionResolver maketime() { return define( BuiltinFunctionName.MAKETIME.getName(), - impl(nullMissingHandling(DateTimeFunction::exprMakeTime), TIME, DOUBLE, DOUBLE, DOUBLE)); + impl(nullMissingHandling(DateTimeFunctions::exprMakeTime), TIME, DOUBLE, DOUBLE, DOUBLE)); } /** MICROSECOND(STRING/TIME/TIMESTAMP). return the microsecond value for time. */ private DefaultFunctionResolver microsecond() { return define( BuiltinFunctionName.MICROSECOND.getName(), - impl(nullMissingHandling(DateTimeFunction::exprMicrosecond), INTEGER, STRING), - impl(nullMissingHandling(DateTimeFunction::exprMicrosecond), INTEGER, TIME), - impl(nullMissingHandling(DateTimeFunction::exprMicrosecond), INTEGER, TIMESTAMP)); + impl(nullMissingHandling(DateTimeFunctions::exprMicrosecond), INTEGER, STRING), + impl(nullMissingHandling(DateTimeFunctions::exprMicrosecond), INTEGER, TIME), + impl(nullMissingHandling(DateTimeFunctions::exprMicrosecond), INTEGER, TIMESTAMP)); } /** MINUTE(STRING/TIME/TIMESTAMP). return the minute value for time. */ private DefaultFunctionResolver minute(BuiltinFunctionName name) { return define( name.getName(), - impl(nullMissingHandling(DateTimeFunction::exprMinute), INTEGER, STRING), - impl(nullMissingHandling(DateTimeFunction::exprMinute), INTEGER, TIME), - impl(nullMissingHandling(DateTimeFunction::exprMinute), INTEGER, DATE), - impl(nullMissingHandling(DateTimeFunction::exprMinute), INTEGER, TIMESTAMP)); + impl(nullMissingHandling(DateTimeFunctions::exprMinute), INTEGER, STRING), + impl(nullMissingHandling(DateTimeFunctions::exprMinute), INTEGER, TIME), + impl(nullMissingHandling(DateTimeFunctions::exprMinute), INTEGER, DATE), + impl(nullMissingHandling(DateTimeFunctions::exprMinute), INTEGER, TIMESTAMP)); } /** MINUTE(STRING/TIME/TIMESTAMP). 
return the minute value for time. */ private DefaultFunctionResolver minute_of_day() { return define( BuiltinFunctionName.MINUTE_OF_DAY.getName(), - impl(nullMissingHandling(DateTimeFunction::exprMinuteOfDay), INTEGER, STRING), - impl(nullMissingHandling(DateTimeFunction::exprMinuteOfDay), INTEGER, TIME), - impl(nullMissingHandling(DateTimeFunction::exprMinuteOfDay), INTEGER, DATE), - impl(nullMissingHandling(DateTimeFunction::exprMinuteOfDay), INTEGER, TIMESTAMP)); + impl(nullMissingHandling(DateTimeFunctions::exprMinuteOfDay), INTEGER, STRING), + impl(nullMissingHandling(DateTimeFunctions::exprMinuteOfDay), INTEGER, TIME), + impl(nullMissingHandling(DateTimeFunctions::exprMinuteOfDay), INTEGER, DATE), + impl(nullMissingHandling(DateTimeFunctions::exprMinuteOfDay), INTEGER, TIMESTAMP)); } /** MONTH(STRING/DATE/TIMESTAMP). return the month for date (1-12). */ @@ -696,21 +699,21 @@ private DefaultFunctionResolver month(BuiltinFunctionName month) { implWithProperties( nullMissingHandlingWithProperties( (functionProperties, arg) -> - DateTimeFunction.monthOfYearToday(functionProperties.getQueryStartClock())), + DateTimeFunctions.monthOfYearToday(functionProperties.getQueryStartClock())), INTEGER, TIME), - impl(nullMissingHandling(DateTimeFunction::exprMonth), INTEGER, DATE), - impl(nullMissingHandling(DateTimeFunction::exprMonth), INTEGER, TIMESTAMP), - impl(nullMissingHandling(DateTimeFunction::exprMonth), INTEGER, STRING)); + impl(nullMissingHandling(DateTimeFunctions::exprMonth), INTEGER, DATE), + impl(nullMissingHandling(DateTimeFunctions::exprMonth), INTEGER, TIMESTAMP), + impl(nullMissingHandling(DateTimeFunctions::exprMonth), INTEGER, STRING)); } /** MONTHNAME(STRING/DATE/TIMESTAMP). return the full name of the month for date. */ private DefaultFunctionResolver monthName() { return define( BuiltinFunctionName.MONTHNAME.getName(), - impl(nullMissingHandling(DateTimeFunction::exprMonthName), STRING, DATE), - impl(nullMissingHandling(DateTimeFunction::exprMonthName), STRING, TIMESTAMP), - impl(nullMissingHandling(DateTimeFunction::exprMonthName), STRING, STRING)); + impl(nullMissingHandling(DateTimeFunctions::exprMonthName), STRING, DATE), + impl(nullMissingHandling(DateTimeFunctions::exprMonthName), STRING, TIMESTAMP), + impl(nullMissingHandling(DateTimeFunctions::exprMonthName), STRING, STRING)); } /** @@ -720,7 +723,7 @@ private DefaultFunctionResolver monthName() { private DefaultFunctionResolver period_add() { return define( BuiltinFunctionName.PERIOD_ADD.getName(), - impl(nullMissingHandling(DateTimeFunction::exprPeriodAdd), INTEGER, INTEGER, INTEGER)); + impl(nullMissingHandling(DateTimeFunctions::exprPeriodAdd), INTEGER, INTEGER, INTEGER)); } /** @@ -731,35 +734,35 @@ private DefaultFunctionResolver period_add() { private DefaultFunctionResolver period_diff() { return define( BuiltinFunctionName.PERIOD_DIFF.getName(), - impl(nullMissingHandling(DateTimeFunction::exprPeriodDiff), INTEGER, INTEGER, INTEGER)); + impl(nullMissingHandling(DateTimeFunctions::exprPeriodDiff), INTEGER, INTEGER, INTEGER)); } /** QUARTER(STRING/DATE/TIMESTAMP). return the month for date (1-4). 
*/ private DefaultFunctionResolver quarter() { return define( BuiltinFunctionName.QUARTER.getName(), - impl(nullMissingHandling(DateTimeFunction::exprQuarter), INTEGER, DATE), - impl(nullMissingHandling(DateTimeFunction::exprQuarter), INTEGER, TIMESTAMP), - impl(nullMissingHandling(DateTimeFunction::exprQuarter), INTEGER, STRING)); + impl(nullMissingHandling(DateTimeFunctions::exprQuarter), INTEGER, DATE), + impl(nullMissingHandling(DateTimeFunctions::exprQuarter), INTEGER, TIMESTAMP), + impl(nullMissingHandling(DateTimeFunctions::exprQuarter), INTEGER, STRING)); } private DefaultFunctionResolver sec_to_time() { return define( BuiltinFunctionName.SEC_TO_TIME.getName(), - impl((nullMissingHandling(DateTimeFunction::exprSecToTime)), TIME, INTEGER), - impl((nullMissingHandling(DateTimeFunction::exprSecToTime)), TIME, LONG), - impl((nullMissingHandling(DateTimeFunction::exprSecToTimeWithNanos)), TIME, DOUBLE), - impl((nullMissingHandling(DateTimeFunction::exprSecToTimeWithNanos)), TIME, FLOAT)); + impl((nullMissingHandling(DateTimeFunctions::exprSecToTime)), TIME, INTEGER), + impl((nullMissingHandling(DateTimeFunctions::exprSecToTime)), TIME, LONG), + impl((nullMissingHandling(DateTimeFunctions::exprSecToTimeWithNanos)), TIME, DOUBLE), + impl((nullMissingHandling(DateTimeFunctions::exprSecToTimeWithNanos)), TIME, FLOAT)); } /** SECOND(STRING/TIME/TIMESTAMP). return the second value for time. */ private DefaultFunctionResolver second(BuiltinFunctionName name) { return define( name.getName(), - impl(nullMissingHandling(DateTimeFunction::exprSecond), INTEGER, STRING), - impl(nullMissingHandling(DateTimeFunction::exprSecond), INTEGER, TIME), - impl(nullMissingHandling(DateTimeFunction::exprSecond), INTEGER, DATE), - impl(nullMissingHandling(DateTimeFunction::exprSecond), INTEGER, TIMESTAMP)); + impl(nullMissingHandling(DateTimeFunctions::exprSecond), INTEGER, STRING), + impl(nullMissingHandling(DateTimeFunctions::exprSecond), INTEGER, TIME), + impl(nullMissingHandling(DateTimeFunctions::exprSecond), INTEGER, DATE), + impl(nullMissingHandling(DateTimeFunctions::exprSecond), INTEGER, TIMESTAMP)); } private DefaultFunctionResolver subdate() { @@ -767,8 +770,8 @@ private DefaultFunctionResolver subdate() { BuiltinFunctionName.SUBDATE.getName(), (SerializableFunction>[]) (Stream.concat( - get_date_add_date_sub_signatures(DateTimeFunction::exprSubDateInterval), - get_adddate_subdate_signatures(DateTimeFunction::exprSubDateDays)) + get_date_add_date_sub_signatures(DateTimeFunctions::exprSubDateInterval), + get_adddate_subdate_signatures(DateTimeFunctions::exprSubDateDays)) .toArray(SerializableFunction[]::new))); } @@ -785,41 +788,41 @@ private DefaultFunctionResolver subtime() { return define( BuiltinFunctionName.SUBTIME.getName(), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprSubTime), TIME, TIME, TIME), + nullMissingHandlingWithProperties(DateTimeFunctions::exprSubTime), TIME, TIME, TIME), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprSubTime), TIME, TIME, DATE), + nullMissingHandlingWithProperties(DateTimeFunctions::exprSubTime), TIME, TIME, DATE), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprSubTime), + nullMissingHandlingWithProperties(DateTimeFunctions::exprSubTime), TIME, TIME, TIMESTAMP), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprSubTime), + nullMissingHandlingWithProperties(DateTimeFunctions::exprSubTime), TIMESTAMP, TIMESTAMP, TIME), implWithProperties( - 
nullMissingHandlingWithProperties(DateTimeFunction::exprSubTime), + nullMissingHandlingWithProperties(DateTimeFunctions::exprSubTime), TIMESTAMP, TIMESTAMP, DATE), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprSubTime), + nullMissingHandlingWithProperties(DateTimeFunctions::exprSubTime), TIMESTAMP, DATE, TIME), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprSubTime), + nullMissingHandlingWithProperties(DateTimeFunctions::exprSubTime), TIMESTAMP, DATE, DATE), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprSubTime), + nullMissingHandlingWithProperties(DateTimeFunctions::exprSubTime), TIMESTAMP, DATE, TIMESTAMP), implWithProperties( - nullMissingHandlingWithProperties(DateTimeFunction::exprSubTime), + nullMissingHandlingWithProperties(DateTimeFunctions::exprSubTime), TIMESTAMP, TIMESTAMP, TIMESTAMP)); @@ -835,7 +838,7 @@ private DefaultFunctionResolver str_to_date() { implWithProperties( nullMissingHandlingWithProperties( (functionProperties, arg, format) -> - DateTimeFunction.exprStrToDate(functionProperties, arg, format)), + DateTimeFunctions.exprStrToDate(functionProperties, arg, format)), TIMESTAMP, STRING, STRING)); @@ -848,10 +851,10 @@ private DefaultFunctionResolver str_to_date() { private DefaultFunctionResolver time() { return define( BuiltinFunctionName.TIME.getName(), - impl(nullMissingHandling(DateTimeFunction::exprTime), TIME, STRING), - impl(nullMissingHandling(DateTimeFunction::exprTime), TIME, DATE), - impl(nullMissingHandling(DateTimeFunction::exprTime), TIME, TIME), - impl(nullMissingHandling(DateTimeFunction::exprTime), TIME, TIMESTAMP)); + impl(nullMissingHandling(DateTimeFunctions::exprTime), TIME, STRING), + impl(nullMissingHandling(DateTimeFunctions::exprTime), TIME, DATE), + impl(nullMissingHandling(DateTimeFunctions::exprTime), TIME, TIME), + impl(nullMissingHandling(DateTimeFunctions::exprTime), TIME, TIMESTAMP)); } /** @@ -867,16 +870,16 @@ private DefaultFunctionResolver time() { private DefaultFunctionResolver timediff() { return define( BuiltinFunctionName.TIMEDIFF.getName(), - impl(nullMissingHandling(DateTimeFunction::exprTimeDiff), TIME, TIME, TIME)); + impl(nullMissingHandling(DateTimeFunctions::exprTimeDiff), TIME, TIME, TIME)); } /** TIME_TO_SEC(STRING/TIME/TIMESTAMP). return the time argument, converted to seconds. 
*/ private DefaultFunctionResolver time_to_sec() { return define( BuiltinFunctionName.TIME_TO_SEC.getName(), - impl(nullMissingHandling(DateTimeFunction::exprTimeToSec), LONG, STRING), - impl(nullMissingHandling(DateTimeFunction::exprTimeToSec), LONG, TIME), - impl(nullMissingHandling(DateTimeFunction::exprTimeToSec), LONG, TIMESTAMP)); + impl(nullMissingHandling(DateTimeFunctions::exprTimeToSec), LONG, STRING), + impl(nullMissingHandling(DateTimeFunctions::exprTimeToSec), LONG, TIME), + impl(nullMissingHandling(DateTimeFunctions::exprTimeToSec), LONG, TIMESTAMP)); } /** @@ -914,7 +917,7 @@ private DefaultFunctionResolver timestampadd() { return define( BuiltinFunctionName.TIMESTAMPADD.getName(), impl( - nullMissingHandling(DateTimeFunction::exprTimestampAdd), + nullMissingHandling(DateTimeFunctions::exprTimestampAdd), TIMESTAMP, STRING, INTEGER, @@ -943,7 +946,7 @@ private DefaultFunctionResolver timestampdiff() { return define( BuiltinFunctionName.TIMESTAMPDIFF.getName(), impl( - nullMissingHandling(DateTimeFunction::exprTimestampDiff), + nullMissingHandling(DateTimeFunctions::exprTimestampDiff), TIMESTAMP, STRING, TIMESTAMP, @@ -962,9 +965,9 @@ private DefaultFunctionResolver timestampdiff() { private DefaultFunctionResolver to_days() { return define( BuiltinFunctionName.TO_DAYS.getName(), - impl(nullMissingHandling(DateTimeFunction::exprToDays), LONG, STRING), - impl(nullMissingHandling(DateTimeFunction::exprToDays), LONG, TIMESTAMP), - impl(nullMissingHandling(DateTimeFunction::exprToDays), LONG, DATE)); + impl(nullMissingHandling(DateTimeFunctions::exprToDays), LONG, STRING), + impl(nullMissingHandling(DateTimeFunctions::exprToDays), LONG, TIMESTAMP), + impl(nullMissingHandling(DateTimeFunctions::exprToDays), LONG, DATE)); } /** @@ -975,8 +978,8 @@ private DefaultFunctionResolver to_days() { private DefaultFunctionResolver to_seconds() { return define( BuiltinFunctionName.TO_SECONDS.getName(), - impl(nullMissingHandling(DateTimeFunction::exprToSeconds), LONG, TIMESTAMP), - impl(nullMissingHandling(DateTimeFunction::exprToSecondsForIntType), LONG, LONG)); + impl(nullMissingHandling(DateTimeFunctions::exprToSeconds), LONG, TIMESTAMP), + impl(nullMissingHandling(DateTimeFunctions::exprToSecondsForIntType), LONG, LONG)); } private FunctionResolver unix_timestamp() { @@ -984,11 +987,11 @@ private FunctionResolver unix_timestamp() { BuiltinFunctionName.UNIX_TIMESTAMP.getName(), implWithProperties( functionProperties -> - DateTimeFunction.unixTimeStamp(functionProperties.getQueryStartClock()), + DateTimeFunctions.unixTimeStamp(functionProperties.getQueryStartClock()), LONG), - impl(nullMissingHandling(DateTimeFunction::unixTimeStampOf), DOUBLE, DATE), - impl(nullMissingHandling(DateTimeFunction::unixTimeStampOf), DOUBLE, TIMESTAMP), - impl(nullMissingHandling(DateTimeFunction::unixTimeStampOf), DOUBLE, DOUBLE)); + impl(nullMissingHandling(DateTimeFunctions::unixTimeStampOf), DOUBLE, DATE), + impl(nullMissingHandling(DateTimeFunctions::unixTimeStampOf), DOUBLE, TIMESTAMP), + impl(nullMissingHandling(DateTimeFunctions::unixTimeStampOf), DOUBLE, DOUBLE)); } /** UTC_DATE(). 
return the current UTC Date in format yyyy-MM-dd */ @@ -1019,24 +1022,24 @@ private DefaultFunctionResolver week(BuiltinFunctionName week) { implWithProperties( nullMissingHandlingWithProperties( (functionProperties, arg) -> - DateTimeFunction.weekOfYearToday( + DateTimeFunctions.weekOfYearToday( DEFAULT_WEEK_OF_YEAR_MODE, functionProperties.getQueryStartClock())), INTEGER, TIME), - impl(nullMissingHandling(DateTimeFunction::exprWeekWithoutMode), INTEGER, DATE), - impl(nullMissingHandling(DateTimeFunction::exprWeekWithoutMode), INTEGER, TIMESTAMP), - impl(nullMissingHandling(DateTimeFunction::exprWeekWithoutMode), INTEGER, STRING), + impl(nullMissingHandling(DateTimeFunctions::exprWeekWithoutMode), INTEGER, DATE), + impl(nullMissingHandling(DateTimeFunctions::exprWeekWithoutMode), INTEGER, TIMESTAMP), + impl(nullMissingHandling(DateTimeFunctions::exprWeekWithoutMode), INTEGER, STRING), implWithProperties( nullMissingHandlingWithProperties( (functionProperties, time, modeArg) -> - DateTimeFunction.weekOfYearToday( + DateTimeFunctions.weekOfYearToday( modeArg, functionProperties.getQueryStartClock())), INTEGER, TIME, INTEGER), - impl(nullMissingHandling(DateTimeFunction::exprWeek), INTEGER, DATE, INTEGER), - impl(nullMissingHandling(DateTimeFunction::exprWeek), INTEGER, TIMESTAMP, INTEGER), - impl(nullMissingHandling(DateTimeFunction::exprWeek), INTEGER, STRING, INTEGER)); + impl(nullMissingHandling(DateTimeFunctions::exprWeek), INTEGER, DATE, INTEGER), + impl(nullMissingHandling(DateTimeFunctions::exprWeek), INTEGER, TIMESTAMP, INTEGER), + impl(nullMissingHandling(DateTimeFunctions::exprWeek), INTEGER, STRING, INTEGER)); } private DefaultFunctionResolver weekday() { @@ -1050,18 +1053,18 @@ private DefaultFunctionResolver weekday() { - 1)), INTEGER, TIME), - impl(nullMissingHandling(DateTimeFunction::exprWeekday), INTEGER, DATE), - impl(nullMissingHandling(DateTimeFunction::exprWeekday), INTEGER, TIMESTAMP), - impl(nullMissingHandling(DateTimeFunction::exprWeekday), INTEGER, STRING)); + impl(nullMissingHandling(DateTimeFunctions::exprWeekday), INTEGER, DATE), + impl(nullMissingHandling(DateTimeFunctions::exprWeekday), INTEGER, TIMESTAMP), + impl(nullMissingHandling(DateTimeFunctions::exprWeekday), INTEGER, STRING)); } /** YEAR(STRING/DATE/TIMESTAMP). return the year for date (1000-9999). */ private DefaultFunctionResolver year() { return define( BuiltinFunctionName.YEAR.getName(), - impl(nullMissingHandling(DateTimeFunction::exprYear), INTEGER, DATE), - impl(nullMissingHandling(DateTimeFunction::exprYear), INTEGER, TIMESTAMP), - impl(nullMissingHandling(DateTimeFunction::exprYear), INTEGER, STRING)); + impl(nullMissingHandling(DateTimeFunctions::exprYear), INTEGER, DATE), + impl(nullMissingHandling(DateTimeFunctions::exprYear), INTEGER, TIMESTAMP), + impl(nullMissingHandling(DateTimeFunctions::exprYear), INTEGER, STRING)); } /** YEARWEEK(DATE[,mode]). return the week number for date. 
*/ @@ -1075,9 +1078,9 @@ private DefaultFunctionResolver yearweek() { DEFAULT_WEEK_OF_YEAR_MODE, functionProperties.getQueryStartClock())), INTEGER, TIME), - impl(nullMissingHandling(DateTimeFunction::exprYearweekWithoutMode), INTEGER, DATE), - impl(nullMissingHandling(DateTimeFunction::exprYearweekWithoutMode), INTEGER, TIMESTAMP), - impl(nullMissingHandling(DateTimeFunction::exprYearweekWithoutMode), INTEGER, STRING), + impl(nullMissingHandling(DateTimeFunctions::exprYearweekWithoutMode), INTEGER, DATE), + impl(nullMissingHandling(DateTimeFunctions::exprYearweekWithoutMode), INTEGER, TIMESTAMP), + impl(nullMissingHandling(DateTimeFunctions::exprYearweekWithoutMode), INTEGER, STRING), implWithProperties( nullMissingHandlingWithProperties( (functionProperties, time, modeArg) -> @@ -1085,9 +1088,9 @@ private DefaultFunctionResolver yearweek() { INTEGER, TIME, INTEGER), - impl(nullMissingHandling(DateTimeFunction::exprYearweek), INTEGER, DATE, INTEGER), - impl(nullMissingHandling(DateTimeFunction::exprYearweek), INTEGER, TIMESTAMP, INTEGER), - impl(nullMissingHandling(DateTimeFunction::exprYearweek), INTEGER, STRING, INTEGER)); + impl(nullMissingHandling(DateTimeFunctions::exprYearweek), INTEGER, DATE, INTEGER), + impl(nullMissingHandling(DateTimeFunctions::exprYearweek), INTEGER, TIMESTAMP, INTEGER), + impl(nullMissingHandling(DateTimeFunctions::exprYearweek), INTEGER, STRING, INTEGER)); } /** diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index fd5ea14a2e..f8e9cf7c5f 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -132,6 +132,9 @@ public enum BuiltinFunctionName { /** Text Functions. */ TOSTRING(FunctionName.of("tostring")), + /** IP Functions. */ + CIDRMATCH(FunctionName.of("cidrmatch")), + /** Arithmetic Operators. */ ADD(FunctionName.of("+")), ADDFUNCTION(FunctionName.of("add")), @@ -228,6 +231,7 @@ public enum BuiltinFunctionName { CAST_TO_TIME(FunctionName.of("cast_to_time")), CAST_TO_TIMESTAMP(FunctionName.of("cast_to_timestamp")), CAST_TO_DATETIME(FunctionName.of("cast_to_datetime")), + CAST_TO_IP(FunctionName.of("cast_to_ip")), TYPEOF(FunctionName.of("typeof")), /** Relevance Function. 
*/ diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionRepository.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionRepository.java index 2e16d5f01f..79ea58b860 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionRepository.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionRepository.java @@ -24,16 +24,17 @@ import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.exception.ExpressionEvaluationException; import org.opensearch.sql.expression.Expression; -import org.opensearch.sql.expression.aggregation.AggregatorFunction; -import org.opensearch.sql.expression.datetime.DateTimeFunction; +import org.opensearch.sql.expression.aggregation.AggregatorFunctions; +import org.opensearch.sql.expression.datetime.DateTimeFunctions; import org.opensearch.sql.expression.datetime.IntervalClause; -import org.opensearch.sql.expression.operator.arthmetic.ArithmeticFunction; -import org.opensearch.sql.expression.operator.arthmetic.MathematicalFunction; -import org.opensearch.sql.expression.operator.convert.TypeCastOperator; -import org.opensearch.sql.expression.operator.predicate.BinaryPredicateOperator; -import org.opensearch.sql.expression.operator.predicate.UnaryPredicateOperator; +import org.opensearch.sql.expression.ip.IPFunctions; +import org.opensearch.sql.expression.operator.arthmetic.ArithmeticFunctions; +import org.opensearch.sql.expression.operator.arthmetic.MathematicalFunctions; +import org.opensearch.sql.expression.operator.convert.TypeCastOperators; +import org.opensearch.sql.expression.operator.predicate.BinaryPredicateOperators; +import org.opensearch.sql.expression.operator.predicate.UnaryPredicateOperators; import org.opensearch.sql.expression.system.SystemFunctions; -import org.opensearch.sql.expression.text.TextFunction; +import org.opensearch.sql.expression.text.TextFunctions; import org.opensearch.sql.expression.window.WindowFunctions; import org.opensearch.sql.storage.StorageEngine; @@ -69,18 +70,19 @@ public static synchronized BuiltinFunctionRepository getInstance() { instance = new BuiltinFunctionRepository(new HashMap<>()); // Register all built-in functions - ArithmeticFunction.register(instance); - BinaryPredicateOperator.register(instance); - MathematicalFunction.register(instance); - UnaryPredicateOperator.register(instance); - AggregatorFunction.register(instance); - DateTimeFunction.register(instance); + ArithmeticFunctions.register(instance); + BinaryPredicateOperators.register(instance); + MathematicalFunctions.register(instance); + UnaryPredicateOperators.register(instance); + AggregatorFunctions.register(instance); + DateTimeFunctions.register(instance); IntervalClause.register(instance); WindowFunctions.register(instance); - TextFunction.register(instance); - TypeCastOperator.register(instance); + TextFunctions.register(instance); + TypeCastOperators.register(instance); SystemFunctions.register(instance); OpenSearchFunctions.register(instance); + IPFunctions.register(instance); } return instance; } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/DefaultFunctionResolver.java b/core/src/main/java/org/opensearch/sql/expression/function/DefaultFunctionResolver.java index 5d0f31594b..e1d0052723 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/DefaultFunctionResolver.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/DefaultFunctionResolver.java @@ -61,7 +61,7 @@ public 
Pair resolve(FunctionSignature unreso && !FunctionSignature.isVarArgFunction(bestMatchEntry.getValue().getParamTypeList())) { throw new ExpressionEvaluationException( String.format( - "%s function expected %s, but get %s", + "%s function expected %s, but got %s", functionName, formatFunctions(functionBundle.keySet()), unresolvedSignature.formatTypes())); diff --git a/core/src/main/java/org/opensearch/sql/expression/ip/IPFunctions.java b/core/src/main/java/org/opensearch/sql/expression/ip/IPFunctions.java new file mode 100644 index 0000000000..8b3ee23014 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/ip/IPFunctions.java @@ -0,0 +1,60 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.ip; + +import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; +import static org.opensearch.sql.data.type.ExprCoreType.IP; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; +import static org.opensearch.sql.expression.function.FunctionDSL.define; +import static org.opensearch.sql.expression.function.FunctionDSL.impl; +import static org.opensearch.sql.expression.function.FunctionDSL.nullMissingHandling; + +import inet.ipaddr.IPAddress; +import lombok.experimental.UtilityClass; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.exception.SemanticCheckException; +import org.opensearch.sql.expression.function.BuiltinFunctionName; +import org.opensearch.sql.expression.function.BuiltinFunctionRepository; +import org.opensearch.sql.expression.function.DefaultFunctionResolver; +import org.opensearch.sql.utils.IPUtils; + +/** Utility class that defines and registers IP functions. */ +@UtilityClass +public class IPFunctions { + + public void register(BuiltinFunctionRepository repository) { + repository.register(cidrmatch()); + } + + private DefaultFunctionResolver cidrmatch() { + return define( + BuiltinFunctionName.CIDRMATCH.getName(), + impl(nullMissingHandling(IPFunctions::exprCidrMatch), BOOLEAN, IP, STRING)); + } + + /** + * Returns whether the given IP address is within the specified inclusive CIDR IP address range. + * Supports both IPv4 and IPv6 addresses. + * + * @param addressExprValue IP address (e.g. "198.51.100.14" or "2001:0db8::ff00:42:8329"). + * @param rangeExprValue IP address range string in CIDR notation (e.g. "198.51.100.0/24" or + * "2001:0db8::/32") + * @return true if the address is in the range; otherwise false. + * @throws SemanticCheckException if the address or range is not valid, or if they do not use the + * same version (IPv4 or IPv6). + */ + private ExprValue exprCidrMatch(ExprValue addressExprValue, ExprValue rangeExprValue) { + + IPAddress address = addressExprValue.ipValue(); + IPAddress range = IPUtils.toRange(rangeExprValue.stringValue()); + + return (IPUtils.compare(address, range.getLower()) < 0) + || (IPUtils.compare(address, range.getUpper()) > 0) + ? 
ExprValueUtils.LITERAL_FALSE + : ExprValueUtils.LITERAL_TRUE; + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/operator/arthmetic/ArithmeticFunction.java b/core/src/main/java/org/opensearch/sql/expression/operator/arthmetic/ArithmeticFunctions.java similarity index 99% rename from core/src/main/java/org/opensearch/sql/expression/operator/arthmetic/ArithmeticFunction.java rename to core/src/main/java/org/opensearch/sql/expression/operator/arthmetic/ArithmeticFunctions.java index 82b91e1d34..164de6d74c 100644 --- a/core/src/main/java/org/opensearch/sql/expression/operator/arthmetic/ArithmeticFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/operator/arthmetic/ArithmeticFunctions.java @@ -37,7 +37,7 @@ * module, Accepts two numbers and produces a number. */ @UtilityClass -public class ArithmeticFunction { +public class ArithmeticFunctions { /** * Register Arithmetic Function. * diff --git a/core/src/main/java/org/opensearch/sql/expression/operator/arthmetic/MathematicalFunction.java b/core/src/main/java/org/opensearch/sql/expression/operator/arthmetic/MathematicalFunctions.java similarity index 99% rename from core/src/main/java/org/opensearch/sql/expression/operator/arthmetic/MathematicalFunction.java rename to core/src/main/java/org/opensearch/sql/expression/operator/arthmetic/MathematicalFunctions.java index 22f4b76573..102834f60d 100644 --- a/core/src/main/java/org/opensearch/sql/expression/operator/arthmetic/MathematicalFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/operator/arthmetic/MathematicalFunctions.java @@ -46,7 +46,7 @@ import org.opensearch.sql.expression.function.SerializableFunction; @UtilityClass -public class MathematicalFunction { +public class MathematicalFunctions { /** * Register Mathematical Functions. * diff --git a/core/src/main/java/org/opensearch/sql/expression/operator/convert/TypeCastOperator.java b/core/src/main/java/org/opensearch/sql/expression/operator/convert/TypeCastOperators.java similarity index 94% rename from core/src/main/java/org/opensearch/sql/expression/operator/convert/TypeCastOperator.java rename to core/src/main/java/org/opensearch/sql/expression/operator/convert/TypeCastOperators.java index db4b29f3b9..b388f7d89a 100644 --- a/core/src/main/java/org/opensearch/sql/expression/operator/convert/TypeCastOperator.java +++ b/core/src/main/java/org/opensearch/sql/expression/operator/convert/TypeCastOperators.java @@ -11,6 +11,7 @@ import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import static org.opensearch.sql.data.type.ExprCoreType.FLOAT; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; +import static org.opensearch.sql.data.type.ExprCoreType.IP; import static org.opensearch.sql.data.type.ExprCoreType.LONG; import static org.opensearch.sql.data.type.ExprCoreType.SHORT; import static org.opensearch.sql.data.type.ExprCoreType.STRING; @@ -31,6 +32,7 @@ import org.opensearch.sql.data.model.ExprDoubleValue; import org.opensearch.sql.data.model.ExprFloatValue; import org.opensearch.sql.data.model.ExprIntegerValue; +import org.opensearch.sql.data.model.ExprIpValue; import org.opensearch.sql.data.model.ExprLongValue; import org.opensearch.sql.data.model.ExprShortValue; import org.opensearch.sql.data.model.ExprStringValue; @@ -42,7 +44,8 @@ import org.opensearch.sql.expression.function.FunctionDSL; @UtilityClass -public class TypeCastOperator { +public class TypeCastOperators { + /** Register Type Cast Operator. 
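// Illustrative sketch, not part of this patch: how the new IP helpers defined above could be
// exercised through the DSL additions from earlier in this diff (cidrmatch, castIp). The
// literal values are made up, and a no-argument Expression.valueOf() overload is assumed;
// otherwise an explicit value environment would be passed instead.
FunctionExpression inRange =
    DSL.cidrmatch(DSL.castIp(DSL.literal("198.51.100.14")), DSL.literal("198.51.100.0/24"));
// resolves to the (BOOLEAN, IP, STRING) implementation registered by IPFunctions.cidrmatch();
// here the address lies inside the /24 block, so the result is the true ExprValue
ExprValue inRangeResult = inRange.valueOf();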
*/ public static void register(BuiltinFunctionRepository repository) { repository.register(castToString()); @@ -53,6 +56,7 @@ public static void register(BuiltinFunctionRepository repository) { repository.register(castToFloat()); repository.register(castToDouble()); repository.register(castToBoolean()); + repository.register(castToIp()); repository.register(castToDate()); repository.register(castToTime()); repository.register(castToTimestamp()); @@ -172,6 +176,13 @@ private static DefaultFunctionResolver castToBoolean() { impl(nullMissingHandling((v) -> v), BOOLEAN, BOOLEAN)); } + private static DefaultFunctionResolver castToIp() { + return FunctionDSL.define( + BuiltinFunctionName.CAST_TO_IP.getName(), + impl(nullMissingHandling((v) -> new ExprIpValue(v.stringValue())), IP, STRING), + impl(nullMissingHandling((v) -> v), IP, IP)); + } + private static DefaultFunctionResolver castToDate() { return FunctionDSL.define( BuiltinFunctionName.CAST_TO_DATE.getName(), diff --git a/core/src/main/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperator.java b/core/src/main/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperators.java similarity index 97% rename from core/src/main/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperator.java rename to core/src/main/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperators.java index bf6b3c22f5..6adc4fb2a3 100644 --- a/core/src/main/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperator.java +++ b/core/src/main/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperators.java @@ -36,7 +36,7 @@ * equalTo, Compare the left expression and right expression and produces a Boolean. */ @UtilityClass -public class BinaryPredicateOperator { +public class BinaryPredicateOperators { /** * Register Binary Predicate Function. 
* @@ -118,7 +118,7 @@ public static void register(BuiltinFunctionRepository repository) { * * */ - private static Table andTable = + private static final Table andTable = new ImmutableTable.Builder() .put(LITERAL_TRUE, LITERAL_TRUE, LITERAL_TRUE) .put(LITERAL_TRUE, LITERAL_FALSE, LITERAL_FALSE) @@ -193,7 +193,7 @@ public static void register(BuiltinFunctionRepository repository) { * * */ - private static Table orTable = + private static final Table orTable = new ImmutableTable.Builder() .put(LITERAL_TRUE, LITERAL_TRUE, LITERAL_TRUE) .put(LITERAL_TRUE, LITERAL_FALSE, LITERAL_TRUE) @@ -268,7 +268,7 @@ public static void register(BuiltinFunctionRepository repository) { * * */ - private static Table xorTable = + private static final Table xorTable = new ImmutableTable.Builder() .put(LITERAL_TRUE, LITERAL_TRUE, LITERAL_FALSE) .put(LITERAL_TRUE, LITERAL_FALSE, LITERAL_TRUE) @@ -401,7 +401,7 @@ private static DefaultFunctionResolver notLike() { BuiltinFunctionName.NOT_LIKE.getName(), impl( nullMissingHandling( - (v1, v2) -> UnaryPredicateOperator.not(OperatorUtils.matches(v1, v2))), + (v1, v2) -> UnaryPredicateOperators.not(OperatorUtils.matches(v1, v2))), BOOLEAN, STRING, STRING)); diff --git a/core/src/main/java/org/opensearch/sql/expression/operator/predicate/UnaryPredicateOperator.java b/core/src/main/java/org/opensearch/sql/expression/operator/predicate/UnaryPredicateOperators.java similarity index 83% rename from core/src/main/java/org/opensearch/sql/expression/operator/predicate/UnaryPredicateOperator.java rename to core/src/main/java/org/opensearch/sql/expression/operator/predicate/UnaryPredicateOperators.java index ad9d9ac934..07bb5b2299 100644 --- a/core/src/main/java/org/opensearch/sql/expression/operator/predicate/UnaryPredicateOperator.java +++ b/core/src/main/java/org/opensearch/sql/expression/operator/predicate/UnaryPredicateOperators.java @@ -30,7 +30,8 @@ * The definition of unary predicate function not, Accepts one Boolean value and produces a Boolean. */ @UtilityClass -public class UnaryPredicateOperator { +public class UnaryPredicateOperators { + /** Register Unary Predicate Function. 
*/ public static void register(BuiltinFunctionRepository repository) { repository.register(not()); @@ -45,7 +46,7 @@ public static void register(BuiltinFunctionRepository repository) { private static DefaultFunctionResolver not() { return FunctionDSL.define( BuiltinFunctionName.NOT.getName(), - FunctionDSL.impl(UnaryPredicateOperator::not, BOOLEAN, BOOLEAN)); + FunctionDSL.impl(UnaryPredicateOperators::not, BOOLEAN, BOOLEAN)); } /** @@ -108,11 +109,10 @@ private static DefaultFunctionResolver ifFunction() { org.apache.commons.lang3.tuple.Pair>> functionsOne = typeList.stream() - .map(v -> impl((UnaryPredicateOperator::exprIf), v, BOOLEAN, v, v)) + .map(v -> impl((UnaryPredicateOperators::exprIf), v, BOOLEAN, v, v)) .collect(Collectors.toList()); - DefaultFunctionResolver functionResolver = FunctionDSL.define(functionName, functionsOne); - return functionResolver; + return FunctionDSL.define(functionName, functionsOne); } private static DefaultFunctionResolver ifNull() { @@ -125,31 +125,28 @@ private static DefaultFunctionResolver ifNull() { org.apache.commons.lang3.tuple.Pair>> functionsOne = typeList.stream() - .map(v -> impl((UnaryPredicateOperator::exprIfNull), v, v, v)) + .map(v -> impl((UnaryPredicateOperators::exprIfNull), v, v, v)) .collect(Collectors.toList()); - DefaultFunctionResolver functionResolver = FunctionDSL.define(functionName, functionsOne); - return functionResolver; + return FunctionDSL.define(functionName, functionsOne); } private static DefaultFunctionResolver nullIf() { FunctionName functionName = BuiltinFunctionName.NULLIF.getName(); List typeList = ExprCoreType.coreTypes(); - DefaultFunctionResolver functionResolver = - FunctionDSL.define( - functionName, - typeList.stream() - .map(v -> impl((UnaryPredicateOperator::exprNullIf), v, v, v)) - .collect(Collectors.toList())); - return functionResolver; + return FunctionDSL.define( + functionName, + typeList.stream() + .map(v -> impl((UnaryPredicateOperators::exprNullIf), v, v, v)) + .collect(Collectors.toList())); } /** * v2 if v1 is null. * - * @param v1 varable 1 - * @param v2 varable 2 + * @param v1 variable 1 + * @param v2 variable 2 * @return v2 if v1 is null */ public static ExprValue exprIfNull(ExprValue v1, ExprValue v2) { @@ -157,11 +154,11 @@ public static ExprValue exprIfNull(ExprValue v1, ExprValue v2) { } /** - * return null if v1 equls to v2. + * return null if v1 equals to v2. * - * @param v1 varable 1 - * @param v2 varable 2 - * @return null if v1 equls to v2 + * @param v1 variable 1 + * @param v2 variable 2 + * @return null if v1 equals to v2 */ public static ExprValue exprNullIf(ExprValue v1, ExprValue v2) { return v1.equals(v2) ? LITERAL_NULL : v1; diff --git a/core/src/main/java/org/opensearch/sql/expression/text/TextFunction.java b/core/src/main/java/org/opensearch/sql/expression/text/TextFunctions.java similarity index 94% rename from core/src/main/java/org/opensearch/sql/expression/text/TextFunction.java rename to core/src/main/java/org/opensearch/sql/expression/text/TextFunctions.java index d670843551..2c3bbf7efb 100644 --- a/core/src/main/java/org/opensearch/sql/expression/text/TextFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/text/TextFunctions.java @@ -38,8 +38,8 @@ * implementation should rely on ExprValue. */ @UtilityClass -public class TextFunction { - private static String EMPTY_STRING = ""; +public class TextFunctions { + private static final String EMPTY_STRING = ""; /** * Register String Functions. 
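// Illustrative sketch, not part of this patch: the two helpers documented above behave as
// follows (hypothetical values, relying only on the public exprIfNull/exprNullIf methods and
// the standard ExprValue model classes).
ExprValue fallback = new ExprIntegerValue(42);
// exprIfNull returns the second value when the first is null/missing, otherwise the first
assert UnaryPredicateOperators.exprIfNull(ExprValueUtils.nullValue(), fallback).equals(fallback);
// exprNullIf returns null when both arguments are equal, otherwise the first argument
assert UnaryPredicateOperators.exprNullIf(fallback, fallback).isNull();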
@@ -76,9 +76,9 @@ public void register(BuiltinFunctionRepository repository) { private DefaultFunctionResolver substringSubstr(FunctionName functionName) { return define( functionName, - impl(nullMissingHandling(TextFunction::exprSubstrStart), STRING, STRING, INTEGER), + impl(nullMissingHandling(TextFunctions::exprSubstrStart), STRING, STRING, INTEGER), impl( - nullMissingHandling(TextFunction::exprSubstrStartLength), + nullMissingHandling(TextFunctions::exprSubstrStartLength), STRING, STRING, INTEGER, @@ -267,7 +267,7 @@ private DefaultFunctionResolver strcmp() { private DefaultFunctionResolver right() { return define( BuiltinFunctionName.RIGHT.getName(), - impl(nullMissingHandling(TextFunction::exprRight), STRING, STRING, INTEGER)); + impl(nullMissingHandling(TextFunctions::exprRight), STRING, STRING, INTEGER)); } /** @@ -279,7 +279,7 @@ private DefaultFunctionResolver right() { private DefaultFunctionResolver left() { return define( BuiltinFunctionName.LEFT.getName(), - impl(nullMissingHandling(TextFunction::exprLeft), STRING, STRING, INTEGER)); + impl(nullMissingHandling(TextFunctions::exprLeft), STRING, STRING, INTEGER)); } /** @@ -292,7 +292,7 @@ private DefaultFunctionResolver left() { private DefaultFunctionResolver ascii() { return define( BuiltinFunctionName.ASCII.getName(), - impl(nullMissingHandling(TextFunction::exprAscii), INTEGER, STRING)); + impl(nullMissingHandling(TextFunctions::exprAscii), INTEGER, STRING)); } /** @@ -310,14 +310,15 @@ private DefaultFunctionResolver locate() { BuiltinFunctionName.LOCATE.getName(), impl( nullMissingHandling( - (SerializableBiFunction) TextFunction::exprLocate), + (SerializableBiFunction) + TextFunctions::exprLocate), INTEGER, STRING, STRING), impl( nullMissingHandling( (SerializableTriFunction) - TextFunction::exprLocate), + TextFunctions::exprLocate), INTEGER, STRING, STRING, @@ -337,7 +338,8 @@ private DefaultFunctionResolver position() { BuiltinFunctionName.POSITION.getName(), impl( nullMissingHandling( - (SerializableBiFunction) TextFunction::exprLocate), + (SerializableBiFunction) + TextFunctions::exprLocate), INTEGER, STRING, STRING)); @@ -353,7 +355,7 @@ private DefaultFunctionResolver position() { private DefaultFunctionResolver replace() { return define( BuiltinFunctionName.REPLACE.getName(), - impl(nullMissingHandling(TextFunction::exprReplace), STRING, STRING, STRING, STRING)); + impl(nullMissingHandling(TextFunctions::exprReplace), STRING, STRING, STRING, STRING)); } /** @@ -365,7 +367,7 @@ private DefaultFunctionResolver replace() { private DefaultFunctionResolver reverse() { return define( BuiltinFunctionName.REVERSE.getName(), - impl(nullMissingHandling(TextFunction::exprReverse), STRING, STRING)); + impl(nullMissingHandling(TextFunctions::exprReverse), STRING, STRING)); } private static ExprValue exprSubstrStart(ExprValue exprValue, ExprValue start) { diff --git a/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java b/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java index b53d17b38f..c988084d1b 100644 --- a/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java +++ b/core/src/main/java/org/opensearch/sql/planner/DefaultImplementor.java @@ -23,6 +23,7 @@ import org.opensearch.sql.planner.logical.LogicalRemove; import org.opensearch.sql.planner.logical.LogicalRename; import org.opensearch.sql.planner.logical.LogicalSort; +import org.opensearch.sql.planner.logical.LogicalTrendline; import org.opensearch.sql.planner.logical.LogicalValues; import 
org.opensearch.sql.planner.logical.LogicalWindow; import org.opensearch.sql.planner.physical.AggregationOperator; @@ -38,6 +39,8 @@ import org.opensearch.sql.planner.physical.RemoveOperator; import org.opensearch.sql.planner.physical.RenameOperator; import org.opensearch.sql.planner.physical.SortOperator; +import org.opensearch.sql.planner.physical.TakeOrderedOperator; +import org.opensearch.sql.planner.physical.TrendlineOperator; import org.opensearch.sql.planner.physical.ValuesOperator; import org.opensearch.sql.planner.physical.WindowOperator; import org.opensearch.sql.storage.read.TableScanBuilder; @@ -129,7 +132,13 @@ public PhysicalPlan visitValues(LogicalValues node, C context) { @Override public PhysicalPlan visitLimit(LogicalLimit node, C context) { - return new LimitOperator(visitChild(node, context), node.getLimit(), node.getOffset()); + PhysicalPlan child = visitChild(node, context); + // Optimize sort + limit to take ordered operator + if (child instanceof SortOperator sortChild) { + return new TakeOrderedOperator( + sortChild.getInput(), node.getLimit(), node.getOffset(), sortChild.getSortList()); + } + return new LimitOperator(child, node.getLimit(), node.getOffset()); } @Override @@ -159,6 +168,11 @@ public PhysicalPlan visitCloseCursor(LogicalCloseCursor node, C context) { return new CursorCloseOperator(visitChild(node, context)); } + @Override + public PhysicalPlan visitTrendline(LogicalTrendline plan, C context) { + return new TrendlineOperator(visitChild(plan, context), plan.getComputations()); + } + // Called when paging query requested without `FROM` clause only @Override public PhysicalPlan visitPaginate(LogicalPaginate plan, C context) { diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalNested.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalNested.java index e791a1fad1..089efe707e 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalNested.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalNested.java @@ -19,7 +19,7 @@ @Getter @ToString public class LogicalNested extends LogicalPlan { - private List> fields; + private final List> fields; private final List projectList; /** Constructor of LogicalNested. */ diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java index 2a886ba0ca..13c6d7a979 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanDSL.java @@ -15,6 +15,8 @@ import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.tree.RareTopN.CommandType; import org.opensearch.sql.ast.tree.Sort.SortOption; +import org.opensearch.sql.ast.tree.Trendline; +import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.expression.Expression; import org.opensearch.sql.expression.LiteralExpression; import org.opensearch.sql.expression.NamedExpression; @@ -130,6 +132,11 @@ public static LogicalPlan rareTopN( return new LogicalRareTopN(input, commandType, noOfResults, Arrays.asList(fields), groupByList); } + public static LogicalTrendline trendline( + LogicalPlan input, Pair... computations) { + return new LogicalTrendline(input, Arrays.asList(computations)); + } + @SafeVarargs public LogicalPlan values(List... 
values) { return new LogicalValues(Arrays.asList(values)); diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java index 156db35306..c9eedd8efc 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitor.java @@ -104,6 +104,10 @@ public R visitAD(LogicalAD plan, C context) { return visitNode(plan, context); } + public R visitTrendline(LogicalTrendline plan, C context) { + return visitNode(plan, context); + } + public R visitPaginate(LogicalPaginate plan, C context) { return visitNode(plan, context); } diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalTrendline.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalTrendline.java new file mode 100644 index 0000000000..3e992035e2 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalTrendline.java @@ -0,0 +1,42 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.planner.logical; + +import java.util.Collections; +import java.util.List; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.apache.commons.lang3.tuple.Pair; +import org.opensearch.sql.ast.tree.Trendline; +import org.opensearch.sql.data.type.ExprCoreType; + +/* + * Trendline logical plan. + */ +@Getter +@ToString +@EqualsAndHashCode(callSuper = true) +public class LogicalTrendline extends LogicalPlan { + private final List> computations; + + /** + * Constructor of LogicalTrendline. + * + * @param child child logical plan + * @param computations the computations for this trendline call. 
+ */ + public LogicalTrendline( + LogicalPlan child, List> computations) { + super(Collections.singletonList(child)); + this.computations = computations; + } + + @Override + public R accept(LogicalPlanNodeVisitor visitor, C context) { + return visitor.visitTrendline(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/planner/optimizer/LogicalPlanOptimizer.java b/core/src/main/java/org/opensearch/sql/planner/optimizer/LogicalPlanOptimizer.java index 5c115f0db8..e805b0dea5 100644 --- a/core/src/main/java/org/opensearch/sql/planner/optimizer/LogicalPlanOptimizer.java +++ b/core/src/main/java/org/opensearch/sql/planner/optimizer/LogicalPlanOptimizer.java @@ -12,6 +12,7 @@ import java.util.List; import java.util.stream.Collectors; import org.opensearch.sql.planner.logical.LogicalPlan; +import org.opensearch.sql.planner.optimizer.rule.EvalPushDown; import org.opensearch.sql.planner.optimizer.rule.MergeFilterAndFilter; import org.opensearch.sql.planner.optimizer.rule.PushFilterUnderSort; import org.opensearch.sql.planner.optimizer.rule.read.CreateTableScanBuilder; @@ -46,6 +47,7 @@ public static LogicalPlanOptimizer create() { */ new MergeFilterAndFilter(), new PushFilterUnderSort(), + EvalPushDown.PUSH_DOWN_LIMIT, /* * Phase 2: Transformations that rely on data source push down capability */ diff --git a/core/src/main/java/org/opensearch/sql/planner/optimizer/pattern/Patterns.java b/core/src/main/java/org/opensearch/sql/planner/optimizer/pattern/Patterns.java index ee4e9a20cc..ef2607e018 100644 --- a/core/src/main/java/org/opensearch/sql/planner/optimizer/pattern/Patterns.java +++ b/core/src/main/java/org/opensearch/sql/planner/optimizer/pattern/Patterns.java @@ -12,6 +12,7 @@ import java.util.Optional; import lombok.experimental.UtilityClass; import org.opensearch.sql.planner.logical.LogicalAggregation; +import org.opensearch.sql.planner.logical.LogicalEval; import org.opensearch.sql.planner.logical.LogicalFilter; import org.opensearch.sql.planner.logical.LogicalHighlight; import org.opensearch.sql.planner.logical.LogicalLimit; @@ -63,6 +64,10 @@ public static Pattern project(Pattern return Pattern.typeOf(LogicalProject.class).with(source(pattern)); } + public static Pattern evalCapture() { + return Pattern.typeOf(LogicalEval.class).capturedAs(Capture.newCapture()); + } + /** Pattern for {@link TableScanBuilder} and capture it meanwhile. 
*/ public static Pattern scanBuilder() { return Pattern.typeOf(TableScanBuilder.class).capturedAs(Capture.newCapture()); diff --git a/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/EvalPushDown.java b/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/EvalPushDown.java new file mode 100644 index 0000000000..17eaed0e8c --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/planner/optimizer/rule/EvalPushDown.java @@ -0,0 +1,82 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.planner.optimizer.rule; + +import static org.opensearch.sql.planner.optimizer.pattern.Patterns.evalCapture; +import static org.opensearch.sql.planner.optimizer.pattern.Patterns.limit; +import static org.opensearch.sql.planner.optimizer.rule.EvalPushDown.EvalPushDownBuilder.match; + +import com.facebook.presto.matching.Capture; +import com.facebook.presto.matching.Captures; +import com.facebook.presto.matching.Pattern; +import com.facebook.presto.matching.pattern.CapturePattern; +import com.facebook.presto.matching.pattern.WithPattern; +import java.util.List; +import java.util.function.BiFunction; +import lombok.Getter; +import lombok.experimental.Accessors; +import org.opensearch.sql.planner.logical.LogicalEval; +import org.opensearch.sql.planner.logical.LogicalLimit; +import org.opensearch.sql.planner.logical.LogicalPlan; +import org.opensearch.sql.planner.optimizer.Rule; + +/** + * Rule template for all rules related to push down logical plans under eval, so these plans can + * avoid blocking by eval and may have chances to be pushed down into table scan by rules in {@link + * org.opensearch.sql.planner.optimizer.rule.read.TableScanPushDown}. + */ +public class EvalPushDown implements Rule { + + // TODO: Add more rules to push down sort and project + /** Push down optimize rule for limit operator. 
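The rule moves a LogicalLimit below an adjacent LogicalEval so the limit can keep sinking toward the table scan. A hedged before/after sketch, assuming the existing LogicalPlanDSL.limit and LogicalPlanDSL.eval helpers; the field names are invented, static imports are omitted, and `relation` stands for any LogicalPlan input.

    // before: limit(10, 0) sits on top of eval(b = a + 1) and blocks push-down
    LogicalPlan limitOverEval =
        LogicalPlanDSL.limit(
            LogicalPlanDSL.eval(
                relation,
                Pair.of(DSL.ref("b", INTEGER), DSL.add(DSL.ref("a", INTEGER), DSL.literal(1)))),
            10, 0);

    // after PUSH_DOWN_LIMIT: eval is row-preserving, so the same rows survive, while the
    // limit now sits directly above the relation where TableScanPushDown can fold it in
    LogicalPlan evalOverLimit =
        LogicalPlanDSL.eval(
            LogicalPlanDSL.limit(relation, 10, 0),
            Pair.of(DSL.ref("b", INTEGER), DSL.add(DSL.ref("a", INTEGER), DSL.literal(1))));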
Transform `limit -> eval` to `eval -> limit` */ + public static final Rule PUSH_DOWN_LIMIT = + match(limit(evalCapture())) + .apply( + (limit, logicalEval) -> { + List child = logicalEval.getChild(); + limit.replaceChildPlans(child); + logicalEval.replaceChildPlans(List.of(limit)); + return logicalEval; + }); + + private final Capture capture; + + @Accessors(fluent = true) + @Getter + private final Pattern pattern; + + private final BiFunction pushDownFunction; + + @SuppressWarnings("unchecked") + public EvalPushDown( + WithPattern pattern, BiFunction pushDownFunction) { + this.pattern = pattern; + this.capture = ((CapturePattern) pattern.getPattern()).capture(); + this.pushDownFunction = pushDownFunction; + } + + @Override + public LogicalPlan apply(T plan, Captures captures) { + LogicalEval logicalEval = captures.get(capture); + return pushDownFunction.apply(plan, logicalEval); + } + + static class EvalPushDownBuilder { + + private WithPattern pattern; + + public static EvalPushDown.EvalPushDownBuilder match( + Pattern pattern) { + EvalPushDown.EvalPushDownBuilder builder = new EvalPushDown.EvalPushDownBuilder<>(); + builder.pattern = (WithPattern) pattern; + return builder; + } + + public EvalPushDown apply(BiFunction pushDownFunction) { + return new EvalPushDown<>(pattern, pushDownFunction); + } + } +} diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/FilterOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/FilterOperator.java index ec61d53163..088dd07f8d 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/FilterOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/FilterOperator.java @@ -13,13 +13,13 @@ import lombok.ToString; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.expression.Expression; -import org.opensearch.sql.expression.operator.predicate.BinaryPredicateOperator; +import org.opensearch.sql.expression.operator.predicate.BinaryPredicateOperators; import org.opensearch.sql.storage.bindingtuple.BindingTuple; /** * The Filter operator represents WHERE clause and uses the conditions to evaluate the input {@link * BindingTuple}. The Filter operator only returns the results that evaluated to true. The NULL and - * MISSING are handled by the logic defined in {@link BinaryPredicateOperator}. + * MISSING are handled by the logic defined in {@link BinaryPredicateOperators}. 
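The change below also reworks the operator's iteration so checking and advancing are decoupled: hasNext() and next() both lazily prepare the next matching row, and consuming it resets the prepared state. A minimal, self-contained sketch of that prepare-on-demand pattern over a plain java.util.Iterator; nothing plugin-specific is assumed.

    import java.util.Iterator;
    import java.util.List;
    import java.util.NoSuchElementException;
    import java.util.function.Predicate;

    final class FilteringIterator<T> implements Iterator<T> {
      private final Iterator<T> input;
      private final Predicate<T> condition;
      private T next;
      private boolean nextPrepared;

      FilteringIterator(Iterator<T> input, Predicate<T> condition) {
        this.input = input;
        this.condition = condition;
      }

      @Override
      public boolean hasNext() {
        if (!nextPrepared) {
          prepareNext();
        }
        return next != null;
      }

      @Override
      public T next() {
        if (!hasNext()) {
          throw new NoSuchElementException();
        }
        T result = next;
        next = null;
        nextPrepared = false;
        return result;
      }

      // Advance the input until a row passes the condition, then remember it.
      private void prepareNext() {
        next = null;
        while (input.hasNext()) {
          T candidate = input.next();
          if (condition.test(candidate)) {
            next = candidate;
            break;
          }
        }
        nextPrepared = true;
      }
    }

    // Usage: only even numbers survive, however hasNext()/next() calls are interleaved.
    Iterator<Integer> evens =
        new FilteringIterator<>(List.of(1, 2, 3, 4).iterator(), n -> n % 2 == 0);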
*/ @EqualsAndHashCode(callSuper = false) @ToString @@ -28,6 +28,7 @@ public class FilterOperator extends PhysicalPlan { @Getter private final PhysicalPlan input; @Getter private final Expression conditions; @ToString.Exclude private ExprValue next = null; + @ToString.Exclude private boolean nextPrepared = false; @Override public R accept(PhysicalPlanNodeVisitor visitor, C context) { @@ -41,19 +42,34 @@ public List getChild() { @Override public boolean hasNext() { + if (!nextPrepared) { + prepareNext(); + } + return next != null; + } + + @Override + public ExprValue next() { + if (!nextPrepared) { + prepareNext(); + } + ExprValue result = next; + next = null; + nextPrepared = false; + return result; + } + + private void prepareNext() { while (input.hasNext()) { ExprValue inputValue = input.next(); ExprValue exprValue = conditions.valueOf(inputValue.bindingTuples()); - if (!(exprValue.isNull() || exprValue.isMissing()) && (exprValue.booleanValue())) { + if (!(exprValue.isNull() || exprValue.isMissing()) && exprValue.booleanValue()) { next = inputValue; - return true; + nextPrepared = true; + return; } } - return false; - } - - @Override - public ExprValue next() { - return next; + next = null; + nextPrepared = true; } } diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/NestedOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/NestedOperator.java index 8539df5463..fb5ec276ac 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/NestedOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/NestedOperator.java @@ -37,7 +37,7 @@ public class NestedOperator extends PhysicalPlan { @Getter private final Set fields; // Needs to be a Set to match legacy implementation @Getter private final Map> groupedPathsAndFields; @EqualsAndHashCode.Exclude private List> result = new ArrayList<>(); - @EqualsAndHashCode.Exclude private List nonNestedFields = new ArrayList<>(); + @EqualsAndHashCode.Exclude private final List nonNestedFields = new ArrayList<>(); @EqualsAndHashCode.Exclude private ListIterator> flattenedResult = result.listIterator(); diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java index 147f0e08dc..0c2764112d 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanDSL.java @@ -64,6 +64,11 @@ public static SortOperator sort(PhysicalPlan input, Pair return new SortOperator(input, Arrays.asList(sorts)); } + public static TakeOrderedOperator takeOrdered( + PhysicalPlan input, Integer limit, Integer offset, Pair... sorts) { + return new TakeOrderedOperator(input, limit, offset, Arrays.asList(sorts)); + } + public static DedupeOperator dedupe(PhysicalPlan input, Expression... 
expressions) { return new DedupeOperator(input, Arrays.asList(expressions)); } diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java index 99b5cc8020..66c7219e39 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitor.java @@ -72,6 +72,10 @@ public R visitSort(SortOperator node, C context) { return visitNode(node, context); } + public R visitTakeOrdered(TakeOrderedOperator node, C context) { + return visitNode(node, context); + } + public R visitRareTopN(RareTopNOperator node, C context) { return visitNode(node, context); } @@ -92,6 +96,10 @@ public R visitML(PhysicalPlan node, C context) { return visitNode(node, context); } + public R visitTrendline(TrendlineOperator node, C context) { + return visitNode(node, context); + } + public R visitCursorClose(CursorCloseOperator node, C context) { return visitNode(node, context); } diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/SortHelper.java b/core/src/main/java/org/opensearch/sql/planner/physical/SortHelper.java new file mode 100644 index 0000000000..ea117ee6df --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/planner/physical/SortHelper.java @@ -0,0 +1,70 @@ +package org.opensearch.sql.planner.physical; + +import static org.opensearch.sql.ast.tree.Sort.NullOrder.NULL_FIRST; +import static org.opensearch.sql.ast.tree.Sort.SortOrder.ASC; + +import com.google.common.collect.Ordering; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import org.apache.commons.lang3.tuple.Pair; +import org.opensearch.sql.ast.tree.Sort.SortOption; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.utils.ExprValueOrdering; +import org.opensearch.sql.expression.Expression; + +public interface SortHelper { + + /** + * Construct an expr comparator for sorting on ExprValue. + * + * @param sortList list of sort fields and their related sort options. + * @return A comparator for ExprValue + */ + static Comparator constructExprComparator( + List> sortList) { + return (o1, o2) -> compareWithExpressions(o1, o2, constructComparator(sortList)); + } + + /** + * Construct an expr ordering for efficiently taking the top-k elements on ExprValue. + * + * @param sortList list of sort fields and their related sort options. + * @return An guava ordering for ExprValue + */ + static Ordering constructExprOrdering(List> sortList) { + return Ordering.from(constructExprComparator(sortList)); + } + + private static List>> constructComparator( + List> sortList) { + List>> comparators = new ArrayList<>(); + for (Pair pair : sortList) { + SortOption option = pair.getLeft(); + ExprValueOrdering ordering = + ASC.equals(option.getSortOrder()) + ? ExprValueOrdering.natural() + : ExprValueOrdering.natural().reverse(); + ordering = + NULL_FIRST.equals(option.getNullOrder()) ? 
ordering.nullsFirst() : ordering.nullsLast(); + comparators.add(Pair.of(pair.getRight(), ordering)); + } + return comparators; + } + + private static int compareWithExpressions( + ExprValue o1, ExprValue o2, List>> comparators) { + for (Pair> comparator : comparators) { + Expression expression = comparator.getKey(); + int result = + comparator + .getValue() + .compare( + expression.valueOf(o1.bindingTuples()), expression.valueOf(o2.bindingTuples())); + if (result != 0) { + return result; + } + } + return 0; + } +} diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/SortOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/SortOperator.java index e3116baedf..b635f01d18 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/SortOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/SortOperator.java @@ -5,25 +5,18 @@ package org.opensearch.sql.planner.physical; -import static org.opensearch.sql.ast.tree.Sort.NullOrder.NULL_FIRST; -import static org.opensearch.sql.ast.tree.Sort.SortOrder.ASC; - import java.util.Collections; import java.util.Comparator; import java.util.Iterator; import java.util.List; import java.util.PriorityQueue; -import lombok.Builder; import lombok.EqualsAndHashCode; import lombok.Getter; -import lombok.Singular; import lombok.ToString; import org.apache.commons.lang3.tuple.Pair; import org.opensearch.sql.ast.tree.Sort.SortOption; import org.opensearch.sql.data.model.ExprValue; -import org.opensearch.sql.data.utils.ExprValueOrdering; import org.opensearch.sql.expression.Expression; -import org.opensearch.sql.planner.physical.SortOperator.Sorter.SorterBuilder; /** * Sort Operator.The input data is sorted by the sort fields in the {@link SortOperator#sortList}. @@ -36,7 +29,7 @@ public class SortOperator extends PhysicalPlan { @Getter private final PhysicalPlan input; @Getter private final List> sortList; - @EqualsAndHashCode.Exclude private final Sorter sorter; + @EqualsAndHashCode.Exclude private final Comparator sorter; @EqualsAndHashCode.Exclude private Iterator iterator; /** @@ -49,18 +42,7 @@ public class SortOperator extends PhysicalPlan { public SortOperator(PhysicalPlan input, List> sortList) { this.input = input; this.sortList = sortList; - SorterBuilder sorterBuilder = Sorter.builder(); - for (Pair pair : sortList) { - SortOption option = pair.getLeft(); - ExprValueOrdering ordering = - ASC.equals(option.getSortOrder()) - ? ExprValueOrdering.natural() - : ExprValueOrdering.natural().reverse(); - ordering = - NULL_FIRST.equals(option.getNullOrder()) ? 
ordering.nullsFirst() : ordering.nullsLast(); - sorterBuilder.comparator(Pair.of(pair.getRight(), ordering)); - } - this.sorter = sorterBuilder.build(); + this.sorter = SortHelper.constructExprComparator(sortList); } @Override @@ -94,27 +76,6 @@ public ExprValue next() { return iterator.next(); } - @Builder - public static class Sorter implements Comparator { - @Singular private final List>> comparators; - - @Override - public int compare(ExprValue o1, ExprValue o2) { - for (Pair> comparator : comparators) { - Expression expression = comparator.getKey(); - int result = - comparator - .getValue() - .compare( - expression.valueOf(o1.bindingTuples()), expression.valueOf(o2.bindingTuples())); - if (result != 0) { - return result; - } - } - return 0; - } - } - private Iterator iterator(PriorityQueue result) { return new Iterator() { @Override diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/TakeOrderedOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/TakeOrderedOperator.java new file mode 100644 index 0000000000..a6e0f968e6 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/planner/physical/TakeOrderedOperator.java @@ -0,0 +1,88 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.planner.physical; + +import com.google.common.collect.Ordering; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.apache.commons.lang3.tuple.Pair; +import org.opensearch.sql.ast.tree.Sort.SortOption; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.expression.Expression; + +/** + * TakeOrdered Operator. This operator will sort input data as the order of {@link this#sortList} + * specifies and return {@link this#limit} rows from the {@link this#offset} index. + * + *
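Looking back at SortHelper: constructComparator builds one ordering per (SortOption, Expression) pair, and compareWithExpressions consults them left to right until one is decisive. The same shape with plain java.util.Comparator chaining, on an invented record type:

    import java.util.Comparator;
    import java.util.List;

    record Row(Integer age, String name) {}

    // ASC with nulls first on age, then DESC with nulls last on name; later comparators
    // only matter when the earlier ones report a tie, as in compareWithExpressions.
    Comparator<Row> byAgeThenName =
        Comparator.comparing(Row::age, Comparator.nullsFirst(Comparator.<Integer>naturalOrder()))
            .thenComparing(Row::name, Comparator.nullsLast(Comparator.<String>reverseOrder()));

    List<Row> sorted =
        List.of(new Row(30, "b"), new Row(null, "a"), new Row(30, "a")).stream()
            .sorted(byAgeThenName)
            .toList();
    // -> [Row(null, "a"), Row(30, "b"), Row(30, "a")]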
<p>
Functionally, this operator is a combination of {@link SortOperator} and {@link + * LimitOperator}. But it can reduce the time complexity from O(n log n) to O(n), and memory from O(n) + * to O(k), by using Guava's {@link com.google.common.collect.Ordering}. + + *
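The memory bound comes from never holding more than offset + limit rows: Guava's Ordering.leastOf streams the input once and keeps only the k smallest elements. A self-contained illustration with plain integers, mirroring what open() does below:

    import com.google.common.collect.Ordering;
    import java.util.Iterator;
    import java.util.List;

    List<Integer> input = List.of(7, 3, 9, 1, 5, 8, 2);
    int limit = 3;
    int offset = 1;

    // Keep only the (offset + limit) smallest, then skip the offset -- the same
    // expression TakeOrderedOperator.open() evaluates over its child plan.
    Iterator<Integer> it =
        Ordering.<Integer>natural().leastOf(input, offset + limit).stream().skip(offset).iterator();
    // it yields 2, 3, 5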
<p>
Overall, it's an optimization to replace `Limit(Sort)` in physical plan level since it's all + * about execution. Because most execution engine may not support this operator, it doesn't have a + * related logical operator. + */ +@ToString +@EqualsAndHashCode(callSuper = false) +public class TakeOrderedOperator extends PhysicalPlan { + @Getter private final PhysicalPlan input; + + @Getter private final List> sortList; + @Getter private final Integer limit; + @Getter private final Integer offset; + @EqualsAndHashCode.Exclude private final Ordering ordering; + @EqualsAndHashCode.Exclude private Iterator iterator; + + /** + * TakeOrdered Operator Constructor. + * + * @param input input {@link PhysicalPlan} + * @param limit the limit value from LimitOperator + * @param offset the offset value from LimitOperator + * @param sortList list of sort field from SortOperator + */ + public TakeOrderedOperator( + PhysicalPlan input, + Integer limit, + Integer offset, + List> sortList) { + this.input = input; + this.sortList = sortList; + this.limit = limit; + this.offset = offset; + this.ordering = SortHelper.constructExprOrdering(sortList); + } + + @Override + public R accept(PhysicalPlanNodeVisitor visitor, C context) { + return visitor.visitTakeOrdered(this, context); + } + + @Override + public void open() { + super.open(); + iterator = ordering.leastOf(input, offset + limit).stream().skip(offset).iterator(); + } + + @Override + public List getChild() { + return Collections.singletonList(input); + } + + @Override + public boolean hasNext() { + return iterator.hasNext(); + } + + @Override + public ExprValue next() { + return iterator.next(); + } +} diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/TrendlineOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/TrendlineOperator.java new file mode 100644 index 0000000000..7bf10964cf --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/planner/physical/TrendlineOperator.java @@ -0,0 +1,317 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.planner.physical; + +import static java.time.temporal.ChronoUnit.MILLIS; + +import com.google.common.collect.EvictingQueue; +import com.google.common.collect.ImmutableMap.Builder; +import java.time.Instant; +import java.time.LocalTime; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.apache.commons.lang3.tuple.Pair; +import org.opensearch.sql.ast.tree.Trendline; +import org.opensearch.sql.data.model.ExprTupleValue; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.LiteralExpression; + +/** Trendline command implementation */ +@ToString +@EqualsAndHashCode(callSuper = false) +public class TrendlineOperator extends PhysicalPlan { + @Getter private final PhysicalPlan input; + @Getter private final List> computations; + @EqualsAndHashCode.Exclude private final List accumulators; + @EqualsAndHashCode.Exclude private final Map fieldToIndexMap; + @EqualsAndHashCode.Exclude private final HashSet aliases; + + public TrendlineOperator( + PhysicalPlan input, List> computations) { + this.input = input; + 
this.computations = computations; + this.accumulators = computations.stream().map(TrendlineOperator::createAccumulator).toList(); + fieldToIndexMap = new HashMap<>(computations.size()); + aliases = new HashSet<>(computations.size()); + for (int i = 0; i < computations.size(); ++i) { + final Trendline.TrendlineComputation computation = computations.get(i).getKey(); + fieldToIndexMap.put(computation.getDataField().getChild().get(0).toString(), i); + aliases.add(computation.getAlias()); + } + } + + @Override + public R accept(PhysicalPlanNodeVisitor visitor, C context) { + return visitor.visitTrendline(this, context); + } + + @Override + public List getChild() { + return Collections.singletonList(input); + } + + @Override + public boolean hasNext() { + return getChild().getFirst().hasNext(); + } + + @Override + public ExprValue next() { + final ExprValue result; + final ExprValue next = input.next(); + final Map inputStruct = consumeInputTuple(next); + final Builder mapBuilder = new Builder<>(); + mapBuilder.putAll(inputStruct); + + // Add calculated trendline values, which might overwrite existing fields from the input. + for (int i = 0; i < accumulators.size(); ++i) { + final ExprValue calculateResult = accumulators.get(i).calculate(); + final String field = computations.get(i).getKey().getAlias(); + if (calculateResult != null) { + mapBuilder.put(field, calculateResult); + } + } + + result = ExprTupleValue.fromExprValueMap(mapBuilder.buildKeepingLast()); + return result; + } + + private Map consumeInputTuple(ExprValue inputValue) { + final Map tupleValue = ExprValueUtils.getTupleValue(inputValue); + for (String bindName : tupleValue.keySet()) { + final Integer index = fieldToIndexMap.get(bindName); + if (index != null) { + final ExprValue fieldValue = tupleValue.get(bindName); + if (!fieldValue.isNull()) { + accumulators.get(index).accumulate(fieldValue); + } + } + } + tupleValue.keySet().removeAll(aliases); + return tupleValue; + } + + private static TrendlineAccumulator createAccumulator( + Pair computation) { + // Add a switch statement based on computation type to choose the accumulator when more + // types of computations are supported. + return new SimpleMovingAverageAccumulator(computation.getKey(), computation.getValue()); + } + + /** Maintains stateful information for calculating the trendline. 
*/ + private interface TrendlineAccumulator { + void accumulate(ExprValue value); + + ExprValue calculate(); + + static ArithmeticEvaluator getEvaluator(ExprCoreType type) { + switch (type) { + case DOUBLE: + return NumericArithmeticEvaluator.INSTANCE; + case DATE: + return DateArithmeticEvaluator.INSTANCE; + case TIME: + return TimeArithmeticEvaluator.INSTANCE; + case TIMESTAMP: + return TimestampArithmeticEvaluator.INSTANCE; + } + throw new IllegalArgumentException( + String.format("Invalid type %s used for moving average.", type.typeName())); + } + } + + private static class SimpleMovingAverageAccumulator implements TrendlineAccumulator { + private final LiteralExpression dataPointsNeeded; + private final EvictingQueue receivedValues; + private final ArithmeticEvaluator evaluator; + private Expression runningTotal = null; + + public SimpleMovingAverageAccumulator( + Trendline.TrendlineComputation computation, ExprCoreType type) { + dataPointsNeeded = DSL.literal(computation.getNumberOfDataPoints().doubleValue()); + receivedValues = EvictingQueue.create(computation.getNumberOfDataPoints()); + evaluator = TrendlineAccumulator.getEvaluator(type); + } + + @Override + public void accumulate(ExprValue value) { + if (dataPointsNeeded.valueOf().integerValue() == 1) { + runningTotal = evaluator.calculateFirstTotal(Collections.singletonList(value)); + receivedValues.add(value); + return; + } + + final ExprValue valueToRemove; + if (receivedValues.size() == dataPointsNeeded.valueOf().integerValue()) { + valueToRemove = receivedValues.remove(); + } else { + valueToRemove = null; + } + receivedValues.add(value); + + if (receivedValues.size() == dataPointsNeeded.valueOf().integerValue()) { + if (runningTotal != null) { + // We can use the previous calculation. + // Subtract the evicted value and add the new value. + // Refactored, that would be previous + (newValue - oldValue). + runningTotal = evaluator.add(runningTotal, value, valueToRemove); + } else { + // This is the first average calculation so sum the entire receivedValues dataset. 
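Stripped of the Expression/DSL plumbing, the accumulator above is a classic windowed running total. A self-contained sketch over plain doubles; EvictingQueue is the same Guava class the accumulator uses, and the class and names here are invented for illustration.

    import com.google.common.collect.EvictingQueue;
    import java.util.Queue;

    final class DoubleSma {
      private final int period;
      private final Queue<Double> window;
      private double runningTotal = 0.0;

      DoubleSma(int period) {
        this.period = period;
        this.window = EvictingQueue.create(period);
      }

      void accumulate(double value) {
        if (window.size() == period) {
          // Subtract the value about to be evicted, then add the new one.
          runningTotal -= window.peek();
        }
        window.add(value);
        runningTotal += value;
      }

      // Null until a full window has been observed, like SimpleMovingAverageAccumulator.
      Double calculate() {
        return window.size() < period ? null : runningTotal / period;
      }
    }

    // new DoubleSma(3): accumulate 1, 2, 3 -> calculate() == 2.0; accumulate 4 -> 3.0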
+ final List data = receivedValues.stream().toList(); + runningTotal = evaluator.calculateFirstTotal(data); + } + } + } + + @Override + public ExprValue calculate() { + if (receivedValues.size() < dataPointsNeeded.valueOf().integerValue()) { + return null; + } else if (dataPointsNeeded.valueOf().integerValue() == 1) { + return receivedValues.peek(); + } + return evaluator.evaluate(runningTotal, dataPointsNeeded); + } + } + + private interface ArithmeticEvaluator { + Expression calculateFirstTotal(List dataPoints); + + Expression add(Expression runningTotal, ExprValue incomingValue, ExprValue evictedValue); + + ExprValue evaluate(Expression runningTotal, LiteralExpression numberOfDataPoints); + } + + private static class NumericArithmeticEvaluator implements ArithmeticEvaluator { + private static final NumericArithmeticEvaluator INSTANCE = new NumericArithmeticEvaluator(); + + private NumericArithmeticEvaluator() {} + + @Override + public Expression calculateFirstTotal(List dataPoints) { + Expression total = DSL.literal(0.0D); + for (ExprValue dataPoint : dataPoints) { + total = DSL.add(total, DSL.literal(dataPoint.doubleValue())); + } + return DSL.literal(total.valueOf().doubleValue()); + } + + @Override + public Expression add( + Expression runningTotal, ExprValue incomingValue, ExprValue evictedValue) { + return DSL.literal( + DSL.add(runningTotal, DSL.subtract(DSL.literal(incomingValue), DSL.literal(evictedValue))) + .valueOf() + .doubleValue()); + } + + @Override + public ExprValue evaluate(Expression runningTotal, LiteralExpression numberOfDataPoints) { + return DSL.divide(runningTotal, numberOfDataPoints).valueOf(); + } + } + + private static class DateArithmeticEvaluator implements ArithmeticEvaluator { + private static final DateArithmeticEvaluator INSTANCE = new DateArithmeticEvaluator(); + + private DateArithmeticEvaluator() {} + + @Override + public Expression calculateFirstTotal(List dataPoints) { + return TimestampArithmeticEvaluator.INSTANCE.calculateFirstTotal(dataPoints); + } + + @Override + public Expression add( + Expression runningTotal, ExprValue incomingValue, ExprValue evictedValue) { + return TimestampArithmeticEvaluator.INSTANCE.add(runningTotal, incomingValue, evictedValue); + } + + @Override + public ExprValue evaluate(Expression runningTotal, LiteralExpression numberOfDataPoints) { + final ExprValue timestampResult = + TimestampArithmeticEvaluator.INSTANCE.evaluate(runningTotal, numberOfDataPoints); + return ExprValueUtils.dateValue(timestampResult.dateValue()); + } + } + + private static class TimeArithmeticEvaluator implements ArithmeticEvaluator { + private static final TimeArithmeticEvaluator INSTANCE = new TimeArithmeticEvaluator(); + + private TimeArithmeticEvaluator() {} + + @Override + public Expression calculateFirstTotal(List dataPoints) { + Expression total = DSL.literal(0); + for (ExprValue dataPoint : dataPoints) { + total = DSL.add(total, DSL.literal(MILLIS.between(LocalTime.MIN, dataPoint.timeValue()))); + } + return DSL.literal(total.valueOf().longValue()); + } + + @Override + public Expression add( + Expression runningTotal, ExprValue incomingValue, ExprValue evictedValue) { + return DSL.literal( + DSL.add( + runningTotal, + DSL.subtract( + DSL.literal(MILLIS.between(LocalTime.MIN, incomingValue.timeValue())), + DSL.literal(MILLIS.between(LocalTime.MIN, evictedValue.timeValue())))) + .valueOf()); + } + + @Override + public ExprValue evaluate(Expression runningTotal, LiteralExpression numberOfDataPoints) { + return ExprValueUtils.timeValue( + 
LocalTime.MIN.plus( + DSL.divide(runningTotal, numberOfDataPoints).valueOf().longValue(), MILLIS)); + } + } + + private static class TimestampArithmeticEvaluator implements ArithmeticEvaluator { + private static final TimestampArithmeticEvaluator INSTANCE = new TimestampArithmeticEvaluator(); + + private TimestampArithmeticEvaluator() {} + + @Override + public Expression calculateFirstTotal(List dataPoints) { + Expression total = DSL.literal(0); + for (ExprValue dataPoint : dataPoints) { + total = DSL.add(total, DSL.literal(dataPoint.timestampValue().toEpochMilli())); + } + return DSL.literal(total.valueOf().longValue()); + } + + @Override + public Expression add( + Expression runningTotal, ExprValue incomingValue, ExprValue evictedValue) { + return DSL.literal( + DSL.add( + runningTotal, + DSL.subtract( + DSL.literal(incomingValue.timestampValue().toEpochMilli()), + DSL.literal(evictedValue.timestampValue().toEpochMilli()))) + .valueOf()); + } + + @Override + public ExprValue evaluate(Expression runningTotal, LiteralExpression numberOfDataPoints) { + return ExprValueUtils.timestampValue( + Instant.ofEpochMilli(DSL.divide(runningTotal, numberOfDataPoints).valueOf().longValue())); + } + } +} diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/collector/Rounding.java b/core/src/main/java/org/opensearch/sql/planner/physical/collector/Rounding.java index 82c8af52cd..7645213c67 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/collector/Rounding.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/collector/Rounding.java @@ -46,13 +46,13 @@ public static Rounding createRounding(SpanExpression span) { if (DOUBLE.isCompatible(type)) { return new DoubleRounding(interval); } - if (type.equals(TIMESTAMP)) { + if (type.equals(TIMESTAMP) || type.typeName().equalsIgnoreCase(TIMESTAMP.typeName())) { return new TimestampRounding(interval, span.getUnit().getName()); } - if (type.equals(DATE)) { + if (type.equals(DATE) || type.typeName().equalsIgnoreCase(DATE.typeName())) { return new DateRounding(interval, span.getUnit().getName()); } - if (type.equals(TIME)) { + if (type.equals(TIME) || type.typeName().equalsIgnoreCase(TIME.typeName())) { return new TimeRounding(interval, span.getUnit().getName()); } return new UnknownRounding(); diff --git a/core/src/main/java/org/opensearch/sql/storage/bindingtuple/BindingTuple.java b/core/src/main/java/org/opensearch/sql/storage/bindingtuple/BindingTuple.java index 2487c651ad..c5c12584fd 100644 --- a/core/src/main/java/org/opensearch/sql/storage/bindingtuple/BindingTuple.java +++ b/core/src/main/java/org/opensearch/sql/storage/bindingtuple/BindingTuple.java @@ -17,7 +17,7 @@ * output column name is bindingName, the value is the ExprValue. */ public abstract class BindingTuple implements Environment { - public static BindingTuple EMPTY = + public static final BindingTuple EMPTY = new BindingTuple() { @Override public ExprValue resolve(ReferenceExpression ref) { diff --git a/core/src/main/java/org/opensearch/sql/utils/ExpressionUtils.java b/core/src/main/java/org/opensearch/sql/utils/ExpressionUtils.java index f04bf3748f..8ae0c6ba88 100644 --- a/core/src/main/java/org/opensearch/sql/utils/ExpressionUtils.java +++ b/core/src/main/java/org/opensearch/sql/utils/ExpressionUtils.java @@ -14,7 +14,7 @@ @UtilityClass public class ExpressionUtils { - public static String PATH_SEP = "."; + public static final String PATH_SEP = "."; /** Format the list of {@link Expression}. 
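Back to the TrendlineOperator evaluators a few hunks above: they reduce temporal averaging to long arithmetic, turning TIME into millis since midnight and DATE/TIMESTAMP into epoch millis before dividing, then converting back. The TIME case in isolation, sketched with plain java.time (illustrative only):

    import java.time.LocalTime;
    import java.time.temporal.ChronoUnit;
    import java.util.List;

    static LocalTime averageTime(List<LocalTime> window) {
      long totalMillis = 0;
      for (LocalTime t : window) {
        // Same conversion as TimeArithmeticEvaluator: millis since midnight.
        totalMillis += ChronoUnit.MILLIS.between(LocalTime.MIN, t);
      }
      return LocalTime.MIN.plus(totalMillis / window.size(), ChronoUnit.MILLIS);
    }

    // averageTime(List.of(LocalTime.of(10, 0), LocalTime.of(12, 0))) -> 11:00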
*/ public static String format(List expressionList) { diff --git a/core/src/main/java/org/opensearch/sql/utils/IPUtils.java b/core/src/main/java/org/opensearch/sql/utils/IPUtils.java new file mode 100644 index 0000000000..8874823a03 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/utils/IPUtils.java @@ -0,0 +1,97 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.utils; + +import inet.ipaddr.AddressStringException; +import inet.ipaddr.IPAddress; +import inet.ipaddr.IPAddressString; +import inet.ipaddr.IPAddressStringParameters; +import inet.ipaddr.ipv4.IPv4Address; +import inet.ipaddr.ipv6.IPv6Address; +import lombok.experimental.UtilityClass; +import org.opensearch.sql.exception.SemanticCheckException; + +@UtilityClass +public class IPUtils { + + // Parameters for IP address strings. + private static final IPAddressStringParameters.Builder commonValidationOptions = + new IPAddressStringParameters.Builder() + .allowEmpty(false) + .allowMask(false) + .setEmptyAsLoopback(false) + .allowPrefixOnly(false) + .allow_inet_aton(false) + .allowSingleSegment(false); + + private static final IPAddressStringParameters ipAddressStringParameters = + commonValidationOptions.allowPrefix(false).toParams(); + private static final IPAddressStringParameters ipAddressRangeStringParameters = + commonValidationOptions.allowPrefix(true).toParams(); + + /** + * Builds and returns the {@link IPAddress} represented by the given IP address range string in + * CIDR (classless inter-domain routing) notation. Throws {@link SemanticCheckException} if it + * does not represent a valid IP address range. Supports both IPv4 and IPv6 address ranges. + */ + public static IPAddress toRange(String s) throws SemanticCheckException { + try { + IPAddress range = new IPAddressString(s, ipAddressRangeStringParameters).toAddress(); + + // Convert IPv6 mapped address range to IPv4. + if (range.isIPv4Convertible()) { + final int prefixLength = range.getPrefixLength(); + range = range.toIPv4().setPrefixLength(prefixLength, false); + } + + return range; + + } catch (AddressStringException e) { + final String errorFormat = "IP address range string '%s' is not valid. Error details: %s"; + throw new SemanticCheckException(String.format(errorFormat, s, e.getMessage()), e); + } + } + + /** + * Builds and returns the {@link IPAddress} represented to the given IP address string. Throws + * {@link SemanticCheckException} if it does not represent a valid IP address. Supports both IPv4 + * and IPv6 addresses. + */ + public static IPAddress toAddress(String s) throws SemanticCheckException { + try { + IPAddress address = new IPAddressString(s, ipAddressStringParameters).toAddress(); + + // Convert IPv6 mapped address to IPv4. + if (address.isIPv4Convertible()) { + address = address.toIPv4(); + } + + return address; + } catch (AddressStringException e) { + final String errorFormat = "IP address string '%s' is not valid. Error details: %s"; + throw new SemanticCheckException(String.format(errorFormat, s, e.getMessage()), e); + } + } + + /** + * Compares the given {@link IPAddress} objects for order. Returns a negative integer, zero, or a + * positive integer if the first {@link IPAddress} object is less than, equal to, or greater than + * the second one. IPv4 addresses are mapped to IPv6 for comparison. 
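A short usage sketch for this new utility class; the literal values are made up, and IPAddress comes from the same ipaddress library the class imports.

    import inet.ipaddr.IPAddress;
    import org.opensearch.sql.utils.IPUtils;

    IPAddress a = IPUtils.toAddress("::ffff:1.2.3.4"); // IPv6-mapped input is normalized to 1.2.3.4
    IPAddress b = IPUtils.toAddress("1.2.3.5");
    IPAddress range = IPUtils.toRange("1.2.3.0/24");   // CIDR notation, IPv4 or IPv6

    int cmp = IPUtils.compare(a, b);                   // negative: 1.2.3.4 orders before 1.2.3.5
    IPUtils.toAddress("not an ip");                    // throws SemanticCheckException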
+ */ + public static int compare(IPAddress a, IPAddress b) { + final IPv6Address ipv6A = toIPv6Address(a); + final IPv6Address ipv6B = toIPv6Address(b); + + return ipv6A.compareTo(ipv6B); + } + + /** Returns the {@link IPv6Address} corresponding to the given {@link IPAddress}. */ + private static IPv6Address toIPv6Address(IPAddress ipAddress) { + return ipAddress instanceof IPv4Address iPv4Address + ? iPv4Address.toIPv6() + : (IPv6Address) ipAddress; + } +} diff --git a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java index 8d935b11d2..3f4752aa2e 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java @@ -18,6 +18,7 @@ import static org.opensearch.sql.ast.dsl.AstDSL.argument; import static org.opensearch.sql.ast.dsl.AstDSL.booleanLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.compare; +import static org.opensearch.sql.ast.dsl.AstDSL.computation; import static org.opensearch.sql.ast.dsl.AstDSL.field; import static org.opensearch.sql.ast.dsl.AstDSL.filter; import static org.opensearch.sql.ast.dsl.AstDSL.filteredAggregate; @@ -33,6 +34,7 @@ import static org.opensearch.sql.ast.tree.Sort.SortOption; import static org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_ASC; import static org.opensearch.sql.ast.tree.Sort.SortOrder; +import static org.opensearch.sql.ast.tree.Trendline.TrendlineType.SMA; import static org.opensearch.sql.data.model.ExprValueUtils.integerValue; import static org.opensearch.sql.data.model.ExprValueUtils.stringValue; import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; @@ -66,6 +68,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.junit.jupiter.api.Disabled; @@ -73,6 +76,7 @@ import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.expression.Argument; import org.opensearch.sql.ast.expression.DataType; +import org.opensearch.sql.ast.expression.Field; import org.opensearch.sql.ast.expression.HighlightFunction; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.expression.ParseMethod; @@ -81,12 +85,14 @@ import org.opensearch.sql.ast.tree.AD; import org.opensearch.sql.ast.tree.CloseCursor; import org.opensearch.sql.ast.tree.FetchCursor; +import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.Paginate; import org.opensearch.sql.ast.tree.RareTopN.CommandType; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.common.antlr.SyntaxCheckException; +import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.exception.ExpressionEvaluationException; import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.DSL; @@ -157,8 +163,8 @@ public void filter_relation_with_invalid_qualifiedName_ExpressionEvaluationExcep assertEquals( "= function expected {[BYTE,BYTE],[SHORT,SHORT],[INTEGER,INTEGER],[LONG,LONG]," + "[FLOAT,FLOAT],[DOUBLE,DOUBLE],[STRING,STRING],[BOOLEAN,BOOLEAN],[DATE,DATE]," - + "[TIME,TIME],[TIMESTAMP,TIMESTAMP],[INTERVAL,INTERVAL]," - + "[STRUCT,STRUCT],[ARRAY,ARRAY]}, but get [STRING,INTEGER]", + + "[TIME,TIME],[TIMESTAMP,TIMESTAMP],[INTERVAL,INTERVAL],[IP,IP]," + + 
"[STRUCT,STRUCT],[ARRAY,ARRAY]}, but got [STRING,INTEGER]", exception.getMessage()); } @@ -1437,6 +1443,104 @@ public void kmeanns_relation() { new Kmeans(AstDSL.relation("schema"), argumentMap)); } + @Test + public void fillnull_same_value() { + assertAnalyzeEqual( + LogicalPlanDSL.eval( + LogicalPlanDSL.relation("schema", table), + ImmutablePair.of( + DSL.ref("integer_value", INTEGER), + DSL.ifnull(DSL.ref("integer_value", INTEGER), DSL.literal(0))), + ImmutablePair.of( + DSL.ref("int_null_value", INTEGER), + DSL.ifnull(DSL.ref("int_null_value", INTEGER), DSL.literal(0)))), + new FillNull( + AstDSL.relation("schema"), + FillNull.ContainNullableFieldFill.ofSameValue( + AstDSL.intLiteral(0), + ImmutableList.builder() + .add(AstDSL.field("integer_value")) + .add(AstDSL.field("int_null_value")) + .build()))); + } + + @Test + public void fillnull_various_values() { + assertAnalyzeEqual( + LogicalPlanDSL.eval( + LogicalPlanDSL.relation("schema", table), + ImmutablePair.of( + DSL.ref("integer_value", INTEGER), + DSL.ifnull(DSL.ref("integer_value", INTEGER), DSL.literal(0))), + ImmutablePair.of( + DSL.ref("int_null_value", INTEGER), + DSL.ifnull(DSL.ref("int_null_value", INTEGER), DSL.literal(1)))), + new FillNull( + AstDSL.relation("schema"), + FillNull.ContainNullableFieldFill.ofVariousValue( + ImmutableList.of( + new FillNull.NullableFieldFill( + AstDSL.field("integer_value"), AstDSL.intLiteral(0)), + new FillNull.NullableFieldFill( + AstDSL.field("int_null_value"), AstDSL.intLiteral(1)))))); + } + + @Test + public void trendline() { + assertAnalyzeEqual( + LogicalPlanDSL.trendline( + LogicalPlanDSL.relation("schema", table), + Pair.of(computation(5, field("float_value"), "test_field_alias", SMA), DOUBLE), + Pair.of(computation(1, field("double_value"), "test_field_alias_2", SMA), DOUBLE)), + AstDSL.trendline( + AstDSL.relation("schema"), + Optional.empty(), + computation(5, field("float_value"), "test_field_alias", SMA), + computation(1, field("double_value"), "test_field_alias_2", SMA))); + } + + @Test + public void trendline_datetime_types() { + assertAnalyzeEqual( + LogicalPlanDSL.trendline( + LogicalPlanDSL.relation("schema", table), + Pair.of(computation(5, field("timestamp_value"), "test_field_alias", SMA), TIMESTAMP)), + AstDSL.trendline( + AstDSL.relation("schema"), + Optional.empty(), + computation(5, field("timestamp_value"), "test_field_alias", SMA))); + } + + @Test + public void trendline_illegal_type() { + assertThrows( + SemanticCheckException.class, + () -> + analyze( + AstDSL.trendline( + AstDSL.relation("schema"), + Optional.empty(), + computation(5, field("array_value"), "test_field_alias", SMA)))); + } + + @Test + public void trendline_with_sort() { + assertAnalyzeEqual( + LogicalPlanDSL.trendline( + LogicalPlanDSL.sort( + LogicalPlanDSL.relation("schema", table), + Pair.of( + new SortOption(SortOrder.ASC, NullOrder.NULL_FIRST), + DSL.ref("float_value", ExprCoreType.FLOAT))), + Pair.of(computation(5, field("float_value"), "test_field_alias", SMA), DOUBLE), + Pair.of(computation(1, field("double_value"), "test_field_alias_2", SMA), DOUBLE)), + AstDSL.trendline( + AstDSL.relation("schema"), + Optional.of(field("float_value", argument("asc", booleanLiteral(true)))), + computation(5, field("float_value"), "test_field_alias", SMA), + computation(1, field("double_value"), "test_field_alias_2", SMA))); + } + @Test public void ad_batchRCF_relation() { Map argumentMap = diff --git a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTestBase.java 
b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTestBase.java index b35cfbb5e1..17f86cadba 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTestBase.java +++ b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTestBase.java @@ -28,6 +28,7 @@ import org.opensearch.sql.config.TestConfig; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.datasource.DataSourceService; +import org.opensearch.sql.datasource.RequestContext; import org.opensearch.sql.datasource.model.DataSource; import org.opensearch.sql.datasource.model.DataSourceMetadata; import org.opensearch.sql.datasource.model.DataSourceType; @@ -147,15 +148,15 @@ protected Environment typeEnv() { }; } - protected AnalysisContext analysisContext = analysisContext(typeEnvironment(symbolTable())); + protected final AnalysisContext analysisContext = analysisContext(typeEnvironment(symbolTable())); - protected ExpressionAnalyzer expressionAnalyzer = expressionAnalyzer(); + protected final ExpressionAnalyzer expressionAnalyzer = expressionAnalyzer(); - protected Table table = table(); + protected final Table table = table(); - protected DataSourceService dataSourceService = dataSourceService(); + protected final DataSourceService dataSourceService = dataSourceService(); - protected Analyzer analyzer = analyzer(expressionAnalyzer(), dataSourceService); + protected final Analyzer analyzer = analyzer(expressionAnalyzer(), dataSourceService); protected Analyzer analyzer( ExpressionAnalyzer expressionAnalyzer, DataSourceService dataSourceService) { @@ -236,18 +237,19 @@ public Boolean dataSourceExists(String dataSourceName) { } @Override - public DataSourceMetadata verifyDataSourceAccessAndGetRawMetadata(String dataSourceName) { + public DataSourceMetadata verifyDataSourceAccessAndGetRawMetadata( + String dataSourceName, RequestContext requestContext) { return null; } } private class TestTableFunctionImplementation implements TableFunctionImplementation { - private FunctionName functionName; + private final FunctionName functionName; - private List arguments; + private final List arguments; - private Table table; + private final Table table; public TestTableFunctionImplementation( FunctionName functionName, List arguments, Table table) { diff --git a/core/src/test/java/org/opensearch/sql/analysis/TypeEnvironmentTest.java b/core/src/test/java/org/opensearch/sql/analysis/TypeEnvironmentTest.java index 91677a901e..00e2f10469 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/TypeEnvironmentTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/TypeEnvironmentTest.java @@ -21,7 +21,7 @@ public class TypeEnvironmentTest { /** Use context class for push/pop. 
*/ - private AnalysisContext context = new AnalysisContext(); + private final AnalysisContext context = new AnalysisContext(); @Test public void defineFieldSymbolInDifferentEnvironmentsShouldBeAbleToResolve() { diff --git a/core/src/test/java/org/opensearch/sql/config/TestConfig.java b/core/src/test/java/org/opensearch/sql/config/TestConfig.java index 92b6aac64f..6655640e28 100644 --- a/core/src/test/java/org/opensearch/sql/config/TestConfig.java +++ b/core/src/test/java/org/opensearch/sql/config/TestConfig.java @@ -33,7 +33,7 @@ public class TestConfig { public static final String STRING_TYPE_NULL_VALUE_FIELD = "string_null_value"; public static final String STRING_TYPE_MISSING_VALUE_FIELD = "string_missing_value"; - public static Map typeMapping = + public static final Map typeMapping = new ImmutableMap.Builder() .put("integer_value", ExprCoreType.INTEGER) .put(INT_TYPE_NULL_VALUE_FIELD, ExprCoreType.INTEGER) diff --git a/core/src/test/java/org/opensearch/sql/data/model/ExprIpValueTest.java b/core/src/test/java/org/opensearch/sql/data/model/ExprIpValueTest.java new file mode 100644 index 0000000000..b0ef598a5a --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/data/model/ExprIpValueTest.java @@ -0,0 +1,138 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.data.model; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.List; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.exception.SemanticCheckException; +import org.opensearch.sql.utils.IPUtils; + +public class ExprIpValueTest { + + private static final String ipv4String = "1.2.3.4"; + private static final String ipv6String = "2001:db7::ff00:42:8329"; + private static final String ipInvalidString = "INVALID"; + + private static final ExprValue exprIpv4Value = ExprValueUtils.ipValue(ipv4String); + private static final ExprValue exprIpv6Value = ExprValueUtils.ipValue(ipv6String); + + private static final List ipv4LesserStrings = + List.of("1.2.3.3", "01.2.3.3", "::ffff:1.2.3.3", "::ffff:102:303"); + private static final List ipv4EqualStrings = + List.of("1.2.3.4", "01.2.3.4", "::ffff:1.2.3.4", "::ffff:102:304"); + private static final List ipv4GreaterStrings = + List.of("1.2.3.5", "01.2.3.5", "::ffff:1.2.3.5", "::ffff:102:305"); + + private static final List ipv6LesserStrings = + List.of( + "2001:db7::ff00:42:8328", + "2001:0db7::ff00:0042:8328", + "2001:DB7::FF00:42:8328", + "2001:0db7:0000:0000:0000:ff00:0042:8328"); + private static final List ipv6EqualStrings = + List.of( + "2001:db7::ff00:42:8329", + "2001:0db7::ff00:0042:8329", + "2001:DB7::FF00:42:8329", + "2001:0db7:0000:0000:0000:ff00:0042:8329"); + private static final List ipv6GreaterStrings = + List.of( + "2001:db7::ff00:42:8330", + "2001:0db7::ff00:0042:8330", + "2001:DB7::FF00:42:8330", + "2001:0db7:0000:0000:0000:ff00:0042:8330"); + + @Test + public void testInvalid() { + assertThrows( + SemanticCheckException.class, + () -> ExprValueUtils.ipValue(ipInvalidString), + String.format("IP address string '%s' is not valid. 
Error details: .*", ipInvalidString)); + } + + @Test + public void testValue() { + ipv4EqualStrings.forEach((s) -> assertEquals(ipv4String, ExprValueUtils.ipValue(s).value())); + ipv6EqualStrings.forEach((s) -> assertEquals(ipv6String, ExprValueUtils.ipValue(s).value())); + } + + @Test + public void testType() { + assertEquals(ExprCoreType.IP, exprIpv4Value.type()); + assertEquals(ExprCoreType.IP, exprIpv6Value.type()); + } + + @Test + public void testCompare() { + + // Compare to IP address. + ipv4LesserStrings.forEach( + (s) -> assertTrue(exprIpv4Value.compareTo(ExprValueUtils.ipValue(s)) > 0)); + ipv4EqualStrings.forEach( + (s) -> assertEquals(0, exprIpv4Value.compareTo(ExprValueUtils.ipValue(s)))); + ipv4GreaterStrings.forEach( + (s) -> assertTrue(exprIpv4Value.compareTo(ExprValueUtils.ipValue(s)) < 0)); + ipv6LesserStrings.forEach( + (s) -> assertTrue(exprIpv6Value.compareTo(ExprValueUtils.ipValue(s)) > 0)); + ipv6EqualStrings.forEach( + (s) -> assertEquals(0, exprIpv6Value.compareTo(ExprValueUtils.ipValue(s)))); + ipv6GreaterStrings.forEach( + (s) -> assertTrue(exprIpv6Value.compareTo(ExprValueUtils.ipValue(s)) < 0)); + + // Compare to null/missing value. + assertThrows( + IllegalStateException.class, + () -> exprIpv4Value.compareTo(ExprValueUtils.LITERAL_NULL), + "[BUG] Unreachable, Comparing with NULL or MISSING is undefined"); + assertThrows( + IllegalStateException.class, + () -> exprIpv4Value.compareTo(ExprValueUtils.LITERAL_MISSING), + "[BUG] Unreachable, Comparing with NULL or MISSING is undefined"); + + // Compare to other data type. + assertThrows( + ExpressionEvaluationException.class, + () -> exprIpv4Value.compareTo(ExprValueUtils.LITERAL_TRUE), + "compare expected value have same type, but with [IP, BOOLEAN]"); + } + + @Test + public void testEquals() { + assertEquals(exprIpv4Value, exprIpv4Value); + assertNotEquals(exprIpv4Value, new Object()); + assertNotEquals(exprIpv4Value, ExprValueUtils.LITERAL_NULL); + assertNotEquals(exprIpv4Value, ExprValueUtils.LITERAL_MISSING); + + ipv4EqualStrings.forEach((s) -> assertEquals(exprIpv4Value, ExprValueUtils.ipValue(s))); + ipv6EqualStrings.forEach((s) -> assertEquals(exprIpv6Value, ExprValueUtils.ipValue(s))); + + ipv4LesserStrings.forEach((s) -> assertNotEquals(exprIpv4Value, ExprValueUtils.ipValue(s))); + ipv6GreaterStrings.forEach((s) -> assertNotEquals(exprIpv6Value, ExprValueUtils.ipValue(s))); + } + + @Test + public void testToString() { + ipv4EqualStrings.forEach( + (s) -> + assertEquals(String.format("IP %s", ipv4String), ExprValueUtils.ipValue(s).toString())); + ipv6EqualStrings.forEach( + (s) -> + assertEquals(String.format("IP %s", ipv6String), ExprValueUtils.ipValue(s).toString())); + } + + @Test + public void testIpValue() { + ipv4EqualStrings.forEach((s) -> assertEquals(IPUtils.toAddress(s), exprIpv4Value.ipValue())); + ipv6EqualStrings.forEach((s) -> assertEquals(IPUtils.toAddress(s), exprIpv6Value.ipValue())); + } +} diff --git a/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java b/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java index 0baf5052e4..48db530a94 100644 --- a/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java +++ b/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java @@ -14,6 +14,7 @@ import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; import static org.opensearch.sql.data.type.ExprCoreType.DATE; import static org.opensearch.sql.data.type.ExprCoreType.INTERVAL; +import static 
org.opensearch.sql.data.type.ExprCoreType.IP; import static org.opensearch.sql.data.type.ExprCoreType.STRING; import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; import static org.opensearch.sql.data.type.ExprCoreType.TIME; @@ -47,22 +48,24 @@ import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.exception.ExpressionEvaluationException; import org.opensearch.sql.storage.bindingtuple.BindingTuple; +import org.opensearch.sql.utils.IPUtils; @DisplayName("Test Expression Value Utils") public class ExprValueUtilsTest { - private static LinkedHashMap testTuple = new LinkedHashMap<>(); + private static final LinkedHashMap testTuple = new LinkedHashMap<>(); static { testTuple.put("1", new ExprIntegerValue(1)); } - private static List numberValues = + private static final List numberValues = Stream.of((byte) 1, (short) 1, 1, 1L, 1f, 1D) .map(ExprValueUtils::fromObjectValue) .collect(Collectors.toList()); - private static List nonNumberValues = + private static final List nonNumberValues = Arrays.asList( + new ExprIpValue("1.2.3.4"), new ExprStringValue("1"), ExprBooleanValue.of(true), new ExprCollectionValue(ImmutableList.of(new ExprIntegerValue(1))), @@ -72,10 +75,10 @@ public class ExprValueUtilsTest { new ExprTimestampValue("2012-08-07 18:00:00"), new ExprIntervalValue(Duration.ofSeconds(100))); - private static List allValues = + private static final List allValues = Lists.newArrayList(Iterables.concat(numberValues, nonNumberValues)); - private static List> numberValueExtractor = + private static final List> numberValueExtractor = Arrays.asList( ExprValueUtils::getByteValue, ExprValueUtils::getShortValue, @@ -83,24 +86,25 @@ public class ExprValueUtilsTest { ExprValueUtils::getLongValue, ExprValueUtils::getFloatValue, ExprValueUtils::getDoubleValue); - private static List> nonNumberValueExtractor = + private static final List> nonNumberValueExtractor = Arrays.asList( + ExprValueUtils::getIpValue, ExprValueUtils::getStringValue, ExprValueUtils::getBooleanValue, ExprValueUtils::getCollectionValue, ExprValueUtils::getTupleValue); - private static List> dateAndTimeValueExtractor = + private static final List> dateAndTimeValueExtractor = Arrays.asList( ExprValue::dateValue, ExprValue::timeValue, ExprValue::timestampValue, ExprValue::intervalValue); - private static List> allValueExtractor = + private static final List> allValueExtractor = Lists.newArrayList( Iterables.concat( numberValueExtractor, nonNumberValueExtractor, dateAndTimeValueExtractor)); - private static List numberTypes = + private static final List numberTypes = Arrays.asList( ExprCoreType.BYTE, ExprCoreType.SHORT, @@ -108,10 +112,11 @@ public class ExprValueUtilsTest { ExprCoreType.LONG, ExprCoreType.FLOAT, ExprCoreType.DOUBLE); - private static List nonNumberTypes = Arrays.asList(STRING, BOOLEAN, ARRAY, STRUCT); - private static List dateAndTimeTypes = + private static final List nonNumberTypes = + Arrays.asList(IP, STRING, BOOLEAN, ARRAY, STRUCT); + private static final List dateAndTimeTypes = Arrays.asList(DATE, TIME, TIMESTAMP, INTERVAL); - private static List allTypes = + private static final List allTypes = Lists.newArrayList(Iterables.concat(numberTypes, nonNumberTypes, dateAndTimeTypes)); private static Stream getValueTestArgumentStream() { @@ -123,6 +128,7 @@ private static Stream getValueTestArgumentStream() { 1L, 1f, 1D, + IPUtils.toAddress("1.2.3.4"), "1", true, Arrays.asList(integerValue(1)), diff --git a/core/src/test/java/org/opensearch/sql/datasource/model/DataSourceMetadataTest.java 
b/core/src/test/java/org/opensearch/sql/datasource/model/DataSourceMetadataTest.java index 24f830f18e..fe40fac868 100644 --- a/core/src/test/java/org/opensearch/sql/datasource/model/DataSourceMetadataTest.java +++ b/core/src/test/java/org/opensearch/sql/datasource/model/DataSourceMetadataTest.java @@ -36,7 +36,7 @@ public void testBuilderAndGetterMethods() { .setProperties(properties) .setResultIndex("query_execution_result_test123") .setDataSourceStatus(ACTIVE) - .build(); + .validateAndBuild(); assertEquals("test", metadata.getName()); assertEquals("test description", metadata.getDescription()); @@ -59,7 +59,10 @@ public void testDefaultDataSourceMetadata() { @Test public void testNameValidation() { try { - new DataSourceMetadata.Builder().setName("Invalid$$$Name").setConnector(PROMETHEUS).build(); + new DataSourceMetadata.Builder() + .setName("Invalid$$$Name") + .setConnector(PROMETHEUS) + .validateAndBuild(); fail("Should have thrown an IllegalArgumentException"); } catch (IllegalArgumentException e) { assertEquals( @@ -76,7 +79,7 @@ public void testResultIndexValidation() { .setName("test") .setConnector(PROMETHEUS) .setResultIndex("invalid_result_index") - .build(); + .validateAndBuild(); fail("Should have thrown an IllegalArgumentException"); } catch (IllegalArgumentException e) { assertEquals(DataSourceMetadata.INVALID_RESULT_INDEX_PREFIX, e.getMessage()); @@ -86,7 +89,7 @@ public void testResultIndexValidation() { @Test public void testMissingAttributes() { try { - new DataSourceMetadata.Builder().build(); + new DataSourceMetadata.Builder().validateAndBuild(); fail("Should have thrown an IllegalArgumentException due to missing attributes"); } catch (IllegalArgumentException e) { assertTrue(e.getMessage().contains("name")); @@ -97,7 +100,10 @@ public void testMissingAttributes() { @Test public void testFillAttributes() { DataSourceMetadata metadata = - new DataSourceMetadata.Builder().setName("test").setConnector(PROMETHEUS).build(); + new DataSourceMetadata.Builder() + .setName("test") + .setConnector(PROMETHEUS) + .validateAndBuild(); assertEquals("test", metadata.getName()); assertEquals(PROMETHEUS, metadata.getConnector()); @@ -115,7 +121,7 @@ public void testLengthyResultIndexName() { .setName("test") .setConnector(PROMETHEUS) .setResultIndex("query_execution_result_" + RandomStringUtils.randomAlphanumeric(300)) - .build(); + .validateAndBuild(); fail("Should have thrown an IllegalArgumentException"); } catch (IllegalArgumentException e) { assertEquals( @@ -131,7 +137,7 @@ public void testInbuiltLengthyResultIndexName() { new DataSourceMetadata.Builder() .setName(RandomStringUtils.randomAlphabetic(250)) .setConnector(PROMETHEUS) - .build(); + .validateAndBuild(); assertEquals(255, dataSourceMetadata.getResultIndex().length()); } @@ -150,8 +156,8 @@ public void testCopyFromAnotherMetadata() { .setProperties(properties) .setResultIndex("query_execution_result_test123") .setDataSourceStatus(ACTIVE) - .build(); - DataSourceMetadata copiedMetadata = new DataSourceMetadata.Builder(metadata).build(); + .validateAndBuild(); + DataSourceMetadata copiedMetadata = new DataSourceMetadata.Builder(metadata).validateAndBuild(); assertEquals(metadata.getResultIndex(), copiedMetadata.getResultIndex()); assertEquals(metadata.getProperties(), copiedMetadata.getProperties()); } diff --git a/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java b/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java index 897347f22d..febf662843 100644 --- 
a/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java +++ b/core/src/test/java/org/opensearch/sql/executor/ExplainTest.java @@ -10,6 +10,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.opensearch.sql.ast.tree.RareTopN.CommandType.TOP; import static org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_ASC; +import static org.opensearch.sql.ast.tree.Trendline.TrendlineType.SMA; import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import static org.opensearch.sql.data.type.ExprCoreType.STRING; @@ -27,9 +28,12 @@ import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.remove; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.rename; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.sort; +import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.takeOrdered; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.values; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.window; +import com.google.common.collect.ImmutableMap; +import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.Set; @@ -38,6 +42,7 @@ import org.junit.jupiter.api.DisplayNameGeneration; import org.junit.jupiter.api.DisplayNameGenerator; import org.junit.jupiter.api.Test; +import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.tree.Sort; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.executor.ExecutionEngine.ExplainResponse; @@ -51,6 +56,7 @@ import org.opensearch.sql.expression.aggregation.NamedAggregator; import org.opensearch.sql.expression.window.WindowDefinition; import org.opensearch.sql.planner.physical.PhysicalPlan; +import org.opensearch.sql.planner.physical.TrendlineOperator; import org.opensearch.sql.storage.TableScanOperator; @DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) @@ -220,6 +226,26 @@ void can_explain_limit() { explain.apply(plan)); } + @Test + void can_explain_takeOrdered() { + Pair sort = + ImmutablePair.of(Sort.SortOption.DEFAULT_ASC, ref("a", INTEGER)); + PhysicalPlan plan = takeOrdered(tableScan, 10, 5, sort); + assertEquals( + new ExplainResponse( + new ExplainResponseNode( + "TakeOrderedOperator", + Map.of( + "limit", + 10, + "offset", + 5, + "sortList", + Map.of("a", Map.of("sortOrder", "ASC", "nullOrder", "NULL_FIRST"))), + singletonList(tableScan.explainNode()))), + explain.apply(plan)); + } + @Test void can_explain_nested() { Set nestedOperatorArgs = Set.of("message.info", "message"); @@ -235,6 +261,44 @@ void can_explain_nested() { explain.apply(plan)); } + @Test + void can_explain_trendline() { + PhysicalPlan plan = + new TrendlineOperator( + tableScan, + Arrays.asList( + Pair.of( + AstDSL.computation(2, AstDSL.field("distance"), "distance_alias", SMA), DOUBLE), + Pair.of(AstDSL.computation(3, AstDSL.field("time"), "time_alias", SMA), DOUBLE))); + assertEquals( + new ExplainResponse( + new ExplainResponseNode( + "TrendlineOperator", + ImmutableMap.of( + "computations", + List.of( + ImmutableMap.of( + "computationType", + "sma", + "numberOfDataPoints", + "2", + "dataField", + "distance", + "alias", + "distance_alias"), + ImmutableMap.of( + "computationType", + "sma", + "numberOfDataPoints", + "3", + "dataField", + "time", + "alias", + "time_alias"))), + singletonList(tableScan.explainNode()))), + explain.apply(plan)); + } + private static class FakeTableScan extends TableScanOperator { @Override public 
boolean hasNext() { diff --git a/core/src/test/java/org/opensearch/sql/executor/QueryManagerTest.java b/core/src/test/java/org/opensearch/sql/executor/QueryManagerTest.java index 7f34d348bc..2d8d4d4ee2 100644 --- a/core/src/test/java/org/opensearch/sql/executor/QueryManagerTest.java +++ b/core/src/test/java/org/opensearch/sql/executor/QueryManagerTest.java @@ -17,7 +17,7 @@ class QueryManagerTest { @Mock private QueryId queryId; - private QueryManager queryManager = + private final QueryManager queryManager = id -> { throw new UnsupportedOperationException(); }; diff --git a/core/src/test/java/org/opensearch/sql/executor/execution/IntervalTriggerExecutionTest.java b/core/src/test/java/org/opensearch/sql/executor/execution/IntervalTriggerExecutionTest.java index 9eb99d37e3..030114749c 100644 --- a/core/src/test/java/org/opensearch/sql/executor/execution/IntervalTriggerExecutionTest.java +++ b/core/src/test/java/org/opensearch/sql/executor/execution/IntervalTriggerExecutionTest.java @@ -32,7 +32,7 @@ Helper triggerTask(long interval) { class Helper implements Runnable { - private StreamingQueryPlan.IntervalTriggerExecution executionStrategy; + private final StreamingQueryPlan.IntervalTriggerExecution executionStrategy; private static final int START = 0; @@ -42,7 +42,7 @@ class Helper implements Runnable { private int state = START; - private long interval; + private final long interval; private long taskExecutionTime; diff --git a/core/src/test/java/org/opensearch/sql/executor/execution/StreamingQueryPlanTest.java b/core/src/test/java/org/opensearch/sql/executor/execution/StreamingQueryPlanTest.java index 2e8666aea4..c2cc606a88 100644 --- a/core/src/test/java/org/opensearch/sql/executor/execution/StreamingQueryPlanTest.java +++ b/core/src/test/java/org/opensearch/sql/executor/execution/StreamingQueryPlanTest.java @@ -77,7 +77,7 @@ Helper streamingQuery() { class Helper { - private StreamingQueryPlan queryPlan; + private final StreamingQueryPlan queryPlan; public Helper() { queryPlan = diff --git a/core/src/test/java/org/opensearch/sql/expression/ExpressionTestBase.java b/core/src/test/java/org/opensearch/sql/expression/ExpressionTestBase.java index fd886cdda3..fc7e73d5b2 100644 --- a/core/src/test/java/org/opensearch/sql/expression/ExpressionTestBase.java +++ b/core/src/test/java/org/opensearch/sql/expression/ExpressionTestBase.java @@ -36,7 +36,7 @@ public class ExpressionTestBase { - protected FunctionProperties functionProperties = new FunctionProperties(); + protected final FunctionProperties functionProperties = new FunctionProperties(); protected Environment typeEnv; diff --git a/core/src/test/java/org/opensearch/sql/expression/aggregation/AggregationTest.java b/core/src/test/java/org/opensearch/sql/expression/aggregation/AggregationTest.java index f1a3a9d948..38aa263ddc 100644 --- a/core/src/test/java/org/opensearch/sql/expression/aggregation/AggregationTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/aggregation/AggregationTest.java @@ -17,7 +17,7 @@ public class AggregationTest extends ExpressionTestBase { - protected static List tuples = + protected static final List tuples = Arrays.asList( ExprValueUtils.tupleValue( new ImmutableMap.Builder() @@ -95,7 +95,7 @@ public class AggregationTest extends ExpressionTestBase { "timestamp_value", "2040-01-01 07:00:00"))); - protected static List tuples_with_duplicates = + protected static final List tuples_with_duplicates = Arrays.asList( ExprValueUtils.tupleValue( ImmutableMap.of( @@ -138,7 +138,7 @@ public class 
AggregationTest extends ExpressionTestBase { "array_value", ImmutableList.of(1, 2)))); - protected static List tuples_with_null_and_missing = + protected static final List tuples_with_null_and_missing = Arrays.asList( ExprValueUtils.tupleValue( ImmutableMap.of("integer_value", 2, "string_value", "m", "double_value", 3d)), @@ -146,7 +146,7 @@ public class AggregationTest extends ExpressionTestBase { ImmutableMap.of("integer_value", 1, "string_value", "f", "double_value", 4d)), ExprValueUtils.tupleValue(Collections.singletonMap("double_value", null))); - protected static List tuples_with_all_null_or_missing = + protected static final List tuples_with_all_null_or_missing = Arrays.asList( ExprValueUtils.tupleValue(Collections.singletonMap("integer_value", null)), ExprValueUtils.tupleValue(Collections.singletonMap("double", null)), diff --git a/core/src/test/java/org/opensearch/sql/expression/aggregation/PercentileApproxAggregatorTest.java b/core/src/test/java/org/opensearch/sql/expression/aggregation/PercentileApproxAggregatorTest.java index ac617e7b32..33fc325204 100644 --- a/core/src/test/java/org/opensearch/sql/expression/aggregation/PercentileApproxAggregatorTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/aggregation/PercentileApproxAggregatorTest.java @@ -13,11 +13,18 @@ package org.opensearch.sql.expression.aggregation; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.when; -import static org.opensearch.sql.data.model.ExprValueUtils.*; -import static org.opensearch.sql.data.type.ExprCoreType.*; +import static org.opensearch.sql.data.model.ExprValueUtils.integerValue; +import static org.opensearch.sql.data.model.ExprValueUtils.longValue; +import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; +import static org.opensearch.sql.data.type.ExprCoreType.FLOAT; +import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; +import static org.opensearch.sql.data.type.ExprCoreType.LONG; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; import java.util.ArrayList; import java.util.List; @@ -195,7 +202,7 @@ public void test_percentile_with_invalid_size() { "percentile_approx function expected" + " {[INTEGER,DOUBLE],[INTEGER,DOUBLE,DOUBLE],[LONG,DOUBLE],[LONG,DOUBLE,DOUBLE]," + "[FLOAT,DOUBLE],[FLOAT,DOUBLE,DOUBLE],[DOUBLE,DOUBLE],[DOUBLE,DOUBLE,DOUBLE]}," - + " but get [DOUBLE,STRING]", + + " but got [DOUBLE,STRING]", exception2.getMessage()); } diff --git a/core/src/test/java/org/opensearch/sql/expression/datetime/DateAddAndAddDateTest.java b/core/src/test/java/org/opensearch/sql/expression/datetime/DateAddAndAddDateTest.java index 519e97bdc6..b4ab3a8567 100644 --- a/core/src/test/java/org/opensearch/sql/expression/datetime/DateAddAndAddDateTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/datetime/DateAddAndAddDateTest.java @@ -156,7 +156,7 @@ public void adddate_has_second_signature_but_not_date_add() { () -> date_add(LocalDateTime.of(1961, 4, 12, 9, 7), 100500)); assertEquals( "date_add function expected {[DATE,INTERVAL],[TIMESTAMP,INTERVAL]," - + "[TIME,INTERVAL]}, but get [TIMESTAMP,INTEGER]", + + "[TIME,INTERVAL]}, but got [TIMESTAMP,INTEGER]", exception.getMessage()); } diff --git a/core/src/test/java/org/opensearch/sql/expression/datetime/DateSubAndSubDateTest.java 
b/core/src/test/java/org/opensearch/sql/expression/datetime/DateSubAndSubDateTest.java index 123ecda0bd..897f49cfee 100644 --- a/core/src/test/java/org/opensearch/sql/expression/datetime/DateSubAndSubDateTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/datetime/DateSubAndSubDateTest.java @@ -139,7 +139,7 @@ public void subdate_has_second_signature_but_not_date_sub() { ExpressionEvaluationException.class, () -> date_sub(LocalDateTime.of(1961, 4, 12, 9, 7), 100500)); assertEquals( - "date_sub function expected {[DATE,INTERVAL],[TIMESTAMP,INTERVAL],[TIME,INTERVAL]}, but get" + "date_sub function expected {[DATE,INTERVAL],[TIMESTAMP,INTERVAL],[TIME,INTERVAL]}, but got" + " [TIMESTAMP,INTEGER]", exception.getMessage()); } diff --git a/core/src/test/java/org/opensearch/sql/expression/datetime/ToSecondsTest.java b/core/src/test/java/org/opensearch/sql/expression/datetime/ToSecondsTest.java index 910fe42a52..e983eb28f6 100644 --- a/core/src/test/java/org/opensearch/sql/expression/datetime/ToSecondsTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/datetime/ToSecondsTest.java @@ -8,7 +8,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.opensearch.sql.data.type.ExprCoreType.LONG; -import static org.opensearch.sql.expression.datetime.DateTimeFunction.SECONDS_PER_DAY; +import static org.opensearch.sql.expression.datetime.DateTimeFunctions.SECONDS_PER_DAY; import java.time.Duration; import java.time.LocalDate; diff --git a/core/src/test/java/org/opensearch/sql/expression/function/DefaultFunctionResolverTest.java b/core/src/test/java/org/opensearch/sql/expression/function/DefaultFunctionResolverTest.java index ad9e8a6661..0c0439a764 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/DefaultFunctionResolverTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/DefaultFunctionResolverTest.java @@ -72,7 +72,7 @@ void resolve_function_not_match() { assertThrows( ExpressionEvaluationException.class, () -> resolver.resolve(functionSignature)); assertEquals( - "add function expected {[INTEGER,INTEGER]}, but get [BOOLEAN,BOOLEAN]", + "add function expected {[INTEGER,INTEGER]}, but got [BOOLEAN,BOOLEAN]", exception.getMessage()); } diff --git a/core/src/test/java/org/opensearch/sql/expression/function/FunctionSignatureTest.java b/core/src/test/java/org/opensearch/sql/expression/function/FunctionSignatureTest.java index 2fb5dc468e..b301cf7ba8 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/FunctionSignatureTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/FunctionSignatureTest.java @@ -27,8 +27,8 @@ class FunctionSignatureTest { @Mock private FunctionSignature funcSignature; @Mock private List funcParamTypeList; - private FunctionName unresolvedFuncName = FunctionName.of("add"); - private List unresolvedParamTypeList = + private final FunctionName unresolvedFuncName = FunctionName.of("add"); + private final List unresolvedParamTypeList = Arrays.asList(ExprCoreType.INTEGER, ExprCoreType.FLOAT); @Test diff --git a/core/src/test/java/org/opensearch/sql/expression/function/WideningTypeRuleTest.java b/core/src/test/java/org/opensearch/sql/expression/function/WideningTypeRuleTest.java index 9de1e65108..d38be4c958 100644 --- a/core/src/test/java/org/opensearch/sql/expression/function/WideningTypeRuleTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/function/WideningTypeRuleTest.java @@ -13,6 +13,7 
@@ import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import static org.opensearch.sql.data.type.ExprCoreType.FLOAT; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; +import static org.opensearch.sql.data.type.ExprCoreType.IP; import static org.opensearch.sql.data.type.ExprCoreType.LONG; import static org.opensearch.sql.data.type.ExprCoreType.SHORT; import static org.opensearch.sql.data.type.ExprCoreType.STRING; @@ -36,7 +37,7 @@ import org.opensearch.sql.exception.ExpressionEvaluationException; class WideningTypeRuleTest { - private static Table numberWidenRule = + private static final Table numberWidenRule = new ImmutableTable.Builder() .put(BYTE, SHORT, 1) .put(BYTE, INTEGER, 2) @@ -57,6 +58,7 @@ class WideningTypeRuleTest { .put(STRING, TIMESTAMP, 1) .put(STRING, DATE, 1) .put(STRING, TIME, 1) + .put(STRING, IP, 1) .put(DATE, TIMESTAMP, 1) .put(TIME, TIMESTAMP, 1) .put(UNDEFINED, BYTE, 1) diff --git a/core/src/test/java/org/opensearch/sql/expression/ip/IPFunctionsTest.java b/core/src/test/java/org/opensearch/sql/expression/ip/IPFunctionsTest.java new file mode 100644 index 0000000000..a74bbda3a1 --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/expression/ip/IPFunctionsTest.java @@ -0,0 +1,93 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.ip; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.Mockito.when; +import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_FALSE; +import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_TRUE; +import static org.opensearch.sql.data.type.ExprCoreType.IP; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.exception.SemanticCheckException; +import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.FunctionExpression; +import org.opensearch.sql.expression.env.Environment; + +@ExtendWith(MockitoExtension.class) +public class IPFunctionsTest { + + // IP range and address constants for testing. + private static final ExprValue IPv4Range = ExprValueUtils.stringValue("198.51.100.0/24"); + private static final ExprValue IPv4RangeMapped = + ExprValueUtils.stringValue("::ffff:198.51.100.0/24"); + private static final ExprValue IPv6Range = ExprValueUtils.stringValue("2001:0db8::/32"); + + private static final ExprValue IPv4AddressBelow = ExprValueUtils.ipValue("198.51.99.1"); + private static final ExprValue IPv4AddressWithin = ExprValueUtils.ipValue("198.51.100.1"); + private static final ExprValue IPv4AddressAbove = ExprValueUtils.ipValue("198.51.101.2"); + + private static final ExprValue IPv6AddressBelow = + ExprValueUtils.ipValue("2001:0db7::ff00:42:8329"); + private static final ExprValue IPv6AddressWithin = + ExprValueUtils.ipValue("2001:0db8::ff00:42:8329"); + private static final ExprValue IPv6AddressAbove = + ExprValueUtils.ipValue("2001:0db9::ff00:42:8329"); + + // Mock value environment for testing. 
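+  // (Added descriptive note, consistent with the existing comment above.) The environment is
+  // stubbed rather than populated: these tests only need the "ip_address" reference to resolve
+  // to a specific address value. See the execute() helper below, which wires
+  // DSL.ref("ip_address", IP).valueOf(env) to the address under test before evaluation.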
+ @Mock private Environment env; + + @Test + public void cidrmatch_invalid_arguments() { + assertThrows( + SemanticCheckException.class, + () -> execute(ExprValueUtils.ipValue("INVALID"), IPv4Range), + "IP address string 'INVALID' is not valid. Error details: .*"); + assertThrows( + SemanticCheckException.class, + () -> execute(IPv4AddressWithin, ExprValueUtils.stringValue("INVALID")), + "IP address range string 'INVALID' is not valid. Error details: .*"); + } + + @Test + public void cidrmatch_valid_arguments() { + + assertEquals(LITERAL_FALSE, execute(IPv4AddressBelow, IPv4Range)); + assertEquals(LITERAL_TRUE, execute(IPv4AddressWithin, IPv4Range)); + assertEquals(LITERAL_FALSE, execute(IPv4AddressAbove, IPv4Range)); + + assertEquals(LITERAL_FALSE, execute(IPv4AddressBelow, IPv4RangeMapped)); + assertEquals(LITERAL_TRUE, execute(IPv4AddressWithin, IPv4RangeMapped)); + assertEquals(LITERAL_FALSE, execute(IPv4AddressAbove, IPv4RangeMapped)); + + assertEquals(LITERAL_FALSE, execute(IPv6AddressBelow, IPv6Range)); + assertEquals(LITERAL_TRUE, execute(IPv6AddressWithin, IPv6Range)); + assertEquals(LITERAL_FALSE, execute(IPv6AddressAbove, IPv6Range)); + } + + /** + * Builds and evaluates a {@code cidrmatch} function expression with the given address and range + * expression values, and returns the resulting value. + */ + private ExprValue execute(ExprValue address, ExprValue range) { + + final String fieldName = "ip_address"; + FunctionExpression exp = DSL.cidrmatch(DSL.ref(fieldName, IP), DSL.literal(range)); + + // Mock the value environment to return the specified field + // expression as the value for the "ip_address" field. + when(DSL.ref(fieldName, IP).valueOf(env)).thenReturn(address); + + return exp.valueOf(env); + } +} diff --git a/core/src/test/java/org/opensearch/sql/expression/operator/convert/TypeCastOperatorTest.java b/core/src/test/java/org/opensearch/sql/expression/operator/convert/TypeCastOperatorTest.java index 44a3ccabbd..fd579dfb47 100644 --- a/core/src/test/java/org/opensearch/sql/expression/operator/convert/TypeCastOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/operator/convert/TypeCastOperatorTest.java @@ -7,12 +7,14 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; import static org.opensearch.sql.data.type.ExprCoreType.BYTE; import static org.opensearch.sql.data.type.ExprCoreType.DATE; import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import static org.opensearch.sql.data.type.ExprCoreType.FLOAT; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; +import static org.opensearch.sql.data.type.ExprCoreType.IP; import static org.opensearch.sql.data.type.ExprCoreType.LONG; import static org.opensearch.sql.data.type.ExprCoreType.SHORT; import static org.opensearch.sql.data.type.ExprCoreType.STRING; @@ -29,12 +31,17 @@ import org.opensearch.sql.data.model.ExprDoubleValue; import org.opensearch.sql.data.model.ExprFloatValue; import org.opensearch.sql.data.model.ExprIntegerValue; +import org.opensearch.sql.data.model.ExprIpValue; import org.opensearch.sql.data.model.ExprLongValue; +import org.opensearch.sql.data.model.ExprMissingValue; +import org.opensearch.sql.data.model.ExprNullValue; import org.opensearch.sql.data.model.ExprShortValue; import org.opensearch.sql.data.model.ExprStringValue; import 
org.opensearch.sql.data.model.ExprTimeValue; import org.opensearch.sql.data.model.ExprTimestampValue; import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.exception.ExpressionEvaluationException; +import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.DSL; import org.opensearch.sql.expression.FunctionExpression; @@ -316,10 +323,6 @@ void castToTime() { assertEquals(TIME, expression.type()); assertEquals(new ExprTimeValue("01:01:01"), expression.valueOf()); - expression = DSL.castTime(DSL.literal(new ExprTimestampValue("2012-08-07 01:01:01"))); - assertEquals(TIME, expression.type()); - assertEquals(new ExprTimeValue("01:01:01"), expression.valueOf()); - expression = DSL.castTime(DSL.literal(new ExprTimeValue("01:01:01"))); assertEquals(TIME, expression.type()); assertEquals(new ExprTimeValue("01:01:01"), expression.valueOf()); @@ -334,9 +337,56 @@ void castToTimestamp() { expression = DSL.castTimestamp(DSL.literal(new ExprTimestampValue("2012-08-07 01:01:01"))); assertEquals(TIMESTAMP, expression.type()); assertEquals(new ExprTimestampValue("2012-08-07 01:01:01"), expression.valueOf()); + } - expression = DSL.castTimestamp(DSL.literal(new ExprTimestampValue("2012-08-07 01:01:01"))); - assertEquals(TIMESTAMP, expression.type()); - assertEquals(new ExprTimestampValue("2012-08-07 01:01:01"), expression.valueOf()); + @Test + void castToIp() { + FunctionExpression exp; + + final String ipv4String = "1.2.3.4"; + final String ipv6String = "2001:db7::ff00:42:8329"; + final String ipInvalidString = "INVALID"; + + final ExprValue exprIpv4Value = new ExprIpValue(ipv4String); + final ExprValue exprIpv6Value = new ExprIpValue(ipv6String); + + // From string + exp = DSL.castIp(DSL.literal(ipv4String)); + assertEquals(IP, exp.type()); + assertEquals(exprIpv4Value, exp.valueOf()); + + exp = DSL.castIp(DSL.literal(ipv6String)); + assertEquals(IP, exp.type()); + assertEquals(exprIpv6Value, exp.valueOf()); + + exp = DSL.castIp(DSL.literal(ipInvalidString)); + assertThrows( + SemanticCheckException.class, + exp::valueOf, + String.format("IP address string '%s' is not valid. 
Error details: .*", ipInvalidString)); + + // From IP address + exp = DSL.castIp(DSL.literal(exprIpv4Value)); + assertEquals(IP, exp.type()); + assertEquals(exprIpv4Value, exp.valueOf()); + + exp = DSL.castIp(DSL.literal(exprIpv6Value)); + assertEquals(IP, exp.type()); + assertEquals(exprIpv6Value, exp.valueOf()); + + // From invalid type + assertThrows( + ExpressionEvaluationException.class, + () -> DSL.castIp(DSL.literal(0)), + "cast_to_ip function expected {[IP],[STRING]}, but got [INTEGER]"); + + // From null or missing value + exp = DSL.castIp(DSL.literal(ExprNullValue.of())); + assertEquals(IP, exp.type()); + assertTrue(exp.valueOf().isNull()); + + exp = DSL.castIp(DSL.literal(ExprMissingValue.of())); + assertEquals(IP, exp.type()); + assertTrue(exp.valueOf().isMissing()); } } diff --git a/core/src/test/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperatorTest.java b/core/src/test/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperatorTest.java index 55dfbd35c2..3fc7f737f8 100644 --- a/core/src/test/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/operator/predicate/BinaryPredicateOperatorTest.java @@ -57,7 +57,7 @@ class BinaryPredicateOperatorTest extends ExpressionTestBase { - private static List STRING_PATTERN_PAIRS = + private static final List STRING_PATTERN_PAIRS = ImmutableList.of( new StringPatternPair("Michael!", ".*"), new StringPatternPair("new*\\n*line", "new\\\\*.\\\\*line"), @@ -584,7 +584,7 @@ void testRegexpString(StringPatternPair stringPatternPair) { assertEquals(stringPatternPair.regExpTest(), expression.valueOf(valueEnv()).integerValue()); } - /** Todo. remove this test cases after script serilization implemented. */ + /** Todo. remove this test cases after script serialization implemented. 
*/ @Test public void serializationTest() throws Exception { Expression expression = DSL.equal(DSL.literal("v1"), DSL.literal("v2")); diff --git a/core/src/test/java/org/opensearch/sql/expression/operator/predicate/UnaryPredicateOperatorTest.java b/core/src/test/java/org/opensearch/sql/expression/operator/predicate/UnaryPredicateOperatorTest.java index f7a1a7008a..7de4f456c9 100644 --- a/core/src/test/java/org/opensearch/sql/expression/operator/predicate/UnaryPredicateOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/expression/operator/predicate/UnaryPredicateOperatorTest.java @@ -221,12 +221,12 @@ public void test_if_predicate(Expression v1, Expression v2, Expression v3, Expre @ParameterizedTest @MethodSource("exprIfNullArguments") public void test_exprIfNull_predicate(ExprValue v1, ExprValue v2, ExprValue expected) { - assertEquals(expected.value(), UnaryPredicateOperator.exprIfNull(v1, v2).value()); + assertEquals(expected.value(), UnaryPredicateOperators.exprIfNull(v1, v2).value()); } @ParameterizedTest @MethodSource("exprNullIfArguments") public void test_exprNullIf_predicate(ExprValue v1, ExprValue v2, ExprValue expected) { - assertEquals(expected.value(), UnaryPredicateOperator.exprNullIf(v1, v2).value()); + assertEquals(expected.value(), UnaryPredicateOperators.exprNullIf(v1, v2).value()); } } diff --git a/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java b/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java index 45d8f6c03c..8ee0dd7e70 100644 --- a/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/DefaultImplementorTest.java @@ -7,11 +7,13 @@ import static java.util.Collections.emptyList; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; +import static org.opensearch.sql.ast.tree.Trendline.TrendlineType.SMA; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import static org.opensearch.sql.data.type.ExprCoreType.STRING; import static org.opensearch.sql.expression.DSL.literal; @@ -44,8 +46,10 @@ import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.tree.RareTopN.CommandType; import org.opensearch.sql.ast.tree.Sort; +import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.data.model.ExprBooleanValue; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.executor.pagination.PlanSerializer; @@ -63,11 +67,13 @@ import org.opensearch.sql.planner.logical.LogicalPlanDSL; import org.opensearch.sql.planner.logical.LogicalProject; import org.opensearch.sql.planner.logical.LogicalRelation; +import org.opensearch.sql.planner.logical.LogicalTrendline; import org.opensearch.sql.planner.logical.LogicalValues; import org.opensearch.sql.planner.physical.CursorCloseOperator; import org.opensearch.sql.planner.physical.PhysicalPlan; import org.opensearch.sql.planner.physical.PhysicalPlanDSL; import org.opensearch.sql.planner.physical.ProjectOperator; +import org.opensearch.sql.planner.physical.TrendlineOperator; import 
org.opensearch.sql.planner.physical.ValuesOperator; import org.opensearch.sql.storage.StorageEngine; import org.opensearch.sql.storage.Table; @@ -278,4 +284,45 @@ public void visitPaginate_should_remove_it_from_tree() { new ProjectOperator(new ValuesOperator(List.of(List.of())), List.of(), List.of()); assertEquals(physicalPlanTree, logicalPlanTree.accept(implementor, null)); } + + @Test + public void visitLimit_support_return_takeOrdered() { + // replace SortOperator + LimitOperator with TakeOrderedOperator + Pair sort = + ImmutablePair.of(Sort.SortOption.DEFAULT_ASC, ref("a", INTEGER)); + var logicalValues = values(emptyList()); + var logicalSort = sort(logicalValues, sort); + var logicalLimit = limit(logicalSort, 10, 5); + PhysicalPlan physicalPlanTree = + PhysicalPlanDSL.takeOrdered(PhysicalPlanDSL.values(emptyList()), 10, 5, sort); + assertEquals(physicalPlanTree, logicalLimit.accept(implementor, null)); + + // don't replace if LimitOperator's child is not SortOperator + Pair newEvalField = + ImmutablePair.of(ref("name1", STRING), ref("name", STRING)); + var logicalEval = eval(logicalSort, newEvalField); + logicalLimit = limit(logicalEval, 10, 5); + physicalPlanTree = + PhysicalPlanDSL.limit( + PhysicalPlanDSL.eval( + PhysicalPlanDSL.sort(PhysicalPlanDSL.values(emptyList()), sort), newEvalField), + 10, + 5); + assertEquals(physicalPlanTree, logicalLimit.accept(implementor, null)); + } + + @Test + public void visitTrendline_should_build_TrendlineOperator() { + var logicalChild = mock(LogicalPlan.class); + var physicalChild = mock(PhysicalPlan.class); + when(logicalChild.accept(implementor, null)).thenReturn(physicalChild); + final Trendline.TrendlineComputation computation = + AstDSL.computation(1, AstDSL.field("field"), "alias", SMA); + var logicalPlan = + new LogicalTrendline( + logicalChild, Collections.singletonList(Pair.of(computation, ExprCoreType.DOUBLE))); + var implemented = logicalPlan.accept(implementor, null); + assertInstanceOf(TrendlineOperator.class, implemented); + assertSame(physicalChild, implemented.getChild().get(0)); + } } diff --git a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java index f212749f48..43ce23ed56 100644 --- a/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/logical/LogicalPlanNodeVisitorTest.java @@ -8,6 +8,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNull; import static org.mockito.Mockito.mock; +import static org.opensearch.sql.ast.tree.Trendline.TrendlineType.SMA; import static org.opensearch.sql.data.type.ExprCoreType.STRING; import static org.opensearch.sql.expression.DSL.named; @@ -25,9 +26,11 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; +import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.tree.RareTopN.CommandType; import org.opensearch.sql.ast.tree.Sort.SortOption; import org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.expression.DSL; import org.opensearch.sql.expression.Expression; import org.opensearch.sql.expression.LiteralExpression; @@ -141,6 +144,14 @@ public TableWriteOperator build(PhysicalPlan child) { LogicalCloseCursor closeCursor = new 
LogicalCloseCursor(cursor); + LogicalTrendline trendline = + new LogicalTrendline( + relation, + Collections.singletonList( + Pair.of( + AstDSL.computation(1, AstDSL.field("testField"), "dummy", SMA), + ExprCoreType.DOUBLE))); + return Stream.of( relation, tableScanBuilder, @@ -163,7 +174,8 @@ public TableWriteOperator build(PhysicalPlan child) { paginate, nested, cursor, - closeCursor) + closeCursor, + trendline) .map(Arguments::of); } diff --git a/core/src/test/java/org/opensearch/sql/planner/optimizer/LogicalPlanOptimizerTest.java b/core/src/test/java/org/opensearch/sql/planner/optimizer/LogicalPlanOptimizerTest.java index c25e415cfa..20996503b4 100644 --- a/core/src/test/java/org/opensearch/sql/planner/optimizer/LogicalPlanOptimizerTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/optimizer/LogicalPlanOptimizerTest.java @@ -15,6 +15,7 @@ import static org.opensearch.sql.data.model.ExprValueUtils.longValue; import static org.opensearch.sql.data.type.ExprCoreType.*; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.aggregation; +import static org.opensearch.sql.planner.logical.LogicalPlanDSL.eval; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.filter; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.highlight; import static org.opensearch.sql.planner.logical.LogicalPlanDSL.limit; @@ -43,6 +44,7 @@ import org.opensearch.sql.ast.tree.Sort; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.Expression; import org.opensearch.sql.expression.NamedExpression; import org.opensearch.sql.expression.ReferenceExpression; import org.opensearch.sql.planner.logical.LogicalPaginate; @@ -345,6 +347,27 @@ void table_scan_builder_support_offset_push_down_can_apply_its_rule() { assertEquals(project(tableScanBuilder), optimized); } + /** Limit - Eval --> Eval - Limit. */ + @Test + void push_limit_under_eval() { + Pair evalExpr = + Pair.of(DSL.ref("name1", STRING), DSL.ref("name", STRING)); + assertEquals( + eval(limit(tableScanBuilder, 10, 5), evalExpr), + optimize(limit(eval(relation("schema", table), evalExpr), 10, 5))); + } + + /** Limit - Eval - Scan --> Eval - Scan. 
*/ + @Test + void push_limit_through_eval_into_scan() { + when(tableScanBuilder.pushDownLimit(any())).thenReturn(true); + Pair evalExpr = + Pair.of(DSL.ref("name1", STRING), DSL.ref("name", STRING)); + assertEquals( + eval(tableScanBuilder, evalExpr), + optimize(limit(eval(relation("schema", table), evalExpr), 10, 5))); + } + private LogicalPlan optimize(LogicalPlan plan) { final LogicalPlanOptimizer optimizer = LogicalPlanOptimizer.create(); return optimizer.optimize(plan); diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/FilterOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/FilterOperatorTest.java index bfe3b323c4..ba2354b168 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/FilterOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/FilterOperatorTest.java @@ -8,14 +8,24 @@ import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_FALSE; import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_MISSING; import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_NULL; +import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_TRUE; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; +import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.filter; import com.google.common.collect.ImmutableMap; import java.util.LinkedHashMap; import java.util.List; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.DisplayNameGeneration; import org.junit.jupiter.api.DisplayNameGenerator; import org.junit.jupiter.api.Test; @@ -26,12 +36,22 @@ import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.Expression; @ExtendWith(MockitoExtension.class) @DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) class FilterOperatorTest extends PhysicalPlanTestBase { @Mock private PhysicalPlan inputPlan; + @Mock private Expression condition; + + private FilterOperator filterOperator; + + @BeforeEach + public void setup() { + filterOperator = filter(inputPlan, condition); + } + @Test public void filter_test() { FilterOperator plan = @@ -82,4 +102,68 @@ public void missing_value_should_been_ignored() { List result = execute(plan); assertEquals(0, result.size()); } + + @Test + public void testHasNextWhenInputHasNoElements() { + when(inputPlan.hasNext()).thenReturn(false); + + assertFalse( + filterOperator.hasNext(), "hasNext() should return false when input has no elements"); + } + + @Test + public void testHasNextWithMatchingCondition() { + ExprValue inputValue = mock(ExprValue.class); + when(inputPlan.hasNext()).thenReturn(true).thenReturn(false); + when(inputPlan.next()).thenReturn(inputValue); + when(condition.valueOf(any())).thenReturn(LITERAL_TRUE); + + assertTrue(filterOperator.hasNext(), "hasNext() should return true when condition matches"); + assertEquals( + inputValue, filterOperator.next(), "next() should return 
the matching input value"); + } + + @Test + public void testHasNextWithNonMatchingCondition() { + ExprValue inputValue = mock(ExprValue.class); + when(inputPlan.hasNext()).thenReturn(true, false); + when(inputPlan.next()).thenReturn(inputValue); + when(condition.valueOf(any())).thenReturn(LITERAL_FALSE); + + assertFalse( + filterOperator.hasNext(), "hasNext() should return false if no values match the condition"); + } + + @Test + public void testMultipleCallsToHasNextDoNotConsumeInput() { + ExprValue inputValue = mock(ExprValue.class); + when(inputPlan.hasNext()).thenReturn(true); + when(inputPlan.next()).thenReturn(inputValue); + when(condition.valueOf(any())).thenReturn(LITERAL_TRUE); + + assertTrue( + filterOperator.hasNext(), + "First hasNext() call should return true if there is a matching value"); + verify(inputPlan, times(1)).next(); + assertTrue( + filterOperator.hasNext(), + "Subsequent hasNext() calls should still return true without advancing the input"); + verify(inputPlan, times(1)).next(); + assertEquals( + inputValue, filterOperator.next(), "next() should return the matching input value"); + verify(inputPlan, times(1)).next(); + } + + @Test + public void testNextWithoutCallingHasNext() { + ExprValue inputValue = mock(ExprValue.class); + when(inputPlan.hasNext()).thenReturn(true, false); + when(inputPlan.next()).thenReturn(inputValue); + when(condition.valueOf(any())).thenReturn(LITERAL_TRUE); + + assertEquals( + inputValue, + filterOperator.next(), + "next() should return the matching input value even if hasNext() was not called"); + } } diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java index c91ae8787c..26f288e6b6 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanNodeVisitorTest.java @@ -9,6 +9,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNull; import static org.mockito.Mockito.mock; +import static org.opensearch.sql.ast.tree.Trendline.TrendlineType.SMA; import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import static org.opensearch.sql.expression.DSL.named; @@ -22,12 +23,14 @@ import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.remove; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.rename; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.sort; +import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.takeOrdered; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.values; import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.window; import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Set; @@ -42,6 +45,7 @@ import org.junit.jupiter.params.provider.MethodSource; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.tree.RareTopN.CommandType; import org.opensearch.sql.ast.tree.Sort.SortOption; import org.opensearch.sql.expression.DSL; @@ -64,7 +68,16 @@ public void print_physical_plan() { agg( rareTopN( filter( - 
limit(new TestScan(), 1, 1), + limit( + new TrendlineOperator( + new TestScan(), + Collections.singletonList( + Pair.of( + AstDSL.computation( + 1, AstDSL.field("field"), "alias", SMA), + DOUBLE))), + 1, + 1), DSL.equal(DSL.ref("response", INTEGER), DSL.literal(10))), CommandType.TOP, ImmutableList.of(), @@ -84,7 +97,8 @@ public void print_physical_plan() { + "\t\t\tAggregation->\n" + "\t\t\t\tRareTopN->\n" + "\t\t\t\t\tFilter->\n" - + "\t\t\t\t\t\tLimit->", + + "\t\t\t\t\t\tLimit->\n" + + "\t\t\t\t\t\t\tTrendline->", printer.print(plan)); } @@ -117,6 +131,8 @@ public static Stream getPhysicalPlanForTest() { PhysicalPlan sort = sort(plan, Pair.of(SortOption.DEFAULT_ASC, ref)); + PhysicalPlan takeOrdered = takeOrdered(plan, 1, 1, Pair.of(SortOption.DEFAULT_ASC, ref)); + PhysicalPlan dedupe = dedupe(plan, ref); PhysicalPlan values = values(emptyList()); @@ -131,6 +147,12 @@ public static Stream getPhysicalPlanForTest() { PhysicalPlan cursorClose = new CursorCloseOperator(plan); + PhysicalPlan trendline = + new TrendlineOperator( + plan, + Collections.singletonList( + Pair.of(AstDSL.computation(1, AstDSL.field("field"), "alias", SMA), DOUBLE))); + return Stream.of( Arguments.of(filter, "filter"), Arguments.of(aggregation, "aggregation"), @@ -140,12 +162,14 @@ public static Stream getPhysicalPlanForTest() { Arguments.of(remove, "remove"), Arguments.of(eval, "eval"), Arguments.of(sort, "sort"), + Arguments.of(takeOrdered, "takeOrdered"), Arguments.of(dedupe, "dedupe"), Arguments.of(values, "values"), Arguments.of(rareTopN, "rareTopN"), Arguments.of(limit, "limit"), Arguments.of(nested, "nested"), - Arguments.of(cursorClose, "cursorClose")); + Arguments.of(cursorClose, "cursorClose"), + Arguments.of(trendline, "trendline")); } @ParameterizedTest(name = "{1}") @@ -219,6 +243,11 @@ public String visitLimit(LimitOperator node, Integer tabs) { return name(node, "Limit->", tabs); } + @Override + public String visitTrendline(TrendlineOperator node, Integer tabs) { + return name(node, "Trendline->", tabs); + } + private String name(PhysicalPlan node, String current, int tabs) { String child = node.getChild().get(0).accept(this, tabs + 1); StringBuilder sb = new StringBuilder(); diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanTest.java index d63ab35773..eb35c00d9e 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanTest.java @@ -24,7 +24,7 @@ class PhysicalPlanTest { @Mock PhysicalPlan child; - private PhysicalPlan testPlan = + private final PhysicalPlan testPlan = new PhysicalPlan() { @Override public R accept(PhysicalPlanNodeVisitor visitor, C context) { diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanTestBase.java b/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanTestBase.java index 6399f945ed..397f241484 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanTestBase.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/PhysicalPlanTestBase.java @@ -93,7 +93,7 @@ public class PhysicalPlanTestBase { ImmutableMap.of("ip", "74.125.19.106", "action", "POST", "response", 500))) .build(); - private static Map typeMapping = + private static final Map typeMapping = new ImmutableMap.Builder() .put("ip", ExprCoreType.STRING) .put("action", ExprCoreType.STRING) diff --git 
a/core/src/test/java/org/opensearch/sql/planner/physical/TakeOrderedOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/TakeOrderedOperatorTest.java new file mode 100644 index 0000000000..f2fcb84910 --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/planner/physical/TakeOrderedOperatorTest.java @@ -0,0 +1,607 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.planner.physical; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.contains; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.when; +import static org.opensearch.sql.data.model.ExprValueUtils.tupleValue; +import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; +import static org.opensearch.sql.expression.DSL.ref; +import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.limit; +import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.sort; +import static org.opensearch.sql.planner.physical.PhysicalPlanDSL.takeOrdered; + +import com.google.common.collect.ImmutableMap; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import lombok.Getter; +import lombok.Setter; +import org.apache.commons.lang3.tuple.Pair; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mockito; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.ast.tree.Sort.SortOption; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.expression.Expression; + +/** + * To make sure {@link TakeOrderedOperator} can replace {@link SortOperator} + {@link + * LimitOperator}, this UT will replica all tests in {@link SortOperatorTest} and add more test + * cases on different limit and offset. + */ +@ExtendWith(MockitoExtension.class) +class TakeOrderedOperatorTest extends PhysicalPlanTestBase { + private static PhysicalPlan inputPlan; + + @Getter + @Setter + private static class Wrapper { + Iterator iterator = Collections.emptyIterator(); + } + + private static final Wrapper wrapper = new Wrapper(); + + @BeforeAll + public static void setUp() { + inputPlan = Mockito.mock(PhysicalPlan.class); + when(inputPlan.hasNext()) + .thenAnswer((InvocationOnMock invocation) -> wrapper.iterator.hasNext()); + when(inputPlan.next()).thenAnswer((InvocationOnMock invocation) -> wrapper.iterator.next()); + } + + /** + * construct the map which contain null value, because {@link ImmutableMap} doesn't support null + * value. 
+ */ + private static final Map NULL_MAP = + new HashMap<>() { + { + put("size", 399); + put("response", null); + } + }; + + @Test + public void sort_one_field_asc() { + List inputList = + Arrays.asList( + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(ImmutableMap.of("size", 399, "response", 503))); + + List> sortList = + List.of(Pair.of(SortOption.DEFAULT_ASC, ref("response", INTEGER))); + + test_takeOrdered_with_sort_limit( + inputList, + 3, + 0, + sortList, + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 399, "response", 503))); + + test_takeOrdered_with_sort_limit( + inputList, + 2, + 0, + sortList, + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(ImmutableMap.of("size", 499, "response", 404))); + + test_takeOrdered_with_sort_limit( + inputList, + 2, + 1, + sortList, + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 399, "response", 503))); + + test_takeOrdered_with_sort_limit(inputList, 0, 1, sortList); + } + + @Test + public void sort_one_field_with_duplication() { + List inputList = + Arrays.asList( + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 320, "response", 404)), + tupleValue(ImmutableMap.of("size", 399, "response", 503))); + + List> sortList = + List.of(Pair.of(SortOption.DEFAULT_ASC, ref("response", INTEGER))); + + test_takeOrdered_with_sort_limit( + inputList, + 3, + 0, + sortList, + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 320, "response", 404)), + tupleValue(ImmutableMap.of("size", 399, "response", 503))); + + test_takeOrdered_with_sort_limit( + inputList, + 2, + 0, + sortList, + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 320, "response", 404))); + + test_takeOrdered_with_sort_limit( + inputList, + 2, + 1, + sortList, + tupleValue(ImmutableMap.of("size", 320, "response", 404)), + tupleValue(ImmutableMap.of("size", 399, "response", 503))); + + test_takeOrdered_with_sort_limit(inputList, 0, 1, sortList); + } + + @Test + public void sort_one_field_asc_with_null_value() { + List inputList = + Arrays.asList( + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(ImmutableMap.of("size", 399, "response", 503)), + tupleValue(NULL_MAP)); + + List> sortList = + List.of(Pair.of(SortOption.DEFAULT_ASC, ref("response", INTEGER))); + + test_takeOrdered_with_sort_limit( + inputList, + 4, + 0, + sortList, + tupleValue(NULL_MAP), + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 399, "response", 503))); + + test_takeOrdered_with_sort_limit( + inputList, + 3, + 0, + sortList, + tupleValue(NULL_MAP), + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(ImmutableMap.of("size", 499, "response", 404))); + + test_takeOrdered_with_sort_limit( + inputList, + 3, + 1, + sortList, + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 399, "response", 503))); + + test_takeOrdered_with_sort_limit(inputList, 0, 1, sortList); + } + + @Test + public void 
sort_one_field_asc_with_missing_value() { + List inputList = + Arrays.asList( + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(ImmutableMap.of("size", 399, "response", 503)), + tupleValue(ImmutableMap.of("size", 399))); + + List> sortList = + List.of(Pair.of(SortOption.DEFAULT_ASC, ref("response", INTEGER))); + test_takeOrdered_with_sort_limit( + inputList, + 4, + 0, + sortList, + tupleValue(ImmutableMap.of("size", 399)), + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 399, "response", 503))); + + test_takeOrdered_with_sort_limit( + inputList, + 3, + 0, + sortList, + tupleValue(ImmutableMap.of("size", 399)), + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(ImmutableMap.of("size", 499, "response", 404))); + + test_takeOrdered_with_sort_limit( + inputList, + 3, + 1, + sortList, + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 399, "response", 503))); + + test_takeOrdered_with_sort_limit(inputList, 0, 1, sortList); + } + + @Test + public void sort_one_field_desc() { + List inputList = + Arrays.asList( + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(ImmutableMap.of("size", 399, "response", 503))); + + List> sortList = + List.of(Pair.of(SortOption.DEFAULT_DESC, ref("response", INTEGER))); + + test_takeOrdered_with_sort_limit( + inputList, + 3, + 0, + sortList, + tupleValue(ImmutableMap.of("size", 399, "response", 503)), + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 320, "response", 200))); + + test_takeOrdered_with_sort_limit( + inputList, + 2, + 0, + sortList, + tupleValue(ImmutableMap.of("size", 399, "response", 503)), + tupleValue(ImmutableMap.of("size", 499, "response", 404))); + + test_takeOrdered_with_sort_limit( + inputList, + 2, + 1, + sortList, + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 320, "response", 200))); + + test_takeOrdered_with_sort_limit(inputList, 0, 1, sortList); + } + + @Test + public void sort_one_field_desc_with_null_value() { + List inputList = + Arrays.asList( + tupleValue(NULL_MAP), + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(ImmutableMap.of("size", 399, "response", 503))); + + List> sortList = + List.of(Pair.of(SortOption.DEFAULT_DESC, ref("response", INTEGER))); + + test_takeOrdered_with_sort_limit( + inputList, + 4, + 0, + sortList, + tupleValue(ImmutableMap.of("size", 399, "response", 503)), + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(NULL_MAP)); + + test_takeOrdered_with_sort_limit( + inputList, + 3, + 0, + sortList, + tupleValue(ImmutableMap.of("size", 399, "response", 503)), + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 320, "response", 200))); + + test_takeOrdered_with_sort_limit( + inputList, + 3, + 1, + sortList, + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(NULL_MAP)); + + test_takeOrdered_with_sort_limit(inputList, 0, 1, 
sortList); + } + + @Test + public void sort_one_field_with_duplicate_value() { + List inputList = + Arrays.asList( + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 399, "response", 503))); + + List> sortList = + List.of(Pair.of(SortOption.DEFAULT_ASC, ref("response", INTEGER))); + + test_takeOrdered_with_sort_limit( + inputList, + 4, + 0, + sortList, + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 399, "response", 503))); + + test_takeOrdered_with_sort_limit( + inputList, + 3, + 0, + sortList, + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 499, "response", 404))); + + test_takeOrdered_with_sort_limit( + inputList, + 3, + 1, + sortList, + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 399, "response", 503))); + + test_takeOrdered_with_sort_limit(inputList, 0, 1, sortList); + } + + @Test + public void sort_two_fields_both_asc() { + List inputList = + Arrays.asList( + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(ImmutableMap.of("size", 399, "response", 200)), + tupleValue(ImmutableMap.of("size", 399, "response", 503)), + tupleValue(NULL_MAP)); + + List> sortList = + List.of( + Pair.of(SortOption.DEFAULT_ASC, ref("size", INTEGER)), + Pair.of(SortOption.DEFAULT_ASC, ref("response", INTEGER))); + + test_takeOrdered_with_sort_limit( + inputList, + 5, + 0, + sortList, + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(NULL_MAP), + tupleValue(ImmutableMap.of("size", 399, "response", 200)), + tupleValue(ImmutableMap.of("size", 399, "response", 503)), + tupleValue(ImmutableMap.of("size", 499, "response", 404))); + + test_takeOrdered_with_sort_limit( + inputList, + 4, + 0, + sortList, + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(NULL_MAP), + tupleValue(ImmutableMap.of("size", 399, "response", 200)), + tupleValue(ImmutableMap.of("size", 399, "response", 503))); + + test_takeOrdered_with_sort_limit( + inputList, + 4, + 1, + sortList, + tupleValue(NULL_MAP), + tupleValue(ImmutableMap.of("size", 399, "response", 200)), + tupleValue(ImmutableMap.of("size", 399, "response", 503)), + tupleValue(ImmutableMap.of("size", 499, "response", 404))); + + test_takeOrdered_with_sort_limit(inputList, 0, 1, sortList); + } + + @Test + public void sort_two_fields_both_desc() { + List inputList = + Arrays.asList( + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(ImmutableMap.of("size", 399, "response", 200)), + tupleValue(ImmutableMap.of("size", 399, "response", 503)), + tupleValue(NULL_MAP)); + + List> sortList = + List.of( + Pair.of(SortOption.DEFAULT_DESC, ref("size", INTEGER)), + Pair.of(SortOption.DEFAULT_DESC, ref("response", INTEGER))); + + test_takeOrdered_with_sort_limit( + inputList, + 5, + 0, + sortList, + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 399, "response", 503)), + 
tupleValue(ImmutableMap.of("size", 399, "response", 200)), + tupleValue(NULL_MAP), + tupleValue(ImmutableMap.of("size", 320, "response", 200))); + + test_takeOrdered_with_sort_limit( + inputList, + 4, + 0, + sortList, + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 399, "response", 503)), + tupleValue(ImmutableMap.of("size", 399, "response", 200)), + tupleValue(NULL_MAP)); + + test_takeOrdered_with_sort_limit( + inputList, + 4, + 1, + sortList, + tupleValue(ImmutableMap.of("size", 399, "response", 503)), + tupleValue(ImmutableMap.of("size", 399, "response", 200)), + tupleValue(NULL_MAP), + tupleValue(ImmutableMap.of("size", 320, "response", 200))); + + test_takeOrdered_with_sort_limit(inputList, 0, 1, sortList); + } + + @Test + public void sort_two_fields_asc_and_desc() { + List inputList = + Arrays.asList( + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(ImmutableMap.of("size", 399, "response", 200)), + tupleValue(ImmutableMap.of("size", 399, "response", 503)), + tupleValue(NULL_MAP)); + + List> sortList = + List.of( + Pair.of(SortOption.DEFAULT_ASC, ref("size", INTEGER)), + Pair.of(SortOption.DEFAULT_DESC, ref("response", INTEGER))); + + test_takeOrdered_with_sort_limit( + inputList, + 5, + 0, + sortList, + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(ImmutableMap.of("size", 399, "response", 503)), + tupleValue(ImmutableMap.of("size", 399, "response", 200)), + tupleValue(NULL_MAP), + tupleValue(ImmutableMap.of("size", 499, "response", 404))); + + test_takeOrdered_with_sort_limit( + inputList, + 4, + 0, + sortList, + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(ImmutableMap.of("size", 399, "response", 503)), + tupleValue(ImmutableMap.of("size", 399, "response", 200)), + tupleValue(NULL_MAP)); + + test_takeOrdered_with_sort_limit( + inputList, + 4, + 1, + sortList, + tupleValue(ImmutableMap.of("size", 399, "response", 503)), + tupleValue(ImmutableMap.of("size", 399, "response", 200)), + tupleValue(NULL_MAP), + tupleValue(ImmutableMap.of("size", 499, "response", 404))); + + test_takeOrdered_with_sort_limit(inputList, 0, 1, sortList); + } + + @Test + public void sort_two_fields_desc_and_asc() { + List inputList = + Arrays.asList( + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(ImmutableMap.of("size", 399, "response", 200)), + tupleValue(ImmutableMap.of("size", 399, "response", 503)), + tupleValue(NULL_MAP)); + + List> sortList = + List.of( + Pair.of(SortOption.DEFAULT_DESC, ref("size", INTEGER)), + Pair.of(SortOption.DEFAULT_ASC, ref("response", INTEGER))); + + test_takeOrdered_with_sort_limit( + inputList, + 5, + 0, + sortList, + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(NULL_MAP), + tupleValue(ImmutableMap.of("size", 399, "response", 200)), + tupleValue(ImmutableMap.of("size", 399, "response", 503)), + tupleValue(ImmutableMap.of("size", 320, "response", 200))); + + test_takeOrdered_with_sort_limit( + inputList, + 4, + 0, + sortList, + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(NULL_MAP), + tupleValue(ImmutableMap.of("size", 399, "response", 200)), + tupleValue(ImmutableMap.of("size", 399, "response", 503))); + + test_takeOrdered_with_sort_limit( + inputList, + 4, + 1, + sortList, + tupleValue(NULL_MAP), + tupleValue(ImmutableMap.of("size", 399, "response", 
200)), + tupleValue(ImmutableMap.of("size", 399, "response", 503)), + tupleValue(ImmutableMap.of("size", 320, "response", 200))); + + test_takeOrdered_with_sort_limit(inputList, 0, 1, sortList); + } + + @Test + public void sort_one_field_without_input() { + wrapper.setIterator(Collections.emptyIterator()); + assertEquals( + 0, + execute( + takeOrdered( + inputPlan, 1, 0, Pair.of(SortOption.DEFAULT_ASC, ref("response", INTEGER)))) + .size()); + } + + @Test + public void offset_exceeds_row_number() { + List inputList = + Arrays.asList( + tupleValue(ImmutableMap.of("size", 499, "response", 404)), + tupleValue(ImmutableMap.of("size", 320, "response", 200)), + tupleValue(ImmutableMap.of("size", 399, "response", 200)), + tupleValue(ImmutableMap.of("size", 399, "response", 503)), + tupleValue(NULL_MAP)); + + wrapper.setIterator(inputList.iterator()); + PhysicalPlan plan = + takeOrdered(inputPlan, 1, 6, Pair.of(SortOption.DEFAULT_ASC, ref("response", INTEGER))); + List result = execute(plan); + assertEquals(0, result.size()); + } + + private void test_takeOrdered_with_sort_limit( + List inputList, + int limit, + int offset, + List> sortList, + ExprValue... expected) { + wrapper.setIterator(inputList.iterator()); + List compareResult = + execute(limit(sort(inputPlan, sortList.toArray(Pair[]::new)), limit, offset)); + wrapper.setIterator(inputList.iterator()); + List testResult = + execute(takeOrdered(inputPlan, limit, offset, sortList.toArray(Pair[]::new))); + assertEquals(compareResult, testResult); + if (expected.length == 0) { + assertEquals(0, testResult.size()); + } else { + assertThat(testResult, contains(expected)); + } + } +} diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/TrendlineOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/TrendlineOperatorTest.java new file mode 100644 index 0000000000..ef2c2907ce --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/planner/physical/TrendlineOperatorTest.java @@ -0,0 +1,398 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.planner.physical; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.when; +import static org.opensearch.sql.ast.tree.Trendline.TrendlineType.SMA; + +import com.google.common.collect.ImmutableMap; +import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalTime; +import java.util.Arrays; +import java.util.Collections; +import org.apache.commons.lang3.tuple.Pair; +import org.junit.jupiter.api.DisplayNameGeneration; +import org.junit.jupiter.api.DisplayNameGenerator; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.ast.dsl.AstDSL; +import org.opensearch.sql.data.model.ExprNullValue; +import org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.data.type.ExprCoreType; + +@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) +@ExtendWith(MockitoExtension.class) +public class TrendlineOperatorTest { + @Mock private PhysicalPlan inputPlan; + + @Test + public void calculates_simple_moving_average_one_field_one_sample() { + when(inputPlan.hasNext()).thenReturn(true, false); + when(inputPlan.next()) 
+ .thenReturn(ExprValueUtils.tupleValue(ImmutableMap.of("distance", 100, "time", 10))); + + var plan = + new TrendlineOperator( + inputPlan, + Collections.singletonList( + Pair.of( + AstDSL.computation(1, AstDSL.field("distance"), "distance_alias", SMA), + ExprCoreType.DOUBLE))); + + plan.open(); + assertTrue(plan.hasNext()); + assertEquals( + ExprValueUtils.tupleValue( + ImmutableMap.of("distance", 100, "time", 10, "distance_alias", 100)), + plan.next()); + } + + @Test + public void calculates_simple_moving_average_one_field_two_samples() { + when(inputPlan.hasNext()).thenReturn(true, true, false); + when(inputPlan.next()) + .thenReturn( + ExprValueUtils.tupleValue(ImmutableMap.of("distance", 100, "time", 10)), + ExprValueUtils.tupleValue(ImmutableMap.of("distance", 200, "time", 10))); + + var plan = + new TrendlineOperator( + inputPlan, + Collections.singletonList( + Pair.of( + AstDSL.computation(2, AstDSL.field("distance"), "distance_alias", SMA), + ExprCoreType.DOUBLE))); + + plan.open(); + assertTrue(plan.hasNext()); + assertEquals( + ExprValueUtils.tupleValue(ImmutableMap.of("distance", 100, "time", 10)), plan.next()); + assertTrue(plan.hasNext()); + assertEquals( + ExprValueUtils.tupleValue( + ImmutableMap.of("distance", 200, "time", 10, "distance_alias", 150.0)), + plan.next()); + assertFalse(plan.hasNext()); + } + + @Test + public void calculates_simple_moving_average_one_field_two_samples_three_rows() { + when(inputPlan.hasNext()).thenReturn(true, true, true, false); + when(inputPlan.next()) + .thenReturn( + ExprValueUtils.tupleValue(ImmutableMap.of("distance", 100, "time", 10)), + ExprValueUtils.tupleValue(ImmutableMap.of("distance", 200, "time", 10)), + ExprValueUtils.tupleValue(ImmutableMap.of("distance", 200, "time", 10))); + + var plan = + new TrendlineOperator( + inputPlan, + Collections.singletonList( + Pair.of( + AstDSL.computation(2, AstDSL.field("distance"), "distance_alias", SMA), + ExprCoreType.DOUBLE))); + + plan.open(); + assertTrue(plan.hasNext()); + assertEquals( + ExprValueUtils.tupleValue(ImmutableMap.of("distance", 100, "time", 10)), plan.next()); + assertTrue(plan.hasNext()); + assertEquals( + ExprValueUtils.tupleValue( + ImmutableMap.of("distance", 200, "time", 10, "distance_alias", 150.0)), + plan.next()); + assertTrue(plan.hasNext()); + assertEquals( + ExprValueUtils.tupleValue( + ImmutableMap.of("distance", 200, "time", 10, "distance_alias", 200.0)), + plan.next()); + assertFalse(plan.hasNext()); + } + + @Test + public void calculates_simple_moving_average_multiple_computations() { + when(inputPlan.hasNext()).thenReturn(true, true, true, false); + when(inputPlan.next()) + .thenReturn( + ExprValueUtils.tupleValue(ImmutableMap.of("distance", 100, "time", 10)), + ExprValueUtils.tupleValue(ImmutableMap.of("distance", 200, "time", 20)), + ExprValueUtils.tupleValue(ImmutableMap.of("distance", 200, "time", 20))); + + var plan = + new TrendlineOperator( + inputPlan, + Arrays.asList( + Pair.of( + AstDSL.computation(2, AstDSL.field("distance"), "distance_alias", SMA), + ExprCoreType.DOUBLE), + Pair.of( + AstDSL.computation(2, AstDSL.field("time"), "time_alias", SMA), + ExprCoreType.DOUBLE))); + + plan.open(); + assertTrue(plan.hasNext()); + assertEquals( + ExprValueUtils.tupleValue(ImmutableMap.of("distance", 100, "time", 10)), plan.next()); + assertTrue(plan.hasNext()); + assertEquals( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "distance", 200, "time", 20, "distance_alias", 150.0, "time_alias", 15.0)), + plan.next()); + assertTrue(plan.hasNext()); + 
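For orientation on what these TrendlineOperator tests assert: a simple moving average over a window of N produces no value until N samples have been seen, and from then on emits the arithmetic mean of the last N inputs. A minimal sketch of that arithmetic, assuming plain numeric inputs (the class and method below are illustrative, not the operator's internals):

```java
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.List;

// Illustrative only: windowed mean as exercised by the SMA tests in this file.
final class SimpleMovingAverageSketch {
  static List<Double> sma(int window, double... samples) {
    Deque<Double> buffer = new ArrayDeque<>();
    List<Double> out = new ArrayList<>();
    double sum = 0;
    for (double sample : samples) {
      buffer.addLast(sample);
      sum += sample;
      if (buffer.size() > window) {
        sum -= buffer.removeFirst(); // slide the window forward
      }
      if (buffer.size() == window) {
        out.add(sum / window);
      } else {
        out.add(null); // window not yet full: no trendline value for this row
      }
    }
    return out;
  }
}
```

Read this way, sma(2, 100, 200, 200) gives [null, 150.0, 200.0], which is what the distance_alias assertions in these tests encode.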
assertEquals( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "distance", 200, "time", 20, "distance_alias", 200.0, "time_alias", 20.0)), + plan.next()); + assertFalse(plan.hasNext()); + } + + @Test + public void alias_overwrites_input_field() { + when(inputPlan.hasNext()).thenReturn(true, true, true, false); + when(inputPlan.next()) + .thenReturn( + ExprValueUtils.tupleValue(ImmutableMap.of("distance", 100, "time", 10)), + ExprValueUtils.tupleValue(ImmutableMap.of("distance", 200, "time", 10)), + ExprValueUtils.tupleValue(ImmutableMap.of("distance", 200, "time", 10))); + + var plan = + new TrendlineOperator( + inputPlan, + Collections.singletonList( + Pair.of( + AstDSL.computation(2, AstDSL.field("distance"), "time", SMA), + ExprCoreType.DOUBLE))); + + plan.open(); + assertTrue(plan.hasNext()); + assertEquals(ExprValueUtils.tupleValue(ImmutableMap.of("distance", 100)), plan.next()); + assertTrue(plan.hasNext()); + assertEquals( + ExprValueUtils.tupleValue(ImmutableMap.of("distance", 200, "time", 150.0)), plan.next()); + assertTrue(plan.hasNext()); + assertEquals( + ExprValueUtils.tupleValue(ImmutableMap.of("distance", 200, "time", 200.0)), plan.next()); + assertFalse(plan.hasNext()); + } + + @Test + public void calculates_simple_moving_average_one_field_two_samples_three_rows_null_value() { + when(inputPlan.hasNext()).thenReturn(true, true, true, false); + when(inputPlan.next()) + .thenReturn( + ExprValueUtils.tupleValue(ImmutableMap.of("time", 10)), + ExprValueUtils.tupleValue(ImmutableMap.of("distance", 200, "time", 10)), + ExprValueUtils.tupleValue(ImmutableMap.of("distance", 300, "time", 10))); + + var plan = + new TrendlineOperator( + inputPlan, + Collections.singletonList( + Pair.of( + AstDSL.computation(2, AstDSL.field("distance"), "distance_alias", SMA), + ExprCoreType.DOUBLE))); + + plan.open(); + assertTrue(plan.hasNext()); + assertEquals(ExprValueUtils.tupleValue(ImmutableMap.of("time", 10)), plan.next()); + assertTrue(plan.hasNext()); + assertEquals( + ExprValueUtils.tupleValue(ImmutableMap.of("distance", 200, "time", 10)), plan.next()); + assertTrue(plan.hasNext()); + assertEquals( + ExprValueUtils.tupleValue( + ImmutableMap.of("distance", 300, "time", 10, "distance_alias", 250.0)), + plan.next()); + assertFalse(plan.hasNext()); + } + + @Test + public void use_null_value() { + when(inputPlan.hasNext()).thenReturn(true, true, true, false); + when(inputPlan.next()) + .thenReturn( + ExprValueUtils.tupleValue(ImmutableMap.of("time", 10)), + ExprValueUtils.tupleValue(ImmutableMap.of("distance", ExprNullValue.of(), "time", 10)), + ExprValueUtils.tupleValue(ImmutableMap.of("distance", 100, "time", 10))); + + var plan = + new TrendlineOperator( + inputPlan, + Collections.singletonList( + Pair.of( + AstDSL.computation(1, AstDSL.field("distance"), "distance_alias", SMA), + ExprCoreType.DOUBLE))); + + plan.open(); + assertTrue(plan.hasNext()); + assertEquals(ExprValueUtils.tupleValue(ImmutableMap.of("time", 10)), plan.next()); + assertTrue(plan.hasNext()); + assertEquals( + ExprValueUtils.tupleValue(ImmutableMap.of("distance", ExprNullValue.of(), "time", 10)), + plan.next()); + assertTrue(plan.hasNext()); + assertEquals( + ExprValueUtils.tupleValue( + ImmutableMap.of("distance", 100, "time", 10, "distance_alias", 100)), + plan.next()); + assertFalse(plan.hasNext()); + } + + @Test + public void use_illegal_core_type() { + assertThrows( + IllegalArgumentException.class, + () -> { + new TrendlineOperator( + inputPlan, + Collections.singletonList( + Pair.of( + AstDSL.computation(2, 
AstDSL.field("distance"), "distance_alias", SMA), + ExprCoreType.ARRAY))); + }); + } + + @Test + public void calculates_simple_moving_average_date() { + when(inputPlan.hasNext()).thenReturn(true, true, true, false); + when(inputPlan.next()) + .thenReturn( + ExprValueUtils.tupleValue( + ImmutableMap.of("date", ExprValueUtils.dateValue(LocalDate.EPOCH))), + ExprValueUtils.tupleValue( + ImmutableMap.of("date", ExprValueUtils.dateValue(LocalDate.EPOCH.plusDays(6)))), + ExprValueUtils.tupleValue( + ImmutableMap.of("date", ExprValueUtils.dateValue(LocalDate.EPOCH.plusDays(12))))); + + var plan = + new TrendlineOperator( + inputPlan, + Collections.singletonList( + Pair.of( + AstDSL.computation(2, AstDSL.field("date"), "date_alias", SMA), + ExprCoreType.DATE))); + + plan.open(); + assertTrue(plan.hasNext()); + assertEquals( + ExprValueUtils.tupleValue( + ImmutableMap.of("date", ExprValueUtils.dateValue(LocalDate.EPOCH))), + plan.next()); + assertTrue(plan.hasNext()); + assertEquals( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "date", + ExprValueUtils.dateValue(LocalDate.EPOCH.plusDays(6)), + "date_alias", + ExprValueUtils.dateValue(LocalDate.EPOCH.plusDays(3)))), + plan.next()); + assertTrue(plan.hasNext()); + assertEquals( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "date", + ExprValueUtils.dateValue(LocalDate.EPOCH.plusDays(12)), + "date_alias", + ExprValueUtils.dateValue(LocalDate.EPOCH.plusDays(9)))), + plan.next()); + assertFalse(plan.hasNext()); + } + + @Test + public void calculates_simple_moving_average_time() { + when(inputPlan.hasNext()).thenReturn(true, true, true, false); + when(inputPlan.next()) + .thenReturn( + ExprValueUtils.tupleValue( + ImmutableMap.of("time", ExprValueUtils.timeValue(LocalTime.MIN))), + ExprValueUtils.tupleValue( + ImmutableMap.of("time", ExprValueUtils.timeValue(LocalTime.MIN.plusHours(6)))), + ExprValueUtils.tupleValue( + ImmutableMap.of("time", ExprValueUtils.timeValue(LocalTime.MIN.plusHours(12))))); + + var plan = + new TrendlineOperator( + inputPlan, + Collections.singletonList( + Pair.of( + AstDSL.computation(2, AstDSL.field("time"), "time_alias", SMA), + ExprCoreType.TIME))); + + plan.open(); + assertTrue(plan.hasNext()); + assertEquals(ExprValueUtils.tupleValue(ImmutableMap.of("time", LocalTime.MIN)), plan.next()); + assertTrue(plan.hasNext()); + assertEquals( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "time", LocalTime.MIN.plusHours(6), "time_alias", LocalTime.MIN.plusHours(3))), + plan.next()); + assertTrue(plan.hasNext()); + assertEquals( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "time", LocalTime.MIN.plusHours(12), "time_alias", LocalTime.MIN.plusHours(9))), + plan.next()); + assertFalse(plan.hasNext()); + } + + @Test + public void calculates_simple_moving_average_timestamp() { + when(inputPlan.hasNext()).thenReturn(true, true, true, false); + when(inputPlan.next()) + .thenReturn( + ExprValueUtils.tupleValue( + ImmutableMap.of("timestamp", ExprValueUtils.timestampValue(Instant.EPOCH))), + ExprValueUtils.tupleValue( + ImmutableMap.of( + "timestamp", ExprValueUtils.timestampValue(Instant.EPOCH.plusMillis(1000)))), + ExprValueUtils.tupleValue( + ImmutableMap.of( + "timestamp", ExprValueUtils.timestampValue(Instant.EPOCH.plusMillis(1500))))); + + var plan = + new TrendlineOperator( + inputPlan, + Collections.singletonList( + Pair.of( + AstDSL.computation(2, AstDSL.field("timestamp"), "timestamp_alias", SMA), + ExprCoreType.TIMESTAMP))); + + plan.open(); + assertTrue(plan.hasNext()); + assertEquals( + 
ExprValueUtils.tupleValue(ImmutableMap.of("timestamp", Instant.EPOCH)), plan.next()); + assertTrue(plan.hasNext()); + assertEquals( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "timestamp", + Instant.EPOCH.plusMillis(1000), + "timestamp_alias", + Instant.EPOCH.plusMillis(500))), + plan.next()); + assertTrue(plan.hasNext()); + assertEquals( + ExprValueUtils.tupleValue( + ImmutableMap.of( + "timestamp", + Instant.EPOCH.plusMillis(1500), + "timestamp_alias", + Instant.EPOCH.plusMillis(1250))), + plan.next()); + assertFalse(plan.hasNext()); + } +} diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/collector/RoundingTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/collector/RoundingTest.java index 3a2601a874..4f6d51c901 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/collector/RoundingTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/collector/RoundingTest.java @@ -5,14 +5,18 @@ package org.opensearch.sql.planner.physical.collector; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.opensearch.sql.data.type.ExprCoreType.DATE; import static org.opensearch.sql.data.type.ExprCoreType.STRING; import static org.opensearch.sql.data.type.ExprCoreType.TIME; +import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP; import org.junit.jupiter.api.Test; import org.opensearch.sql.data.model.ExprTimeValue; import org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.exception.ExpressionEvaluationException; import org.opensearch.sql.expression.DSL; import org.opensearch.sql.expression.span.SpanExpression; @@ -26,6 +30,35 @@ void time_rounding_illegal_span() { ExpressionEvaluationException.class, () -> rounding.round(new ExprTimeValue("23:30:00"))); } + @Test + void datetime_rounding_span() { + SpanExpression dateSpan = DSL.span(DSL.ref("date", DATE), DSL.literal(1), "d"); + Rounding rounding = Rounding.createRounding(dateSpan); + assertInstanceOf(Rounding.DateRounding.class, rounding); + SpanExpression timeSpan = DSL.span(DSL.ref("time", TIME), DSL.literal(1), "h"); + rounding = Rounding.createRounding(timeSpan); + assertInstanceOf(Rounding.TimeRounding.class, rounding); + SpanExpression timestampSpan = DSL.span(DSL.ref("timestamp", TIMESTAMP), DSL.literal(1), "h"); + rounding = Rounding.createRounding(timestampSpan); + assertInstanceOf(Rounding.TimestampRounding.class, rounding); + } + + @Test + void datetime_rounding_non_core_type_span() { + SpanExpression dateSpan = + DSL.span(DSL.ref("date", new MockDateExprType()), DSL.literal(1), "d"); + Rounding rounding = Rounding.createRounding(dateSpan); + assertInstanceOf(Rounding.DateRounding.class, rounding); + SpanExpression timeSpan = + DSL.span(DSL.ref("time", new MockTimeExprType()), DSL.literal(1), "h"); + rounding = Rounding.createRounding(timeSpan); + assertInstanceOf(Rounding.TimeRounding.class, rounding); + SpanExpression timestampSpan = + DSL.span(DSL.ref("timestamp", new MockTimestampExprType()), DSL.literal(1), "h"); + rounding = Rounding.createRounding(timestampSpan); + assertInstanceOf(Rounding.TimestampRounding.class, rounding); + } + @Test void round_unknown_type() { SpanExpression span = DSL.span(DSL.ref("unknown", STRING), DSL.literal(1), ""); @@ -41,4 +74,25 @@ void resolve() { () -> Rounding.DateTimeUnit.resolve(illegalUnit), "Unable to 
resolve unit " + illegalUnit); } + + static class MockDateExprType implements ExprType { + @Override + public String typeName() { + return "DATE"; + } + } + + static class MockTimeExprType implements ExprType { + @Override + public String typeName() { + return "TIME"; + } + } + + static class MockTimestampExprType implements ExprType { + @Override + public String typeName() { + return "TIMESTAMP"; + } + } } diff --git a/datasources/build.gradle b/datasources/build.gradle index 9bd233e1f9..1d1127ad0d 100644 --- a/datasources/build.gradle +++ b/datasources/build.gradle @@ -21,8 +21,13 @@ dependencies { implementation group: 'org.opensearch', name: 'opensearch', version: "${opensearch_version}" implementation group: 'org.opensearch', name: 'opensearch-x-content', version: "${opensearch_version}" implementation group: 'org.opensearch', name: 'common-utils', version: "${opensearch_build}" - implementation group: 'commons-io', name: 'commons-io', version: '2.8.0' - implementation 'com.amazonaws:aws-encryption-sdk-java:2.4.1' + implementation group: 'commons-io', name: 'commons-io', version: '2.14.0' + // FIXME. upgrade aws-encryption-sdk-java once the bouncycastle dependency update to 1.78. + implementation ('com.amazonaws:aws-encryption-sdk-java:2.4.1') { + exclude group: 'org.bouncycastle', module: 'bcprov-ext-jdk18on' + } + // Use OpenSearch bouncycastle version. https://github.com/opensearch-project/OpenSearch/blob/main/buildSrc/version.properties + implementation "org.bouncycastle:bcprov-jdk18on:${versions.bouncycastle}" implementation group: 'commons-validator', name: 'commons-validator', version: '1.7' testImplementation group: 'junit', name: 'junit', version: '4.13.2' diff --git a/datasources/src/main/java/org/opensearch/sql/datasources/exceptions/ErrorMessage.java b/datasources/src/main/java/org/opensearch/sql/datasources/exceptions/ErrorMessage.java index a0c0f5e24d..d4c74c7b30 100644 --- a/datasources/src/main/java/org/opensearch/sql/datasources/exceptions/ErrorMessage.java +++ b/datasources/src/main/java/org/opensearch/sql/datasources/exceptions/ErrorMessage.java @@ -14,7 +14,7 @@ /** Error Message. 
*/ public class ErrorMessage { - protected Throwable exception; + protected final Throwable exception; private final int status; diff --git a/datasources/src/main/java/org/opensearch/sql/datasources/glue/GlueDataSourceFactory.java b/datasources/src/main/java/org/opensearch/sql/datasources/glue/GlueDataSourceFactory.java index e0c13ff005..11a33a2969 100644 --- a/datasources/src/main/java/org/opensearch/sql/datasources/glue/GlueDataSourceFactory.java +++ b/datasources/src/main/java/org/opensearch/sql/datasources/glue/GlueDataSourceFactory.java @@ -5,6 +5,8 @@ import java.util.Map; import java.util.Set; import lombok.RequiredArgsConstructor; +import org.apache.commons.lang3.BooleanUtils; +import org.apache.commons.lang3.StringUtils; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.datasource.model.DataSource; import org.opensearch.sql.datasource.model.DataSourceMetadata; @@ -29,7 +31,9 @@ public class GlueDataSourceFactory implements DataSourceFactory { "glue.indexstore.opensearch.auth.password"; public static final String GLUE_INDEX_STORE_OPENSEARCH_REGION = "glue.indexstore.opensearch.region"; + public static final String GLUE_ICEBERG_ENABLED = "glue.iceberg.enabled"; public static final String GLUE_LAKEFORMATION_ENABLED = "glue.lakeformation.enabled"; + public static final String GLUE_LAKEFORMATION_SESSION_TAG = "glue.lakeformation.session_tag"; @Override public DataSourceType getDataSourceType() { @@ -76,5 +80,18 @@ private void validateGlueDataSourceConfiguration(Map dataSourceM DatasourceValidationUtils.validateHost( dataSourceMetadataConfig.get(GLUE_INDEX_STORE_OPENSEARCH_URI), pluginSettings.getSettingValue(Settings.Key.DATASOURCES_URI_HOSTS_DENY_LIST)); + + // validate Lake Formation config + if (BooleanUtils.toBoolean(dataSourceMetadataConfig.get(GLUE_LAKEFORMATION_ENABLED))) { + if (!BooleanUtils.toBoolean(dataSourceMetadataConfig.get(GLUE_ICEBERG_ENABLED))) { + throw new IllegalArgumentException( + "Lake Formation can only be enabled when Iceberg is enabled."); + } + + if (StringUtils.isBlank(dataSourceMetadataConfig.get(GLUE_LAKEFORMATION_SESSION_TAG))) { + throw new IllegalArgumentException( + "Lake Formation session tag must be specified when enabling Lake Formation"); + } + } } } diff --git a/datasources/src/main/java/org/opensearch/sql/datasources/glue/SecurityLakeDataSourceFactory.java b/datasources/src/main/java/org/opensearch/sql/datasources/glue/SecurityLakeDataSourceFactory.java new file mode 100644 index 0000000000..0f336a08d1 --- /dev/null +++ b/datasources/src/main/java/org/opensearch/sql/datasources/glue/SecurityLakeDataSourceFactory.java @@ -0,0 +1,57 @@ +package org.opensearch.sql.datasources.glue; + +import java.util.Map; +import org.apache.commons.lang3.BooleanUtils; +import org.apache.commons.lang3.StringUtils; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.datasource.model.DataSource; +import org.opensearch.sql.datasource.model.DataSourceMetadata; +import org.opensearch.sql.datasource.model.DataSourceType; + +public class SecurityLakeDataSourceFactory extends GlueDataSourceFactory { + + private final Settings pluginSettings; + + public static final String TRUE = "true"; + + public SecurityLakeDataSourceFactory(final Settings pluginSettings) { + super(pluginSettings); + this.pluginSettings = pluginSettings; + } + + @Override + public DataSourceType getDataSourceType() { + return DataSourceType.SECURITY_LAKE; + } + + @Override + public DataSource createDataSource(DataSourceMetadata metadata) { + 
validateProperties(metadata.getProperties()); + metadata.getProperties().put(GlueDataSourceFactory.GLUE_ICEBERG_ENABLED, TRUE); + metadata.getProperties().put(GlueDataSourceFactory.GLUE_LAKEFORMATION_ENABLED, TRUE); + return super.createDataSource(metadata); + } + + private void validateProperties(Map properties) { + // validate Lake Formation config + if (properties.get(GlueDataSourceFactory.GLUE_ICEBERG_ENABLED) != null + && !BooleanUtils.toBoolean(properties.get(GlueDataSourceFactory.GLUE_ICEBERG_ENABLED))) { + throw new IllegalArgumentException( + GlueDataSourceFactory.GLUE_ICEBERG_ENABLED + + " cannot be false when using Security Lake data source."); + } + + if (properties.get(GlueDataSourceFactory.GLUE_LAKEFORMATION_ENABLED) != null + && !BooleanUtils.toBoolean( + properties.get(GlueDataSourceFactory.GLUE_LAKEFORMATION_ENABLED))) { + throw new IllegalArgumentException( + GLUE_LAKEFORMATION_ENABLED + " cannot be false when using Security Lake data source."); + } + + if (StringUtils.isBlank(properties.get(GLUE_LAKEFORMATION_SESSION_TAG))) { + throw new IllegalArgumentException( + GlueDataSourceFactory.GLUE_LAKEFORMATION_SESSION_TAG + + " must be specified when using Security Lake data source"); + } + } +} diff --git a/datasources/src/main/java/org/opensearch/sql/datasources/rest/RestDataSourceQueryAction.java b/datasources/src/main/java/org/opensearch/sql/datasources/rest/RestDataSourceQueryAction.java index 43249e8a28..558a7fe4b2 100644 --- a/datasources/src/main/java/org/opensearch/sql/datasources/rest/RestDataSourceQueryAction.java +++ b/datasources/src/main/java/org/opensearch/sql/datasources/rest/RestDataSourceQueryAction.java @@ -17,10 +17,12 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import lombok.RequiredArgsConstructor; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.OpenSearchException; import org.opensearch.OpenSearchSecurityException; +import org.opensearch.OpenSearchStatusException; import org.opensearch.client.node.NodeClient; import org.opensearch.core.action.ActionListener; import org.opensearch.core.rest.RestStatus; @@ -28,6 +30,7 @@ import org.opensearch.rest.BytesRestResponse; import org.opensearch.rest.RestChannel; import org.opensearch.rest.RestRequest; +import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.datasource.model.DataSourceMetadata; import org.opensearch.sql.datasources.exceptions.DataSourceNotFoundException; import org.opensearch.sql.datasources.exceptions.ErrorMessage; @@ -37,7 +40,10 @@ import org.opensearch.sql.datasources.utils.XContentParserUtils; import org.opensearch.sql.legacy.metrics.MetricName; import org.opensearch.sql.legacy.utils.MetricUtils; +import org.opensearch.sql.opensearch.setting.OpenSearchSettings; +import org.opensearch.sql.opensearch.util.RestRequestUtil; +@RequiredArgsConstructor public class RestDataSourceQueryAction extends BaseRestHandler { public static final String DATASOURCE_ACTIONS = "datasource_actions"; @@ -45,6 +51,8 @@ public class RestDataSourceQueryAction extends BaseRestHandler { private static final Logger LOG = LogManager.getLogger(RestDataSourceQueryAction.class); + private final OpenSearchSettings settings; + @Override public String getName() { return DATASOURCE_ACTIONS; @@ -115,6 +123,9 @@ public List routes() { @Override protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient nodeClient) throws IOException { + if (!enabled()) { + return disabledError(restRequest); + } switch 
(restRequest.method()) { case POST: return executePostRequest(restRequest, nodeClient); @@ -314,4 +325,22 @@ private static boolean isClientError(Exception e) { || e instanceof IllegalArgumentException || e instanceof IllegalStateException; } + + private boolean enabled() { + return settings.getSettingValue(Settings.Key.DATASOURCES_ENABLED); + } + + private RestChannelConsumer disabledError(RestRequest request) { + + RestRequestUtil.consumeAllRequestParameters(request); + + return channel -> { + reportError( + channel, + new OpenSearchStatusException( + String.format("%s setting is false", Settings.Key.DATASOURCES_ENABLED.getKeyValue()), + BAD_REQUEST), + BAD_REQUEST); + }; + } } diff --git a/datasources/src/main/java/org/opensearch/sql/datasources/service/DataSourceServiceImpl.java b/datasources/src/main/java/org/opensearch/sql/datasources/service/DataSourceServiceImpl.java index 4fe42fbd5c..81b6432891 100644 --- a/datasources/src/main/java/org/opensearch/sql/datasources/service/DataSourceServiceImpl.java +++ b/datasources/src/main/java/org/opensearch/sql/datasources/service/DataSourceServiceImpl.java @@ -11,6 +11,7 @@ import java.util.*; import java.util.stream.Collectors; import org.opensearch.sql.datasource.DataSourceService; +import org.opensearch.sql.datasource.RequestContext; import org.opensearch.sql.datasource.model.DataSource; import org.opensearch.sql.datasource.model.DataSourceMetadata; import org.opensearch.sql.datasource.model.DataSourceStatus; @@ -122,7 +123,8 @@ public Boolean dataSourceExists(String dataSourceName) { } @Override - public DataSourceMetadata verifyDataSourceAccessAndGetRawMetadata(String dataSourceName) { + public DataSourceMetadata verifyDataSourceAccessAndGetRawMetadata( + String dataSourceName, RequestContext requestContext) { DataSourceMetadata dataSourceMetadata = getRawDataSourceMetadata(dataSourceName); verifyDataSourceAccess(dataSourceMetadata); return dataSourceMetadata; @@ -167,7 +169,7 @@ private DataSourceMetadata constructUpdatedDatasourceMetadata( break; } } - return metadataBuilder.build(); + return metadataBuilder.validateAndBuild(); } private DataSourceMetadata getRawDataSourceMetadata(String dataSourceName) { @@ -199,6 +201,8 @@ private DataSourceMetadata removeAuthInfo(DataSourceMetadata dataSourceMetadata) entry -> CONFIDENTIAL_AUTH_KEYS.stream() .anyMatch(confidentialKey -> entry.getKey().endsWith(confidentialKey))); - return new DataSourceMetadata.Builder(dataSourceMetadata).setProperties(safeProperties).build(); + return new DataSourceMetadata.Builder(dataSourceMetadata) + .setProperties(safeProperties) + .validateAndBuild(); } } diff --git a/datasources/src/main/java/org/opensearch/sql/datasources/storage/OpenSearchDataSourceMetadataStorage.java b/datasources/src/main/java/org/opensearch/sql/datasources/storage/OpenSearchDataSourceMetadataStorage.java index eeb0302ed0..682d79c972 100644 --- a/datasources/src/main/java/org/opensearch/sql/datasources/storage/OpenSearchDataSourceMetadataStorage.java +++ b/datasources/src/main/java/org/opensearch/sql/datasources/storage/OpenSearchDataSourceMetadataStorage.java @@ -42,11 +42,13 @@ import org.opensearch.index.query.QueryBuilders; import org.opensearch.search.SearchHit; import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.datasource.model.DataSourceMetadata; import org.opensearch.sql.datasources.encryptor.Encryptor; import org.opensearch.sql.datasources.exceptions.DataSourceNotFoundException; import 
org.opensearch.sql.datasources.service.DataSourceMetadataStorage; import org.opensearch.sql.datasources.utils.XContentParserUtils; +import org.opensearch.sql.opensearch.setting.OpenSearchSettings; public class OpenSearchDataSourceMetadataStorage implements DataSourceMetadataStorage { @@ -61,6 +63,7 @@ public class OpenSearchDataSourceMetadataStorage implements DataSourceMetadataSt private final ClusterService clusterService; private final Encryptor encryptor; + private final OpenSearchSettings settings; /** * This class implements DataSourceMetadataStorage interface using OpenSearch as underlying @@ -71,14 +74,21 @@ public class OpenSearchDataSourceMetadataStorage implements DataSourceMetadataSt * @param encryptor Encryptor. */ public OpenSearchDataSourceMetadataStorage( - Client client, ClusterService clusterService, Encryptor encryptor) { + Client client, + ClusterService clusterService, + Encryptor encryptor, + OpenSearchSettings settings) { this.client = client; this.clusterService = clusterService; this.encryptor = encryptor; + this.settings = settings; } @Override public List getDataSourceMetadata() { + if (!isEnabled()) { + return Collections.emptyList(); + } if (!this.clusterService.state().routingTable().hasIndex(DATASOURCE_INDEX_NAME)) { createDataSourcesIndex(); return Collections.emptyList(); @@ -88,6 +98,9 @@ public List getDataSourceMetadata() { @Override public Optional getDataSourceMetadata(String datasourceName) { + if (!isEnabled()) { + return Optional.empty(); + } if (!this.clusterService.state().routingTable().hasIndex(DATASOURCE_INDEX_NAME)) { createDataSourcesIndex(); return Optional.empty(); @@ -101,6 +114,9 @@ public Optional getDataSourceMetadata(String datasourceName) @Override public void createDataSourceMetadata(DataSourceMetadata dataSourceMetadata) { + if (!isEnabled()) { + throw new IllegalStateException("Data source management is disabled"); + } encryptDecryptAuthenticationData(dataSourceMetadata, true); if (!this.clusterService.state().routingTable().hasIndex(DATASOURCE_INDEX_NAME)) { createDataSourcesIndex(); @@ -134,6 +150,9 @@ public void createDataSourceMetadata(DataSourceMetadata dataSourceMetadata) { @Override public void updateDataSourceMetadata(DataSourceMetadata dataSourceMetadata) { + if (!isEnabled()) { + throw new IllegalStateException("Data source management is disabled"); + } encryptDecryptAuthenticationData(dataSourceMetadata, true); UpdateRequest updateRequest = new UpdateRequest(DATASOURCE_INDEX_NAME, dataSourceMetadata.getName()); @@ -163,6 +182,9 @@ public void updateDataSourceMetadata(DataSourceMetadata dataSourceMetadata) { @Override public void deleteDataSourceMetadata(String datasourceName) { + if (!isEnabled()) { + throw new IllegalStateException("Data source management is disabled"); + } DeleteRequest deleteRequest = new DeleteRequest(DATASOURCE_INDEX_NAME); deleteRequest.id(datasourceName); deleteRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); @@ -302,4 +324,8 @@ private void handleSigV4PropertiesEncryptionDecryption( .ifPresent(list::add); encryptOrDecrypt(propertiesMap, isEncryption, list); } + + private boolean isEnabled() { + return settings.getSettingValue(Settings.Key.DATASOURCES_ENABLED); + } } diff --git a/datasources/src/main/java/org/opensearch/sql/datasources/transport/TransportCreateDataSourceAction.java b/datasources/src/main/java/org/opensearch/sql/datasources/transport/TransportCreateDataSourceAction.java index 95e6493e05..edf4d14e1e 100644 --- 
a/datasources/src/main/java/org/opensearch/sql/datasources/transport/TransportCreateDataSourceAction.java +++ b/datasources/src/main/java/org/opensearch/sql/datasources/transport/TransportCreateDataSourceAction.java @@ -30,8 +30,8 @@ public class TransportCreateDataSourceAction public static final ActionType ACTION_TYPE = new ActionType<>(NAME, CreateDataSourceActionResponse::new); - private DataSourceService dataSourceService; - private org.opensearch.sql.opensearch.setting.OpenSearchSettings settings; + private final DataSourceService dataSourceService; + private final org.opensearch.sql.opensearch.setting.OpenSearchSettings settings; /** * TransportCreateDataSourceAction action for creating datasource. diff --git a/datasources/src/main/java/org/opensearch/sql/datasources/transport/TransportDeleteDataSourceAction.java b/datasources/src/main/java/org/opensearch/sql/datasources/transport/TransportDeleteDataSourceAction.java index 5578d40651..d17deb7df0 100644 --- a/datasources/src/main/java/org/opensearch/sql/datasources/transport/TransportDeleteDataSourceAction.java +++ b/datasources/src/main/java/org/opensearch/sql/datasources/transport/TransportDeleteDataSourceAction.java @@ -26,7 +26,7 @@ public class TransportDeleteDataSourceAction public static final ActionType ACTION_TYPE = new ActionType<>(NAME, DeleteDataSourceActionResponse::new); - private DataSourceService dataSourceService; + private final DataSourceService dataSourceService; /** * TransportDeleteDataSourceAction action for deleting datasource. diff --git a/datasources/src/main/java/org/opensearch/sql/datasources/transport/TransportGetDataSourceAction.java b/datasources/src/main/java/org/opensearch/sql/datasources/transport/TransportGetDataSourceAction.java index 34ad59c80f..e2da2a8e0c 100644 --- a/datasources/src/main/java/org/opensearch/sql/datasources/transport/TransportGetDataSourceAction.java +++ b/datasources/src/main/java/org/opensearch/sql/datasources/transport/TransportGetDataSourceAction.java @@ -29,7 +29,7 @@ public class TransportGetDataSourceAction public static final ActionType ACTION_TYPE = new ActionType<>(NAME, GetDataSourceActionResponse::new); - private DataSourceService dataSourceService; + private final DataSourceService dataSourceService; /** * TransportGetDataSourceAction action for getting datasource. diff --git a/datasources/src/main/java/org/opensearch/sql/datasources/transport/TransportPatchDataSourceAction.java b/datasources/src/main/java/org/opensearch/sql/datasources/transport/TransportPatchDataSourceAction.java index 303e905cec..dd55869df7 100644 --- a/datasources/src/main/java/org/opensearch/sql/datasources/transport/TransportPatchDataSourceAction.java +++ b/datasources/src/main/java/org/opensearch/sql/datasources/transport/TransportPatchDataSourceAction.java @@ -30,7 +30,7 @@ public class TransportPatchDataSourceAction public static final ActionType ACTION_TYPE = new ActionType<>(NAME, PatchDataSourceActionResponse::new); - private DataSourceService dataSourceService; + private final DataSourceService dataSourceService; /** * TransportPatchDataSourceAction action for updating datasource. 
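A pattern worth calling out across the datasources changes above: every entry point is now gated on Settings.Key.DATASOURCES_ENABLED. Reads degrade to empty results, writes fail fast with an IllegalStateException, and the REST handler consumes all request parameters before answering with a 400. A rough sketch of that guard, assuming only a boolean supplier in place of the real settings lookup (the class below is illustrative, not part of the plugin):

```java
import java.util.Collections;
import java.util.List;
import java.util.function.BooleanSupplier;

// Illustrative guard pattern: check the feature flag first, then either degrade
// gracefully (reads) or fail fast (writes).
final class DataSourcesFlagGuardSketch {
  // Stands in for settings.getSettingValue(Settings.Key.DATASOURCES_ENABLED).
  private final BooleanSupplier datasourcesEnabled;

  DataSourcesFlagGuardSketch(BooleanSupplier datasourcesEnabled) {
    this.datasourcesEnabled = datasourcesEnabled;
  }

  List<String> listMetadata() {
    if (!datasourcesEnabled.getAsBoolean()) {
      return Collections.emptyList(); // reads return empty rather than erroring
    }
    return List.of("example_datasource"); // placeholder for the real lookup
  }

  void createMetadata(String name) {
    if (!datasourcesEnabled.getAsBoolean()) {
      throw new IllegalStateException("Data source management is disabled"); // writes fail fast
    }
    // ... persist metadata ...
  }
}
```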
diff --git a/datasources/src/main/java/org/opensearch/sql/datasources/transport/TransportUpdateDataSourceAction.java b/datasources/src/main/java/org/opensearch/sql/datasources/transport/TransportUpdateDataSourceAction.java index fefd0f3a01..44e0625cf7 100644 --- a/datasources/src/main/java/org/opensearch/sql/datasources/transport/TransportUpdateDataSourceAction.java +++ b/datasources/src/main/java/org/opensearch/sql/datasources/transport/TransportUpdateDataSourceAction.java @@ -29,7 +29,7 @@ public class TransportUpdateDataSourceAction public static final ActionType ACTION_TYPE = new ActionType<>(NAME, UpdateDataSourceActionResponse::new); - private DataSourceService dataSourceService; + private final DataSourceService dataSourceService; /** * TransportUpdateDataSourceAction action for updating datasource. diff --git a/datasources/src/main/java/org/opensearch/sql/datasources/utils/XContentParserUtils.java b/datasources/src/main/java/org/opensearch/sql/datasources/utils/XContentParserUtils.java index 7c8c33b147..4c98b133a8 100644 --- a/datasources/src/main/java/org/opensearch/sql/datasources/utils/XContentParserUtils.java +++ b/datasources/src/main/java/org/opensearch/sql/datasources/utils/XContentParserUtils.java @@ -97,7 +97,7 @@ public static DataSourceMetadata toDataSourceMetadata(XContentParser parser) thr .setAllowedRoles(allowedRoles) .setResultIndex(resultIndex) .setDataSourceStatus(status) - .build(); + .validateAndBuild(); } public static Map toMap(XContentParser parser) throws IOException { diff --git a/datasources/src/test/java/org/opensearch/sql/datasources/glue/GlueDataSourceFactoryTest.java b/datasources/src/test/java/org/opensearch/sql/datasources/glue/GlueDataSourceFactoryTest.java index 52f8ec9cd1..2833717265 100644 --- a/datasources/src/test/java/org/opensearch/sql/datasources/glue/GlueDataSourceFactoryTest.java +++ b/datasources/src/test/java/org/opensearch/sql/datasources/glue/GlueDataSourceFactoryTest.java @@ -210,4 +210,67 @@ void testCreateGLueDatSourceWithInvalidFlintHostSyntax() { Assertions.assertEquals( "Invalid flint host in properties.", illegalArgumentException.getMessage()); } + + @Test + @SneakyThrows + void testCreateGlueDataSourceWithLakeFormationNoIceberg() { + when(settings.getSettingValue(Settings.Key.DATASOURCES_URI_HOSTS_DENY_LIST)) + .thenReturn(Collections.emptyList()); + GlueDataSourceFactory glueDatasourceFactory = new GlueDataSourceFactory(settings); + + HashMap properties = new HashMap<>(); + properties.put("glue.auth.type", "iam_role"); + properties.put("glue.auth.role_arn", "role_arn"); + properties.put("glue.indexstore.opensearch.uri", "http://localhost:9200"); + properties.put("glue.indexstore.opensearch.auth", "noauth"); + properties.put("glue.indexstore.opensearch.region", "us-west-2"); + properties.put("glue.lakeformation.enabled", "true"); + properties.put("glue.iceberg.enabled", "false"); + properties.put("glue.lakeformation.session_tag", "session_tag"); + + DataSourceMetadata metadata = + new DataSourceMetadata.Builder() + .setName("my_glue") + .setConnector(DataSourceType.S3GLUE) + .setProperties(properties) + .build(); + + IllegalArgumentException illegalArgumentException = + Assertions.assertThrows( + IllegalArgumentException.class, () -> glueDatasourceFactory.createDataSource(metadata)); + Assertions.assertEquals( + "Lake Formation can only be enabled when Iceberg is enabled.", + illegalArgumentException.getMessage()); + } + + @Test + @SneakyThrows + void testCreateGlueDataSourceWithLakeFormationNoSessionTags() { + 
when(settings.getSettingValue(Settings.Key.DATASOURCES_URI_HOSTS_DENY_LIST)) + .thenReturn(Collections.emptyList()); + GlueDataSourceFactory glueDatasourceFactory = new GlueDataSourceFactory(settings); + + HashMap properties = new HashMap<>(); + properties.put("glue.auth.type", "iam_role"); + properties.put("glue.auth.role_arn", "role_arn"); + properties.put("glue.indexstore.opensearch.uri", "http://localhost:9200"); + properties.put("glue.indexstore.opensearch.auth", "noauth"); + properties.put("glue.indexstore.opensearch.region", "us-west-2"); + properties.put("glue.lakeformation.enabled", "true"); + properties.put("glue.iceberg.enabled", "true"); + + DataSourceMetadata metadata = + new DataSourceMetadata.Builder() + .setName("my_glue") + .setConnector(DataSourceType.S3GLUE) + .setProperties(properties) + .build(); + + IllegalArgumentException illegalArgumentException = + Assertions.assertThrows( + IllegalArgumentException.class, () -> glueDatasourceFactory.createDataSource(metadata)); + Assertions.assertEquals( + "Lake Formation session tag must be specified when enabling Lake Formation", + illegalArgumentException.getMessage()); + } } diff --git a/datasources/src/test/java/org/opensearch/sql/datasources/glue/SecurityLakeSourceFactoryTest.java b/datasources/src/test/java/org/opensearch/sql/datasources/glue/SecurityLakeSourceFactoryTest.java new file mode 100644 index 0000000000..561d549826 --- /dev/null +++ b/datasources/src/test/java/org/opensearch/sql/datasources/glue/SecurityLakeSourceFactoryTest.java @@ -0,0 +1,141 @@ +package org.opensearch.sql.datasources.glue; + +import static org.mockito.Mockito.when; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import lombok.SneakyThrows; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.datasource.model.DataSource; +import org.opensearch.sql.datasource.model.DataSourceMetadata; +import org.opensearch.sql.datasource.model.DataSourceType; + +@ExtendWith(MockitoExtension.class) +public class SecurityLakeSourceFactoryTest { + + @Mock private Settings settings; + + @Test + void testGetConnectorType() { + SecurityLakeDataSourceFactory securityLakeDataSourceFactory = + new SecurityLakeDataSourceFactory(settings); + Assertions.assertEquals( + DataSourceType.SECURITY_LAKE, securityLakeDataSourceFactory.getDataSourceType()); + } + + @Test + @SneakyThrows + void testCreateSecurityLakeDataSource() { + when(settings.getSettingValue(Settings.Key.DATASOURCES_URI_HOSTS_DENY_LIST)) + .thenReturn(Collections.emptyList()); + SecurityLakeDataSourceFactory securityLakeDataSourceFactory = + new SecurityLakeDataSourceFactory(settings); + + Map properties = new HashMap<>(); + properties.put("glue.auth.type", "iam_role"); + properties.put("glue.auth.role_arn", "role_arn"); + properties.put("glue.indexstore.opensearch.uri", "http://localhost:9200"); + properties.put("glue.indexstore.opensearch.auth", "noauth"); + properties.put("glue.indexstore.opensearch.region", "us-west-2"); + properties.put("glue.lakeformation.session_tag", "session_tag"); + DataSourceMetadata metadata = + new DataSourceMetadata.Builder() + .setName("my_sl") + .setConnector(DataSourceType.SECURITY_LAKE) + .setProperties(properties) + .build(); + DataSource dataSource = securityLakeDataSourceFactory.createDataSource(metadata); + 
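Taken together, the Glue and Security Lake factory changes boil down to three constraints: Lake Formation can only be enabled when Iceberg is enabled, an enabled Lake Formation requires glue.lakeformation.session_tag, and the Security Lake connector forces both flags to "true" before delegating to the Glue factory. A condensed restatement of the validation side, using the property keys and error messages from the diff (the helper class itself is illustrative):

```java
import java.util.Map;
import org.apache.commons.lang3.BooleanUtils;
import org.apache.commons.lang3.StringUtils;

// Illustrative restatement of the Lake Formation validation rules tested above.
final class LakeFormationRulesSketch {
  static void validate(Map<String, String> properties) {
    boolean iceberg = BooleanUtils.toBoolean(properties.get("glue.iceberg.enabled"));
    boolean lakeFormation = BooleanUtils.toBoolean(properties.get("glue.lakeformation.enabled"));

    if (lakeFormation && !iceberg) {
      throw new IllegalArgumentException(
          "Lake Formation can only be enabled when Iceberg is enabled.");
    }
    if (lakeFormation && StringUtils.isBlank(properties.get("glue.lakeformation.session_tag"))) {
      throw new IllegalArgumentException(
          "Lake Formation session tag must be specified when enabling Lake Formation");
    }
  }
}
```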
Assertions.assertEquals(DataSourceType.SECURITY_LAKE, dataSource.getConnectorType()); + + Assertions.assertEquals( + properties.get(GlueDataSourceFactory.GLUE_ICEBERG_ENABLED), + SecurityLakeDataSourceFactory.TRUE); + Assertions.assertEquals( + properties.get(GlueDataSourceFactory.GLUE_LAKEFORMATION_ENABLED), + SecurityLakeDataSourceFactory.TRUE); + } + + @Test + @SneakyThrows + void testCreateSecurityLakeDataSourceIcebergCannotBeDisabled() { + SecurityLakeDataSourceFactory securityLakeDataSourceFactory = + new SecurityLakeDataSourceFactory(settings); + + Map properties = new HashMap<>(); + properties.put("glue.auth.type", "iam_role"); + properties.put("glue.auth.role_arn", "role_arn"); + properties.put("glue.indexstore.opensearch.uri", "http://localhost:9200"); + properties.put("glue.indexstore.opensearch.auth", "noauth"); + properties.put("glue.indexstore.opensearch.region", "us-west-2"); + properties.put("glue.iceberg.enabled", "false"); + DataSourceMetadata metadata = + new DataSourceMetadata.Builder() + .setName("my_sl") + .setConnector(DataSourceType.SECURITY_LAKE) + .setProperties(properties) + .build(); + + Assertions.assertThrows( + IllegalArgumentException.class, + () -> securityLakeDataSourceFactory.createDataSource(metadata)); + } + + @Test + @SneakyThrows + void testCreateSecurityLakeDataSourceLakeFormationCannotBeDisabled() { + SecurityLakeDataSourceFactory securityLakeDataSourceFactory = + new SecurityLakeDataSourceFactory(settings); + + Map properties = new HashMap<>(); + properties.put("glue.auth.type", "iam_role"); + properties.put("glue.auth.role_arn", "role_arn"); + properties.put("glue.indexstore.opensearch.uri", "http://localhost:9200"); + properties.put("glue.indexstore.opensearch.auth", "noauth"); + properties.put("glue.indexstore.opensearch.region", "us-west-2"); + properties.put("glue.iceberg.enabled", "true"); + properties.put("glue.lakeformation.enabled", "false"); + DataSourceMetadata metadata = + new DataSourceMetadata.Builder() + .setName("my_sl") + .setConnector(DataSourceType.SECURITY_LAKE) + .setProperties(properties) + .build(); + + Assertions.assertThrows( + IllegalArgumentException.class, + () -> securityLakeDataSourceFactory.createDataSource(metadata)); + } + + @Test + @SneakyThrows + void testCreateGlueDataSourceWithLakeFormationNoSessionTags() { + SecurityLakeDataSourceFactory securityLakeDataSourceFactory = + new SecurityLakeDataSourceFactory(settings); + + HashMap properties = new HashMap<>(); + properties.put("glue.auth.type", "iam_role"); + properties.put("glue.auth.role_arn", "role_arn"); + properties.put("glue.indexstore.opensearch.uri", "http://localhost:9200"); + properties.put("glue.indexstore.opensearch.auth", "noauth"); + properties.put("glue.indexstore.opensearch.region", "us-west-2"); + properties.put("glue.iceberg.enabled", "true"); + properties.put("glue.lakeformation.enabled", "true"); + + DataSourceMetadata metadata = + new DataSourceMetadata.Builder() + .setName("my_sl") + .setConnector(DataSourceType.SECURITY_LAKE) + .setProperties(properties) + .build(); + + Assertions.assertThrows( + IllegalArgumentException.class, + () -> securityLakeDataSourceFactory.createDataSource(metadata)); + } +} diff --git a/datasources/src/test/java/org/opensearch/sql/datasources/rest/RestDataSourceQueryActionTest.java b/datasources/src/test/java/org/opensearch/sql/datasources/rest/RestDataSourceQueryActionTest.java new file mode 100644 index 0000000000..fbe1b3bee5 --- /dev/null +++ 
b/datasources/src/test/java/org/opensearch/sql/datasources/rest/RestDataSourceQueryActionTest.java @@ -0,0 +1,83 @@ +package org.opensearch.sql.datasources.rest; + +import com.google.gson.Gson; +import com.google.gson.JsonObject; +import lombok.SneakyThrows; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.ArgumentCaptor; +import org.mockito.ArgumentMatchers; +import org.mockito.Mockito; +import org.opensearch.client.node.NodeClient; +import org.opensearch.rest.RestChannel; +import org.opensearch.rest.RestRequest; +import org.opensearch.rest.RestResponse; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.opensearch.setting.OpenSearchSettings; +import org.opensearch.threadpool.ThreadPool; + +public class RestDataSourceQueryActionTest { + + private OpenSearchSettings settings; + private RestRequest request; + private RestChannel channel; + private NodeClient nodeClient; + private ThreadPool threadPool; + private RestDataSourceQueryAction unit; + + @BeforeEach + public void setup() { + settings = Mockito.mock(OpenSearchSettings.class); + request = Mockito.mock(RestRequest.class); + channel = Mockito.mock(RestChannel.class); + nodeClient = Mockito.mock(NodeClient.class); + threadPool = Mockito.mock(ThreadPool.class); + + Mockito.when(nodeClient.threadPool()).thenReturn(threadPool); + + unit = new RestDataSourceQueryAction(settings); + } + + @Test + @SneakyThrows + public void testWhenDataSourcesAreDisabled() { + setDataSourcesEnabled(false); + unit.handleRequest(request, channel, nodeClient); + Mockito.verifyNoInteractions(nodeClient); + ArgumentCaptor response = ArgumentCaptor.forClass(RestResponse.class); + Mockito.verify(channel, Mockito.times(1)).sendResponse(response.capture()); + Assertions.assertEquals(400, response.getValue().status().getStatus()); + JsonObject actualResponseJson = + new Gson().fromJson(response.getValue().content().utf8ToString(), JsonObject.class); + JsonObject expectedResponseJson = new JsonObject(); + expectedResponseJson.addProperty("status", 400); + expectedResponseJson.add("error", new JsonObject()); + expectedResponseJson.getAsJsonObject("error").addProperty("type", "OpenSearchStatusException"); + expectedResponseJson.getAsJsonObject("error").addProperty("reason", "Invalid Request"); + expectedResponseJson + .getAsJsonObject("error") + .addProperty("details", "plugins.query.datasources.enabled setting is false"); + Assertions.assertEquals(expectedResponseJson, actualResponseJson); + } + + @Test + @SneakyThrows + public void testWhenDataSourcesAreEnabled() { + setDataSourcesEnabled(true); + Mockito.when(request.method()).thenReturn(RestRequest.Method.GET); + unit.handleRequest(request, channel, nodeClient); + Mockito.verify(threadPool, Mockito.times(1)) + .schedule(ArgumentMatchers.any(), ArgumentMatchers.any(), ArgumentMatchers.any()); + Mockito.verifyNoInteractions(channel); + } + + @Test + public void testGetName() { + Assertions.assertEquals("datasource_actions", unit.getName()); + } + + private void setDataSourcesEnabled(boolean value) { + Mockito.when(settings.getSettingValue(Settings.Key.DATASOURCES_ENABLED)).thenReturn(value); + } +} diff --git a/datasources/src/test/java/org/opensearch/sql/datasources/service/DataSourceServiceImplTest.java b/datasources/src/test/java/org/opensearch/sql/datasources/service/DataSourceServiceImplTest.java index 5a94945e5b..9a1022706f 100644 --- 
a/datasources/src/test/java/org/opensearch/sql/datasources/service/DataSourceServiceImplTest.java +++ b/datasources/src/test/java/org/opensearch/sql/datasources/service/DataSourceServiceImplTest.java @@ -36,6 +36,7 @@ import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; import org.opensearch.sql.datasource.DataSourceService; +import org.opensearch.sql.datasource.RequestContext; import org.opensearch.sql.datasource.model.DataSource; import org.opensearch.sql.datasource.model.DataSourceMetadata; import org.opensearch.sql.datasource.model.DataSourceStatus; @@ -52,6 +53,7 @@ class DataSourceServiceImplTest { @Mock private DataSourceFactory dataSourceFactory; @Mock private StorageEngine storageEngine; @Mock private DataSourceMetadataStorage dataSourceMetadataStorage; + @Mock private RequestContext requestContext; @Mock private DataSourceUserAuthorizationHelper dataSourceUserAuthorizationHelper; @@ -461,7 +463,9 @@ void testVerifyDataSourceAccessAndGetRawDataSourceMetadataWithDisabledData() { DatasourceDisabledException datasourceDisabledException = Assertions.assertThrows( DatasourceDisabledException.class, - () -> dataSourceService.verifyDataSourceAccessAndGetRawMetadata("testDS")); + () -> + dataSourceService.verifyDataSourceAccessAndGetRawMetadata( + "testDS", requestContext)); Assertions.assertEquals( "Datasource testDS is disabled.", datasourceDisabledException.getMessage()); } @@ -484,7 +488,7 @@ void testVerifyDataSourceAccessAndGetRawDataSourceMetadata() { when(dataSourceMetadataStorage.getDataSourceMetadata("testDS")) .thenReturn(Optional.of(dataSourceMetadata)); DataSourceMetadata dataSourceMetadata1 = - dataSourceService.verifyDataSourceAccessAndGetRawMetadata("testDS"); + dataSourceService.verifyDataSourceAccessAndGetRawMetadata("testDS", requestContext); assertTrue(dataSourceMetadata1.getProperties().containsKey("prometheus.uri")); assertTrue(dataSourceMetadata1.getProperties().containsKey("prometheus.auth.type")); assertTrue(dataSourceMetadata1.getProperties().containsKey("prometheus.auth.username")); diff --git a/datasources/src/test/java/org/opensearch/sql/datasources/storage/OpenSearchDataSourceMetadataStorageTest.java b/datasources/src/test/java/org/opensearch/sql/datasources/storage/OpenSearchDataSourceMetadataStorageTest.java index 55b7528f60..03abe73763 100644 --- a/datasources/src/test/java/org/opensearch/sql/datasources/storage/OpenSearchDataSourceMetadataStorageTest.java +++ b/datasources/src/test/java/org/opensearch/sql/datasources/storage/OpenSearchDataSourceMetadataStorageTest.java @@ -46,10 +46,12 @@ import org.opensearch.index.engine.VersionConflictEngineException; import org.opensearch.search.SearchHit; import org.opensearch.search.SearchHits; +import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.datasource.model.DataSourceMetadata; import org.opensearch.sql.datasource.model.DataSourceType; import org.opensearch.sql.datasources.encryptor.Encryptor; import org.opensearch.sql.datasources.exceptions.DataSourceNotFoundException; +import org.opensearch.sql.opensearch.setting.OpenSearchSettings; @ExtendWith(MockitoExtension.class) public class OpenSearchDataSourceMetadataStorageTest { @@ -64,6 +66,8 @@ public class OpenSearchDataSourceMetadataStorageTest { @Mock private Encryptor encryptor; + @Mock private OpenSearchSettings openSearchSettings; + @Mock(answer = Answers.RETURNS_DEEP_STUBS) private SearchResponse searchResponse; @@ -81,6 +85,7 @@ public class OpenSearchDataSourceMetadataStorageTest { @SneakyThrows @Test public 
void testGetDataSourceMetadata() { + setDataSourcesEnabled(true); Mockito.when(clusterService.state().routingTable().hasIndex(DATASOURCE_INDEX_NAME)) .thenReturn(true); Mockito.when(client.search(ArgumentMatchers.any())).thenReturn(searchResponseActionFuture); @@ -112,6 +117,7 @@ public void testGetDataSourceMetadata() { @SneakyThrows @Test public void testGetOldDataSourceMetadata() { + setDataSourcesEnabled(true); Mockito.when(clusterService.state().routingTable().hasIndex(DATASOURCE_INDEX_NAME)) .thenReturn(true); Mockito.when(client.search(ArgumentMatchers.any())).thenReturn(searchResponseActionFuture); @@ -145,6 +151,7 @@ public void testGetOldDataSourceMetadata() { @SneakyThrows @Test public void testGetDataSourceMetadataWith404SearchResponse() { + setDataSourcesEnabled(true); Mockito.when(clusterService.state().routingTable().hasIndex(DATASOURCE_INDEX_NAME)) .thenReturn(true); Mockito.when(client.search(ArgumentMatchers.any())).thenReturn(searchResponseActionFuture); @@ -165,6 +172,7 @@ public void testGetDataSourceMetadataWith404SearchResponse() { @SneakyThrows @Test public void testGetDataSourceMetadataWithParsingFailed() { + setDataSourcesEnabled(true); Mockito.when(clusterService.state().routingTable().hasIndex(DATASOURCE_INDEX_NAME)) .thenReturn(true); Mockito.when(client.search(ArgumentMatchers.any())).thenReturn(searchResponseActionFuture); @@ -185,6 +193,7 @@ public void testGetDataSourceMetadataWithParsingFailed() { @SneakyThrows @Test public void testGetDataSourceMetadataWithAWSSigV4() { + setDataSourcesEnabled(true); Mockito.when(clusterService.state().routingTable().hasIndex(DATASOURCE_INDEX_NAME)) .thenReturn(true); Mockito.when(client.search(ArgumentMatchers.any())).thenReturn(searchResponseActionFuture); @@ -216,6 +225,7 @@ public void testGetDataSourceMetadataWithAWSSigV4() { @SneakyThrows @Test public void testGetDataSourceMetadataWithBasicAuth() { + setDataSourcesEnabled(true); Mockito.when(clusterService.state().routingTable().hasIndex(DATASOURCE_INDEX_NAME)) .thenReturn(true); Mockito.when(client.search(ArgumentMatchers.any())).thenReturn(searchResponseActionFuture); @@ -248,6 +258,7 @@ public void testGetDataSourceMetadataWithBasicAuth() { @SneakyThrows @Test public void testGetDataSourceMetadataList() { + setDataSourcesEnabled(true); Mockito.when(clusterService.state().routingTable().hasIndex(DATASOURCE_INDEX_NAME)) .thenReturn(true); Mockito.when(client.search(ArgumentMatchers.any())).thenReturn(searchResponseActionFuture); @@ -272,6 +283,7 @@ public void testGetDataSourceMetadataList() { @SneakyThrows @Test public void testGetDataSourceMetadataListWithNoIndex() { + setDataSourcesEnabled(true); Mockito.when(clusterService.state().routingTable().hasIndex(DATASOURCE_INDEX_NAME)) .thenReturn(Boolean.FALSE); Mockito.when(client.admin().indices().create(ArgumentMatchers.any())) @@ -289,6 +301,7 @@ public void testGetDataSourceMetadataListWithNoIndex() { @SneakyThrows @Test public void testGetDataSourceMetadataWithNoIndex() { + setDataSourcesEnabled(true); Mockito.when(clusterService.state().routingTable().hasIndex(DATASOURCE_INDEX_NAME)) .thenReturn(Boolean.FALSE); Mockito.when(client.admin().indices().create(ArgumentMatchers.any())) @@ -305,6 +318,7 @@ public void testGetDataSourceMetadataWithNoIndex() { @Test public void testCreateDataSourceMetadata() { + setDataSourcesEnabled(true); Mockito.when(clusterService.state().routingTable().hasIndex(DATASOURCE_INDEX_NAME)) .thenReturn(Boolean.FALSE); @@ -330,6 +344,7 @@ public void testCreateDataSourceMetadata() { @Test 
public void testCreateDataSourceMetadataWithOutCreatingIndex() { + setDataSourcesEnabled(true); Mockito.when(clusterService.state().routingTable().hasIndex(DATASOURCE_INDEX_NAME)) .thenReturn(Boolean.TRUE); Mockito.when(encryptor.encrypt("secret_key")).thenReturn("secret_key"); @@ -350,6 +365,7 @@ public void testCreateDataSourceMetadataWithOutCreatingIndex() { @Test public void testCreateDataSourceMetadataFailedWithNotFoundResponse() { + setDataSourcesEnabled(true); Mockito.when(clusterService.state().routingTable().hasIndex(DATASOURCE_INDEX_NAME)) .thenReturn(Boolean.FALSE); @@ -383,6 +399,7 @@ public void testCreateDataSourceMetadataFailedWithNotFoundResponse() { @Test public void testCreateDataSourceMetadataWithVersionConflict() { + setDataSourcesEnabled(true); Mockito.when(clusterService.state().routingTable().hasIndex(DATASOURCE_INDEX_NAME)) .thenReturn(Boolean.FALSE); @@ -413,6 +430,7 @@ public void testCreateDataSourceMetadataWithVersionConflict() { @Test public void testCreateDataSourceMetadataWithException() { + setDataSourcesEnabled(true); Mockito.when(clusterService.state().routingTable().hasIndex(DATASOURCE_INDEX_NAME)) .thenReturn(Boolean.FALSE); @@ -444,6 +462,7 @@ public void testCreateDataSourceMetadataWithException() { @Test public void testCreateDataSourceMetadataWithIndexCreationFailed() { + setDataSourcesEnabled(true); Mockito.when(clusterService.state().routingTable().hasIndex(DATASOURCE_INDEX_NAME)) .thenReturn(Boolean.FALSE); @@ -474,6 +493,7 @@ public void testCreateDataSourceMetadataWithIndexCreationFailed() { @Test public void testUpdateDataSourceMetadata() { + setDataSourcesEnabled(true); Mockito.when(encryptor.encrypt("secret_key")).thenReturn("secret_key"); Mockito.when(encryptor.encrypt("access_key")).thenReturn("access_key"); Mockito.when(client.update(ArgumentMatchers.any())).thenReturn(updateResponseActionFuture); @@ -492,6 +512,7 @@ public void testUpdateDataSourceMetadata() { @Test public void testUpdateDataSourceMetadataWithNOOP() { + setDataSourcesEnabled(true); Mockito.when(encryptor.encrypt("secret_key")).thenReturn("secret_key"); Mockito.when(encryptor.encrypt("access_key")).thenReturn("access_key"); Mockito.when(client.update(ArgumentMatchers.any())).thenReturn(updateResponseActionFuture); @@ -510,6 +531,7 @@ public void testUpdateDataSourceMetadataWithNOOP() { @Test public void testUpdateDataSourceMetadataWithNotFoundResult() { + setDataSourcesEnabled(true); Mockito.when(encryptor.encrypt("secret_key")).thenReturn("secret_key"); Mockito.when(encryptor.encrypt("access_key")).thenReturn("access_key"); Mockito.when(client.update(ArgumentMatchers.any())).thenReturn(updateResponseActionFuture); @@ -536,6 +558,7 @@ public void testUpdateDataSourceMetadataWithNotFoundResult() { @Test public void testUpdateDataSourceMetadataWithDocumentMissingException() { + setDataSourcesEnabled(true); Mockito.when(encryptor.encrypt("secret_key")).thenReturn("secret_key"); Mockito.when(encryptor.encrypt("access_key")).thenReturn("access_key"); Mockito.when(client.update(ArgumentMatchers.any())) @@ -561,6 +584,7 @@ public void testUpdateDataSourceMetadataWithDocumentMissingException() { @Test public void testUpdateDataSourceMetadataWithRuntimeException() { + setDataSourcesEnabled(true); Mockito.when(encryptor.encrypt("secret_key")).thenReturn("secret_key"); Mockito.when(encryptor.encrypt("access_key")).thenReturn("access_key"); Mockito.when(client.update(ArgumentMatchers.any())) @@ -586,6 +610,7 @@ public void testUpdateDataSourceMetadataWithRuntimeException() { @Test 
public void testDeleteDataSourceMetadata() { + setDataSourcesEnabled(true); Mockito.when(client.delete(ArgumentMatchers.any())).thenReturn(deleteResponseActionFuture); Mockito.when(deleteResponseActionFuture.actionGet()).thenReturn(deleteResponse); Mockito.when(deleteResponse.getResult()).thenReturn(DocWriteResponse.Result.DELETED); @@ -600,6 +625,7 @@ public void testDeleteDataSourceMetadata() { @Test public void testDeleteDataSourceMetadataWhichisAlreadyDeleted() { + setDataSourcesEnabled(true); Mockito.when(client.delete(ArgumentMatchers.any())).thenReturn(deleteResponseActionFuture); Mockito.when(deleteResponseActionFuture.actionGet()).thenReturn(deleteResponse); Mockito.when(deleteResponse.getResult()).thenReturn(DocWriteResponse.Result.NOT_FOUND); @@ -619,6 +645,7 @@ public void testDeleteDataSourceMetadataWhichisAlreadyDeleted() { @Test public void testDeleteDataSourceMetadataWithUnexpectedResult() { + setDataSourcesEnabled(true); Mockito.when(client.delete(ArgumentMatchers.any())).thenReturn(deleteResponseActionFuture); Mockito.when(deleteResponseActionFuture.actionGet()).thenReturn(deleteResponse); Mockito.when(deleteResponse.getResult()).thenReturn(DocWriteResponse.Result.NOOP); @@ -637,6 +664,43 @@ public void testDeleteDataSourceMetadataWithUnexpectedResult() { Mockito.verify(client.threadPool().getThreadContext(), Mockito.times(1)).stashContext(); } + @Test + public void testWhenDataSourcesAreDisabled() { + setDataSourcesEnabled(false); + + Assertions.assertEquals( + Optional.empty(), this.openSearchDataSourceMetadataStorage.getDataSourceMetadata("dummy")); + + Assertions.assertEquals( + Collections.emptyList(), this.openSearchDataSourceMetadataStorage.getDataSourceMetadata()); + + Assertions.assertThrows( + IllegalStateException.class, + () -> { + this.openSearchDataSourceMetadataStorage.createDataSourceMetadata( + getDataSourceMetadata()); + }, + "Data source management is disabled"); + + Assertions.assertThrows( + IllegalStateException.class, + () -> { + this.openSearchDataSourceMetadataStorage.updateDataSourceMetadata( + getDataSourceMetadata()); + }, + "Data source management is disabled"); + + Assertions.assertThrows( + IllegalStateException.class, + () -> { + this.openSearchDataSourceMetadataStorage.deleteDataSourceMetadata("dummy"); + }, + "Data source management is disabled"); + + Mockito.verify(clusterService.state().routingTable(), Mockito.times(0)) + .hasIndex(DATASOURCE_INDEX_NAME); + } + private String getBasicDataSourceMetadataString() throws JsonProcessingException { Map properties = new HashMap<>(); properties.put("prometheus.auth.type", "basicauth"); @@ -744,4 +808,11 @@ public void serialize( } }; } + + private void setDataSourcesEnabled(boolean enabled) { + Mockito.when( + openSearchSettings.getSettingValue( + ArgumentMatchers.eq(Settings.Key.DATASOURCES_ENABLED))) + .thenReturn(enabled); + } } diff --git a/docs/category.json b/docs/category.json index e90c674a2e..32f56cfb46 100644 --- a/docs/category.json +++ b/docs/category.json @@ -14,6 +14,7 @@ "user/ppl/cmd/information_schema.rst", "user/ppl/cmd/eval.rst", "user/ppl/cmd/fields.rst", + "user/ppl/cmd/fillnull.rst", "user/ppl/cmd/grok.rst", "user/ppl/cmd/head.rst", "user/ppl/cmd/parse.rst", @@ -24,16 +25,18 @@ "user/ppl/cmd/sort.rst", "user/ppl/cmd/stats.rst", "user/ppl/cmd/syntax.rst", + "user/ppl/cmd/trendline.rst", "user/ppl/cmd/top.rst", "user/ppl/cmd/where.rst", "user/ppl/general/identifiers.rst", "user/ppl/general/datatypes.rst", - "user/ppl/functions/math.rst", - "user/ppl/functions/datetime.rst", 
- "user/ppl/functions/string.rst", "user/ppl/functions/condition.rst", + "user/ppl/functions/datetime.rst", + "user/ppl/functions/expressions.rst", + "user/ppl/functions/ip.rst", + "user/ppl/functions/math.rst", "user/ppl/functions/relevance.rst", - "user/ppl/functions/expressions.rst" + "user/ppl/functions/string.rst" ], "sql_cli": [ "user/dql/expressions.rst", diff --git a/docs/dev/intro-architecture.md b/docs/dev/intro-architecture.md index 88b7065864..34fdeec958 100644 --- a/docs/dev/intro-architecture.md +++ b/docs/dev/intro-architecture.md @@ -12,7 +12,7 @@ In the high level, the OD-SQL Engine could be divided into four major sub-module * *Parser*: Currently, there are two Lex&Parser coexists. The Druid Lex&Parser is the original one from NLPChina. The input AST of Core Engine is from the Druid Lex&Parser. The [ANTLR](https://github.com/opensearch-project/sql/blob/main/legacy/src/main/antlr/OpenSearchLegacySqlLexer.g4) Lex&Parser is added by us to customized the verification and exception handling. * *Analyzer*: The analyzer module take the output from ANTLR Lex&Parser then perform syntax and semantic analyze. -* *Core Engine*: The QueryAction take the output from Druid Lex&Parser and translate to the OpenSearch DSL if possible. This is an NLPChina original module. The QueryPlanner Builder is added by us to support the JOIN and Post-processing logic. The QueryPlanner will take the take the output from Druid Lex&Parser and build the PhysicalPlan +* *Core Engine*: The QueryAction take the output from Druid Lex&Parser and translate to the OpenSearch DSL if possible. This is an NLPChina original module. The QueryPlanner Builder is added by us to support the JOIN and Post-processing logic. The QueryPlanner will take the output from Druid Lex&Parser and build the PhysicalPlan * *Execution*: The execution module execute QueryAction or QueryPlanner and return the response to the client. Different from the Frontend, Analyzer and Core Engine which running on the Transport Thread and can’t do any blocking operation. The Execution module running on the client threadpool and can perform the blocking operation. There are also others modules include in the OD-SQL engine. diff --git a/docs/dev/opensearch-pagination.md b/docs/dev/opensearch-pagination.md index 4982b13d7f..1919af30fe 100644 --- a/docs/dev/opensearch-pagination.md +++ b/docs/dev/opensearch-pagination.md @@ -477,4 +477,44 @@ Response: } +``` + +#### plugins.sql.pagination.api + +This setting controls whether the SQL search queries in OpenSearch use Point-In-Time (PIT) with search_after or the traditional scroll mechanism for fetching paginated results. + +- Default Value: true +- Possible Values: true or false +- When set to true, the search query in the background uses PIT with search_after instead of scroll to retrieve paginated results. The Cursor Id returned to the user will encode relevant pagination query-related information, which will be used to fetch the subsequent pages of results. +- This setting is node-level. +- This setting can be updated dynamically. 
+ +Example: + +``` +>> curl -H 'Content-Type: application/json' -X PUT localhost:9200/_cluster/settings -d '{ + "transient" : { + "plugins.sql.pagination.api" : "true" + } +}' +``` + +Response: + +``` +{ + "acknowledged" : true, + "persistent" : { }, + "transient" : { + "plugins" : { + "sql" : { + "pagination" : { + "api" : "true" + } + } + } + } +} + + ``` diff --git a/docs/user/admin/settings.rst b/docs/user/admin/settings.rst index 6531e84aa1..cbcb4f329d 100644 --- a/docs/user/admin/settings.rst +++ b/docs/user/admin/settings.rst @@ -196,13 +196,57 @@ Result set:: Note: the legacy settings of ``opendistro.sql.cursor.keep_alive`` is deprecated, it will fallback to the new settings if you request an update with the legacy name. +plugins.sql.pagination.api +================================ + +Description +----------- + +This setting controls whether the SQL search queries in OpenSearch use Point-In-Time (PIT) with search_after or the traditional scroll mechanism for fetching paginated results. + +1. Default Value: true +2. Possible Values: true or false +3. When set to true, the search query in the background uses PIT with search_after instead of scroll to retrieve paginated results. The Cursor Id returned to the user will encode relevant pagination query-related information, which will be used to fetch the subsequent pages of results. +4. This setting is node-level. +5. This setting can be updated dynamically. + + +Example +------- + +You can update the setting with a new value like this. + +SQL query:: + + >> curl -H 'Content-Type: application/json' -X PUT localhost:9200/_plugins/_query/settings -d '{ + "transient" : { + "plugins.sql.pagination.api" : "true" + } + }' + +Result set:: + + { + "acknowledged" : true, + "persistent" : { }, + "transient" : { + "plugins" : { + "sql" : { + "pagination" : { + "api" : "true" + } + } + } + } + } + plugins.query.size_limit =========================== Description ----------- -The new engine fetches a default size of index from OpenSearch set by this setting, the default value is 200. You can change the value to any value not greater than the max result window value in index level (10000 by default), here is an example:: +The new engine fetches a default size of index from OpenSearch set by this setting, the default value equals to max result window in index level (10000 by default). You can change the value to any value not greater than the max result window value in index level (`index.max_result_window`), here is an example:: >> curl -H 'Content-Type: application/json' -X PUT localhost:9200/_plugins/_query/settings -d '{ "transient" : { @@ -595,6 +639,75 @@ Request:: } } +plugins.query.executionengine.async_query.external_scheduler.enabled +===================================================================== + +Description +----------- +This setting controls whether the external scheduler is enabled for async queries. + +* Default Value: true +* Scope: Node-level +* Dynamic Update: Yes, this setting can be updated dynamically. + +To disable the external scheduler, use the following command: + +Request :: + + sh$ curl -sS -H 'Content-Type: application/json' -X PUT localhost:9200/_cluster/settings \ + ... 
-d '{"transient":{"plugins.query.executionengine.async_query.external_scheduler.enabled":"false"}}' + { + "acknowledged": true, + "persistent": {}, + "transient": { + "plugins": { + "query": { + "executionengine": { + "async_query": { + "external_scheduler": { + "enabled": "false" + } + } + } + } + } + } + } + +plugins.query.executionengine.async_query.external_scheduler.interval +===================================================================== + +Description +----------- +This setting defines the interval at which the external scheduler applies for auto refresh queries. It optimizes Spark applications by allowing them to automatically decide whether to use the Spark scheduler or the external scheduler. + +* Default Value: None (must be explicitly set) +* Format: A string representing a time duration follows Spark `CalendarInterval `__ format (e.g., ``10 minutes`` for 10 minutes, ``1 hour`` for 1 hour). + +To modify the interval to 10 minutes for example, use this command: + +Request :: + + sh$ curl -sS -H 'Content-Type: application/json' -X PUT localhost:9200/_cluster/settings \ + ... -d '{"transient":{"plugins.query.executionengine.async_query.external_scheduler.interval":"10 minutes"}}' + { + "acknowledged": true, + "persistent": {}, + "transient": { + "plugins": { + "query": { + "executionengine": { + "async_query": { + "external_scheduler": { + "interval": "10 minutes" + } + } + } + } + } + } + } + plugins.query.executionengine.spark.streamingjobs.housekeeper.interval ====================================================================== @@ -630,3 +743,142 @@ Request :: } } } + +plugins.query.datasources.enabled +================================= + +Description +----------- + +This setting controls whether datasources are enabled. + +1. The default value is true +2. This setting is node scope +3. This setting can be updated dynamically + +Update Settings Request:: + + sh$ curl -sS -H 'Content-Type: application/json' -X PUT 'localhost:9200/_cluster/settings?pretty' \ + ... -d '{"transient":{"plugins.query.datasources.enabled":"false"}}' + { + "acknowledged": true, + "persistent": {}, + "transient": { + "plugins": { + "query": { + "datasources": { + "enabled": "false" + } + } + } + } + } + +When Attempting to Call Data Source APIs:: + + sh$ curl -sS -H 'Content-Type: application/json' -X GET 'localhost:9200/_plugins/_query/_datasources' + { + "status": 400, + "error": { + "type": "OpenSearchStatusException", + "reason": "Invalid Request", + "details": "plugins.query.datasources.enabled setting is false" + } + } + +When Attempting to List Data Source:: + + sh$ curl -sS -H 'Content-Type: application/json' -X POST 'localhost:9200/_plugins/_ppl' \ + ... -d '{"query":"show datasources"}' + { + "schema": [ + { + "name": "DATASOURCE_NAME", + "type": "string" + }, + { + "name": "CONNECTOR_TYPE", + "type": "string" + } + ], + "datarows": [], + "total": 0, + "size": 0 + } + +To Re-enable Data Sources::: + + sh$ curl -sS -H 'Content-Type: application/json' -X PUT 'localhost:9200/_cluster/settings?pretty' \ + ... -d '{"transient":{"plugins.query.datasources.enabled":"true"}}' + { + "acknowledged": true, + "persistent": {}, + "transient": { + "plugins": { + "query": { + "datasources": { + "enabled": "true" + } + } + } + } + } + +plugins.query.field_type_tolerance +================================== + +Description +----------- + +This setting controls whether preserve arrays. If this setting is set to false, then an array is reduced +to the first non array value of any level of nesting. + +1. 
The default value is true (preserve arrays) +2. This setting is node scope +3. This setting can be updated dynamically + +Querying a field containing array values will return the full array values:: + + os> SELECT accounts FROM people; + fetched rows / total rows = 1/1 + +-----------------------+ + | accounts | + +-----------------------+ + | [{'id': 1},{'id': 2}] | + +-----------------------+ + +Disable field type tolerance:: + + >> curl -H 'Content-Type: application/json' -X PUT localhost:9200/_plugins/_query/settings -d '{ + "transient" : { + "plugins.query.field_type_tolerance" : false + } + }' + +When field type tolerance is disabled, arrays are collapsed to the first non array value:: + + os> SELECT accounts FROM people; + fetched rows / total rows = 1/1 + +-----------+ + | accounts | + +-----------+ + | {'id': 1} | + +-----------+ + +Reenable field type tolerance:: + + >> curl -H 'Content-Type: application/json' -X PUT localhost:9200/_plugins/_query/settings -d '{ + "transient" : { + "plugins.query.field_type_tolerance" : true + } + }' + +Limitations: +------------ +OpenSearch does not natively support the ARRAY data type but does allow multi-value fields implicitly. The +SQL/PPL plugin adheres strictly to the data type semantics defined in index mappings. When parsing OpenSearch +responses, it expects data to match the declared type and does not account for data in array format. If the +plugins.query.field_type_tolerance setting is enabled, the SQL/PPL plugin will handle array datasets by returning +scalar data types, allowing basic queries (e.g., SELECT * FROM tbl WHERE condition). However, using multi-value +fields in expressions or functions will result in exceptions. If this setting is disabled or absent, only the +first element of an array is returned, preserving the default behavior. 
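
The `testWhenDataSourcesAreDisabled` test added above, together with the new `plugins.query.datasources.enabled` documentation, pins down the expected contract: with the setting off, metadata reads return empty results and create/update/delete fail fast with "Data source management is disabled". Below is a minimal illustrative sketch of such a guard; the class and method names are assumptions for illustration only, not the plugin's actual implementation.

```java
// Illustrative sketch only; names are assumptions, not the plugin's actual code.
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.function.BooleanSupplier;
import java.util.function.Supplier;

class DataSourcesEnabledGuard {
  private final BooleanSupplier dataSourcesEnabled;

  DataSourcesEnabledGuard(BooleanSupplier dataSourcesEnabled) {
    this.dataSourcesEnabled = dataSourcesEnabled;
  }

  // Reads degrade gracefully when data source management is disabled.
  <T> Optional<T> readOne(Supplier<Optional<T>> read) {
    return dataSourcesEnabled.getAsBoolean() ? read.get() : Optional.empty();
  }

  <T> List<T> readAll(Supplier<List<T>> read) {
    return dataSourcesEnabled.getAsBoolean() ? read.get() : Collections.emptyList();
  }

  // Writes fail fast, matching the IllegalStateException asserted in the new test.
  void write(Runnable write) {
    if (!dataSourcesEnabled.getAsBoolean()) {
      throw new IllegalStateException("Data source management is disabled");
    }
    write.run();
  }
}
```
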
\ No newline at end of file diff --git a/docs/user/beyond/partiql.rst b/docs/user/beyond/partiql.rst index 76fec8405d..d8e4b0722b 100644 --- a/docs/user/beyond/partiql.rst +++ b/docs/user/beyond/partiql.rst @@ -202,11 +202,11 @@ Selecting top level for object fields, object fields of array value and nested f os> SELECT city, accounts, projects FROM people; fetched rows / total rows = 1/1 - +-----------------------------------------------------+------------+----------------------------------------------------------------------------------------------------------------+ - | city | accounts | projects | - |-----------------------------------------------------+------------+----------------------------------------------------------------------------------------------------------------| - | {'name': 'Seattle', 'location': {'latitude': 10.5}} | {'id': 1} | [{'name': 'AWS Redshift Spectrum querying'},{'name': 'AWS Redshift security'},{'name': 'AWS Aurora security'}] | - +-----------------------------------------------------+------------+----------------------------------------------------------------------------------------------------------------+ + +-----------------------------------------------------+-----------------------+----------------------------------------------------------------------------------------------------------------+ + | city | accounts | projects | + |-----------------------------------------------------+-----------------------+----------------------------------------------------------------------------------------------------------------| + | {'name': 'Seattle', 'location': {'latitude': 10.5}} | [{'id': 1},{'id': 2}] | [{'name': 'AWS Redshift Spectrum querying'},{'name': 'AWS Redshift security'},{'name': 'AWS Aurora security'}] | + +-----------------------------------------------------+-----------------------+----------------------------------------------------------------------------------------------------------------+ Example 2: Selecting Deeper Levels ---------------------------------- @@ -215,11 +215,11 @@ Selecting at deeper levels for object fields of regular value returns inner fiel os> SELECT city.location, city.location.latitude FROM people; fetched rows / total rows = 1/1 - +--------------------+--------------------------+ - | city.location | city.location.latitude | - |--------------------+--------------------------| - | {'latitude': 10.5} | 10.5 | - +--------------------+--------------------------+ + +--------------------+------------------------+ + | city.location | city.location.latitude | + |--------------------+------------------------| + | {'latitude': 10.5} | 10.5 | + +--------------------+------------------------+ For selecting second level for nested fields, please read on and find more details in the following sections. 
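
The corrected `partiql.rst` output above (the `accounts` field now returns `[{'id': 1},{'id': 2}]`) reflects the `plugins.query.field_type_tolerance` behavior documented earlier: with tolerance disabled, an array is reduced to the first non-array value at any level of nesting. A small illustrative sketch of that reduction rule follows, using Jackson purely as an assumed example dependency; it is not the plugin's actual parsing code.

```java
// Illustrative sketch of the documented reduction rule; not the plugin's actual parsing code.
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.NullNode;

class FieldTypeToleranceSketch {
  // With plugins.query.field_type_tolerance=false, an array collapses to its
  // first non-array element at any level of nesting.
  static JsonNode firstNonArray(JsonNode node) {
    while (node.isArray()) {
      if (node.size() == 0) {
        return NullNode.getInstance();
      }
      node = node.get(0);
    }
    return node;
  }

  public static void main(String[] args) throws Exception {
    JsonNode accounts = new ObjectMapper().readTree("[{\"id\": 1},{\"id\": 2}]");
    System.out.println(accounts);                // [{"id":1},{"id":2}] -- tolerance enabled
    System.out.println(firstNonArray(accounts)); // {"id":1}            -- tolerance disabled
  }
}
```
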
diff --git a/docs/user/dql/aggregations.rst b/docs/user/dql/aggregations.rst index 42db4cdb4f..adf933c09c 100644 --- a/docs/user/dql/aggregations.rst +++ b/docs/user/dql/aggregations.rst @@ -34,12 +34,12 @@ The group by expression could be identifier:: os> SELECT gender, sum(age) FROM accounts GROUP BY gender; fetched rows / total rows = 2/2 - +----------+------------+ - | gender | sum(age) | - |----------+------------| - | F | 28 | - | M | 101 | - +----------+------------+ + +--------+----------+ + | gender | sum(age) | + |--------+----------| + | F | 28 | + | M | 101 | + +--------+----------+ Ordinal @@ -49,12 +49,12 @@ The group by expression could be ordinal:: os> SELECT gender, sum(age) FROM accounts GROUP BY 1; fetched rows / total rows = 2/2 - +----------+------------+ - | gender | sum(age) | - |----------+------------| - | F | 28 | - | M | 101 | - +----------+------------+ + +--------+----------+ + | gender | sum(age) | + |--------+----------| + | F | 28 | + | M | 101 | + +--------+----------+ Expression @@ -64,14 +64,14 @@ The group by expression could be expression:: os> SELECT abs(account_number), sum(age) FROM accounts GROUP BY abs(account_number); fetched rows / total rows = 4/4 - +-----------------------+------------+ - | abs(account_number) | sum(age) | - |-----------------------+------------| - | 1 | 32 | - | 13 | 28 | - | 18 | 33 | - | 6 | 36 | - +-----------------------+------------+ + +---------------------+----------+ + | abs(account_number) | sum(age) | + |---------------------+----------| + | 1 | 32 | + | 13 | 28 | + | 18 | 33 | + | 6 | 36 | + +---------------------+----------+ Aggregation @@ -91,12 +91,12 @@ The aggregation could be used select:: os> SELECT gender, sum(age) FROM accounts GROUP BY gender; fetched rows / total rows = 2/2 - +----------+------------+ - | gender | sum(age) | - |----------+------------| - | F | 28 | - | M | 101 | - +----------+------------+ + +--------+----------+ + | gender | sum(age) | + |--------+----------| + | F | 28 | + | M | 101 | + +--------+----------+ Expression over Aggregation --------------------------- @@ -105,12 +105,12 @@ The aggregation could be used as arguments of expression:: os> SELECT gender, sum(age) * 2 as sum2 FROM accounts GROUP BY gender; fetched rows / total rows = 2/2 - +----------+--------+ - | gender | sum2 | - |----------+--------| - | F | 56 | - | M | 202 | - +----------+--------+ + +--------+------+ + | gender | sum2 | + |--------+------| + | F | 56 | + | M | 202 | + +--------+------+ Expression as argument of Aggregation ------------------------------------- @@ -119,12 +119,12 @@ The aggregation could has expression as arguments:: os> SELECT gender, sum(age * 2) as sum2 FROM accounts GROUP BY gender; fetched rows / total rows = 2/2 - +----------+--------+ - | gender | sum2 | - |----------+--------| - | F | 56 | - | M | 202 | - +----------+--------+ + +--------+------+ + | gender | sum2 | + |--------+------| + | F | 56 | + | M | 202 | + +--------+------+ COUNT Aggregations ------------------ @@ -150,12 +150,12 @@ Example:: os> SELECT gender, count(*) as countV FROM accounts GROUP BY gender; fetched rows / total rows = 2/2 - +----------+----------+ - | gender | countV | - |----------+----------| - | F | 1 | - | M | 3 | - +----------+----------+ + +--------+--------+ + | gender | countV | + |--------+--------| + | F | 1 | + | M | 3 | + +--------+--------+ SUM --- @@ -169,12 +169,12 @@ Example:: os> SELECT gender, sum(age) as sumV FROM accounts GROUP BY gender; fetched rows / total rows = 2/2 - 
+----------+--------+ - | gender | sumV | - |----------+--------| - | F | 28 | - | M | 101 | - +----------+--------+ + +--------+------+ + | gender | sumV | + |--------+------| + | F | 28 | + | M | 101 | + +--------+------+ AVG --- @@ -188,12 +188,12 @@ Example:: os> SELECT gender, avg(age) as avgV FROM accounts GROUP BY gender; fetched rows / total rows = 2/2 - +----------+--------------------+ - | gender | avgV | - |----------+--------------------| - | F | 28.0 | - | M | 33.666666666666664 | - +----------+--------------------+ + +--------+--------------------+ + | gender | avgV | + |--------+--------------------| + | F | 28.0 | + | M | 33.666666666666664 | + +--------+--------------------+ MAX --- @@ -207,11 +207,11 @@ Example:: os> SELECT max(age) as maxV FROM accounts; fetched rows / total rows = 1/1 - +--------+ - | maxV | - |--------| - | 36 | - +--------+ + +------+ + | maxV | + |------| + | 36 | + +------+ MIN --- @@ -225,11 +225,11 @@ Example:: os> SELECT min(age) as minV FROM accounts; fetched rows / total rows = 1/1 - +--------+ - | minV | - |--------| - | 28 | - +--------+ + +------+ + | minV | + |------| + | 28 | + +------+ VAR_POP ------- @@ -364,11 +364,11 @@ To get the count of distinct values of a field, you can add a keyword ``DISTINCT os> SELECT COUNT(DISTINCT gender), COUNT(gender) FROM accounts; fetched rows / total rows = 1/1 - +--------------------------+-----------------+ - | COUNT(DISTINCT gender) | COUNT(gender) | - |--------------------------+-----------------| - | 2 | 4 | - +--------------------------+-----------------+ + +------------------------+---------------+ + | COUNT(DISTINCT gender) | COUNT(gender) | + |------------------------+---------------| + | 2 | 4 | + +------------------------+---------------+ PERCENTILE or PERCENTILE_APPROX ------------------------------- @@ -382,12 +382,12 @@ Example:: os> SELECT gender, percentile(age, 90) as p90 FROM accounts GROUP BY gender; fetched rows / total rows = 2/2 - +----------+-------+ - | gender | p90 | - |----------+-------| - | F | 28 | - | M | 36 | - +----------+-------+ + +--------+-----+ + | gender | p90 | + |--------+-----| + | F | 28 | + | M | 36 | + +--------+-----+ HAVING Clause ============= @@ -413,11 +413,11 @@ Here is an example for typical use of ``HAVING`` clause:: ... GROUP BY gender ... HAVING sum(age) > 100; fetched rows / total rows = 1/1 - +----------+------------+ - | gender | sum(age) | - |----------+------------| - | M | 101 | - +----------+------------+ + +--------+----------+ + | gender | sum(age) | + |--------+----------| + | M | 101 | + +--------+----------+ Here is another example for using alias in ``HAVING`` condition. Note that if an identifier is ambiguous, for example present both as a select alias and an index field, preference is alias. This means the identifier will be replaced by expression aliased in ``SELECT`` clause:: @@ -427,11 +427,11 @@ Here is another example for using alias in ``HAVING`` condition. Note that if an ... GROUP BY gender ... HAVING s > 100; fetched rows / total rows = 1/1 - +----------+-----+ - | gender | s | - |----------+-----| - | M | 101 | - +----------+-----+ + +--------+-----+ + | gender | s | + |--------+-----| + | M | 101 | + +--------+-----+ HAVING without GROUP BY ----------------------- @@ -443,11 +443,11 @@ Additionally, a ``HAVING`` clause can work without ``GROUP BY`` clause. This is ... FROM accounts ... 
HAVING sum(age) > 100; fetched rows / total rows = 1/1 - +------------------------+ - | 'Total of age > 100' | - |------------------------| - | Total of age > 100 | - +------------------------+ + +----------------------+ + | 'Total of age > 100' | + |----------------------| + | Total of age > 100 | + +----------------------+ FILTER Clause @@ -465,12 +465,12 @@ The group by aggregation with ``FILTER`` clause can set different conditions for os> SELECT avg(age) FILTER(WHERE balance > 10000) AS filtered, gender FROM accounts GROUP BY gender fetched rows / total rows = 2/2 - +------------+----------+ - | filtered | gender | - |------------+----------| - | 28.0 | F | - | 32.0 | M | - +------------+----------+ + +----------+--------+ + | filtered | gender | + |----------+--------| + | 28.0 | F | + | 32.0 | M | + +----------+--------+ FILTER without GROUP BY ----------------------- @@ -482,11 +482,11 @@ The ``FILTER`` clause can be used in aggregation functions without GROUP BY as w ... count(*) FILTER(WHERE age > 34) AS filtered ... FROM accounts fetched rows / total rows = 1/1 - +--------------+------------+ - | unfiltered | filtered | - |--------------+------------| - | 4 | 1 | - +--------------+------------+ + +------------+----------+ + | unfiltered | filtered | + |------------+----------| + | 4 | 1 | + +------------+----------+ Distinct count aggregate with FILTER ------------------------------------ @@ -495,9 +495,9 @@ The ``FILTER`` clause is also used in distinct count to do the filtering before os> SELECT COUNT(DISTINCT firstname) FILTER(WHERE age > 30) AS distinct_count FROM accounts fetched rows / total rows = 1/1 - +------------------+ - | distinct_count | - |------------------| - | 3 | - +------------------+ + +----------------+ + | distinct_count | + |----------------| + | 3 | + +----------------+ diff --git a/docs/user/dql/basics.rst b/docs/user/dql/basics.rst index a03ac4db70..a59f193086 100644 --- a/docs/user/dql/basics.rst +++ b/docs/user/dql/basics.rst @@ -191,14 +191,14 @@ This produces results like this for example:: os> SELECT firstname, lastname, _index, _sort FROM accounts; fetched rows / total rows = 4/4 - +-------------+------------+----------+---------+ - | firstname | lastname | _index | _sort | - |-------------+------------+----------+---------| - | Amber | Duke | accounts | -2 | - | Hattie | Bond | accounts | -2 | - | Nanette | Bates | accounts | -2 | - | Dale | Adams | accounts | -2 | - +-------------+------------+----------+---------+ + +-----------+----------+----------+-------+ + | firstname | lastname | _index | _sort | + |-----------+----------+----------+-------| + | Amber | Duke | accounts | -2 | + | Hattie | Bond | accounts | -2 | + | Nanette | Bates | accounts | -2 | + | Dale | Adams | accounts | -2 | + +-----------+----------+----------+-------+ Example 3: Using Field Alias ---------------------------- @@ -303,13 +303,13 @@ In fact your can use any expression in a ``DISTINCT`` clause as follows:: os> SELECT DISTINCT SUBSTRING(lastname, 1, 1) FROM accounts; fetched rows / total rows = 3/3 - +-----------------------------+ - | SUBSTRING(lastname, 1, 1) | - |-----------------------------| - | A | - | B | - | D | - +-----------------------------+ + +---------------------------+ + | SUBSTRING(lastname, 1, 1) | + |---------------------------| + | A | + | B | + | D | + +---------------------------+ FROM ==== @@ -988,14 +988,14 @@ Note that the example above is essentially sorting on a predicate expression. 
In os> SELECT employer FROM accounts ORDER BY employer ASC NULLS LAST; fetched rows / total rows = 4/4 - +------------+ - | employer | - |------------| - | Netagy | - | Pyrami | - | Quility | - | null | - +------------+ + +----------+ + | employer | + |----------| + | Netagy | + | Pyrami | + | Quility | + | null | + +----------+ The sorting rule can be summarized as follows: @@ -1010,14 +1010,14 @@ Here is another example for sort in descending order without ``NULLS`` clause:: os> SELECT employer FROM accounts ORDER BY employer DESC; fetched rows / total rows = 4/4 - +------------+ - | employer | - |------------| - | Quility | - | Pyrami | - | Netagy | - | null | - +------------+ + +----------+ + | employer | + |----------| + | Quility | + | Pyrami | + | Netagy | + | null | + +----------+ Example 3: Ordering by Aggregate Functions @@ -1027,23 +1027,23 @@ Aggregate functions are allowed to be used in ``ORDER BY`` clause. You can refer os> SELECT gender, MAX(age) FROM accounts GROUP BY gender ORDER BY MAX(age) DESC; fetched rows / total rows = 2/2 - +----------+------------+ - | gender | MAX(age) | - |----------+------------| - | M | 36 | - | F | 28 | - +----------+------------+ + +--------+----------+ + | gender | MAX(age) | + |--------+----------| + | M | 36 | + | F | 28 | + +--------+----------+ Even if it's not present in ``SELECT`` clause, it can be also used as follows:: os> SELECT gender, MIN(age) FROM accounts GROUP BY gender ORDER BY MAX(age) DESC; fetched rows / total rows = 2/2 - +----------+------------+ - | gender | MIN(age) | - |----------+------------| - | M | 32 | - | F | 28 | - +----------+------------+ + +--------+----------+ + | gender | MIN(age) | + |--------+----------| + | M | 32 | + | F | 28 | + +--------+----------+ LIMIT ===== @@ -1147,12 +1147,12 @@ Offset position can be given following the OFFSET keyword as well, here is an ex >od SELECT age FROM accounts ORDER BY age LIMIT 2 OFFSET 1 fetched rows / total rows = 2/2 - +-------+ - | age | - |-------| - | 32 | - | 33 | - +-------+ + +-----+ + | age | + |-----| + | 32 | + | 33 | + +-----+ Limitation diff --git a/docs/user/dql/complex.rst b/docs/user/dql/complex.rst index 17009d712b..906ea21904 100644 --- a/docs/user/dql/complex.rst +++ b/docs/user/dql/complex.rst @@ -247,14 +247,14 @@ Here is another example with aggregation function and GROUP BY in subquery:: ... SELECT AVG(balance) AS avg_balance FROM accounts GROUP BY gender, age ... ) AS a; fetched rows / total rows = 4/4 - +---------------+ - | avg_balance | - |---------------| - | 32838.0 | - | 39225.0 | - | 4180.0 | - | 5686.0 | - +---------------+ + +-------------+ + | avg_balance | + |-------------| + | 32838.0 | + | 39225.0 | + | 4180.0 | + | 5686.0 | + +-------------+ Query with multiple layers of subquery is supported as well, here follows a example:: @@ -265,12 +265,12 @@ Query with multiple layers of subquery is supported as well, here follows a exam ... ) AS accounts WHERE age < 35 ... 
) AS accounts fetched rows / total rows = 2/2 - +--------+ - | name | - |--------| - | Duke | - | Adams | - +--------+ + +-------+ + | name | + |-------| + | Duke | + | Adams | + +-------+ JOINs diff --git a/docs/user/dql/expressions.rst b/docs/user/dql/expressions.rst index 123bba046a..18a5bdce8f 100644 --- a/docs/user/dql/expressions.rst +++ b/docs/user/dql/expressions.rst @@ -34,29 +34,29 @@ Here is an example for different type of literals:: os> SELECT 123, 'hello', false, -4.567, DATE '2020-07-07', TIME '01:01:01', TIMESTAMP '2020-07-07 01:01:01'; fetched rows / total rows = 1/1 - +-------+-----------+---------+----------+---------------------+-------------------+-----------------------------------+ - | 123 | 'hello' | false | -4.567 | DATE '2020-07-07' | TIME '01:01:01' | TIMESTAMP '2020-07-07 01:01:01' | - |-------+-----------+---------+----------+---------------------+-------------------+-----------------------------------| - | 123 | hello | False | -4.567 | 2020-07-07 | 01:01:01 | 2020-07-07 01:01:01 | - +-------+-----------+---------+----------+---------------------+-------------------+-----------------------------------+ + +-----+---------+-------+--------+-------------------+-----------------+---------------------------------+ + | 123 | 'hello' | false | -4.567 | DATE '2020-07-07' | TIME '01:01:01' | TIMESTAMP '2020-07-07 01:01:01' | + |-----+---------+-------+--------+-------------------+-----------------+---------------------------------| + | 123 | hello | False | -4.567 | 2020-07-07 | 01:01:01 | 2020-07-07 01:01:01 | + +-----+---------+-------+--------+-------------------+-----------------+---------------------------------+ os> SELECT "Hello", 'Hello', "It""s", 'It''s', "It's", '"Its"', 'It\'s', 'It\\\'s', "\I\t\s" fetched rows / total rows = 1/1 - +-----------+-----------+-----------+-----------+----------+-----------+-----------+-------------+------------+ - | "Hello" | 'Hello' | "It""s" | 'It''s' | "It's" | '"Its"' | 'It\'s' | 'It\\\'s' | "\I\t\s" | - |-----------+-----------+-----------+-----------+----------+-----------+-----------+-------------+------------| - | Hello | Hello | It"s | It's | It's | "Its" | It's | It\'s | \I\t\s | - +-----------+-----------+-----------+-----------+----------+-----------+-----------+-------------+------------+ + +---------+---------+---------+---------+--------+---------+---------+-----------+----------+ + | "Hello" | 'Hello' | "It""s" | 'It''s' | "It's" | '"Its"' | 'It\'s' | 'It\\\'s' | "\I\t\s" | + |---------+---------+---------+---------+--------+---------+---------+-----------+----------| + | Hello | Hello | It"s | It's | It's | "Its" | It's | It\'s | \I\t\s | + +---------+---------+---------+---------+--------+---------+---------+-----------+----------+ os> SELECT {DATE '2020-07-07'}, {D '2020-07-07'}, {TIME '01:01:01'}, {T '01:01:01'}, {TIMESTAMP '2020-07-07 01:01:01'}, {TS '2020-07-07 01:01:01'} fetched rows / total rows = 1/1 - +-----------------------+--------------------+---------------------+------------------+-------------------------------------+------------------------------+ - | {DATE '2020-07-07'} | {D '2020-07-07'} | {TIME '01:01:01'} | {T '01:01:01'} | {TIMESTAMP '2020-07-07 01:01:01'} | {TS '2020-07-07 01:01:01'} | - |-----------------------+--------------------+---------------------+------------------+-------------------------------------+------------------------------| - | 2020-07-07 | 2020-07-07 | 01:01:01 | 01:01:01 | 2020-07-07 01:01:01 | 2020-07-07 01:01:01 | - 
+-----------------------+--------------------+---------------------+------------------+-------------------------------------+------------------------------+ + +---------------------+------------------+-------------------+----------------+-----------------------------------+----------------------------+ + | {DATE '2020-07-07'} | {D '2020-07-07'} | {TIME '01:01:01'} | {T '01:01:01'} | {TIMESTAMP '2020-07-07 01:01:01'} | {TS '2020-07-07 01:01:01'} | + |---------------------+------------------+-------------------+----------------+-----------------------------------+----------------------------| + | 2020-07-07 | 2020-07-07 | 01:01:01 | 01:01:01 | 2020-07-07 01:01:01 | 2020-07-07 01:01:01 | + +---------------------+------------------+-------------------+----------------+-----------------------------------+----------------------------+ Limitations ----------- @@ -102,11 +102,11 @@ Here is an example for different type of arithmetic expressions:: os> SELECT 1 + 2, (9 - 1) % 3, 2 * 4 / 3; fetched rows / total rows = 1/1 - +---------+---------------+-------------+ - | 1 + 2 | (9 - 1) % 3 | 2 * 4 / 3 | - |---------+---------------+-------------| - | 3 | 2 | 2 | - +---------+---------------+-------------+ + +-------+-------------+-----------+ + | 1 + 2 | (9 - 1) % 3 | 2 * 4 / 3 | + |-------+-------------+-----------| + | 3 | 2 | 2 | + +-------+-------------+-----------+ Comparison Operators ================================== @@ -162,38 +162,38 @@ Here is an example for different type of comparison operators:: os> SELECT 2 > 1, 2 >= 1, 2 < 1, 2 != 1, 2 <= 1, 2 = 1; fetched rows / total rows = 1/1 - +---------+----------+---------+----------+----------+---------+ - | 2 > 1 | 2 >= 1 | 2 < 1 | 2 != 1 | 2 <= 1 | 2 = 1 | - |---------+----------+---------+----------+----------+---------| - | True | True | False | True | False | False | - +---------+----------+---------+----------+----------+---------+ + +-------+--------+-------+--------+--------+-------+ + | 2 > 1 | 2 >= 1 | 2 < 1 | 2 != 1 | 2 <= 1 | 2 = 1 | + |-------+--------+-------+--------+--------+-------| + | True | True | False | True | False | False | + +-------+--------+-------+--------+--------+-------+ It is possible to compare datetimes. When comparing different datetime types, for example `DATE` and `TIME`, both converted to `TIMESTAMP`. The following rule is applied on coversion: a `TIME` applied to today's date; `DATE` is interpreted at midnight. 
See example below:: os> SELECT current_time() > current_date() AS `now.time > today`, typeof(current_time()) AS `now.time.type`, typeof(current_date()) AS `now.date.type`; fetched rows / total rows = 1/1 - +--------------------+-----------------+-----------------+ - | now.time > today | now.time.type | now.date.type | - |--------------------+-----------------+-----------------| - | True | TIME | DATE | - +--------------------+-----------------+-----------------+ + +------------------+---------------+---------------+ + | now.time > today | now.time.type | now.date.type | + |------------------+---------------+---------------| + | True | TIME | DATE | + +------------------+---------------+---------------+ os> SELECT current_time() = now() AS `now.time = now`, typeof(current_time()) AS `now.time.type`, typeof(now()) AS `now.type`; fetched rows / total rows = 1/1 - +------------------+-----------------+------------+ - | now.time = now | now.time.type | now.type | - |------------------+-----------------+------------| - | True | TIME | TIMESTAMP | - +------------------+-----------------+------------+ + +----------------+---------------+-----------+ + | now.time = now | now.time.type | now.type | + |----------------+---------------+-----------| + | True | TIME | TIMESTAMP | + +----------------+---------------+-----------+ os> SELECT subtime(now(), current_time()) = current_date() AS `midnight = now.date`, typeof(subtime(now(), current_time())) AS `midnight.type`, typeof(current_date()) AS `now.date.type`; fetched rows / total rows = 1/1 - +-----------------------+-----------------+-----------------+ - | midnight = now.date | midnight.type | now.date.type | - |-----------------------+-----------------+-----------------| - | True | TIMESTAMP | DATE | - +-----------------------+-----------------+-----------------+ + +---------------------+---------------+---------------+ + | midnight = now.date | midnight.type | now.date.type | + |---------------------+---------------+---------------| + | True | TIMESTAMP | DATE | + +---------------------+---------------+---------------+ LIKE @@ -203,11 +203,11 @@ expr LIKE pattern. 
The expr is string value, pattern is supports literal text, a os> SELECT 'axyzb' LIKE 'a%b', 'acb' LIKE 'A_B', 'axyzb' NOT LIKE 'a%b', 'acb' NOT LIKE 'a_b'; fetched rows / total rows = 1/1 - +----------------------+--------------------+--------------------------+------------------------+ - | 'axyzb' LIKE 'a%b' | 'acb' LIKE 'A_B' | 'axyzb' NOT LIKE 'a%b' | 'acb' NOT LIKE 'a_b' | - |----------------------+--------------------+--------------------------+------------------------| - | True | True | False | False | - +----------------------+--------------------+--------------------------+------------------------+ + +--------------------+------------------+------------------------+----------------------+ + | 'axyzb' LIKE 'a%b' | 'acb' LIKE 'A_B' | 'axyzb' NOT LIKE 'a%b' | 'acb' NOT LIKE 'a_b' | + |--------------------+------------------+------------------------+----------------------| + | True | True | False | False | + +--------------------+------------------+------------------------+----------------------+ NULL value test --------------- @@ -216,11 +216,11 @@ Here is an example for null value test:: os> SELECT 0 IS NULL, 0 IS NOT NULL, NULL IS NULL, NULL IS NOT NULL; fetched rows / total rows = 1/1 - +-------------+-----------------+----------------+--------------------+ - | 0 IS NULL | 0 IS NOT NULL | NULL IS NULL | NULL IS NOT NULL | - |-------------+-----------------+----------------+--------------------| - | False | True | True | False | - +-------------+-----------------+----------------+--------------------+ + +-----------+---------------+--------------+------------------+ + | 0 IS NULL | 0 IS NOT NULL | NULL IS NULL | NULL IS NOT NULL | + |-----------+---------------+--------------+------------------| + | False | True | True | False | + +-----------+---------------+--------------+------------------+ REGEXP value test @@ -230,11 +230,11 @@ expr REGEXP pattern. The expr is string value, pattern is supports regular expre os> SELECT 'Hello!' REGEXP '.*', 'a' REGEXP 'b'; fetched rows / total rows = 1/1 - +------------------------+------------------+ - | 'Hello!' REGEXP '.*' | 'a' REGEXP 'b' | - |------------------------+------------------| - | 1 | 0 | - +------------------------+------------------+ + +----------------------+----------------+ + | 'Hello!' REGEXP '.*' | 'a' REGEXP 'b' | + |----------------------+----------------| + | 1 | 0 | + +----------------------+----------------+ IN value list test ------------------ @@ -243,11 +243,11 @@ Here is an example for IN value test:: os> SELECT 1 in (1, 2), 3 not in (1, 2); fetched rows / total rows = 1/1 - +---------------+-------------------+ - | 1 in (1, 2) | 3 not in (1, 2) | - |---------------+-------------------| - | True | True | - +---------------+-------------------+ + +-------------+-----------------+ + | 1 in (1, 2) | 3 not in (1, 2) | + |-------------+-----------------| + | True | True | + +-------------+-----------------+ BETWEEN range test ------------------ @@ -259,11 +259,11 @@ Here is an example for range test by BETWEEN expression:: ... 4 BETWEEN 1 AND 3, ... 
4 NOT BETWEEN 1 AND 3; fetched rows / total rows = 1/1 - +---------------------+---------------------+-------------------------+ - | 1 BETWEEN 1 AND 3 | 4 BETWEEN 1 AND 3 | 4 NOT BETWEEN 1 AND 3 | - |---------------------+---------------------+-------------------------| - | True | False | True | - +---------------------+---------------------+-------------------------+ + +-------------------+-------------------+-----------------------+ + | 1 BETWEEN 1 AND 3 | 4 BETWEEN 1 AND 3 | 4 NOT BETWEEN 1 AND 3 | + |-------------------+-------------------+-----------------------| + | True | False | True | + +-------------------+-------------------+-----------------------+ Function Call @@ -293,11 +293,11 @@ Here is an example for different type of arithmetic expressions:: os> SELECT abs(-1.234), abs(-1 * abs(-5)); fetched rows / total rows = 1/1 - +---------------+---------------------+ - | abs(-1.234) | abs(-1 * abs(-5)) | - |---------------+---------------------| - | 1.234 | 5 | - +---------------+---------------------+ + +-------------+-------------------+ + | abs(-1.234) | abs(-1 * abs(-5)) | + |-------------+-------------------| + | 1.234 | 5 | + +-------------+-------------------+ Date function examples ---------------------- @@ -306,11 +306,11 @@ Here is an example for different type of arithmetic expressions:: os> SELECT dayofmonth(DATE '2020-07-07'); fetched rows / total rows = 1/1 - +---------------------------------+ - | dayofmonth(DATE '2020-07-07') | - |---------------------------------| - | 7 | - +---------------------------------+ + +-------------------------------+ + | dayofmonth(DATE '2020-07-07') | + |-------------------------------| + | 7 | + +-------------------------------+ Limitations ----------- diff --git a/docs/user/dql/functions.rst b/docs/user/dql/functions.rst index b445fffa63..a347614ba4 100644 --- a/docs/user/dql/functions.rst +++ b/docs/user/dql/functions.rst @@ -49,21 +49,21 @@ Cast to string example:: os> SELECT cast(true as string) as cbool, cast(1 as string) as cint, cast(DATE '2012-08-07' as string) as cdate fetched rows / total rows = 1/1 - +---------+--------+------------+ - | cbool | cint | cdate | - |---------+--------+------------| - | true | 1 | 2012-08-07 | - +---------+--------+------------+ + +-------+------+------------+ + | cbool | cint | cdate | + |-------+------+------------| + | true | 1 | 2012-08-07 | + +-------+------+------------+ Cast to number example:: os> SELECT cast(true as int) as cbool, cast('1' as integer) as cstring fetched rows / total rows = 1/1 - +---------+-----------+ - | cbool | cstring | - |---------+-----------| - | 1 | 1 | - +---------+-----------+ + +-------+---------+ + | cbool | cstring | + |-------+---------| + | 1 | 1 | + +-------+---------+ Cast to date example:: @@ -79,11 +79,11 @@ Cast function can be chained:: os> SELECT cast(cast(true as string) as boolean) as cbool fetched rows / total rows = 1/1 - +---------+ - | cbool | - |---------| - | True | - +---------+ + +-------+ + | cbool | + |-------| + | True | + +-------+ Mathematical Functions @@ -103,11 +103,11 @@ Example:: os> SELECT ABS(0), ABS(10), ABS(-10), ABS(12.34567), ABS(-12.34567) fetched rows / total rows = 1/1 - +----------+-----------+------------+-----------------+------------------+ - | ABS(0) | ABS(10) | ABS(-10) | ABS(12.34567) | ABS(-12.34567) | - |----------+-----------+------------+-----------------+------------------| - | 0 | 10 | 10 | 12.34567 | 12.34567 | - +----------+-----------+------------+-----------------+------------------+ + 
+--------+---------+----------+---------------+----------------+ + | ABS(0) | ABS(10) | ABS(-10) | ABS(12.34567) | ABS(-12.34567) | + |--------+---------+----------+---------------+----------------| + | 0 | 10 | 10 | 12.34567 | 12.34567 | + +--------+---------+----------+---------------+----------------+ ACOS @@ -151,11 +151,11 @@ Example:: os> SELECT ADD(2, 1), ADD(2.5, 3); fetched rows / total rows = 1/1 - +-------------+---------------+ - | ADD(2, 1) | ADD(2.5, 3) | - |-------------+---------------| - | 3 | 5.5 | - +-------------+---------------+ + +-----------+-------------+ + | ADD(2, 1) | ADD(2.5, 3) | + |-----------+-------------| + | 3 | 5.5 | + +-----------+-------------+ ASIN ---- @@ -173,11 +173,11 @@ Example:: os> SELECT ASIN(0) fetched rows / total rows = 1/1 - +-----------+ - | ASIN(0) | - |-----------| - | 0.0 | - +-----------+ + +---------+ + | ASIN(0) | + |---------| + | 0.0 | + +---------+ ATAN @@ -244,11 +244,11 @@ Example:: os> SELECT CBRT(8), CBRT(9.261), CBRT(-27); fetched rows / total rows = 1/1 - +-----------+---------------+-------------+ - | CBRT(8) | CBRT(9.261) | CBRT(-27) | - |-----------+---------------+-------------| - | 2.0 | 2.1 | -3.0 | - +-----------+---------------+-------------+ + +---------+-------------+-----------+ + | CBRT(8) | CBRT(9.261) | CBRT(-27) | + |---------+-------------+-----------| + | 2.0 | 2.1 | -3.0 | + +---------+-------------+-----------+ CEIL @@ -281,29 +281,29 @@ Example:: os> SELECT CEILING(0), CEILING(50.00005), CEILING(-50.00005); fetched rows / total rows = 1/1 - +--------------+---------------------+----------------------+ - | CEILING(0) | CEILING(50.00005) | CEILING(-50.00005) | - |--------------+---------------------+----------------------| - | 0 | 51 | -50 | - +--------------+---------------------+----------------------+ + +------------+-------------------+--------------------+ + | CEILING(0) | CEILING(50.00005) | CEILING(-50.00005) | + |------------+-------------------+--------------------| + | 0 | 51 | -50 | + +------------+-------------------+--------------------+ os> SELECT CEILING(3147483647.12345), CEILING(113147483647.12345), CEILING(3147483647.00001); fetched rows / total rows = 1/1 - +-----------------------------+-------------------------------+-----------------------------+ - | CEILING(3147483647.12345) | CEILING(113147483647.12345) | CEILING(3147483647.00001) | - |-----------------------------+-------------------------------+-----------------------------| - | 3147483648 | 113147483648 | 3147483648 | - +-----------------------------+-------------------------------+-----------------------------+ + +---------------------------+-----------------------------+---------------------------+ + | CEILING(3147483647.12345) | CEILING(113147483647.12345) | CEILING(3147483647.00001) | + |---------------------------+-----------------------------+---------------------------| + | 3147483648 | 113147483648 | 3147483648 | + +---------------------------+-----------------------------+---------------------------+ Example:: os> SELECT CEIL(0), CEIL(12.34567), CEIL(-12.34567) fetched rows / total rows = 1/1 - +-----------+------------------+-------------------+ - | CEIL(0) | CEIL(12.34567) | CEIL(-12.34567) | - |-----------+------------------+-------------------| - | 0 | 13 | -12 | - +-----------+------------------+-------------------+ + +---------+----------------+-----------------+ + | CEIL(0) | CEIL(12.34567) | CEIL(-12.34567) | + |---------+----------------+-----------------| + | 0 | 13 | -12 | + 
+---------+----------------+-----------------+ CONV @@ -322,11 +322,11 @@ Example:: os> SELECT CONV('12', 10, 16), CONV('2C', 16, 10), CONV(12, 10, 2), CONV(1111, 2, 10) fetched rows / total rows = 1/1 - +----------------------+----------------------+-------------------+---------------------+ - | CONV('12', 10, 16) | CONV('2C', 16, 10) | CONV(12, 10, 2) | CONV(1111, 2, 10) | - |----------------------+----------------------+-------------------+---------------------| - | c | 44 | 1100 | 15 | - +----------------------+----------------------+-------------------+---------------------+ + +--------------------+--------------------+-----------------+-------------------+ + | CONV('12', 10, 16) | CONV('2C', 16, 10) | CONV(12, 10, 2) | CONV(1111, 2, 10) | + |--------------------+--------------------+-----------------+-------------------| + | c | 44 | 1100 | 15 | + +--------------------+--------------------+-----------------+-------------------+ COS @@ -345,11 +345,11 @@ Example:: os> SELECT COS(0) fetched rows / total rows = 1/1 - +----------+ - | COS(0) | - |----------| - | 1.0 | - +----------+ + +--------+ + | COS(0) | + |--------| + | 1.0 | + +--------+ COSH @@ -414,11 +414,11 @@ Example:: os> SELECT CRC32('MySQL') fetched rows / total rows = 1/1 - +------------------+ - | CRC32('MySQL') | - |------------------| - | 3259397556 | - +------------------+ + +----------------+ + | CRC32('MySQL') | + |----------------| + | 3259397556 | + +----------------+ DEGREES @@ -462,11 +462,11 @@ Example:: os> SELECT DIVIDE(10, 2), DIVIDE(7.5, 3); fetched rows / total rows = 1/1 - +-----------------+------------------+ - | DIVIDE(10, 2) | DIVIDE(7.5, 3) | - |-----------------+------------------| - | 5 | 2.5 | - +-----------------+------------------+ + +---------------+----------------+ + | DIVIDE(10, 2) | DIVIDE(7.5, 3) | + |---------------+----------------| + | 5 | 2.5 | + +---------------+----------------+ E @@ -533,11 +533,11 @@ Example:: os> SELECT EXPM1(-1), EXPM1(0), EXPM1(1), EXPM1(1.5) fetched rows / total rows = 1/1 - +---------------------+------------+-------------------+-------------------+ - | EXPM1(-1) | EXPM1(0) | EXPM1(1) | EXPM1(1.5) | - |---------------------+------------+-------------------+-------------------| - | -0.6321205588285577 | 0.0 | 1.718281828459045 | 3.481689070338065 | - +---------------------+------------+-------------------+-------------------+ + +---------------------+----------+-------------------+-------------------+ + | EXPM1(-1) | EXPM1(0) | EXPM1(1) | EXPM1(1.5) | + |---------------------+----------+-------------------+-------------------| + | -0.6321205588285577 | 0.0 | 1.718281828459045 | 3.481689070338065 | + +---------------------+----------+-------------------+-------------------+ FLOOR @@ -558,27 +558,27 @@ Example:: os> SELECT FLOOR(0), FLOOR(50.00005), FLOOR(-50.00005); fetched rows / total rows = 1/1 - +------------+-------------------+--------------------+ - | FLOOR(0) | FLOOR(50.00005) | FLOOR(-50.00005) | - |------------+-------------------+--------------------| - | 0 | 50 | -51 | - +------------+-------------------+--------------------+ + +----------+-----------------+------------------+ + | FLOOR(0) | FLOOR(50.00005) | FLOOR(-50.00005) | + |----------+-----------------+------------------| + | 0 | 50 | -51 | + +----------+-----------------+------------------+ os> SELECT FLOOR(3147483647.12345), FLOOR(113147483647.12345), FLOOR(3147483647.00001); fetched rows / total rows = 1/1 - 
+---------------------------+-----------------------------+---------------------------+ - | FLOOR(3147483647.12345) | FLOOR(113147483647.12345) | FLOOR(3147483647.00001) | - |---------------------------+-----------------------------+---------------------------| - | 3147483647 | 113147483647 | 3147483647 | - +---------------------------+-----------------------------+---------------------------+ + +-------------------------+---------------------------+-------------------------+ + | FLOOR(3147483647.12345) | FLOOR(113147483647.12345) | FLOOR(3147483647.00001) | + |-------------------------+---------------------------+-------------------------| + | 3147483647 | 113147483647 | 3147483647 | + +-------------------------+---------------------------+-------------------------+ os> SELECT FLOOR(282474973688888.022), FLOOR(9223372036854775807.022), FLOOR(9223372036854775807.0000001); fetched rows / total rows = 1/1 - +------------------------------+----------------------------------+--------------------------------------+ - | FLOOR(282474973688888.022) | FLOOR(9223372036854775807.022) | FLOOR(9223372036854775807.0000001) | - |------------------------------+----------------------------------+--------------------------------------| - | 282474973688888 | 9223372036854775807 | 9223372036854775807 | - +------------------------------+----------------------------------+--------------------------------------+ + +----------------------------+--------------------------------+------------------------------------+ + | FLOOR(282474973688888.022) | FLOOR(9223372036854775807.022) | FLOOR(9223372036854775807.0000001) | + |----------------------------+--------------------------------+------------------------------------| + | 282474973688888 | 9223372036854775807 | 9223372036854775807 | + +----------------------------+--------------------------------+------------------------------------+ LN @@ -597,11 +597,11 @@ Example:: os> select LN(1), LN(e()), LN(10), LN(12.34567); fetched rows / total rows = 1/1 - +---------+-----------+-------------------+--------------------+ - | LN(1) | LN(e()) | LN(10) | LN(12.34567) | - |---------+-----------+-------------------+--------------------| - | 0.0 | 1.0 | 2.302585092994046 | 2.5133053943094317 | - +---------+-----------+-------------------+--------------------+ + +-------+---------+-------------------+--------------------+ + | LN(1) | LN(e()) | LN(10) | LN(12.34567) | + |-------+---------+-------------------+--------------------| + | 0.0 | 1.0 | 2.302585092994046 | 2.5133053943094317 | + +-------+---------+-------------------+--------------------+ LOG @@ -623,11 +623,11 @@ Example:: os> select LOG(1), LOG(e()), LOG(2, 65536), LOG(10, 10000); fetched rows / total rows = 1/1 - +----------+------------+-----------------+------------------+ - | LOG(1) | LOG(e()) | LOG(2, 65536) | LOG(10, 10000) | - |----------+------------+-----------------+------------------| - | 0.0 | 1.0 | 16.0 | 4.0 | - +----------+------------+-----------------+------------------+ + +--------+----------+---------------+----------------+ + | LOG(1) | LOG(e()) | LOG(2, 65536) | LOG(10, 10000) | + |--------+----------+---------------+----------------| + | 0.0 | 1.0 | 16.0 | 4.0 | + +--------+----------+---------------+----------------+ LOG2 @@ -646,11 +646,11 @@ Example:: os> select LOG2(1), LOG2(8), LOG2(65536), LOG2(8.8245); fetched rows / total rows = 1/1 - +-----------+-----------+---------------+--------------------+ - | LOG2(1) | LOG2(8) | LOG2(65536) | LOG2(8.8245) | - 
|-----------+-----------+---------------+--------------------| - | 0.0 | 3.0 | 16.0 | 3.1415145369723745 | - +-----------+-----------+---------------+--------------------+ + +---------+---------+-------------+--------------------+ + | LOG2(1) | LOG2(8) | LOG2(65536) | LOG2(8.8245) | + |---------+---------+-------------+--------------------| + | 0.0 | 3.0 | 16.0 | 3.1415145369723745 | + +---------+---------+-------------+--------------------+ LOG10 @@ -669,11 +669,11 @@ Example:: os> select LOG10(1), LOG10(8), LOG10(1000), LOG10(8.8245); fetched rows / total rows = 1/1 - +------------+--------------------+---------------+--------------------+ - | LOG10(1) | LOG10(8) | LOG10(1000) | LOG10(8.8245) | - |------------+--------------------+---------------+--------------------| - | 0.0 | 0.9030899869919435 | 3.0 | 0.9456901074431278 | - +------------+--------------------+---------------+--------------------+ + +----------+--------------------+-------------+--------------------+ + | LOG10(1) | LOG10(8) | LOG10(1000) | LOG10(8.8245) | + |----------+--------------------+-------------+--------------------| + | 0.0 | 0.9030899869919435 | 3.0 | 0.9456901074431278 | + +----------+--------------------+-------------+--------------------+ MOD @@ -694,11 +694,11 @@ Example:: os> SELECT MOD(3, 2), MOD(3.1, 2) fetched rows / total rows = 1/1 - +-------------+---------------+ - | MOD(3, 2) | MOD(3.1, 2) | - |-------------+---------------| - | 1 | 1.1 | - +-------------+---------------+ + +-----------+-------------+ + | MOD(3, 2) | MOD(3.1, 2) | + |-----------+-------------| + | 1 | 1.1 | + +-----------+-------------+ MODULUS ------- @@ -718,11 +718,11 @@ Example:: os> SELECT MODULUS(3, 2), MODULUS(3.1, 2) fetched rows / total rows = 1/1 - +-----------------+-------------------+ - | MODULUS(3, 2) | MODULUS(3.1, 2) | - |-----------------+-------------------| - | 1 | 1.1 | - +-----------------+-------------------+ + +---------------+-----------------+ + | MODULUS(3, 2) | MODULUS(3.1, 2) | + |---------------+-----------------| + | 1 | 1.1 | + +---------------+-----------------+ MULTIPLY @@ -743,11 +743,11 @@ Example:: os> SELECT MULTIPLY(1, 2), MULTIPLY(-2, 1), MULTIPLY(1.5, 2); fetched rows / total rows = 1/1 - +------------------+-------------------+--------------------+ - | MULTIPLY(1, 2) | MULTIPLY(-2, 1) | MULTIPLY(1.5, 2) | - |------------------+-------------------+--------------------| - | 2 | -2 | 3.0 | - +------------------+-------------------+--------------------+ + +----------------+-----------------+------------------+ + | MULTIPLY(1, 2) | MULTIPLY(-2, 1) | MULTIPLY(1.5, 2) | + |----------------+-----------------+------------------| + | 2 | -2 | 3.0 | + +----------------+-----------------+------------------+ PI @@ -789,11 +789,11 @@ Example:: os> SELECT POW(3, 2), POW(-3, 2), POW(3, -2) fetched rows / total rows = 1/1 - +-------------+--------------+--------------------+ - | POW(3, 2) | POW(-3, 2) | POW(3, -2) | - |-------------+--------------+--------------------| - | 9.0 | 9.0 | 0.1111111111111111 | - +-------------+--------------+--------------------+ + +-----------+------------+--------------------+ + | POW(3, 2) | POW(-3, 2) | POW(3, -2) | + |-----------+------------+--------------------| + | 9.0 | 9.0 | 0.1111111111111111 | + +-----------+------------+--------------------+ POWER @@ -814,11 +814,11 @@ Example:: os> SELECT POWER(3, 2), POWER(-3, 2), POWER(3, -2) fetched rows / total rows = 1/1 - +---------------+----------------+--------------------+ - | POWER(3, 2) | POWER(-3, 2) | POWER(3, -2) | - 
|---------------+----------------+--------------------| - | 9.0 | 9.0 | 0.1111111111111111 | - +---------------+----------------+--------------------+ + +-------------+--------------+--------------------+ + | POWER(3, 2) | POWER(-3, 2) | POWER(3, -2) | + |-------------+--------------+--------------------| + | 9.0 | 9.0 | 0.1111111111111111 | + +-------------+--------------+--------------------+ RADIANS @@ -883,11 +883,11 @@ Example:: os> SELECT RINT(1.7); fetched rows / total rows = 1/1 - +-------------+ - | RINT(1.7) | - |-------------| - | 2.0 | - +-------------+ + +-----------+ + | RINT(1.7) | + |-----------| + | 2.0 | + +-----------+ ROUND @@ -910,11 +910,11 @@ Example:: os> SELECT ROUND(12.34), ROUND(12.34, 1), ROUND(12.34, -1), ROUND(12, 1) fetched rows / total rows = 1/1 - +----------------+-------------------+--------------------+----------------+ - | ROUND(12.34) | ROUND(12.34, 1) | ROUND(12.34, -1) | ROUND(12, 1) | - |----------------+-------------------+--------------------+----------------| - | 12.0 | 12.3 | 10.0 | 12 | - +----------------+-------------------+--------------------+----------------+ + +--------------+-----------------+------------------+--------------+ + | ROUND(12.34) | ROUND(12.34, 1) | ROUND(12.34, -1) | ROUND(12, 1) | + |--------------+-----------------+------------------+--------------| + | 12.0 | 12.3 | 10.0 | 12 | + +--------------+-----------------+------------------+--------------+ SIGN @@ -933,11 +933,11 @@ Example:: os> SELECT SIGN(1), SIGN(0), SIGN(-1.1) fetched rows / total rows = 1/1 - +-----------+-----------+--------------+ - | SIGN(1) | SIGN(0) | SIGN(-1.1) | - |-----------+-----------+--------------| - | 1 | 0 | -1 | - +-----------+-----------+--------------+ + +---------+---------+------------+ + | SIGN(1) | SIGN(0) | SIGN(-1.1) | + |---------+---------+------------| + | 1 | 0 | -1 | + +---------+---------+------------+ SIGNUM @@ -958,11 +958,11 @@ Example:: os> SELECT SIGNUM(1), SIGNUM(0), SIGNUM(-1.1) fetched rows / total rows = 1/1 - +-------------+-------------+----------------+ - | SIGNUM(1) | SIGNUM(0) | SIGNUM(-1.1) | - |-------------+-------------+----------------| - | 1 | 0 | -1 | - +-------------+-------------+----------------+ + +-----------+-----------+--------------+ + | SIGNUM(1) | SIGNUM(0) | SIGNUM(-1.1) | + |-----------+-----------+--------------| + | 1 | 0 | -1 | + +-----------+-----------+--------------+ SIN @@ -981,11 +981,11 @@ Example:: os> select sin(0), sin(1), sin(pi()), abs(sin(pi())) < 0.0001; fetched rows / total rows = 1/1 - +----------+--------------------+------------------------+---------------------------+ - | sin(0) | sin(1) | sin(pi()) | abs(sin(pi())) < 0.0001 | - |----------+--------------------+------------------------+---------------------------| - | 0.0 | 0.8414709848078965 | 1.2246467991473532e-16 | True | - +----------+--------------------+------------------------+---------------------------+ + +--------+--------------------+------------------------+-------------------------+ + | sin(0) | sin(1) | sin(pi()) | abs(sin(pi())) < 0.0001 | + |--------+--------------------+------------------------+-------------------------| + | 0.0 | 0.8414709848078965 | 1.2246467991473532e-16 | True | + +--------+--------------------+------------------------+-------------------------+ SINH @@ -1030,11 +1030,11 @@ Example:: os> SELECT SQRT(4), SQRT(4.41) fetched rows / total rows = 1/1 - +-----------+--------------+ - | SQRT(4) | SQRT(4.41) | - |-----------+--------------| - | 2.0 | 2.1 | - +-----------+--------------+ + 
+---------+------------+ + | SQRT(4) | SQRT(4.41) | + |---------+------------| + | 2.0 | 2.1 | + +---------+------------+ STRCMP @@ -1053,11 +1053,11 @@ Example:: os> SELECT STRCMP('hello', 'world'), STRCMP('hello', 'hello') fetched rows / total rows = 1/1 - +----------------------------+----------------------------+ - | STRCMP('hello', 'world') | STRCMP('hello', 'hello') | - |----------------------------+----------------------------| - | -1 | 0 | - +----------------------------+----------------------------+ + +--------------------------+--------------------------+ + | STRCMP('hello', 'world') | STRCMP('hello', 'hello') | + |--------------------------+--------------------------| + | -1 | 0 | + +--------------------------+--------------------------+ SUBTRACT @@ -1078,11 +1078,11 @@ Example:: os> SELECT SUBTRACT(2, 1), SUBTRACT(2.5, 3); fetched rows / total rows = 1/1 - +------------------+--------------------+ - | SUBTRACT(2, 1) | SUBTRACT(2.5, 3) | - |------------------+--------------------| - | 1 | -0.5 | - +------------------+--------------------+ + +----------------+------------------+ + | SUBTRACT(2, 1) | SUBTRACT(2.5, 3) | + |----------------+------------------| + | 1 | -0.5 | + +----------------+------------------+ TAN @@ -1101,11 +1101,11 @@ Example:: os> SELECT TAN(0) fetched rows / total rows = 1/1 - +----------+ - | TAN(0) | - |----------| - | 0.0 | - +----------+ + +--------+ + | TAN(0) | + |--------| + | 0.0 | + +--------+ TRUNCATE @@ -1126,11 +1126,11 @@ FLOAT/DOUBLE -> DOUBLE Example:: fetched rows / total rows = 1/1 - +----------------------+-----------------------+-------------------+ - | TRUNCATE(56.78, 1) | TRUNCATE(56.78, -1) | TRUNCATE(56, 1) | - |----------------------+-----------------------+-------------------| - | 56.7 | 50 | 56 | - +----------------------+-----------------------+-------------------+ + +--------------------+---------------------+-----------------+ + | TRUNCATE(56.78, 1) | TRUNCATE(56.78, -1) | TRUNCATE(56, 1) | + |--------------------+---------------------+-----------------| + | 56.7 | 50 | 56 | + +--------------------+---------------------+-----------------+ Date and Time Functions @@ -1163,11 +1163,11 @@ Example:: os> SELECT ADDDATE(DATE('2020-08-26'), INTERVAL 1 HOUR) AS `'2020-08-26' + 1h`, ADDDATE(DATE('2020-08-26'), 1) AS `'2020-08-26' + 1`, ADDDATE(TIMESTAMP('2020-08-26 01:01:01'), 1) AS `ts '2020-08-26 01:01:01' + 1` fetched rows / total rows = 1/1 - +---------------------+--------------------+--------------------------------+ - | '2020-08-26' + 1h | '2020-08-26' + 1 | ts '2020-08-26 01:01:01' + 1 | - |---------------------+--------------------+--------------------------------| - | 2020-08-26 01:00:00 | 2020-08-27 | 2020-08-27 01:01:01 | - +---------------------+--------------------+--------------------------------+ + +---------------------+------------------+------------------------------+ + | '2020-08-26' + 1h | '2020-08-26' + 1 | ts '2020-08-26 01:01:01' + 1 | + |---------------------+------------------+------------------------------| + | 2020-08-26 01:00:00 | 2020-08-27 | 2020-08-27 01:01:01 | + +---------------------+------------------+------------------------------+ ADDTIME @@ -1200,35 +1200,35 @@ Example:: os> SELECT ADDTIME(TIME('23:59:59'), DATE('2004-01-01')) AS `'23:59:59' + 0` fetched rows / total rows = 1/1 - +------------------+ - | '23:59:59' + 0 | - |------------------| - | 23:59:59 | - +------------------+ + +----------------+ + | '23:59:59' + 0 | + |----------------| + | 23:59:59 | + +----------------+ os> SELECT 
ADDTIME(DATE('2004-01-01'), TIME('23:59:59')) AS `'2004-01-01' + '23:59:59'` fetched rows / total rows = 1/1 - +-----------------------------+ - | '2004-01-01' + '23:59:59' | - |-----------------------------| - | 2004-01-01 23:59:59 | - +-----------------------------+ - - os> SELECT ADDTIME(TIME('10:20:30'), TIME('00:05:42')) AS `'10:20:30' + '00:05:42'` - fetched rows / total rows = 1/1 +---------------------------+ - | '10:20:30' + '00:05:42' | + | '2004-01-01' + '23:59:59' | |---------------------------| - | 10:26:12 | + | 2004-01-01 23:59:59 | +---------------------------+ + os> SELECT ADDTIME(TIME('10:20:30'), TIME('00:05:42')) AS `'10:20:30' + '00:05:42'` + fetched rows / total rows = 1/1 + +-------------------------+ + | '10:20:30' + '00:05:42' | + |-------------------------| + | 10:26:12 | + +-------------------------+ + os> SELECT ADDTIME(TIMESTAMP('2007-02-28 10:20:30'), TIMESTAMP('2002-03-04 20:40:50')) AS `'2007-02-28 10:20:30' + '20:40:50'` fetched rows / total rows = 1/1 - +--------------------------------------+ - | '2007-02-28 10:20:30' + '20:40:50' | - |--------------------------------------| - | 2007-03-01 07:01:20 | - +--------------------------------------+ + +------------------------------------+ + | '2007-02-28 10:20:30' + '20:40:50' | + |------------------------------------| + | 2007-03-01 07:01:20 | + +------------------------------------+ CONVERT_TZ @@ -1247,86 +1247,86 @@ Example:: os> SELECT CONVERT_TZ('2008-12-25 05:30:00', '+00:00', 'America/Los_Angeles') fetched rows / total rows = 1/1 - +----------------------------------------------------------------------+ - | CONVERT_TZ('2008-12-25 05:30:00', '+00:00', 'America/Los_Angeles') | - |----------------------------------------------------------------------| - | 2008-12-24 21:30:00 | - +----------------------------------------------------------------------+ + +--------------------------------------------------------------------+ + | CONVERT_TZ('2008-12-25 05:30:00', '+00:00', 'America/Los_Angeles') | + |--------------------------------------------------------------------| + | 2008-12-24 21:30:00 | + +--------------------------------------------------------------------+ os> SELECT CONVERT_TZ("2010-10-10 10:10:10", "+01:00", "-10:00") fetched rows / total rows = 1/1 - +---------------------------------------------------------+ - | CONVERT_TZ("2010-10-10 10:10:10", "+01:00", "-10:00") | - |---------------------------------------------------------| - | 2010-10-09 23:10:10 | - +---------------------------------------------------------+ + +-------------------------------------------------------+ + | CONVERT_TZ("2010-10-10 10:10:10", "+01:00", "-10:00") | + |-------------------------------------------------------| + | 2010-10-09 23:10:10 | + +-------------------------------------------------------+ When the timestamp, or either of the two time zone fields are invalid format, then the result is null. In this example any timestamp that is not <yyyy-MM-dd HH:mm:ss> will result in null.
Example:: os> SELECT CONVERT_TZ("test", "+01:00", "-10:00") fetched rows / total rows = 1/1 - +------------------------------------------+ - | CONVERT_TZ("test", "+01:00", "-10:00") | - |------------------------------------------| - | null | - +------------------------------------------+ + +----------------------------------------+ + | CONVERT_TZ("test", "+01:00", "-10:00") | + |----------------------------------------| + | null | + +----------------------------------------+ When the timestamp, or either of the two time zone fields are invalid format, then the result is null. In this example any timezone that is not <+HH:mm> or <-HH:mm> will result in null. Example:: os> SELECT CONVERT_TZ("2010-10-10 10:10:10", "test", "-10:00") fetched rows / total rows = 1/1 - +-------------------------------------------------------+ - | CONVERT_TZ("2010-10-10 10:10:10", "test", "-10:00") | - |-------------------------------------------------------| - | null | - +-------------------------------------------------------+ + +-----------------------------------------------------+ + | CONVERT_TZ("2010-10-10 10:10:10", "test", "-10:00") | + |-----------------------------------------------------| + | null | + +-----------------------------------------------------+ The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range will result in null. Example:: os> SELECT CONVERT_TZ("2010-10-10 10:10:10", "+01:00", "+14:00") fetched rows / total rows = 1/1 - +---------------------------------------------------------+ - | CONVERT_TZ("2010-10-10 10:10:10", "+01:00", "+14:00") | - |---------------------------------------------------------| - | 2010-10-10 23:10:10 | - +---------------------------------------------------------+ + +-------------------------------------------------------+ + | CONVERT_TZ("2010-10-10 10:10:10", "+01:00", "+14:00") | + |-------------------------------------------------------| + | 2010-10-10 23:10:10 | + +-------------------------------------------------------+ The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range will result in null. Example:: os> SELECT CONVERT_TZ("2010-10-10 10:10:10", "+01:00", "+14:01") fetched rows / total rows = 1/1 - +---------------------------------------------------------+ - | CONVERT_TZ("2010-10-10 10:10:10", "+01:00", "+14:01") | - |---------------------------------------------------------| - | null | - +---------------------------------------------------------+ + +-------------------------------------------------------+ + | CONVERT_TZ("2010-10-10 10:10:10", "+01:00", "+14:01") | + |-------------------------------------------------------| + | null | + +-------------------------------------------------------+ The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range will result in null. 
Example:: os> SELECT CONVERT_TZ("2010-10-10 10:10:10", "+01:00", "-13:59") fetched rows / total rows = 1/1 - +---------------------------------------------------------+ - | CONVERT_TZ("2010-10-10 10:10:10", "+01:00", "-13:59") | - |---------------------------------------------------------| - | 2010-10-09 19:11:10 | - +---------------------------------------------------------+ + +-------------------------------------------------------+ + | CONVERT_TZ("2010-10-10 10:10:10", "+01:00", "-13:59") | + |-------------------------------------------------------| + | 2010-10-09 19:11:10 | + +-------------------------------------------------------+ The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range will result in null. Example:: os> SELECT CONVERT_TZ("2010-10-10 10:10:10", "+01:00", "-14:00") fetched rows / total rows = 1/1 - +---------------------------------------------------------+ - | CONVERT_TZ("2010-10-10 10:10:10", "+01:00", "-14:00") | - |---------------------------------------------------------| - | null | - +---------------------------------------------------------+ + +-------------------------------------------------------+ + | CONVERT_TZ("2010-10-10 10:10:10", "+01:00", "-14:00") | + |-------------------------------------------------------| + | null | + +-------------------------------------------------------+ CURDATE @@ -1346,11 +1346,11 @@ Example:: > SELECT CURDATE(); fetched rows / total rows = 1/1 - +-------------+ - | CURDATE() | - |-------------| - | 2022-08-02 | - +-------------+ + +------------+ + | CURDATE() | + |------------| + | 2022-08-02 | + +------------+ CURRENT_DATE @@ -1365,11 +1365,11 @@ Example:: > SELECT CURRENT_DATE(); fetched rows / total rows = 1/1 - +------------------+ - | CURRENT_DATE() | - |------------------+ - | 2022-08-02 | - +------------------+ + +----------------+ + | CURRENT_DATE() | + |----------------+ + | 2022-08-02 | + +----------------+ CURRENT_TIME @@ -1384,11 +1384,11 @@ Example:: > SELECT CURRENT_TIME(); fetched rows / total rows = 1/1 - +-----------------+ - | CURRENT_TIME() | - |-----------------+ - | 15:39:05 | - +-----------------+ + +----------------+ + | CURRENT_TIME() | + |----------------+ + | 15:39:05 | + +----------------+ CURRENT_TIMESTAMP @@ -1403,11 +1403,11 @@ Example:: > SELECT CURRENT_TIMESTAMP(); fetched rows / total rows = 1/1 - +-----------------------+ - | CURRENT_TIMESTAMP() | - |-----------------------+ - | 2022-08-02 15:54:19 | - +-----------------------+ + +---------------------+ + | CURRENT_TIMESTAMP() | + |---------------------+ + | 2022-08-02 15:54:19 | + +---------------------+ CURTIME @@ -1427,11 +1427,11 @@ Example:: > SELECT CURTIME() as value_1, CURTIME() as value_2; fetched rows / total rows = 1/1 - +-----------+-----------+ - | value_1 | value_2 | - |-----------+-----------| - | 15:39:05 | 15:39:05 | - +-----------+-----------+ + +----------+----------+ + | value_1 | value_2 | + |----------+----------| + | 15:39:05 | 15:39:05 | + +----------+----------+ DATE @@ -1450,11 +1450,11 @@ Example:: os> SELECT DATE('2020-08-26'), DATE(TIMESTAMP('2020-08-26 13:49:00')), DATE('2020-08-26 13:49:00'), DATE('2020-08-26 13:49') fetched rows / total rows = 1/1 - +----------------------+------------------------------------------+-------------------------------+----------------------------+ - | DATE('2020-08-26') | DATE(TIMESTAMP('2020-08-26 13:49:00')) | DATE('2020-08-26 13:49:00') | DATE('2020-08-26 13:49') | - 
|----------------------+------------------------------------------+-------------------------------+----------------------------| - | 2020-08-26 | 2020-08-26 | 2020-08-26 | 2020-08-26 | - +----------------------+------------------------------------------+-------------------------------+----------------------------+ + +--------------------+----------------------------------------+-----------------------------+--------------------------+ + | DATE('2020-08-26') | DATE(TIMESTAMP('2020-08-26 13:49:00')) | DATE('2020-08-26 13:49:00') | DATE('2020-08-26 13:49') | + |--------------------+----------------------------------------+-----------------------------+--------------------------| + | 2020-08-26 | 2020-08-26 | 2020-08-26 | 2020-08-26 | + +--------------------+----------------------------------------+-----------------------------+--------------------------+ DATETIME @@ -1477,44 +1477,44 @@ Example:: os> SELECT DATETIME('2008-12-25 05:30:00+00:00', 'America/Los_Angeles') fetched rows / total rows = 1/1 - +----------------------------------------------------------------+ - | DATETIME('2008-12-25 05:30:00+00:00', 'America/Los_Angeles') | - |----------------------------------------------------------------| - | 2008-12-24 21:30:00 | - +----------------------------------------------------------------+ + +--------------------------------------------------------------+ + | DATETIME('2008-12-25 05:30:00+00:00', 'America/Los_Angeles') | + |--------------------------------------------------------------| + | 2008-12-24 21:30:00 | + +--------------------------------------------------------------+ This example converts from -10:00 timezone to +10:00 timezone. Example:: os> SELECT DATETIME('2004-02-28 23:00:00-10:00', '+10:00') fetched rows / total rows = 1/1 - +---------------------------------------------------+ - | DATETIME('2004-02-28 23:00:00-10:00', '+10:00') | - |---------------------------------------------------| - | 2004-02-29 19:00:00 | - +---------------------------------------------------+ + +-------------------------------------------------+ + | DATETIME('2004-02-28 23:00:00-10:00', '+10:00') | + |-------------------------------------------------| + | 2004-02-29 19:00:00 | + +-------------------------------------------------+ This example uses the timezone -14:00, which is outside of the range -13:59 and +14:00. This results in a null value. Example:: os> SELECT DATETIME('2008-01-01 02:00:00', '-14:00') fetched rows / total rows = 1/1 - +---------------------------------------------+ - | DATETIME('2008-01-01 02:00:00', '-14:00') | - |---------------------------------------------| - | null | - +---------------------------------------------+ + +-------------------------------------------+ + | DATETIME('2008-01-01 02:00:00', '-14:00') | + |-------------------------------------------| + | null | + +-------------------------------------------+ February 30th is not a day, so it returns null. 
Example:: os> SELECT DATETIME('2008-02-30 02:00:00', '-00:00') fetched rows / total rows = 1/1 - +---------------------------------------------+ - | DATETIME('2008-02-30 02:00:00', '-00:00') | - |---------------------------------------------| - | null | - +---------------------------------------------+ + +-------------------------------------------+ + | DATETIME('2008-02-30 02:00:00', '-00:00') | + |-------------------------------------------| + | null | + +-------------------------------------------+ DATETIME(datetime) examples @@ -1523,33 +1523,33 @@ Example:: os> SELECT DATETIME('2008-02-10 02:00:00') fetched rows / total rows = 1/1 - +-----------------------------------+ - | DATETIME('2008-02-10 02:00:00') | - |-----------------------------------| - | 2008-02-10 02:00:00 | - +-----------------------------------+ + +---------------------------------+ + | DATETIME('2008-02-10 02:00:00') | + |---------------------------------| + | 2008-02-10 02:00:00 | + +---------------------------------+ February 30th is not a day, so it returns null. Example:: os> SELECT DATETIME('2008-02-30 02:00:00') fetched rows / total rows = 1/1 - +-----------------------------------+ - | DATETIME('2008-02-30 02:00:00') | - |-----------------------------------| - | null | - +-----------------------------------+ + +---------------------------------+ + | DATETIME('2008-02-30 02:00:00') | + |---------------------------------| + | null | + +---------------------------------+ DATETIME with a datetime and no separate timezone to convert to returns the datetime object without a timezone. Example:: os> SELECT DATETIME('2008-02-10 02:00:00+04:00') fetched rows / total rows = 1/1 - +-----------------------------------------+ - | DATETIME('2008-02-10 02:00:00+04:00') | - |-----------------------------------------| - | 2008-02-10 02:00:00 | - +-----------------------------------------+ + +---------------------------------------+ + | DATETIME('2008-02-10 02:00:00+04:00') | + |---------------------------------------| + | 2008-02-10 02:00:00 | + +---------------------------------------+ DATE_ADD @@ -1572,11 +1572,11 @@ Example:: os> SELECT DATE_ADD(DATE('2020-08-26'), INTERVAL 1 HOUR) AS `'2020-08-26' + 1h`, DATE_ADD(TIMESTAMP('2020-08-26 01:01:01'), INTERVAL 1 DAY) as `ts '2020-08-26 01:01:01' + 1d` fetched rows / total rows = 1/1 - +---------------------+---------------------------------+ - | '2020-08-26' + 1h | ts '2020-08-26 01:01:01' + 1d | - |---------------------+---------------------------------| - | 2020-08-26 01:00:00 | 2020-08-27 01:01:01 | - +---------------------+---------------------------------+ + +---------------------+-------------------------------+ + | '2020-08-26' + 1h | ts '2020-08-26 01:01:01' + 1d | + |---------------------+-------------------------------| + | 2020-08-26 01:00:00 | 2020-08-27 01:01:01 | + +---------------------+-------------------------------+ DATE_FORMAT @@ -1671,11 +1671,11 @@ Example:: os> SELECT DATE_FORMAT('1998-01-31 13:14:15.012345', '%T.%f'), DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), '%Y-%b-%D %r') fetched rows / total rows = 1/1 - +------------------------------------------------------+-----------------------------------------------------------------------+ - | DATE_FORMAT('1998-01-31 13:14:15.012345', '%T.%f') | DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), '%Y-%b-%D %r') | - |------------------------------------------------------+-----------------------------------------------------------------------| - | 13:14:15.012345 | 1998-Jan-31st 01:14:15 PM | -
+------------------------------------------------------+-----------------------------------------------------------------------+ + +----------------------------------------------------+---------------------------------------------------------------------+ + | DATE_FORMAT('1998-01-31 13:14:15.012345', '%T.%f') | DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), '%Y-%b-%D %r') | + |----------------------------------------------------+---------------------------------------------------------------------| + | 13:14:15.012345 | 1998-Jan-31st 01:14:15 PM | + +----------------------------------------------------+---------------------------------------------------------------------+ DATE_SUB @@ -1698,11 +1698,11 @@ Example:: os> SELECT DATE_SUB(DATE('2008-01-02'), INTERVAL 31 DAY) AS `'2008-01-02' - 31d`, DATE_SUB(TIMESTAMP('2020-08-26 01:01:01'), INTERVAL 1 HOUR) AS `ts '2020-08-26 01:01:01' + 1h` fetched rows / total rows = 1/1 - +----------------------+---------------------------------+ - | '2008-01-02' - 31d | ts '2020-08-26 01:01:01' + 1h | - |----------------------+---------------------------------| - | 2007-12-02 00:00:00 | 2020-08-26 00:01:01 | - +----------------------+---------------------------------+ + +---------------------+-------------------------------+ + | '2008-01-02' - 31d | ts '2020-08-26 01:01:01' + 1h | + |---------------------+-------------------------------| + | 2007-12-02 00:00:00 | 2020-08-26 00:01:01 | + +---------------------+-------------------------------+ DATEDIFF @@ -1718,11 +1718,11 @@ Example:: os> SELECT DATEDIFF(TIMESTAMP('2000-01-02 00:00:00'), TIMESTAMP('2000-01-01 23:59:59')) AS `'2000-01-02' - '2000-01-01'`, DATEDIFF(DATE('2001-02-01'), TIMESTAMP('2004-01-01 00:00:00')) AS `'2001-02-01' - '2004-01-01'`, DATEDIFF(TIME('23:59:59'), TIME('00:00:00')) AS `today - today` fetched rows / total rows = 1/1 - +-------------------------------+-------------------------------+-----------------+ - | '2000-01-02' - '2000-01-01' | '2001-02-01' - '2004-01-01' | today - today | - |-------------------------------+-------------------------------+-----------------| - | 1 | -1064 | 0 | - +-------------------------------+-------------------------------+-----------------+ + +-----------------------------+-----------------------------+---------------+ + | '2000-01-02' - '2000-01-01' | '2001-02-01' - '2004-01-01' | today - today | + |-----------------------------+-----------------------------+---------------| + | 1 | -1064 | 0 | + +-----------------------------+-----------------------------+---------------+ DAY @@ -1743,11 +1743,11 @@ Example:: os> SELECT DAY(DATE('2020-08-26')) fetched rows / total rows = 1/1 - +---------------------------+ - | DAY(DATE('2020-08-26')) | - |---------------------------| - | 26 | - +---------------------------+ + +-------------------------+ + | DAY(DATE('2020-08-26')) | + |-------------------------| + | 26 | + +-------------------------+ DAYNAME @@ -1766,11 +1766,11 @@ Example:: os> SELECT DAYNAME(DATE('2020-08-26')) fetched rows / total rows = 1/1 - +-------------------------------+ - | DAYNAME(DATE('2020-08-26')) | - |-------------------------------| - | Wednesday | - +-------------------------------+ + +-----------------------------+ + | DAYNAME(DATE('2020-08-26')) | + |-----------------------------| + | Wednesday | + +-----------------------------+ DAYOFMONTH @@ -1791,11 +1791,11 @@ Example:: os> SELECT DAYOFMONTH(DATE('2020-08-26')) fetched rows / total rows = 1/1 - +----------------------------------+ - | DAYOFMONTH(DATE('2020-08-26')) | - 
|----------------------------------| - | 26 | - +----------------------------------+ + +--------------------------------+ + | DAYOFMONTH(DATE('2020-08-26')) | + |--------------------------------| + | 26 | + +--------------------------------+ DAY_OF_MONTH @@ -1816,11 +1816,11 @@ Example:: os> SELECT DAY_OF_MONTH('2020-08-26') fetched rows / total rows = 1/1 - +------------------------------+ - | DAY_OF_MONTH('2020-08-26') | - |------------------------------| - | 26 | - +------------------------------+ + +----------------------------+ + | DAY_OF_MONTH('2020-08-26') | + |----------------------------| + | 26 | + +----------------------------+ DAYOFWEEK @@ -1841,11 +1841,11 @@ Example:: os> SELECT DAYOFWEEK('2020-08-26'), DAY_OF_WEEK('2020-08-26') fetched rows / total rows = 1/1 - +---------------------------+-----------------------------+ - | DAYOFWEEK('2020-08-26') | DAY_OF_WEEK('2020-08-26') | - |---------------------------+-----------------------------| - | 4 | 4 | - +---------------------------+-----------------------------+ + +-------------------------+---------------------------+ + | DAYOFWEEK('2020-08-26') | DAY_OF_WEEK('2020-08-26') | + |-------------------------+---------------------------| + | 4 | 4 | + +-------------------------+---------------------------+ DAYOFYEAR @@ -1866,19 +1866,19 @@ Example:: os> SELECT DAYOFYEAR(DATE('2020-08-26')) fetched rows / total rows = 1/1 - +---------------------------------+ - | DAYOFYEAR(DATE('2020-08-26')) | - |---------------------------------| - | 239 | - +---------------------------------+ + +-------------------------------+ + | DAYOFYEAR(DATE('2020-08-26')) | + |-------------------------------| + | 239 | + +-------------------------------+ os> SELECT DAYOFYEAR(TIMESTAMP('2020-08-26 00:00:00')) fetched rows / total rows = 1/1 - +-----------------------------------------------+ - | DAYOFYEAR(TIMESTAMP('2020-08-26 00:00:00')) | - |-----------------------------------------------| - | 239 | - +-----------------------------------------------+ + +---------------------------------------------+ + | DAYOFYEAR(TIMESTAMP('2020-08-26 00:00:00')) | + |---------------------------------------------| + | 239 | + +---------------------------------------------+ DAY_OF_YEAR @@ -1898,19 +1898,19 @@ Example:: os> SELECT DAY_OF_YEAR(DATE('2020-08-26')) fetched rows / total rows = 1/1 - +-----------------------------------+ - | DAY_OF_YEAR(DATE('2020-08-26')) | - |-----------------------------------| - | 239 | - +-----------------------------------+ + +---------------------------------+ + | DAY_OF_YEAR(DATE('2020-08-26')) | + |---------------------------------| + | 239 | + +---------------------------------+ os> SELECT DAY_OF_YEAR(TIMESTAMP('2020-08-26 00:00:00')) fetched rows / total rows = 1/1 - +-------------------------------------------------+ - | DAY_OF_YEAR(TIMESTAMP('2020-08-26 00:00:00')) | - |-------------------------------------------------| - | 239 | - +-------------------------------------------------+ + +-----------------------------------------------+ + | DAY_OF_YEAR(TIMESTAMP('2020-08-26 00:00:00')) | + |-----------------------------------------------| + | 239 | + +-----------------------------------------------+ EXTRACT @@ -1978,11 +1978,11 @@ Example:: os> SELECT extract(YEAR_MONTH FROM "2023-02-07 10:11:12"); fetched rows / total rows = 1/1 - +--------------------------------------------------+ - | extract(YEAR_MONTH FROM "2023-02-07 10:11:12") | - |--------------------------------------------------| - | 202302 | - 
+--------------------------------------------------+ + +------------------------------------------------+ + | extract(YEAR_MONTH FROM "2023-02-07 10:11:12") | + |------------------------------------------------| + | 202302 | + +------------------------------------------------+ FROM_DAYS @@ -2001,11 +2001,11 @@ Example:: os> SELECT FROM_DAYS(733687) fetched rows / total rows = 1/1 - +---------------------+ - | FROM_DAYS(733687) | - |---------------------| - | 2008-10-07 | - +---------------------+ + +-------------------+ + | FROM_DAYS(733687) | + |-------------------| + | 2008-10-07 | + +-------------------+ FROM_UNIXTIME @@ -2030,19 +2030,19 @@ Examples:: os> select FROM_UNIXTIME(1220249547) fetched rows / total rows = 1/1 - +-----------------------------+ - | FROM_UNIXTIME(1220249547) | - |-----------------------------| - | 2008-09-01 06:12:27 | - +-----------------------------+ + +---------------------------+ + | FROM_UNIXTIME(1220249547) | + |---------------------------| + | 2008-09-01 06:12:27 | + +---------------------------+ os> select FROM_UNIXTIME(1220249547, '%T') fetched rows / total rows = 1/1 - +-----------------------------------+ - | FROM_UNIXTIME(1220249547, '%T') | - |-----------------------------------| - | 06:12:27 | - +-----------------------------------+ + +---------------------------------+ + | FROM_UNIXTIME(1220249547, '%T') | + |---------------------------------| + | 06:12:27 | + +---------------------------------+ GET_FORMAT @@ -2061,11 +2061,11 @@ Examples:: os> select GET_FORMAT(DATE, 'USA'); fetched rows / total rows = 1/1 - +---------------------------+ - | GET_FORMAT(DATE, 'USA') | - |---------------------------| - | %m.%d.%Y | - +---------------------------+ + +-------------------------+ + | GET_FORMAT(DATE, 'USA') | + |-------------------------| + | %m.%d.%Y | + +-------------------------+ HOUR @@ -2085,11 +2085,11 @@ Example:: os> SELECT HOUR('01:02:03'), HOUR_OF_DAY('01:02:03') fetched rows / total rows = 1/1 - +--------------------+---------------------------+ - | HOUR('01:02:03') | HOUR_OF_DAY('01:02:03') | - |--------------------+---------------------------| - | 1 | 1 | - +--------------------+---------------------------+ + +------------------+-------------------------+ + | HOUR('01:02:03') | HOUR_OF_DAY('01:02:03') | + |------------------+-------------------------| + | 1 | 1 | + +------------------+-------------------------+ LAST_DAY @@ -2105,11 +2105,11 @@ Example:: os> SELECT last_day('2023-02-06'); fetched rows / total rows = 1/1 - +--------------------------+ - | last_day('2023-02-06') | - |--------------------------| - | 2023-02-28 | - +--------------------------+ + +------------------------+ + | last_day('2023-02-06') | + |------------------------| + | 2023-02-28 | + +------------------------+ LOCALTIMESTAMP @@ -2177,11 +2177,11 @@ Example:: os> select MAKEDATE(1945, 5.9), MAKEDATE(1984, 1984) fetched rows / total rows = 1/1 - +-----------------------+------------------------+ - | MAKEDATE(1945, 5.9) | MAKEDATE(1984, 1984) | - |-----------------------+------------------------| - | 1945-01-06 | 1989-06-06 | - +-----------------------+------------------------+ + +---------------------+----------------------+ + | MAKEDATE(1945, 5.9) | MAKEDATE(1984, 1984) | + |---------------------+----------------------| + | 1945-01-06 | 1989-06-06 | + +---------------------+----------------------+ MAKETIME @@ -2209,11 +2209,11 @@ Example:: os> select MAKETIME(20, 30, 40), MAKETIME(20.2, 49.5, 42.100502) fetched rows / total rows = 1/1 - 
+------------------------+-----------------------------------+ - | MAKETIME(20, 30, 40) | MAKETIME(20.2, 49.5, 42.100502) | - |------------------------+-----------------------------------| - | 20:30:40 | 20:50:42.100502 | - +------------------------+-----------------------------------+ + +----------------------+---------------------------------+ + | MAKETIME(20, 30, 40) | MAKETIME(20.2, 49.5, 42.100502) | + |----------------------+---------------------------------| + | 20:30:40 | 20:50:42.100502 | + +----------------------+---------------------------------+ MICROSECOND @@ -2232,11 +2232,11 @@ Example:: os> SELECT MICROSECOND((TIME '01:02:03.123456')) fetched rows / total rows = 1/1 - +-----------------------------------------+ - | MICROSECOND((TIME '01:02:03.123456')) | - |-----------------------------------------| - | 123456 | - +-----------------------------------------+ + +---------------------------------------+ + | MICROSECOND((TIME '01:02:03.123456')) | + |---------------------------------------| + | 123456 | + +---------------------------------------+ MINUTE @@ -2256,11 +2256,11 @@ Example:: os> SELECT MINUTE(time('01:02:03')), MINUTE_OF_HOUR(time('01:02:03')) fetched rows / total rows = 1/1 - +----------------------------+------------------------------------+ - | MINUTE(time('01:02:03')) | MINUTE_OF_HOUR(time('01:02:03')) | - |----------------------------+------------------------------------| - | 2 | 2 | - +----------------------------+------------------------------------+ + +--------------------------+----------------------------------+ + | MINUTE(time('01:02:03')) | MINUTE_OF_HOUR(time('01:02:03')) | + |--------------------------+----------------------------------| + | 2 | 2 | + +--------------------------+----------------------------------+ MINUTE_OF_DAY @@ -2279,11 +2279,11 @@ Example:: os> SELECT MINUTE_OF_DAY((TIME '01:02:03')) fetched rows / total rows = 1/1 - +------------------------------------+ - | MINUTE_OF_DAY((TIME '01:02:03')) | - |------------------------------------| - | 62 | - +------------------------------------+ + +----------------------------------+ + | MINUTE_OF_DAY((TIME '01:02:03')) | + |----------------------------------| + | 62 | + +----------------------------------+ MONTH @@ -2304,20 +2304,20 @@ Example:: os> SELECT MONTH(DATE('2020-08-26')) fetched rows / total rows = 1/1 - +-----------------------------+ - | MONTH(DATE('2020-08-26')) | - |-----------------------------| - | 8 | - +-----------------------------+ + +---------------------------+ + | MONTH(DATE('2020-08-26')) | + |---------------------------| + | 8 | + +---------------------------+ os> SELECT MONTH_OF_YEAR(DATE('2020-08-26')) fetched rows / total rows = 1/1 - +-------------------------------------+ - | MONTH_OF_YEAR(DATE('2020-08-26')) | - |-------------------------------------| - | 8 | - +-------------------------------------+ + +-----------------------------------+ + | MONTH_OF_YEAR(DATE('2020-08-26')) | + |-----------------------------------| + | 8 | + +-----------------------------------+ MONTHNAME @@ -2336,11 +2336,11 @@ Example:: os> SELECT MONTHNAME(DATE('2020-08-26')) fetched rows / total rows = 1/1 - +---------------------------------+ - | MONTHNAME(DATE('2020-08-26')) | - |---------------------------------| - | August | - +---------------------------------+ + +-------------------------------+ + | MONTHNAME(DATE('2020-08-26')) | + |-------------------------------| + | August | + +-------------------------------+ NOW @@ -2383,11 +2383,11 @@ Example:: os> SELECT PERIOD_ADD(200801, 
2), PERIOD_ADD(200801, -12) fetched rows / total rows = 1/1 - +-------------------------+---------------------------+ - | PERIOD_ADD(200801, 2) | PERIOD_ADD(200801, -12) | - |-------------------------+---------------------------| - | 200803 | 200701 | - +-------------------------+---------------------------+ + +-----------------------+-------------------------+ + | PERIOD_ADD(200801, 2) | PERIOD_ADD(200801, -12) | + |-----------------------+-------------------------| + | 200803 | 200701 | + +-----------------------+-------------------------+ PERIOD_DIFF @@ -2406,11 +2406,11 @@ Example:: os> SELECT PERIOD_DIFF(200802, 200703), PERIOD_DIFF(200802, 201003) fetched rows / total rows = 1/1 - +-------------------------------+-------------------------------+ - | PERIOD_DIFF(200802, 200703) | PERIOD_DIFF(200802, 201003) | - |-------------------------------+-------------------------------| - | 11 | -25 | - +-------------------------------+-------------------------------+ + +-----------------------------+-----------------------------+ + | PERIOD_DIFF(200802, 200703) | PERIOD_DIFF(200802, 201003) | + |-----------------------------+-----------------------------| + | 11 | -25 | + +-----------------------------+-----------------------------+ QUARTER @@ -2429,11 +2429,11 @@ Example:: os> SELECT QUARTER(DATE('2020-08-26')) fetched rows / total rows = 1/1 - +-------------------------------+ - | QUARTER(DATE('2020-08-26')) | - |-------------------------------| - | 3 | - +-------------------------------+ + +-----------------------------+ + | QUARTER(DATE('2020-08-26')) | + |-----------------------------| + | 3 | + +-----------------------------+ SEC_TO_TIME @@ -2455,27 +2455,27 @@ Example:: os> SELECT SEC_TO_TIME(3601) fetched rows / total rows = 1/1 - +---------------------+ - | SEC_TO_TIME(3601) | - |---------------------| - | 01:00:01 | - +---------------------+ + +-------------------+ + | SEC_TO_TIME(3601) | + |-------------------| + | 01:00:01 | + +-------------------+ os> SELECT sec_to_time(1234.123); fetched rows / total rows = 1/1 - +-------------------------+ - | sec_to_time(1234.123) | - |-------------------------| - | 00:20:34.123 | - +-------------------------+ + +-----------------------+ + | sec_to_time(1234.123) | + |-----------------------| + | 00:20:34.123 | + +-----------------------+ os> SELECT sec_to_time(NULL); fetched rows / total rows = 1/1 - +---------------------+ - | sec_to_time(NULL) | - |---------------------| - | null | - +---------------------+ + +-------------------+ + | sec_to_time(NULL) | + |-------------------| + | null | + +-------------------+ SECOND @@ -2495,19 +2495,19 @@ Example:: os> SELECT SECOND((TIME '01:02:03')) fetched rows / total rows = 1/1 - +-----------------------------+ - | SECOND((TIME '01:02:03')) | - |-----------------------------| - | 3 | - +-----------------------------+ + +---------------------------+ + | SECOND((TIME '01:02:03')) | + |---------------------------| + | 3 | + +---------------------------+ os> SELECT SECOND_OF_MINUTE(time('01:02:03')) fetched rows / total rows = 1/1 - +--------------------------------------+ - | SECOND_OF_MINUTE(time('01:02:03')) | - |--------------------------------------| - | 3 | - +--------------------------------------+ + +------------------------------------+ + | SECOND_OF_MINUTE(time('01:02:03')) | + |------------------------------------| + | 3 | + +------------------------------------+ STR_TO_DATE @@ -2529,11 +2529,11 @@ Example:: OS> SELECT str_to_date("01,5,2013", "%d,%m,%Y") fetched rows / total rows = 1/1 - 
+----------------------------------------+ - | str_to_date("01,5,2013", "%d,%m,%Y") | - |----------------------------------------| - | 2013-05-01 00:00:00 | - +----------------------------------------+ + +--------------------------------------+ + | str_to_date("01,5,2013", "%d,%m,%Y") | + |--------------------------------------| + | 2013-05-01 00:00:00 | + +--------------------------------------+ SUBDATE @@ -2563,11 +2563,11 @@ Example:: os> SELECT SUBDATE(DATE('2008-01-02'), INTERVAL 31 DAY) AS `'2008-01-02' - 31d`, SUBDATE(DATE('2020-08-26'), 1) AS `'2020-08-26' - 1`, SUBDATE(TIMESTAMP('2020-08-26 01:01:01'), 1) AS `ts '2020-08-26 01:01:01' - 1` fetched rows / total rows = 1/1 - +----------------------+--------------------+--------------------------------+ - | '2008-01-02' - 31d | '2020-08-26' - 1 | ts '2020-08-26 01:01:01' - 1 | - |----------------------+--------------------+--------------------------------| - | 2007-12-02 00:00:00 | 2020-08-25 | 2020-08-25 01:01:01 | - +----------------------+--------------------+--------------------------------+ + +---------------------+------------------+------------------------------+ + | '2008-01-02' - 31d | '2020-08-26' - 1 | ts '2020-08-26 01:01:01' - 1 | + |---------------------+------------------+------------------------------| + | 2007-12-02 00:00:00 | 2020-08-25 | 2020-08-25 01:01:01 | + +---------------------+------------------+------------------------------+ SUBTIME @@ -2600,35 +2600,35 @@ Example:: os> SELECT SUBTIME(TIME('23:59:59'), DATE('2004-01-01')) AS `'23:59:59' - 0` fetched rows / total rows = 1/1 - +------------------+ - | '23:59:59' - 0 | - |------------------| - | 23:59:59 | - +------------------+ + +----------------+ + | '23:59:59' - 0 | + |----------------| + | 23:59:59 | + +----------------+ os> SELECT SUBTIME(DATE('2004-01-01'), TIME('23:59:59')) AS `'2004-01-01' - '23:59:59'` fetched rows / total rows = 1/1 - +-----------------------------+ - | '2004-01-01' - '23:59:59' | - |-----------------------------| - | 2003-12-31 00:00:01 | - +-----------------------------+ - - os> SELECT SUBTIME(TIME('10:20:30'), TIME('00:05:42')) AS `'10:20:30' - '00:05:42'` - fetched rows / total rows = 1/1 +---------------------------+ - | '10:20:30' - '00:05:42' | + | '2004-01-01' - '23:59:59' | |---------------------------| - | 10:14:48 | + | 2003-12-31 00:00:01 | +---------------------------+ + os> SELECT SUBTIME(TIME('10:20:30'), TIME('00:05:42')) AS `'10:20:30' - '00:05:42'` + fetched rows / total rows = 1/1 + +-------------------------+ + | '10:20:30' - '00:05:42' | + |-------------------------| + | 10:14:48 | + +-------------------------+ + os> SELECT SUBTIME(TIMESTAMP('2007-03-01 10:20:30'), TIMESTAMP('2002-03-04 20:40:50')) AS `'2007-03-01 10:20:30' - '20:40:50'` fetched rows / total rows = 1/1 - +--------------------------------------+ - | '2007-03-01 10:20:30' - '20:40:50' | - |--------------------------------------| - | 2007-02-28 13:39:40 | - +--------------------------------------+ + +------------------------------------+ + | '2007-03-01 10:20:30' - '20:40:50' | + |------------------------------------| + | 2007-02-28 13:39:40 | + +------------------------------------+ SYSDATE @@ -2674,11 +2674,11 @@ Example:: os> SELECT TIME('13:49:00'), TIME('13:49'), TIME(TIMESTAMP('2020-08-26 13:49:00')), TIME('2020-08-26 13:49:00') fetched rows / total rows = 1/1 - +--------------------+-----------------+------------------------------------------+-------------------------------+ - | TIME('13:49:00') | TIME('13:49') | TIME(TIMESTAMP('2020-08-26 
13:49:00')) | TIME('2020-08-26 13:49:00') | - |--------------------+-----------------+------------------------------------------+-------------------------------| - | 13:49:00 | 13:49:00 | 13:49:00 | 13:49:00 | - +--------------------+-----------------+------------------------------------------+-------------------------------+ + +------------------+---------------+----------------------------------------+-----------------------------+ + | TIME('13:49:00') | TIME('13:49') | TIME(TIMESTAMP('2020-08-26 13:49:00')) | TIME('2020-08-26 13:49:00') | + |------------------+---------------+----------------------------------------+-----------------------------| + | 13:49:00 | 13:49:00 | 13:49:00 | 13:49:00 | + +------------------+---------------+----------------------------------------+-----------------------------+ TIME_FORMAT ----------- @@ -2728,11 +2728,11 @@ Example:: os> SELECT TIME_FORMAT('1998-01-31 13:14:15.012345', '%f %H %h %I %i %p %r %S %s %T') fetched rows / total rows = 1/1 - +------------------------------------------------------------------------------+ - | TIME_FORMAT('1998-01-31 13:14:15.012345', '%f %H %h %I %i %p %r %S %s %T') | - |------------------------------------------------------------------------------| - | 012345 13 01 01 14 PM 01:14:15 PM 15 15 13:14:15 | - +------------------------------------------------------------------------------+ + +----------------------------------------------------------------------------+ + | TIME_FORMAT('1998-01-31 13:14:15.012345', '%f %H %h %I %i %p %r %S %s %T') | + |----------------------------------------------------------------------------| + | 012345 13 01 01 14 PM 01:14:15 PM 15 15 13:14:15 | + +----------------------------------------------------------------------------+ TIME_TO_SEC @@ -2751,11 +2751,11 @@ Example:: os> SELECT TIME_TO_SEC(TIME '22:23:00') fetched rows / total rows = 1/1 - +--------------------------------+ - | TIME_TO_SEC(TIME '22:23:00') | - |--------------------------------| - | 80580 | - +--------------------------------+ + +------------------------------+ + | TIME_TO_SEC(TIME '22:23:00') | + |------------------------------| + | 80580 | + +------------------------------+ TIMEDIFF @@ -2774,11 +2774,11 @@ Example:: os> SELECT TIMEDIFF('23:59:59', '13:00:00') fetched rows / total rows = 1/1 - +------------------------------------+ - | TIMEDIFF('23:59:59', '13:00:00') | - |------------------------------------| - | 10:59:59 | - +------------------------------------+ + +----------------------------------+ + | TIMEDIFF('23:59:59', '13:00:00') | + |----------------------------------| + | 10:59:59 | + +----------------------------------+ TIMESTAMP @@ -2802,11 +2802,11 @@ Example:: os> SELECT TIMESTAMP('2020-08-26 13:49:00'), TIMESTAMP('2020-08-26 13:49:00', TIME('12:15:42')) fetched rows / total rows = 1/1 - +------------------------------------+------------------------------------------------------+ - | TIMESTAMP('2020-08-26 13:49:00') | TIMESTAMP('2020-08-26 13:49:00', TIME('12:15:42')) | - |------------------------------------+------------------------------------------------------| - | 2020-08-26 13:49:00 | 2020-08-27 02:04:42 | - +------------------------------------+------------------------------------------------------+ + +----------------------------------+----------------------------------------------------+ + | TIMESTAMP('2020-08-26 13:49:00') | TIMESTAMP('2020-08-26 13:49:00', TIME('12:15:42')) | + |----------------------------------+----------------------------------------------------| + | 2020-08-26 13:49:00 | 
2020-08-27 02:04:42 | + +----------------------------------+----------------------------------------------------+ TIMESTAMPADD @@ -2826,11 +2826,11 @@ Examples:: os> SELECT TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00'), TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00') fetched rows / total rows = 1/1 - +------------------------------------------------+----------------------------------------------------+ - | TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00') | TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00') | - |------------------------------------------------+----------------------------------------------------| - | 2000-01-18 00:00:00 | 1999-10-01 00:00:00 | - +------------------------------------------------+----------------------------------------------------+ + +----------------------------------------------+--------------------------------------------------+ + | TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00') | TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00') | + |----------------------------------------------+--------------------------------------------------| + | 2000-01-18 00:00:00 | 1999-10-01 00:00:00 | + +----------------------------------------------+--------------------------------------------------+ TIMESTAMPDIFF @@ -2851,11 +2851,11 @@ Examples:: os> SELECT TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00'), TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00')) fetched rows / total rows = 1/1 - +---------------------------------------------------------------------+-------------------------------------------------------------+ - | TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00') | TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00')) | - |---------------------------------------------------------------------+-------------------------------------------------------------| - | 4 | -23 | - +---------------------------------------------------------------------+-------------------------------------------------------------+ + +-------------------------------------------------------------------+-----------------------------------------------------------+ + | TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00') | TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00')) | + |-------------------------------------------------------------------+-----------------------------------------------------------| + | 4 | -23 | + +-------------------------------------------------------------------+-----------------------------------------------------------+ TO_DAYS @@ -2874,11 +2874,11 @@ Example:: os> SELECT TO_DAYS(DATE '2008-10-07') fetched rows / total rows = 1/1 - +------------------------------+ - | TO_DAYS(DATE '2008-10-07') | - |------------------------------| - | 733687 | - +------------------------------+ + +----------------------------+ + | TO_DAYS(DATE '2008-10-07') | + |----------------------------| + | 733687 | + +----------------------------+ TO_SECONDS @@ -2898,11 +2898,11 @@ Example:: os> SELECT TO_SECONDS(DATE '2008-10-07'), TO_SECONDS(950228) fetched rows / total rows = 1/1 - +---------------------------------+----------------------+ - | TO_SECONDS(DATE '2008-10-07') | TO_SECONDS(950228) | - |---------------------------------+----------------------| - | 63390556800 | 62961148800 | - +---------------------------------+----------------------+ + +-------------------------------+--------------------+ + | TO_SECONDS(DATE '2008-10-07') | TO_SECONDS(950228) | + |-------------------------------+--------------------| + | 63390556800 | 
62961148800 | + +-------------------------------+--------------------+ UNIX_TIMESTAMP @@ -2924,19 +2924,19 @@ Examples:: os> select UNIX_TIMESTAMP(20771122143845) fetched rows / total rows = 1/1 - +----------------------------------+ - | UNIX_TIMESTAMP(20771122143845) | - |----------------------------------| - | 3404817525.0 | - +----------------------------------+ + +--------------------------------+ + | UNIX_TIMESTAMP(20771122143845) | + |--------------------------------| + | 3404817525.0 | + +--------------------------------+ os> select UNIX_TIMESTAMP(TIMESTAMP('1996-11-15 17:05:42')) fetched rows / total rows = 1/1 - +----------------------------------------------------+ - | UNIX_TIMESTAMP(TIMESTAMP('1996-11-15 17:05:42')) | - |----------------------------------------------------| - | 848077542.0 | - +----------------------------------------------------+ + +--------------------------------------------------+ + | UNIX_TIMESTAMP(TIMESTAMP('1996-11-15 17:05:42')) | + |--------------------------------------------------| + | 848077542.0 | + +--------------------------------------------------+ UTC_DATE @@ -2955,11 +2955,11 @@ Example:: > SELECT UTC_DATE(); fetched rows / total rows = 1/1 - +--------------+ - | utc_date() | - |--------------| - | 2022-10-03 | - +--------------+ + +------------+ + | utc_date() | + |------------| + | 2022-10-03 | + +------------+ UTC_TIME @@ -2978,11 +2978,11 @@ Example:: > SELECT UTC_TIME(); fetched rows / total rows = 1/1 - +--------------+ - | utc_time() | - |--------------| - | 17:54:27 | - +--------------+ + +------------+ + | utc_time() | + |------------| + | 17:54:27 | + +------------+ UTC_TIMESTAMP @@ -3067,11 +3067,11 @@ Example:: os> SELECT WEEK(DATE('2008-02-20')), WEEK(DATE('2008-02-20'), 1) fetched rows / total rows = 1/1 - +----------------------------+-------------------------------+ - | WEEK(DATE('2008-02-20')) | WEEK(DATE('2008-02-20'), 1) | - |----------------------------+-------------------------------| - | 7 | 8 | - +----------------------------+-------------------------------+ + +--------------------------+-----------------------------+ + | WEEK(DATE('2008-02-20')) | WEEK(DATE('2008-02-20'), 1) | + |--------------------------+-----------------------------| + | 7 | 8 | + +--------------------------+-----------------------------+ WEEKDAY @@ -3092,11 +3092,11 @@ Example:: os> SELECT weekday('2020-08-26'), weekday('2020-08-27') fetched rows / total rows = 1/1 - +-------------------------+-------------------------+ - | weekday('2020-08-26') | weekday('2020-08-27') | - |-------------------------+-------------------------| - | 2 | 3 | - +-------------------------+-------------------------+ + +-----------------------+-----------------------+ + | weekday('2020-08-26') | weekday('2020-08-27') | + |-----------------------+-----------------------| + | 2 | 3 | + +-----------------------+-----------------------+ WEEK_OF_YEAR @@ -3116,11 +3116,11 @@ Example:: os> SELECT WEEK_OF_YEAR(DATE('2008-02-20')), WEEK_OF_YEAR(DATE('2008-02-20'), 1) fetched rows / total rows = 1/1 - +------------------------------------+---------------------------------------+ - | WEEK_OF_YEAR(DATE('2008-02-20')) | WEEK_OF_YEAR(DATE('2008-02-20'), 1) | - |------------------------------------+---------------------------------------| - | 7 | 8 | - +------------------------------------+---------------------------------------+ + +----------------------------------+-------------------------------------+ + | WEEK_OF_YEAR(DATE('2008-02-20')) | WEEK_OF_YEAR(DATE('2008-02-20'), 1) | + 
|----------------------------------+-------------------------------------| + | 7 | 8 | + +----------------------------------+-------------------------------------+ WEEKOFYEAR @@ -3140,11 +3140,11 @@ Example:: os> SELECT WEEKOFYEAR(DATE('2008-02-20')), WEEKOFYEAR(DATE('2008-02-20'), 1) fetched rows / total rows = 1/1 - +----------------------------------+-------------------------------------+ - | WEEKOFYEAR(DATE('2008-02-20')) | WEEKOFYEAR(DATE('2008-02-20'), 1) | - |----------------------------------+-------------------------------------| - | 7 | 8 | - +----------------------------------+-------------------------------------+ + +--------------------------------+-----------------------------------+ + | WEEKOFYEAR(DATE('2008-02-20')) | WEEKOFYEAR(DATE('2008-02-20'), 1) | + |--------------------------------+-----------------------------------| + | 7 | 8 | + +--------------------------------+-----------------------------------+ YEAR @@ -3163,11 +3163,11 @@ Example:: os> SELECT YEAR(DATE('2020-08-26')) fetched rows / total rows = 1/1 - +----------------------------+ - | YEAR(DATE('2020-08-26')) | - |----------------------------| - | 2020 | - +----------------------------+ + +--------------------------+ + | YEAR(DATE('2020-08-26')) | + |--------------------------| + | 2020 | + +--------------------------+ YEARWEEK @@ -3186,11 +3186,11 @@ Example:: os> SELECT YEARWEEK('2020-08-26'), YEARWEEK('2019-01-05', 0) fetched rows / total rows = 1/1 - +--------------------------+-----------------------------+ - | YEARWEEK('2020-08-26') | YEARWEEK('2019-01-05', 0) | - |--------------------------+-----------------------------| - | 202034 | 201852 | - +--------------------------+-----------------------------+ + +------------------------+---------------------------+ + | YEARWEEK('2020-08-26') | YEARWEEK('2019-01-05', 0) | + |------------------------+---------------------------| + | 202034 | 201852 | + +------------------------+---------------------------+ String Functions @@ -3212,11 +3212,11 @@ Example:: os> SELECT ASCII('hello') fetched rows / total rows = 1/1 - +------------------+ - | ASCII('hello') | - |------------------| - | 104 | - +------------------+ + +----------------+ + | ASCII('hello') | + |----------------| + | 104 | + +----------------+ CONCAT @@ -3235,11 +3235,11 @@ Example:: os> SELECT CONCAT('hello ', 'whole ', 'world', '!'), CONCAT('hello', 'world'), CONCAT('hello', null) fetched rows / total rows = 1/1 - +--------------------------------------------+----------------------------+-------------------------+ - | CONCAT('hello ', 'whole ', 'world', '!') | CONCAT('hello', 'world') | CONCAT('hello', null) | - |--------------------------------------------+----------------------------+-------------------------| - | hello whole world! | helloworld | null | - +--------------------------------------------+----------------------------+-------------------------+ + +------------------------------------------+--------------------------+-----------------------+ + | CONCAT('hello ', 'whole ', 'world', '!') | CONCAT('hello', 'world') | CONCAT('hello', null) | + |------------------------------------------+--------------------------+-----------------------| + | hello whole world! 
| helloworld | null | + +------------------------------------------+--------------------------+-----------------------+ CONCAT_WS @@ -3258,11 +3258,11 @@ Example:: os> SELECT CONCAT_WS(',', 'hello', 'world') fetched rows / total rows = 1/1 - +------------------------------------+ - | CONCAT_WS(',', 'hello', 'world') | - |------------------------------------| - | hello,world | - +------------------------------------+ + +----------------------------------+ + | CONCAT_WS(',', 'hello', 'world') | + |----------------------------------| + | hello,world | + +----------------------------------+ LEFT @@ -3278,11 +3278,11 @@ Example:: os> SELECT LEFT('helloworld', 5), LEFT('HELLOWORLD', 0) fetched rows / total rows = 1/1 - +-------------------------+-------------------------+ - | LEFT('helloworld', 5) | LEFT('HELLOWORLD', 0) | - |-------------------------+-------------------------| - | hello | | - +-------------------------+-------------------------+ + +-----------------------+-----------------------+ + | LEFT('helloworld', 5) | LEFT('HELLOWORLD', 0) | + |-----------------------+-----------------------| + | hello | | + +-----------------------+-----------------------+ LENGTH @@ -3301,11 +3301,11 @@ Example:: os> SELECT LENGTH('helloworld') fetched rows / total rows = 1/1 - +------------------------+ - | LENGTH('helloworld') | - |------------------------| - | 10 | - +------------------------+ + +----------------------+ + | LENGTH('helloworld') | + |----------------------| + | 10 | + +----------------------+ LOCATE @@ -3327,11 +3327,11 @@ Example:: os> SELECT LOCATE('world', 'helloworld'), LOCATE('world', 'helloworldworld', 7) fetched rows / total rows = 1/1 - +---------------------------------+-----------------------------------------+ - | LOCATE('world', 'helloworld') | LOCATE('world', 'helloworldworld', 7) | - |---------------------------------+-----------------------------------------| - | 6 | 11 | - +---------------------------------+-----------------------------------------+ + +-------------------------------+---------------------------------------+ + | LOCATE('world', 'helloworld') | LOCATE('world', 'helloworldworld', 7) | + |-------------------------------+---------------------------------------| + | 6 | 11 | + +-------------------------------+---------------------------------------+ LOWER @@ -3350,11 +3350,11 @@ Example:: os> SELECT LOWER('helloworld'), LOWER('HELLOWORLD') fetched rows / total rows = 1/1 - +-----------------------+-----------------------+ - | LOWER('helloworld') | LOWER('HELLOWORLD') | - |-----------------------+-----------------------| - | helloworld | helloworld | - +-----------------------+-----------------------+ + +---------------------+---------------------+ + | LOWER('helloworld') | LOWER('HELLOWORLD') | + |---------------------+---------------------| + | helloworld | helloworld | + +---------------------+---------------------+ LTRIM @@ -3373,11 +3373,11 @@ Example:: os> SELECT LTRIM(' hello'), LTRIM('hello ') fetched rows / total rows = 1/1 - +---------------------+---------------------+ - | LTRIM(' hello') | LTRIM('hello ') | - |---------------------+---------------------| - | hello | hello | - +---------------------+---------------------+ + +-------------------+-------------------+ + | LTRIM(' hello') | LTRIM('hello ') | + |-------------------+-------------------| + | hello | hello | + +-------------------+-------------------+ POSITION @@ -3398,11 +3398,11 @@ Example:: os> SELECT POSITION('world' IN 'helloworld'), POSITION('invalid' IN 'helloworld'); fetched rows / 
total rows = 1/1 - +-------------------------------------+---------------------------------------+ - | POSITION('world' IN 'helloworld') | POSITION('invalid' IN 'helloworld') | - |-------------------------------------+---------------------------------------| - | 6 | 0 | - +-------------------------------------+---------------------------------------+ + +-----------------------------------+-------------------------------------+ + | POSITION('world' IN 'helloworld') | POSITION('invalid' IN 'helloworld') | + |-----------------------------------+-------------------------------------| + | 6 | 0 | + +-----------------------------------+-------------------------------------+ REPLACE @@ -3421,11 +3421,11 @@ Example:: os> SELECT REPLACE('Hello World!', 'World', 'OpenSearch') fetched rows / total rows = 1/1 - +--------------------------------------------------+ - | REPLACE('Hello World!', 'World', 'OpenSearch') | - |--------------------------------------------------| - | Hello OpenSearch! | - +--------------------------------------------------+ + +------------------------------------------------+ + | REPLACE('Hello World!', 'World', 'OpenSearch') | + |------------------------------------------------| + | Hello OpenSearch! | + +------------------------------------------------+ REVERSE @@ -3444,11 +3444,11 @@ Example:: os> SELECT REVERSE('abcde'), REVERSE(null) fetched rows / total rows = 1/1 - +--------------------+-----------------+ - | REVERSE('abcde') | REVERSE(null) | - |--------------------+-----------------| - | edcba | null | - +--------------------+-----------------+ + +------------------+---------------+ + | REVERSE('abcde') | REVERSE(null) | + |------------------+---------------| + | edcba | null | + +------------------+---------------+ RIGHT @@ -3467,11 +3467,11 @@ Example:: os> SELECT RIGHT('helloworld', 5), RIGHT('HELLOWORLD', 0) fetched rows / total rows = 1/1 - +--------------------------+--------------------------+ - | RIGHT('helloworld', 5) | RIGHT('HELLOWORLD', 0) | - |--------------------------+--------------------------| - | world | | - +--------------------------+--------------------------+ + +------------------------+------------------------+ + | RIGHT('helloworld', 5) | RIGHT('HELLOWORLD', 0) | + |------------------------+------------------------| + | world | | + +------------------------+------------------------+ RTRIM @@ -3490,11 +3490,11 @@ Example:: os> SELECT RTRIM(' hello'), RTRIM('hello ') fetched rows / total rows = 1/1 - +---------------------+---------------------+ - | RTRIM(' hello') | RTRIM('hello ') | - |---------------------+---------------------| - | hello | hello | - +---------------------+---------------------+ + +-------------------+-------------------+ + | RTRIM(' hello') | RTRIM('hello ') | + |-------------------+-------------------| + | hello | hello | + +-------------------+-------------------+ SUBSTRING @@ -3515,11 +3515,11 @@ Example:: os> SELECT SUBSTRING('helloworld', 5), SUBSTRING('helloworld', 5, 3) fetched rows / total rows = 1/1 - +------------------------------+---------------------------------+ - | SUBSTRING('helloworld', 5) | SUBSTRING('helloworld', 5, 3) | - |------------------------------+---------------------------------| - | oworld | owo | - +------------------------------+---------------------------------+ + +----------------------------+-------------------------------+ + | SUBSTRING('helloworld', 5) | SUBSTRING('helloworld', 5, 3) | + |----------------------------+-------------------------------| + | oworld | owo | + 
+----------------------------+-------------------------------+ TRIM @@ -3536,11 +3536,11 @@ Example:: os> SELECT TRIM(' hello'), TRIM('hello ') fetched rows / total rows = 1/1 - +--------------------+--------------------+ - | TRIM(' hello') | TRIM('hello ') | - |--------------------+--------------------| - | hello | hello | - +--------------------+--------------------+ + +------------------+------------------+ + | TRIM(' hello') | TRIM('hello ') | + |------------------+------------------| + | hello | hello | + +------------------+------------------+ UPPER @@ -3559,11 +3559,11 @@ Example:: os> SELECT UPPER('helloworld'), UPPER('HELLOWORLD') fetched rows / total rows = 1/1 - +-----------------------+-----------------------+ - | UPPER('helloworld') | UPPER('HELLOWORLD') | - |-----------------------+-----------------------| - | HELLOWORLD | HELLOWORLD | - +-----------------------+-----------------------+ + +---------------------+---------------------+ + | UPPER('helloworld') | UPPER('HELLOWORLD') | + |---------------------+---------------------| + | HELLOWORLD | HELLOWORLD | + +---------------------+---------------------+ @@ -3586,31 +3586,31 @@ Example One:: os> SELECT IFNULL(123, 321), IFNULL(321, 123) fetched rows / total rows = 1/1 - +--------------------+--------------------+ - | IFNULL(123, 321) | IFNULL(321, 123) | - |--------------------+--------------------| - | 123 | 321 | - +--------------------+--------------------+ + +------------------+------------------+ + | IFNULL(123, 321) | IFNULL(321, 123) | + |------------------+------------------| + | 123 | 321 | + +------------------+------------------+ Example Two:: os> SELECT IFNULL(321, 1/0), IFNULL(1/0, 123) fetched rows / total rows = 1/1 - +--------------------+--------------------+ - | IFNULL(321, 1/0) | IFNULL(1/0, 123) | - |--------------------+--------------------| - | 321 | 123 | - +--------------------+--------------------+ + +------------------+------------------+ + | IFNULL(321, 1/0) | IFNULL(1/0, 123) | + |------------------+------------------| + | 321 | 123 | + +------------------+------------------+ Example Three:: os> SELECT IFNULL(1/0, 1/0) fetched rows / total rows = 1/1 - +--------------------+ - | IFNULL(1/0, 1/0) | - |--------------------| - | null | - +--------------------+ + +------------------+ + | IFNULL(1/0, 1/0) | + |------------------| + | null | + +------------------+ NULLIF @@ -3629,11 +3629,11 @@ Example:: os> SELECT NULLIF(123, 123), NULLIF(321, 123), NULLIF(1/0, 321), NULLIF(321, 1/0), NULLIF(1/0, 1/0) fetched rows / total rows = 1/1 - +--------------------+--------------------+--------------------+--------------------+--------------------+ - | NULLIF(123, 123) | NULLIF(321, 123) | NULLIF(1/0, 321) | NULLIF(321, 1/0) | NULLIF(1/0, 1/0) | - |--------------------+--------------------+--------------------+--------------------+--------------------| - | null | 321 | null | 321 | null | - +--------------------+--------------------+--------------------+--------------------+--------------------+ + +------------------+------------------+------------------+------------------+------------------+ + | NULLIF(123, 123) | NULLIF(321, 123) | NULLIF(1/0, 321) | NULLIF(321, 1/0) | NULLIF(1/0, 1/0) | + |------------------+------------------+------------------+------------------+------------------| + | null | 321 | null | 321 | null | + +------------------+------------------+------------------+------------------+------------------+ ISNULL @@ -3652,11 +3652,11 @@ Example:: os> SELECT ISNULL(1/0), ISNULL(123) fetched rows / 
total rows = 1/1 - +---------------+---------------+ - | ISNULL(1/0) | ISNULL(123) | - |---------------+---------------| - | True | False | - +---------------+---------------+ + +-------------+-------------+ + | ISNULL(1/0) | ISNULL(123) | + |-------------+-------------| + | True | False | + +-------------+-------------+ IF @@ -3677,19 +3677,19 @@ Example:: os> SELECT IF(100 > 200, '100', '200') fetched rows / total rows = 1/1 - +-------------------------------+ - | IF(100 > 200, '100', '200') | - |-------------------------------| - | 200 | - +-------------------------------+ + +-----------------------------+ + | IF(100 > 200, '100', '200') | + |-----------------------------| + | 200 | + +-----------------------------+ os> SELECT IF(200 > 100, '100', '200') fetched rows / total rows = 1/1 - +-------------------------------+ - | IF(200 > 100, '100', '200') | - |-------------------------------| - | 100 | - +-------------------------------+ + +-----------------------------+ + | IF(200 > 100, '100', '200') | + |-----------------------------| + | 100 | + +-----------------------------+ CASE @@ -3744,11 +3744,11 @@ Here are examples for simple case syntax:: ... ELSE TRIM(' Absolute three ') ... END AS func_result; fetched rows / total rows = 1/1 - +---------------+-------------------+----------------+ - | simple_case | func_case_value | func_result | - |---------------+-------------------+----------------| - | One | Absolute two | Absolute three | - +---------------+-------------------+----------------+ + +-------------+-----------------+----------------+ + | simple_case | func_case_value | func_result | + |-------------+-----------------+----------------| + | One | Absolute two | Absolute three | + +-------------+-----------------+----------------+ Here are examples for searched case syntax:: @@ -3764,11 +3764,11 @@ Here are examples for searched case syntax:: ... WHEN 'hello' = 'world' THEN 'Hello' ... 
END AS no_else; fetched rows / total rows = 1/1 - +-----------------+------------------+-----------+ - | single_search | multi_searches | no_else | - |-----------------+------------------+-----------| - | One | Hello | null | - +-----------------+------------------+-----------+ + +---------------+----------------+---------+ + | single_search | multi_searches | no_else | + |---------------+----------------+---------| + | One | Hello | null | + +---------------+----------------+---------+ RELEVANCE @@ -3803,22 +3803,22 @@ Example with only ``field`` and ``query`` expressions, and all other parameters os> SELECT lastname, address FROM accounts WHERE match(address, 'Street'); fetched rows / total rows = 2/2 - +------------+--------------------+ - | lastname | address | - |------------+--------------------| - | Bond | 671 Bristol Street | - | Bates | 789 Madison Street | - +------------+--------------------+ + +----------+--------------------+ + | lastname | address | + |----------+--------------------| + | Bond | 671 Bristol Street | + | Bates | 789 Madison Street | + +----------+--------------------+ Another example to show how to set custom values for the optional parameters:: os> SELECT lastname FROM accounts WHERE match(firstname, 'Hattie', operator='AND', boost=2.0); fetched rows / total rows = 1/1 - +------------+ - | lastname | - |------------| - | Bond | - +------------+ + +----------+ + | lastname | + |----------| + | Bond | + +----------+ MATCHQUERY @@ -3833,32 +3833,32 @@ Example with only ``field`` and ``query`` expressions, and all other parameters os> SELECT lastname, address FROM accounts WHERE matchquery(address, 'Street'); fetched rows / total rows = 2/2 - +------------+--------------------+ - | lastname | address | - |------------+--------------------| - | Bond | 671 Bristol Street | - | Bates | 789 Madison Street | - +------------+--------------------+ + +----------+--------------------+ + | lastname | address | + |----------+--------------------| + | Bond | 671 Bristol Street | + | Bates | 789 Madison Street | + +----------+--------------------+ Another example to show how to set custom values for the optional parameters:: os> SELECT lastname FROM accounts WHERE matchquery(firstname, 'Hattie', operator='AND', boost=2.0); fetched rows / total rows = 1/1 - +------------+ - | lastname | - |------------| - | Bond | - +------------+ + +----------+ + | lastname | + |----------| + | Bond | + +----------+ The matchquery function also supports an alternative syntax:: os> SELECT firstname FROM accounts WHERE firstname = matchquery('Hattie'); fetched rows / total rows = 1/1 - +-------------+ - | firstname | - |-------------| - | Hattie | - +-------------+ + +-----------+ + | firstname | + |-----------| + | Hattie | + +-----------+ MATCH_QUERY @@ -3873,32 +3873,32 @@ Example with only ``field`` and ``query`` expressions, and all other parameters os> SELECT lastname, address FROM accounts WHERE match_query(address, 'Street'); fetched rows / total rows = 2/2 - +------------+--------------------+ - | lastname | address | - |------------+--------------------| - | Bond | 671 Bristol Street | - | Bates | 789 Madison Street | - +------------+--------------------+ + +----------+--------------------+ + | lastname | address | + |----------+--------------------| + | Bond | 671 Bristol Street | + | Bates | 789 Madison Street | + +----------+--------------------+ Another example to show how to set custom values for the optional parameters:: os> SELECT lastname FROM accounts WHERE 
match_query(firstname, 'Hattie', operator='AND', boost=2.0); fetched rows / total rows = 1/1 - +------------+ - | lastname | - |------------| - | Bond | - +------------+ + +----------+ + | lastname | + |----------| + | Bond | + +----------+ The match_query function also supports an alternative syntax:: os> SELECT firstname FROM accounts WHERE firstname = match_query('Hattie'); fetched rows / total rows = 1/1 - +-------------+ - | firstname | - |-------------| - | Hattie | - +-------------+ + +-----------+ + | firstname | + |-----------| + | Hattie | + +-----------+ MATCH_PHRASE @@ -3943,19 +3943,19 @@ The match_phrase function also supports an alternative syntax:: os> SELECT firstname FROM accounts WHERE firstname = match_phrase('Hattie'); fetched rows / total rows = 1/1 - +-------------+ - | firstname | - |-------------| - | Hattie | - +-------------+ + +-----------+ + | firstname | + |-----------| + | Hattie | + +-----------+ os> SELECT firstname FROM accounts WHERE firstname = matchphrase('Hattie'); fetched rows / total rows = 1/1 - +-------------+ - | firstname | - |-------------| - | Hattie | - +-------------+ + +-----------+ + | firstname | + |-----------| + | Hattie | + +-----------+ MATCH_BOOL_PREFIX @@ -3982,22 +3982,22 @@ Example with only ``field`` and ``query`` expressions, and all other parameters os> SELECT firstname, address FROM accounts WHERE match_bool_prefix(address, 'Bristol Stre'); fetched rows / total rows = 2/2 - +-------------+--------------------+ - | firstname | address | - |-------------+--------------------| - | Hattie | 671 Bristol Street | - | Nanette | 789 Madison Street | - +-------------+--------------------+ + +-----------+--------------------+ + | firstname | address | + |-----------+--------------------| + | Hattie | 671 Bristol Street | + | Nanette | 789 Madison Street | + +-----------+--------------------+ Another example to show how to set custom values for the optional parameters:: os> SELECT firstname, address FROM accounts WHERE match_bool_prefix(address, 'Bristol Street', minimum_should_match=2); fetched rows / total rows = 1/1 - +-------------+--------------------+ - | firstname | address | - |-------------+--------------------| - | Hattie | 671 Bristol Street | - +-------------+--------------------+ + +-----------+--------------------+ + | firstname | address | + |-----------+--------------------| + | Hattie | 671 Bristol Street | + +-----------+--------------------+ MATCH_PHRASE_PREFIX @@ -4084,40 +4084,40 @@ Example with only ``fields`` and ``query`` expressions, and all other parameters os> select id, title, author from books where multi_match(['title'], 'Pooh House'); fetched rows / total rows = 2/2 - +------+--------------------------+----------------------+ - | id | title | author | - |------+--------------------------+----------------------| - | 1 | The House at Pooh Corner | Alan Alexander Milne | - | 2 | Winnie-the-Pooh | Alan Alexander Milne | - +------+--------------------------+----------------------+ + +----+--------------------------+----------------------+ + | id | title | author | + |----+--------------------------+----------------------| + | 1 | The House at Pooh Corner | Alan Alexander Milne | + | 2 | Winnie-the-Pooh | Alan Alexander Milne | + +----+--------------------------+----------------------+ Another example to show how to set custom values for the optional parameters:: os> select id, title, author from books where multi_match(['title'], 'Pooh House', operator='AND', analyzer=default); fetched rows / total rows = 1/1 - 
+------+--------------------------+----------------------+ - | id | title | author | - |------+--------------------------+----------------------| - | 1 | The House at Pooh Corner | Alan Alexander Milne | - +------+--------------------------+----------------------+ + +----+--------------------------+----------------------+ + | id | title | author | + |----+--------------------------+----------------------| + | 1 | The House at Pooh Corner | Alan Alexander Milne | + +----+--------------------------+----------------------+ The multi_match function also supports an alternative syntax:: os> SELECT firstname FROM accounts WHERE firstname = multi_match('Hattie'); fetched rows / total rows = 1/1 - +-------------+ - | firstname | - |-------------| - | Hattie | - +-------------+ + +-----------+ + | firstname | + |-----------| + | Hattie | + +-----------+ os> SELECT firstname FROM accounts WHERE firstname = multimatch('Hattie'); fetched rows / total rows = 1/1 - +-------------+ - | firstname | - |-------------| - | Hattie | - +-------------+ + +-----------+ + | firstname | + |-----------| + | Hattie | + +-----------+ SIMPLE_QUERY_STRING @@ -4154,22 +4154,22 @@ Example with only ``fields`` and ``query`` expressions, and all other parameters os> select id, title, author from books where simple_query_string(['title'], 'Pooh House'); fetched rows / total rows = 2/2 - +------+--------------------------+----------------------+ - | id | title | author | - |------+--------------------------+----------------------| - | 1 | The House at Pooh Corner | Alan Alexander Milne | - | 2 | Winnie-the-Pooh | Alan Alexander Milne | - +------+--------------------------+----------------------+ + +----+--------------------------+----------------------+ + | id | title | author | + |----+--------------------------+----------------------| + | 1 | The House at Pooh Corner | Alan Alexander Milne | + | 2 | Winnie-the-Pooh | Alan Alexander Milne | + +----+--------------------------+----------------------+ Another example to show how to set custom values for the optional parameters:: os> select id, title, author from books where simple_query_string(['title'], 'Pooh House', flags='ALL', default_operator='AND'); fetched rows / total rows = 1/1 - +------+--------------------------+----------------------+ - | id | title | author | - |------+--------------------------+----------------------| - | 1 | The House at Pooh Corner | Alan Alexander Milne | - +------+--------------------------+----------------------+ + +----+--------------------------+----------------------+ + | id | title | author | + |----+--------------------------+----------------------| + | 1 | The House at Pooh Corner | Alan Alexander Milne | + +----+--------------------------+----------------------+ QUERY_STRING @@ -4216,22 +4216,22 @@ Example with only ``fields`` and ``query`` expressions, and all other parameters os> select id, title, author from books where query_string(['title'], 'Pooh House'); fetched rows / total rows = 2/2 - +------+--------------------------+----------------------+ - | id | title | author | - |------+--------------------------+----------------------| - | 1 | The House at Pooh Corner | Alan Alexander Milne | - | 2 | Winnie-the-Pooh | Alan Alexander Milne | - +------+--------------------------+----------------------+ + +----+--------------------------+----------------------+ + | id | title | author | + |----+--------------------------+----------------------| + | 1 | The House at Pooh Corner | Alan Alexander Milne | + | 2 | Winnie-the-Pooh | Alan 
Alexander Milne | + +----+--------------------------+----------------------+ Another example to show how to set custom values for the optional parameters:: os> select id, title, author from books where query_string(['title'], 'Pooh House', default_operator='AND'); fetched rows / total rows = 1/1 - +------+--------------------------+----------------------+ - | id | title | author | - |------+--------------------------+----------------------| - | 1 | The House at Pooh Corner | Alan Alexander Milne | - +------+--------------------------+----------------------+ + +----+--------------------------+----------------------+ + | id | title | author | + |----+--------------------------+----------------------| + | 1 | The House at Pooh Corner | Alan Alexander Milne | + +----+--------------------------+----------------------+ QUERY @@ -4278,22 +4278,22 @@ Example with only ``query_expressions``, and all other parameters are set defaul os> select id, title, author from books where query('title:Pooh House'); fetched rows / total rows = 2/2 - +------+--------------------------+----------------------+ - | id | title | author | - |------+--------------------------+----------------------| - | 1 | The House at Pooh Corner | Alan Alexander Milne | - | 2 | Winnie-the-Pooh | Alan Alexander Milne | - +------+--------------------------+----------------------+ + +----+--------------------------+----------------------+ + | id | title | author | + |----+--------------------------+----------------------| + | 1 | The House at Pooh Corner | Alan Alexander Milne | + | 2 | Winnie-the-Pooh | Alan Alexander Milne | + +----+--------------------------+----------------------+ Another example to show how to set custom values for the optional parameters:: os> select id, title, author from books where query('title:Pooh House', default_operator='AND'); fetched rows / total rows = 1/1 - +------+--------------------------+----------------------+ - | id | title | author | - |------+--------------------------+----------------------| - | 1 | The House at Pooh Corner | Alan Alexander Milne | - +------+--------------------------+----------------------+ + +----+--------------------------+----------------------+ + | id | title | author | + |----+--------------------------+----------------------| + | 1 | The House at Pooh Corner | Alan Alexander Milne | + +----+--------------------------+----------------------+ SCORE @@ -4323,20 +4323,20 @@ Example boosting score:: os> select id, title, author, _score from books where score(query('title:Pooh House', default_operator='AND'), 2.0); fetched rows / total rows = 1/1 - +------+--------------------------+----------------------+-----------+ - | id | title | author | _score | - |------+--------------------------+----------------------+-----------| - | 1 | The House at Pooh Corner | Alan Alexander Milne | 1.5884793 | - +------+--------------------------+----------------------+-----------+ + +----+--------------------------+----------------------+-----------+ + | id | title | author | _score | + |----+--------------------------+----------------------+-----------| + | 1 | The House at Pooh Corner | Alan Alexander Milne | 1.5884793 | + +----+--------------------------+----------------------+-----------+ os> select id, title, author, _score from books where score(query('title:Pooh House', default_operator='AND'), 5.0) OR score(query('title:Winnie', default_operator='AND'), 1.5); fetched rows / total rows = 2/2 - +------+--------------------------+----------------------+-----------+ - | id | title | 
author | _score | - |------+--------------------------+----------------------+-----------| - | 1 | The House at Pooh Corner | Alan Alexander Milne | 3.9711983 | - | 2 | Winnie-the-Pooh | Alan Alexander Milne | 1.1581701 | - +------+--------------------------+----------------------+-----------+ + +----+--------------------------+----------------------+-----------+ + | id | title | author | _score | + |----+--------------------------+----------------------+-----------| + | 1 | The House at Pooh Corner | Alan Alexander Milne | 3.9711983 | + | 2 | Winnie-the-Pooh | Alan Alexander Milne | 1.1581701 | + +----+--------------------------+----------------------+-----------+ HIGHLIGHT @@ -4435,45 +4435,45 @@ Example with ``field`` and ``path`` parameters:: os> SELECT nested(message.info, message) FROM nested; fetched rows / total rows = 2/2 - +---------------------------------+ - | nested(message.info, message) | - |---------------------------------| - | a | - | b | - +---------------------------------+ + +-------------------------------+ + | nested(message.info, message) | + |-------------------------------| + | a | + | b | + +-------------------------------+ Example with ``field.*`` used in SELECT clause:: os> SELECT nested(message.*) FROM nested; fetched rows / total rows = 2/2 - +--------------------------+-----------------------------+------------------------+ - | nested(message.author) | nested(message.dayOfWeek) | nested(message.info) | - |--------------------------+-----------------------------+------------------------| - | e | 1 | a | - | f | 2 | b | - +--------------------------+-----------------------------+------------------------+ + +------------------------+---------------------------+----------------------+ + | nested(message.author) | nested(message.dayOfWeek) | nested(message.info) | + |------------------------+---------------------------+----------------------| + | e | 1 | a | + | f | 2 | b | + +------------------------+---------------------------+----------------------+ Example with ``field`` and ``path`` parameters in the SELECT and WHERE clause:: os> SELECT nested(message.info, message) FROM nested WHERE nested(message.info, message) = 'b'; fetched rows / total rows = 1/1 - +---------------------------------+ - | nested(message.info, message) | - |---------------------------------| - | b | - +---------------------------------+ + +-------------------------------+ + | nested(message.info, message) | + |-------------------------------| + | b | + +-------------------------------+ Example with ``field`` and ``path`` parameters in the SELECT and ORDER BY clause:: os> SELECT nested(message.info, message) FROM nested ORDER BY nested(message.info, message) DESC; fetched rows / total rows = 2/2 - +---------------------------------+ - | nested(message.info, message) | - |---------------------------------| - | b | - | a | - +---------------------------------+ + +-------------------------------+ + | nested(message.info, message) | + |-------------------------------| + | b | + | a | + +-------------------------------+ System Functions @@ -4495,9 +4495,9 @@ Example:: os> select typeof(DATE('2008-04-14')) as `typeof(date)`, typeof(1) as `typeof(int)`, typeof(now()) as `typeof(now())`, typeof(accounts) as `typeof(column)` from people fetched rows / total rows = 1/1 - +----------------+---------------+-----------------+------------------+ - | typeof(date) | typeof(int) | typeof(now()) | typeof(column) | - |----------------+---------------+-----------------+------------------| - | DATE | INTEGER | 
TIMESTAMP | OBJECT | - +----------------+---------------+-----------------+------------------+ + +--------------+-------------+---------------+----------------+ + | typeof(date) | typeof(int) | typeof(now()) | typeof(column) | + |--------------+-------------+---------------+----------------| + | DATE | INTEGER | TIMESTAMP | OBJECT | + +--------------+-------------+---------------+----------------+ diff --git a/docs/user/dql/metadata.rst b/docs/user/dql/metadata.rst index a02bcf096a..aba4eb0c75 100644 --- a/docs/user/dql/metadata.rst +++ b/docs/user/dql/metadata.rst @@ -35,20 +35,21 @@ Example 1: Show All Indices Information SQL query:: os> SHOW TABLES LIKE '%' - fetched rows / total rows = 9/9 - +----------------+---------------+-----------------+--------------+-----------+------------+--------------+-------------+-----------------------------+------------------+ - | TABLE_CAT | TABLE_SCHEM | TABLE_NAME | TABLE_TYPE | REMARKS | TYPE_CAT | TYPE_SCHEM | TYPE_NAME | SELF_REFERENCING_COL_NAME | REF_GENERATION | - |----------------+---------------+-----------------+--------------+-----------+------------+--------------+-------------+-----------------------------+------------------| - | docTestCluster | null | .ql-datasources | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | account2 | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | accounts | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | apache | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | books | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | nested | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | nyc_taxi | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | people | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | wildcard | BASE TABLE | null | null | null | null | null | null | - +----------------+---------------+-----------------+--------------+-----------+------------+--------------+-------------+-----------------------------+------------------+ + fetched rows / total rows = 10/10 + +----------------+-------------+-----------------+------------+---------+----------+------------+-----------+---------------------------+----------------+ + | TABLE_CAT | TABLE_SCHEM | TABLE_NAME | TABLE_TYPE | REMARKS | TYPE_CAT | TYPE_SCHEM | TYPE_NAME | SELF_REFERENCING_COL_NAME | REF_GENERATION | + |----------------+-------------+-----------------+------------+---------+----------+------------+-----------+---------------------------+----------------| + | docTestCluster | null | .ql-datasources | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | account2 | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | accounts | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | apache | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | books | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | nested | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | nyc_taxi | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | people | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | weblogs | BASE TABLE | null | null | null | null | null | null | + 
| docTestCluster | null | wildcard | BASE TABLE | null | null | null | null | null | null | + +----------------+-------------+-----------------+------------+---------+----------+------------+-----------+---------------------------+----------------+ Example 2: Show Specific Index Information ------------------------------------------ @@ -59,12 +60,12 @@ SQL query:: os> SHOW TABLES LIKE "acc%" fetched rows / total rows = 2/2 - +----------------+---------------+--------------+--------------+-----------+------------+--------------+-------------+-----------------------------+------------------+ - | TABLE_CAT | TABLE_SCHEM | TABLE_NAME | TABLE_TYPE | REMARKS | TYPE_CAT | TYPE_SCHEM | TYPE_NAME | SELF_REFERENCING_COL_NAME | REF_GENERATION | - |----------------+---------------+--------------+--------------+-----------+------------+--------------+-------------+-----------------------------+------------------| - | docTestCluster | null | account2 | BASE TABLE | null | null | null | null | null | null | - | docTestCluster | null | accounts | BASE TABLE | null | null | null | null | null | null | - +----------------+---------------+--------------+--------------+-----------+------------+--------------+-------------+-----------------------------+------------------+ + +----------------+-------------+------------+------------+---------+----------+------------+-----------+---------------------------+----------------+ + | TABLE_CAT | TABLE_SCHEM | TABLE_NAME | TABLE_TYPE | REMARKS | TYPE_CAT | TYPE_SCHEM | TYPE_NAME | SELF_REFERENCING_COL_NAME | REF_GENERATION | + |----------------+-------------+------------+------------+---------+----------+------------+-----------+---------------------------+----------------| + | docTestCluster | null | account2 | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | accounts | BASE TABLE | null | null | null | null | null | null | + +----------------+-------------+------------+------------+---------+----------+------------+-----------+---------------------------+----------------+ Example 3: Describe Index Fields Information -------------------------------------------- @@ -75,21 +76,21 @@ SQL query:: os> DESCRIBE TABLES LIKE 'accounts' fetched rows / total rows = 11/11 - +----------------+---------------+--------------+----------------+-------------+-------------+---------------+-----------------+------------------+------------------+------------+-----------+--------------+-----------------+--------------------+---------------------+--------------------+---------------+-----------------+----------------+---------------+--------------------+--------------------+----------------------+ - | TABLE_CAT | TABLE_SCHEM | TABLE_NAME | COLUMN_NAME | DATA_TYPE | TYPE_NAME | COLUMN_SIZE | BUFFER_LENGTH | DECIMAL_DIGITS | NUM_PREC_RADIX | NULLABLE | REMARKS | COLUMN_DEF | SQL_DATA_TYPE | SQL_DATETIME_SUB | CHAR_OCTET_LENGTH | ORDINAL_POSITION | IS_NULLABLE | SCOPE_CATALOG | SCOPE_SCHEMA | SCOPE_TABLE | SOURCE_DATA_TYPE | IS_AUTOINCREMENT | IS_GENERATEDCOLUMN | - |----------------+---------------+--------------+----------------+-------------+-------------+---------------+-----------------+------------------+------------------+------------+-----------+--------------+-----------------+--------------------+---------------------+--------------------+---------------+-----------------+----------------+---------------+--------------------+--------------------+----------------------| - | docTestCluster | null | accounts | account_number | null | long | null | null 
| null | 10 | 2 | null | null | null | null | null | 0 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | firstname | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 1 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | address | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 2 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | balance | null | long | null | null | null | 10 | 2 | null | null | null | null | null | 3 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | gender | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 4 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | city | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 5 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | employer | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 6 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | state | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 7 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | age | null | long | null | null | null | 10 | 2 | null | null | null | null | null | 8 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | email | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 9 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | lastname | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 10 | | null | null | null | null | NO | | - +----------------+---------------+--------------+----------------+-------------+-------------+---------------+-----------------+------------------+------------------+------------+-----------+--------------+-----------------+--------------------+---------------------+--------------------+---------------+-----------------+----------------+---------------+--------------------+--------------------+----------------------+ + +----------------+-------------+------------+----------------+-----------+-----------+-------------+---------------+----------------+----------------+----------+---------+------------+---------------+------------------+-------------------+------------------+-------------+---------------+--------------+-------------+------------------+------------------+--------------------+ + | TABLE_CAT | TABLE_SCHEM | TABLE_NAME | COLUMN_NAME | DATA_TYPE | TYPE_NAME | COLUMN_SIZE | BUFFER_LENGTH | DECIMAL_DIGITS | NUM_PREC_RADIX | NULLABLE | REMARKS | COLUMN_DEF | SQL_DATA_TYPE | SQL_DATETIME_SUB | CHAR_OCTET_LENGTH | ORDINAL_POSITION | IS_NULLABLE | SCOPE_CATALOG | SCOPE_SCHEMA | SCOPE_TABLE | SOURCE_DATA_TYPE | IS_AUTOINCREMENT | IS_GENERATEDCOLUMN | + |----------------+-------------+------------+----------------+-----------+-----------+-------------+---------------+----------------+----------------+----------+---------+------------+---------------+------------------+-------------------+------------------+-------------+---------------+--------------+-------------+------------------+------------------+--------------------| + | docTestCluster | null | accounts | account_number | null | long | null | null | null | 10 | 2 | null | null | null | null | null | 0 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | firstname 
| null | text | null | null | null | 10 | 2 | null | null | null | null | null | 1 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | address | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 2 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | balance | null | long | null | null | null | 10 | 2 | null | null | null | null | null | 3 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | gender | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 4 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | city | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 5 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | employer | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 6 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | state | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 7 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | age | null | long | null | null | null | 10 | 2 | null | null | null | null | null | 8 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | email | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 9 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | lastname | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 10 | | null | null | null | null | NO | | + +----------------+-------------+------------+----------------+-----------+-----------+-------------+---------------+----------------+----------------+----------+---------+------------+---------------+------------------+-------------------+------------------+-------------+---------------+--------------+-------------+------------------+------------------+--------------------+ Example 4: Describe Index With Fields Filter -------------------------------------------- @@ -100,9 +101,9 @@ SQL query:: os> DESCRIBE TABLES LIKE "accounts" COLUMNS LIKE "%name" fetched rows / total rows = 2/2 - +----------------+---------------+--------------+---------------+-------------+-------------+---------------+-----------------+------------------+------------------+------------+-----------+--------------+-----------------+--------------------+---------------------+--------------------+---------------+-----------------+----------------+---------------+--------------------+--------------------+----------------------+ - | TABLE_CAT | TABLE_SCHEM | TABLE_NAME | COLUMN_NAME | DATA_TYPE | TYPE_NAME | COLUMN_SIZE | BUFFER_LENGTH | DECIMAL_DIGITS | NUM_PREC_RADIX | NULLABLE | REMARKS | COLUMN_DEF | SQL_DATA_TYPE | SQL_DATETIME_SUB | CHAR_OCTET_LENGTH | ORDINAL_POSITION | IS_NULLABLE | SCOPE_CATALOG | SCOPE_SCHEMA | SCOPE_TABLE | SOURCE_DATA_TYPE | IS_AUTOINCREMENT | IS_GENERATEDCOLUMN | - |----------------+---------------+--------------+---------------+-------------+-------------+---------------+-----------------+------------------+------------------+------------+-----------+--------------+-----------------+--------------------+---------------------+--------------------+---------------+-----------------+----------------+---------------+--------------------+--------------------+----------------------| - | docTestCluster | null | accounts | firstname | null | text | null | null | null | 10 | 2 | null | null | null | 
null | null | 1 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | lastname | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 10 | | null | null | null | null | NO | | - +----------------+---------------+--------------+---------------+-------------+-------------+---------------+-----------------+------------------+------------------+------------+-----------+--------------+-----------------+--------------------+---------------------+--------------------+---------------+-----------------+----------------+---------------+--------------------+--------------------+----------------------+ + +----------------+-------------+------------+-------------+-----------+-----------+-------------+---------------+----------------+----------------+----------+---------+------------+---------------+------------------+-------------------+------------------+-------------+---------------+--------------+-------------+------------------+------------------+--------------------+ + | TABLE_CAT | TABLE_SCHEM | TABLE_NAME | COLUMN_NAME | DATA_TYPE | TYPE_NAME | COLUMN_SIZE | BUFFER_LENGTH | DECIMAL_DIGITS | NUM_PREC_RADIX | NULLABLE | REMARKS | COLUMN_DEF | SQL_DATA_TYPE | SQL_DATETIME_SUB | CHAR_OCTET_LENGTH | ORDINAL_POSITION | IS_NULLABLE | SCOPE_CATALOG | SCOPE_SCHEMA | SCOPE_TABLE | SOURCE_DATA_TYPE | IS_AUTOINCREMENT | IS_GENERATEDCOLUMN | + |----------------+-------------+------------+-------------+-----------+-----------+-------------+---------------+----------------+----------------+----------+---------+------------+---------------+------------------+-------------------+------------------+-------------+---------------+--------------+-------------+------------------+------------------+--------------------| + | docTestCluster | null | accounts | firstname | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 1 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | lastname | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 10 | | null | null | null | null | NO | | + +----------------+-------------+------------+-------------+-----------+-----------+-------------+---------------+----------------+----------------+----------+---------+------------+---------------+------------------+-------------------+------------------+-------------+---------------+--------------+-------------+------------------+------------------+--------------------+ diff --git a/docs/user/dql/window.rst b/docs/user/dql/window.rst index feb2aaa44e..f0c53da055 100644 --- a/docs/user/dql/window.rst +++ b/docs/user/dql/window.rst @@ -53,14 +53,14 @@ Here is an example for ``COUNT`` function:: ... ) AS cnt ... FROM accounts; fetched rows / total rows = 4/4 - +----------+-----------+-------+ - | gender | balance | cnt | - |----------+-----------+-------| - | F | 32838 | 1 | - | M | 4180 | 1 | - | M | 5686 | 2 | - | M | 39225 | 3 | - +----------+-----------+-------+ + +--------+---------+-----+ + | gender | balance | cnt | + |--------+---------+-----| + | F | 32838 | 1 | + | M | 4180 | 1 | + | M | 5686 | 2 | + | M | 39225 | 3 | + +--------+---------+-----+ MIN --- @@ -74,14 +74,14 @@ Here is an example for ``MIN`` function:: ... ) AS cnt ... 
FROM accounts; fetched rows / total rows = 4/4 - +----------+-----------+-------+ - | gender | balance | cnt | - |----------+-----------+-------| - | F | 32838 | 32838 | - | M | 4180 | 4180 | - | M | 5686 | 4180 | - | M | 39225 | 4180 | - +----------+-----------+-------+ + +--------+---------+-------+ + | gender | balance | cnt | + |--------+---------+-------| + | F | 32838 | 32838 | + | M | 4180 | 4180 | + | M | 5686 | 4180 | + | M | 39225 | 4180 | + +--------+---------+-------+ MAX --- @@ -95,14 +95,14 @@ Here is an example for ``MAX`` function:: ... ) AS cnt ... FROM accounts; fetched rows / total rows = 4/4 - +----------+-----------+-------+ - | gender | balance | cnt | - |----------+-----------+-------| - | F | 32838 | 32838 | - | M | 4180 | 4180 | - | M | 5686 | 5686 | - | M | 39225 | 39225 | - +----------+-----------+-------+ + +--------+---------+-------+ + | gender | balance | cnt | + |--------+---------+-------| + | F | 32838 | 32838 | + | M | 4180 | 4180 | + | M | 5686 | 5686 | + | M | 39225 | 39225 | + +--------+---------+-------+ AVG --- @@ -116,14 +116,14 @@ Here is an example for ``AVG`` function:: ... ) AS cnt ... FROM accounts; fetched rows / total rows = 4/4 - +----------+-----------+--------------------+ - | gender | balance | cnt | - |----------+-----------+--------------------| - | F | 32838 | 32838.0 | - | M | 4180 | 4180.0 | - | M | 5686 | 4933.0 | - | M | 39225 | 16363.666666666666 | - +----------+-----------+--------------------+ + +--------+---------+--------------------+ + | gender | balance | cnt | + |--------+---------+--------------------| + | F | 32838 | 32838.0 | + | M | 4180 | 4180.0 | + | M | 5686 | 4933.0 | + | M | 39225 | 16363.666666666666 | + +--------+---------+--------------------+ SUM --- @@ -137,14 +137,14 @@ Here is an example for ``SUM`` function:: ... ) AS cnt ... FROM accounts; fetched rows / total rows = 4/4 - +----------+-----------+-------+ - | gender | balance | cnt | - |----------+-----------+-------| - | F | 32838 | 32838 | - | M | 4180 | 4180 | - | M | 5686 | 9866 | - | M | 39225 | 49091 | - +----------+-----------+-------+ + +--------+---------+-------+ + | gender | balance | cnt | + |--------+---------+-------| + | F | 32838 | 32838 | + | M | 4180 | 4180 | + | M | 5686 | 9866 | + | M | 39225 | 49091 | + +--------+---------+-------+ STDDEV_POP ---------- @@ -158,14 +158,14 @@ Here is an example for ``STDDEV_POP`` function:: ... ) AS val ... FROM accounts; fetched rows / total rows = 4/4 - +----------+-----------+--------------------+ - | gender | balance | val | - |----------+-----------+--------------------| - | F | 32838 | 0.0 | - | M | 4180 | 0.0 | - | M | 5686 | 753.0 | - | M | 39225 | 16177.091422406222 | - +----------+-----------+--------------------+ + +--------+---------+--------------------+ + | gender | balance | val | + |--------+---------+--------------------| + | F | 32838 | 0.0 | + | M | 4180 | 0.0 | + | M | 5686 | 753.0 | + | M | 39225 | 16177.091422406222 | + +--------+---------+--------------------+ STDDEV_SAMP ----------- @@ -179,14 +179,14 @@ Here is an example for ``STDDEV_SAMP`` function:: ... ) AS val ... 
FROM accounts; fetched rows / total rows = 4/4 - +----------+-----------+--------------------+ - | gender | balance | val | - |----------+-----------+--------------------| - | F | 32838 | 0.0 | - | M | 4180 | 0.0 | - | M | 5686 | 1064.9028124669405 | - | M | 39225 | 19812.809753624886 | - +----------+-----------+--------------------+ + +--------+---------+--------------------+ + | gender | balance | val | + |--------+---------+--------------------| + | F | 32838 | 0.0 | + | M | 4180 | 0.0 | + | M | 5686 | 1064.9028124669405 | + | M | 39225 | 19812.809753624886 | + +--------+---------+--------------------+ VAR_POP ------- @@ -200,14 +200,14 @@ Here is an example for ``SUM`` function:: ... ) AS val ... FROM accounts; fetched rows / total rows = 4/4 - +----------+-----------+--------------------+ - | gender | balance | val | - |----------+-----------+--------------------| - | F | 32838 | 0.0 | - | M | 4180 | 0.0 | - | M | 5686 | 567009.0 | - | M | 39225 | 261698286.88888893 | - +----------+-----------+--------------------+ + +--------+---------+--------------------+ + | gender | balance | val | + |--------+---------+--------------------| + | F | 32838 | 0.0 | + | M | 4180 | 0.0 | + | M | 5686 | 567009.0 | + | M | 39225 | 261698286.88888893 | + +--------+---------+--------------------+ VAR_SAMP -------- @@ -221,14 +221,14 @@ Here is an example for ``SUM`` function:: ... ) AS val ... FROM accounts; fetched rows / total rows = 4/4 - +----------+-----------+-------------------+ - | gender | balance | val | - |----------+-----------+-------------------| - | F | 32838 | 0.0 | - | M | 4180 | 0.0 | - | M | 5686 | 1134018.0 | - | M | 39225 | 392547430.3333334 | - +----------+-----------+-------------------+ + +--------+---------+-------------------+ + | gender | balance | val | + |--------+---------+-------------------| + | F | 32838 | 0.0 | + | M | 4180 | 0.0 | + | M | 5686 | 1134018.0 | + | M | 39225 | 392547430.3333334 | + +--------+---------+-------------------+ Ranking Functions @@ -248,14 +248,14 @@ ROW_NUMBER os> SELECT gender, balance, ROW_NUMBER() OVER(PARTITION BY gender ORDER BY balance) AS num FROM accounts; fetched rows / total rows = 4/4 - +----------+-----------+-------+ - | gender | balance | num | - |----------+-----------+-------| - | F | 32838 | 1 | - | M | 4180 | 1 | - | M | 5686 | 2 | - | M | 39225 | 3 | - +----------+-----------+-------+ + +--------+---------+-----+ + | gender | balance | num | + |--------+---------+-----| + | F | 32838 | 1 | + | M | 4180 | 1 | + | M | 5686 | 2 | + | M | 39225 | 3 | + +--------+---------+-----+ Similarly as regular ``ORDER BY`` clause, you can specify null ordering by ``NULLS FIRST`` or ``NULLS LAST`` which has exactly same behavior:: @@ -267,14 +267,14 @@ Similarly as regular ``ORDER BY`` clause, you can specify null ordering by ``NUL ... FROM accounts ... 
ORDER BY employer NULLS LAST; fetched rows / total rows = 4/4 - +------------+-------+ - | employer | num | - |------------+-------| - | Netagy | 1 | - | Pyrami | 2 | - | Quility | 3 | - | null | 4 | - +------------+-------+ + +----------+-----+ + | employer | num | + |----------+-----| + | Netagy | 1 | + | Pyrami | 2 | + | Quility | 3 | + | null | 4 | + +----------+-----+ RANK ---- @@ -283,14 +283,14 @@ RANK os> SELECT gender, RANK() OVER(ORDER BY gender DESC) AS rnk FROM accounts; fetched rows / total rows = 4/4 - +----------+-------+ - | gender | rnk | - |----------+-------| - | M | 1 | - | M | 1 | - | M | 1 | - | F | 4 | - +----------+-------+ + +--------+-----+ + | gender | rnk | + |--------+-----| + | M | 1 | + | M | 1 | + | M | 1 | + | F | 4 | + +--------+-----+ DENSE_RANK @@ -300,12 +300,12 @@ Similarly as ``RANK``, ``DENSE_RANK`` function also assigns a rank to each row. os> SELECT gender, DENSE_RANK() OVER(ORDER BY gender DESC) AS rnk FROM accounts; fetched rows / total rows = 4/4 - +----------+-------+ - | gender | rnk | - |----------+-------| - | M | 1 | - | M | 1 | - | M | 1 | - | F | 2 | - +----------+-------+ + +--------+-----+ + | gender | rnk | + |--------+-----| + | M | 1 | + | M | 1 | + | M | 1 | + | F | 2 | + +--------+-----+ diff --git a/docs/user/general/comments.rst b/docs/user/general/comments.rst index ab959da342..536843695e 100644 --- a/docs/user/general/comments.rst +++ b/docs/user/general/comments.rst @@ -26,11 +26,11 @@ A single-line comment starts with either ``#`` or ``--``. All characters in the ... -- comments ... 123; -- comments fetched rows / total rows = 1/1 - +-------+ - | 123 | - |-------| - | 123 | - +-------+ + +-----+ + | 123 | + |-----| + | 123 | + +-----+ Note that double-dash style requires at least one whitespace followed. @@ -48,10 +48,10 @@ A block comment is enclosed within ``/*`` and ``*/`` across one or multiple line ... /* comments */ ... 123; fetched rows / total rows = 1/1 - +-------+ - | 123 | - |-------| - | 123 | - +-------+ + +-----+ + | 123 | + |-----| + | 123 | + +-----+ Additionally, ``/*! ... */`` is supported though ignored for now. This may be used to support optimization hints in future. diff --git a/docs/user/general/datatypes.rst b/docs/user/general/datatypes.rst index c423bd7b10..3e115b249e 100644 --- a/docs/user/general/datatypes.rst +++ b/docs/user/general/datatypes.rst @@ -115,7 +115,7 @@ A data type can be converted to another, implicitly or explicitly or impossibly, The general rules and design tenets for data type conversion include: -1. Implicit conversion is defined by type precedence which is represented by the type hierarchy tree. See `Data Type Conversion in SQL/PPL `_ for more details. +1. Implicit conversion is defined by type precedence which is represented by the type hierarchy tree. See `Data Type Conversion in SQL/PPL `_ for more details. 2. Explicit conversion defines the complete set of conversion allowed. If no explicit conversion defined, implicit conversion should be impossible too. 3. On the other hand, if implicit conversion can occur between 2 types, then explicit conversion should be allowed too. 4. Conversion within a data type family is considered as conversion between different data representation and should be supported as much as possible. @@ -188,11 +188,11 @@ Here are a few examples for implicit type conversion:: ... 'True' = true, ... 
DATE('2021-06-10') < '2021-06-11'; fetched rows / total rows = 1/1 - +-----------+-----------------+-------------------------------------+ - | 1 = 1.0 | 'True' = true | DATE('2021-06-10') < '2021-06-11' | - |-----------+-----------------+-------------------------------------| - | True | True | True | - +-----------+-----------------+-------------------------------------+ + +---------+---------------+-----------------------------------+ + | 1 = 1.0 | 'True' = true | DATE('2021-06-10') < '2021-06-11' | + |---------+---------------+-----------------------------------| + | True | True | True | + +---------+---------------+-----------------------------------+ Here are a few examples for explicit type conversion:: @@ -201,11 +201,11 @@ Here are a few examples for explicit type conversion:: ... CAST(1.2 AS STRING), ... CAST('2021-06-10 00:00:00' AS TIMESTAMP); fetched rows / total rows = 1/1 - +---------------------+-----------------------+--------------------------------------------+ - | CAST(true AS INT) | CAST(1.2 AS STRING) | CAST('2021-06-10 00:00:00' AS TIMESTAMP) | - |---------------------+-----------------------+--------------------------------------------| - | 1 | 1.2 | 2021-06-10 00:00:00 | - +---------------------+-----------------------+--------------------------------------------+ + +-------------------+---------------------+------------------------------------------+ + | CAST(true AS INT) | CAST(1.2 AS STRING) | CAST('2021-06-10 00:00:00' AS TIMESTAMP) | + |-------------------+---------------------+------------------------------------------| + | 1 | 1.2 | 2021-06-10 00:00:00 | + +-------------------+---------------------+------------------------------------------+ Undefined Data Type =================== @@ -216,11 +216,11 @@ Here are examples for NULL literal and expressions with NULL literal involved:: os> SELECT NULL, NULL = NULL, 1 + NULL, LENGTH(NULL); fetched rows / total rows = 1/1 - +--------+---------------+------------+----------------+ - | NULL | NULL = NULL | 1 + NULL | LENGTH(NULL) | - |--------+---------------+------------+----------------| - | null | null | null | null | - +--------+---------------+------------+----------------+ + +------+-------------+----------+--------------+ + | NULL | NULL = NULL | 1 + NULL | LENGTH(NULL) | + |------+-------------+----------+--------------| + | null | null | null | null | + +------+-------------+----------+--------------+ Numeric Data Types @@ -318,11 +318,11 @@ A string can also represent and be converted to date and time types (except to i ... '2021-06-18' < DATE('2021-06-17'), ... 
'10:20:00' <= TIME('11:00:00'); fetched rows / total rows = 1/1 - +------------------------------------------------------------+-------------------------------------+----------------------------------+ - | TIMESTAMP('2021-06-17 00:00:00') = '2021-06-17 00:00:00' | '2021-06-18' < DATE('2021-06-17') | '10:20:00' <= TIME('11:00:00') | - |------------------------------------------------------------+-------------------------------------+----------------------------------| - | True | False | True | - +------------------------------------------------------------+-------------------------------------+----------------------------------+ + +----------------------------------------------------------+-----------------------------------+--------------------------------+ + | TIMESTAMP('2021-06-17 00:00:00') = '2021-06-17 00:00:00' | '2021-06-18' < DATE('2021-06-17') | '10:20:00' <= TIME('11:00:00') | + |----------------------------------------------------------+-----------------------------------+--------------------------------| + | True | False | True | + +----------------------------------------------------------+-----------------------------------+--------------------------------+ Please, see `more examples here <../dql/expressions.rst#toc-entry-15>`_. @@ -400,6 +400,48 @@ Querying such index will provide a response with ``schema`` block as shown below "status": 200 } +If the sql query contains an `IndexDateField` and a literal value with an operator (such as a term query or a range query), then the literal value can be in the `IndexDateField` format. + +.. code-block:: json + + { + "mappings" : { + "properties" : { + "release_date" : { + "type" : "date", + "format": "dd-MMM-yy" + } + } + } + } + +Querying such an `IndexDateField` (``release_date``) will provide a response with ``schema`` and ``datarows`` blocks as shown below. + +.. code-block:: json + + { + "query" : "SELECT release_date FROM test_index WHERE release_date = \"03-Jan-21\"" + } + +.. code-block:: json + + { + "schema": [ + { + "name": "release_date", + "type": "date" + } + ], + "datarows": [ + [ + "2021-01-03" + ] + ], + "total": 1, + "size": 1, + "status": 200 + } + String Data Types ================= @@ -407,11 +449,11 @@ A string is a sequence of characters enclosed in either single or double quotes. 
os> SELECT 'hello', "world", '"hello"', "'world'", '''hello''', """world""", 'I\'m', 'I''m', "I\"m" fetched rows / total rows = 1/1 - +-----------+-----------+-------------+-------------+---------------+---------------+----------+----------+----------+ - | 'hello' | "world" | '"hello"' | "'world'" | '''hello''' | """world""" | 'I\'m' | 'I''m' | "I\"m" | - |-----------+-----------+-------------+-------------+---------------+---------------+----------+----------+----------| - | hello | world | "hello" | 'world' | 'hello' | "world" | I'm | I'm | I"m | - +-----------+-----------+-------------+-------------+---------------+---------------+----------+----------+----------+ + +---------+---------+-----------+-----------+-------------+-------------+--------+--------+--------+ + | 'hello' | "world" | '"hello"' | "'world'" | '''hello''' | """world""" | 'I\'m' | 'I''m' | "I\"m" | + |---------+---------+-----------+-----------+-------------+-------------+--------+--------+--------| + | hello | world | "hello" | 'world' | 'hello' | "world" | I'm | I'm | I"m | + +---------+---------+-----------+-----------+-------------+-------------+--------+--------+--------+ Boolean Data Types ================== @@ -422,8 +464,8 @@ A boolean can be represented by constant value ``TRUE`` or ``FALSE``. Besides, c ... true, FALSE, ... CAST('TRUE' AS boolean), CAST('false' AS boolean); fetched rows / total rows = 1/1 - +--------+---------+---------------------------+----------------------------+ - | true | FALSE | CAST('TRUE' AS boolean) | CAST('false' AS boolean) | - |--------+---------+---------------------------+----------------------------| - | True | False | True | False | - +--------+---------+---------------------------+----------------------------+ + +------+-------+-------------------------+--------------------------+ + | true | FALSE | CAST('TRUE' AS boolean) | CAST('false' AS boolean) | + |------+-------+-------------------------+--------------------------| + | True | False | True | False | + +------+-------+-------------------------+--------------------------+ diff --git a/docs/user/general/identifiers.rst b/docs/user/general/identifiers.rst index fad2fa4b23..033525f99f 100644 --- a/docs/user/general/identifiers.rst +++ b/docs/user/general/identifiers.rst @@ -40,14 +40,14 @@ Here are examples for using index pattern directly without quotes:: os> SELECT * FROM *cc*nts; fetched rows / total rows = 4/4 - +------------------+-------------+----------------------+-----------+----------+--------+------------+---------+-------+-----------------------+------------+ - | account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | - |------------------+-------------+----------------------+-----------+----------+--------+------------+---------+-------+-----------------------+------------| - | 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | - | 6 | Hattie | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | Bond | - | 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates | - | 18 | Dale | 467 Hutchinson Court | 4180 | M | Orick | null | MD | 33 | daleadams@boink.com | Adams | - +------------------+-------------+----------------------+-----------+----------+--------+------------+---------+-------+-----------------------+------------+ + 
+----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ + | account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | + |----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------| + | 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | + | 6 | Hattie | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | Bond | + | 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates | + | 18 | Dale | 467 Hutchinson Court | 4180 | M | Orick | null | MD | 33 | daleadams@boink.com | Adams | + +----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ Delimited Identifiers @@ -76,14 +76,14 @@ Here are examples for quoting an index name by back ticks:: os> SELECT * FROM `accounts`; fetched rows / total rows = 4/4 - +------------------+-------------+----------------------+-----------+----------+--------+------------+---------+-------+-----------------------+------------+ - | account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | - |------------------+-------------+----------------------+-----------+----------+--------+------------+---------+-------+-----------------------+------------| - | 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | - | 6 | Hattie | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | Bond | - | 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates | - | 18 | Dale | 467 Hutchinson Court | 4180 | M | Orick | null | MD | 33 | daleadams@boink.com | Adams | - +------------------+-------------+----------------------+-----------+----------+--------+------------+---------+-------+-----------------------+------------+ + +----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ + | account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | + |----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------| + | 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | + | 6 | Hattie | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | Bond | + | 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates | + | 18 | Dale | 467 Hutchinson Court | 4180 | M | Orick | null | MD | 33 | daleadams@boink.com | Adams | + +----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ Case Sensitivity @@ -121,23 +121,23 @@ The first example is to show a column name qualified by full table name original os> SELECT city, accounts.age, ABS(accounts.balance) FROM accounts WHERE accounts.age < 30; fetched rows / total rows = 1/1 - +--------+-------+-------------------------+ - | city | age | ABS(accounts.balance) | - |--------+-------+-------------------------| - | Nogal | 28 | 32838 | - +--------+-------+-------------------------+ + 
+-------+-----+-----------------------+ + | city | age | ABS(accounts.balance) | + |-------+-----+-----------------------| + | Nogal | 28 | 32838 | + +-------+-----+-----------------------+ The second example is to show a field name qualified by index alias specified. Similarly, the alias qualifier is optional in this case:: os> SELECT city, acc.age, ABS(acc.balance) FROM accounts AS acc WHERE acc.age > 30; fetched rows / total rows = 3/3 - +--------+-------+--------------------+ - | city | age | ABS(acc.balance) | - |--------+-------+--------------------| - | Brogan | 32 | 39225 | - | Dante | 36 | 5686 | - | Orick | 33 | 4180 | - +--------+-------+--------------------+ + +--------+-----+------------------+ + | city | age | ABS(acc.balance) | + |--------+-----+------------------| + | Brogan | 32 | 39225 | + | Dante | 36 | 5686 | + | Orick | 33 | 4180 | + +--------+-----+------------------+ Note that in both examples above, the qualifier is removed in response. This happens only when identifiers selected is a simple field name. In other cases, expressions rather than an atom field, the column name in response is exactly the same as the text in ``SELECT``clause. @@ -160,22 +160,22 @@ Query wildcard indices:: os> SELECT count(*) as cnt FROM acc*; fetched rows / total rows = 1/1 - +-------+ - | cnt | - |-------| - | 5 | - +-------+ + +-----+ + | cnt | + |-----| + | 5 | + +-----+ Query delimited multiple indices seperated by ``,``:: os> SELECT count(*) as cnt FROM `accounts,account2`; fetched rows / total rows = 1/1 - +-------+ - | cnt | - |-------| - | 5 | - +-------+ + +-----+ + | cnt | + |-----| + | 5 | + +-----+ @@ -241,7 +241,7 @@ tableName = ``logs.12.13.1``. 3. ``my_prometheus.http_requests_total`` -datasourceName = ```my_prometheus`` [Is in the list of datasources configured]. +datasourceName = ``my_prometheus`` [Is in the list of datasources configured]. schemaName = ``default`` [No supported schema found, so default to `default`]. 
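To see the identifier resolution described above end to end, here is a minimal sketch, assuming the standard ``_plugins/_ppl`` endpoint and a ``my_prometheus`` datasource configured as in the example above: ``my_prometheus`` resolves to the datasource, the schema falls back to ``default``, and the remainder is treated as the table name::

    POST _plugins/_ppl
    {
      "query" : "source = my_prometheus.prometheus_http_requests_total | where code = '200'"
    }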
diff --git a/docs/user/general/values.rst b/docs/user/general/values.rst index 178609f175..f675b42e75 100644 --- a/docs/user/general/values.rst +++ b/docs/user/general/values.rst @@ -19,14 +19,14 @@ Here is an example, Nanette doesn't have email field and Dail has employer filed os> SELECT firstname, employer, email FROM accounts; fetched rows / total rows = 4/4 - +-------------+------------+-----------------------+ - | firstname | employer | email | - |-------------+------------+-----------------------| - | Amber | Pyrami | amberduke@pyrami.com | - | Hattie | Netagy | hattiebond@netagy.com | - | Nanette | Quility | null | - | Dale | null | daleadams@boink.com | - +-------------+------------+-----------------------+ + +-----------+----------+-----------------------+ + | firstname | employer | email | + |-----------+----------+-----------------------| + | Amber | Pyrami | amberduke@pyrami.com | + | Hattie | Netagy | hattiebond@netagy.com | + | Nanette | Quility | null | + | Dale | null | daleadams@boink.com | + +-----------+----------+-----------------------+ General NULL and MISSING Values Handling @@ -37,14 +37,14 @@ Here is an example:: os> SELECT firstname, employer LIKE 'Quility', email LIKE '%com' FROM accounts; fetched rows / total rows = 4/4 - +-------------+---------------------------+---------------------+ - | firstname | employer LIKE 'Quility' | email LIKE '%com' | - |-------------+---------------------------+---------------------| - | Amber | False | True | - | Hattie | False | True | - | Nanette | True | null | - | Dale | null | True | - +-------------+---------------------------+---------------------+ + +-----------+-------------------------+-------------------+ + | firstname | employer LIKE 'Quility' | email LIKE '%com' | + |-----------+-------------------------+-------------------| + | Amber | False | True | + | Hattie | False | True | + | Nanette | True | null | + | Dale | null | True | + +-----------+-------------------------+-------------------+ Special NULL and MISSING Values Handling ---------------------------------------- diff --git a/docs/user/interfaces/asyncqueryinterface.rst b/docs/user/interfaces/asyncqueryinterface.rst index af49a59838..9b889f7f97 100644 --- a/docs/user/interfaces/asyncqueryinterface.rst +++ b/docs/user/interfaces/asyncqueryinterface.rst @@ -68,6 +68,8 @@ Async Query Creation API ====================================== If security plugin is enabled, this API can only be invoked by users with permission ``cluster:admin/opensearch/ql/async_query/create``. +Limitation: Spark SQL queries that create User-Defined Functions (UDFs) are not allowed. + HTTP URI: ``_plugins/_async_query`` HTTP VERB: ``POST`` diff --git a/docs/user/limitations/limitations.rst b/docs/user/limitations/limitations.rst index 8ce75a0e25..22ad3c2a17 100644 --- a/docs/user/limitations/limitations.rst +++ b/docs/user/limitations/limitations.rst @@ -101,3 +101,32 @@ The response in JDBC format with cursor id:: } The query with `aggregation` and `join` does not support pagination for now. + +Limitations on Using Multi-valued Fields +======================================== + +OpenSearch does not natively support the ARRAY data type but does allow multi-value fields implicitly. The +SQL/PPL plugin adheres strictly to the data type semantics defined in index mappings. When parsing OpenSearch +responses, it expects data to match the declared type and does not account for data in array format. 
If the +plugins.query.field_type_tolerance setting is enabled, the SQL/PPL plugin will handle array datasets by returning +scalar data types, allowing basic queries (e.g., SELECT * FROM tbl WHERE condition). However, using multi-value +fields in expressions or functions will result in exceptions. If this setting is disabled or absent, only the +first element of an array is returned, preserving the default behavior. + +For example, the following query tries to calculate the absolute value of a field that contains arrays of +longs:: + + POST _plugins/_sql/ + { + "query": "SELECT id, ABS(long_array) FROM multi_value_long" + } +The response in JSON format is:: + + { + "error": { + "reason": "Invalid SQL query", + "details": "invalid to get longValue from value of type ARRAY", + "type": "ExpressionEvaluationException" + }, + "status": 400 + } diff --git a/docs/user/optimization/optimization.rst b/docs/user/optimization/optimization.rst index 8ab998309d..454c9ec066 100644 --- a/docs/user/optimization/optimization.rst +++ b/docs/user/optimization/optimization.rst @@ -44,7 +44,7 @@ The consecutive Filter operator will be merged as one Filter operator:: { "name": "OpenSearchIndexScan", "description": { - "request": "OpenSearchQueryRequest(indexName=accounts, sourceBuilder={\"from\":0,\"size\":200,\"timeout\":\"1m\",\"query\":{\"bool\":{\"filter\":[{\"range\":{\"age\":{\"from\":null,\"to\":20,\"include_lower\":true,\"include_upper\":false,\"boost\":1.0}}},{\"range\":{\"age\":{\"from\":10,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, searchDone=false)" + "request": "OpenSearchQueryRequest(indexName=accounts, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"bool\":{\"filter\":[{\"range\":{\"age\":{\"from\":null,\"to\":20,\"include_lower\":true,\"include_upper\":false,\"boost\":1.0}}},{\"range\":{\"age\":{\"from\":10,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, searchDone=false)" }, "children": [] } @@ -71,7 +71,7 @@ The Filter operator should be push down under Sort operator:: { "name": "OpenSearchIndexScan", "description": { - "request": "OpenSearchQueryRequest(indexName=accounts, sourceBuilder={\"from\":0,\"size\":200,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":null,\"to\":20,\"include_lower\":true,\"include_upper\":false,\"boost\":1.0}}},\"_source\":{\"includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, searchDone=false)" + "request": "OpenSearchQueryRequest(indexName=accounts, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":null,\"to\":20,\"include_lower\":true,\"include_upper\":false,\"boost\":1.0}}},\"_source\":{\"includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, searchDone=false)" }, "children": [] } @@ -102,7 +102,7 @@ The Project list will push down to Query DSL to `filter the source `_. +Without sort push down optimization, the sort operator will sort the result from child operator. By default, only 10000 docs will extracted from the source index, `you can change this value by using size_limit setting <../admin/settings.rst#opensearch-query-size-limit>`_. 
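The note above points to ``plugins.query.size_limit`` as the setting that bounds how many documents are pulled from the source index when an operator cannot be pushed down. A minimal sketch of adjusting it, assuming the plugin settings endpoint described in the settings documentation (the value ``200`` is only an example)::

    PUT _plugins/_query/settings
    {
      "transient" : {
        "plugins.query.size_limit" : 200
      }
    }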
diff --git a/docs/user/ppl/admin/connectors/prometheus_connector.rst b/docs/user/ppl/admin/connectors/prometheus_connector.rst index 1dfe6cda22..812df4f894 100644 --- a/docs/user/ppl/admin/connectors/prometheus_connector.rst +++ b/docs/user/ppl/admin/connectors/prometheus_connector.rst @@ -87,16 +87,16 @@ Sample Example:: > source = my_prometheus.prometheus_http_requests_total; - +------------+------------------------+--------------------------------+---------------+-------------+-------------+ - | @value | @timestamp | handler | code | instance | job | - |------------+------------------------+--------------------------------+---------------+-------------+-------------| - | 5 | "2022-11-03 07:18:14" | "/-/ready" | 200 | 192.15.1.1 | prometheus | - | 3 | "2022-11-03 07:18:24" | "/-/ready" | 200 | 192.15.1.1 | prometheus | - | 7 | "2022-11-03 07:18:34" | "/-/ready" | 200 | 192.15.1.1 | prometheus | - | 2 | "2022-11-03 07:18:44" | "/-/ready" | 400 | 192.15.2.1 | prometheus | - | 9 | "2022-11-03 07:18:54" | "/-/promql" | 400 | 192.15.2.1 | prometheus | - | 11 | "2022-11-03 07:18:64" |"/-/metrics" | 500 | 192.15.2.1 | prometheus | - +------------+------------------------+--------------------------------+---------------+-------------+-------------+ + +--------+-----------------------+--------------+------+------------+------------+ + | @value | @timestamp | handler | code | instance | job | + |--------+-----------------------+--------------+------+------------+------------| + | 5 | "2022-11-03 07:18:14" | "/-/ready" | 200 | 192.15.1.1 | prometheus | + | 3 | "2022-11-03 07:18:24" | "/-/ready" | 200 | 192.15.1.1 | prometheus | + | 7 | "2022-11-03 07:18:34" | "/-/ready" | 200 | 192.15.1.1 | prometheus | + | 2 | "2022-11-03 07:18:44" | "/-/ready" | 400 | 192.15.2.1 | prometheus | + | 9 | "2022-11-03 07:18:54" | "/-/promql" | 400 | 192.15.2.1 | prometheus | + | 11 | "2022-11-03 07:18:64" | "/-/metrics" | 500 | 192.15.2.1 | prometheus | + +--------+-----------------------+--------------+------+------------+------------+ @@ -119,30 +119,30 @@ Example queries 1. 
Metric Selection Query:: > source = my_prometheus.prometheus_http_requests_total - +------------+------------------------+--------------------------------+---------------+-------------+-------------+ - | @value | @timestamp | handler | code | instance | job | - |------------+------------------------+--------------------------------+---------------+-------------+-------------| - | 5 | "2022-11-03 07:18:14" | "/-/ready" | 200 | 192.15.1.1 | prometheus | - | 3 | "2022-11-03 07:18:24" | "/-/ready" | 200 | 192.15.1.1 | prometheus | - | 7 | "2022-11-03 07:18:34" | "/-/ready" | 200 | 192.15.1.1 | prometheus | - | 2 | "2022-11-03 07:18:44" | "/-/ready" | 400 | 192.15.2.1 | prometheus | - | 9 | "2022-11-03 07:18:54" | "/-/promql" | 400 | 192.15.2.1 | prometheus | - | 11 | "2022-11-03 07:18:64" |"/-/metrics" | 500 | 192.15.2.1 | prometheus | - +------------+------------------------+--------------------------------+---------------+-------------+-------------+ + +--------+-----------------------+--------------+------+------------+------------+ + | @value | @timestamp | handler | code | instance | job | + |--------+-----------------------+--------------+------+------------+------------| + | 5 | "2022-11-03 07:18:14" | "/-/ready" | 200 | 192.15.1.1 | prometheus | + | 3 | "2022-11-03 07:18:24" | "/-/ready" | 200 | 192.15.1.1 | prometheus | + | 7 | "2022-11-03 07:18:34" | "/-/ready" | 200 | 192.15.1.1 | prometheus | + | 2 | "2022-11-03 07:18:44" | "/-/ready" | 400 | 192.15.2.1 | prometheus | + | 9 | "2022-11-03 07:18:54" | "/-/promql" | 400 | 192.15.2.1 | prometheus | + | 11 | "2022-11-03 07:18:64" | "/-/metrics" | 500 | 192.15.2.1 | prometheus | + +--------+-----------------------+--------------+------+------------+------------+ 2. Metric Selecting Query with specific dimensions:: > source = my_prometheus.prometheus_http_requests_total | where handler='/-/ready' and code='200' - +------------+------------------------+--------------------------------+---------------+-------------+-------------+ - | @value | @timestamp | handler | code | instance | job | - |------------+------------------------+--------------------------------+---------------+-------------+-------------| - | 5 | "2022-11-03 07:18:14" | "/-/ready" | 200 | 192.15.1.1 | prometheus | - | 3 | "2022-11-03 07:18:24" | "/-/ready" | 200 | 192.15.1.1 | prometheus | - | 7 | "2022-11-03 07:18:34" | "/-/ready" | 200 | 192.15.1.1 | prometheus | - | 2 | "2022-11-03 07:18:44" | "/-/ready" | 200 | 192.15.2.1 | prometheus | - | 9 | "2022-11-03 07:18:54" | "/-/ready" | 200 | 192.15.2.1 | prometheus | - | 11 | "2022-11-03 07:18:64" | "/-/ready" | 200 | 192.15.2.1 | prometheus | - +------------+------------------------+--------------------------------+---------------+-------------+-------------+ + +--------+-----------------------+------------+------+------------+------------+ + | @value | @timestamp | handler | code | instance | job | + |--------+-----------------------+------------+------+------------+------------| + | 5 | "2022-11-03 07:18:14" | "/-/ready" | 200 | 192.15.1.1 | prometheus | + | 3 | "2022-11-03 07:18:24" | "/-/ready" | 200 | 192.15.1.1 | prometheus | + | 7 | "2022-11-03 07:18:34" | "/-/ready" | 200 | 192.15.1.1 | prometheus | + | 2 | "2022-11-03 07:18:44" | "/-/ready" | 200 | 192.15.2.1 | prometheus | + | 9 | "2022-11-03 07:18:54" | "/-/ready" | 200 | 192.15.2.1 | prometheus | + | 11 | "2022-11-03 07:18:64" | "/-/ready" | 200 | 192.15.2.1 | prometheus | + +--------+-----------------------+------------+------+------------+------------+ 3. 
Average aggregation on a metric:: @@ -199,16 +199,16 @@ PromQL Support for prometheus Connector Example:: > source=my_prometheus.query_range('prometheus_http_requests_total', 1686694425, 1686700130, 14) - +------------+------------------------+--------------------------------+---------------+-------------+-------------+ - | @value | @timestamp | handler | code | instance | job | - |------------+------------------------+--------------------------------+---------------+-------------+-------------| - | 5 | "2022-11-03 07:18:14" | "/-/ready" | 200 | 192.15.1.1 | prometheus | - | 3 | "2022-11-03 07:18:24" | "/-/ready" | 200 | 192.15.1.1 | prometheus | - | 7 | "2022-11-03 07:18:34" | "/-/ready" | 200 | 192.15.1.1 | prometheus | - | 2 | "2022-11-03 07:18:44" | "/-/ready" | 400 | 192.15.2.1 | prometheus | - | 9 | "2022-11-03 07:18:54" | "/-/promql" | 400 | 192.15.2.1 | prometheus | - | 11 | "2022-11-03 07:18:64" |"/-/metrics" | 500 | 192.15.2.1 | prometheus | - +------------+------------------------+--------------------------------+---------------+-------------+-------------+ + +--------+-----------------------+--------------+------+------------+------------+ + | @value | @timestamp | handler | code | instance | job | + |--------+-----------------------+--------------+------+------------+------------| + | 5 | "2022-11-03 07:18:14" | "/-/ready" | 200 | 192.15.1.1 | prometheus | + | 3 | "2022-11-03 07:18:24" | "/-/ready" | 200 | 192.15.1.1 | prometheus | + | 7 | "2022-11-03 07:18:34" | "/-/ready" | 200 | 192.15.1.1 | prometheus | + | 2 | "2022-11-03 07:18:44" | "/-/ready" | 400 | 192.15.2.1 | prometheus | + | 9 | "2022-11-03 07:18:54" | "/-/promql" | 400 | 192.15.2.1 | prometheus | + | 11 | "2022-11-03 07:18:64" | "/-/metrics" | 500 | 192.15.2.1 | prometheus | + +--------+-----------------------+--------------+------+------------+------------+ Prometheus Connector Table Functions diff --git a/docs/user/ppl/admin/connectors/s3glue_connector.rst b/docs/user/ppl/admin/connectors/s3glue_connector.rst index 5e91df70e5..48f19a9d1e 100644 --- a/docs/user/ppl/admin/connectors/s3glue_connector.rst +++ b/docs/user/ppl/admin/connectors/s3glue_connector.rst @@ -42,7 +42,9 @@ Glue Connector Properties. * Basic Auth required ``glue.indexstore.opensearch.auth.username`` and ``glue.indexstore.opensearch.auth.password`` * AWSSigV4 Auth requires ``glue.indexstore.opensearch.auth.region`` and ``glue.auth.role_arn`` * ``glue.indexstore.opensearch.region`` [Required for awssigv4 auth] -* ``glue.lakeformation.enabled`` determines whether to enable lakeformation for queries. Default value is ``"false"`` if not specified +* ``glue.iceberg.enabled`` determines whether to enable Iceberg for the session. Default value is ``"false"`` if not specified. +* ``glue.lakeformation.enabled`` determines whether to enable Lake Formation for queries when Iceberg is also enabled. If Iceberg is not enabled, then this property has no effect. Default value is ``"false"`` if not specified. +* ``glue.lakeformation.session_tag`` what session tag to use when assuming the data source role. This property is required when both Iceberg and Lake Formation are enabled. 
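The three properties above interact: ``glue.iceberg.enabled`` switches Iceberg support on, ``glue.lakeformation.enabled`` only takes effect when Iceberg is enabled, and ``glue.lakeformation.session_tag`` is required when both are enabled. A minimal sketch of a configuration combining them, assuming the ``s3glue`` connector type and using placeholder values for the role, endpoint and tag (the baseline sample configuration follows in the next section)::

    [{
        "name" : "my_glue",
        "connector": "s3glue",
        "properties" : {
            "glue.auth.type": "iam_role",
            "glue.auth.role_arn": "role_arn",
            "glue.indexstore.opensearch.uri": "http://adsasdf.amazonopensearch.com:9200",
            "glue.indexstore.opensearch.auth": "awssigv4",
            "glue.indexstore.opensearch.auth.region": "us-east-1",
            "glue.iceberg.enabled": "true",
            "glue.lakeformation.enabled": "true",
            "glue.lakeformation.session_tag": "example_session_tag"
        },
        "resultIndex": "query_execution_result"
    }]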
Sample Glue dataSource configuration ======================================== @@ -71,8 +73,7 @@ Glue datasource configuration:: "glue.auth.role_arn": "role_arn", "glue.indexstore.opensearch.uri": "http://adsasdf.amazonopensearch.com:9200", "glue.indexstore.opensearch.auth" :"awssigv4", - "glue.indexstore.opensearch.auth.region" :"awssigv4", - "glue.lakeformation.enabled": "true" + "glue.indexstore.opensearch.auth.region" :"us-east-1" }, "resultIndex": "query_execution_result" }] diff --git a/docs/user/ppl/admin/connectors/security_lake_connector.rst b/docs/user/ppl/admin/connectors/security_lake_connector.rst new file mode 100644 index 0000000000..6afddca131 --- /dev/null +++ b/docs/user/ppl/admin/connectors/security_lake_connector.rst @@ -0,0 +1,78 @@ +.. highlight:: sh + +======================= +Security Lake Connector +======================= + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 1 + + +Introduction +============ + +The Security Lake connector provides a way to query Security Lake tables. + +Required resources for Security Lake Connector +============================================== +* ``EMRServerless Spark Execution Engine Config Setting``: Since we execute Security Lake queries on top of the Spark execution engine, we require this configuration. + More details: `ExecutionEngine Config <../../../interfaces/asyncqueryinterface.rst#id2>`_ +* ``S3``: This is where the data lies. +* ``Glue``: Metadata store. Glue takes care of table metadata. +* ``Lake Formation``: AWS service that performs authorization on Security Lake tables. +* ``Security Lake``: AWS service that orchestrates creation of S3 files, Glue tables, and Lake Formation permissions. +* ``OpenSearch IndexStore``: The index for S3 data lies in OpenSearch and also acts as a temporary buffer for query results. + +We currently only support emr-serverless as the Spark execution engine and Glue as the metadata store. We will add more support in the future. + +Glue Connector Properties. + +* ``resultIndex`` is a new parameter specific to the Glue connector. It stores the results of queries executed on the data source. If unavailable, it defaults to ``.query_execution_result``. +* ``glue.auth.type`` [Required] + * This parameter provides the authentication type information required for the execution engine to connect to Glue. + * The Security Lake connector currently only supports ``iam_role`` authentication, and the parameter below is required. + * ``glue.auth.role_arn`` +* ``glue.indexstore.opensearch.*`` [Required] + * This parameter provides the OpenSearch domain host information for the Glue connector. This OpenSearch instance is used for writing index data back. + * ``glue.indexstore.opensearch.uri`` [Required] + * ``glue.indexstore.opensearch.auth`` [Required] + * Accepted values include ["noauth", "basicauth", "awssigv4"] + * Basic Auth requires ``glue.indexstore.opensearch.auth.username`` and ``glue.indexstore.opensearch.auth.password`` + * AWSSigV4 Auth requires ``glue.indexstore.opensearch.auth.region`` and ``glue.auth.role_arn`` + * ``glue.indexstore.opensearch.region`` [Required for awssigv4 auth] +* ``glue.lakeformation.session_tag`` [Required] + * The session tag to use when assuming the data source role.
+ +Sample Glue dataSource configuration +======================================== + +Glue datasource configuration:: + + [{ + "name" : "my_sl", + "connector": "security_lake", + "properties" : { + "glue.auth.type": "iam_role", + "glue.auth.role_arn": "role_arn", + "glue.indexstore.opensearch.uri": "http://adsasdf.amazonopensearch.com:9200", + "glue.indexstore.opensearch.auth" :"awssigv4", + "glue.indexstore.opensearch.auth.region" :"us-east-1", + "glue.lakeformation.session_tag": "sesson_tag" + }, + "resultIndex": "query_execution_result" + }] + +Sample Security Lake datasource queries APIS +===================================== + +Sample Queries + +* Select Query : ``select * from mysl.amazon_security_lake_glue_db_eu_west_1.amazon_security_lake_table_eu_west_1_vpc_flow_2_0 limit 1`` +* Create Covering Index Query: ``create index srcip_time on mysl.amazon_security_lake_glue_db_eu_west_1.amazon_security_lake_table_eu_west_1_vpc_flow_2_0 (src_endpoint.ip, time) WITH (auto_refresh=true)`` + +These queries would work only top of async queries. Documentation: `Async Query APIs <../../../interfaces/asyncqueryinterface.rst>`_ + +Documentation for Index Queries: https://github.com/opensearch-project/opensearch-spark/blob/main/docs/index.md diff --git a/docs/user/ppl/admin/cross_cluster_search.rst b/docs/user/ppl/admin/cross_cluster_search.rst index f57ea288e8..4b267a9340 100644 --- a/docs/user/ppl/admin/cross_cluster_search.rst +++ b/docs/user/ppl/admin/cross_cluster_search.rst @@ -40,14 +40,14 @@ Example PPL query:: os> source=my_remote_cluster:accounts; fetched rows / total rows = 4/4 - +------------------+-------------+----------------------+-----------+----------+--------+------------+---------+-------+-----------------------+------------+ - | account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | - |------------------+-------------+----------------------+-----------+----------+--------+------------+---------+-------+-----------------------+------------| - | 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | - | 6 | Hattie | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | Bond | - | 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates | - | 18 | Dale | 467 Hutchinson Court | 4180 | M | Orick | null | MD | 33 | daleadams@boink.com | Adams | - +------------------+-------------+----------------------+-----------+----------+--------+------------+---------+-------+-----------------------+------------+ + +----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ + | account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | + |----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------| + | 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | + | 6 | Hattie | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | Bond | + | 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates | + | 18 | Dale | 467 Hutchinson Court | 4180 | M | Orick | null | MD | 33 | daleadams@boink.com | Adams | + 
+----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ Limitation diff --git a/docs/user/ppl/admin/settings.rst b/docs/user/ppl/admin/settings.rst index ad56408693..28e6897d3d 100644 --- a/docs/user/ppl/admin/settings.rst +++ b/docs/user/ppl/admin/settings.rst @@ -125,9 +125,9 @@ plugins.query.size_limit Description ----------- -The size configure the maximum amount of documents to be pull from OpenSearch. The default value is: 200 +The size configure the maximum amount of documents to be pull from OpenSearch. The default value is: 10000 -Notes: This setting will impact the correctness of the aggregation operation, for example, there are 1000 docs in the index, by default, only 200 docs will be extract from index and do aggregation. +Notes: This setting will impact the correctness of the aggregation operation, for example, there are 1000 docs in the index, if you change the value to 200, only 200 docs will be extract from index and do aggregation. Example ------- diff --git a/docs/user/ppl/cmd/ad.rst b/docs/user/ppl/cmd/ad.rst index 103c7f7483..5d7a572c96 100644 --- a/docs/user/ppl/cmd/ad.rst +++ b/docs/user/ppl/cmd/ad.rst @@ -50,11 +50,11 @@ PPL query:: > source=nyc_taxi | fields value, timestamp | AD time_field='timestamp' | where value=10844.0 fetched rows / total rows = 1/1 - +---------+---------------------+---------+-----------------+ - | value | timestamp | score | anomaly_grade | - |---------+---------------------+---------+-----------------| - | 10844.0 | 2014-07-01 00:00:00 | 0.0 | 0.0 | - +---------+---------------------+---------+-----------------+ + +---------+---------------------+-------+---------------+ + | value | timestamp | score | anomaly_grade | + |---------+---------------------+-------+---------------| + | 10844.0 | 2014-07-01 00:00:00 | 0.0 | 0.0 | + +---------+---------------------+-------+---------------+ Example 2: Detecting events in New York City from taxi ridership data with time-series data independently with each category ============================================================================================================================ @@ -65,12 +65,12 @@ PPL query:: > source=nyc_taxi | fields category, value, timestamp | AD time_field='timestamp' category_field='category' | where value=10844.0 or value=6526.0 fetched rows / total rows = 2/2 - +------------+---------+---------------------+---------+-----------------+ - | category | value | timestamp | score | anomaly_grade | - |------------+---------+---------------------+---------+-----------------| - | night | 10844.0 | 2014-07-01 00:00:00 | 0.0 | 0.0 | - | day | 6526.0 | 2014-07-01 06:00:00 | 0.0 | 0.0 | - +------------+---------+---------------------+---------+-----------------+ + +----------+---------+---------------------+-------+---------------+ + | category | value | timestamp | score | anomaly_grade | + |----------+---------+---------------------+-------+---------------| + | night | 10844.0 | 2014-07-01 00:00:00 | 0.0 | 0.0 | + | day | 6526.0 | 2014-07-01 06:00:00 | 0.0 | 0.0 | + +----------+---------+---------------------+-------+---------------+ Example 3: Detecting events in New York City from taxi ridership data with non-time-series data @@ -82,11 +82,11 @@ PPL query:: > source=nyc_taxi | fields value | AD | where value=10844.0 fetched rows / total rows = 1/1 - +---------+---------+-------------+ - | value | score | anomalous | - |---------+---------+-------------| - | 10844.0 | 0.0 | False | - 
+---------+---------+-------------+ + +---------+-------+-----------+ + | value | score | anomalous | + |---------+-------+-----------| + | 10844.0 | 0.0 | False | + +---------+-------+-----------+ Example 4: Detecting events in New York City from taxi ridership data with non-time-series data independently with each category ================================================================================================================================ @@ -97,9 +97,9 @@ PPL query:: > source=nyc_taxi | fields category, value | AD category_field='category' | where value=10844.0 or value=6526.0 fetched rows / total rows = 2/2 - +------------+---------+---------+-------------+ - | category | value | score | anomalous | - |------------+---------+---------+-------------| - | night | 10844.0 | 0.0 | False | - | day | 6526.0 | 0.0 | False | - +------------+---------+---------+-------------+ + +----------+---------+-------+-----------+ + | category | value | score | anomalous | + |----------+---------+-------+-----------| + | night | 10844.0 | 0.0 | False | + | day | 6526.0 | 0.0 | False | + +----------+---------+-------+-----------+ diff --git a/docs/user/ppl/cmd/dedup.rst b/docs/user/ppl/cmd/dedup.rst index ebceb9e0bd..362d1637f7 100644 --- a/docs/user/ppl/cmd/dedup.rst +++ b/docs/user/ppl/cmd/dedup.rst @@ -34,12 +34,12 @@ PPL query:: os> source=accounts | dedup gender | fields account_number, gender; fetched rows / total rows = 2/2 - +------------------+----------+ - | account_number | gender | - |------------------+----------| - | 1 | M | - | 13 | F | - +------------------+----------+ + +----------------+--------+ + | account_number | gender | + |----------------+--------| + | 1 | M | + | 13 | F | + +----------------+--------+ Example 2: Keep 2 duplicates documents ====================================== @@ -50,13 +50,13 @@ PPL query:: os> source=accounts | dedup 2 gender | fields account_number, gender; fetched rows / total rows = 3/3 - +------------------+----------+ - | account_number | gender | - |------------------+----------| - | 1 | M | - | 6 | M | - | 13 | F | - +------------------+----------+ + +----------------+--------+ + | account_number | gender | + |----------------+--------| + | 1 | M | + | 6 | M | + | 13 | F | + +----------------+--------+ Example 3: Keep or Ignore the empty field by default ============================================ @@ -67,14 +67,14 @@ PPL query:: os> source=accounts | dedup email keepempty=true | fields account_number, email; fetched rows / total rows = 4/4 - +------------------+-----------------------+ - | account_number | email | - |------------------+-----------------------| - | 1 | amberduke@pyrami.com | - | 6 | hattiebond@netagy.com | - | 13 | null | - | 18 | daleadams@boink.com | - +------------------+-----------------------+ + +----------------+-----------------------+ + | account_number | email | + |----------------+-----------------------| + | 1 | amberduke@pyrami.com | + | 6 | hattiebond@netagy.com | + | 13 | null | + | 18 | daleadams@boink.com | + +----------------+-----------------------+ The example show dedup the document by ignore the empty value field. 
@@ -83,13 +83,13 @@ PPL query:: os> source=accounts | dedup email | fields account_number, email; fetched rows / total rows = 3/3 - +------------------+-----------------------+ - | account_number | email | - |------------------+-----------------------| - | 1 | amberduke@pyrami.com | - | 6 | hattiebond@netagy.com | - | 18 | daleadams@boink.com | - +------------------+-----------------------+ + +----------------+-----------------------+ + | account_number | email | + |----------------+-----------------------| + | 1 | amberduke@pyrami.com | + | 6 | hattiebond@netagy.com | + | 18 | daleadams@boink.com | + +----------------+-----------------------+ Example 4: Dedup in consecutive document @@ -101,13 +101,13 @@ PPL query:: os> source=accounts | dedup gender consecutive=true | fields account_number, gender; fetched rows / total rows = 3/3 - +------------------+----------+ - | account_number | gender | - |------------------+----------| - | 1 | M | - | 13 | F | - | 18 | M | - +------------------+----------+ + +----------------+--------+ + | account_number | gender | + |----------------+--------| + | 1 | M | + | 13 | F | + | 18 | M | + +----------------+--------+ Limitation ========== diff --git a/docs/user/ppl/cmd/describe.rst b/docs/user/ppl/cmd/describe.rst index a0ecbd3169..2b03ceda57 100644 --- a/docs/user/ppl/cmd/describe.rst +++ b/docs/user/ppl/cmd/describe.rst @@ -33,21 +33,21 @@ PPL query:: os> describe accounts; fetched rows / total rows = 11/11 - +----------------+---------------+--------------+----------------+-------------+-------------+---------------+-----------------+------------------+------------------+------------+-----------+--------------+-----------------+--------------------+---------------------+--------------------+---------------+-----------------+----------------+---------------+--------------------+--------------------+----------------------+ - | TABLE_CAT | TABLE_SCHEM | TABLE_NAME | COLUMN_NAME | DATA_TYPE | TYPE_NAME | COLUMN_SIZE | BUFFER_LENGTH | DECIMAL_DIGITS | NUM_PREC_RADIX | NULLABLE | REMARKS | COLUMN_DEF | SQL_DATA_TYPE | SQL_DATETIME_SUB | CHAR_OCTET_LENGTH | ORDINAL_POSITION | IS_NULLABLE | SCOPE_CATALOG | SCOPE_SCHEMA | SCOPE_TABLE | SOURCE_DATA_TYPE | IS_AUTOINCREMENT | IS_GENERATEDCOLUMN | - |----------------+---------------+--------------+----------------+-------------+-------------+---------------+-----------------+------------------+------------------+------------+-----------+--------------+-----------------+--------------------+---------------------+--------------------+---------------+-----------------+----------------+---------------+--------------------+--------------------+----------------------| - | docTestCluster | null | accounts | account_number | null | long | null | null | null | 10 | 2 | null | null | null | null | null | 0 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | firstname | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 1 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | address | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 2 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | balance | null | long | null | null | null | 10 | 2 | null | null | null | null | null | 3 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | gender | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 4 | | null | null | null | null | NO | | - | 
docTestCluster | null | accounts | city | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 5 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | employer | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 6 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | state | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 7 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | age | null | long | null | null | null | 10 | 2 | null | null | null | null | null | 8 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | email | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 9 | | null | null | null | null | NO | | - | docTestCluster | null | accounts | lastname | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 10 | | null | null | null | null | NO | | - +----------------+---------------+--------------+----------------+-------------+-------------+---------------+-----------------+------------------+------------------+------------+-----------+--------------+-----------------+--------------------+---------------------+--------------------+---------------+-----------------+----------------+---------------+--------------------+--------------------+----------------------+ + +----------------+-------------+------------+----------------+-----------+-----------+-------------+---------------+----------------+----------------+----------+---------+------------+---------------+------------------+-------------------+------------------+-------------+---------------+--------------+-------------+------------------+------------------+--------------------+ + | TABLE_CAT | TABLE_SCHEM | TABLE_NAME | COLUMN_NAME | DATA_TYPE | TYPE_NAME | COLUMN_SIZE | BUFFER_LENGTH | DECIMAL_DIGITS | NUM_PREC_RADIX | NULLABLE | REMARKS | COLUMN_DEF | SQL_DATA_TYPE | SQL_DATETIME_SUB | CHAR_OCTET_LENGTH | ORDINAL_POSITION | IS_NULLABLE | SCOPE_CATALOG | SCOPE_SCHEMA | SCOPE_TABLE | SOURCE_DATA_TYPE | IS_AUTOINCREMENT | IS_GENERATEDCOLUMN | + |----------------+-------------+------------+----------------+-----------+-----------+-------------+---------------+----------------+----------------+----------+---------+------------+---------------+------------------+-------------------+------------------+-------------+---------------+--------------+-------------+------------------+------------------+--------------------| + | docTestCluster | null | accounts | account_number | null | long | null | null | null | 10 | 2 | null | null | null | null | null | 0 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | firstname | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 1 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | address | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 2 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | balance | null | long | null | null | null | 10 | 2 | null | null | null | null | null | 3 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | gender | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 4 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | city | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 5 | | null | null | 
null | null | NO | | + | docTestCluster | null | accounts | employer | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 6 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | state | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 7 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | age | null | long | null | null | null | 10 | 2 | null | null | null | null | null | 8 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | email | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 9 | | null | null | null | null | NO | | + | docTestCluster | null | accounts | lastname | null | text | null | null | null | 10 | 2 | null | null | null | null | null | 10 | | null | null | null | null | NO | | + +----------------+-------------+------------+----------------+-----------+-----------+-------------+---------------+----------------+----------------+----------+---------+------------+---------------+------------------+-------------------+------------------+-------------+---------------+--------------+-------------+------------------+------------------+--------------------+ Example 2: Fetch metadata with condition and filter =================================================== @@ -76,13 +76,13 @@ PPL query:: os> describe my_prometheus.prometheus_http_requests_total; fetched rows / total rows = 6/6 - +-----------------+----------------+--------------------------------+---------------+-------------+ - | TABLE_CATALOG | TABLE_SCHEMA | TABLE_NAME | COLUMN_NAME | DATA_TYPE | - |-----------------+----------------+--------------------------------+---------------+-------------| - | my_prometheus | default | prometheus_http_requests_total | handler | keyword | - | my_prometheus | default | prometheus_http_requests_total | code | keyword | - | my_prometheus | default | prometheus_http_requests_total | instance | keyword | - | my_prometheus | default | prometheus_http_requests_total | @timestamp | timestamp | - | my_prometheus | default | prometheus_http_requests_total | @value | double | - | my_prometheus | default | prometheus_http_requests_total | job | keyword | - +-----------------+----------------+--------------------------------+---------------+-------------+ + +---------------+--------------+--------------------------------+-------------+-----------+ + | TABLE_CATALOG | TABLE_SCHEMA | TABLE_NAME | COLUMN_NAME | DATA_TYPE | + |---------------+--------------+--------------------------------+-------------+-----------| + | my_prometheus | default | prometheus_http_requests_total | handler | keyword | + | my_prometheus | default | prometheus_http_requests_total | code | keyword | + | my_prometheus | default | prometheus_http_requests_total | instance | keyword | + | my_prometheus | default | prometheus_http_requests_total | @timestamp | timestamp | + | my_prometheus | default | prometheus_http_requests_total | @value | double | + | my_prometheus | default | prometheus_http_requests_total | job | keyword | + +---------------+--------------+--------------------------------+-------------+-----------+ diff --git a/docs/user/ppl/cmd/eval.rst b/docs/user/ppl/cmd/eval.rst index 48a14ae0a8..c950028674 100644 --- a/docs/user/ppl/cmd/eval.rst +++ b/docs/user/ppl/cmd/eval.rst @@ -30,14 +30,14 @@ PPL query:: os> source=accounts | eval doubleAge = age * 2 | fields age, doubleAge ; fetched rows / total rows = 4/4 - +-------+-------------+ - | age | doubleAge | - 
|-------+-------------| - | 32 | 64 | - | 36 | 72 | - | 28 | 56 | - | 33 | 66 | - +-------+-------------+ + +-----+-----------+ + | age | doubleAge | + |-----+-----------| + | 32 | 64 | + | 36 | 72 | + | 28 | 56 | + | 33 | 66 | + +-----+-----------+ Example 2: Override the existing field @@ -49,14 +49,14 @@ PPL query:: os> source=accounts | eval age = age + 1 | fields age ; fetched rows / total rows = 4/4 - +-------+ - | age | - |-------| - | 33 | - | 37 | - | 29 | - | 34 | - +-------+ + +-----+ + | age | + |-----| + | 33 | + | 37 | + | 29 | + | 34 | + +-----+ Example 3: Create the new field with field defined in eval ========================================================== @@ -67,14 +67,14 @@ PPL query:: os> source=accounts | eval doubleAge = age * 2, ddAge = doubleAge * 2 | fields age, doubleAge, ddAge ; fetched rows / total rows = 4/4 - +-------+-------------+---------+ - | age | doubleAge | ddAge | - |-------+-------------+---------| - | 32 | 64 | 128 | - | 36 | 72 | 144 | - | 28 | 56 | 112 | - | 33 | 66 | 132 | - +-------+-------------+---------+ + +-----+-----------+-------+ + | age | doubleAge | ddAge | + |-----+-----------+-------| + | 32 | 64 | 128 | + | 36 | 72 | 144 | + | 28 | 56 | 112 | + | 33 | 66 | 132 | + +-----+-----------+-------+ Limitation ========== diff --git a/docs/user/ppl/cmd/fields.rst b/docs/user/ppl/cmd/fields.rst index dbae5b20a4..32c3a665d7 100644 --- a/docs/user/ppl/cmd/fields.rst +++ b/docs/user/ppl/cmd/fields.rst @@ -31,14 +31,14 @@ PPL query:: os> source=accounts | fields account_number, firstname, lastname; fetched rows / total rows = 4/4 - +------------------+-------------+------------+ - | account_number | firstname | lastname | - |------------------+-------------+------------| - | 1 | Amber | Duke | - | 6 | Hattie | Bond | - | 13 | Nanette | Bates | - | 18 | Dale | Adams | - +------------------+-------------+------------+ + +----------------+-----------+----------+ + | account_number | firstname | lastname | + |----------------+-----------+----------| + | 1 | Amber | Duke | + | 6 | Hattie | Bond | + | 13 | Nanette | Bates | + | 18 | Dale | Adams | + +----------------+-----------+----------+ Example 2: Remove specified fields from result ============================================== @@ -49,12 +49,12 @@ PPL query:: os> source=accounts | fields account_number, firstname, lastname | fields - account_number ; fetched rows / total rows = 4/4 - +-------------+------------+ - | firstname | lastname | - |-------------+------------| - | Amber | Duke | - | Hattie | Bond | - | Nanette | Bates | - | Dale | Adams | - +-------------+------------+ + +-----------+----------+ + | firstname | lastname | + |-----------+----------| + | Amber | Duke | + | Hattie | Bond | + | Nanette | Bates | + | Dale | Adams | + +-----------+----------+ diff --git a/docs/user/ppl/cmd/fillnull.rst b/docs/user/ppl/cmd/fillnull.rst new file mode 100644 index 0000000000..4a9e38d353 --- /dev/null +++ b/docs/user/ppl/cmd/fillnull.rst @@ -0,0 +1,62 @@ +============= +fillnull +============= + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 2 + + +Description +============ +Using ``fillnull`` command to fill null values with a provided value in one or more fields in the search result. + + +Syntax +============ +`fillnull [with <null-replacement> in <nullable-field> ["," <nullable-field>]] | [using <nullable-field> = <null-replacement> ["," <nullable-field> = <null-replacement>]]` + +* null-replacement: mandatory. The value used to replace `null`s. +* nullable-field: mandatory. Field reference.
The `null` values in the field referred to by the property will be replaced with the values from the null-replacement. + +Example 1: fillnull one field +====================================================================== + +The example show fillnull one field. + +PPL query:: + + os> source=accounts | fields email, employer | fillnull with '' in email ; + fetched rows / total rows = 4/4 + +-----------------------+----------+ + | email | employer | + |-----------------------+----------| + | amberduke@pyrami.com | Pyrami | + | hattiebond@netagy.com | Netagy | + | | Quility | + | daleadams@boink.com | null | + +-----------------------+----------+ + +Example 2: fillnull applied to multiple fields +======================================================================== + +The example show fillnull applied to multiple fields. + +PPL query:: + + os> source=accounts | fields email, employer | fillnull using email = '', employer = '' ; + fetched rows / total rows = 4/4 + +-----------------------+---------------+ + | email | employer | + |-----------------------+---------------| + | amberduke@pyrami.com | Pyrami | + | hattiebond@netagy.com | Netagy | + | | Quility | + | daleadams@boink.com | | + +-----------------------+---------------+ + +Limitation +========== +The ``fillnull`` command is not rewritten to OpenSearch DSL, it is only executed on the coordination node. \ No newline at end of file diff --git a/docs/user/ppl/cmd/grok.rst b/docs/user/ppl/cmd/grok.rst index 6a121c7431..35f3b0c846 100644 --- a/docs/user/ppl/cmd/grok.rst +++ b/docs/user/ppl/cmd/grok.rst @@ -72,14 +72,14 @@ PPL query:: os> source=apache | grok message '%{COMMONAPACHELOG}' | fields COMMONAPACHELOG, timestamp, response, bytes ; fetched rows / total rows = 4/4 - +-----------------------------------------------------------------------------------------------------------------------------+----------------------------+------------+---------+ - | COMMONAPACHELOG | timestamp | response | bytes | - |-----------------------------------------------------------------------------------------------------------------------------+----------------------------+------------+---------| - | 177.95.8.74 - upton5450 [28/Sep/2022:10:15:57 -0700] "HEAD /e-business/mindshare HTTP/1.0" 404 19927 | 28/Sep/2022:10:15:57 -0700 | 404 | 19927 | - | 127.45.152.6 - pouros8756 [28/Sep/2022:10:15:57 -0700] "GET /architectures/convergence/niches/mindshare HTTP/1.0" 100 28722 | 28/Sep/2022:10:15:57 -0700 | 100 | 28722 | - | 118.223.210.105 - - [28/Sep/2022:10:15:57 -0700] "PATCH /strategize/out-of-the-box HTTP/1.0" 401 27439 | 28/Sep/2022:10:15:57 -0700 | 401 | 27439 | - | 210.204.15.104 - - [28/Sep/2022:10:15:57 -0700] "POST /users HTTP/1.1" 301 9481 | 28/Sep/2022:10:15:57 -0700 | 301 | 9481 | - +-----------------------------------------------------------------------------------------------------------------------------+----------------------------+------------+---------+ + +-----------------------------------------------------------------------------------------------------------------------------+----------------------------+----------+-------+ + | COMMONAPACHELOG | timestamp | response | bytes | + |-----------------------------------------------------------------------------------------------------------------------------+----------------------------+----------+-------| + | 177.95.8.74 - upton5450 [28/Sep/2022:10:15:57 -0700] "HEAD /e-business/mindshare HTTP/1.0" 404 19927 | 28/Sep/2022:10:15:57 -0700 | 404 | 19927 | + | 127.45.152.6 - pouros8756 
[28/Sep/2022:10:15:57 -0700] "GET /architectures/convergence/niches/mindshare HTTP/1.0" 100 28722 | 28/Sep/2022:10:15:57 -0700 | 100 | 28722 | + | 118.223.210.105 - - [28/Sep/2022:10:15:57 -0700] "PATCH /strategize/out-of-the-box HTTP/1.0" 401 27439 | 28/Sep/2022:10:15:57 -0700 | 401 | 27439 | + | 210.204.15.104 - - [28/Sep/2022:10:15:57 -0700] "POST /users HTTP/1.1" 301 9481 | 28/Sep/2022:10:15:57 -0700 | 301 | 9481 | + +-----------------------------------------------------------------------------------------------------------------------------+----------------------------+----------+-------+ Limitations =========== diff --git a/docs/user/ppl/cmd/head.rst b/docs/user/ppl/cmd/head.rst index 1b4599f5de..cd4aed5a54 100644 --- a/docs/user/ppl/cmd/head.rst +++ b/docs/user/ppl/cmd/head.rst @@ -30,14 +30,14 @@ PPL query:: os> source=accounts | fields firstname, age | head; fetched rows / total rows = 4/4 - +-------------+-------+ - | firstname | age | - |-------------+-------| - | Amber | 32 | - | Hattie | 36 | - | Nanette | 28 | - | Dale | 33 | - +-------------+-------+ + +-----------+-----+ + | firstname | age | + |-----------+-----| + | Amber | 32 | + | Hattie | 36 | + | Nanette | 28 | + | Dale | 33 | + +-----------+-----+ Example 2: Get first N results =========================================== @@ -48,13 +48,13 @@ PPL query:: os> source=accounts | fields firstname, age | head 3; fetched rows / total rows = 3/3 - +-------------+-------+ - | firstname | age | - |-------------+-------| - | Amber | 32 | - | Hattie | 36 | - | Nanette | 28 | - +-------------+-------+ + +-----------+-----+ + | firstname | age | + |-----------+-----| + | Amber | 32 | + | Hattie | 36 | + | Nanette | 28 | + +-----------+-----+ Example 3: Get first N results after offset M ============================================= @@ -65,13 +65,13 @@ PPL query:: os> source=accounts | fields firstname, age | head 3 from 1; fetched rows / total rows = 3/3 - +-------------+-------+ - | firstname | age | - |-------------+-------| - | Hattie | 36 | - | Nanette | 28 | - | Dale | 33 | - +-------------+-------+ + +-----------+-----+ + | firstname | age | + |-----------+-----| + | Hattie | 36 | + | Nanette | 28 | + | Dale | 33 | + +-----------+-----+ Limitation ========== diff --git a/docs/user/ppl/cmd/information_schema.rst b/docs/user/ppl/cmd/information_schema.rst index 26341d6972..4210502eda 100644 --- a/docs/user/ppl/cmd/information_schema.rst +++ b/docs/user/ppl/cmd/information_schema.rst @@ -29,11 +29,11 @@ PPL query for fetching PROMETHEUS TABLES with where clause:: os> source = my_prometheus.information_schema.tables | where TABLE_NAME='prometheus_http_requests_total' fetched rows / total rows = 1/1 - +-----------------+----------------+--------------------------------+--------------+--------+---------------------------+ - | TABLE_CATALOG | TABLE_SCHEMA | TABLE_NAME | TABLE_TYPE | UNIT | REMARKS | - |-----------------+----------------+--------------------------------+--------------+--------+---------------------------| - | my_prometheus | default | prometheus_http_requests_total | counter | | Counter of HTTP requests. 
| - +-----------------+----------------+--------------------------------+--------------+--------+---------------------------+ + +---------------+--------------+--------------------------------+------------+------+---------------------------+ + | TABLE_CATALOG | TABLE_SCHEMA | TABLE_NAME | TABLE_TYPE | UNIT | REMARKS | + |---------------+--------------+--------------------------------+------------+------+---------------------------| + | my_prometheus | default | prometheus_http_requests_total | counter | | Counter of HTTP requests. | + +---------------+--------------+--------------------------------+------------+------+---------------------------+ Example 2: Search tables in prometheus datasource. @@ -45,13 +45,13 @@ PPL query for searching PROMETHEUS TABLES:: os> source = my_prometheus.information_schema.tables | where LIKE(TABLE_NAME, "%http%"); fetched rows / total rows = 6/6 - +-----------------+----------------+--------------------------------------------+--------------+--------+----------------------------------------------------+ - | TABLE_CATALOG | TABLE_SCHEMA | TABLE_NAME | TABLE_TYPE | UNIT | REMARKS | - |-----------------+----------------+--------------------------------------------+--------------+--------+----------------------------------------------------| - | my_prometheus | default | prometheus_http_requests_total | counter | | Counter of HTTP requests. | - | my_prometheus | default | promhttp_metric_handler_requests_in_flight | gauge | | Current number of scrapes being served. | - | my_prometheus | default | prometheus_http_request_duration_seconds | histogram | | Histogram of latencies for HTTP requests. | - | my_prometheus | default | prometheus_sd_http_failures_total | counter | | Number of HTTP service discovery refresh failures. | - | my_prometheus | default | promhttp_metric_handler_requests_total | counter | | Total number of scrapes by HTTP status code. | - | my_prometheus | default | prometheus_http_response_size_bytes | histogram | | Histogram of response size for HTTP requests. | - +-----------------+----------------+--------------------------------------------+--------------+--------+----------------------------------------------------+ + +---------------+--------------+--------------------------------------------+------------+------+----------------------------------------------------+ + | TABLE_CATALOG | TABLE_SCHEMA | TABLE_NAME | TABLE_TYPE | UNIT | REMARKS | + |---------------+--------------+--------------------------------------------+------------+------+----------------------------------------------------| + | my_prometheus | default | prometheus_http_requests_total | counter | | Counter of HTTP requests. | + | my_prometheus | default | promhttp_metric_handler_requests_in_flight | gauge | | Current number of scrapes being served. | + | my_prometheus | default | prometheus_http_request_duration_seconds | histogram | | Histogram of latencies for HTTP requests. | + | my_prometheus | default | prometheus_sd_http_failures_total | counter | | Number of HTTP service discovery refresh failures. | + | my_prometheus | default | promhttp_metric_handler_requests_total | counter | | Total number of scrapes by HTTP status code. | + | my_prometheus | default | prometheus_http_response_size_bytes | histogram | | Histogram of response size for HTTP requests. 
| + +---------------+--------------+--------------------------------------------+------------+------+----------------------------------------------------+ diff --git a/docs/user/ppl/cmd/ml.rst b/docs/user/ppl/cmd/ml.rst index 2e04674c1e..a48c1ec589 100644 --- a/docs/user/ppl/cmd/ml.rst +++ b/docs/user/ppl/cmd/ml.rst @@ -56,11 +56,11 @@ PPL query:: os> source=nyc_taxi | fields value, timestamp | ml action='train' algorithm='rcf' time_field='timestamp' | where value=10844.0 fetched rows / total rows = 1/1 - +---------+---------------------+---------+-----------------+ - | value | timestamp | score | anomaly_grade | - |---------+---------------------+---------+-----------------| - | 10844.0 | 2014-07-01 00:00:00 | 0.0 | 0.0 | - +---------+---------------------+---------+-----------------+ + +---------+---------------------+-------+---------------+ + | value | timestamp | score | anomaly_grade | + |---------+---------------------+-------+---------------| + | 10844.0 | 2014-07-01 00:00:00 | 0.0 | 0.0 | + +---------+---------------------+-------+---------------+ Example 2: Detecting events in New York City from taxi ridership data with time-series data independently with each category ============================================================================================================================ @@ -71,12 +71,12 @@ PPL query:: os> source=nyc_taxi | fields category, value, timestamp | ml action='train' algorithm='rcf' time_field='timestamp' category_field='category' | where value=10844.0 or value=6526.0 fetched rows / total rows = 2/2 - +------------+---------+---------------------+---------+-----------------+ - | category | value | timestamp | score | anomaly_grade | - |------------+---------+---------------------+---------+-----------------| - | night | 10844.0 | 2014-07-01 00:00:00 | 0.0 | 0.0 | - | day | 6526.0 | 2014-07-01 06:00:00 | 0.0 | 0.0 | - +------------+---------+---------------------+---------+-----------------+ + +----------+---------+---------------------+-------+---------------+ + | category | value | timestamp | score | anomaly_grade | + |----------+---------+---------------------+-------+---------------| + | night | 10844.0 | 2014-07-01 00:00:00 | 0.0 | 0.0 | + | day | 6526.0 | 2014-07-01 06:00:00 | 0.0 | 0.0 | + +----------+---------+---------------------+-------+---------------+ Example 3: Detecting events in New York City from taxi ridership data with non-time-series data @@ -88,11 +88,11 @@ PPL query:: os> source=nyc_taxi | fields value | ml action='train' algorithm='rcf' | where value=10844.0 fetched rows / total rows = 1/1 - +---------+---------+-------------+ - | value | score | anomalous | - |---------+---------+-------------| - | 10844.0 | 0.0 | False | - +---------+---------+-------------+ + +---------+-------+-----------+ + | value | score | anomalous | + |---------+-------+-----------| + | 10844.0 | 0.0 | False | + +---------+-------+-----------+ Example 4: Detecting events in New York City from taxi ridership data with non-time-series data independently with each category ================================================================================================================================ @@ -103,12 +103,12 @@ PPL query:: os> source=nyc_taxi | fields category, value | ml action='train' algorithm='rcf' category_field='category' | where value=10844.0 or value=6526.0 fetched rows / total rows = 2/2 - +------------+---------+---------+-------------+ - | category | value | score | anomalous | - |------------+---------+---------+-------------| - | 
night | 10844.0 | 0.0 | False | - | day | 6526.0 | 0.0 | False | - +------------+---------+---------+-------------+ + +----------+---------+-------+-----------+ + | category | value | score | anomalous | + |----------+---------+-------+-----------| + | night | 10844.0 | 0.0 | False | + | day | 6526.0 | 0.0 | False | + +----------+---------+-------+-----------+ KMEANS ====== diff --git a/docs/user/ppl/cmd/parse.rst b/docs/user/ppl/cmd/parse.rst index 82eff8ee85..d1015cccb9 100644 --- a/docs/user/ppl/cmd/parse.rst +++ b/docs/user/ppl/cmd/parse.rst @@ -72,13 +72,13 @@ PPL query:: os> source=accounts | parse address '(?<streetNumber>\d+) (?<street>.+)' | where cast(streetNumber as int) > 500 | sort num(streetNumber) | fields streetNumber, street ; fetched rows / total rows = 3/3 - +----------------+----------------+ - | streetNumber | street | - |----------------+----------------| - | 671 | Bristol Street | - | 789 | Madison Street | - | 880 | Holmes Lane | - +----------------+----------------+ + +--------------+----------------+ + | streetNumber | street | + |--------------+----------------| + | 671 | Bristol Street | + | 789 | Madison Street | + | 880 | Holmes Lane | + +--------------+----------------+ Limitations =========== diff --git a/docs/user/ppl/cmd/patterns.rst b/docs/user/ppl/cmd/patterns.rst index 370404ecb6..13f08d0aa6 100644 --- a/docs/user/ppl/cmd/patterns.rst +++ b/docs/user/ppl/cmd/patterns.rst @@ -31,14 +31,14 @@ PPL query:: os> source=accounts | patterns email | fields email, patterns_field ; fetched rows / total rows = 4/4 - +-----------------------+------------------+ - | email | patterns_field | - |-----------------------+------------------| - | amberduke@pyrami.com | @. | - | hattiebond@netagy.com | @. | - | null | | - | daleadams@boink.com | @. | - +-----------------------+------------------+ + +-----------------------+----------------+ + | email | patterns_field | + |-----------------------+----------------| + | amberduke@pyrami.com | @. | + | hattiebond@netagy.com | @. | + | null | | + | daleadams@boink.com | @.
| + +-----------------------+----------------+ Example 2: Extract log patterns =============================== diff --git a/docs/user/ppl/cmd/rare.rst b/docs/user/ppl/cmd/rare.rst index 35b660daa7..f6013711ae 100644 --- a/docs/user/ppl/cmd/rare.rst +++ b/docs/user/ppl/cmd/rare.rst @@ -32,12 +32,12 @@ PPL query:: os> source=accounts | rare gender; fetched rows / total rows = 2/2 - +----------+ - | gender | - |----------| - | F | - | M | - +----------+ + +--------+ + | gender | + |--------| + | F | + | M | + +--------+ Example 2: Find the least common values organized by gender @@ -49,14 +49,14 @@ PPL query:: os> source=accounts | rare age by gender; fetched rows / total rows = 4/4 - +----------+-------+ - | gender | age | - |----------+-------| - | F | 28 | - | M | 32 | - | M | 33 | - | M | 36 | - +----------+-------+ + +--------+-----+ + | gender | age | + |--------+-----| + | F | 28 | + | M | 32 | + | M | 33 | + | M | 36 | + +--------+-----+ Limitation ========== diff --git a/docs/user/ppl/cmd/rename.rst b/docs/user/ppl/cmd/rename.rst index a4383a9f5f..c942884248 100644 --- a/docs/user/ppl/cmd/rename.rst +++ b/docs/user/ppl/cmd/rename.rst @@ -31,14 +31,14 @@ PPL query:: os> source=accounts | rename account_number as an | fields an; fetched rows / total rows = 4/4 - +------+ - | an | - |------| - | 1 | - | 6 | - | 13 | - | 18 | - +------+ + +----+ + | an | + |----| + | 1 | + | 6 | + | 13 | + | 18 | + +----+ Example 2: Rename multiple fields @@ -50,14 +50,14 @@ PPL query:: os> source=accounts | rename account_number as an, employer as emp | fields an, emp; fetched rows / total rows = 4/4 - +------+---------+ - | an | emp | - |------+---------| - | 1 | Pyrami | - | 6 | Netagy | - | 13 | Quility | - | 18 | null | - +------+---------+ + +----+---------+ + | an | emp | + |----+---------| + | 1 | Pyrami | + | 6 | Netagy | + | 13 | Quility | + | 18 | null | + +----+---------+ Limitation ========== diff --git a/docs/user/ppl/cmd/search.rst b/docs/user/ppl/cmd/search.rst index 5299f9f78a..9e55daddeb 100644 --- a/docs/user/ppl/cmd/search.rst +++ b/docs/user/ppl/cmd/search.rst @@ -37,14 +37,14 @@ PPL query:: os> source=accounts; fetched rows / total rows = 4/4 - +------------------+-------------+----------------------+-----------+----------+--------+------------+---------+-------+-----------------------+------------+ - | account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | - |------------------+-------------+----------------------+-----------+----------+--------+------------+---------+-------+-----------------------+------------| - | 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | - | 6 | Hattie | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | Bond | - | 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates | - | 18 | Dale | 467 Hutchinson Court | 4180 | M | Orick | null | MD | 33 | daleadams@boink.com | Adams | - +------------------+-------------+----------------------+-----------+----------+--------+------------+---------+-------+-----------------------+------------+ + +----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ + | account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | + 
|----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------| + | 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | + | 6 | Hattie | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | Bond | + | 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates | + | 18 | Dale | 467 Hutchinson Court | 4180 | M | Orick | null | MD | 33 | daleadams@boink.com | Adams | + +----------------+-----------+----------------------+---------+--------+--------+----------+-------+-----+-----------------------+----------+ Example 2: Fetch data with condition ==================================== @@ -55,10 +55,10 @@ PPL query:: os> source=accounts account_number=1 or gender="F"; fetched rows / total rows = 2/2 - +------------------+-------------+--------------------+-----------+----------+--------+------------+---------+-------+----------------------+------------+ - | account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | - |------------------+-------------+--------------------+-----------+----------+--------+------------+---------+-------+----------------------+------------| - | 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | - | 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates | - +------------------+-------------+--------------------+-----------+----------+--------+------------+---------+-------+----------------------+------------+ + +----------------+-----------+--------------------+---------+--------+--------+----------+-------+-----+----------------------+----------+ + | account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | + |----------------+-----------+--------------------+---------+--------+--------+----------+-------+-----+----------------------+----------| + | 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | + | 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates | + +----------------+-----------+--------------------+---------+--------+--------+----------+-------+-----+----------------------+----------+ diff --git a/docs/user/ppl/cmd/showdatasources.rst b/docs/user/ppl/cmd/showdatasources.rst index f7c6beb82f..d954ef0c04 100644 --- a/docs/user/ppl/cmd/showdatasources.rst +++ b/docs/user/ppl/cmd/showdatasources.rst @@ -28,9 +28,9 @@ PPL query for all PROMETHEUS DATASOURCES:: os> show datasources | where CONNECTOR_TYPE='PROMETHEUS'; fetched rows / total rows = 1/1 - +-------------------+------------------+ - | DATASOURCE_NAME | CONNECTOR_TYPE | - |-------------------+------------------| - | my_prometheus | PROMETHEUS | - +-------------------+------------------+ + +-----------------+----------------+ + | DATASOURCE_NAME | CONNECTOR_TYPE | + |-----------------+----------------| + | my_prometheus | PROMETHEUS | + +-----------------+----------------+ diff --git a/docs/user/ppl/cmd/sort.rst b/docs/user/ppl/cmd/sort.rst index 6a00ebc24c..377f0a5e01 100644 --- a/docs/user/ppl/cmd/sort.rst +++ b/docs/user/ppl/cmd/sort.rst @@ -32,14 +32,14 @@ PPL query:: os> source=accounts | sort age | fields account_number, age; fetched rows / total rows = 4/4 - +------------------+-------+ - | account_number | age | - |------------------+-------| - | 13 | 
28 | - | 1 | 32 | - | 18 | 33 | - | 6 | 36 | - +------------------+-------+ + +----------------+-----+ + | account_number | age | + |----------------+-----| + | 13 | 28 | + | 1 | 32 | + | 18 | 33 | + | 6 | 36 | + +----------------+-----+ Example 2: Sort by one field and return all the results @@ -51,14 +51,14 @@ PPL query:: os> source=accounts | sort age | fields account_number, age; fetched rows / total rows = 4/4 - +------------------+-------+ - | account_number | age | - |------------------+-------| - | 13 | 28 | - | 1 | 32 | - | 18 | 33 | - | 6 | 36 | - +------------------+-------+ + +----------------+-----+ + | account_number | age | + |----------------+-----| + | 13 | 28 | + | 1 | 32 | + | 18 | 33 | + | 6 | 36 | + +----------------+-----+ Example 3: Sort by one field in descending order @@ -70,14 +70,14 @@ PPL query:: os> source=accounts | sort - age | fields account_number, age; fetched rows / total rows = 4/4 - +------------------+-------+ - | account_number | age | - |------------------+-------| - | 6 | 36 | - | 18 | 33 | - | 1 | 32 | - | 13 | 28 | - +------------------+-------+ + +----------------+-----+ + | account_number | age | + |----------------+-----| + | 6 | 36 | + | 18 | 33 | + | 1 | 32 | + | 13 | 28 | + +----------------+-----+ Example 4: Sort by multiple fields ============================= @@ -88,14 +88,14 @@ PPL query:: os> source=accounts | sort + gender, - age | fields account_number, gender, age; fetched rows / total rows = 4/4 - +------------------+----------+-------+ - | account_number | gender | age | - |------------------+----------+-------| - | 13 | F | 28 | - | 6 | M | 36 | - | 18 | M | 33 | - | 1 | M | 32 | - +------------------+----------+-------+ + +----------------+--------+-----+ + | account_number | gender | age | + |----------------+--------+-----| + | 13 | F | 28 | + | 6 | M | 36 | + | 18 | M | 33 | + | 1 | M | 32 | + +----------------+--------+-----+ Example 5: Sort by field include null value =========================================== @@ -106,11 +106,11 @@ PPL query:: os> source=accounts | sort employer | fields employer; fetched rows / total rows = 4/4 - +------------+ - | employer | - |------------| - | null | - | Netagy | - | Pyrami | - | Quility | - +------------+ + +----------+ + | employer | + |----------| + | null | + | Netagy | + | Pyrami | + | Quility | + +----------+ diff --git a/docs/user/ppl/cmd/stats.rst b/docs/user/ppl/cmd/stats.rst index 096d3eacfc..7d5da804ce 100644 --- a/docs/user/ppl/cmd/stats.rst +++ b/docs/user/ppl/cmd/stats.rst @@ -43,7 +43,7 @@ stats ... [by-clause] * Description: The by clause could be the fields and expressions like scalar functions and aggregation functions. Besides, the span clause can be used to split a specific field into buckets of the same interval; the stats command then does the aggregation by these span buckets. * Default: If no by-clause is specified, the stats command returns only one row, which is the aggregation over the entire result set. -* span-expression: optional. +* span-expression: optional, at most one. * Syntax: span(field_expr, interval_expr) * Description: The unit of the interval expression is the natural unit by default. If the field is a date and time type field, and the interval is in date/time units, you will need to specify the unit in the interval expression. For example, to split the field ``age`` into buckets by 10 years, it looks like ``span(age, 10)``. Another example of a time span: to split a ``timestamp`` field into hourly intervals, it looks like ``span(timestamp, 1h)``.
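A minimal sketch of the span bucketing rule, assuming ``span(field_expr, interval_expr)`` assigns each numeric value to the bucket obtained by flooring it to the nearest multiple of the interval (an assumption for illustration, consistent with the ``span(age, 10)`` and ``span(age, 5)`` examples later in this document)::

    # Hypothetical illustration of span bucketing: floor each value to the
    # nearest multiple of the interval. With the sample ages below this yields
    # buckets 20/30 for span(age, 10) and 25/30/35 for span(age, 5).
    def span_bucket(value, interval):
        return (value // interval) * interval

    ages = [32, 36, 28, 33]
    print(sorted({span_bucket(a, 10) for a in ages}))  # [20, 30]
    print(sorted({span_bucket(a, 5) for a in ages}))   # [25, 30, 35]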
@@ -86,11 +86,11 @@ Example:: os> source=accounts | stats count(); fetched rows / total rows = 1/1 - +-----------+ - | count() | - |-----------| - | 4 | - +-----------+ + +---------+ + | count() | + |---------| + | 4 | + +---------+ SUM --- @@ -104,12 +104,12 @@ Example:: os> source=accounts | stats sum(age) by gender; fetched rows / total rows = 2/2 - +------------+----------+ - | sum(age) | gender | - |------------+----------| - | 28 | F | - | 101 | M | - +------------+----------+ + +----------+--------+ + | sum(age) | gender | + |----------+--------| + | 28 | F | + | 101 | M | + +----------+--------+ AVG --- @@ -123,12 +123,12 @@ Example:: os> source=accounts | stats avg(age) by gender; fetched rows / total rows = 2/2 - +--------------------+----------+ - | avg(age) | gender | - |--------------------+----------| - | 28.0 | F | - | 33.666666666666664 | M | - +--------------------+----------+ + +--------------------+--------+ + | avg(age) | gender | + |--------------------+--------| + | 28.0 | F | + | 33.666666666666664 | M | + +--------------------+--------+ MAX --- @@ -142,11 +142,11 @@ Example:: os> source=accounts | stats max(age); fetched rows / total rows = 1/1 - +------------+ - | max(age) | - |------------| - | 36 | - +------------+ + +----------+ + | max(age) | + |----------| + | 36 | + +----------+ MIN --- @@ -160,11 +160,11 @@ Example:: os> source=accounts | stats min(age); fetched rows / total rows = 1/1 - +------------+ - | min(age) | - |------------| - | 28 | - +------------+ + +----------+ + | min(age) | + |----------| + | 28 | + +----------+ VAR_SAMP -------- @@ -196,11 +196,11 @@ Example:: os> source=accounts | stats var_pop(age); fetched rows / total rows = 1/1 - +----------------+ - | var_pop(age) | - |----------------| - | 8.1875 | - +----------------+ + +--------------+ + | var_pop(age) | + |--------------| + | 8.1875 | + +--------------+ STDDEV_SAMP ----------- @@ -214,11 +214,11 @@ Example:: os> source=accounts | stats stddev_samp(age); fetched rows / total rows = 1/1 - +--------------------+ - | stddev_samp(age) | - |--------------------| - | 3.304037933599835 | - +--------------------+ + +-------------------+ + | stddev_samp(age) | + |-------------------| + | 3.304037933599835 | + +-------------------+ STDDEV_POP ---------- @@ -273,12 +273,12 @@ Example:: os> source=accounts | stats percentile(age, 90) by gender; fetched rows / total rows = 2/2 - +-----------------------+----------+ - | percentile(age, 90) | gender | - |-----------------------+----------| - | 28 | F | - | 36 | M | - +-----------------------+----------+ + +---------------------+--------+ + | percentile(age, 90) | gender | + |---------------------+--------| + | 28 | F | + | 36 | M | + +---------------------+--------+ Example 1: Calculate the count of events ======================================== @@ -289,11 +289,11 @@ PPL query:: os> source=accounts | stats count(); fetched rows / total rows = 1/1 - +-----------+ - | count() | - |-----------| - | 4 | - +-----------+ + +---------+ + | count() | + |---------| + | 4 | + +---------+ Example 2: Calculate the average of a field @@ -305,11 +305,11 @@ PPL query:: os> source=accounts | stats avg(age); fetched rows / total rows = 1/1 - +------------+ - | avg(age) | - |------------| - | 32.25 | - +------------+ + +----------+ + | avg(age) | + |----------| + | 32.25 | + +----------+ Example 3: Calculate the average of a field by group @@ -321,12 +321,12 @@ PPL query:: os> source=accounts | stats avg(age) by gender; fetched rows / total rows = 2/2 - 
+--------------------+----------+ - | avg(age) | gender | - |--------------------+----------| - | 28.0 | F | - | 33.666666666666664 | M | - +--------------------+----------+ + +--------------------+--------+ + | avg(age) | gender | + |--------------------+--------| + | 28.0 | F | + | 33.666666666666664 | M | + +--------------------+--------+ Example 4: Calculate the average, sum and count of a field by group @@ -338,12 +338,12 @@ PPL query:: os> source=accounts | stats avg(age), sum(age), count() by gender; fetched rows / total rows = 2/2 - +--------------------+------------+-----------+----------+ - | avg(age) | sum(age) | count() | gender | - |--------------------+------------+-----------+----------| - | 28.0 | 28 | 1 | F | - | 33.666666666666664 | 101 | 3 | M | - +--------------------+------------+-----------+----------+ + +--------------------+----------+---------+--------+ + | avg(age) | sum(age) | count() | gender | + |--------------------+----------+---------+--------| + | 28.0 | 28 | 1 | F | + | 33.666666666666664 | 101 | 3 | M | + +--------------------+----------+---------+--------+ Example 5: Calculate the maximum of a field =========================================== @@ -354,11 +354,11 @@ PPL query:: os> source=accounts | stats max(age); fetched rows / total rows = 1/1 - +------------+ - | max(age) | - |------------| - | 36 | - +------------+ + +----------+ + | max(age) | + |----------| + | 36 | + +----------+ Example 6: Calculate the maximum and minimum of a field by group ================================================================ @@ -369,12 +369,12 @@ PPL query:: os> source=accounts | stats max(age), min(age) by gender; fetched rows / total rows = 2/2 - +------------+------------+----------+ - | max(age) | min(age) | gender | - |------------+------------+----------| - | 28 | 28 | F | - | 36 | 32 | M | - +------------+------------+----------+ + +----------+----------+--------+ + | max(age) | min(age) | gender | + |----------+----------+--------| + | 28 | 28 | F | + | 36 | 32 | M | + +----------+----------+--------+ Example 7: Calculate the distinct count of a field ================================================== @@ -385,11 +385,11 @@ PPL query:: os> source=accounts | stats count(gender), distinct_count(gender); fetched rows / total rows = 1/1 - +-----------------+--------------------------+ - | count(gender) | distinct_count(gender) | - |-----------------+--------------------------| - | 4 | 2 | - +-----------------+--------------------------+ + +---------------+------------------------+ + | count(gender) | distinct_count(gender) | + |---------------+------------------------| + | 4 | 2 | + +---------------+------------------------+ Example 8: Calculate the count by a span ======================================== @@ -400,12 +400,12 @@ PPL query:: os> source=accounts | stats count(age) by span(age, 10) as age_span fetched rows / total rows = 2/2 - +--------------+------------+ - | count(age) | age_span | - |--------------+------------| - | 1 | 20 | - | 3 | 30 | - +--------------+------------+ + +------------+----------+ + | count(age) | age_span | + |------------+----------| + | 1 | 20 | + | 3 | 30 | + +------------+----------+ Example 9: Calculate the count by a gender and span =================================================== @@ -416,13 +416,27 @@ PPL query:: os> source=accounts | stats count() as cnt by span(age, 5) as age_span, gender fetched rows / total rows = 3/3 - +-------+------------+----------+ - | cnt | age_span | gender | - |-------+------------+----------| 
- | 1 | 25 | F | - | 2 | 30 | M | - | 1 | 35 | M | - +-------+------------+----------+ + +-----+----------+--------+ + | cnt | age_span | gender | + |-----+----------+--------| + | 1 | 25 | F | + | 2 | 30 | M | + | 1 | 35 | M | + +-----+----------+--------+ + +Span will always be the first grouping key whatever order you specify. + +PPL query:: + + os> source=accounts | stats count() as cnt by gender, span(age, 5) as age_span + fetched rows / total rows = 3/3 + +-----+----------+--------+ + | cnt | age_span | gender | + |-----+----------+--------| + | 1 | 25 | F | + | 2 | 30 | M | + | 1 | 35 | M | + +-----+----------+--------+ Example 10: Calculate the count and get email list by a gender and span ======================================================================= @@ -433,13 +447,13 @@ PPL query:: os> source=accounts | stats count() as cnt, take(email, 5) by span(age, 5) as age_span, gender fetched rows / total rows = 3/3 - +-------+--------------------------------------------+------------+----------+ - | cnt | take(email, 5) | age_span | gender | - |-------+--------------------------------------------+------------+----------| - | 1 | [] | 25 | F | - | 2 | [amberduke@pyrami.com,daleadams@boink.com] | 30 | M | - | 1 | [hattiebond@netagy.com] | 35 | M | - +-------+--------------------------------------------+------------+----------+ + +-----+--------------------------------------------+----------+--------+ + | cnt | take(email, 5) | age_span | gender | + |-----+--------------------------------------------+----------+--------| + | 1 | [] | 25 | F | + | 2 | [amberduke@pyrami.com,daleadams@boink.com] | 30 | M | + | 1 | [hattiebond@netagy.com] | 35 | M | + +-----+--------------------------------------------+----------+--------+ Example 11: Calculate the percentile of a field =============================================== @@ -450,11 +464,11 @@ PPL query:: os> source=accounts | stats percentile(age, 90); fetched rows / total rows = 1/1 - +-----------------------+ - | percentile(age, 90) | - |-----------------------| - | 36 | - +-----------------------+ + +---------------------+ + | percentile(age, 90) | + |---------------------| + | 36 | + +---------------------+ Example 12: Calculate the percentile of a field by group @@ -466,12 +480,12 @@ PPL query:: os> source=accounts | stats percentile(age, 90) by gender; fetched rows / total rows = 2/2 - +-----------------------+----------+ - | percentile(age, 90) | gender | - |-----------------------+----------| - | 28 | F | - | 36 | M | - +-----------------------+----------+ + +---------------------+--------+ + | percentile(age, 90) | gender | + |---------------------+--------| + | 28 | F | + | 36 | M | + +---------------------+--------+ Example 13: Calculate the percentile by a gender and span ========================================================= @@ -482,10 +496,10 @@ PPL query:: os> source=accounts | stats percentile(age, 90) as p90 by span(age, 10) as age_span, gender fetched rows / total rows = 2/2 - +-------+------------+----------+ - | p90 | age_span | gender | - |-------+------------+----------| - | 28 | 20 | F | - | 36 | 30 | M | - +-------+------------+----------+ + +-----+----------+--------+ + | p90 | age_span | gender | + |-----+----------+--------| + | 28 | 20 | F | + | 36 | 30 | M | + +-----+----------+--------+ diff --git a/docs/user/ppl/cmd/top.rst b/docs/user/ppl/cmd/top.rst index cbab675d09..6fa4d9cdb0 100644 --- a/docs/user/ppl/cmd/top.rst +++ b/docs/user/ppl/cmd/top.rst @@ -32,12 +32,12 @@ PPL query:: os> source=accounts | top 
gender; fetched rows / total rows = 2/2 - +----------+ - | gender | - |----------| - | M | - | F | - +----------+ + +--------+ + | gender | + |--------| + | M | + | F | + +--------+ Example 2: Find the most common values in a field =========================================== @@ -48,11 +48,11 @@ PPL query:: os> source=accounts | top 1 gender; fetched rows / total rows = 1/1 - +----------+ - | gender | - |----------| - | M | - +----------+ + +--------+ + | gender | + |--------| + | M | + +--------+ Example 2: Find the most common values organized by gender ==================================================== @@ -63,12 +63,12 @@ PPL query:: os> source=accounts | top 1 age by gender; fetched rows / total rows = 2/2 - +----------+-------+ - | gender | age | - |----------+-------| - | F | 28 | - | M | 32 | - +----------+-------+ + +--------+-----+ + | gender | age | + |--------+-----| + | F | 28 | + | M | 32 | + +--------+-----+ Limitation ========== diff --git a/docs/user/ppl/cmd/trendline.rst b/docs/user/ppl/cmd/trendline.rst new file mode 100644 index 0000000000..e6df0d7a2c --- /dev/null +++ b/docs/user/ppl/cmd/trendline.rst @@ -0,0 +1,90 @@ +============= +trendline +============= + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 2 + + +Description +============ +| Using ``trendline`` command to calculate moving averages of fields. + +Syntax +============ +`TRENDLINE [sort <[+|-] sort-field>] SMA(number-of-datapoints, field) [AS alias] [SMA(number-of-datapoints, field) [AS alias]]...` + +* [+|-]: optional. The plus [+] stands for ascending order and NULL/MISSING first and a minus [-] stands for descending order and NULL/MISSING last. **Default:** ascending order and NULL/MISSING first. +* sort-field: mandatory when sorting is used. The field used to sort. +* number-of-datapoints: mandatory. The number of datapoints to calculate the moving average (must be greater than zero). +* field: mandatory. The name of the field the moving average should be calculated for. +* alias: optional. The name of the resulting column containing the moving average (defaults to the field name with "_trendline"). + +At the moment only the Simple Moving Average (SMA) type is supported. + +It is calculated like + + f[i]: The value of field 'f' in the i-th data-point + n: The number of data-points in the moving window (period) + t: The current time index + + SMA(t) = (1/n) * Σ(f[i]), where i = t-n+1 to t + +Example 1: Calculate the moving average on one field. +===================================================== + +The example shows how to calculate the moving average on one field. + +PPL query:: + + os> source=accounts | trendline sma(2, account_number) as an | fields an; + fetched rows / total rows = 4/4 + +------+ + | an | + |------| + | null | + | 3.5 | + | 9.5 | + | 15.5 | + +------+ + + +Example 2: Calculate the moving average on multiple fields. +=========================================================== + +The example shows how to calculate the moving average on multiple fields. + +PPL query:: + + os> source=accounts | trendline sma(2, account_number) as an sma(2, age) as age_trend | fields an, age_trend ; + fetched rows / total rows = 4/4 + +------+-----------+ + | an | age_trend | + |------+-----------| + | null | null | + | 3.5 | 34.0 | + | 9.5 | 32.0 | + | 15.5 | 30.5 | + +------+-----------+ + +Example 4: Calculate the moving average on one field without specifying an alias. 
+================================================================================= + +The example shows how to calculate the moving average on one field. + +PPL query:: + + os> source=accounts | trendline sma(2, account_number) | fields account_number_trendline; + fetched rows / total rows = 4/4 + +--------------------------+ + | account_number_trendline | + |--------------------------| + | null | + | 3.5 | + | 9.5 | + | 15.5 | + +--------------------------+ + diff --git a/docs/user/ppl/cmd/where.rst b/docs/user/ppl/cmd/where.rst index 4d8718d69f..115bffe7de 100644 --- a/docs/user/ppl/cmd/where.rst +++ b/docs/user/ppl/cmd/where.rst @@ -29,10 +29,10 @@ PPL query:: os> source=accounts | where account_number=1 or gender="F" | fields account_number, gender; fetched rows / total rows = 2/2 - +------------------+----------+ - | account_number | gender | - |------------------+----------| - | 1 | M | - | 13 | F | - +------------------+----------+ + +----------------+--------+ + | account_number | gender | + |----------------+--------| + | 1 | M | + | 13 | F | + +----------------+--------+ diff --git a/docs/user/ppl/functions/condition.rst b/docs/user/ppl/functions/condition.rst index fea76bedda..9ce130072e 100644 --- a/docs/user/ppl/functions/condition.rst +++ b/docs/user/ppl/functions/condition.rst @@ -24,14 +24,14 @@ Example:: os> source=accounts | eval result = isnull(employer) | fields result, employer, firstname fetched rows / total rows = 4/4 - +----------+------------+-------------+ - | result | employer | firstname | - |----------+------------+-------------| - | False | Pyrami | Amber | - | False | Netagy | Hattie | - | False | Quility | Nanette | - | True | null | Dale | - +----------+------------+-------------+ + +--------+----------+-----------+ + | result | employer | firstname | + |--------+----------+-----------| + | False | Pyrami | Amber | + | False | Netagy | Hattie | + | False | Quility | Nanette | + | True | null | Dale | + +--------+----------+-----------+ ISNOTNULL --------- @@ -49,11 +49,11 @@ Example:: os> source=accounts | where not isnotnull(employer) | fields account_number, employer fetched rows / total rows = 1/1 - +------------------+------------+ - | account_number | employer | - |------------------+------------| - | 18 | null | - +------------------+------------+ + +----------------+----------+ + | account_number | employer | + |----------------+----------| + | 18 | null | + +----------------+----------+ EXISTS ------ @@ -64,11 +64,11 @@ Example, the account 13 doesn't have email field:: os> source=accounts | where isnull(email) | fields account_number, email fetched rows / total rows = 1/1 - +------------------+---------+ - | account_number | email | - |------------------+---------| - | 13 | null | - +------------------+---------+ + +----------------+-------+ + | account_number | email | + |----------------+-------| + | 13 | null | + +----------------+-------+ IFNULL ------ @@ -86,14 +86,14 @@ Example:: os> source=accounts | eval result = ifnull(employer, 'default') | fields result, employer, firstname fetched rows / total rows = 4/4 - +----------+------------+-------------+ - | result | employer | firstname | - |----------+------------+-------------| - | Pyrami | Pyrami | Amber | - | Netagy | Netagy | Hattie | - | Quility | Quility | Nanette | - | default | null | Dale | - +----------+------------+-------------+ + +---------+----------+-----------+ + | result | employer | firstname | + |---------+----------+-----------| + | Pyrami | Pyrami | Amber | + | Netagy | 
Netagy | Hattie | + | Quility | Quility | Nanette | + | default | null | Dale | + +---------+----------+-----------+ NULLIF ------ @@ -101,7 +101,7 @@ NULLIF Description >>>>>>>>>>> -Usage: nullif(field1, field2) return null if two parameters are same, otherwiser return field1. +Usage: nullif(field1, field2) returns null if the two parameters are the same, otherwise returns field1. Argument type: all the supported data type, (NOTE : if the two parameters have different types, you will fail semantic check) @@ -111,14 +111,14 @@ Example:: os> source=accounts | eval result = nullif(employer, 'Pyrami') | fields result, employer, firstname fetched rows / total rows = 4/4 - +----------+------------+-------------+ - | result | employer | firstname | - |----------+------------+-------------| - | null | Pyrami | Amber | - | Netagy | Netagy | Hattie | - | Quility | Quility | Nanette | - | null | null | Dale | - +----------+------------+-------------+ + +---------+----------+-----------+ + | result | employer | firstname | + |---------+----------+-----------| + | null | Pyrami | Amber | + | Netagy | Netagy | Hattie | + | Quility | Quility | Nanette | + | null | null | Dale | + +---------+----------+-----------+ ISNULL @@ -137,14 +137,14 @@ Example:: os> source=accounts | eval result = isnull(employer) | fields result, employer, firstname fetched rows / total rows = 4/4 - +----------+------------+-------------+ - | result | employer | firstname | - |----------+------------+-------------| - | False | Pyrami | Amber | - | False | Netagy | Hattie | - | False | Quility | Nanette | - | True | null | Dale | - +----------+------------+-------------+ + +--------+----------+-----------+ + | result | employer | firstname | + |--------+----------+-----------| + | False | Pyrami | Amber | + | False | Netagy | Hattie | + | False | Quility | Nanette | + | True | null | Dale | + +--------+----------+-----------+ IF ------ @@ -152,7 +152,7 @@ IF Description >>>>>>>>>>> -Usage: if(condition, expr1, expr2) return expr1 if condition is true, otherwiser return expr2. +Usage: if(condition, expr1, expr2) returns expr1 if the condition is true, otherwise returns expr2.
Argument type: all the supported data type, (NOTE : if expr1 and expr2 are different type, you will fail semantic check @@ -162,22 +162,33 @@ Example:: os> source=accounts | eval result = if(true, firstname, lastname) | fields result, firstname, lastname fetched rows / total rows = 4/4 - +----------+-------------+------------+ - | result | firstname | lastname | - |----------+-------------+------------| - | Amber | Amber | Duke | - | Hattie | Hattie | Bond | - | Nanette | Nanette | Bates | - | Dale | Dale | Adams | - +----------+-------------+------------+ + +---------+-----------+----------+ + | result | firstname | lastname | + |---------+-----------+----------| + | Amber | Amber | Duke | + | Hattie | Hattie | Bond | + | Nanette | Nanette | Bates | + | Dale | Dale | Adams | + +---------+-----------+----------+ os> source=accounts | eval result = if(false, firstname, lastname) | fields result, firstname, lastname fetched rows / total rows = 4/4 - +----------+-------------+------------+ - | result | firstname | lastname | - |----------+-------------+------------| - | Duke | Amber | Duke | - | Bond | Hattie | Bond | - | Bates | Nanette | Bates | - | Adams | Dale | Adams | - +----------+-------------+------------+ + +--------+-----------+----------+ + | result | firstname | lastname | + |--------+-----------+----------| + | Duke | Amber | Duke | + | Bond | Hattie | Bond | + | Bates | Nanette | Bates | + | Adams | Dale | Adams | + +--------+-----------+----------+ + + os> source=accounts | eval is_vip = if(age > 30 AND isnotnull(employer), true, false) | fields is_vip, firstname, lastname + fetched rows / total rows = 4/4 + +--------+-----------+----------+ + | is_vip | firstname | lastname | + |--------+-----------+----------| + | True | Amber | Duke | + | True | Hattie | Bond | + | False | Nanette | Bates | + | False | Dale | Adams | + +--------+-----------+----------+ diff --git a/docs/user/ppl/functions/conversion.rst b/docs/user/ppl/functions/conversion.rst index a4a59a6cd1..31fb3e3cdf 100644 --- a/docs/user/ppl/functions/conversion.rst +++ b/docs/user/ppl/functions/conversion.rst @@ -38,21 +38,21 @@ Cast to string example:: os> source=people | eval `cbool` = CAST(true as string), `cint` = CAST(1 as string), `cdate` = CAST(CAST('2012-08-07' as date) as string) | fields `cbool`, `cint`, `cdate` fetched rows / total rows = 1/1 - +---------+--------+------------+ - | cbool | cint | cdate | - |---------+--------+------------| - | true | 1 | 2012-08-07 | - +---------+--------+------------+ + +-------+------+------------+ + | cbool | cint | cdate | + |-------+------+------------| + | true | 1 | 2012-08-07 | + +-------+------+------------+ Cast to number example:: os> source=people | eval `cbool` = CAST(true as int), `cstring` = CAST('1' as int) | fields `cbool`, `cstring` fetched rows / total rows = 1/1 - +---------+-----------+ - | cbool | cstring | - |---------+-----------| - | 1 | 1 | - +---------+-----------+ + +-------+---------+ + | cbool | cstring | + |-------+---------| + | 1 | 1 | + +-------+---------+ Cast to date example:: @@ -68,8 +68,8 @@ Cast function can be chained:: os> source=people | eval `cbool` = CAST(CAST(true as string) as boolean) | fields `cbool` fetched rows / total rows = 1/1 - +---------+ - | cbool | - |---------| - | True | - +---------+ + +-------+ + | cbool | + |-------| + | True | + +-------+ diff --git a/docs/user/ppl/functions/datetime.rst b/docs/user/ppl/functions/datetime.rst index 9e75e41136..c0d42297ac 100644 --- a/docs/user/ppl/functions/datetime.rst +++ 
b/docs/user/ppl/functions/datetime.rst @@ -35,11 +35,11 @@ Example:: os> source=people | eval `'2020-08-26' + 1h` = ADDDATE(DATE('2020-08-26'), INTERVAL 1 HOUR), `'2020-08-26' + 1` = ADDDATE(DATE('2020-08-26'), 1), `ts '2020-08-26 01:01:01' + 1` = ADDDATE(TIMESTAMP('2020-08-26 01:01:01'), 1) | fields `'2020-08-26' + 1h`, `'2020-08-26' + 1`, `ts '2020-08-26 01:01:01' + 1` fetched rows / total rows = 1/1 - +---------------------+--------------------+--------------------------------+ - | '2020-08-26' + 1h | '2020-08-26' + 1 | ts '2020-08-26 01:01:01' + 1 | - |---------------------+--------------------+--------------------------------| - | 2020-08-26 01:00:00 | 2020-08-27 | 2020-08-27 01:01:01 | - +---------------------+--------------------+--------------------------------+ + +---------------------+------------------+------------------------------+ + | '2020-08-26' + 1h | '2020-08-26' + 1 | ts '2020-08-26 01:01:01' + 1 | + |---------------------+------------------+------------------------------| + | 2020-08-26 01:00:00 | 2020-08-27 | 2020-08-27 01:01:01 | + +---------------------+------------------+------------------------------+ @@ -73,35 +73,35 @@ Example:: os> source=people | eval `'23:59:59' + 0` = ADDTIME(TIME('23:59:59'), DATE('2004-01-01')) | fields `'23:59:59' + 0` fetched rows / total rows = 1/1 - +------------------+ - | '23:59:59' + 0 | - |------------------| - | 23:59:59 | - +------------------+ + +----------------+ + | '23:59:59' + 0 | + |----------------| + | 23:59:59 | + +----------------+ os> source=people | eval `'2004-01-01' + '23:59:59'` = ADDTIME(DATE('2004-01-01'), TIME('23:59:59')) | fields `'2004-01-01' + '23:59:59'` fetched rows / total rows = 1/1 - +-----------------------------+ - | '2004-01-01' + '23:59:59' | - |-----------------------------| - | 2004-01-01 23:59:59 | - +-----------------------------+ - - os> source=people | eval `'10:20:30' + '00:05:42'` = ADDTIME(TIME('10:20:30'), TIME('00:05:42')) | fields `'10:20:30' + '00:05:42'` - fetched rows / total rows = 1/1 +---------------------------+ - | '10:20:30' + '00:05:42' | + | '2004-01-01' + '23:59:59' | |---------------------------| - | 10:26:12 | + | 2004-01-01 23:59:59 | +---------------------------+ + os> source=people | eval `'10:20:30' + '00:05:42'` = ADDTIME(TIME('10:20:30'), TIME('00:05:42')) | fields `'10:20:30' + '00:05:42'` + fetched rows / total rows = 1/1 + +-------------------------+ + | '10:20:30' + '00:05:42' | + |-------------------------| + | 10:26:12 | + +-------------------------+ + os> source=people | eval `'2007-02-28 10:20:30' + '20:40:50'` = ADDTIME(TIMESTAMP('2007-02-28 10:20:30'), TIMESTAMP('2002-03-04 20:40:50')) | fields `'2007-02-28 10:20:30' + '20:40:50'` fetched rows / total rows = 1/1 - +--------------------------------------+ - | '2007-02-28 10:20:30' + '20:40:50' | - |--------------------------------------| - | 2007-03-01 07:01:20 | - +--------------------------------------+ + +------------------------------------+ + | '2007-02-28 10:20:30' + '20:40:50' | + |------------------------------------| + | 2007-03-01 07:01:20 | + +------------------------------------+ CONVERT_TZ @@ -121,121 +121,121 @@ Example:: os> source=people | eval `convert_tz('2008-05-15 12:00:00','+00:00','+10:00')` = convert_tz('2008-05-15 12:00:00','+00:00','+10:00') | fields `convert_tz('2008-05-15 12:00:00','+00:00','+10:00')` fetched rows / total rows = 1/1 - +-------------------------------------------------------+ - | convert_tz('2008-05-15 12:00:00','+00:00','+10:00') | - 
|-------------------------------------------------------| - | 2008-05-15 22:00:00 | - +-------------------------------------------------------+ + +-----------------------------------------------------+ + | convert_tz('2008-05-15 12:00:00','+00:00','+10:00') | + |-----------------------------------------------------| + | 2008-05-15 22:00:00 | + +-----------------------------------------------------+ The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range, such as +15:00 in this example will return null. Example:: os> source=people | eval `convert_tz('2008-05-15 12:00:00','+00:00','+15:00')` = convert_tz('2008-05-15 12:00:00','+00:00','+15:00')| fields `convert_tz('2008-05-15 12:00:00','+00:00','+15:00')` fetched rows / total rows = 1/1 - +-------------------------------------------------------+ - | convert_tz('2008-05-15 12:00:00','+00:00','+15:00') | - |-------------------------------------------------------| - | null | - +-------------------------------------------------------+ + +-----------------------------------------------------+ + | convert_tz('2008-05-15 12:00:00','+00:00','+15:00') | + |-----------------------------------------------------| + | null | + +-----------------------------------------------------+ Conversion from a positive timezone to a negative timezone that goes over date line. Example:: os> source=people | eval `convert_tz('2008-05-15 12:00:00','+03:30','-10:00')` = convert_tz('2008-05-15 12:00:00','+03:30','-10:00') | fields `convert_tz('2008-05-15 12:00:00','+03:30','-10:00')` fetched rows / total rows = 1/1 - +-------------------------------------------------------+ - | convert_tz('2008-05-15 12:00:00','+03:30','-10:00') | - |-------------------------------------------------------| - | 2008-05-14 22:30:00 | - +-------------------------------------------------------+ + +-----------------------------------------------------+ + | convert_tz('2008-05-15 12:00:00','+03:30','-10:00') | + |-----------------------------------------------------| + | 2008-05-14 22:30:00 | + +-----------------------------------------------------+ Valid dates are required in convert_tz, invalid dates such as April 31st (not a date in the Gregorian calendar) will result in null. Example:: os> source=people | eval `convert_tz('2008-04-31 12:00:00','+03:30','-10:00')` = convert_tz('2008-04-31 12:00:00','+03:30','-10:00') | fields `convert_tz('2008-04-31 12:00:00','+03:30','-10:00')` fetched rows / total rows = 1/1 - +-------------------------------------------------------+ - | convert_tz('2008-04-31 12:00:00','+03:30','-10:00') | - |-------------------------------------------------------| - | null | - +-------------------------------------------------------+ + +-----------------------------------------------------+ + | convert_tz('2008-04-31 12:00:00','+03:30','-10:00') | + |-----------------------------------------------------| + | null | + +-----------------------------------------------------+ Valid dates are required in convert_tz, invalid dates such as February 30th (not a date in the Gregorian calendar) will result in null. 
Example:: os> source=people | eval `convert_tz('2008-02-30 12:00:00','+03:30','-10:00')` = convert_tz('2008-02-30 12:00:00','+03:30','-10:00') | fields `convert_tz('2008-02-30 12:00:00','+03:30','-10:00')` fetched rows / total rows = 1/1 - +-------------------------------------------------------+ - | convert_tz('2008-02-30 12:00:00','+03:30','-10:00') | - |-------------------------------------------------------| - | null | - +-------------------------------------------------------+ + +-----------------------------------------------------+ + | convert_tz('2008-02-30 12:00:00','+03:30','-10:00') | + |-----------------------------------------------------| + | null | + +-----------------------------------------------------+ February 29th 2008 is a valid date because it is a leap year. Example:: os> source=people | eval `convert_tz('2008-02-29 12:00:00','+03:30','-10:00')` = convert_tz('2008-02-29 12:00:00','+03:30','-10:00') | fields `convert_tz('2008-02-29 12:00:00','+03:30','-10:00')` fetched rows / total rows = 1/1 - +-------------------------------------------------------+ - | convert_tz('2008-02-29 12:00:00','+03:30','-10:00') | - |-------------------------------------------------------| - | 2008-02-28 22:30:00 | - +-------------------------------------------------------+ + +-----------------------------------------------------+ + | convert_tz('2008-02-29 12:00:00','+03:30','-10:00') | + |-----------------------------------------------------| + | 2008-02-28 22:30:00 | + +-----------------------------------------------------+ Valid dates are required in convert_tz, invalid dates such as February 29th 2007 (2007 is not a leap year) will result in null. Example:: os> source=people | eval `convert_tz('2007-02-29 12:00:00','+03:30','-10:00')` = convert_tz('2007-02-29 12:00:00','+03:30','-10:00') | fields `convert_tz('2007-02-29 12:00:00','+03:30','-10:00')` fetched rows / total rows = 1/1 - +-------------------------------------------------------+ - | convert_tz('2007-02-29 12:00:00','+03:30','-10:00') | - |-------------------------------------------------------| - | null | - +-------------------------------------------------------+ + +-----------------------------------------------------+ + | convert_tz('2007-02-29 12:00:00','+03:30','-10:00') | + |-----------------------------------------------------| + | null | + +-----------------------------------------------------+ The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range, such as +14:01 in this example will return null. Example:: os> source=people | eval `convert_tz('2008-02-01 12:00:00','+14:01','+00:00')` = convert_tz('2008-02-01 12:00:00','+14:01','+00:00') | fields `convert_tz('2008-02-01 12:00:00','+14:01','+00:00')` fetched rows / total rows = 1/1 - +-------------------------------------------------------+ - | convert_tz('2008-02-01 12:00:00','+14:01','+00:00') | - |-------------------------------------------------------| - | null | - +-------------------------------------------------------+ + +-----------------------------------------------------+ + | convert_tz('2008-02-01 12:00:00','+14:01','+00:00') | + |-----------------------------------------------------| + | null | + +-----------------------------------------------------+ The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range, such as +14:00 in this example will return a correctly converted date time object. 
Example:: os> source=people | eval `convert_tz('2008-02-01 12:00:00','+14:00','+00:00')` = convert_tz('2008-02-01 12:00:00','+14:00','+00:00') | fields `convert_tz('2008-02-01 12:00:00','+14:00','+00:00')` fetched rows / total rows = 1/1 - +-------------------------------------------------------+ - | convert_tz('2008-02-01 12:00:00','+14:00','+00:00') | - |-------------------------------------------------------| - | 2008-01-31 22:00:00 | - +-------------------------------------------------------+ + +-----------------------------------------------------+ + | convert_tz('2008-02-01 12:00:00','+14:00','+00:00') | + |-----------------------------------------------------| + | 2008-01-31 22:00:00 | + +-----------------------------------------------------+ The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range, such as -14:00 will result in null Example:: os> source=people | eval `convert_tz('2008-02-01 12:00:00','-14:00','+00:00')` = convert_tz('2008-02-01 12:00:00','-14:00','+00:00') | fields `convert_tz('2008-02-01 12:00:00','-14:00','+00:00')` fetched rows / total rows = 1/1 - +-------------------------------------------------------+ - | convert_tz('2008-02-01 12:00:00','-14:00','+00:00') | - |-------------------------------------------------------| - | null | - +-------------------------------------------------------+ + +-----------------------------------------------------+ + | convert_tz('2008-02-01 12:00:00','-14:00','+00:00') | + |-----------------------------------------------------| + | null | + +-----------------------------------------------------+ The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. This timezone is within range so it is valid and will convert the time. Example:: os> source=people | eval `convert_tz('2008-02-01 12:00:00','-13:59','+00:00')` = convert_tz('2008-02-01 12:00:00','-13:59','+00:00') | fields `convert_tz('2008-02-01 12:00:00','-13:59','+00:00')` fetched rows / total rows = 1/1 - +-------------------------------------------------------+ - | convert_tz('2008-02-01 12:00:00','-13:59','+00:00') | - |-------------------------------------------------------| - | 2008-02-02 01:59:00 | - +-------------------------------------------------------+ + +-----------------------------------------------------+ + | convert_tz('2008-02-01 12:00:00','-13:59','+00:00') | + |-----------------------------------------------------| + | 2008-02-02 01:59:00 | + +-----------------------------------------------------+ CURDATE @@ -255,11 +255,11 @@ Example:: > source=people | eval `CURDATE()` = CURDATE() | fields `CURDATE()` fetched rows / total rows = 1/1 - +-------------+ - | CURDATE() | - |-------------| - | 2022-08-02 | - +-------------+ + +------------+ + | CURDATE() | + |------------| + | 2022-08-02 | + +------------+ CURRENT_DATE @@ -336,11 +336,11 @@ Example:: > source=people | eval `value_1` = CURTIME(), `value_2` = CURTIME() | fields `value_1`, `value_2` fetched rows / total rows = 1/1 - +-----------+-----------+ - | value_1 | value_2 | - |-----------+-----------| - | 15:39:05 | 15:39:05 | - +-----------+-----------+ + +----------+----------+ + | value_1 | value_2 | + |----------+----------| + | 15:39:05 | 15:39:05 | + +----------+----------+ DATE @@ -359,35 +359,35 @@ Example:: os> source=people | eval `DATE('2020-08-26')` = DATE('2020-08-26') | fields `DATE('2020-08-26')` fetched rows / total rows = 1/1 - +----------------------+ - | DATE('2020-08-26') | - |----------------------| - | 2020-08-26 | - 
+----------------------+ + +--------------------+ + | DATE('2020-08-26') | + |--------------------| + | 2020-08-26 | + +--------------------+ os> source=people | eval `DATE(TIMESTAMP('2020-08-26 13:49:00'))` = DATE(TIMESTAMP('2020-08-26 13:49:00')) | fields `DATE(TIMESTAMP('2020-08-26 13:49:00'))` fetched rows / total rows = 1/1 - +------------------------------------------+ - | DATE(TIMESTAMP('2020-08-26 13:49:00')) | - |------------------------------------------| - | 2020-08-26 | - +------------------------------------------+ + +----------------------------------------+ + | DATE(TIMESTAMP('2020-08-26 13:49:00')) | + |----------------------------------------| + | 2020-08-26 | + +----------------------------------------+ os> source=people | eval `DATE('2020-08-26 13:49')` = DATE('2020-08-26 13:49') | fields `DATE('2020-08-26 13:49')` fetched rows / total rows = 1/1 - +----------------------------+ - | DATE('2020-08-26 13:49') | - |----------------------------| - | 2020-08-26 | - +----------------------------+ + +--------------------------+ + | DATE('2020-08-26 13:49') | + |--------------------------| + | 2020-08-26 | + +--------------------------+ os> source=people | eval `DATE('2020-08-26 13:49')` = DATE('2020-08-26 13:49') | fields `DATE('2020-08-26 13:49')` fetched rows / total rows = 1/1 - +----------------------------+ - | DATE('2020-08-26 13:49') | - |----------------------------| - | 2020-08-26 | - +----------------------------+ + +--------------------------+ + | DATE('2020-08-26 13:49') | + |--------------------------| + | 2020-08-26 | + +--------------------------+ DATE_ADD @@ -410,11 +410,11 @@ Example:: os> source=people | eval `'2020-08-26' + 1h` = DATE_ADD(DATE('2020-08-26'), INTERVAL 1 HOUR), `ts '2020-08-26 01:01:01' + 1d` = DATE_ADD(TIMESTAMP('2020-08-26 01:01:01'), INTERVAL 1 DAY) | fields `'2020-08-26' + 1h`, `ts '2020-08-26 01:01:01' + 1d` fetched rows / total rows = 1/1 - +---------------------+---------------------------------+ - | '2020-08-26' + 1h | ts '2020-08-26 01:01:01' + 1d | - |---------------------+---------------------------------| - | 2020-08-26 01:00:00 | 2020-08-27 01:01:01 | - +---------------------+---------------------------------+ + +---------------------+-------------------------------+ + | '2020-08-26' + 1h | ts '2020-08-26 01:01:01' + 1d | + |---------------------+-------------------------------| + | 2020-08-26 01:00:00 | 2020-08-27 01:01:01 | + +---------------------+-------------------------------+ DATE_FORMAT @@ -509,11 +509,11 @@ Example:: os> source=people | eval `DATE_FORMAT('1998-01-31 13:14:15.012345', '%T.%f')` = DATE_FORMAT('1998-01-31 13:14:15.012345', '%T.%f'), `DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), '%Y-%b-%D %r')` = DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), '%Y-%b-%D %r') | fields `DATE_FORMAT('1998-01-31 13:14:15.012345', '%T.%f')`, `DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), '%Y-%b-%D %r')` fetched rows / total rows = 1/1 - +------------------------------------------------------+-----------------------------------------------------------------------+ - | DATE_FORMAT('1998-01-31 13:14:15.012345', '%T.%f') | DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), '%Y-%b-%D %r') | - |------------------------------------------------------+-----------------------------------------------------------------------| - | 13:14:15.012345 | 1998-Jan-31st 01:14:15 PM | - +------------------------------------------------------+-----------------------------------------------------------------------+ + 
+----------------------------------------------------+---------------------------------------------------------------------+ + | DATE_FORMAT('1998-01-31 13:14:15.012345', '%T.%f') | DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), '%Y-%b-%D %r') | + |----------------------------------------------------+---------------------------------------------------------------------| + | 13:14:15.012345 | 1998-Jan-31st 01:14:15 PM | + +----------------------------------------------------+---------------------------------------------------------------------+ DATETIME @@ -538,11 +538,11 @@ Example:: os> source=people | eval `DATETIME('2004-02-28 23:00:00-10:00', '+10:00')` = DATETIME('2004-02-28 23:00:00-10:00', '+10:00') | fields `DATETIME('2004-02-28 23:00:00-10:00', '+10:00')` fetched rows / total rows = 1/1 - +---------------------------------------------------+ - | DATETIME('2004-02-28 23:00:00-10:00', '+10:00') | - |---------------------------------------------------| - | 2004-02-29 19:00:00 | - +---------------------------------------------------+ + +-------------------------------------------------+ + | DATETIME('2004-02-28 23:00:00-10:00', '+10:00') | + |-------------------------------------------------| + | 2004-02-29 19:00:00 | + +-------------------------------------------------+ The valid timezone range for convert_tz is (-13:59, +14:00) inclusive. Timezones outside of the range will result in null. @@ -550,11 +550,11 @@ Example:: os> source=people | eval `DATETIME('2008-01-01 02:00:00', '-14:00')` = DATETIME('2008-01-01 02:00:00', '-14:00') | fields `DATETIME('2008-01-01 02:00:00', '-14:00')` fetched rows / total rows = 1/1 - +---------------------------------------------+ - | DATETIME('2008-01-01 02:00:00', '-14:00') | - |---------------------------------------------| - | null | - +---------------------------------------------+ + +-------------------------------------------+ + | DATETIME('2008-01-01 02:00:00', '-14:00') | + |-------------------------------------------| + | null | + +-------------------------------------------+ DATE_SUB @@ -577,11 +577,11 @@ Example:: os> source=people | eval `'2008-01-02' - 31d` = DATE_SUB(DATE('2008-01-02'), INTERVAL 31 DAY), `ts '2020-08-26 01:01:01' + 1h` = DATE_SUB(TIMESTAMP('2020-08-26 01:01:01'), INTERVAL 1 HOUR) | fields `'2008-01-02' - 31d`, `ts '2020-08-26 01:01:01' + 1h` fetched rows / total rows = 1/1 - +----------------------+---------------------------------+ - | '2008-01-02' - 31d | ts '2020-08-26 01:01:01' + 1h | - |----------------------+---------------------------------| - | 2007-12-02 00:00:00 | 2020-08-26 00:01:01 | - +----------------------+---------------------------------+ + +---------------------+-------------------------------+ + | '2008-01-02' - 31d | ts '2020-08-26 01:01:01' + 1h | + |---------------------+-------------------------------| + | 2007-12-02 00:00:00 | 2020-08-26 00:01:01 | + +---------------------+-------------------------------+ DATEDIFF @@ -597,11 +597,11 @@ Example:: os> source=people | eval `'2000-01-02' - '2000-01-01'` = DATEDIFF(TIMESTAMP('2000-01-02 00:00:00'), TIMESTAMP('2000-01-01 23:59:59')), `'2001-02-01' - '2004-01-01'` = DATEDIFF(DATE('2001-02-01'), TIMESTAMP('2004-01-01 00:00:00')), `today - today` = DATEDIFF(TIME('23:59:59'), TIME('00:00:00')) | fields `'2000-01-02' - '2000-01-01'`, `'2001-02-01' - '2004-01-01'`, `today - today` fetched rows / total rows = 1/1 - +-------------------------------+-------------------------------+-----------------+ - | '2000-01-02' - '2000-01-01' | '2001-02-01' - 
'2004-01-01' | today - today | - |-------------------------------+-------------------------------+-----------------| - | 1 | -1064 | 0 | - +-------------------------------+-------------------------------+-----------------+ + +-----------------------------+-----------------------------+---------------+ + | '2000-01-02' - '2000-01-01' | '2001-02-01' - '2004-01-01' | today - today | + |-----------------------------+-----------------------------+---------------| + | 1 | -1064 | 0 | + +-----------------------------+-----------------------------+---------------+ DAY @@ -622,11 +622,11 @@ Example:: os> source=people | eval `DAY(DATE('2020-08-26'))` = DAY(DATE('2020-08-26')) | fields `DAY(DATE('2020-08-26'))` fetched rows / total rows = 1/1 - +---------------------------+ - | DAY(DATE('2020-08-26')) | - |---------------------------| - | 26 | - +---------------------------+ + +-------------------------+ + | DAY(DATE('2020-08-26')) | + |-------------------------| + | 26 | + +-------------------------+ DAYNAME @@ -645,11 +645,11 @@ Example:: os> source=people | eval `DAYNAME(DATE('2020-08-26'))` = DAYNAME(DATE('2020-08-26')) | fields `DAYNAME(DATE('2020-08-26'))` fetched rows / total rows = 1/1 - +-------------------------------+ - | DAYNAME(DATE('2020-08-26')) | - |-------------------------------| - | Wednesday | - +-------------------------------+ + +-----------------------------+ + | DAYNAME(DATE('2020-08-26')) | + |-----------------------------| + | Wednesday | + +-----------------------------+ DAYOFMONTH @@ -670,11 +670,11 @@ Example:: os> source=people | eval `DAYOFMONTH(DATE('2020-08-26'))` = DAYOFMONTH(DATE('2020-08-26')) | fields `DAYOFMONTH(DATE('2020-08-26'))` fetched rows / total rows = 1/1 - +----------------------------------+ - | DAYOFMONTH(DATE('2020-08-26')) | - |----------------------------------| - | 26 | - +----------------------------------+ + +--------------------------------+ + | DAYOFMONTH(DATE('2020-08-26')) | + |--------------------------------| + | 26 | + +--------------------------------+ DAY_OF_MONTH @@ -695,11 +695,11 @@ Example:: os> source=people | eval `DAY_OF_MONTH(DATE('2020-08-26'))` = DAY_OF_MONTH(DATE('2020-08-26')) | fields `DAY_OF_MONTH(DATE('2020-08-26'))` fetched rows / total rows = 1/1 - +------------------------------------+ - | DAY_OF_MONTH(DATE('2020-08-26')) | - |------------------------------------| - | 26 | - +------------------------------------+ + +----------------------------------+ + | DAY_OF_MONTH(DATE('2020-08-26')) | + |----------------------------------| + | 26 | + +----------------------------------+ DAYOFWEEK @@ -720,11 +720,11 @@ Example:: os> source=people | eval `DAYOFWEEK(DATE('2020-08-26'))` = DAYOFWEEK(DATE('2020-08-26')) | fields `DAYOFWEEK(DATE('2020-08-26'))` fetched rows / total rows = 1/1 - +---------------------------------+ - | DAYOFWEEK(DATE('2020-08-26')) | - |---------------------------------| - | 4 | - +---------------------------------+ + +-------------------------------+ + | DAYOFWEEK(DATE('2020-08-26')) | + |-------------------------------| + | 4 | + +-------------------------------+ DAY_OF_WEEK @@ -745,11 +745,11 @@ Example:: os> source=people | eval `DAY_OF_WEEK(DATE('2020-08-26'))` = DAY_OF_WEEK(DATE('2020-08-26')) | fields `DAY_OF_WEEK(DATE('2020-08-26'))` fetched rows / total rows = 1/1 - +-----------------------------------+ - | DAY_OF_WEEK(DATE('2020-08-26')) | - |-----------------------------------| - | 4 | - +-----------------------------------+ + +---------------------------------+ + | 
DAY_OF_WEEK(DATE('2020-08-26')) | + |---------------------------------| + | 4 | + +---------------------------------+ DAYOFYEAR @@ -770,11 +770,11 @@ Example:: os> source=people | eval `DAYOFYEAR(DATE('2020-08-26'))` = DAYOFYEAR(DATE('2020-08-26')) | fields `DAYOFYEAR(DATE('2020-08-26'))` fetched rows / total rows = 1/1 - +---------------------------------+ - | DAYOFYEAR(DATE('2020-08-26')) | - |---------------------------------| - | 239 | - +---------------------------------+ + +-------------------------------+ + | DAYOFYEAR(DATE('2020-08-26')) | + |-------------------------------| + | 239 | + +-------------------------------+ DAY_OF_YEAR @@ -795,11 +795,11 @@ Example:: os> source=people | eval `DAY_OF_YEAR(DATE('2020-08-26'))` = DAY_OF_YEAR(DATE('2020-08-26')) | fields `DAY_OF_YEAR(DATE('2020-08-26'))` fetched rows / total rows = 1/1 - +-----------------------------------+ - | DAY_OF_YEAR(DATE('2020-08-26')) | - |-----------------------------------| - | 239 | - +-----------------------------------+ + +---------------------------------+ + | DAY_OF_YEAR(DATE('2020-08-26')) | + |---------------------------------| + | 239 | + +---------------------------------+ EXTRACT @@ -866,11 +866,11 @@ Example:: os> source=people | eval `extract(YEAR_MONTH FROM "2023-02-07 10:11:12")` = extract(YEAR_MONTH FROM "2023-02-07 10:11:12") | fields `extract(YEAR_MONTH FROM "2023-02-07 10:11:12")` fetched rows / total rows = 1/1 - +--------------------------------------------------+ - | extract(YEAR_MONTH FROM "2023-02-07 10:11:12") | - |--------------------------------------------------| - | 202302 | - +--------------------------------------------------+ + +------------------------------------------------+ + | extract(YEAR_MONTH FROM "2023-02-07 10:11:12") | + |------------------------------------------------| + | 202302 | + +------------------------------------------------+ FROM_DAYS @@ -889,11 +889,11 @@ Example:: os> source=people | eval `FROM_DAYS(733687)` = FROM_DAYS(733687) | fields `FROM_DAYS(733687)` fetched rows / total rows = 1/1 - +---------------------+ - | FROM_DAYS(733687) | - |---------------------| - | 2008-10-07 | - +---------------------+ + +-------------------+ + | FROM_DAYS(733687) | + |-------------------| + | 2008-10-07 | + +-------------------+ FROM_UNIXTIME @@ -917,19 +917,19 @@ Examples:: os> source=people | eval `FROM_UNIXTIME(1220249547)` = FROM_UNIXTIME(1220249547) | fields `FROM_UNIXTIME(1220249547)` fetched rows / total rows = 1/1 - +-----------------------------+ - | FROM_UNIXTIME(1220249547) | - |-----------------------------| - | 2008-09-01 06:12:27 | - +-----------------------------+ + +---------------------------+ + | FROM_UNIXTIME(1220249547) | + |---------------------------| + | 2008-09-01 06:12:27 | + +---------------------------+ os> source=people | eval `FROM_UNIXTIME(1220249547, '%T')` = FROM_UNIXTIME(1220249547, '%T') | fields `FROM_UNIXTIME(1220249547, '%T')` fetched rows / total rows = 1/1 - +-----------------------------------+ - | FROM_UNIXTIME(1220249547, '%T') | - |-----------------------------------| - | 06:12:27 | - +-----------------------------------+ + +---------------------------------+ + | FROM_UNIXTIME(1220249547, '%T') | + |---------------------------------| + | 06:12:27 | + +---------------------------------+ GET_FORMAT @@ -947,11 +947,11 @@ Examples:: os> source=people | eval `GET_FORMAT(DATE, 'USA')` = GET_FORMAT(DATE, 'USA') | fields `GET_FORMAT(DATE, 'USA')` fetched rows / total rows = 1/1 - +---------------------------+ - | GET_FORMAT(DATE, 'USA') 
| - |---------------------------| - | %m.%d.%Y | - +---------------------------+ + +-------------------------+ + | GET_FORMAT(DATE, 'USA') | + |-------------------------| + | %m.%d.%Y | + +-------------------------+ HOUR @@ -972,11 +972,11 @@ Example:: os> source=people | eval `HOUR(TIME('01:02:03'))` = HOUR(TIME('01:02:03')) | fields `HOUR(TIME('01:02:03'))` fetched rows / total rows = 1/1 - +--------------------------+ - | HOUR(TIME('01:02:03')) | - |--------------------------| - | 1 | - +--------------------------+ + +------------------------+ + | HOUR(TIME('01:02:03')) | + |------------------------| + | 1 | + +------------------------+ HOUR_OF_DAY @@ -997,11 +997,11 @@ Example:: os> source=people | eval `HOUR_OF_DAY(TIME('01:02:03'))` = HOUR_OF_DAY(TIME('01:02:03')) | fields `HOUR_OF_DAY(TIME('01:02:03'))` fetched rows / total rows = 1/1 - +---------------------------------+ - | HOUR_OF_DAY(TIME('01:02:03')) | - |---------------------------------| - | 1 | - +---------------------------------+ + +-------------------------------+ + | HOUR_OF_DAY(TIME('01:02:03')) | + |-------------------------------| + | 1 | + +-------------------------------+ LAST_DAY @@ -1017,11 +1017,11 @@ Example:: os> source=people | eval `last_day('2023-02-06')` = last_day('2023-02-06') | fields `last_day('2023-02-06')` fetched rows / total rows = 1/1 - +--------------------------+ - | last_day('2023-02-06') | - |--------------------------| - | 2023-02-28 | - +--------------------------+ + +------------------------+ + | last_day('2023-02-06') | + |------------------------| + | 2023-02-28 | + +------------------------+ LOCALTIMESTAMP @@ -1089,11 +1089,11 @@ Example:: os> source=people | eval `MAKEDATE(1945, 5.9)` = MAKEDATE(1945, 5.9), `MAKEDATE(1984, 1984)` = MAKEDATE(1984, 1984) | fields `MAKEDATE(1945, 5.9)`, `MAKEDATE(1984, 1984)` fetched rows / total rows = 1/1 - +-----------------------+------------------------+ - | MAKEDATE(1945, 5.9) | MAKEDATE(1984, 1984) | - |-----------------------+------------------------| - | 1945-01-06 | 1989-06-06 | - +-----------------------+------------------------+ + +---------------------+----------------------+ + | MAKEDATE(1945, 5.9) | MAKEDATE(1984, 1984) | + |---------------------+----------------------| + | 1945-01-06 | 1989-06-06 | + +---------------------+----------------------+ MAKETIME @@ -1121,11 +1121,11 @@ Example:: os> source=people | eval `MAKETIME(20, 30, 40)` = MAKETIME(20, 30, 40), `MAKETIME(20.2, 49.5, 42.100502)` = MAKETIME(20.2, 49.5, 42.100502) | fields `MAKETIME(20, 30, 40)`, `MAKETIME(20.2, 49.5, 42.100502)` fetched rows / total rows = 1/1 - +------------------------+-----------------------------------+ - | MAKETIME(20, 30, 40) | MAKETIME(20.2, 49.5, 42.100502) | - |------------------------+-----------------------------------| - | 20:30:40 | 20:50:42.100502 | - +------------------------+-----------------------------------+ + +----------------------+---------------------------------+ + | MAKETIME(20, 30, 40) | MAKETIME(20.2, 49.5, 42.100502) | + |----------------------+---------------------------------| + | 20:30:40 | 20:50:42.100502 | + +----------------------+---------------------------------+ MICROSECOND @@ -1144,11 +1144,11 @@ Example:: os> source=people | eval `MICROSECOND(TIME('01:02:03.123456'))` = MICROSECOND(TIME('01:02:03.123456')) | fields `MICROSECOND(TIME('01:02:03.123456'))` fetched rows / total rows = 1/1 - +----------------------------------------+ - | MICROSECOND(TIME('01:02:03.123456')) | - |----------------------------------------| - | 
123456 | - +----------------------------------------+ + +--------------------------------------+ + | MICROSECOND(TIME('01:02:03.123456')) | + |--------------------------------------| + | 123456 | + +--------------------------------------+ MINUTE @@ -1169,11 +1169,11 @@ Example:: os> source=people | eval `MINUTE(TIME('01:02:03'))` = MINUTE(TIME('01:02:03')) | fields `MINUTE(TIME('01:02:03'))` fetched rows / total rows = 1/1 - +----------------------------+ - | MINUTE(TIME('01:02:03')) | - |----------------------------| - | 2 | - +----------------------------+ + +--------------------------+ + | MINUTE(TIME('01:02:03')) | + |--------------------------| + | 2 | + +--------------------------+ MINUTE_OF_DAY @@ -1192,11 +1192,11 @@ Example:: os> source=people | eval `MINUTE_OF_DAY(TIME('01:02:03'))` = MINUTE_OF_DAY(TIME('01:02:03')) | fields `MINUTE_OF_DAY(TIME('01:02:03'))` fetched rows / total rows = 1/1 - +-----------------------------------+ - | MINUTE_OF_DAY(TIME('01:02:03')) | - |-----------------------------------| - | 62 | - +-----------------------------------+ + +---------------------------------+ + | MINUTE_OF_DAY(TIME('01:02:03')) | + |---------------------------------| + | 62 | + +---------------------------------+ MINUTE_OF_HOUR @@ -1217,11 +1217,11 @@ Example:: os> source=people | eval `MINUTE_OF_HOUR(TIME('01:02:03'))` = MINUTE_OF_HOUR(TIME('01:02:03')) | fields `MINUTE_OF_HOUR(TIME('01:02:03'))` fetched rows / total rows = 1/1 - +------------------------------------+ - | MINUTE_OF_HOUR(TIME('01:02:03')) | - |------------------------------------| - | 2 | - +------------------------------------+ + +----------------------------------+ + | MINUTE_OF_HOUR(TIME('01:02:03')) | + |----------------------------------| + | 2 | + +----------------------------------+ MONTH @@ -1242,11 +1242,11 @@ Example:: os> source=people | eval `MONTH(DATE('2020-08-26'))` = MONTH(DATE('2020-08-26')) | fields `MONTH(DATE('2020-08-26'))` fetched rows / total rows = 1/1 - +-----------------------------+ - | MONTH(DATE('2020-08-26')) | - |-----------------------------| - | 8 | - +-----------------------------+ + +---------------------------+ + | MONTH(DATE('2020-08-26')) | + |---------------------------| + | 8 | + +---------------------------+ MONTH_OF_YEAR @@ -1267,11 +1267,11 @@ Example:: os> source=people | eval `MONTH_OF_YEAR(DATE('2020-08-26'))` = MONTH_OF_YEAR(DATE('2020-08-26')) | fields `MONTH_OF_YEAR(DATE('2020-08-26'))` fetched rows / total rows = 1/1 - +-------------------------------------+ - | MONTH_OF_YEAR(DATE('2020-08-26')) | - |-------------------------------------| - | 8 | - +-------------------------------------+ + +-----------------------------------+ + | MONTH_OF_YEAR(DATE('2020-08-26')) | + |-----------------------------------| + | 8 | + +-----------------------------------+ MONTHNAME @@ -1290,11 +1290,11 @@ Example:: os> source=people | eval `MONTHNAME(DATE('2020-08-26'))` = MONTHNAME(DATE('2020-08-26')) | fields `MONTHNAME(DATE('2020-08-26'))` fetched rows / total rows = 1/1 - +---------------------------------+ - | MONTHNAME(DATE('2020-08-26')) | - |---------------------------------| - | August | - +---------------------------------+ + +-------------------------------+ + | MONTHNAME(DATE('2020-08-26')) | + |-------------------------------| + | August | + +-------------------------------+ NOW @@ -1337,11 +1337,11 @@ Example:: os> source=people | eval `PERIOD_ADD(200801, 2)` = PERIOD_ADD(200801, 2), `PERIOD_ADD(200801, -12)` = PERIOD_ADD(200801, -12) | fields `PERIOD_ADD(200801, 2)`, 
`PERIOD_ADD(200801, -12)` fetched rows / total rows = 1/1 - +-------------------------+---------------------------+ - | PERIOD_ADD(200801, 2) | PERIOD_ADD(200801, -12) | - |-------------------------+---------------------------| - | 200803 | 200701 | - +-------------------------+---------------------------+ + +-----------------------+-------------------------+ + | PERIOD_ADD(200801, 2) | PERIOD_ADD(200801, -12) | + |-----------------------+-------------------------| + | 200803 | 200701 | + +-----------------------+-------------------------+ PERIOD_DIFF @@ -1360,11 +1360,11 @@ Example:: os> source=people | eval `PERIOD_DIFF(200802, 200703)` = PERIOD_DIFF(200802, 200703), `PERIOD_DIFF(200802, 201003)` = PERIOD_DIFF(200802, 201003) | fields `PERIOD_DIFF(200802, 200703)`, `PERIOD_DIFF(200802, 201003)` fetched rows / total rows = 1/1 - +-------------------------------+-------------------------------+ - | PERIOD_DIFF(200802, 200703) | PERIOD_DIFF(200802, 201003) | - |-------------------------------+-------------------------------| - | 11 | -25 | - +-------------------------------+-------------------------------+ + +-----------------------------+-----------------------------+ + | PERIOD_DIFF(200802, 200703) | PERIOD_DIFF(200802, 201003) | + |-----------------------------+-----------------------------| + | 11 | -25 | + +-----------------------------+-----------------------------+ QUARTER @@ -1383,11 +1383,11 @@ Example:: os> source=people | eval `QUARTER(DATE('2020-08-26'))` = QUARTER(DATE('2020-08-26')) | fields `QUARTER(DATE('2020-08-26'))` fetched rows / total rows = 1/1 - +-------------------------------+ - | QUARTER(DATE('2020-08-26')) | - |-------------------------------| - | 3 | - +-------------------------------+ + +-----------------------------+ + | QUARTER(DATE('2020-08-26')) | + |-----------------------------| + | 3 | + +-----------------------------+ SEC_TO_TIME @@ -1409,11 +1409,11 @@ Example:: os> source=people | eval `SEC_TO_TIME(3601)` = SEC_TO_TIME(3601) | eval `SEC_TO_TIME(1234.123)` = SEC_TO_TIME(1234.123) | fields `SEC_TO_TIME(3601)`, `SEC_TO_TIME(1234.123)` fetched rows / total rows = 1/1 - +---------------------+-------------------------+ - | SEC_TO_TIME(3601) | SEC_TO_TIME(1234.123) | - |---------------------+-------------------------| - | 01:00:01 | 00:20:34.123 | - +---------------------+-------------------------+ + +-------------------+-----------------------+ + | SEC_TO_TIME(3601) | SEC_TO_TIME(1234.123) | + |-------------------+-----------------------| + | 01:00:01 | 00:20:34.123 | + +-------------------+-----------------------+ SECOND @@ -1434,11 +1434,11 @@ Example:: os> source=people | eval `SECOND(TIME('01:02:03'))` = SECOND(TIME('01:02:03')) | fields `SECOND(TIME('01:02:03'))` fetched rows / total rows = 1/1 - +----------------------------+ - | SECOND(TIME('01:02:03')) | - |----------------------------| - | 3 | - +----------------------------+ + +--------------------------+ + | SECOND(TIME('01:02:03')) | + |--------------------------| + | 3 | + +--------------------------+ SECOND_OF_MINUTE @@ -1459,11 +1459,11 @@ Example:: os> source=people | eval `SECOND_OF_MINUTE(TIME('01:02:03'))` = SECOND_OF_MINUTE(TIME('01:02:03')) | fields `SECOND_OF_MINUTE(TIME('01:02:03'))` fetched rows / total rows = 1/1 - +--------------------------------------+ - | SECOND_OF_MINUTE(TIME('01:02:03')) | - |--------------------------------------| - | 3 | - +--------------------------------------+ + +------------------------------------+ + | SECOND_OF_MINUTE(TIME('01:02:03')) | + 
|------------------------------------| + | 3 | + +------------------------------------+ STR_TO_DATE @@ -1485,11 +1485,11 @@ Example:: OS> source=people | eval `str_to_date("01,5,2013", "%d,%m,%Y")` = str_to_date("01,5,2013", "%d,%m,%Y") | fields = `str_to_date("01,5,2013", "%d,%m,%Y")` fetched rows / total rows = 1/1 - +----------------------------------------+ - | str_to_date("01,5,2013", "%d,%m,%Y") | - |----------------------------------------| - | 2013-05-01 00:00:00 | - +----------------------------------------+ + +--------------------------------------+ + | str_to_date("01,5,2013", "%d,%m,%Y") | + |--------------------------------------| + | 2013-05-01 00:00:00 | + +--------------------------------------+ SUBDATE @@ -1519,11 +1519,11 @@ Example:: os> source=people | eval `'2008-01-02' - 31d` = SUBDATE(DATE('2008-01-02'), INTERVAL 31 DAY), `'2020-08-26' - 1` = SUBDATE(DATE('2020-08-26'), 1), `ts '2020-08-26 01:01:01' - 1` = SUBDATE(TIMESTAMP('2020-08-26 01:01:01'), 1) | fields `'2008-01-02' - 31d`, `'2020-08-26' - 1`, `ts '2020-08-26 01:01:01' - 1` fetched rows / total rows = 1/1 - +----------------------+--------------------+--------------------------------+ - | '2008-01-02' - 31d | '2020-08-26' - 1 | ts '2020-08-26 01:01:01' - 1 | - |----------------------+--------------------+--------------------------------| - | 2007-12-02 00:00:00 | 2020-08-25 | 2020-08-25 01:01:01 | - +----------------------+--------------------+--------------------------------+ + +---------------------+------------------+------------------------------+ + | '2008-01-02' - 31d | '2020-08-26' - 1 | ts '2020-08-26 01:01:01' - 1 | + |---------------------+------------------+------------------------------| + | 2007-12-02 00:00:00 | 2020-08-25 | 2020-08-25 01:01:01 | + +---------------------+------------------+------------------------------+ SUBTIME @@ -1556,35 +1556,35 @@ Example:: os> source=people | eval `'23:59:59' - 0` = SUBTIME(TIME('23:59:59'), DATE('2004-01-01')) | fields `'23:59:59' - 0` fetched rows / total rows = 1/1 - +------------------+ - | '23:59:59' - 0 | - |------------------| - | 23:59:59 | - +------------------+ + +----------------+ + | '23:59:59' - 0 | + |----------------| + | 23:59:59 | + +----------------+ os> source=people | eval `'2004-01-01' - '23:59:59'` = SUBTIME(DATE('2004-01-01'), TIME('23:59:59')) | fields `'2004-01-01' - '23:59:59'` fetched rows / total rows = 1/1 - +-----------------------------+ - | '2004-01-01' - '23:59:59' | - |-----------------------------| - | 2003-12-31 00:00:01 | - +-----------------------------+ - - os> source=people | eval `'10:20:30' - '00:05:42'` = SUBTIME(TIME('10:20:30'), TIME('00:05:42')) | fields `'10:20:30' - '00:05:42'` - fetched rows / total rows = 1/1 +---------------------------+ - | '10:20:30' - '00:05:42' | + | '2004-01-01' - '23:59:59' | |---------------------------| - | 10:14:48 | + | 2003-12-31 00:00:01 | +---------------------------+ + os> source=people | eval `'10:20:30' - '00:05:42'` = SUBTIME(TIME('10:20:30'), TIME('00:05:42')) | fields `'10:20:30' - '00:05:42'` + fetched rows / total rows = 1/1 + +-------------------------+ + | '10:20:30' - '00:05:42' | + |-------------------------| + | 10:14:48 | + +-------------------------+ + os> source=people | eval `'2007-03-01 10:20:30' - '20:40:50'` = SUBTIME(TIMESTAMP('2007-03-01 10:20:30'), TIMESTAMP('2002-03-04 20:40:50')) | fields `'2007-03-01 10:20:30' - '20:40:50'` fetched rows / total rows = 1/1 - +--------------------------------------+ - | '2007-03-01 10:20:30' - '20:40:50' | - 
|--------------------------------------| - | 2007-02-28 13:39:40 | - +--------------------------------------+ + +------------------------------------+ + | '2007-03-01 10:20:30' - '20:40:50' | + |------------------------------------| + | 2007-02-28 13:39:40 | + +------------------------------------+ SYSDATE @@ -1630,35 +1630,35 @@ Example:: os> source=people | eval `TIME('13:49:00')` = TIME('13:49:00') | fields `TIME('13:49:00')` fetched rows / total rows = 1/1 - +--------------------+ - | TIME('13:49:00') | - |--------------------| - | 13:49:00 | - +--------------------+ + +------------------+ + | TIME('13:49:00') | + |------------------| + | 13:49:00 | + +------------------+ os> source=people | eval `TIME('13:49')` = TIME('13:49') | fields `TIME('13:49')` fetched rows / total rows = 1/1 - +-----------------+ - | TIME('13:49') | - |-----------------| - | 13:49:00 | - +-----------------+ + +---------------+ + | TIME('13:49') | + |---------------| + | 13:49:00 | + +---------------+ os> source=people | eval `TIME('2020-08-26 13:49:00')` = TIME('2020-08-26 13:49:00') | fields `TIME('2020-08-26 13:49:00')` fetched rows / total rows = 1/1 - +-------------------------------+ - | TIME('2020-08-26 13:49:00') | - |-------------------------------| - | 13:49:00 | - +-------------------------------+ + +-----------------------------+ + | TIME('2020-08-26 13:49:00') | + |-----------------------------| + | 13:49:00 | + +-----------------------------+ os> source=people | eval `TIME('2020-08-26 13:49')` = TIME('2020-08-26 13:49') | fields `TIME('2020-08-26 13:49')` fetched rows / total rows = 1/1 - +----------------------------+ - | TIME('2020-08-26 13:49') | - |----------------------------| - | 13:49:00 | - +----------------------------+ + +--------------------------+ + | TIME('2020-08-26 13:49') | + |--------------------------| + | 13:49:00 | + +--------------------------+ TIME_FORMAT @@ -1709,11 +1709,11 @@ Example:: os> source=people | eval `TIME_FORMAT('1998-01-31 13:14:15.012345', '%f %H %h %I %i %p %r %S %s %T')` = TIME_FORMAT('1998-01-31 13:14:15.012345', '%f %H %h %I %i %p %r %S %s %T') | fields `TIME_FORMAT('1998-01-31 13:14:15.012345', '%f %H %h %I %i %p %r %S %s %T')` fetched rows / total rows = 1/1 - +------------------------------------------------------------------------------+ - | TIME_FORMAT('1998-01-31 13:14:15.012345', '%f %H %h %I %i %p %r %S %s %T') | - |------------------------------------------------------------------------------| - | 012345 13 01 01 14 PM 01:14:15 PM 15 15 13:14:15 | - +------------------------------------------------------------------------------+ + +----------------------------------------------------------------------------+ + | TIME_FORMAT('1998-01-31 13:14:15.012345', '%f %H %h %I %i %p %r %S %s %T') | + |----------------------------------------------------------------------------| + | 012345 13 01 01 14 PM 01:14:15 PM 15 15 13:14:15 | + +----------------------------------------------------------------------------+ TIME_TO_SEC @@ -1732,11 +1732,11 @@ Example:: os> source=people | eval `TIME_TO_SEC(TIME('22:23:00'))` = TIME_TO_SEC(TIME('22:23:00')) | fields `TIME_TO_SEC(TIME('22:23:00'))` fetched rows / total rows = 1/1 - +---------------------------------+ - | TIME_TO_SEC(TIME('22:23:00')) | - |---------------------------------| - | 80580 | - +---------------------------------+ + +-------------------------------+ + | TIME_TO_SEC(TIME('22:23:00')) | + |-------------------------------| + | 80580 | + +-------------------------------+ TIMEDIFF @@ -1755,11 +1755,11 
@@ Example:: os> source=people | eval `TIMEDIFF('23:59:59', '13:00:00')` = TIMEDIFF('23:59:59', '13:00:00') | fields `TIMEDIFF('23:59:59', '13:00:00')` fetched rows / total rows = 1/1 - +------------------------------------+ - | TIMEDIFF('23:59:59', '13:00:00') | - |------------------------------------| - | 10:59:59 | - +------------------------------------+ + +----------------------------------+ + | TIMEDIFF('23:59:59', '13:00:00') | + |----------------------------------| + | 10:59:59 | + +----------------------------------+ TIMESTAMP @@ -1783,11 +1783,11 @@ Example:: os> source=people | eval `TIMESTAMP('2020-08-26 13:49:00')` = TIMESTAMP('2020-08-26 13:49:00'), `TIMESTAMP('2020-08-26 13:49:00', TIME('12:15:42'))` = TIMESTAMP('2020-08-26 13:49:00', TIME('12:15:42')) | fields `TIMESTAMP('2020-08-26 13:49:00')`, `TIMESTAMP('2020-08-26 13:49:00', TIME('12:15:42'))` fetched rows / total rows = 1/1 - +------------------------------------+------------------------------------------------------+ - | TIMESTAMP('2020-08-26 13:49:00') | TIMESTAMP('2020-08-26 13:49:00', TIME('12:15:42')) | - |------------------------------------+------------------------------------------------------| - | 2020-08-26 13:49:00 | 2020-08-27 02:04:42 | - +------------------------------------+------------------------------------------------------+ + +----------------------------------+----------------------------------------------------+ + | TIMESTAMP('2020-08-26 13:49:00') | TIMESTAMP('2020-08-26 13:49:00', TIME('12:15:42')) | + |----------------------------------+----------------------------------------------------| + | 2020-08-26 13:49:00 | 2020-08-27 02:04:42 | + +----------------------------------+----------------------------------------------------+ TIMESTAMPADD @@ -1808,11 +1808,11 @@ Examples:: os> source=people | eval `TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00')` = TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00') | eval `TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00')` = TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00') | fields `TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00')`, `TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00')` fetched rows / total rows = 1/1 - +------------------------------------------------+----------------------------------------------------+ - | TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00') | TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00') | - |------------------------------------------------+----------------------------------------------------| - | 2000-01-18 00:00:00 | 1999-10-01 00:00:00 | - +------------------------------------------------+----------------------------------------------------+ + +----------------------------------------------+--------------------------------------------------+ + | TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00') | TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00') | + |----------------------------------------------+--------------------------------------------------| + | 2000-01-18 00:00:00 | 1999-10-01 00:00:00 | + +----------------------------------------------+--------------------------------------------------+ TIMESTAMPDIFF @@ -1834,11 +1834,11 @@ Examples:: os> source=people | eval `TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00')` = TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00') | eval `TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00'))` = TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00')) | fields `TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00')`, `TIMESTAMPDIFF(SECOND, time('00:00:23'), 
time('00:00:00'))` fetched rows / total rows = 1/1 - +---------------------------------------------------------------------+-------------------------------------------------------------+ - | TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00') | TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00')) | - |---------------------------------------------------------------------+-------------------------------------------------------------| - | 4 | -23 | - +---------------------------------------------------------------------+-------------------------------------------------------------+ + +-------------------------------------------------------------------+-----------------------------------------------------------+ + | TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00') | TIMESTAMPDIFF(SECOND, time('00:00:23'), time('00:00:00')) | + |-------------------------------------------------------------------+-----------------------------------------------------------| + | 4 | -23 | + +-------------------------------------------------------------------+-----------------------------------------------------------+ TO_DAYS @@ -1857,11 +1857,11 @@ Example:: os> source=people | eval `TO_DAYS(DATE('2008-10-07'))` = TO_DAYS(DATE('2008-10-07')) | fields `TO_DAYS(DATE('2008-10-07'))` fetched rows / total rows = 1/1 - +-------------------------------+ - | TO_DAYS(DATE('2008-10-07')) | - |-------------------------------| - | 733687 | - +-------------------------------+ + +-----------------------------+ + | TO_DAYS(DATE('2008-10-07')) | + |-----------------------------| + | 733687 | + +-----------------------------+ TO_SECONDS @@ -1881,11 +1881,11 @@ Example:: os> source=people | eval `TO_SECONDS(DATE('2008-10-07'))` = TO_SECONDS(DATE('2008-10-07')) | eval `TO_SECONDS(950228)` = TO_SECONDS(950228) | fields `TO_SECONDS(DATE('2008-10-07'))`, `TO_SECONDS(950228)` fetched rows / total rows = 1/1 - +----------------------------------+----------------------+ - | TO_SECONDS(DATE('2008-10-07')) | TO_SECONDS(950228) | - |----------------------------------+----------------------| - | 63390556800 | 62961148800 | - +----------------------------------+----------------------+ + +--------------------------------+--------------------+ + | TO_SECONDS(DATE('2008-10-07')) | TO_SECONDS(950228) | + |--------------------------------+--------------------| + | 63390556800 | 62961148800 | + +--------------------------------+--------------------+ UNIX_TIMESTAMP @@ -1907,11 +1907,11 @@ Example:: os> source=people | eval `UNIX_TIMESTAMP(double)` = UNIX_TIMESTAMP(20771122143845), `UNIX_TIMESTAMP(timestamp)` = UNIX_TIMESTAMP(TIMESTAMP('1996-11-15 17:05:42')) | fields `UNIX_TIMESTAMP(double)`, `UNIX_TIMESTAMP(timestamp)` fetched rows / total rows = 1/1 - +--------------------------+-----------------------------+ - | UNIX_TIMESTAMP(double) | UNIX_TIMESTAMP(timestamp) | - |--------------------------+-----------------------------| - | 3404817525.0 | 848077542.0 | - +--------------------------+-----------------------------+ + +------------------------+---------------------------+ + | UNIX_TIMESTAMP(double) | UNIX_TIMESTAMP(timestamp) | + |------------------------+---------------------------| + | 3404817525.0 | 848077542.0 | + +------------------------+---------------------------+ UTC_DATE @@ -1930,11 +1930,11 @@ Example:: > source=people | eval `UTC_DATE()` = UTC_DATE() | fields `UTC_DATE()` fetched rows / total rows = 1/1 - +--------------+ - | UTC_DATE() | - |--------------| - | 2022-10-03 | - +--------------+ + 
+------------+ + | UTC_DATE() | + |------------| + | 2022-10-03 | + +------------+ UTC_TIME @@ -1953,11 +1953,11 @@ Example:: > source=people | eval `UTC_TIME()` = UTC_TIME() | fields `UTC_TIME()` fetched rows / total rows = 1/1 - +--------------+ - | UTC_TIME() | - |--------------| - | 17:54:27 | - +--------------+ + +------------+ + | UTC_TIME() | + |------------| + | 17:54:27 | + +------------+ UTC_TIMESTAMP @@ -2042,11 +2042,11 @@ Example:: os> source=people | eval `WEEK(DATE('2008-02-20'))` = WEEK(DATE('2008-02-20')), `WEEK(DATE('2008-02-20'), 1)` = WEEK(DATE('2008-02-20'), 1) | fields `WEEK(DATE('2008-02-20'))`, `WEEK(DATE('2008-02-20'), 1)` fetched rows / total rows = 1/1 - +----------------------------+-------------------------------+ - | WEEK(DATE('2008-02-20')) | WEEK(DATE('2008-02-20'), 1) | - |----------------------------+-------------------------------| - | 7 | 8 | - +----------------------------+-------------------------------+ + +--------------------------+-----------------------------+ + | WEEK(DATE('2008-02-20')) | WEEK(DATE('2008-02-20'), 1) | + |--------------------------+-----------------------------| + | 7 | 8 | + +--------------------------+-----------------------------+ WEEKDAY @@ -2067,11 +2067,11 @@ Example:: os> source=people | eval `weekday(DATE('2020-08-26'))` = weekday(DATE('2020-08-26')) | eval `weekday(DATE('2020-08-27'))` = weekday(DATE('2020-08-27')) | fields `weekday(DATE('2020-08-26'))`, `weekday(DATE('2020-08-27'))` fetched rows / total rows = 1/1 - +-------------------------------+-------------------------------+ - | weekday(DATE('2020-08-26')) | weekday(DATE('2020-08-27')) | - |-------------------------------+-------------------------------| - | 2 | 3 | - +-------------------------------+-------------------------------+ + +-----------------------------+-----------------------------+ + | weekday(DATE('2020-08-26')) | weekday(DATE('2020-08-27')) | + |-----------------------------+-----------------------------| + | 2 | 3 | + +-----------------------------+-----------------------------+ WEEK_OF_YEAR @@ -2133,11 +2133,11 @@ Example:: os> source=people | eval `WEEK_OF_YEAR(DATE('2008-02-20'))` = WEEK(DATE('2008-02-20')), `WEEK_OF_YEAR(DATE('2008-02-20'), 1)` = WEEK_OF_YEAR(DATE('2008-02-20'), 1) | fields `WEEK_OF_YEAR(DATE('2008-02-20'))`, `WEEK_OF_YEAR(DATE('2008-02-20'), 1)` fetched rows / total rows = 1/1 - +------------------------------------+---------------------------------------+ - | WEEK_OF_YEAR(DATE('2008-02-20')) | WEEK_OF_YEAR(DATE('2008-02-20'), 1) | - |------------------------------------+---------------------------------------| - | 7 | 8 | - +------------------------------------+---------------------------------------+ + +----------------------------------+-------------------------------------+ + | WEEK_OF_YEAR(DATE('2008-02-20')) | WEEK_OF_YEAR(DATE('2008-02-20'), 1) | + |----------------------------------+-------------------------------------| + | 7 | 8 | + +----------------------------------+-------------------------------------+ YEAR @@ -2156,11 +2156,11 @@ Example:: os> source=people | eval `YEAR(DATE('2020-08-26'))` = YEAR(DATE('2020-08-26')) | fields `YEAR(DATE('2020-08-26'))` fetched rows / total rows = 1/1 - +----------------------------+ - | YEAR(DATE('2020-08-26')) | - |----------------------------| - | 2020 | - +----------------------------+ + +--------------------------+ + | YEAR(DATE('2020-08-26')) | + |--------------------------| + | 2020 | + +--------------------------+ YEARWEEK @@ -2179,10 +2179,10 @@ Example:: os> 
source=people | eval `YEARWEEK('2020-08-26')` = YEARWEEK('2020-08-26') | eval `YEARWEEK('2019-01-05', 1)` = YEARWEEK('2019-01-05', 1) | fields `YEARWEEK('2020-08-26')`, `YEARWEEK('2019-01-05', 1)` fetched rows / total rows = 1/1 - +--------------------------+-----------------------------+ - | YEARWEEK('2020-08-26') | YEARWEEK('2019-01-05', 1) | - |--------------------------+-----------------------------| - | 202034 | 201901 | - +--------------------------+-----------------------------+ + +------------------------+---------------------------+ + | YEARWEEK('2020-08-26') | YEARWEEK('2019-01-05', 1) | + |------------------------+---------------------------| + | 202034 | 201901 | + +------------------------+---------------------------+ diff --git a/docs/user/ppl/functions/expressions.rst b/docs/user/ppl/functions/expressions.rst index ac48324680..d25063d559 100644 --- a/docs/user/ppl/functions/expressions.rst +++ b/docs/user/ppl/functions/expressions.rst @@ -48,13 +48,13 @@ Here is an example for different type of arithmetic expressions:: os> source=accounts | where age > (25 + 5) | fields age ; fetched rows / total rows = 3/3 - +-------+ - | age | - |-------| - | 32 | - | 36 | - | 33 | - +-------+ + +-----+ + | age | + |-----| + | 32 | + | 36 | + | 33 | + +-----+ Predicate Operators =================== @@ -108,11 +108,11 @@ Here is an example for comparison operators:: os> source=accounts | where age > 33 | fields age ; fetched rows / total rows = 1/1 - +-------+ - | age | - |-------| - | 36 | - +-------+ + +-----+ + | age | + |-----| + | 36 | + +-----+ IN @@ -122,12 +122,12 @@ IN operator test field in value lists:: os> source=accounts | where age in (32, 33) | fields age ; fetched rows / total rows = 2/2 - +-------+ - | age | - |-------| - | 32 | - | 33 | - +-------+ + +-----+ + | age | + |-----| + | 32 | + | 33 | + +-----+ OR @@ -137,12 +137,12 @@ OR operator :: os> source=accounts | where age = 32 OR age = 33 | fields age ; fetched rows / total rows = 2/2 - +-------+ - | age | - |-------| - | 32 | - | 33 | - +-------+ + +-----+ + | age | + |-----| + | 32 | + | 33 | + +-----+ NOT @@ -152,10 +152,10 @@ NOT operator :: os> source=accounts | where not age in (32, 33) | fields age ; fetched rows / total rows = 2/2 - +-------+ - | age | - |-------| - | 36 | - | 28 | - +-------+ + +-----+ + | age | + |-----| + | 36 | + | 28 | + +-----+ diff --git a/docs/user/ppl/functions/ip.rst b/docs/user/ppl/functions/ip.rst new file mode 100644 index 0000000000..30cb9020b0 --- /dev/null +++ b/docs/user/ppl/functions/ip.rst @@ -0,0 +1,38 @@ +==================== +IP Address Functions +==================== + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 1 + +CIDRMATCH +--------- + +Description +>>>>>>>>>>> + +Usage: `cidrmatch(ip, cidr)` checks if `ip` is within the specified `cidr` range. 
+ +Argument type: STRING, STRING + +Return type: BOOLEAN + +Example:: + + > source=weblogs | where cidrmatch(host, '1.2.3.0/24') | fields host, url + fetched rows / total rows = 2/2 + +---------+--------------------+ + | host | url | + |---------|--------------------| + | 1.2.3.4 | /history/voyager1/ | + | 1.2.3.5 | /history/voyager2/ | + +---------+--------------------+ + +Note: + - `ip` can be an IPv4 or IPv6 address + - `cidr` can be an IPv4 or IPv6 block + - `ip` and `cidr` must both be valid and non-missing/non-null + diff --git a/docs/user/ppl/functions/math.rst b/docs/user/ppl/functions/math.rst index c5eb07b5da..65f544461b 100644 --- a/docs/user/ppl/functions/math.rst +++ b/docs/user/ppl/functions/math.rst @@ -25,11 +25,11 @@ Example:: os> source=people | eval `ABS(-1)` = ABS(-1) | fields `ABS(-1)` fetched rows / total rows = 1/1 - +-----------+ - | ABS(-1) | - |-----------| - | 1 | - +-----------+ + +---------+ + | ABS(-1) | + |---------| + | 1 | + +---------+ ACOS @@ -71,11 +71,11 @@ Example:: os> source=people | eval `ASIN(0)` = ASIN(0) | fields `ASIN(0)` fetched rows / total rows = 1/1 - +-----------+ - | ASIN(0) | - |-----------| - | 0.0 | - +-----------+ + +---------+ + | ASIN(0) | + |---------| + | 0.0 | + +---------+ ATAN @@ -150,19 +150,19 @@ Example:: os> source=people | eval `CEILING(0)` = CEILING(0), `CEILING(50.00005)` = CEILING(50.00005), `CEILING(-50.00005)` = CEILING(-50.00005) | fields `CEILING(0)`, `CEILING(50.00005)`, `CEILING(-50.00005)` fetched rows / total rows = 1/1 - +--------------+---------------------+----------------------+ - | CEILING(0) | CEILING(50.00005) | CEILING(-50.00005) | - |--------------+---------------------+----------------------| - | 0 | 51 | -50 | - +--------------+---------------------+----------------------+ + +------------+-------------------+--------------------+ + | CEILING(0) | CEILING(50.00005) | CEILING(-50.00005) | + |------------+-------------------+--------------------| + | 0 | 51 | -50 | + +------------+-------------------+--------------------+ os> source=people | eval `CEILING(3147483647.12345)` = CEILING(3147483647.12345), `CEILING(113147483647.12345)` = CEILING(113147483647.12345), `CEILING(3147483647.00001)` = CEILING(3147483647.00001) | fields `CEILING(3147483647.12345)`, `CEILING(113147483647.12345)`, `CEILING(3147483647.00001)` fetched rows / total rows = 1/1 - +-----------------------------+-------------------------------+-----------------------------+ - | CEILING(3147483647.12345) | CEILING(113147483647.12345) | CEILING(3147483647.00001) | - |-----------------------------+-------------------------------+-----------------------------| - | 3147483648 | 113147483648 | 3147483648 | - +-----------------------------+-------------------------------+-----------------------------+ + +---------------------------+-----------------------------+---------------------------+ + | CEILING(3147483647.12345) | CEILING(113147483647.12345) | CEILING(3147483647.00001) | + |---------------------------+-----------------------------+---------------------------| + | 3147483648 | 113147483648 | 3147483648 | + +---------------------------+-----------------------------+---------------------------+ CONV @@ -181,11 +181,11 @@ Example:: os> source=people | eval `CONV('12', 10, 16)` = CONV('12', 10, 16), `CONV('2C', 16, 10)` = CONV('2C', 16, 10), `CONV(12, 10, 2)` = CONV(12, 10, 2), `CONV(1111, 2, 10)` = CONV(1111, 2, 10) | fields `CONV('12', 10, 16)`, `CONV('2C', 16, 10)`, `CONV(12, 10, 2)`, `CONV(1111, 2, 10)` fetched rows / total rows = 1/1 - 
+----------------------+----------------------+-------------------+---------------------+ - | CONV('12', 10, 16) | CONV('2C', 16, 10) | CONV(12, 10, 2) | CONV(1111, 2, 10) | - |----------------------+----------------------+-------------------+---------------------| - | c | 44 | 1100 | 15 | - +----------------------+----------------------+-------------------+---------------------+ + +--------------------+--------------------+-----------------+-------------------+ + | CONV('12', 10, 16) | CONV('2C', 16, 10) | CONV(12, 10, 2) | CONV(1111, 2, 10) | + |--------------------+--------------------+-----------------+-------------------| + | c | 44 | 1100 | 15 | + +--------------------+--------------------+-----------------+-------------------+ COS @@ -204,11 +204,11 @@ Example:: os> source=people | eval `COS(0)` = COS(0) | fields `COS(0)` fetched rows / total rows = 1/1 - +----------+ - | COS(0) | - |----------| - | 1.0 | - +----------+ + +--------+ + | COS(0) | + |--------| + | 1.0 | + +--------+ COT @@ -250,11 +250,11 @@ Example:: os> source=people | eval `CRC32('MySQL')` = CRC32('MySQL') | fields `CRC32('MySQL')` fetched rows / total rows = 1/1 - +------------------+ - | CRC32('MySQL') | - |------------------| - | 3259397556 | - +------------------+ + +----------------+ + | CRC32('MySQL') | + |----------------| + | 3259397556 | + +----------------+ DEGREES @@ -342,27 +342,27 @@ Example:: os> source=people | eval `FLOOR(0)` = FLOOR(0), `FLOOR(50.00005)` = FLOOR(50.00005), `FLOOR(-50.00005)` = FLOOR(-50.00005) | fields `FLOOR(0)`, `FLOOR(50.00005)`, `FLOOR(-50.00005)` fetched rows / total rows = 1/1 - +------------+-------------------+--------------------+ - | FLOOR(0) | FLOOR(50.00005) | FLOOR(-50.00005) | - |------------+-------------------+--------------------| - | 0 | 50 | -51 | - +------------+-------------------+--------------------+ + +----------+-----------------+------------------+ + | FLOOR(0) | FLOOR(50.00005) | FLOOR(-50.00005) | + |----------+-----------------+------------------| + | 0 | 50 | -51 | + +----------+-----------------+------------------+ os> source=people | eval `FLOOR(3147483647.12345)` = FLOOR(3147483647.12345), `FLOOR(113147483647.12345)` = FLOOR(113147483647.12345), `FLOOR(3147483647.00001)` = FLOOR(3147483647.00001) | fields `FLOOR(3147483647.12345)`, `FLOOR(113147483647.12345)`, `FLOOR(3147483647.00001)` fetched rows / total rows = 1/1 - +---------------------------+-----------------------------+---------------------------+ - | FLOOR(3147483647.12345) | FLOOR(113147483647.12345) | FLOOR(3147483647.00001) | - |---------------------------+-----------------------------+---------------------------| - | 3147483647 | 113147483647 | 3147483647 | - +---------------------------+-----------------------------+---------------------------+ + +-------------------------+---------------------------+-------------------------+ + | FLOOR(3147483647.12345) | FLOOR(113147483647.12345) | FLOOR(3147483647.00001) | + |-------------------------+---------------------------+-------------------------| + | 3147483647 | 113147483647 | 3147483647 | + +-------------------------+---------------------------+-------------------------+ os> source=people | eval `FLOOR(282474973688888.022)` = FLOOR(282474973688888.022), `FLOOR(9223372036854775807.022)` = FLOOR(9223372036854775807.022), `FLOOR(9223372036854775807.0000001)` = FLOOR(9223372036854775807.0000001) | fields `FLOOR(282474973688888.022)`, `FLOOR(9223372036854775807.022)`, `FLOOR(9223372036854775807.0000001)` fetched rows / total rows = 1/1 - 
+------------------------------+----------------------------------+--------------------------------------+ - | FLOOR(282474973688888.022) | FLOOR(9223372036854775807.022) | FLOOR(9223372036854775807.0000001) | - |------------------------------+----------------------------------+--------------------------------------| - | 282474973688888 | 9223372036854775807 | 9223372036854775807 | - +------------------------------+----------------------------------+--------------------------------------+ + +----------------------------+--------------------------------+------------------------------------+ + | FLOOR(282474973688888.022) | FLOOR(9223372036854775807.022) | FLOOR(9223372036854775807.0000001) | + |----------------------------+--------------------------------+------------------------------------| + | 282474973688888 | 9223372036854775807 | 9223372036854775807 | + +----------------------------+--------------------------------+------------------------------------+ LN @@ -406,11 +406,11 @@ Example:: os> source=people | eval `LOG(2)` = LOG(2), `LOG(2, 8)` = LOG(2, 8) | fields `LOG(2)`, `LOG(2, 8)` fetched rows / total rows = 1/1 - +--------------------+-------------+ - | LOG(2) | LOG(2, 8) | - |--------------------+-------------| - | 0.6931471805599453 | 3.0 | - +--------------------+-------------+ + +--------------------+-----------+ + | LOG(2) | LOG(2, 8) | + |--------------------+-----------| + | 0.6931471805599453 | 3.0 | + +--------------------+-----------+ LOG2 @@ -431,11 +431,11 @@ Example:: os> source=people | eval `LOG2(8)` = LOG2(8) | fields `LOG2(8)` fetched rows / total rows = 1/1 - +-----------+ - | LOG2(8) | - |-----------| - | 3.0 | - +-----------+ + +---------+ + | LOG2(8) | + |---------| + | 3.0 | + +---------+ LOG10 @@ -456,11 +456,11 @@ Example:: os> source=people | eval `LOG10(100)` = LOG10(100) | fields `LOG10(100)` fetched rows / total rows = 1/1 - +--------------+ - | LOG10(100) | - |--------------| - | 2.0 | - +--------------+ + +------------+ + | LOG10(100) | + |------------| + | 2.0 | + +------------+ MOD @@ -479,11 +479,11 @@ Example:: os> source=people | eval `MOD(3, 2)` = MOD(3, 2), `MOD(3.1, 2)` = MOD(3.1, 2) | fields `MOD(3, 2)`, `MOD(3.1, 2)` fetched rows / total rows = 1/1 - +-------------+---------------+ - | MOD(3, 2) | MOD(3.1, 2) | - |-------------+---------------| - | 1 | 1.1 | - +-------------+---------------+ + +-----------+-------------+ + | MOD(3, 2) | MOD(3.1, 2) | + |-----------+-------------| + | 1 | 1.1 | + +-----------+-------------+ PI @@ -525,11 +525,11 @@ Example:: os> source=people | eval `POW(3, 2)` = POW(3, 2), `POW(-3, 2)` = POW(-3, 2), `POW(3, -2)` = POW(3, -2) | fields `POW(3, 2)`, `POW(-3, 2)`, `POW(3, -2)` fetched rows / total rows = 1/1 - +-------------+--------------+--------------------+ - | POW(3, 2) | POW(-3, 2) | POW(3, -2) | - |-------------+--------------+--------------------| - | 9.0 | 9.0 | 0.1111111111111111 | - +-------------+--------------+--------------------+ + +-----------+------------+--------------------+ + | POW(3, 2) | POW(-3, 2) | POW(3, -2) | + |-----------+------------+--------------------| + | 9.0 | 9.0 | 0.1111111111111111 | + +-----------+------------+--------------------+ POWER @@ -550,11 +550,11 @@ Example:: os> source=people | eval `POWER(3, 2)` = POWER(3, 2), `POWER(-3, 2)` = POWER(-3, 2), `POWER(3, -2)` = POWER(3, -2) | fields `POWER(3, 2)`, `POWER(-3, 2)`, `POWER(3, -2)` fetched rows / total rows = 1/1 - +---------------+----------------+--------------------+ - | POWER(3, 2) | POWER(-3, 2) | POWER(3, -2) | - 
|---------------+----------------+--------------------| - | 9.0 | 9.0 | 0.1111111111111111 | - +---------------+----------------+--------------------+ + +-------------+--------------+--------------------+ + | POWER(3, 2) | POWER(-3, 2) | POWER(3, -2) | + |-------------+--------------+--------------------| + | 9.0 | 9.0 | 0.1111111111111111 | + +-------------+--------------+--------------------+ RADIANS @@ -622,11 +622,11 @@ Example:: os> source=people | eval `ROUND(12.34)` = ROUND(12.34), `ROUND(12.34, 1)` = ROUND(12.34, 1), `ROUND(12.34, -1)` = ROUND(12.34, -1), `ROUND(12, 1)` = ROUND(12, 1) | fields `ROUND(12.34)`, `ROUND(12.34, 1)`, `ROUND(12.34, -1)`, `ROUND(12, 1)` fetched rows / total rows = 1/1 - +----------------+-------------------+--------------------+----------------+ - | ROUND(12.34) | ROUND(12.34, 1) | ROUND(12.34, -1) | ROUND(12, 1) | - |----------------+-------------------+--------------------+----------------| - | 12.0 | 12.3 | 10.0 | 12 | - +----------------+-------------------+--------------------+----------------+ + +--------------+-----------------+------------------+--------------+ + | ROUND(12.34) | ROUND(12.34, 1) | ROUND(12.34, -1) | ROUND(12, 1) | + |--------------+-----------------+------------------+--------------| + | 12.0 | 12.3 | 10.0 | 12 | + +--------------+-----------------+------------------+--------------+ SIGN @@ -645,11 +645,11 @@ Example:: os> source=people | eval `SIGN(1)` = SIGN(1), `SIGN(0)` = SIGN(0), `SIGN(-1.1)` = SIGN(-1.1) | fields `SIGN(1)`, `SIGN(0)`, `SIGN(-1.1)` fetched rows / total rows = 1/1 - +-----------+-----------+--------------+ - | SIGN(1) | SIGN(0) | SIGN(-1.1) | - |-----------+-----------+--------------| - | 1 | 0 | -1 | - +-----------+-----------+--------------+ + +---------+---------+------------+ + | SIGN(1) | SIGN(0) | SIGN(-1.1) | + |---------+---------+------------| + | 1 | 0 | -1 | + +---------+---------+------------+ SIN @@ -668,11 +668,11 @@ Example:: os> source=people | eval `SIN(0)` = SIN(0) | fields `SIN(0)` fetched rows / total rows = 1/1 - +----------+ - | SIN(0) | - |----------| - | 0.0 | - +----------+ + +--------+ + | SIN(0) | + |--------| + | 0.0 | + +--------+ SQRT @@ -694,11 +694,11 @@ Example:: os> source=people | eval `SQRT(4)` = SQRT(4), `SQRT(4.41)` = SQRT(4.41) | fields `SQRT(4)`, `SQRT(4.41)` fetched rows / total rows = 1/1 - +-----------+--------------+ - | SQRT(4) | SQRT(4.41) | - |-----------+--------------| - | 2.0 | 2.1 | - +-----------+--------------+ + +---------+------------+ + | SQRT(4) | SQRT(4.41) | + |---------+------------| + | 2.0 | 2.1 | + +---------+------------+ CBRT @@ -719,9 +719,9 @@ Example:: opensearchsql> source=location | eval `CBRT(8)` = CBRT(8), `CBRT(9.261)` = CBRT(9.261), `CBRT(-27)` = CBRT(-27) | fields `CBRT(8)`, `CBRT(9.261)`, `CBRT(-27)`; fetched rows / total rows = 2/2 - +-----------+---------------+-------------+ - | CBRT(8) | CBRT(9.261) | CBRT(-27) | - |-----------+---------------+-------------| - | 2.0 | 2.1 | -3.0 | - | 2.0 | 2.1 | -3.0 | - +-----------+---------------+-------------+ + +---------+-------------+-----------+ + | CBRT(8) | CBRT(9.261) | CBRT(-27) | + |---------+-------------+-----------| + | 2.0 | 2.1 | -3.0 | + | 2.0 | 2.1 | -3.0 | + +---------+-------------+-----------+ diff --git a/docs/user/ppl/functions/relevance.rst b/docs/user/ppl/functions/relevance.rst index fb31edb0d2..a1f240ee05 100644 --- a/docs/user/ppl/functions/relevance.rst +++ b/docs/user/ppl/functions/relevance.rst @@ -37,12 +37,12 @@ Example with only ``field`` and ``query`` 
expressions, and all other parameters os> source=accounts | where match(address, 'Street') | fields lastname, address; fetched rows / total rows = 2/2 - +------------+--------------------+ - | lastname | address | - |------------+--------------------| - | Bond | 671 Bristol Street | - | Bates | 789 Madison Street | - +------------+--------------------+ + +----------+--------------------+ + | lastname | address | + |----------+--------------------| + | Bond | 671 Bristol Street | + | Bates | 789 Madison Street | + +----------+--------------------+ @@ -50,11 +50,11 @@ Another example to show how to set custom values for the optional parameters:: os> source=accounts | where match(firstname, 'Hattie', operator='AND', boost=2.0) | fields lastname; fetched rows / total rows = 1/1 - +------------+ - | lastname | - |------------| - | Bond | - +------------+ + +----------+ + | lastname | + |----------| + | Bond | + +----------+ MATCH_PHRASE @@ -175,22 +175,22 @@ Example with only ``fields`` and ``query`` expressions, and all other parameters os> source=books | where multi_match(['title'], 'Pooh House') | fields id, title, author; fetched rows / total rows = 2/2 - +------+--------------------------+----------------------+ - | id | title | author | - |------+--------------------------+----------------------| - | 1 | The House at Pooh Corner | Alan Alexander Milne | - | 2 | Winnie-the-Pooh | Alan Alexander Milne | - +------+--------------------------+----------------------+ + +----+--------------------------+----------------------+ + | id | title | author | + |----+--------------------------+----------------------| + | 1 | The House at Pooh Corner | Alan Alexander Milne | + | 2 | Winnie-the-Pooh | Alan Alexander Milne | + +----+--------------------------+----------------------+ Another example to show how to set custom values for the optional parameters:: os> source=books | where multi_match(['title'], 'Pooh House', operator='AND', analyzer=default) | fields id, title, author; fetched rows / total rows = 1/1 - +------+--------------------------+----------------------+ - | id | title | author | - |------+--------------------------+----------------------| - | 1 | The House at Pooh Corner | Alan Alexander Milne | - +------+--------------------------+----------------------+ + +----+--------------------------+----------------------+ + | id | title | author | + |----+--------------------------+----------------------| + | 1 | The House at Pooh Corner | Alan Alexander Milne | + +----+--------------------------+----------------------+ SIMPLE_QUERY_STRING @@ -228,22 +228,22 @@ Example with only ``fields`` and ``query`` expressions, and all other parameters os> source=books | where simple_query_string(['title'], 'Pooh House') | fields id, title, author; fetched rows / total rows = 2/2 - +------+--------------------------+----------------------+ - | id | title | author | - |------+--------------------------+----------------------| - | 1 | The House at Pooh Corner | Alan Alexander Milne | - | 2 | Winnie-the-Pooh | Alan Alexander Milne | - +------+--------------------------+----------------------+ + +----+--------------------------+----------------------+ + | id | title | author | + |----+--------------------------+----------------------| + | 1 | The House at Pooh Corner | Alan Alexander Milne | + | 2 | Winnie-the-Pooh | Alan Alexander Milne | + +----+--------------------------+----------------------+ Another example to show how to set custom values for the optional parameters:: os> source=books | where 
simple_query_string(['title'], 'Pooh House', flags='ALL', default_operator='AND') | fields id, title, author; fetched rows / total rows = 1/1 - +------+--------------------------+----------------------+ - | id | title | author | - |------+--------------------------+----------------------| - | 1 | The House at Pooh Corner | Alan Alexander Milne | - +------+--------------------------+----------------------+ + +----+--------------------------+----------------------+ + | id | title | author | + |----+--------------------------+----------------------| + | 1 | The House at Pooh Corner | Alan Alexander Milne | + +----+--------------------------+----------------------+ MATCH_BOOL_PREFIX @@ -270,22 +270,22 @@ Example with only ``field`` and ``query`` expressions, and all other parameters os> source=accounts | where match_bool_prefix(address, 'Bristol Stre') | fields firstname, address fetched rows / total rows = 2/2 - +-------------+--------------------+ - | firstname | address | - |-------------+--------------------| - | Hattie | 671 Bristol Street | - | Nanette | 789 Madison Street | - +-------------+--------------------+ + +-----------+--------------------+ + | firstname | address | + |-----------+--------------------| + | Hattie | 671 Bristol Street | + | Nanette | 789 Madison Street | + +-----------+--------------------+ Another example to show how to set custom values for the optional parameters:: os> source=accounts | where match_bool_prefix(address, 'Bristol Stre', minimum_should_match = 2) | fields firstname, address fetched rows / total rows = 1/1 - +-------------+--------------------+ - | firstname | address | - |-------------+--------------------| - | Hattie | 671 Bristol Street | - +-------------+--------------------+ + +-----------+--------------------+ + | firstname | address | + |-----------+--------------------| + | Hattie | 671 Bristol Street | + +-----------+--------------------+ QUERY_STRING @@ -335,22 +335,22 @@ Example with only ``fields`` and ``query`` expressions, and all other parameters os> source=books | where query_string(['title'], 'Pooh House') | fields id, title, author; fetched rows / total rows = 2/2 - +------+--------------------------+----------------------+ - | id | title | author | - |------+--------------------------+----------------------| - | 1 | The House at Pooh Corner | Alan Alexander Milne | - | 2 | Winnie-the-Pooh | Alan Alexander Milne | - +------+--------------------------+----------------------+ + +----+--------------------------+----------------------+ + | id | title | author | + |----+--------------------------+----------------------| + | 1 | The House at Pooh Corner | Alan Alexander Milne | + | 2 | Winnie-the-Pooh | Alan Alexander Milne | + +----+--------------------------+----------------------+ Another example to show how to set custom values for the optional parameters:: os> source=books | where query_string(['title'], 'Pooh House', default_operator='AND') | fields id, title, author; fetched rows / total rows = 1/1 - +------+--------------------------+----------------------+ - | id | title | author | - |------+--------------------------+----------------------| - | 1 | The House at Pooh Corner | Alan Alexander Milne | - +------+--------------------------+----------------------+ + +----+--------------------------+----------------------+ + | id | title | author | + |----+--------------------------+----------------------| + | 1 | The House at Pooh Corner | Alan Alexander Milne | + +----+--------------------------+----------------------+ Limitations 
>>>>>>>>>>> diff --git a/docs/user/ppl/functions/string.rst b/docs/user/ppl/functions/string.rst index edf5220f4f..0dbb09cbb8 100644 --- a/docs/user/ppl/functions/string.rst +++ b/docs/user/ppl/functions/string.rst @@ -24,11 +24,11 @@ Example:: os> source=people | eval `CONCAT('hello', 'world')` = CONCAT('hello', 'world'), `CONCAT('hello ', 'whole ', 'world', '!')` = CONCAT('hello ', 'whole ', 'world', '!') | fields `CONCAT('hello', 'world')`, `CONCAT('hello ', 'whole ', 'world', '!')` fetched rows / total rows = 1/1 - +----------------------------+--------------------------------------------+ - | CONCAT('hello', 'world') | CONCAT('hello ', 'whole ', 'world', '!') | - |----------------------------+--------------------------------------------| - | helloworld | hello whole world! | - +----------------------------+--------------------------------------------+ + +--------------------------+------------------------------------------+ + | CONCAT('hello', 'world') | CONCAT('hello ', 'whole ', 'world', '!') | + |--------------------------+------------------------------------------| + | helloworld | hello whole world! | + +--------------------------+------------------------------------------+ CONCAT_WS @@ -47,11 +47,11 @@ Example:: os> source=people | eval `CONCAT_WS(',', 'hello', 'world')` = CONCAT_WS(',', 'hello', 'world') | fields `CONCAT_WS(',', 'hello', 'world')` fetched rows / total rows = 1/1 - +------------------------------------+ - | CONCAT_WS(',', 'hello', 'world') | - |------------------------------------| - | hello,world | - +------------------------------------+ + +----------------------------------+ + | CONCAT_WS(',', 'hello', 'world') | + |----------------------------------| + | hello,world | + +----------------------------------+ LENGTH @@ -74,11 +74,11 @@ Example:: os> source=people | eval `LENGTH('helloworld')` = LENGTH('helloworld') | fields `LENGTH('helloworld')` fetched rows / total rows = 1/1 - +------------------------+ - | LENGTH('helloworld') | - |------------------------| - | 10 | - +------------------------+ + +----------------------+ + | LENGTH('helloworld') | + |----------------------| + | 10 | + +----------------------+ LIKE @@ -98,11 +98,11 @@ Example:: os> source=people | eval `LIKE('hello world', '_ello%')` = LIKE('hello world', '_ELLO%') | fields `LIKE('hello world', '_ello%')` fetched rows / total rows = 1/1 - +---------------------------------+ - | LIKE('hello world', '_ello%') | - |---------------------------------| - | True | - +---------------------------------+ + +-------------------------------+ + | LIKE('hello world', '_ello%') | + |-------------------------------| + | True | + +-------------------------------+ LOWER @@ -121,11 +121,11 @@ Example:: os> source=people | eval `LOWER('helloworld')` = LOWER('helloworld'), `LOWER('HELLOWORLD')` = LOWER('HELLOWORLD') | fields `LOWER('helloworld')`, `LOWER('HELLOWORLD')` fetched rows / total rows = 1/1 - +-----------------------+-----------------------+ - | LOWER('helloworld') | LOWER('HELLOWORLD') | - |-----------------------+-----------------------| - | helloworld | helloworld | - +-----------------------+-----------------------+ + +---------------------+---------------------+ + | LOWER('helloworld') | LOWER('HELLOWORLD') | + |---------------------+---------------------| + | helloworld | helloworld | + +---------------------+---------------------+ LTRIM @@ -144,11 +144,11 @@ Example:: os> source=people | eval `LTRIM(' hello')` = LTRIM(' hello'), `LTRIM('hello ')` = LTRIM('hello ') | fields `LTRIM(' hello')`, 
`LTRIM('hello ')` fetched rows / total rows = 1/1 - +---------------------+---------------------+ - | LTRIM(' hello') | LTRIM('hello ') | - |---------------------+---------------------| - | hello | hello | - +---------------------+---------------------+ + +-------------------+-------------------+ + | LTRIM(' hello') | LTRIM('hello ') | + |-------------------+-------------------| + | hello | hello | + +-------------------+-------------------+ POSITION @@ -169,11 +169,11 @@ Example:: os> source=people | eval `POSITION('world' IN 'helloworld')` = POSITION('world' IN 'helloworld'), `POSITION('invalid' IN 'helloworld')`= POSITION('invalid' IN 'helloworld') | fields `POSITION('world' IN 'helloworld')`, `POSITION('invalid' IN 'helloworld')` fetched rows / total rows = 1/1 - +-------------------------------------+---------------------------------------+ - | POSITION('world' IN 'helloworld') | POSITION('invalid' IN 'helloworld') | - |-------------------------------------+---------------------------------------| - | 6 | 0 | - +-------------------------------------+---------------------------------------+ + +-----------------------------------+-------------------------------------+ + | POSITION('world' IN 'helloworld') | POSITION('invalid' IN 'helloworld') | + |-----------------------------------+-------------------------------------| + | 6 | 0 | + +-----------------------------------+-------------------------------------+ REVERSE @@ -192,11 +192,11 @@ Example:: os> source=people | eval `REVERSE('abcde')` = REVERSE('abcde') | fields `REVERSE('abcde')` fetched rows / total rows = 1/1 - +--------------------+ - | REVERSE('abcde') | - |--------------------| - | edcba | - +--------------------+ + +------------------+ + | REVERSE('abcde') | + |------------------| + | edcba | + +------------------+ RIGHT @@ -215,11 +215,11 @@ Example:: os> source=people | eval `RIGHT('helloworld', 5)` = RIGHT('helloworld', 5), `RIGHT('HELLOWORLD', 0)` = RIGHT('HELLOWORLD', 0) | fields `RIGHT('helloworld', 5)`, `RIGHT('HELLOWORLD', 0)` fetched rows / total rows = 1/1 - +--------------------------+--------------------------+ - | RIGHT('helloworld', 5) | RIGHT('HELLOWORLD', 0) | - |--------------------------+--------------------------| - | world | | - +--------------------------+--------------------------+ + +------------------------+------------------------+ + | RIGHT('helloworld', 5) | RIGHT('HELLOWORLD', 0) | + |------------------------+------------------------| + | world | | + +------------------------+------------------------+ RTRIM @@ -238,11 +238,11 @@ Example:: os> source=people | eval `RTRIM(' hello')` = RTRIM(' hello'), `RTRIM('hello ')` = RTRIM('hello ') | fields `RTRIM(' hello')`, `RTRIM('hello ')` fetched rows / total rows = 1/1 - +---------------------+---------------------+ - | RTRIM(' hello') | RTRIM('hello ') | - |---------------------+---------------------| - | hello | hello | - +---------------------+---------------------+ + +-------------------+-------------------+ + | RTRIM(' hello') | RTRIM('hello ') | + |-------------------+-------------------| + | hello | hello | + +-------------------+-------------------+ SUBSTRING @@ -263,11 +263,11 @@ Example:: os> source=people | eval `SUBSTRING('helloworld', 5)` = SUBSTRING('helloworld', 5), `SUBSTRING('helloworld', 5, 3)` = SUBSTRING('helloworld', 5, 3) | fields `SUBSTRING('helloworld', 5)`, `SUBSTRING('helloworld', 5, 3)` fetched rows / total rows = 1/1 - +------------------------------+---------------------------------+ - | SUBSTRING('helloworld', 5) | 
SUBSTRING('helloworld', 5, 3) | - |------------------------------+---------------------------------| - | oworld | owo | - +------------------------------+---------------------------------+ + +----------------------------+-------------------------------+ + | SUBSTRING('helloworld', 5) | SUBSTRING('helloworld', 5, 3) | + |----------------------------+-------------------------------| + | oworld | owo | + +----------------------------+-------------------------------+ TRIM @@ -284,11 +284,11 @@ Example:: os> source=people | eval `TRIM(' hello')` = TRIM(' hello'), `TRIM('hello ')` = TRIM('hello ') | fields `TRIM(' hello')`, `TRIM('hello ')` fetched rows / total rows = 1/1 - +--------------------+--------------------+ - | TRIM(' hello') | TRIM('hello ') | - |--------------------+--------------------| - | hello | hello | - +--------------------+--------------------+ + +------------------+------------------+ + | TRIM(' hello') | TRIM('hello ') | + |------------------+------------------| + | hello | hello | + +------------------+------------------+ UPPER @@ -307,8 +307,8 @@ Example:: os> source=people | eval `UPPER('helloworld')` = UPPER('helloworld'), `UPPER('HELLOWORLD')` = UPPER('HELLOWORLD') | fields `UPPER('helloworld')`, `UPPER('HELLOWORLD')` fetched rows / total rows = 1/1 - +-----------------------+-----------------------+ - | UPPER('helloworld') | UPPER('HELLOWORLD') | - |-----------------------+-----------------------| - | HELLOWORLD | HELLOWORLD | - +-----------------------+-----------------------+ + +---------------------+---------------------+ + | UPPER('helloworld') | UPPER('HELLOWORLD') | + |---------------------+---------------------| + | HELLOWORLD | HELLOWORLD | + +---------------------+---------------------+ diff --git a/docs/user/ppl/functions/system.rst b/docs/user/ppl/functions/system.rst index cfe0414c49..698933a3c4 100644 --- a/docs/user/ppl/functions/system.rst +++ b/docs/user/ppl/functions/system.rst @@ -24,8 +24,8 @@ Example:: os> source=people | eval `typeof(date)` = typeof(DATE('2008-04-14')), `typeof(int)` = typeof(1), `typeof(now())` = typeof(now()), `typeof(column)` = typeof(accounts) | fields `typeof(date)`, `typeof(int)`, `typeof(now())`, `typeof(column)` fetched rows / total rows = 1/1 - +----------------+---------------+-----------------+------------------+ - | typeof(date) | typeof(int) | typeof(now()) | typeof(column) | - |----------------+---------------+-----------------+------------------| - | DATE | INTEGER | TIMESTAMP | OBJECT | - +----------------+---------------+-----------------+------------------+ + +--------------+-------------+---------------+----------------+ + | typeof(date) | typeof(int) | typeof(now()) | typeof(column) | + |--------------+-------------+---------------+----------------| + | DATE | INTEGER | TIMESTAMP | OBJECT | + +--------------+-------------+---------------+----------------+ diff --git a/docs/user/ppl/general/datatypes.rst b/docs/user/ppl/general/datatypes.rst index 18555dec3d..ba8281b6a9 100644 --- a/docs/user/ppl/general/datatypes.rst +++ b/docs/user/ppl/general/datatypes.rst @@ -356,11 +356,11 @@ PPL query:: os> source=people | fields city, city.name, city.location.latitude; fetched rows / total rows = 1/1 - +-----------------------------------------------------+-------------+--------------------------+ - | city | city.name | city.location.latitude | - |-----------------------------------------------------+-------------+--------------------------| - | {'name': 'Seattle', 'location': {'latitude': 10.5}} | Seattle | 10.5 | - 
+-----------------------------------------------------+-------------+--------------------------+ + +-----------------------------------------------------+-----------+------------------------+ + | city | city.name | city.location.latitude | + |-----------------------------------------------------+-----------+------------------------| + | {'name': 'Seattle', 'location': {'latitude': 10.5}} | Seattle | 10.5 | + +-----------------------------------------------------+-----------+------------------------+ Example 2: Group by struct inner attribute @@ -372,11 +372,11 @@ PPL query:: os> source=people | stats count() by city.name; fetched rows / total rows = 1/1 - +-----------+-------------+ - | count() | city.name | - |-----------+-------------| - | 1 | Seattle | - +-----------+-------------+ + +---------+-----------+ + | count() | city.name | + |---------+-----------| + | 1 | Seattle | + +---------+-----------+ Example 3: Selecting Field of Array Value ----------------------------------------- @@ -385,8 +385,8 @@ Select deeper level for object fields of array value which returns the first ele os> source = people | fields accounts, accounts.id; fetched rows / total rows = 1/1 - +------------+---------------+ - | accounts | accounts.id | - |------------+---------------| - | {'id': 1} | 1 | - +------------+---------------+ \ No newline at end of file + +-----------------------+-------------+ + | accounts | accounts.id | + |-----------------------+-------------| + | [{'id': 1},{'id': 2}] | 1 | + +-----------------------+-------------+ \ No newline at end of file diff --git a/docs/user/ppl/general/identifiers.rst b/docs/user/ppl/general/identifiers.rst index 51fc36c40f..bab540ffdd 100644 --- a/docs/user/ppl/general/identifiers.rst +++ b/docs/user/ppl/general/identifiers.rst @@ -39,14 +39,14 @@ Here are examples for using index pattern directly without quotes:: os> source=accounts | fields account_number, firstname, lastname; fetched rows / total rows = 4/4 - +------------------+-------------+------------+ - | account_number | firstname | lastname | - |------------------+-------------+------------| - | 1 | Amber | Duke | - | 6 | Hattie | Bond | - | 13 | Nanette | Bates | - | 18 | Dale | Adams | - +------------------+-------------+------------+ + +----------------+-----------+----------+ + | account_number | firstname | lastname | + |----------------+-----------+----------| + | 1 | Amber | Duke | + | 6 | Hattie | Bond | + | 13 | Nanette | Bates | + | 18 | Dale | Adams | + +----------------+-----------+----------+ Delimited Identifiers @@ -73,14 +73,14 @@ Here are examples for quoting an index name by back ticks:: os> source=`accounts` | fields `account_number`; fetched rows / total rows = 4/4 - +------------------+ - | account_number | - |------------------| - | 1 | - | 6 | - | 13 | - | 18 | - +------------------+ + +----------------+ + | account_number | + |----------------| + | 1 | + | 6 | + | 13 | + | 18 | + +----------------+ Cross-Cluster Index Identifiers @@ -135,29 +135,29 @@ Query wildcard indices:: os> source=acc* | stats count(); fetched rows / total rows = 1/1 - +-----------+ - | count() | - |-----------| - | 5 | - +-----------+ + +---------+ + | count() | + |---------| + | 5 | + +---------+ Query multiple indices seperated by ``,``:: os> source=accounts, account2 | stats count(); fetched rows / total rows = 1/1 - +-----------+ - | count() | - |-----------| - | 5 | - +-----------+ + +---------+ + | count() | + |---------| + | 5 | + +---------+ Query delimited multiple indices seperated by 
``,``:: os> source=`accounts,account2` | stats count(); fetched rows / total rows = 1/1 - +-----------+ - | count() | - |-----------| - | 5 | - +-----------+ + +---------+ + | count() | + |---------| + | 5 | + +---------+ diff --git a/docs/user/ppl/index.rst b/docs/user/ppl/index.rst index 1fa981b1b7..ef8cff334e 100644 --- a/docs/user/ppl/index.rst +++ b/docs/user/ppl/index.rst @@ -74,6 +74,8 @@ The query start with search command and then flowing a set of command delimited - `stats command `_ + - `trendline command `_ + - `where command `_ - `head command `_ @@ -102,6 +104,8 @@ The query start with search command and then flowing a set of command delimited - `System Functions `_ + - `IP Address Functions `_ + * **Optimization** - `Optimization <../../user/optimization/optimization.rst>`_ diff --git a/docs/user/ppl/interfaces/endpoint.rst b/docs/user/ppl/interfaces/endpoint.rst index 793b94eb8d..fb931fb0ba 100644 --- a/docs/user/ppl/interfaces/endpoint.rst +++ b/docs/user/ppl/interfaces/endpoint.rst @@ -91,7 +91,7 @@ The following PPL query demonstrated that where and stats command were pushed do { "name": "OpenSearchIndexScan", "description": { - "request": "OpenSearchQueryRequest(indexName=accounts, sourceBuilder={\"from\":0,\"size\":200,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":10,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}],\"aggregations\":{\"avg(age)\":{\"avg\":{\"field\":\"age\"}}}}, searchDone=false)" + "request": "OpenSearchQueryRequest(indexName=accounts, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":10,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}],\"aggregations\":{\"avg(age)\":{\"avg\":{\"field\":\"age\"}}}}, searchDone=false)" }, "children": [] } diff --git a/doctest/build.gradle b/doctest/build.gradle index ec5a26b52b..91d54c9cb2 100644 --- a/doctest/build.gradle +++ b/doctest/build.gradle @@ -5,6 +5,8 @@ import org.opensearch.gradle.testclusters.RunTask +import java.util.concurrent.Callable + plugins { id 'base' id 'com.wiredforcode.spawn' @@ -20,8 +22,16 @@ def path = project(':').projectDir def plugin_path = project(':doctest').projectDir task cloneSqlCli(type: Exec) { - // clone the sql-cli repo locally - commandLine 'git', 'clone', 'https://github.com/opensearch-project/sql-cli.git' + def repoDir = new File("${project.projectDir}/sql-cli") + + if (repoDir.exists()) { + // Repository already exists, fetch and checkout latest + commandLine 'git', '-C', repoDir.absolutePath, 'fetch', 'origin', 'main' + commandLine 'git', '-C', repoDir.absolutePath, 'checkout', 'origin/main' + } else { + // Repository doesn't exist, clone it + commandLine 'git', 'clone', 'https://github.com/opensearch-project/sql-cli.git', repoDir.absolutePath + } } task bootstrap(type: Exec, dependsOn: ['cloneSqlCli', 'spotlessJava']) { @@ -109,6 +119,10 @@ if (version_tokens.length > 1) { String mlCommonsRemoteFile = 'https://ci.opensearch.org/ci/dbc/distribution-build-opensearch/' + opensearch_no_snapshot + '/latest/linux/x64/tar/builds/opensearch/plugins/opensearch-ml-' + opensearch_build + '.zip' String mlCommonsPlugin = 'opensearch-ml' +String bwcOpenSearchJSDownload = 'https://ci.opensearch.org/ci/dbc/distribution-build-opensearch/' + opensearch_no_snapshot + '/latest/linux/x64/tar/builds/' + + 'opensearch/plugins/opensearch-job-scheduler-' + opensearch_build + '.zip' +String jsPlugin = 
'opensearch-job-scheduler' + testClusters { docTestCluster { // Disable loading of `ML-commons` plugin, because it might be unavailable (not released yet). @@ -133,6 +147,7 @@ testClusters { } })) */ + plugin(getJobSchedulerPlugin(jsPlugin, bwcOpenSearchJSDownload)) plugin ':opensearch-sql-plugin' testDistribution = 'archive' } @@ -159,3 +174,49 @@ spotless { googleJavaFormat('1.17.0').reflowLongStrings().groupArtifact('com.google.googlejavaformat:google-java-format') } } + +def getJobSchedulerPlugin(String jsPlugin, String bwcOpenSearchJSDownload) { + return provider(new Callable() { + @Override + RegularFile call() throws Exception { + return new RegularFile() { + @Override + File getAsFile() { + // Use absolute paths + String basePath = new File('.').getCanonicalPath() + File dir = new File(basePath + File.separator + 'doctest' + File.separator + jsPlugin) + + // Log the directory path for debugging + println("Creating directory: " + dir.getAbsolutePath()) + + // Create directory if it doesn't exist + if (!dir.exists()) { + if (!dir.mkdirs()) { + throw new IOException("Failed to create directory: " + dir.getAbsolutePath()) + } + } + + // Define the file path + File f = new File(dir, jsPlugin + '-' + opensearch_build + '.zip') + + // Download file if it doesn't exist + if (!f.exists()) { + println("Downloading file from: " + bwcOpenSearchJSDownload) + println("Saving to file: " + f.getAbsolutePath()) + + new URL(bwcOpenSearchJSDownload).withInputStream { ins -> + f.withOutputStream { it << ins } + } + } + + // Check if the file was created successfully + if (!f.exists()) { + throw new FileNotFoundException("File was not created: " + f.getAbsolutePath()) + } + + return fileTree(f.getParent()).matching { include f.getName() }.singleFile + } + } + } + }) +} diff --git a/doctest/requirements.txt b/doctest/requirements.txt index 7d178b80ae..7d5e2afa2d 100644 --- a/doctest/requirements.txt +++ b/doctest/requirements.txt @@ -1 +1,2 @@ -zc.customdoctests==1.0.1 \ No newline at end of file +zc.customdoctests==1.0.1 +setuptools>=70.0.0 diff --git a/doctest/test_data/multi_value_long.json b/doctest/test_data/multi_value_long.json new file mode 100644 index 0000000000..3c139630f6 --- /dev/null +++ b/doctest/test_data/multi_value_long.json @@ -0,0 +1,5 @@ +{"id": 1, "long_array": [1, 2]} +{"id": 2, "long_array": [3, 4]} +{"id": 3, "long_array": [1, 5]} +{"id": 4, "long_array": [1, 2]} +{"id": 5, "long_array": [2, 3]} \ No newline at end of file diff --git a/doctest/test_data/weblogs.json b/doctest/test_data/weblogs.json new file mode 100644 index 0000000000..afb1679e22 --- /dev/null +++ b/doctest/test_data/weblogs.json @@ -0,0 +1,6 @@ +{"host":"::1","method":"GET","url":"/history/apollo/","response":"200","bytes":"6245"} +{"host":"0.0.0.2","method":"GET","url":"/shuttle/missions/sts-73/mission-sts-73.html","response":"200","bytes":"4085"} +{"host":"::3","method":"GET","url":"/shuttle/countdown/countdown.html","response":"200","bytes":"3985"} +{"host":"::FFFF:1.2.3.4","method":"GET","url":"/history/voyager1/","response":"200","bytes":"1234"} +{"host":"1.2.3.5","method":"GET","url":"/history/voyager2/","response": "200","bytes":"4321"} +{"host":"::FFFF:1234","method":"GET","url":"/history/artemis/","response":"200","bytes": "9876"} diff --git a/doctest/test_docs.py b/doctest/test_docs.py index 1fedbdf49e..1d46766c6d 100644 --- a/doctest/test_docs.py +++ b/doctest/test_docs.py @@ -29,7 +29,7 @@ WILDCARD = "wildcard" NESTED = "nested" DATASOURCES = ".ql-datasources" - +WEBLOGS = "weblogs" class 
DocTestConnection(OpenSearchConnection): @@ -48,10 +48,34 @@ def process(self, statement): click.echo(output) +""" +For _explain requests, there are several additional request fields that will inconsistently +appear/change depending on underlying cluster state. This method normalizes these responses in-place +to make _explain doctests more consistent. + +If the passed response is not an _explain response, the input is left unmodified. +""" +def normalize_explain_response(data): + if "root" in data: + data = data["root"] + + if (request := data.get("description", {}).get("request", None)) and request.startswith("OpenSearchQueryRequest("): + for filter_field in ["needClean", "pitId", "cursorKeepAlive", "searchAfter", "searchResponse"]: + request = re.sub(f", {filter_field}=\\w+", "", request) + data["description"]["request"] = request + + for child in data.get("children", []): + normalize_explain_response(child) + + return data + + def pretty_print(s): try: - d = json.loads(s) - print(json.dumps(d, indent=2)) + data = json.loads(s) + normalize_explain_response(data) + + print(json.dumps(data, indent=2)) except json.decoder.JSONDecodeError: print(s) @@ -98,6 +122,7 @@ def set_up_test_indices(test): load_file("wildcard.json", index_name=WILDCARD) load_file("nested_objects.json", index_name=NESTED) load_file("datasources.json", index_name=DATASOURCES) + load_file("weblogs.json", index_name=WEBLOGS) def load_file(filename, index_name): @@ -126,7 +151,7 @@ def set_up(test): def tear_down(test): # drop leftover tables after each test - test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD, NESTED], ignore_unavailable=True) + test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD, NESTED, WEBLOGS], ignore_unavailable=True) docsuite = partial(doctest.DocFileSuite, diff --git a/doctest/test_mapping/weblogs.json b/doctest/test_mapping/weblogs.json new file mode 100644 index 0000000000..05b9784313 --- /dev/null +++ b/doctest/test_mapping/weblogs.json @@ -0,0 +1,21 @@ +{ + "mappings": { + "properties": { + "host": { + "type": "ip" + }, + "method": { + "type": "text" + }, + "url": { + "type": "text" + }, + "response": { + "type": "text" + }, + "bytes": { + "type": "text" + } + } + } +} diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar index 7f93135c49..2c3521197d 100644 Binary files a/gradle/wrapper/gradle-wrapper.jar and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index a1f138116b..8d1ea75dcd 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -2,8 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-8.4-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-8.9-bin.zip networkTimeout=10000 zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists -distributionSha256Sum=3e1af3ae886920c3ac87f7a91f816c0c7c436f276a6eefdb3da152100fef72ae +distributionSha256Sum=d725d707bfabd4dfdc958c624003b3c80accc03f7037b5122c4b1d0ef15cecab diff --git a/gradlew b/gradlew index 1aa94a4269..02fae2493a 100755 --- a/gradlew +++ b/gradlew @@ -15,6 +15,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +# SPDX-License-Identifier: Apache-2.0 +# ############################################################################## # @@ -55,7 +57,7 @@ # Darwin, MinGW, and NonStop. # # (3) This script is generated from the Groovy template -# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt # within the Gradle project. # # You can find Gradle at https://github.com/gradle/gradle/. @@ -84,7 +86,7 @@ done # shellcheck disable=SC2034 APP_BASE_NAME=${0##*/} # Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) -APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit +APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s' "$PWD" ) || exit # Use the maximum available, or set MAX_FD != -1 to use that value. MAX_FD=maximum diff --git a/gradlew.bat b/gradlew.bat index 6689b85bee..0ebb4c6c76 100644 --- a/gradlew.bat +++ b/gradlew.bat @@ -13,6 +13,8 @@ @rem See the License for the specific language governing permissions and @rem limitations under the License. @rem +@rem SPDX-License-Identifier: Apache-2.0 +@rem @if "%DEBUG%"=="" @echo off @rem ########################################################################## diff --git a/integ-test/build.gradle b/integ-test/build.gradle index a0f82b6f1b..798a0be536 100644 --- a/integ-test/build.gradle +++ b/integ-test/build.gradle @@ -44,7 +44,7 @@ apply plugin: 'java' apply plugin: 'io.freefair.lombok' apply plugin: 'com.wiredforcode.spawn' -String baseVersion = "2.15.0" +String baseVersion = "2.17.0" String bwcVersion = baseVersion + ".0"; String baseName = "sqlBwcCluster" String bwcFilePath = "src/test/resources/bwc/" @@ -80,7 +80,6 @@ ext { var projectAbsPath = projectDir.getAbsolutePath() File downloadedSecurityPlugin = Paths.get(projectAbsPath, 'bin', 'opensearch-security-snapshot.zip').toFile() - configureSecurityPlugin = { OpenSearchCluster cluster -> cluster.getNodes().forEach { node -> @@ -138,6 +137,10 @@ ext { cluster.plugin provider((Callable) (() -> (RegularFile) (() -> downloadedSecurityPlugin))) } + + bwcOpenSearchJSDownload = 'https://ci.opensearch.org/ci/dbc/distribution-build-opensearch/' + baseVersion + '/latest/linux/x64/tar/builds/' + + 'opensearch/plugins/opensearch-job-scheduler-' + bwcVersion + '.zip' + bwcJobSchedulerPath = bwcFilePath + "job-scheduler/" } tasks.withType(licenseHeaders.class) { @@ -153,7 +156,6 @@ configurations.all { resolutionStrategy.force "commons-logging:commons-logging:1.2" // enforce 1.1.3, https://www.whitesourcesoftware.com/vulnerability-database/WS-2019-0379 resolutionStrategy.force 'commons-codec:commons-codec:1.13' - resolutionStrategy.force 'com.google.guava:guava:32.0.1-jre' resolutionStrategy.force "com.fasterxml.jackson.core:jackson-core:${versions.jackson}" resolutionStrategy.force "com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:${versions.jackson}" resolutionStrategy.force "com.fasterxml.jackson.dataformat:jackson-dataformat-smile:${versions.jackson}" @@ -166,6 +168,7 @@ configurations.all { resolutionStrategy.force "joda-time:joda-time:2.10.12" resolutionStrategy.force "org.slf4j:slf4j-api:1.7.36" resolutionStrategy.force "com.amazonaws:aws-java-sdk-core:${aws_java_sdk_version}" + resolutionStrategy.force "com.google.guava:guava:${guava_version}" } configurations { @@ -191,9 +194,15 @@ dependencies { 
testCompileOnly 'org.apiguardian:apiguardian-api:1.1.2' // Needed for BWC tests + zipArchive group: 'org.opensearch.plugin', name:'opensearch-job-scheduler', version: "${opensearch_build}" zipArchive group: 'org.opensearch.plugin', name:'opensearch-sql-plugin', version: "${bwcVersion}-SNAPSHOT" } +java { + sourceCompatibility = JavaVersion.VERSION_21 + targetCompatibility = JavaVersion.VERSION_21 +} + dependencyLicenses.enabled = false testingConventions.enabled = false forbiddenApisTest.enabled = false @@ -214,22 +223,42 @@ testClusters.all { } } +def getJobSchedulerPlugin() { + provider(new Callable() { + @Override + RegularFile call() throws Exception { + return new RegularFile() { + @Override + File getAsFile() { + return configurations.zipArchive.asFileTree.matching { + include '**/opensearch-job-scheduler*' + }.singleFile + } + } + } + }) +} + testClusters { integTest { testDistribution = 'archive' + plugin(getJobSchedulerPlugin()) plugin ":opensearch-sql-plugin" setting "plugins.query.datasources.encryption.masterkey", "1234567812345678" } remoteCluster { testDistribution = 'archive' + plugin(getJobSchedulerPlugin()) plugin ":opensearch-sql-plugin" } integTestWithSecurity { testDistribution = 'archive' + plugin(getJobSchedulerPlugin()) plugin ":opensearch-sql-plugin" } remoteIntegTestWithSecurity { testDistribution = 'archive' + plugin(getJobSchedulerPlugin()) plugin ":opensearch-sql-plugin" } } @@ -497,6 +526,24 @@ task comparisonTest(type: RestIntegTestTask) { testDistribution = "ARCHIVE" versions = [baseVersion, opensearch_version] numberOfNodes = 3 + plugin(provider(new Callable(){ + @Override + RegularFile call() throws Exception { + return new RegularFile() { + @Override + File getAsFile() { + if (new File("$project.rootDir/$bwcFilePath/job-scheduler/$bwcVersion").exists()) { + project.delete(files("$project.rootDir/$bwcFilePath/job-scheduler/$bwcVersion")) + } + project.mkdir bwcJobSchedulerPath + bwcVersion + ant.get(src: bwcOpenSearchJSDownload, + dest: bwcJobSchedulerPath + bwcVersion, + httpusecaches: false) + return fileTree(bwcJobSchedulerPath + bwcVersion).getSingleFile() + } + } + } + })) plugin(provider(new Callable(){ @Override RegularFile call() throws Exception { @@ -517,17 +564,18 @@ task comparisonTest(type: RestIntegTestTask) { } List> plugins = [ - provider(new Callable() { - @Override - RegularFile call() throws Exception { - return new RegularFile() { - @Override - File getAsFile() { - return fileTree(bwcFilePath + project.version).getSingleFile() + getJobSchedulerPlugin(), + provider(new Callable() { + @Override + RegularFile call() throws Exception { + return new RegularFile() { + @Override + File getAsFile() { + return fileTree(bwcFilePath + project.version).getSingleFile() + } } } - } - }) + }) ] // Creates 2 test clusters with 3 nodes of the old version. 
diff --git a/integ-test/src/test/java/org/opensearch/sql/asyncquery/AsyncQueryIT.java b/integ-test/src/test/java/org/opensearch/sql/asyncquery/AsyncQueryIT.java index 9b5cc96b0e..c41a52b6fd 100644 --- a/integ-test/src/test/java/org/opensearch/sql/asyncquery/AsyncQueryIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/asyncquery/AsyncQueryIT.java @@ -51,6 +51,32 @@ public void asyncQueryEnabledSettingsTest() throws IOException { updateClusterSettings(new ClusterSetting(PERSISTENT, setting, null)); } + @Test + public void dataSourceDisabledSettingsTest() throws IOException { + String setting = "plugins.query.datasources.enabled"; + // disable + updateClusterSettings(new ClusterSetting(PERSISTENT, setting, "false")); + + String query = "select 1"; + Response response = null; + try { + executeAsyncQueryToString(query); + } catch (ResponseException ex) { + response = ex.getResponse(); + } + + JSONObject result = new JSONObject(TestUtils.getResponseBody(response)); + assertThat(result.getInt("status"), equalTo(400)); + JSONObject error = result.getJSONObject("error"); + assertThat(error.getString("reason"), equalTo("Invalid Request")); + assertThat( + error.getString("details"), equalTo("plugins.query.datasources.enabled setting is false")); + assertThat(error.getString("type"), equalTo("IllegalAccessException")); + + // reset the setting + updateClusterSettings(new ClusterSetting(PERSISTENT, setting, null)); + } + protected String executeAsyncQueryToString(String query) throws IOException { Response response = client().performRequest(buildAsyncRequest(query, ASYNC_QUERY_ACTION_URL)); Assert.assertEquals(200, response.getStatusLine().getStatusCode()); diff --git a/integ-test/src/test/java/org/opensearch/sql/correctness/tests/TestReportTest.java b/integ-test/src/test/java/org/opensearch/sql/correctness/tests/TestReportTest.java index 9ac5151b21..43f678f60e 100644 --- a/integ-test/src/test/java/org/opensearch/sql/correctness/tests/TestReportTest.java +++ b/integ-test/src/test/java/org/opensearch/sql/correctness/tests/TestReportTest.java @@ -22,7 +22,7 @@ /** Test for {@link TestReport} */ public class TestReportTest { - private TestReport report = new TestReport(); + private final TestReport report = new TestReport(); @Test public void testSuccessReport() { diff --git a/integ-test/src/test/java/org/opensearch/sql/correctness/testset/TestQuerySet.java b/integ-test/src/test/java/org/opensearch/sql/correctness/testset/TestQuerySet.java index 161d314c1d..b3dfbec259 100644 --- a/integ-test/src/test/java/org/opensearch/sql/correctness/testset/TestQuerySet.java +++ b/integ-test/src/test/java/org/opensearch/sql/correctness/testset/TestQuerySet.java @@ -14,7 +14,7 @@ /** Test query set including SQL queries for comparison testing. */ public class TestQuerySet implements Iterable { - private List queries; + private final List queries; /** * Construct by a test query file. 
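Note on the data source feature flag tests that follow: both DataSourceAPIsIT and DataSourceEnabledIT drive the new ``plugins.query.datasources.enabled`` setting through the cluster settings API before exercising the data source and async query endpoints. As a rough orientation only (a minimal sketch; the class and method names below are illustrative and not part of this change), the toggle those tests perform amounts to the following, using the same low-level REST client and org.json types the tests already import::

    // Illustrative sketch, not part of this PR: flips the data sources feature flag
    // via the standard cluster settings endpoint. Class/method names are hypothetical.
    import org.json.JSONObject;
    import org.opensearch.client.Request;
    import org.opensearch.client.Response;
    import org.opensearch.client.RestClient;

    public final class DataSourcesSettingToggle {

      /** settingType is "transient" or "persistent"; enabled turns the feature on or off. */
      public static Response setDataSourcesEnabled(
          RestClient client, String settingType, boolean enabled) throws Exception {
        Request request = new Request("PUT", "/_cluster/settings");
        request.setJsonEntity(
            new JSONObject()
                .put(settingType,
                     new JSONObject().put("plugins.query.datasources.enabled", enabled))
                .toString());
        // With the flag set to false, the data source CRUD APIs and the async query API
        // in the diffs below respond with HTTP 400 and the detail
        // "plugins.query.datasources.enabled setting is false".
        return client.performRequest(request);
      }
    }

With the flag disabled, the test diffs below assert that data source create/read/update/list/delete and ``_plugins/_async_query`` all return HTTP 400 carrying that detail message, and that ``show datasources`` reports a count of zero.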
diff --git a/integ-test/src/test/java/org/opensearch/sql/datasource/DataSourceAPIsIT.java b/integ-test/src/test/java/org/opensearch/sql/datasource/DataSourceAPIsIT.java index 5d693d6652..31fd781c51 100644 --- a/integ-test/src/test/java/org/opensearch/sql/datasource/DataSourceAPIsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/datasource/DataSourceAPIsIT.java @@ -19,10 +19,13 @@ import java.io.IOException; import java.lang.reflect.Type; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import lombok.SneakyThrows; +import lombok.Value; +import org.json.JSONObject; import org.junit.After; import org.junit.AfterClass; import org.junit.Assert; @@ -30,6 +33,7 @@ import org.opensearch.client.Request; import org.opensearch.client.Response; import org.opensearch.client.ResponseException; +import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.datasource.model.DataSourceMetadata; import org.opensearch.sql.datasource.model.DataSourceType; import org.opensearch.sql.ppl.PPLIntegTestCase; @@ -387,6 +391,136 @@ public void patchDataSourceAPITest() { Assert.assertEquals("test", dataSourceMetadata.getDescription()); } + @Test + public void testDataSourcesEnabledSettingIsTrueByDefault() { + Assert.assertTrue(getDataSourceEnabledSetting("defaults")); + } + + @Test + public void testDataSourcesEnabledSettingCanBeSetToTransientFalse() { + setDataSourcesEnabled("transient", false); + Assert.assertFalse(getDataSourceEnabledSetting("transient")); + } + + @Test + public void testDataSourcesEnabledSettingCanBeSetToTransientTrue() { + setDataSourcesEnabled("transient", true); + Assert.assertTrue(getDataSourceEnabledSetting("transient")); + } + + @Test + public void testDataSourcesEnabledSettingCanBeSetToPersistentFalse() { + setDataSourcesEnabled("persistent", false); + Assert.assertFalse(getDataSourceEnabledSetting("persistent")); + } + + @Test + public void testDataSourcesEnabledSettingCanBeSetToPersistentTrue() { + setDataSourcesEnabled("persistent", true); + Assert.assertTrue(getDataSourceEnabledSetting("persistent")); + } + + @Test + public void testDataSourcesEnabledSetToFalseRejectsApiOperations() { + setDataSourcesEnabled("transient", false); + validateAllDataSourceApisWithEnabledSetting(false); + } + + @Test + public void testDataSourcesEnabledSetToTrueAllowsApiOperations() { + setDataSourcesEnabled("transient", true); + validateAllDataSourceApisWithEnabledSetting(true); + } + + @SneakyThrows + private void validateAllDataSourceApisWithEnabledSetting(boolean dataSourcesEnabled) { + + @Value + class TestCase { + Request request; + int expectedResponseCodeOnSuccess; + String expectResponseToContainOnSuccess; + } + + TestCase[] testCases = + new TestCase[] { + // create + new TestCase( + getCreateDataSourceRequest(mockDataSourceMetadata("dummy")), + 201, + "Created DataSource"), + // read + new TestCase(getFetchDataSourceRequest("dummy"), 200, "dummy"), + // update + new TestCase( + getUpdateDataSourceRequest(mockDataSourceMetadata("dummy")), + 200, + "Updated DataSource"), + // list + new TestCase(getFetchDataSourceRequest(null), 200, "dummy"), + // delete + new TestCase(getDeleteDataSourceRequest("dummy"), 204, null) + }; + + for (TestCase testCase : testCases) { + + // data source APIs are eventually consistent. sleep delay is added for consistency + // see createDataSourceAPITest above. + Thread.sleep(2_000); + + final int expectedResponseCode = + dataSourcesEnabled ? 
testCase.getExpectedResponseCodeOnSuccess() : 400; + + final String expectedResponseBodyToContain = + dataSourcesEnabled + ? testCase.getExpectResponseToContainOnSuccess() + : "plugins.query.datasources.enabled setting is false"; + + Response response; + + try { + response = client().performRequest(testCase.getRequest()); + } catch (ResponseException e) { + response = e.getResponse(); + } + + Assert.assertEquals( + String.format( + "Test for " + testCase + " failed. Expected response code of %s, but got %s", + expectedResponseCode, + response.getStatusLine().getStatusCode()), + expectedResponseCode, + response.getStatusLine().getStatusCode()); + + if (expectedResponseBodyToContain != null) { + + String responseBody = getResponseBody(response); + + Assert.assertTrue( + String.format( + "Test for " + testCase + " failed. '%s' failed to contain '%s'", + responseBody, + expectedResponseBodyToContain), + responseBody.contains(expectedResponseBodyToContain)); + } + } + } + + @SneakyThrows + private boolean getDataSourceEnabledSetting(String... clusterSettingsTypeKeys) { + + final String settingKey = Settings.Key.DATASOURCES_ENABLED.getKeyValue(); + + JSONObject settings = getAllClusterSettings(); + + return Arrays.stream(clusterSettingsTypeKeys) + .map(settings::getJSONObject) + .filter(obj -> obj.has(settingKey)) + .map(obj -> obj.getBoolean(settingKey)) + .findFirst() + .orElseThrow(); + } + public DataSourceMetadata mockDataSourceMetadata(String name) { return new DataSourceMetadata.Builder() .setName(name) diff --git a/integ-test/src/test/java/org/opensearch/sql/datasource/DataSourceEnabledIT.java b/integ-test/src/test/java/org/opensearch/sql/datasource/DataSourceEnabledIT.java new file mode 100644 index 0000000000..a53c04d871 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/datasource/DataSourceEnabledIT.java @@ -0,0 +1,165 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.datasource; + +import static org.opensearch.sql.legacy.TestUtils.getResponseBody; + +import java.io.IOException; +import lombok.SneakyThrows; +import org.json.JSONObject; +import org.junit.After; +import org.junit.Assert; +import org.junit.Test; +import org.opensearch.client.Request; +import org.opensearch.client.Response; +import org.opensearch.client.ResponseException; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +public class DataSourceEnabledIT extends PPLIntegTestCase { + + @After + public void cleanUp() throws IOException { + wipeAllClusterSettings(); + } + + @Test + public void testAsyncQueryAPIFailureIfSettingIsDisabled() { + setDataSourcesEnabled("transient", false); + assertDataSourceCount(0); + assertSelectFromDataSourceReturnsDoesNotExist(); + assertAsyncQueryApiDisabled(); + } + + @Test + public void testDataSourceCreationWithDefaultSettings() { + createOpenSearchDataSource(); + createIndex(); + assertDataSourceCount(1); + assertSelectFromDataSourceReturnsSuccess(); + assertSelectFromDummyIndexInValidDataSourceDataSourceReturnsDoesNotExist(); + deleteSelfDataSourceCreated(); + deleteIndex(); + } + + @Test + public void testAfterPreviousEnable() { + setDataSourcesEnabled("transient", true); + createOpenSearchDataSource(); + createIndex(); + assertDataSourceCount(1); + assertSelectFromDataSourceReturnsSuccess(); + assertSelectFromDummyIndexInValidDataSourceDataSourceReturnsDoesNotExist(); + setDataSourcesEnabled("transient", false); + assertDataSourceCount(0); + assertSelectFromDataSourceReturnsDoesNotExist(); + 
assertAsyncQueryApiDisabled(); + setDataSourcesEnabled("transient", true); + deleteSelfDataSourceCreated(); + deleteIndex(); + } + + @SneakyThrows + private void assertSelectFromDataSourceReturnsDoesNotExist() { + Request request = new Request("POST", "/_plugins/_sql"); + request.setJsonEntity(new JSONObject().put("query", "select * from self.myindex").toString()); + Response response = performRequest(request); + Assert.assertEquals(404, response.getStatusLine().getStatusCode()); + String result = getResponseBody(response); + Assert.assertTrue(result.contains("IndexNotFoundException[no such index [self.myindex]]")); + } + + @SneakyThrows + private void assertSelectFromDummyIndexInValidDataSourceDataSourceReturnsDoesNotExist() { + Request request = new Request("POST", "/_plugins/_sql"); + request.setJsonEntity(new JSONObject().put("query", "select * from self.dummy").toString()); + Response response = performRequest(request); + Assert.assertEquals(404, response.getStatusLine().getStatusCode()); + String result = getResponseBody(response); + // subtle difference in error messaging shows that it resolved self to a data source + Assert.assertTrue(result.contains("IndexNotFoundException[no such index [dummy]]")); + } + + @SneakyThrows + private void assertSelectFromDataSourceReturnsSuccess() { + Request request = new Request("POST", "/_plugins/_sql"); + request.setJsonEntity(new JSONObject().put("query", "select * from self.myindex").toString()); + Response response = performRequest(request); + Assert.assertEquals(200, response.getStatusLine().getStatusCode()); + JSONObject result = new JSONObject(getResponseBody(response)); + Assert.assertTrue(result.has("datarows")); + Assert.assertTrue(result.has("schema")); + Assert.assertTrue(result.has("total")); + Assert.assertTrue(result.has("size")); + Assert.assertEquals(200, result.getNumber("status")); + } + + private void createIndex() { + Request request = new Request("PUT", "/myindex"); + Response response = performRequest(request); + Assert.assertEquals(200, response.getStatusLine().getStatusCode()); + } + + private void deleteIndex() { + Request request = new Request("DELETE", "/myindex"); + Response response = performRequest(request); + Assert.assertEquals(200, response.getStatusLine().getStatusCode()); + } + + private void createOpenSearchDataSource() { + Request request = new Request("POST", "/_plugins/_query/_datasources"); + request.setJsonEntity( + new JSONObject().put("connector", "OPENSEARCH").put("name", "self").toString()); + Response response = performRequest(request); + Assert.assertEquals(201, response.getStatusLine().getStatusCode()); + } + + @SneakyThrows + private void assertAsyncQueryApiDisabled() { + + Request request = new Request("POST", "/_plugins/_async_query"); + + request.setJsonEntity( + new JSONObject() + .put("query", "select * from self.myindex") + .put("datasource", "self") + .put("lang", "sql") + .toString()); + + Response response = performRequest(request); + Assert.assertEquals(400, response.getStatusLine().getStatusCode()); + + String expectBodyToContain = "plugins.query.datasources.enabled setting is false"; + Assert.assertTrue(getResponseBody(response).contains(expectBodyToContain)); + } + + @SneakyThrows + private void assertDataSourceCount(int expected) { + Request request = new Request("POST", "/_plugins/_ppl"); + request.setJsonEntity(new JSONObject().put("query", "show datasources").toString()); + Response response = performRequest(request); + Assert.assertEquals(200, 
response.getStatusLine().getStatusCode()); + JSONObject jsonBody = new JSONObject(getResponseBody(response)); + Assert.assertEquals(expected, jsonBody.getNumber("size")); + Assert.assertEquals(expected, jsonBody.getNumber("total")); + Assert.assertEquals(expected, jsonBody.getJSONArray("datarows").length()); + } + + @SneakyThrows + private Response performRequest(Request request) { + try { + return client().performRequest(request); + } catch (ResponseException e) { + return e.getResponse(); + } + } + + @SneakyThrows + private void deleteSelfDataSourceCreated() { + Request deleteRequest = getDeleteDataSourceRequest("self"); + Response deleteResponse = client().performRequest(deleteRequest); + Assert.assertEquals(204, deleteResponse.getStatusLine().getStatusCode()); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/jdbc/CursorIT.java b/integ-test/src/test/java/org/opensearch/sql/jdbc/CursorIT.java index 325c81107f..e2b6287191 100644 --- a/integ-test/src/test/java/org/opensearch/sql/jdbc/CursorIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/jdbc/CursorIT.java @@ -22,6 +22,7 @@ import java.sql.ResultSet; import java.sql.Statement; import java.util.List; +import java.util.Map; import javax.annotation.Nullable; import lombok.SneakyThrows; import org.json.JSONObject; @@ -115,6 +116,8 @@ public void select_all_no_cursor() { var restResponse = executeRestQuery(query, null); assertEquals(rows, restResponse.getInt("total")); + var restPrettyResponse = executeRestQuery(query, null, Map.of("pretty", "true")); + assertEquals(rows, restPrettyResponse.getInt("total")); } } @@ -133,6 +136,8 @@ public void select_count_all_no_cursor() { var restResponse = executeRestQuery(query, null); assertEquals(rows, restResponse.getInt("total")); + var restPrettyResponse = executeRestQuery(query, null, Map.of("pretty", "true")); + assertEquals(rows, restPrettyResponse.getInt("total")); } } @@ -151,6 +156,8 @@ public void select_all_small_table_big_cursor() { var restResponse = executeRestQuery(query, null); assertEquals(rows, restResponse.getInt("total")); + var restPrettyResponse = executeRestQuery(query, null, Map.of("pretty", "true")); + assertEquals(rows, restPrettyResponse.getInt("total")); } } @@ -169,6 +176,8 @@ public void select_all_small_table_small_cursor() { var restResponse = executeRestQuery(query, null); assertEquals(rows, restResponse.getInt("total")); + var restPrettyResponse = executeRestQuery(query, null, Map.of("pretty", "true")); + assertEquals(rows, restPrettyResponse.getInt("total")); } } @@ -187,6 +196,8 @@ public void select_all_big_table_small_cursor() { var restResponse = executeRestQuery(query, null); assertEquals(rows, restResponse.getInt("total")); + var restPrettyResponse = executeRestQuery(query, null, Map.of("pretty", "true")); + assertEquals(rows, restPrettyResponse.getInt("total")); } } @@ -205,6 +216,8 @@ public void select_all_big_table_big_cursor() { var restResponse = executeRestQuery(query, null); assertEquals(rows, restResponse.getInt("total")); + var restPrettyResponse = executeRestQuery(query, null, Map.of("pretty", "true")); + assertEquals(rows, restPrettyResponse.getInt("total")); } } @@ -217,6 +230,12 @@ private static String getConnectionString() { @SneakyThrows protected JSONObject executeRestQuery(String query, @Nullable Integer fetch_size) { + return executeRestQuery(query, fetch_size, Map.of()); + } + + @SneakyThrows + protected JSONObject executeRestQuery( + String query, @Nullable Integer fetch_size, Map params) { Request request = new 
Request("POST", QUERY_API_ENDPOINT); if (fetch_size != null) { request.setJsonEntity( @@ -224,6 +243,7 @@ protected JSONObject executeRestQuery(String query, @Nullable Integer fetch_size } else { request.setJsonEntity(String.format("{ \"query\": \"%s\" }", query)); } + request.addParameters(params); RequestOptions.Builder restOptionsBuilder = RequestOptions.DEFAULT.toBuilder(); restOptionsBuilder.addHeader("Content-Type", "application/json"); diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/CsvFormatResponseIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/CsvFormatResponseIT.java index 9a416c9683..b75da57c57 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/CsvFormatResponseIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/CsvFormatResponseIT.java @@ -25,6 +25,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.LinkedList; import java.util.List; import java.util.Locale; @@ -689,13 +690,18 @@ public void sanitizeTest() throws IOException { String.format( Locale.ROOT, "SELECT firstname, lastname FROM %s", TEST_INDEX_BANK_CSV_SANITIZE), false); - List lines = csvResult.getLines(); - assertEquals(5, lines.size()); - assertEquals(lines.get(0), "'+Amber JOHnny,Duke Willmington+"); - assertEquals(lines.get(1), "'-Hattie,Bond-"); - assertEquals(lines.get(2), "'=Nanette,Bates="); - assertEquals(lines.get(3), "'@Dale,Adams@"); - assertEquals(lines.get(4), "\",Elinor\",\"Ratliff,,,\""); + List actualLines = csvResult.getLines(); + assertEquals(5, actualLines.size()); + + List expectedLines = + Arrays.asList( + "'+Amber JOHnny,Duke Willmington+", + "'-Hattie,Bond-", + "'=Nanette,Bates=", + "'@Dale,Adams@", + "\",Elinor\",\"Ratliff,,,\""); + + assertContainsSameItems(expectedLines, actualLines); } @Test @@ -719,6 +725,15 @@ private void verifyFieldOrder(final String[] expectedFields) throws IOException verifyFieldOrder(expectedFields, query); } + private void assertContainsSameItems(List expectedLines, List actualLines) { + Collections.sort(expectedLines); + Collections.sort(actualLines); + assertEquals(expectedLines.size(), actualLines.size()); + for (int i = 0; i < expectedLines.size(); i++) { + assertEquals(expectedLines.get(i), actualLines.get(i)); + } + } + private void verifyFieldOrder(final String[] expectedFields, final String query) throws IOException { diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/CursorIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/CursorIT.java index abd2bbbcc2..565c40b121 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/CursorIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/CursorIT.java @@ -182,7 +182,7 @@ public void validTotalResultWithAndWithoutPaginationOrderBy() throws IOException String selectQuery = StringUtils.format( "SELECT firstname, state FROM %s ORDER BY balance DESC ", TEST_INDEX_ACCOUNT); - verifyWithAndWithoutPaginationResponse(selectQuery + " LIMIT 2000", selectQuery, 26, false); + verifyWithAndWithoutPaginationResponse(selectQuery + " LIMIT 2000", selectQuery, 25, false); } @Test @@ -280,7 +280,9 @@ public void testRegressionOnDateFormatChange() throws IOException { Arrays.asList( "2015-01-01 00:00:00.000", "2015-01-01 12:10:30.000", - "1585882955", // by existing design, this is not formatted in MySQL standard format + // Conversion will be applied when dateTime is stored on unix timestamp, + // https://github.com/opensearch-project/sql/pull/3160 + 
"2020-04-03 03:02:35.000", "2020-04-08 06:10:30.000"); assertThat(actualDateList, equalTo(expectedDateList)); @@ -393,7 +395,7 @@ public void invalidCursorIdNotDecodable() throws IOException { JSONObject resp = new JSONObject(TestUtils.getResponseBody(response)); assertThat(resp.getInt("status"), equalTo(400)); - assertThat(resp.query("/error/type"), equalTo("illegal_argument_exception")); + assertThat(resp.query("/error/type"), equalTo("IllegalArgumentException")); } /** @@ -440,6 +442,17 @@ public void noPaginationWithNonJDBCFormat() throws IOException { assertThat(rows.length, equalTo(1000)); } + @Test + public void testMalformedCursorGracefullyHandled() throws IOException { + ResponseException result = + assertThrows( + "Expected query with malformed cursor to raise error, but didn't", + ResponseException.class, + () -> executeCursorQuery("d:a11b4db33f")); + assertTrue(result.getMessage().contains("Malformed cursor")); + assertEquals(result.getResponse().getStatusLine().getStatusCode(), 400); + } + public void verifyWithAndWithoutPaginationResponse( String sqlQuery, String cursorQuery, int fetch_size, boolean shouldFallBackToV1) throws IOException { diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/ExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/ExplainIT.java index b42e9f84f4..27f8eca3ef 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/ExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/ExplainIT.java @@ -185,7 +185,7 @@ public void orderByOnNestedFieldTest() throws Exception { Assert.assertThat( result.replaceAll("\\s+", ""), equalTo( - "{\"from\":0,\"size\":200,\"sort\":[{\"message.info\":" + "{\"from\":0,\"size\":10000,\"sort\":[{\"message.info\":" + "{\"order\":\"asc\",\"nested\":{\"path\":\"message\"}}}]}")); } diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/GetEndpointQueryIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/GetEndpointQueryIT.java index 81edb54556..6cc4aba811 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/GetEndpointQueryIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/GetEndpointQueryIT.java @@ -16,7 +16,7 @@ /** Tests to cover requests with "?format=csv" parameter */ public class GetEndpointQueryIT extends SQLIntegTestCase { - @Rule public ExpectedException rule = ExpectedException.none(); + @Rule public final ExpectedException rule = ExpectedException.none(); @Override protected void init() throws Exception { diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/JdbcTestIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/JdbcTestIT.java index 74acad4f52..4ad88c632b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/JdbcTestIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/JdbcTestIT.java @@ -155,8 +155,7 @@ public void dateFunctionNameCaseInsensitiveTest() { public void ipTypeShouldPassJdbcFormatter() { assertThat( executeQuery( - "SELECT host AS hostIP FROM " + TestsConstants.TEST_INDEX_WEBLOG + " ORDER BY hostIP", - "jdbc"), + "SELECT host FROM " + TestsConstants.TEST_INDEX_WEBLOGS + " ORDER BY host", "jdbc"), containsString("\"type\": \"ip\"")); } diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/JoinAliasWriterRuleIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/JoinAliasWriterRuleIT.java index 75b2b45df6..3933338f0a 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/JoinAliasWriterRuleIT.java +++ 
b/integ-test/src/test/java/org/opensearch/sql/legacy/JoinAliasWriterRuleIT.java @@ -17,7 +17,7 @@ /** Test cases for writing missing join table aliases. */ public class JoinAliasWriterRuleIT extends SQLIntegTestCase { - @Rule public ExpectedException exception = ExpectedException.none(); + @Rule public final ExpectedException exception = ExpectedException.none(); protected void init() throws Exception { loadIndex(Index.ORDER); // opensearch-sql_test_index_order diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/JoinIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/JoinIT.java index 8019454b77..8c2ea96474 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/JoinIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/JoinIT.java @@ -288,6 +288,8 @@ public void hintMultiSearchCanRunFewTimesNL() throws IOException { Assert.assertThat(hits.length(), equalTo(42)); } + // TODO: Fix joinWithGeoIntersectNL test when SQL_PAGINATION_API_SEARCH_AFTER is true + @Ignore @Test public void joinWithGeoIntersectNL() throws IOException { @@ -455,7 +457,7 @@ public void joinParseCheckSelectedFieldsSplitNLConditionOrderGT() throws IOExcep "SELECT /*! USE_NL*/ a.firstname, a.lastname, a.gender, d.firstname, d.age FROM %s a" + " JOIN %s d on a.age < d.age WHERE (d.firstname = 'Lynn' OR d.firstname =" + " 'Obrien') AND a.firstname = 'Mcgee'", - TEST_INDEX_PEOPLE, + TEST_INDEX_PEOPLE2, TEST_INDEX_ACCOUNT); JSONObject result = executeQuery(query); @@ -501,7 +503,7 @@ public void joinParseCheckSelectedFieldsSplitNLConditionOrderLT() throws IOExcep "SELECT /*! USE_NL*/ a.firstname, a.lastname, a.gender, d.firstname, d.age FROM %s a" + " JOIN %s d on a.age > d.age WHERE (d.firstname = 'Sandoval' OR d.firstname =" + " 'Hewitt') AND a.firstname = 'Fulton'", - TEST_INDEX_PEOPLE, + TEST_INDEX_PEOPLE2, TEST_INDEX_ACCOUNT); JSONObject result = executeQuery(query); diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/MalformedQueryIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/MalformedQueryIT.java new file mode 100644 index 0000000000..84b60fdabd --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/MalformedQueryIT.java @@ -0,0 +1,82 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.legacy; + +import java.io.IOException; +import java.util.Locale; +import org.apache.hc.core5.http.ParseException; +import org.apache.hc.core5.http.io.entity.EntityUtils; +import org.json.JSONObject; +import org.junit.Assert; +import org.opensearch.client.ResponseException; + +/** Tests for clean handling of various types of invalid queries */ +public class MalformedQueryIT extends SQLIntegTestCase { + @Override + protected void init() throws Exception { + loadIndex(Index.BANK); + loadIndex(Index.BANK_TWO); + } + + public void testJoinWithInvalidCondition() throws IOException, ParseException { + ResponseException result = + assertThrows( + "Expected Join query with malformed 'ON' to raise error, but didn't", + ResponseException.class, + () -> + executeQuery( + String.format( + Locale.ROOT, + "SELECT a.firstname, b.age FROM %s AS a INNER JOIN %s AS b %%" + + " a.account_number=b.account_number", + TestsConstants.TEST_INDEX_BANK, + TestsConstants.TEST_INDEX_BANK_TWO))); + var errMsg = new JSONObject(EntityUtils.toString(result.getResponse().getEntity())); + + Assert.assertEquals("SqlParseException", errMsg.getJSONObject("error").getString("type")); + Assert.assertEquals(400, 
errMsg.getInt("status")); + } + + public void testWrappedWildcardInSubquery() throws IOException, ParseException { + ResponseException result = + assertThrows( + "Expected wildcard subquery to raise error, but didn't", + ResponseException.class, + () -> + executeQuery( + String.format( + Locale.ROOT, + "SELECT a.first_name FROM %s AS a WHERE a.age IN (SELECT age FROM" + + " `opensearch-sql_test_index_*` WHERE age > 30)", + TestsConstants.TEST_INDEX_BANK, + TestsConstants.TEST_INDEX_BANK_TWO))); + var errMsg = new JSONObject(EntityUtils.toString(result.getResponse().getEntity())); + System.err.println("Full response: " + errMsg); + + Assert.assertEquals("IndexNotFoundException", errMsg.getJSONObject("error").getString("type")); + Assert.assertEquals(404, errMsg.getInt("status")); + } + + public void testUnwrappedWildcardInSubquery() throws IOException, ParseException { + ResponseException result = + assertThrows( + "Expected wildcard subquery to raise error, but didn't", + ResponseException.class, + () -> + executeQuery( + String.format( + Locale.ROOT, + "SELECT a.first_name FROM %s AS a WHERE a.age IN (SELECT age FROM * WHERE" + + " age > 30)", + TestsConstants.TEST_INDEX_BANK, + TestsConstants.TEST_INDEX_BANK_TWO))); + var errMsg = new JSONObject(EntityUtils.toString(result.getResponse().getEntity())); + System.err.println("Full response: " + errMsg); + + Assert.assertEquals("IndexNotFoundException", errMsg.getJSONObject("error").getString("type")); + Assert.assertEquals(404, errMsg.getInt("status")); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/MultiQueryIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/MultiQueryIT.java index 84750f8a27..bee85ac314 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/MultiQueryIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/MultiQueryIT.java @@ -18,9 +18,9 @@ public class MultiQueryIT extends SQLIntegTestCase { - private static String MINUS_SCROLL_DEFAULT_HINT = + private static final String MINUS_SCROLL_DEFAULT_HINT = " /*! MINUS_SCROLL_FETCH_AND_RESULT_LIMITS(1000, 50, 100) */ "; - private static String MINUS_TERMS_OPTIMIZATION_HINT = + private static final String MINUS_TERMS_OPTIMIZATION_HINT = " /*! MINUS_USE_TERMS_OPTIMIZATION(true) */ "; @Override diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/ObjectFieldSelectIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/ObjectFieldSelectIT.java index 3a2f48d497..aadd79469d 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/ObjectFieldSelectIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/ObjectFieldSelectIT.java @@ -14,6 +14,7 @@ import org.json.JSONArray; import org.json.JSONObject; import org.junit.Test; +import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.legacy.utils.StringUtils; /** @@ -79,9 +80,20 @@ public void testSelectNestedFieldItself() { @Test public void testSelectObjectFieldOfArrayValuesItself() { JSONObject response = new JSONObject(query("SELECT accounts FROM %s")); + verifyDataRows(response, rows(new JSONArray("[{\"id\":1},{\"id\":2}]"))); + } - // Only the first element of the list of is returned. 
- verifyDataRows(response, rows(new JSONObject("{\"id\": 1}"))); + @Test + public void testSelectObjectFieldOfArrayValuesItselfNoFieldTypeTolerance() throws Exception { + updateClusterSettings( + new ClusterSetting(PERSISTENT, Settings.Key.FIELD_TYPE_TOLERANCE.getKeyValue(), "false")); + try { + JSONObject response = new JSONObject(query("SELECT accounts FROM %s")); + verifyDataRows(response, rows(new JSONObject("{\"id\":1}"))); + } finally { + updateClusterSettings( + new ClusterSetting(PERSISTENT, Settings.Key.FIELD_TYPE_TOLERANCE.getKeyValue(), "true")); + } } @Test diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/OpenSearchSQLRestTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/OpenSearchSQLRestTestCase.java index d73e3468d4..ced69d54a0 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/OpenSearchSQLRestTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/OpenSearchSQLRestTestCase.java @@ -195,7 +195,9 @@ protected static void wipeAllOpenSearchIndices(RestClient client) throws IOExcep try { // System index, mostly named .opensearch-xxx or .opendistro-xxx, are not allowed to // delete - if (!indexName.startsWith(".opensearch") && !indexName.startsWith(".opendistro")) { + if (!indexName.startsWith(".opensearch") + && !indexName.startsWith(".opendistro") + && !indexName.startsWith(".ql")) { client.performRequest(new Request("DELETE", "/" + indexName)); } } catch (Exception e) { diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/RestIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/RestIntegTestCase.java index a94047c1e4..3d53b96668 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/RestIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/RestIntegTestCase.java @@ -273,8 +273,8 @@ public enum Index { getOrderIndexMapping(), "src/test/resources/order.json"), WEBLOG( - TestsConstants.TEST_INDEX_WEBLOG, - "weblog", + TestsConstants.TEST_INDEX_WEBLOGS, + "weblogs", getWeblogsIndexMapping(), "src/test/resources/weblogs.json"), DATE( diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index 8a0ad563a6..1728be74e6 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -20,6 +20,7 @@ import static org.opensearch.sql.legacy.TestUtils.getDogs3IndexMapping; import static org.opensearch.sql.legacy.TestUtils.getEmployeeNestedTypeIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getGameOfThronesIndexMapping; +import static org.opensearch.sql.legacy.TestUtils.getGeopointIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getJoinTypeIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getLocationIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getMappingFile; @@ -54,6 +55,7 @@ import javax.management.remote.JMXConnector; import javax.management.remote.JMXConnectorFactory; import javax.management.remote.JMXServiceURL; +import lombok.SneakyThrows; import org.apache.commons.lang3.StringUtils; import org.json.JSONArray; import org.json.JSONObject; @@ -172,6 +174,15 @@ protected void resetQuerySizeLimit() throws IOException { DEFAULT_QUERY_SIZE_LIMIT.toString())); } + @SneakyThrows + protected void setDataSourcesEnabled(String clusterSettingType, boolean value) { + updateClusterSettings( + new ClusterSetting( 
+ clusterSettingType, + Settings.Key.DATASOURCES_ENABLED.getKeyValue(), + Boolean.toString(value))); + } + protected static void wipeAllClusterSettings() throws IOException { updateClusterSettings(new ClusterSetting("persistent", "*", null)); updateClusterSettings(new ClusterSetting("transient", "*", null)); @@ -239,12 +250,17 @@ protected Request getSqlCursorCloseRequest(String cursorRequest) { } protected String executeQuery(String query, String requestType) { + return executeQuery(query, requestType, Map.of()); + } + + protected String executeQuery(String query, String requestType, Map params) { try { String endpoint = "/_plugins/_sql?format=" + requestType; String requestBody = makeRequest(query); Request sqlRequest = new Request("POST", endpoint); sqlRequest.setJsonEntity(requestBody); + sqlRequest.addParameters(params); Response response = client().performRequest(sqlRequest); Assert.assertEquals(200, response.getStatusLine().getStatusCode()); @@ -446,6 +462,12 @@ protected String makeRequest(String query, int fetch_size) { return String.format("{ \"fetch_size\": \"%s\", \"query\": \"%s\" }", fetch_size, query); } + protected String makeRequest(String query, int fetch_size, String filterQuery) { + return String.format( + "{ \"fetch_size\": \"%s\", \"query\": \"%s\", \"filter\" : %s }", + fetch_size, query, filterQuery); + } + protected String makeFetchLessRequest(String query) { return String.format("{\n" + " \"query\": \"%s\"\n" + "}", query); } @@ -639,8 +661,8 @@ public enum Index { getOrderIndexMapping(), "src/test/resources/order.json"), WEBLOG( - TestsConstants.TEST_INDEX_WEBLOG, - "weblog", + TestsConstants.TEST_INDEX_WEBLOGS, + "weblogs", getWeblogsIndexMapping(), "src/test/resources/weblogs.json"), DATE( @@ -685,6 +707,15 @@ public enum Index { "calcs", getMappingFile("calcs_index_mappings.json"), "src/test/resources/calcs.json"), + // Calcs has enough records for shards to be interesting, but updating the existing mapping with + // shards in-place + // breaks existing tests. Aside from introducing a primary shard setting > 1, this index is + // identical to CALCS. 
+ CALCS_WITH_SHARDS( + TestsConstants.TEST_INDEX_CALCS, + "calcs", + getMappingFile("calcs_with_shards_index_mappings.json"), + "src/test/resources/calcs.json"), DATE_FORMATS( TestsConstants.TEST_INDEX_DATE_FORMATS, "date_formats", @@ -709,7 +740,12 @@ public enum Index { TestsConstants.TEST_INDEX_NESTED_WITH_NULLS, "multi_nested", getNestedTypeIndexMapping(), - "src/test/resources/nested_with_nulls.json"); + "src/test/resources/nested_with_nulls.json"), + GEOPOINTS( + TestsConstants.TEST_INDEX_GEOPOINT, + "dates", + getGeopointIndexMapping(), + "src/test/resources/geopoints.json"); private final String name; private final String type; diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SubqueryIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SubqueryIT.java index c1d656628f..39abad92df 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SubqueryIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SubqueryIT.java @@ -37,7 +37,7 @@ public class SubqueryIT extends SQLIntegTestCase { - @Rule public ExpectedException exceptionRule = ExpectedException.none(); + @Rule public final ExpectedException exceptionRule = ExpectedException.none(); @Override protected void init() throws Exception { diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java index 65cacf16d2..195dda0cbd 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java @@ -245,6 +245,11 @@ public static String getDataTypeNonnumericIndexMapping() { return getMappingFile(mappingFile); } + public static String getGeopointIndexMapping() { + String mappingFile = "geopoint_index_mapping.json"; + return getMappingFile(mappingFile); + } + public static void loadBulk(Client client, String jsonPath, String defaultIndex) throws Exception { System.out.println(String.format("Loading file %s into opensearch cluster", jsonPath)); diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java index 29bc9813fa..1e336f544e 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java @@ -43,7 +43,7 @@ public class TestsConstants { public static final String TEST_INDEX_BANK_CSV_SANITIZE = TEST_INDEX_BANK + "_csv_sanitize"; public static final String TEST_INDEX_BANK_RAW_SANITIZE = TEST_INDEX_BANK + "_raw_sanitize"; public static final String TEST_INDEX_ORDER = TEST_INDEX + "_order"; - public static final String TEST_INDEX_WEBLOG = TEST_INDEX + "_weblog"; + public static final String TEST_INDEX_WEBLOGS = TEST_INDEX + "_weblogs"; public static final String TEST_INDEX_DATE = TEST_INDEX + "_date"; public static final String TEST_INDEX_DATE_TIME = TEST_INDEX + "_datetime"; public static final String TEST_INDEX_DEEP_NESTED = TEST_INDEX + "_deep_nested"; @@ -57,6 +57,7 @@ public class TestsConstants { public static final String TEST_INDEX_WILDCARD = TEST_INDEX + "_wildcard"; public static final String TEST_INDEX_MULTI_NESTED_TYPE = TEST_INDEX + "_multi_nested"; public static final String TEST_INDEX_NESTED_WITH_NULLS = TEST_INDEX + "_nested_with_nulls"; + public static final String TEST_INDEX_GEOPOINT = TEST_INDEX + "_geopoint"; public static final String DATASOURCES = ".ql-datasources"; public static final String DATE_FORMAT = 
"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"; diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/CsvFormatIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/CsvFormatIT.java index a9eb18c2a1..21240bf416 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/CsvFormatIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/CsvFormatIT.java @@ -6,6 +6,7 @@ package org.opensearch.sql.ppl; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_CSV_SANITIZE; +import static org.opensearch.sql.util.TestUtils.assertRowsEqual; import java.io.IOException; import java.util.Locale; @@ -27,7 +28,7 @@ public void sanitizeTest() throws IOException { Locale.ROOT, "source=%s | fields firstname, lastname", TEST_INDEX_BANK_CSV_SANITIZE)); - assertEquals( + assertRowsEqual( StringUtils.format( "firstname,lastname%n" + "'+Amber JOHnny,Duke Willmington+%n" @@ -47,7 +48,7 @@ public void escapeSanitizeTest() throws IOException { "source=%s | fields firstname, lastname", TEST_INDEX_BANK_CSV_SANITIZE), false); - assertEquals( + assertRowsEqual( StringUtils.format( "firstname,lastname%n" + "+Amber JOHnny,Duke Willmington+%n" diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/DateTimeComparisonIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/DateTimeComparisonIT.java index 7cc083cbb6..dca33b9f96 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/DateTimeComparisonIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/DateTimeComparisonIT.java @@ -43,9 +43,9 @@ public void resetTimeZone() { TimeZone.setDefault(testTz); } - private String functionCall; - private String name; - private Boolean expectedResult; + private final String functionCall; + private final String name; + private final Boolean expectedResult; public DateTimeComparisonIT( @Name("functionCall") String functionCall, diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/DateTimeImplementationIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/DateTimeImplementationIT.java index f9dc7d8027..e777a4f454 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/DateTimeImplementationIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/DateTimeImplementationIT.java @@ -6,8 +6,10 @@ package org.opensearch.sql.ppl; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DATE; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DATE_FORMATS; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; import static org.opensearch.sql.util.MatcherUtils.verifySchema; import static org.opensearch.sql.util.MatcherUtils.verifySome; @@ -20,6 +22,7 @@ public class DateTimeImplementationIT extends PPLIntegTestCase { @Override public void init() throws IOException { loadIndex(Index.DATE); + loadIndex(Index.DATE_FORMATS); } @Test @@ -176,4 +179,38 @@ public void nullDateTimeInvalidDateValueMonth() throws IOException { verifySchema(result, schema("f", null, "timestamp")); verifySome(result.getJSONArray("datarows"), rows(new Object[] {null})); } + + @Test + public void testSpanDatetimeWithCustomFormat() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | eval a = 1 | stats count() as cnt by span(yyyy-MM-dd, 1d) as span", + TEST_INDEX_DATE_FORMATS)); + verifySchema(result, schema("cnt", null, "integer"), schema("span", null, "date")); + verifyDataRows(result, rows(2, "1984-04-12")); + } + + @Test + 
public void testSpanDatetimeWithEpochMillisFormat() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | eval a = 1 | stats count() as cnt by span(epoch_millis, 1d) as span", + TEST_INDEX_DATE_FORMATS)); + verifySchema(result, schema("cnt", null, "integer"), schema("span", null, "timestamp")); + verifyDataRows(result, rows(2, "1984-04-12 00:00:00")); + } + + @Test + public void testSpanDatetimeWithDisjunctiveDifferentFormats() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | eval a = 1 | stats count() as cnt by span(yyyy-MM-dd_OR_epoch_millis," + + " 1d) as span", + TEST_INDEX_DATE_FORMATS)); + verifySchema(result, schema("cnt", null, "integer"), schema("span", null, "timestamp")); + verifyDataRows(result, rows(2, "1984-04-12 00:00:00")); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/DedupCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/DedupCommandIT.java index 7a6cf16bb4..b69fce6785 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/DedupCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/DedupCommandIT.java @@ -11,6 +11,9 @@ import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import org.json.JSONArray; import org.json.JSONObject; import org.junit.jupiter.api.Test; @@ -35,7 +38,16 @@ public void testConsecutiveDedup() throws IOException { executeQuery( String.format( "source=%s | dedup male consecutive=true | fields male", TEST_INDEX_BANK)); - verifyDataRows(result, rows(true), rows(false), rows(true), rows(false)); + List<Object[]> actualRows = extractActualRows(result); + List<Object[]> expectedRows = getExpectedDedupRows(actualRows); + assertTrue("Deduplication was not consecutive", expectedRows != null); + assertEquals( + "Row count after deduplication does not match", expectedRows.size(), actualRows.size()); + + // Verify the expected and actual rows match + for (int i = 0; i < expectedRows.size(); i++) { + assertArrayEquals(expectedRows.get(i), actualRows.get(i)); + } } @Test @@ -62,4 +74,51 @@ public void testKeepEmptyDedup() throws IOException { rows("Virginia", null), rows("Dillard", 48086)); } + + private List<Object[]> extractActualRows(JSONObject result) { + JSONArray dataRows = result.getJSONArray("datarows"); + List<Object[]> actualRows = new ArrayList<>(); + for (int i = 0; i < dataRows.length(); i++) { + JSONArray row = dataRows.getJSONArray(i); + actualRows.add(new Object[] {row.get(0)}); + } + return actualRows; + } + + // Create the expected deduplicated rows + private List<Object[]> getExpectedDedupRows(List<Object[]> actualRows) { + if (verifyConsecutiveDeduplication(actualRows)) { + return createExpectedRows(actualRows); + } + return null; + } + + // Verify consecutive deduplication + private boolean verifyConsecutiveDeduplication(List<Object[]> actualRows) { + Object previousValue = null; + + for (Object[] currentRow : actualRows) { + Object currentValue = currentRow[0]; + if (previousValue != null && currentValue.equals(previousValue)) { + return false; // If consecutive values are the same, deduplication fails + } + previousValue = currentValue; + } + return true; + } + + // Create the expected rows after deduplication + private List<Object[]> createExpectedRows(List<Object[]> actualRows) { + List<Object[]> expectedRows = new ArrayList<>(); + Object previousValue = null; + + for (Object[] currentRow : actualRows) { + Object currentValue = currentRow[0]; + if (previousValue == null || !currentValue.equals(previousValue)) {
expectedRows.add(currentRow); + } + previousValue = currentValue; + } + return expectedRows; + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java index fce975ef92..531a24bad6 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java @@ -76,6 +76,56 @@ public void testSortPushDownExplain() throws Exception { + "| fields age")); } + @Test + public void testLimitPushDownExplain() throws Exception { + String expected = loadFromFile("expectedOutput/ppl/explain_limit_push.json"); + + assertJsonEquals( + expected, + explainQueryToString( + "source=opensearch-sql_test_index_account" + + "| eval ageMinus = age - 30 " + + "| head 5 " + + "| fields ageMinus")); + } + + @Test + public void testFillNullPushDownExplain() throws Exception { + String expected = loadFromFile("expectedOutput/ppl/explain_fillnull_push.json"); + + assertJsonEquals( + expected, + explainQueryToString( + "source=opensearch-sql_test_index_account" + + " | fillnull with -1 in age,balance | fields age, balance")); + } + + @Test + public void testTrendlinePushDownExplain() throws Exception { + String expected = loadFromFile("expectedOutput/ppl/explain_trendline_push.json"); + + assertJsonEquals( + expected, + explainQueryToString( + "source=opensearch-sql_test_index_account" + + "| head 5 " + + "| trendline sma(2, age) as ageTrend " + + "| fields ageTrend")); + } + + @Test + public void testTrendlineWithSortPushDownExplain() throws Exception { + String expected = loadFromFile("expectedOutput/ppl/explain_trendline_sort_push.json"); + + assertJsonEquals( + expected, + explainQueryToString( + "source=opensearch-sql_test_index_account" + + "| head 5 " + + "| trendline sort age sma(2, age) as ageTrend " + + "| fields ageTrend")); + } + String loadFromFile(String filename) throws Exception { URI uri = Resources.getResource(filename).toURI(); return new String(Files.readAllBytes(Paths.get(uri))); diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/FillNullCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/FillNullCommandIT.java new file mode 100644 index 0000000000..d88d31c997 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/FillNullCommandIT.java @@ -0,0 +1,214 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_CALCS; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; + +public class FillNullCommandIT extends PPLIntegTestCase { + @Override + public void init() throws IOException { + loadIndex(Index.CALCS); + } + + @Test + public void testFillNullSameValueOneField() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | fields str2, num0 | fillnull with -1 in num0", TEST_INDEX_CALCS)); + verifyDataRows( + result, + rows("one", 12.3), + rows("two", -12.3), + rows("three", 15.7), + rows(null, -15.7), + rows("five", 3.5), + rows("six", -3.5), + rows(null, 0), + rows("eight", -1), + rows("nine", 10), + rows("ten", -1), + rows("eleven", -1), + rows("twelve", -1), + rows(null, -1), + rows("fourteen", -1), + rows("fifteen", -1), + rows("sixteen", -1), + rows(null, -1)); + } + + @Test 
+ public void testFillNullSameValueTwoFields() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | fields num0, num2 | fillnull with -1 in num0,num2", TEST_INDEX_CALCS)); + verifyDataRows( + result, + rows(12.3, 17.86), + rows(-12.3, 16.73), + rows(15.7, -1), + rows(-15.7, 8.51), + rows(3.5, 6.46), + rows(-3.5, 8.98), + rows(0, 11.69), + rows(-1, 17.25), + rows(10, -1), + rows(-1, 11.5), + rows(-1, 6.8), + rows(-1, 3.79), + rows(-1, -1), + rows(-1, 13.04), + rows(-1, -1), + rows(-1, 10.98), + rows(-1, 7.87)); + } + + @Test + public void testFillNullVariousValuesOneField() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | fields str2, num0 | fillnull using num0 = -1", TEST_INDEX_CALCS)); + verifyDataRows( + result, + rows("one", 12.3), + rows("two", -12.3), + rows("three", 15.7), + rows(null, -15.7), + rows("five", 3.5), + rows("six", -3.5), + rows(null, 0), + rows("eight", -1), + rows("nine", 10), + rows("ten", -1), + rows("eleven", -1), + rows("twelve", -1), + rows(null, -1), + rows("fourteen", -1), + rows("fifteen", -1), + rows("sixteen", -1), + rows(null, -1)); + } + + @Test + public void testFillNullVariousValuesTwoFields() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | fields num0, num2 | fillnull using num0 = -1, num2 = -2", + TEST_INDEX_CALCS)); + verifyDataRows( + result, + rows(12.3, 17.86), + rows(-12.3, 16.73), + rows(15.7, -2), + rows(-15.7, 8.51), + rows(3.5, 6.46), + rows(-3.5, 8.98), + rows(0, 11.69), + rows(-1, 17.25), + rows(10, -2), + rows(-1, 11.5), + rows(-1, 6.8), + rows(-1, 3.79), + rows(-1, -2), + rows(-1, 13.04), + rows(-1, -2), + rows(-1, 10.98), + rows(-1, 7.87)); + } + + @Test + public void testFillNullWithOtherField() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | fillnull using num0 = num1 | fields str2, num0", TEST_INDEX_CALCS)); + verifyDataRows( + result, + rows("one", 12.3), + rows("two", -12.3), + rows("three", 15.7), + rows(null, -15.7), + rows("five", 3.5), + rows("six", -3.5), + rows(null, 0), + rows("eight", 11.38), + rows("nine", 10), + rows("ten", 12.4), + rows("eleven", 10.32), + rows("twelve", 2.47), + rows(null, 12.05), + rows("fourteen", 10.37), + rows("fifteen", 7.1), + rows("sixteen", 16.81), + rows(null, 7.12)); + } + + @Test + public void testFillNullWithFunctionOnOtherField() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | fillnull with ceil(num1) in num0 | fields str2, num0", + TEST_INDEX_CALCS)); + verifyDataRows( + result, + rows("one", 12.3), + rows("two", -12.3), + rows("three", 15.7), + rows(null, -15.7), + rows("five", 3.5), + rows("six", -3.5), + rows(null, 0), + rows("eight", 12), + rows("nine", 10), + rows("ten", 13), + rows("eleven", 11), + rows("twelve", 3), + rows(null, 13), + rows("fourteen", 11), + rows("fifteen", 8), + rows("sixteen", 17), + rows(null, 8)); + } + + @Test + public void testFillNullWithFunctionMultipleCommands() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | fillnull with num1 in num0 | fields str2, num0 | fillnull with" + + " 'unknown' in str2", + TEST_INDEX_CALCS)); + verifyDataRows( + result, + rows("one", 12.3), + rows("two", -12.3), + rows("three", 15.7), + rows("unknown", -15.7), + rows("five", 3.5), + rows("six", -3.5), + rows("unknown", 0), + rows("eight", 11.38), + rows("nine", 10), + rows("ten", 12.4), + rows("eleven", 10.32), + rows("twelve", 2.47), 
+ rows("unknown", 12.05), + rows("fourteen", 10.37), + rows("fifteen", 7.1), + rows("sixteen", 16.81), + rows("unknown", 7.12)); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/IPComparisonIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/IPComparisonIT.java new file mode 100644 index 0000000000..a19ea32a68 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/IPComparisonIT.java @@ -0,0 +1,145 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WEBLOGS; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.Test; +import org.opensearch.sql.legacy.SQLIntegTestCase; + +public class IPComparisonIT extends PPLIntegTestCase { + + @Override + public void init() throws IOException { + loadIndex(SQLIntegTestCase.Index.WEBLOG); + } + + @Test + public void test_equal() throws IOException { + JSONObject result; + final String operator = "="; + + result = executeComparisonQuery(operator, "1.2.3.4"); + verifyDataRows(result, rows("1.2.3.4")); + + result = executeComparisonQuery(operator, "::ffff:1.2.3.4"); + verifyDataRows(result, rows("1.2.3.4")); + + result = executeComparisonQuery(operator, "::1"); + verifyDataRows(result, rows("::1")); + + result = executeComparisonQuery(operator, "0000:0000:0000:0000:0000:0000:0000:0001"); + verifyDataRows(result, rows("::1")); + } + + @Test + public void test_not_equal() throws IOException { + JSONObject result; + final String operator = "!="; + + result = executeComparisonQuery(operator, "1.2.3.4"); + verifyDataRows( + result, rows("::1"), rows("0.0.0.2"), rows("::3"), rows("1.2.3.5"), rows("::ffff:1234")); + + result = executeComparisonQuery(operator, "::ffff:1.2.3.4"); + verifyDataRows( + result, rows("::1"), rows("0.0.0.2"), rows("::3"), rows("1.2.3.5"), rows("::ffff:1234")); + + result = executeComparisonQuery(operator, "::1"); + verifyDataRows( + result, + rows("0.0.0.2"), + rows("::3"), + rows("1.2.3.4"), + rows("1.2.3.5"), + rows("::ffff:1234")); + + result = executeComparisonQuery(operator, "0000:0000:0000:0000:0000:0000:0000:0001"); + verifyDataRows( + result, + rows("0.0.0.2"), + rows("::3"), + rows("1.2.3.4"), + rows("1.2.3.5"), + rows("::ffff:1234")); + } + + @Test + public void test_greater_than() throws IOException { + JSONObject result; + final String operator = ">"; + + result = executeComparisonQuery(operator, "1.2.3.3"); + verifyDataRows(result, rows("1.2.3.4"), rows("1.2.3.5")); + + result = executeComparisonQuery(operator, "1.2.3.4"); + verifyDataRows(result, rows("1.2.3.5")); + + result = executeComparisonQuery(operator, "1.2.3.5"); + verifyDataRows(result); + } + + @Test + public void test_greater_than_or_equal_to() throws IOException { + JSONObject result; + final String operator = ">="; + + result = executeComparisonQuery(operator, "1.2.3.4"); + verifyDataRows(result, rows("1.2.3.4"), rows("1.2.3.5")); + + result = executeComparisonQuery(operator, "1.2.3.5"); + verifyDataRows(result, rows("1.2.3.5")); + + result = executeComparisonQuery(operator, "1.2.3.6"); + verifyDataRows(result); + } + + @Test + public void test_less_than() throws IOException { + JSONObject result; + final String operator = "<"; + + result = executeComparisonQuery(operator, "::4"); + verifyDataRows(result, rows("::1"), rows("::3")); + + 
result = executeComparisonQuery(operator, "::3"); + verifyDataRows(result, rows("::1")); + + result = executeComparisonQuery(operator, "::1"); + verifyDataRows(result); + } + + @Test + public void test_less_than_or_equal_to() throws IOException { + JSONObject result; + final String operator = "<="; + + result = executeComparisonQuery(operator, "::3"); + verifyDataRows(result, rows("::1"), rows("::3")); + + result = executeComparisonQuery(operator, "::1"); + verifyDataRows(result, rows("::1")); + + result = executeComparisonQuery(operator, "::0"); + verifyDataRows(result); + } + + /** + * Executes a query comparison on the weblogs test index with the given comparison operator and IP + * address string, and returns the resulting {@link JSONObject}; + */ + private JSONObject executeComparisonQuery(String comparisonOperator, String addressString) + throws IOException { + String formatString = "source=%s | where host %s '%s' | fields host"; + String query = + String.format(formatString, TEST_INDEX_WEBLOGS, comparisonOperator, addressString); + return executeQuery(query); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/IPFunctionsIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/IPFunctionsIT.java new file mode 100644 index 0000000000..1b0dbf711c --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/IPFunctionsIT.java @@ -0,0 +1,57 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WEBLOGS; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; + +public class IPFunctionsIT extends PPLIntegTestCase { + + @Override + public void init() throws IOException { + loadIndex(Index.WEBLOG); + } + + @Test + public void test_cidrmatch() throws IOException { + + JSONObject result; + + // No matches + result = + executeQuery( + String.format( + "source=%s | where cidrmatch(host, '250.0.0.0/24') | fields host", + TEST_INDEX_WEBLOGS)); + verifySchema(result, schema("host", null, "ip")); + verifyDataRows(result); + + // One match + result = + executeQuery( + String.format( + "source=%s | where cidrmatch(host, '0.0.0.0/24') | fields host", + TEST_INDEX_WEBLOGS)); + verifySchema(result, schema("host", null, "ip")); + verifyDataRows(result, rows("0.0.0.2")); + + // Multiple matches + result = + executeQuery( + String.format( + "source=%s | where cidrmatch(host, '1.2.3.0/24') | fields host", + TEST_INDEX_WEBLOGS)); + verifySchema(result, schema("host", null, "ip")); + verifyDataRows(result, rows("1.2.3.4"), rows("1.2.3.5")); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/PPLPluginIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/PPLPluginIT.java index 44f79a8944..96362a2fd2 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/PPLPluginIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/PPLPluginIT.java @@ -27,7 +27,7 @@ import org.opensearch.sql.util.TestUtils; public class PPLPluginIT extends PPLIntegTestCase { - @Rule public ExpectedException exceptionRule = ExpectedException.none(); + @Rule public final ExpectedException exceptionRule = ExpectedException.none(); private static final String 
PERSISTENT = "persistent"; diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ParseCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ParseCommandIT.java index 7f25f6f160..5e672812c8 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ParseCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ParseCommandIT.java @@ -6,11 +6,13 @@ package org.opensearch.sql.ppl; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; -import static org.opensearch.sql.util.MatcherUtils.rows; -import static org.opensearch.sql.util.MatcherUtils.verifyOrder; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import org.json.JSONArray; import org.json.JSONObject; +import org.junit.Assert; import org.junit.Test; public class ParseCommandIT extends PPLIntegTestCase { @@ -26,15 +28,23 @@ public void testParseCommand() throws IOException { executeQuery( String.format( "source=%s | parse email '.+@(?.+)' | fields email, host", TEST_INDEX_BANK)); - verifyOrder( - result, - rows("amberduke@pyrami.com", "pyrami.com"), - rows("hattiebond@netagy.com", "netagy.com"), - rows("nanettebates@quility.com", "quility.com"), - rows("daleadams@boink.com", "boink.com"), - rows("elinorratliff@scentric.com", "scentric.com"), - rows("virginiaayala@filodyne.com", "filodyne.com"), - rows("dillardmcpherson@quailcom.com", "quailcom.com")); + + // Create the expected rows + List expectedRows = + new ArrayList<>( + List.of( + new Object[] {"amberduke@pyrami.com", "pyrami.com"}, + new Object[] {"hattiebond@netagy.com", "netagy.com"}, + new Object[] {"nanettebates@quility.com", "quility.com"}, + new Object[] {"daleadams@boink.com", "boink.com"}, + new Object[] {"elinorratliff@scentric.com", "scentric.com"}, + new Object[] {"virginiaayala@filodyne.com", "filodyne.com"}, + new Object[] {"dillardmcpherson@quailcom.com", "quailcom.com"})); + + List actualRows = convertJsonToRows(result, 2); + sortRowsByFirstColumn(expectedRows); + sortRowsByFirstColumn(actualRows); + compareRows(expectedRows, actualRows); } @Test @@ -43,15 +53,23 @@ public void testParseCommandReplaceOriginalField() throws IOException { executeQuery( String.format( "source=%s | parse email '.+@(?.+)' | fields email", TEST_INDEX_BANK)); - verifyOrder( - result, - rows("pyrami.com"), - rows("netagy.com"), - rows("quility.com"), - rows("boink.com"), - rows("scentric.com"), - rows("filodyne.com"), - rows("quailcom.com")); + + // Create the expected rows + List expectedRows = + new ArrayList<>( + List.of( + new Object[] {"pyrami.com"}, + new Object[] {"netagy.com"}, + new Object[] {"quility.com"}, + new Object[] {"boink.com"}, + new Object[] {"scentric.com"}, + new Object[] {"filodyne.com"}, + new Object[] {"quailcom.com"})); + + List actualRows = convertJsonToRows(result, 1); + sortRowsByFirstColumn(expectedRows); + sortRowsByFirstColumn(actualRows); + compareRows(expectedRows, actualRows); } @Test @@ -62,14 +80,52 @@ public void testParseCommandWithOtherRunTimeFields() throws IOException { "source=%s | parse email '.+@(?.+)' | " + "eval eval_result=1 | fields host, eval_result", TEST_INDEX_BANK)); - verifyOrder( - result, - rows("pyrami.com", 1), - rows("netagy.com", 1), - rows("quility.com", 1), - rows("boink.com", 1), - rows("scentric.com", 1), - rows("filodyne.com", 1), - rows("quailcom.com", 1)); + + // Create the expected rows as List + List expectedRows = + new ArrayList<>( + List.of( + new Object[] {"pyrami.com", 1}, + new Object[] {"netagy.com", 1}, + new Object[] {"quility.com", 
1}, + new Object[] {"boink.com", 1}, + new Object[] {"scentric.com", 1}, + new Object[] {"filodyne.com", 1}, + new Object[] {"quailcom.com", 1})); + + List actualRows = convertJsonToRows(result, 2); + sortRowsByFirstColumn(expectedRows); + sortRowsByFirstColumn(actualRows); + compareRows(expectedRows, actualRows); + } + + // Convert JSON response to List + private List convertJsonToRows(JSONObject result, int columnCount) { + JSONArray dataRows = result.getJSONArray("datarows"); + List rows = new ArrayList<>(); + for (int i = 0; i < dataRows.length(); i++) { + JSONArray row = dataRows.getJSONArray(i); + Object[] rowData = new Object[columnCount]; + for (int j = 0; j < columnCount; j++) { + rowData[j] = row.get(j); + } + rows.add(rowData); + } + return rows; + } + + // Sort rows by the first column + private void sortRowsByFirstColumn(List rows) { + rows.sort((a, b) -> ((String) a[0]).compareTo((String) b[0])); + } + + private void compareRows(List expectedRows, List actualRows) { + if (expectedRows.size() != actualRows.size()) { + Assert.fail( + "Row count is different. expectedRows:" + expectedRows + ", actualRows: " + actualRows); + } + for (int i = 0; i < expectedRows.size(); i++) { + assertArrayEquals(expectedRows.get(i), actualRows.get(i)); + } } } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ResourceMonitorIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ResourceMonitorIT.java index eed2369590..2799ab1016 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ResourceMonitorIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ResourceMonitorIT.java @@ -34,7 +34,7 @@ public void queryExceedResourceLimitShouldFail() throws IOException { assertEquals(500, exception.getResponse().getStatusLine().getStatusCode()); assertThat( exception.getMessage(), - Matchers.containsString("resource is not enough to run the" + " query, quit.")); + Matchers.containsString("insufficient resources to run the query, quit.")); // update plugins.ppl.query.memory_limit to default value 85% updateClusterSettings( diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/SortCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/SortCommandIT.java index c90a506252..b234dd032d 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/SortCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/SortCommandIT.java @@ -8,10 +8,17 @@ import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_WITH_NULL_VALUES; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DOG; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WEBLOGS; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.verifyOrder; import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import org.json.JSONArray; import org.json.JSONObject; import org.junit.Test; @@ -22,6 +29,7 @@ public void init() throws IOException { loadIndex(Index.BANK); loadIndex(Index.BANK_WITH_NULL_VALUES); loadIndex(Index.DOG); + loadIndex(Index.WEBLOG); } @Test @@ -38,17 +46,77 @@ public void testSortWithNullValue() throws IOException { String.format( "source=%s | sort balance | fields firstname, balance", TEST_INDEX_BANK_WITH_NULL_VALUES)); + + JSONArray dataRows = result.getJSONArray("datarows"); + + // Filter null balance rows + List nullRows = 
filterRows(dataRows, 1, true); + + // Verify the set values for null balances as rows with null balance can return in any order + List<Object[]> expectedNullRows = + Arrays.asList( + new Object[] {"Hattie", null}, + new Object[] {"Elinor", null}, + new Object[] {"Virginia", null}); + assertSetEquals(expectedNullRows, nullRows); + + // Filter non-null balance rows and create filtered result + List<Object[]> nonNullRows = filterRows(dataRows, 1, false); + JSONObject filteredResult = createFilteredResult(result, nonNullRows); + verifyOrder( - result, - rows("Hattie", null), - rows("Elinor", null), - rows("Virginia", null), + filteredResult, rows("Dale", 4180), rows("Nanette", 32838), rows("Amber JOHnny", 39225), rows("Dillard", 48086)); } + private void assertSetEquals(List<Object[]> expected, List<Object[]> actual) { + Set<List<Object>> expectedSet = new HashSet<>(); + for (Object[] arr : expected) { + expectedSet.add(Arrays.asList(arr)); + } + + Set<List<Object>> actualSet = new HashSet<>(); + for (Object[] arr : actual) { + actualSet.add(Arrays.asList(arr)); + } + + assertEquals(expectedSet, actualSet); + } + + // Filter rows by null or non-null values based on the specified column index + private List<Object[]> filterRows(JSONArray dataRows, int columnIndex, boolean isNull) { + List<Object[]> filteredRows = new ArrayList<>(); + for (int i = 0; i < dataRows.length(); i++) { + JSONArray row = dataRows.getJSONArray(i); + if ((isNull && row.isNull(columnIndex)) || (!isNull && !row.isNull(columnIndex))) { + Object[] rowData = new Object[row.length()]; + for (int j = 0; j < row.length(); j++) { + rowData[j] = row.isNull(j) ? null : row.get(j); + } + filteredRows.add(rowData); + } + } + return filteredRows; + } + + // Create a new JSONObject with filtered rows and updated metadata + private JSONObject createFilteredResult(JSONObject originalResult, List<Object[]> filteredRows) { + JSONArray jsonArray = new JSONArray(); + for (Object[] row : filteredRows) { + jsonArray.put(new JSONArray(row)); + } + + JSONObject filteredResult = new JSONObject(); + filteredResult.put("schema", originalResult.getJSONArray("schema")); + filteredResult.put("total", jsonArray.length()); + filteredResult.put("datarows", jsonArray); + filteredResult.put("size", jsonArray.length()); + return filteredResult; + } + @Test public void testSortStringField() throws IOException { JSONObject result = @@ -64,6 +132,20 @@ public void testSortStringField() throws IOException { rows("Ratliff")); } + @Test + public void testSortIpField() throws IOException { + final JSONObject result = + executeQuery(String.format("source=%s | fields host | sort host", TEST_INDEX_WEBLOGS)); + verifyOrder( + result, + rows("::1"), + rows("::3"), + rows("::ffff:1234"), + rows("0.0.0.2"), + rows("1.2.3.4"), + rows("1.2.3.5")); + } + @Test public void testSortMultipleFields() throws IOException { JSONObject result = diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/StandaloneIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/StandaloneIT.java index f81e1b6615..d484f3c4d0 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/StandaloneIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/StandaloneIT.java @@ -149,8 +149,12 @@ public void onFailure(Exception e) { private Settings defaultSettings() { return new Settings() { - private final Map<Key, Object> defaultSettings = - new ImmutableMap.Builder<Key, Object>().put(Key.QUERY_SIZE_LIMIT, 200).build(); + private final Map<Key, Object> defaultSettings = + new ImmutableMap.Builder<Key, Object>() + .put(Key.QUERY_SIZE_LIMIT, 200) + .put(Key.SQL_PAGINATION_API_SEARCH_AFTER, true) + .put(Key.FIELD_TYPE_TOLERANCE, true) +
.build(); @Override public T getSettingValue(Key key) { diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java index 40acd2f093..2d1cd709e1 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java @@ -11,7 +11,9 @@ import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRowsInOrder; import static org.opensearch.sql.util.MatcherUtils.verifySchema; +import static org.opensearch.sql.util.MatcherUtils.verifySchemaInOrder; import java.io.IOException; import org.json.JSONObject; @@ -190,6 +192,54 @@ public void testStatsAliasedSpan() throws IOException { verifyDataRows(response, rows(1, 20), rows(6, 30)); } + @Test + public void testStatsBySpanAndMultipleFields() throws IOException { + JSONObject response = + executeQuery( + String.format( + "source=%s | stats count() by span(age,10), gender, state", TEST_INDEX_BANK)); + verifySchemaInOrder( + response, + schema("count()", null, "integer"), + schema("span(age,10)", null, "integer"), + schema("gender", null, "string"), + schema("state", null, "string")); + verifyDataRowsInOrder( + response, + rows(1, 20, "f", "VA"), + rows(1, 30, "f", "IN"), + rows(1, 30, "f", "PA"), + rows(1, 30, "m", "IL"), + rows(1, 30, "m", "MD"), + rows(1, 30, "m", "TN"), + rows(1, 30, "m", "WA")); + } + + @Test + public void testStatsByMultipleFieldsAndSpan() throws IOException { + // Use verifySchemaInOrder() and verifyDataRowsInOrder() to check that the span column is always + // the first column in result whatever the order of span in query is first or last one + JSONObject response = + executeQuery( + String.format( + "source=%s | stats count() by gender, state, span(age,10)", TEST_INDEX_BANK)); + verifySchemaInOrder( + response, + schema("count()", null, "integer"), + schema("span(age,10)", null, "integer"), + schema("gender", null, "string"), + schema("state", null, "string")); + verifyDataRowsInOrder( + response, + rows(1, 20, "f", "VA"), + rows(1, 30, "f", "IN"), + rows(1, 30, "f", "PA"), + rows(1, 30, "m", "IL"), + rows(1, 30, "m", "MD"), + rows(1, 30, "m", "TN"), + rows(1, 30, "m", "WA")); + } + @Test public void testStatsPercentile() throws IOException { JSONObject response = diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/TrendlineCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/TrendlineCommandIT.java new file mode 100644 index 0000000000..38baa0f01f --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/TrendlineCommandIT.java @@ -0,0 +1,78 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; + +public class TrendlineCommandIT extends PPLIntegTestCase { + + @Override + public void init() throws IOException { + loadIndex(Index.BANK); + } + + @Test + public void testTrendline() throws IOException { + final JSONObject result = + executeQuery( + String.format( + "source=%s | where 
balance > 39000 | sort balance | trendline sma(2, balance) as" + + " balance_trend | fields balance_trend", + TEST_INDEX_BANK)); + verifyDataRows(result, rows(new Object[] {null}), rows(44313.0), rows(39882.5)); + } + + @Test + public void testTrendlineMultipleFields() throws IOException { + final JSONObject result = + executeQuery( + String.format( + "source=%s | where balance > 39000 | sort balance | trendline sma(2, balance) as" + + " balance_trend sma(2, account_number) as account_number_trend | fields" + + " balance_trend, account_number_trend", + TEST_INDEX_BANK)); + verifyDataRows(result, rows(null, null), rows(44313.0, 28.5), rows(39882.5, 13.0)); + } + + @Test + public void testTrendlineOverwritesExistingField() throws IOException { + final JSONObject result = + executeQuery( + String.format( + "source=%s | where balance > 39000 | sort balance | trendline sma(2, balance) as" + + " age | fields age", + TEST_INDEX_BANK)); + verifyDataRows(result, rows(new Object[] {null}), rows(44313.0), rows(39882.5)); + } + + @Test + public void testTrendlineNoAlias() throws IOException { + final JSONObject result = + executeQuery( + String.format( + "source=%s | where balance > 39000 | sort balance | trendline sma(2, balance) |" + + " fields balance_trendline", + TEST_INDEX_BANK)); + verifyDataRows(result, rows(new Object[] {null}), rows(44313.0), rows(39882.5)); + } + + @Test + public void testTrendlineWithSort() throws IOException { + final JSONObject result = + executeQuery( + String.format( + "source=%s | where balance > 39000 | trendline sort balance sma(2, balance) |" + + " fields balance_trendline", + TEST_INDEX_BANK)); + verifyDataRows(result, rows(new Object[] {null}), rows(44313.0), rows(39882.5)); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/AggregationIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/AggregationIT.java index 29358bd1c3..901c2a41e4 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/AggregationIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/AggregationIT.java @@ -9,14 +9,17 @@ import static org.opensearch.sql.legacy.plugin.RestSqlAction.QUERY_API_ENDPOINT; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verify; import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; import static org.opensearch.sql.util.MatcherUtils.verifySchema; import static org.opensearch.sql.util.MatcherUtils.verifySome; import static org.opensearch.sql.util.TestUtils.getResponseBody; +import static org.opensearch.sql.util.TestUtils.roundOfResponse; import java.io.IOException; import java.util.List; import java.util.Locale; +import org.json.JSONArray; import org.json.JSONObject; import org.junit.jupiter.api.Test; import org.opensearch.client.Request; @@ -396,8 +399,9 @@ public void testMaxDoublePushedDown() throws IOException { @Test public void testAvgDoublePushedDown() throws IOException { var response = executeQuery(String.format("SELECT avg(num3)" + " from %s", TEST_INDEX_CALCS)); + JSONArray responseJSON = roundOfResponse(response.getJSONArray("datarows")); verifySchema(response, schema("avg(num3)", null, "double")); - verifyDataRows(response, rows(-6.12D)); + verify(responseJSON, rows(-6.12D)); } @Test @@ -456,8 +460,9 @@ public void testAvgDoubleInMemory() throws IOException { executeQuery( String.format( "SELECT avg(num3)" + " OVER(PARTITION BY datetime1) from %s", TEST_INDEX_CALCS)); + JSONArray 
roundOfResponse = roundOfResponse(response.getJSONArray("datarows")); verifySchema(response, schema("avg(num3) OVER(PARTITION BY datetime1)", null, "double")); - verifySome(response.getJSONArray("datarows"), rows(-6.12D)); + verifySome(roundOfResponse, rows(-6.12D)); } @Test diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/ConditionalIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/ConditionalIT.java index deb41653e2..9cf4fa2e8a 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/ConditionalIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/ConditionalIT.java @@ -20,6 +20,9 @@ import com.fasterxml.jackson.core.JsonFactory; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import org.json.JSONArray; import org.json.JSONObject; import org.junit.Test; import org.opensearch.action.search.SearchResponse; @@ -69,10 +72,17 @@ public void ifnullWithNullInputTest() { schema("IFNULL(null, firstname)", "IFNULL1", "keyword"), schema("IFNULL(firstname, null)", "IFNULL2", "keyword"), schema("IFNULL(null, null)", "IFNULL3", "byte")); - verifyDataRows( - response, - rows("Hattie", "Hattie", LITERAL_NULL.value()), - rows("Elinor", "Elinor", LITERAL_NULL.value())); + // Retrieve the actual data rows + JSONArray dataRows = response.getJSONArray("datarows"); + + // Create expected rows dynamically based on the actual data received + // IFNULL1 will be firstname + // IFNULL2 will be firstname + List expectedRows = + createExpectedRows(dataRows, new int[] {0, 0}, LITERAL_NULL.value()); + + // Verify the actual data rows against the expected rows + verifyRows(dataRows, expectedRows); } @Test @@ -216,10 +226,50 @@ public void ifWithTrueAndFalseCondition() throws IOException { schema("IF(2 > 0, firstname, lastname)", "IF1", "keyword"), schema("firstname", "IF2", "text"), schema("lastname", "IF3", "keyword")); - verifyDataRows( - response, - rows("Duke Willmington", "Amber JOHnny", "Amber JOHnny", "Duke Willmington"), - rows("Bond", "Hattie", "Hattie", "Bond")); + + // Retrieve the actual data rows + JSONArray dataRows = response.getJSONArray("datarows"); + + // Create expected rows based on the actual data received as data can be different for the + // different data sources + // IF0 will be lastname as 2 < 0 is false + // IF1 will be firstname as 2 > 0 is true + List expectedRows = createExpectedRows(dataRows, new int[] {0, 1, 1, 0}); + + // Verify the actual data rows against the expected rows + verifyRows(dataRows, expectedRows); + } + + // Convert a JSONArray to a List with dynamic row construction + private List createExpectedRows( + JSONArray dataRows, int[] columnIndices, Object... 
staticValues) { + List expectedRows = new ArrayList<>(); + for (int i = 0; i < dataRows.length(); i++) { + JSONArray row = dataRows.getJSONArray(i); + Object[] rowData = new Object[columnIndices.length + staticValues.length]; + int k = 0; + for (int j = 0; j < columnIndices.length; j++) { + rowData[k++] = row.get(columnIndices[j]); + } + for (Object staticValue : staticValues) { + rowData[k++] = staticValue; + } + expectedRows.add(rowData); + } + return expectedRows; + } + + // Verify the actual data rows against the expected rows + private void verifyRows(JSONArray dataRows, List expectedRows) { + for (int i = 0; i < dataRows.length(); i++) { + JSONArray actualRow = dataRows.getJSONArray(i); + Object[] expectedRow = expectedRows.get(i); + Object[] actualRowData = new Object[expectedRow.length]; + for (int j = 0; j < actualRowData.length; j++) { + actualRowData[j] = actualRow.isNull(j) ? LITERAL_NULL.value() : actualRow.get(j); + } + assertArrayEquals(expectedRow, actualRowData); + } } private SearchHits query(String query) throws IOException { diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/CsvFormatIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/CsvFormatIT.java index 330268c0e4..d400ad646f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/CsvFormatIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/CsvFormatIT.java @@ -6,7 +6,8 @@ package org.opensearch.sql.sql; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_CSV_SANITIZE; -import static org.opensearch.sql.protocol.response.format.FlatResponseFormatter.CONTENT_TYPE; +import static org.opensearch.sql.protocol.response.format.CsvResponseFormatter.CONTENT_TYPE; +import static org.opensearch.sql.util.TestUtils.assertRowsEqual; import java.io.IOException; import java.util.Locale; @@ -30,7 +31,7 @@ public void sanitizeTest() { String.format( Locale.ROOT, "SELECT firstname, lastname FROM %s", TEST_INDEX_BANK_CSV_SANITIZE), "csv"); - assertEquals( + assertRowsEqual( StringUtils.format( "firstname,lastname%n" + "'+Amber JOHnny,Duke Willmington+%n" @@ -48,7 +49,7 @@ public void escapeSanitizeTest() { String.format( Locale.ROOT, "SELECT firstname, lastname FROM %s", TEST_INDEX_BANK_CSV_SANITIZE), "csv&sanitize=false"); - assertEquals( + assertRowsEqual( StringUtils.format( "firstname,lastname%n" + "+Amber JOHnny,Duke Willmington+%n" diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/DateTimeComparisonIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/DateTimeComparisonIT.java index af3d81e374..d385b54dff 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/DateTimeComparisonIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/DateTimeComparisonIT.java @@ -50,9 +50,9 @@ public void resetTimeZone() { TimeZone.setDefault(testTz); } - private String functionCall; - private String name; - private Boolean expectedResult; + private final String functionCall; + private final String name; + private final Boolean expectedResult; public DateTimeComparisonIT( @Name("functionCall") String functionCall, diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/ExpressionIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/ExpressionIT.java index be1471641e..45da456553 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/ExpressionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/ExpressionIT.java @@ -29,7 +29,7 @@ @Ignore public class ExpressionIT extends RestIntegTestCase { - @Rule public ExpectedException exceptionRule = 
ExpectedException.none(); + @Rule public final ExpectedException exceptionRule = ExpectedException.none(); @Override protected void init() throws Exception { diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/GeopointFormatsIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/GeopointFormatsIT.java new file mode 100644 index 0000000000..f25eeec241 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/sql/GeopointFormatsIT.java @@ -0,0 +1,60 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql; + +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; +import java.util.Map; +import org.apache.commons.lang3.tuple.Pair; +import org.json.JSONArray; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.legacy.SQLIntegTestCase; + +public class GeopointFormatsIT extends SQLIntegTestCase { + + @Override + public void init() throws Exception { + loadIndex(Index.GEOPOINTS); + } + + @Test + public void testReadingGeopoints() throws IOException { + String query = String.format("SELECT point FROM %s LIMIT 5", Index.GEOPOINTS.getName()); + JSONObject result = executeJdbcRequest(query); + verifySchema(result, schema("point", null, "geo_point")); + verifyDataRows( + result, + rows(Map.of("lon", 74, "lat", 40.71)), + rows(Map.of("lon", 74, "lat", 40.71)), + rows(Map.of("lon", 74, "lat", 40.71)), + rows(Map.of("lon", 74, "lat", 40.71)), + rows(Map.of("lon", 74, "lat", 40.71))); + } + + private static final double TOLERANCE = 1E-5; + + public void testReadingGeoHash() throws IOException { + String query = String.format("SELECT point FROM %s WHERE _id='6'", Index.GEOPOINTS.getName()); + JSONObject result = executeJdbcRequest(query); + verifySchema(result, schema("point", null, "geo_point")); + Pair point = getGeoValue(result); + assertEquals(40.71, point.getLeft(), TOLERANCE); + assertEquals(74, point.getRight(), TOLERANCE); + } + + private Pair getGeoValue(JSONObject result) { + JSONObject geoRaw = + (JSONObject) ((JSONArray) ((JSONArray) result.get("datarows")).get(0)).get(0); + double lat = geoRaw.getDouble("lat"); + double lon = geoRaw.getDouble("lon"); + return Pair.of(lat, lon); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/MathematicalFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/MathematicalFunctionIT.java index 60b7632ad0..b7f2ced5fb 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/MathematicalFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/MathematicalFunctionIT.java @@ -15,6 +15,7 @@ import java.io.IOException; import java.util.Locale; +import org.json.JSONArray; import org.json.JSONObject; import org.junit.jupiter.api.Test; import org.opensearch.client.Request; @@ -96,9 +97,24 @@ public void testE() throws IOException { @Test public void testExpm1() throws IOException { JSONObject result = - executeQuery("select expm1(account_number) FROM " + TEST_INDEX_BANK + " LIMIT 2"); - verifySchema(result, schema("expm1(account_number)", null, "double")); - verifyDataRows(result, rows(Math.expm1(1)), rows(Math.expm1(6))); + executeQuery( + "select account_number, expm1(account_number) FROM " + TEST_INDEX_BANK + " LIMIT 2"); + verifySchema( + result, + schema("account_number", 
null, "long"), + schema("expm1(account_number)", null, "double")); + JSONArray dataRows = result.getJSONArray("datarows"); + + // Extract and calculate expected values dynamically + for (int i = 0; i < dataRows.length(); i++) { + JSONArray row = dataRows.getJSONArray(i); + long accountNumber = row.getLong(0); // Extract the account_number + double actualExpm1Value = row.getDouble(1); // Extract the expm1 value + double expectedExpm1Value = Math.expm1(accountNumber); // Calculate the expected expm1 value + + assertEquals( + expectedExpm1Value, actualExpm1Value, 0.000001); // Delta for floating-point comparison + } } @Test diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/NestedIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/NestedIT.java index 96bbae94e5..18d93dbb2a 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/NestedIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/NestedIT.java @@ -16,6 +16,10 @@ import static org.opensearch.sql.util.MatcherUtils.verifySchema; import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.HashMap; import java.util.List; import java.util.Map; import org.json.JSONArray; @@ -423,7 +427,7 @@ public void test_nested_in_where_as_predicate_expression_with_multiple_condition + " nested(message.dayOfWeek) >= 4"; JSONObject result = executeJdbcRequest(query); assertEquals(2, result.getInt("total")); - verifyDataRows(result, rows("c", "ab", 4), rows("zz", "aa", 6)); + verifyDataRows(result, rows("c", "ab", 4), rows("zz", new JSONArray(List.of("aa", "bb")), 6)); } @Test @@ -448,14 +452,19 @@ public void nested_function_all_subfields() { schema("nested(message.author)", null, "keyword"), schema("nested(message.dayOfWeek)", null, "long"), schema("nested(message.info)", null, "keyword")); - verifyDataRows( - result, - rows("e", 1, "a"), - rows("f", 2, "b"), - rows("g", 1, "c"), - rows("h", 4, "c"), - rows("i", 5, "a"), - rows("zz", 6, "zz")); + + // Define expected rows as a list (author, dayOfWeek, info) + List> expectedList = + Arrays.asList( + Arrays.asList("e", 1, "a"), + Arrays.asList("f", 2, "b"), + Arrays.asList("g", 1, "c"), + Arrays.asList("h", 4, "c"), + Arrays.asList("i", 5, "a"), + Arrays.asList("zz", 6, "zz")); + + List> actualList = extractActualRowsBasedOnSchemaOrder(result); + sortAndAssertEquals(expectedList, actualList); } @Test @@ -470,14 +479,19 @@ public void nested_function_all_subfields_and_specified_subfield() { schema("nested(message.dayOfWeek)", null, "long"), schema("nested(message.info)", null, "keyword"), schema("nested(comment.data)", null, "keyword")); - verifyDataRows( - result, - rows("e", 1, "a", "ab"), - rows("f", 2, "b", "aa"), - rows("g", 1, "c", "aa"), - rows("h", 4, "c", "ab"), - rows("i", 5, "a", "ab"), - rows("zz", 6, "zz", new JSONArray(List.of("aa", "bb")))); + + // Convert the expected rows to a List> for comparison + List> expectedList = + Arrays.asList( + Arrays.asList("e", 1, "a", "ab"), + Arrays.asList("f", 2, "b", "aa"), + Arrays.asList("g", 1, "c", "aa"), + Arrays.asList("h", 4, "c", "ab"), + Arrays.asList("i", 5, "a", "ab"), + Arrays.asList("zz", 6, "zz", Arrays.asList("aa", "bb"))); + + List> actualList = extractActualRowsBasedOnSchemaOrder(result); + sortAndAssertEquals(expectedList, actualList); } @Test @@ -513,14 +527,19 @@ public void nested_function_all_subfields_for_two_nested_fields() { schema("nested(message.info)", null, "keyword"), schema("nested(comment.data)", null, "keyword"), 
schema("nested(comment.likes)", null, "long")); - verifyDataRows( - result, - rows("e", 1, "a", "ab", 3), - rows("f", 2, "b", "aa", 2), - rows("g", 1, "c", "aa", 3), - rows("h", 4, "c", "ab", 1), - rows("i", 5, "a", "ab", 1), - rows("zz", 6, "zz", new JSONArray(List.of("aa", "bb")), 10)); + + // Define expected rows + List> expectedList = + Arrays.asList( + Arrays.asList("e", 1, "a", "ab", 3), + Arrays.asList("f", 2, "b", "aa", 2), + Arrays.asList("g", 1, "c", "aa", 3), + Arrays.asList("h", 4, "c", "ab", 1), + Arrays.asList("i", 5, "a", "ab", 1), + Arrays.asList("zz", 6, "zz", Arrays.asList("aa", "bb"), 10)); + + List> actualList = extractActualRowsBasedOnSchemaOrder(result); + sortAndAssertEquals(expectedList, actualList); } @Test @@ -535,14 +554,18 @@ public void nested_function_all_subfields_and_non_nested_field() { schema("nested(message.dayOfWeek)", null, "long"), schema("nested(message.info)", null, "keyword"), schema("myNum", null, "long")); - verifyDataRows( - result, - rows("e", 1, "a", 1), - rows("f", 2, "b", 2), - rows("g", 1, "c", 3), - rows("h", 4, "c", 4), - rows("i", 5, "a", 4), - rows("zz", 6, "zz", new JSONArray(List.of(3, 4)))); + + List> expectedList = + Arrays.asList( + Arrays.asList("e", 1, "a", 1), + Arrays.asList("f", 2, "b", 2), + Arrays.asList("g", 1, "c", 3), + Arrays.asList("h", 4, "c", 4), + Arrays.asList("i", 5, "a", 4), + Arrays.asList("zz", 6, "zz", Arrays.asList(3, 4))); + + List> actualList = extractActualRowsBasedOnSchemaOrder(result); + sortAndAssertEquals(expectedList, actualList); } @Test @@ -591,4 +614,83 @@ public void nested_function_all_subfields_in_wrong_clause() { + " \"status\": 500\n" + "}")); } + + // Extract rows based on schema + private List> extractActualRowsBasedOnSchemaOrder(JSONObject result) { + JSONArray dataRows = result.getJSONArray("datarows"); + JSONArray schema = result.getJSONArray("schema"); + + Map schemaIndexMap = createSchemaIndexMap(schema); + return extractRows(dataRows, schema, schemaIndexMap); + } + + // Create a map of schema names to their indices + private Map createSchemaIndexMap(JSONArray schema) { + Map schemaIndexMap = new HashMap<>(); + for (int i = 0; i < schema.length(); i++) { + schemaIndexMap.put(schema.getJSONObject(i).getString("name"), i); + } + return schemaIndexMap; + } + + // Extract rows based on the schema order and expected order + private List> extractRows( + JSONArray dataRows, JSONArray schema, Map schemaIndexMap) { + // Define the expected order for the first three fields + List expectedOrder = + Arrays.asList( + "nested(message.author)", "nested(message.dayOfWeek)", "nested(message.info)"); + List> actualList = new ArrayList<>(); + for (int i = 0; i < dataRows.length(); i++) { + JSONArray row = dataRows.getJSONArray(i); + List extractedRow = new ArrayList<>(); + + // Extract fields in the expected order + extractExpectedFields(extractedRow, row, expectedOrder, schemaIndexMap); + + // Add remaining fields in the schema order + addRemainingFields(extractedRow, row, schema, expectedOrder); + + actualList.add(extractedRow); + } + return actualList; + } + + // Extract fields in the expected order + private void extractExpectedFields( + List extractedRow, + JSONArray row, + List expectedOrder, + Map schemaIndexMap) { + for (String fieldName : expectedOrder) { + int fieldIndex = schemaIndexMap.get(fieldName); + Object fieldValue = row.get(fieldIndex); + extractedRow.add(fieldValue); + } + } + + // Add remaining fields in the schema order, skipping those in the expected order + private void 
addRemainingFields( + List<Object> extractedRow, JSONArray row, JSONArray schema, List<String> expectedOrder) { + for (int j = 0; j < schema.length(); j++) { + String fieldName = schema.getJSONObject(j).getString("name"); + if (!expectedOrder.contains(fieldName)) { + Object fieldValue = row.get(j); + // Convert JSONArrays to lists if necessary + if (fieldValue instanceof JSONArray) { + extractedRow.add(((JSONArray) fieldValue).toList()); + } else { + extractedRow.add(fieldValue); + } + } + } + } + + // Sort lists and assert equality + private void sortAndAssertEquals(List<List<Object>> expectedList, List<List<Object>> actualList) { + Comparator<List<Object>> comparator = Comparator.comparing(Object::toString); + expectedList.sort(comparator); + actualList.sort(comparator); + assertEquals(expectedList, actualList); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/PaginationFilterIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/PaginationFilterIT.java index 038596cf57..9a945ec86f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/PaginationFilterIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/PaginationFilterIT.java @@ -34,25 +34,30 @@ public class PaginationFilterIT extends SQLIntegTestCase { */ private static final Map<String, Integer> STATEMENT_TO_NUM_OF_PAGES = Map.of( - "SELECT * FROM " + TestsConstants.TEST_INDEX_ACCOUNT, 1000, + "SELECT * FROM " + TestsConstants.TEST_INDEX_ACCOUNT, + 1000, "SELECT * FROM " + TestsConstants.TEST_INDEX_ACCOUNT + " WHERE match(address, 'street')", - 385, + 385, "SELECT * FROM " - + TestsConstants.TEST_INDEX_ACCOUNT - + " WHERE match(address, 'street') AND match(city, 'Ola')", - 1, + + TestsConstants.TEST_INDEX_ACCOUNT + + " WHERE match(address, 'street') AND match(city, 'Ola')", + 1, "SELECT firstname, lastname, highlight(address) FROM " - + TestsConstants.TEST_INDEX_ACCOUNT - + " WHERE match(address, 'street') AND match(state, 'OH')", - 5, + + TestsConstants.TEST_INDEX_ACCOUNT + + " WHERE match(address, 'street') AND match(state, 'OH')", + 5, "SELECT firstname, lastname, highlight('*') FROM " - + TestsConstants.TEST_INDEX_ACCOUNT - + " WHERE match(address, 'street') AND match(state, 'OH')", - 5, - "SELECT * FROM " + TestsConstants.TEST_INDEX_BEER + " WHERE true", 60, - "SELECT * FROM " + TestsConstants.TEST_INDEX_BEER + " WHERE Id=10", 1, - "SELECT * FROM " + TestsConstants.TEST_INDEX_BEER + " WHERE Id + 5=15", 1, - "SELECT * FROM " + TestsConstants.TEST_INDEX_BANK, 7); + + TestsConstants.TEST_INDEX_ACCOUNT + + " WHERE match(address, 'street') AND match(state, 'OH')", + 5, + "SELECT * FROM " + TestsConstants.TEST_INDEX_BEER + " WHERE true", + 60, + "SELECT * FROM " + TestsConstants.TEST_INDEX_BEER + " WHERE Id=10", + 1, + "SELECT * FROM " + TestsConstants.TEST_INDEX_BEER + " WHERE Id + 5=15", + 1, + "SELECT * FROM " + TestsConstants.TEST_INDEX_BANK, + 7); private final String sqlStatement; diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/PaginationIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/PaginationIT.java index 49ef7c583e..fbe1e378e2 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/PaginationIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/PaginationIT.java @@ -7,6 +7,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import static org.opensearch.sql.legacy.TestUtils.getResponseBody; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_CALCS; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ONLINE; @@ -18,6 +19,7 @@ import org.junit.Test; import
org.opensearch.client.Request; import org.opensearch.client.RequestOptions; +import org.opensearch.client.Response; import org.opensearch.client.ResponseException; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.legacy.SQLIntegTestCase; @@ -215,4 +217,52 @@ public void testQueryWithoutFrom() { assertEquals(1, response.getInt("total")); assertEquals(1, response.getJSONArray("datarows").getJSONArray(0).getInt(0)); } + + @Test + public void testAlias() throws Exception { + String indexName = Index.ONLINE.getName(); + String aliasName = "alias_ONLINE"; + String filterQuery = "{\n" + " \"term\": {\n" + " \"107\": 72 \n" + " }\n" + "}"; + + // Execute the SQL query with filter + String selectQuery = "SELECT * FROM " + TEST_INDEX_ONLINE; + JSONObject initialResponse = + new JSONObject(executeFetchQuery(selectQuery, 10, "jdbc", filterQuery)); + assertEquals(initialResponse.getInt("size"), 10); + + // Create an alias + String createAliasQuery = + String.format( + "{ \"actions\": [ { \"add\": { \"index\": \"%s\", \"alias\": \"%s\" } } ] }", + indexName, aliasName); + Request createAliasRequest = new Request("POST", "/_aliases"); + createAliasRequest.setJsonEntity(createAliasQuery); + JSONObject aliasResponse = new JSONObject(executeRequest(createAliasRequest)); + + // Assert that alias creation was acknowledged + assertTrue(aliasResponse.getBoolean("acknowledged")); + + // Query using the alias + String aliasSelectQuery = String.format("SELECT * FROM %s", aliasName); + JSONObject aliasQueryResponse = new JSONObject(executeFetchQuery(aliasSelectQuery, 4, "jdbc")); + assertEquals(4, aliasQueryResponse.getInt("size")); + + // Query using the alias with filter + JSONObject aliasFilteredResponse = + new JSONObject(executeFetchQuery(aliasSelectQuery, 4, "jdbc", filterQuery)); + assertEquals(aliasFilteredResponse.getInt("size"), 4); + } + + private String executeFetchQuery(String query, int fetchSize, String requestType, String filter) + throws IOException { + String endpoint = "/_plugins/_sql?format=" + requestType; + String requestBody = makeRequest(query, fetchSize, filter); + + Request sqlRequest = new Request("POST", endpoint); + sqlRequest.setJsonEntity(requestBody); + + Response response = client().performRequest(sqlRequest); + String responseString = getResponseBody(response, true); + return responseString; + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/PaginationWindowIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/PaginationWindowIT.java index 246cbfc4a0..4c387e2c17 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/PaginationWindowIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/PaginationWindowIT.java @@ -5,9 +5,11 @@ package org.opensearch.sql.sql; -import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_PHRASE; +import static org.opensearch.sql.legacy.TestsConstants.*; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import org.json.JSONObject; import org.junit.After; import org.junit.Test; @@ -18,6 +20,7 @@ public class PaginationWindowIT extends SQLIntegTestCase { @Override public void init() throws IOException { loadIndex(Index.PHRASE); + loadIndex(Index.CALCS_WITH_SHARDS); } @After @@ -92,4 +95,41 @@ public void testFetchSizeLargerThanResultWindowFails() throws IOException { () -> executeQueryTemplate("SELECT * FROM %s", TEST_INDEX_PHRASE, window + 1)); resetMaxResultWindow(TEST_INDEX_PHRASE); } + + @Test + public void testMultiShardPagesEqualsActualData() throws 
IOException { + // A bug made it so when pulling unordered data from an index with multiple shards, data gets + // lost if the fetchSize + // is not a multiple of the shard count. This tests that, for an index with 4 shards, pulling + // one page of 10 records + // is equivalent to pulling two pages of 5 records. + + var query = "SELECT key from " + TEST_INDEX_CALCS; + + var expectedResponse = new JSONObject(executeFetchQuery(query, 10, "jdbc")); + var expectedRows = expectedResponse.getJSONArray("datarows"); + + List expectedKeys = new ArrayList<>(); + for (int i = 0; i < expectedRows.length(); i++) { + expectedKeys.add(expectedRows.getJSONArray(i).getString(0)); + } + + var actualPage1 = new JSONObject(executeFetchQuery(query, 5, "jdbc")); + + var actualRows1 = actualPage1.getJSONArray("datarows"); + var cursor = actualPage1.getString("cursor"); + var actualPage2 = executeCursorQuery(cursor); + + var actualRows2 = actualPage2.getJSONArray("datarows"); + + List actualKeys = new ArrayList<>(); + for (int i = 0; i < actualRows1.length(); i++) { + actualKeys.add(actualRows1.getJSONArray(i).getString(0)); + } + for (int i = 0; i < actualRows2.length(); i++) { + actualKeys.add(actualRows2.getJSONArray(i).getString(0)); + } + + assertEquals(expectedKeys, actualKeys); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/QueryValidationIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/QueryValidationIT.java index e42b68631f..8e38af2a92 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/QueryValidationIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/QueryValidationIT.java @@ -29,7 +29,7 @@ */ public class QueryValidationIT extends SQLIntegTestCase { - @Rule public ExpectedException exceptionRule = ExpectedException.none(); + @Rule public final ExpectedException exceptionRule = ExpectedException.none(); @Override protected void init() throws Exception { diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/RawFormatIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/RawFormatIT.java index 9d2861ce98..0f085a1cde 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/RawFormatIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/RawFormatIT.java @@ -6,10 +6,12 @@ package org.opensearch.sql.sql; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_RAW_SANITIZE; -import static org.opensearch.sql.protocol.response.format.FlatResponseFormatter.CONTENT_TYPE; +import static org.opensearch.sql.protocol.response.format.RawResponseFormatter.CONTENT_TYPE; +import static org.opensearch.sql.util.TestUtils.assertRowsEqual; import java.io.IOException; import java.util.Locale; +import java.util.Map; import org.junit.Test; import org.opensearch.client.Request; import org.opensearch.client.Response; @@ -30,7 +32,8 @@ public void rawFormatWithPipeFieldTest() { String.format( Locale.ROOT, "SELECT firstname, lastname FROM %s", TEST_INDEX_BANK_RAW_SANITIZE), "raw"); - assertEquals( + + assertRowsEqual( StringUtils.format( "firstname|lastname%n" + "+Amber JOHnny|Duke Willmington+%n" @@ -41,6 +44,25 @@ public void rawFormatWithPipeFieldTest() { result); } + @Test + public void rawFormatPrettyWithPipeFieldTest() { + String result = + executeQuery( + String.format( + Locale.ROOT, "SELECT firstname, lastname FROM %s", TEST_INDEX_BANK_RAW_SANITIZE), + "raw", + Map.of("pretty", "true")); + assertEquals( + StringUtils.format( + "firstname |lastname %n" + + "+Amber JOHnny|Duke Willmington+%n" + + "-Hattie |Bond- %n" + + "=Nanette |Bates= %n" 
+ + "@Dale |Adams@ %n" + + "@Elinor |\"Ratliff|||\" %n"), + result); + } + @Test public void contentHeaderTest() throws IOException { String query = diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/ScoreQueryIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/ScoreQueryIT.java index 6616746d99..a1f71dcf6c 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/ScoreQueryIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/ScoreQueryIT.java @@ -8,7 +8,7 @@ import static org.hamcrest.Matchers.containsString; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; -import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifyDataAddressRows; import static org.opensearch.sql.util.MatcherUtils.verifySchema; import java.io.IOException; @@ -123,8 +123,7 @@ public void scoreQueryTest() throws IOException { TestsConstants.TEST_INDEX_ACCOUNT), "jdbc")); verifySchema(result, schema("address", null, "text"), schema("_score", null, "float")); - verifyDataRows( - result, rows("154 Douglass Street", 650.1515), rows("565 Hall Street", 3.2507575)); + verifyDataAddressRows(result, rows("154 Douglass Street"), rows("565 Hall Street")); } @Test @@ -154,7 +153,8 @@ public void scoreQueryDefaultBoostQueryTest() throws IOException { + "where score(matchQuery(address, 'Powell')) order by _score desc limit 2", TestsConstants.TEST_INDEX_ACCOUNT), "jdbc")); + verifySchema(result, schema("address", null, "text"), schema("_score", null, "float")); - verifyDataRows(result, rows("305 Powell Street", 6.501515)); + verifyDataAddressRows(result, rows("305 Powell Street")); } } diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/StandalonePaginationIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/StandalonePaginationIT.java index e884734c96..f6951f4a2c 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/StandalonePaginationIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/StandalonePaginationIT.java @@ -166,6 +166,8 @@ private Settings defaultSettings() { new ImmutableMap.Builder() .put(Key.QUERY_SIZE_LIMIT, 200) .put(Key.SQL_CURSOR_KEEP_ALIVE, TimeValue.timeValueMinutes(1)) + .put(Key.SQL_PAGINATION_API_SEARCH_AFTER, true) + .put(Key.FIELD_TYPE_TOLERANCE, true) .build(); @Override diff --git a/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java b/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java index d444218c66..d4db502407 100644 --- a/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java +++ b/integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java @@ -144,11 +144,26 @@ public static void verifySchema(JSONObject response, Matcher... matc } } + @SafeVarargs + public static void verifySchemaInOrder(JSONObject response, Matcher... matchers) { + try { + verifyInOrder(response.getJSONArray("schema"), matchers); + } catch (Exception e) { + LOG.error(String.format("verify schema failed, response: %s", response.toString()), e); + throw e; + } + } + @SafeVarargs public static void verifyDataRows(JSONObject response, Matcher... matchers) { verify(response.getJSONArray("datarows"), matchers); } + @SafeVarargs + public static void verifyDataAddressRows(JSONObject response, Matcher... matchers) { + verifyAddressRow(response.getJSONArray("datarows"), matchers); + } + @SafeVarargs public static void verifyColumn(JSONObject response, Matcher... 
matchers) { verify(response.getJSONArray("schema"), matchers); @@ -173,6 +188,32 @@ public static void verify(JSONArray array, Matcher... matchers) { assertThat(objects, containsInAnyOrder(matchers)); } + // TODO: this is temporary fix for fixing serverless tests to pass as it creates multiple shards + // leading to score differences. + public static void verifyAddressRow(JSONArray array, Matcher... matchers) { + // List to store the processed elements from the JSONArray + List objects = new ArrayList<>(); + + // Iterate through each element in the JSONArray + array + .iterator() + .forEachRemaining( + o -> { + // Check if o is a JSONArray with exactly 2 elements + if (o instanceof JSONArray && ((JSONArray) o).length() == 2) { + // Check if the second element is a BigDecimal/_score value + if (((JSONArray) o).get(1) instanceof BigDecimal) { + // Remove the _score element from response data rows to skip the assertion as it + // will be different when compared against multiple shards + ((JSONArray) o).remove(1); + } + } + objects.add((T) o); + }); + assertEquals(matchers.length, objects.size()); + assertThat(objects, containsInAnyOrder(matchers)); + } + @SafeVarargs @SuppressWarnings("unchecked") public static void verifyInOrder(JSONArray array, Matcher... matchers) { diff --git a/integ-test/src/test/java/org/opensearch/sql/util/TestUtils.java b/integ-test/src/test/java/org/opensearch/sql/util/TestUtils.java index 589fb1f9ae..d8bf9153f3 100644 --- a/integ-test/src/test/java/org/opensearch/sql/util/TestUtils.java +++ b/integ-test/src/test/java/org/opensearch/sql/util/TestUtils.java @@ -6,8 +6,7 @@ package org.opensearch.sql.util; import static com.google.common.base.Strings.isNullOrEmpty; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.*; import static org.opensearch.sql.executor.pagination.PlanSerializer.CURSOR_PREFIX; import java.io.BufferedReader; @@ -17,23 +16,30 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; +import java.math.BigDecimal; +import java.math.RoundingMode; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Locale; +import java.util.Set; import java.util.stream.Collectors; +import org.json.JSONArray; import org.json.JSONObject; +import org.junit.Assert; import org.opensearch.action.bulk.BulkRequest; import org.opensearch.action.bulk.BulkResponse; import org.opensearch.action.index.IndexRequest; import org.opensearch.client.Client; import org.opensearch.client.Request; import org.opensearch.client.Response; +import org.opensearch.client.ResponseException; import org.opensearch.client.RestClient; import org.opensearch.common.xcontent.XContentType; import org.opensearch.sql.legacy.cursor.CursorType; @@ -123,10 +129,75 @@ public static Response performRequest(RestClient client, Request request) { } return response; } catch (IOException e) { + if (isRefreshPolicyError(e)) { + try { + return retryWithoutRefreshPolicy(request, client); + } catch (IOException ex) { + throw new IllegalStateException("Failed to perform request without refresh policy.", ex); + } + } throw new IllegalStateException("Failed to perform request", e); } } + /** + * Checks if the IOException is due to an unsupported refresh policy. + * + * @param e The IOException to check. 
+ * @return true if the exception is due to a refresh policy error, false otherwise. + */ + private static boolean isRefreshPolicyError(IOException e) { + return e instanceof ResponseException + && ((ResponseException) e).getResponse().getStatusLine().getStatusCode() == 400 + && e.getMessage().contains("true refresh policy is not supported."); + } + + /** + * Attempts to perform the request without the refresh policy. + * + * @param request The original request. + * @param client client connection + * @return The response after retrying the request. + * @throws IOException If the request fails. + */ + private static Response retryWithoutRefreshPolicy(Request request, RestClient client) + throws IOException { + Request req = + new Request(request.getMethod(), request.getEndpoint().replaceAll("refresh=true", "")); + req.setEntity(request.getEntity()); + return client.performRequest(req); + } + + /** + * Compares two multiline strings representing rows of addresses to ensure they are equivalent. + * This method checks if the entire content of the expected and actual strings are the same. If + * they differ, it breaks down the strings into lines and performs a step-by-step comparison: + * + * @param expected The expected string representing rows of data. + * @param actual The actual string to compare against the expected. + */ + public static void assertRowsEqual(String expected, String actual) { + if (expected.equals(actual)) { + return; + } + + List expectedLines = List.of(expected.split("\n")); + List actualLines = List.of(actual.split("\n")); + + if (expectedLines.size() != actualLines.size()) { + Assert.fail("Line count is different. expected=" + expected + ", actual=" + actual); + } + + if (!expectedLines.get(0).equals(actualLines.get(0))) { + Assert.fail("Header is different. 
expected=" + expected + ", actual=" + actual); + } + + Set expectedItems = new HashSet<>(expectedLines.subList(1, expectedLines.size())); + Set actualItems = new HashSet<>(actualLines.subList(1, actualLines.size())); + + assertEquals(expectedItems, actualItems); + } + public static String getAccountIndexMapping() { return "{ \"mappings\": {" + " \"properties\": {\n" @@ -772,6 +843,29 @@ public static String getResponseBody(Response response, boolean retainNewLines) return sb.toString(); } + // TODO: this is temporary fix for fixing serverless tests to pass with 2 digit precision value + public static JSONArray roundOfResponse(JSONArray array) { + JSONArray responseJSON = new JSONArray(); + array + .iterator() + .forEachRemaining( + o -> { + JSONArray jsonArray = new JSONArray(); + ((JSONArray) o) + .iterator() + .forEachRemaining( + i -> { + if (i instanceof BigDecimal) { + jsonArray.put(((BigDecimal) i).setScale(2, RoundingMode.HALF_UP)); + } else { + jsonArray.put(i); + } + }); + responseJSON.put(jsonArray); + }); + return responseJSON; + } + public static String fileToString( final String filePathFromProjectRoot, final boolean removeNewLines) throws IOException { diff --git a/integ-test/src/test/resources/correctness/bugfixes/3121.txt b/integ-test/src/test/resources/correctness/bugfixes/3121.txt new file mode 100644 index 0000000000..f60f724897 --- /dev/null +++ b/integ-test/src/test/resources/correctness/bugfixes/3121.txt @@ -0,0 +1 @@ +SELECT Origin, Dest FROM (SELECT * FROM opensearch_dashboards_sample_data_flights WHERE AvgTicketPrice > 100 GROUP BY Origin, Dest, AvgTicketPrice) AS flights WHERE AvgTicketPrice < 1000 ORDER BY AvgTicketPrice LIMIT 30 diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_fillnull_push.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_fillnull_push.json new file mode 100644 index 0000000000..7e5e1c1c20 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_fillnull_push.json @@ -0,0 +1,28 @@ +{ + "root": { + "name": "ProjectOperator", + "description": { + "fields": "[age, balance]" + }, + "children": [ + { + "name": "EvalOperator", + "description": { + "expressions": { + "balance": "ifnull(balance, -1)", + "age": "ifnull(age, -1)" + } + }, + "children": [ + { + "name": "OpenSearchIndexScan", + "description": { + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\"}, needClean=true, searchDone=false, pitId=null, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + }, + "children": [] + } + ] + } + ] + } +} diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_agg_push.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_agg_push.json index 568b397f07..8035822357 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_agg_push.json +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_agg_push.json @@ -8,7 +8,7 @@ { "name": "OpenSearchIndexScan", "description": { - "request": "OpenSearchQueryRequest(indexName\u003dopensearch-sql_test_index_account, 
sourceBuilder\u003d{\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}],\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, searchDone\u003dfalse)" + "request": "OpenSearchQueryRequest(indexName\u003dopensearch-sql_test_index_account, sourceBuilder\u003d{\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}],\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, needClean\u003dtrue, searchDone\u003dfalse, pitId\u003dnull, cursorKeepAlive\u003dnull, searchAfter\u003dnull, searchResponse\u003dnull)" }, "children": [] } diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push.json index 0e7087aa1f..3e92a17b97 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push.json +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_filter_push.json @@ -8,7 +8,7 @@ { "name": "OpenSearchIndexScan", "description": { - "request": "OpenSearchQueryRequest(indexName\u003dopensearch-sql_test_index_account, sourceBuilder\u003d{\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"bool\":{\"filter\":[{\"bool\":{\"filter\":[{\"range\":{\"balance\":{\"from\":10000,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},{\"range\":{\"age\":{\"from\":null,\"to\":40,\"include_lower\":true,\"include_upper\":false,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, searchDone\u003dfalse)" + "request": "OpenSearchQueryRequest(indexName\u003dopensearch-sql_test_index_account, sourceBuilder\u003d{\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"bool\":{\"filter\":[{\"bool\":{\"filter\":[{\"range\":{\"balance\":{\"from\":10000,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},{\"range\":{\"age\":{\"from\":null,\"to\":40,\"include_lower\":true,\"include_upper\":false,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, needClean\u003dtrue, searchDone\u003dfalse, pitId\u003dnull, cursorKeepAlive\u003dnull, searchAfter\u003dnull, 
searchResponse\u003dnull)" }, "children": [] } diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_push.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_push.json new file mode 100644 index 0000000000..0a0b58f17d --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_limit_push.json @@ -0,0 +1,27 @@ +{ + "root": { + "name": "ProjectOperator", + "description": { + "fields": "[ageMinus]" + }, + "children": [ + { + "name": "EvalOperator", + "description": { + "expressions": { + "ageMinus": "-(age, 30)" + } + }, + "children": [ + { + "name": "OpenSearchIndexScan", + "description": { + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\"}, needClean\u003dtrue, searchDone\u003dfalse, pitId\u003dnull, cursorKeepAlive\u003dnull, searchAfter\u003dnull, searchResponse\u003dnull)" + }, + "children": [] + } + ] + } + ] + } +} diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_output.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_output.json index 8d45714283..bd7310810e 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_output.json +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_output.json @@ -31,7 +31,7 @@ { "name": "OpenSearchIndexScan", "description": { - "request": "OpenSearchQueryRequest(indexName\u003dopensearch-sql_test_index_account, sourceBuilder\u003d{\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}],\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, searchDone\u003dfalse)" + "request": "OpenSearchQueryRequest(indexName\u003dopensearch-sql_test_index_account, sourceBuilder\u003d{\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}],\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, needClean\u003dtrue, searchDone\u003dfalse, pitId\u003dnull, cursorKeepAlive\u003dnull, searchAfter\u003dnull, searchResponse\u003dnull)" }, "children": [] } diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_push.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_push.json index af2a57e536..e2630e24f9 100644 --- a/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_push.json +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_sort_push.json @@ -8,7 +8,7 @@ { "name": "OpenSearchIndexScan", "description": { - "request": "OpenSearchQueryRequest(indexName\u003dopensearch-sql_test_index_account, 
sourceBuilder\u003d{\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, searchDone\u003dfalse)" + "request": "OpenSearchQueryRequest(indexName\u003dopensearch-sql_test_index_account, sourceBuilder\u003d{\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, needClean\u003dtrue, searchDone\u003dfalse, pitId\u003dnull, cursorKeepAlive\u003dnull, searchAfter\u003dnull, searchResponse\u003dnull)" }, "children": [] } diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_trendline_push.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_trendline_push.json new file mode 100644 index 0000000000..754535dc32 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_trendline_push.json @@ -0,0 +1,32 @@ +{ + "root": { + "name": "ProjectOperator", + "description": { + "fields": "[ageTrend]" + }, + "children": [ + { + "name": "TrendlineOperator", + "description": { + "computations": [ + { + "computationType" : "sma", + "numberOfDataPoints" : "2", + "dataField" : "age", + "alias" : "ageTrend" + } + ] + }, + "children": [ + { + "name": "OpenSearchIndexScan", + "description": { + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\"}, needClean\u003dtrue, searchDone\u003dfalse, pitId\u003dnull, cursorKeepAlive\u003dnull, searchAfter\u003dnull, searchResponse\u003dnull)" + }, + "children": [] + } + ] + } + ] + } +} diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_trendline_sort_push.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_trendline_sort_push.json new file mode 100644 index 0000000000..6629434108 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_trendline_sort_push.json @@ -0,0 +1,32 @@ +{ + "root": { + "name": "ProjectOperator", + "description": { + "fields": "[ageTrend]" + }, + "children": [ + { + "name": "TrendlineOperator", + "description": { + "computations": [ + { + "computationType" : "sma", + "numberOfDataPoints" : "2", + "dataField" : "age", + "alias" : "ageTrend" + } + ] + }, + "children": [ + { + "name": "OpenSearchIndexScan", + "description": { + "request": "OpenSearchQueryRequest(indexName\u003dopensearch-sql_test_index_account, sourceBuilder\u003d{\"from\":0,\"size\":5,\"timeout\":\"1m\",\"sort\":[{\"age\":{\"order\":\"asc\",\"missing\":\"_first\"}}]}, needClean\u003dtrue, searchDone\u003dfalse, pitId\u003dnull, cursorKeepAlive\u003dnull, searchAfter\u003dnull, searchResponse\u003dnull)" + }, + "children": [] + } + ] + } + ] + } +} diff --git a/integ-test/src/test/resources/geopoints.json b/integ-test/src/test/resources/geopoints.json new file mode 100644 index 0000000000..95900fe811 --- /dev/null +++ b/integ-test/src/test/resources/geopoints.json @@ -0,0 +1,12 @@ +{"index": {"_id": "1"}} +{"point": {"lat": 40.71, "lon": 74.00}} +{"index": {"_id": "2"}} +{"point": "40.71,74.00"} +{"index": {"_id": "3"}} +{"point": [74.00, 40.71]} +{"index": {"_id": "4"}} +{"point": "POINT (74.00 40.71)"} +{"index": {"_id": "5"}} +{"point": {"type": "Point", 
"coordinates": [74.00, 40.71]}} +{"index": {"_id": "6"}} +{"point": "txhxegj0uyp3"} diff --git a/integ-test/src/test/resources/indexDefinitions/calcs_with_shards_index_mappings.json b/integ-test/src/test/resources/indexDefinitions/calcs_with_shards_index_mappings.json new file mode 100644 index 0000000000..560e1d55e6 --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/calcs_with_shards_index_mappings.json @@ -0,0 +1,99 @@ +{ + "mappings" : { + "properties" : { + "key" : { + "type" : "keyword" + }, + "num0" : { + "type" : "double" + }, + "num1" : { + "type" : "double" + }, + "num2" : { + "type" : "double" + }, + "num3" : { + "type" : "double" + }, + "num4" : { + "type" : "double" + }, + "str0" : { + "type" : "keyword" + }, + "str1" : { + "type" : "keyword" + }, + "str2" : { + "type" : "keyword" + }, + "str3" : { + "type" : "keyword" + }, + "int0" : { + "type" : "integer" + }, + "int1" : { + "type" : "integer" + }, + "int2" : { + "type" : "integer" + }, + "int3" : { + "type" : "integer" + }, + "bool0" : { + "type" : "boolean" + }, + "bool1" : { + "type" : "boolean" + }, + "bool2" : { + "type" : "boolean" + }, + "bool3" : { + "type" : "boolean" + }, + "date0" : { + "type" : "date", + "format": "year_month_day" + }, + "date1" : { + "type" : "date", + "format": "year_month_day" + }, + "date2" : { + "type" : "date", + "format": "year_month_day" + }, + "date3" : { + "type" : "date", + "format": "year_month_day" + }, + "time0" : { + "type" : "date", + "format": "date_time_no_millis" + }, + "time1" : { + "type" : "date", + "format": "hour_minute_second" + }, + "datetime0" : { + "type" : "date", + "format": "date_time_no_millis" + }, + "datetime1" : { + "type" : "date" + }, + "zzz" : { + "type" : "keyword" + } + } + }, + "settings": { + "index": { + "number_of_shards": 4 + } + } +} diff --git a/integ-test/src/test/resources/indexDefinitions/geopoint_index_mapping.json b/integ-test/src/test/resources/indexDefinitions/geopoint_index_mapping.json new file mode 100644 index 0000000000..61340530d8 --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/geopoint_index_mapping.json @@ -0,0 +1,9 @@ +{ + "mappings": { + "properties": { + "point": { + "type": "geo_point" + } + } + } +} diff --git a/integ-test/src/test/resources/weblogs.json b/integ-test/src/test/resources/weblogs.json index 4228e9c4d2..27d39b83be 100644 --- a/integ-test/src/test/resources/weblogs.json +++ b/integ-test/src/test/resources/weblogs.json @@ -1,6 +1,12 @@ {"index":{}} -{"host": "199.72.81.55", "method": "GET", "url": "/history/apollo/", "response": "200", "bytes": "6245"} +{"host": "::1", "method": "GET", "url": "/history/apollo/", "response": "200", "bytes": "6245"} {"index":{}} -{"host": "199.120.110.21", "method": "GET", "url": "/shuttle/missions/sts-73/mission-sts-73.html", "response": "200", "bytes": "4085"} +{"host": "0.0.0.2", "method": "GET", "url": "/shuttle/missions/sts-73/mission-sts-73.html", "response": "200", "bytes": "4085"} {"index":{}} -{"host": "205.212.115.106", "method": "GET", "url": "/shuttle/countdown/countdown.html", "response": "200", "bytes": "3985"} +{"host": "::3", "method": "GET", "url": "/shuttle/countdown/countdown.html", "response": "200", "bytes": "3985"} +{"index":{}} +{"host": "::FFFF:1.2.3.4", "method": "GET", "url": "/history/voyager1/", "response": "200", "bytes": "1234"} +{"index":{}} +{"host": "1.2.3.5", "method": "GET", "url": "/history/voyager2/", "response": "200", "bytes": "4321"} +{"index":{}} +{"host": "::FFFF:1234", "method": "GET", "url": "/history/artemis/", 
"response": "200", "bytes": "9876"} diff --git a/legacy/build.gradle b/legacy/build.gradle index 0467db183d..e3ddf27066 100644 --- a/legacy/build.gradle +++ b/legacy/build.gradle @@ -107,7 +107,7 @@ dependencies { because 'https://www.whitesourcesoftware.com/vulnerability-database/WS-2019-0379' } } - implementation group: 'com.google.guava', name: 'guava', version: '32.0.1-jre' + implementation group: 'com.google.guava', name: 'guava', version: "${guava_version}" implementation group: 'org.json', name: 'json', version:'20231013' implementation group: 'org.apache.commons', name: 'commons-lang3', version: '3.12.0' implementation group: 'org.apache.commons', name: 'commons-text', version: '1.10.0' diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/antlr/semantic/scope/SymbolTable.java b/legacy/src/main/java/org/opensearch/sql/legacy/antlr/semantic/scope/SymbolTable.java index ee9f4545a6..0f65ee1b99 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/antlr/semantic/scope/SymbolTable.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/antlr/semantic/scope/SymbolTable.java @@ -20,7 +20,7 @@ public class SymbolTable { /** Two-dimension hash table to manage symbols with type in different namespace */ - private Map> tableByNamespace = + private final Map> tableByNamespace = new EnumMap<>(Namespace.class); /** diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/antlr/semantic/types/function/AggregateFunction.java b/legacy/src/main/java/org/opensearch/sql/legacy/antlr/semantic/types/function/AggregateFunction.java index 9cebf3dda6..5ede1aa1a5 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/antlr/semantic/types/function/AggregateFunction.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/antlr/semantic/types/function/AggregateFunction.java @@ -24,7 +24,7 @@ public enum AggregateFunction implements TypeExpression { AVG(func(T(NUMBER)).to(DOUBLE)), SUM(func(T(NUMBER)).to(T)); - private TypeExpressionSpec[] specifications; + private final TypeExpressionSpec[] specifications; AggregateFunction(TypeExpressionSpec... 
specifications) { this.specifications = specifications; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/cursor/DefaultCursor.java b/legacy/src/main/java/org/opensearch/sql/legacy/cursor/DefaultCursor.java index c5be0066fc..2b0de9022c 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/cursor/DefaultCursor.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/cursor/DefaultCursor.java @@ -5,8 +5,19 @@ package org.opensearch.sql.legacy.cursor; +import static org.opensearch.core.xcontent.DeprecationHandler.IGNORE_DEPRECATIONS; +import static org.opensearch.sql.common.setting.Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Strings; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.security.AccessController; +import java.security.PrivilegedAction; import java.util.Base64; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -18,6 +29,16 @@ import lombok.Setter; import org.json.JSONArray; import org.json.JSONObject; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.search.SearchModule; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.sql.legacy.esdomain.LocalClusterState; import org.opensearch.sql.legacy.executor.format.Schema; /** @@ -40,6 +61,10 @@ public class DefaultCursor implements Cursor { private static final String SCROLL_ID = "s"; private static final String SCHEMA_COLUMNS = "c"; private static final String FIELD_ALIAS_MAP = "a"; + private static final String PIT_ID = "p"; + private static final String SEARCH_REQUEST = "r"; + private static final String SORT_FIELDS = "h"; + private static final ObjectMapper objectMapper = new ObjectMapper(); /** * To get mappings for index to check if type is date needed for @@ -70,11 +95,28 @@ public class DefaultCursor implements Cursor { /** To get next batch of result */ private String scrollId; + /** To get Point In Time */ + private String pitId; + + /** To get next batch of result with search after api */ + private SearchSourceBuilder searchSourceBuilder; + + /** To get last sort values * */ + private Object[] sortFields; + /** To reduce the number of rows left by fetchSize */ @NonNull private Integer fetchSize; private Integer limit; + /** + * {@link NamedXContentRegistry} from {@link SearchModule} used for construct {@link QueryBuilder} + * from DSL query string. 
+ */ + private static final NamedXContentRegistry xContentRegistry = + new NamedXContentRegistry( + new SearchModule(Settings.EMPTY, Collections.emptyList()).getNamedXContents()); + @Override public CursorType getType() { return type; @@ -82,19 +124,56 @@ public CursorType getType() { @Override public String generateCursorId() { - if (rowsLeft <= 0 || Strings.isNullOrEmpty(scrollId)) { + if (rowsLeft <= 0 || isCursorIdNullOrEmpty()) { return null; } JSONObject json = new JSONObject(); json.put(FETCH_SIZE, fetchSize); json.put(ROWS_LEFT, rowsLeft); json.put(INDEX_PATTERN, indexPattern); - json.put(SCROLL_ID, scrollId); json.put(SCHEMA_COLUMNS, getSchemaAsJson()); json.put(FIELD_ALIAS_MAP, fieldAliasMap); + if (LocalClusterState.state().getSettingValue(SQL_PAGINATION_API_SEARCH_AFTER)) { + json.put(PIT_ID, pitId); + String sortFieldValue = + AccessController.doPrivileged( + (PrivilegedAction) + () -> { + try { + return objectMapper.writeValueAsString(sortFields); + } catch (JsonProcessingException e) { + throw new RuntimeException( + "Failed to parse sort fields from JSON string.", e); + } + }); + json.put(SORT_FIELDS, sortFieldValue); + setSearchRequestString(json, searchSourceBuilder); + } else { + json.put(SCROLL_ID, scrollId); + } return String.format("%s:%s", type.getId(), encodeCursor(json)); } + private void setSearchRequestString(JSONObject cursorJson, SearchSourceBuilder sourceBuilder) { + try { + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + XContentBuilder builder = XContentFactory.jsonBuilder(outputStream); + sourceBuilder.toXContent(builder, null); + builder.close(); + + String searchRequestBase64 = Base64.getEncoder().encodeToString(outputStream.toByteArray()); + cursorJson.put("searchSourceBuilder", searchRequestBase64); + } catch (IOException ex) { + throw new RuntimeException("Failed to set search request string on cursor json.", ex); + } + } + + private boolean isCursorIdNullOrEmpty() { + return LocalClusterState.state().getSettingValue(SQL_PAGINATION_API_SEARCH_AFTER) + ? 
Strings.isNullOrEmpty(pitId) + : Strings.isNullOrEmpty(scrollId); + } + public static DefaultCursor from(String cursorId) { /** * It is assumed that cursorId here is the second part of the original cursor passed by the @@ -105,13 +184,50 @@ public static DefaultCursor from(String cursorId) { cursor.setFetchSize(json.getInt(FETCH_SIZE)); cursor.setRowsLeft(json.getLong(ROWS_LEFT)); cursor.setIndexPattern(json.getString(INDEX_PATTERN)); - cursor.setScrollId(json.getString(SCROLL_ID)); + if (LocalClusterState.state().getSettingValue(SQL_PAGINATION_API_SEARCH_AFTER)) { + populateCursorForPit(json, cursor); + } else { + cursor.setScrollId(json.getString(SCROLL_ID)); + } cursor.setColumns(getColumnsFromSchema(json.getJSONArray(SCHEMA_COLUMNS))); cursor.setFieldAliasMap(fieldAliasMap(json.getJSONObject(FIELD_ALIAS_MAP))); return cursor; } + private static void populateCursorForPit(JSONObject json, DefaultCursor cursor) { + cursor.setPitId(json.getString(PIT_ID)); + + cursor.setSortFields(getSortFieldsFromJson(json)); + + // Retrieve and set the SearchSourceBuilder from the JSON field + String searchSourceBuilderBase64 = json.getString("searchSourceBuilder"); + byte[] bytes = Base64.getDecoder().decode(searchSourceBuilderBase64); + ByteArrayInputStream streamInput = new ByteArrayInputStream(bytes); + try { + XContentParser parser = + XContentType.JSON + .xContent() + .createParser(xContentRegistry, IGNORE_DEPRECATIONS, streamInput); + SearchSourceBuilder sourceBuilder = SearchSourceBuilder.fromXContent(parser); + cursor.setSearchSourceBuilder(sourceBuilder); + } catch (IOException ex) { + throw new RuntimeException("Failed to get searchSourceBuilder from cursor Id", ex); + } + } + + private static Object[] getSortFieldsFromJson(JSONObject json) { + return AccessController.doPrivileged( + (PrivilegedAction) + () -> { + try { + return objectMapper.readValue(json.getString(SORT_FIELDS), Object[].class); + } catch (JsonProcessingException e) { + throw new RuntimeException("Failed to parse sort fields from JSON string.", e); + } + }); + } + private JSONArray getSchemaAsJson() { JSONArray schemaJson = new JSONArray(); diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/domain/Condition.java b/legacy/src/main/java/org/opensearch/sql/legacy/domain/Condition.java index f86635910a..2c86ff00ad 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/domain/Condition.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/domain/Condition.java @@ -52,15 +52,15 @@ public enum OPERATOR { NTERM, NREGEXP; - public static Map methodNameToOpear; + public static final Map methodNameToOpear; - public static Map operStringToOpear; + public static final Map operStringToOpear; - public static Map simpleOperStringToOpear; + public static final Map simpleOperStringToOpear; - private static BiMap negatives; + private static final BiMap negatives; - private static BiMap simpleReverses; + private static final BiMap simpleReverses; static { methodNameToOpear = new HashMap<>(); @@ -161,7 +161,7 @@ public Boolean isSimpleOperator() { private String name; - private SQLExpr nameExpr; + private final SQLExpr nameExpr; private Object value; @@ -173,7 +173,7 @@ public SQLExpr getValueExpr() { return valueExpr; } - private SQLExpr valueExpr; + private final SQLExpr valueExpr; private OPERATOR OPERATOR; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/domain/Having.java b/legacy/src/main/java/org/opensearch/sql/legacy/domain/Having.java index a53fb0c275..b5a66d5eb5 100644 --- 
a/legacy/src/main/java/org/opensearch/sql/legacy/domain/Having.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/domain/Having.java @@ -44,7 +44,7 @@ public class Having { /** Conditions parsed out of HAVING clause */ private final List conditions; - private HavingParser havingParser; + private final HavingParser havingParser; public List getHavingFields() { return havingParser.getHavingFields(); diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/domain/IndexStatement.java b/legacy/src/main/java/org/opensearch/sql/legacy/domain/IndexStatement.java index 2a5be5728c..b2efe99327 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/domain/IndexStatement.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/domain/IndexStatement.java @@ -8,8 +8,8 @@ /** Class used to differentiate SHOW and DESCRIBE statements */ public class IndexStatement implements QueryStatement { - private StatementType statementType; - private String query; + private final StatementType statementType; + private final String query; private String indexPattern; private String columnPattern; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/domain/JoinSelect.java b/legacy/src/main/java/org/opensearch/sql/legacy/domain/JoinSelect.java index 211b33c68a..a7d18d7a41 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/domain/JoinSelect.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/domain/JoinSelect.java @@ -12,8 +12,8 @@ /** Created by Eliran on 20/8/2015. */ public class JoinSelect extends Query { - private TableOnJoinSelect firstTable; - private TableOnJoinSelect secondTable; + private final TableOnJoinSelect firstTable; + private final TableOnJoinSelect secondTable; private Where connectedWhere; private List hints; private List connectedConditions; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/domain/KVValue.java b/legacy/src/main/java/org/opensearch/sql/legacy/domain/KVValue.java index d864cbac12..9488952c2b 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/domain/KVValue.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/domain/KVValue.java @@ -7,7 +7,7 @@ public class KVValue implements Cloneable { public String key; - public Object value; + public final Object value; public KVValue(Object value) { this.value = value; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/domain/Order.java b/legacy/src/main/java/org/opensearch/sql/legacy/domain/Order.java index f593d6c428..57280b7df2 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/domain/Order.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/domain/Order.java @@ -12,7 +12,7 @@ public class Order { private String nestedPath; private String name; private String type; - private Field sortField; + private final Field sortField; public boolean isScript() { return sortField != null && sortField.isScriptField(); diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/domain/Paramer.java b/legacy/src/main/java/org/opensearch/sql/legacy/domain/Paramer.java index 38ca556199..e3b6f710b6 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/domain/Paramer.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/domain/Paramer.java @@ -30,7 +30,7 @@ public class Paramer { public String value; public Integer slop; - public Map fieldsBoosts = new HashMap<>(); + public final Map fieldsBoosts = new HashMap<>(); public String type; public Float tieBreaker; public Operator operator; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/domain/Query.java 
b/legacy/src/main/java/org/opensearch/sql/legacy/domain/Query.java index 6f891e7fc5..0fe302cd22 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/domain/Query.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/domain/Query.java @@ -12,7 +12,7 @@ public abstract class Query implements QueryStatement { private Where where = null; - private List from = new ArrayList<>(); + private final List from = new ArrayList<>(); public Where getWhere() { return this.where; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/domain/Select.java b/legacy/src/main/java/org/opensearch/sql/legacy/domain/Select.java index 2faa8cc6e5..1de86f43dc 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/domain/Select.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/domain/Select.java @@ -36,11 +36,11 @@ public class Select extends Query { "PERCENTILES", "SCRIPTED_METRIC"); - private List hints = new ArrayList<>(); - private List fields = new ArrayList<>(); - private List> groupBys = new ArrayList<>(); + private final List hints = new ArrayList<>(); + private final List fields = new ArrayList<>(); + private final List> groupBys = new ArrayList<>(); private Having having; - private List orderBys = new ArrayList<>(); + private final List orderBys = new ArrayList<>(); private int offset; private Integer rowCount; private boolean containsSubQueries; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/domain/Where.java b/legacy/src/main/java/org/opensearch/sql/legacy/domain/Where.java index d6f767203b..342d9e5521 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/domain/Where.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/domain/Where.java @@ -22,7 +22,7 @@ public static Where newInstance() { return new Where(CONN.AND); } - private LinkedList wheres = new LinkedList<>(); + private final LinkedList wheres = new LinkedList<>(); protected CONN conn; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/domain/bucketpath/BucketPath.java b/legacy/src/main/java/org/opensearch/sql/legacy/domain/bucketpath/BucketPath.java index 635d0062a5..e2f6c6a7e0 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/domain/bucketpath/BucketPath.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/domain/bucketpath/BucketPath.java @@ -15,7 +15,7 @@ *

https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-pipeline.html#buckets-path-syntax */ public class BucketPath { - private Deque pathStack = new ArrayDeque<>(); + private final Deque pathStack = new ArrayDeque<>(); public BucketPath add(Path path) { if (pathStack.isEmpty()) { diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/domain/hints/Hint.java b/legacy/src/main/java/org/opensearch/sql/legacy/domain/hints/Hint.java index b83c63aae1..7a6c445b83 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/domain/hints/Hint.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/domain/hints/Hint.java @@ -7,8 +7,8 @@ /** Created by Eliran on 5/9/2015. */ public class Hint { - private HintType type; - private Object[] params; + private final HintType type; + private final Object[] params; public Hint(HintType type, Object[] params) { this.type = type; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/ElasticHitsExecutor.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/ElasticHitsExecutor.java index 62a6d63ef7..052cdb7b36 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/ElasticHitsExecutor.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/ElasticHitsExecutor.java @@ -5,13 +5,99 @@ package org.opensearch.sql.legacy.executor; +import static org.opensearch.search.sort.FieldSortBuilder.DOC_FIELD_NAME; +import static org.opensearch.search.sort.SortOrder.ASC; +import static org.opensearch.sql.common.setting.Settings.Key.SQL_CURSOR_KEEP_ALIVE; +import static org.opensearch.sql.common.setting.Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER; +import static org.opensearch.sql.opensearch.storage.OpenSearchIndex.METADATA_FIELD_ID; + import java.io.IOException; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.action.search.SearchRequestBuilder; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.client.Client; +import org.opensearch.common.unit.TimeValue; import org.opensearch.search.SearchHits; +import org.opensearch.search.builder.PointInTimeBuilder; +import org.opensearch.search.sort.SortOrder; +import org.opensearch.sql.legacy.domain.Select; +import org.opensearch.sql.legacy.esdomain.LocalClusterState; import org.opensearch.sql.legacy.exception.SqlParseException; +import org.opensearch.sql.legacy.pit.PointInTimeHandler; + +/** Executor for search requests with pagination. 
*/ +public abstract class ElasticHitsExecutor { + protected static final Logger LOG = LogManager.getLogger(); + protected PointInTimeHandler pit; + protected Client client; + + /** + * Executes search request + * + * @throws IOException If an input or output exception occurred + * @throws SqlParseException If parsing exception occurred + */ + protected abstract void run() throws IOException, SqlParseException; + + /** + * Get search hits after execution + * + * @return Search hits + */ + protected abstract SearchHits getHits(); + + /** + * Get response for search request with pit/scroll + * + * @param request search request + * @param select sql select + * @param size fetch size + * @param previousResponse response for previous request + * @param pit point in time + * @return search response for subsequent request + */ + public SearchResponse getResponseWithHits( + SearchRequestBuilder request, + Select select, + int size, + SearchResponse previousResponse, + PointInTimeHandler pit) { + // Set Size + request.setSize(size); + SearchResponse responseWithHits; -/** Created by Eliran on 21/8/2016. */ -public interface ElasticHitsExecutor { - void run() throws IOException, SqlParseException; + if (LocalClusterState.state().getSettingValue(SQL_PAGINATION_API_SEARCH_AFTER)) { + // Set sort field for search_after + boolean ordered = select.isOrderdSelect(); + if (!ordered) { + request.addSort(DOC_FIELD_NAME, ASC); + request.addSort(METADATA_FIELD_ID, SortOrder.ASC); + } + // Set PIT + request.setPointInTime(new PointInTimeBuilder(pit.getPitId())); + // from and size is alternate method to paginate result. + // If select has from clause, search after is not required. + if (previousResponse != null && select.getFrom().isEmpty()) { + request.searchAfter(previousResponse.getHits().getSortFields()); + } + responseWithHits = request.get(); + } else { + // Set scroll + TimeValue keepAlive = LocalClusterState.state().getSettingValue(SQL_CURSOR_KEEP_ALIVE); + if (previousResponse != null) { + responseWithHits = + client + .prepareSearchScroll(previousResponse.getScrollId()) + .setScroll(keepAlive) + .execute() + .actionGet(); + } else { + request.setScroll(keepAlive); + responseWithHits = request.get(); + } + } - SearchHits getHits(); + return responseWithHits; + } } diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/GetIndexRequestRestListener.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/GetIndexRequestRestListener.java index 58808ee8f3..fbe1cca6f3 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/GetIndexRequestRestListener.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/GetIndexRequestRestListener.java @@ -24,7 +24,7 @@ /** Created by Eliran on 6/10/2015. 
*/ public class GetIndexRequestRestListener extends RestBuilderListener { - private GetIndexRequest getIndexRequest; + private final GetIndexRequest getIndexRequest; public GetIndexRequestRestListener(RestChannel channel, GetIndexRequest getIndexRequest) { super(channel); diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/cursor/CursorCloseExecutor.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/cursor/CursorCloseExecutor.java index 7282eaed4c..b94ef51b7d 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/cursor/CursorCloseExecutor.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/cursor/CursorCloseExecutor.java @@ -6,6 +6,7 @@ package org.opensearch.sql.legacy.executor.cursor; import static org.opensearch.core.rest.RestStatus.OK; +import static org.opensearch.sql.common.setting.Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER; import java.util.Map; import org.apache.logging.log4j.LogManager; @@ -18,8 +19,11 @@ import org.opensearch.rest.RestChannel; import org.opensearch.sql.legacy.cursor.CursorType; import org.opensearch.sql.legacy.cursor.DefaultCursor; +import org.opensearch.sql.legacy.esdomain.LocalClusterState; import org.opensearch.sql.legacy.metrics.MetricName; import org.opensearch.sql.legacy.metrics.Metrics; +import org.opensearch.sql.legacy.pit.PointInTimeHandler; +import org.opensearch.sql.legacy.pit.PointInTimeHandlerImpl; import org.opensearch.sql.legacy.rewriter.matchtoterm.VerificationException; public class CursorCloseExecutor implements CursorRestExecutor { @@ -29,7 +33,7 @@ public class CursorCloseExecutor implements CursorRestExecutor { private static final String SUCCEEDED_TRUE = "{\"succeeded\":true}"; private static final String SUCCEEDED_FALSE = "{\"succeeded\":false}"; - private String cursorId; + private final String cursorId; public CursorCloseExecutor(String cursorId) { this.cursorId = cursorId; @@ -79,14 +83,26 @@ public String execute(Client client, Map params) throws Exceptio } private String handleDefaultCursorCloseRequest(Client client, DefaultCursor cursor) { - String scrollId = cursor.getScrollId(); - ClearScrollResponse clearScrollResponse = - client.prepareClearScroll().addScrollId(scrollId).get(); - if (clearScrollResponse.isSucceeded()) { - return SUCCEEDED_TRUE; + if (LocalClusterState.state().getSettingValue(SQL_PAGINATION_API_SEARCH_AFTER)) { + String pitId = cursor.getPitId(); + PointInTimeHandler pit = new PointInTimeHandlerImpl(client, pitId); + try { + pit.delete(); + return SUCCEEDED_TRUE; + } catch (RuntimeException e) { + Metrics.getInstance().getNumericalMetric(MetricName.FAILED_REQ_COUNT_SYS).increment(); + return SUCCEEDED_FALSE; + } } else { - Metrics.getInstance().getNumericalMetric(MetricName.FAILED_REQ_COUNT_SYS).increment(); - return SUCCEEDED_FALSE; + String scrollId = cursor.getScrollId(); + ClearScrollResponse clearScrollResponse = + client.prepareClearScroll().addScrollId(scrollId).get(); + if (clearScrollResponse.isSucceeded()) { + return SUCCEEDED_TRUE; + } else { + Metrics.getInstance().getNumericalMetric(MetricName.FAILED_REQ_COUNT_SYS).increment(); + return SUCCEEDED_FALSE; + } } } } diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/cursor/CursorResultExecutor.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/cursor/CursorResultExecutor.java index 66c69f3430..14ca3aea62 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/cursor/CursorResultExecutor.java +++ 
b/legacy/src/main/java/org/opensearch/sql/legacy/executor/cursor/CursorResultExecutor.java @@ -6,6 +6,8 @@ package org.opensearch.sql.legacy.executor.cursor; import static org.opensearch.core.rest.RestStatus.OK; +import static org.opensearch.sql.common.setting.Settings.Key.SQL_CURSOR_KEEP_ALIVE; +import static org.opensearch.sql.common.setting.Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER; import java.util.Arrays; import java.util.Map; @@ -14,14 +16,17 @@ import org.json.JSONException; import org.opensearch.OpenSearchException; import org.opensearch.action.search.ClearScrollResponse; +import org.opensearch.action.search.SearchRequest; import org.opensearch.action.search.SearchResponse; import org.opensearch.client.Client; import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.rest.RestStatus; import org.opensearch.rest.BytesRestResponse; import org.opensearch.rest.RestChannel; import org.opensearch.search.SearchHit; import org.opensearch.search.SearchHits; -import org.opensearch.sql.common.setting.Settings; +import org.opensearch.search.builder.PointInTimeBuilder; +import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.sql.legacy.cursor.CursorType; import org.opensearch.sql.legacy.cursor.DefaultCursor; import org.opensearch.sql.legacy.esdomain.LocalClusterState; @@ -29,12 +34,15 @@ import org.opensearch.sql.legacy.executor.format.Protocol; import org.opensearch.sql.legacy.metrics.MetricName; import org.opensearch.sql.legacy.metrics.Metrics; +import org.opensearch.sql.legacy.pit.PointInTimeHandler; +import org.opensearch.sql.legacy.pit.PointInTimeHandlerImpl; import org.opensearch.sql.legacy.rewriter.matchtoterm.VerificationException; +import org.opensearch.sql.opensearch.response.error.ErrorMessageFactory; public class CursorResultExecutor implements CursorRestExecutor { - private String cursorId; - private Format format; + private final String cursorId; + private final Format format; private static final Logger LOG = LogManager.getLogger(CursorResultExecutor.class); @@ -52,7 +60,15 @@ public void execute(Client client, Map params, RestChannel chann } catch (IllegalArgumentException | JSONException e) { Metrics.getInstance().getNumericalMetric(MetricName.FAILED_REQ_COUNT_CUS).increment(); LOG.error("Error parsing the cursor", e); - channel.sendResponse(new BytesRestResponse(channel, e)); + channel.sendResponse( + new BytesRestResponse( + RestStatus.BAD_REQUEST, + "application/json; charset=UTF-8", + ErrorMessageFactory.createErrorMessage( + new IllegalArgumentException( + "Malformed cursor: unable to extract cursor information"), + RestStatus.BAD_REQUEST.getStatus()) + .toString())); } catch (OpenSearchException e) { int status = (e.status().getStatus()); if (status > 399 && status < 500) { @@ -91,14 +107,27 @@ public String execute(Client client, Map params) throws Exceptio } private String handleDefaultCursorRequest(Client client, DefaultCursor cursor) { - String previousScrollId = cursor.getScrollId(); LocalClusterState clusterState = LocalClusterState.state(); - TimeValue scrollTimeout = clusterState.getSettingValue(Settings.Key.SQL_CURSOR_KEEP_ALIVE); - SearchResponse scrollResponse = - client.prepareSearchScroll(previousScrollId).setScroll(scrollTimeout).get(); + TimeValue paginationTimeout = clusterState.getSettingValue(SQL_CURSOR_KEEP_ALIVE); + + SearchResponse scrollResponse = null; + if (clusterState.getSettingValue(SQL_PAGINATION_API_SEARCH_AFTER)) { + String pitId = cursor.getPitId(); + SearchSourceBuilder source = 
cursor.getSearchSourceBuilder(); + source.searchAfter(cursor.getSortFields()); + source.pointInTimeBuilder(new PointInTimeBuilder(pitId)); + SearchRequest searchRequest = new SearchRequest(); + searchRequest.source(source); + scrollResponse = client.search(searchRequest).actionGet(); + } else { + String previousScrollId = cursor.getScrollId(); + scrollResponse = + client.prepareSearchScroll(previousScrollId).setScroll(paginationTimeout).get(); + } SearchHits searchHits = scrollResponse.getHits(); SearchHit[] searchHitArray = searchHits.getHits(); String newScrollId = scrollResponse.getScrollId(); + String newPitId = scrollResponse.pointInTimeId(); int rowsLeft = (int) cursor.getRowsLeft(); int fetch = cursor.getFetchSize(); @@ -124,16 +153,37 @@ private String handleDefaultCursorRequest(Client client, DefaultCursor cursor) { if (rowsLeft <= 0) { /** Clear the scroll context on last page */ - ClearScrollResponse clearScrollResponse = - client.prepareClearScroll().addScrollId(newScrollId).get(); - if (!clearScrollResponse.isSucceeded()) { - Metrics.getInstance().getNumericalMetric(MetricName.FAILED_REQ_COUNT_SYS).increment(); - LOG.info("Error closing the cursor context {} ", newScrollId); + if (newScrollId != null) { + ClearScrollResponse clearScrollResponse = + client.prepareClearScroll().addScrollId(newScrollId).get(); + if (!clearScrollResponse.isSucceeded()) { + Metrics.getInstance().getNumericalMetric(MetricName.FAILED_REQ_COUNT_SYS).increment(); + LOG.info("Error closing the cursor context {} ", newScrollId); + } + } + if (newPitId != null) { + PointInTimeHandler pit = new PointInTimeHandlerImpl(client, newPitId); + try { + pit.delete(); + } catch (RuntimeException e) { + Metrics.getInstance().getNumericalMetric(MetricName.FAILED_REQ_COUNT_SYS).increment(); + LOG.info("Error deleting point in time {} ", newPitId); + } } } cursor.setRowsLeft(rowsLeft); - cursor.setScrollId(newScrollId); + if (clusterState.getSettingValue(SQL_PAGINATION_API_SEARCH_AFTER)) { + cursor.setPitId(newPitId); + cursor.setSearchSourceBuilder(cursor.getSearchSourceBuilder()); + cursor.setSortFields( + scrollResponse + .getHits() + .getAt(scrollResponse.getHits().getHits().length - 1) + .getSortValues()); + } else { + cursor.setScrollId(newScrollId); + } Protocol protocol = new Protocol(client, searchHits, format.name().toLowerCase(), cursor); return protocol.cursorFormat(); } diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/DataRows.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/DataRows.java index fc153afae8..514d016866 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/DataRows.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/DataRows.java @@ -11,9 +11,9 @@ public class DataRows implements Iterable { - private long size; - private long totalHits; - private List rows; + private final long size; + private final long totalHits; + private final List rows; public DataRows(long size, long totalHits, List rows) { this.size = size; @@ -61,7 +61,7 @@ public void remove() { // Inner class for Row object public static class Row { - private Map data; + private final Map data; public Row(Map data) { this.data = data; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/DateFieldFormatter.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/DateFieldFormatter.java index dc239abd84..f8dd261be4 100644 --- 
a/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/DateFieldFormatter.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/DateFieldFormatter.java @@ -21,6 +21,7 @@ import org.apache.logging.log4j.Logger; import org.opensearch.sql.legacy.esdomain.LocalClusterState; import org.opensearch.sql.legacy.esdomain.mapping.FieldMappings; +import org.opensearch.sql.legacy.utils.StringUtils; /** Formatter to transform date fields into a consistent format for consumption by clients. */ public class DateFieldFormatter { @@ -41,7 +42,7 @@ public class DateFieldFormatter { private final Map> dateFieldFormatMap; private final Map fieldAliasMap; - private Set dateColumns; + private final Set dateColumns; public DateFieldFormatter( String indexName, List columns, Map fieldAliasMap) { @@ -83,7 +84,6 @@ public void applyJDBCDateFormat(Map rowSource) { Date date = parseDateString(formats, columnOriginalDate.toString()); if (date != null) { rowSource.put(columnName, DateFormat.getFormattedDate(date, FORMAT_JDBC)); - break; } else { LOG.warn("Could not parse date value; returning original value"); } @@ -152,15 +152,27 @@ private Date parseDateString(List formats, String columnOriginalDate) { switch (columnFormat) { case "date_optional_time": case "strict_date_optional_time": - parsedDate = - DateUtils.parseDate( - columnOriginalDate, - FORMAT_DOT_OPENSEARCH_DASHBOARDS_SAMPLE_DATA_LOGS_EXCEPTION, - FORMAT_DOT_OPENSEARCH_DASHBOARDS_SAMPLE_DATA_FLIGHTS_EXCEPTION, - FORMAT_DOT_OPENSEARCH_DASHBOARDS_SAMPLE_DATA_FLIGHTS_EXCEPTION_NO_TIME, - FORMAT_DOT_OPENSEARCH_DASHBOARDS_SAMPLE_DATA_ECOMMERCE_EXCEPTION, - FORMAT_DOT_DATE_AND_TIME, - FORMAT_DOT_DATE); + // It's possible to have date stored in second / millisecond form without explicit + // format hint. + // Parse it on a best-effort basis. 
+ if (StringUtils.isNumeric(columnOriginalDate)) { + long timestamp = Long.parseLong(columnOriginalDate); + if (timestamp > Integer.MAX_VALUE) { + parsedDate = new Date(timestamp); + } else { + parsedDate = new Date(timestamp * 1000); + } + } else { + parsedDate = + DateUtils.parseDate( + columnOriginalDate, + FORMAT_DOT_OPENSEARCH_DASHBOARDS_SAMPLE_DATA_LOGS_EXCEPTION, + FORMAT_DOT_OPENSEARCH_DASHBOARDS_SAMPLE_DATA_FLIGHTS_EXCEPTION, + FORMAT_DOT_OPENSEARCH_DASHBOARDS_SAMPLE_DATA_FLIGHTS_EXCEPTION_NO_TIME, + FORMAT_DOT_OPENSEARCH_DASHBOARDS_SAMPLE_DATA_ECOMMERCE_EXCEPTION, + FORMAT_DOT_DATE_AND_TIME, + FORMAT_DOT_DATE); + } break; case "epoch_millis": parsedDate = new Date(Long.parseLong(columnOriginalDate)); diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/DateFormat.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/DateFormat.java index fc9237918c..abb04d7f81 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/DateFormat.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/DateFormat.java @@ -14,7 +14,7 @@ public class DateFormat { - private static Map formatMap = new HashMap<>(); + private static final Map formatMap = new HashMap<>(); static { // Special cases that are parsed separately @@ -104,31 +104,31 @@ public static String getFormattedDate(java.util.Date date, String dateFormat) { } private static class Date { - static String BASIC_DATE = "yyyyMMdd"; - static String BASIC_ORDINAL_DATE = "yyyyDDD"; - static String BASIC_WEEK_DATE = "YYYY'W'wwu"; + static final String BASIC_DATE = "yyyyMMdd"; + static final String BASIC_ORDINAL_DATE = "yyyyDDD"; + static final String BASIC_WEEK_DATE = "YYYY'W'wwu"; - static String DATE = "yyyy-MM-dd"; - static String ORDINAL_DATE = "yyyy-DDD"; + static final String DATE = "yyyy-MM-dd"; + static final String ORDINAL_DATE = "yyyy-DDD"; - static String YEAR = "yyyy"; - static String YEAR_MONTH = "yyyy-MM"; + static final String YEAR = "yyyy"; + static final String YEAR_MONTH = "yyyy-MM"; - static String WEEK_DATE = "YYYY-'W'ww-u"; - static String WEEKYEAR = "YYYY"; - static String WEEKYEAR_WEEK = "YYYY-'W'ww"; + static final String WEEK_DATE = "YYYY-'W'ww-u"; + static final String WEEKYEAR = "YYYY"; + static final String WEEKYEAR_WEEK = "YYYY-'W'ww"; } private static class Time { - static String T = "'T'"; - static String BASIC_TIME = "HHmmss"; - static String TIME = "HH:mm:ss"; + static final String T = "'T'"; + static final String BASIC_TIME = "HHmmss"; + static final String TIME = "HH:mm:ss"; - static String HOUR = "HH"; - static String HOUR_MINUTE = "HH:mm"; + static final String HOUR = "HH"; + static final String HOUR_MINUTE = "HH:mm"; - static String MILLIS = ".SSS"; - static String TZ = "Z"; - static String TZZ = "XX"; + static final String MILLIS = ".SSS"; + static final String TZ = "Z"; + static final String TZZ = "XX"; } } diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/DeleteResultSet.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/DeleteResultSet.java index 24afb0a7af..2e040d78fb 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/DeleteResultSet.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/DeleteResultSet.java @@ -13,8 +13,8 @@ import org.opensearch.sql.legacy.domain.Delete; public class DeleteResultSet extends ResultSet { - private Delete query; - private Object queryResult; + private final Delete query; + private final Object queryResult; public 
static final String DELETED = "deleted_rows"; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/DescribeResultSet.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/DescribeResultSet.java index eba6db2453..74df2ddeeb 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/DescribeResultSet.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/DescribeResultSet.java @@ -29,8 +29,8 @@ public class DescribeResultSet extends ResultSet { */ public static final String DEFAULT_OBJECT_DATATYPE = "object"; - private IndexStatement statement; - private Object queryResult; + private final IndexStatement statement; + private final Object queryResult; public DescribeResultSet(Client client, IndexStatement statement, Object queryResult) { this.client = client; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/ErrorMessage.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/ErrorMessage.java index aa0d02bed8..b75689ba9c 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/ErrorMessage.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/ErrorMessage.java @@ -10,12 +10,12 @@ public class ErrorMessage { - protected E exception; + protected final E exception; - private int status; - private String type; - private String reason; - private String details; + private final int status; + private final String type; + private final String reason; + private final String details; public ErrorMessage(E exception, int status) { this.exception = exception; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/OpenSearchErrorMessage.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/OpenSearchErrorMessage.java index 8117d241b1..09c09919ec 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/OpenSearchErrorMessage.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/OpenSearchErrorMessage.java @@ -12,8 +12,8 @@ public class OpenSearchErrorMessage extends ErrorMessage { - OpenSearchErrorMessage(OpenSearchException exception, int status) { - super(exception, status); + OpenSearchErrorMessage(OpenSearchException exception, int defaultStatus) { + super(exception, exception.status() != null ? 
exception.status().getStatus() : defaultStatus); } @Override diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java index 00feabf5d8..3344829859 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutor.java @@ -5,23 +5,32 @@ package org.opensearch.sql.legacy.executor.format; +import static org.opensearch.sql.common.setting.Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER; + import java.util.Map; +import java.util.Objects; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.OpenSearchException; +import org.opensearch.action.search.SearchRequestBuilder; import org.opensearch.action.search.SearchResponse; import org.opensearch.client.Client; import org.opensearch.core.common.Strings; import org.opensearch.core.rest.RestStatus; import org.opensearch.rest.BytesRestResponse; import org.opensearch.rest.RestChannel; +import org.opensearch.search.builder.PointInTimeBuilder; import org.opensearch.sql.legacy.cursor.Cursor; import org.opensearch.sql.legacy.cursor.DefaultCursor; +import org.opensearch.sql.legacy.esdomain.LocalClusterState; import org.opensearch.sql.legacy.exception.SqlParseException; import org.opensearch.sql.legacy.executor.QueryActionElasticExecutor; import org.opensearch.sql.legacy.executor.RestExecutor; +import org.opensearch.sql.legacy.pit.PointInTimeHandler; +import org.opensearch.sql.legacy.pit.PointInTimeHandlerImpl; import org.opensearch.sql.legacy.query.DefaultQueryAction; import org.opensearch.sql.legacy.query.QueryAction; +import org.opensearch.sql.legacy.query.SqlOpenSearchRequestBuilder; import org.opensearch.sql.legacy.query.join.BackOffRetryStrategy; public class PrettyFormatRestExecutor implements RestExecutor { @@ -90,15 +99,32 @@ public String execute(Client client, Map params, QueryAction que private Protocol buildProtocolForDefaultQuery(Client client, DefaultQueryAction queryAction) throws SqlParseException { - SearchResponse response = (SearchResponse) queryAction.explain().get(); - String scrollId = response.getScrollId(); + PointInTimeHandler pit = null; + SearchResponse response; + SqlOpenSearchRequestBuilder sqlOpenSearchRequestBuilder = queryAction.explain(); + if (LocalClusterState.state().getSettingValue(SQL_PAGINATION_API_SEARCH_AFTER)) { + pit = new PointInTimeHandlerImpl(client, queryAction.getSelect().getIndexArr()); + pit.create(); + SearchRequestBuilder searchRequest = queryAction.getRequestBuilder(); + searchRequest.setPointInTime(new PointInTimeBuilder(pit.getPitId())); + response = searchRequest.get(); + } else { + response = (SearchResponse) sqlOpenSearchRequestBuilder.get(); + } Protocol protocol; - if (!Strings.isNullOrEmpty(scrollId)) { + if (isDefaultCursor(response, queryAction)) { DefaultCursor defaultCursor = new DefaultCursor(); - defaultCursor.setScrollId(scrollId); defaultCursor.setLimit(queryAction.getSelect().getRowCount()); defaultCursor.setFetchSize(queryAction.getSqlRequest().fetchSize()); + if (LocalClusterState.state().getSettingValue(SQL_PAGINATION_API_SEARCH_AFTER)) { + defaultCursor.setPitId(pit.getPitId()); + defaultCursor.setSearchSourceBuilder(queryAction.getRequestBuilder().request().source()); + defaultCursor.setSortFields( + response.getHits().getAt(response.getHits().getHits().length - 
1).getSortValues()); + } else { + defaultCursor.setScrollId(response.getScrollId()); + } protocol = new Protocol(client, queryAction, response.getHits(), format, defaultCursor); } else { protocol = new Protocol(client, queryAction, response.getHits(), format, Cursor.NULL_CURSOR); @@ -106,4 +132,14 @@ private Protocol buildProtocolForDefaultQuery(Client client, DefaultQueryAction return protocol; } + + protected boolean isDefaultCursor(SearchResponse searchResponse, DefaultQueryAction queryAction) { + if (LocalClusterState.state().getSettingValue(SQL_PAGINATION_API_SEARCH_AFTER)) { + return queryAction.getSqlRequest().fetchSize() != 0 + && Objects.requireNonNull(searchResponse.getHits().getTotalHits()).value + >= queryAction.getSqlRequest().fetchSize(); + } else { + return !Strings.isNullOrEmpty(searchResponse.getScrollId()); + } + } } diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/Protocol.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/Protocol.java index e6ea767e17..95600d234a 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/Protocol.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/Protocol.java @@ -36,7 +36,7 @@ public class Protocol { static final int ERROR_STATUS = 500; private final String formatType; - private int status; + private final int status; private long size; private long total; private ResultSet resultSet; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/Schema.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/Schema.java index b29369f713..e2946e946b 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/Schema.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/Schema.java @@ -19,7 +19,7 @@ public class Schema implements Iterable { private String indexName; private List columns; - private static Set types; + private static final Set types; static { types = getTypes(); @@ -121,10 +121,10 @@ public String nameLowerCase() { public static class Column { private final String name; - private String alias; + private final String alias; private final Type type; - private boolean identifiedByAlias; + private final boolean identifiedByAlias; public Column(String name, String alias, Type type, boolean identifiedByAlias) { this.name = name; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/SelectResultSet.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/SelectResultSet.java index c60691cb7c..84b7c00857 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/SelectResultSet.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/SelectResultSet.java @@ -26,6 +26,8 @@ import java.util.stream.StreamSupport; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.opensearch.action.admin.indices.alias.get.GetAliasesRequest; +import org.opensearch.action.admin.indices.alias.get.GetAliasesResponse; import org.opensearch.action.admin.indices.mapping.get.GetFieldMappingsRequest; import org.opensearch.action.admin.indices.mapping.get.GetFieldMappingsResponse; import org.opensearch.action.search.ClearScrollResponse; @@ -40,6 +42,7 @@ import org.opensearch.search.aggregations.metrics.NumericMetricsAggregation; import org.opensearch.search.aggregations.metrics.Percentile; import org.opensearch.search.aggregations.metrics.Percentiles; +import org.opensearch.sql.common.setting.Settings; 
import org.opensearch.sql.legacy.cursor.Cursor; import org.opensearch.sql.legacy.cursor.DefaultCursor; import org.opensearch.sql.legacy.domain.ColumnTypeProvider; @@ -49,11 +52,14 @@ import org.opensearch.sql.legacy.domain.Query; import org.opensearch.sql.legacy.domain.Select; import org.opensearch.sql.legacy.domain.TableOnJoinSelect; +import org.opensearch.sql.legacy.esdomain.LocalClusterState; import org.opensearch.sql.legacy.esdomain.mapping.FieldMapping; import org.opensearch.sql.legacy.exception.SqlFeatureNotImplementedException; import org.opensearch.sql.legacy.executor.Format; import org.opensearch.sql.legacy.metrics.MetricName; import org.opensearch.sql.legacy.metrics.Metrics; +import org.opensearch.sql.legacy.pit.PointInTimeHandler; +import org.opensearch.sql.legacy.pit.PointInTimeHandlerImpl; import org.opensearch.sql.legacy.utils.SQLFunctions; public class SelectResultSet extends ResultSet { @@ -64,7 +70,7 @@ public class SelectResultSet extends ResultSet { private final String formatType; private Query query; - private Object queryResult; + private final Object queryResult; private boolean selectAll; private String indexName; @@ -76,11 +82,11 @@ public class SelectResultSet extends ResultSet { private long totalHits; private long internalTotalHits; private List rows; - private Cursor cursor; + private final Cursor cursor; private DateFieldFormatter dateFieldFormatter; // alias -> base field name - private Map fieldAliasMap = new HashMap<>(); + private final Map fieldAliasMap = new HashMap<>(); public SelectResultSet( Client client, @@ -160,7 +166,11 @@ private void populateResultSetFromDefaultCursor(DefaultCursor cursor) { private void loadFromEsState(Query query) { String indexName = fetchIndexName(query); String[] fieldNames = fetchFieldsAsArray(query); - + GetAliasesResponse getAliasesResponse = + client.admin().indices().getAliases(new GetAliasesRequest(indexName)).actionGet(); + if (getAliasesResponse != null && !getAliasesResponse.getAliases().isEmpty()) { + indexName = getAliasesResponse.getAliases().keySet().iterator().next(); + } // Reset boolean in the case of JOIN query where multiple calls to loadFromEsState() are made selectAll = isSimpleQuerySelectAll(query) || isJoinQuerySelectAll(query, fieldNames); @@ -563,13 +573,25 @@ private void populateDefaultCursor(DefaultCursor cursor) { Integer limit = cursor.getLimit(); long rowsLeft = rowsLeft(cursor.getFetchSize(), cursor.getLimit()); if (rowsLeft <= 0) { - // close the cursor - String scrollId = cursor.getScrollId(); - ClearScrollResponse clearScrollResponse = - client.prepareClearScroll().addScrollId(scrollId).get(); - if (!clearScrollResponse.isSucceeded()) { - Metrics.getInstance().getNumericalMetric(MetricName.FAILED_REQ_COUNT_SYS).increment(); - LOG.error("Error closing the cursor context {} ", scrollId); + // Delete Point In Time ID + if (LocalClusterState.state().getSettingValue(Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER)) { + String pitId = cursor.getPitId(); + PointInTimeHandler pit = new PointInTimeHandlerImpl(client, pitId); + try { + pit.delete(); + } catch (RuntimeException e) { + Metrics.getInstance().getNumericalMetric(MetricName.FAILED_REQ_COUNT_SYS).increment(); + LOG.info("Error deleting point in time {} ", pitId); + } + } else { + // close the cursor + String scrollId = cursor.getScrollId(); + ClearScrollResponse clearScrollResponse = + client.prepareClearScroll().addScrollId(scrollId).get(); + if (!clearScrollResponse.isSucceeded()) { + 
Metrics.getInstance().getNumericalMetric(MetricName.FAILED_REQ_COUNT_SYS).increment(); + LOG.error("Error closing the cursor context {} ", scrollId); + } } return; } diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/ShowResultSet.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/ShowResultSet.java index 263bf1e7db..9b7d15807b 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/ShowResultSet.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/format/ShowResultSet.java @@ -22,8 +22,8 @@ public class ShowResultSet extends ResultSet { private static final String TABLE_TYPE = "BASE TABLE"; - private IndexStatement statement; - private Object queryResult; + private final IndexStatement statement; + private final Object queryResult; public ShowResultSet(Client client, IndexStatement statement, Object queryResult) { this.client = client; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/join/ElasticJoinExecutor.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/join/ElasticJoinExecutor.java index f0ffafc470..5ff52cf657 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/join/ElasticJoinExecutor.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/join/ElasticJoinExecutor.java @@ -5,6 +5,8 @@ package org.opensearch.sql.legacy.executor.join; +import static org.opensearch.sql.common.setting.Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER; + import java.io.IOException; import java.util.Collection; import java.util.HashMap; @@ -12,15 +14,12 @@ import java.util.List; import java.util.Map; import java.util.Set; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; +import java.util.stream.Stream; import org.apache.lucene.search.TotalHits; import org.apache.lucene.search.TotalHits.Relation; -import org.opensearch.action.search.SearchRequestBuilder; import org.opensearch.action.search.SearchResponse; import org.opensearch.client.Client; import org.opensearch.common.document.DocumentField; -import org.opensearch.common.unit.TimeValue; import org.opensearch.core.rest.RestStatus; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.mapper.MapperService; @@ -28,11 +27,13 @@ import org.opensearch.rest.RestChannel; import org.opensearch.search.SearchHit; import org.opensearch.search.SearchHits; -import org.opensearch.search.sort.FieldSortBuilder; -import org.opensearch.search.sort.SortOrder; import org.opensearch.sql.legacy.domain.Field; +import org.opensearch.sql.legacy.esdomain.LocalClusterState; import org.opensearch.sql.legacy.exception.SqlParseException; import org.opensearch.sql.legacy.executor.ElasticHitsExecutor; +import org.opensearch.sql.legacy.metrics.MetricName; +import org.opensearch.sql.legacy.metrics.Metrics; +import org.opensearch.sql.legacy.pit.PointInTimeHandlerImpl; import org.opensearch.sql.legacy.query.SqlElasticRequestBuilder; import org.opensearch.sql.legacy.query.join.HashJoinElasticRequestBuilder; import org.opensearch.sql.legacy.query.join.JoinRequestBuilder; @@ -41,16 +42,16 @@ import org.opensearch.sql.legacy.query.planner.HashJoinQueryPlanRequestBuilder; /** Created by Eliran on 15/9/2015. 
*/ -public abstract class ElasticJoinExecutor implements ElasticHitsExecutor { - private static final Logger LOG = LogManager.getLogger(); +public abstract class ElasticJoinExecutor extends ElasticHitsExecutor { protected List results; // Keep list to avoid copy to new array in SearchHits - protected MetaSearchResult metaResults; + protected final MetaSearchResult metaResults; protected final int MAX_RESULTS_ON_ONE_FETCH = 10000; - private Set aliasesOnReturn; - private boolean allFieldsReturn; + private final Set aliasesOnReturn; + private final boolean allFieldsReturn; + protected final String[] indices; - protected ElasticJoinExecutor(JoinRequestBuilder requestBuilder) { + protected ElasticJoinExecutor(Client client, JoinRequestBuilder requestBuilder) { metaResults = new MetaSearchResult(); aliasesOnReturn = new HashSet<>(); List firstTableReturnedField = requestBuilder.getFirstTable().getReturnedFields(); @@ -58,6 +59,8 @@ protected ElasticJoinExecutor(JoinRequestBuilder requestBuilder) { allFieldsReturn = (firstTableReturnedField == null || firstTableReturnedField.size() == 0) && (secondTableReturnedField == null || secondTableReturnedField.size() == 0); + indices = getIndices(requestBuilder); + this.client = client; } public void sendResponse(RestChannel channel) throws IOException { @@ -85,10 +88,28 @@ public void sendResponse(RestChannel channel) throws IOException { } public void run() throws IOException, SqlParseException { - long timeBefore = System.currentTimeMillis(); - results = innerRun(); - long joinTimeInMilli = System.currentTimeMillis() - timeBefore; - this.metaResults.setTookImMilli(joinTimeInMilli); + try { + long timeBefore = System.currentTimeMillis(); + if (LocalClusterState.state().getSettingValue(SQL_PAGINATION_API_SEARCH_AFTER)) { + pit = new PointInTimeHandlerImpl(client, indices); + pit.create(); + } + results = innerRun(); + long joinTimeInMilli = System.currentTimeMillis() - timeBefore; + this.metaResults.setTookImMilli(joinTimeInMilli); + } catch (Exception e) { + LOG.error("Failed during join query run.", e); + throw new IllegalStateException("Error occurred during join query run", e); + } finally { + if (LocalClusterState.state().getSettingValue(SQL_PAGINATION_API_SEARCH_AFTER)) { + try { + pit.delete(); + } catch (RuntimeException e) { + Metrics.getInstance().getNumericalMetric(MetricName.FAILED_REQ_COUNT_SYS).increment(); + LOG.info("Error deleting point in time {} ", pit); + } + } + } } protected abstract List innerRun() throws IOException, SqlParseException; @@ -103,7 +124,7 @@ public SearchHits getHits() { public static ElasticJoinExecutor createJoinExecutor( Client client, SqlElasticRequestBuilder requestBuilder) { if (requestBuilder instanceof HashJoinQueryPlanRequestBuilder) { - return new QueryPlanElasticExecutor((HashJoinQueryPlanRequestBuilder) requestBuilder); + return new QueryPlanElasticExecutor(client, (HashJoinQueryPlanRequestBuilder) requestBuilder); } else if (requestBuilder instanceof HashJoinElasticRequestBuilder) { HashJoinElasticRequestBuilder hashJoin = (HashJoinElasticRequestBuilder) requestBuilder; return new HashJoinElasticExecutor(client, hashJoin); @@ -256,23 +277,22 @@ protected void updateMetaSearchResults(SearchResponse searchResponse) { this.metaResults.updateTimeOut(searchResponse.isTimedOut()); } - protected SearchResponse scrollOneTimeWithMax( - Client client, TableInJoinRequestBuilder tableRequest) { - SearchRequestBuilder scrollRequest = - tableRequest - .getRequestBuilder() - .setScroll(new TimeValue(60000)) - 
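The reworked run() above wraps the join in a try/finally so the point in time opened for the participating indices is always released, even when innerRun() throws. Condensed, the lifecycle looks like the sketch below; the field names pit and client follow the diff, and the innerRun() comment stands in for the concrete join work:

import static org.opensearch.sql.common.setting.Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER;

import org.opensearch.client.Client;
import org.opensearch.sql.legacy.esdomain.LocalClusterState;
import org.opensearch.sql.legacy.pit.PointInTimeHandler;
import org.opensearch.sql.legacy.pit.PointInTimeHandlerImpl;

class JoinPitLifecycle {
  private final Client client;
  private PointInTimeHandler pit;

  JoinPitLifecycle(Client client) {
    this.client = client;
  }

  void run(String[] indices) throws Exception {
    boolean usePit =
        LocalClusterState.state().getSettingValue(SQL_PAGINATION_API_SEARCH_AFTER);
    try {
      if (usePit) {
        pit = new PointInTimeHandlerImpl(client, indices);
        pit.create(); // one PIT spanning both join sides gives a consistent snapshot
      }
      // innerRun(): page through both tables via getResponseWithHits(...)
    } finally {
      if (usePit && pit != null) {
        pit.delete(); // best-effort cleanup, mirroring the finally block in the diff
      }
    }
  }
}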
.setSize(MAX_RESULTS_ON_ONE_FETCH); - boolean ordered = tableRequest.getOriginalSelect().isOrderdSelect(); - if (!ordered) { - scrollRequest.addSort(FieldSortBuilder.DOC_FIELD_NAME, SortOrder.ASC); - } - SearchResponse responseWithHits = scrollRequest.get(); - // on ordered select - not using SCAN , elastic returns hits on first scroll - // es5.0 elastic always return docs on scan - // if(!ordered) - // responseWithHits = client.prepareSearchScroll(responseWithHits.getScrollId()) - // .setScroll(new TimeValue(600000)).get(); - return responseWithHits; + public SearchResponse getResponseWithHits( + TableInJoinRequestBuilder tableRequest, int size, SearchResponse previousResponse) { + + return getResponseWithHits( + tableRequest.getRequestBuilder(), + tableRequest.getOriginalSelect(), + size, + previousResponse, + pit); + } + + public String[] getIndices(JoinRequestBuilder joinRequestBuilder) { + return Stream.concat( + Stream.of(joinRequestBuilder.getFirstTable().getOriginalSelect().getIndexArr()), + Stream.of(joinRequestBuilder.getSecondTable().getOriginalSelect().getIndexArr())) + .distinct() + .toArray(String[]::new); } } diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/join/ElasticUtils.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/join/ElasticUtils.java index 7b6228a3d2..70e7118ad5 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/join/ElasticUtils.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/join/ElasticUtils.java @@ -6,6 +6,7 @@ package org.opensearch.sql.legacy.executor.join; import static org.opensearch.core.xcontent.ToXContent.EMPTY_PARAMS; +import static org.opensearch.sql.opensearch.storage.OpenSearchIndex.METADATA_FIELD_ID; import com.google.common.collect.ImmutableMap; import java.io.IOException; @@ -39,6 +40,7 @@ public static SearchResponse scrollOneTimeWithHits( boolean ordered = originalSelect.isOrderdSelect(); if (!ordered) { scrollRequest.addSort(FieldSortBuilder.DOC_FIELD_NAME, SortOrder.ASC); + scrollRequest.addSort(METADATA_FIELD_ID, SortOrder.ASC); } SearchResponse responseWithHits = scrollRequest.get(); // on ordered select - not using SCAN , elastic returns hits on first scroll diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/join/HashJoinComparisonStructure.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/join/HashJoinComparisonStructure.java index 8216feac66..cf81a6fe49 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/join/HashJoinComparisonStructure.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/join/HashJoinComparisonStructure.java @@ -15,8 +15,8 @@ /** Created by Eliran on 2/11/2015. 
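The three-argument getResponseWithHits(...) introduced above delegates to a five-argument helper on ElasticHitsExecutor whose body is not part of this hunk. The sketch below is a plausible shape of that helper, not the actual implementation: it assumes SearchRequestBuilder#setPointInTime exists (otherwise the PointInTimeBuilder would be attached via the request source) and resumes search_after from the last hit's sort values.

import static org.opensearch.sql.common.setting.Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER;
import static org.opensearch.sql.opensearch.storage.OpenSearchIndex.METADATA_FIELD_ID;

import org.opensearch.action.search.SearchRequestBuilder;
import org.opensearch.action.search.SearchResponse;
import org.opensearch.client.Client;
import org.opensearch.common.unit.TimeValue;
import org.opensearch.search.SearchHit;
import org.opensearch.search.builder.PointInTimeBuilder;
import org.opensearch.search.sort.FieldSortBuilder;
import org.opensearch.search.sort.SortOrder;
import org.opensearch.sql.legacy.domain.Select;
import org.opensearch.sql.legacy.esdomain.LocalClusterState;
import org.opensearch.sql.legacy.pit.PointInTimeHandler;

abstract class PagedFetch {
  protected Client client;

  protected SearchResponse getResponseWithHits(
      SearchRequestBuilder request,
      Select select,
      int size,
      SearchResponse previousResponse,
      PointInTimeHandler pit) {
    request.setSize(size);
    if (LocalClusterState.state().getSettingValue(SQL_PAGINATION_API_SEARCH_AFTER)) {
      if (!select.isOrderdSelect()) {
        // search_after needs a total order; _doc plus the id metadata field is the tiebreaker
        request.addSort(FieldSortBuilder.DOC_FIELD_NAME, SortOrder.ASC);
        request.addSort(METADATA_FIELD_ID, SortOrder.ASC);
      }
      request.setPointInTime(new PointInTimeBuilder(pit.getPitId())); // assumed setter
      if (previousResponse != null) {
        SearchHit[] hits = previousResponse.getHits().getHits();
        request.searchAfter(hits[hits.length - 1].getSortValues()); // resume after the last row
      }
      return request.get();
    }
    // Scroll path: the first call opens the scroll, later calls continue it by scroll id.
    if (previousResponse == null) {
      return request.setScroll(new TimeValue(60000)).get();
    }
    return client
        .prepareSearchScroll(previousResponse.getScrollId())
        .setScroll(new TimeValue(600000))
        .get();
  }
}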
*/ public class HashJoinComparisonStructure { - private HashMap>> comparisonIDtoComparisonFields; - private HashMap> comparisonIDtoComparisonHash; + private final HashMap>> comparisonIDtoComparisonFields; + private final HashMap> comparisonIDtoComparisonHash; public HashJoinComparisonStructure(List>> t1ToT2FieldsComparisons) { comparisonIDtoComparisonFields = new HashMap<>(); diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/join/HashJoinElasticExecutor.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/join/HashJoinElasticExecutor.java index 06a913205d..46b31b447d 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/join/HashJoinElasticExecutor.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/join/HashJoinElasticExecutor.java @@ -20,7 +20,6 @@ import org.opensearch.action.search.SearchResponse; import org.opensearch.client.Client; import org.opensearch.common.document.DocumentField; -import org.opensearch.common.unit.TimeValue; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.QueryBuilders; @@ -35,17 +34,14 @@ /** Created by Eliran on 22/8/2015. */ public class HashJoinElasticExecutor extends ElasticJoinExecutor { - private HashJoinElasticRequestBuilder requestBuilder; - - private Client client; + private final HashJoinElasticRequestBuilder requestBuilder; private boolean useQueryTermsFilterOptimization = false; private final int MAX_RESULTS_FOR_FIRST_TABLE = 100000; - HashJoinComparisonStructure hashJoinComparisonStructure; - private Set alreadyMatched; + final HashJoinComparisonStructure hashJoinComparisonStructure; + private final Set alreadyMatched; public HashJoinElasticExecutor(Client client, HashJoinElasticRequestBuilder requestBuilder) { - super(requestBuilder); - this.client = client; + super(client, requestBuilder); this.requestBuilder = requestBuilder; this.useQueryTermsFilterOptimization = requestBuilder.isUseTermFiltersOptimization(); this.hashJoinComparisonStructure = @@ -54,7 +50,6 @@ public HashJoinElasticExecutor(Client client, HashJoinElasticRequestBuilder requ } public List innerRun() throws IOException, SqlParseException { - Map>> optimizationTermsFilterStructure = initOptimizationStructure(); @@ -124,16 +119,12 @@ private List createCombinedResults(TableInJoinRequestBuilder secondTa Integer hintLimit = secondTableRequest.getHintLimit(); SearchResponse searchResponse; boolean finishedScrolling; + if (hintLimit != null && hintLimit < MAX_RESULTS_ON_ONE_FETCH) { - searchResponse = secondTableRequest.getRequestBuilder().setSize(hintLimit).get(); + searchResponse = getResponseWithHits(secondTableRequest, hintLimit, null); finishedScrolling = true; } else { - searchResponse = - secondTableRequest - .getRequestBuilder() - .setScroll(new TimeValue(60000)) - .setSize(MAX_RESULTS_ON_ONE_FETCH) - .get(); + searchResponse = getResponseWithHits(secondTableRequest, MAX_RESULTS_ON_ONE_FETCH, null); // es5.0 no need to scroll again! 
// searchResponse = client.prepareSearchScroll(searchResponse.getScrollId()) // .setScroll(new TimeValue(600000)).get(); @@ -214,11 +205,7 @@ private List createCombinedResults(TableInJoinRequestBuilder secondTa if (secondTableHits.length > 0 && (hintLimit == null || fetchedSoFarFromSecondTable >= hintLimit)) { searchResponse = - client - .prepareSearchScroll(searchResponse.getScrollId()) - .setScroll(new TimeValue(600000)) - .execute() - .actionGet(); + getResponseWithHits(secondTableRequest, MAX_RESULTS_ON_ONE_FETCH, searchResponse); } else { break; } @@ -292,12 +279,13 @@ private List fetchAllHits(TableInJoinRequestBuilder tableInJoinReques private List scrollTillLimit( TableInJoinRequestBuilder tableInJoinRequest, Integer hintLimit) { - SearchResponse scrollResp = scrollOneTimeWithMax(client, tableInJoinRequest); + SearchResponse response = + getResponseWithHits(tableInJoinRequest, MAX_RESULTS_ON_ONE_FETCH, null); - updateMetaSearchResults(scrollResp); + updateMetaSearchResults(response); List hitsWithScan = new ArrayList<>(); int curentNumOfResults = 0; - SearchHit[] hits = scrollResp.getHits().getHits(); + SearchHit[] hits = response.getHits().getHits(); if (hintLimit == null) { hintLimit = MAX_RESULTS_FOR_FIRST_TABLE; @@ -311,13 +299,8 @@ private List scrollTillLimit( System.out.println("too many results for first table, stoping at:" + curentNumOfResults); break; } - scrollResp = - client - .prepareSearchScroll(scrollResp.getScrollId()) - .setScroll(new TimeValue(600000)) - .execute() - .actionGet(); - hits = scrollResp.getHits().getHits(); + response = getResponseWithHits(tableInJoinRequest, MAX_RESULTS_FOR_FIRST_TABLE, response); + hits = response.getHits().getHits(); } return hitsWithScan; } diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/join/NestedLoopsElasticExecutor.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/join/NestedLoopsElasticExecutor.java index 56c5f96af5..f4e4347e06 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/join/NestedLoopsElasticExecutor.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/join/NestedLoopsElasticExecutor.java @@ -18,7 +18,6 @@ import org.opensearch.action.search.SearchResponse; import org.opensearch.client.Client; import org.opensearch.common.document.DocumentField; -import org.opensearch.common.unit.TimeValue; import org.opensearch.index.mapper.MapperService; import org.opensearch.search.SearchHit; import org.opensearch.search.SearchHits; @@ -39,11 +38,9 @@ public class NestedLoopsElasticExecutor extends ElasticJoinExecutor { private static final Logger LOG = LogManager.getLogger(); private final NestedLoopsElasticRequestBuilder nestedLoopsRequest; - private final Client client; public NestedLoopsElasticExecutor(Client client, NestedLoopsElasticRequestBuilder nestedLoops) { - super(nestedLoops); - this.client = client; + super(client, nestedLoops); this.nestedLoopsRequest = nestedLoops; } @@ -111,11 +108,26 @@ protected List innerRun() throws SqlParseException { if (!BackOffRetryStrategy.isHealthy()) { throw new IllegalStateException("Memory circuit is broken"); } - firstTableResponse = - client - .prepareSearchScroll(firstTableResponse.getScrollId()) - .setScroll(new TimeValue(600000)) - .get(); + /* Fetching next result page. + Using scroll api - only scrollId from previous response is required for scroll request. + Using pit with search_after - we need to recreate search request along with pitId and + sort fields from previous response. 
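With the scroll-specific calls removed, every fetch in the hash-join path goes through getResponseWithHits(...), and threading the previous response back in is what advances either the scroll or the search_after cursor. The paging loop then reduces to the pattern below; processHits is an illustrative placeholder for the per-page work such as combining results or collecting term-filter values:

import org.opensearch.action.search.SearchResponse;
import org.opensearch.search.SearchHit;
import org.opensearch.sql.legacy.query.join.TableInJoinRequestBuilder;

// Inside an ElasticJoinExecutor subclass: page through one join side until it is exhausted.
abstract class JoinSidePager {
  abstract SearchResponse getResponseWithHits(
      TableInJoinRequestBuilder table, int size, SearchResponse previous);

  abstract void processHits(SearchHit[] hits); // placeholder for per-page work

  void forEachPage(TableInJoinRequestBuilder table, int pageSize) {
    SearchResponse response = getResponseWithHits(table, pageSize, null); // first page
    SearchHit[] hits = response.getHits().getHits();
    while (hits != null && hits.length != 0) {
      processHits(hits);
      response = getResponseWithHits(table, pageSize, response); // next page
      hits = response.getHits().getHits();
    }
  }
}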
+ Here we are finding required size for recreating search request with pit and search after. + Conditions for size are similar as firstFetch(). + In case of scroll, this size will be ignored and size from first request will be used. + */ + Integer hintLimit = nestedLoopsRequest.getFirstTable().getHintLimit(); + if (hintLimit != null && hintLimit < MAX_RESULTS_ON_ONE_FETCH) { + firstTableResponse = + getResponseWithHits( + nestedLoopsRequest.getFirstTable(), hintLimit, firstTableResponse); + } else { + firstTableResponse = + getResponseWithHits( + nestedLoopsRequest.getFirstTable(), + MAX_RESULTS_ON_ONE_FETCH, + firstTableResponse); + } } else { finishedWithFirstTable = true; } @@ -287,12 +299,11 @@ private FetchWithScrollResponse firstFetch(TableInJoinRequestBuilder tableReques boolean needScrollForFirstTable = false; SearchResponse responseWithHits; if (hintLimit != null && hintLimit < MAX_RESULTS_ON_ONE_FETCH) { - responseWithHits = tableRequest.getRequestBuilder().setSize(hintLimit).get(); needScrollForFirstTable = false; } else { // scroll request with max. - responseWithHits = scrollOneTimeWithMax(client, tableRequest); + responseWithHits = getResponseWithHits(tableRequest, MAX_RESULTS_ON_ONE_FETCH, null); if (responseWithHits.getHits().getTotalHits() != null && responseWithHits.getHits().getTotalHits().value < MAX_RESULTS_ON_ONE_FETCH) { needScrollForFirstTable = true; @@ -367,8 +378,8 @@ private void reverseOrderOfCondition(Condition cond, String t1Alias, String t2Al } private class FetchWithScrollResponse { - private SearchResponse response; - private boolean needScrollForFirstTable; + private final SearchResponse response; + private final boolean needScrollForFirstTable; private FetchWithScrollResponse(SearchResponse response, boolean needScrollForFirstTable) { this.response = response; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/join/QueryPlanElasticExecutor.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/join/QueryPlanElasticExecutor.java index f4b2f5421d..d8e9d41376 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/join/QueryPlanElasticExecutor.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/join/QueryPlanElasticExecutor.java @@ -6,6 +6,7 @@ package org.opensearch.sql.legacy.executor.join; import java.util.List; +import org.opensearch.client.Client; import org.opensearch.search.SearchHit; import org.opensearch.sql.legacy.query.planner.HashJoinQueryPlanRequestBuilder; import org.opensearch.sql.legacy.query.planner.core.QueryPlanner; @@ -19,8 +20,8 @@ class QueryPlanElasticExecutor extends ElasticJoinExecutor { private final QueryPlanner queryPlanner; - QueryPlanElasticExecutor(HashJoinQueryPlanRequestBuilder request) { - super(request); + QueryPlanElasticExecutor(Client client, HashJoinQueryPlanRequestBuilder request) { + super(client, request); this.queryPlanner = request.plan(); } diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/multi/ComperableHitResult.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/multi/ComperableHitResult.java index fa3514600b..f799cd63d2 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/multi/ComperableHitResult.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/multi/ComperableHitResult.java @@ -15,10 +15,10 @@ /** Created by Eliran on 9/9/2016. 
*/ public class ComperableHitResult { - private SearchHit hit; - private String comperator; + private final SearchHit hit; + private final String comperator; private boolean isAllNull; - private Map flattenMap; + private final Map flattenMap; public ComperableHitResult(SearchHit hit, String[] fieldsOrder, String seperator) { this.hit = hit; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/multi/MinusExecutor.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/multi/MinusExecutor.java index 03e16424e7..a3f8596eb4 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/multi/MinusExecutor.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/multi/MinusExecutor.java @@ -5,6 +5,8 @@ package org.opensearch.sql.legacy.executor.multi; +import static org.opensearch.sql.common.setting.Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER; + import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -18,7 +20,7 @@ import org.opensearch.action.search.SearchResponse; import org.opensearch.client.Client; import org.opensearch.common.document.DocumentField; -import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.ArrayUtils; import org.opensearch.index.mapper.MapperService; import org.opensearch.search.SearchHit; import org.opensearch.search.SearchHits; @@ -28,19 +30,21 @@ import org.opensearch.sql.legacy.domain.Where; import org.opensearch.sql.legacy.domain.hints.Hint; import org.opensearch.sql.legacy.domain.hints.HintType; +import org.opensearch.sql.legacy.esdomain.LocalClusterState; import org.opensearch.sql.legacy.exception.SqlParseException; import org.opensearch.sql.legacy.executor.ElasticHitsExecutor; -import org.opensearch.sql.legacy.executor.join.ElasticUtils; +import org.opensearch.sql.legacy.metrics.MetricName; +import org.opensearch.sql.legacy.metrics.Metrics; +import org.opensearch.sql.legacy.pit.PointInTimeHandlerImpl; import org.opensearch.sql.legacy.query.DefaultQueryAction; import org.opensearch.sql.legacy.query.multi.MultiQueryRequestBuilder; import org.opensearch.sql.legacy.utils.Util; /** Created by Eliran on 26/8/2016. 
*/ -public class MinusExecutor implements ElasticHitsExecutor { - private Client client; - private MultiQueryRequestBuilder builder; +public class MinusExecutor extends ElasticHitsExecutor { + private final MultiQueryRequestBuilder builder; private SearchHits minusHits; - private boolean useTermsOptimization; + private final boolean useTermsOptimization; private boolean termsOptimizationWithToLower; private boolean useScrolling; private int maxDocsToFetchOnFirstTable; @@ -48,7 +52,7 @@ public class MinusExecutor implements ElasticHitsExecutor { private int maxDocsToFetchOnEachScrollShard; private String[] fieldsOrderFirstTable; private String[] fieldsOrderSecondTable; - private String seperator; + private final String seperator; public MinusExecutor(Client client, MultiQueryRequestBuilder builder) { this.client = client; @@ -63,45 +67,68 @@ public MinusExecutor(Client client, MultiQueryRequestBuilder builder) { @Override public void run() throws SqlParseException { - if (this.useTermsOptimization && this.fieldsOrderFirstTable.length != 1) { - throw new SqlParseException( - "Terms optimization failed: terms optimization for minus execution is supported with one" - + " field"); - } - if (this.useTermsOptimization && !this.useScrolling) { - throw new SqlParseException( - "Terms optimization failed: using scrolling is required for terms optimization"); - } - if (!this.useScrolling || !this.useTermsOptimization) { - Set comperableHitResults; - if (!this.useScrolling) { - // 1. get results from first search , put in set - // 2. get reults from second search - // 2.1 for each result remove from set - comperableHitResults = simpleOneTimeQueryEach(); + try { + if (LocalClusterState.state().getSettingValue(SQL_PAGINATION_API_SEARCH_AFTER)) { + pit = + new PointInTimeHandlerImpl( + client, + ArrayUtils.concat( + builder.getOriginalSelect(true).getIndexArr(), + builder.getOriginalSelect(false).getIndexArr())); + pit.create(); + } + + if (this.useTermsOptimization && this.fieldsOrderFirstTable.length != 1) { + throw new SqlParseException( + "Terms optimization failed: terms optimization for minus execution is supported with" + + " one field"); + } + if (this.useTermsOptimization && !this.useScrolling) { + throw new SqlParseException( + "Terms optimization failed: using scrolling is required for terms optimization"); + } + if (!this.useScrolling || !this.useTermsOptimization) { + Set comperableHitResults; + if (!this.useScrolling) { + // 1. get results from first search , put in set + // 2. get reults from second search + // 2.1 for each result remove from set + comperableHitResults = simpleOneTimeQueryEach(); + } else { + // if scrolling + // 1. get all results in scrolls (till some limit) . put on set + // 2. scroll on second table + // 3. on each scroll result remove items from set + comperableHitResults = runWithScrollings(); + } + fillMinusHitsFromResults(comperableHitResults); + return; } else { - // if scrolling - // 1. get all results in scrolls (till some limit) . put on set - // 2. scroll on second table - // 3. on each scroll result remove items from set - comperableHitResults = runWithScrollings(); + // if scrolling and optimization + // 0. save the original second table where , init set + // 1. 
on each scroll on first table , create miniSet + // 1.1 build where from all results (terms filter) , and run query + // 1.1.1 on each result remove from miniSet + // 1.1.2 add all results left from miniset to bigset + Select firstSelect = this.builder.getOriginalSelect(true); + MinusOneFieldAndOptimizationResult optimizationResult = + runWithScrollingAndAddFilter(fieldsOrderFirstTable[0], fieldsOrderSecondTable[0]); + String fieldName = getFieldName(firstSelect.getFields().get(0)); + Set results = optimizationResult.getFieldValues(); + SearchHit someHit = optimizationResult.getSomeHit(); + fillMinusHitsFromOneField(fieldName, results, someHit); + } + } catch (Exception e) { + LOG.error("Failed during multi query run.", e); + } finally { + if (LocalClusterState.state().getSettingValue(SQL_PAGINATION_API_SEARCH_AFTER)) { + try { + pit.delete(); + } catch (RuntimeException e) { + Metrics.getInstance().getNumericalMetric(MetricName.FAILED_REQ_COUNT_SYS).increment(); + LOG.info("Error deleting point in time {} ", pit); + } } - fillMinusHitsFromResults(comperableHitResults); - return; - } else { - // if scrolling and optimization - // 0. save the original second table where , init set - // 1. on each scroll on first table , create miniSet - // 1.1 build where from all results (terms filter) , and run query - // 1.1.1 on each result remove from miniSet - // 1.1.2 add all results left from miniset to bigset - Select firstSelect = this.builder.getOriginalSelect(true); - MinusOneFieldAndOptimizationResult optimizationResult = - runWithScrollingAndAddFilter(fieldsOrderFirstTable[0], fieldsOrderSecondTable[0]); - String fieldName = getFieldName(firstSelect.getFields().get(0)); - Set results = optimizationResult.getFieldValues(); - SearchHit someHit = optimizationResult.getSomeHit(); - fillMinusHitsFromOneField(fieldName, results, someHit); } } @@ -187,11 +214,12 @@ private void fillMinusHitsFromResults(Set comperableHitResu private Set runWithScrollings() { SearchResponse scrollResp = - ElasticUtils.scrollOneTimeWithHits( - this.client, - this.builder.getFirstSearchRequest(), + getResponseWithHits( + builder.getFirstSearchRequest(), builder.getOriginalSelect(true), - this.maxDocsToFetchOnEachScrollShard); + maxDocsToFetchOnEachScrollShard, + null, + pit); Set results = new HashSet<>(); SearchHit[] hits = scrollResp.getHits().getHits(); @@ -199,7 +227,6 @@ private Set runWithScrollings() { return new HashSet<>(); } int totalDocsFetchedFromFirstTable = 0; - // fetch from first table . fill set. 
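MinusExecutor follows the same pattern as the join executors, with one twist visible above: a single point in time is opened across both operands' indices (ArrayUtils.concat of the two index arrays) before either side is paged, and it is deleted in the finally block. Roughly, with getOriginalSelect(true/false) returning the first and second SELECT as in the diff:

import static org.opensearch.sql.common.setting.Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER;

import org.opensearch.client.Client;
import org.opensearch.common.util.ArrayUtils;
import org.opensearch.sql.legacy.esdomain.LocalClusterState;
import org.opensearch.sql.legacy.pit.PointInTimeHandler;
import org.opensearch.sql.legacy.pit.PointInTimeHandlerImpl;
import org.opensearch.sql.legacy.query.multi.MultiQueryRequestBuilder;

class MinusPitSetup {
  static PointInTimeHandler openPitForBothTables(Client client, MultiQueryRequestBuilder builder) {
    if (!LocalClusterState.state().getSettingValue(SQL_PAGINATION_API_SEARCH_AFTER)) {
      return null; // scroll mode needs no PIT
    }
    // One PIT covering the first and second table, so both sides page over the same snapshot.
    String[] indices =
        ArrayUtils.concat(
            builder.getOriginalSelect(true).getIndexArr(),
            builder.getOriginalSelect(false).getIndexArr());
    PointInTimeHandler pit = new PointInTimeHandlerImpl(client, indices);
    pit.create();
    return pit; // the caller deletes it in a finally block once the minus result is built
  }
}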
while (hits != null && hits.length != 0) { totalDocsFetchedFromFirstTable += hits.length; @@ -208,19 +235,21 @@ private Set runWithScrollings() { break; } scrollResp = - client - .prepareSearchScroll(scrollResp.getScrollId()) - .setScroll(new TimeValue(600000)) - .execute() - .actionGet(); + getResponseWithHits( + builder.getFirstSearchRequest(), + builder.getOriginalSelect(true), + maxDocsToFetchOnEachScrollShard, + scrollResp, + pit); hits = scrollResp.getHits().getHits(); } scrollResp = - ElasticUtils.scrollOneTimeWithHits( - this.client, + getResponseWithHits( this.builder.getSecondSearchRequest(), builder.getOriginalSelect(false), - this.maxDocsToFetchOnEachScrollShard); + this.maxDocsToFetchOnEachScrollShard, + null, + pit); hits = scrollResp.getHits().getHits(); if (hits == null || hits.length == 0) { @@ -234,11 +263,12 @@ private Set runWithScrollings() { break; } scrollResp = - client - .prepareSearchScroll(scrollResp.getScrollId()) - .setScroll(new TimeValue(600000)) - .execute() - .actionGet(); + getResponseWithHits( + builder.getSecondSearchRequest(), + builder.getOriginalSelect(false), + maxDocsToFetchOnEachScrollShard, + scrollResp, + pit); hits = scrollResp.getHits().getHits(); } @@ -303,11 +333,12 @@ private boolean checkIfOnlyOneField(Select firstSelect, Select secondSelect) { private MinusOneFieldAndOptimizationResult runWithScrollingAndAddFilter( String firstFieldName, String secondFieldName) throws SqlParseException { SearchResponse scrollResp = - ElasticUtils.scrollOneTimeWithHits( - this.client, - this.builder.getFirstSearchRequest(), + getResponseWithHits( + builder.getFirstSearchRequest(), builder.getOriginalSelect(true), - this.maxDocsToFetchOnEachScrollShard); + maxDocsToFetchOnEachScrollShard, + null, + pit); Set results = new HashSet<>(); int currentNumOfResults = 0; SearchHit[] hits = scrollResp.getHits().getHits(); @@ -335,14 +366,16 @@ private MinusOneFieldAndOptimizationResult runWithScrollingAndAddFilter( break; } SearchResponse responseForSecondTable = - ElasticUtils.scrollOneTimeWithHits( - this.client, + getResponseWithHits( queryAction.getRequestBuilder(), secondQuerySelect, - this.maxDocsToFetchOnEachScrollShard); + this.maxDocsToFetchOnEachScrollShard, + null, + pit); SearchHits secondQuerySearchHits = responseForSecondTable.getHits(); SearchHit[] secondQueryHits = secondQuerySearchHits.getHits(); + while (secondQueryHits.length > 0) { totalDocsFetchedFromSecondTable += secondQueryHits.length; removeValuesFromSetAccordingToHits(secondFieldName, currentSetFromResults, secondQueryHits); @@ -350,11 +383,12 @@ private MinusOneFieldAndOptimizationResult runWithScrollingAndAddFilter( break; } responseForSecondTable = - client - .prepareSearchScroll(responseForSecondTable.getScrollId()) - .setScroll(new TimeValue(600000)) - .execute() - .actionGet(); + getResponseWithHits( + queryAction.getRequestBuilder(), + secondQuerySelect, + maxDocsToFetchOnEachScrollShard, + responseForSecondTable, + pit); secondQueryHits = responseForSecondTable.getHits().getHits(); } results.addAll(currentSetFromResults); @@ -363,13 +397,13 @@ private MinusOneFieldAndOptimizationResult runWithScrollingAndAddFilter( "too many results for first table, stoping at:" + totalDocsFetchedFromFirstTable); break; } - scrollResp = - client - .prepareSearchScroll(scrollResp.getScrollId()) - .setScroll(new TimeValue(600000)) - .execute() - .actionGet(); + getResponseWithHits( + builder.getFirstSearchRequest(), + builder.getOriginalSelect(true), + maxDocsToFetchOnEachScrollShard, + scrollResp, + 
pit); hits = scrollResp.getHits().getHits(); } return new MinusOneFieldAndOptimizationResult(results, someHit); diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/multi/MinusOneFieldAndOptimizationResult.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/multi/MinusOneFieldAndOptimizationResult.java index 3d7206ab13..5843e0d931 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/multi/MinusOneFieldAndOptimizationResult.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/multi/MinusOneFieldAndOptimizationResult.java @@ -10,8 +10,8 @@ /** Created by Eliran on 26/8/2016. */ class MinusOneFieldAndOptimizationResult { - private Set fieldValues; - private SearchHit someHit; + private final Set fieldValues; + private final SearchHit someHit; MinusOneFieldAndOptimizationResult(Set fieldValues, SearchHit someHit) { this.fieldValues = fieldValues; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/executor/multi/UnionExecutor.java b/legacy/src/main/java/org/opensearch/sql/legacy/executor/multi/UnionExecutor.java index 6b8b64c4e8..024d1bb4c3 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/executor/multi/UnionExecutor.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/executor/multi/UnionExecutor.java @@ -23,11 +23,10 @@ import org.opensearch.sql.legacy.utils.Util; /** Created by Eliran on 21/8/2016. */ -public class UnionExecutor implements ElasticHitsExecutor { +public class UnionExecutor extends ElasticHitsExecutor { - private MultiQueryRequestBuilder multiQueryBuilder; + private final MultiQueryRequestBuilder multiQueryBuilder; private SearchHits results; - private Client client; private int currentId; public UnionExecutor(Client client, MultiQueryRequestBuilder builder) { diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/metrics/BasicCounter.java b/legacy/src/main/java/org/opensearch/sql/legacy/metrics/BasicCounter.java index 88d5f817e8..5c238521a0 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/metrics/BasicCounter.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/metrics/BasicCounter.java @@ -9,7 +9,7 @@ public class BasicCounter implements Counter { - private LongAdder count = new LongAdder(); + private final LongAdder count = new LongAdder(); @Override public void increment() { diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/metrics/GaugeMetric.java b/legacy/src/main/java/org/opensearch/sql/legacy/metrics/GaugeMetric.java index 2f7c269351..150862e4d1 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/metrics/GaugeMetric.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/metrics/GaugeMetric.java @@ -10,7 +10,7 @@ /** Gauge metric, an instant value like cpu usage, state and so on */ public class GaugeMetric extends Metric { - private Supplier loadValue; + private final Supplier loadValue; public GaugeMetric(String name, Supplier supplier) { super(name); diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/metrics/Metric.java b/legacy/src/main/java/org/opensearch/sql/legacy/metrics/Metric.java index 956e0f558c..ea543cbd2d 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/metrics/Metric.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/metrics/Metric.java @@ -9,7 +9,7 @@ public abstract class Metric implements java.io.Serializable { private static final long serialVersionUID = 1L; - private String name; + private final String name; public Metric(String name) { this.name = name; diff --git 
a/legacy/src/main/java/org/opensearch/sql/legacy/metrics/MetricName.java b/legacy/src/main/java/org/opensearch/sql/legacy/metrics/MetricName.java index 72960944b6..7c2a7cb824 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/metrics/MetricName.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/metrics/MetricName.java @@ -50,7 +50,7 @@ public enum MetricName { EMR_BATCH_QUERY_JOBS_CREATION_COUNT("emr_batch_jobs_creation_count"), STREAMING_JOB_HOUSEKEEPER_TASK_FAILURE_COUNT("streaming_job_housekeeper_task_failure_count"); - private String name; + private final String name; MetricName(String name) { this.name = name; @@ -64,7 +64,7 @@ public static List getNames() { return Arrays.stream(MetricName.values()).map(v -> v.name).collect(Collectors.toList()); } - private static Set NUMERICAL_METRIC = + private static final Set NUMERICAL_METRIC = new ImmutableSet.Builder() .add(PPL_REQ_TOTAL) .add(PPL_REQ_COUNT_TOTAL) diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/metrics/Metrics.java b/legacy/src/main/java/org/opensearch/sql/legacy/metrics/Metrics.java index 858f9e5cef..a47f0e12bd 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/metrics/Metrics.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/metrics/Metrics.java @@ -12,8 +12,9 @@ public class Metrics { - private static Metrics metrics = new Metrics(); - private ConcurrentHashMap registeredMetricsByName = new ConcurrentHashMap<>(); + private static final Metrics metrics = new Metrics(); + private final ConcurrentHashMap registeredMetricsByName = + new ConcurrentHashMap<>(); public static Metrics getInstance() { return metrics; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/metrics/NumericMetric.java b/legacy/src/main/java/org/opensearch/sql/legacy/metrics/NumericMetric.java index ee6d373f8f..93bc9485bc 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/metrics/NumericMetric.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/metrics/NumericMetric.java @@ -7,7 +7,7 @@ public class NumericMetric extends Metric { - private Counter counter; + private final Counter counter; public NumericMetric(String name, Counter counter) { super(name); diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/parser/CaseWhenParser.java b/legacy/src/main/java/org/opensearch/sql/legacy/parser/CaseWhenParser.java index d55ee64601..9b26bf4608 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/parser/CaseWhenParser.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/parser/CaseWhenParser.java @@ -21,9 +21,9 @@ /** Created by allwefantasy on 9/3/16. 
*/ public class CaseWhenParser { - private SQLCaseExpr caseExpr; - private String alias; - private String tableAlias; + private final SQLCaseExpr caseExpr; + private final String alias; + private final String tableAlias; public CaseWhenParser(SQLCaseExpr caseExpr, String alias, String tableAlias) { this.alias = alias; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/parser/ElasticSqlExprParser.java b/legacy/src/main/java/org/opensearch/sql/legacy/parser/ElasticSqlExprParser.java index be9c2f9652..ed9cc2cd3d 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/parser/ElasticSqlExprParser.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/parser/ElasticSqlExprParser.java @@ -193,7 +193,7 @@ public SQLExpr primary() { return expr; } - public static String[] AGGREGATE_FUNCTIONS = { + public static final String[] AGGREGATE_FUNCTIONS = { "AVG", "COUNT", "GROUP_CONCAT", "MAX", "MIN", "STDDEV", "SUM" }; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/parser/FieldMaker.java b/legacy/src/main/java/org/opensearch/sql/legacy/parser/FieldMaker.java index da08f81453..ca0fbb8393 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/parser/FieldMaker.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/parser/FieldMaker.java @@ -43,7 +43,7 @@ * @author ansj */ public class FieldMaker { - private SQLFunctions sqlFunctions = new SQLFunctions(); + private final SQLFunctions sqlFunctions = new SQLFunctions(); public Field makeField(SQLExpr expr, String alias, String tableAlias) throws SqlParseException { Field field = makeFieldImpl(expr, alias, tableAlias); diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/parser/SqlParser.java b/legacy/src/main/java/org/opensearch/sql/legacy/parser/SqlParser.java index 947533630b..c380ded176 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/parser/SqlParser.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/parser/SqlParser.java @@ -55,7 +55,7 @@ * @author ansj */ public class SqlParser { - private FieldMaker fieldMaker = new FieldMaker(); + private final FieldMaker fieldMaker = new FieldMaker(); public SqlParser() {} diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/parser/SubQueryExpression.java b/legacy/src/main/java/org/opensearch/sql/legacy/parser/SubQueryExpression.java index e9b0797d00..663de4f4b7 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/parser/SubQueryExpression.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/parser/SubQueryExpression.java @@ -11,7 +11,7 @@ public class SubQueryExpression { private Object[] values; private Select select; - private String returnField; + private final String returnField; public SubQueryExpression(Select innerSelect) { this.select = innerSelect; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/parser/WhereParser.java b/legacy/src/main/java/org/opensearch/sql/legacy/parser/WhereParser.java index a329d1ed52..4ba25f9fad 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/parser/WhereParser.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/parser/WhereParser.java @@ -44,12 +44,12 @@ /** Created by allwefantasy on 9/2/16. 
*/ public class WhereParser { - private FieldMaker fieldMaker; + private final FieldMaker fieldMaker; private MySqlSelectQueryBlock query; private SQLDeleteStatement delete; private SQLExpr where; - private SqlParser sqlParser; + private final SqlParser sqlParser; public WhereParser(SqlParser sqlParser, MySqlSelectQueryBlock query, FieldMaker fieldMaker) { this.sqlParser = sqlParser; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/pit/PointInTimeHandler.java b/legacy/src/main/java/org/opensearch/sql/legacy/pit/PointInTimeHandler.java new file mode 100644 index 0000000000..66339cc70a --- /dev/null +++ b/legacy/src/main/java/org/opensearch/sql/legacy/pit/PointInTimeHandler.java @@ -0,0 +1,18 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.legacy.pit; + +/** Point In Time */ +public interface PointInTimeHandler { + /** Create Point In Time */ + void create(); + + /** Delete Point In Time */ + void delete(); + + /** Get Point In Time Identifier */ + String getPitId(); +} diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/pit/PointInTimeHandlerImpl.java b/legacy/src/main/java/org/opensearch/sql/legacy/pit/PointInTimeHandlerImpl.java new file mode 100644 index 0000000000..db3530e91e --- /dev/null +++ b/legacy/src/main/java/org/opensearch/sql/legacy/pit/PointInTimeHandlerImpl.java @@ -0,0 +1,84 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.legacy.pit; + +import static org.opensearch.sql.common.setting.Settings.Key.SQL_CURSOR_KEEP_ALIVE; + +import java.util.concurrent.ExecutionException; +import lombok.Getter; +import lombok.Setter; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.action.search.CreatePitAction; +import org.opensearch.action.search.CreatePitRequest; +import org.opensearch.action.search.CreatePitResponse; +import org.opensearch.action.search.DeletePitAction; +import org.opensearch.action.search.DeletePitRequest; +import org.opensearch.action.search.DeletePitResponse; +import org.opensearch.client.Client; +import org.opensearch.common.action.ActionFuture; +import org.opensearch.sql.legacy.esdomain.LocalClusterState; + +/** Handler for Point In Time */ +public class PointInTimeHandlerImpl implements PointInTimeHandler { + private final Client client; + private String[] indices; + @Getter @Setter private String pitId; + private static final Logger LOG = LogManager.getLogger(); + + /** + * Constructor for class + * + * @param client OpenSearch client + * @param indices list of indices + */ + public PointInTimeHandlerImpl(Client client, String[] indices) { + this.client = client; + this.indices = indices; + } + + /** + * Constructor for class + * + * @param client OpenSearch client + * @param pitId Point In Time ID + */ + public PointInTimeHandlerImpl(Client client, String pitId) { + this.client = client; + this.pitId = pitId; + } + + /** Create PIT for given indices */ + @Override + public void create() { + CreatePitRequest createPitRequest = + new CreatePitRequest( + LocalClusterState.state().getSettingValue(SQL_CURSOR_KEEP_ALIVE), false, indices); + ActionFuture execute = + client.execute(CreatePitAction.INSTANCE, createPitRequest); + try { + CreatePitResponse pitResponse = execute.get(); + pitId = pitResponse.getId(); + LOG.info("Created Point In Time {} successfully.", pitId); + } catch (InterruptedException | ExecutionException e) { + throw new 
RuntimeException("Error occurred while creating PIT.", e); + } + } + + /** Delete PIT */ + @Override + public void delete() { + DeletePitRequest deletePitRequest = new DeletePitRequest(pitId); + ActionFuture execute = + client.execute(DeletePitAction.INSTANCE, deletePitRequest); + try { + DeletePitResponse deletePitResponse = execute.get(); + LOG.info("Delete Point In Time {} status: {}", pitId, deletePitResponse.status().getStatus()); + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException("Error occurred while deleting PIT.", e); + } + } +} diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSQLQueryAction.java b/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSQLQueryAction.java index 309a7c9c2a..4440219f1b 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSQLQueryAction.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSQLQueryAction.java @@ -172,7 +172,7 @@ private ResponseListener createQueryResponseListener( } else if (format.equals(Format.CSV)) { formatter = new CsvResponseFormatter(request.sanitize()); } else if (format.equals(Format.RAW)) { - formatter = new RawResponseFormatter(); + formatter = new RawResponseFormatter(request.pretty()); } else { formatter = new JdbcResponseFormatter(PRETTY); } diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/AggregationQueryAction.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/AggregationQueryAction.java index 57af269001..c7a1f82df1 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/AggregationQueryAction.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/AggregationQueryAction.java @@ -41,7 +41,7 @@ public class AggregationQueryAction extends QueryAction { private final Select select; - private AggMaker aggMaker = new AggMaker(); + private final AggMaker aggMaker = new AggMaker(); private SearchRequestBuilder request; public AggregationQueryAction(Client client, Select select) { diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/DefaultQueryAction.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/DefaultQueryAction.java index 18c9708df8..0e9d09d3e7 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/DefaultQueryAction.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/DefaultQueryAction.java @@ -5,6 +5,9 @@ package org.opensearch.sql.legacy.query; +import static org.opensearch.sql.common.setting.Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER; +import static org.opensearch.sql.opensearch.storage.OpenSearchIndex.METADATA_FIELD_ID; + import com.alibaba.druid.sql.ast.SQLExpr; import com.alibaba.druid.sql.ast.expr.SQLBinaryOpExpr; import com.alibaba.druid.sql.ast.expr.SQLBinaryOperator; @@ -100,7 +103,20 @@ public void checkAndSetScroll() { .getNumericalMetric(MetricName.DEFAULT_CURSOR_REQUEST_COUNT_TOTAL) .increment(); Metrics.getInstance().getNumericalMetric(MetricName.DEFAULT_CURSOR_REQUEST_TOTAL).increment(); - request.setSize(fetchSize).setScroll(timeValue); + request.setSize(fetchSize); + // Set scroll or search after for pagination + if (LocalClusterState.state().getSettingValue(SQL_PAGINATION_API_SEARCH_AFTER)) { + // search after requires results to be in specific order + // set sort field for search_after + boolean ordered = select.isOrderdSelect(); + if (!ordered) { + request.addSort(FieldSortBuilder.DOC_FIELD_NAME, SortOrder.ASC); + request.addSort(METADATA_FIELD_ID, SortOrder.ASC); + } + // Request also requires PointInTime, but we 
should create pit while execution. + } else { + request.setScroll(timeValue); + } } else { request.setSearchType(SearchType.DFS_QUERY_THEN_FETCH); setLimit(select.getOffset(), rowCount != null ? rowCount : Select.DEFAULT_LIMIT); diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/OpenSearchActionFactory.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/OpenSearchActionFactory.java index b9a7c9f218..a5bfeebfbd 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/OpenSearchActionFactory.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/OpenSearchActionFactory.java @@ -8,6 +8,7 @@ import static org.opensearch.sql.legacy.domain.IndexStatement.StatementType; import static org.opensearch.sql.legacy.utils.Util.toSqlExpr; +import com.alibaba.druid.sql.ast.SQLExpr; import com.alibaba.druid.sql.ast.expr.SQLAggregateExpr; import com.alibaba.druid.sql.ast.expr.SQLAllColumnExpr; import com.alibaba.druid.sql.ast.expr.SQLMethodInvokeExpr; @@ -86,7 +87,14 @@ public static QueryAction create(Client client, QueryActionRequest request) switch (getFirstWord(sql)) { case "SELECT": - SQLQueryExpr sqlExpr = (SQLQueryExpr) toSqlExpr(sql); + SQLExpr rawExpr = toSqlExpr(sql); + if (!(rawExpr instanceof SQLQueryExpr)) { + throw new SqlParseException( + "Expected a query expression, but found a " + + rawExpr.getClass().getSimpleName() + + ". The query is not runnable."); + } + SQLQueryExpr sqlExpr = (SQLQueryExpr) rawExpr; RewriteRuleExecutor ruleExecutor = RewriteRuleExecutor.builder() diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/QueryAction.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/QueryAction.java index c9b39d2f97..4d40701964 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/QueryAction.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/QueryAction.java @@ -36,8 +36,8 @@ */ public abstract class QueryAction { - protected Query query; - protected Client client; + protected final Query query; + protected final Client client; protected SqlRequest sqlRequest = SqlRequest.NULL; protected ColumnTypeProvider scriptColumnType; protected Format format; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/SqlElasticDeleteByQueryRequestBuilder.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/SqlElasticDeleteByQueryRequestBuilder.java index 2203cbb39e..1ff35df531 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/SqlElasticDeleteByQueryRequestBuilder.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/SqlElasticDeleteByQueryRequestBuilder.java @@ -13,7 +13,7 @@ /** Created by Eliran on 19/8/2015. */ public class SqlElasticDeleteByQueryRequestBuilder implements SqlElasticRequestBuilder { - DeleteByQueryRequestBuilder deleteByQueryRequestBuilder; + final DeleteByQueryRequestBuilder deleteByQueryRequestBuilder; public SqlElasticDeleteByQueryRequestBuilder( DeleteByQueryRequestBuilder deleteByQueryRequestBuilder) { diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/SqlOpenSearchRequestBuilder.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/SqlOpenSearchRequestBuilder.java index 2beb16837b..d15debb0fd 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/SqlOpenSearchRequestBuilder.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/SqlOpenSearchRequestBuilder.java @@ -11,7 +11,7 @@ /** Created by Eliran on 19/8/2015. 
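The checkAndSetScroll() change above only prepares the request: when the search_after setting is on, it pins a deterministic sort order and defers PIT creation to execution time; otherwise it falls back to a scroll. Isolated, the sort setup and its rationale look like this (METADATA_FIELD_ID is the id metadata field imported from OpenSearchIndex above):

import static org.opensearch.sql.opensearch.storage.OpenSearchIndex.METADATA_FIELD_ID;

import org.opensearch.action.search.SearchRequestBuilder;
import org.opensearch.search.sort.FieldSortBuilder;
import org.opensearch.search.sort.SortOrder;

class PaginationSortSetup {
  /**
   * search_after only works against a total order. An unordered SELECT therefore gets a
   * synthetic sort: _doc (index order, cheap) plus the id metadata field, so rows that tie
   * on _doc still have a unique, repeatable position between pages.
   */
  static void applyPaginationSort(SearchRequestBuilder request, boolean ordered) {
    if (!ordered) {
      request.addSort(FieldSortBuilder.DOC_FIELD_NAME, SortOrder.ASC);
      request.addSort(METADATA_FIELD_ID, SortOrder.ASC);
    }
    // The point in time itself is attached later, when the query is executed.
  }
}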
*/ public class SqlOpenSearchRequestBuilder implements SqlElasticRequestBuilder { - ActionRequestBuilder requestBuilder; + final ActionRequestBuilder requestBuilder; public SqlOpenSearchRequestBuilder(ActionRequestBuilder requestBuilder) { this.requestBuilder = requestBuilder; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/join/BackOffRetryStrategy.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/join/BackOffRetryStrategy.java index d767268cb1..3386298802 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/join/BackOffRetryStrategy.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/join/BackOffRetryStrategy.java @@ -31,9 +31,9 @@ public class BackOffRetryStrategy { private static final int threshold = 85; - private static IdentityHashMap> memUse = new IdentityHashMap<>(); + private static final IdentityHashMap> memUse = new IdentityHashMap<>(); - private static AtomicLong mem = new AtomicLong(0L); + private static final AtomicLong mem = new AtomicLong(0L); private static long lastTimeoutCleanTime = System.currentTimeMillis(); diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/join/JoinRequestBuilder.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/join/JoinRequestBuilder.java index 82ebd1b225..0a9917a624 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/join/JoinRequestBuilder.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/join/JoinRequestBuilder.java @@ -21,8 +21,8 @@ public class JoinRequestBuilder implements SqlElasticRequestBuilder { private MultiSearchRequest multi; - private TableInJoinRequestBuilder firstTable; - private TableInJoinRequestBuilder secondTable; + private final TableInJoinRequestBuilder firstTable; + private final TableInJoinRequestBuilder secondTable; private SQLJoinTableSource.JoinType joinType; private int totalLimit; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/join/OpenSearchJoinQueryAction.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/join/OpenSearchJoinQueryAction.java index 7068ddf9a2..b317ef9740 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/join/OpenSearchJoinQueryAction.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/join/OpenSearchJoinQueryAction.java @@ -22,7 +22,7 @@ /** Created by Eliran on 15/9/2015. */ public abstract class OpenSearchJoinQueryAction extends QueryAction { - protected JoinSelect joinSelect; + protected final JoinSelect joinSelect; public OpenSearchJoinQueryAction(Client client, JoinSelect joinSelect) { super(client, joinSelect); diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/maker/AggMaker.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/maker/AggMaker.java index dcb703cd33..75753ce24f 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/maker/AggMaker.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/maker/AggMaker.java @@ -65,7 +65,7 @@ public class AggMaker { /** The mapping bettwen group fieldName or Alias to the KVValue. 
*/ - private Map groupMap = new HashMap<>(); + private final Map groupMap = new HashMap<>(); private Where where; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/multi/MultiQueryAction.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/multi/MultiQueryAction.java index a9eb6113f7..549fdfc077 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/multi/MultiQueryAction.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/multi/MultiQueryAction.java @@ -19,7 +19,7 @@ /** Created by Eliran on 19/8/2016. */ public class MultiQueryAction extends QueryAction { - private MultiQuerySelect multiQuerySelect; + private final MultiQuerySelect multiQuerySelect; public MultiQueryAction(Client client, MultiQuerySelect multiSelect) { super(client, null); diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/multi/MultiQueryRequestBuilder.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/multi/MultiQueryRequestBuilder.java index b4e92a8de6..a02f4037c7 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/multi/MultiQueryRequestBuilder.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/multi/MultiQueryRequestBuilder.java @@ -27,10 +27,10 @@ public class MultiQueryRequestBuilder implements SqlElasticRequestBuilder { private SearchRequestBuilder firstSearchRequest; private SearchRequestBuilder secondSearchRequest; - private Map firstTableFieldToAlias; - private Map secondTableFieldToAlias; - private MultiQuerySelect multiQuerySelect; - private SQLUnionOperator relation; + private final Map firstTableFieldToAlias; + private final Map secondTableFieldToAlias; + private final MultiQuerySelect multiQuerySelect; + private final SQLUnionOperator relation; public MultiQueryRequestBuilder(MultiQuerySelect multiQuerySelect) { this.multiQuerySelect = multiQuerySelect; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/multi/MultiQuerySelect.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/multi/MultiQuerySelect.java index 72e7232a30..6a573c999f 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/multi/MultiQuerySelect.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/multi/MultiQuerySelect.java @@ -10,9 +10,9 @@ /** Created by Eliran on 19/8/2016. 
*/ public class MultiQuerySelect { - private SQLUnionOperator operation; - private Select firstSelect; - private Select secondSelect; + private final SQLUnionOperator operation; + private final Select firstSelect; + private final Select secondSelect; public MultiQuerySelect(SQLUnionOperator operation, Select firstSelect, Select secondSelect) { this.operation = operation; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/converter/SQLAggregationParser.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/converter/SQLAggregationParser.java index b54e260fd4..0944339939 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/converter/SQLAggregationParser.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/converter/SQLAggregationParser.java @@ -39,7 +39,7 @@ public class SQLAggregationParser { private final ColumnTypeProvider columnTypeProvider; private Context context; - @Getter private List columnNodes = new ArrayList<>(); + @Getter private final List columnNodes = new ArrayList<>(); public void parse(MySqlSelectQueryBlock queryBlock) { context = new Context(constructSQLExprAliasMapFromSelect(queryBlock)); diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/core/BindingTupleQueryPlanner.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/core/BindingTupleQueryPlanner.java index a8fb7cc53c..e4cb323e71 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/core/BindingTupleQueryPlanner.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/core/BindingTupleQueryPlanner.java @@ -18,8 +18,8 @@ /** The definition of QueryPlanner which return the {@link BindingTuple} as result. */ public class BindingTupleQueryPlanner { - private PhysicalOperator physicalOperator; - @Getter private List columnNodes; + private final PhysicalOperator physicalOperator; + @Getter private final List columnNodes; public BindingTupleQueryPlanner( Client client, SQLQueryExpr sqlExpr, ColumnTypeProvider columnTypeProvider) { diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/core/ExecuteParams.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/core/ExecuteParams.java index c5ed48a514..86ab26876a 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/core/ExecuteParams.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/core/ExecuteParams.java @@ -11,7 +11,7 @@ public class ExecuteParams { /** Mapping from type to parameters */ - private EnumMap params = new EnumMap<>(ExecuteParamType.class); + private final EnumMap params = new EnumMap<>(ExecuteParamType.class); public void add(ExecuteParamType type, T param) { params.put(type, param); diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/logical/node/Group.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/logical/node/Group.java index da94ae74da..79724db7d9 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/logical/node/Group.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/logical/node/Group.java @@ -14,10 +14,10 @@ public class Group implements LogicalOperator { /** Optional pushed down projection */ - private Project project; + private final Project project; /** Optional pushed down filter (selection) */ - private Filter filter; + private final Filter filter; /** Required table scan operator */ private final TableScan tableScan; diff --git 
a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/logical/node/TableScan.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/logical/node/TableScan.java index 16af199ed7..59e6f27216 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/logical/node/TableScan.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/logical/node/TableScan.java @@ -5,11 +5,15 @@ package org.opensearch.sql.legacy.query.planner.logical.node; +import static org.opensearch.sql.common.setting.Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER; + import java.util.Map; +import org.opensearch.sql.legacy.esdomain.LocalClusterState; import org.opensearch.sql.legacy.query.join.TableInJoinRequestBuilder; import org.opensearch.sql.legacy.query.planner.core.PlanNode; import org.opensearch.sql.legacy.query.planner.logical.LogicalOperator; import org.opensearch.sql.legacy.query.planner.physical.PhysicalOperator; +import org.opensearch.sql.legacy.query.planner.physical.node.pointInTime.PointInTime; import org.opensearch.sql.legacy.query.planner.physical.node.scroll.Scroll; /** Table scan */ @@ -33,6 +37,9 @@ public PlanNode[] children() { @Override public PhysicalOperator[] toPhysical(Map> optimalOps) { + if (LocalClusterState.state().getSettingValue(SQL_PAGINATION_API_SEARCH_AFTER)) { + return new PhysicalOperator[] {new PointInTime(request, pageSize)}; + } return new PhysicalOperator[] {new Scroll(request, pageSize)}; } diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/estimation/Estimation.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/estimation/Estimation.java index 72ffbd4652..982b400821 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/estimation/Estimation.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/estimation/Estimation.java @@ -23,7 +23,7 @@ public class Estimation implements LogicalPlanVisitor { /** Optimal physical operator for logical operator based on completed estimation */ - private Map> optimalOps = new IdentityHashMap<>(); + private final Map> optimalOps = new IdentityHashMap<>(); /** Keep tracking of the operator that exit visit() */ private PhysicalOperator root; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/Paginate.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/Paginate.java new file mode 100644 index 0000000000..5bf31bb691 --- /dev/null +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/Paginate.java @@ -0,0 +1,144 @@ +package org.opensearch.sql.legacy.query.planner.physical.node; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Objects; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.client.Client; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.core.common.Strings; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.search.SearchHit; +import org.opensearch.sql.legacy.domain.Where; +import org.opensearch.sql.legacy.exception.SqlParseException; +import org.opensearch.sql.legacy.query.join.TableInJoinRequestBuilder; +import org.opensearch.sql.legacy.query.maker.QueryMaker; +import org.opensearch.sql.legacy.query.planner.core.ExecuteParams; +import org.opensearch.sql.legacy.query.planner.core.PlanNode; +import 
org.opensearch.sql.legacy.query.planner.physical.Row; +import org.opensearch.sql.legacy.query.planner.physical.estimation.Cost; +import org.opensearch.sql.legacy.query.planner.resource.ResourceManager; + +public abstract class Paginate extends BatchPhysicalOperator { + + /** Request to submit to OpenSearch to scan over */ + protected final TableInJoinRequestBuilder request; + + protected final int pageSize; + + protected Client client; + + protected SearchResponse searchResponse; + + protected Integer timeout; + + protected ResourceManager resourceMgr; + + public Paginate(TableInJoinRequestBuilder request, int pageSize) { + this.request = request; + this.pageSize = pageSize; + } + + @Override + public PlanNode[] children() { + return new PlanNode[0]; + } + + @Override + public Cost estimate() { + return new Cost(); + } + + @Override + public void open(ExecuteParams params) throws Exception { + super.open(params); + client = params.get(ExecuteParams.ExecuteParamType.CLIENT); + timeout = params.get(ExecuteParams.ExecuteParamType.TIMEOUT); + resourceMgr = params.get(ExecuteParams.ExecuteParamType.RESOURCE_MANAGER); + + Object filter = params.get(ExecuteParams.ExecuteParamType.EXTRA_QUERY_FILTER); + if (filter instanceof BoolQueryBuilder) { + request + .getRequestBuilder() + .setQuery(generateNewQueryWithExtraFilter((BoolQueryBuilder) filter)); + + if (LOG.isDebugEnabled()) { + LOG.debug( + "Received extra query filter, re-build query: {}", + Strings.toString( + XContentType.JSON, request.getRequestBuilder().request().source(), true, true)); + } + } + } + + @Override + protected Collection> prefetch() { + Objects.requireNonNull(client, "Client connection is not ready"); + Objects.requireNonNull(resourceMgr, "ResourceManager is not set"); + Objects.requireNonNull(timeout, "Time out is not set"); + + if (searchResponse == null) { + loadFirstBatch(); + updateMetaResult(); + } else { + loadNextBatch(); + } + return wrapRowForCurrentBatch(); + } + + protected abstract void loadFirstBatch(); + + protected abstract void loadNextBatch(); + + /** + * Extra filter pushed down from upstream. Re-parse WHERE clause with extra filter because + * OpenSearch RequestBuilder doesn't allow QueryBuilder inside be changed after added. 
+ */ + protected QueryBuilder generateNewQueryWithExtraFilter(BoolQueryBuilder filter) + throws SqlParseException { + Where where = request.getOriginalSelect().getWhere(); + BoolQueryBuilder newQuery; + if (where != null) { + newQuery = QueryMaker.explain(where, false); + newQuery.must(filter); + } else { + newQuery = filter; + } + return newQuery; + } + + protected void updateMetaResult() { + resourceMgr.getMetaResult().addTotalNumOfShards(searchResponse.getTotalShards()); + resourceMgr.getMetaResult().addSuccessfulShards(searchResponse.getSuccessfulShards()); + resourceMgr.getMetaResult().addFailedShards(searchResponse.getFailedShards()); + resourceMgr.getMetaResult().updateTimeOut(searchResponse.isTimedOut()); + } + + @SuppressWarnings("unchecked") + protected Collection> wrapRowForCurrentBatch() { + SearchHit[] hits = searchResponse.getHits().getHits(); + Row[] rows = new Row[hits.length]; + for (int i = 0; i < hits.length; i++) { + rows[i] = new SearchHitRow(hits[i], request.getAlias()); + } + return Arrays.asList(rows); + } + + @Override + public String toString() { + return getClass().getSimpleName() + " [ " + describeTable() + ", pageSize=" + pageSize + " ]"; + } + + protected String describeTable() { + return request.getOriginalSelect().getFrom().get(0).getIndex() + " as " + request.getAlias(); + } + + /********************************************* + * Getters for Explain + *********************************************/ + + public String getRequest() { + return Strings.toString(XContentType.JSON, request.getRequestBuilder().request().source()); + } +} diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/scroll/SearchHitRow.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/SearchHitRow.java similarity index 97% rename from legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/scroll/SearchHitRow.java rename to legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/SearchHitRow.java index d03dd5af40..3031429ba8 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/scroll/SearchHitRow.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/SearchHitRow.java @@ -3,7 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.sql.legacy.query.planner.physical.node.scroll; +package org.opensearch.sql.legacy.query.planner.physical.node; import com.google.common.base.Strings; import java.util.HashMap; @@ -36,7 +36,7 @@ * ---------------------------------------------------------------------------------------------------------------------- * */ -class SearchHitRow implements Row { +public class SearchHitRow implements Row { /** Native OpenSearch data object for each row */ private final SearchHit hit; @@ -47,7 +47,7 @@ class SearchHitRow implements Row { /** Table alias owned the row. 
Empty if this row comes from combination of two other rows */ private final String tableAlias; - SearchHitRow(SearchHit hit, String tableAlias) { + public SearchHitRow(SearchHit hit, String tableAlias) { this.hit = hit; this.source = hit.getSourceAsMap(); this.tableAlias = tableAlias; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/join/CombinedRow.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/join/CombinedRow.java index b1fb43441e..1682c14042 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/join/CombinedRow.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/join/CombinedRow.java @@ -18,8 +18,8 @@ */ public class CombinedRow { - private Row rightRow; - private Collection> leftRows; + private final Row rightRow; + private final Collection> leftRows; public CombinedRow(Row rightRow, Collection> leftRows) { this.rightRow = rightRow; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/join/JoinAlgorithm.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/join/JoinAlgorithm.java index 9f2c9e4174..0c0d50258d 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/join/JoinAlgorithm.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/join/JoinAlgorithm.java @@ -51,7 +51,7 @@ public abstract class JoinAlgorithm extends BatchPhysicalOperator { private final Set> leftMismatch; /** Hash table for right table probing */ - protected HashTable hashTable; + protected final HashTable hashTable; /** Execute params to reset right side for each left block */ protected ExecuteParams params; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/join/ListHashTable.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/join/ListHashTable.java index baf0af8c86..37486045b7 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/join/ListHashTable.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/join/ListHashTable.java @@ -15,7 +15,7 @@ /** List implementation to avoid normal hash table degrading into linked list. 
*/ public class ListHashTable implements HashTable { - private List> rows = new ArrayList<>(); + private final List> rows = new ArrayList<>(); @Override public void add(Row row) { diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/pointInTime/PointInTime.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/pointInTime/PointInTime.java new file mode 100644 index 0000000000..a879a21ee8 --- /dev/null +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/pointInTime/PointInTime.java @@ -0,0 +1,76 @@ +package org.opensearch.sql.legacy.query.planner.physical.node.pointInTime; + +import static org.opensearch.sql.opensearch.storage.OpenSearchIndex.METADATA_FIELD_ID; + +import org.opensearch.common.unit.TimeValue; +import org.opensearch.search.builder.PointInTimeBuilder; +import org.opensearch.search.sort.FieldSortBuilder; +import org.opensearch.search.sort.SortOrder; +import org.opensearch.sql.legacy.pit.PointInTimeHandlerImpl; +import org.opensearch.sql.legacy.query.join.TableInJoinRequestBuilder; +import org.opensearch.sql.legacy.query.planner.physical.node.Paginate; + +/** OpenSearch Search API with Point in time as physical implementation of TableScan */ +public class PointInTime extends Paginate { + + private String pitId; + private PointInTimeHandlerImpl pit; + + public PointInTime(TableInJoinRequestBuilder request, int pageSize) { + super(request, pageSize); + } + + @Override + public void close() { + if (searchResponse != null) { + LOG.debug("Closing Point In Time (PIT) context"); + // Delete the Point In Time context + pit.delete(); + searchResponse = null; + } else { + LOG.debug("PIT context is already closed or was never opened"); + } + } + + @Override + protected void loadFirstBatch() { + // Create PIT and set to request object + pit = new PointInTimeHandlerImpl(client, request.getOriginalSelect().getIndexArr()); + pit.create(); + pitId = pit.getPitId(); + + LOG.info("Loading first batch of response using Point In Time"); + searchResponse = + request + .getRequestBuilder() + .addSort(FieldSortBuilder.DOC_FIELD_NAME, SortOrder.ASC) + .addSort(METADATA_FIELD_ID, SortOrder.ASC) + .setSize(pageSize) + .setTimeout(TimeValue.timeValueSeconds(timeout)) + .setPointInTime(new PointInTimeBuilder(pitId)) + .get(); + } + + @Override + protected void loadNextBatch() { + // Add PIT with search after to fetch next batch of data + if (searchResponse.getHits().getHits() != null + && searchResponse.getHits().getHits().length > 0) { + Object[] sortValues = + searchResponse + .getHits() + .getHits()[searchResponse.getHits().getHits().length - 1] + .getSortValues(); + + LOG.info("Loading next batch of response using Point In Time. 
- " + pitId); + searchResponse = + request + .getRequestBuilder() + .setSize(pageSize) + .setTimeout(TimeValue.timeValueSeconds(timeout)) + .setPointInTime(new PointInTimeBuilder(pitId)) + .searchAfter(sortValues) + .get(); + } + } +} diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/scroll/Scroll.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/scroll/Scroll.java index 40e9860886..9a8deba46a 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/scroll/Scroll.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/physical/node/scroll/Scroll.java @@ -5,185 +5,55 @@ package org.opensearch.sql.legacy.query.planner.physical.node.scroll; -import java.util.Arrays; -import java.util.Collection; -import java.util.Objects; +import static org.opensearch.sql.opensearch.storage.OpenSearchIndex.METADATA_FIELD_ID; + import org.opensearch.action.search.ClearScrollResponse; -import org.opensearch.action.search.SearchResponse; -import org.opensearch.client.Client; import org.opensearch.common.unit.TimeValue; -import org.opensearch.common.xcontent.XContentType; -import org.opensearch.core.common.Strings; -import org.opensearch.index.query.BoolQueryBuilder; -import org.opensearch.index.query.QueryBuilder; -import org.opensearch.search.SearchHit; import org.opensearch.search.sort.FieldSortBuilder; import org.opensearch.search.sort.SortOrder; -import org.opensearch.sql.legacy.domain.Where; -import org.opensearch.sql.legacy.exception.SqlParseException; import org.opensearch.sql.legacy.query.join.TableInJoinRequestBuilder; -import org.opensearch.sql.legacy.query.maker.QueryMaker; -import org.opensearch.sql.legacy.query.planner.core.ExecuteParams; -import org.opensearch.sql.legacy.query.planner.core.PlanNode; -import org.opensearch.sql.legacy.query.planner.physical.Row; -import org.opensearch.sql.legacy.query.planner.physical.estimation.Cost; -import org.opensearch.sql.legacy.query.planner.physical.node.BatchPhysicalOperator; -import org.opensearch.sql.legacy.query.planner.resource.ResourceManager; +import org.opensearch.sql.legacy.query.planner.physical.node.Paginate; /** OpenSearch Scroll API as physical implementation of TableScan */ -public class Scroll extends BatchPhysicalOperator { - - /** Request to submit to OpenSearch to scroll over */ - private final TableInJoinRequestBuilder request; - - /** Page size to scroll over index */ - private final int pageSize; - - /** Client connection to ElasticSearch */ - private Client client; - - /** Currently undergoing Scroll */ - private SearchResponse scrollResponse; - - /** Time out */ - private Integer timeout; - - /** Resource monitor manager */ - private ResourceManager resourceMgr; +public class Scroll extends Paginate { public Scroll(TableInJoinRequestBuilder request, int pageSize) { - this.request = request; - this.pageSize = pageSize; - } - - @Override - public PlanNode[] children() { - return new PlanNode[0]; - } - - @Override - public Cost estimate() { - return new Cost(); - } - - @Override - public void open(ExecuteParams params) throws Exception { - super.open(params); - client = params.get(ExecuteParams.ExecuteParamType.CLIENT); - timeout = params.get(ExecuteParams.ExecuteParamType.TIMEOUT); - resourceMgr = params.get(ExecuteParams.ExecuteParamType.RESOURCE_MANAGER); - - Object filter = params.get(ExecuteParams.ExecuteParamType.EXTRA_QUERY_FILTER); - if (filter instanceof BoolQueryBuilder) { - request - .getRequestBuilder() - 
.setQuery(generateNewQueryWithExtraFilter((BoolQueryBuilder) filter)); - - if (LOG.isDebugEnabled()) { - LOG.debug( - "Received extra query filter, re-build query: {}", - Strings.toString( - XContentType.JSON, request.getRequestBuilder().request().source(), true, true)); - } - } + super(request, pageSize); } @Override public void close() { - if (scrollResponse != null) { + if (searchResponse != null) { LOG.debug("Closing all scroll resources"); ClearScrollResponse clearScrollResponse = - client.prepareClearScroll().addScrollId(scrollResponse.getScrollId()).get(); + client.prepareClearScroll().addScrollId(searchResponse.getScrollId()).get(); if (!clearScrollResponse.isSucceeded()) { LOG.warn("Failed to close scroll: {}", clearScrollResponse.status()); } - scrollResponse = null; + searchResponse = null; } else { LOG.debug("Scroll already be closed"); } } @Override - protected Collection> prefetch() { - Objects.requireNonNull(client, "Client connection is not ready"); - Objects.requireNonNull(resourceMgr, "ResourceManager is not set"); - Objects.requireNonNull(timeout, "Time out is not set"); - - if (scrollResponse == null) { - loadFirstBatch(); - updateMetaResult(); - } else { - loadNextBatchByScrollId(); - } - return wrapRowForCurrentBatch(); - } - - /** - * Extra filter pushed down from upstream. Re-parse WHERE clause with extra filter because - * OpenSearch RequestBuilder doesn't allow QueryBuilder inside be changed after added. - */ - private QueryBuilder generateNewQueryWithExtraFilter(BoolQueryBuilder filter) - throws SqlParseException { - Where where = request.getOriginalSelect().getWhere(); - BoolQueryBuilder newQuery; - if (where != null) { - newQuery = QueryMaker.explain(where, false); - newQuery.must(filter); - } else { - newQuery = filter; - } - return newQuery; - } - - private void loadFirstBatch() { - scrollResponse = + protected void loadFirstBatch() { + searchResponse = request .getRequestBuilder() .addSort(FieldSortBuilder.DOC_FIELD_NAME, SortOrder.ASC) + .addSort(METADATA_FIELD_ID, SortOrder.ASC) .setSize(pageSize) .setScroll(TimeValue.timeValueSeconds(timeout)) .get(); } - private void updateMetaResult() { - resourceMgr.getMetaResult().addTotalNumOfShards(scrollResponse.getTotalShards()); - resourceMgr.getMetaResult().addSuccessfulShards(scrollResponse.getSuccessfulShards()); - resourceMgr.getMetaResult().addFailedShards(scrollResponse.getFailedShards()); - resourceMgr.getMetaResult().updateTimeOut(scrollResponse.isTimedOut()); - } - - private void loadNextBatchByScrollId() { - scrollResponse = + @Override + protected void loadNextBatch() { + searchResponse = client - .prepareSearchScroll(scrollResponse.getScrollId()) + .prepareSearchScroll(searchResponse.getScrollId()) .setScroll(TimeValue.timeValueSeconds(timeout)) .get(); } - - @SuppressWarnings("unchecked") - private Collection> wrapRowForCurrentBatch() { - SearchHit[] hits = scrollResponse.getHits().getHits(); - Row[] rows = new Row[hits.length]; - for (int i = 0; i < hits.length; i++) { - rows[i] = new SearchHitRow(hits[i], request.getAlias()); - } - return Arrays.asList(rows); - } - - @Override - public String toString() { - return "Scroll [ " + describeTable() + ", pageSize=" + pageSize + " ]"; - } - - private String describeTable() { - return request.getOriginalSelect().getFrom().get(0).getIndex() + " as " + request.getAlias(); - } - - /********************************************* - * Getters for Explain - *********************************************/ - - public String getRequest() { - return 
Strings.toString(XContentType.JSON, request.getRequestBuilder().request().source()); - } } diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/resource/Stats.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/resource/Stats.java index 3ff4662ce4..ac830d2595 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/resource/Stats.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/resource/Stats.java @@ -19,7 +19,7 @@ public class Stats { /** Client connection to OpenSearch cluster (unused now) */ - private Client client; + private final Client client; public Stats(Client client) { this.client = client; @@ -31,8 +31,8 @@ public MemStats collectMemStats() { /** Statistics data class for memory usage */ public static class MemStats { - private long free; - private long total; + private final long free; + private final long total; public MemStats(long free, long total) { this.free = free; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/resource/blocksize/AdaptiveBlockSize.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/resource/blocksize/AdaptiveBlockSize.java index 339e326cc3..a5e7841d69 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/resource/blocksize/AdaptiveBlockSize.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/resource/blocksize/AdaptiveBlockSize.java @@ -8,7 +8,7 @@ /** Adaptive block size calculator based on resource usage dynamically. */ public class AdaptiveBlockSize implements BlockSize { - private int upperLimit; + private final int upperLimit; public AdaptiveBlockSize(int upperLimit) { this.upperLimit = upperLimit; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/resource/blocksize/BlockSize.java b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/resource/blocksize/BlockSize.java index 6e5a2703f4..154cc6122f 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/resource/blocksize/BlockSize.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/query/planner/resource/blocksize/BlockSize.java @@ -18,7 +18,7 @@ public interface BlockSize { /** Default implementation with fixed block size */ class FixedBlockSize implements BlockSize { - private int blockSize; + private final int blockSize; public FixedBlockSize(int blockSize) { this.blockSize = blockSize; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/request/PreparedStatementRequest.java b/legacy/src/main/java/org/opensearch/sql/legacy/request/PreparedStatementRequest.java index c32e529157..c9ce288784 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/request/PreparedStatementRequest.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/request/PreparedStatementRequest.java @@ -10,8 +10,8 @@ public class PreparedStatementRequest extends SqlRequest { - private List parameters; - private String sqlTemplate; + private final List parameters; + private final String sqlTemplate; public PreparedStatementRequest( String sql, JSONObject payloadJson, List parameters) { @@ -117,7 +117,7 @@ public enum ParameterType { } public static class PreparedStatementParameter { - protected T value; + protected final T value; public PreparedStatementParameter(T value) { this.value = value; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/join/JoinRewriteRule.java b/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/join/JoinRewriteRule.java index 884784ed42..0ce61966dc 100644 
--- a/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/join/JoinRewriteRule.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/join/JoinRewriteRule.java @@ -205,10 +205,10 @@ public String getAlias() { } /** Table Name. */ - private String name; + private final String name; /** Table Alias. */ - private String alias; + private final String alias; Table(String name, String alias) { this.name = name; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/matchtoterm/TermFieldRewriter.java b/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/matchtoterm/TermFieldRewriter.java index f9744ab841..d7a1000179 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/matchtoterm/TermFieldRewriter.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/matchtoterm/TermFieldRewriter.java @@ -28,6 +28,7 @@ import java.util.Map; import java.util.Optional; import java.util.stream.Stream; +import org.opensearch.index.IndexNotFoundException; import org.opensearch.sql.legacy.esdomain.LocalClusterState; import org.opensearch.sql.legacy.esdomain.mapping.FieldMappings; import org.opensearch.sql.legacy.esdomain.mapping.IndexMappings; @@ -39,8 +40,8 @@ */ public class TermFieldRewriter extends MySqlASTVisitorAdapter { - private Deque environment = new ArrayDeque<>(); - private TermRewriterFilter filterType; + private final Deque environment = new ArrayDeque<>(); + private final TermRewriterFilter filterType; public TermFieldRewriter() { this.filterType = TermRewriterFilter.COMMA; @@ -122,7 +123,23 @@ public boolean visit(SQLIdentifierExpr expr) { String fullFieldName = arr[1]; String index = curScope().getAliases().get(alias); + if (index == null) { + throw new IndexNotFoundException( + String.format( + "The requested table '%s' does not correspond to any known index. Only indices or" + + " table aliases are allowed.", + alias.replaceFirst("_\\d+$", ""))); + } + FieldMappings fieldMappings = curScope().getMapper().mapping(index); + if (fieldMappings == null) { + throw new IndexNotFoundException( + String.format( + "The index '%s' could not be found. Note that wildcard indices are not permitted" + + " in SQL.", + index)); + } + if (fieldMappings.has(fullFieldName)) { source = fieldMappings.mapping(fullFieldName); } else { diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/nestedfield/NestedFieldRewriter.java b/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/nestedfield/NestedFieldRewriter.java index 46afbb8ca1..59f415f306 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/nestedfield/NestedFieldRewriter.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/nestedfield/NestedFieldRewriter.java @@ -57,7 +57,7 @@ public class NestedFieldRewriter extends MySqlASTVisitorAdapter { * Scope stack to record the state (nested field names etc) for current query. In the case of * subquery, the active scope of current query is the top element of the stack. 
*/ - private Deque environment = new ArrayDeque<>(); + private final Deque environment = new ArrayDeque<>(); /** * Rewrite FROM here to make sure FROM statement always be visited before other statement in diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/nestedfield/Scope.java b/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/nestedfield/Scope.java index f65d7f166b..e9c2729716 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/nestedfield/Scope.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/nestedfield/Scope.java @@ -26,13 +26,13 @@ class Scope { * Mapping from nested field path alias to path full name in FROM. eg. e in {e => employees} in * "FROM t.employees e" */ - private Map aliasFullPaths = new HashMap<>(); + private final Map aliasFullPaths = new HashMap<>(); /** * Mapping from binary operation condition (in WHERE) to nested field tag (full path for nested, * EMPTY for non-nested field) */ - private Map conditionTags = new IdentityHashMap<>(); + private final Map conditionTags = new IdentityHashMap<>(); String getParentAlias() { return parentAlias; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/ordinal/OrdinalRewriterRule.java b/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/ordinal/OrdinalRewriterRule.java index ed853823ce..7a1ac49b0e 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/ordinal/OrdinalRewriterRule.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/ordinal/OrdinalRewriterRule.java @@ -68,13 +68,13 @@ private void changeOrdinalAliasInGroupAndOrderBy( root.accept( new MySqlASTVisitorAdapter() { - private String groupException = "Invalid ordinal [%s] specified in [GROUP BY %s]"; - private String orderException = "Invalid ordinal [%s] specified in [ORDER BY %s]"; + private final String groupException = "Invalid ordinal [%s] specified in [GROUP BY %s]"; + private final String orderException = "Invalid ordinal [%s] specified in [ORDER BY %s]"; - private List groupSelectList = + private final List groupSelectList = ((MySqlSelectQueryBlock) exprGroup.getSubQuery().getQuery()).getSelectList(); - private List orderSelectList = + private final List orderSelectList = ((MySqlSelectQueryBlock) exprOrder.getSubQuery().getQuery()).getSelectList(); @Override diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/subquery/SubQueryRewriteRule.java b/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/subquery/SubQueryRewriteRule.java index 5177b2d6d3..0fa55fe59d 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/subquery/SubQueryRewriteRule.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/subquery/SubQueryRewriteRule.java @@ -13,7 +13,7 @@ /** Subquery Rewriter Rule. 
*/ public class SubQueryRewriteRule implements RewriteRule { - private FindSubQuery findAllSubQuery = new FindSubQuery(); + private final FindSubQuery findAllSubQuery = new FindSubQuery(); @Override public boolean match(SQLQueryExpr expr) throws SQLFeatureNotSupportedException { diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/subquery/rewriter/SubqueryAliasRewriter.java b/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/subquery/rewriter/SubqueryAliasRewriter.java index 7176bd030c..955cf258b1 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/subquery/rewriter/SubqueryAliasRewriter.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/rewriter/subquery/rewriter/SubqueryAliasRewriter.java @@ -152,10 +152,10 @@ public String getAlias() { } /** Table Name. */ - private String name; + private final String name; /** Table Alias. */ - private String alias; + private final String alias; Table(String name, String alias) { this.name = name; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/spatial/BoundingBoxFilterParams.java b/legacy/src/main/java/org/opensearch/sql/legacy/spatial/BoundingBoxFilterParams.java index fb62f60ae7..3fe292e94d 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/spatial/BoundingBoxFilterParams.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/spatial/BoundingBoxFilterParams.java @@ -7,8 +7,8 @@ /** Created by Eliran on 1/8/2015. */ public class BoundingBoxFilterParams { - private Point topLeft; - private Point bottomRight; + private final Point topLeft; + private final Point bottomRight; public BoundingBoxFilterParams(Point topLeft, Point bottomRight) { this.topLeft = topLeft; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/spatial/CellFilterParams.java b/legacy/src/main/java/org/opensearch/sql/legacy/spatial/CellFilterParams.java index 6c50c17467..8c169814cd 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/spatial/CellFilterParams.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/spatial/CellFilterParams.java @@ -7,9 +7,9 @@ /** Created by Eliran on 15/8/2015. */ public class CellFilterParams { - private Point geohashPoint; - private int precision; - private boolean neighbors; + private final Point geohashPoint; + private final int precision; + private final boolean neighbors; public CellFilterParams(Point geohashPoint, int precision, boolean neighbors) { this.geohashPoint = geohashPoint; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/spatial/DistanceFilterParams.java b/legacy/src/main/java/org/opensearch/sql/legacy/spatial/DistanceFilterParams.java index 8c419de58d..32027dec42 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/spatial/DistanceFilterParams.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/spatial/DistanceFilterParams.java @@ -7,8 +7,8 @@ /** Created by Eliran on 1/8/2015. */ public class DistanceFilterParams { - private String distance; - private Point from; + private final String distance; + private final Point from; public DistanceFilterParams(String distance, Point from) { this.distance = distance; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/spatial/Point.java b/legacy/src/main/java/org/opensearch/sql/legacy/spatial/Point.java index f3f8639a1c..a5ed003c3d 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/spatial/Point.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/spatial/Point.java @@ -7,8 +7,8 @@ /** Created by Eliran on 1/8/2015. 
*/ public class Point { - private double lon; - private double lat; + private final double lon; + private final double lat; public Point(double lon, double lat) { this.lon = lon; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/spatial/PolygonFilterParams.java b/legacy/src/main/java/org/opensearch/sql/legacy/spatial/PolygonFilterParams.java index 1aeddb24a4..edf2deced4 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/spatial/PolygonFilterParams.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/spatial/PolygonFilterParams.java @@ -9,7 +9,7 @@ /** Created by Eliran on 15/8/2015. */ public class PolygonFilterParams { - private List polygon; + private final List polygon; public PolygonFilterParams(List polygon) { this.polygon = polygon; diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/spatial/RangeDistanceFilterParams.java b/legacy/src/main/java/org/opensearch/sql/legacy/spatial/RangeDistanceFilterParams.java index 0bdb01c3ce..6dd25ad2cf 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/spatial/RangeDistanceFilterParams.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/spatial/RangeDistanceFilterParams.java @@ -7,7 +7,7 @@ /** Created by Eliran on 15/8/2015. */ public class RangeDistanceFilterParams extends DistanceFilterParams { - private String distanceTo; + private final String distanceTo; public RangeDistanceFilterParams(String distanceFrom, String distanceTo, Point from) { super(distanceFrom, from); diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/spatial/SpatialParamsFactory.java b/legacy/src/main/java/org/opensearch/sql/legacy/spatial/SpatialParamsFactory.java index 5e1102994e..d9de1e1a00 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/spatial/SpatialParamsFactory.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/spatial/SpatialParamsFactory.java @@ -13,7 +13,7 @@ /** Created by Eliran on 1/8/2015. */ public class SpatialParamsFactory { - public static Set allowedMethods; + public static final Set allowedMethods; static { allowedMethods = new HashSet<>(); diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/utils/SQLFunctions.java b/legacy/src/main/java/org/opensearch/sql/legacy/utils/SQLFunctions.java index d46a80f6d3..a6a9199533 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/utils/SQLFunctions.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/utils/SQLFunctions.java @@ -112,7 +112,7 @@ public class SQLFunctions { .flatMap(Set::stream) .collect(Collectors.toSet()); - private Map generatedIds = new HashMap<>(); + private final Map generatedIds = new HashMap<>(); /** * Generates next id for given method name. 
The id's are increasing for each method name, so diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/antlr/SyntaxAnalysisTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/antlr/SyntaxAnalysisTest.java index 765bb0616e..4275b6152e 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/antlr/SyntaxAnalysisTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/antlr/SyntaxAnalysisTest.java @@ -22,9 +22,9 @@ public class SyntaxAnalysisTest { /** public accessor is required by @Rule annotation */ - @Rule public ExpectedException exception = ExpectedException.none(); + @Rule public final ExpectedException exception = ExpectedException.none(); - private OpenSearchLegacySqlAnalyzer analyzer = + private final OpenSearchLegacySqlAnalyzer analyzer = new OpenSearchLegacySqlAnalyzer(new SqlAnalysisConfig(true, true, 1000)); /** In reality exception occurs before reaching new parser for now */ diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/antlr/semantic/SemanticAnalyzerConfigTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/antlr/semantic/SemanticAnalyzerConfigTest.java index 2b9a5e418c..79d42a05b7 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/antlr/semantic/SemanticAnalyzerConfigTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/antlr/semantic/SemanticAnalyzerConfigTest.java @@ -19,7 +19,7 @@ /** Test cases for semantic analysis configuration */ public class SemanticAnalyzerConfigTest extends SemanticAnalyzerTestBase { - @Rule public ExpectedException exceptionWithoutSuggestion = ExpectedException.none(); + @Rule public final ExpectedException exceptionWithoutSuggestion = ExpectedException.none(); @Test public void noAnalysisShouldPerformForNonSelectStatement() { diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/antlr/semantic/SemanticAnalyzerTestBase.java b/legacy/src/test/java/org/opensearch/sql/legacy/antlr/semantic/SemanticAnalyzerTestBase.java index 403c2f49b7..390760af32 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/antlr/semantic/SemanticAnalyzerTestBase.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/antlr/semantic/SemanticAnalyzerTestBase.java @@ -33,9 +33,9 @@ public abstract class SemanticAnalyzerTestBase { private static final String TEST_MAPPING_FILE = "mappings/semantics.json"; /** public accessor is required by @Rule annotation */ - @Rule public ExpectedException exception = ExpectedException.none(); + @Rule public final ExpectedException exception = ExpectedException.none(); - private OpenSearchLegacySqlAnalyzer analyzer = + private final OpenSearchLegacySqlAnalyzer analyzer = new OpenSearchLegacySqlAnalyzer(new SqlAnalysisConfig(true, true, 1000)); @SuppressWarnings("UnstableApiUsage") diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/antlr/semantic/scope/TypeSupplierTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/antlr/semantic/scope/TypeSupplierTest.java index e6090117c1..57d8b3c7bb 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/antlr/semantic/scope/TypeSupplierTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/antlr/semantic/scope/TypeSupplierTest.java @@ -14,7 +14,7 @@ import org.opensearch.sql.legacy.antlr.semantic.types.base.OpenSearchDataType; public class TypeSupplierTest { - @Rule public ExpectedException exception = ExpectedException.none(); + @Rule public final ExpectedException exception = ExpectedException.none(); @Test public void haveOneTypeShouldPass() { diff --git 
a/legacy/src/test/java/org/opensearch/sql/legacy/antlr/visitor/AntlrSqlParseTreeVisitorTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/antlr/visitor/AntlrSqlParseTreeVisitorTest.java index be4b5a5197..2baaa91980 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/antlr/visitor/AntlrSqlParseTreeVisitorTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/antlr/visitor/AntlrSqlParseTreeVisitorTest.java @@ -27,7 +27,7 @@ /** Test cases for AntlrSqlParseTreeVisitor */ public class AntlrSqlParseTreeVisitorTest { - private TypeChecker analyzer = + private final TypeChecker analyzer = new TypeChecker(new SemanticContext()) { @Override public Type visitIndexName(String indexName) { @@ -47,7 +47,7 @@ public Type visitFieldName(String fieldName) { } }; - @Rule public ExpectedException exceptionRule = ExpectedException.none(); + @Rule public final ExpectedException exceptionRule = ExpectedException.none(); @Test public void selectNumberShouldReturnNumberAsQueryVisitingResult() { diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/executor/AsyncRestExecutorTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/executor/AsyncRestExecutorTest.java index 9be2517c4a..eea1c9a87a 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/executor/AsyncRestExecutorTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/executor/AsyncRestExecutorTest.java @@ -43,7 +43,7 @@ public class AsyncRestExecutorTest { @Mock private Client client; - private Map params = emptyMap(); + private final Map params = emptyMap(); @Mock private QueryAction action; diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/executor/format/DateFieldFormatterTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/executor/format/DateFieldFormatterTest.java index 1c2d1bae62..7d43ea0383 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/executor/format/DateFieldFormatterTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/executor/format/DateFieldFormatterTest.java @@ -575,7 +575,7 @@ public void testIncorrectFormat() { String dateFormat = "date_optional_time"; String originalDateValue = "1581724085"; // Invalid format for date value; should return original value - String expectedDateValue = "1581724085"; + String expectedDateValue = "2020-02-14 23:48:05.000"; verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); } @@ -609,6 +609,24 @@ public void testStrictDateOptionalTimeOrEpochMillsShouldPass() { verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); } + @Test + public void testDateInTimestampFormInSecondWithoutHint() { + String columnName = "date_field"; + String dateFormat = "date_optional_time"; + String originalDateValue = "1732057981"; + String expectedDateValue = "2024-11-19 23:13:01.000"; + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + + @Test + public void testDateInTimestampFormInMilliSecondWithoutHint() { + String columnName = "date_field"; + String dateFormat = "date_optional_time"; + String originalDateValue = "1732057981000"; + String expectedDateValue = "2024-11-19 23:13:01.000"; + verifyFormatting(columnName, dateFormat, originalDateValue, expectedDateValue); + } + private void verifyFormatting( String columnName, String dateFormatProperty, diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutorTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutorTest.java new file 
mode 100644 index 0000000000..1387412d37 --- /dev/null +++ b/legacy/src/test/java/org/opensearch/sql/legacy/executor/format/PrettyFormatRestExecutorTest.java @@ -0,0 +1,89 @@ +package org.opensearch.sql.legacy.executor.format; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.opensearch.sql.common.setting.Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER; + +import org.apache.lucene.search.TotalHits; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.search.SearchHit; +import org.opensearch.search.SearchHits; +import org.opensearch.sql.legacy.esdomain.LocalClusterState; +import org.opensearch.sql.legacy.query.DefaultQueryAction; +import org.opensearch.sql.legacy.request.SqlRequest; +import org.opensearch.sql.opensearch.setting.OpenSearchSettings; + +@RunWith(MockitoJUnitRunner.class) +public class PrettyFormatRestExecutorTest { + + @Mock private SearchResponse searchResponse; + @Mock private SearchHits searchHits; + @Mock private SearchHit searchHit; + @Mock private DefaultQueryAction queryAction; + @Mock private SqlRequest sqlRequest; + private PrettyFormatRestExecutor executor; + + @Before + public void setUp() { + OpenSearchSettings settings = mock(OpenSearchSettings.class); + LocalClusterState.state().setPluginSettings(settings); + when(LocalClusterState.state().getSettingValue(SQL_PAGINATION_API_SEARCH_AFTER)) + .thenReturn(true); + when(queryAction.getSqlRequest()).thenReturn(sqlRequest); + executor = new PrettyFormatRestExecutor("jdbc"); + } + + @Test + public void testIsDefaultCursor_fetchSizeZero() { + when(sqlRequest.fetchSize()).thenReturn(0); + + assertFalse(executor.isDefaultCursor(searchResponse, queryAction)); + } + + @Test + public void testIsDefaultCursor_totalHitsLessThanFetchSize() { + when(sqlRequest.fetchSize()).thenReturn(10); + when(searchResponse.getHits()) + .thenReturn( + new SearchHits( + new SearchHit[] {searchHit}, new TotalHits(5, TotalHits.Relation.EQUAL_TO), 1.0F)); + + assertFalse(executor.isDefaultCursor(searchResponse, queryAction)); + } + + @Test + public void testIsDefaultCursor_totalHitsGreaterThanOrEqualToFetchSize() { + when(sqlRequest.fetchSize()).thenReturn(5); + when(searchResponse.getHits()) + .thenReturn( + new SearchHits( + new SearchHit[] {searchHit}, new TotalHits(5, TotalHits.Relation.EQUAL_TO), 1.0F)); + + assertTrue(executor.isDefaultCursor(searchResponse, queryAction)); + } + + @Test + public void testIsDefaultCursor_PaginationApiDisabled() { + when(LocalClusterState.state().getSettingValue(SQL_PAGINATION_API_SEARCH_AFTER)) + .thenReturn(false); + when(searchResponse.getScrollId()).thenReturn("someScrollId"); + + assertTrue(executor.isDefaultCursor(searchResponse, queryAction)); + } + + @Test + public void testIsDefaultCursor_PaginationApiDisabled_NoScrollId() { + when(LocalClusterState.state().getSettingValue(SQL_PAGINATION_API_SEARCH_AFTER)) + .thenReturn(false); + when(searchResponse.getScrollId()).thenReturn(null); + + assertFalse(executor.isDefaultCursor(searchResponse, queryAction)); + } +} diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/pit/PointInTimeHandlerImplTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/pit/PointInTimeHandlerImplTest.java new file mode 100644 index 0000000000..d9fcf4e87e --- 
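
The isDefaultCursor tests above only exercise the executor's decision; the method under test is not shown in this excerpt. Assuming it consults the same setting and request objects the tests mock, the predicate the tests imply would look roughly like the sketch below, which is a reconstruction and not the actual method body.

// Rough reconstruction inferred from the tests above; not the code under test.
boolean isDefaultCursorSketch(SearchResponse searchResponse, DefaultQueryAction queryAction) {
  if (LocalClusterState.state().getSettingValue(SQL_PAGINATION_API_SEARCH_AFTER)) {
    int fetchSize = queryAction.getSqlRequest().fetchSize();
    // PIT path: paginate only when a fetch size is requested and there are enough hits to page.
    return fetchSize != 0 && searchResponse.getHits().getTotalHits().value >= fetchSize;
  }
  // Scroll path: a cursor is only possible when OpenSearch returned a scroll id.
  return searchResponse.getScrollId() != null;
}
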
/dev/null +++ b/legacy/src/test/java/org/opensearch/sql/legacy/pit/PointInTimeHandlerImplTest.java @@ -0,0 +1,133 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ +package org.opensearch.sql.legacy.pit; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.fail; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.util.Collections; +import java.util.concurrent.ExecutionException; +import lombok.SneakyThrows; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.opensearch.action.search.CreatePitAction; +import org.opensearch.action.search.CreatePitRequest; +import org.opensearch.action.search.CreatePitResponse; +import org.opensearch.action.search.DeletePitAction; +import org.opensearch.action.search.DeletePitRequest; +import org.opensearch.action.search.DeletePitResponse; +import org.opensearch.client.Client; +import org.opensearch.common.action.ActionFuture; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.rest.RestStatus; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.legacy.esdomain.LocalClusterState; +import org.opensearch.sql.opensearch.setting.OpenSearchSettings; + +public class PointInTimeHandlerImplTest { + + @Mock private Client mockClient; + private final String[] indices = {"index1", "index2"}; + private PointInTimeHandlerImpl pointInTimeHandlerImpl; + private final String PIT_ID = "testId"; + private CreatePitResponse mockCreatePitResponse; + private DeletePitResponse mockDeletePitResponse; + private ActionFuture mockActionFuture; + private ActionFuture mockActionFutureDelete; + + @Mock private OpenSearchSettings settings; + + @Before + public void setUp() { + MockitoAnnotations.initMocks(this); + pointInTimeHandlerImpl = new PointInTimeHandlerImpl(mockClient, indices); + + doReturn(Collections.emptyList()).when(settings).getSettings(); + when(settings.getSettingValue(Settings.Key.SQL_CURSOR_KEEP_ALIVE)) + .thenReturn(new TimeValue(10000)); + LocalClusterState.state().setPluginSettings(settings); + + mockCreatePitResponse = mock(CreatePitResponse.class); + mockDeletePitResponse = mock(DeletePitResponse.class); + mockActionFuture = mock(ActionFuture.class); + mockActionFutureDelete = mock(ActionFuture.class); + when(mockClient.execute(any(CreatePitAction.class), any(CreatePitRequest.class))) + .thenReturn(mockActionFuture); + when(mockClient.execute(any(DeletePitAction.class), any(DeletePitRequest.class))) + .thenReturn(mockActionFutureDelete); + RestStatus mockRestStatus = mock(RestStatus.class); + when(mockDeletePitResponse.status()).thenReturn(mockRestStatus); + when(mockDeletePitResponse.status().getStatus()).thenReturn(200); + when(mockCreatePitResponse.getId()).thenReturn(PIT_ID); + } + + @SneakyThrows + @Test + public void testCreate() { + when(mockActionFuture.get()).thenReturn(mockCreatePitResponse); + try { + pointInTimeHandlerImpl.create(); + } catch (RuntimeException e) { + fail("Expected no exception while creating PIT, but got: " + e.getMessage()); + } + verify(mockClient).execute(any(CreatePitAction.class), any(CreatePitRequest.class)); + verify(mockActionFuture).get(); + 
verify(mockCreatePitResponse).getId(); + } + + @SneakyThrows + @Test + public void testCreateForFailure() { + ExecutionException executionException = + new ExecutionException("Error occurred while creating PIT.", new Throwable()); + when(mockActionFuture.get()).thenThrow(executionException); + + RuntimeException thrownException = + assertThrows(RuntimeException.class, () -> pointInTimeHandlerImpl.create()); + + verify(mockClient).execute(any(CreatePitAction.class), any(CreatePitRequest.class)); + assertNotNull(thrownException.getCause()); + assertEquals("Error occurred while creating PIT.", thrownException.getMessage()); + verify(mockActionFuture).get(); + } + + @SneakyThrows + @Test + public void testDelete() { + when(mockActionFutureDelete.get()).thenReturn(mockDeletePitResponse); + try { + pointInTimeHandlerImpl.delete(); + } catch (RuntimeException e) { + fail("Expected no exception while deleting PIT, but got: " + e.getMessage()); + } + verify(mockClient).execute(any(DeletePitAction.class), any(DeletePitRequest.class)); + verify(mockActionFutureDelete).get(); + } + + @SneakyThrows + @Test + public void testDeleteForFailure() { + ExecutionException executionException = + new ExecutionException("Error occurred while deleting PIT.", new Throwable()); + when(mockActionFutureDelete.get()).thenThrow(executionException); + + RuntimeException thrownException = + assertThrows(RuntimeException.class, () -> pointInTimeHandlerImpl.delete()); + + verify(mockClient).execute(any(DeletePitAction.class), any(DeletePitRequest.class)); + assertNotNull(thrownException.getCause()); + assertEquals("Error occurred while deleting PIT.", thrownException.getMessage()); + verify(mockActionFutureDelete).get(); + } +} diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/query/planner/physical/node/scroll/SearchHitRowTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/query/planner/physical/node/scroll/SearchHitRowTest.java index dd0fc626c0..f7d2030b0c 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/query/planner/physical/node/scroll/SearchHitRowTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/query/planner/physical/node/scroll/SearchHitRowTest.java @@ -12,6 +12,7 @@ import org.junit.Test; import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.search.SearchHit; +import org.opensearch.sql.legacy.query.planner.physical.node.SearchHitRow; public class SearchHitRowTest { diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/ErrorMessageFactoryTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/ErrorMessageFactoryTest.java index 09cd9e9efc..31baaced85 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/ErrorMessageFactoryTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/ErrorMessageFactoryTest.java @@ -15,8 +15,8 @@ public class ErrorMessageFactoryTest { - private Throwable nonOpenSearchThrowable = new Throwable(); - private Throwable openSearchThrowable = new OpenSearchException(nonOpenSearchThrowable); + private final Throwable nonOpenSearchThrowable = new Throwable(); + private final Throwable openSearchThrowable = new OpenSearchException(nonOpenSearchThrowable); @Test public void openSearchExceptionShouldCreateEsErrorMessage() { diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/SqlRequestParamTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/SqlRequestParamTest.java index 3c47832761..0d29b55106 100644 --- 
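
The PointInTimeHandlerImplTest above mocks client.execute for CreatePitAction and DeletePitAction, but the handler implementation itself is outside this excerpt. The sketch below only illustrates the create/delete flow those mocks suggest; in particular, the CreatePitRequest and DeletePitRequest constructor arguments are assumptions, not taken from the patch.

// Assumption-laden sketch of the flow the mocks suggest; not the shipped handler.
void createAndDeletePit(Client client, String[] indices) throws Exception {
  TimeValue keepAlive =
      LocalClusterState.state().getSettingValue(Settings.Key.SQL_CURSOR_KEEP_ALIVE);
  // Constructor arguments are assumed: keep-alive, allow-partial-failure flag, target indices.
  CreatePitRequest createRequest = new CreatePitRequest(keepAlive, false, indices);
  CreatePitResponse created = client.execute(CreatePitAction.INSTANCE, createRequest).get();
  String pitId = created.getId();
  // ... run search_after queries against pitId ...
  DeletePitResponse deleted =
      client.execute(DeletePitAction.INSTANCE, new DeletePitRequest(pitId)).get();
  if (deleted.status().getStatus() != 200) {
    throw new RuntimeException("Error occurred while deleting PIT.");
  }
}
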
a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/SqlRequestParamTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/SqlRequestParamTest.java @@ -19,7 +19,7 @@ import org.opensearch.sql.legacy.request.SqlRequestParam; public class SqlRequestParamTest { - @Rule public ExpectedException exceptionRule = ExpectedException.none(); + @Rule public final ExpectedException exceptionRule = ExpectedException.none(); @Test public void shouldReturnTrueIfPrettyParamsIsTrue() { diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/cursor/DefaultCursorTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/cursor/DefaultCursorTest.java index 1b9662035d..deff7132b0 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/cursor/DefaultCursorTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/cursor/DefaultCursorTest.java @@ -9,14 +9,47 @@ import static org.hamcrest.Matchers.emptyOrNullString; import static org.hamcrest.Matchers.startsWith; import static org.junit.Assert.assertEquals; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.when; +import java.io.ByteArrayOutputStream; import java.util.ArrayList; import java.util.Collections; +import org.junit.Before; import org.junit.Test; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.legacy.cursor.CursorType; import org.opensearch.sql.legacy.cursor.DefaultCursor; +import org.opensearch.sql.legacy.esdomain.LocalClusterState; +import org.opensearch.sql.opensearch.setting.OpenSearchSettings; public class DefaultCursorTest { + @Mock private OpenSearchSettings settings; + + @Mock private SearchSourceBuilder sourceBuilder; + + @Before + public void setUp() { + MockitoAnnotations.openMocks(this); + // Required for Pagination queries using PIT instead of Scroll + doReturn(Collections.emptyList()).when(settings).getSettings(); + when(settings.getSettingValue(Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER)).thenReturn(true); + LocalClusterState.state().setPluginSettings(settings); + + // Mock the toXContent method of SearchSourceBuilder + try { + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder(new ByteArrayOutputStream()); + when(sourceBuilder.toXContent(any(XContentBuilder.class), any())).thenReturn(xContentBuilder); + } catch (Exception e) { + throw new RuntimeException(e); + } + } @Test public void checkCursorType() { @@ -25,7 +58,26 @@ public void checkCursorType() { } @Test - public void cursorShouldStartWithCursorTypeID() { + public void cursorShouldStartWithCursorTypeIDForPIT() { + DefaultCursor cursor = new DefaultCursor(); + cursor.setRowsLeft(50); + cursor.setPitId("dbdskbcdjksbcjkdsbcjk+//"); + cursor.setIndexPattern("myIndex"); + cursor.setFetchSize(500); + cursor.setFieldAliasMap(Collections.emptyMap()); + cursor.setColumns(new ArrayList<>()); + + // Set the mocked SearchSourceBuilder to the cursor + cursor.setSearchSourceBuilder(sourceBuilder); + + assertThat(cursor.generateCursorId(), startsWith(cursor.getType().getId() + ":")); + } + + @Test + public void cursorShouldStartWithCursorTypeIDForScroll() { + // Disable PIT for pagination and use scroll instead + 
when(settings.getSettingValue(Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER)).thenReturn(false); + DefaultCursor cursor = new DefaultCursor(); cursor.setRowsLeft(50); cursor.setScrollId("dbdskbcdjksbcjkdsbcjk+//"); @@ -33,6 +85,10 @@ public void cursorShouldStartWithCursorTypeID() { cursor.setFetchSize(500); cursor.setFieldAliasMap(Collections.emptyMap()); cursor.setColumns(new ArrayList<>()); + + // Set the mocked SearchSourceBuilder to the cursor + cursor.setSearchSourceBuilder(sourceBuilder); + assertThat(cursor.generateCursorId(), startsWith(cursor.getType().getId() + ":")); } diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/expression/core/BinaryExpressionTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/expression/core/BinaryExpressionTest.java index acc0e9c60e..610930e0e7 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/expression/core/BinaryExpressionTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/expression/core/BinaryExpressionTest.java @@ -20,7 +20,7 @@ @RunWith(MockitoJUnitRunner.class) public class BinaryExpressionTest extends ExpressionTest { - @Rule public ExpectedException exceptionRule = ExpectedException.none(); + @Rule public final ExpectedException exceptionRule = ExpectedException.none(); @Test public void addIntegerValueShouldPass() { diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/expression/core/UnaryExpressionTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/expression/core/UnaryExpressionTest.java index e030e1c6cf..f049cd843a 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/expression/core/UnaryExpressionTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/expression/core/UnaryExpressionTest.java @@ -20,7 +20,7 @@ @RunWith(MockitoJUnitRunner.class) public class UnaryExpressionTest extends ExpressionTest { - @Rule public ExpectedException exceptionRule = ExpectedException.none(); + @Rule public final ExpectedException exceptionRule = ExpectedException.none(); @Test public void absShouldPass() { diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/expression/model/ExprValueUtilsTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/expression/model/ExprValueUtilsTest.java index 15fd72a522..846406879d 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/expression/model/ExprValueUtilsTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/expression/model/ExprValueUtilsTest.java @@ -18,7 +18,7 @@ @RunWith(MockitoJUnitRunner.class) public class ExprValueUtilsTest { - @Rule public ExpectedException exceptionRule = ExpectedException.none(); + @Rule public final ExpectedException exceptionRule = ExpectedException.none(); @Test public void getIntegerValueWithIntegerExprValueShouldPass() { diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/parser/BucketPathTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/parser/BucketPathTest.java index c26740a04c..c88874c3f1 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/parser/BucketPathTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/parser/BucketPathTest.java @@ -15,7 +15,7 @@ public class BucketPathTest { - @Rule public ExpectedException exceptionRule = ExpectedException.none(); + @Rule public final ExpectedException exceptionRule = ExpectedException.none(); private final Path agg1 = Path.getAggPath("projects@NESTED"); private final Path 
agg2 = Path.getAggPath("projects@FILTERED"); diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/parser/SqlParserTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/parser/SqlParserTest.java index 38eefaaec1..a81e4f4dfd 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/parser/SqlParserTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/parser/SqlParserTest.java @@ -62,7 +62,7 @@ public void init() { parser = new SqlParser(); } - @Rule public ExpectedException thrown = ExpectedException.none(); + @Rule public final ExpectedException thrown = ExpectedException.none(); @Test public void whereConditionLeftFunctionRightPropertyGreatTest() throws Exception { diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/parser/SubQueryParserTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/parser/SubQueryParserTest.java index 5713179b46..81e084e152 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/parser/SubQueryParserTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/parser/SubQueryParserTest.java @@ -19,7 +19,7 @@ public class SubQueryParserTest { - private static SqlParser parser = new SqlParser(); + private static final SqlParser parser = new SqlParser(); @Test public void selectFromSubqueryShouldPass() throws SqlParseException { diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/planner/QueryPlannerBatchTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/planner/QueryPlannerBatchTest.java index 0c77550a2f..bb3a960270 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/planner/QueryPlannerBatchTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/planner/QueryPlannerBatchTest.java @@ -43,7 +43,7 @@ public class QueryPlannerBatchTest extends QueryPlannerTest { private static final String TEST_SQL3 = "ON d.id = e.departmentId " + " WHERE e.age <= 50"; - private SearchHit[] employees = { + private final SearchHit[] employees = { employee(1, "People 1", "A"), employee(2, "People 2", "A"), employee(3, "People 3", "A"), @@ -56,7 +56,7 @@ public class QueryPlannerBatchTest extends QueryPlannerTest { employee(10, "People 10", "F") }; - private SearchHit[] departments = { + private final SearchHit[] departments = { department(1, "A", "AWS"), department(2, "C", "Capital One"), department(3, "D", "Dell"), @@ -66,7 +66,7 @@ public class QueryPlannerBatchTest extends QueryPlannerTest { department(7, "U", "Uber"), }; - private Matcher[] matched = { + private final Matcher[] matched = { hit(kv("name", "People 1"), kv("id", "A"), kv("dep", "AWS")), hit(kv("name", "People 2"), kv("id", "A"), kv("dep", "AWS")), hit(kv("name", "People 3"), kv("id", "A"), kv("dep", "AWS")), @@ -76,25 +76,27 @@ public class QueryPlannerBatchTest extends QueryPlannerTest { hit(kv("name", "People 10"), kv("id", "F"), kv("dep", "Facebook")) }; - private Matcher[] mismatched1 = { + private final Matcher[] mismatched1 = { hit(kv("name", null), kv("id", "G"), kv("dep", "Google")), hit(kv("name", null), kv("id", "M"), kv("dep", "Microsoft")), hit(kv("name", null), kv("id", "U"), kv("dep", "Uber")) }; - private Matcher[] mismatched2 = { + private final Matcher[] mismatched2 = { hit(kv("name", "People 4"), kv("id", null), kv("dep", null)), hit(kv("name", "People 5"), kv("id", null), kv("dep", null)), hit(kv("name", "People 9"), kv("id", null), kv("dep", null)) }; - private Matcher expectedInnerJoinResult = hits(matched); + private final 
Matcher expectedInnerJoinResult = hits(matched); /** Department left join Employee */ - private Matcher expectedLeftOuterJoinResult1 = hits(concat(matched, mismatched1)); + private final Matcher expectedLeftOuterJoinResult1 = + hits(concat(matched, mismatched1)); /** Employee left join Department */ - private Matcher expectedLeftOuterJoinResult2 = hits(concat(matched, mismatched2)); + private final Matcher expectedLeftOuterJoinResult2 = + hits(concat(matched, mismatched2)); /** Parameterized test cases */ private final int blockSize; diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/planner/QueryPlannerMonitorTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/planner/QueryPlannerMonitorTest.java index 9b1d307ebc..6995743d06 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/planner/QueryPlannerMonitorTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/planner/QueryPlannerMonitorTest.java @@ -34,7 +34,7 @@ public class QueryPlannerMonitorTest extends QueryPlannerTest { private static final long[] PERCENT_USAGE_24 = freeAndTotalMem(76, 100); private static final long[] PERCENT_USAGE_50 = freeAndTotalMem(50, 100); - @Spy private Stats stats = new Stats(client); + @Spy private final Stats stats = new Stats(client); @Test public void reachedLimitAndRecoverAt1stAttempt() { diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/planner/QueryPlannerTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/planner/QueryPlannerTest.java index 521b225893..6ff907ba30 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/planner/QueryPlannerTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/planner/QueryPlannerTest.java @@ -42,6 +42,7 @@ import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.search.SearchHit; import org.opensearch.search.SearchHits; +import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.legacy.domain.JoinSelect; import org.opensearch.sql.legacy.esdomain.LocalClusterState; import org.opensearch.sql.legacy.exception.SqlParseException; @@ -104,6 +105,7 @@ public void init() { // to mock. // In this case, default value in Setting will be returned all the time. 
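The test hunks around this point pin the new SQL_PAGINATION_API_SEARCH_AFTER plugin setting through Mockito so that each case deterministically exercises either the PIT/search_after path or the legacy scroll path. Below is a minimal, self-contained sketch of that stubbing pattern; the class and test names are illustrative and not part of this change.

import static org.junit.Assert.assertFalse;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;

import org.junit.Test;
import org.opensearch.sql.common.setting.Settings;

public class PaginationFlagStubSketch {
  @Test
  public void settingStubPinsThePaginationPath() {
    Settings settings = mock(Settings.class);
    // Pin the flag to false so code reading it takes the scroll path rather than PIT + search_after.
    doReturn(false).when(settings).getSettingValue(Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER);

    Boolean flag = settings.getSettingValue(Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER);
    assertFalse(flag);
  }
}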
doReturn(emptyList()).when(settings).getSettings(); + doReturn(false).when(settings).getSettingValue(Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER); LocalClusterState.state().setPluginSettings(settings); ActionFuture mockFuture = mock(ActionFuture.class); diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/planner/converter/SQLAggregationParserTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/planner/converter/SQLAggregationParserTest.java index d6911ac2fc..6b93dbcaf4 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/planner/converter/SQLAggregationParserTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/planner/converter/SQLAggregationParserTest.java @@ -35,7 +35,7 @@ @RunWith(MockitoJUnitRunner.class) public class SQLAggregationParserTest { - @Rule public ExpectedException exceptionRule = ExpectedException.none(); + @Rule public final ExpectedException exceptionRule = ExpectedException.none(); @Test public void parseAggWithoutExpressionShouldPass() { diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/planner/converter/SQLExprToExpressionConverterTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/planner/converter/SQLExprToExpressionConverterTest.java index ac949eb0d7..100b5ebe47 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/planner/converter/SQLExprToExpressionConverterTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/planner/converter/SQLExprToExpressionConverterTest.java @@ -33,7 +33,7 @@ @RunWith(MockitoJUnitRunner.class) public class SQLExprToExpressionConverterTest { - @Rule public ExpectedException exceptionRule = ExpectedException.none(); + @Rule public final ExpectedException exceptionRule = ExpectedException.none(); private SQLExprToExpressionConverter converter; private SQLAggregationParser.Context context; diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/query/DefaultQueryActionTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/query/DefaultQueryActionTest.java index 755d604a65..50f094bf72 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/query/DefaultQueryActionTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/query/DefaultQueryActionTest.java @@ -6,18 +6,9 @@ package org.opensearch.sql.legacy.unittest.query; import static org.hamcrest.Matchers.equalTo; -import static org.mockito.ArgumentMatchers.anyString; -import static org.mockito.ArgumentMatchers.eq; -import static org.mockito.Mockito.any; -import static org.mockito.Mockito.doReturn; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.never; -import static org.mockito.Mockito.times; - -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; -import java.util.Optional; +import static org.mockito.Mockito.*; + +import java.util.*; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -27,6 +18,8 @@ import org.opensearch.client.Client; import org.opensearch.common.unit.TimeValue; import org.opensearch.script.Script; +import org.opensearch.search.sort.FieldSortBuilder; +import org.opensearch.search.sort.SortOrder; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.legacy.domain.Field; import org.opensearch.sql.legacy.domain.KVValue; @@ -149,6 +142,12 @@ public void testIfScrollShouldBeOpenWithDifferentFormats() { queryAction.setFormat(Format.JDBC); queryAction.checkAndSetScroll(); 
Mockito.verify(mockRequestBuilder).setSize(settingFetchSize); + Mockito.verify(mockRequestBuilder).addSort(FieldSortBuilder.DOC_FIELD_NAME, SortOrder.ASC); + Mockito.verify(mockRequestBuilder, never()).setScroll(timeValue); + + // Verify setScroll when SQL_PAGINATION_API_SEARCH_AFTER is set to false + mockLocalClusterStateAndIntializeMetricsForScroll(timeValue); + queryAction.checkAndSetScroll(); Mockito.verify(mockRequestBuilder).setScroll(timeValue); } @@ -168,6 +167,12 @@ public void testIfScrollShouldBeOpen() { mockLocalClusterStateAndInitializeMetrics(timeValue); queryAction.checkAndSetScroll(); Mockito.verify(mockRequestBuilder).setSize(settingFetchSize); + Mockito.verify(mockRequestBuilder).addSort(FieldSortBuilder.DOC_FIELD_NAME, SortOrder.ASC); + Mockito.verify(mockRequestBuilder, never()).setScroll(timeValue); + + // Verify setScroll when SQL_PAGINATION_API_SEARCH_AFTER is set to false + mockLocalClusterStateAndIntializeMetricsForScroll(timeValue); + queryAction.checkAndSetScroll(); Mockito.verify(mockRequestBuilder).setScroll(timeValue); } @@ -195,6 +200,12 @@ public void testIfScrollShouldBeOpenWithDifferentFetchSize() { doReturn(mockRequestBuilder).when(mockRequestBuilder).setSize(userFetchSize); queryAction.checkAndSetScroll(); Mockito.verify(mockRequestBuilder).setSize(20); + Mockito.verify(mockRequestBuilder).addSort(FieldSortBuilder.DOC_FIELD_NAME, SortOrder.ASC); + Mockito.verify(mockRequestBuilder, never()).setScroll(timeValue); + + // Verify setScroll when SQL_PAGINATION_API_SEARCH_AFTER is set to false + mockLocalClusterStateAndIntializeMetricsForScroll(timeValue); + queryAction.checkAndSetScroll(); Mockito.verify(mockRequestBuilder).setScroll(timeValue); } @@ -216,7 +227,9 @@ public void testIfScrollShouldBeOpenWithDifferentValidFetchSizeAndLimit() { queryAction.checkAndSetScroll(); Mockito.verify(mockRequestBuilder).setSize(userFetchSize); - Mockito.verify(mockRequestBuilder).setScroll(timeValue); + Mockito.verify(mockRequestBuilder).addSort(FieldSortBuilder.DOC_FIELD_NAME, SortOrder.ASC); + // Skip setScroll when SQL_PAGINATION_API_SEARCH_AFTER is set to false + Mockito.verify(mockRequestBuilder, never()).setScroll(timeValue); /** fetchSize > LIMIT - no scroll */ userFetchSize = 5000; @@ -226,6 +239,14 @@ public void testIfScrollShouldBeOpenWithDifferentValidFetchSizeAndLimit() { queryAction.checkAndSetScroll(); Mockito.verify(mockRequestBuilder).setSize(limit); Mockito.verify(mockRequestBuilder, never()).setScroll(timeValue); + + // Verify setScroll when SQL_PAGINATION_API_SEARCH_AFTER is set to false + mockLocalClusterStateAndIntializeMetricsForScroll(timeValue); + /** fetchSize <= LIMIT - open scroll */ + userFetchSize = 1500; + doReturn(userFetchSize).when(mockSqlRequest).fetchSize(); + queryAction.checkAndSetScroll(); + Mockito.verify(mockRequestBuilder).setScroll(timeValue); } private void mockLocalClusterStateAndInitializeMetrics(TimeValue time) { @@ -236,6 +257,24 @@ private void mockLocalClusterStateAndInitializeMetrics(TimeValue time) { .when(mockLocalClusterState) .getSettingValue(Settings.Key.METRICS_ROLLING_WINDOW); doReturn(2L).when(mockLocalClusterState).getSettingValue(Settings.Key.METRICS_ROLLING_INTERVAL); + doReturn(true) + .when(mockLocalClusterState) + .getSettingValue(Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER); + + Metrics.getInstance().registerDefaultMetrics(); + } + + private void mockLocalClusterStateAndIntializeMetricsForScroll(TimeValue time) { + LocalClusterState mockLocalClusterState = mock(LocalClusterState.class); + 
LocalClusterState.state(mockLocalClusterState); + doReturn(time).when(mockLocalClusterState).getSettingValue(Settings.Key.SQL_CURSOR_KEEP_ALIVE); + doReturn(3600L) + .when(mockLocalClusterState) + .getSettingValue(Settings.Key.METRICS_ROLLING_WINDOW); + doReturn(2L).when(mockLocalClusterState).getSettingValue(Settings.Key.METRICS_ROLLING_INTERVAL); + doReturn(false) + .when(mockLocalClusterState) + .getSettingValue(Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER); Metrics.getInstance().registerDefaultMetrics(); } diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/identifier/UnquoteIdentifierRuleTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/identifier/UnquoteIdentifierRuleTest.java index 30bbac861a..5a27420b12 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/identifier/UnquoteIdentifierRuleTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/identifier/UnquoteIdentifierRuleTest.java @@ -65,8 +65,8 @@ private QueryAssertion query(String sql) { private static class QueryAssertion { - private UnquoteIdentifierRule rule = new UnquoteIdentifierRule(); - private SQLQueryExpr expr; + private final UnquoteIdentifierRule rule = new UnquoteIdentifierRule(); + private final SQLQueryExpr expr; QueryAssertion(String sql) { this.expr = SqlParserUtils.parse(sql); diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/ordinal/OrdinalRewriterRuleTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/ordinal/OrdinalRewriterRuleTest.java index d27967e361..d4bbc59eb5 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/ordinal/OrdinalRewriterRuleTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/ordinal/OrdinalRewriterRuleTest.java @@ -18,7 +18,7 @@ /** Test cases for ordinal aliases in GROUP BY and ORDER BY */ public class OrdinalRewriterRuleTest { - @Rule public ExpectedException exception = ExpectedException.none(); + @Rule public final ExpectedException exception = ExpectedException.none(); @Test public void ordinalInGroupByShouldMatch() { @@ -118,8 +118,8 @@ private QueryAssertion query(String sql) { private static class QueryAssertion { - private OrdinalRewriterRule rule; - private SQLQueryExpr expr; + private final OrdinalRewriterRule rule; + private final SQLQueryExpr expr; QueryAssertion(String sql) { this.expr = SqlParserUtils.parse(sql); diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/parent/SQLExprParentSetterRuleTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/parent/SQLExprParentSetterRuleTest.java index 460b045ca0..3940ba1366 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/parent/SQLExprParentSetterRuleTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/parent/SQLExprParentSetterRuleTest.java @@ -19,7 +19,7 @@ public class SQLExprParentSetterRuleTest { @Mock private SQLQueryExpr queryExpr; - private SQLExprParentSetterRule rule = new SQLExprParentSetterRule(); + private final SQLExprParentSetterRule rule = new SQLExprParentSetterRule(); @Test public void match() { diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/subquery/ExistsSubQueryRewriterTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/subquery/ExistsSubQueryRewriterTest.java index dd15fd6683..e60eead33b 100644 --- 
a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/subquery/ExistsSubQueryRewriterTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/subquery/ExistsSubQueryRewriterTest.java @@ -13,7 +13,7 @@ public class ExistsSubQueryRewriterTest extends SubQueryRewriterTestBase { - @Rule public ExpectedException exceptionRule = ExpectedException.none(); + @Rule public final ExpectedException exceptionRule = ExpectedException.none(); @Test public void nonCorrelatedExists() { diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/subquery/InSubqueryRewriterTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/subquery/InSubqueryRewriterTest.java index bb33baae7d..95d236460e 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/subquery/InSubqueryRewriterTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/subquery/InSubqueryRewriterTest.java @@ -13,7 +13,7 @@ public class InSubqueryRewriterTest extends SubQueryRewriterTestBase { - @Rule public ExpectedException exceptionRule = ExpectedException.none(); + @Rule public final ExpectedException exceptionRule = ExpectedException.none(); @Test public void nonCorrleatedIn() throws Exception { diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/subquery/SubQueryRewriteRuleTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/subquery/SubQueryRewriteRuleTest.java index 7bd3dd847e..0e0a30ef3a 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/subquery/SubQueryRewriteRuleTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/subquery/SubQueryRewriteRuleTest.java @@ -19,7 +19,7 @@ public class SubQueryRewriteRuleTest { final SubQueryRewriteRule rewriteRule = new SubQueryRewriteRule(); - @Rule public ExpectedException exceptionRule = ExpectedException.none(); + @Rule public final ExpectedException exceptionRule = ExpectedException.none(); @Test public void isInMatch() throws SQLFeatureNotSupportedException { diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/term/TermFieldRewriterTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/term/TermFieldRewriterTest.java index 7922d60647..561cafab81 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/term/TermFieldRewriterTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/rewriter/term/TermFieldRewriterTest.java @@ -21,7 +21,7 @@ import org.opensearch.sql.legacy.util.SqlParserUtils; public class TermFieldRewriterTest { - @Rule public ExpectedException exception = ExpectedException.none(); + @Rule public final ExpectedException exception = ExpectedException.none(); @Before public void setup() { diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/utils/SQLFunctionsTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/utils/SQLFunctionsTest.java index 9fc2b6012d..983f10023e 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/unittest/utils/SQLFunctionsTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/unittest/utils/SQLFunctionsTest.java @@ -34,7 +34,7 @@ public class SQLFunctionsTest { - private SQLFunctions sqlFunctions = new SQLFunctions(); + private final SQLFunctions sqlFunctions = new SQLFunctions(); @Rule public ExpectedException exceptionRule = ExpectedException.none(); diff --git 
a/legacy/src/test/java/org/opensearch/sql/legacy/util/MultipleIndexClusterUtils.java b/legacy/src/test/java/org/opensearch/sql/legacy/util/MultipleIndexClusterUtils.java index 8890a6853f..85b8cffb59 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/util/MultipleIndexClusterUtils.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/util/MultipleIndexClusterUtils.java @@ -33,7 +33,7 @@ public class MultipleIndexClusterUtils { public static final String INDEX_ACCOUNT_2 = "account2"; public static final String INDEX_ACCOUNT_ALL = "account*"; - public static String INDEX_ACCOUNT_1_MAPPING = + public static final String INDEX_ACCOUNT_1_MAPPING = "{\n" + " \"field_mappings\": {\n" + " \"mappings\": {\n" @@ -93,7 +93,7 @@ public class MultipleIndexClusterUtils { * The difference with account1. 1. missing address. 2. age has different type. 3. * projects.started_year has different type. */ - public static String INDEX_ACCOUNT_2_MAPPING = + public static final String INDEX_ACCOUNT_2_MAPPING = "{\n" + " \"field_mappings\": {\n" + " \"mappings\": {\n" diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchClient.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchClient.java index 0a9cc67993..cdc3d4462f 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchClient.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchClient.java @@ -7,6 +7,8 @@ import java.util.List; import java.util.Map; +import org.opensearch.action.search.CreatePitRequest; +import org.opensearch.action.search.DeletePitRequest; import org.opensearch.client.node.NodeClient; import org.opensearch.sql.opensearch.mapping.IndexMapping; import org.opensearch.sql.opensearch.request.OpenSearchRequest; @@ -89,4 +91,19 @@ public interface OpenSearchClient { void schedule(Runnable task); NodeClient getNodeClient(); + + /** + * Create PIT for given indices + * + * @param createPitRequest Create Point In Time request + * @return PitId + */ + String createPit(CreatePitRequest createPitRequest); + + /** + * Delete PIT + * + * @param deletePitRequest Delete Point In Time request + */ + void deletePit(DeletePitRequest deletePitRequest); } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClient.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClient.java index 993e092534..7a9487ef6a 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClient.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClient.java @@ -11,6 +11,7 @@ import java.util.Collection; import java.util.List; import java.util.Map; +import java.util.concurrent.ExecutionException; import java.util.function.Function; import java.util.function.Predicate; import java.util.stream.Collectors; @@ -21,13 +22,16 @@ import org.opensearch.action.admin.indices.get.GetIndexResponse; import org.opensearch.action.admin.indices.mapping.get.GetMappingsResponse; import org.opensearch.action.admin.indices.settings.get.GetSettingsResponse; +import org.opensearch.action.search.*; import org.opensearch.client.node.NodeClient; import org.opensearch.cluster.metadata.AliasMetadata; +import org.opensearch.common.action.ActionFuture; import org.opensearch.common.settings.Settings; import org.opensearch.index.IndexNotFoundException; import org.opensearch.index.IndexSettings; import org.opensearch.sql.opensearch.mapping.IndexMapping; import 
org.opensearch.sql.opensearch.request.OpenSearchRequest; +import org.opensearch.sql.opensearch.request.OpenSearchScrollRequest; import org.opensearch.sql.opensearch.response.OpenSearchResponse; /** OpenSearch connection by node client. */ @@ -155,20 +159,32 @@ public List indices() { */ @Override public Map meta() { - return ImmutableMap.of(META_CLUSTER_NAME, client.settings().get("cluster.name", "opensearch")); + return ImmutableMap.of( + META_CLUSTER_NAME, + client.settings().get("cluster.name", "opensearch"), + "plugins.sql.pagination.api", + client.settings().get("plugins.sql.pagination.api", "true")); } @Override public void cleanup(OpenSearchRequest request) { - request.clean( - scrollId -> { - try { - client.prepareClearScroll().addScrollId(scrollId).get(); - } catch (Exception e) { - throw new IllegalStateException( - "Failed to clean up resources for search request " + request, e); - } - }); + if (request instanceof OpenSearchScrollRequest) { + request.clean( + scrollId -> { + try { + client.prepareClearScroll().addScrollId(scrollId).get(); + } catch (Exception e) { + throw new IllegalStateException( + "Failed to clean up resources for search request " + request, e); + } + }); + } else { + request.clean( + pitId -> { + DeletePitRequest deletePitRequest = new DeletePitRequest(pitId); + deletePit(deletePitRequest); + }); + } } @Override @@ -181,4 +197,27 @@ public void schedule(Runnable task) { public NodeClient getNodeClient() { return client; } + + @Override + public String createPit(CreatePitRequest createPitRequest) { + ActionFuture execute = + this.client.execute(CreatePitAction.INSTANCE, createPitRequest); + try { + CreatePitResponse pitResponse = execute.get(); + return pitResponse.getId(); + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException("Error occurred while creating PIT for new engine SQL query", e); + } + } + + @Override + public void deletePit(DeletePitRequest deletePitRequest) { + ActionFuture execute = + this.client.execute(DeletePitAction.INSTANCE, deletePitRequest); + try { + DeletePitResponse deletePitResponse = execute.get(); + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException("Error occurred while deleting PIT.", e); + } + } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchRestClient.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchRestClient.java index b6106982a7..5cb6a69918 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchRestClient.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/client/OpenSearchRestClient.java @@ -19,7 +19,7 @@ import org.opensearch.action.admin.cluster.settings.ClusterGetSettingsRequest; import org.opensearch.action.admin.indices.settings.get.GetSettingsRequest; import org.opensearch.action.admin.indices.settings.get.GetSettingsResponse; -import org.opensearch.action.search.ClearScrollRequest; +import org.opensearch.action.search.*; import org.opensearch.client.RequestOptions; import org.opensearch.client.RestHighLevelClient; import org.opensearch.client.indices.CreateIndexRequest; @@ -32,6 +32,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.sql.opensearch.mapping.IndexMapping; import org.opensearch.sql.opensearch.request.OpenSearchRequest; +import org.opensearch.sql.opensearch.request.OpenSearchScrollRequest; import org.opensearch.sql.opensearch.response.OpenSearchResponse; /** @@ -166,6 +167,8 @@ public Map meta() { final 
Settings defaultSettings = client.cluster().getSettings(request, RequestOptions.DEFAULT).getDefaultSettings(); builder.put(META_CLUSTER_NAME, defaultSettings.get("cluster.name", "opensearch")); + builder.put( + "plugins.sql.pagination.api", defaultSettings.get("plugins.sql.pagination.api", "true")); return builder.build(); } catch (IOException e) { throw new IllegalStateException("Failed to get cluster meta info", e); @@ -174,17 +177,25 @@ public Map meta() { @Override public void cleanup(OpenSearchRequest request) { - request.clean( - scrollId -> { - try { - ClearScrollRequest clearRequest = new ClearScrollRequest(); - clearRequest.addScrollId(scrollId); - client.clearScroll(clearRequest, RequestOptions.DEFAULT); - } catch (IOException e) { - throw new IllegalStateException( - "Failed to clean up resources for search request " + request, e); - } - }); + if (request instanceof OpenSearchScrollRequest) { + request.clean( + scrollId -> { + try { + ClearScrollRequest clearRequest = new ClearScrollRequest(); + clearRequest.addScrollId(scrollId); + client.clearScroll(clearRequest, RequestOptions.DEFAULT); + } catch (IOException e) { + throw new IllegalStateException( + "Failed to clean up resources for search request " + request, e); + } + }); + } else { + request.clean( + pitId -> { + DeletePitRequest deletePitRequest = new DeletePitRequest(pitId); + deletePit(deletePitRequest); + }); + } } @Override @@ -196,4 +207,25 @@ public void schedule(Runnable task) { public NodeClient getNodeClient() { throw new UnsupportedOperationException("Unsupported method."); } + + @Override + public String createPit(CreatePitRequest createPitRequest) { + try { + CreatePitResponse createPitResponse = + client.createPit(createPitRequest, RequestOptions.DEFAULT); + return createPitResponse.getId(); + } catch (IOException e) { + throw new RuntimeException("Error occurred while creating PIT for new engine SQL query", e); + } + } + + @Override + public void deletePit(DeletePitRequest deletePitRequest) { + try { + DeletePitResponse deletePitResponse = + client.deletePit(deletePitRequest, RequestOptions.DEFAULT); + } catch (IOException e) { + throw new RuntimeException("Error occurred while creating PIT for new engine SQL query", e); + } + } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java index ddbba61260..6c8912be86 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataType.java @@ -26,7 +26,7 @@ public enum MappingType { Invalid(null, ExprCoreType.UNKNOWN), Text("text", ExprCoreType.UNKNOWN), Keyword("keyword", ExprCoreType.STRING), - Ip("ip", ExprCoreType.UNKNOWN), + Ip("ip", ExprCoreType.IP), GeoPoint("geo_point", ExprCoreType.UNKNOWN), Binary("binary", ExprCoreType.UNKNOWN), Date("date", ExprCoreType.TIMESTAMP), @@ -62,19 +62,23 @@ public String toString() { @EqualsAndHashCode.Exclude @Getter protected MappingType mappingType; // resolved ExprCoreType - protected ExprCoreType exprCoreType; + @Getter protected ExprCoreType exprCoreType; /** * Get a simplified type {@link ExprCoreType} if possible. To avoid returning `UNKNOWN` for - * `OpenSearch*Type`s, e.g. for IP, returns itself. + * `OpenSearch*Type`s, e.g. for IP, returns itself. 
If the `exprCoreType` is {@link + * ExprCoreType#DATE}, {@link ExprCoreType#TIMESTAMP}, {@link ExprCoreType#TIME}, or {@link + * ExprCoreType#UNKNOWN}, it returns the current instance; otherwise, it returns `exprCoreType`. * * @return An {@link ExprType}. */ public ExprType getExprType() { - if (exprCoreType != ExprCoreType.UNKNOWN) { - return exprCoreType; - } - return this; + return (exprCoreType == ExprCoreType.DATE + || exprCoreType == ExprCoreType.TIMESTAMP + || exprCoreType == ExprCoreType.TIME + || exprCoreType == ExprCoreType.UNKNOWN) + ? this + : exprCoreType; } /** @@ -156,8 +160,6 @@ public static OpenSearchDataType of(MappingType mappingType, Map return OpenSearchGeoPointType.of(); case Binary: return OpenSearchBinaryType.of(); - case Ip: - return OpenSearchIpType.of(); case Date: case DateNanos: // Default date formatter is used when "" is passed as the second parameter @@ -230,6 +232,9 @@ public String legacyTypeName() { if (mappingType == null) { return exprCoreType.typeName(); } + if (mappingType.toString().equalsIgnoreCase("DATE")) { + return exprCoreType.typeName(); + } return mappingType.toString().toUpperCase(); } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDateType.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDateType.java index 7e6bee77c2..5ffce655d0 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDateType.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchDateType.java @@ -11,11 +11,16 @@ import static org.opensearch.sql.data.type.ExprCoreType.TIME; import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP; +import java.time.ZoneOffset; +import java.time.ZonedDateTime; +import java.time.temporal.TemporalAccessor; import java.util.List; import java.util.Objects; import java.util.stream.Collectors; +import java.util.stream.Stream; import lombok.EqualsAndHashCode; import org.opensearch.common.time.DateFormatter; +import org.opensearch.common.time.DateFormatters; import org.opensearch.common.time.FormatNames; import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; @@ -137,6 +142,11 @@ public class OpenSearchDateType extends OpenSearchDataType { private static final String CUSTOM_FORMAT_DATE_SYMBOLS = "FecEWwYqQgdMLDyuG"; + private static final List OPENSEARCH_DEFAULT_FORMATTERS = + Stream.of("strict_date_time_no_millis", "strict_date_optional_time", "epoch_millis") + .map(DateFormatter::forPattern) + .toList(); + @EqualsAndHashCode.Exclude private final List formats; private OpenSearchDateType() { @@ -235,6 +245,59 @@ public List getAllCustomFormatters() { .collect(Collectors.toList()); } + /** + * Retrieves a list of custom formatters and OpenSearch named formatters defined by the user, and + * attempts to parse the given date/time string using these formatters. + * + * @param dateTime The date/time string to parse. + * @return A ZonedDateTime representing the parsed date/time in UTC, or null if parsing fails. 
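getParsedDateTime() above tries each user-defined custom and named formatter in turn and falls back to a small default list when the mapping declares no format. The same try-then-fall-back pattern, sketched with plain java.time rather than OpenSearch's DateFormatter/DateFormatters API so it stands alone; the class name is illustrative.

import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.List;

final class DateParseSketch {
  private static final List<DateTimeFormatter> DEFAULTS =
      List.of(DateTimeFormatter.ISO_OFFSET_DATE_TIME, DateTimeFormatter.ISO_LOCAL_DATE_TIME);

  static ZonedDateTime parseOrNull(String text, List<DateTimeFormatter> userFormatters) {
    List<DateTimeFormatter> candidates = userFormatters.isEmpty() ? DEFAULTS : userFormatters;
    for (DateTimeFormatter formatter : candidates) {
      try {
        // Normalize to UTC, analogous to withZoneSameLocal(ZoneOffset.UTC) in the real method.
        return LocalDateTime.parse(text, formatter).atZone(ZoneOffset.UTC);
      } catch (DateTimeParseException ignored) {
        // Keep trying the remaining formatters, as getParsedDateTime() does.
      }
    }
    return null; // no formatter matched
  }
}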
+ */ + public ZonedDateTime getParsedDateTime(String dateTime) { + List dateFormatters = + Stream.concat(this.getAllNamedFormatters().stream(), this.getAllCustomFormatters().stream()) + .collect(Collectors.toList()); + ZonedDateTime zonedDateTime = null; + + // check if dateFormatters are empty, then set default ones + if (dateFormatters.isEmpty()) { + dateFormatters = OPENSEARCH_DEFAULT_FORMATTERS; + } + // parse using OpenSearch DateFormatters + for (DateFormatter formatter : dateFormatters) { + try { + TemporalAccessor accessor = formatter.parse(dateTime); + zonedDateTime = DateFormatters.from(accessor).withZoneSameLocal(ZoneOffset.UTC); + break; + } catch (IllegalArgumentException ignored) { + // nothing to do, try another format + } + } + return zonedDateTime; + } + + /** + * Returns a formatted date string using the internal formatter, if available. + * + * @param accessor The TemporalAccessor object containing the date/time information. + * @return A formatted date string if a formatter is available, otherwise null. + */ + public String getFormattedDate(TemporalAccessor accessor) { + if (hasNoFormatter()) { + return OPENSEARCH_DEFAULT_FORMATTERS.get(0).format(accessor); + } + // Use the first available format string to create the formatter + return DateFormatter.forPattern(this.formats.get(0)).format(accessor); + } + + /** + * Checks if the formatter is not initialized. + * + * @return True if the formatter is not set, otherwise false. + */ + public boolean hasNoFormatter() { + return this.formats.isEmpty(); + } + /** * Retrieves a list of named formatters that format for dates. * diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchIpType.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchIpType.java deleted file mode 100644 index 22581ec28c..0000000000 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/type/OpenSearchIpType.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.opensearch.data.type; - -import static org.opensearch.sql.data.type.ExprCoreType.UNKNOWN; - -import lombok.EqualsAndHashCode; - -/** - * The type of an ip value. 
See doc - */ -@EqualsAndHashCode(callSuper = false) -public class OpenSearchIpType extends OpenSearchDataType { - - private static final OpenSearchIpType instance = new OpenSearchIpType(); - - private OpenSearchIpType() { - super(MappingType.Ip); - exprCoreType = UNKNOWN; - } - - public static OpenSearchIpType of() { - return OpenSearchIpType.instance; - } - - @Override - protected OpenSearchDataType cloneEmpty() { - return instance; - } -} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java index bdb15428e1..4446c1f979 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java @@ -7,11 +7,19 @@ import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.Iterators; +import java.io.IOException; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.Map; import lombok.RequiredArgsConstructor; import org.apache.commons.lang3.tuple.Pair; +import org.opensearch.OpenSearchParseException; +import org.opensearch.common.geo.GeoPoint; +import org.opensearch.common.geo.GeoUtils; +import org.opensearch.common.xcontent.json.JsonXContentParser; +import org.opensearch.core.xcontent.DeprecationHandler; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentParser; /** The Implementation of Content to represent {@link JsonNode}. */ @RequiredArgsConstructor @@ -122,25 +130,17 @@ public Object objectValue() { @Override public Pair geoValue() { final JsonNode value = value(); - if (value.has("lat") && value.has("lon")) { - Double lat = 0d; - Double lon = 0d; - try { - lat = extractDoubleValue(value.get("lat")); - } catch (Exception exception) { - throw new IllegalStateException( - "latitude must be number value, but got value: " + value.get("lat")); - } - try { - lon = extractDoubleValue(value.get("lon")); - } catch (Exception exception) { - throw new IllegalStateException( - "longitude must be number value, but got value: " + value.get("lon")); - } - return Pair.of(lat, lon); - } else { - throw new IllegalStateException( - "geo point must in format of {\"lat\": number, \"lon\": number}"); + try (XContentParser parser = + new JsonXContentParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.IGNORE_DEPRECATIONS, + value.traverse())) { + parser.nextToken(); + GeoPoint point = new GeoPoint(); + GeoUtils.parseGeoPoint(parser, point, true); + return Pair.of(point.getLat(), point.getLon()); + } catch (IOException ex) { + throw new OpenSearchParseException("error parsing geo point", ex); } } @@ -148,16 +148,4 @@ public Pair geoValue() { private JsonNode value() { return value; } - - /** Get doubleValue from JsonNode if possible. 
*/ - private Double extractDoubleValue(JsonNode node) { - if (node.isTextual()) { - return Double.valueOf(node.textValue()); - } - if (node.isNumber()) { - return node.doubleValue(); - } else { - throw new IllegalStateException("node must be a number"); - } - } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprIpValue.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprIpValue.java deleted file mode 100644 index 30b3784bfc..0000000000 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprIpValue.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.opensearch.data.value; - -import java.util.Objects; -import lombok.RequiredArgsConstructor; -import org.opensearch.sql.data.model.AbstractExprValue; -import org.opensearch.sql.data.model.ExprValue; -import org.opensearch.sql.data.type.ExprType; -import org.opensearch.sql.opensearch.data.type.OpenSearchIpType; - -/** - * OpenSearch IP ExprValue
- * Todo, add this to avoid the unknown value type exception, the implementation will be changed. - */ -@RequiredArgsConstructor -public class OpenSearchExprIpValue extends AbstractExprValue { - - private final String ip; - - @Override - public Object value() { - return ip; - } - - @Override - public ExprType type() { - return OpenSearchIpType.of(); - } - - @Override - public int compare(ExprValue other) { - return ip.compareTo(((OpenSearchExprIpValue) other).ip); - } - - @Override - public boolean equal(ExprValue other) { - return ip.equals(((OpenSearchExprIpValue) other).ip); - } - - @Override - public int hashCode() { - return Objects.hashCode(ip); - } -} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java index 3341e01ab2..68c6fda617 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java @@ -11,6 +11,7 @@ import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import static org.opensearch.sql.data.type.ExprCoreType.FLOAT; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; +import static org.opensearch.sql.data.type.ExprCoreType.IP; import static org.opensearch.sql.data.type.ExprCoreType.LONG; import static org.opensearch.sql.data.type.ExprCoreType.STRING; import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; @@ -39,6 +40,7 @@ import java.util.function.BiFunction; import lombok.Getter; import lombok.Setter; +import org.opensearch.OpenSearchParseException; import org.opensearch.common.time.DateFormatter; import org.opensearch.common.time.DateFormatters; import org.opensearch.common.time.FormatNames; @@ -49,6 +51,7 @@ import org.opensearch.sql.data.model.ExprDoubleValue; import org.opensearch.sql.data.model.ExprFloatValue; import org.opensearch.sql.data.model.ExprIntegerValue; +import org.opensearch.sql.data.model.ExprIpValue; import org.opensearch.sql.data.model.ExprLongValue; import org.opensearch.sql.data.model.ExprNullValue; import org.opensearch.sql.data.model.ExprShortValue; @@ -62,8 +65,6 @@ import org.opensearch.sql.opensearch.data.type.OpenSearchBinaryType; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; import org.opensearch.sql.opensearch.data.type.OpenSearchDateType; -import org.opensearch.sql.opensearch.data.type.OpenSearchGeoPointType; -import org.opensearch.sql.opensearch.data.type.OpenSearchIpType; import org.opensearch.sql.opensearch.data.utils.Content; import org.opensearch.sql.opensearch.data.utils.ObjectContent; import org.opensearch.sql.opensearch.data.utils.OpenSearchJsonContent; @@ -74,6 +75,9 @@ public class OpenSearchExprValueFactory { /** The Mapping of Field and ExprType. */ private final Map typeMapping; + /** Whether to support nested value types (such as arrays) */ + private final boolean fieldTypeTolerance; + /** * Extend existing mapping by new data without overwrite. Called from aggregation only {@see * AggregationQueryBuilder#buildTypeMapping}. @@ -84,9 +88,7 @@ public void extendTypeMapping(Map typeMapping) { for (var field : typeMapping.keySet()) { // Prevent overwriting, because aggregation engine may be not aware // of all niceties of all types. 
- if (!this.typeMapping.containsKey(field)) { - this.typeMapping.put(field, typeMapping.get(field)); - } + this.typeMapping.putIfAbsent(field, typeMapping.get(field)); } } @@ -132,20 +134,18 @@ public void extendTypeMapping(Map typeMapping) { OpenSearchDateType.of(TIMESTAMP), OpenSearchExprValueFactory::createOpenSearchDateType) .put( - OpenSearchDataType.of(OpenSearchDataType.MappingType.Ip), - (c, dt) -> new OpenSearchExprIpValue(c.stringValue())) - .put( - OpenSearchDataType.of(OpenSearchDataType.MappingType.GeoPoint), - (c, dt) -> - new OpenSearchExprGeoPointValue(c.geoValue().getLeft(), c.geoValue().getRight())) + OpenSearchDateType.of(OpenSearchDataType.MappingType.Ip), + (c, dt) -> new ExprIpValue(c.stringValue())) .put( OpenSearchDataType.of(OpenSearchDataType.MappingType.Binary), (c, dt) -> new OpenSearchExprBinaryValue(c.stringValue())) .build(); /** Constructor of OpenSearchExprValueFactory. */ - public OpenSearchExprValueFactory(Map typeMapping) { + public OpenSearchExprValueFactory( + Map typeMapping, boolean fieldTypeTolerance) { this.typeMapping = OpenSearchDataType.traverseAndFlatten(typeMapping); + this.fieldTypeTolerance = fieldTypeTolerance; } /** @@ -168,7 +168,7 @@ public ExprValue construct(String jsonString, boolean supportArrays) { new OpenSearchJsonContent(OBJECT_MAPPER.readTree(jsonString)), TOP_PATH, Optional.of(STRUCT), - supportArrays); + fieldTypeTolerance || supportArrays); } catch (JsonProcessingException e) { throw new IllegalStateException(String.format("invalid json: %s.", jsonString), e); } @@ -193,21 +193,22 @@ private ExprValue parse( return ExprNullValue.of(); } - ExprType type = fieldType.get(); - if (type.equals(OpenSearchDataType.of(OpenSearchDataType.MappingType.Nested)) + final ExprType type = fieldType.get(); + + if (type.equals(OpenSearchDataType.of(OpenSearchDataType.MappingType.GeoPoint))) { + return parseGeoPoint(content, supportArrays); + } else if (type.equals(OpenSearchDataType.of(OpenSearchDataType.MappingType.Nested)) || content.isArray()) { return parseArray(content, field, type, supportArrays); } else if (type.equals(OpenSearchDataType.of(OpenSearchDataType.MappingType.Object)) || type == STRUCT) { return parseStruct(content, field, supportArrays); + } else if (typeActionMap.containsKey(type)) { + return typeActionMap.get(type).apply(content, type); } else { - if (typeActionMap.containsKey(type)) { - return typeActionMap.get(type).apply(content, type); - } else { - throw new IllegalStateException( - String.format( - "Unsupported type: %s for value: %s.", type.typeName(), content.objectValue())); - } + throw new IllegalStateException( + String.format( + "Unsupported type: %s for value: %s.", type.typeName(), content.objectValue())); } } @@ -230,7 +231,7 @@ private Optional type(String field) { private static ExprValue parseDateTimeString(String value, OpenSearchDateType dataType) { List formatters = dataType.getAllNamedFormatters(); formatters.addAll(dataType.getAllCustomFormatters()); - ExprCoreType returnFormat = (ExprCoreType) dataType.getExprType(); + ExprCoreType returnFormat = dataType.getExprCoreType(); for (DateFormatter formatter : formatters) { try { @@ -273,8 +274,7 @@ private static ExprValue parseDateTimeString(String value, OpenSearchDateType da private static ExprValue createOpenSearchDateType(Content value, ExprType type) { OpenSearchDateType dt = (OpenSearchDateType) type; - ExprType returnFormat = dt.getExprType(); - + ExprCoreType returnFormat = dt.getExprCoreType(); if (value.isNumber()) { // isNumber var 
numFormatters = dt.getNumericNamedFormatters(); if (numFormatters.size() > 0 || !dt.hasFormats()) { @@ -287,7 +287,7 @@ private static ExprValue createOpenSearchDateType(Content value, ExprType type) epochMillis = value.longValue(); } Instant instant = Instant.ofEpochMilli(epochMillis); - switch ((ExprCoreType) returnFormat) { + switch (returnFormat) { case TIME: return new ExprTimeValue(LocalTime.from(instant.atZone(ZoneOffset.UTC))); case DATE: @@ -297,7 +297,7 @@ private static ExprValue createOpenSearchDateType(Content value, ExprType type) } } else { // custom format - return parseDateTimeString(value.stringValue(), dt); + return parseDateTimeString(value.objectValue().toString(), dt); } } if (value.isString()) { @@ -363,6 +363,49 @@ private ExprValue parseArray( return new ExprCollectionValue(result); } + /** + * Parse geo point content. + * + * @param content Content to parse. + * @param supportArrays Parsing the whole array or not + * @return Geo point value parsed from content. + */ + private ExprValue parseGeoPoint(Content content, boolean supportArrays) { + // there is only one point in doc. + if (content.isArray() == false) { + final var pair = content.geoValue(); + return new OpenSearchExprGeoPointValue(pair.getLeft(), pair.getRight()); + } + + var elements = content.array(); + var first = elements.next(); + // an array in the [longitude, latitude] format. + if (first.isNumber()) { + double lon = first.doubleValue(); + var second = elements.next(); + if (second.isNumber() == false) { + throw new OpenSearchParseException("lat must be a number, got " + second.objectValue()); + } + return new OpenSearchExprGeoPointValue(second.doubleValue(), lon); + } + + // there are multi points in doc + var pair = first.geoValue(); + var firstPoint = new OpenSearchExprGeoPointValue(pair.getLeft(), pair.getRight()); + if (supportArrays) { + List result = new ArrayList<>(); + result.add(firstPoint); + elements.forEachRemaining( + e -> { + var p = e.geoValue(); + result.add(new OpenSearchExprGeoPointValue(p.getLeft(), p.getRight())); + }); + return new ExprCollectionValue(result); + } else { + return firstPoint; + } + } + /** * Parse inner array value. Can be object type and recurse continues. 
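The geoValue() and parseGeoPoint() changes above delegate format handling to GeoUtils, so the object ({"lat": .., "lon": ..}), "lat,lon" string, and geohash forms are accepted without hand-rolled number extraction, while the numeric [lon, lat] array and multi-point cases are unpacked in parseGeoPoint(). A standalone sketch of that GeoUtils delegation, assuming Jackson for the input JSON; the class and method names are illustrative.

import java.io.IOException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.opensearch.common.geo.GeoPoint;
import org.opensearch.common.geo.GeoUtils;
import org.opensearch.common.xcontent.json.JsonXContentParser;
import org.opensearch.core.xcontent.DeprecationHandler;
import org.opensearch.core.xcontent.NamedXContentRegistry;
import org.opensearch.core.xcontent.XContentParser;

final class GeoPointParseSketch {
  static GeoPoint parse(String json) throws IOException {
    JsonNode node = new ObjectMapper().readTree(json);
    try (XContentParser parser =
        new JsonXContentParser(
            NamedXContentRegistry.EMPTY, DeprecationHandler.IGNORE_DEPRECATIONS, node.traverse())) {
      parser.nextToken();
      GeoPoint point = new GeoPoint();
      GeoUtils.parseGeoPoint(parser, point, true);
      return point;
    }
  }
}

For example, parse("{\"lat\": 40.7, \"lon\": -74.0}") and parse("\"40.7,-74.0\"") should yield the same GeoPoint.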
* @@ -374,11 +417,10 @@ private ExprValue parseArray( */ private ExprValue parseInnerArrayValue( Content content, String prefix, ExprType type, boolean supportArrays) { - if (type instanceof OpenSearchIpType - || type instanceof OpenSearchBinaryType - || type instanceof OpenSearchDateType - || type instanceof OpenSearchGeoPointType) { + if (type instanceof OpenSearchBinaryType || type instanceof OpenSearchDateType) { return parse(content, prefix, Optional.of(type), supportArrays); + } else if (content.isString() && type.equals(OpenSearchDataType.of(IP))) { + return parse(content, prefix, Optional.of(OpenSearchDataType.of(IP)), supportArrays); } else if (content.isString()) { return parse(content, prefix, Optional.of(OpenSearchDataType.of(STRING)), supportArrays); } else if (content.isLong()) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java index 0905c2f4b4..358bc10ab4 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtector.java @@ -23,6 +23,8 @@ import org.opensearch.sql.planner.physical.RemoveOperator; import org.opensearch.sql.planner.physical.RenameOperator; import org.opensearch.sql.planner.physical.SortOperator; +import org.opensearch.sql.planner.physical.TakeOrderedOperator; +import org.opensearch.sql.planner.physical.TrendlineOperator; import org.opensearch.sql.planner.physical.ValuesOperator; import org.opensearch.sql.planner.physical.WindowOperator; import org.opensearch.sql.storage.TableScanOperator; @@ -130,6 +132,17 @@ public PhysicalPlan visitSort(SortOperator node, Object context) { return doProtect(new SortOperator(visitInput(node.getInput(), context), node.getSortList())); } + /** Decorate with {@link ResourceMonitorPlan}. */ + @Override + public PhysicalPlan visitTakeOrdered(TakeOrderedOperator node, Object context) { + return doProtect( + new TakeOrderedOperator( + visitInput(node.getInput(), context), + node.getLimit(), + node.getOffset(), + node.getSortList())); + } + /** * Values are a sequence of rows of literal value in memory which doesn't need memory protection. 
*/ @@ -175,6 +188,12 @@ public PhysicalPlan visitML(PhysicalPlan node, Object context) { mlOperator.getNodeClient())); } + @Override + public PhysicalPlan visitTrendline(TrendlineOperator node, Object context) { + return doProtect( + new TrendlineOperator(visitInput(node.getInput(), context), node.getComputations())); + } + PhysicalPlan visitInput(PhysicalPlan node, Object context) { if (null == node) { return node; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/ResourceMonitorPlan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/ResourceMonitorPlan.java index e3bc48ba72..150a749358 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/ResourceMonitorPlan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/protector/ResourceMonitorPlan.java @@ -44,7 +44,7 @@ public R accept(PhysicalPlanNodeVisitor visitor, C context) { @Override public void open() { if (!this.monitor.isHealthy()) { - throw new IllegalStateException("resource is not enough to run the query, quit."); + throw new IllegalStateException("insufficient resources to run the query, quit."); } delegate.open(); } @@ -68,7 +68,7 @@ public boolean hasNext() { public ExprValue next() { boolean shouldCheck = (++nextCallCount % NUMBER_OF_NEXT_CALL_TO_CHECK == 0); if (shouldCheck && !this.monitor.isHealthy()) { - throw new IllegalStateException("resource is not enough to load next row, quit."); + throw new IllegalStateException("insufficient resources to load next row, quit."); } return delegate.next(); } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/OpenSearchMemoryHealthy.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/OpenSearchMemoryHealthy.java index 4b7b6c5dcb..bc038cb42f 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/OpenSearchMemoryHealthy.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/monitor/OpenSearchMemoryHealthy.java @@ -36,7 +36,7 @@ public boolean isMemoryHealthy(long limitBytes) { } else { log.warn("Memory usage:{} exceed limit:{}", memoryUsage, limitBytes); if (randomFail.shouldFail()) { - log.warn("Fast failure the current request"); + log.warn("Fast failing the current request"); throw new MemoryUsageExceedFastFailureException(); } else { throw new MemoryUsageExceedException(); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchQueryRequest.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchQueryRequest.java index 6447a3ff65..3461660795 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchQueryRequest.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchQueryRequest.java @@ -5,21 +5,36 @@ package org.opensearch.sql.opensearch.request; +import static org.opensearch.core.xcontent.DeprecationHandler.IGNORE_DEPRECATIONS; +import static org.opensearch.search.sort.FieldSortBuilder.DOC_FIELD_NAME; +import static org.opensearch.search.sort.SortOrder.ASC; +import static org.opensearch.sql.opensearch.storage.OpenSearchIndex.METADATA_FIELD_ID; + import java.io.IOException; +import java.util.Collections; import java.util.List; import java.util.function.Consumer; import java.util.function.Function; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.ToString; -import org.opensearch.action.search.SearchRequest; -import org.opensearch.action.search.SearchResponse; 
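ResourceMonitorPlan above is a decorator: it verifies the resource monitor is healthy before open() and periodically before next(), and otherwise delegates to the wrapped operator, which is what doProtect(...) applies to the sort, take-ordered, and trendline operators. The shape of that pattern, reduced to a self-contained sketch with hypothetical interface names rather than the plugin's actual types:

interface Monitor { boolean isHealthy(); }
interface Operator { void open(); boolean hasNext(); String next(); }

final class MonitoredOperator implements Operator {
  private final Operator delegate;
  private final Monitor monitor;

  MonitoredOperator(Operator delegate, Monitor monitor) {
    this.delegate = delegate;
    this.monitor = monitor;
  }

  @Override public void open() {
    if (!monitor.isHealthy()) {
      throw new IllegalStateException("insufficient resources to run the query, quit.");
    }
    delegate.open();
  }

  @Override public boolean hasNext() { return delegate.hasNext(); }

  @Override public String next() {
    // The real plan only re-checks every N calls; checking every call keeps the sketch short.
    if (!monitor.isHealthy()) {
      throw new IllegalStateException("insufficient resources to load next row, quit.");
    }
    return delegate.next();
  }
}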
-import org.opensearch.action.search.SearchScrollRequest; +import org.opensearch.action.search.*; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.search.SearchHit; import org.opensearch.search.SearchHits; +import org.opensearch.search.SearchModule; +import org.opensearch.search.builder.PointInTimeBuilder; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.sql.opensearch.data.value.OpenSearchExprValueFactory; import org.opensearch.sql.opensearch.response.OpenSearchResponse; +import org.opensearch.sql.opensearch.storage.OpenSearchIndex; +import org.opensearch.sql.opensearch.storage.OpenSearchStorageEngine; /** * OpenSearch search request. This has to be stateful because it needs to: @@ -36,7 +51,7 @@ public class OpenSearchQueryRequest implements OpenSearchRequest { private final IndexName indexName; /** Search request source builder. */ - private final SearchSourceBuilder sourceBuilder; + private SearchSourceBuilder sourceBuilder; /** OpenSearchExprValueFactory. */ @EqualsAndHashCode.Exclude @ToString.Exclude @@ -45,9 +60,19 @@ public class OpenSearchQueryRequest implements OpenSearchRequest { /** List of includes expected in the response. */ @EqualsAndHashCode.Exclude @ToString.Exclude private final List includes; + @EqualsAndHashCode.Exclude private boolean needClean = true; + /** Indicate the search already done. */ private boolean searchDone = false; + private String pitId; + + private TimeValue cursorKeepAlive; + + private Object[] searchAfter; + + private SearchResponse searchResponse = null; + /** Constructor of OpenSearchQueryRequest. */ public OpenSearchQueryRequest( String indexName, int size, OpenSearchExprValueFactory factory, List includes) { @@ -78,35 +103,158 @@ public OpenSearchQueryRequest( this.includes = includes; } + /** Constructor of OpenSearchQueryRequest with PIT support. */ + public OpenSearchQueryRequest( + IndexName indexName, + SearchSourceBuilder sourceBuilder, + OpenSearchExprValueFactory factory, + List includes, + TimeValue cursorKeepAlive, + String pitId) { + this.indexName = indexName; + this.sourceBuilder = sourceBuilder; + this.exprValueFactory = factory; + this.includes = includes; + this.cursorKeepAlive = cursorKeepAlive; + this.pitId = pitId; + } + + /** + * Constructs OpenSearchQueryRequest from serialized representation. + * + * @param in stream to read data from. + * @param engine OpenSearchSqlEngine to get node-specific context. + * @throws IOException thrown if reading from input {@code in} fails. 
+ */ + public OpenSearchQueryRequest(StreamInput in, OpenSearchStorageEngine engine) throws IOException { + // Deserialize the SearchSourceBuilder from the string representation + String sourceBuilderString = in.readString(); + + NamedXContentRegistry xContentRegistry = + new NamedXContentRegistry( + new SearchModule(Settings.EMPTY, Collections.emptyList()).getNamedXContents()); + XContentParser parser = + XContentType.JSON + .xContent() + .createParser(xContentRegistry, IGNORE_DEPRECATIONS, sourceBuilderString); + this.sourceBuilder = SearchSourceBuilder.fromXContent(parser); + + cursorKeepAlive = in.readTimeValue(); + pitId = in.readString(); + includes = in.readStringList(); + indexName = new IndexName(in); + + int length = in.readVInt(); + this.searchAfter = new Object[length]; + for (int i = 0; i < length; i++) { + this.searchAfter[i] = in.readGenericValue(); + } + + OpenSearchIndex index = (OpenSearchIndex) engine.getTable(null, indexName.toString()); + exprValueFactory = + new OpenSearchExprValueFactory( + index.getFieldOpenSearchTypes(), index.isFieldTypeTolerance()); + } + @Override public OpenSearchResponse search( Function searchAction, Function scrollAction) { + if (this.pitId == null) { + // When SearchRequest doesn't contain PitId, fetch single page request + if (searchDone) { + return new OpenSearchResponse(SearchHits.empty(), exprValueFactory, includes); + } else { + searchDone = true; + return new OpenSearchResponse( + searchAction.apply( + new SearchRequest().indices(indexName.getIndexNames()).source(sourceBuilder)), + exprValueFactory, + includes); + } + } else { + // Search with PIT instead of scroll API + return searchWithPIT(searchAction); + } + } + + public OpenSearchResponse searchWithPIT(Function searchAction) { + OpenSearchResponse openSearchResponse; if (searchDone) { - return new OpenSearchResponse(SearchHits.empty(), exprValueFactory, includes); + openSearchResponse = new OpenSearchResponse(SearchHits.empty(), exprValueFactory, includes); } else { - searchDone = true; - return new OpenSearchResponse( - searchAction.apply( - new SearchRequest().indices(indexName.getIndexNames()).source(sourceBuilder)), - exprValueFactory, - includes); + this.sourceBuilder.pointInTimeBuilder(new PointInTimeBuilder(this.pitId)); + this.sourceBuilder.timeout(cursorKeepAlive); + // check for search after + if (searchAfter != null) { + this.sourceBuilder.searchAfter(searchAfter); + } + // Set sort field for search_after + if (this.sourceBuilder.sorts() == null) { + this.sourceBuilder.sort(DOC_FIELD_NAME, ASC); + // Workaround to preserve sort location more exactly, + // see https://github.com/opensearch-project/sql/pull/3061 + this.sourceBuilder.sort(METADATA_FIELD_ID, ASC); + } + SearchRequest searchRequest = new SearchRequest().source(this.sourceBuilder); + this.searchResponse = searchAction.apply(searchRequest); + + openSearchResponse = new OpenSearchResponse(this.searchResponse, exprValueFactory, includes); + + needClean = openSearchResponse.isEmpty(); + searchDone = openSearchResponse.isEmpty(); + SearchHit[] searchHits = this.searchResponse.getHits().getHits(); + if (searchHits != null && searchHits.length > 0) { + searchAfter = searchHits[searchHits.length - 1].getSortValues(); + this.sourceBuilder.searchAfter(searchAfter); + } } + return openSearchResponse; } @Override public void clean(Consumer cleanAction) { - // do nothing. + try { + // clean on the last page only, to prevent deleting the PitId in the middle of paging. 
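Taken together, searchWithPIT and clean above implement a search_after loop over a point in time. A condensed sketch of that lifecycle, assuming a Function<SearchRequest, SearchResponse> searchAction, an OpenSearchClient exposing createPit/deletePit (as exercised by the client tests later in this diff), a page size, a keep-alive, and the static imports already added to this file:

    // Sketch of the PIT + search_after paging flow (mirrors searchWithPIT/clean above).
    String pitId = client.createPit(
        new CreatePitRequest(keepAlive, false, indexName.getIndexNames()));
    SearchSourceBuilder source = new SearchSourceBuilder()
        .size(pageSize)
        .timeout(keepAlive)
        .pointInTimeBuilder(new PointInTimeBuilder(pitId))
        .sort(DOC_FIELD_NAME, ASC)        // stable tiebreakers so search_after can resume
        .sort(METADATA_FIELD_ID, ASC);
    Object[] searchAfter = null;
    while (true) {
      if (searchAfter != null) {
        source.searchAfter(searchAfter);
      }
      SearchResponse response = searchAction.apply(new SearchRequest().source(source));
      SearchHit[] hits = response.getHits().getHits();
      if (hits == null || hits.length == 0) {
        break;                            // empty page: paging is done
      }
      // ... convert hits to ExprValues ...
      searchAfter = hits[hits.length - 1].getSortValues();
    }
    client.deletePit(new DeletePitRequest(pitId));  // clean up the PIT on the last page only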
+ if (this.pitId != null && needClean) { + cleanAction.accept(this.pitId); + searchDone = true; + } + } finally { + this.pitId = null; + } } @Override public boolean hasAnotherBatch() { + if (this.pitId != null) { + return !needClean; + } return false; } @Override public void writeTo(StreamOutput out) throws IOException { - throw new UnsupportedOperationException( - "OpenSearchQueryRequest serialization is not implemented."); + if (this.pitId != null) { + // Convert SearchSourceBuilder to XContent and write it as a string + out.writeString(sourceBuilder.toString()); + + out.writeTimeValue(sourceBuilder.timeout()); + out.writeString(sourceBuilder.pointInTimeBuilder().getId()); + out.writeStringCollection(includes); + indexName.writeTo(out); + + // Serialize the searchAfter array + if (searchAfter != null) { + out.writeVInt(searchAfter.length); + for (Object obj : searchAfter) { + out.writeGenericValue(obj); + } + } + } else { + // OpenSearch Query request without PIT for single page requests + throw new UnsupportedOperationException( + "OpenSearchQueryRequest serialization is not implemented."); + } } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java index 1df3dcb183..6fa9b17697 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java @@ -26,6 +26,7 @@ import lombok.ToString; import org.apache.commons.lang3.tuple.Pair; import org.apache.lucene.search.join.ScoreMode; +import org.opensearch.action.search.CreatePitRequest; import org.opensearch.common.unit.TimeValue; import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.InnerHitBuilder; @@ -39,9 +40,11 @@ import org.opensearch.search.sort.SortBuilder; import org.opensearch.search.sort.SortBuilders; import org.opensearch.sql.ast.expression.Literal; +import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.ReferenceExpression; +import org.opensearch.sql.opensearch.client.OpenSearchClient; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; import org.opensearch.sql.opensearch.data.value.OpenSearchExprValueFactory; import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; @@ -67,10 +70,13 @@ public class OpenSearchRequestBuilder { private int startFrom = 0; + private final Settings settings; + /** Constructor. */ public OpenSearchRequestBuilder( - int requestedTotalSize, OpenSearchExprValueFactory exprValueFactory) { + int requestedTotalSize, OpenSearchExprValueFactory exprValueFactory, Settings settings) { this.requestedTotalSize = requestedTotalSize; + this.settings = settings; this.sourceBuilder = new SearchSourceBuilder() .from(startFrom) @@ -82,18 +88,65 @@ public OpenSearchRequestBuilder( /** * Build DSL request. 
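For context, the call sites change accordingly: OpenSearchIndex (further down in this diff) now passes Settings into the builder's constructor and the client into build(), so a PIT can be created when search_after paging is enabled. A hedged usage sketch, assuming querySizeLimit, factory, settings, indexName, maxResultWindow, keepAlive, and client are in scope as they are in OpenSearchIndex:

    // Sketch of the updated call pattern (mirrors OpenSearchIndex.createScanBuilder below).
    OpenSearchRequestBuilder builder =
        new OpenSearchRequestBuilder(querySizeLimit, factory, settings);
    // ... push down filters, sorts, projections, limit ...
    OpenSearchRequest request =
        builder.build(indexName, maxResultWindow, keepAlive, client); // client creates the PIT if needed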
* - * @return query request or scroll request + * @return query request with PIT or scroll request */ public OpenSearchRequest build( - OpenSearchRequest.IndexName indexName, int maxResultWindow, TimeValue scrollTimeout) { + OpenSearchRequest.IndexName indexName, + int maxResultWindow, + TimeValue cursorKeepAlive, + OpenSearchClient client) { + if (this.settings.getSettingValue(Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER)) { + return buildRequestWithPit(indexName, maxResultWindow, cursorKeepAlive, client); + } else { + return buildRequestWithScroll(indexName, maxResultWindow, cursorKeepAlive); + } + } + + private OpenSearchRequest buildRequestWithPit( + OpenSearchRequest.IndexName indexName, + int maxResultWindow, + TimeValue cursorKeepAlive, + OpenSearchClient client) { + int size = requestedTotalSize; + FetchSourceContext fetchSource = this.sourceBuilder.fetchSource(); + List includes = fetchSource != null ? Arrays.asList(fetchSource.includes()) : List.of(); + + if (pageSize == null) { + if (startFrom + size > maxResultWindow) { + sourceBuilder.size(maxResultWindow - startFrom); + // Search with PIT request + String pitId = createPit(indexName, cursorKeepAlive, client); + return new OpenSearchQueryRequest( + indexName, sourceBuilder, exprValueFactory, includes, cursorKeepAlive, pitId); + } else { + sourceBuilder.from(startFrom); + sourceBuilder.size(requestedTotalSize); + // Search with non-Pit request + return new OpenSearchQueryRequest(indexName, sourceBuilder, exprValueFactory, includes); + } + } else { + if (startFrom != 0) { + throw new UnsupportedOperationException("Non-zero offset is not supported with pagination"); + } + sourceBuilder.size(pageSize); + // Search with PIT request + String pitId = createPit(indexName, cursorKeepAlive, client); + return new OpenSearchQueryRequest( + indexName, sourceBuilder, exprValueFactory, includes, cursorKeepAlive, pitId); + } + } + + private OpenSearchRequest buildRequestWithScroll( + OpenSearchRequest.IndexName indexName, int maxResultWindow, TimeValue cursorKeepAlive) { int size = requestedTotalSize; FetchSourceContext fetchSource = this.sourceBuilder.fetchSource(); List includes = fetchSource != null ? 
Arrays.asList(fetchSource.includes()) : List.of(); + if (pageSize == null) { if (startFrom + size > maxResultWindow) { sourceBuilder.size(maxResultWindow - startFrom); return new OpenSearchScrollRequest( - indexName, scrollTimeout, sourceBuilder, exprValueFactory, includes); + indexName, cursorKeepAlive, sourceBuilder, exprValueFactory, includes); } else { sourceBuilder.from(startFrom); sourceBuilder.size(requestedTotalSize); @@ -105,10 +158,18 @@ public OpenSearchRequest build( } sourceBuilder.size(pageSize); return new OpenSearchScrollRequest( - indexName, scrollTimeout, sourceBuilder, exprValueFactory, includes); + indexName, cursorKeepAlive, sourceBuilder, exprValueFactory, includes); } } + private String createPit( + OpenSearchRequest.IndexName indexName, TimeValue cursorKeepAlive, OpenSearchClient client) { + // Create PIT ID for request + CreatePitRequest createPitRequest = + new CreatePitRequest(cursorKeepAlive, false, indexName.getIndexNames()); + return client.createPit(createPitRequest); + } + boolean isBoolFilterQuery(QueryBuilder current) { return (current instanceof BoolQueryBuilder); } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchScrollRequest.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchScrollRequest.java index c9490f0767..d793b53fca 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchScrollRequest.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchScrollRequest.java @@ -178,6 +178,8 @@ public OpenSearchScrollRequest(StreamInput in, OpenSearchStorageEngine engine) includes = in.readStringList(); indexName = new IndexName(in); OpenSearchIndex index = (OpenSearchIndex) engine.getTable(null, indexName.toString()); - exprValueFactory = new OpenSearchExprValueFactory(index.getFieldOpenSearchTypes()); + exprValueFactory = + new OpenSearchExprValueFactory( + index.getFieldOpenSearchTypes(), index.isFieldTypeTolerance()); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/ErrorMessage.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/ErrorMessage.java index bbcacc1d2c..fbe6d3cd72 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/ErrorMessage.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/ErrorMessage.java @@ -12,7 +12,7 @@ /** Error Message. */ public class ErrorMessage { - protected Throwable exception; + protected final Throwable exception; private final int status; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/OpenSearchErrorMessage.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/OpenSearchErrorMessage.java index 87a374d353..a712ceaedf 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/OpenSearchErrorMessage.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/error/OpenSearchErrorMessage.java @@ -13,8 +13,8 @@ /** OpenSearch Error Message. */ public class OpenSearchErrorMessage extends ErrorMessage { - OpenSearchErrorMessage(OpenSearchException exception, int status) { - super(exception, status); + OpenSearchErrorMessage(OpenSearchException exception, int defaultStatus) { + super(exception, exception.status() != null ? 
exception.status().getStatus() : defaultStatus); } @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/setting/OpenSearchSettings.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/setting/OpenSearchSettings.java index c493aa46e5..612771eea4 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/setting/OpenSearchSettings.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/setting/OpenSearchSettings.java @@ -28,6 +28,7 @@ import org.opensearch.common.settings.Setting; import org.opensearch.common.unit.MemorySizeValue; import org.opensearch.common.unit.TimeValue; +import org.opensearch.index.IndexSettings; import org.opensearch.sql.common.setting.LegacySettings; import org.opensearch.sql.common.setting.Settings; @@ -70,6 +71,13 @@ public class OpenSearchSettings extends Settings { Setting.Property.NodeScope, Setting.Property.Dynamic); + public static final Setting SQL_PAGINATION_API_SEARCH_AFTER_SETTING = + Setting.boolSetting( + Key.SQL_PAGINATION_API_SEARCH_AFTER.getKeyValue(), + true, + Setting.Property.NodeScope, + Setting.Property.Dynamic); + public static final Setting PPL_ENABLED_SETTING = Setting.boolSetting( Key.PPL_ENABLED.getKeyValue(), @@ -90,7 +98,7 @@ public class OpenSearchSettings extends Settings { public static final Setting QUERY_SIZE_LIMIT_SETTING = Setting.intSetting( Key.QUERY_SIZE_LIMIT.getKeyValue(), - LegacyOpenDistroSettings.QUERY_SIZE_LIMIT_SETTING, + IndexSettings.MAX_RESULT_WINDOW_SETTING, 0, Setting.Property.NodeScope, Setting.Property.Dynamic); @@ -132,6 +140,13 @@ public class OpenSearchSettings extends Settings { Setting.Property.NodeScope, Setting.Property.Dynamic); + public static final Setting DATASOURCE_ENABLED_SETTING = + Setting.boolSetting( + Key.DATASOURCES_ENABLED.getKeyValue(), + true, + Setting.Property.NodeScope, + Setting.Property.Dynamic); + public static final Setting ASYNC_QUERY_ENABLED_SETTING = Setting.boolSetting( Key.ASYNC_QUERY_ENABLED.getKeyValue(), @@ -139,6 +154,19 @@ public class OpenSearchSettings extends Settings { Setting.Property.NodeScope, Setting.Property.Dynamic); + public static final Setting ASYNC_QUERY_EXTERNAL_SCHEDULER_ENABLED_SETTING = + Setting.boolSetting( + Key.ASYNC_QUERY_EXTERNAL_SCHEDULER_ENABLED.getKeyValue(), + true, + Setting.Property.NodeScope, + Setting.Property.Dynamic); + + public static final Setting ASYNC_QUERY_EXTERNAL_SCHEDULER_INTERVAL_SETTING = + Setting.simpleString( + Key.ASYNC_QUERY_EXTERNAL_SCHEDULER_INTERVAL.getKeyValue(), + Setting.Property.NodeScope, + Setting.Property.Dynamic); + public static final Setting SPARK_EXECUTION_ENGINE_CONFIG = Setting.simpleString( Key.SPARK_EXECUTION_ENGINE_CONFIG.getKeyValue(), @@ -201,6 +229,13 @@ public class OpenSearchSettings extends Settings { Setting.Property.NodeScope, Setting.Property.Dynamic); + public static final Setting FIELD_TYPE_TOLERANCE_SETTING = + Setting.boolSetting( + Key.FIELD_TYPE_TOLERANCE.getKeyValue(), + true, + Setting.Property.NodeScope, + Setting.Property.Dynamic); + /** Construct OpenSearchSetting. The OpenSearchSetting must be singleton. 
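Each new setting above follows the same three steps visible in this file: define the Setting with its key and default, register it with an Updater in the constructor, and add it to pluginSettings() so the cluster accepts the key. Consumers then read the live value through the plugin's Settings abstraction; a small sketch of that consumer side, assuming a Settings instance like the one injected into OpenSearchIndex:

    // Reading the new dynamic settings at query time (sketch; see OpenSearchIndex further down).
    boolean useSearchAfterPaging =
        settings.getSettingValue(Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER);
    boolean fieldTypeTolerant =
        settings.getSettingValue(Settings.Key.FIELD_TYPE_TOLERANCE);
    Integer querySizeLimit =
        settings.getSettingValue(Settings.Key.QUERY_SIZE_LIMIT);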
*/ @SuppressWarnings("unchecked") public OpenSearchSettings(ClusterSettings clusterSettings) { @@ -229,6 +264,12 @@ public OpenSearchSettings(ClusterSettings clusterSettings) { Key.SQL_DELETE_ENABLED, SQL_DELETE_ENABLED_SETTING, new Updater(Key.SQL_DELETE_ENABLED)); + register( + settingBuilder, + clusterSettings, + Key.SQL_PAGINATION_API_SEARCH_AFTER, + SQL_PAGINATION_API_SEARCH_AFTER_SETTING, + new Updater(Key.SQL_PAGINATION_API_SEARCH_AFTER)); register( settingBuilder, clusterSettings, @@ -265,12 +306,30 @@ public OpenSearchSettings(ClusterSettings clusterSettings) { Key.DATASOURCES_URI_HOSTS_DENY_LIST, DATASOURCE_URI_HOSTS_DENY_LIST, new Updater(Key.DATASOURCES_URI_HOSTS_DENY_LIST)); + register( + settingBuilder, + clusterSettings, + Key.DATASOURCES_ENABLED, + DATASOURCE_ENABLED_SETTING, + new Updater(Key.DATASOURCES_ENABLED)); register( settingBuilder, clusterSettings, Key.ASYNC_QUERY_ENABLED, ASYNC_QUERY_ENABLED_SETTING, new Updater(Key.ASYNC_QUERY_ENABLED)); + register( + settingBuilder, + clusterSettings, + Key.ASYNC_QUERY_EXTERNAL_SCHEDULER_ENABLED, + ASYNC_QUERY_EXTERNAL_SCHEDULER_ENABLED_SETTING, + new Updater(Key.ASYNC_QUERY_EXTERNAL_SCHEDULER_ENABLED)); + register( + settingBuilder, + clusterSettings, + Key.ASYNC_QUERY_EXTERNAL_SCHEDULER_INTERVAL, + ASYNC_QUERY_EXTERNAL_SCHEDULER_INTERVAL_SETTING, + new Updater(Key.ASYNC_QUERY_EXTERNAL_SCHEDULER_INTERVAL)); register( settingBuilder, clusterSettings, @@ -320,13 +379,19 @@ public OpenSearchSettings(ClusterSettings clusterSettings) { clusterSettings, Key.SESSION_INACTIVITY_TIMEOUT_MILLIS, SESSION_INACTIVITY_TIMEOUT_MILLIS_SETTING, - new Updater((Key.SESSION_INACTIVITY_TIMEOUT_MILLIS))); + new Updater(Key.SESSION_INACTIVITY_TIMEOUT_MILLIS)); register( settingBuilder, clusterSettings, Key.STREAMING_JOB_HOUSEKEEPER_INTERVAL, STREAMING_JOB_HOUSEKEEPER_INTERVAL_SETTING, - new Updater((Key.STREAMING_JOB_HOUSEKEEPER_INTERVAL))); + new Updater(Key.STREAMING_JOB_HOUSEKEEPER_INTERVAL)); + register( + settingBuilder, + clusterSettings, + Key.FIELD_TYPE_TOLERANCE, + FIELD_TYPE_TOLERANCE_SETTING, + new Updater(Key.FIELD_TYPE_TOLERANCE)); defaultSettings = settingBuilder.build(); } @@ -383,13 +448,17 @@ public static List> pluginSettings() { .add(SQL_SLOWLOG_SETTING) .add(SQL_CURSOR_KEEP_ALIVE_SETTING) .add(SQL_DELETE_ENABLED_SETTING) + .add(SQL_PAGINATION_API_SEARCH_AFTER_SETTING) .add(PPL_ENABLED_SETTING) .add(QUERY_MEMORY_LIMIT_SETTING) .add(QUERY_SIZE_LIMIT_SETTING) .add(METRICS_ROLLING_WINDOW_SETTING) .add(METRICS_ROLLING_INTERVAL_SETTING) .add(DATASOURCE_URI_HOSTS_DENY_LIST) + .add(DATASOURCE_ENABLED_SETTING) .add(ASYNC_QUERY_ENABLED_SETTING) + .add(ASYNC_QUERY_EXTERNAL_SCHEDULER_ENABLED_SETTING) + .add(ASYNC_QUERY_EXTERNAL_SCHEDULER_INTERVAL_SETTING) .add(SPARK_EXECUTION_ENGINE_CONFIG) .add(SPARK_EXECUTION_SESSION_LIMIT_SETTING) .add(SPARK_EXECUTION_REFRESH_JOB_LIMIT_SETTING) @@ -399,6 +468,7 @@ public static List> pluginSettings() { .add(DATASOURCES_LIMIT_SETTING) .add(SESSION_INACTIVITY_TIMEOUT_MILLIS_SETTING) .add(STREAMING_JOB_HOUSEKEEPER_INTERVAL_SETTING) + .add(FIELD_TYPE_TOLERANCE_SETTING) .build(); } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java index c6afdb8511..b8822cd1e8 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java @@ -163,13 +163,13 @@ public 
TableScanBuilder createScanBuilder() { final int querySizeLimit = settings.getSettingValue(Settings.Key.QUERY_SIZE_LIMIT); final TimeValue cursorKeepAlive = settings.getSettingValue(Settings.Key.SQL_CURSOR_KEEP_ALIVE); - var builder = new OpenSearchRequestBuilder(querySizeLimit, createExprValueFactory()); + var builder = new OpenSearchRequestBuilder(querySizeLimit, createExprValueFactory(), settings); Function createScanOperator = requestBuilder -> new OpenSearchIndexScan( client, requestBuilder.getMaxResponseSize(), - requestBuilder.build(indexName, getMaxResultWindow(), cursorKeepAlive)); + requestBuilder.build(indexName, getMaxResultWindow(), cursorKeepAlive, client)); return new OpenSearchIndexScanBuilder(builder, createScanOperator); } @@ -177,7 +177,12 @@ private OpenSearchExprValueFactory createExprValueFactory() { Map allFields = new HashMap<>(); getReservedFieldTypes().forEach((k, v) -> allFields.put(k, OpenSearchDataType.of(v))); allFields.putAll(getFieldOpenSearchTypes()); - return new OpenSearchExprValueFactory(allFields); + return new OpenSearchExprValueFactory( + allFields, settings.getSettingValue(Settings.Key.FIELD_TYPE_TOLERANCE)); + } + + public boolean isFieldTypeTolerance() { + return settings.getSettingValue(Settings.Key.FIELD_TYPE_TOLERANCE); } @VisibleForTesting diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScan.java index b1e4ccc463..74cbd1f167 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScan.java @@ -14,10 +14,12 @@ import lombok.ToString; import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.core.common.io.stream.BytesStreamInput; +import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.exception.NoCursorException; import org.opensearch.sql.executor.pagination.PlanSerializer; import org.opensearch.sql.opensearch.client.OpenSearchClient; +import org.opensearch.sql.opensearch.request.OpenSearchQueryRequest; import org.opensearch.sql.opensearch.request.OpenSearchRequest; import org.opensearch.sql.opensearch.request.OpenSearchScrollRequest; import org.opensearch.sql.opensearch.response.OpenSearchResponse; @@ -121,12 +123,18 @@ public void readExternal(ObjectInput in) throws IOException { (OpenSearchStorageEngine) ((PlanSerializer.CursorDeserializationStream) in).resolveObject("engine"); + client = engine.getClient(); + boolean pointInTimeEnabled = + Boolean.parseBoolean( + client.meta().get(Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER.getKeyValue())); try (BytesStreamInput bsi = new BytesStreamInput(requestStream)) { - request = new OpenSearchScrollRequest(bsi, engine); + if (pointInTimeEnabled) { + request = new OpenSearchQueryRequest(bsi, engine); + } else { + request = new OpenSearchScrollRequest(bsi, engine); + } } maxResponseSize = in.readInt(); - - client = engine.getClient(); } @Override diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java index f4b0b05256..08567b78c1 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java +++ 
b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanQueryBuilder.java @@ -42,7 +42,7 @@ @EqualsAndHashCode class OpenSearchIndexScanQueryBuilder implements PushDownQueryBuilder { - OpenSearchRequestBuilder requestBuilder; + final OpenSearchRequestBuilder requestBuilder; public OpenSearchIndexScanQueryBuilder(OpenSearchRequestBuilder requestBuilder) { this.requestBuilder = requestBuilder; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilder.java index ff66ec425a..4488128b97 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilder.java @@ -23,6 +23,7 @@ import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.expression.NamedExpression; import org.opensearch.sql.expression.span.SpanExpression; +import org.opensearch.sql.opensearch.data.type.OpenSearchDateType; import org.opensearch.sql.opensearch.storage.serialization.ExpressionSerializer; /** Bucket Aggregation Builder. */ @@ -65,7 +66,10 @@ private CompositeValuesSourceBuilder buildCompositeValuesSourceBuilder( .missingOrder(missingOrder) .order(sortOrder); // Time types values are converted to LONG in ExpressionAggregationScript::execute - if (List.of(TIMESTAMP, TIME, DATE).contains(expr.getDelegated().type())) { + if ((expr.getDelegated().type() instanceof OpenSearchDateType + && List.of(TIMESTAMP, TIME, DATE) + .contains(((OpenSearchDateType) expr.getDelegated().type()).getExprCoreType())) + || List.of(TIMESTAMP, TIME, DATE).contains(expr.getDelegated().type())) { sourceBuilder.userValuetypeHint(ValueType.LONG); } return helper.build(expr.getDelegated(), sourceBuilder::field, sourceBuilder::script); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/core/ExpressionScript.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/core/ExpressionScript.java index 3a9ff02ba0..460a9b4567 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/core/ExpressionScript.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/core/ExpressionScript.java @@ -102,7 +102,7 @@ private OpenSearchExprValueFactory buildValueFactory(Set fi Map typeEnv = fields.stream() .collect(toMap(ReferenceExpression::getAttr, e -> OpenSearchDataType.of(e.type()))); - return new OpenSearchExprValueFactory(typeEnv); + return new OpenSearchExprValueFactory(typeEnv, false); } private Environment buildValueEnv( diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LuceneQuery.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LuceneQuery.java index 11533c754e..26ef56e576 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LuceneQuery.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LuceneQuery.java @@ -8,8 +8,9 @@ import static org.opensearch.sql.analysis.NestedAnalyzer.isNestedFunction; import com.google.common.collect.ImmutableMap; +import java.time.ZonedDateTime; import java.util.Map; -import java.util.function.Function; +import java.util.function.BiFunction; import 
org.opensearch.index.query.QueryBuilder; import org.opensearch.sql.data.model.ExprBooleanValue; import org.opensearch.sql.data.model.ExprByteValue; @@ -17,6 +18,7 @@ import org.opensearch.sql.data.model.ExprDoubleValue; import org.opensearch.sql.data.model.ExprFloatValue; import org.opensearch.sql.data.model.ExprIntegerValue; +import org.opensearch.sql.data.model.ExprIpValue; import org.opensearch.sql.data.model.ExprLongValue; import org.opensearch.sql.data.model.ExprShortValue; import org.opensearch.sql.data.model.ExprStringValue; @@ -32,6 +34,7 @@ import org.opensearch.sql.expression.ReferenceExpression; import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.FunctionName; +import org.opensearch.sql.opensearch.data.type.OpenSearchDateType; /** Lucene query abstraction that builds Lucene query from function expression. */ public abstract class LuceneQuery { @@ -105,135 +108,169 @@ public QueryBuilder build(FunctionExpression func) { ReferenceExpression ref = (ReferenceExpression) func.getArguments().get(0); Expression expr = func.getArguments().get(1); ExprValue literalValue = - expr instanceof LiteralExpression ? expr.valueOf() : cast((FunctionExpression) expr); + expr instanceof LiteralExpression ? expr.valueOf() : cast((FunctionExpression) expr, ref); return doBuild(ref.getAttr(), ref.type(), literalValue); } - private ExprValue cast(FunctionExpression castFunction) { + private ExprValue cast(FunctionExpression castFunction, ReferenceExpression ref) { return castMap .get(castFunction.getFunctionName()) - .apply((LiteralExpression) castFunction.getArguments().get(0)); + .apply((LiteralExpression) castFunction.getArguments().get(0), ref); } /** Type converting map. */ - private final Map> castMap = - ImmutableMap.>builder() - .put( - BuiltinFunctionName.CAST_TO_STRING.getName(), - expr -> { - if (!expr.type().equals(ExprCoreType.STRING)) { - return new ExprStringValue(String.valueOf(expr.valueOf().value())); - } else { - return expr.valueOf(); - } - }) - .put( - BuiltinFunctionName.CAST_TO_BYTE.getName(), - expr -> { - if (ExprCoreType.numberTypes().contains(expr.type())) { - return new ExprByteValue(expr.valueOf().byteValue()); - } else if (expr.type().equals(ExprCoreType.BOOLEAN)) { - return new ExprByteValue(expr.valueOf().booleanValue() ? 1 : 0); - } else { - return new ExprByteValue(Byte.valueOf(expr.valueOf().stringValue())); - } - }) - .put( - BuiltinFunctionName.CAST_TO_SHORT.getName(), - expr -> { - if (ExprCoreType.numberTypes().contains(expr.type())) { - return new ExprShortValue(expr.valueOf().shortValue()); - } else if (expr.type().equals(ExprCoreType.BOOLEAN)) { - return new ExprShortValue(expr.valueOf().booleanValue() ? 1 : 0); - } else { - return new ExprShortValue(Short.valueOf(expr.valueOf().stringValue())); - } - }) - .put( - BuiltinFunctionName.CAST_TO_INT.getName(), - expr -> { - if (ExprCoreType.numberTypes().contains(expr.type())) { - return new ExprIntegerValue(expr.valueOf().integerValue()); - } else if (expr.type().equals(ExprCoreType.BOOLEAN)) { - return new ExprIntegerValue(expr.valueOf().booleanValue() ? 1 : 0); - } else { - return new ExprIntegerValue(Integer.valueOf(expr.valueOf().stringValue())); - } - }) - .put( - BuiltinFunctionName.CAST_TO_LONG.getName(), - expr -> { - if (ExprCoreType.numberTypes().contains(expr.type())) { - return new ExprLongValue(expr.valueOf().longValue()); - } else if (expr.type().equals(ExprCoreType.BOOLEAN)) { - return new ExprLongValue(expr.valueOf().booleanValue() ? 
1 : 0); - } else { - return new ExprLongValue(Long.valueOf(expr.valueOf().stringValue())); - } - }) - .put( - BuiltinFunctionName.CAST_TO_FLOAT.getName(), - expr -> { - if (ExprCoreType.numberTypes().contains(expr.type())) { - return new ExprFloatValue(expr.valueOf().floatValue()); - } else if (expr.type().equals(ExprCoreType.BOOLEAN)) { - return new ExprFloatValue(expr.valueOf().booleanValue() ? 1 : 0); - } else { - return new ExprFloatValue(Float.valueOf(expr.valueOf().stringValue())); - } - }) - .put( - BuiltinFunctionName.CAST_TO_DOUBLE.getName(), - expr -> { - if (ExprCoreType.numberTypes().contains(expr.type())) { - return new ExprDoubleValue(expr.valueOf().doubleValue()); - } else if (expr.type().equals(ExprCoreType.BOOLEAN)) { - return new ExprDoubleValue(expr.valueOf().booleanValue() ? 1 : 0); - } else { - return new ExprDoubleValue(Double.valueOf(expr.valueOf().stringValue())); - } - }) - .put( - BuiltinFunctionName.CAST_TO_BOOLEAN.getName(), - expr -> { - if (ExprCoreType.numberTypes().contains(expr.type())) { - return expr.valueOf().doubleValue() != 0 - ? ExprBooleanValue.of(true) - : ExprBooleanValue.of(false); - } else if (expr.type().equals(ExprCoreType.STRING)) { - return ExprBooleanValue.of(Boolean.valueOf(expr.valueOf().stringValue())); - } else { - return expr.valueOf(); - } - }) - .put( - BuiltinFunctionName.CAST_TO_DATE.getName(), - expr -> { - if (expr.type().equals(ExprCoreType.STRING)) { - return new ExprDateValue(expr.valueOf().stringValue()); - } else { - return new ExprDateValue(expr.valueOf().dateValue()); - } - }) - .put( - BuiltinFunctionName.CAST_TO_TIME.getName(), - expr -> { - if (expr.type().equals(ExprCoreType.STRING)) { - return new ExprTimeValue(expr.valueOf().stringValue()); - } else { - return new ExprTimeValue(expr.valueOf().timeValue()); - } - }) - .put( - BuiltinFunctionName.CAST_TO_TIMESTAMP.getName(), - expr -> { - if (expr.type().equals(ExprCoreType.STRING)) { - return new ExprTimestampValue(expr.valueOf().stringValue()); - } else { - return new ExprTimestampValue(expr.valueOf().timestampValue()); - } - }) - .build(); + private final Map> + castMap = + ImmutableMap + .> + builder() + .put( + BuiltinFunctionName.CAST_TO_STRING.getName(), + (expr, ref) -> { + if (!expr.type().equals(ExprCoreType.STRING)) { + return new ExprStringValue(String.valueOf(expr.valueOf().value())); + } else { + return expr.valueOf(); + } + }) + .put( + BuiltinFunctionName.CAST_TO_BYTE.getName(), + (expr, ref) -> { + if (ExprCoreType.numberTypes().contains(expr.type())) { + return new ExprByteValue(expr.valueOf().byteValue()); + } else if (expr.type().equals(ExprCoreType.BOOLEAN)) { + return new ExprByteValue(expr.valueOf().booleanValue() ? 1 : 0); + } else { + return new ExprByteValue(Byte.valueOf(expr.valueOf().stringValue())); + } + }) + .put( + BuiltinFunctionName.CAST_TO_SHORT.getName(), + (expr, ref) -> { + if (ExprCoreType.numberTypes().contains(expr.type())) { + return new ExprShortValue(expr.valueOf().shortValue()); + } else if (expr.type().equals(ExprCoreType.BOOLEAN)) { + return new ExprShortValue(expr.valueOf().booleanValue() ? 1 : 0); + } else { + return new ExprShortValue(Short.valueOf(expr.valueOf().stringValue())); + } + }) + .put( + BuiltinFunctionName.CAST_TO_INT.getName(), + (expr, ref) -> { + if (ExprCoreType.numberTypes().contains(expr.type())) { + return new ExprIntegerValue(expr.valueOf().integerValue()); + } else if (expr.type().equals(ExprCoreType.BOOLEAN)) { + return new ExprIntegerValue(expr.valueOf().booleanValue() ? 
1 : 0); + } else { + return new ExprIntegerValue(Integer.valueOf(expr.valueOf().stringValue())); + } + }) + .put( + BuiltinFunctionName.CAST_TO_LONG.getName(), + (expr, ref) -> { + if (ExprCoreType.numberTypes().contains(expr.type())) { + return new ExprLongValue(expr.valueOf().longValue()); + } else if (expr.type().equals(ExprCoreType.BOOLEAN)) { + return new ExprLongValue(expr.valueOf().booleanValue() ? 1 : 0); + } else { + return new ExprLongValue(Long.valueOf(expr.valueOf().stringValue())); + } + }) + .put( + BuiltinFunctionName.CAST_TO_FLOAT.getName(), + (expr, ref) -> { + if (ExprCoreType.numberTypes().contains(expr.type())) { + return new ExprFloatValue(expr.valueOf().floatValue()); + } else if (expr.type().equals(ExprCoreType.BOOLEAN)) { + return new ExprFloatValue(expr.valueOf().booleanValue() ? 1 : 0); + } else { + return new ExprFloatValue(Float.valueOf(expr.valueOf().stringValue())); + } + }) + .put( + BuiltinFunctionName.CAST_TO_DOUBLE.getName(), + (expr, ref) -> { + if (ExprCoreType.numberTypes().contains(expr.type())) { + return new ExprDoubleValue(expr.valueOf().doubleValue()); + } else if (expr.type().equals(ExprCoreType.BOOLEAN)) { + return new ExprDoubleValue(expr.valueOf().booleanValue() ? 1 : 0); + } else { + return new ExprDoubleValue(Double.valueOf(expr.valueOf().stringValue())); + } + }) + .put( + BuiltinFunctionName.CAST_TO_BOOLEAN.getName(), + (expr, ref) -> { + if (ExprCoreType.numberTypes().contains(expr.type())) { + return expr.valueOf().doubleValue() != 0 + ? ExprBooleanValue.of(true) + : ExprBooleanValue.of(false); + } else if (expr.type().equals(ExprCoreType.STRING)) { + return ExprBooleanValue.of(Boolean.valueOf(expr.valueOf().stringValue())); + } else { + return expr.valueOf(); + } + }) + .put( + BuiltinFunctionName.CAST_TO_IP.getName(), + (expr, ref) -> { + return new ExprIpValue(expr.valueOf().stringValue()); + }) + .put( + BuiltinFunctionName.CAST_TO_DATE.getName(), + (expr, ref) -> { + if (expr.type().equals(ExprCoreType.STRING)) { + ZonedDateTime zonedDateTime = getParsedDateTime(expr, ref); + if (zonedDateTime != null) { + return new ExprDateValue(zonedDateTime.toLocalDate()); + } + return new ExprDateValue(expr.valueOf().stringValue()); + } else { + return new ExprDateValue(expr.valueOf().dateValue()); + } + }) + .put( + BuiltinFunctionName.CAST_TO_TIME.getName(), + (expr, ref) -> { + if (expr.type().equals(ExprCoreType.STRING)) { + ZonedDateTime zonedDateTime = getParsedDateTime(expr, ref); + if (zonedDateTime != null) { + return new ExprTimeValue(zonedDateTime.toLocalTime()); + } + return new ExprTimeValue(expr.valueOf().stringValue()); + } else { + return new ExprTimeValue(expr.valueOf().timeValue()); + } + }) + .put( + BuiltinFunctionName.CAST_TO_TIMESTAMP.getName(), + (expr, ref) -> { + if (expr.type().equals(ExprCoreType.STRING)) { + ZonedDateTime zonedDateTime = getParsedDateTime(expr, ref); + if (zonedDateTime != null) { + return new ExprTimestampValue(zonedDateTime.toInstant()); + } + return new ExprTimestampValue(expr.valueOf().stringValue()); + } else { + return new ExprTimestampValue(expr.valueOf().timestampValue()); + } + }) + .build(); + + /** + * Parses the date/time from the given expression if the reference type is an instance of + * OpenSearchDateType. + * + * @param expr The expression to parse. + * @return The parsed ZonedDateTime or null if the conditions are not met. 
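The cast entries above now receive the target field reference as well, so a string literal can be parsed with the field's mapped date format before being pushed down. A small sketch of that behavior, where the mapping format and sample value are assumptions for illustration:

    // Sketch: resolving a string literal against the field's own date format (format is assumed).
    OpenSearchDateType mappedType = OpenSearchDateType.of("yyyy-MM-dd HH:mm:ss");
    ZonedDateTime parsed = mappedType.getParsedDateTime("2024-05-01 10:15:30");
    ExprValue literal =
        parsed != null
            ? new ExprTimestampValue(parsed.toInstant())      // parsed with the mapped format
            : new ExprTimestampValue("2024-05-01 10:15:30");  // fall back to core parsing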
+ */ + private ZonedDateTime getParsedDateTime(LiteralExpression expr, ReferenceExpression ref) { + if (ref.type() instanceof OpenSearchDateType) { + return ((OpenSearchDateType) ref.type()).getParsedDateTime(expr.valueOf().stringValue()); + } + return null; + } /** * Build method that subclass implements by default which is to build query from reference and @@ -248,4 +285,36 @@ protected QueryBuilder doBuild(String fieldName, ExprType fieldType, ExprValue l throw new UnsupportedOperationException( "Subclass doesn't implement this and build method either"); } + + /** + * Converts a literal value to a formatted date or time value based on the specified field type. + * + *
<p>
If the field type is an instance of {@link OpenSearchDateType}, this method checks the type + * of the literal value and converts it to a formatted date or time if necessary. The formatting + * is applied if the {@link OpenSearchDateType} has a formatter. Otherwise, the raw value is + * returned. + * + * @param literal the literal value to be converted + * @param fieldType the field type to determine the conversion logic + * @return the formatted date or time value if the field type requires it, otherwise the raw value + */ + protected Object value(ExprValue literal, ExprType fieldType) { + if (fieldType instanceof OpenSearchDateType) { + OpenSearchDateType openSearchDateType = (OpenSearchDateType) fieldType; + if (literal.type().equals(ExprCoreType.TIMESTAMP)) { + return openSearchDateType.hasNoFormatter() + ? literal.timestampValue().toEpochMilli() + : openSearchDateType.getFormattedDate(literal.timestampValue()); + } else if (literal.type().equals(ExprCoreType.DATE)) { + return openSearchDateType.hasNoFormatter() + ? literal.value() + : openSearchDateType.getFormattedDate(literal.dateValue()); + } else if (literal.type().equals(ExprCoreType.TIME)) { + return openSearchDateType.hasNoFormatter() + ? literal.value() + : openSearchDateType.getFormattedDate(literal.timeValue()); + } + } + return literal.value(); + } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/RangeQuery.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/RangeQuery.java index 2e33e3cc7c..e9a38b6ee3 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/RangeQuery.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/RangeQuery.java @@ -10,7 +10,6 @@ import org.opensearch.index.query.QueryBuilders; import org.opensearch.index.query.RangeQueryBuilder; import org.opensearch.sql.data.model.ExprValue; -import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; /** Lucene query that builds range query for non-quality comparison. 
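RangeQuery and TermQuery below both drop their local value() helpers and delegate to the shared LuceneQuery.value(literal, fieldType) added above, so date-mapped fields keep their format in the pushed-down query. A sketch of the decision that helper makes for a TIMESTAMP literal, with fieldType and literal assumed to be in scope:

    // Sketch of the shared value() decision for a TIMESTAMP literal (mirrors the helper above).
    Object pushedValue;
    if (fieldType instanceof OpenSearchDateType) {
      OpenSearchDateType dateType = (OpenSearchDateType) fieldType;
      pushedValue =
          dateType.hasNoFormatter()
              ? literal.timestampValue().toEpochMilli()              // no explicit format: epoch millis
              : dateType.getFormattedDate(literal.timestampValue()); // keep the field's own format
    } else {
      pushedValue = literal.value();
    }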
*/ @@ -30,7 +29,7 @@ public enum Comparison { @Override protected QueryBuilder doBuild(String fieldName, ExprType fieldType, ExprValue literal) { - Object value = value(literal); + Object value = this.value(literal, fieldType); RangeQueryBuilder query = QueryBuilders.rangeQuery(fieldName); switch (comparison) { @@ -46,12 +45,4 @@ protected QueryBuilder doBuild(String fieldName, ExprType fieldType, ExprValue l throw new IllegalStateException("Comparison is supported by range query: " + comparison); } } - - private Object value(ExprValue literal) { - if (literal.type().equals(ExprCoreType.TIMESTAMP)) { - return literal.timestampValue().toEpochMilli(); - } else { - return literal.value(); - } - } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/TermQuery.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/TermQuery.java index cd506898d7..f8988b3cd9 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/TermQuery.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/TermQuery.java @@ -8,7 +8,6 @@ import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; import org.opensearch.sql.data.model.ExprValue; -import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; @@ -18,14 +17,6 @@ public class TermQuery extends LuceneQuery { @Override protected QueryBuilder doBuild(String fieldName, ExprType fieldType, ExprValue literal) { fieldName = OpenSearchTextType.convertTextToKeyword(fieldName, fieldType); - return QueryBuilders.termQuery(fieldName, value(literal)); - } - - private Object value(ExprValue literal) { - if (literal.type().equals(ExprCoreType.TIMESTAMP)) { - return literal.timestampValue().toEpochMilli(); - } else { - return literal.value(); - } + return QueryBuilders.termQuery(fieldName, this.value(literal, fieldType)); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/SortQueryBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/SortQueryBuilder.java index 7669b569d4..54e3477410 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/SortQueryBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/sort/SortQueryBuilder.java @@ -25,14 +25,14 @@ public class SortQueryBuilder { /** The mapping between Core Engine sort order and OpenSearch sort order. */ - private Map sortOrderMap = + private final Map sortOrderMap = new ImmutableMap.Builder() .put(Sort.SortOrder.ASC, SortOrder.ASC) .put(Sort.SortOrder.DESC, SortOrder.DESC) .build(); /** The mapping between Core Engine null order and OpenSearch null order. 
*/ - private Map missingMap = + private final Map missingMap = new ImmutableMap.Builder() .put(Sort.NullOrder.NULL_FIRST, "_first") .put(Sort.NullOrder.NULL_LAST, "_last") diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/util/RestRequestUtil.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/util/RestRequestUtil.java new file mode 100644 index 0000000000..e02bcf5af9 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/util/RestRequestUtil.java @@ -0,0 +1,25 @@ +package org.opensearch.sql.opensearch.util; + +import lombok.NonNull; +import org.opensearch.client.node.NodeClient; +import org.opensearch.rest.RestChannel; +import org.opensearch.rest.RestRequest; + +/** RestRequestUtil is a utility class for common operations on OpenSearch RestRequest's. */ +public class RestRequestUtil { + + private RestRequestUtil() { + // utility class + } + + /** + * Utility method for consuming all the request parameters. Doing this will ensure that the + * BaseRestHandler doesn't fail the request with an unconsumed parameter exception. + * + * @see org.opensearch.rest.BaseRestHandler#handleRequest(RestRequest, RestChannel, NodeClient) + * @param request - The request to consume all parameters on + */ + public static void consumeAllRequestParameters(@NonNull RestRequest request) { + request.params().keySet().forEach(request::param); + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java index 040b7d2759..73c4f0e7f8 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchNodeClientTest.java @@ -13,9 +13,7 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Answers.RETURNS_DEEP_STUBS; -import static org.mockito.ArgumentMatchers.anyBoolean; -import static org.mockito.ArgumentMatchers.anyString; -import static org.mockito.Mockito.any; +import static org.mockito.ArgumentMatchers.*; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.verify; @@ -31,6 +29,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.concurrent.ExecutionException; import java.util.concurrent.atomic.AtomicBoolean; import lombok.SneakyThrows; import org.apache.commons.lang3.reflect.FieldUtils; @@ -51,15 +50,16 @@ import org.opensearch.action.admin.indices.get.GetIndexResponse; import org.opensearch.action.admin.indices.mapping.get.GetMappingsResponse; import org.opensearch.action.admin.indices.settings.get.GetSettingsResponse; -import org.opensearch.action.search.ClearScrollRequestBuilder; -import org.opensearch.action.search.SearchResponse; +import org.opensearch.action.search.*; import org.opensearch.client.node.NodeClient; import org.opensearch.cluster.metadata.AliasMetadata; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.MappingMetadata; +import org.opensearch.common.action.ActionFuture; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.common.xcontent.XContentType; +import org.opensearch.core.common.Strings; import org.opensearch.core.xcontent.DeprecationHandler; import 
org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentParser; @@ -74,6 +74,7 @@ import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; import org.opensearch.sql.opensearch.data.value.OpenSearchExprValueFactory; import org.opensearch.sql.opensearch.mapping.IndexMapping; +import org.opensearch.sql.opensearch.request.OpenSearchQueryRequest; import org.opensearch.sql.opensearch.request.OpenSearchRequest; import org.opensearch.sql.opensearch.request.OpenSearchScrollRequest; import org.opensearch.sql.opensearch.response.OpenSearchResponse; @@ -169,7 +170,7 @@ void get_index_mappings() throws IOException { () -> assertEquals(OpenSearchTextType.of(MappingType.Double), parsedTypes.get("balance")), () -> assertEquals("KEYWORD", mapping.get("city").legacyTypeName()), () -> assertEquals(OpenSearchTextType.of(MappingType.Keyword), parsedTypes.get("city")), - () -> assertEquals("DATE", mapping.get("birthday").legacyTypeName()), + () -> assertEquals("TIMESTAMP", mapping.get("birthday").legacyTypeName()), () -> assertEquals(OpenSearchTextType.of(MappingType.Date), parsedTypes.get("birthday")), () -> assertEquals("GEO_POINT", mapping.get("location").legacyTypeName()), () -> @@ -295,7 +296,6 @@ void search() { new SearchHits( new SearchHit[] {searchHit}, new TotalHits(1L, TotalHits.Relation.EQUAL_TO), 1.0F)); when(searchHit.getSourceAsString()).thenReturn("{\"id\", 1}"); - when(searchHit.getInnerHits()).thenReturn(null); when(factory.construct(any(), anyBoolean())).thenReturn(exprTupleValue); // Mock second scroll request followed @@ -393,6 +393,65 @@ void cleanup_rethrows_exception() { assertThrows(IllegalStateException.class, () -> client.cleanup(request)); } + @Test + @SneakyThrows + void cleanup_pit_request() { + OpenSearchQueryRequest request = + new OpenSearchQueryRequest( + new OpenSearchRequest.IndexName("test"), + new SearchSourceBuilder(), + factory, + List.of(), + TimeValue.timeValueMinutes(1L), + "samplePitId"); + // Enforce cleaning by setting a private field. 
+ FieldUtils.writeField(request, "needClean", true, true); + client.cleanup(request); + verify(nodeClient).execute(any(), any()); + } + + @Test + @SneakyThrows + void cleanup_pit_request_throw_exception() { + DeletePitRequest deletePitRequest = new DeletePitRequest("samplePitId"); + ActionFuture actionFuture = mock(ActionFuture.class); + when(actionFuture.get()).thenThrow(new ExecutionException("Execution failed", new Throwable())); + when(nodeClient.execute(eq(DeletePitAction.INSTANCE), any(DeletePitRequest.class))) + .thenReturn(actionFuture); + assertThrows(RuntimeException.class, () -> client.deletePit(deletePitRequest)); + } + + @Test + @SneakyThrows + void create_pit() { + CreatePitRequest createPitRequest = + new CreatePitRequest(TimeValue.timeValueMinutes(5), false, Strings.EMPTY_ARRAY); + ActionFuture actionFuture = mock(ActionFuture.class); + CreatePitResponse createPitResponse = mock(CreatePitResponse.class); + when(createPitResponse.getId()).thenReturn("samplePitId"); + when(actionFuture.get()).thenReturn(createPitResponse); + when(nodeClient.execute(eq(CreatePitAction.INSTANCE), any(CreatePitRequest.class))) + .thenReturn(actionFuture); + + String pitId = client.createPit(createPitRequest); + assertEquals("samplePitId", pitId); + + verify(nodeClient).execute(CreatePitAction.INSTANCE, createPitRequest); + verify(actionFuture).get(); + } + + @Test + @SneakyThrows + void create_pit_request_throw_exception() { + CreatePitRequest createPitRequest = + new CreatePitRequest(TimeValue.timeValueMinutes(5), false, Strings.EMPTY_ARRAY); + ActionFuture actionFuture = mock(ActionFuture.class); + when(actionFuture.get()).thenThrow(new ExecutionException("Execution failed", new Throwable())); + when(nodeClient.execute(eq(CreatePitAction.INSTANCE), any(CreatePitRequest.class))) + .thenReturn(actionFuture); + assertThrows(RuntimeException.class, () -> client.createPit(createPitRequest)); + } + @Test void get_indices() { AliasMetadata aliasMetadata = mock(AliasMetadata.class); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java index 99201aae4f..eb2355a36b 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/client/OpenSearchRestClientTest.java @@ -43,6 +43,8 @@ import org.opensearch.action.admin.cluster.settings.ClusterGetSettingsResponse; import org.opensearch.action.admin.indices.settings.get.GetSettingsRequest; import org.opensearch.action.admin.indices.settings.get.GetSettingsResponse; +import org.opensearch.action.search.CreatePitRequest; +import org.opensearch.action.search.CreatePitResponse; import org.opensearch.action.search.SearchResponse; import org.opensearch.client.RequestOptions; import org.opensearch.client.RestHighLevelClient; @@ -56,6 +58,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.common.xcontent.XContentType; +import org.opensearch.core.common.Strings; import org.opensearch.core.xcontent.DeprecationHandler; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentParser; @@ -69,6 +72,7 @@ import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; import org.opensearch.sql.opensearch.data.value.OpenSearchExprValueFactory; import org.opensearch.sql.opensearch.mapping.IndexMapping; +import 
org.opensearch.sql.opensearch.request.OpenSearchQueryRequest; import org.opensearch.sql.opensearch.request.OpenSearchRequest; import org.opensearch.sql.opensearch.request.OpenSearchScrollRequest; import org.opensearch.sql.opensearch.response.OpenSearchResponse; @@ -169,7 +173,7 @@ void get_index_mappings() throws IOException { () -> assertEquals(OpenSearchTextType.of(MappingType.Double), parsedTypes.get("balance")), () -> assertEquals("KEYWORD", mapping.get("city").legacyTypeName()), () -> assertEquals(OpenSearchTextType.of(MappingType.Keyword), parsedTypes.get("city")), - () -> assertEquals("DATE", mapping.get("birthday").legacyTypeName()), + () -> assertEquals("TIMESTAMP", mapping.get("birthday").legacyTypeName()), () -> assertEquals(OpenSearchTextType.of(MappingType.Date), parsedTypes.get("birthday")), () -> assertEquals("GEO_POINT", mapping.get("location").legacyTypeName()), () -> @@ -282,7 +286,6 @@ void search() throws IOException { new SearchHits( new SearchHit[] {searchHit}, new TotalHits(1L, TotalHits.Relation.EQUAL_TO), 1.0F)); when(searchHit.getSourceAsString()).thenReturn("{\"id\", 1}"); - when(searchHit.getInnerHits()).thenReturn(null); when(factory.construct(any(), anyBoolean())).thenReturn(exprTupleValue); // Mock second scroll request followed @@ -411,6 +414,64 @@ void cleanup_with_IOException() { assertThrows(IllegalStateException.class, () -> client.cleanup(request)); } + @Test + @SneakyThrows + void cleanup_pit_request() { + OpenSearchQueryRequest request = + new OpenSearchQueryRequest( + new OpenSearchRequest.IndexName("test"), + new SearchSourceBuilder(), + factory, + List.of(), + TimeValue.timeValueMinutes(1L), + "samplePitId"); + // Enforce cleaning by setting a private field. + FieldUtils.writeField(request, "needClean", true, true); + client.cleanup(request); + verify(restClient).deletePit(any(), any()); + } + + @Test + @SneakyThrows + void cleanup_pit_request_throw_exception() { + when(restClient.deletePit(any(), any())).thenThrow(new IOException()); + OpenSearchQueryRequest request = + new OpenSearchQueryRequest( + new OpenSearchRequest.IndexName("test"), + new SearchSourceBuilder(), + factory, + List.of(), + TimeValue.timeValueMinutes(1L), + "samplePitId"); + // Enforce cleaning by setting a private field. 
+ FieldUtils.writeField(request, "needClean", true, true); + assertThrows(RuntimeException.class, () -> client.cleanup(request)); + } + + @Test + @SneakyThrows + void create_pit() { + CreatePitRequest createPitRequest = + new CreatePitRequest(TimeValue.timeValueMinutes(5), false, Strings.EMPTY_ARRAY); + CreatePitResponse createPitResponse = mock(CreatePitResponse.class); + when(createPitResponse.getId()).thenReturn("samplePitId"); + when(restClient.createPit(any(CreatePitRequest.class), any())).thenReturn(createPitResponse); + + String pitId = client.createPit(createPitRequest); + assertEquals("samplePitId", pitId); + + verify(restClient).createPit(createPitRequest, RequestOptions.DEFAULT); + } + + @Test + @SneakyThrows + void create_pit_request_throw_exception() { + CreatePitRequest createPitRequest = + new CreatePitRequest(TimeValue.timeValueMinutes(5), false, Strings.EMPTY_ARRAY); + when(restClient.createPit(any(), any())).thenThrow(new IOException()); + assertThrows(RuntimeException.class, () -> client.createPit(createPitRequest)); + } + @Test void get_indices() throws IOException { when(restClient.indices().get(any(GetIndexRequest.class), any(RequestOptions.class))) diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataTypeRecognitionTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataTypeRecognitionTest.java index 35ad6b7ea6..2e90004571 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataTypeRecognitionTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataTypeRecognitionTest.java @@ -17,7 +17,6 @@ import org.opensearch.sql.expression.DSL; import org.opensearch.sql.opensearch.data.value.OpenSearchExprBinaryValue; import org.opensearch.sql.opensearch.data.value.OpenSearchExprGeoPointValue; -import org.opensearch.sql.opensearch.data.value.OpenSearchExprIpValue; import org.opensearch.sql.opensearch.data.value.OpenSearchExprTextValue; public class OpenSearchDataTypeRecognitionTest { @@ -33,7 +32,6 @@ private static Stream types() { return Stream.of( Arguments.of("TEXT", new OpenSearchExprTextValue("A"), "text without fields"), Arguments.of("BINARY", new OpenSearchExprBinaryValue("A"), "binary"), - Arguments.of("IP", new OpenSearchExprIpValue("A"), "ip"), Arguments.of("TEXT", new TestTextWithFieldValue("Hello World"), "text with fields"), Arguments.of("GEO_POINT", new OpenSearchExprGeoPointValue(0d, 0d), "geo point")); } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataTypeTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataTypeTest.java index 82e6222dc4..77b905e228 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataTypeTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDataTypeTest.java @@ -22,6 +22,7 @@ import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import static org.opensearch.sql.data.type.ExprCoreType.FLOAT; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; +import static org.opensearch.sql.data.type.ExprCoreType.IP; import static org.opensearch.sql.data.type.ExprCoreType.LONG; import static org.opensearch.sql.data.type.ExprCoreType.SHORT; import static org.opensearch.sql.data.type.ExprCoreType.STRING; @@ -70,7 +71,7 @@ public void typeName() { assertEquals("STRING", textType.typeName()); assertEquals("STRING", 
textKeywordType.typeName()); assertEquals("OBJECT", OpenSearchDataType.of(MappingType.Object).typeName()); - assertEquals("DATE", OpenSearchDataType.of(MappingType.Date).typeName()); + assertEquals("TIMESTAMP", OpenSearchDataType.of(MappingType.Date).typeName()); assertEquals("DOUBLE", OpenSearchDataType.of(MappingType.Double).typeName()); assertEquals("KEYWORD", OpenSearchDataType.of(MappingType.Keyword).typeName()); } @@ -80,7 +81,7 @@ public void legacyTypeName() { assertEquals("TEXT", textType.legacyTypeName()); assertEquals("TEXT", textKeywordType.legacyTypeName()); assertEquals("OBJECT", OpenSearchDataType.of(MappingType.Object).legacyTypeName()); - assertEquals("DATE", OpenSearchDataType.of(MappingType.Date).legacyTypeName()); + assertEquals("TIMESTAMP", OpenSearchDataType.of(MappingType.Date).legacyTypeName()); assertEquals("DOUBLE", OpenSearchDataType.of(MappingType.Double).legacyTypeName()); assertEquals("KEYWORD", OpenSearchDataType.of(MappingType.Keyword).legacyTypeName()); } @@ -104,13 +105,13 @@ private static Stream getTestDataWithType() { Arguments.of(MappingType.ScaledFloat, "scaled_float", DOUBLE), Arguments.of(MappingType.Double, "double", DOUBLE), Arguments.of(MappingType.Boolean, "boolean", BOOLEAN), - Arguments.of(MappingType.Date, "date", TIMESTAMP), - Arguments.of(MappingType.DateNanos, "date", TIMESTAMP), + Arguments.of(MappingType.Date, "timestamp", TIMESTAMP), + Arguments.of(MappingType.DateNanos, "timestamp", TIMESTAMP), Arguments.of(MappingType.Object, "object", STRUCT), Arguments.of(MappingType.Nested, "nested", ARRAY), + Arguments.of(MappingType.Ip, "ip", IP), Arguments.of(MappingType.GeoPoint, "geo_point", OpenSearchGeoPointType.of()), - Arguments.of(MappingType.Binary, "binary", OpenSearchBinaryType.of()), - Arguments.of(MappingType.Ip, "ip", OpenSearchIpType.of())); + Arguments.of(MappingType.Binary, "binary", OpenSearchBinaryType.of())); } @ParameterizedTest(name = "{1}") @@ -124,7 +125,15 @@ public void of_MappingType(MappingType mappingType, String name, ExprType dataTy assertAll( () -> assertEquals(nameForPPL, type.typeName()), () -> assertEquals(nameForSQL, type.legacyTypeName()), - () -> assertEquals(dataType, type.getExprType())); + () -> { + if (dataType == ExprCoreType.TIMESTAMP + || dataType == ExprCoreType.DATE + || dataType == ExprCoreType.TIME) { + assertEquals(dataType, type.getExprCoreType()); + } else { + assertEquals(dataType, type.getExprType()); + } + }); } @ParameterizedTest(name = "{0}") @@ -133,7 +142,7 @@ public void of_ExprCoreType(ExprCoreType coreType) { assumeFalse(coreType == UNKNOWN); var type = OpenSearchDataType.of(coreType); if (type instanceof OpenSearchDateType) { - assertEquals(coreType, type.getExprType()); + assertEquals(coreType, type.getExprCoreType()); } else { assertEquals(coreType.toString(), type.typeName()); assertEquals(coreType.toString(), type.legacyTypeName()); @@ -180,13 +189,13 @@ public void types_but_clones_are_singletons_and_cached() { () -> assertSame(OpenSearchDataType.of(MappingType.Text), OpenSearchTextType.of()), () -> assertSame(OpenSearchDataType.of(MappingType.Binary), OpenSearchBinaryType.of()), () -> assertSame(OpenSearchDataType.of(MappingType.GeoPoint), OpenSearchGeoPointType.of()), - () -> assertSame(OpenSearchDataType.of(MappingType.Ip), OpenSearchIpType.of()), () -> assertNotSame( OpenSearchTextType.of(), OpenSearchTextType.of(Map.of("properties", OpenSearchDataType.of(INTEGER)))), () -> assertSame(OpenSearchDataType.of(INTEGER), OpenSearchDataType.of(INTEGER)), () -> 
assertSame(OpenSearchDataType.of(STRING), OpenSearchDataType.of(STRING)), + () -> assertSame(OpenSearchDataType.of(IP), OpenSearchDataType.of(IP)), () -> assertSame(OpenSearchDataType.of(STRUCT), OpenSearchDataType.of(STRUCT)), () -> assertNotSame( @@ -416,7 +425,7 @@ public void test_getExprType() { assertEquals(FLOAT, OpenSearchDataType.of(MappingType.HalfFloat).getExprType()); assertEquals(DOUBLE, OpenSearchDataType.of(MappingType.Double).getExprType()); assertEquals(DOUBLE, OpenSearchDataType.of(MappingType.ScaledFloat).getExprType()); - assertEquals(TIMESTAMP, OpenSearchDataType.of(MappingType.Date).getExprType()); + assertEquals(TIMESTAMP, OpenSearchDataType.of(MappingType.Date).getExprCoreType()); } @Test diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDateTypeTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDateTypeTest.java index c6885c8ffe..3c1cf1bf0f 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDateTypeTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/type/OpenSearchDateTypeTest.java @@ -5,12 +5,7 @@ package org.opensearch.sql.opensearch.data.type; -import static org.junit.jupiter.api.Assertions.assertAll; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertSame; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; +import static org.junit.jupiter.api.Assertions.*; import static org.opensearch.sql.data.type.ExprCoreType.DATE; import static org.opensearch.sql.data.type.ExprCoreType.TIME; import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP; @@ -22,6 +17,9 @@ import static org.opensearch.sql.opensearch.data.type.OpenSearchDateType.isDateTypeCompatible; import com.google.common.collect.Lists; +import java.time.*; +import java.time.format.DateTimeFormatter; +import java.util.Arrays; import java.util.EnumSet; import java.util.List; import java.util.stream.Stream; @@ -48,8 +46,6 @@ class OpenSearchDateTypeTest { OpenSearchDateType.of(defaultFormatString); private static final OpenSearchDateType dateDateType = OpenSearchDateType.of(dateFormatString); private static final OpenSearchDateType timeDateType = OpenSearchDateType.of(timeFormatString); - private static final OpenSearchDateType datetimeDateType = - OpenSearchDateType.of(timestampFormatString); @Test public void isCompatible() { @@ -76,8 +72,8 @@ public void isCompatible() { public void check_typeName() { assertAll( // always use the MappingType of "DATE" - () -> assertEquals("DATE", defaultDateType.typeName()), - () -> assertEquals("DATE", timeDateType.typeName()), + () -> assertEquals("TIMESTAMP", defaultDateType.typeName()), + () -> assertEquals("TIME", timeDateType.typeName()), () -> assertEquals("DATE", dateDateType.typeName())); } @@ -85,8 +81,8 @@ public void check_typeName() { public void check_legacyTypeName() { assertAll( // always use the legacy "DATE" type - () -> assertEquals("DATE", defaultDateType.legacyTypeName()), - () -> assertEquals("DATE", timeDateType.legacyTypeName()), + () -> assertEquals("TIMESTAMP", defaultDateType.legacyTypeName()), + () -> assertEquals("TIME", timeDateType.legacyTypeName()), () -> assertEquals("DATE", dateDateType.legacyTypeName())); } @@ -94,9 +90,9 @@ public void check_legacyTypeName() { public void check_exprTypeName() { assertAll( // exprType 
changes based on type (no datetime): - () -> assertEquals(TIMESTAMP, defaultDateType.getExprType()), - () -> assertEquals(TIME, timeDateType.getExprType()), - () -> assertEquals(DATE, dateDateType.getExprType())); + () -> assertEquals(TIMESTAMP, defaultDateType.getExprCoreType()), + () -> assertEquals(TIME, timeDateType.getExprCoreType()), + () -> assertEquals(DATE, dateDateType.getExprCoreType())); } private static Stream getAllSupportedFormats() { @@ -129,22 +125,22 @@ public void check_datetime_format_names(FormatNames datetimeFormat) { if (camelCaseName != null && !camelCaseName.isEmpty()) { OpenSearchDateType dateType = OpenSearchDateType.of(camelCaseName); assertSame( - dateType.getExprType(), + dateType.getExprCoreType(), TIMESTAMP, camelCaseName + " does not format to a TIMESTAMP type, instead got " - + dateType.getExprType()); + + dateType.getExprCoreType()); } String snakeCaseName = datetimeFormat.getSnakeCaseName(); if (snakeCaseName != null && !snakeCaseName.isEmpty()) { OpenSearchDateType dateType = OpenSearchDateType.of(snakeCaseName); assertSame( - dateType.getExprType(), + dateType.getExprCoreType(), TIMESTAMP, snakeCaseName + " does not format to a TIMESTAMP type, instead got " - + dateType.getExprType()); + + dateType.getExprCoreType()); } else { fail(); } @@ -161,18 +157,22 @@ public void check_date_format_names(FormatNames dateFormat) { if (camelCaseName != null && !camelCaseName.isEmpty()) { OpenSearchDateType dateType = OpenSearchDateType.of(camelCaseName); assertSame( - dateType.getExprType(), + dateType.getExprCoreType(), DATE, - camelCaseName + " does not format to a DATE type, instead got " + dateType.getExprType()); + camelCaseName + + " does not format to a DATE type, instead got " + + dateType.getExprCoreType()); } String snakeCaseName = dateFormat.getSnakeCaseName(); if (snakeCaseName != null && !snakeCaseName.isEmpty()) { OpenSearchDateType dateType = OpenSearchDateType.of(snakeCaseName); assertSame( - dateType.getExprType(), + dateType.getExprCoreType(), DATE, - snakeCaseName + " does not format to a DATE type, instead got " + dateType.getExprType()); + snakeCaseName + + " does not format to a DATE type, instead got " + + dateType.getExprCoreType()); } else { fail(); } @@ -189,18 +189,22 @@ public void check_time_format_names(FormatNames timeFormat) { if (camelCaseName != null && !camelCaseName.isEmpty()) { OpenSearchDateType dateType = OpenSearchDateType.of(camelCaseName); assertSame( - dateType.getExprType(), + dateType.getExprCoreType(), TIME, - camelCaseName + " does not format to a TIME type, instead got " + dateType.getExprType()); + camelCaseName + + " does not format to a TIME type, instead got " + + dateType.getExprCoreType()); } String snakeCaseName = timeFormat.getSnakeCaseName(); if (snakeCaseName != null && !snakeCaseName.isEmpty()) { OpenSearchDateType dateType = OpenSearchDateType.of(snakeCaseName); assertSame( - dateType.getExprType(), + dateType.getExprCoreType(), TIME, - snakeCaseName + " does not format to a TIME type, instead got " + dateType.getExprType()); + snakeCaseName + + " does not format to a TIME type, instead got " + + dateType.getExprCoreType()); } else { fail(); } @@ -244,9 +248,9 @@ private static Stream get_format_combinations_for_test() { @MethodSource("get_format_combinations_for_test") public void check_ExprCoreType_of_combinations_of_custom_and_predefined_formats( ExprCoreType expected, List formats, String testName) { - assertEquals(expected, OpenSearchDateType.of(String.join(" || ", formats)).getExprType()); + 
assertEquals(expected, OpenSearchDateType.of(String.join(" || ", formats)).getExprCoreType()); formats = Lists.reverse(formats); - assertEquals(expected, OpenSearchDateType.of(String.join(" || ", formats)).getExprType()); + assertEquals(expected, OpenSearchDateType.of(String.join(" || ", formats)).getExprCoreType()); } @Test @@ -259,4 +263,171 @@ public void check_if_date_type_compatible() { assertTrue(isDateTypeCompatible(DATE)); assertFalse(isDateTypeCompatible(OpenSearchDataType.of(OpenSearchDataType.MappingType.Text))); } + + @Test + void test_valid_timestamp_with_custom_format() { + String timestamp = "2021-11-08T17:00:00Z"; + String format = "strict_date_time_no_millis"; + OpenSearchDateType dateType = OpenSearchDateType.of(format); + ZonedDateTime zonedDateTime = dateType.getParsedDateTime(timestamp); + + assertEquals("2021-11-08T17:00:00Z", dateType.getFormattedDate(zonedDateTime.toInstant())); + assertEquals(LocalDate.parse("2021-11-08"), zonedDateTime.toLocalDate()); + assertFalse(dateType.hasNoFormatter()); + } + + @Test + void test_valid_timestamp_with_multiple_formats() { + String timestamp = "2021-11-08T17:00:00Z"; + String timestamp2 = "2021/11/08T17:00:00Z"; + + List formats = Arrays.asList("strict_date_time_no_millis", "yyyy/MM/dd'T'HH:mm:ssX"); + OpenSearchDateType dateType = OpenSearchDateType.of(String.join(" || ", formats)); + + // Testing with the first timestamp + ZonedDateTime zonedDateTime1 = dateType.getParsedDateTime(timestamp); + + assertEquals("2021-11-08T17:00:00Z", dateType.getFormattedDate(zonedDateTime1.toInstant())); + assertEquals(LocalDate.parse("2021-11-08"), zonedDateTime1.toLocalDate()); + assertFalse(dateType.hasNoFormatter()); + + // Testing with the second timestamp + ZonedDateTime zonedDateTime2 = dateType.getParsedDateTime(timestamp2); + + assertEquals("2021-11-08T17:00:00Z", dateType.getFormattedDate(zonedDateTime2.toInstant())); + assertEquals(LocalDate.parse("2021-11-08"), zonedDateTime2.toLocalDate()); + assertFalse(dateType.hasNoFormatter()); + } + + @Test + void test_openSearch_datetime_named_formatter() { + String timestamp = "2019-03-23T21:34:46"; + String format = "strict_date_hour_minute_second"; + OpenSearchDateType dateType = OpenSearchDateType.of(format); + ZonedDateTime zonedDateTime = dateType.getParsedDateTime(timestamp); + + assertEquals("2019-03-23T21:34:46", dateType.getFormattedDate(zonedDateTime.toInstant())); + assertEquals(LocalDate.parse("2019-03-23"), zonedDateTime.toLocalDate()); + assertEquals(LocalTime.parse("21:34:46"), zonedDateTime.toLocalTime()); + assertFalse(dateType.hasNoFormatter()); + } + + @Test + void test_openSearch_datetime_with_default_formatter() { + String timestamp = "2019-03-23T21:34:46"; + OpenSearchDateType dateType = OpenSearchDateType.of(TIMESTAMP); + ZonedDateTime zonedDateTime = dateType.getParsedDateTime(timestamp); + // formatted using OpenSearch default formatter + assertEquals("2019-03-23T21:34:46Z", dateType.getFormattedDate(zonedDateTime.toInstant())); + assertEquals(LocalDate.parse("2019-03-23"), zonedDateTime.toLocalDate()); + assertEquals(LocalTime.parse("21:34:46"), zonedDateTime.toLocalTime()); + assertTrue(dateType.hasNoFormatter()); + } + + @Test + void test_invalid_date_with_named_formatter() { + // Incorrect date + String timestamp = "2019-23-23"; + String format = "strict_date_hour_minute_second"; + OpenSearchDateType dateType = OpenSearchDateType.of(format); + ZonedDateTime zonedDateTime = dateType.getParsedDateTime(timestamp); + assertNull(zonedDateTime); + 
assertFalse(dateType.hasNoFormatter()); + } + + @Test + void test_invalid_time_with_custom_formatter() { + String timestamp = "invalid-timestamp"; + List formats = Arrays.asList("yyyy/MM/dd'T'HH:mm:ssX", "yyyy-MM-dd'T'HH:mm:ssX"); + OpenSearchDateType dateType = OpenSearchDateType.of(String.join(" || ", formats)); + ZonedDateTime zonedDateTime = dateType.getParsedDateTime(timestamp); + assertNull(zonedDateTime); + assertFalse(dateType.hasNoFormatter()); + } + + @Test + void test_epoch_datetime_formatter() { + long epochTimestamp = 1636390800000L; // Corresponds to "2021-11-08T17:00:00Z" + String format = "epoch_millis"; + OpenSearchDateType dateType = OpenSearchDateType.of(format); + ZonedDateTime zonedDateTime = dateType.getParsedDateTime(String.valueOf(epochTimestamp)); + + assertEquals(Long.toString(epochTimestamp), dateType.getFormattedDate(zonedDateTime)); + assertEquals(LocalDate.parse("2021-11-08"), zonedDateTime.toLocalDate()); + assertEquals(LocalTime.parse("17:00:00"), zonedDateTime.toLocalTime()); + assertFalse(dateType.hasNoFormatter()); + } + + @Test + void test_timeStamp_format_with_default_formatters() { + String timestamp = "2021-11-08 17:00:00"; + String format = "strict_date_time_no_millis || epoch_millis"; + OpenSearchDateType dateType = OpenSearchDateType.of(format); + assertNull(dateType.getParsedDateTime(timestamp)); + } + + @Test + void test_valid_date_with_custom_formatter() { + String dateString = "2021-11-08"; + String format = "yyyy-MM-dd"; + OpenSearchDateType dateType = OpenSearchDateType.of(format); + + LocalDate expectedDate = LocalDate.parse(dateString, DateTimeFormatter.ISO_DATE); + LocalDate parsedDate = dateType.getParsedDateTime(dateString).toLocalDate(); + + assertEquals(expectedDate, parsedDate); + assertEquals("2021-11-08", dateType.getFormattedDate(parsedDate)); + } + + @Test + void test_valid_date_string_with_custom_formatter() { + String dateString = "03-Jan-21"; + String format = "dd-MMM-yy"; + OpenSearchDateType dateType = OpenSearchDateType.of(format); + + LocalDate parsedDate = dateType.getParsedDateTime(dateString).toLocalDate(); + + assertEquals(LocalDate.parse("2021-01-03"), parsedDate); + assertEquals("03-Jan-21", dateType.getFormattedDate(parsedDate)); + assertFalse(dateType.hasNoFormatter()); + } + + @Test + void test_valid_date_with_multiple_formatters() { + String dateString = "2021-11-08"; + String format = "yyyy/MM/dd || yyyy-MM-dd"; + OpenSearchDateType dateType = OpenSearchDateType.of(format); + + LocalDate expectedDate = LocalDate.parse(dateString, DateTimeFormatter.ofPattern("yyyy-MM-dd")); + LocalDate parsedDate = dateType.getParsedDateTime(dateString).toLocalDate(); + + assertEquals(expectedDate, parsedDate); + assertEquals("2021/11/08", dateType.getFormattedDate(parsedDate)); + } + + @Test + void test_valid_time_with_custom_formatter() { + String timeString = "12:10:30.000"; + String format = "HH:mm:ss.SSS"; + OpenSearchDateType dateType = OpenSearchDateType.of(format); + + LocalTime expectedTime = LocalTime.parse(timeString, DateTimeFormatter.ofPattern(format)); + LocalTime parsedTime = dateType.getParsedDateTime(timeString).toLocalTime(); + + assertEquals(expectedTime, parsedTime); + assertEquals("12:10:30.000", dateType.getFormattedDate(parsedTime)); + } + + @Test + void test_valid_time_with_multiple_formatters() { + String timeString = "12:10:30"; + String format = "HH:mm:ss.SSS || HH:mm:ss"; + OpenSearchDateType dateType = OpenSearchDateType.of(format); + + LocalTime expectedTime = LocalTime.parse(timeString, 
DateTimeFormatter.ofPattern("HH:mm:ss")); + LocalTime parsedTime = dateType.getParsedDateTime(timeString).toLocalTime(); + + assertEquals(expectedTime, parsedTime); + assertEquals("12:10:30.000", dateType.getFormattedDate(parsedTime)); + } } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContentTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContentTest.java new file mode 100644 index 0000000000..c2cf0328bd --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContentTest.java @@ -0,0 +1,31 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.data.utils; + +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import org.junit.jupiter.api.Test; +import org.opensearch.OpenSearchParseException; + +public class OpenSearchJsonContentTest { + @Test + public void testGetValueWithIOException() throws IOException { + JsonNode jsonNode = mock(JsonNode.class); + JsonParser jsonParser = mock(JsonParser.class); + when(jsonNode.traverse()).thenReturn(jsonParser); + when(jsonParser.nextToken()).thenThrow(new IOException()); + OpenSearchJsonContent content = new OpenSearchJsonContent(jsonNode); + OpenSearchParseException exception = + assertThrows(OpenSearchParseException.class, content::geoValue); + assertTrue(exception.getMessage().contains("error parsing geo point")); + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprGeoPointValueTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprGeoPointValueTest.java index defa97d8c8..1ff3321237 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprGeoPointValueTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprGeoPointValueTest.java @@ -14,7 +14,8 @@ class OpenSearchExprGeoPointValueTest { - private OpenSearchExprGeoPointValue geoPointValue = new OpenSearchExprGeoPointValue(1.0, 1.0); + private final OpenSearchExprGeoPointValue geoPointValue = + new OpenSearchExprGeoPointValue(1.0, 1.0); @Test void value() { diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprIpValueTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprIpValueTest.java index 38a4ad3199..8b13789179 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprIpValueTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprIpValueTest.java @@ -1,44 +1 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ -package org.opensearch.sql.opensearch.data.value; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import org.junit.jupiter.api.Test; -import org.opensearch.sql.opensearch.data.type.OpenSearchIpType; - -public class OpenSearchExprIpValueTest { - - private OpenSearchExprIpValue ipValue = new 
OpenSearchExprIpValue("192.168.0.1"); - - @Test - void value() { - assertEquals("192.168.0.1", ipValue.value()); - } - - @Test - void type() { - assertEquals(OpenSearchIpType.of(), ipValue.type()); - } - - @Test - void compare() { - assertEquals(0, ipValue.compareTo(new OpenSearchExprIpValue("192.168.0.1"))); - assertEquals(ipValue, new OpenSearchExprIpValue("192.168.0.1")); - } - - @Test - void equal() { - assertTrue(ipValue.equal(new OpenSearchExprIpValue("192.168.0.1"))); - } - - @Test - void testHashCode() { - assertNotNull(ipValue.hashCode()); - } -} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java index 83e26f85e4..89dfd4dbdb 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java @@ -16,6 +16,7 @@ import static org.opensearch.sql.data.model.ExprValueUtils.doubleValue; import static org.opensearch.sql.data.model.ExprValueUtils.floatValue; import static org.opensearch.sql.data.model.ExprValueUtils.integerValue; +import static org.opensearch.sql.data.model.ExprValueUtils.ipValue; import static org.opensearch.sql.data.model.ExprValueUtils.longValue; import static org.opensearch.sql.data.model.ExprValueUtils.nullValue; import static org.opensearch.sql.data.model.ExprValueUtils.shortValue; @@ -47,8 +48,11 @@ import lombok.EqualsAndHashCode; import lombok.ToString; import org.junit.jupiter.api.Test; +import org.opensearch.OpenSearchParseException; +import org.opensearch.geometry.utils.Geohash; import org.opensearch.sql.data.model.ExprCollectionValue; import org.opensearch.sql.data.model.ExprDateValue; +import org.opensearch.sql.data.model.ExprIpValue; import org.opensearch.sql.data.model.ExprTimeValue; import org.opensearch.sql.data.model.ExprTimestampValue; import org.opensearch.sql.data.model.ExprTupleValue; @@ -60,6 +64,8 @@ class OpenSearchExprValueFactoryTest { + static final String fieldIp = "ipV"; + private static final Map MAPPING = new ImmutableMap.Builder() .put("byteV", OpenSearchDataType.of(BYTE)) @@ -110,13 +116,15 @@ class OpenSearchExprValueFactoryTest { "textKeywordV", OpenSearchTextType.of( Map.of("words", OpenSearchDataType.of(OpenSearchDataType.MappingType.Keyword)))) - .put("ipV", OpenSearchDataType.of(OpenSearchDataType.MappingType.Ip)) + .put(fieldIp, OpenSearchDataType.of(OpenSearchDataType.MappingType.Ip)) .put("geoV", OpenSearchDataType.of(OpenSearchDataType.MappingType.GeoPoint)) .put("binaryV", OpenSearchDataType.of(OpenSearchDataType.MappingType.Binary)) .build(); - + private static final double TOLERANCE = 1E-5; private final OpenSearchExprValueFactory exprValueFactory = - new OpenSearchExprValueFactory(MAPPING); + new OpenSearchExprValueFactory(MAPPING, true); + private final OpenSearchExprValueFactory exprValueFactoryNoArrays = + new OpenSearchExprValueFactory(MAPPING, false); @Test public void constructNullValue() { @@ -209,6 +217,16 @@ public void constructString() { () -> assertEquals(stringValue("text"), constructFromObject("stringV", "text"))); } + @Test + public void constructIp() { + assertAll( + () -> assertEquals(ipValue("1.2.3.4"), tupleValue("{\"ipV\":\"1.2.3.4\"}").get("ipV")), + () -> + assertEquals( + ipValue("2001:db7::ff00:42:8329"), + constructFromObject("ipV", "2001:db7::ff00:42:8329"))); + } + @Test public 
void constructBoolean() { assertAll( @@ -462,6 +480,13 @@ public void constructArrayOfStrings() { constructFromObject("arrayV", List.of("zz", "au"))); } + @Test + public void constructArrayOfStringsWithArrays() { + assertEquals( + new ExprCollectionValue(List.of(stringValue("zz"), stringValue("au"))), + constructFromObjectWithArraySupport("arrayV", List.of("zz", "au"))); + } + @Test public void constructNestedArraysOfStrings() { assertEquals( @@ -471,15 +496,23 @@ public void constructNestedArraysOfStrings() { } @Test - public void constructNestedArraysOfStringsReturnsFirstIndex() { + public void constructNestedArraysOfStringsReturnsAll() { assertEquals( - stringValue("zz"), tupleValue("{\"stringV\":[[\"zz\", \"au\"],[\"ss\"]]}").get("stringV")); + new ExprCollectionValue( + List.of( + new ExprCollectionValue(List.of(stringValue("zz"), stringValue("au"))), + new ExprCollectionValue(List.of(stringValue("ss"))))), + tupleValue("{\"stringV\":[[\"zz\", \"au\"],[\"ss\"]]}").get("stringV")); } @Test - public void constructMultiNestedArraysOfStringsReturnsFirstIndex() { + public void constructMultiNestedArraysOfStringsReturnsAll() { assertEquals( - stringValue("z"), + new ExprCollectionValue( + List.of( + stringValue("z"), + new ExprCollectionValue(List.of(stringValue("s"))), + new ExprCollectionValue(List.of(stringValue("zz"), stringValue("au"))))), tupleValue("{\"stringV\":[\"z\",[\"s\"],[\"zz\", \"au\"]]}").get("stringV")); } @@ -575,6 +608,20 @@ public void constructNestedArrayNode() { tupleValueWithArraySupport("{\"nestedV\":[1969,2011]}").get("nestedV")); } + @Test + public void constructNestedArrayNodeNotSupported() { + assertEquals( + Map.of("stringV", stringValue("foo")), + tupleValueWithoutArraySupport("[{\"stringV\":\"foo\"}]")); + } + + @Test + public void constructNestedArrayNodeNotSupportedNoFieldTolerance() { + assertEquals( + Map.of("stringV", stringValue("foo")), + tupleValueWithoutArraySupportNoFieldTolerance("{\"stringV\":\"foo\"}")); + } + @Test public void constructNestedObjectNode() { assertEquals( @@ -598,46 +645,88 @@ public void constructArrayOfGeoPoints() { } @Test - public void constructArrayOfIPsReturnsFirstIndex() { + public void constructArrayOfGeoPointsNoArrays() { assertEquals( - new OpenSearchExprIpValue("192.168.0.1"), - tupleValue("{\"ipV\":[\"192.168.0.1\",\"192.168.0.2\"]}").get("ipV")); + new OpenSearchExprGeoPointValue(42.60355556, -97.25263889), + tupleValueWithoutArraySupport( + "{\"geoV\":[" + + "{\"lat\":42.60355556,\"lon\":-97.25263889}," + + "{\"lat\":-33.6123556,\"lon\":66.287449}" + + "]}") + .get("geoV")); } @Test - public void constructBinaryArrayReturnsFirstIndex() { + public void constructArrayOfGeoPointsReturnsAll() { assertEquals( - new OpenSearchExprBinaryValue("U29tZSBiaWsdfsdfgYmxvYg=="), + new ExprCollectionValue( + List.of( + new OpenSearchExprGeoPointValue(42.60355556, -97.25263889), + new OpenSearchExprGeoPointValue(-33.6123556, 66.287449))), + tupleValue( + "{\"geoV\":[" + + "{\"lat\":42.60355556,\"lon\":-97.25263889}," + + "{\"lat\":-33.6123556,\"lon\":66.287449}" + + "]}") + .get("geoV")); + } + + @Test + public void constructBinaryArrayReturnsAll() { + assertEquals( + new ExprCollectionValue( + List.of( + new OpenSearchExprBinaryValue("U29tZSBiaWsdfsdfgYmxvYg=="), + new OpenSearchExprBinaryValue("U987yuhjjiy8jhk9vY+98jjdf"))), tupleValue("{\"binaryV\":[\"U29tZSBiaWsdfsdfgYmxvYg==\",\"U987yuhjjiy8jhk9vY+98jjdf\"]}") .get("binaryV")); } @Test - public void constructArrayOfCustomEpochMillisReturnsFirstIndex() { + public void 
constructArrayOfIPsReturnsAll() { + final String ipv4String = "1.2.3.4"; + final String ipv6String = "2001:db7::ff00:42:8329"; + assertEquals( - new ExprTimestampValue("2015-01-01 12:10:30"), + new ExprCollectionValue(List.of(ipValue(ipv4String), ipValue(ipv6String))), + tupleValue(String.format("{\"%s\":[\"%s\",\"%s\"]}", fieldIp, ipv4String, ipv6String)) + .get(fieldIp)); + } + + @Test + public void constructArrayOfCustomEpochMillisReturnsAll() { + assertEquals( + new ExprCollectionValue( + List.of( + new ExprTimestampValue("2015-01-01 12:10:30"), + new ExprTimestampValue("1999-11-09 01:09:44"))), tupleValue("{\"customAndEpochMillisV\":[\"2015-01-01 12:10:30\",\"1999-11-09 01:09:44\"]}") .get("customAndEpochMillisV")); } @Test - public void constructArrayOfDateStringsReturnsFirstIndex() { + public void constructArrayOfDateStringsReturnsAll() { assertEquals( - new ExprDateValue("1984-04-12"), + new ExprCollectionValue( + List.of(new ExprDateValue("1984-04-12"), new ExprDateValue("2033-05-03"))), tupleValue("{\"dateStringV\":[\"1984-04-12\",\"2033-05-03\"]}").get("dateStringV")); } @Test - public void constructArrayOfTimeStringsReturnsFirstIndex() { + public void constructArrayOfTimeStringsReturnsAll() { assertEquals( - new ExprTimeValue("12:10:30"), + new ExprCollectionValue( + List.of(new ExprTimeValue("12:10:30"), new ExprTimeValue("18:33:55"))), tupleValue("{\"timeStringV\":[\"12:10:30.000Z\",\"18:33:55.000Z\"]}").get("timeStringV")); } @Test public void constructArrayOfEpochMillis() { assertEquals( - new ExprTimestampValue(Instant.ofEpochMilli(1420070400001L)), + new ExprCollectionValue( + List.of( + new ExprTimestampValue(Instant.ofEpochMilli(1420070400001L)), + new ExprTimestampValue(Instant.ofEpochMilli(1454251113333L)))), tupleValue("{\"dateOrEpochMillisV\":[\"1420070400001\",\"1454251113333\"]}") .get("dateOrEpochMillisV")); } @@ -666,19 +755,54 @@ public void constructStruct() { @Test public void constructIP() { + final String ipString = "192.168.0.1"; assertEquals( - new OpenSearchExprIpValue("192.168.0.1"), - tupleValue("{\"ipV\":\"192.168.0.1\"}").get("ipV")); + new ExprIpValue(ipString), + tupleValue(String.format("{\"%s\":\"%s\"}", fieldIp, ipString)).get(fieldIp)); } @Test public void constructGeoPoint() { + final double lat = 42.60355556; + final double lon = -97.25263889; + final var expectedGeoPointValue = new OpenSearchExprGeoPointValue(lat, lon); + // An object with a latitude and longitude. assertEquals( - new OpenSearchExprGeoPointValue(42.60355556, -97.25263889), - tupleValue("{\"geoV\":{\"lat\":42.60355556,\"lon\":-97.25263889}}").get("geoV")); + expectedGeoPointValue, + tupleValue(String.format("{\"geoV\":{\"lat\":%.8f,\"lon\":%.8f}}", lat, lon)).get("geoV")); + + // A string in the “latitude,longitude” format. assertEquals( - new OpenSearchExprGeoPointValue(42.60355556, -97.25263889), - tupleValue("{\"geoV\":{\"lat\":\"42.60355556\",\"lon\":\"-97.25263889\"}}").get("geoV")); + expectedGeoPointValue, + tupleValue(String.format("{\"geoV\":\"%.8f,%.8f\"}", lat, lon)).get("geoV")); + + // A geohash. + var point = + (OpenSearchExprGeoPointValue.GeoPoint) + tupleValue(String.format("{\"geoV\":\"%s\"}", Geohash.stringEncode(lon, lat))) + .get("geoV") + .value(); + assertEquals(lat, point.getLat(), TOLERANCE); + assertEquals(lon, point.getLon(), TOLERANCE); + + // An array in the [longitude, latitude] format. 
+ assertEquals( + expectedGeoPointValue, + tupleValue(String.format("{\"geoV\":[%.8f, %.8f]}", lon, lat)).get("geoV")); + + // A Well-Known Text POINT in the “POINT(longitude latitude)” format. + assertEquals( + expectedGeoPointValue, + tupleValue(String.format("{\"geoV\":\"POINT (%.8f %.8f)\"}", lon, lat)).get("geoV")); + + // GeoJSON format, where the coordinates are in the [longitude, latitude] format + assertEquals( + expectedGeoPointValue, + tupleValue( + String.format( + "{\"geoV\":{\"type\":\"Point\",\"coordinates\":[%.8f,%.8f]}}", lon, lat)) + .get("geoV")); + assertEquals( new OpenSearchExprGeoPointValue(42.60355556, -97.25263889), constructFromObject("geoV", "42.60355556,-97.25263889")); @@ -686,38 +810,23 @@ public void constructGeoPoint() { @Test public void constructGeoPointFromUnsupportedFormatShouldThrowException() { - IllegalStateException exception = + OpenSearchParseException exception = assertThrows( - IllegalStateException.class, - () -> tupleValue("{\"geoV\":[42.60355556,-97.25263889]}").get("geoV")); - assertEquals( - "geo point must in format of {\"lat\": number, \"lon\": number}", exception.getMessage()); + OpenSearchParseException.class, + () -> tupleValue("{\"geoV\": [42.60355556, false]}").get("geoV")); + assertEquals("lat must be a number, got false", exception.getMessage()); exception = assertThrows( - IllegalStateException.class, + OpenSearchParseException.class, () -> tupleValue("{\"geoV\":{\"lon\":-97.25263889}}").get("geoV")); - assertEquals( - "geo point must in format of {\"lat\": number, \"lon\": number}", exception.getMessage()); - - exception = - assertThrows( - IllegalStateException.class, - () -> tupleValue("{\"geoV\":{\"lat\":-97.25263889}}").get("geoV")); - assertEquals( - "geo point must in format of {\"lat\": number, \"lon\": number}", exception.getMessage()); + assertEquals("field [lat] missing", exception.getMessage()); exception = assertThrows( - IllegalStateException.class, + OpenSearchParseException.class, () -> tupleValue("{\"geoV\":{\"lat\":true,\"lon\":-97.25263889}}").get("geoV")); - assertEquals("latitude must be number value, but got value: true", exception.getMessage()); - - exception = - assertThrows( - IllegalStateException.class, - () -> tupleValue("{\"geoV\":{\"lat\":42.60355556,\"lon\":false}}").get("geoV")); - assertEquals("longitude must be number value, but got value: false", exception.getMessage()); + assertEquals("lat must be a number", exception.getMessage()); } @Test @@ -728,12 +837,75 @@ public void constructBinary() { } /** - * Return the first element if is OpenSearch Array. + * Return the all elements if is OpenSearch Array. + * https://www.elastic.co/guide/en/elasticsearch/reference/current/array.html. + */ + @Test + public void constructFromOpenSearchArrayReturnAll() { + assertEquals( + new ExprCollectionValue(List.of(integerValue(1), integerValue(2), integerValue(3))), + tupleValue("{\"intV\":[1, 2, 3]}").get("intV")); + assertEquals( + new ExprCollectionValue( + List.of( + new ExprTupleValue( + new LinkedHashMap() { + { + put("id", integerValue(1)); + put("state", stringValue("WA")); + } + }), + new ExprTupleValue( + new LinkedHashMap() { + { + put("id", integerValue(2)); + put("state", stringValue("CA")); + } + }))), + tupleValue("{\"structV\":[{\"id\":1,\"state\":\"WA\"},{\"id\":2,\"state\":\"CA\"}]}}") + .get("structV")); + } + + /** + * Return the all elements if is OpenSearch Array. * https://www.elastic.co/guide/en/elasticsearch/reference/current/array.html. 
*/ @Test - public void constructFromOpenSearchArrayReturnFirstElement() { - assertEquals(integerValue(1), tupleValue("{\"intV\":[1, 2, 3]}").get("intV")); + public void constructFromOpenSearchArrayReturnAllWithArraySupport() { + assertEquals( + new ExprCollectionValue(List.of(integerValue(1), integerValue(2), integerValue(3))), + tupleValue("{\"intV\":[1, 2, 3]}").get("intV")); + assertEquals( + new ExprCollectionValue( + List.of( + new ExprTupleValue( + new LinkedHashMap() { + { + put("id", integerValue(1)); + put("state", stringValue("WA")); + } + }), + new ExprTupleValue( + new LinkedHashMap() { + { + put("id", integerValue(2)); + put("state", stringValue("CA")); + } + }))), + tupleValueWithArraySupport( + "{\"structV\":[{\"id\":1,\"state\":\"WA\"},{\"id\":2,\"state\":\"CA\"}]}}") + .get("structV")); + } + + /** + * Return only the first element if is OpenSearch Array. + * https://www.elastic.co/guide/en/elasticsearch/reference/current/array.html. + */ + @Test + public void constructFromOpenSearchArrayReturnAllWithoutArraySupport() { + assertEquals( + new ExprCollectionValue(List.of(integerValue(1), integerValue(2), integerValue(3))), + tupleValue("{\"intV\":[1, 2, 3]}").get("intV")); assertEquals( new ExprTupleValue( new LinkedHashMap() { @@ -742,7 +914,39 @@ public void constructFromOpenSearchArrayReturnFirstElement() { put("state", stringValue("WA")); } }), - tupleValue("{\"structV\":[{\"id\":1,\"state\":\"WA\"},{\"id\":2,\"state\":\"CA\"}]}}") + tupleValueWithoutArraySupport( + "{\"structV\":[{\"id\":1,\"state\":\"WA\"},{\"id\":2,\"state\":\"CA\"}]}}") + .get("structV")); + } + + /** + * Return only the first element if is OpenSearch Array. + * https://www.elastic.co/guide/en/elasticsearch/reference/current/array.html. + */ + @Test + public void constructFromOpenSearchArrayReturnAllWithoutArraySupportNoFieldTolerance() { + assertEquals( + new ExprCollectionValue(List.of(integerValue(1), integerValue(2), integerValue(3))), + tupleValue("{\"intV\":[1, 2, 3]}").get("intV")); + assertEquals( + new ExprCollectionValue( + List.of( + new ExprTupleValue( + new LinkedHashMap() { + { + put("id", integerValue(1)); + put("state", stringValue("WA")); + } + }), + new ExprTupleValue( + new LinkedHashMap() { + { + put("id", integerValue(2)); + put("state", stringValue("CA")); + } + }))), + tupleValueWithoutArraySupportNoFieldTolerance( + "{\"structV\":[{\"id\":1,\"state\":\"WA\"},{\"id\":2,\"state\":\"CA\"}]}}") .get("structV")); } @@ -763,7 +967,7 @@ public void noTypeFoundForMapping() { @Test public void constructUnsupportedTypeThrowException() { OpenSearchExprValueFactory exprValueFactory = - new OpenSearchExprValueFactory(Map.of("type", new TestType())); + new OpenSearchExprValueFactory(Map.of("type", new TestType()), true); IllegalStateException exception = assertThrows( IllegalStateException.class, () -> exprValueFactory.construct("{\"type\":1}", false)); @@ -780,7 +984,8 @@ public void constructUnsupportedTypeThrowException() { // it is accepted without overwriting existing data. 
public void factoryMappingsAreExtendableWithoutOverWrite() throws NoSuchFieldException, IllegalAccessException { - var factory = new OpenSearchExprValueFactory(Map.of("value", OpenSearchDataType.of(INTEGER))); + var factory = + new OpenSearchExprValueFactory(Map.of("value", OpenSearchDataType.of(INTEGER)), true); factory.extendTypeMapping( Map.of( "value", OpenSearchDataType.of(DOUBLE), @@ -808,6 +1013,16 @@ public Map tupleValueWithArraySupport(String jsonString) { return construct.tupleValue(); } + public Map tupleValueWithoutArraySupport(String jsonString) { + final ExprValue construct = exprValueFactoryNoArrays.construct(jsonString, false); + return construct.tupleValue(); + } + + public Map tupleValueWithoutArraySupportNoFieldTolerance(String jsonString) { + final ExprValue construct = exprValueFactoryNoArrays.construct(jsonString, true); + return construct.tupleValue(); + } + private ExprValue constructFromObject(String fieldName, Object value) { return exprValueFactory.construct(fieldName, value, false); } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngineTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngineTest.java index 739b70b1b8..e5cf94eb86 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngineTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngineTest.java @@ -174,18 +174,20 @@ void explain_successfully() { new OpenSearchExecutionEngine(client, protector, new PlanSerializer(null)); Settings settings = mock(Settings.class); when(settings.getSettingValue(SQL_CURSOR_KEEP_ALIVE)).thenReturn(TimeValue.timeValueMinutes(1)); + when(settings.getSettingValue(Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER)).thenReturn(true); OpenSearchExprValueFactory exprValueFactory = mock(OpenSearchExprValueFactory.class); final var name = new OpenSearchRequest.IndexName("test"); final int defaultQuerySize = 100; final int maxResultWindow = 10000; - final var requestBuilder = new OpenSearchRequestBuilder(defaultQuerySize, exprValueFactory); + final var requestBuilder = + new OpenSearchRequestBuilder(defaultQuerySize, exprValueFactory, settings); PhysicalPlan plan = new OpenSearchIndexScan( mock(OpenSearchClient.class), maxResultWindow, requestBuilder.build( - name, maxResultWindow, settings.getSettingValue(SQL_CURSOR_KEEP_ALIVE))); + name, maxResultWindow, settings.getSettingValue(SQL_CURSOR_KEEP_ALIVE), client)); AtomicReference result = new AtomicReference<>(); executor.explain( diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/ResourceMonitorPlanTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/ResourceMonitorPlanTest.java index 26bcdf6d89..82062bee76 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/ResourceMonitorPlanTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/ResourceMonitorPlanTest.java @@ -47,7 +47,7 @@ void openExceedResourceLimit() { IllegalStateException exception = assertThrows(IllegalStateException.class, () -> monitorPlan.open()); - assertEquals("resource is not enough to run the query, quit.", exception.getMessage()); + assertEquals("insufficient resources to run the query, quit.", exception.getMessage()); } @Test @@ -79,7 +79,7 @@ void nextExceedResourceLimit() { IllegalStateException exception = assertThrows(IllegalStateException.class, () -> monitorPlan.next()); - 
assertEquals("resource is not enough to load next row, quit.", exception.getMessage()); + assertEquals("insufficient resources to load next row, quit.", exception.getMessage()); } @Test diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java index b2dc042110..724178bd34 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/executor/protector/OpenSearchExecutionProtectorTest.java @@ -8,10 +8,9 @@ import static java.util.Collections.emptyList; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertSame; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.never; -import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.*; import static org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_ASC; +import static org.opensearch.sql.ast.tree.Trendline.TrendlineType.SMA; import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import static org.opensearch.sql.data.type.ExprCoreType.STRING; @@ -25,6 +24,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -39,10 +39,12 @@ import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; import org.opensearch.client.node.NodeClient; +import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.tree.RareTopN.CommandType; import org.opensearch.sql.ast.tree.Sort; +import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.data.model.ExprBooleanValue; import org.opensearch.sql.expression.DSL; @@ -68,6 +70,8 @@ import org.opensearch.sql.planner.physical.NestedOperator; import org.opensearch.sql.planner.physical.PhysicalPlan; import org.opensearch.sql.planner.physical.PhysicalPlanDSL; +import org.opensearch.sql.planner.physical.TakeOrderedOperator; +import org.opensearch.sql.planner.physical.TrendlineOperator; @ExtendWith(MockitoExtension.class) @DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) @@ -90,6 +94,8 @@ public void setup() { @Test void test_protect_indexScan() { + when(settings.getSettingValue(Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER)).thenReturn(true); + String indexName = "test"; final int maxResultWindow = 10000; final int querySizeLimit = 200; @@ -113,11 +119,12 @@ void test_protect_indexScan() { final var name = new OpenSearchRequest.IndexName(indexName); final var request = - new OpenSearchRequestBuilder(querySizeLimit, exprValueFactory) + new OpenSearchRequestBuilder(querySizeLimit, exprValueFactory, settings) .build( name, maxResultWindow, - settings.getSettingValue(Settings.Key.SQL_CURSOR_KEEP_ALIVE)); + settings.getSettingValue(Settings.Key.SQL_CURSOR_KEEP_ALIVE), + client); assertEquals( PhysicalPlanDSL.project( PhysicalPlanDSL.limit( @@ -306,6 +313,32 @@ void do_nothing_with_CursorCloseOperator_and_children() { verify(child, never()).accept(executionProtector, null); } + @Test + public void 
test_visitTakeOrdered() { + Pair sort = + ImmutablePair.of(Sort.SortOption.DEFAULT_ASC, ref("a", INTEGER)); + TakeOrderedOperator takeOrdered = + PhysicalPlanDSL.takeOrdered(PhysicalPlanDSL.values(emptyList()), 10, 5, sort); + assertEquals( + resourceMonitor(takeOrdered), executionProtector.visitTakeOrdered(takeOrdered, null)); + } + + @Test + public void test_visitTrendline() { + final TrendlineOperator trendlineOperator = + new TrendlineOperator( + PhysicalPlanDSL.values(emptyList()), + Collections.singletonList( + Pair.of( + new Trendline.TrendlineComputation( + 1, AstDSL.field("dummy"), "dummy_alias", SMA), + DOUBLE))); + + assertEquals( + resourceMonitor(trendlineOperator), + executionProtector.visitTrendline(trendlineOperator, null)); + } + PhysicalPlan resourceMonitor(PhysicalPlan input) { return new ResourceMonitorPlan(input, resourceMonitor); } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/planner/physical/MLOperatorTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/planner/physical/MLOperatorTest.java index 0a3f56285f..fa328fd26c 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/planner/physical/MLOperatorTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/planner/physical/MLOperatorTest.java @@ -65,7 +65,7 @@ public class MLOperatorTest { private NodeClient nodeClient; private MLOperator mlOperator; - Map arguments = new HashMap<>(); + final Map arguments = new HashMap<>(); @Mock(answer = Answers.RETURNS_DEEP_STUBS) private MachineLearningNodeClient machineLearningNodeClient; diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchQueryRequestTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchQueryRequestTest.java index d2bc5b0641..52c208da15 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchQueryRequestTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchQueryRequestTest.java @@ -5,22 +5,18 @@ package org.opensearch.sql.opensearch.request; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.never; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.*; import static org.opensearch.sql.opensearch.request.OpenSearchRequest.DEFAULT_QUERY_TIMEOUT; +import java.io.IOException; +import java.lang.reflect.Field; import java.util.List; import java.util.function.Consumer; import java.util.function.Function; +import lombok.SneakyThrows; import org.apache.lucene.search.TotalHits; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; @@ -28,14 +24,19 @@ import org.opensearch.action.search.SearchRequest; import org.opensearch.action.search.SearchResponse; import org.opensearch.action.search.SearchScrollRequest; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.index.query.QueryBuilders; 
import org.opensearch.search.SearchHit; import org.opensearch.search.SearchHits; +import org.opensearch.search.builder.PointInTimeBuilder; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.search.fetch.subphase.FetchSourceContext; import org.opensearch.sql.opensearch.data.value.OpenSearchExprValueFactory; import org.opensearch.sql.opensearch.response.OpenSearchResponse; +import org.opensearch.sql.opensearch.storage.OpenSearchIndex; +import org.opensearch.sql.opensearch.storage.OpenSearchStorageEngine; @ExtendWith(MockitoExtension.class) public class OpenSearchQueryRequestTest { @@ -64,6 +65,93 @@ public class OpenSearchQueryRequestTest { private final OpenSearchQueryRequest remoteRequest = new OpenSearchQueryRequest("ccs:test", 200, factory, List.of()); + @Mock private StreamOutput streamOutput; + @Mock private StreamInput streamInput; + @Mock private OpenSearchStorageEngine engine; + @Mock private PointInTimeBuilder pointInTimeBuilder; + + private OpenSearchQueryRequest serializationRequest; + + private SearchSourceBuilder sourceBuilderForSerializer; + + @BeforeEach + void setup() { + sourceBuilderForSerializer = new SearchSourceBuilder(); + sourceBuilderForSerializer.pointInTimeBuilder(pointInTimeBuilder); + sourceBuilderForSerializer.timeout(TimeValue.timeValueSeconds(30)); + } + + @SneakyThrows + @Test + void testWriteTo() throws IOException { + when(pointInTimeBuilder.getId()).thenReturn("samplePITId"); + sourceBuilderForSerializer.searchAfter(new Object[] {"value1", 123}); + List includes = List.of("field1", "field2"); + serializationRequest = + new OpenSearchQueryRequest( + new OpenSearchRequest.IndexName("test"), + sourceBuilderForSerializer, + factory, + includes, + new TimeValue(1000), + "samplePITId"); + + Field searchAfterField = OpenSearchQueryRequest.class.getDeclaredField("searchAfter"); + searchAfterField.setAccessible(true); + searchAfterField.set(serializationRequest, new Object[] {"value1", 123}); + + serializationRequest.writeTo(streamOutput); + + String expectedJson = "{\"timeout\":\"30s\",\"search_after\":[\"value1\",123]}"; + verify(streamOutput).writeString(expectedJson); + verify(streamOutput).writeTimeValue(TimeValue.timeValueSeconds(30)); + verify(streamOutput).writeString("samplePITId"); + verify(streamOutput).writeStringCollection(includes); + + verify(streamOutput).writeVInt(2); + verify(streamOutput).writeGenericValue("value1"); + verify(streamOutput).writeGenericValue(123); + } + + @Test + void testWriteToWithoutSearchAfter() + throws IOException, NoSuchFieldException, IllegalAccessException { + when(pointInTimeBuilder.getId()).thenReturn("samplePITId"); + + List includes = List.of("field1", "field2"); + serializationRequest = + new OpenSearchQueryRequest( + new OpenSearchRequest.IndexName("test"), + sourceBuilderForSerializer, + factory, + includes, + new TimeValue(1000), + "samplePITId"); + + serializationRequest.writeTo(streamOutput); + verify(streamOutput).writeString("{\"timeout\":\"30s\"}"); + verify(streamOutput).writeTimeValue(TimeValue.timeValueSeconds(30)); + verify(streamOutput).writeString("samplePITId"); + verify(streamOutput).writeStringCollection(includes); + verify(streamOutput, never()).writeVInt(anyInt()); + verify(streamOutput, never()).writeGenericValue(any()); + } + + @Test + void testWriteToWithoutPIT() { + serializationRequest = new OpenSearchQueryRequest("test", 200, factory, List.of()); + + UnsupportedOperationException exception = + assertThrows( + UnsupportedOperationException.class, + () -> { + 
request.writeTo(streamOutput); + }); + + assertEquals( + "OpenSearchQueryRequest serialization is not implemented.", exception.getMessage()); + } + @Test void search() { OpenSearchQueryRequest request = @@ -81,6 +169,145 @@ void search() { verify(searchAction, times(1)).apply(any()); } + @Test + void search_with_pit() { + OpenSearchQueryRequest request = + new OpenSearchQueryRequest( + new OpenSearchRequest.IndexName("test"), + sourceBuilder, + factory, + List.of(), + new TimeValue(1000), + "samplePid"); + + when(searchAction.apply(any())).thenReturn(searchResponse); + when(searchResponse.getHits()).thenReturn(searchHits); + when(searchHits.getHits()).thenReturn(new SearchHit[] {searchHit}); + when(searchHit.getSortValues()).thenReturn(new String[] {"sortedValue"}); + when(sourceBuilder.sorts()).thenReturn(null); + + OpenSearchResponse openSearchResponse = request.searchWithPIT(searchAction); + assertFalse(openSearchResponse.isEmpty()); + verify(searchAction, times(1)).apply(any()); + + when(searchResponse.getHits()).thenReturn(searchHits); + when(searchResponse.getAggregations()).thenReturn(null); + when(searchHits.getHits()).thenReturn(null); + openSearchResponse = request.searchWithPIT(searchAction); + assertTrue(openSearchResponse.isEmpty()); + verify(searchAction, times(2)).apply(any()); + + openSearchResponse = request.searchWithPIT(searchAction); + assertTrue(openSearchResponse.isEmpty()); + } + + @Test + void search_with_pit_hits_null() { + OpenSearchQueryRequest request = + new OpenSearchQueryRequest( + new OpenSearchRequest.IndexName("test"), + sourceBuilder, + factory, + List.of(), + new TimeValue(1000), + "samplePid"); + + when(searchAction.apply(any())).thenReturn(searchResponse); + when(searchResponse.getHits()).thenReturn(searchHits); + when(searchHits.getHits()).thenReturn(new SearchHit[] {searchHit}); + when(sourceBuilder.sorts()).thenReturn(null); + + OpenSearchResponse openSearchResponse = request.searchWithPIT(searchAction); + assertFalse(openSearchResponse.isEmpty()); + } + + @Test + void search_with_pit_hits_empty() { + SearchResponse searchResponse = mock(SearchResponse.class); + SearchHits searchHits = mock(SearchHits.class); + OpenSearchQueryRequest request = + new OpenSearchQueryRequest( + new OpenSearchRequest.IndexName("test"), + sourceBuilder, + factory, + List.of(), + new TimeValue(1000), + "samplePid"); + + when(searchAction.apply(any())).thenReturn(searchResponse); + when(searchResponse.getHits()).thenReturn(searchHits); + when(searchHits.getHits()).thenReturn(new SearchHit[] {}); + when(sourceBuilder.sorts()).thenReturn(null); + + OpenSearchResponse openSearchResponse = request.searchWithPIT(searchAction); + assertTrue(openSearchResponse.isEmpty()); + } + + @Test + void search_with_pit_null() { + SearchResponse searchResponse = mock(SearchResponse.class); + SearchHits searchHits = mock(SearchHits.class); + OpenSearchQueryRequest request = + new OpenSearchQueryRequest( + new OpenSearchRequest.IndexName("test"), + sourceBuilder, + factory, + List.of(), + new TimeValue(1000), + "sample"); + + when(searchAction.apply(any())).thenReturn(searchResponse); + when(searchResponse.getHits()).thenReturn(searchHits); + when(searchHits.getHits()).thenReturn(new SearchHit[] {searchHit}); + + OpenSearchResponse openSearchResponse = request.search(searchAction, scrollAction); + assertFalse(openSearchResponse.isEmpty()); + } + + @Test + void has_another_batch() { + OpenSearchQueryRequest request = + new OpenSearchQueryRequest( + new OpenSearchRequest.IndexName("test"), + 
sourceBuilder, + factory, + List.of(), + new TimeValue(1000), + "sample"); + assertFalse(request.hasAnotherBatch()); + } + + @Test + void has_another_batch_pid_null() { + OpenSearchQueryRequest request = + new OpenSearchQueryRequest( + new OpenSearchRequest.IndexName("test"), + sourceBuilder, + factory, + List.of(), + new TimeValue(1000), + null); + assertFalse(request.hasAnotherBatch()); + } + + @Test + void has_another_batch_need_clean() { + OpenSearchQueryRequest request = + new OpenSearchQueryRequest( + new OpenSearchRequest.IndexName("test"), + sourceBuilder, + factory, + List.of(), + new TimeValue(1000), + "samplePid"); + + when(searchAction.apply(any())).thenReturn(searchResponse); + when(searchResponse.getHits()).thenReturn(searchHits); + when(searchHits.getHits()).thenReturn(new SearchHit[] {searchHit}); + OpenSearchResponse openSearchResponse = request.searchWithPIT(searchAction); + assertTrue(request.hasAnotherBatch()); + } + @Test void search_withoutContext() { OpenSearchQueryRequest request = @@ -121,6 +348,68 @@ void clean() { verify(cleanAction, never()).accept(any()); } + @Test + void testCleanConditionTrue() { + OpenSearchQueryRequest request = + new OpenSearchQueryRequest( + new OpenSearchRequest.IndexName("test"), + sourceBuilder, + factory, + List.of(), + new TimeValue(1000), + "samplePid"); + + when(searchAction.apply(any())).thenReturn(searchResponse); + when(searchResponse.getHits()).thenReturn(searchHits); + when(searchHits.getHits()).thenReturn(null); + OpenSearchResponse openSearchResponse = request.searchWithPIT(searchAction); + + request.clean(cleanAction); + + verify(cleanAction, times(1)).accept("samplePid"); + assertTrue(request.isSearchDone()); + assertNull(request.getPitId()); + } + + @Test + void testCleanConditionFalse_needCleanFalse() { + OpenSearchQueryRequest request = + new OpenSearchQueryRequest( + new OpenSearchRequest.IndexName("test"), + sourceBuilder, + factory, + List.of(), + new TimeValue(1000), + "samplePid"); + + when(searchAction.apply(any())).thenReturn(searchResponse); + when(searchResponse.getHits()).thenReturn(searchHits); + when(searchHits.getHits()).thenReturn(new SearchHit[] {searchHit}); + OpenSearchResponse openSearchResponse = request.searchWithPIT(searchAction); + + request.clean(cleanAction); + verify(cleanAction, never()).accept(anyString()); + assertFalse(request.isSearchDone()); + assertNull(request.getPitId()); + } + + @Test + void testCleanConditionFalse_pidNull() { + OpenSearchQueryRequest request = + new OpenSearchQueryRequest( + new OpenSearchRequest.IndexName("test"), + sourceBuilder, + factory, + List.of(), + new TimeValue(1000), + null); + + request.clean(cleanAction); + verify(cleanAction, never()).accept(anyString()); + assertFalse(request.isSearchDone()); + assertNull(request.getPitId()); + } + @Test void searchRequest() { request.getSourceBuilder().query(QueryBuilders.termQuery("name", "John")); @@ -159,6 +448,20 @@ void writeTo_unsupported() { UnsupportedOperationException.class, () -> request.writeTo(mock(StreamOutput.class))); } + @Test + void constructor_serialized() throws IOException { + StreamInput stream = mock(StreamInput.class); + OpenSearchStorageEngine engine = mock(OpenSearchStorageEngine.class); + when(stream.readString()).thenReturn("{}"); + when(stream.readStringArray()).thenReturn(new String[] {"sample"}); + OpenSearchIndex index = mock(OpenSearchIndex.class); + when(engine.getTable(null, "sample")).thenReturn(index); + when(stream.readVInt()).thenReturn(2); + 
when(stream.readGenericValue()).thenReturn("sampleSearchAfter"); + OpenSearchQueryRequest request = new OpenSearchQueryRequest(stream, engine); + assertNotNull(request); + } + private void assertSearchRequest(SearchRequest expected, OpenSearchQueryRequest request) { Function querySearch = searchRequest -> { diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilderTest.java index 742e76cbd0..a2430a671d 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilderTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilderTest.java @@ -5,13 +5,10 @@ package org.opensearch.sql.opensearch.request; -import static org.junit.Assert.assertThrows; +import static org.junit.Assert.*; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; -import static org.opensearch.index.query.QueryBuilders.matchAllQuery; -import static org.opensearch.index.query.QueryBuilders.nestedQuery; +import static org.mockito.Mockito.*; +import static org.opensearch.index.query.QueryBuilders.*; import static org.opensearch.search.sort.FieldSortBuilder.DOC_FIELD_NAME; import static org.opensearch.search.sort.SortOrder.ASC; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; @@ -25,21 +22,16 @@ import org.apache.commons.lang3.tuple.Pair; import org.apache.lucene.search.TotalHits; import org.apache.lucene.search.join.ScoreMode; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.DisplayNameGeneration; -import org.junit.jupiter.api.DisplayNameGenerator; -import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.*; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.action.search.CreatePitRequest; import org.opensearch.action.search.SearchRequest; import org.opensearch.action.search.SearchResponse; import org.opensearch.action.search.SearchScrollRequest; import org.opensearch.common.unit.TimeValue; -import org.opensearch.index.query.InnerHitBuilder; -import org.opensearch.index.query.NestedQueryBuilder; -import org.opensearch.index.query.QueryBuilder; -import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.*; import org.opensearch.search.SearchHit; import org.opensearch.search.SearchHits; import org.opensearch.search.aggregations.AggregationBuilder; @@ -47,13 +39,19 @@ import org.opensearch.search.aggregations.bucket.composite.TermsValuesSourceBuilder; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.search.fetch.subphase.FetchSourceContext; +import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder; import org.opensearch.search.sort.FieldSortBuilder; import org.opensearch.search.sort.ScoreSortBuilder; import org.opensearch.search.sort.SortBuilders; +import org.opensearch.search.sort.SortOrder; +import org.opensearch.sql.ast.expression.DataType; +import org.opensearch.sql.ast.expression.Literal; +import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.DSL; import org.opensearch.sql.expression.NamedExpression; import org.opensearch.sql.expression.ReferenceExpression; +import 
org.opensearch.sql.opensearch.client.OpenSearchClient; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; import org.opensearch.sql.opensearch.data.value.OpenSearchExprValueFactory; import org.opensearch.sql.opensearch.response.agg.CompositeAggregationParser; @@ -76,11 +74,19 @@ class OpenSearchRequestBuilderTest { @Mock private OpenSearchExprValueFactory exprValueFactory; + @Mock private OpenSearchClient client; + + @Mock private Settings settings; + private OpenSearchRequestBuilder requestBuilder; @BeforeEach void setup() { - requestBuilder = new OpenSearchRequestBuilder(DEFAULT_LIMIT, exprValueFactory); + requestBuilder = new OpenSearchRequestBuilder(DEFAULT_LIMIT, exprValueFactory, settings); + lenient() + .when(settings.getSettingValue(Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER)) + .thenReturn(true); + lenient().when(settings.getSettingValue(Settings.Key.FIELD_TYPE_TOLERANCE)).thenReturn(false); } @Test @@ -100,14 +106,148 @@ void build_query_request() { .trackScores(true), exprValueFactory, List.of()), - requestBuilder.build(indexName, MAX_RESULT_WINDOW, DEFAULT_QUERY_TIMEOUT)); + requestBuilder.build(indexName, MAX_RESULT_WINDOW, DEFAULT_QUERY_TIMEOUT, client)); + } + + @Test + void build_query_request_push_down_size() { + Integer limit = 200; + Integer offset = 0; + requestBuilder.pushDownLimit(limit, offset); + requestBuilder.pushDownTrackedScore(true); + + assertNotNull( + requestBuilder.build(indexName, MAX_RESULT_WINDOW, DEFAULT_QUERY_TIMEOUT, client)); + } + + @Test + void build_PIT_request_with_correct_size() { + when(settings.getSettingValue(Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER)).thenReturn(true); + when(client.createPit(any(CreatePitRequest.class))).thenReturn("samplePITId"); + Integer limit = 0; + Integer offset = 0; + requestBuilder.pushDownLimit(limit, offset); + requestBuilder.pushDownPageSize(2); + + assertEquals( + new OpenSearchQueryRequest( + new OpenSearchRequest.IndexName("test"), + new SearchSourceBuilder().from(offset).size(2).timeout(DEFAULT_QUERY_TIMEOUT), + exprValueFactory, + List.of(), + TimeValue.timeValueMinutes(1), + "samplePITId"), + requestBuilder.build(indexName, MAX_RESULT_WINDOW, DEFAULT_QUERY_TIMEOUT, client)); + } + + @Test + void buildRequestWithPit_pageSizeNull_sizeGreaterThanMaxResultWindow() { + when(settings.getSettingValue(Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER)).thenReturn(true); + when(client.createPit(any(CreatePitRequest.class))).thenReturn("samplePITId"); + Integer limit = 600; + Integer offset = 0; + int requestedTotalSize = 600; + requestBuilder = new OpenSearchRequestBuilder(requestedTotalSize, exprValueFactory, settings); + requestBuilder.pushDownLimit(limit, offset); + + assertEquals( + new OpenSearchQueryRequest( + new OpenSearchRequest.IndexName("test"), + new SearchSourceBuilder() + .from(offset) + .size(MAX_RESULT_WINDOW - offset) + .timeout(DEFAULT_QUERY_TIMEOUT), + exprValueFactory, + List.of(), + TimeValue.timeValueMinutes(1), + "samplePITId"), + requestBuilder.build(indexName, MAX_RESULT_WINDOW, DEFAULT_QUERY_TIMEOUT, client)); + } + + @Test + void buildRequestWithPit_pageSizeNull_sizeLessThanMaxResultWindow() { + when(settings.getSettingValue(Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER)).thenReturn(true); + Integer limit = 400; + Integer offset = 0; + int requestedTotalSize = 400; + requestBuilder = new OpenSearchRequestBuilder(requestedTotalSize, exprValueFactory, settings); + requestBuilder.pushDownLimit(limit, offset); + + assertEquals( + new OpenSearchQueryRequest( + new 
OpenSearchRequest.IndexName("test"), + new SearchSourceBuilder() + .from(offset) + .size(requestedTotalSize) + .timeout(DEFAULT_QUERY_TIMEOUT), + exprValueFactory, + List.of()), + requestBuilder.build(indexName, MAX_RESULT_WINDOW, DEFAULT_QUERY_TIMEOUT, client)); + } + + @Test + void buildRequestWithPit_pageSizeNotNull_startFromZero() { + int pageSize = 200; + int offset = 0; + int limit = 400; + requestBuilder.pushDownPageSize(pageSize); + requestBuilder.pushDownLimit(limit, offset); + when(client.createPit(any(CreatePitRequest.class))).thenReturn("samplePITId"); + + assertEquals( + new OpenSearchQueryRequest( + new OpenSearchRequest.IndexName("test"), + new SearchSourceBuilder().from(offset).size(pageSize).timeout(DEFAULT_QUERY_TIMEOUT), + exprValueFactory, + List.of(), + TimeValue.timeValueMinutes(1), + "samplePITId"), + requestBuilder.build(indexName, MAX_RESULT_WINDOW, DEFAULT_QUERY_TIMEOUT, client)); + } + + @Test + void buildRequestWithPit_pageSizeNotNull_startFromNonZero() { + int pageSize = 200; + int offset = 100; + int limit = 400; + requestBuilder.pushDownPageSize(pageSize); + requestBuilder.pushDownLimit(limit, offset); + assertThrows( + UnsupportedOperationException.class, + () -> { + requestBuilder.build(indexName, 500, TimeValue.timeValueMinutes(1), client); + }); } @Test void build_scroll_request_with_correct_size() { + when(settings.getSettingValue(Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER)).thenReturn(false); Integer limit = 800; Integer offset = 10; requestBuilder.pushDownLimit(limit, offset); + requestBuilder.getSourceBuilder().fetchSource("a", "b"); + + assertEquals( + new OpenSearchScrollRequest( + new OpenSearchRequest.IndexName("test"), + TimeValue.timeValueMinutes(1), + new SearchSourceBuilder() + .from(offset) + .size(MAX_RESULT_WINDOW - offset) + .timeout(DEFAULT_QUERY_TIMEOUT), + exprValueFactory, + List.of()), + requestBuilder.build(indexName, MAX_RESULT_WINDOW, DEFAULT_QUERY_TIMEOUT, client)); + } + + @Test + void buildRequestWithScroll_pageSizeNull_sizeGreaterThanMaxResultWindow() { + when(settings.getSettingValue(Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER)).thenReturn(false); + Integer limit = 600; + Integer offset = 0; + int requestedTotalSize = 600; + requestBuilder = new OpenSearchRequestBuilder(requestedTotalSize, exprValueFactory, settings); + requestBuilder.pushDownLimit(limit, offset); assertEquals( new OpenSearchScrollRequest( @@ -119,7 +259,65 @@ void build_scroll_request_with_correct_size() { .timeout(DEFAULT_QUERY_TIMEOUT), exprValueFactory, List.of()), - requestBuilder.build(indexName, MAX_RESULT_WINDOW, DEFAULT_QUERY_TIMEOUT)); + requestBuilder.build(indexName, MAX_RESULT_WINDOW, DEFAULT_QUERY_TIMEOUT, client)); + } + + @Test + void buildRequestWithScroll_pageSizeNull_sizeLessThanMaxResultWindow() { + when(settings.getSettingValue(Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER)).thenReturn(false); + Integer limit = 400; + Integer offset = 0; + int requestedTotalSize = 400; + requestBuilder = new OpenSearchRequestBuilder(requestedTotalSize, exprValueFactory, settings); + requestBuilder.pushDownLimit(limit, offset); + + assertEquals( + new OpenSearchQueryRequest( + new OpenSearchRequest.IndexName("test"), + new SearchSourceBuilder() + .from(offset) + .size(requestedTotalSize) + .timeout(DEFAULT_QUERY_TIMEOUT), + exprValueFactory, + List.of()), + requestBuilder.build(indexName, MAX_RESULT_WINDOW, DEFAULT_QUERY_TIMEOUT, client)); + } + + @Test + void buildRequestWithScroll_pageSizeNotNull_startFromZero() { + 
when(settings.getSettingValue(Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER)).thenReturn(false); + int pageSize = 200; + int offset = 0; + int limit = 400; + requestBuilder.pushDownPageSize(pageSize); + requestBuilder.pushDownLimit(limit, offset); + + assertEquals( + new OpenSearchScrollRequest( + new OpenSearchRequest.IndexName("test"), + TimeValue.timeValueMinutes(1), + new SearchSourceBuilder() + .from(offset) + .size(MAX_RESULT_WINDOW - offset) + .timeout(DEFAULT_QUERY_TIMEOUT), + exprValueFactory, + List.of()), + requestBuilder.build(indexName, MAX_RESULT_WINDOW, DEFAULT_QUERY_TIMEOUT, client)); + } + + @Test + void buildRequestWithScroll_pageSizeNotNull_startFromNonZero() { + when(settings.getSettingValue(Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER)).thenReturn(false); + int pageSize = 200; + int offset = 100; + int limit = 400; + requestBuilder.pushDownPageSize(pageSize); + requestBuilder.pushDownLimit(limit, offset); + assertThrows( + UnsupportedOperationException.class, + () -> { + requestBuilder.build(indexName, 500, TimeValue.timeValueMinutes(1), client); + }); } @Test @@ -127,7 +325,7 @@ void test_push_down_query() { QueryBuilder query = QueryBuilders.termQuery("intA", 1); requestBuilder.pushDownFilter(query); - var r = requestBuilder.build(indexName, MAX_RESULT_WINDOW, DEFAULT_QUERY_TIMEOUT); + var r = requestBuilder.build(indexName, MAX_RESULT_WINDOW, DEFAULT_QUERY_TIMEOUT, client); Function querySearch = searchRequest -> { assertEquals( @@ -203,6 +401,51 @@ void test_push_down_query_and_sort() { requestBuilder); } + @Test + void test_push_down_query_not_null() { + SearchSourceBuilder sourceBuilder = requestBuilder.getSourceBuilder(); + sourceBuilder.query(QueryBuilders.termQuery("name", "John")); + sourceBuilder.sort(DOC_FIELD_NAME, SortOrder.ASC); + + QueryBuilder query = QueryBuilders.termQuery("intA", 1); + requestBuilder.pushDownFilter(query); + + BoolQueryBuilder expectedQuery = + QueryBuilders.boolQuery().filter(QueryBuilders.termQuery("name", "John")).filter(query); + + SearchSourceBuilder expectedSourceBuilder = + new SearchSourceBuilder() + .from(DEFAULT_OFFSET) + .size(DEFAULT_LIMIT) + .timeout(DEFAULT_QUERY_TIMEOUT) + .query(expectedQuery) + .sort(DOC_FIELD_NAME, SortOrder.ASC); + + assertSearchSourceBuilder(expectedSourceBuilder, requestBuilder); + } + + @Test + void test_push_down_query_with_bool_filter() { + BoolQueryBuilder initialBoolQuery = + QueryBuilders.boolQuery().filter(QueryBuilders.termQuery("name", "John")); + + SearchSourceBuilder sourceBuilder = requestBuilder.getSourceBuilder(); + sourceBuilder.query(initialBoolQuery); + + QueryBuilder newQuery = QueryBuilders.termQuery("intA", 1); + requestBuilder.pushDownFilter(newQuery); + initialBoolQuery.filter(newQuery); + SearchSourceBuilder expectedSourceBuilder = + new SearchSourceBuilder() + .from(DEFAULT_OFFSET) + .size(DEFAULT_LIMIT) + .timeout(DEFAULT_QUERY_TIMEOUT) + .query(initialBoolQuery) + .sort(DOC_FIELD_NAME, SortOrder.ASC); + + assertSearchSourceBuilder(expectedSourceBuilder, requestBuilder); + } + void assertSearchSourceBuilder( SearchSourceBuilder expected, OpenSearchRequestBuilder requestBuilder) throws UnsupportedOperationException { @@ -220,7 +463,7 @@ void assertSearchSourceBuilder( throw new UnsupportedOperationException(); }; requestBuilder - .build(indexName, MAX_RESULT_WINDOW, DEFAULT_QUERY_TIMEOUT) + .build(indexName, MAX_RESULT_WINDOW, DEFAULT_QUERY_TIMEOUT, client) .search(querySearch, scrollSearch); } @@ -290,7 +533,7 @@ void test_push_down_project() { .fetchSource("intA", null), 
exprValueFactory, List.of("intA")), - requestBuilder.build(indexName, MAX_RESULT_WINDOW, DEFAULT_QUERY_TIMEOUT)); + requestBuilder.build(indexName, MAX_RESULT_WINDOW, DEFAULT_QUERY_TIMEOUT, client)); } @Test @@ -320,7 +563,7 @@ void test_push_down_project_limit() { .fetchSource("intA", null), exprValueFactory, List.of("intA")), - requestBuilder.build(indexName, MAX_RESULT_WINDOW, DEFAULT_QUERY_TIMEOUT)); + requestBuilder.build(indexName, MAX_RESULT_WINDOW, DEFAULT_QUERY_TIMEOUT, client)); } @Test @@ -350,7 +593,7 @@ void test_push_down_project_limit_and_offset() { .fetchSource("intA", null), exprValueFactory, List.of("intA")), - requestBuilder.build(indexName, MAX_RESULT_WINDOW, DEFAULT_QUERY_TIMEOUT)); + requestBuilder.build(indexName, MAX_RESULT_WINDOW, DEFAULT_QUERY_TIMEOUT, client)); } @Test @@ -377,7 +620,7 @@ void test_push_down_nested() { assertSearchSourceBuilder( new SearchSourceBuilder() - .query(QueryBuilders.boolQuery().filter(QueryBuilders.boolQuery().must(nestedQuery))) + .query(boolQuery().filter(boolQuery().must(nestedQuery))) .from(DEFAULT_OFFSET) .size(DEFAULT_LIMIT) .timeout(DEFAULT_QUERY_TIMEOUT), @@ -411,7 +654,7 @@ void test_push_down_multiple_nested_with_same_path() { true, new String[] {"message.info", "message.from"}, null))); assertSearchSourceBuilder( new SearchSourceBuilder() - .query(QueryBuilders.boolQuery().filter(QueryBuilders.boolQuery().must(nestedQuery))) + .query(boolQuery().filter(boolQuery().must(nestedQuery))) .from(DEFAULT_OFFSET) .size(DEFAULT_LIMIT) .timeout(DEFAULT_QUERY_TIMEOUT), @@ -444,9 +687,9 @@ void test_push_down_nested_with_filter() { assertSearchSourceBuilder( new SearchSourceBuilder() .query( - QueryBuilders.boolQuery() + boolQuery() .filter( - QueryBuilders.boolQuery() + boolQuery() .must(QueryBuilders.rangeQuery("myNum").gt(3)) .must(nestedQuery))) .from(DEFAULT_OFFSET) @@ -483,7 +726,7 @@ void testPushDownNestedWithNestedFilter() { assertSearchSourceBuilder( new SearchSourceBuilder() - .query(QueryBuilders.boolQuery().filter(QueryBuilders.boolQuery().must(filterQuery))) + .query(boolQuery().filter(boolQuery().must(filterQuery))) .from(DEFAULT_OFFSET) .size(DEFAULT_LIMIT) .timeout(DEFAULT_QUERY_TIMEOUT), @@ -507,6 +750,32 @@ void push_down_highlight_with_repeating_fields() { assertEquals("Duplicate field name in highlight", exception.getMessage()); } + @Test + void test_push_down_highlight_with_pre_tags() { + requestBuilder.pushDownHighlight( + "name", Map.of("pre_tags", new Literal("pre1", DataType.STRING))); + + SearchSourceBuilder sourceBuilder = requestBuilder.getSourceBuilder(); + assertNotNull(sourceBuilder.highlighter()); + assertEquals(1, sourceBuilder.highlighter().fields().size()); + HighlightBuilder.Field field = sourceBuilder.highlighter().fields().get(0); + assertEquals("name", field.name()); + assertEquals("pre1", field.preTags()[0]); + } + + @Test + void test_push_down_highlight_with_post_tags() { + requestBuilder.pushDownHighlight( + "name", Map.of("post_tags", new Literal("post1", DataType.STRING))); + + SearchSourceBuilder sourceBuilder = requestBuilder.getSourceBuilder(); + assertNotNull(sourceBuilder.highlighter()); + assertEquals(1, sourceBuilder.highlighter().fields().size()); + HighlightBuilder.Field field = sourceBuilder.highlighter().fields().get(0); + assertEquals("name", field.name()); + assertEquals("post1", field.postTags()[0]); + } + @Test void push_down_page_size() { requestBuilder.pushDownPageSize(3); @@ -521,7 +790,7 @@ void exception_when_non_zero_offset_and_page_size() { 
requestBuilder.pushDownLimit(300, 2); assertThrows( UnsupportedOperationException.class, - () -> requestBuilder.build(indexName, MAX_RESULT_WINDOW, DEFAULT_QUERY_TIMEOUT)); + () -> requestBuilder.build(indexName, MAX_RESULT_WINDOW, DEFAULT_QUERY_TIMEOUT, client)); } @Test diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchResponseTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchResponseTest.java index 6f4605bc2f..984c98f803 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchResponseTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchResponseTest.java @@ -14,10 +14,12 @@ import static org.mockito.ArgumentMatchers.anyBoolean; import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import com.google.common.collect.ImmutableMap; import java.util.Arrays; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -58,14 +60,14 @@ class OpenSearchResponseTest { @Mock private Aggregations aggregations; - private List includes = List.of(); + private final List includes = List.of(); @Mock private OpenSearchAggregationResponseParser parser; - private ExprTupleValue exprTupleValue1 = + private final ExprTupleValue exprTupleValue1 = ExprTupleValue.fromExprValueMap(ImmutableMap.of("id1", new ExprIntegerValue(1))); - private ExprTupleValue exprTupleValue2 = + private final ExprTupleValue exprTupleValue2 = ExprTupleValue.fromExprValueMap(ImmutableMap.of("id2", new ExprIntegerValue(2))); @Test @@ -163,7 +165,8 @@ void iterator_metafields() { "_sort", new ExprLongValue(123456L), "_score", new ExprFloatValue(3.75F), "_maxscore", new ExprFloatValue(3.75F))); - List includes = List.of("id1", "_index", "_id", "_routing", "_sort", "_score", "_maxscore"); + List includes = + List.of("id1", "_index", "_id", "_routing", "_sort", "_score", "_maxscore"); int i = 0; for (ExprValue hit : new OpenSearchResponse(searchResponse, factory, includes)) { if (i == 0) { @@ -248,20 +251,15 @@ void iterator_metafields_scoreNaN() { @Test void iterator_with_inner_hits() { + Map innerHits = new HashMap<>(); + innerHits.put("a", mock(SearchHits.class)); + when(searchHit1.getInnerHits()).thenReturn(innerHits); when(searchResponse.getHits()) .thenReturn( new SearchHits( new SearchHit[] {searchHit1}, new TotalHits(2L, TotalHits.Relation.EQUAL_TO), 1.0F)); - when(searchHit1.getInnerHits()) - .thenReturn( - Map.of( - "innerHit", - new SearchHits( - new SearchHit[] {searchHit1}, - new TotalHits(2L, TotalHits.Relation.EQUAL_TO), - 1.0F))); when(factory.construct(any(), anyBoolean())).thenReturn(exprTupleValue1); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/error/ErrorMessageFactoryTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/error/ErrorMessageFactoryTest.java index eb759233a8..6ffe6b275c 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/error/ErrorMessageFactoryTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/error/ErrorMessageFactoryTest.java @@ -13,8 +13,8 @@ import org.opensearch.core.rest.RestStatus; public class ErrorMessageFactoryTest { - private Throwable nonOpenSearchThrowable = new Throwable(); - private Throwable openSearchThrowable = new OpenSearchException(nonOpenSearchThrowable); + 
private final Throwable nonOpenSearchThrowable = new Throwable(); + private final Throwable openSearchThrowable = new OpenSearchException(nonOpenSearchThrowable); @Test public void openSearchExceptionShouldCreateEsErrorMessage() { diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/setting/OpenSearchSettingsTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/setting/OpenSearchSettingsTest.java index e99e5b360a..026f0c6218 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/setting/OpenSearchSettingsTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/setting/OpenSearchSettingsTest.java @@ -15,6 +15,8 @@ import static org.mockito.Mockito.when; import static org.opensearch.common.unit.TimeValue.timeValueMinutes; import static org.opensearch.sql.opensearch.setting.LegacyOpenDistroSettings.legacySettings; +import static org.opensearch.sql.opensearch.setting.OpenSearchSettings.ASYNC_QUERY_EXTERNAL_SCHEDULER_ENABLED_SETTING; +import static org.opensearch.sql.opensearch.setting.OpenSearchSettings.ASYNC_QUERY_EXTERNAL_SCHEDULER_INTERVAL_SETTING; import static org.opensearch.sql.opensearch.setting.OpenSearchSettings.METRICS_ROLLING_INTERVAL_SETTING; import static org.opensearch.sql.opensearch.setting.OpenSearchSettings.METRICS_ROLLING_WINDOW_SETTING; import static org.opensearch.sql.opensearch.setting.OpenSearchSettings.PPL_ENABLED_SETTING; @@ -34,6 +36,7 @@ import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.core.common.unit.ByteSizeValue; +import org.opensearch.index.IndexSettings; import org.opensearch.monitor.jvm.JvmInfo; import org.opensearch.sql.common.setting.LegacySettings; import org.opensearch.sql.common.setting.Settings; @@ -132,8 +135,7 @@ void settingsFallback() { org.opensearch.common.settings.Settings.EMPTY)); assertEquals( settings.getSettingValue(Settings.Key.QUERY_SIZE_LIMIT), - LegacyOpenDistroSettings.QUERY_SIZE_LIMIT_SETTING.get( - org.opensearch.common.settings.Settings.EMPTY)); + IndexSettings.MAX_RESULT_WINDOW_SETTING.get(org.opensearch.common.settings.Settings.EMPTY)); assertEquals( settings.getSettingValue(Settings.Key.METRICS_ROLLING_WINDOW), LegacyOpenDistroSettings.METRICS_ROLLING_WINDOW_SETTING.get( @@ -165,7 +167,7 @@ public void updateLegacySettingsFallback() { assertEquals( QUERY_MEMORY_LIMIT_SETTING.get(settings), new ByteSizeValue((int) (JvmInfo.jvmInfo().getMem().getHeapMax().getBytes() * 0.2))); - assertEquals(QUERY_SIZE_LIMIT_SETTING.get(settings), 100); + assertEquals(QUERY_SIZE_LIMIT_SETTING.get(settings), 10000); assertEquals(METRICS_ROLLING_WINDOW_SETTING.get(settings), 2000L); assertEquals(METRICS_ROLLING_INTERVAL_SETTING.get(settings), 100L); } @@ -195,4 +197,22 @@ void getSparkExecutionEngineConfigSetting() { .put(SPARK_EXECUTION_ENGINE_CONFIG.getKey(), sparkConfig) .build())); } + + @Test + void getAsyncQueryExternalSchedulerEnabledSetting() { + // Default is true + assertEquals( + true, + ASYNC_QUERY_EXTERNAL_SCHEDULER_ENABLED_SETTING.get( + org.opensearch.common.settings.Settings.builder().build())); + } + + @Test + void getAsyncQueryExternalSchedulerIntervalSetting() { + // Default is empty string + assertEquals( + "", + ASYNC_QUERY_EXTERNAL_SCHEDULER_INTERVAL_SETTING.get( + org.opensearch.common.settings.Settings.builder().build())); + } } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexTest.java 
b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexTest.java index 3ddb07d86a..3f8a07f495 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/OpenSearchIndexTest.java @@ -10,6 +10,7 @@ import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.hasEntry; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.doNothing; @@ -79,6 +80,10 @@ class OpenSearchIndexTest { @BeforeEach void setUp() { this.index = new OpenSearchIndex(client, settings, "test"); + lenient() + .when(settings.getSettingValue(Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER)) + .thenReturn(true); + lenient().when(settings.getSettingValue(Settings.Key.FIELD_TYPE_TOLERANCE)).thenReturn(true); } @Test @@ -148,7 +153,7 @@ void getFieldTypes() { hasEntry("gender", ExprCoreType.BOOLEAN), hasEntry("family", ExprCoreType.ARRAY), hasEntry("employer", ExprCoreType.STRUCT), - hasEntry("birthday", ExprCoreType.TIMESTAMP), + hasEntry("birthday", (ExprType) OpenSearchDataType.of(MappingType.Date)), hasEntry("id1", ExprCoreType.BYTE), hasEntry("id2", ExprCoreType.SHORT), hasEntry("blob", (ExprType) OpenSearchDataType.of(MappingType.Binary)))); @@ -198,10 +203,11 @@ void implementRelationOperatorOnly() { when(settings.getSettingValue(Settings.Key.QUERY_SIZE_LIMIT)).thenReturn(200); LogicalPlan plan = index.createScanBuilder(); Integer maxResultWindow = index.getMaxResultWindow(); - final var requestBuilder = new OpenSearchRequestBuilder(QUERY_SIZE_LIMIT, exprValueFactory); + final var requestBuilder = + new OpenSearchRequestBuilder(QUERY_SIZE_LIMIT, exprValueFactory, settings); assertEquals( new OpenSearchIndexScan( - client, 200, requestBuilder.build(INDEX_NAME, maxResultWindow, SCROLL_TIMEOUT)), + client, 200, requestBuilder.build(INDEX_NAME, maxResultWindow, SCROLL_TIMEOUT, client)), index.implement(index.optimize(plan))); } @@ -211,10 +217,11 @@ void implementRelationOperatorWithOptimization() { when(settings.getSettingValue(Settings.Key.QUERY_SIZE_LIMIT)).thenReturn(200); LogicalPlan plan = index.createScanBuilder(); Integer maxResultWindow = index.getMaxResultWindow(); - final var requestBuilder = new OpenSearchRequestBuilder(QUERY_SIZE_LIMIT, exprValueFactory); + final var requestBuilder = + new OpenSearchRequestBuilder(QUERY_SIZE_LIMIT, exprValueFactory, settings); assertEquals( new OpenSearchIndexScan( - client, 200, requestBuilder.build(INDEX_NAME, maxResultWindow, SCROLL_TIMEOUT)), + client, 200, requestBuilder.build(INDEX_NAME, maxResultWindow, SCROLL_TIMEOUT, client)), index.implement(plan)); } @@ -243,7 +250,8 @@ void implementOtherLogicalOperators() { include); Integer maxResultWindow = index.getMaxResultWindow(); - final var requestBuilder = new OpenSearchRequestBuilder(QUERY_SIZE_LIMIT, exprValueFactory); + final var requestBuilder = + new OpenSearchRequestBuilder(QUERY_SIZE_LIMIT, exprValueFactory, settings); assertEquals( PhysicalPlanDSL.project( PhysicalPlanDSL.dedupe( @@ -255,7 +263,7 @@ void implementOtherLogicalOperators() { client, QUERY_SIZE_LIMIT, requestBuilder.build( - INDEX_NAME, maxResultWindow, SCROLL_TIMEOUT)), + INDEX_NAME, maxResultWindow, SCROLL_TIMEOUT, client)), mappings), exclude), newEvalField), @@ -264,4 +272,13 @@ void 
implementOtherLogicalOperators() { include), index.implement(plan)); } + + @Test + void isFieldTypeTolerance() { + when(settings.getSettingValue(Settings.Key.FIELD_TYPE_TOLERANCE)) + .thenReturn(true) + .thenReturn(false); + assertTrue(index.isFieldTypeTolerance()); + assertFalse(index.isFieldTypeTolerance()); + } } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanPaginationTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanPaginationTest.java index 2085519b12..6f923cf5c4 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanPaginationTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanPaginationTest.java @@ -56,6 +56,10 @@ void setup() { lenient() .when(settings.getSettingValue(Settings.Key.SQL_CURSOR_KEEP_ALIVE)) .thenReturn(TimeValue.timeValueMinutes(1)); + lenient() + .when(settings.getSettingValue(Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER)) + .thenReturn(true); + lenient().when(settings.getSettingValue(Settings.Key.FIELD_TYPE_TOLERANCE)).thenReturn(true); } @Mock private OpenSearchClient client; @@ -64,17 +68,18 @@ void setup() { new OpenSearchExprValueFactory( Map.of( "name", OpenSearchDataType.of(STRING), - "department", OpenSearchDataType.of(STRING))); + "department", OpenSearchDataType.of(STRING)), + true); @Test void query_empty_result() { mockResponse(client); - var builder = new OpenSearchRequestBuilder(QUERY_SIZE, exprValueFactory); + var builder = new OpenSearchRequestBuilder(QUERY_SIZE, exprValueFactory, settings); try (var indexScan = new OpenSearchIndexScan( client, MAX_RESULT_WINDOW, - builder.build(INDEX_NAME, MAX_RESULT_WINDOW, SCROLL_TIMEOUT))) { + builder.build(INDEX_NAME, MAX_RESULT_WINDOW, SCROLL_TIMEOUT, client))) { indexScan.open(); assertFalse(indexScan.hasNext()); } @@ -96,13 +101,13 @@ void dont_serialize_if_no_cursor() { OpenSearchRequestBuilder builder = mock(); OpenSearchRequest request = mock(); OpenSearchResponse response = mock(); - when(builder.build(any(), anyInt(), any())).thenReturn(request); + when(builder.build(any(), anyInt(), any(), any())).thenReturn(request); when(client.search(any())).thenReturn(response); try (var indexScan = new OpenSearchIndexScan( client, MAX_RESULT_WINDOW, - builder.build(INDEX_NAME, MAX_RESULT_WINDOW, SCROLL_TIMEOUT))) { + builder.build(INDEX_NAME, MAX_RESULT_WINDOW, SCROLL_TIMEOUT, client))) { indexScan.open(); when(request.hasAnotherBatch()).thenReturn(false); diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanTest.java index f813d8f551..5381c4a7a7 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanTest.java @@ -5,24 +5,18 @@ package org.opensearch.sql.opensearch.storage.scan; +import static org.junit.Assert.assertNotNull; import static org.junit.jupiter.api.Assertions.assertAll; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.mock; -import static 
org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; +import static org.mockito.Mockito.*; import static org.opensearch.search.sort.FieldSortBuilder.DOC_FIELD_NAME; import static org.opensearch.search.sort.SortOrder.ASC; import static org.opensearch.sql.data.type.ExprCoreType.STRING; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.ObjectInputStream; -import java.io.ObjectOutputStream; +import java.io.*; import java.util.Arrays; import java.util.HashMap; import java.util.List; @@ -52,6 +46,7 @@ import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder; import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.Literal; +import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.exception.NoCursorException; @@ -77,14 +72,21 @@ class OpenSearchIndexScanTest { public static final int MAX_RESULT_WINDOW = 10000; public static final TimeValue CURSOR_KEEP_ALIVE = TimeValue.timeValueMinutes(1); @Mock private OpenSearchClient client; + @Mock private Settings settings; private final OpenSearchExprValueFactory exprValueFactory = new OpenSearchExprValueFactory( Map.of( - "name", OpenSearchDataType.of(STRING), "department", OpenSearchDataType.of(STRING))); + "name", OpenSearchDataType.of(STRING), "department", OpenSearchDataType.of(STRING)), + true); @BeforeEach - void setup() {} + void setup() { + lenient() + .when(settings.getSettingValue(Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER)) + .thenReturn(true); + lenient().when(settings.getSettingValue(Settings.Key.FIELD_TYPE_TOLERANCE)).thenReturn(true); + } @Test void explain() { @@ -144,6 +146,49 @@ void serialize(Integer numberOfIncludes) { } } + @SneakyThrows + @ParameterizedTest + @ValueSource(ints = {0, 150}) + void serialize_PIT(Integer numberOfIncludes) { + var searchSourceBuilder = new SearchSourceBuilder().size(4); + + var factory = mock(OpenSearchExprValueFactory.class); + var engine = mock(OpenSearchStorageEngine.class); + var index = mock(OpenSearchIndex.class); + when(engine.getClient()).thenReturn(client); + when(engine.getTable(any(), any())).thenReturn(index); + Map map = mock(Map.class); + when(map.get(any(String.class))).thenReturn("true"); + when(client.meta()).thenReturn(map); + var includes = + Stream.iterate(1, i -> i + 1) + .limit(numberOfIncludes) + .map(i -> "column" + i) + .collect(Collectors.toList()); + var request = + new OpenSearchQueryRequest( + INDEX_NAME, searchSourceBuilder, factory, includes, CURSOR_KEEP_ALIVE, "samplePitId"); + // make a response, so OpenSearchResponse::isEmpty would return true and unset needClean + var response = mock(SearchResponse.class); + when(response.getAggregations()).thenReturn(mock()); + var hits = mock(SearchHits.class); + when(response.getHits()).thenReturn(hits); + SearchHit hit = mock(SearchHit.class); + when(hit.getSortValues()).thenReturn(new String[] {"sample1"}); + when(hits.getHits()).thenReturn(new SearchHit[] {hit}); + request.search((req) -> response, null); + + try (var indexScan = new OpenSearchIndexScan(client, QUERY_SIZE, request)) { + var planSerializer = new PlanSerializer(engine); + var cursor = planSerializer.convertToCursor(indexScan); + var newPlan = planSerializer.convertToPlan(cursor.toString()); + assertNotNull(newPlan); + + verify(client).meta(); + 
verify(map).get(Settings.Key.SQL_PAGINATION_API_SEARCH_AFTER.getKeyValue()); + } + } + @SneakyThrows @Test void throws_io_exception_if_too_short() { @@ -172,10 +217,12 @@ void plan_for_serialization() { void query_empty_result() { mockResponse(client); final var name = new OpenSearchRequest.IndexName("test"); - final var requestBuilder = new OpenSearchRequestBuilder(QUERY_SIZE, exprValueFactory); + final var requestBuilder = new OpenSearchRequestBuilder(QUERY_SIZE, exprValueFactory, settings); try (OpenSearchIndexScan indexScan = new OpenSearchIndexScan( - client, QUERY_SIZE, requestBuilder.build(name, MAX_RESULT_WINDOW, CURSOR_KEEP_ALIVE))) { + client, + QUERY_SIZE, + requestBuilder.build(name, MAX_RESULT_WINDOW, CURSOR_KEEP_ALIVE, client))) { indexScan.open(); assertFalse(indexScan.hasNext()); } @@ -190,10 +237,10 @@ void query_all_results_with_query() { employee(1, "John", "IT"), employee(2, "Smith", "HR"), employee(3, "Allen", "IT") }); - final var requestBuilder = new OpenSearchRequestBuilder(QUERY_SIZE, exprValueFactory); + final var requestBuilder = new OpenSearchRequestBuilder(QUERY_SIZE, exprValueFactory, settings); try (OpenSearchIndexScan indexScan = new OpenSearchIndexScan( - client, 10, requestBuilder.build(INDEX_NAME, 10000, CURSOR_KEEP_ALIVE))) { + client, 10, requestBuilder.build(INDEX_NAME, 10000, CURSOR_KEEP_ALIVE, client))) { indexScan.open(); assertAll( @@ -218,10 +265,10 @@ void query_all_results_with_scroll() { new ExprValue[] {employee(1, "John", "IT"), employee(2, "Smith", "HR")}, new ExprValue[] {employee(3, "Allen", "IT")}); - final var requestBuilder = new OpenSearchRequestBuilder(QUERY_SIZE, exprValueFactory); + final var requestBuilder = new OpenSearchRequestBuilder(QUERY_SIZE, exprValueFactory, settings); try (OpenSearchIndexScan indexScan = new OpenSearchIndexScan( - client, 10, requestBuilder.build(INDEX_NAME, 10000, CURSOR_KEEP_ALIVE))) { + client, 10, requestBuilder.build(INDEX_NAME, 10000, CURSOR_KEEP_ALIVE, client))) { indexScan.open(); assertAll( @@ -248,10 +295,12 @@ void query_some_results_with_query() { }); final int limit = 3; - OpenSearchRequestBuilder builder = new OpenSearchRequestBuilder(0, exprValueFactory); + OpenSearchRequestBuilder builder = new OpenSearchRequestBuilder(0, exprValueFactory, settings); try (OpenSearchIndexScan indexScan = new OpenSearchIndexScan( - client, limit, builder.build(INDEX_NAME, MAX_RESULT_WINDOW, CURSOR_KEEP_ALIVE))) { + client, + limit, + builder.build(INDEX_NAME, MAX_RESULT_WINDOW, CURSOR_KEEP_ALIVE, client))) { indexScan.open(); assertAll( @@ -269,10 +318,12 @@ void query_some_results_with_query() { @Test void query_some_results_with_scroll() { mockTwoPageResponse(client); - final var requestuilder = new OpenSearchRequestBuilder(10, exprValueFactory); + final var requestuilder = new OpenSearchRequestBuilder(10, exprValueFactory, settings); try (OpenSearchIndexScan indexScan = new OpenSearchIndexScan( - client, 3, requestuilder.build(INDEX_NAME, MAX_RESULT_WINDOW, CURSOR_KEEP_ALIVE))) { + client, + 3, + requestuilder.build(INDEX_NAME, MAX_RESULT_WINDOW, CURSOR_KEEP_ALIVE, client))) { indexScan.open(); assertAll( @@ -306,12 +357,13 @@ void query_results_limited_by_query_size() { }); final int defaultQuerySize = 2; - final var requestBuilder = new OpenSearchRequestBuilder(defaultQuerySize, exprValueFactory); + final var requestBuilder = + new OpenSearchRequestBuilder(defaultQuerySize, exprValueFactory, settings); try (OpenSearchIndexScan indexScan = new OpenSearchIndexScan( client, defaultQuerySize, - 
requestBuilder.build(INDEX_NAME, QUERY_SIZE, CURSOR_KEEP_ALIVE))) { + requestBuilder.build(INDEX_NAME, QUERY_SIZE, CURSOR_KEEP_ALIVE, client))) { indexScan.open(); assertAll( @@ -368,7 +420,7 @@ void push_down_highlight_with_arguments() { } private PushDownAssertion assertThat() { - return new PushDownAssertion(client, exprValueFactory); + return new PushDownAssertion(client, exprValueFactory, settings); } private static class PushDownAssertion { @@ -377,9 +429,10 @@ private static class PushDownAssertion { private final OpenSearchResponse response; private final OpenSearchExprValueFactory factory; - public PushDownAssertion(OpenSearchClient client, OpenSearchExprValueFactory valueFactory) { + public PushDownAssertion( + OpenSearchClient client, OpenSearchExprValueFactory valueFactory, Settings settings) { this.client = client; - this.requestBuilder = new OpenSearchRequestBuilder(QUERY_SIZE, valueFactory); + this.requestBuilder = new OpenSearchRequestBuilder(QUERY_SIZE, valueFactory, settings); this.response = mock(OpenSearchResponse.class); this.factory = valueFactory; @@ -411,7 +464,9 @@ PushDownAssertion shouldQueryHighlight(QueryBuilder query, HighlightBuilder high when(client.search(request)).thenReturn(response); var indexScan = new OpenSearchIndexScan( - client, QUERY_SIZE, requestBuilder.build(EMPLOYEES_INDEX, 10000, CURSOR_KEEP_ALIVE)); + client, + QUERY_SIZE, + requestBuilder.build(EMPLOYEES_INDEX, 10000, CURSOR_KEEP_ALIVE, client)); indexScan.open(); return this; } @@ -429,7 +484,9 @@ PushDownAssertion shouldQuery(QueryBuilder expected) { when(client.search(request)).thenReturn(response); var indexScan = new OpenSearchIndexScan( - client, 10000, requestBuilder.build(EMPLOYEES_INDEX, 10000, CURSOR_KEEP_ALIVE)); + client, + 10000, + requestBuilder.build(EMPLOYEES_INDEX, 10000, CURSOR_KEEP_ALIVE, client)); indexScan.open(); return this; } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilderTest.java index 4250b3297f..08c4017f1d 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilderTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/aggregation/dsl/BucketAggregationBuilderTest.java @@ -40,6 +40,7 @@ import org.opensearch.sql.expression.NamedExpression; import org.opensearch.sql.expression.parse.ParseExpression; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; +import org.opensearch.sql.opensearch.data.type.OpenSearchDateType; import org.opensearch.sql.opensearch.data.type.OpenSearchTextType; import org.opensearch.sql.opensearch.storage.serialization.ExpressionSerializer; @@ -134,6 +135,39 @@ void should_build_bucket_with_parse_expression() { buildQuery(Arrays.asList(asc(named("name", parseExpression))))); } + @Test + void terms_bucket_for_opensearchdate_type_uses_long() { + OpenSearchDateType dataType = OpenSearchDateType.of(ExprCoreType.TIMESTAMP); + + assertEquals( + "{\n" + + " \"terms\" : {\n" + + " \"field\" : \"date\",\n" + + " \"missing_bucket\" : true,\n" + + " \"value_type\" : \"long\",\n" + + " \"missing_order\" : \"first\",\n" + + " \"order\" : \"asc\"\n" + + " }\n" + + "}", + buildQuery(Arrays.asList(asc(named("date", ref("date", dataType)))))); + } + + @Test + void terms_bucket_for_opensearchdate_type_uses_long_false() { + OpenSearchDateType dataType = 
OpenSearchDateType.of(STRING); + + assertEquals( + "{\n" + + " \"terms\" : {\n" + + " \"field\" : \"date\",\n" + + " \"missing_bucket\" : true,\n" + + " \"missing_order\" : \"first\",\n" + + " \"order\" : \"asc\"\n" + + " }\n" + + "}", + buildQuery(Arrays.asList(asc(named("date", ref("date", dataType)))))); + } + @ParameterizedTest(name = "{0}") @EnumSource( value = ExprCoreType.class, diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java index 90b982e017..f8c43743ab 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java @@ -16,6 +16,7 @@ import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; import static org.opensearch.sql.data.type.ExprCoreType.FLOAT; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; +import static org.opensearch.sql.data.type.ExprCoreType.IP; import static org.opensearch.sql.data.type.ExprCoreType.LONG; import static org.opensearch.sql.data.type.ExprCoreType.SHORT; import static org.opensearch.sql.data.type.ExprCoreType.STRING; @@ -59,6 +60,10 @@ @ExtendWith(MockitoExtension.class) class FilterQueryBuilderTest { + private static Stream ipCastSource() { + return Stream.of(literal("1.2.3.4"), literal("2001:db7::ff00:42:8329")); + } + private static Stream numericCastSource() { return Stream.of( literal((byte) 1), @@ -1715,6 +1720,25 @@ void cast_to_boolean_false_in_filter(LiteralExpression expr) { json, buildQuery(DSL.equal(ref("boolean_value", BOOLEAN), DSL.castBoolean(expr)))); } + @ParameterizedTest(name = "castIp({0})") + @MethodSource({"ipCastSource"}) + void cast_to_ip_in_filter(LiteralExpression expr) { + String json = + String.format( + """ + { + "term" : { + "ip_value" : { + "value" : "%s", + "boost" : 1.0 + } + } + }""", + expr.valueOf().stringValue()); + + assertJsonEquals(json, buildQuery(DSL.equal(ref("ip_value", IP), DSL.castIp(expr)))); + } + @Test void cast_from_boolean() { Expression booleanExpr = literal(false); @@ -1772,9 +1796,9 @@ void cast_to_date_in_filter() { + " }\n" + " }\n" + "}"; - assertJsonEquals( json, buildQuery(DSL.equal(ref("date_value", DATE), DSL.castDate(literal("2021-11-08"))))); + assertJsonEquals( json, buildQuery( @@ -1821,7 +1845,7 @@ void cast_to_timestamp_in_filter() { "{\n" + " \"term\" : {\n" + " \"timestamp_value\" : {\n" - + " \"value\" : 1636390800000,\n" + + " \"value\" : \"2021-11-08 17:00:00\",\n" + " \"boost\" : 1.0\n" + " }\n" + " }\n" @@ -1847,7 +1871,7 @@ void cast_in_range_query() { "{\n" + " \"range\" : {\n" + " \"timestamp_value\" : {\n" - + " \"from\" : 1636390800000,\n" + + " \"from\" : \"2021-11-08 17:00:00\",\n" + " \"to\" : null," + " \"include_lower\" : false," + " \"include_upper\" : true," diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LuceneQueryTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LuceneQueryTest.java index df3a730bad..1713d1dd1b 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LuceneQueryTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/LuceneQueryTest.java @@ -8,18 +8,22 @@ import static org.junit.jupiter.api.Assertions.assertFalse; 
import static org.junit.jupiter.api.Assertions.assertThrows; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; +import static org.opensearch.sql.expression.DSL.literal; +import static org.opensearch.sql.expression.DSL.ref; import org.junit.jupiter.api.DisplayNameGeneration; import org.junit.jupiter.api.DisplayNameGenerator; import org.junit.jupiter.api.Test; +import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.opensearch.data.type.OpenSearchDateType; @DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) class LuceneQueryTest { @Test void should_not_support_single_argument_by_default() { - assertFalse(new LuceneQuery() {}.canSupport(DSL.abs(DSL.ref("age", INTEGER)))); + assertFalse(new LuceneQuery() {}.canSupport(DSL.abs(ref("age", INTEGER)))); } @Test @@ -27,4 +31,74 @@ void should_throw_exception_if_not_implemented() { assertThrows( UnsupportedOperationException.class, () -> new LuceneQuery() {}.doBuild(null, null, null)); } + + @Test + void should_cast_to_time_with_format() { + String format = "HH:mm:ss.SSS || HH:mm:ss"; + OpenSearchDateType dateType = OpenSearchDateType.of(format); + assertThrows( + UnsupportedOperationException.class, + () -> + new LuceneQuery() {}.build( + DSL.equal(ref("time_value", dateType), DSL.castTime(literal("17:00:00"))))); + } + + @Test + void should_cast_to_time_with_no_format() { + String format = "HH:mm"; + OpenSearchDateType dateType = OpenSearchDateType.of(format); + assertThrows( + UnsupportedOperationException.class, + () -> + new LuceneQuery() {}.build( + DSL.equal(ref("time_value", dateType), DSL.castTime(literal("17:00:00"))))); + } + + @Test + void should_cast_to_date_with_format() { + String format = "yyyy-MM-dd"; + OpenSearchDateType dateType = OpenSearchDateType.of(format); + assertThrows( + UnsupportedOperationException.class, + () -> + new LuceneQuery() {}.build( + DSL.equal(ref("date_value", dateType), DSL.castDate(literal("2017-01-02"))))); + } + + @Test + void should_cast_to_date_with_no_format() { + String format = "yyyy/MM/dd"; + OpenSearchDateType dateType = OpenSearchDateType.of(format); + assertThrows( + UnsupportedOperationException.class, + () -> + new LuceneQuery() {}.build( + DSL.equal(ref("date_value", dateType), DSL.castDate(literal("2017-01-02"))))); + } + + @Test + void should_cast_to_timestamp_with_format() { + String format = "yyyy-MM-dd HH:mm:ss"; + OpenSearchDateType dateType = OpenSearchDateType.of(format); + assertThrows( + UnsupportedOperationException.class, + () -> + new LuceneQuery() {}.build( + DSL.equal( + ref("timestamp_value", dateType), + DSL.castTimestamp(literal("2021-11-08 17:00:00"))))); + } + + @Test + void should_cast_to_timestamp_with_no_format() { + String format = "2021/11/08T17:00:00Z"; + OpenSearchDateType dateType = OpenSearchDateType.of(format); + assertThrows( + SemanticCheckException.class, + () -> + new LuceneQuery() {}.build( + DSL.equal( + ref("timestamp_value", dateType), + DSL.castTimestamp(literal("2021-11-08 17:00:00 "))))); + } } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/RangeQueryTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/RangeQueryTest.java index ca87f42900..2f5482171d 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/RangeQueryTest.java +++ 
b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/RangeQueryTest.java @@ -5,13 +5,17 @@ package org.opensearch.sql.opensearch.storage.script.filter.lucene; +import static org.junit.Assert.*; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.opensearch.sql.data.type.ExprCoreType.STRING; +import java.time.*; import org.junit.jupiter.api.DisplayNameGeneration; import org.junit.jupiter.api.DisplayNameGenerator; import org.junit.jupiter.api.Test; -import org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.data.model.*; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.opensearch.data.type.OpenSearchDateType; import org.opensearch.sql.opensearch.storage.script.filter.lucene.RangeQuery.Comparison; @DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) @@ -26,4 +30,65 @@ void should_throw_exception_for_unsupported_comparison() { new RangeQuery(Comparison.BETWEEN) .doBuild("name", STRING, ExprValueUtils.stringValue("John"))); } + + @Test + void test_timestamp_with_no_format() { + OpenSearchDateType openSearchDateType = OpenSearchDateType.of(ExprCoreType.TIMESTAMP); + assertNotNull( + new RangeQuery(Comparison.LT) + .doBuild("time", openSearchDateType, new ExprTimestampValue("2021-11-08 17:00:00"))); + } + + @Test + void test_timestamp_has_format() { + String timestamp = "2019-03-23 21:34:46"; + OpenSearchDateType dateType = OpenSearchDateType.of("yyyy-MM-dd HH:mm:ss"); + ZonedDateTime zonedDateTime = dateType.getParsedDateTime(timestamp); + ExprValue literal = ExprValueUtils.timestampValue(zonedDateTime.toInstant()); + assertNotNull(new RangeQuery(Comparison.LT).doBuild("time_stamp", dateType, literal)); + } + + @Test + void test_time_with_no_format() { + OpenSearchDateType openSearchDateType = OpenSearchDateType.of(ExprCoreType.TIME); + assertNotNull( + new RangeQuery(Comparison.LT) + .doBuild("time", openSearchDateType, new ExprTimeValue("17:00:00"))); + } + + @Test + void test_time_has_format() { + long epochTimestamp = 1636390800000L; // Corresponds to "2021-11-08T17:00:00Z" + String format = "epoch_millis"; + OpenSearchDateType dateType = OpenSearchDateType.of(format); + ZonedDateTime zonedDateTime = dateType.getParsedDateTime(String.valueOf(epochTimestamp)); + ExprValue literal = ExprValueUtils.timeValue(zonedDateTime.toLocalTime()); + assertNotNull(new RangeQuery(Comparison.LT).doBuild("time", dateType, literal)); + } + + @Test + void test_date_with_no_format() { + OpenSearchDateType openSearchDateType = OpenSearchDateType.of(ExprCoreType.DATE); + assertNotNull( + new RangeQuery(Comparison.LT) + .doBuild("date", openSearchDateType, new ExprDateValue("2021-11-08"))); + } + + @Test + void test_date_has_format() { + String dateString = "2021-11-08"; + String format = "yyyy-MM-dd"; + OpenSearchDateType dateType = OpenSearchDateType.of(format); + LocalDate parsedDate = dateType.getParsedDateTime(dateString).toLocalDate(); + ExprValue literal = ExprValueUtils.dateValue(parsedDate); + assertNotNull(new RangeQuery(Comparison.LT).doBuild("date", dateType, literal)); + } + + @Test + void test_non_date_field_type() { + String dateString = "2021-11-08"; + OpenSearchDateType dateType = OpenSearchDateType.of(STRING); + ExprValue literal = ExprValueUtils.stringValue(dateString); + assertNotNull(new RangeQuery(Comparison.LT).doBuild("string_value", dateType, literal)); + } } diff --git 
a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/TermQueryTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/TermQueryTest.java new file mode 100644 index 0000000000..def9fafba3 --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/TermQueryTest.java @@ -0,0 +1,82 @@ +package org.opensearch.sql.opensearch.storage.script.filter.lucene; + +import static org.junit.Assert.*; +import static org.opensearch.sql.data.type.ExprCoreType.STRING; + +import java.time.*; +import org.junit.jupiter.api.DisplayNameGeneration; +import org.junit.jupiter.api.DisplayNameGenerator; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.data.model.*; +import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.opensearch.data.type.OpenSearchDateType; + +@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) +class TermQueryTest { + + @Test + void test_timestamp_with_no_format() { + OpenSearchDateType openSearchDateType = OpenSearchDateType.of(ExprCoreType.TIMESTAMP); + assertNotNull( + new TermQuery() + .doBuild("time", openSearchDateType, new ExprTimestampValue("2021-11-08 17:00:00"))); + } + + @Test + void test_timestamp_has_format() { + String timestamp = "2019-03-23 21:34:46"; + OpenSearchDateType dateType = OpenSearchDateType.of("yyyy-MM-dd HH:mm:ss"); + ZonedDateTime zonedDateTime = dateType.getParsedDateTime(timestamp); + ExprValue literal = ExprValueUtils.timestampValue(zonedDateTime.toInstant()); + assertNotNull(new TermQuery().doBuild("time_stamp", dateType, literal)); + } + + @Test + void test_time_with_no_format() { + OpenSearchDateType openSearchDateType = OpenSearchDateType.of(ExprCoreType.TIME); + assertNotNull( + new TermQuery().doBuild("time", openSearchDateType, new ExprTimeValue("17:00:00"))); + } + + @Test + void test_time_has_format() { + long epochTimestamp = 1636390800000L; // Corresponds to "2021-11-08T17:00:00Z" + String format = "epoch_millis"; + OpenSearchDateType dateType = OpenSearchDateType.of(format); + ZonedDateTime zonedDateTime = dateType.getParsedDateTime(String.valueOf(epochTimestamp)); + ExprValue literal = ExprValueUtils.timeValue(zonedDateTime.toLocalTime()); + assertNotNull(new TermQuery().doBuild("time", dateType, literal)); + } + + @Test + void test_date_with_no_format() { + OpenSearchDateType openSearchDateType = OpenSearchDateType.of(ExprCoreType.DATE); + assertNotNull( + new TermQuery().doBuild("date", openSearchDateType, new ExprDateValue("2021-11-08"))); + } + + @Test + void test_date_has_format() { + String dateString = "2021-11-08"; + String format = "yyyy-MM-dd"; + OpenSearchDateType dateType = OpenSearchDateType.of(format); + LocalDate parsedDate = dateType.getParsedDateTime(dateString).toLocalDate(); + ExprValue literal = ExprValueUtils.dateValue(parsedDate); + assertNotNull(new TermQuery().doBuild("date", dateType, literal)); + } + + @Test + void test_invalid_date_field_type() { + String dateString = "2021-11-08"; + OpenSearchDateType dateType = OpenSearchDateType.of(STRING); + ExprValue literal = ExprValueUtils.stringValue(dateString); + assertNotNull(new TermQuery().doBuild("string_value", dateType, literal)); + } + + @Test + void test_string_field_type() { + String dateString = "2021-11-08"; + ExprValue literal = ExprValueUtils.stringValue(dateString); + assertNotNull(new TermQuery().doBuild("string_value", STRING, literal)); + } +} diff --git 
a/opensearch/src/test/java/org/opensearch/sql/opensearch/util/RestRequestUtilTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/util/RestRequestUtilTest.java new file mode 100644 index 0000000000..168fabee74 --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/util/RestRequestUtilTest.java @@ -0,0 +1,24 @@ +package org.opensearch.sql.opensearch.util; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.mockito.ArgumentMatchers; +import org.mockito.Mockito; +import org.opensearch.rest.RestRequest; + +public class RestRequestUtilTest { + @Test + public void testConsumeAllRequestParameters() { + Assertions.assertThrows( + NullPointerException.class, + () -> { + RestRequestUtil.consumeAllRequestParameters(null); + }); + + RestRequest request = Mockito.mock(RestRequest.class, Mockito.RETURNS_DEEP_STUBS); + + RestRequestUtil.consumeAllRequestParameters(request); + + Mockito.verify(request.params().keySet(), Mockito.times(1)).forEach(ArgumentMatchers.any()); + } +} diff --git a/plugin/build.gradle b/plugin/build.gradle index 49cb52721d..9df3d3dd48 100644 --- a/plugin/build.gradle +++ b/plugin/build.gradle @@ -1,3 +1,5 @@ +import java.util.concurrent.Callable + /* * Copyright OpenSearch Contributors * SPDX-License-Identifier: Apache-2.0 @@ -48,6 +50,7 @@ opensearchplugin { name 'opensearch-sql' description 'OpenSearch SQL' classname 'org.opensearch.sql.plugin.SQLPlugin' + extendedPlugins = ['opensearch-job-scheduler'] licenseFile rootProject.file("LICENSE.txt") noticeFile rootProject.file("NOTICE") } @@ -98,7 +101,8 @@ configurations.all { resolutionStrategy.force "com.fasterxml.jackson.core:jackson-core:${versions.jackson}" // enforce 1.1.3, https://www.whitesourcesoftware.com/vulnerability-database/WS-2019-0379 resolutionStrategy.force 'commons-codec:commons-codec:1.13' - resolutionStrategy.force 'com.google.guava:guava:32.0.1-jre' + resolutionStrategy.force "com.google.guava:guava:${guava_version}" + resolutionStrategy.force 'com.google.guava:failureaccess:1.0.2' resolutionStrategy.force "com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:${versions.jackson}" resolutionStrategy.force "com.fasterxml.jackson.dataformat:jackson-dataformat-smile:${versions.jackson}" resolutionStrategy.force "com.fasterxml.jackson.dataformat:jackson-dataformat-cbor:${versions.jackson}" @@ -112,6 +116,11 @@ configurations.all { resolutionStrategy.force "org.jetbrains.kotlin:kotlin-stdlib-jdk8:1.9.10" resolutionStrategy.force "org.jetbrains.kotlin:kotlin-stdlib-common:1.9.10" } + +configurations { + zipArchive +} + compileJava { options.compilerArgs.addAll(["-processor", 'lombok.launch.AnnotationProcessorHider$AnnotationProcessor']) } @@ -139,6 +148,10 @@ spotless { } dependencies { + compileOnly "org.opensearch:opensearch-job-scheduler-spi:${opensearch_build}" + compileOnly "com.google.guava:guava:${guava_version}" + compileOnly 'com.google.guava:failureaccess:1.0.2' + api "com.fasterxml.jackson.core:jackson-core:${versions.jackson}" api "com.fasterxml.jackson.core:jackson-databind:${versions.jackson_databind}" api "com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}" @@ -149,12 +162,15 @@ dependencies { api project(':prometheus') api project(':datasources') api project(':spark') + api project(':async-query') testImplementation group: 'net.bytebuddy', name: 'byte-buddy-agent', version: '1.14.9' testImplementation group: 'org.hamcrest', name: 'hamcrest-library', version: '2.1' testImplementation group: 'org.mockito', 
name: 'mockito-core', version: "${versions.mockito}" testImplementation group: 'org.mockito', name: 'mockito-junit-jupiter', version: "${versions.mockito}" testImplementation 'org.junit.jupiter:junit-jupiter:5.9.3' + + zipArchive group: 'org.opensearch.plugin', name:'opensearch-job-scheduler', version: "${opensearch_build}" } test { @@ -171,6 +187,11 @@ ext { noticeFile = rootProject.file('NOTICE') } +java { + sourceCompatibility = JavaVersion.VERSION_21 + targetCompatibility = JavaVersion.VERSION_21 +} + // ANTLR generated parser file is too large to be checked which caused licenseHeaders stuck. licenseHeaders { enabled = true @@ -198,11 +219,10 @@ dependencyLicenses.enabled = false // enable testingConventions check will cause errors like: "Classes ending with [Tests] must subclass [LuceneTestCase]" testingConventions.enabled = false -// TODO: need to verify the thirdPartyAudi +// TODO: need to verify the thirdPartyAudit // currently it complains missing classes like ibatis, mysql etc, should not be a problem thirdPartyAudit.enabled = false - apply plugin: 'com.netflix.nebula.ospackage' validateNebulaPom.enabled = false @@ -265,7 +285,24 @@ afterEvaluate { } } +def getJobSchedulerPlugin() { + provider(new Callable() { + @Override + RegularFile call() throws Exception { + return new RegularFile() { + @Override + File getAsFile() { + return configurations.zipArchive.asFileTree.matching { + include '**/opensearch-job-scheduler*' + }.singleFile + } + } + } + }) +} + testClusters.integTest { + plugin(getJobSchedulerPlugin()) plugin(project.tasks.bundlePlugin.archiveFile) testDistribution = "ARCHIVE" @@ -281,3 +318,4 @@ testClusters.integTest { run { useCluster testClusters.integTest } + diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java b/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java index a9eb38a2c2..766edc42c0 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java @@ -7,10 +7,12 @@ import static java.util.Collections.singletonList; import static org.opensearch.sql.datasource.model.DataSourceMetadata.defaultOpenSearchDataSourceMetadata; +import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_REQUEST_BUFFER_INDEX_NAME; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import java.time.Clock; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.List; @@ -39,9 +41,14 @@ import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.env.Environment; import org.opensearch.env.NodeEnvironment; +import org.opensearch.indices.SystemIndexDescriptor; +import org.opensearch.jobscheduler.spi.JobSchedulerExtension; +import org.opensearch.jobscheduler.spi.ScheduledJobParser; +import org.opensearch.jobscheduler.spi.ScheduledJobRunner; import org.opensearch.plugins.ActionPlugin; import org.opensearch.plugins.Plugin; import org.opensearch.plugins.ScriptPlugin; +import org.opensearch.plugins.SystemIndexPlugin; import org.opensearch.repositories.RepositoriesService; import org.opensearch.rest.RestController; import org.opensearch.rest.RestHandler; @@ -53,12 +60,17 @@ import org.opensearch.sql.datasources.auth.DataSourceUserAuthorizationHelperImpl; import org.opensearch.sql.datasources.encryptor.EncryptorImpl; import org.opensearch.sql.datasources.glue.GlueDataSourceFactory; +import org.opensearch.sql.datasources.glue.SecurityLakeDataSourceFactory; import 
org.opensearch.sql.datasources.model.transport.*; import org.opensearch.sql.datasources.rest.RestDataSourceQueryAction; import org.opensearch.sql.datasources.service.DataSourceMetadataStorage; import org.opensearch.sql.datasources.service.DataSourceServiceImpl; import org.opensearch.sql.datasources.storage.OpenSearchDataSourceMetadataStorage; -import org.opensearch.sql.datasources.transport.*; +import org.opensearch.sql.datasources.transport.TransportCreateDataSourceAction; +import org.opensearch.sql.datasources.transport.TransportDeleteDataSourceAction; +import org.opensearch.sql.datasources.transport.TransportGetDataSourceAction; +import org.opensearch.sql.datasources.transport.TransportPatchDataSourceAction; +import org.opensearch.sql.datasources.transport.TransportUpdateDataSourceAction; import org.opensearch.sql.legacy.esdomain.LocalClusterState; import org.opensearch.sql.legacy.executor.AsyncRestExecutor; import org.opensearch.sql.legacy.metrics.Metrics; @@ -83,6 +95,9 @@ import org.opensearch.sql.spark.flint.FlintIndexMetadataServiceImpl; import org.opensearch.sql.spark.flint.operation.FlintIndexOpFactory; import org.opensearch.sql.spark.rest.RestAsyncQueryManagementAction; +import org.opensearch.sql.spark.scheduler.OpenSearchAsyncQueryScheduler; +import org.opensearch.sql.spark.scheduler.job.ScheduledAsyncQueryJobRunner; +import org.opensearch.sql.spark.scheduler.parser.OpenSearchScheduleQueryJobRequestParser; import org.opensearch.sql.spark.storage.SparkStorageFactory; import org.opensearch.sql.spark.transport.TransportCancelAsyncQueryRequestAction; import org.opensearch.sql.spark.transport.TransportCreateAsyncQueryRequestAction; @@ -97,7 +112,8 @@ import org.opensearch.threadpool.ThreadPool; import org.opensearch.watcher.ResourceWatcherService; -public class SQLPlugin extends Plugin implements ActionPlugin, ScriptPlugin { +public class SQLPlugin extends Plugin + implements ActionPlugin, ScriptPlugin, SystemIndexPlugin, JobSchedulerExtension { private static final Logger LOGGER = LogManager.getLogger(SQLPlugin.class); @@ -108,6 +124,7 @@ public class SQLPlugin extends Plugin implements ActionPlugin, ScriptPlugin { private NodeClient client; private DataSourceServiceImpl dataSourceService; + private OpenSearchAsyncQueryScheduler asyncQueryScheduler; private Injector injector; public String name() { @@ -138,8 +155,8 @@ public List getRestHandlers( new RestSqlStatsAction(settings, restController), new RestPPLStatsAction(settings, restController), new RestQuerySettingsAction(settings, restController), - new RestDataSourceQueryAction(), - new RestAsyncQueryManagementAction()); + new RestDataSourceQueryAction((OpenSearchSettings) pluginSettings), + new RestAsyncQueryManagementAction((OpenSearchSettings) pluginSettings)); } /** Register action and handler so that transportClient can find proxy for action. 
*/ @@ -228,11 +245,33 @@ public Collection createComponents( dataSourceService, injector.getInstance(FlintIndexMetadataServiceImpl.class), injector.getInstance(FlintIndexOpFactory.class)); + AsyncQueryExecutorService asyncQueryExecutorService = + injector.getInstance(AsyncQueryExecutorService.class); + ScheduledAsyncQueryJobRunner.getJobRunnerInstance() + .loadJobResource(client, clusterService, threadPool, asyncQueryExecutorService); + return ImmutableList.of( - dataSourceService, - injector.getInstance(AsyncQueryExecutorService.class), - clusterManagerEventListener, - pluginSettings); + dataSourceService, asyncQueryExecutorService, clusterManagerEventListener, pluginSettings); + } + + @Override + public String getJobType() { + return OpenSearchAsyncQueryScheduler.SCHEDULER_PLUGIN_JOB_TYPE; + } + + @Override + public String getJobIndex() { + return OpenSearchAsyncQueryScheduler.SCHEDULER_INDEX_NAME; + } + + @Override + public ScheduledJobRunner getJobRunner() { + return ScheduledAsyncQueryJobRunner.getJobRunnerInstance(); + } + + @Override + public ScheduledJobParser getJobParser() { + return OpenSearchScheduleQueryJobRequestParser.getJobParser(); } @Override @@ -274,7 +313,10 @@ private DataSourceServiceImpl createDataSourceService() { } DataSourceMetadataStorage dataSourceMetadataStorage = new OpenSearchDataSourceMetadataStorage( - client, clusterService, new EncryptorImpl(masterKey)); + client, + clusterService, + new EncryptorImpl(masterKey), + (OpenSearchSettings) pluginSettings); DataSourceUserAuthorizationHelper dataSourceUserAuthorizationHelper = new DataSourceUserAuthorizationHelperImpl(client); return new DataSourceServiceImpl( @@ -285,8 +327,21 @@ private DataSourceServiceImpl createDataSourceService() { .add(new PrometheusStorageFactory(pluginSettings)) .add(new SparkStorageFactory(this.client, pluginSettings)) .add(new GlueDataSourceFactory(pluginSettings)) + .add(new SecurityLakeDataSourceFactory(pluginSettings)) .build(), dataSourceMetadataStorage, dataSourceUserAuthorizationHelper); } + + @Override + public Collection getSystemIndexDescriptors(Settings settings) { + List systemIndexDescriptors = new ArrayList<>(); + systemIndexDescriptors.add( + new SystemIndexDescriptor( + OpenSearchDataSourceMetadataStorage.DATASOURCE_INDEX_NAME, "SQL DataSources index")); + systemIndexDescriptors.add( + new SystemIndexDescriptor( + SPARK_REQUEST_BUFFER_INDEX_NAME + "*", "SQL Spark Request Buffer index pattern")); + return systemIndexDescriptors; + } } diff --git a/plugin/src/main/resources/META-INF/services/org.opensearch.jobscheduler.spi.JobSchedulerExtension b/plugin/src/main/resources/META-INF/services/org.opensearch.jobscheduler.spi.JobSchedulerExtension new file mode 100644 index 0000000000..5337857c15 --- /dev/null +++ b/plugin/src/main/resources/META-INF/services/org.opensearch.jobscheduler.spi.JobSchedulerExtension @@ -0,0 +1,6 @@ +# +# Copyright OpenSearch Contributors +# SPDX-License-Identifier: Apache-2.0 +# + +org.opensearch.sql.plugin.SQLPlugin \ No newline at end of file diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/transport/TransportPPLQueryRequestTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/transport/TransportPPLQueryRequestTest.java index 286ac20fed..1cce1ccc18 100644 --- a/plugin/src/test/java/org/opensearch/sql/plugin/transport/TransportPPLQueryRequestTest.java +++ b/plugin/src/test/java/org/opensearch/sql/plugin/transport/TransportPPLQueryRequestTest.java @@ -20,7 +20,7 @@ public class TransportPPLQueryRequestTest { - @Rule public 
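The META-INF/services file registered above is what lets the job-scheduler plugin discover SQLPlugin as a JobSchedulerExtension. As a rough, hypothetical sketch of the generic SPI lookup such an entry feeds (the real discovery happens inside OpenSearch's plugin loading rather than in application code, and the class name here is invented):

```java
import java.util.ServiceLoader;
import org.opensearch.jobscheduler.spi.JobSchedulerExtension;

public class SpiLookupSketch {
  public static void main(String[] args) {
    // Every provider listed under META-INF/services/org.opensearch.jobscheduler.spi.JobSchedulerExtension
    // becomes visible here; SQLPlugin would report the job type and job index it declares above.
    for (JobSchedulerExtension extension : ServiceLoader.load(JobSchedulerExtension.class)) {
      System.out.println(extension.getJobType() + " uses index " + extension.getJobIndex());
    }
  }
}
```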
ExpectedException exceptionRule = ExpectedException.none(); + @Rule public final ExpectedException exceptionRule = ExpectedException.none(); @Test public void testValidate() { diff --git a/ppl/build.gradle b/ppl/build.gradle index d58882d5e8..2a3d6bdbf9 100644 --- a/ppl/build.gradle +++ b/ppl/build.gradle @@ -48,7 +48,7 @@ dependencies { runtimeOnly group: 'org.reflections', name: 'reflections', version: '0.9.12' implementation "org.antlr:antlr4-runtime:4.7.1" - implementation group: 'com.google.guava', name: 'guava', version: '32.0.1-jre' + implementation group: 'com.google.guava', name: 'guava', version: "${guava_version}" api group: 'org.json', name: 'json', version: '20231013' implementation group: 'org.apache.logging.log4j', name: 'log4j-core', version:"${versions.log4j}" api project(':common') diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 9f707c13cd..053ec530db 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -35,6 +35,8 @@ NEW_FIELD: 'NEW_FIELD'; KMEANS: 'KMEANS'; AD: 'AD'; ML: 'ML'; +FILLNULL: 'FILLNULL'; +TRENDLINE: 'TRENDLINE'; // COMMAND ASSIST KEYWORDS AS: 'AS'; @@ -44,16 +46,21 @@ INDEX: 'INDEX'; D: 'D'; DESC: 'DESC'; DATASOURCES: 'DATASOURCES'; +USING: 'USING'; +WITH: 'WITH'; // CLAUSE KEYWORDS SORTBY: 'SORTBY'; -// FIELD KEYWORDS +// SORT FIELD KEYWORDS +// TODO #3180: Fix broken sort functionality AUTO: 'AUTO'; STR: 'STR'; -IP: 'IP'; NUM: 'NUM'; +// TRENDLINE KEYWORDS +SMA: 'SMA'; + // ARGUMENT KEYWORDS KEEPEMPTY: 'KEEPEMPTY'; CONSECUTIVE: 'CONSECUTIVE'; @@ -135,6 +142,7 @@ LONG: 'LONG'; FLOAT: 'FLOAT'; STRING: 'STRING'; BOOLEAN: 'BOOLEAN'; +IP: 'IP'; // SPECIAL CHARACTERS AND OPERATORS PIPE: '|'; @@ -322,6 +330,7 @@ CAST: 'CAST'; LIKE: 'LIKE'; ISNULL: 'ISNULL'; ISNOTNULL: 'ISNOTNULL'; +CIDRMATCH: 'CIDRMATCH'; // FLOWCONTROL FUNCTIONS IFNULL: 'IFNULL'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 5a9c179d1a..27f7e4014b 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -49,6 +49,8 @@ commands | kmeansCommand | adCommand | mlCommand + | fillnullCommand + | trendlineCommand ; searchCommand @@ -127,6 +129,35 @@ patternsMethod | REGEX ; +fillnullCommand + : FILLNULL (fillNullWithTheSameValue + | fillNullWithFieldVariousValues) + ; + +fillNullWithTheSameValue + : WITH nullReplacement = valueExpression IN nullableFieldList = fieldList + ; + +fillNullWithFieldVariousValues + : USING nullReplacementExpression (COMMA nullReplacementExpression)* + ; + +nullReplacementExpression + : nullableField = fieldExpression EQUAL nullReplacement = valueExpression + ; + +trendlineCommand + : TRENDLINE (SORT sortField)? trendlineClause (trendlineClause)* + ; + +trendlineClause + : trendlineType LT_PRTHS numberOfDataPoints = integerLiteral COMMA field = fieldExpression RT_PRTHS (AS alias = qualifiedName)? 
+ ; + +trendlineType + : SMA + ; + kmeansCommand : KMEANS (kmeansParameter)* ; @@ -188,6 +219,7 @@ statsByClause : BY fieldList | BY bySpanClause | BY bySpanClause COMMA fieldList + | BY fieldList COMMA bySpanClause ; bySpanClause @@ -254,6 +286,7 @@ expression | valueExpression ; +// predicates logicalExpression : comparisonExpression # comparsion | NOT logicalExpression # logicalNot @@ -361,7 +394,7 @@ dataTypeFunctionCall // boolean functions booleanFunctionCall - : conditionFunctionBase LT_PRTHS functionArgs RT_PRTHS + : conditionFunctionName LT_PRTHS functionArgs RT_PRTHS ; convertedDataType @@ -375,13 +408,15 @@ convertedDataType | typeName = FLOAT | typeName = STRING | typeName = BOOLEAN + | typeName = IP ; evalFunctionName : mathematicalFunctionName | dateTimeFunctionName | textFunctionName - | conditionFunctionBase + | conditionFunctionName + | flowControlFunctionName | systemFunctionName | positionFunctionName ; @@ -391,7 +426,7 @@ functionArgs ; functionArg - : (ident EQUAL)? valueExpression + : (ident EQUAL)? expression ; relevanceArg @@ -622,11 +657,16 @@ timestampFunctionName ; // condition function return boolean value -conditionFunctionBase +conditionFunctionName : LIKE - | IF | ISNULL | ISNOTNULL + | CIDRMATCH + ; + +// flow control function return non-boolean value +flowControlFunctionName + : IF | IFNULL | NULLIF ; @@ -822,6 +862,7 @@ keywordsCanBeId | textFunctionName | mathematicalFunctionName | positionFunctionName + | conditionFunctionName // commands | SEARCH | DESCRIBE @@ -834,6 +875,7 @@ keywordsCanBeId | DEDUP | SORT | EVAL + | FILLNULL | HEAD | TOP | RARE @@ -848,6 +890,7 @@ keywordsCanBeId | KMEANS | AD | ML + | TRENDLINE // commands assist keywords | SOURCE | INDEX @@ -855,7 +898,8 @@ keywordsCanBeId | DATASOURCES // CLAUSEKEYWORDS | SORTBY - // FIELDKEYWORDSAUTO + // SORT FIELD KEYWORDS + | AUTO | STR | IP | NUM diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 3c693fa0bd..c3c31ee2e1 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -51,6 +51,7 @@ import org.opensearch.sql.ast.tree.Aggregation; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; +import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Kmeans; @@ -63,6 +64,7 @@ import org.opensearch.sql.ast.tree.Rename; import org.opensearch.sql.ast.tree.Sort; import org.opensearch.sql.ast.tree.TableFunction; +import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser; @@ -332,8 +334,7 @@ public UnresolvedPlan visitTableFunction(TableFunctionContext ctx) { arg -> { String argName = (arg.ident() != null) ? arg.ident().getText() : null; builder.add( - new UnresolvedArgument( - argName, this.internalVisitExpression(arg.valueExpression()))); + new UnresolvedArgument(argName, this.internalVisitExpression(arg.expression()))); }); return new TableFunction(this.internalVisitExpression(ctx.qualifiedName()), builder.build()); } @@ -393,6 +394,49 @@ public UnresolvedPlan visitMlCommand(OpenSearchPPLParser.MlCommandContext ctx) { return new ML(builder.build()); } + /** fillnull command. 
*/ + @Override + public UnresolvedPlan visitFillNullWithTheSameValue( + OpenSearchPPLParser.FillNullWithTheSameValueContext ctx) { + return new FillNull( + FillNull.ContainNullableFieldFill.ofSameValue( + internalVisitExpression(ctx.nullReplacement), + ctx.nullableFieldList.fieldExpression().stream() + .map(f -> (Field) internalVisitExpression(f)) + .toList())); + } + + /** fillnull command. */ + @Override + public UnresolvedPlan visitFillNullWithFieldVariousValues( + OpenSearchPPLParser.FillNullWithFieldVariousValuesContext ctx) { + ImmutableList.Builder replacementsBuilder = ImmutableList.builder(); + for (int i = 0; i < ctx.nullReplacementExpression().size(); i++) { + replacementsBuilder.add( + new FillNull.NullableFieldFill( + (Field) internalVisitExpression(ctx.nullReplacementExpression(i).nullableField), + internalVisitExpression(ctx.nullReplacementExpression(i).nullReplacement))); + } + + return new FillNull( + FillNull.ContainNullableFieldFill.ofVariousValue(replacementsBuilder.build())); + } + + /** trendline command. */ + @Override + public UnresolvedPlan visitTrendlineCommand(OpenSearchPPLParser.TrendlineCommandContext ctx) { + List trendlineComputations = + ctx.trendlineClause().stream() + .map(expressionBuilder::visit) + .map(Trendline.TrendlineComputation.class::cast) + .collect(Collectors.toList()); + return Optional.ofNullable(ctx.sortField()) + .map(this::internalVisitExpression) + .map(Field.class::cast) + .map(sort -> new Trendline(Optional.of(sort), trendlineComputations)) + .orElse(new Trendline(Optional.empty(), trendlineComputations)); + } + /** Get original text in query. */ private String getTextInQuery(ParserRuleContext ctx) { Token start = ctx.getStart(); diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 47db10c99b..5a7522683a 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -45,6 +45,7 @@ import com.google.common.collect.ImmutableMap; import java.util.Arrays; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -52,6 +53,8 @@ import org.antlr.v4.runtime.RuleContext; import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.expression.*; +import org.opensearch.sql.ast.tree.Trendline; +import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParserBaseVisitor; @@ -63,7 +66,7 @@ public class AstExpressionBuilder extends OpenSearchPPLParserBaseVisitor FUNCTION_NAME_MAPPING = + private static final Map FUNCTION_NAME_MAPPING = new ImmutableMap.Builder() .put("isnull", IS_NULL.getName().getFunctionName()) .put("isnotnull", IS_NOT_NULL.getName().getFunctionName()) @@ -75,6 +78,28 @@ public UnresolvedExpression visitEvalClause(EvalClauseContext ctx) { return new Let((Field) visit(ctx.fieldExpression()), visit(ctx.expression())); } + /** Trendline clause. 
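The grammar rules and AST builders above add the fillnull and trendline commands to PPL. A small, illustrative sketch of query strings they accept, parsed the same way the new unit tests do (index and field names are invented for the example):

```java
import org.opensearch.sql.ppl.antlr.PPLSyntaxParser;

public class FillNullAndTrendlineSyntaxSketch {
  public static void main(String[] args) {
    PPLSyntaxParser parser = new PPLSyntaxParser();
    // fillnull: one replacement value applied to several nullable fields
    parser.parse("source=logs | fillnull with 0 in bytes, latency");
    // fillnull: a different replacement per field
    parser.parse("source=logs | fillnull using bytes = 0, latency = -1");
    // trendline: simple moving average over 5 data points, with optional sort and alias
    parser.parse("source=logs | trendline sort timestamp sma(5, bytes) as bytes_sma");
  }
}
```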
*/ + @Override + public Trendline.TrendlineComputation visitTrendlineClause( + OpenSearchPPLParser.TrendlineClauseContext ctx) { + final int numberOfDataPoints = Integer.parseInt(ctx.numberOfDataPoints.getText()); + if (numberOfDataPoints < 1) { + throw new SyntaxCheckException( + "Number of trendline data-points must be greater than or equal to 1"); + } + + final Field dataField = (Field) this.visitFieldExpression(ctx.field); + final String alias = + ctx.alias != null + ? ctx.alias.getText() + : dataField.getChild().get(0).toString() + "_trendline"; + + final Trendline.TrendlineType computationType = + Trendline.TrendlineType.valueOf(ctx.trendlineType().getText().toUpperCase(Locale.ROOT)); + return new Trendline.TrendlineComputation( + numberOfDataPoints, dataField, alias, computationType); + } + /** Logical expression excluding boolean, comparison. */ @Override public UnresolvedExpression visitLogicalNot(LogicalNotContext ctx) { @@ -136,6 +161,8 @@ public UnresolvedExpression visitWcFieldExpression(WcFieldExpressionContext ctx) @Override public UnresolvedExpression visitSortField(SortFieldContext ctx) { + + // TODO #3180: Fix broken sort functionality return new Field( visit(ctx.sortFieldExpression().fieldExpression().qualifiedName()), ArgumentFactory.getArgumentList(ctx)); @@ -187,7 +214,7 @@ public UnresolvedExpression visitTakeAggFunctionCall( /** Eval function. */ @Override public UnresolvedExpression visitBooleanFunctionCall(BooleanFunctionCallContext ctx) { - final String functionName = ctx.conditionFunctionBase().getText(); + final String functionName = ctx.conditionFunctionName().getText().toLowerCase(); return buildFunction( FUNCTION_NAME_MAPPING.getOrDefault(functionName, functionName), ctx.functionArgs().functionArg()); diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index d28e5d122b..96e21eafcd 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -9,6 +9,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import java.util.List; +import java.util.Locale; import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; @@ -34,6 +35,7 @@ import org.opensearch.sql.ast.tree.Aggregation; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; +import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Project; @@ -42,6 +44,7 @@ import org.opensearch.sql.ast.tree.Rename; import org.opensearch.sql.ast.tree.Sort; import org.opensearch.sql.ast.tree.TableFunction; +import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.planner.logical.LogicalAggregation; @@ -220,20 +223,59 @@ public String visitHead(Head node, String context) { return StringUtils.format("%s | head %d", child, size); } + @Override + public String visitTrendline(Trendline node, String context) { + String child = node.getChild().get(0).accept(this, context); + String computations = visitExpressionList(node.getComputations(), " "); + return StringUtils.format("%s | trendline %s", child, computations); + } + private String 
visitFieldList(List fieldList) { return fieldList.stream().map(this::visitExpression).collect(Collectors.joining(",")); } - private String visitExpressionList(List expressionList) { + private String visitExpressionList(List expressionList) { + return visitExpressionList(expressionList, ","); + } + + private String visitExpressionList( + List expressionList, String delimiter) { return expressionList.isEmpty() ? "" - : expressionList.stream().map(this::visitExpression).collect(Collectors.joining(",")); + : expressionList.stream().map(this::visitExpression).collect(Collectors.joining(delimiter)); } private String visitExpression(UnresolvedExpression expression) { return expressionAnalyzer.analyze(expression, null); } + @Override + public String visitFillNull(FillNull node, String context) { + String child = node.getChild().get(0).accept(this, context); + List fieldFills = node.getNullableFieldFills(); + final UnresolvedExpression firstReplacement = fieldFills.getFirst().getReplaceNullWithMe(); + if (fieldFills.stream().allMatch(n -> firstReplacement == n.getReplaceNullWithMe())) { + return StringUtils.format( + "%s | fillnull with %s in %s", + child, + firstReplacement, + node.getNullableFieldFills().stream() + .map(n -> visitExpression(n.getNullableFieldReference())) + .collect(Collectors.joining(", "))); + } else { + return StringUtils.format( + "%s | fillnull using %s", + child, + node.getNullableFieldFills().stream() + .map( + n -> + StringUtils.format( + "%s = %s", + visitExpression(n.getNullableFieldReference()), n.getReplaceNullWithMe())) + .collect(Collectors.joining(", "))); + } + } + private String groupBy(String groupBy) { return Strings.isNullOrEmpty(groupBy) ? "" : StringUtils.format("by %s", groupBy); } @@ -316,5 +358,14 @@ public String visitAlias(Alias node, String context) { String expr = node.getDelegated().accept(this, context); return StringUtils.format("%s", expr); } + + @Override + public String visitTrendlineComputation(Trendline.TrendlineComputation node, String context) { + final String dataField = node.getDataField().accept(this, context); + final String aliasClause = " as " + node.getAlias(); + final String computationType = node.getComputationType().name().toLowerCase(Locale.ROOT); + return StringUtils.format( + "%s(%d, %s)%s", computationType, node.getNumberOfDataPoints(), dataField, aliasClause); + } } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/PPLServiceTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/PPLServiceTest.java index 598f6691cb..34553823c4 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/PPLServiceTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/PPLServiceTest.java @@ -32,9 +32,9 @@ @RunWith(MockitoJUnitRunner.class) public class PPLServiceTest { - private static String QUERY = "/_plugins/_ppl"; + private static final String QUERY = "/_plugins/_ppl"; - private static String EXPLAIN = "/_plugins/_ppl/_explain"; + private static final String EXPLAIN = "/_plugins/_ppl/_explain"; private PPLService pplService; diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java index 943953d416..2645be3aca 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java @@ -18,7 +18,7 @@ public class PPLSyntaxParserTest { - @Rule public ExpectedException exceptionRule = ExpectedException.none(); + @Rule public final ExpectedException 
exceptionRule = ExpectedException.none(); @Test public void testSearchCommandShouldPass() { @@ -417,4 +417,16 @@ public void testCanParseTimestampdiffFunction() { new PPLSyntaxParser() .parse("SOURCE=test | eval k = TIMESTAMPDIFF(WEEK,'2003-01-02','2003-01-02')")); } + + @Test + public void testCanParseFillNullSameValue() { + assertNotNull(new PPLSyntaxParser().parse("SOURCE=test | fillnull with 0 in a")); + assertNotNull(new PPLSyntaxParser().parse("SOURCE=test | fillnull with 0 in a, b")); + } + + @Test + public void testCanParseFillNullVariousValues() { + assertNotNull(new PPLSyntaxParser().parse("SOURCE=test | fillnull using a = 0")); + assertNotNull(new PPLSyntaxParser().parse("SOURCE=test | fillnull using a = 0, b = 1")); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java index 29e6ff3298..f4e90395cb 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/domain/PPLQueryRequestTest.java @@ -15,7 +15,7 @@ public class PPLQueryRequestTest { - @Rule public ExpectedException exceptionRule = ExpectedException.none(); + @Rule public final ExpectedException exceptionRule = ExpectedException.none(); @Test public void getRequestShouldPass() { diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index c9989a49c4..c6f4ed2044 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -7,12 +7,14 @@ import static java.util.Collections.emptyList; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; import static org.opensearch.sql.ast.dsl.AstDSL.agg; import static org.opensearch.sql.ast.dsl.AstDSL.aggregate; import static org.opensearch.sql.ast.dsl.AstDSL.alias; import static org.opensearch.sql.ast.dsl.AstDSL.argument; import static org.opensearch.sql.ast.dsl.AstDSL.booleanLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.compare; +import static org.opensearch.sql.ast.dsl.AstDSL.computation; import static org.opensearch.sql.ast.dsl.AstDSL.dedupe; import static org.opensearch.sql.ast.dsl.AstDSL.defaultDedupArgs; import static org.opensearch.sql.ast.dsl.AstDSL.defaultFieldsArgs; @@ -38,32 +40,39 @@ import static org.opensearch.sql.ast.dsl.AstDSL.span; import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral; import static org.opensearch.sql.ast.dsl.AstDSL.tableFunction; +import static org.opensearch.sql.ast.dsl.AstDSL.trendline; import static org.opensearch.sql.ast.dsl.AstDSL.unresolvedArg; +import static org.opensearch.sql.ast.tree.Trendline.TrendlineType.SMA; import static org.opensearch.sql.utils.SystemIndexUtils.DATASOURCES_TABLE_NAME; import static org.opensearch.sql.utils.SystemIndexUtils.mappingTable; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import java.util.Arrays; +import java.util.Optional; import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; import org.opensearch.sql.ast.Node; import org.opensearch.sql.ast.expression.DataType; +import org.opensearch.sql.ast.expression.Field; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.expression.ParseMethod; import org.opensearch.sql.ast.expression.SpanUnit; import 
org.opensearch.sql.ast.tree.AD; +import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.RareTopN.CommandType; +import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.ppl.antlr.PPLSyntaxParser; public class AstBuilderTest { @Rule public ExpectedException exceptionRule = ExpectedException.none(); - private PPLSyntaxParser parser = new PPLSyntaxParser(); + private final PPLSyntaxParser parser = new PPLSyntaxParser(); @Test public void testSearchCommand() { @@ -319,11 +328,26 @@ public void testStatsCommandWithSpan() { exprList(alias("f1", field("f1")), alias("f2", field("f2"))), alias("span(timestamp,1h)", span(field("timestamp"), intLiteral(1), SpanUnit.H)), defaultStatsArgs())); - } - @Test(expected = org.opensearch.sql.common.antlr.SyntaxCheckException.class) - public void throwExceptionIfSpanInGroupByList() { - plan("source=t | stats avg(price) by f1, f2, span(timestamp, 1h)"); + assertEqual( + "source=t | stats avg(price) by b, span(timestamp, 1h)", + agg( + relation("t"), + exprList(alias("avg(price)", aggregate("avg", field("price")))), + emptyList(), + exprList(alias("b", field("b"))), + alias("span(timestamp,1h)", span(field("timestamp"), intLiteral(1), SpanUnit.H)), + defaultStatsArgs())); + + assertEqual( + "source=t | stats avg(price) by f1, f2, span(timestamp, 1h)", + agg( + relation("t"), + exprList(alias("avg(price)", aggregate("avg", field("price")))), + emptyList(), + exprList(alias("f1", field("f1")), alias("f2", field("f2"))), + alias("span(timestamp,1h)", span(field("timestamp"), intLiteral(1), SpanUnit.H)), + defaultStatsArgs())); } @Test(expected = org.opensearch.sql.common.antlr.SyntaxCheckException.class) @@ -645,6 +669,103 @@ public void testMLCommand() { .build())); } + @Test + public void testFillNullCommandSameValue() { + assertEqual( + "source=t | fillnull with 0 in a, b, c", + new FillNull( + relation("t"), + FillNull.ContainNullableFieldFill.ofSameValue( + intLiteral(0), + ImmutableList.builder() + .add(field("a")) + .add(field("b")) + .add(field("c")) + .build()))); + } + + @Test + public void testFillNullCommandVariousValues() { + assertEqual( + "source=t | fillnull using a = 1, b = 2, c = 3", + new FillNull( + relation("t"), + FillNull.ContainNullableFieldFill.ofVariousValue( + ImmutableList.builder() + .add(new FillNull.NullableFieldFill(field("a"), intLiteral(1))) + .add(new FillNull.NullableFieldFill(field("b"), intLiteral(2))) + .add(new FillNull.NullableFieldFill(field("c"), intLiteral(3))) + .build()))); + } + + public void testTrendline() { + assertEqual( + "source=t | trendline sma(5, test_field) as test_field_alias sma(1, test_field_2) as" + + " test_field_alias_2", + trendline( + relation("t"), + Optional.empty(), + computation(5, field("test_field"), "test_field_alias", SMA), + computation(1, field("test_field_2"), "test_field_alias_2", SMA))); + } + + @Test + public void testTrendlineSort() { + assertEqual( + "source=t | trendline sort test_field sma(5, test_field)", + trendline( + relation("t"), + Optional.of( + field( + "test_field", + argument("asc", booleanLiteral(true)), + argument("type", nullLiteral()))), + computation(5, field("test_field"), "test_field_trendline", SMA))); + } + + @Test + public void testTrendlineSortDesc() { + assertEqual( + "source=t | trendline sort - test_field sma(5, test_field)", + trendline( + relation("t"), + Optional.of( + field( + "test_field", + argument("asc", 
booleanLiteral(false)), + argument("type", nullLiteral()))), + computation(5, field("test_field"), "test_field_trendline", SMA))); + } + + @Test + public void testTrendlineSortAsc() { + assertEqual( + "source=t | trendline sort + test_field sma(5, test_field)", + trendline( + relation("t"), + Optional.of( + field( + "test_field", + argument("asc", booleanLiteral(true)), + argument("type", nullLiteral()))), + computation(5, field("test_field"), "test_field_trendline", SMA))); + } + + @Test + public void testTrendlineNoAlias() { + assertEqual( + "source=t | trendline sma(5, test_field)", + trendline( + relation("t"), + Optional.empty(), + computation(5, field("test_field"), "test_field_trendline", SMA))); + } + + @Test + public void testTrendlineTooFewSamples() { + assertThrows(SyntaxCheckException.class, () -> plan("source=t | trendline sma(0, test_field)")); + } + @Test public void testDescribeCommand() { assertEqual("describe t", relation(mappingTable("t"))); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java index 7bcb87d193..fbb25549ab 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstExpressionBuilderTest.java @@ -107,12 +107,15 @@ public void testLogicalLikeExpr() { @Test public void testBooleanIsNullFunction() { assertEqual("source=t isnull(a)", filter(relation("t"), function("is null", field("a")))); + assertEqual("source=t ISNULL(a)", filter(relation("t"), function("is null", field("a")))); } @Test public void testBooleanIsNotNullFunction() { assertEqual( "source=t isnotnull(a)", filter(relation("t"), function("is not null", field("a")))); + assertEqual( + "source=t ISNOTNULL(a)", filter(relation("t"), function("is not null", field("a")))); } /** Todo. search operator should not include functionCall, need to change antlr. 
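For context on the sma(...) clauses exercised by these tests: the parser changes only validate the syntax and the minimum data-point count, while the moving average itself is computed elsewhere in the engine. A generic, stand-alone illustration of what a simple moving average over three data points yields (not code from this change):

```java
import java.util.ArrayList;
import java.util.List;

public class SimpleMovingAverageSketch {
  // Average each window of the last n values once n values are available.
  static List<Double> sma(double[] values, int n) {
    List<Double> out = new ArrayList<>();
    double sum = 0;
    for (int i = 0; i < values.length; i++) {
      sum += values[i];
      if (i >= n) {
        sum -= values[i - n]; // drop the value that left the window
      }
      if (i >= n - 1) {
        out.add(sum / n);
      }
    }
    return out;
  }

  public static void main(String[] args) {
    // sma(3, field) over 1,2,3,4,5 -> [2.0, 3.0, 4.0]
    System.out.println(sma(new double[] {1, 2, 3, 4, 5}, 3));
  }
}
```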
*/ @@ -135,6 +138,120 @@ public void testEvalFunctionExprNoArgs() { assertEqual("source=t | eval f=PI()", eval(relation("t"), let(field("f"), function("PI")))); } + @Test + public void testEvalIfFunctionExpr() { + assertEqual( + "source=t | eval f=if(true, 1, 0)", + eval( + relation("t"), + let(field("f"), function("if", booleanLiteral(true), intLiteral(1), intLiteral(0))))); + assertEqual( + "source=t | eval f=if(1>2, 1, 0)", + eval( + relation("t"), + let( + field("f"), + function( + "if", + compare(">", intLiteral(1), intLiteral(2)), + intLiteral(1), + intLiteral(0))))); + assertEqual( + "source=t | eval f=if(1<=2, 1, 0)", + eval( + relation("t"), + let( + field("f"), + function( + "if", + compare("<=", intLiteral(1), intLiteral(2)), + intLiteral(1), + intLiteral(0))))); + assertEqual( + "source=t | eval f=if(1=2, 1, 0)", + eval( + relation("t"), + let( + field("f"), + function( + "if", + compare("=", intLiteral(1), intLiteral(2)), + intLiteral(1), + intLiteral(0))))); + assertEqual( + "source=t | eval f=if(1!=2, 1, 0)", + eval( + relation("t"), + let( + field("f"), + function( + "if", + compare("!=", intLiteral(1), intLiteral(2)), + intLiteral(1), + intLiteral(0))))); + assertEqual( + "source=t | eval f=if(isnull(a), 1, 0)", + eval( + relation("t"), + let( + field("f"), + function("if", function("is null", field("a")), intLiteral(1), intLiteral(0))))); + assertEqual( + "source=t | eval f=if(isnotnull(a), 1, 0)", + eval( + relation("t"), + let( + field("f"), + function( + "if", function("is not null", field("a")), intLiteral(1), intLiteral(0))))); + assertEqual( + "source=t | eval f=if(not 1>2, 1, 0)", + eval( + relation("t"), + let( + field("f"), + function( + "if", + not(compare(">", intLiteral(1), intLiteral(2))), + intLiteral(1), + intLiteral(0))))); + assertEqual( + "source=t | eval f=if(not a in (0, 1), 1, 0)", + eval( + relation("t"), + let( + field("f"), + function( + "if", + not(in(field("a"), intLiteral(0), intLiteral(1))), + intLiteral(1), + intLiteral(0))))); + assertEqual( + "source=t | eval f=if(not a in (0, 1) OR isnull(a), 1, 0)", + eval( + relation("t"), + let( + field("f"), + function( + "if", + or( + not(in(field("a"), intLiteral(0), intLiteral(1))), + function("is null", field("a"))), + intLiteral(1), + intLiteral(0))))); + assertEqual( + "source=t | eval f=if(like(a, '_a%b%c_d_'), 1, 0)", + eval( + relation("t"), + let( + field("f"), + function( + "if", + function("like", field("a"), stringLiteral("_a%b%c_d_")), + intLiteral(1), + intLiteral(0))))); + } + @Test public void testPositionFunctionExpr() { assertEqual( diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java index 7d7b31e822..0b98ee6179 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstStatementBuilderTest.java @@ -30,7 +30,7 @@ public class AstStatementBuilderTest { @Rule public ExpectedException exceptionRule = ExpectedException.none(); - private PPLSyntaxParser parser = new PPLSyntaxParser(); + private final PPLSyntaxParser parser = new PPLSyntaxParser(); @Test public void buildQueryStatement() { diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index cd51ea07df..06f8fbb061 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ 
b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -89,6 +89,13 @@ public void testDedupCommand() { anonymize("source=t | dedup f1, f2")); } + @Test + public void testTrendlineCommand() { + assertEquals( + "source=t | trendline sma(2, date) as date_alias sma(3, time) as time_alias", + anonymize("source=t | trendline sma(2, date) as date_alias sma(3, time) as time_alias")); + } + @Test public void testHeadCommandWithNumber() { assertEquals("source=t | head 3", anonymize("source=t | head 3")); @@ -105,6 +112,19 @@ public void testEvalCommand() { assertEquals("source=t | eval r=abs(f)", anonymize("source=t | eval r=abs(f)")); } + @Test + public void testFillNullSameValue() { + assertEquals( + "source=t | fillnull with 0 in f1, f2", anonymize("source=t | fillnull with 0 in f1, f2")); + } + + @Test + public void testFillNullVariousValues() { + assertEquals( + "source=t | fillnull using f1 = 0, f2 = -1", + anonymize("source=t | fillnull using f1 = 0, f2 = -1")); + } + @Test public void testRareCommandWithGroupBy() { assertEquals("source=t | rare 10 a by b", anonymize("source=t | rare a by b")); diff --git a/protocol/build.gradle b/protocol/build.gradle index 5bbff68e51..b5d7929041 100644 --- a/protocol/build.gradle +++ b/protocol/build.gradle @@ -30,7 +30,7 @@ plugins { } dependencies { - implementation group: 'com.google.guava', name: 'guava', version: '32.0.1-jre' + implementation group: 'com.google.guava', name: 'guava', version: "${guava_version}" implementation group: 'com.fasterxml.jackson.core', name: 'jackson-core', version: "${versions.jackson}" implementation group: 'com.fasterxml.jackson.core', name: 'jackson-databind', version: "${versions.jackson_databind}" implementation group: 'com.fasterxml.jackson.dataformat', name: 'jackson-dataformat-cbor', version: "${versions.jackson}" diff --git a/protocol/src/main/java/org/opensearch/sql/protocol/response/format/CsvResponseFormatter.java b/protocol/src/main/java/org/opensearch/sql/protocol/response/format/CsvResponseFormatter.java index a61b54b258..f5899e88e0 100644 --- a/protocol/src/main/java/org/opensearch/sql/protocol/response/format/CsvResponseFormatter.java +++ b/protocol/src/main/java/org/opensearch/sql/protocol/response/format/CsvResponseFormatter.java @@ -5,12 +5,45 @@ package org.opensearch.sql.protocol.response.format; -public class CsvResponseFormatter extends FlatResponseFormatter { +import org.opensearch.sql.protocol.response.QueryResult; + +/** Response formatter to format response to csv format. 
*/ +public class CsvResponseFormatter implements ResponseFormatter { + public static final String CONTENT_TYPE = "plain/text; charset=UTF-8"; + private final String separator; + private final boolean sanitize; + public CsvResponseFormatter() { - super(",", true); + this(",", true); } public CsvResponseFormatter(boolean sanitize) { - super(",", sanitize); + this(",", sanitize); + } + + public CsvResponseFormatter(String separator, boolean sanitize) { + this.separator = separator; + this.sanitize = sanitize; + } + + @Override + public String format(QueryResult response) { + FlatResponseBase flatResponse; + if (sanitize) { + flatResponse = new FlatResponseWithSanitizer(response, separator); + } else { + flatResponse = new FlatResponseBase(response, separator); + } + return flatResponse.format(); + } + + @Override + public String format(Throwable t) { + return ErrorFormatter.prettyFormat(t); + } + + @Override + public String contentType() { + return CONTENT_TYPE; } } diff --git a/protocol/src/main/java/org/opensearch/sql/protocol/response/format/FlatResponseBase.java b/protocol/src/main/java/org/opensearch/sql/protocol/response/format/FlatResponseBase.java new file mode 100644 index 0000000000..ee6e2051d2 --- /dev/null +++ b/protocol/src/main/java/org/opensearch/sql/protocol/response/format/FlatResponseBase.java @@ -0,0 +1,93 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.protocol.response.format; + +import com.google.common.collect.ImmutableList; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; +import lombok.Getter; +import org.opensearch.sql.protocol.response.QueryResult; + +@Getter +public class FlatResponseBase { + protected static final String INTERLINE_SEPARATOR = System.lineSeparator(); + + private final QueryResult response; + protected final String separator; + + private final List headers; + private final List> data; + + FlatResponseBase(QueryResult response, String separator) { + this.response = response; + this.separator = separator; + this.headers = getOriginalHeaders(response); + this.data = getOriginalData(response); + } + + public String format() { + List headersAndData = new ArrayList<>(); + headersAndData.add(getHeaderLine()); + headersAndData.addAll(getDataLines()); + return String.join(INTERLINE_SEPARATOR, headersAndData); + } + + protected String getHeaderLine() { + return String.join(separator, headers); + } + + private List getOriginalHeaders(QueryResult response) { + ImmutableList.Builder headers = ImmutableList.builder(); + response.columnNameTypes().forEach((column, type) -> headers.add(column)); + List result = headers.build(); + return formatHeaders(result); + } + + protected List getDataLines() { + return data.stream().map(v -> String.join(separator, v)).collect(Collectors.toList()); + } + + private List> getOriginalData(QueryResult response) { + ImmutableList.Builder> dataLines = new ImmutableList.Builder<>(); + response + .iterator() + .forEachRemaining( + row -> { + ImmutableList.Builder line = new ImmutableList.Builder<>(); + // replace null values with empty string + Arrays.asList(row).forEach(val -> line.add(val == null ? 
"" : val.toString())); + dataLines.add(line.build()); + }); + List> result = dataLines.build(); + return formatData(result); + } + + protected List formatHeaders(List headers) { + return headers.stream() + .map(cell -> quoteIfRequired(separator, cell)) + .collect(Collectors.toList()); + } + + protected List> formatData(List> lines) { + List> result = new ArrayList<>(); + for (List line : lines) { + result.add( + line.stream().map(cell -> quoteIfRequired(separator, cell)).collect(Collectors.toList())); + } + return result; + } + + protected String quoteIfRequired(String separator, String cell) { + final String quote = "\""; + if (cell.contains(separator) || cell.contains(quote)) { + return quote + cell.replaceAll(quote, quote + quote) + quote; + } else { + return cell; + } + } +} diff --git a/protocol/src/main/java/org/opensearch/sql/protocol/response/format/FlatResponseFormatter.java b/protocol/src/main/java/org/opensearch/sql/protocol/response/format/FlatResponseFormatter.java deleted file mode 100644 index 8c67d524b8..0000000000 --- a/protocol/src/main/java/org/opensearch/sql/protocol/response/format/FlatResponseFormatter.java +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.protocol.response.format; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Set; -import java.util.stream.Collectors; -import lombok.Getter; -import lombok.RequiredArgsConstructor; -import org.opensearch.sql.protocol.response.QueryResult; - -@RequiredArgsConstructor -public abstract class FlatResponseFormatter implements ResponseFormatter { - private static String INLINE_SEPARATOR = ","; - private static final String INTERLINE_SEPARATOR = System.lineSeparator(); - private static final Set SENSITIVE_CHAR = ImmutableSet.of("=", "+", "-", "@"); - - public static final String CONTENT_TYPE = "plain/text; charset=UTF-8"; - - private boolean sanitize = false; - - public FlatResponseFormatter(String seperator, boolean sanitize) { - this.INLINE_SEPARATOR = seperator; - this.sanitize = sanitize; - } - - public String contentType() { - return CONTENT_TYPE; - } - - @Override - public String format(QueryResult response) { - FlatResult result = new FlatResult(response, sanitize); - return result.getFlat(); - } - - @Override - public String format(Throwable t) { - return ErrorFormatter.prettyFormat(t); - } - - /** - * Sanitize methods are migrated from legacy CSV result. Sanitize both headers and data lines by: - * 1) Second double quote entire cell if any comma is found. 
- */ - @Getter - @RequiredArgsConstructor - static class FlatResult { - private final QueryResult response; - private final boolean sanitize; - - public String getFlat() { - List headersAndData = new ArrayList<>(); - headersAndData.add(getHeaderLine(response, sanitize)); - headersAndData.addAll(getDataLines(response, sanitize)); - return String.join(INTERLINE_SEPARATOR, headersAndData); - } - - private String getHeaderLine(QueryResult response, boolean sanitize) { - List headers = getHeaders(response, sanitize); - return String.join(INLINE_SEPARATOR, headers); - } - - private List getDataLines(QueryResult response, boolean sanitize) { - List> data = getData(response, sanitize); - return data.stream().map(v -> String.join(INLINE_SEPARATOR, v)).collect(Collectors.toList()); - } - - private List getHeaders(QueryResult response, boolean sanitize) { - ImmutableList.Builder headers = ImmutableList.builder(); - response.columnNameTypes().forEach((column, type) -> headers.add(column)); - List result = headers.build(); - return sanitizeHeaders(result); - } - - private List> getData(QueryResult response, boolean sanitize) { - ImmutableList.Builder> dataLines = new ImmutableList.Builder<>(); - response - .iterator() - .forEachRemaining( - row -> { - ImmutableList.Builder line = new ImmutableList.Builder<>(); - // replace null values with empty string - Arrays.asList(row).forEach(val -> line.add(val == null ? "" : val.toString())); - dataLines.add(line.build()); - }); - List> result = dataLines.build(); - return sanitizeData(result); - } - - /** Sanitize headers because OpenSearch allows special character present in field names. */ - private List sanitizeHeaders(List headers) { - if (sanitize) { - return headers.stream() - .map(this::sanitizeCell) - .map(cell -> quoteIfRequired(INLINE_SEPARATOR, cell)) - .collect(Collectors.toList()); - } else { - return headers.stream() - .map(cell -> quoteIfRequired(INLINE_SEPARATOR, cell)) - .collect(Collectors.toList()); - } - } - - private List> sanitizeData(List> lines) { - List> result = new ArrayList<>(); - if (sanitize) { - for (List line : lines) { - result.add( - line.stream() - .map(this::sanitizeCell) - .map(cell -> quoteIfRequired(INLINE_SEPARATOR, cell)) - .collect(Collectors.toList())); - } - } else { - for (List line : lines) { - result.add( - line.stream() - .map(cell -> quoteIfRequired(INLINE_SEPARATOR, cell)) - .collect(Collectors.toList())); - } - } - return result; - } - - private String sanitizeCell(String cell) { - if (isStartWithSensitiveChar(cell)) { - return "'" + cell; - } - return cell; - } - - private String quoteIfRequired(String separator, String cell) { - final String quote = "\""; - return cell.contains(separator) ? 
quote + cell.replaceAll("\"", "\"\"") + quote : cell; - } - - private boolean isStartWithSensitiveChar(String cell) { - return SENSITIVE_CHAR.stream().anyMatch(cell::startsWith); - } - } -} diff --git a/protocol/src/main/java/org/opensearch/sql/protocol/response/format/FlatResponseWithPrettifier.java b/protocol/src/main/java/org/opensearch/sql/protocol/response/format/FlatResponseWithPrettifier.java new file mode 100644 index 0000000000..0e2527fbfe --- /dev/null +++ b/protocol/src/main/java/org/opensearch/sql/protocol/response/format/FlatResponseWithPrettifier.java @@ -0,0 +1,53 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.protocol.response.format; + +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import org.opensearch.sql.protocol.response.QueryResult; + +public class FlatResponseWithPrettifier extends FlatResponseBase { + private int[] maxWidths; + + FlatResponseWithPrettifier(QueryResult response, String inlineSeparator) { + super(response, inlineSeparator); + calculateMaxWidths(); + } + + private void calculateMaxWidths() { + int columns = getHeaders().size(); + maxWidths = new int[columns]; + + for (int i = 0; i < columns; i++) { + int maxWidth = getHeaders().get(i).length(); + for (List row : getData()) { + maxWidth = Math.max(maxWidth, row.get(i).length()); + } + maxWidths[i] = maxWidth; + } + } + + @Override + protected List getDataLines() { + return getData().stream().map(this::prettyFormatLine).collect(Collectors.toList()); + } + + @Override + protected String getHeaderLine() { + return prettyFormatLine(getHeaders()); + } + + private String prettyFormatLine(List line) { + return IntStream.range(0, line.size()) + .mapToObj(i -> padRight(line.get(i), maxWidths[i])) + .collect(Collectors.joining(separator)); + } + + private String padRight(String s, int n) { + return String.format("%-" + n + "s", s); + } +} diff --git a/protocol/src/main/java/org/opensearch/sql/protocol/response/format/FlatResponseWithSanitizer.java b/protocol/src/main/java/org/opensearch/sql/protocol/response/format/FlatResponseWithSanitizer.java new file mode 100644 index 0000000000..69a29c5cce --- /dev/null +++ b/protocol/src/main/java/org/opensearch/sql/protocol/response/format/FlatResponseWithSanitizer.java @@ -0,0 +1,53 @@ +package org.opensearch.sql.protocol.response.format; + +import com.google.common.collect.ImmutableSet; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import org.opensearch.sql.protocol.response.QueryResult; + +/** + * Sanitize methods are migrated from legacy CSV result. Sanitize both headers and data lines by: 1) + * Second double quote entire cell if any comma is found. + */ +public class FlatResponseWithSanitizer extends FlatResponseBase { + private static final Set SENSITIVE_CHAR = ImmutableSet.of("=", "+", "-", "@"); + + FlatResponseWithSanitizer(QueryResult response, String inlineSeparator) { + super(response, inlineSeparator); + } + + /** Sanitize headers because OpenSearch allows special character present in field names. 
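FlatResponseWithPrettifier aligns columns by recording the widest cell seen in each column and right-padding every header and value to that width. A minimal stand-alone illustration of the padding step, reproducing the padRight helper and the name|age layout that RawResponseFormatterTest expects from the pretty formatter:

```java
public class ColumnPaddingSketch {
  // Same idea as FlatResponseWithPrettifier.padRight: left-justify within a fixed width.
  static String padRight(String s, int n) {
    return String.format("%-" + n + "s", s);
  }

  public static void main(String[] args) {
    // Column widths come from the longest value per column: "Smith" -> 5, "age" -> 3.
    System.out.println(padRight("name", 5) + "|" + padRight("age", 3)); // "name |age"
    System.out.println(padRight("John", 5) + "|" + padRight("20", 3));  // "John |20 "
    System.out.println(padRight("Smith", 5) + "|" + padRight("30", 3)); // "Smith|30 "
  }
}
```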
*/ + @Override + protected List formatHeaders(List headers) { + return headers.stream() + .map(this::sanitizeCell) + .map(cell -> quoteIfRequired(separator, cell)) + .collect(Collectors.toList()); + } + + @Override + protected List> formatData(List> lines) { + List> result = new ArrayList<>(); + for (List line : lines) { + result.add( + line.stream() + .map(this::sanitizeCell) + .map(cell -> quoteIfRequired(separator, cell)) + .collect(Collectors.toList())); + } + return result; + } + + private String sanitizeCell(String cell) { + if (isStartWithSensitiveChar(cell)) { + return "'" + cell; + } + return cell; + } + + private boolean isStartWithSensitiveChar(String cell) { + return SENSITIVE_CHAR.stream().anyMatch(cell::startsWith); + } +} diff --git a/protocol/src/main/java/org/opensearch/sql/protocol/response/format/RawResponseFormatter.java b/protocol/src/main/java/org/opensearch/sql/protocol/response/format/RawResponseFormatter.java index 3b64be7062..061c2fcb8f 100644 --- a/protocol/src/main/java/org/opensearch/sql/protocol/response/format/RawResponseFormatter.java +++ b/protocol/src/main/java/org/opensearch/sql/protocol/response/format/RawResponseFormatter.java @@ -5,9 +5,51 @@ package org.opensearch.sql.protocol.response.format; -/** Response formatter to format response to csv or raw format. */ -public class RawResponseFormatter extends FlatResponseFormatter { +import org.opensearch.sql.protocol.response.QueryResult; + +/** Response formatter to format response to raw format. */ +public class RawResponseFormatter implements ResponseFormatter { + public static final String CONTENT_TYPE = "plain/text; charset=UTF-8"; + private final String separator; + private final boolean pretty; + public RawResponseFormatter() { - super("|", false); + this("|", false); + } + + public RawResponseFormatter(boolean pretty) { + this("|", pretty); + } + + /** + * Create a raw response formatter with separator and pretty parameter. + * + * @param pretty if true, display the columns with proper padding. Tracks the maximum width for + * each column to ensure proper formatting. 
+ */ + public RawResponseFormatter(String separator, boolean pretty) { + this.separator = separator; + this.pretty = pretty; + } + + @Override + public String format(QueryResult response) { + FlatResponseBase flatResponse; + if (pretty) { + flatResponse = new FlatResponseWithPrettifier(response, separator); + } else { + flatResponse = new FlatResponseBase(response, separator); + } + return flatResponse.format(); + } + + @Override + public String format(Throwable t) { + return ErrorFormatter.prettyFormat(t); + } + + @Override + public String contentType() { + return CONTENT_TYPE; } } diff --git a/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java b/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java index e03169e9f8..fc3402e20a 100644 --- a/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java +++ b/protocol/src/test/java/org/opensearch/sql/protocol/response/QueryResultTest.java @@ -22,7 +22,7 @@ class QueryResultTest { - private ExecutionEngine.Schema schema = + private final ExecutionEngine.Schema schema = new ExecutionEngine.Schema( ImmutableList.of( new ExecutionEngine.Schema.Column("name", null, STRING), diff --git a/protocol/src/test/java/org/opensearch/sql/protocol/response/format/CsvResponseFormatterTest.java b/protocol/src/test/java/org/opensearch/sql/protocol/response/format/CsvResponseFormatterTest.java index d27ac72373..ef2f2e8da8 100644 --- a/protocol/src/test/java/org/opensearch/sql/protocol/response/format/CsvResponseFormatterTest.java +++ b/protocol/src/test/java/org/opensearch/sql/protocol/response/format/CsvResponseFormatterTest.java @@ -13,7 +13,7 @@ import static org.opensearch.sql.data.model.ExprValueUtils.tupleValue; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import static org.opensearch.sql.data.type.ExprCoreType.STRING; -import static org.opensearch.sql.protocol.response.format.FlatResponseFormatter.CONTENT_TYPE; +import static org.opensearch.sql.protocol.response.format.CsvResponseFormatter.CONTENT_TYPE; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -108,8 +108,11 @@ void quoteIfRequired() { QueryResult response = new QueryResult( schema, - Arrays.asList(tupleValue(ImmutableMap.of("na,me", "John,Smith", ",,age", "30,,,")))); - String expected = "\"na,me\",\",,age\"%n\"John,Smith\",\"30,,,\""; + Arrays.asList( + tupleValue(ImmutableMap.of("na,me", "John,Smith", ",,age", "30,,,")), + tupleValue(ImmutableMap.of("na,me", "\"Janice Jones", ",,age", "26\"")))); + String expected = + "\"na,me\",\",,age\"%n\"John,Smith\",\"30,,,\"%n\"\"\"Janice Jones\",\"26\"\"\""; assertEquals(format(expected), formatter.format(response)); } diff --git a/protocol/src/test/java/org/opensearch/sql/protocol/response/format/RawResponseFormatterTest.java b/protocol/src/test/java/org/opensearch/sql/protocol/response/format/RawResponseFormatterTest.java index 65111bd3b9..ebdadcd50b 100644 --- a/protocol/src/test/java/org/opensearch/sql/protocol/response/format/RawResponseFormatterTest.java +++ b/protocol/src/test/java/org/opensearch/sql/protocol/response/format/RawResponseFormatterTest.java @@ -13,7 +13,7 @@ import static org.opensearch.sql.data.model.ExprValueUtils.tupleValue; import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; import static org.opensearch.sql.data.type.ExprCoreType.STRING; -import static org.opensearch.sql.protocol.response.format.FlatResponseFormatter.CONTENT_TYPE; +import static 
org.opensearch.sql.protocol.response.format.RawResponseFormatter.CONTENT_TYPE; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -23,9 +23,8 @@ import org.opensearch.sql.executor.ExecutionEngine; import org.opensearch.sql.protocol.response.QueryResult; -/** Unit test for {@link FlatResponseFormatter}. */ +/** Unit test for {@link RawResponseFormatter}. */ public class RawResponseFormatterTest { - private FlatResponseFormatter rawFormatter = new RawResponseFormatter(); @Test void formatResponse() { @@ -40,8 +39,10 @@ void formatResponse() { Arrays.asList( tupleValue(ImmutableMap.of("name", "John", "age", 20)), tupleValue(ImmutableMap.of("name", "Smith", "age", 30)))); - String expected = "name|age%nJohn|20%nSmith|30"; - assertEquals(format(expected), rawFormatter.format(response)); + String expected = "name|age%n" + "John|20%n" + "Smith|30"; + assertEquals(format(expected), getRawFormatter().format(response)); + String expectedPretty = "name |age%n" + "John |20 %n" + "Smith|30 "; + assertEquals(format(expectedPretty), getRawFormatterPretty().format(response)); } @Test @@ -67,8 +68,11 @@ void sanitizeHeaders() { "Seattle", "@age", 20)))); - String expected = "=firstname|+lastname|-city|@age%nJohn|Smith|Seattle|20"; - assertEquals(format(expected), rawFormatter.format(response)); + String expected = "=firstname|+lastname|-city|@age%n" + "John|Smith|Seattle|20"; + assertEquals(format(expected), getRawFormatter().format(response)); + String expectedPretty = + "=firstname|+lastname|-city |@age%n" + "John |Smith |Seattle|20 "; + assertEquals(format(expectedPretty), getRawFormatterPretty().format(response)); } @Test @@ -94,7 +98,16 @@ void sanitizeData() { + "-Seattle%n" + "@Seattle%n" + "Seattle="; - assertEquals(format(expected), rawFormatter.format(response)); + assertEquals(format(expected), getRawFormatter().format(response)); + String expectedPretty = + "city %n" + + "Seattle %n" + + "=Seattle%n" + + "+Seattle%n" + + "-Seattle%n" + + "@Seattle%n" + + "Seattle="; + assertEquals(format(expectedPretty), getRawFormatterPretty().format(response)); } @Test @@ -107,9 +120,19 @@ void quoteIfRequired() { QueryResult response = new QueryResult( schema, - Arrays.asList(tupleValue(ImmutableMap.of("na|me", "John|Smith", "||age", "30|||")))); - String expected = "\"na|me\"|\"||age\"%n\"John|Smith\"|\"30|||\""; - assertEquals(format(expected), rawFormatter.format(response)); + Arrays.asList( + tupleValue(ImmutableMap.of("na|me", "John|Smith", "||age", "30|||")), + tupleValue(ImmutableMap.of("na|me", "Ja\"ne J\"ones", "||age", "\"40\"")))); + String expected = + "\"na|me\"|\"||age\"%n" + + "\"John|Smith\"|\"30|||\"%n" + + "\"Ja\"\"ne J\"\"ones\"|\"\"\"40\"\"\""; + assertEquals(format(expected), getRawFormatter().format(response)); + String expectedPretty = + "\"na|me\" |\"||age\" %n" + + "\"John|Smith\" |\"30|||\" %n" + + "\"Ja\"\"ne J\"\"ones\"|\"\"\"40\"\"\""; + assertEquals(format(expectedPretty), getRawFormatterPretty().format(response)); } @Test @@ -117,12 +140,12 @@ void formatError() { Throwable t = new RuntimeException("This is an exception"); String expected = "{\n \"type\": \"RuntimeException\",\n \"reason\": \"This is an exception\"\n}"; - assertEquals(expected, rawFormatter.format(t)); + assertEquals(expected, getRawFormatter().format(t)); + assertEquals(expected, getRawFormatterPretty().format(t)); } @Test void escapeSanitize() { - FlatResponseFormatter escapeFormatter = new RawResponseFormatter(); ExecutionEngine.Schema schema = new 
ExecutionEngine.Schema( ImmutableList.of(new ExecutionEngine.Schema.Column("city", "city", STRING))); @@ -132,8 +155,10 @@ void escapeSanitize() { Arrays.asList( tupleValue(ImmutableMap.of("city", "=Seattle")), tupleValue(ImmutableMap.of("city", "||Seattle")))); - String expected = "city%n=Seattle%n\"||Seattle\""; - assertEquals(format(expected), escapeFormatter.format(response)); + String expected = "city%n" + "=Seattle%n" + "\"||Seattle\""; + assertEquals(format(expected), getRawFormatter().format(response)); + String expectedPretty = "city %n" + "=Seattle %n" + "\"||Seattle\""; + assertEquals(format(expectedPretty), getRawFormatterPretty().format(response)); } @Test @@ -147,13 +172,14 @@ void senstiveCharater() { Arrays.asList( tupleValue(ImmutableMap.of("city", "@Seattle")), tupleValue(ImmutableMap.of("city", "++Seattle")))); - String expected = "city%n@Seattle%n++Seattle"; - assertEquals(format(expected), rawFormatter.format(response)); + String expected = "city%n" + "@Seattle%n" + "++Seattle"; + assertEquals(format(expected), getRawFormatter().format(response)); + String expectedPretty = "city %n" + "@Seattle %n" + "++Seattle"; + assertEquals(format(expectedPretty), getRawFormatterPretty().format(response)); } @Test void senstiveCharaterWithSanitize() { - FlatResponseFormatter testFormater = new RawResponseFormatter(); ExecutionEngine.Schema schema = new ExecutionEngine.Schema( ImmutableList.of(new ExecutionEngine.Schema.Column("city", "city", STRING))); @@ -163,8 +189,10 @@ void senstiveCharaterWithSanitize() { Arrays.asList( tupleValue(ImmutableMap.of("city", "@Seattle")), tupleValue(ImmutableMap.of("city", "++Seattle|||")))); - String expected = "city%n@Seattle%n\"++Seattle|||\""; - assertEquals(format(expected), testFormater.format(response)); + String expected = "city%n" + "@Seattle%n" + "\"++Seattle|||\""; + assertEquals(format(expected), getRawFormatter().format(response)); + String expectedPretty = "city %n" + "@Seattle %n" + "\"++Seattle|||\""; + assertEquals(format(expectedPretty), getRawFormatterPretty().format(response)); } @Test @@ -183,12 +211,23 @@ void replaceNullValues() { ImmutableMap.of("firstname", LITERAL_NULL, "city", stringValue("Seattle"))), ExprTupleValue.fromExprValueMap( ImmutableMap.of("firstname", stringValue("John"), "city", LITERAL_MISSING)))); - String expected = "name|city%nJohn|Seattle%n|Seattle%nJohn|"; - assertEquals(format(expected), rawFormatter.format(response)); + String expected = "name|city%n" + "John|Seattle%n" + "|Seattle%n" + "John|"; + assertEquals(format(expected), getRawFormatter().format(response)); + String expectedPretty = "name|city %n" + "John|Seattle%n" + " |Seattle%n" + "John| "; + assertEquals(format(expectedPretty), getRawFormatterPretty().format(response)); } @Test void testContentType() { - assertEquals(rawFormatter.contentType(), CONTENT_TYPE); + assertEquals(getRawFormatter().contentType(), CONTENT_TYPE); + assertEquals(getRawFormatterPretty().contentType(), CONTENT_TYPE); + } + + private RawResponseFormatter getRawFormatter() { + return new RawResponseFormatter(); + } + + private RawResponseFormatter getRawFormatterPretty() { + return new RawResponseFormatter(true); } } diff --git a/release-notes/opensearch-sql.release-notes-2.16.0.0.md b/release-notes/opensearch-sql.release-notes-2.16.0.0.md new file mode 100644 index 0000000000..607d6e14dd --- /dev/null +++ b/release-notes/opensearch-sql.release-notes-2.16.0.0.md @@ -0,0 +1,39 @@ +Compatible with OpenSearch and OpenSearch Dashboards Version 2.16.0 + +### Enhancements +* 
Added Setting to Toggle Data Source Management Code Paths ([#2811](https://github.com/opensearch-project/sql/pull/2811)) +* Span in PPL statsByClause could be specified after fields ([#2810](https://github.com/opensearch-project/sql/pull/2810)) +* Updating Grammer changes same as main branch ([#2850](https://github.com/opensearch-project/sql/pull/2850)) + +### Bug Fixes +* Temp use of older nodejs version before moving to Almalinux8 ([#2816](https://github.com/opensearch-project/sql/pull/2816)) +* Fix yaml errors causing checks not to be run ([#2823](https://github.com/opensearch-project/sql/pull/2823)) +* Well format the raw response when query parameter "pretty" enabled ([#2829](https://github.com/opensearch-project/sql/pull/2829)) +* Add support for custom date format and openSearch date format for date fields as part of Lucene query ([#2762](https://github.com/opensearch-project/sql/pull/2762)) +* Fix SparkExecutionEngineConfigClusterSetting deserialize issue ([#2838](https://github.com/opensearch-project/sql/pull/2838)) +* Fix SparkSubmitParameterModifier issue ([#2837](https://github.com/opensearch-project/sql/pull/2837)) + +### Infrastructure +* Increment version to 2.16.0-SNAPSHOT ([#2743](https://github.com/opensearch-project/sql/pull/2743)) +* Fix checkout action failure ([#2819](https://github.com/opensearch-project/sql/pull/2819)) +* Fix MacOS workflow failure ([#2831](https://github.com/opensearch-project/sql/pull/2831)) + +### Refactoring +* Change DataSourceType from enum to class ([#2746](https://github.com/opensearch-project/sql/pull/2746)) +* Fix code style issue ([#2745](https://github.com/opensearch-project/sql/pull/2745)) +* Scaffold async-query-core and async-query module ([#2751](https://github.com/opensearch-project/sql/pull/2751)) +* Move classes from spark to async-query-core and async-query ([#2750](https://github.com/opensearch-project/sql/pull/2750)) +* Exclude integ-test, doctest and download task when built offline ([#2763](https://github.com/opensearch-project/sql/pull/2763)) +* Abstract metrics to reduce dependency to legacy ([#2768](https://github.com/opensearch-project/sql/pull/2768)) +* Remove AsyncQueryId ([#2769](https://github.com/opensearch-project/sql/pull/2769)) +* Add README to async-query-core ([#2770](https://github.com/opensearch-project/sql/pull/2770)) +* Separate build and validateAndBuild method in DataSourceMetadata ([#2752](https://github.com/opensearch-project/sql/pull/2752)) +* Abstract FlintIndex client ([#2771](https://github.com/opensearch-project/sql/pull/2771)) +* Fix statement to store requested langType ([#2779](https://github.com/opensearch-project/sql/pull/2779)) +* Push down OpenSearch specific exception handling ([#2782](https://github.com/opensearch-project/sql/pull/2782)) +* Implement integration test for async-query-core ([#2785](https://github.com/opensearch-project/sql/pull/2785)) +* Fix SQLQueryUtils to extract multiple tables ([#2791](https://github.com/opensearch-project/sql/pull/2791)) +* Eliminate dependency from async-query-core to legacy ([#2792](https://github.com/opensearch-project/sql/pull/2792)) +* Pass accountId to EMRServerlessClientFactory.getClient ([#2822](https://github.com/opensearch-project/sql/pull/2822)) +* Register system index descriptors through SystemIndexPlugin.getSystemIndexDescriptors ([#2817](https://github.com/opensearch-project/sql/pull/2817)) +* Introduce SparkParameterComposerCollection ([#2824](https://github.com/opensearch-project/sql/pull/2824)) diff --git 
a/release-notes/opensearch-sql.release-notes-2.17.0.0.md b/release-notes/opensearch-sql.release-notes-2.17.0.0.md new file mode 100644 index 0000000000..2282d71f94 --- /dev/null +++ b/release-notes/opensearch-sql.release-notes-2.17.0.0.md @@ -0,0 +1,35 @@ +Compatible with OpenSearch and OpenSearch Dashboards Version 2.17.0 + +### Features +* Flint query scheduler part1 - integrate job scheduler plugin ([#2889](https://github.com/opensearch-project/sql/pull/2889)) +* Flint query scheduler part 2 ([#2975](https://github.com/opensearch-project/sql/pull/2975)) +* Add feature flag for async query scheduler ([#2989](https://github.com/opensearch-project/sql/pull/2989)) + +### Enhancements +* Change the default value of plugins.query.size_limit to MAX_RESULT_WINDOW (10000) ([#2877](https://github.com/opensearch-project/sql/pull/2877)) +* Support common format geo point ([#2896](https://github.com/opensearch-project/sql/pull/2896)) +* Add TakeOrderedOperator ([#2906](https://github.com/opensearch-project/sql/pull/2906)) +* IF function should support complex predicates in PPL ([#2970](https://github.com/opensearch-project/sql/pull/2970)) +* Add flags for Iceberg and Lake Formation and Security Lake as a data source type ([#2978](https://github.com/opensearch-project/sql/pull/2978)) +* Adds validation to allow only flint queries and sql SELECT queries to security lake type datasource ([#2977](https://github.com/opensearch-project/sql/pull/2977)) +* Delegate Flint index vacuum operation to Spark ([#2995](https://github.com/opensearch-project/sql/pull/2995)) + +### Bug Fixes +* Restrict UDF functions ([#2884](https://github.com/opensearch-project/sql/pull/2884)) +* Update SqlBaseParser ([#2890](https://github.com/opensearch-project/sql/pull/2890)) +* Boolean function in PPL should be case insensitive ([#2842](https://github.com/opensearch-project/sql/pull/2842)) +* Fix SparkExecutionEngineConfigClusterSetting deserialize issue ([#2972](https://github.com/opensearch-project/sql/pull/2972)) +* Fix jobType for Batch and IndexDML query ([#2982](https://github.com/opensearch-project/sql/pull/2982)) +* Fix handler for existing query ([#2983](https://github.com/opensearch-project/sql/pull/2983)) + +### Infrastructure +* Increment version to 2.17.0-SNAPSHOT ([#2892](https://github.com/opensearch-project/sql/pull/2892)) +* Fix :integ-test:sqlBwcCluster#fullRestartClusterTask ([#2996](https://github.com/opensearch-project/sql/pull/2996)) + +### Refactoring +* Add RequestContext parameter to verifyDataSourceAccessAndGetRawMetada method ([#2872](https://github.com/opensearch-project/sql/pull/2872)) +* Add AsyncQueryRequestContext to QueryIdProvider parameter ([#2887](https://github.com/opensearch-project/sql/pull/2887)) +* Add AsyncQueryRequestContext to FlintIndexMetadataService/FlintIndexStateModelService ([#2885](https://github.com/opensearch-project/sql/pull/2885)) +* Add mvQuery attribute in IndexQueryDetails ([#2951](https://github.com/opensearch-project/sql/pull/2951)) +* Add AsyncQueryRequestContext to update/get in StatementStorageService ([#2953](https://github.com/opensearch-project/sql/pull/2953)) +* Extract validation logic from FlintIndexMetadataServiceImpl ([#2954](https://github.com/opensearch-project/sql/pull/2954)) diff --git a/release-notes/opensearch-sql.release-notes-2.18.0.0.md b/release-notes/opensearch-sql.release-notes-2.18.0.0.md new file mode 100644 index 0000000000..1acd0c7d21 --- /dev/null +++ b/release-notes/opensearch-sql.release-notes-2.18.0.0.md @@ -0,0 +1,20 @@ +Compatible with 
OpenSearch and OpenSearch Dashboards Version 2.18.0 + +### Features + +* Backport #2981 to 2.x ([#3111](https://github.com/opensearch-project/sql/pull/3111)) + +### Bug Fixes + +* Improve error handling for some more edge cases ([#3112](https://github.com/opensearch-project/sql/pull/3112)) +* Resolve Alias Issues in Legacy SQL with Filters ([#3109](https://github.com/opensearch-project/sql/pull/3109)) +* Bug Fixes for minor issues with SQL PIT refactor ([#3108](https://github.com/opensearch-project/sql/pull/3108)) +* Correct regular expression range ([#3107](https://github.com/opensearch-project/sql/pull/3107)) +* SQL pagination should work with the `pretty` parameter ([#3106](https://github.com/opensearch-project/sql/pull/3106)) +* Improve error handling for malformed query cursors ([#3084](https://github.com/opensearch-project/sql/pull/3084)) +* Remove scheduler index from SystemIndexDescriptor ([#3097](https://github.com/opensearch-project/sql/pull/3097)) + +### Maintenance + +* bump commons-io to 2.14.0 ([#3091](https://github.com/opensearch-project/sql/pull/3091)) +* Fix tests on 2.18 ([#3113](https://github.com/opensearch-project/sql/pull/3113)) diff --git a/settings.gradle b/settings.gradle index f09e18c8d1..9cf1715335 100644 --- a/settings.gradle +++ b/settings.gradle @@ -9,12 +9,10 @@ rootProject.name = 'opensearch-sql' include 'opensearch-sql-plugin' project(':opensearch-sql-plugin').projectDir = file('plugin') include 'ppl' -include 'integ-test' include 'common' include 'opensearch' include 'core' include 'protocol' -include 'doctest' include 'legacy' include 'sql' include 'prometheus' @@ -23,3 +21,9 @@ include 'datasources' include 'spark' include 'async-query-core' include 'async-query' + +// exclude integ-test/doctest in case of offline build since they need downloads +if (!gradle.startParameter.offline) { + include 'integ-test' + include 'doctest' +} diff --git a/spark-sql-application/.gitignore b/spark-sql-application/.gitignore deleted file mode 100644 index ec13a702be..0000000000 --- a/spark-sql-application/.gitignore +++ /dev/null @@ -1,14 +0,0 @@ -# Compiled output -target/ -project/target/ - -# sbt-specific files -.sbtserver -.sbt/ -.bsp/ - -# Miscellaneous -.DS_Store -*.class -*.log -*.zip \ No newline at end of file diff --git a/spark-sql-application/README.md b/spark-sql-application/README.md deleted file mode 100644 index 6422f294cd..0000000000 --- a/spark-sql-application/README.md +++ /dev/null @@ -1,109 +0,0 @@ -# Spark SQL Application - -This application execute sql query and store the result in OpenSearch index in following format -``` -"stepId":"", -"applicationId":"" -"schema": "json blob", -"result": "json blob" -``` - -## Prerequisites - -+ Spark 3.3.1 -+ Scala 2.12.15 -+ flint-spark-integration - -## Usage - -To use this application, you can run Spark with Flint extension: - -``` -./bin/spark-submit \ - --class org.opensearch.sql.SQLJob \ - --jars \ - sql-job.jar \ - \ - \ - \ - \ - \ - \ - \ -``` - -## Result Specifications - -Following example shows how the result is written to OpenSearch index after query execution. 
- -Let's assume sql query result is -``` -+------+------+ -|Letter|Number| -+------+------+ -|A |1 | -|B |2 | -|C |3 | -+------+------+ -``` -OpenSearch index document will look like -```json -{ - "_index" : ".query_execution_result", - "_id" : "A2WOsYgBMUoqCqlDJHrn", - "_score" : 1.0, - "_source" : { - "result" : [ - "{'Letter':'A','Number':1}", - "{'Letter':'B','Number':2}", - "{'Letter':'C','Number':3}" - ], - "schema" : [ - "{'column_name':'Letter','data_type':'string'}", - "{'column_name':'Number','data_type':'integer'}" - ], - "stepId" : "s-JZSB1139WIVU", - "applicationId" : "application_1687726870985_0003" - } -} -``` - -## Build - -To build and run this application with Spark, you can run: - -``` -sbt clean publishLocal -``` - -## Test - -To run tests, you can use: - -``` -sbt test -``` - -## Scalastyle - -To check code with scalastyle, you can run: - -``` -sbt scalastyle -``` - -## Code of Conduct - -This project has adopted an [Open Source Code of Conduct](../CODE_OF_CONDUCT.md). - -## Security - -If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. - -## License - -See the [LICENSE](../LICENSE.txt) file for our project's licensing. We will ask you to confirm the licensing of your contribution. - -## Copyright - -Copyright OpenSearch Contributors. See [NOTICE](../NOTICE) for details. \ No newline at end of file diff --git a/spark-sql-application/build.sbt b/spark-sql-application/build.sbt deleted file mode 100644 index 79d69a30d1..0000000000 --- a/spark-sql-application/build.sbt +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -name := "sql-job" - -version := "1.0" - -scalaVersion := "2.12.15" - -val sparkVersion = "3.3.2" - -mainClass := Some("org.opensearch.sql.SQLJob") - -artifactName := { (sv: ScalaVersion, module: ModuleID, artifact: Artifact) => - "sql-job.jar" -} - -resolvers ++= Seq( - ("apache-snapshots" at "http://repository.apache.org/snapshots/").withAllowInsecureProtocol(true) -) - -libraryDependencies ++= Seq( - "org.apache.spark" %% "spark-core" % sparkVersion % "provided", - "org.apache.spark" %% "spark-sql" % sparkVersion % "provided", - "org.scalatest" %% "scalatest" % "3.2.15" % Test -) diff --git a/spark-sql-application/project/build.properties b/spark-sql-application/project/build.properties deleted file mode 100644 index 46e43a97ed..0000000000 --- a/spark-sql-application/project/build.properties +++ /dev/null @@ -1 +0,0 @@ -sbt.version=1.8.2 diff --git a/spark-sql-application/scalastyle-config.xml b/spark-sql-application/scalastyle-config.xml deleted file mode 100644 index 37b1978cd7..0000000000 --- a/spark-sql-application/scalastyle-config.xml +++ /dev/null @@ -1,106 +0,0 @@ - - Scalastyle standard configuration - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/spark-sql-application/src/main/scala/org/opensearch/sql/SQLJob.scala b/spark-sql-application/src/main/scala/org/opensearch/sql/SQLJob.scala deleted file mode 100644 index 98a3a08134..0000000000 --- a/spark-sql-application/src/main/scala/org/opensearch/sql/SQLJob.scala +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * 
SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql - -import org.apache.spark.SparkConf -import org.apache.spark.sql.{DataFrame, SparkSession, Row} -import org.apache.spark.sql.types._ - -/** - * Spark SQL Application entrypoint - * - * @param args(0) - * sql query - * @param args(1) - * opensearch index name - * @param args(2-6) - * opensearch connection values required for flint-integration jar. host, port, scheme, auth, region respectively. - * @return - * write sql query result to given opensearch index - */ -object SQLJob { - def main(args: Array[String]) { - // Get the SQL query and Opensearch Config from the command line arguments - val query = args(0) - val index = args(1) - val host = args(2) - val port = args(3) - val scheme = args(4) - val auth = args(5) - val region = args(6) - - val conf: SparkConf = new SparkConf() - .setAppName("SQLJob") - .set("spark.sql.extensions", "org.opensearch.flint.spark.FlintSparkExtensions") - .set("spark.datasource.flint.host", host) - .set("spark.datasource.flint.port", port) - .set("spark.datasource.flint.scheme", scheme) - .set("spark.datasource.flint.auth", auth) - .set("spark.datasource.flint.region", region) - - // Create a SparkSession - val spark = SparkSession.builder().config(conf).enableHiveSupport().getOrCreate() - - try { - // Execute SQL query - val result: DataFrame = spark.sql(query) - - // Get Data - val data = getFormattedData(result, spark) - - // Write data to OpenSearch index - val aos = Map( - "host" -> host, - "port" -> port, - "scheme" -> scheme, - "auth" -> auth, - "region" -> region) - - data.write - .format("flint") - .options(aos) - .mode("append") - .save(index) - - } finally { - // Stop SparkSession - spark.stop() - } - } - - /** - * Create a new formatted dataframe with json result, json schema and EMR_STEP_ID. 
- * - * @param result - * sql query result dataframe - * @param spark - * spark session - * @return - * dataframe with result, schema and emr step id - */ - def getFormattedData(result: DataFrame, spark: SparkSession): DataFrame = { - // Create the schema dataframe - val schemaRows = result.schema.fields.map { field => - Row(field.name, field.dataType.typeName) - } - val resultSchema = spark.createDataFrame(spark.sparkContext.parallelize(schemaRows), StructType(Seq( - StructField("column_name", StringType, nullable = false), - StructField("data_type", StringType, nullable = false)))) - - // Define the data schema - val schema = StructType(Seq( - StructField("result", ArrayType(StringType, containsNull = true), nullable = true), - StructField("schema", ArrayType(StringType, containsNull = true), nullable = true), - StructField("stepId", StringType, nullable = true), - StructField("applicationId", StringType, nullable = true))) - - // Create the data rows - val rows = Seq(( - result.toJSON.collect.toList.map(_.replaceAll("'", "\\\\'").replaceAll("\"", "'")), - resultSchema.toJSON.collect.toList.map(_.replaceAll("\"", "'")), - sys.env.getOrElse("EMR_STEP_ID", "unknown"), - spark.sparkContext.applicationId)) - - // Create the DataFrame for data - spark.createDataFrame(rows).toDF(schema.fields.map(_.name): _*) - } -} diff --git a/spark-sql-application/src/test/scala/org/opensearch/sql/SQLJobTest.scala b/spark-sql-application/src/test/scala/org/opensearch/sql/SQLJobTest.scala deleted file mode 100644 index 7ec4e45450..0000000000 --- a/spark-sql-application/src/test/scala/org/opensearch/sql/SQLJobTest.scala +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql - -import org.scalatest.funsuite.AnyFunSuite -import org.apache.spark.sql.{DataFrame, Row, SparkSession} -import org.apache.spark.sql.types.{ArrayType, IntegerType, StringType, StructField, StructType} - - -class SQLJobTest extends AnyFunSuite{ - - val spark = SparkSession.builder().appName("Test").master("local").getOrCreate() - - // Define input dataframe - val inputSchema = StructType(Seq( - StructField("Letter", StringType, nullable = false), - StructField("Number", IntegerType, nullable = false) - )) - val inputRows = Seq( - Row("A", 1), - Row("B", 2), - Row("C", 3) - ) - val input: DataFrame = spark.createDataFrame(spark.sparkContext.parallelize(inputRows), inputSchema) - - test("Test getFormattedData method") { - // Define expected dataframe - val expectedSchema = StructType(Seq( - StructField("result", ArrayType(StringType, containsNull = true), nullable = true), - StructField("schema", ArrayType(StringType, containsNull = true), nullable = true), - StructField("stepId", StringType, nullable = true), - StructField("applicationId", StringType, nullable = true) - )) - val expectedRows = Seq( - Row( - Array("{'Letter':'A','Number':1}","{'Letter':'B','Number':2}", "{'Letter':'C','Number':3}"), - Array("{'column_name':'Letter','data_type':'string'}", "{'column_name':'Number','data_type':'integer'}"), - "unknown", - spark.sparkContext.applicationId - ) - ) - val expected: DataFrame = spark.createDataFrame(spark.sparkContext.parallelize(expectedRows), expectedSchema) - - // Compare the result - val result = SQLJob.getFormattedData(input, spark) - assertEqualDataframe(expected, result) - } - - def assertEqualDataframe(expected: DataFrame, result: DataFrame): Unit ={ - assert(expected.schema === result.schema) - assert(expected.collect() === 
result.collect()) - } -} diff --git a/spark/build.gradle b/spark/build.gradle index c221c4e36c..103c017791 100644 --- a/spark/build.gradle +++ b/spark/build.gradle @@ -7,51 +7,21 @@ plugins { id 'java-library' id "io.freefair.lombok" id 'jacoco' - id 'antlr' } repositories { mavenCentral() } -tasks.register('downloadG4Files', Exec) { - description = 'Download remote .g4 files from GitHub' - - executable 'curl' - - args '-o', 'src/main/antlr/FlintSparkSqlExtensions.g4', 'https://raw.githubusercontent.com/opensearch-project/opensearch-spark/main/flint-spark-integration/src/main/antlr4/FlintSparkSqlExtensions.g4' - args '-o', 'src/main/antlr/SparkSqlBase.g4', 'https://raw.githubusercontent.com/opensearch-project/opensearch-spark/main/flint-spark-integration/src/main/antlr4/SparkSqlBase.g4' - args '-o', 'src/main/antlr/SqlBaseParser.g4', 'https://raw.githubusercontent.com/apache/spark/master/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4' - args '-o', 'src/main/antlr/SqlBaseLexer.g4', 'https://raw.githubusercontent.com/apache/spark/master/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4' -} - -generateGrammarSource { - arguments += ['-visitor', '-package', 'org.opensearch.sql.spark.antlr.parser'] - source = sourceSets.main.antlr - outputDirectory = file("build/generated-src/antlr/main/org/opensearch/sql/spark/antlr/parser") -} -configurations { - compile { - extendsFrom = extendsFrom.findAll { it != configurations.antlr } - } -} - -// Make sure the downloadG4File task runs before the generateGrammarSource task -generateGrammarSource.dependsOn downloadG4Files - dependencies { - antlr "org.antlr:antlr4:4.7.1" - api project(':core') - implementation project(':protocol') implementation project(':datasources') - implementation project(':legacy') implementation group: 'org.opensearch', name: 'opensearch', version: "${opensearch_version}" implementation group: 'org.json', name: 'json', version: '20231013' api group: 'com.amazonaws', name: 'aws-java-sdk-emr', version: "${aws_java_sdk_version}" api group: 'com.amazonaws', name: 'aws-java-sdk-emrserverless', version: "${aws_java_sdk_version}" - implementation group: 'commons-io', name: 'commons-io', version: '2.8.0' + implementation group: 'commons-io', name: 'commons-io', version: '2.14.0' testImplementation(platform("org.junit:junit-bom:5.9.3")) @@ -59,20 +29,12 @@ dependencies { testImplementation group: 'org.mockito', name: 'mockito-core', version: '5.7.0' testImplementation group: 'org.mockito', name: 'mockito-junit-jupiter', version: '5.7.0' - testCompileOnly('junit:junit:4.13.1') { - exclude group: 'org.hamcrest', module: 'hamcrest-core' - } - testRuntimeOnly("org.junit.vintage:junit-vintage-engine") { - exclude group: 'org.hamcrest', module: 'hamcrest-core' - } testRuntimeOnly("org.junit.jupiter:junit-jupiter-engine") { exclude group: 'org.hamcrest', module: 'hamcrest-core' } testRuntimeOnly("org.junit.platform:junit-platform-launcher") { because 'allows tests to run from IDEs that bundle older version of launcher' } - testImplementation("org.opensearch.test:framework:${opensearch_version}") - testImplementation project(':opensearch') } test { @@ -84,54 +46,28 @@ test { exceptionFormat "full" } } -task junit4(type: Test) { - useJUnitPlatform { - includeEngines("junit-vintage") - } - systemProperty 'tests.security.manager', 'false' - testLogging { - events "failed" - exceptionFormat "full" - } -} jacocoTestReport { - dependsOn test, junit4 - executionData test, junit4 + dependsOn test + 
executionData test reports { html.required = true xml.required = true } afterEvaluate { classDirectories.setFrom(files(classDirectories.files.collect { - fileTree(dir: it, exclude: ['**/antlr/parser/**']) })) } } jacocoTestCoverageVerification { - dependsOn test, junit4 - executionData test, junit4 + dependsOn test + executionData test violationRules { rule { element = 'CLASS' excludes = [ 'org.opensearch.sql.spark.data.constants.*', - 'org.opensearch.sql.spark.rest.*', - 'org.opensearch.sql.spark.transport.model.*', - 'org.opensearch.sql.spark.asyncquery.model.*', - 'org.opensearch.sql.spark.asyncquery.exceptions.*', - 'org.opensearch.sql.spark.dispatcher.model.*', - 'org.opensearch.sql.spark.flint.FlintIndexType', - // ignore because XContext IOException - 'org.opensearch.sql.spark.execution.statestore.StateStore', - 'org.opensearch.sql.spark.execution.session.SessionModel', - 'org.opensearch.sql.spark.execution.statement.StatementModel', - 'org.opensearch.sql.spark.flint.FlintIndexStateModel', - // TODO: add tests for purging flint indices - 'org.opensearch.sql.spark.cluster.ClusterManagerEventListener*', - 'org.opensearch.sql.spark.cluster.FlintIndexRetention', - 'org.opensearch.sql.spark.cluster.IndexCleanup' ] limit { counter = 'LINE' @@ -145,7 +81,6 @@ jacocoTestCoverageVerification { } afterEvaluate { classDirectories.setFrom(files(classDirectories.files.collect { - fileTree(dir: it, exclude: ['**/antlr/parser/**']) })) } } diff --git a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/AsyncQueryId.java b/spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/AsyncQueryId.java deleted file mode 100644 index b99ebe0e8c..0000000000 --- a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/AsyncQueryId.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.spark.asyncquery.model; - -import static org.opensearch.sql.spark.utils.IDUtils.decode; -import static org.opensearch.sql.spark.utils.IDUtils.encode; - -import lombok.Data; - -/** Async query id. */ -@Data -public class AsyncQueryId { - private final String id; - - public static AsyncQueryId newAsyncQueryId(String datasourceName) { - return new AsyncQueryId(encode(datasourceName)); - } - - public String getDataSourceName() { - return decode(id); - } - - /** OpenSearch DocId. 
*/ - public String docId() { - return "qid" + id; - } - - @Override - public String toString() { - return "asyncQueryId=" + id; - } -} diff --git a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/SparkSubmitParameters.java b/spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/SparkSubmitParameters.java deleted file mode 100644 index 6badea6a74..0000000000 --- a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/SparkSubmitParameters.java +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.spark.asyncquery.model; - -import static org.opensearch.sql.datasources.glue.GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_AUTH; -import static org.opensearch.sql.datasources.glue.GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_AUTH_PASSWORD; -import static org.opensearch.sql.datasources.glue.GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_AUTH_USERNAME; -import static org.opensearch.sql.datasources.glue.GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_REGION; -import static org.opensearch.sql.datasources.glue.GlueDataSourceFactory.GLUE_INDEX_STORE_OPENSEARCH_URI; -import static org.opensearch.sql.datasources.glue.GlueDataSourceFactory.GLUE_LAKEFORMATION_ENABLED; -import static org.opensearch.sql.datasources.glue.GlueDataSourceFactory.GLUE_ROLE_ARN; -import static org.opensearch.sql.spark.data.constants.SparkConstants.*; - -import java.net.URI; -import java.net.URISyntaxException; -import java.util.LinkedHashMap; -import java.util.Map; -import java.util.function.Supplier; -import lombok.AllArgsConstructor; -import lombok.RequiredArgsConstructor; -import lombok.Setter; -import org.apache.commons.lang3.BooleanUtils; -import org.apache.commons.text.StringEscapeUtils; -import org.opensearch.sql.datasource.model.DataSourceMetadata; -import org.opensearch.sql.datasource.model.DataSourceType; -import org.opensearch.sql.datasources.auth.AuthenticationType; -import org.opensearch.sql.spark.config.SparkSubmitParameterModifier; -import org.opensearch.sql.spark.execution.statestore.OpenSearchStateStoreUtil; - -/** Define Spark Submit Parameters. 
*/ -@AllArgsConstructor -@RequiredArgsConstructor -public class SparkSubmitParameters { - public static final String SPACE = " "; - public static final String EQUALS = "="; - public static final String FLINT_BASIC_AUTH = "basic"; - - private final String className; - private final Map config; - - /** Extra parameters to append finally */ - @Setter private String extraParameters; - - public void setConfigItem(String key, String value) { - config.put(key, value); - } - - public void deleteConfigItem(String key) { - config.remove(key); - } - - public static Builder builder() { - return Builder.builder(); - } - - public SparkSubmitParameters acceptModifier(SparkSubmitParameterModifier modifier) { - modifier.modifyParameters(this); - return this; - } - - public static class Builder { - - private String className; - private final Map config; - private String extraParameters; - - private Builder() { - className = DEFAULT_CLASS_NAME; - config = new LinkedHashMap<>(); - - config.put(S3_AWS_CREDENTIALS_PROVIDER_KEY, DEFAULT_S3_AWS_CREDENTIALS_PROVIDER_VALUE); - config.put( - HADOOP_CATALOG_CREDENTIALS_PROVIDER_FACTORY_KEY, - DEFAULT_GLUE_CATALOG_CREDENTIALS_PROVIDER_FACTORY_KEY); - config.put(SPARK_JARS_KEY, ICEBERG_SPARK_RUNTIME_PACKAGE); - config.put( - SPARK_JAR_PACKAGES_KEY, - SPARK_STANDALONE_PACKAGE + "," + SPARK_LAUNCHER_PACKAGE + "," + PPL_STANDALONE_PACKAGE); - config.put(SPARK_JAR_REPOSITORIES_KEY, AWS_SNAPSHOT_REPOSITORY); - config.put(SPARK_DRIVER_ENV_JAVA_HOME_KEY, JAVA_HOME_LOCATION); - config.put(SPARK_EXECUTOR_ENV_JAVA_HOME_KEY, JAVA_HOME_LOCATION); - config.put(SPARK_DRIVER_ENV_FLINT_CLUSTER_NAME_KEY, FLINT_DEFAULT_CLUSTER_NAME); - config.put(SPARK_EXECUTOR_ENV_FLINT_CLUSTER_NAME_KEY, FLINT_DEFAULT_CLUSTER_NAME); - config.put(FLINT_INDEX_STORE_HOST_KEY, FLINT_DEFAULT_HOST); - config.put(FLINT_INDEX_STORE_PORT_KEY, FLINT_DEFAULT_PORT); - config.put(FLINT_INDEX_STORE_SCHEME_KEY, FLINT_DEFAULT_SCHEME); - config.put(FLINT_INDEX_STORE_AUTH_KEY, FLINT_DEFAULT_AUTH); - config.put(FLINT_CREDENTIALS_PROVIDER_KEY, EMR_ASSUME_ROLE_CREDENTIALS_PROVIDER); - config.put( - SPARK_SQL_EXTENSIONS_KEY, - ICEBERG_SPARK_EXTENSION + "," + FLINT_SQL_EXTENSION + "," + FLINT_PPL_EXTENSION); - config.put(HIVE_METASTORE_CLASS_KEY, GLUE_HIVE_CATALOG_FACTORY_CLASS); - config.put(SPARK_CATALOG, ICEBERG_SESSION_CATALOG); - config.put(SPARK_CATALOG_CATALOG_IMPL, ICEBERG_GLUE_CATALOG); - } - - public static Builder builder() { - return new Builder(); - } - - public Builder className(String className) { - this.className = className; - return this; - } - - public Builder clusterName(String clusterName) { - config.put(SPARK_DRIVER_ENV_FLINT_CLUSTER_NAME_KEY, clusterName); - config.put(SPARK_EXECUTOR_ENV_FLINT_CLUSTER_NAME_KEY, clusterName); - return this; - } - - /** - * For query in spark submit parameters to be parsed correctly, escape the characters in the - * query, then wrap the query with double quotes. - */ - public Builder query(String query) { - String escapedQuery = StringEscapeUtils.escapeJava(query); - String wrappedQuery = "\"" + escapedQuery + "\""; - config.put(FLINT_JOB_QUERY, wrappedQuery); - return this; - } - - public Builder dataSource(DataSourceMetadata metadata) { - if (DataSourceType.S3GLUE.equals(metadata.getConnector())) { - String roleArn = metadata.getProperties().get(GLUE_ROLE_ARN); - - config.put(DRIVER_ENV_ASSUME_ROLE_ARN_KEY, roleArn); - config.put(EXECUTOR_ENV_ASSUME_ROLE_ARN_KEY, roleArn); - config.put(HIVE_METASTORE_GLUE_ARN_KEY, roleArn); - config.put("spark.sql.catalog." 
+ metadata.getName(), FLINT_DELEGATE_CATALOG); - config.put(FLINT_DATA_SOURCE_KEY, metadata.getName()); - - final boolean lakeFormationEnabled = - BooleanUtils.toBoolean(metadata.getProperties().get(GLUE_LAKEFORMATION_ENABLED)); - config.put(EMR_LAKEFORMATION_OPTION, Boolean.toString(lakeFormationEnabled)); - config.put(FLINT_ACCELERATE_USING_COVERING_INDEX, Boolean.toString(!lakeFormationEnabled)); - - setFlintIndexStoreHost( - parseUri( - metadata.getProperties().get(GLUE_INDEX_STORE_OPENSEARCH_URI), metadata.getName())); - setFlintIndexStoreAuthProperties( - metadata.getProperties().get(GLUE_INDEX_STORE_OPENSEARCH_AUTH), - () -> metadata.getProperties().get(GLUE_INDEX_STORE_OPENSEARCH_AUTH_USERNAME), - () -> metadata.getProperties().get(GLUE_INDEX_STORE_OPENSEARCH_AUTH_PASSWORD), - () -> metadata.getProperties().get(GLUE_INDEX_STORE_OPENSEARCH_REGION)); - config.put("spark.flint.datasource.name", metadata.getName()); - return this; - } - throw new UnsupportedOperationException( - String.format( - "UnSupported datasource type for async queries:: %s", metadata.getConnector())); - } - - private void setFlintIndexStoreHost(URI uri) { - config.put(FLINT_INDEX_STORE_HOST_KEY, uri.getHost()); - config.put(FLINT_INDEX_STORE_PORT_KEY, String.valueOf(uri.getPort())); - config.put(FLINT_INDEX_STORE_SCHEME_KEY, uri.getScheme()); - } - - private void setFlintIndexStoreAuthProperties( - String authType, - Supplier userName, - Supplier password, - Supplier region) { - if (AuthenticationType.get(authType).equals(AuthenticationType.BASICAUTH)) { - config.put(FLINT_INDEX_STORE_AUTH_KEY, FLINT_BASIC_AUTH); - config.put(FLINT_INDEX_STORE_AUTH_USERNAME, userName.get()); - config.put(FLINT_INDEX_STORE_AUTH_PASSWORD, password.get()); - } else if (AuthenticationType.get(authType).equals(AuthenticationType.AWSSIGV4AUTH)) { - config.put(FLINT_INDEX_STORE_AUTH_KEY, "sigv4"); - config.put(FLINT_INDEX_STORE_AWSREGION_KEY, region.get()); - } else { - config.put(FLINT_INDEX_STORE_AUTH_KEY, authType); - } - } - - private URI parseUri(String opensearchUri, String datasourceName) { - try { - return new URI(opensearchUri); - } catch (URISyntaxException e) { - throw new IllegalArgumentException( - String.format( - "Bad URI in indexstore configuration of the : %s datasoure.", datasourceName)); - } - } - - public Builder structuredStreaming(Boolean isStructuredStreaming) { - if (isStructuredStreaming) { - config.put("spark.flint.job.type", "streaming"); - } - return this; - } - - public Builder extraParameters(String params) { - extraParameters = params; - return this; - } - - public SparkSubmitParameters build() { - return new SparkSubmitParameters(className, config, extraParameters); - } - } - - public void sessionExecution(String sessionId, String datasourceName) { - config.put(FLINT_JOB_REQUEST_INDEX, OpenSearchStateStoreUtil.getIndexName(datasourceName)); - config.put(FLINT_JOB_SESSION_ID, sessionId); - } - - @Override - public String toString() { - StringBuilder stringBuilder = new StringBuilder(); - stringBuilder.append(" --class "); - stringBuilder.append(this.className); - stringBuilder.append(SPACE); - for (String key : config.keySet()) { - stringBuilder.append(" --conf "); - stringBuilder.append(key); - stringBuilder.append(EQUALS); - stringBuilder.append(config.get(key)); - stringBuilder.append(SPACE); - } - - if (extraParameters != null) { - stringBuilder.append(extraParameters); - } - return stringBuilder.toString(); - } -} diff --git a/spark/src/main/java/org/opensearch/sql/spark/client/EmrClientImpl.java 
b/spark/src/main/java/org/opensearch/sql/spark/client/EmrClientImpl.java index 87f35bbc1e..7b7fa1eadf 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/client/EmrClientImpl.java +++ b/spark/src/main/java/org/opensearch/sql/spark/client/EmrClientImpl.java @@ -6,7 +6,6 @@ package org.opensearch.sql.spark.client; import static org.opensearch.sql.datasource.model.DataSourceMetadata.DEFAULT_RESULT_INDEX; -import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_SQL_APPLICATION_JAR; import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce; import com.amazonaws.services.elasticmapreduce.model.ActionOnFailure; @@ -26,12 +25,16 @@ import org.opensearch.sql.spark.response.SparkResponse; public class EmrClientImpl implements SparkClient { + // EMR-S will download JAR to local maven + public static final String SPARK_SQL_APPLICATION_JAR = + "file:///home/hadoop/.ivy2/jars/org.opensearch_opensearch-spark-sql-application_2.12-0.3.0-SNAPSHOT.jar"; + private final AmazonElasticMapReduce emr; private final String emrCluster; private final FlintHelper flint; private final String sparkApplicationJar; private static final Logger logger = LogManager.getLogger(EmrClientImpl.class); - private SparkResponse sparkResponse; + private final SparkResponse sparkResponse; /** * Constructor for EMR Client Implementation. diff --git a/spark/src/main/java/org/opensearch/sql/spark/config/OpenSearchSparkSubmitParameterModifier.java b/spark/src/main/java/org/opensearch/sql/spark/config/OpenSearchSparkSubmitParameterModifier.java deleted file mode 100644 index f1831c9786..0000000000 --- a/spark/src/main/java/org/opensearch/sql/spark/config/OpenSearchSparkSubmitParameterModifier.java +++ /dev/null @@ -1,15 +0,0 @@ -package org.opensearch.sql.spark.config; - -import lombok.AllArgsConstructor; -import org.opensearch.sql.spark.asyncquery.model.SparkSubmitParameters; - -@AllArgsConstructor -public class OpenSearchSparkSubmitParameterModifier implements SparkSubmitParameterModifier { - - private String extraParameters; - - @Override - public void modifyParameters(SparkSubmitParameters parameters) { - parameters.setExtraParameters(this.extraParameters); - } -} diff --git a/spark/src/main/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigSupplierImpl.java b/spark/src/main/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigSupplierImpl.java deleted file mode 100644 index 8d2c40f4cd..0000000000 --- a/spark/src/main/java/org/opensearch/sql/spark/config/SparkExecutionEngineConfigSupplierImpl.java +++ /dev/null @@ -1,47 +0,0 @@ -package org.opensearch.sql.spark.config; - -import static org.opensearch.sql.common.setting.Settings.Key.CLUSTER_NAME; -import static org.opensearch.sql.common.setting.Settings.Key.SPARK_EXECUTION_ENGINE_CONFIG; - -import java.security.AccessController; -import java.security.PrivilegedAction; -import lombok.AllArgsConstructor; -import org.apache.commons.lang3.StringUtils; -import org.opensearch.cluster.ClusterName; -import org.opensearch.sql.common.setting.Settings; -import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; - -@AllArgsConstructor -public class SparkExecutionEngineConfigSupplierImpl implements SparkExecutionEngineConfigSupplier { - - private Settings settings; - - @Override - public SparkExecutionEngineConfig getSparkExecutionEngineConfig( - AsyncQueryRequestContext asyncQueryRequestContext) { - ClusterName clusterName = settings.getSettingValue(CLUSTER_NAME); - return 
getBuilderFromSettingsIfAvailable().clusterName(clusterName.value()).build(); - } - - private SparkExecutionEngineConfig.SparkExecutionEngineConfigBuilder - getBuilderFromSettingsIfAvailable() { - String sparkExecutionEngineConfigSettingString = - this.settings.getSettingValue(SPARK_EXECUTION_ENGINE_CONFIG); - if (!StringUtils.isBlank(sparkExecutionEngineConfigSettingString)) { - SparkExecutionEngineConfigClusterSetting setting = - AccessController.doPrivileged( - (PrivilegedAction) - () -> - SparkExecutionEngineConfigClusterSetting.toSparkExecutionEngineConfig( - sparkExecutionEngineConfigSettingString)); - return SparkExecutionEngineConfig.builder() - .applicationId(setting.getApplicationId()) - .executionRoleARN(setting.getExecutionRoleARN()) - .sparkSubmitParameterModifier( - new OpenSearchSparkSubmitParameterModifier(setting.getSparkSubmitParameters())) - .region(setting.getRegion()); - } else { - return SparkExecutionEngineConfig.builder(); - } - } -} diff --git a/spark/src/main/java/org/opensearch/sql/spark/execution/statestore/OpenSearchStatementStorageService.java b/spark/src/main/java/org/opensearch/sql/spark/execution/statestore/OpenSearchStatementStorageService.java deleted file mode 100644 index 5fcccc22a4..0000000000 --- a/spark/src/main/java/org/opensearch/sql/spark/execution/statestore/OpenSearchStatementStorageService.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.spark.execution.statestore; - -import java.util.Optional; -import lombok.RequiredArgsConstructor; -import org.opensearch.sql.spark.asyncquery.model.AsyncQueryRequestContext; -import org.opensearch.sql.spark.execution.statement.StatementModel; -import org.opensearch.sql.spark.execution.statement.StatementState; -import org.opensearch.sql.spark.execution.xcontent.StatementModelXContentSerializer; - -@RequiredArgsConstructor -public class OpenSearchStatementStorageService implements StatementStorageService { - - private final StateStore stateStore; - private final StatementModelXContentSerializer serializer; - - @Override - public StatementModel createStatement( - StatementModel statementModel, AsyncQueryRequestContext asyncQueryRequestContext) { - return stateStore.create( - statementModel.getId(), - statementModel, - StatementModel::copy, - OpenSearchStateStoreUtil.getIndexName(statementModel.getDatasourceName())); - } - - @Override - public Optional getStatement(String id, String datasourceName) { - return stateStore.get( - id, serializer::fromXContent, OpenSearchStateStoreUtil.getIndexName(datasourceName)); - } - - @Override - public StatementModel updateStatementState( - StatementModel oldStatementModel, StatementState statementState) { - return stateStore.updateState( - oldStatementModel, - statementState, - StatementModel::copyWithState, - OpenSearchStateStoreUtil.getIndexName(oldStatementModel.getDatasourceName())); - } -} diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexStateModelService.java b/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexStateModelService.java deleted file mode 100644 index 94647f4e07..0000000000 --- a/spark/src/main/java/org/opensearch/sql/spark/flint/FlintIndexStateModelService.java +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.spark.flint; - -import java.util.Optional; - -/** - * Abstraction over flint index state storage. 
Flint index state will maintain the status of each - * flint index. - */ -public interface FlintIndexStateModelService { - FlintIndexStateModel createFlintIndexStateModel(FlintIndexStateModel flintIndexStateModel); - - Optional getFlintIndexStateModel(String id, String datasourceName); - - FlintIndexStateModel updateFlintIndexState( - FlintIndexStateModel flintIndexStateModel, - FlintIndexState flintIndexState, - String datasourceName); - - boolean deleteFlintIndexStateModel(String id, String datasourceName); -} diff --git a/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpVacuum.java b/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpVacuum.java deleted file mode 100644 index ffd09e16a4..0000000000 --- a/spark/src/main/java/org/opensearch/sql/spark/flint/operation/FlintIndexOpVacuum.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.spark.flint.operation; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.opensearch.action.admin.indices.delete.DeleteIndexRequest; -import org.opensearch.action.support.master.AcknowledgedResponse; -import org.opensearch.client.Client; -import org.opensearch.sql.spark.client.EMRServerlessClientFactory; -import org.opensearch.sql.spark.flint.FlintIndexMetadata; -import org.opensearch.sql.spark.flint.FlintIndexState; -import org.opensearch.sql.spark.flint.FlintIndexStateModel; -import org.opensearch.sql.spark.flint.FlintIndexStateModelService; - -/** Flint index vacuum operation. */ -public class FlintIndexOpVacuum extends FlintIndexOp { - - private static final Logger LOG = LogManager.getLogger(); - - /** OpenSearch client. 
*/ - private final Client client; - - public FlintIndexOpVacuum( - FlintIndexStateModelService flintIndexStateModelService, - String datasourceName, - Client client, - EMRServerlessClientFactory emrServerlessClientFactory) { - super(flintIndexStateModelService, datasourceName, emrServerlessClientFactory); - this.client = client; - } - - @Override - boolean validate(FlintIndexState state) { - return state == FlintIndexState.DELETED; - } - - @Override - FlintIndexState transitioningState() { - return FlintIndexState.VACUUMING; - } - - @Override - public void runOp(FlintIndexMetadata flintIndexMetadata, FlintIndexStateModel flintIndex) { - LOG.info("Vacuuming Flint index {}", flintIndexMetadata.getOpensearchIndexName()); - DeleteIndexRequest request = - new DeleteIndexRequest().indices(flintIndexMetadata.getOpensearchIndexName()); - AcknowledgedResponse response = client.admin().indices().delete(request).actionGet(); - LOG.info("OpenSearch index delete result: {}", response.isAcknowledged()); - } - - @Override - FlintIndexState stableState() { - // Instruct StateStore to purge the index state doc - return FlintIndexState.NONE; - } -} diff --git a/spark/src/main/java/org/opensearch/sql/spark/helper/FlintHelper.java b/spark/src/main/java/org/opensearch/sql/spark/helper/FlintHelper.java index 10d880187f..206ff4aed4 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/helper/FlintHelper.java +++ b/spark/src/main/java/org/opensearch/sql/spark/helper/FlintHelper.java @@ -5,16 +5,18 @@ package org.opensearch.sql.spark.helper; -import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_DEFAULT_AUTH; -import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_DEFAULT_HOST; -import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_DEFAULT_PORT; -import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_DEFAULT_REGION; -import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_DEFAULT_SCHEME; -import static org.opensearch.sql.spark.data.constants.SparkConstants.FLINT_INTEGRATION_JAR; - import lombok.Getter; public class FlintHelper { + // TODO should be replaced with mvn jar. 
+ public static final String FLINT_INTEGRATION_JAR = + "s3://spark-datasource/flint-spark-integration-assembly-0.3.0-SNAPSHOT.jar"; + public static final String FLINT_DEFAULT_HOST = "localhost"; + public static final String FLINT_DEFAULT_PORT = "9200"; + public static final String FLINT_DEFAULT_SCHEME = "http"; + public static final String FLINT_DEFAULT_AUTH = "noauth"; + public static final String FLINT_DEFAULT_REGION = "us-west-2"; + @Getter private final String flintIntegrationJar; @Getter private final String flintHost; @Getter private final String flintPort; diff --git a/spark/src/main/java/org/opensearch/sql/spark/storage/SparkStorageFactory.java b/spark/src/main/java/org/opensearch/sql/spark/storage/SparkStorageFactory.java index 467bacbaea..4495eb0fac 100644 --- a/spark/src/main/java/org/opensearch/sql/spark/storage/SparkStorageFactory.java +++ b/spark/src/main/java/org/opensearch/sql/spark/storage/SparkStorageFactory.java @@ -5,9 +5,6 @@ package org.opensearch.sql.spark.storage; -import static org.opensearch.sql.spark.data.constants.SparkConstants.EMR; -import static org.opensearch.sql.spark.data.constants.SparkConstants.STEP_ID_FIELD; - import com.amazonaws.auth.AWSStaticCredentialsProvider; import com.amazonaws.auth.BasicAWSCredentials; import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce; @@ -36,6 +33,8 @@ public class SparkStorageFactory implements DataSourceFactory { private final Client client; private final Settings settings; + public static final String EMR = "emr"; + public static final String STEP_ID_FIELD = "stepId.keyword"; // Spark datasource configuration properties public static final String CONNECTOR_TYPE = "spark.connector"; public static final String SPARK_SQL_APPLICATION = "spark.sql.application"; @@ -44,7 +43,6 @@ public class SparkStorageFactory implements DataSourceFactory { public static final String EMR_CLUSTER = "emr.cluster"; public static final String EMR_AUTH_TYPE = "emr.auth.type"; public static final String EMR_REGION = "emr.auth.region"; - public static final String EMR_ROLE_ARN = "emr.auth.role_arn"; public static final String EMR_ACCESS_KEY = "emr.auth.access_key"; public static final String EMR_SECRET_KEY = "emr.auth.secret_key"; diff --git a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/IndexQuerySpecVacuumTest.java b/spark/src/test/java/org/opensearch/sql/spark/asyncquery/IndexQuerySpecVacuumTest.java deleted file mode 100644 index 3bccf1b30b..0000000000 --- a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/IndexQuerySpecVacuumTest.java +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.spark.asyncquery; - -import static org.opensearch.sql.spark.flint.FlintIndexState.ACTIVE; -import static org.opensearch.sql.spark.flint.FlintIndexState.CREATING; -import static org.opensearch.sql.spark.flint.FlintIndexState.DELETED; -import static org.opensearch.sql.spark.flint.FlintIndexState.EMPTY; -import static org.opensearch.sql.spark.flint.FlintIndexState.REFRESHING; -import static org.opensearch.sql.spark.flint.FlintIndexState.VACUUMING; -import static org.opensearch.sql.spark.flint.FlintIndexType.COVERING; -import static org.opensearch.sql.spark.flint.FlintIndexType.MATERIALIZED_VIEW; -import static org.opensearch.sql.spark.flint.FlintIndexType.SKIPPING; - -import com.amazonaws.services.emrserverless.model.CancelJobRunResult; -import com.amazonaws.services.emrserverless.model.GetJobRunResult; -import 
com.amazonaws.services.emrserverless.model.JobRun; -import com.google.common.collect.Lists; -import java.util.Base64; -import java.util.List; -import java.util.function.BiConsumer; -import org.apache.commons.lang3.tuple.Pair; -import org.junit.Test; -import org.opensearch.action.admin.indices.exists.indices.IndicesExistsRequest; -import org.opensearch.action.delete.DeleteRequest; -import org.opensearch.action.get.GetRequest; -import org.opensearch.sql.spark.asyncquery.model.AsyncQueryExecutionResponse; -import org.opensearch.sql.spark.asyncquery.model.MockFlintSparkJob; -import org.opensearch.sql.spark.client.EMRServerlessClientFactory; -import org.opensearch.sql.spark.execution.statestore.OpenSearchStateStoreUtil; -import org.opensearch.sql.spark.flint.FlintIndexState; -import org.opensearch.sql.spark.flint.FlintIndexType; -import org.opensearch.sql.spark.rest.model.CreateAsyncQueryRequest; -import org.opensearch.sql.spark.rest.model.CreateAsyncQueryResponse; -import org.opensearch.sql.spark.rest.model.LangType; - -@SuppressWarnings({"unchecked", "rawtypes"}) -public class IndexQuerySpecVacuumTest extends AsyncQueryExecutorServiceSpec { - - private static final EMRApiCall DEFAULT_OP = () -> null; - - private final List FLINT_TEST_DATASETS = - List.of( - mockDataset( - "VACUUM SKIPPING INDEX ON mys3.default.http_logs", - SKIPPING, - "flint_mys3_default_http_logs_skipping_index"), - mockDataset( - "VACUUM INDEX covering ON mys3.default.http_logs", - COVERING, - "flint_mys3_default_http_logs_covering_index"), - mockDataset( - "VACUUM MATERIALIZED VIEW mys3.default.http_logs_metrics", - MATERIALIZED_VIEW, - "flint_mys3_default_http_logs_metrics"), - mockDataset( - "VACUUM SKIPPING INDEX ON mys3.default.`test ,:\"+/\\|?#><`", - SKIPPING, - "flint_mys3_default_test%20%2c%3a%22%2b%2f%5c%7c%3f%23%3e%3c_skipping_index") - .isSpecialCharacter(true)); - - @Test - public void shouldVacuumIndexInDeletedState() { - List> testCases = - Lists.cartesianProduct( - FLINT_TEST_DATASETS, - List.of(DELETED), - List.of( - Pair.of( - DEFAULT_OP, - () -> new GetJobRunResult().withJobRun(new JobRun().withState("Cancelled"))))); - - runVacuumTestSuite( - testCases, - (mockDS, response) -> { - assertEquals("SUCCESS", response.getStatus()); - assertFalse(flintIndexExists(mockDS.indexName)); - assertFalse(indexDocExists(mockDS.latestId)); - }); - } - - @Test - public void shouldNotVacuumIndexInOtherStates() { - List> testCases = - Lists.cartesianProduct( - FLINT_TEST_DATASETS, - List.of(EMPTY, CREATING, ACTIVE, REFRESHING, VACUUMING), - List.of( - Pair.of( - () -> { - throw new AssertionError("should not call cancelJobRun"); - }, - () -> { - throw new AssertionError("should not call getJobRunResult"); - }))); - - runVacuumTestSuite( - testCases, - (mockDS, response) -> { - assertEquals("FAILED", response.getStatus()); - assertTrue(flintIndexExists(mockDS.indexName)); - assertTrue(indexDocExists(mockDS.latestId)); - }); - } - - private void runVacuumTestSuite( - List> testCases, - BiConsumer assertion) { - testCases.forEach( - params -> { - FlintDatasetMock mockDS = (FlintDatasetMock) params.get(0); - try { - FlintIndexState state = (FlintIndexState) params.get(1); - EMRApiCall cancelJobRun = ((Pair) params.get(2)).getLeft(); - EMRApiCall getJobRunResult = ((Pair) params.get(2)).getRight(); - - AsyncQueryExecutionResponse response = - runVacuumTest(mockDS, state, cancelJobRun, getJobRunResult); - assertion.accept(mockDS, response); - } finally { - // Clean up because we simulate parameterized test in single unit test 
method - if (flintIndexExists(mockDS.indexName)) { - mockDS.deleteIndex(); - } - if (indexDocExists(mockDS.latestId)) { - deleteIndexDoc(mockDS.latestId); - } - } - }); - } - - private AsyncQueryExecutionResponse runVacuumTest( - FlintDatasetMock mockDS, - FlintIndexState state, - EMRApiCall cancelJobRun, - EMRApiCall getJobRunResult) { - LocalEMRSClient emrsClient = - new LocalEMRSClient() { - @Override - public CancelJobRunResult cancelJobRun( - String applicationId, String jobId, boolean allowExceptionPropagation) { - if (cancelJobRun == DEFAULT_OP) { - return super.cancelJobRun(applicationId, jobId, allowExceptionPropagation); - } - return cancelJobRun.call(); - } - - @Override - public GetJobRunResult getJobRunResult(String applicationId, String jobId) { - if (getJobRunResult == DEFAULT_OP) { - return super.getJobRunResult(applicationId, jobId); - } - return getJobRunResult.call(); - } - }; - EMRServerlessClientFactory emrServerlessClientFactory = () -> emrsClient; - AsyncQueryExecutorService asyncQueryExecutorService = - createAsyncQueryExecutorService(emrServerlessClientFactory); - - // Mock Flint index - mockDS.createIndex(); - - // Mock index state doc - MockFlintSparkJob flintIndexJob = - new MockFlintSparkJob(flintIndexStateModelService, mockDS.latestId, "mys3"); - flintIndexJob.transition(state); - - // Vacuum index - CreateAsyncQueryResponse response = - asyncQueryExecutorService.createAsyncQuery( - new CreateAsyncQueryRequest(mockDS.query, MYS3_DATASOURCE, LangType.SQL, null), - asyncQueryRequestContext); - - return asyncQueryExecutorService.getAsyncQueryResults(response.getQueryId()); - } - - private boolean flintIndexExists(String flintIndexName) { - return client - .admin() - .indices() - .exists(new IndicesExistsRequest(flintIndexName)) - .actionGet() - .isExists(); - } - - private boolean indexDocExists(String docId) { - return client - .get(new GetRequest(OpenSearchStateStoreUtil.getIndexName("mys3"), docId)) - .actionGet() - .isExists(); - } - - private void deleteIndexDoc(String docId) { - client - .delete(new DeleteRequest(OpenSearchStateStoreUtil.getIndexName("mys3"), docId)) - .actionGet(); - } - - private FlintDatasetMock mockDataset(String query, FlintIndexType indexType, String indexName) { - FlintDatasetMock dataset = new FlintDatasetMock(query, "", indexType, indexName); - dataset.latestId(Base64.getEncoder().encodeToString(indexName.getBytes())); - return dataset; - } - - /** - * EMR API call mock interface. 
- * - * @param API call response type - */ - @FunctionalInterface - public interface EMRApiCall { - V call(); - } -} diff --git a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/model/SparkSubmitParametersTest.java b/spark/src/test/java/org/opensearch/sql/spark/asyncquery/model/SparkSubmitParametersTest.java deleted file mode 100644 index 10f12251b0..0000000000 --- a/spark/src/test/java/org/opensearch/sql/spark/asyncquery/model/SparkSubmitParametersTest.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.spark.asyncquery.model; - -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.opensearch.sql.spark.data.constants.SparkConstants.HADOOP_CATALOG_CREDENTIALS_PROVIDER_FACTORY_KEY; -import static org.opensearch.sql.spark.data.constants.SparkConstants.SPARK_JARS_KEY; - -import org.junit.jupiter.api.Test; - -public class SparkSubmitParametersTest { - - @Test - public void testBuildWithoutExtraParameters() { - String params = SparkSubmitParameters.builder().build().toString(); - - assertNotNull(params); - } - - @Test - public void testBuildWithExtraParameters() { - String params = - SparkSubmitParameters.builder().extraParameters("--conf A=1").build().toString(); - - // Assert the conf is included with a space - assertTrue(params.endsWith(" --conf A=1")); - } - - @Test - public void testBuildQueryString() { - String rawQuery = "SHOW tables LIKE \"%\";"; - String expectedQueryInParams = "\"SHOW tables LIKE \\\"%\\\";\""; - String params = SparkSubmitParameters.builder().query(rawQuery).build().toString(); - assertTrue(params.contains(expectedQueryInParams)); - } - - @Test - public void testBuildQueryStringNestedQuote() { - String rawQuery = "SELECT '\"1\"'"; - String expectedQueryInParams = "\"SELECT '\\\"1\\\"'\""; - String params = SparkSubmitParameters.builder().query(rawQuery).build().toString(); - assertTrue(params.contains(expectedQueryInParams)); - } - - @Test - public void testBuildQueryStringSpecialCharacter() { - String rawQuery = "SELECT '{\"test ,:+\\\"inner\\\"/\\|?#><\"}'"; - String expectedQueryInParams = "SELECT '{\\\"test ,:+\\\\\\\"inner\\\\\\\"/\\\\|?#><\\\"}'"; - String params = SparkSubmitParameters.builder().query(rawQuery).build().toString(); - assertTrue(params.contains(expectedQueryInParams)); - } - - @Test - public void testOverrideConfigItem() { - SparkSubmitParameters params = SparkSubmitParameters.builder().build(); - params.setConfigItem(SPARK_JARS_KEY, "Overridden"); - String result = params.toString(); - - assertTrue(result.contains(String.format("%s=Overridden", SPARK_JARS_KEY))); - } - - @Test - public void testDeleteConfigItem() { - SparkSubmitParameters params = SparkSubmitParameters.builder().build(); - params.deleteConfigItem(HADOOP_CATALOG_CREDENTIALS_PROVIDER_FACTORY_KEY); - String result = params.toString(); - - assertFalse(result.contains(HADOOP_CATALOG_CREDENTIALS_PROVIDER_FACTORY_KEY)); - } - - @Test - public void testAddConfigItem() { - SparkSubmitParameters params = SparkSubmitParameters.builder().build(); - params.setConfigItem("AdditionalKey", "Value"); - String result = params.toString(); - - assertTrue(result.contains("AdditionalKey=Value")); - } -} diff --git a/spark/src/test/java/org/opensearch/sql/spark/data/type/SparkDataTypeTest.java 
b/spark/src/test/java/org/opensearch/sql/spark/data/type/SparkDataTypeTest.java new file mode 100644 index 0000000000..ff6cee2a5e --- /dev/null +++ b/spark/src/test/java/org/opensearch/sql/spark/data/type/SparkDataTypeTest.java @@ -0,0 +1,19 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.spark.data.type; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.junit.jupiter.api.Test; + +class SparkDataTypeTest { + @Test + public void testTypeName() { + SparkDataType sparkDataType = new SparkDataType("TYPE_NAME"); + + assertEquals("TYPE_NAME", sparkDataType.typeName()); + } +} diff --git a/spark/src/test/java/org/opensearch/sql/spark/leasemanager/DefaultLeaseManagerTest.java b/spark/src/test/java/org/opensearch/sql/spark/leasemanager/DefaultLeaseManagerTest.java deleted file mode 100644 index 558f7f7b3a..0000000000 --- a/spark/src/test/java/org/opensearch/sql/spark/leasemanager/DefaultLeaseManagerTest.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.spark.leasemanager; - -import static org.junit.jupiter.api.Assertions.assertTrue; - -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.mockito.Mock; -import org.mockito.junit.jupiter.MockitoExtension; -import org.opensearch.sql.common.setting.Settings; -import org.opensearch.sql.spark.dispatcher.model.JobType; -import org.opensearch.sql.spark.execution.statestore.StateStore; -import org.opensearch.sql.spark.leasemanager.model.LeaseRequest; - -@ExtendWith(MockitoExtension.class) -class DefaultLeaseManagerTest { - @Mock private Settings settings; - - @Mock private StateStore stateStore; - - @Test - public void concurrentSessionRuleOnlyApplyToInteractiveQuery() { - assertTrue( - new DefaultLeaseManager.ConcurrentSessionRule(settings, stateStore) - .test(new LeaseRequest(JobType.BATCH, "mys3"))); - assertTrue( - new DefaultLeaseManager.ConcurrentSessionRule(settings, stateStore) - .test(new LeaseRequest(JobType.STREAMING, "mys3"))); - } - - @Test - public void concurrentRefreshRuleOnlyNotAppliedToInteractiveQuery() { - assertTrue( - new DefaultLeaseManager.ConcurrentRefreshJobRule(settings, stateStore) - .test(new LeaseRequest(JobType.INTERACTIVE, "mys3"))); - } -} diff --git a/spark/src/test/java/org/opensearch/sql/spark/utils/SQLQueryUtilsTest.java b/spark/src/test/java/org/opensearch/sql/spark/utils/SQLQueryUtilsTest.java deleted file mode 100644 index 620d187e52..0000000000 --- a/spark/src/test/java/org/opensearch/sql/spark/utils/SQLQueryUtilsTest.java +++ /dev/null @@ -1,392 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.spark.utils; - -import static org.opensearch.sql.spark.utils.SQLQueryUtilsTest.IndexQuery.index; -import static org.opensearch.sql.spark.utils.SQLQueryUtilsTest.IndexQuery.mv; -import static org.opensearch.sql.spark.utils.SQLQueryUtilsTest.IndexQuery.skippingIndex; - -import lombok.Getter; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.mockito.junit.jupiter.MockitoExtension; -import org.opensearch.sql.spark.dispatcher.model.FullyQualifiedTableName; -import org.opensearch.sql.spark.dispatcher.model.IndexQueryActionType; -import org.opensearch.sql.spark.dispatcher.model.IndexQueryDetails; -import 
org.opensearch.sql.spark.flint.FlintIndexType; - -@ExtendWith(MockitoExtension.class) -public class SQLQueryUtilsTest { - - @Test - void testExtractionOfTableNameFromSQLQueries() { - String sqlQuery = "select * from my_glue.default.http_logs"; - FullyQualifiedTableName fullyQualifiedTableName = - SQLQueryUtils.extractFullyQualifiedTableName(sqlQuery); - Assertions.assertFalse(SQLQueryUtils.isFlintExtensionQuery(sqlQuery)); - Assertions.assertEquals("my_glue", fullyQualifiedTableName.getDatasourceName()); - Assertions.assertEquals("default", fullyQualifiedTableName.getSchemaName()); - Assertions.assertEquals("http_logs", fullyQualifiedTableName.getTableName()); - - sqlQuery = "select * from my_glue.db.http_logs"; - Assertions.assertFalse(SQLQueryUtils.isFlintExtensionQuery(sqlQuery)); - fullyQualifiedTableName = SQLQueryUtils.extractFullyQualifiedTableName(sqlQuery); - Assertions.assertEquals("my_glue", fullyQualifiedTableName.getDatasourceName()); - Assertions.assertEquals("db", fullyQualifiedTableName.getSchemaName()); - Assertions.assertEquals("http_logs", fullyQualifiedTableName.getTableName()); - - sqlQuery = "select * from my_glue.http_logs"; - fullyQualifiedTableName = SQLQueryUtils.extractFullyQualifiedTableName(sqlQuery); - Assertions.assertFalse(SQLQueryUtils.isFlintExtensionQuery(sqlQuery)); - Assertions.assertEquals("my_glue", fullyQualifiedTableName.getSchemaName()); - Assertions.assertNull(fullyQualifiedTableName.getDatasourceName()); - Assertions.assertEquals("http_logs", fullyQualifiedTableName.getTableName()); - - sqlQuery = "select * from http_logs"; - fullyQualifiedTableName = SQLQueryUtils.extractFullyQualifiedTableName(sqlQuery); - Assertions.assertFalse(SQLQueryUtils.isFlintExtensionQuery(sqlQuery)); - Assertions.assertNull(fullyQualifiedTableName.getDatasourceName()); - Assertions.assertNull(fullyQualifiedTableName.getSchemaName()); - Assertions.assertEquals("http_logs", fullyQualifiedTableName.getTableName()); - - sqlQuery = "DROP TABLE myS3.default.alb_logs"; - fullyQualifiedTableName = SQLQueryUtils.extractFullyQualifiedTableName(sqlQuery); - Assertions.assertFalse(SQLQueryUtils.isFlintExtensionQuery(sqlQuery)); - Assertions.assertEquals("myS3", fullyQualifiedTableName.getDatasourceName()); - Assertions.assertEquals("default", fullyQualifiedTableName.getSchemaName()); - Assertions.assertEquals("alb_logs", fullyQualifiedTableName.getTableName()); - - sqlQuery = "DESCRIBE TABLE myS3.default.alb_logs"; - fullyQualifiedTableName = SQLQueryUtils.extractFullyQualifiedTableName(sqlQuery); - Assertions.assertFalse(SQLQueryUtils.isFlintExtensionQuery(sqlQuery)); - Assertions.assertEquals("myS3", fullyQualifiedTableName.getDatasourceName()); - Assertions.assertEquals("default", fullyQualifiedTableName.getSchemaName()); - Assertions.assertEquals("alb_logs", fullyQualifiedTableName.getTableName()); - - sqlQuery = - "CREATE EXTERNAL TABLE\n" - + "myS3.default.alb_logs\n" - + "[ PARTITIONED BY (col_name [, … ] ) ]\n" - + "[ ROW FORMAT DELIMITED row_format ]\n" - + "STORED AS file_format\n" - + "LOCATION { 's3://bucket/folder/' }"; - fullyQualifiedTableName = SQLQueryUtils.extractFullyQualifiedTableName(sqlQuery); - Assertions.assertFalse(SQLQueryUtils.isFlintExtensionQuery(sqlQuery)); - Assertions.assertEquals("myS3", fullyQualifiedTableName.getDatasourceName()); - Assertions.assertEquals("default", fullyQualifiedTableName.getSchemaName()); - Assertions.assertEquals("alb_logs", fullyQualifiedTableName.getTableName()); - } - - @Test - void testErrorScenarios() { - String sqlQuery 
= "SHOW tables"; - FullyQualifiedTableName fullyQualifiedTableName = - SQLQueryUtils.extractFullyQualifiedTableName(sqlQuery); - Assertions.assertNotNull(fullyQualifiedTableName); - Assertions.assertNull(fullyQualifiedTableName.getFullyQualifiedName()); - Assertions.assertNull(fullyQualifiedTableName.getSchemaName()); - Assertions.assertNull(fullyQualifiedTableName.getTableName()); - Assertions.assertNull(fullyQualifiedTableName.getDatasourceName()); - - sqlQuery = "DESCRIBE TABLE FROM myS3.default.alb_logs"; - fullyQualifiedTableName = SQLQueryUtils.extractFullyQualifiedTableName(sqlQuery); - Assertions.assertFalse(SQLQueryUtils.isFlintExtensionQuery(sqlQuery)); - Assertions.assertEquals("FROM", fullyQualifiedTableName.getFullyQualifiedName()); - Assertions.assertNull(fullyQualifiedTableName.getSchemaName()); - Assertions.assertEquals("FROM", fullyQualifiedTableName.getTableName()); - Assertions.assertNull(fullyQualifiedTableName.getDatasourceName()); - } - - @Test - void testExtractionFromFlintSkippingIndexQueries() { - String[] createSkippingIndexQueries = { - "CREATE SKIPPING INDEX ON myS3.default.alb_logs (l_orderkey VALUE_SET)", - "CREATE SKIPPING INDEX IF NOT EXISTS" - + " ON myS3.default.alb_logs (l_orderkey VALUE_SET) " - + " WITH (auto_refresh = true)", - "CREATE SKIPPING INDEX ON myS3.default.alb_logs(l_orderkey VALUE_SET)" - + " WITH (auto_refresh = true)", - "CREATE SKIPPING INDEX ON myS3.default.alb_logs(l_orderkey VALUE_SET) " - + " WHERE elb_status_code = 500 " - + " WITH (auto_refresh = true)" - }; - - for (String query : createSkippingIndexQueries) { - Assertions.assertTrue(SQLQueryUtils.isFlintExtensionQuery(query), "Failed query: " + query); - IndexQueryDetails indexQueryDetails = SQLQueryUtils.extractIndexDetails(query); - FullyQualifiedTableName fullyQualifiedTableName = - indexQueryDetails.getFullyQualifiedTableName(); - - Assertions.assertNull(indexQueryDetails.getIndexName()); - Assertions.assertEquals("myS3", fullyQualifiedTableName.getDatasourceName()); - Assertions.assertEquals("default", fullyQualifiedTableName.getSchemaName()); - Assertions.assertEquals("alb_logs", fullyQualifiedTableName.getTableName()); - } - } - - @Test - void testExtractionFromFlintCoveringIndexQueries() { - String[] createCoveredIndexQueries = { - "CREATE INDEX elb_and_requestUri ON myS3.default.alb_logs(l_orderkey, l_quantity)", - "CREATE INDEX IF NOT EXISTS elb_and_requestUri " - + " ON myS3.default.alb_logs(l_orderkey, l_quantity) " - + " WITH (auto_refresh = true)", - "CREATE INDEX elb_and_requestUri ON myS3.default.alb_logs(l_orderkey, l_quantity)" - + " WITH (auto_refresh = true)", - "CREATE INDEX elb_and_requestUri ON myS3.default.alb_logs(l_orderkey, l_quantity) " - + " WHERE elb_status_code = 500 " - + " WITH (auto_refresh = true)" - }; - - for (String query : createCoveredIndexQueries) { - Assertions.assertTrue(SQLQueryUtils.isFlintExtensionQuery(query), "Failed query: " + query); - IndexQueryDetails indexQueryDetails = SQLQueryUtils.extractIndexDetails(query); - FullyQualifiedTableName fullyQualifiedTableName = - indexQueryDetails.getFullyQualifiedTableName(); - - Assertions.assertEquals("elb_and_requestUri", indexQueryDetails.getIndexName()); - Assertions.assertEquals("myS3", fullyQualifiedTableName.getDatasourceName()); - Assertions.assertEquals("default", fullyQualifiedTableName.getSchemaName()); - Assertions.assertEquals("alb_logs", fullyQualifiedTableName.getTableName()); - } - } - - @Test - void testExtractionFromFlintMVQuery() { - String createCoveredIndexQuery = - 
"CREATE MATERIALIZED VIEW mv_1 AS query=select * from my_glue.default.logs WITH" - + " (auto_refresh = true)"; - Assertions.assertTrue(SQLQueryUtils.isFlintExtensionQuery(createCoveredIndexQuery)); - IndexQueryDetails indexQueryDetails = - SQLQueryUtils.extractIndexDetails(createCoveredIndexQuery); - FullyQualifiedTableName fullyQualifiedTableName = - indexQueryDetails.getFullyQualifiedTableName(); - Assertions.assertNull(indexQueryDetails.getIndexName()); - Assertions.assertNull(fullyQualifiedTableName); - Assertions.assertEquals("mv_1", indexQueryDetails.getMvName()); - } - - @Test - void testDescIndex() { - String descSkippingIndex = "DESC SKIPPING INDEX ON mys3.default.http_logs"; - Assertions.assertTrue(SQLQueryUtils.isFlintExtensionQuery(descSkippingIndex)); - IndexQueryDetails indexDetails = SQLQueryUtils.extractIndexDetails(descSkippingIndex); - FullyQualifiedTableName fullyQualifiedTableName = indexDetails.getFullyQualifiedTableName(); - Assertions.assertNull(indexDetails.getIndexName()); - Assertions.assertNotNull(fullyQualifiedTableName); - Assertions.assertEquals(FlintIndexType.SKIPPING, indexDetails.getIndexType()); - Assertions.assertEquals(IndexQueryActionType.DESCRIBE, indexDetails.getIndexQueryActionType()); - - String descCoveringIndex = "DESC INDEX cv1 ON mys3.default.http_logs"; - Assertions.assertTrue(SQLQueryUtils.isFlintExtensionQuery(descCoveringIndex)); - indexDetails = SQLQueryUtils.extractIndexDetails(descCoveringIndex); - fullyQualifiedTableName = indexDetails.getFullyQualifiedTableName(); - Assertions.assertEquals("cv1", indexDetails.getIndexName()); - Assertions.assertNotNull(fullyQualifiedTableName); - Assertions.assertEquals(FlintIndexType.COVERING, indexDetails.getIndexType()); - Assertions.assertEquals(IndexQueryActionType.DESCRIBE, indexDetails.getIndexQueryActionType()); - - String descMv = "DESC MATERIALIZED VIEW mv1"; - Assertions.assertTrue(SQLQueryUtils.isFlintExtensionQuery(descMv)); - indexDetails = SQLQueryUtils.extractIndexDetails(descMv); - fullyQualifiedTableName = indexDetails.getFullyQualifiedTableName(); - Assertions.assertNull(indexDetails.getIndexName()); - Assertions.assertEquals("mv1", indexDetails.getMvName()); - Assertions.assertNull(fullyQualifiedTableName); - Assertions.assertEquals(FlintIndexType.MATERIALIZED_VIEW, indexDetails.getIndexType()); - Assertions.assertEquals(IndexQueryActionType.DESCRIBE, indexDetails.getIndexQueryActionType()); - } - - @Test - void testShowIndex() { - String showCoveringIndex = " SHOW INDEX ON myS3.default.http_logs"; - Assertions.assertTrue(SQLQueryUtils.isFlintExtensionQuery(showCoveringIndex)); - IndexQueryDetails indexDetails = SQLQueryUtils.extractIndexDetails(showCoveringIndex); - FullyQualifiedTableName fullyQualifiedTableName = indexDetails.getFullyQualifiedTableName(); - Assertions.assertNull(indexDetails.getIndexName()); - Assertions.assertNull(indexDetails.getMvName()); - Assertions.assertNotNull(fullyQualifiedTableName); - Assertions.assertEquals(FlintIndexType.COVERING, indexDetails.getIndexType()); - Assertions.assertEquals(IndexQueryActionType.SHOW, indexDetails.getIndexQueryActionType()); - - String showMV = "SHOW MATERIALIZED VIEW IN my_glue.default"; - Assertions.assertTrue(SQLQueryUtils.isFlintExtensionQuery(showMV)); - indexDetails = SQLQueryUtils.extractIndexDetails(showMV); - fullyQualifiedTableName = indexDetails.getFullyQualifiedTableName(); - Assertions.assertNull(indexDetails.getIndexName()); - Assertions.assertNull(indexDetails.getMvName()); - 
Assertions.assertNull(fullyQualifiedTableName); - Assertions.assertEquals(FlintIndexType.MATERIALIZED_VIEW, indexDetails.getIndexType()); - Assertions.assertEquals(IndexQueryActionType.SHOW, indexDetails.getIndexQueryActionType()); - } - - @Test - void testRefreshIndex() { - String refreshSkippingIndex = "REFRESH SKIPPING INDEX ON mys3.default.http_logs"; - Assertions.assertTrue(SQLQueryUtils.isFlintExtensionQuery(refreshSkippingIndex)); - IndexQueryDetails indexDetails = SQLQueryUtils.extractIndexDetails(refreshSkippingIndex); - FullyQualifiedTableName fullyQualifiedTableName = indexDetails.getFullyQualifiedTableName(); - Assertions.assertNull(indexDetails.getIndexName()); - Assertions.assertNotNull(fullyQualifiedTableName); - Assertions.assertEquals(FlintIndexType.SKIPPING, indexDetails.getIndexType()); - Assertions.assertEquals(IndexQueryActionType.REFRESH, indexDetails.getIndexQueryActionType()); - - String refreshCoveringIndex = "REFRESH INDEX cv1 ON mys3.default.http_logs"; - Assertions.assertTrue(SQLQueryUtils.isFlintExtensionQuery(refreshCoveringIndex)); - indexDetails = SQLQueryUtils.extractIndexDetails(refreshCoveringIndex); - fullyQualifiedTableName = indexDetails.getFullyQualifiedTableName(); - Assertions.assertEquals("cv1", indexDetails.getIndexName()); - Assertions.assertNotNull(fullyQualifiedTableName); - Assertions.assertEquals(FlintIndexType.COVERING, indexDetails.getIndexType()); - Assertions.assertEquals(IndexQueryActionType.REFRESH, indexDetails.getIndexQueryActionType()); - - String refreshMV = "REFRESH MATERIALIZED VIEW mv1"; - Assertions.assertTrue(SQLQueryUtils.isFlintExtensionQuery(refreshMV)); - indexDetails = SQLQueryUtils.extractIndexDetails(refreshMV); - fullyQualifiedTableName = indexDetails.getFullyQualifiedTableName(); - Assertions.assertNull(indexDetails.getIndexName()); - Assertions.assertEquals("mv1", indexDetails.getMvName()); - Assertions.assertNull(fullyQualifiedTableName); - Assertions.assertEquals(FlintIndexType.MATERIALIZED_VIEW, indexDetails.getIndexType()); - Assertions.assertEquals(IndexQueryActionType.REFRESH, indexDetails.getIndexQueryActionType()); - } - - /** https://github.com/opensearch-project/sql/issues/2206 */ - @Test - void testAutoRefresh() { - Assertions.assertFalse( - SQLQueryUtils.extractIndexDetails(skippingIndex().getQuery()) - .getFlintIndexOptions() - .autoRefresh()); - - Assertions.assertFalse( - SQLQueryUtils.extractIndexDetails( - skippingIndex().withProperty("auto_refresh", "false").getQuery()) - .getFlintIndexOptions() - .autoRefresh()); - - Assertions.assertTrue( - SQLQueryUtils.extractIndexDetails( - skippingIndex().withProperty("auto_refresh", "true").getQuery()) - .getFlintIndexOptions() - .autoRefresh()); - - Assertions.assertTrue( - SQLQueryUtils.extractIndexDetails( - skippingIndex().withProperty("auto_refresh", "true").withSemicolon().getQuery()) - .getFlintIndexOptions() - .autoRefresh()); - - Assertions.assertTrue( - SQLQueryUtils.extractIndexDetails( - skippingIndex().withProperty("\"auto_refresh\"", "true").getQuery()) - .getFlintIndexOptions() - .autoRefresh()); - - Assertions.assertTrue( - SQLQueryUtils.extractIndexDetails( - skippingIndex().withProperty("\"auto_refresh\"", "true").withSemicolon().getQuery()) - .getFlintIndexOptions() - .autoRefresh()); - - Assertions.assertTrue( - SQLQueryUtils.extractIndexDetails( - skippingIndex().withProperty("\"auto_refresh\"", "\"true\"").getQuery()) - .getFlintIndexOptions() - .autoRefresh()); - - Assertions.assertTrue( - SQLQueryUtils.extractIndexDetails( - 
skippingIndex() - .withProperty("\"auto_refresh\"", "\"true\"") - .withSemicolon() - .getQuery()) - .getFlintIndexOptions() - .autoRefresh()); - - Assertions.assertFalse( - SQLQueryUtils.extractIndexDetails( - skippingIndex().withProperty("auto_refresh", "1").getQuery()) - .getFlintIndexOptions() - .autoRefresh()); - - Assertions.assertFalse( - SQLQueryUtils.extractIndexDetails(skippingIndex().withProperty("interval", "1").getQuery()) - .getFlintIndexOptions() - .autoRefresh()); - - Assertions.assertFalse( - SQLQueryUtils.extractIndexDetails( - skippingIndex().withProperty("\"\"", "\"true\"").getQuery()) - .getFlintIndexOptions() - .autoRefresh()); - - Assertions.assertFalse( - SQLQueryUtils.extractIndexDetails(index().getQuery()).getFlintIndexOptions().autoRefresh()); - - Assertions.assertFalse( - SQLQueryUtils.extractIndexDetails(index().withProperty("auto_refresh", "false").getQuery()) - .getFlintIndexOptions() - .autoRefresh()); - - Assertions.assertTrue( - SQLQueryUtils.extractIndexDetails(index().withProperty("auto_refresh", "true").getQuery()) - .getFlintIndexOptions() - .autoRefresh()); - - Assertions.assertTrue( - SQLQueryUtils.extractIndexDetails( - index().withProperty("auto_refresh", "true").withSemicolon().getQuery()) - .getFlintIndexOptions() - .autoRefresh()); - - Assertions.assertTrue( - SQLQueryUtils.extractIndexDetails(mv().withProperty("auto_refresh", "true").getQuery()) - .getFlintIndexOptions() - .autoRefresh()); - - Assertions.assertTrue( - SQLQueryUtils.extractIndexDetails( - mv().withProperty("auto_refresh", "true").withSemicolon().getQuery()) - .getFlintIndexOptions() - .autoRefresh()); - } - - @Getter - protected static class IndexQuery { - private String query; - - private IndexQuery(String query) { - this.query = query; - } - - public static IndexQuery skippingIndex() { - return new IndexQuery( - "CREATE SKIPPING INDEX ON myS3.default.alb_logs" + "(l_orderkey VALUE_SET)"); - } - - public static IndexQuery index() { - return new IndexQuery( - "CREATE INDEX elb_and_requestUri ON myS3.default.alb_logs(l_orderkey, " + "l_quantity)"); - } - - public static IndexQuery mv() { - return new IndexQuery( - "CREATE MATERIALIZED VIEW mv_1 AS query=select * from my_glue.default.logs"); - } - - public IndexQuery withProperty(String key, String value) { - query = String.format("%s with (%s = %s)", query, key, value); - return this; - } - - public IndexQuery withSemicolon() { - query += ";"; - return this; - } - } -} diff --git a/spark/src/test/java/org/opensearch/sql/spark/utils/TestUtils.java b/spark/src/test/java/org/opensearch/sql/spark/utils/TestUtils.java index 4cab6afa9c..4336b13aa9 100644 --- a/spark/src/test/java/org/opensearch/sql/spark/utils/TestUtils.java +++ b/spark/src/test/java/org/opensearch/sql/spark/utils/TestUtils.java @@ -5,41 +5,13 @@ package org.opensearch.sql.spark.utils; -import com.google.common.base.Charsets; -import com.google.common.io.Resources; import java.io.IOException; -import java.net.URL; import java.util.Objects; -import lombok.SneakyThrows; -import org.opensearch.action.admin.indices.create.CreateIndexRequest; -import org.opensearch.client.Client; -import org.opensearch.common.xcontent.XContentType; public class TestUtils { - - /** - * Get Json document from the files in resources folder. - * - * @param filename filename. - * @return String. - * @throws IOException IOException. 
- */ public static String getJson(String filename) throws IOException { ClassLoader classLoader = TestUtils.class.getClassLoader(); return new String( Objects.requireNonNull(classLoader.getResourceAsStream(filename)).readAllBytes()); } - - @SneakyThrows - public static String loadMappings(String path) { - URL url = Resources.getResource(path); - return Resources.toString(url, Charsets.UTF_8); - } - - public static void createIndexWithMappings( - Client client, String indexName, String metadataFileLocation) { - CreateIndexRequest request = new CreateIndexRequest(indexName); - request.mapping(loadMappings(metadataFileLocation), XContentType.JSON); - client.admin().indices().create(request).actionGet(); - } } diff --git a/sql/build.gradle b/sql/build.gradle index 81872e6035..10bb4b24bb 100644 --- a/sql/build.gradle +++ b/sql/build.gradle @@ -46,7 +46,7 @@ dependencies { antlr "org.antlr:antlr4:4.7.1" implementation "org.antlr:antlr4-runtime:4.7.1" - implementation group: 'com.google.guava', name: 'guava', version: '32.0.1-jre' + implementation group: 'com.google.guava', name: 'guava', version: "${guava_version}" implementation group: 'org.json', name: 'json', version:'20231013' implementation project(':common') implementation project(':core') diff --git a/sql/src/main/java/org/opensearch/sql/sql/domain/SQLQueryRequest.java b/sql/src/main/java/org/opensearch/sql/sql/domain/SQLQueryRequest.java index 4e902cb67d..3714b443f5 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/domain/SQLQueryRequest.java +++ b/sql/src/main/java/org/opensearch/sql/sql/domain/SQLQueryRequest.java @@ -10,6 +10,7 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.function.Predicate; import java.util.stream.Stream; import lombok.EqualsAndHashCode; import lombok.Getter; @@ -29,6 +30,7 @@ public class SQLQueryRequest { Set.of("query", "fetch_size", "parameters", QUERY_FIELD_CURSOR); private static final String QUERY_PARAMS_FORMAT = "format"; private static final String QUERY_PARAMS_SANITIZE = "sanitize"; + private static final String QUERY_PARAMS_PRETTY = "pretty"; /** JSON payload in REST request. */ private final JSONObject jsonContent; @@ -49,6 +51,10 @@ public class SQLQueryRequest { @Accessors(fluent = true) private boolean sanitize = true; + @Getter + @Accessors(fluent = true) + private boolean pretty = false; + private String cursor; /** Constructor of SQLQueryRequest that passes request params. */ @@ -64,6 +70,7 @@ public SQLQueryRequest( this.params = params; this.format = getFormat(params); this.sanitize = shouldSanitize(params); + this.pretty = shouldPretty(params); this.cursor = cursor; } @@ -79,19 +86,21 @@ public SQLQueryRequest( * @return true if supported. 
*/ public boolean isSupported() { - var noCursor = !isCursor(); - var noQuery = query == null; - var noUnsupportedParams = - params.isEmpty() || (params.size() == 1 && params.containsKey(QUERY_PARAMS_FORMAT)); - var noContent = jsonContent == null || jsonContent.isEmpty(); - - return ((!noCursor - && noQuery - && noUnsupportedParams - && noContent) // if cursor is given, but other things - || (noCursor && !noQuery)) // or if cursor is not given, but query - && isOnlySupportedFieldInPayload() // and request has supported fields only - && isSupportedFormat(); // and request is in supported format + boolean hasCursor = isCursor(); + boolean hasQuery = query != null; + boolean hasContent = jsonContent != null && !jsonContent.isEmpty(); + + Predicate<String> supportedParams = Set.of(QUERY_PARAMS_FORMAT, QUERY_PARAMS_PRETTY)::contains; + boolean hasUnsupportedParams = + (!params.isEmpty()) + && params.keySet().stream().dropWhile(supportedParams).findAny().isPresent(); + + boolean validCursor = hasCursor && !hasQuery && !hasUnsupportedParams && !hasContent; + boolean validQuery = !hasCursor && hasQuery; + + return (validCursor || validQuery) // It's a valid cursor or a valid query + && isOnlySupportedFieldInPayload() // and request must contain supported fields only + && isSupportedFormat(); // and request must be a supported format } private boolean isCursor() { @@ -148,4 +157,11 @@ private boolean shouldSanitize(Map<String, String> params) { } return true; } + + private boolean shouldPretty(Map<String, String> params) { + if (params.containsKey(QUERY_PARAMS_PRETTY)) { + return Boolean.parseBoolean(params.get(QUERY_PARAMS_PRETTY)); + } + return false; + } } diff --git a/sql/src/test/java/org/opensearch/sql/sql/SQLServiceTest.java b/sql/src/test/java/org/opensearch/sql/sql/SQLServiceTest.java index 8cb2994dc3..dc920b248f 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/SQLServiceTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/SQLServiceTest.java @@ -36,9 +36,9 @@ @DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) class SQLServiceTest { - private static String QUERY = "/_plugins/_sql"; + private static final String QUERY = "/_plugins/_sql"; - private static String EXPLAIN = "/_plugins/_sql/_explain"; + private static final String EXPLAIN = "/_plugins/_sql/_explain"; private SQLService sqlService; @@ -127,6 +127,45 @@ public void onFailure(Exception e) { }); } + @Test + public void can_execute_raw_format_request() { + sqlService.execute( + new SQLQueryRequest(new JSONObject(), "SELECT 123", QUERY, "raw"), + new ResponseListener<QueryResponse>() { + @Override + public void onResponse(QueryResponse response) { + assertNotNull(response); + } + + @Override + public void onFailure(Exception e) { + fail(e); + } + }); + } + + @Test + public void can_execute_pretty_raw_format_request() { + sqlService.execute( + new SQLQueryRequest( + new JSONObject(), + "SELECT 123", + QUERY, + Map.of("format", "jdbc", "pretty", "true"), + "n:cursor"), + new ResponseListener<QueryResponse>() { + @Override + public void onResponse(QueryResponse response) { + assertNotNull(response); + } + + @Override + public void onFailure(Exception e) { + fail(e); + } + }); + } + @Test public void can_explain_sql_query() { doAnswer( diff --git a/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java b/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java index f68c27deea..c43044508b 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java +++ 
b/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLSyntaxParserTest.java @@ -719,6 +719,22 @@ public void canParseMultiMatchAlternateSyntax() { assertNotNull(parser.parse("SELECT * FROM test WHERE Field = multimatch(\"query\")")); } + @Test + public void canParseIfFunction() { + assertNotNull(parser.parse("SELECT IF(1 > 2, 1, 0)")); + assertNotNull(parser.parse("SELECT IF(1 < 2, 1, 0)")); + assertNotNull(parser.parse("SELECT IF(1 >= 2, 1, 0)")); + assertNotNull(parser.parse("SELECT IF(1 <= 2, 1, 0)")); + assertNotNull(parser.parse("SELECT IF(1 <> 2, 1, 0)")); + assertNotNull(parser.parse("SELECT IF(1 != 2, 1, 0)")); + assertNotNull(parser.parse("SELECT IF(1 = 2, 1, 0)")); + assertNotNull(parser.parse("SELECT IF(true, 1, 0)")); + assertNotNull(parser.parse("SELECT IF(1 IS NOT NULL, 1, 0)")); + assertNotNull(parser.parse("SELECT IF(NOT 1 > 2, 1, 0)")); + assertNotNull(parser.parse("SELECT IF(NOT 1 IN (0, 1), 1, 0)")); + assertNotNull(parser.parse("SELECT IF(NOT 1 IN (0, 1) OR 1 IS NOT NULL, 1, 0)")); + } + private static Stream matchPhraseQueryComplexQueries() { return Stream.of( "SELECT * FROM t WHERE matchphrasequery(c, 3)", diff --git a/sql/src/test/java/org/opensearch/sql/sql/domain/SQLQueryRequestTest.java b/sql/src/test/java/org/opensearch/sql/sql/domain/SQLQueryRequestTest.java index 2b64b13b35..b569a89a2e 100644 --- a/sql/src/test/java/org/opensearch/sql/sql/domain/SQLQueryRequestTest.java +++ b/sql/src/test/java/org/opensearch/sql/sql/domain/SQLQueryRequestTest.java @@ -106,6 +106,42 @@ public void should_support_cursor_request() { () -> assertTrue(cursorRequest.isSupported())); } + @Test + public void should_support_cursor_request_with_supported_parameters() { + SQLQueryRequest fetchSizeRequest = + SQLQueryRequestBuilder.request("SELECT 1") + .jsonContent("{\"query\": \"SELECT 1\", \"fetch_size\": 5}") + .build(); + + SQLQueryRequest cursorRequest = + SQLQueryRequestBuilder.request(null) + .cursor("abcdefgh...") + .params(Map.of("format", "csv", "pretty", "true")) + .build(); + + assertAll( + () -> assertTrue(fetchSizeRequest.isSupported()), + () -> assertTrue(cursorRequest.isSupported())); + } + + @Test + public void should_not_support_cursor_request_with_unsupported_parameters() { + SQLQueryRequest fetchSizeRequest = + SQLQueryRequestBuilder.request("SELECT 1") + .jsonContent("{\"query\": \"SELECT 1\", \"fetch_size\": 5}") + .build(); + + SQLQueryRequest cursorRequest = + SQLQueryRequestBuilder.request(null) + .cursor("abcdefgh...") + .params(Map.of("one", "two")) + .build(); + + assertAll( + () -> assertTrue(fetchSizeRequest.isSupported()), + () -> assertFalse(cursorRequest.isSupported())); + } + @Test public void should_support_cursor_close_request() { SQLQueryRequest closeRequest =