Skip to content

Commit

Permalink
json-valid PPL function (opensearch-project#3230)
Browse files Browse the repository at this point in the history
Add json-valid PPL function (opensearch-project#3230)

---------

Signed-off-by: Kenrick Yap <[email protected]>
Signed-off-by: Kenrick Yap <[email protected]>
Signed-off-by: kenrickyap <[email protected]>
Signed-off-by: Andrew Carbonetto <[email protected]>
Co-authored-by: Kenrick Yap <[email protected]>
Co-authored-by: Andrew Carbonetto <[email protected]>
  • Loading branch information
3 people authored Jan 16, 2025
1 parent e7be8ca commit d5806cc
Show file tree
Hide file tree
Showing 19 changed files with 279 additions and 3 deletions.
4 changes: 4 additions & 0 deletions core/src/main/java/org/opensearch/sql/expression/DSL.java
Original file line number Diff line number Diff line change
Expand Up @@ -683,6 +683,10 @@ public static FunctionExpression notLike(Expression... expressions) {
return compile(FunctionProperties.None, BuiltinFunctionName.NOT_LIKE, expressions);
}

public static FunctionExpression jsonValid(Expression... expressions) {
return compile(FunctionProperties.None, BuiltinFunctionName.JSON_VALID, expressions);
}

public static Aggregator avg(Expression... expressions) {
return aggregate(BuiltinFunctionName.AVG, expressions);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,9 @@ public enum BuiltinFunctionName {
TRIM(FunctionName.of("trim")),
UPPER(FunctionName.of("upper")),

/** Json Functions. */
JSON_VALID(FunctionName.of("json_valid")),

/** NULL Test. */
IS_NULL(FunctionName.of("is null")),
IS_NOT_NULL(FunctionName.of("is not null")),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.opensearch.sql.expression.datetime.DateTimeFunctions;
import org.opensearch.sql.expression.datetime.IntervalClause;
import org.opensearch.sql.expression.ip.IPFunctions;
import org.opensearch.sql.expression.json.JsonFunctions;
import org.opensearch.sql.expression.operator.arthmetic.ArithmeticFunctions;
import org.opensearch.sql.expression.operator.arthmetic.MathematicalFunctions;
import org.opensearch.sql.expression.operator.convert.TypeCastOperators;
Expand Down Expand Up @@ -83,6 +84,7 @@ public static synchronized BuiltinFunctionRepository getInstance() {
SystemFunctions.register(instance);
OpenSearchFunctions.register(instance);
IPFunctions.register(instance);
JsonFunctions.register(instance);
}
return instance;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.expression.json;

import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN;
import static org.opensearch.sql.data.type.ExprCoreType.STRING;
import static org.opensearch.sql.expression.function.FunctionDSL.define;
import static org.opensearch.sql.expression.function.FunctionDSL.impl;

import lombok.experimental.UtilityClass;
import org.opensearch.sql.expression.function.BuiltinFunctionName;
import org.opensearch.sql.expression.function.BuiltinFunctionRepository;
import org.opensearch.sql.expression.function.DefaultFunctionResolver;
import org.opensearch.sql.utils.JsonUtils;

@UtilityClass
public class JsonFunctions {
public void register(BuiltinFunctionRepository repository) {
repository.register(jsonValid());
}

private DefaultFunctionResolver jsonValid() {
return define(
BuiltinFunctionName.JSON_VALID.getName(), impl(JsonUtils::isValidJson, BOOLEAN, STRING));
}
}
31 changes: 31 additions & 0 deletions core/src/main/java/org/opensearch/sql/utils/JsonUtils.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package org.opensearch.sql.utils;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.experimental.UtilityClass;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.model.ExprValueUtils;

@UtilityClass
public class JsonUtils {
/**
* Checks if given JSON string can be parsed as valid JSON.
*
* @param jsonExprValue JSON string (e.g. "{\"hello\": \"world\"}").
* @return true if the string can be parsed as valid JSON, else false.
*/
public static ExprValue isValidJson(ExprValue jsonExprValue) {
ObjectMapper objectMapper = new ObjectMapper();

if (jsonExprValue.isNull() || jsonExprValue.isMissing()) {
return ExprValueUtils.LITERAL_FALSE;
}

try {
objectMapper.readTree(jsonExprValue.stringValue());
return ExprValueUtils.LITERAL_TRUE;
} catch (JsonProcessingException e) {
return ExprValueUtils.LITERAL_FALSE;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.expression.json;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_FALSE;
import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_MISSING;
import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_NULL;
import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_TRUE;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.junit.jupiter.MockitoExtension;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.model.ExprValueUtils;
import org.opensearch.sql.exception.ExpressionEvaluationException;
import org.opensearch.sql.expression.DSL;
import org.opensearch.sql.expression.FunctionExpression;

@ExtendWith(MockitoExtension.class)
public class JsonFunctionsTest {
private static final ExprValue JsonNestedObject =
ExprValueUtils.stringValue("{\"a\":\"1\",\"b\":{\"c\":\"2\",\"d\":\"3\"}}");
private static final ExprValue JsonObject =
ExprValueUtils.stringValue("{\"a\":\"1\",\"b\":\"2\"}");
private static final ExprValue JsonArray = ExprValueUtils.stringValue("[1, 2, 3, 4]");
private static final ExprValue JsonScalarString = ExprValueUtils.stringValue("\"abc\"");
private static final ExprValue JsonEmptyString = ExprValueUtils.stringValue("");
private static final ExprValue JsonInvalidObject =
ExprValueUtils.stringValue("{\"invalid\":\"json\", \"string\"}");
private static final ExprValue JsonInvalidScalar = ExprValueUtils.stringValue("abc");

@Test
public void json_valid_returns_false() {
assertEquals(LITERAL_FALSE, execute(JsonInvalidObject));
assertEquals(LITERAL_FALSE, execute(JsonInvalidScalar));
assertEquals(LITERAL_FALSE, execute(LITERAL_NULL));
assertEquals(LITERAL_FALSE, execute(LITERAL_MISSING));
}

@Test
public void json_valid_throws_ExpressionEvaluationException() {
assertThrows(
ExpressionEvaluationException.class, () -> execute(ExprValueUtils.booleanValue(true)));
}

@Test
public void json_valid_returns_true() {
assertEquals(LITERAL_TRUE, execute(JsonNestedObject));
assertEquals(LITERAL_TRUE, execute(JsonObject));
assertEquals(LITERAL_TRUE, execute(JsonArray));
assertEquals(LITERAL_TRUE, execute(JsonScalarString));
assertEquals(LITERAL_TRUE, execute(JsonEmptyString));
}

private ExprValue execute(ExprValue jsonString) {
FunctionExpression exp = DSL.jsonValid(DSL.literal(jsonString));
return exp.valueOf();
}
}
1 change: 1 addition & 0 deletions docs/category.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
"user/ppl/functions/datetime.rst",
"user/ppl/functions/expressions.rst",
"user/ppl/functions/ip.rst",
"user/ppl/functions/json.rst",
"user/ppl/functions/math.rst",
"user/ppl/functions/relevance.rst",
"user/ppl/functions/string.rst"
Expand Down
3 changes: 2 additions & 1 deletion docs/user/dql/metadata.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ Example 1: Show All Indices Information
SQL query::

os> SHOW TABLES LIKE '%'
fetched rows / total rows = 10/10
fetched rows / total rows = 11/11
+----------------+-------------+-----------------+------------+---------+----------+------------+-----------+---------------------------+----------------+
| TABLE_CAT | TABLE_SCHEM | TABLE_NAME | TABLE_TYPE | REMARKS | TYPE_CAT | TYPE_SCHEM | TYPE_NAME | SELF_REFERENCING_COL_NAME | REF_GENERATION |
|----------------+-------------+-----------------+------------+---------+----------+------------+-----------+---------------------------+----------------|
Expand All @@ -44,6 +44,7 @@ SQL query::
| docTestCluster | null | accounts | BASE TABLE | null | null | null | null | null | null |
| docTestCluster | null | apache | BASE TABLE | null | null | null | null | null | null |
| docTestCluster | null | books | BASE TABLE | null | null | null | null | null | null |
| docTestCluster | null | json_test | BASE TABLE | null | null | null | null | null | null |
| docTestCluster | null | nested | BASE TABLE | null | null | null | null | null | null |
| docTestCluster | null | nyc_taxi | BASE TABLE | null | null | null | null | null | null |
| docTestCluster | null | people | BASE TABLE | null | null | null | null | null | null |
Expand Down
36 changes: 36 additions & 0 deletions docs/user/ppl/functions/json.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
====================
JSON Functions
====================

.. rubric:: Table of contents

.. contents::
:local:
:depth: 1

JSON_VALID
----------

Description
>>>>>>>>>>>

Usage: `json_valid(json_string)` checks if `json_string` is a valid JSON-encoded string.

Argument type: STRING

Return type: BOOLEAN

Example::

> source=json_test | eval is_valid = json_valid(json_string) | fields test_name, json_string, is_valid
fetched rows / total rows = 6/6
+---------------------+---------------------------------+----------+
| test_name | json_string | is_valid |
|---------------------|---------------------------------|----------|
| json nested object | {"a":"1","b":{"c":"2","d":"3"}} | True |
| json object | {"a":"1","b":"2"} | True |
| json array | [1, 2, 3, 4] | True |
| json scalar string | "abc" | True |
| json empty string | | True |
| json invalid object | {"invalid":"json", "string"} | False |
+---------------------+---------------------------------+----------+
6 changes: 6 additions & 0 deletions doctest/test_data/json_test.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{"test_name":"json nested object", "json_string":"{\"a\":\"1\",\"b\":{\"c\":\"2\",\"d\":\"3\"}}"}
{"test_name":"json object", "json_string":"{\"a\":\"1\",\"b\":\"2\"}"}
{"test_name":"json array", "json_string":"[1, 2, 3, 4]"}
{"test_name":"json scalar string", "json_string":"\"abc\""}
{"test_name":"json empty string","json_string":""}
{"test_name":"json invalid object", "json_string":"{\"invalid\":\"json\", \"string\"}"}
4 changes: 3 additions & 1 deletion doctest/test_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
NESTED = "nested"
DATASOURCES = ".ql-datasources"
WEBLOGS = "weblogs"
JSON_TEST = "json_test"

class DocTestConnection(OpenSearchConnection):

Expand Down Expand Up @@ -123,6 +124,7 @@ def set_up_test_indices(test):
load_file("nested_objects.json", index_name=NESTED)
load_file("datasources.json", index_name=DATASOURCES)
load_file("weblogs.json", index_name=WEBLOGS)
load_file("json_test.json", index_name=JSON_TEST)


def load_file(filename, index_name):
Expand Down Expand Up @@ -151,7 +153,7 @@ def set_up(test):

def tear_down(test):
# drop leftover tables after each test
test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD, NESTED, WEBLOGS], ignore_unavailable=True)
test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD, NESTED, WEBLOGS, JSON_TEST], ignore_unavailable=True)


docsuite = partial(doctest.DocFileSuite,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import static org.opensearch.sql.legacy.TestUtils.getGameOfThronesIndexMapping;
import static org.opensearch.sql.legacy.TestUtils.getGeopointIndexMapping;
import static org.opensearch.sql.legacy.TestUtils.getJoinTypeIndexMapping;
import static org.opensearch.sql.legacy.TestUtils.getJsonTestIndexMapping;
import static org.opensearch.sql.legacy.TestUtils.getLocationIndexMapping;
import static org.opensearch.sql.legacy.TestUtils.getMappingFile;
import static org.opensearch.sql.legacy.TestUtils.getNestedSimpleIndexMapping;
Expand Down Expand Up @@ -745,7 +746,12 @@ public enum Index {
TestsConstants.TEST_INDEX_GEOPOINT,
"dates",
getGeopointIndexMapping(),
"src/test/resources/geopoints.json");
"src/test/resources/geopoints.json"),
JSON_TEST(
TestsConstants.TEST_INDEX_JSON_TEST,
"json",
getJsonTestIndexMapping(),
"src/test/resources/json_test.json");

private final String name;
private final String type;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,11 @@ public static String getGeopointIndexMapping() {
return getMappingFile(mappingFile);
}

public static String getJsonTestIndexMapping() {
String mappingFile = "json_test_index_mapping.json";
return getMappingFile(mappingFile);
}

public static void loadBulk(Client client, String jsonPath, String defaultIndex)
throws Exception {
System.out.println(String.format("Loading file %s into opensearch cluster", jsonPath));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ public class TestsConstants {
public static final String TEST_INDEX_MULTI_NESTED_TYPE = TEST_INDEX + "_multi_nested";
public static final String TEST_INDEX_NESTED_WITH_NULLS = TEST_INDEX + "_nested_with_nulls";
public static final String TEST_INDEX_GEOPOINT = TEST_INDEX + "_geopoint";
public static final String TEST_INDEX_JSON_TEST = TEST_INDEX + "_json_test";
public static final String DATASOURCES = ".ql-datasources";

public static final String DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.ppl;

import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_JSON_TEST;
import static org.opensearch.sql.util.MatcherUtils.rows;
import static org.opensearch.sql.util.MatcherUtils.schema;
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
import static org.opensearch.sql.util.MatcherUtils.verifySchema;

import java.io.IOException;
import org.json.JSONObject;
import org.junit.jupiter.api.Test;

public class JsonFunctionsIT extends PPLIntegTestCase {
@Override
public void init() throws IOException {
loadIndex(Index.JSON_TEST);
}

@Test
public void test_json_valid() throws IOException {
JSONObject result;

result =
executeQuery(
String.format(
"source=%s | where json_valid(json_string) | fields test_name",
TEST_INDEX_JSON_TEST));
verifySchema(result, schema("test_name", null, "string"));
verifyDataRows(
result,
rows("json nested object"),
rows("json object"),
rows("json array"),
rows("json scalar string"),
rows("json empty string"));
}

@Test
public void test_not_json_valid() throws IOException {
JSONObject result;

result =
executeQuery(
String.format(
"source=%s | where not json_valid(json_string) | fields test_name",
TEST_INDEX_JSON_TEST));
verifySchema(result, schema("test_name", null, "string"));
verifyDataRows(result, rows("json invalid object"), rows("json null"));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"mappings": {
"properties": {
"test_name": {
"type": "keyword"
},
"json_string": {
"type": "keyword"
}
}
}
}
14 changes: 14 additions & 0 deletions integ-test/src/test/resources/json_test.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{"index":{"_id":"0"}}
{"test_name":"json nested object", "json_string":"{\"a\":\"1\",\"b\":{\"c\":\"2\",\"d\":\"3\"}}"}
{"index":{"_id":"1"}}
{"test_name":"json object", "json_string":"{\"a\":\"1\",\"b\":\"2\"}"}
{"index":{"_id":"2"}}
{"test_name":"json array", "json_string":"[1, 2, 3, 4]"}
{"index":{"_id":"3"}}
{"test_name":"json scalar string", "json_string":"\"abc\""}
{"index":{"_id":"4"}}
{"test_name":"json empty string","json_string":""}
{"index":{"_id":"5"}}
{"test_name":"json invalid object", "json_string":"{\"invalid\":\"json\", \"string\"}"}
{"index":{"_id":"6"}}
{"test_name":"json null", "json_string":null}
3 changes: 3 additions & 0 deletions ppl/src/main/antlr/OpenSearchPPLLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,9 @@ ISNULL: 'ISNULL';
ISNOTNULL: 'ISNOTNULL';
CIDRMATCH: 'CIDRMATCH';

// JSON FUNCTIONS
JSON_VALID: 'JSON_VALID';

// FLOWCONTROL FUNCTIONS
IFNULL: 'IFNULL';
NULLIF: 'NULLIF';
Expand Down
Loading

0 comments on commit d5806cc

Please sign in to comment.