Fixing create index and use case input parsing bugs #600

Merged: 4 commits, Mar 20, 2024
Changes from 1 commit
@@ -28,7 +28,7 @@ public enum DefaultUseCases {
COHERE_EMBEDDING_MODEL_DEPLOY(
"cohere-embedding_model_deploy",
"defaults/cohere-embedding-defaults.json",
"substitutionTemplates/deploy-remote-model-template-extra-params.json"
"substitutionTemplates/deploy-remote-model-extra-params-template.json"
),
/** defaults file and substitution ready template for Bedrock Titan embedding model */
BEDROCK_TITAN_EMBEDDING_MODEL_DEPLOY(
60 changes: 38 additions & 22 deletions src/main/java/org/opensearch/flowframework/util/ParseUtils.java
@@ -59,6 +59,7 @@
// Matches ${{ foo.bar }} (whitespace optional) with capturing groups 1=foo, 2=bar
// private static final Pattern SUBSTITUTION_PATTERN = Pattern.compile("\\$\\{\\{\\s*(.+)\\.(.+?)\\s*\\}\\}");
private static final Pattern SUBSTITUTION_PATTERN = Pattern.compile("\\$\\{\\{\\s*([\\w_]+)\\.([\\w_]+)\\s*\\}\\}");
private static final Pattern JSON_ARRAY_DOUBLE_QUOTES_PATTERN = Pattern.compile("\"\\[(.*?)]\"");

private ParseUtils() {}
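For orientation, here is a minimal standalone sketch (not part of the PR) of how these two patterns behave. The sample inputs — the `${{create_ingest_pipeline.model_id}}` placeholder and the `weights` fragment — are illustrative only.

```java
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class PatternSketch {
    // Same two patterns as in ParseUtils above.
    private static final Pattern SUBSTITUTION_PATTERN =
        Pattern.compile("\\$\\{\\{\\s*([\\w_]+)\\.([\\w_]+)\\s*\\}\\}");
    private static final Pattern JSON_ARRAY_DOUBLE_QUOTES_PATTERN =
        Pattern.compile("\"\\[(.*?)]\"");

    public static void main(String[] args) {
        // Substitution placeholder: group 1 is the node id, group 2 is the field name.
        Matcher sub = SUBSTITUTION_PATTERN.matcher("${{create_ingest_pipeline.model_id}}");
        if (sub.matches()) {
            System.out.println(sub.group(1)); // create_ingest_pipeline
            System.out.println(sub.group(2)); // model_id
        }

        // New pattern: finds a JSON array that has been wrapped in double quotes.
        Matcher arr = JSON_ARRAY_DOUBLE_QUOTES_PATTERN.matcher("{\"weights\": \"[0.7, 0.3]\"}");
        if (arr.find()) {
            System.out.println(arr.group(1)); // 0.7, 0.3
        }
    }
}
```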

@@ -70,7 +71,7 @@
* @param json the json string
* @return The XContent parser for the json string
* @throws IOException on failure to create the parser
*/
*/
public static XContentParser jsonToParser(String json) throws IOException {
XContentParser parser = JsonXContent.jsonXContent.createParser(
NamedXContentRegistry.EMPTY,
@@ -104,7 +105,7 @@
* Builds an XContent object representing a map of String keys to String values.
*
* @param xContentBuilder An XContent builder whose position is at the start of the map object to build
* @param map A map as key-value String pairs.
* @param map A map as key-value String pairs.
* @throws IOException on a build failure
*/
public static void buildStringToStringMap(XContentBuilder xContentBuilder, Map<?, ?> map) throws IOException {
@@ -119,7 +120,7 @@
* Builds an XContent object representing a map of String keys to Object values.
*
* @param xContentBuilder An XContent builder whose position is at the start of the map object to build
* @param map A map as key-value String to Object.
* @param map A map as key-value String to Object.
* @throws IOException on a build failure
*/
public static void buildStringToObjectMap(XContentBuilder xContentBuilder, Map<?, ?> map) throws IOException {
@@ -138,7 +139,7 @@
* Builds an XContent object representing a LLMSpec.
*
* @param xContentBuilder An XContent builder whose position is at the start of the map object to build
* @param llm LLMSpec
* @param llm LLMSpec
* @throws IOException on a build failure
*/
public static void buildLLMMap(XContentBuilder xContentBuilder, LLMSpec llm) throws IOException {
@@ -171,6 +172,7 @@
* Parses an XContent object representing a map of String keys to Object values.
* The Object value here can either be a string or a map
* If an array is found in the given parser we convert the array to a string representation of the array
*
* @param parser An XContent parser whose position is at the start of the map object to parse
* @return A map as identified by the key-value pairs in the XContent
* @throws IOException on a parse failure
@@ -187,12 +189,15 @@
} else if (parser.currentToken() == XContentParser.Token.START_ARRAY) {
// If an array, parse it to a string
// Handle array: convert it to a string representation
List<String> elements = new ArrayList<>();
while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
elements.add("\"" + parser.text() + "\""); // Adding escaped quotes around each element
if (parser.currentToken().equals(XContentParser.Token.VALUE_NUMBER)) {
elements.add(String.valueOf(parser.numberValue())); // If number value don't add escaping quotes
} else {
elements.add("\"" + parser.text() + "\""); // Adding escaped quotes around each element
}
}
String arrayString = "[" + String.join(", ", elements) + "]";
map.put(fieldName, arrayString);
map.put(fieldName, elements.toString());
} else {
// Otherwise, parse it as a string
map.put(fieldName, parser.text());
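Roughly, the new array branch behaves like the standalone sketch below. The helper name `arrayFieldToString` and the plain `List<Object>` input are stand-ins for the `XContentParser` loop, used here only for illustration: numbers are kept unquoted, other values get escaped quotes, and the joined list is stored as a single string.

```java
import java.util.ArrayList;
import java.util.List;

public class ArrayFieldSketch {

    // Mimics the array handling above without an XContentParser.
    static String arrayFieldToString(List<Object> values) {
        List<String> elements = new ArrayList<>();
        for (Object value : values) {
            if (value instanceof Number) {
                elements.add(String.valueOf(value));   // numbers stay unquoted
            } else {
                elements.add("\"" + value + "\"");     // strings get escaped quotes
            }
        }
        return elements.toString();                    // e.g. [0.7, 0.3] or ["text", "image"]
    }

    public static void main(String[] args) {
        System.out.println(arrayFieldToString(List.of(0.7, 0.3)));         // [0.7, 0.3]
        System.out.println(arrayFieldToString(List.of("text", "image")));  // ["text", "image"]
    }
}
```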
@@ -220,6 +225,7 @@
* (e.g., john||own_index,testrole|__user__, no backend role so you see two vertical lines after john.).
* This is the user string format used internally in the OPENSEARCH_SECURITY_USER_INFO_THREAD_CONTEXT and may be
* parsed using User.parse(string).
*
* @param client Client containing user info. A public API request will fill in the user info in the thread context.
* @return parsed user object
*/
@@ -233,7 +239,7 @@
* Creates a XContentParser from a given Registry
*
* @param xContentRegistry main registry for serializable content
* @param bytesReference given bytes to be parsed
* @param bytesReference given bytes to be parsed
* @return bytesReference of {@link java.time.Instant}
* @throws IOException IOException if content can't be parsed correctly
*/
@@ -244,7 +250,8 @@

/**
* Generates a string to string Map
* @param map content map
*
* @param map content map
* @param fieldName fieldName
* @return instance of the map
*/
@@ -260,15 +267,15 @@
* Creates a map containing the specified input keys, with values derived from template data or previous node
* output.
*
* @param requiredInputKeys A set of keys that must be present, or will cause an exception to be thrown
* @param optionalInputKeys A set of keys that may be present, or will be absent in the returned map
* @param currentNodeInputs Input params and content for this node, from workflow parsing
* @param outputs WorkflowData content of previous steps
* @param requiredInputKeys A set of keys that must be present, or will cause an exception to be thrown
* @param optionalInputKeys A set of keys that may be present, or will be absent in the returned map
* @param currentNodeInputs Input params and content for this node, from workflow parsing
* @param outputs WorkflowData content of previous steps
* @param previousNodeInputs Input params for this node that come from previous steps
* @param params Params that came from REST path
* @param params Params that came from REST path
* @return A map containing the requiredInputKeys with their corresponding values,
* and optionalInputKeys with their corresponding values if present.
* Throws a {@link FlowFrameworkException} if a required key is not present.
* and optionalInputKeys with their corresponding values if present.
* Throws a {@link FlowFrameworkException} if a required key is not present.
*/
public static Map<String, Object> getInputsFromPreviousSteps(
Set<String> requiredInputKeys,
@@ -357,9 +364,10 @@

/**
* Executes substitution on the given value by looking at any matching values in either the outputs or params map
* @param value the Object that will have the substitution done on
*
* @param value the Object that will have the substitution done on
* @param outputs potential location of values to be substituted in
* @param params potential location of values to be subsituted in
* @param params potential location of values to be subsituted in
* @return the substituted object back
*/
public static Object conditionallySubstitute(Object value, Map<String, WorkflowData> outputs, Map<String, String> params) {
@@ -403,6 +411,7 @@

/**
* Generates a string based on an arbitrary String to object map using Jackson
*
* @param map content map
* @return instance of the string
* @throws JsonProcessingException JsonProcessingException from Jackson for issues processing map
Expand All @@ -415,6 +424,7 @@

/**
* Generates a String to String map based on a Json File
*
* @param path file path
* @return instance of the string
* @throws JsonProcessingException JsonProcessingException from Jackson for issues processing map
@@ -430,15 +440,21 @@
* (e.g. "[\"text\", \"hello\"]" to "["text", "hello"]"), this is needed for processors that take in string arrays,
* This also removes the quotations around the array making the array valid to consume
* (e.g. "weights": "[0.7, 0.3]" to "weights": [0.7, 0.3])
*
* @param input The inputString given to be transformed
* @return the transformed string
*/
public static String removingBackslashesAndQuotesInArrayInJsonString(String input) {
return Pattern.compile("\"\\[(.*?)]\"").matcher(input).replaceAll(matchResult -> {
Matcher matcher = JSON_ARRAY_DOUBLE_QUOTES_PATTERN.matcher(input);
StringBuffer result = new StringBuffer();
while (matcher.find()) {
// Extract matched content and remove backslashes before quotes
String withoutEscapes = matchResult.group(1).replaceAll("\\\\\"", "\"");
String withoutEscapes = matcher.group(1).replaceAll("\\\\\"", "\"");
// Return the transformed string with the brackets but without the outer quotes
return "[" + withoutEscapes + "]";
});
matcher.appendReplacement(result, "[" + withoutEscapes + "]");
}
// Append remaining input after the last match
matcher.appendTail(result);
return result.toString();
}
}
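A quick usage sketch of the rewritten helper (assumes the flow-framework ParseUtils class is on the classpath; the JSON fragments mirror the examples in the Javadoc above):

```java
import org.opensearch.flowframework.util.ParseUtils;

public class ArrayUnquoteSketch {
    public static void main(String[] args) {
        // Escaped string array: "[\"text\", \"hello\"]" becomes ["text", "hello"]
        String contextFields = "{\"context_field_list\": \"[\\\"text\\\", \\\"hello\\\"]\"}";
        System.out.println(ParseUtils.removingBackslashesAndQuotesInArrayInJsonString(contextFields));
        // -> {"context_field_list": ["text", "hello"]}

        // Quoted numeric array: "[0.7, 0.3]" becomes [0.7, 0.3]
        String weights = "{\"weights\": \"[0.7, 0.3]\"}";
        System.out.println(ParseUtils.removingBackslashesAndQuotesInArrayInJsonString(weights));
        // -> {"weights": [0.7, 0.3]}
    }
}
```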
@@ -4,7 +4,6 @@
"create_connector.name": "Amazon Bedrock Connector: embedding",
"create_connector.description": "The connector to bedrock Titan embedding model",
"create_connector.region": "us-east-1",
"create_connector.endpoint": "api.openai.com",
"create_connector.credential.access_key": "123",
"create_connector.credential.secret_key": "123",
"create_connector.credential.session_token": "123",
@@ -1,11 +1,10 @@
{
"template.name": "deploy-bedrock-titan-multimodal-embedding-model",
"template.description": "deploying Amazon Bedrock Titan multimodal embedding model ",
"template.description": "Deploying Amazon Bedrock Titan multimodal embedding model ",
"create_connector.name": "Amazon Bedrock Connector: multi-modal embedding",
"create_connector.description": "The connector to bedrock Titan multi-modal embedding model",
"create_connector.region": "us-east-1",
"create_connector.input_docs_processed_step_size": 2,
"create_connector.endpoint": "api.openai.com",
"create_connector.input_docs_processed_step_size": "2",
"create_connector.credential.access_key": "123",
"create_connector.credential.secret_key": "123",
"create_connector.credential.session_token": "123",
2 changes: 1 addition & 1 deletion src/main/resources/defaults/cohere-chat-defaults.json
@@ -1,6 +1,6 @@
{
"template.name": "deploy-cohere-chat-model",
"template.description": "deploying cohere chat model",
"template.description": "Deploying a Cohere chat model",
"create_connector.name": "Cohere Chat Model",
"create_connector.description": "The connector to Cohere's public chat API",
"create_connector.protocol": "http",
@@ -1,6 +1,6 @@
{
"template.name": "deploy-cohere-model",
"template.description": "deploying cohere embedding model",
"template.description": "Deploying a Cohere embedding model",
"create_connector.name": "cohere-embedding-connector",
"create_connector.description": "The connector to Cohere's public embed API",
"create_connector.protocol": "http",
@@ -1,6 +1,6 @@
{
"template.name": "semantic search with cohere embedding",
"template.description": "Setting up semantic search, with cohere embedding model",
"template.description": "Setting up semantic search, with a Cohere embedding model",
"create_connector.name": "cohere-embedding-connector",
"create_connector.description": "The connector to Cohere's public embed API",
"create_connector.protocol": "http",
@@ -1,6 +1,6 @@
{
"template.name": "deploy-cohere-chat-model",
"template.description": "deploying cohere chat model",
"template.description": "A template to deploy a Cohere chat model",
"create_connector.name": "Cohere Chat Model",
"create_connector.description": "The connector to Cohere's public chat API",
"create_connector.protocol": "http",
@@ -13,7 +13,7 @@
"register_remote_model.description": "cohere-chat-model",
"create_search_pipeline.pipeline_id": "rag-pipeline",
"create_search_pipeline.retrieval_augmented_generation.tag": "openai_pipeline_demo",
"create_search_pipeline.retrieval_augmented_generation.description": "Demo pipeline Using cohere Connector",
"create_search_pipeline.retrieval_augmented_generation.description": "Demo pipeline using a Cohere chat model",
"create_search_pipeline.retrieval_augmented_generation.context_field_list": "[\"text\"]",
"create_search_pipeline.retrieval_augmented_generation.system_prompt": "You are a helpful assistant",
"create_search_pipeline.retrieval_augmented_generation.user_instructions": "Generate a concise and informative answer in less than 100 words for the given question"
@@ -1,6 +1,6 @@
{
"template.name": "local-model-neural-sparse-search",
"template.description": "setting up neural sparse search with local model",
"template.description": "Setting up neural sparse search with pretrained local model",
"register_local_sparse_encoding_model.name": "amazon/neural-sparse/opensearch-neural-sparse-encoding-v1",
"register_local_sparse_encoding_model.description": "This is a neural sparse encoding model",
"register_local_sparse_encoding_model.model_format": "TORCH_SCRIPT",
@@ -4,12 +4,12 @@
"create_ingest_pipeline.pipeline_id": "nlp-multimodal-ingest-pipeline",
"create_ingest_pipeline.description": "A text/image embedding pipeline",
"create_ingest_pipeline.model_id": "123",
"create_ingest_pipeline.embedding": "vector_embedding",
"text_image_embedding.embedding": "vector_embedding",
"text_image_embedding.field_map.text": "image_description",
"text_image_embedding.field_map.image": "image_binary",
"create_index.name": "my-multimodal-nlp-index",
"create_index.settings.number_of_shards": 2,
"text_image_embedding.field_map.output.dimension": 1024,
"create_index.settings.number_of_shards": "2",
"text_image_embedding.field_map.output.dimension": "1024",
"create_index.mappings.method.engine": "lucene",
"create_index.mappings.method.name": "hnsw"
}
@@ -17,7 +17,7 @@
"register_remote_model.description": "bedrock-multi-modal-embedding-model",
"create_ingest_pipeline.pipeline_id": "nlp-multimodal-ingest-pipeline",
"create_ingest_pipeline.description": "A text/image embedding pipeline",
"text_image_embedding.create_ingest_pipeline.embedding": "vector_embedding",
"text_image_embedding.embedding": "vector_embedding",
"text_image_embedding.field_map.text": "image_description",
"text_image_embedding.field_map.image": "image_binary",
"create_index.name": "my-multimodal-nlp-index",
2 changes: 1 addition & 1 deletion src/main/resources/defaults/openai-chat-defaults.json
@@ -1,6 +1,6 @@
{
"template.name": "deploy-openai-chat-model",
"template.description": "deploying openAI chat model",
"template.description": "Deploying an OpenAI chat model",
"create_connector.name": "OpenAI Chat Connector",
"create_connector.description": "Connector to public OpenAI model",
"create_connector.protocol": "http",
@@ -1,6 +1,6 @@
{
"template.name": "deploy-openai-model",
"template.description": "deploying openAI embedding model",
"template.description": "Deploying an OpenAI embedding model",
"create_connector.name": "OpenAI-embedding-connector",
"create_connector.description": "Connector to public OpenAI model",
"create_connector.protocol": "http",
@@ -26,7 +26,7 @@
"input_docs_processed_step_size": "${{create_connector.input_docs_processed_step_size}}"
},
"credential": {
"access_ key": "${{create_connector.credential.access_key}}",
"access_key": "${{create_connector.credential.access_key}}",
"secret_key": "${{create_connector.credential.secret_key}}",
"session_token": "${{create_connector.credential.session_token}}"
},
@@ -44,7 +44,7 @@
"id": "register_model",
"type": "register_remote_model",
"previous_node_inputs": {
"create_connector_step_1": "parameters"
"create_connector": "parameters"
},
"user_inputs": {
"name": "${{register_remote_model.name}}",
@@ -56,7 +56,7 @@
"id": "deploy_model",
"type": "deploy_model",
"previous_node_inputs": {
"register_model_1": "model_id"
"register_model": "model_id"
}
}
],
@@ -46,7 +46,7 @@
"id": "register_model",
"type": "register_remote_model",
"previous_node_inputs": {
"create_connector_step_1": "parameters"
"create_connector": "parameters"
},
"user_inputs": {
"name": "${{register_remote_model.name}}",
@@ -58,7 +58,7 @@
"id": "deploy_model",
"type": "deploy_model",
"previous_node_inputs": {
"register_model_1": "model_id"
"register_model": "model_id"
}
}
],
@@ -23,7 +23,7 @@
{
"text_image_embedding": {
"model_id": "${{create_ingest_pipeline.model_id}}",
"embedding": "${{create_ingest_pipeline.embedding}}",
"embedding": "${{text_image_embedding.embedding}}",
"field_map": {
"text": "${{text_image_embedding.field_map.text}}",
"image": "${{text_image_embedding.field_map.image}}"
@@ -53,7 +53,7 @@
"id": {
"type": "text"
},
"${{text_embedding.field_map.output}}": {
"${{text_image_embedding.embedding}}": {
"type": "knn_vector",
"dimension": "${{text_image_embedding.field_map.output.dimension}}",
"method": {