From 694a391d686f382f6127b39eca54a8bd64670562 Mon Sep 17 00:00:00 2001 From: tkykenmt Date: Tue, 7 Jan 2025 18:36:11 +0900 Subject: [PATCH] Add support for Bedrock Rerank API #9027 Signed-off-by: tkykenmt --- _ml-commons-plugin/tutorials/index.md | 1 + .../tutorials/reranking-bedrock.md | 712 ++++++++++++++++++ 2 files changed, 713 insertions(+) create mode 100644 _ml-commons-plugin/tutorials/reranking-bedrock.md diff --git a/_ml-commons-plugin/tutorials/index.md b/_ml-commons-plugin/tutorials/index.md index 070da3cae1..8c4b42fb73 100644 --- a/_ml-commons-plugin/tutorials/index.md +++ b/_ml-commons-plugin/tutorials/index.md @@ -19,6 +19,7 @@ Using the OpenSearch machine learning (ML) framework, you can build various appl - **Reranking search results**: - [Reranking search results using the Cohere Rerank model]({{site.url}}{{site.baseurl}}/ml-commons-plugin/tutorials/reranking-cohere/) + - [Reranking search results using the Amazon Bedrock Rerank API]({{site.url}}{{site.baseurl}}/ml-commons-plugin/tutorials/reranking-bedrock/) - [Reranking search results using the MS MARCO cross-encoder model]({{site.url}}{{site.baseurl}}/ml-commons-plugin/tutorials/reranking-cross-encoder/) - **Agents and tools**: diff --git a/_ml-commons-plugin/tutorials/reranking-bedrock.md b/_ml-commons-plugin/tutorials/reranking-bedrock.md new file mode 100644 index 0000000000..27173887e7 --- /dev/null +++ b/_ml-commons-plugin/tutorials/reranking-bedrock.md @@ -0,0 +1,712 @@ +--- +layout: default +title: Reranking with Bedrock Rerank API +parent: Tutorials +nav_order: 32 +--- + +# Reranking search results using the Bedrock Rerank API + +[Reranking pipeline](https://opensearch.org/docs/latest/search-plugins/search-relevance/reranking-search-results/) is a feature released in OpenSearch 2.12. It can rerank search results, providing a relevance score with respect to the search query for each matching document. The relevance score is calculated by a cross-encoder model. 
+ +This tutorial illustrates using the [Amazon Bedrock Rerank API](https://docs.aws.amazon.com/bedrock/latest/APIReference/API_agent-runtime_Rerank.html) in a reranking pipeline. + +Replace the placeholders beginning with the prefix `your_` with your own values. +{: .note} + +## Step 0: Test the model on Amazon Bedrock +You can perform a reranking test using the following code. Supported reranker models are listed in [Supported Regions and models for reranking in Amazon Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/rerank-supported.html). You can find the model ID in [Supported foundation models in Amazon Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html). + +```python +import json +import boto3 +bedrock_region = "your_bedrock_model_region_like_us-west-2" +bedrock_agent_runtime_client = boto3.client("bedrock-agent-runtime", region_name=bedrock_region) + +model_id = "amazon.rerank-v1:0" + +response = bedrock_agent_runtime_client.rerank( + queries=[ + { + "textQuery": { + "text": "What is the capital city of America?", + }, + "type": "TEXT" + } + ], + rerankingConfiguration={ + "bedrockRerankingConfiguration": { + "modelConfiguration": { + "modelArn": f"arn:aws:bedrock:{bedrock_region}::foundation-model/{model_id}" + }, + }, + "type": "BEDROCK_RERANKING_MODEL" + }, + sources=[ + { + "inlineDocumentSource": { + "textDocument": { + "text": "Carson City is the capital city of the American state of Nevada.", + }, + "type": "TEXT" + }, + "type": "INLINE" + }, + { + "inlineDocumentSource": { + "textDocument": { + "text": "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.", + }, + "type": "TEXT" + }, + "type": "INLINE" + }, + { + "inlineDocumentSource": { + "textDocument": { + "text": "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. 
It is a federal district.", + }, + "type": "TEXT" + }, + "type": "INLINE" + }, + { + "inlineDocumentSource": { + "textDocument": { + "text": "Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states." + }, + "type": "TEXT" + }, + "type": "INLINE" + }, + ] +) + +results = response["results"] +print(json.dumps(results, indent=2)) +``` + +The reranking results are ordered by the highest score first: +```json +[ + { + "index": 2, + "relevanceScore": 0.7711548805236816 + }, + { + "index": 0, + "relevanceScore": 0.0025114635936915874 + }, + { + "index": 1, + "relevanceScore": 2.4876489987946115e-05 + }, + { + "index": 3, + "relevanceScore": 6.339210358419223e-06 + } +] +``` + +You can sort the results by index number. + +```python +print(json.dumps(sorted(results, key=lambda x: x['index']),indent=2)) +``` + +The results are as follows: +```json +[ + { + "index": 0, + "relevanceScore": 0.0025114635936915874 + }, + { + "index": 1, + "relevanceScore": 2.4876489987946115e-05 + }, + { + "index": 2, + "relevanceScore": 0.7711548805236816 + }, + { + "index": 3, + "relevanceScore": 6.339210358419223e-06 + } +] +``` + +## Step 1: Create a connector and register the model + +To create a connector for the model, send the following request. 
If you are using self-managed OpenSearch, supply your AWS credentials: +```json +POST /_plugins/_ml/connectors/_create +{ + "name": "Amazon Bedrock Rerank API", + "description": "Test connector for Amazon Bedrock Rerank API", + "version": 1, + "protocol": "aws_sigv4", + "credential": { + "access_key": "your_access_key", + "secret_key": "your_secret_key", + "session_token": "your_session_token" + }, + "parameters": { + "service_name": "bedrock", + "endpoint": "bedrock-agent-runtime", + "region": "your_bedrock_model_region_like_us-west-2", + "api_name": "rerank", + "model_id": "amazon.rerank-v1:0" + }, + "actions": [ + { + "action_type": "PREDICT", + "method": "POST", + "url": "https://${parameters.endpoint}.${parameters.region}.amazonaws.com/${parameters.api_name}", + "headers": { + "x-amz-content-sha256": "required", + "content-type": "application/json" + }, + "pre_process_function": "connector.pre_process.bedrock.rerank", + "request_body": """ + { + "queries": ${parameters.queries}, + "rerankingConfiguration": { + "bedrockRerankingConfiguration": { + "modelConfiguration": { + "modelArn": "arn:aws:bedrock:${parameters.region}::foundation-model/${parameters.model_id}" + } + }, + "type": "BEDROCK_RERANKING_MODEL" + }, + "sources": ${parameters.sources} + } + """, + "post_process_function": "connector.post_process.bedrock.rerank" + } + ] +} +``` +{% include copy-curl.html %} + +If you are using Amazon OpenSearch Service, you can provide an IAM role ARN that allows access to the Amazon Bedrock service. 
For more information, see [AWS documentation](https://docs.aws.amazon.com/opensearch-service/latest/developerguide/ml-amazon-connector.html): + +```json +POST /_plugins/_ml/connectors/_create +{ + "name": "Amazon Bedrock Rerank API", + "description": "Test connector for Amazon Bedrock Rerank API", + "version": 1, + "protocol": "aws_sigv4", + "credential": { + "roleArn": "your_role_arn_which_allows_access_to_bedrock_agent_runtime_rerank_api" + }, + "parameters": { + "service_name": "bedrock", + "endpoint": "bedrock-agent-runtime", + "region": "your_bedrock_model_region_like_us-west-2", + "api_name": "rerank", + "model_id": "amazon.rerank-v1:0" + }, + "actions": [ + { + "action_type": "PREDICT", + "method": "POST", + "url": "https://${parameters.endpoint}.${parameters.region}.amazonaws.com/${parameters.api_name}", + "headers": { + "x-amz-content-sha256": "required", + "content-type": "application/json" + }, + "pre_process_function": "connector.pre_process.bedrock.rerank", + "request_body": """ + { + "queries": ${parameters.queries}, + "rerankingConfiguration": { + "bedrockRerankingConfiguration": { + "modelConfiguration": { + "modelArn": "arn:aws:bedrock:${parameters.region}::foundation-model/${parameters.model_id}" + } + }, + "type": "BEDROCK_RERANKING_MODEL" + }, + "sources": ${parameters.sources} + } + """, + "post_process_function": "connector.post_process.bedrock.rerank" + } + ] +} +``` +{% include copy-curl.html %} + +Use the connector ID from the response to register and deploy the model: + +```json +POST /_plugins/_ml/models/_register?deploy=true +{ + "name": "Amazon Bedrock Rerank API", + "function_name": "remote", + "description": "test Amazon Bedrock Rerank API", + "connector_id": "your_connector_id" +} +``` +{% include copy-curl.html %} + +Note the model ID in the response; you'll use it in the following steps. 
+ +Test the model by using the Predict API: + +```json +POST _plugins/_ml/_predict/text_similarity/your_model_id +{ + "query_text": "What is the capital city of America?", + "text_docs": [ + "Carson City is the capital city of the American state of Nevada.", + "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.", + "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.", + "Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states." + ] +} +``` +{% include copy-curl.html %} + +Alternatively, you can test the model as follows. This query bypasses pre_process_function and calls the Rerank API: + +```json +POST _plugins/_ml/models/your_model_id/_predict +{ + "parameters": { + "queries": [ + { + "textQuery": { + "text": "What is the capital city of America?" + }, + "type": "TEXT" + } + ], + "sources": [ + { + "inlineDocumentSource": { + "textDocument": { + "text": "Carson City is the capital city of the American state of Nevada." + }, + "type": "TEXT" + }, + "type": "INLINE" + }, + { + "inlineDocumentSource": { + "textDocument": { + "text": "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan." + }, + "type": "TEXT" + }, + "type": "INLINE" + }, + { + "inlineDocumentSource": { + "textDocument": { + "text": "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district." + }, + "type": "TEXT" + }, + "type": "INLINE" + }, + { + "inlineDocumentSource": { + "textDocument": { + "text": "Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. 
As of 2017, capital punishment is legal in 30 of the 50 states." + }, + "type": "TEXT" + }, + "type": "INLINE" + } + ] + } +} +``` +{% include copy-curl.html %} + +The connector `pre_process_function` transforms the input into the format required by the previously shown parameters. + +By default, the Amazon Bedrock Rerank API output has the following format: +```json +[ + { + "index": 2, + "relevanceScore": 0.7711548724998493 + }, + { + "index": 0, + "relevanceScore": 0.0025114635138098534 + }, + { + "index": 1, + "relevanceScore": 2.4876490010363496e-05 + }, + { + "index": 3, + "relevanceScore": 6.339210403977635e-06 + } +] +``` + +The connector `post_process_function` transforms the model's output into a format that the [Reranker processor](https://opensearch.org/docs/latest/search-plugins/search-pipelines/rerank-processor/) can interpret, and orders the results by index. This adapted format is as follows: + +```json +{ + "inference_results": [ + { + "output": [ + { + "name": "similarity", + "data_type": "FLOAT32", + "shape": [ + 1 + ], + "data": [ + 0.0025114636 + ] + }, + { + "name": "similarity", + "data_type": "FLOAT32", + "shape": [ + 1 + ], + "data": [ + 2.487649e-05 + ] + }, + { + "name": "similarity", + "data_type": "FLOAT32", + "shape": [ + 1 + ], + "data": [ + 0.7711549 + ] + }, + { + "name": "similarity", + "data_type": "FLOAT32", + "shape": [ + 1 + ], + "data": [ + 6.3392104e-06 + ] + } + ], + "status_code": 200 + } + ] +} +``` + +Explanation of the response: +1. The response contains four `similarity` outputs. For each `similarity` output, the `data` array contains a relevance score of each document against the query. +2. The `similarity` outputs are provided in the order of the input documents; the first similarity result pertains to the first document. 
+ +## Step 2: Create a reranking pipeline + +### Step 2.1: Ingest test data + +```json +POST _bulk +{ "index": { "_index": "my-test-data" } } +{ "passage_text" : "Carson City is the capital city of the American state of Nevada." } +{ "index": { "_index": "my-test-data" } } +{ "passage_text" : "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan." } +{ "index": { "_index": "my-test-data" } } +{ "passage_text" : "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district." } +{ "index": { "_index": "my-test-data" } } +{ "passage_text" : "Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states." } +``` +{% include copy-curl.html %} + +### Step 2.2: Create a reranking pipeline + +```json +PUT /_search/pipeline/rerank_pipeline_bedrock +{ + "description": "Pipeline for reranking with Bedrock rerank model", + "response_processors": [ + { + "rerank": { + "ml_opensearch": { + "model_id": "your_model_id_created_in_step1" + }, + "context": { + "document_fields": ["passage_text"] + } + } + } + ] +} +``` +{% include copy-curl.html %} + +If you provide multiple field names in `document_fields`, the values of all fields are first concatenated and then reranking is performed. +{: .note} + +### Step 2.3: Test reranking + +First, test the query without using the reranking pipeline: + +```json +POST my-test-data/_search +{ + "query": { + "match": { + "passage_text": "What is the capital city of America?" 
+ } + }, + "highlight": { + "pre_tags": [""], + "post_tags": [""], + "fields": {"passage_text": {}} + }, + "_source": false, + "fields": ["passage_text"] +} +``` +{% include copy-curl.html %} + +The first document in the response is `Carson City is the capital city of the American state of Nevada`, which is incorrect: + +```json +{ + "took": 2, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 4, + "relation": "eq" + }, + "max_score": 2.5045562, + "hits": [ + { + "_index": "my-test-data", + "_id": "1", + "_score": 2.5045562, + "fields": { + "passage_text": [ + "Carson City is the capital city of the American state of Nevada." + ] + }, + "highlight": { + "passage_text": [ + "Carson City is the capital city of the American state of Nevada." + ] + } + }, + { + "_index": "my-test-data", + "_id": "2", + "_score": 0.5807494, + "fields": { + "passage_text": [ + "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan." + ] + }, + "highlight": { + "passage_text": [ + "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean.", + "Its capital is Saipan." + ] + } + }, + { + "_index": "my-test-data", + "_id": "3", + "_score": 0.5261191, + "fields": { + "passage_text": [ + "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district." + ] + }, + "highlight": { + "passage_text": [ + "(also known as simply Washington or D.C., and officially as the District of Columbia) is the capital", + "of the United States.", + "It is a federal district." + ] + } + }, + { + "_index": "my-test-data", + "_id": "4", + "_score": 0.5083029, + "fields": { + "passage_text": [ + "Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. 
As of 2017, capital punishment is legal in 30 of the 50 states." + ] + }, + "highlight": { + "passage_text": [ + "Capital punishment (the death penalty) has existed in the United States since beforethe United States", + "As of 2017, capital punishment is legal in 30 of the 50 states." + ] + } + } + ] + } +} +``` + +Next, test the query using the reranking pipeline: + +```json +POST my-test-data/_search?search_pipeline=rerank_pipeline_bedrock +{ + "query": { + "match": { + "passage_text": "What is the capital city of America?" + } + }, + "ext": { + "rerank": { + "query_context": { + "query_text": "What is the capital city of America?" + } + } + }, + "highlight": { + "pre_tags": [""], + "post_tags": [""], + "fields": {"passage_text": {}} + }, + "_source": false, + "fields": ["passage_text"] +} +``` +{% include copy-curl.html %} + +The first document in the response is `"Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district."`, which is correct: + +```json +{ + "took": 2, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 4, + "relation": "eq" + }, + "max_score": 0.7711549, + "hits": [ + { + "_index": "my-test-data", + "_id": "3", + "_score": 0.7711549, + "fields": { + "passage_text": [ + "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district." + ] + }, + "highlight": { + "passage_text": [ + "(also known as simply Washington or D.C., and officially as the District of Columbia) is the capital", + "of the United States.", + "It is a federal district." + ] + } + }, + { + "_index": "my-test-data", + "_id": "1", + "_score": 0.0025114636, + "fields": { + "passage_text": [ + "Carson City is the capital city of the American state of Nevada." 
+ ] + }, + "highlight": { + "passage_text": [ + "Carson City is the capital city of the American state of Nevada." + ] + } + }, + { + "_index": "my-test-data", + "_id": "2", + "_score": 2.487649e-05, + "fields": { + "passage_text": [ + "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan." + ] + }, + "highlight": { + "passage_text": [ + "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean.", + "Its capital is Saipan." + ] + } + }, + { + "_index": "my-test-data", + "_id": "4", + "_score": 6.3392104e-06, + "fields": { + "passage_text": [ + "Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states." + ] + }, + "highlight": { + "passage_text": [ + "Capital punishment (the death penalty) has existed in the United States since beforethe United States", + "As of 2017, capital punishment is legal in 30 of the 50 states." + ] + } + } + ] + }, + "profile": { + "shards": [] + } +} +``` + +You can avoid writing the query twice by using the `query_text_path` instead of `query_text` as follows: +{: .note} + +```json +POST my-test-data/_search?search_pipeline=rerank_pipeline_bedrock +{ + "query": { + "match": { + "passage_text": "What is the capital city of America?" + } + }, + "ext": { + "rerank": { + "query_context": { + "query_text_path": "query.match.passage_text.query" + } + } + }, + "highlight": { + "pre_tags": [""], + "post_tags": [""], + "fields": {"passage_text": {}} + }, + "_source": false, + "fields": ["passage_text"] +} +``` +{% include copy-curl.html %} +