From 804ccf7225f94ffc38486c538eb4cc00756eb0ff Mon Sep 17 00:00:00 2001 From: Elastic Machine Date: Mon, 20 Jan 2025 06:04:15 +0000 Subject: [PATCH] Auto-generated API code --- .../120fcf9f55128d6a81d5e87a9c235bbd.asciidoc | 20 + ...141ef0ebaa3b0772892b79b9bb85efb0.asciidoc} | 8 +- .../31bc93e429ad0de11dd2dd231e8f2c5e.asciidoc | 10 - .../45954b8aaedfed57012be8b6538b0a24.asciidoc | 47 + .../519e46350316a33162740e5d7968aa2c.asciidoc | 20 + ...77cebba946fe648873a1e7375c13df41.asciidoc} | 3 +- .../82bb6c61dab959f4446dc5ecab7ecbdf.asciidoc | 34 + docs/reference.asciidoc | 1661 ++++++++++++++--- src/api/api/bulk.ts | 2 +- src/api/api/count.ts | 2 +- src/api/api/create.ts | 2 +- src/api/api/delete.ts | 2 +- src/api/api/esql.ts | 72 +- src/api/api/exists.ts | 2 +- src/api/api/exists_source.ts | 2 +- src/api/api/get.ts | 2 +- src/api/api/get_source.ts | 2 +- src/api/api/index.ts | 2 +- src/api/api/indices.ts | 1 + src/api/api/inference.ts | 75 +- src/api/api/reindex.ts | 2 +- src/api/api/reindex_rethrottle.ts | 2 +- src/api/api/security.ts | 66 +- src/api/api/snapshot.ts | 20 +- src/api/api/tasks.ts | 6 +- src/api/api/update.ts | 2 +- src/api/types.ts | 197 +- src/api/typesWithBodyKey.ts | 207 +- 28 files changed, 2141 insertions(+), 330 deletions(-) create mode 100644 docs/doc_examples/120fcf9f55128d6a81d5e87a9c235bbd.asciidoc rename docs/doc_examples/{ddaadd91b7743a1c7e946ce1b593cd1b.asciidoc => 141ef0ebaa3b0772892b79b9bb85efb0.asciidoc} (65%) delete mode 100644 docs/doc_examples/31bc93e429ad0de11dd2dd231e8f2c5e.asciidoc create mode 100644 docs/doc_examples/45954b8aaedfed57012be8b6538b0a24.asciidoc create mode 100644 docs/doc_examples/519e46350316a33162740e5d7968aa2c.asciidoc rename docs/doc_examples/{9d47f02a063444da9f098858a1830d28.asciidoc => 77cebba946fe648873a1e7375c13df41.asciidoc} (66%) create mode 100644 docs/doc_examples/82bb6c61dab959f4446dc5ecab7ecbdf.asciidoc diff --git a/docs/doc_examples/120fcf9f55128d6a81d5e87a9c235bbd.asciidoc b/docs/doc_examples/120fcf9f55128d6a81d5e87a9c235bbd.asciidoc new file mode 100644 index 000000000..f6c1cb881 --- /dev/null +++ b/docs/doc_examples/120fcf9f55128d6a81d5e87a9c235bbd.asciidoc @@ -0,0 +1,20 @@ +// This file is autogenerated, DO NOT EDIT +// Use `node scripts/generate-docs-examples.js` to generate the docs examples + +[source, js] +---- +const response = await client.transport.request({ + method: "POST", + path: "/_inference/chat_completion/openai-completion/_stream", + body: { + model: "gpt-4o", + messages: [ + { + role: "user", + content: "What is Elastic?", + }, + ], + }, +}); +console.log(response); +---- diff --git a/docs/doc_examples/ddaadd91b7743a1c7e946ce1b593cd1b.asciidoc b/docs/doc_examples/141ef0ebaa3b0772892b79b9bb85efb0.asciidoc similarity index 65% rename from docs/doc_examples/ddaadd91b7743a1c7e946ce1b593cd1b.asciidoc rename to docs/doc_examples/141ef0ebaa3b0772892b79b9bb85efb0.asciidoc index bd9c35360..7d7aeab98 100644 --- a/docs/doc_examples/ddaadd91b7743a1c7e946ce1b593cd1b.asciidoc +++ b/docs/doc_examples/141ef0ebaa3b0772892b79b9bb85efb0.asciidoc @@ -3,11 +3,13 @@ [source, js] ---- -const response = await client.inference.inference({ +const response = await client.inference.put({ task_type: "my-inference-endpoint", inference_id: "_update", - service_settings: { - api_key: "", + inference_config: { + service_settings: { + api_key: "", + }, }, }); console.log(response); diff --git a/docs/doc_examples/31bc93e429ad0de11dd2dd231e8f2c5e.asciidoc b/docs/doc_examples/31bc93e429ad0de11dd2dd231e8f2c5e.asciidoc deleted file mode 100644 
index 36c1c5a79..000000000 --- a/docs/doc_examples/31bc93e429ad0de11dd2dd231e8f2c5e.asciidoc +++ /dev/null @@ -1,10 +0,0 @@ -// This file is autogenerated, DO NOT EDIT -// Use `node scripts/generate-docs-examples.js` to generate the docs examples - -[source, js] ----- -const response = await client.indices.unfreeze({ - index: "my-index-000001", -}); -console.log(response); ----- diff --git a/docs/doc_examples/45954b8aaedfed57012be8b6538b0a24.asciidoc b/docs/doc_examples/45954b8aaedfed57012be8b6538b0a24.asciidoc new file mode 100644 index 000000000..a2ff623e6 --- /dev/null +++ b/docs/doc_examples/45954b8aaedfed57012be8b6538b0a24.asciidoc @@ -0,0 +1,47 @@ +// This file is autogenerated, DO NOT EDIT +// Use `node scripts/generate-docs-examples.js` to generate the docs examples + +[source, js] +---- +const response = await client.transport.request({ + method: "POST", + path: "/_inference/chat_completion/openai-completion/_stream", + body: { + messages: [ + { + role: "user", + content: [ + { + type: "text", + text: "What's the price of a scarf?", + }, + ], + }, + ], + tools: [ + { + type: "function", + function: { + name: "get_current_price", + description: "Get the current price of an item", + parameters: { + type: "object", + properties: { + item: { + id: "123", + }, + }, + }, + }, + }, + ], + tool_choice: { + type: "function", + function: { + name: "get_current_price", + }, + }, + }, +}); +console.log(response); +---- diff --git a/docs/doc_examples/519e46350316a33162740e5d7968aa2c.asciidoc b/docs/doc_examples/519e46350316a33162740e5d7968aa2c.asciidoc new file mode 100644 index 000000000..3c92986f6 --- /dev/null +++ b/docs/doc_examples/519e46350316a33162740e5d7968aa2c.asciidoc @@ -0,0 +1,20 @@ +// This file is autogenerated, DO NOT EDIT +// Use `node scripts/generate-docs-examples.js` to generate the docs examples + +[source, js] +---- +const response = await client.search({ + index: "image-index", + knn: { + field: "image-vector", + query_vector: [-5, 9, -12], + k: 10, + num_candidates: 100, + rescore_vector: { + oversample: 2, + }, + }, + fields: ["title", "file-type"], +}); +console.log(response); +---- diff --git a/docs/doc_examples/9d47f02a063444da9f098858a1830d28.asciidoc b/docs/doc_examples/77cebba946fe648873a1e7375c13df41.asciidoc similarity index 66% rename from docs/doc_examples/9d47f02a063444da9f098858a1830d28.asciidoc rename to docs/doc_examples/77cebba946fe648873a1e7375c13df41.asciidoc index b10da9a05..a09e089bb 100644 --- a/docs/doc_examples/9d47f02a063444da9f098858a1830d28.asciidoc +++ b/docs/doc_examples/77cebba946fe648873a1e7375c13df41.asciidoc @@ -5,7 +5,8 @@ ---- const response = await client.cluster.putSettings({ persistent: { - "cluster.routing.allocation.disk.watermark.low": "30gb", + "cluster.routing.allocation.disk.watermark.low": "90%", + "cluster.routing.allocation.disk.watermark.high": "95%", }, }); console.log(response); diff --git a/docs/doc_examples/82bb6c61dab959f4446dc5ecab7ecbdf.asciidoc b/docs/doc_examples/82bb6c61dab959f4446dc5ecab7ecbdf.asciidoc new file mode 100644 index 000000000..7c7a7cba1 --- /dev/null +++ b/docs/doc_examples/82bb6c61dab959f4446dc5ecab7ecbdf.asciidoc @@ -0,0 +1,34 @@ +// This file is autogenerated, DO NOT EDIT +// Use `node scripts/generate-docs-examples.js` to generate the docs examples + +[source, js] +---- +const response = await client.transport.request({ + method: "POST", + path: "/_inference/chat_completion/openai-completion/_stream", + body: { + messages: [ + { + role: "assistant", + content: "Let's find out what the weather is",
+ tool_calls: [ + { + id: "call_KcAjWtAww20AihPHphUh46Gd", + type: "function", + function: { + name: "get_current_weather", + arguments: '{"location":"Boston, MA"}', + }, + }, + ], + }, + { + role: "tool", + content: "The weather is cold", + tool_call_id: "call_KcAjWtAww20AihPHphUh46Gd", + }, + ], + }, +}); +console.log(response); +---- diff --git a/docs/reference.asciidoc b/docs/reference.asciidoc index 840060bdf..8a4bd21d8 100644 --- a/docs/reference.asciidoc +++ b/docs/reference.asciidoc @@ -28,9 +28,116 @@ [discrete] === bulk Bulk index or delete documents. -Performs multiple indexing or delete operations in a single API call. +Perform multiple `index`, `create`, `delete`, and `update` actions in a single request. This reduces overhead and can greatly increase indexing speed. +If the Elasticsearch security features are enabled, you must have the following index privileges for the target data stream, index, or index alias: + +* To use the `create` action, you must have the `create_doc`, `create`, `index`, or `write` index privilege. Data streams support only the `create` action. +* To use the `index` action, you must have the `create`, `index`, or `write` index privilege. +* To use the `delete` action, you must have the `delete` or `write` index privilege. +* To use the `update` action, you must have the `index` or `write` index privilege. +* To automatically create a data stream or index with a bulk API request, you must have the `auto_configure`, `create_index`, or `manage` index privilege. +* To make the result of a bulk operation visible to search using the `refresh` parameter, you must have the `maintenance` or `manage` index privilege. + +Automatic data stream creation requires a matching index template with data stream enabled. + +The actions are specified in the request body using a newline delimited JSON (NDJSON) structure: + +``` +action_and_meta_data\n +optional_source\n +action_and_meta_data\n +optional_source\n +.... +action_and_meta_data\n +optional_source\n +``` + +The `index` and `create` actions expect a source on the next line and have the same semantics as the `op_type` parameter in the standard index API. +A `create` action fails if a document with the same ID already exists in the target. +An `index` action adds or replaces a document as necessary. + +NOTE: Data streams support only the `create` action. +To update or delete a document in a data stream, you must target the backing index containing the document. + +An `update` action expects that the partial doc, upsert, and script and its options are specified on the next line. + +A `delete` action does not expect a source on the next line and has the same semantics as the standard delete API. + +NOTE: The final line of data must end with a newline character (`\n`). +Each newline character may be preceded by a carriage return (`\r`). +When sending NDJSON data to the `_bulk` endpoint, use a `Content-Type` header of `application/json` or `application/x-ndjson`. +Because this format uses literal newline characters (`\n`) as delimiters, make sure that the JSON actions and sources are not pretty printed. + +If you provide a target in the request path, it is used for any actions that don't explicitly specify an `_index` argument. + +A note on the format: the idea here is to make processing as fast as possible. +As some of the actions are redirected to other shards on other nodes, only `action_meta_data` is parsed on the receiving node side.
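With this client, those NDJSON action/source pairs are expressed as a flat `operations` array that the client serializes for you. A minimal sketch, assuming a hypothetical target index and illustrative documents:

[source,js]
----
// Each action object corresponds to one NDJSON metadata line; each plain
// object after an `index`, `create`, or `update` action is its source line.
// A `delete` action has no source line.
const response = await client.bulk({
  index: "my-index-000001", // hypothetical target, used when an action omits _index
  operations: [
    { index: { _id: "1" } },
    { title: "Document one" },
    { create: { _id: "2" } },
    { title: "Document two" },
    { update: { _id: "1" } },
    { doc: { title: "Document one, revised" } },
    { delete: { _id: "2" } },
  ],
});
console.log(response);
----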
+ +Client libraries using this protocol should strive to do something similar on the client side, and reduce buffering as much as possible. + +There is no "correct" number of actions to perform in a single bulk request. +Experiment with different settings to find the optimal size for your particular workload. +Note that Elasticsearch limits the maximum size of an HTTP request to 100mb by default so clients must ensure that no request exceeds this size. +It is not possible to index a single document that exceeds the size limit, so you must pre-process any such documents into smaller pieces before sending them to Elasticsearch. +For instance, split documents into pages or chapters before indexing them, or store raw binary data in a system outside Elasticsearch and replace the raw data with a link to the external system in the documents that you send to Elasticsearch. + +**Client support for bulk requests** + +Some of the officially supported clients provide helpers to assist with bulk requests and reindexing: + +* Go: Check out `esutil.BulkIndexer` +* Perl: Check out `Search::Elasticsearch::Client::5_0::Bulk` and `Search::Elasticsearch::Client::5_0::Scroll` +* Python: Check out `elasticsearch.helpers.*` +* JavaScript: Check out `client.helpers.*` +* .NET: Check out `BulkAllObservable` +* PHP: Check out bulk indexing. + +**Submitting bulk requests with cURL** + +If you're providing text file input to `curl`, you must use the `--data-binary` flag instead of plain `-d`. +The latter doesn't preserve newlines. For example: + +``` +$ cat requests +{ "index" : { "_index" : "test", "_id" : "1" } } +{ "field1" : "value1" } +$ curl -s -H "Content-Type: application/x-ndjson" -XPOST localhost:9200/_bulk --data-binary "@requests"; echo +{"took":7, "errors": false, "items":[{"index":{"_index":"test","_id":"1","_version":1,"result":"created","forced_refresh":false}}]} +``` + +**Optimistic concurrency control** + +Each `index` and `delete` action within a bulk API call may include the `if_seq_no` and `if_primary_term` parameters in their respective action and meta data lines. +The `if_seq_no` and `if_primary_term` parameters control how operations are run, based on the last modification to existing documents. See Optimistic concurrency control for more details. + +**Versioning** + +Each bulk item can include the version value using the `version` field. +It automatically follows the behavior of the index or delete operation based on the `_version` mapping. +It also supports the `version_type`. + +**Routing** + +Each bulk item can include the routing value using the `routing` field. +It automatically follows the behavior of the index or delete operation based on the `_routing` mapping. + +NOTE: Data streams do not support custom routing unless they were created with the `allow_custom_routing` setting enabled in the template. + +**Wait for active shards** + +When making bulk calls, you can set the `wait_for_active_shards` parameter to require a minimum number of shard copies to be active before starting to process the bulk request. + +**Refresh** + +Control when the changes made by this request are visible to search. + +NOTE: Only the shards that receive the bulk request will be affected by refresh. +Imagine a `_bulk?refresh=wait_for` request with three documents in it that happen to be routed to different shards in an index with five shards. +The request will only wait for those three shards to refresh. +The other two shards that make up the index do not participate in the `_bulk` request at all.
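As a hedged sketch of the optimistic concurrency control described above, an `index` action can carry `if_seq_no` and `if_primary_term` in its metadata line; the values shown are illustrative and would come from a prior read of the document:

[source,js]
----
// Sketch only: the action is applied only if the document has not been
// modified since the read that produced this sequence number and primary term.
const response = await client.bulk({
  index: "my-index-000001", // hypothetical index
  operations: [
    { index: { _id: "1", if_seq_no: 362, if_primary_term: 2 } },
    { title: "Applied only if the document is unchanged" },
  ],
});
console.log(response);
----

If the document was modified in the meantime, the corresponding item in the response reports a version conflict rather than failing the whole bulk request.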
+ {ref}/docs-bulk.html[Endpoint documentation] [source,ts] ---- client.bulk({ ... }) ---- [discrete] ==== Arguments * *Request (object):* -** *`index` (Optional, string)*: Name of the data stream, index, or index alias to perform bulk actions on. +** *`index` (Optional, string)*: The name of the data stream, index, or index alias to perform bulk actions on. ** *`operations` (Optional, { index, create, update, delete } | { detect_noop, doc, doc_as_upsert, script, scripted_upsert, _source, upsert } | object[])* -** *`list_executed_pipelines` (Optional, boolean)*: If `true`, the response will include the ingest pipelines that were executed for each index or create. -** *`pipeline` (Optional, string)*: ID of the pipeline to use to preprocess incoming documents. -If the index has a default ingest pipeline specified, then setting the value to `_none` disables the default ingest pipeline for this request. -If a final pipeline is configured it will always run, regardless of the value of this parameter. -** *`refresh` (Optional, Enum(true | false | "wait_for"))*: If `true`, Elasticsearch refreshes the affected shards to make this operation visible to search, if `wait_for` then wait for a refresh to make this operation visible to search, if `false` do nothing with refreshes. +** *`list_executed_pipelines` (Optional, boolean)*: If `true`, the response will include the ingest pipelines that were run for each index or create. +** *`pipeline` (Optional, string)*: The pipeline identifier to use to preprocess incoming documents. +If the index has a default ingest pipeline specified, setting the value to `_none` turns off the default ingest pipeline for this request. +If a final pipeline is configured, it will always run regardless of the value of this parameter. +** *`refresh` (Optional, Enum(true | false | "wait_for"))*: If `true`, Elasticsearch refreshes the affected shards to make this operation visible to search. +If `wait_for`, wait for a refresh to make this operation visible to search. +If `false`, do nothing with refreshes. Valid values: `true`, `false`, `wait_for`. -** *`routing` (Optional, string)*: Custom value used to route operations to a specific shard. -** *`_source` (Optional, boolean | string | string[])*: `true` or `false` to return the `_source` field or not, or a list of fields to return. +** *`routing` (Optional, string)*: A custom value that is used to route operations to a specific shard. +** *`_source` (Optional, boolean | string | string[])*: Indicates whether to return the `_source` field (`true` or `false`) or contains a list of fields to return. ** *`_source_excludes` (Optional, string | string[])*: A list of source fields to exclude from the response. +You can also use this parameter to exclude fields from the subset specified in the `_source_includes` query parameter. +If the `_source` parameter is `false`, this parameter is ignored. ** *`_source_includes` (Optional, string | string[])*: A list of source fields to include in the response. -** *`timeout` (Optional, string | -1 | 0)*: Period each action waits for the following operations: automatic index creation, dynamic mapping updates, waiting for active shards. +If this parameter is specified, only these source fields are returned. +You can exclude fields from this subset using the `_source_excludes` query parameter. +If the `_source` parameter is `false`, this parameter is ignored.
+** *`timeout` (Optional, string | -1 | 0)*: The period each action waits for the following operations: automatic index creation, dynamic mapping updates, and waiting for active shards. +The default is `1m` (one minute), which guarantees Elasticsearch waits for at least the timeout before failing. +The actual wait time could be longer, particularly when multiple waits occur. ** *`wait_for_active_shards` (Optional, number | Enum("all" | "index-setting"))*: The number of shard copies that must be active before proceeding with the operation. -Set to all or any positive integer up to the total number of shards in the index (`number_of_replicas+1`). -** *`require_alias` (Optional, boolean)*: If `true`, the request’s actions must target an index alias. -** *`require_data_stream` (Optional, boolean)*: If `true`, the request's actions must target a data stream (existing or to-be-created). +Set to `all` or any positive integer up to the total number of shards in the index (`number_of_replicas+1`). +The default is `1`, which waits for each primary shard to be active. +** *`require_alias` (Optional, boolean)*: If `true`, the request's actions must target an index alias. +** *`require_data_stream` (Optional, boolean)*: If `true`, the request's actions must target a data stream (existing or to be created). [discrete] === clear_scroll Clear a scrolling search. - Clear the search context and results for a scrolling search. {ref}/clear-scroll-api.html[Endpoint documentation] @@ -73,13 +189,13 @@ client.clearScroll({ ... }) ==== Arguments * *Request (object):* -** *`scroll_id` (Optional, string | string[])*: List of scroll IDs to clear. +** *`scroll_id` (Optional, string | string[])*: A list of scroll IDs to clear. To clear all scroll IDs, use `_all`. +IMPORTANT: Scroll IDs can be long. It is recommended to specify scroll IDs in the request body parameter. [discrete] === close_point_in_time Close a point in time. - A point in time must be opened explicitly before being used in search requests. The `keep_alive` parameter tells Elasticsearch how long it should persist. A point in time is automatically closed when the `keep_alive` period has elapsed. @@ -101,6 +217,15 @@ client.closePointInTime({ id }) Count search results. Get the number of documents matching a query. +The query can either be provided using a simple query string as a parameter or using the Query DSL defined within the request body. +The latter must be nested in a `query` key, which is the same as the search API. + +The count API supports multi-target syntax. You can run a single count API search across multiple data streams and indices. + +The operation is broadcast across all shards. +For each shard ID group, a replica is chosen and the search is run against it. +This means that replicas increase the scalability of the count. + {ref}/search-count.html[Endpoint documentation] [source,ts] ---- @@ -110,40 +235,117 @@ client.count({ ... }) ==== Arguments * *Request (object):* -** *`index` (Optional, string | string[])*: List of data streams, indices, and aliases to search. -Supports wildcards (`*`). +** *`index` (Optional, string | string[])*: A list of data streams, indices, and aliases to search. +It supports wildcards (`*`). To search all data streams and indices, omit this parameter or use `*` or `_all`. 
** *`query` (Optional, { bool, boosting, common, combined_fields, constant_score, dis_max, distance_feature, exists, function_score, fuzzy, geo_bounding_box, geo_distance, geo_polygon, geo_shape, has_child, has_parent, ids, intervals, knn, match, match_all, match_bool_prefix, match_none, match_phrase, match_phrase_prefix, more_like_this, multi_match, nested, parent_id, percolate, pinned, prefix, query_string, range, rank_feature, regexp, rule, script, script_score, semantic, shape, simple_query_string, span_containing, span_field_masking, span_first, span_multi, span_near, span_not, span_or, span_term, span_within, sparse_vector, term, terms, terms_set, text_expansion, weighted_tokens, wildcard, wrapper, type })*: Defines the search definition using the Query DSL. +The query is optional, and when not provided, it will use `match_all` to count all the docs. ** *`allow_no_indices` (Optional, boolean)*: If `false`, the request returns an error if any wildcard expression, index alias, or `_all` value targets only missing or closed indices. This behavior applies even if the request targets other open indices. -** *`analyzer` (Optional, string)*: Analyzer to use for the query string. -This parameter can only be used when the `q` query string parameter is specified. +For example, a request targeting `foo*,bar*` returns an error if an index starts with `foo` but no index starts with `bar`. +** *`analyzer` (Optional, string)*: The analyzer to use for the query string. +This parameter can be used only when the `q` query string parameter is specified. ** *`analyze_wildcard` (Optional, boolean)*: If `true`, wildcard and prefix queries are analyzed. -This parameter can only be used when the `q` query string parameter is specified. +This parameter can be used only when the `q` query string parameter is specified. ** *`default_operator` (Optional, Enum("and" | "or"))*: The default operator for query string query: `AND` or `OR`. -This parameter can only be used when the `q` query string parameter is specified. -** *`df` (Optional, string)*: Field to use as default where no field prefix is given in the query string. -This parameter can only be used when the `q` query string parameter is specified. -** *`expand_wildcards` (Optional, Enum("all" | "open" | "closed" | "hidden" | "none") | Enum("all" | "open" | "closed" | "hidden" | "none")[])*: Type of index that wildcard patterns can match. +This parameter can be used only when the `q` query string parameter is specified. +** *`df` (Optional, string)*: The field to use as a default when no field prefix is given in the query string. +This parameter can be used only when the `q` query string parameter is specified. +** *`expand_wildcards` (Optional, Enum("all" | "open" | "closed" | "hidden" | "none") | Enum("all" | "open" | "closed" | "hidden" | "none")[])*: The type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. -Supports a list of values, such as `open,hidden`. -** *`ignore_throttled` (Optional, boolean)*: If `true`, concrete, expanded or aliased indices are ignored when frozen. +It supports a list of values, such as `open,hidden`. +** *`ignore_throttled` (Optional, boolean)*: If `true`, concrete, expanded, or aliased indices are ignored when frozen. ** *`ignore_unavailable` (Optional, boolean)*: If `false`, the request returns an error if it targets a missing or closed index. 
** *`lenient` (Optional, boolean)*: If `true`, format-based query failures (such as providing text to a numeric field) in the query string will be ignored. -** *`min_score` (Optional, number)*: Sets the minimum `_score` value that documents must have to be included in the result. -** *`preference` (Optional, string)*: Specifies the node or shard the operation should be performed on. -Random by default. -** *`routing` (Optional, string)*: Custom value used to route operations to a specific shard. -** *`terminate_after` (Optional, number)*: Maximum number of documents to collect for each shard. +This parameter can be used only when the `q` query string parameter is specified. +** *`min_score` (Optional, number)*: The minimum `_score` value that documents must have to be included in the result. +** *`preference` (Optional, string)*: The node or shard the operation should be performed on. +By default, it is random. +** *`routing` (Optional, string)*: A custom value used to route operations to a specific shard. +** *`terminate_after` (Optional, number)*: The maximum number of documents to collect for each shard. If a query reaches this limit, Elasticsearch terminates the query early. Elasticsearch collects documents before sorting. -** *`q` (Optional, string)*: Query in the Lucene query string syntax. + +IMPORTANT: Use with caution. +Elasticsearch applies this parameter to each shard handling the request. +When possible, let Elasticsearch perform early termination automatically. +Avoid specifying this parameter for requests that target data streams with backing indices across multiple data tiers. +** *`q` (Optional, string)*: The query in Lucene query string syntax. [discrete] === create -Index a document. -Adds a JSON document to the specified data stream or index and makes it searchable. -If the target is an index and the document already exists, the request updates the document and increments its version. +Create a new document in the index. + +You can index a new JSON document with the `/<target>/_doc/` or `/<target>/_create/<_id>` APIs. +Using `_create` guarantees that the document is indexed only if it does not already exist. +It returns a 409 response when a document with the same ID already exists in the index. +To update an existing document, you must use the `/<target>/_doc/` API. + +If the Elasticsearch security features are enabled, you must have the following index privileges for the target data stream, index, or index alias: + +* To add a document using the `PUT /<target>/_create/<_id>` or `POST /<target>/_create/<_id>` request formats, you must have the `create_doc`, `create`, `index`, or `write` index privilege. +* To automatically create a data stream or index with this API request, you must have the `auto_configure`, `create_index`, or `manage` index privilege. + +Automatic data stream creation requires a matching index template with data stream enabled. + +**Automatically create data streams and indices** + +If the request's target doesn't exist and matches an index template with a `data_stream` definition, the index operation automatically creates the data stream. + +If the target doesn't exist and doesn't match a data stream template, the operation automatically creates the index and applies any matching index templates. + +NOTE: Elasticsearch includes several built-in index templates. To avoid naming collisions with these templates, refer to index pattern documentation. + +If no mapping exists, the index operation creates a dynamic mapping.
+By default, new fields and objects are automatically added to the mapping if needed. + +Automatic index creation is controlled by the `action.auto_create_index` setting. +If it is `true`, any index can be created automatically. +You can modify this setting to explicitly allow or block automatic creation of indices that match specified patterns or set it to `false` to turn off automatic index creation entirely. +Specify a list of patterns you want to allow or prefix each pattern with `+` or `-` to indicate whether it should be allowed or blocked. +When a list is specified, the default behaviour is to disallow. + +NOTE: The `action.auto_create_index` setting affects the automatic creation of indices only. +It does not affect the creation of data streams. + +**Routing** + +By default, shard placement — or routing — is controlled by using a hash of the document's ID value. +For more explicit control, the value fed into the hash function used by the router can be directly specified on a per-operation basis using the `routing` parameter. + +When setting up explicit mapping, you can also use the `_routing` field to direct the index operation to extract the routing value from the document itself. +This does come at the (very minimal) cost of an additional document parsing pass. +If the `_routing` mapping is defined and set to be required, the index operation will fail if no routing value is provided or extracted. + +NOTE: Data streams do not support custom routing unless they were created with the `allow_custom_routing` setting enabled in the template. + +**Distributed** + +The index operation is directed to the primary shard based on its route and performed on the actual node containing this shard. +After the primary shard completes the operation, if needed, the update is distributed to applicable replicas. + +**Active shards** + +To improve the resiliency of writes to the system, indexing operations can be configured to wait for a certain number of active shard copies before proceeding with the operation. +If the requisite number of active shard copies are not available, then the write operation must wait and retry, until either the requisite shard copies have started or a timeout occurs. +By default, write operations only wait for the primary shards to be active before proceeding (that is to say `wait_for_active_shards` is `1`). +This default can be overridden in the index settings dynamically by setting `index.write.wait_for_active_shards`. +To alter this behavior per operation, use the `wait_for_active_shards` request parameter. + +Valid values are `all` or any positive integer up to the total number of configured copies per shard in the index (which is `number_of_replicas`+1). +Specifying a negative value or a number greater than the number of shard copies will throw an error. + +For example, suppose you have a cluster of three nodes, A, B, and C and you create an index with the number of replicas set to 3 (resulting in 4 shard copies, one more copy than there are nodes). +If you attempt an indexing operation, by default the operation will only ensure the primary copy of each shard is available before proceeding. +This means that even if B and C went down and A hosted the primary shard copies, the indexing operation would still proceed with only one copy of the data. +If `wait_for_active_shards` is set on the request to `3` (and all three nodes are up), the indexing operation will require 3 active shard copies before proceeding.
+This requirement should be met because there are 3 active nodes in the cluster, each one holding a copy of the shard. +However, if you set `wait_for_active_shards` to `all` (or to `4`, which is the same in this situation), the indexing operation will not proceed as you do not have all 4 copies of each shard active in the index. +The operation will time out unless a new node is brought up in the cluster to host the fourth copy of the shard. + +It is important to note that this setting greatly reduces the chances of the write operation not writing to the requisite number of shard copies, but it does not completely eliminate the possibility, because this check occurs before the write operation starts. +After the write operation is underway, it is still possible for replication to fail on any number of shard copies but still succeed on the primary. +The `_shards` section of the API response reveals the number of shard copies on which replication succeeded and failed. {ref}/docs-index_.html[Endpoint documentation] [source,ts] ---- client.create({ id, index }) ---- [discrete] ==== Arguments * *Request (object):* -** *`id` (string)*: Unique identifier for the document. -** *`index` (string)*: Name of the data stream or index to target. -If the target doesn’t exist and matches the name or wildcard (`*`) pattern of an index template with a `data_stream` definition, this request creates the data stream. -If the target doesn’t exist and doesn’t match a data stream template, this request creates the index. +** *`id` (string)*: A unique identifier for the document. +To automatically generate a document ID, use the `POST /<target>/_doc/` request format. +** *`index` (string)*: The name of the data stream or index to target. +If the target doesn't exist and matches the name or wildcard (`*`) pattern of an index template with a `data_stream` definition, this request creates the data stream. +If the target doesn't exist and doesn't match a data stream template, this request creates the index. ** *`document` (Optional, object)*: A document. -** *`pipeline` (Optional, string)*: ID of the pipeline to use to preprocess incoming documents. -If the index has a default ingest pipeline specified, then setting the value to `_none` disables the default ingest pipeline for this request. -If a final pipeline is configured it will always run, regardless of the value of this parameter. -** *`refresh` (Optional, Enum(true | false | "wait_for"))*: If `true`, Elasticsearch refreshes the affected shards to make this operation visible to search, if `wait_for` then wait for a refresh to make this operation visible to search, if `false` do nothing with refreshes. -Valid values: `true`, `false`, `wait_for`. -** *`routing` (Optional, string)*: Custom value used to route operations to a specific shard. -** *`timeout` (Optional, string | -1 | 0)*: Period the request waits for the following operations: automatic index creation, dynamic mapping updates, waiting for active shards. -** *`version` (Optional, number)*: Explicit version number for concurrency control. -The specified version must match the current version of the document for the request to succeed. -** *`version_type` (Optional, Enum("internal" | "external" | "external_gte" | "force"))*: Specific version type: `external`, `external_gte`. +** *`pipeline` (Optional, string)*: The ID of the pipeline to use to preprocess incoming documents. +If the index has a default ingest pipeline specified, setting the value to `_none` turns off the default ingest pipeline for this request.
+If a final pipeline is configured, it will always run regardless of the value of this parameter. +** *`refresh` (Optional, Enum(true | false | "wait_for"))*: If `true`, Elasticsearch refreshes the affected shards to make this operation visible to search. +If `wait_for`, it waits for a refresh to make this operation visible to search. +If `false`, it does nothing with refreshes. +** *`routing` (Optional, string)*: A custom value that is used to route operations to a specific shard. +** *`timeout` (Optional, string | -1 | 0)*: The period the request waits for the following operations: automatic index creation, dynamic mapping updates, and waiting for active shards. +Elasticsearch waits for at least the specified timeout period before failing. +The actual wait time could be longer, particularly when multiple waits occur. + +This parameter is useful for situations where the primary shard assigned to perform the operation might not be available when the operation runs. +Some reasons for this might be that the primary shard is currently recovering from a gateway or undergoing relocation. +By default, the operation will wait on the primary shard to become available for at least 1 minute before failing and responding with an error. +The actual wait time could be longer, particularly when multiple waits occur. +** *`version` (Optional, number)*: The explicit version number for concurrency control. +It must be a non-negative long number. +** *`version_type` (Optional, Enum("internal" | "external" | "external_gte" | "force"))*: The version type. ** *`wait_for_active_shards` (Optional, number | Enum("all" | "index-setting"))*: The number of shard copies that must be active before proceeding with the operation. -Set to `all` or any positive integer up to the total number of shards in the index (`number_of_replicas+1`). +You can set it to `all` or any positive integer up to the total number of shards in the index (`number_of_replicas+1`). +The default value of `1` means it waits for each primary shard to be active. [discrete] === delete Delete a document. -Removes a JSON document from the specified index. + +Remove a JSON document from the specified index. + +NOTE: You cannot send deletion requests directly to a data stream. +To delete a document in a data stream, you must target the backing index containing the document. + +**Optimistic concurrency control** + +Delete operations can be made conditional and only be performed if the last modification to the document was assigned the sequence number and primary term specified by the `if_seq_no` and `if_primary_term` parameters. +If a mismatch is detected, the operation will result in a `VersionConflictException` and a status code of `409`. + +**Versioning** + +Each document indexed is versioned. +When deleting a document, the version can be specified to make sure the relevant document you are trying to delete is actually being deleted and it has not changed in the meantime. +Every write operation run on a document, deletes included, causes its version to be incremented. +The version number of a deleted document remains available for a short time after deletion to allow for control of concurrent operations. +The length of time for which a deleted document's version remains available is determined by the `index.gc_deletes` index setting. + +**Routing** + +If routing is used during indexing, the routing value also needs to be specified to delete a document.
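With this client, a routed delete might look like the following sketch; the index name, document ID, and routing value are illustrative, and the routing value must match the one used at index time:

[source,js]
----
// Sketch: the delete must supply the same routing value
// the document was indexed with.
const response = await client.delete({
  index: "my-index-000001",
  id: "1",
  routing: "shard-1",
});
console.log(response);
----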
+ +If the `_routing` mapping is set to `required` and no routing value is specified, the delete API throws a `RoutingMissingException` and rejects the request. + +For example: + +``` +DELETE /my-index-000001/_doc/1?routing=shard-1 +``` + +This request deletes the document with ID 1, but it is routed based on the user. +The document is not deleted if the correct routing is not specified. + +**Distributed** + +The delete operation gets hashed into a specific shard ID. +It then gets redirected into the primary shard within that ID group and replicated (if needed) to shard replicas within that ID group. {ref}/docs-delete.html[Endpoint documentation] [source,ts] @@ -186,19 +435,25 @@ client.delete({ id, index }) ==== Arguments * *Request (object):* -** *`id` (string)*: Unique identifier for the document. -** *`index` (string)*: Name of the target index. +** *`id` (string)*: A unique identifier for the document. +** *`index` (string)*: The name of the target index. ** *`if_primary_term` (Optional, number)*: Only perform the operation if the document has this primary term. ** *`if_seq_no` (Optional, number)*: Only perform the operation if the document has this sequence number. -** *`refresh` (Optional, Enum(true | false | "wait_for"))*: If `true`, Elasticsearch refreshes the affected shards to make this operation visible to search, if `wait_for` then wait for a refresh to make this operation visible to search, if `false` do nothing with refreshes. -Valid values: `true`, `false`, `wait_for`. -** *`routing` (Optional, string)*: Custom value used to route operations to a specific shard. -** *`timeout` (Optional, string | -1 | 0)*: Period to wait for active shards. -** *`version` (Optional, number)*: Explicit version number for concurrency control. -The specified version must match the current version of the document for the request to succeed. -** *`version_type` (Optional, Enum("internal" | "external" | "external_gte" | "force"))*: Specific version type: `external`, `external_gte`. -** *`wait_for_active_shards` (Optional, number | Enum("all" | "index-setting"))*: The number of shard copies that must be active before proceeding with the operation. -Set to `all` or any positive integer up to the total number of shards in the index (`number_of_replicas+1`). +** *`refresh` (Optional, Enum(true | false | "wait_for"))*: If `true`, Elasticsearch refreshes the affected shards to make this operation visible to search. +If `wait_for`, it waits for a refresh to make this operation visible to search. +If `false`, it does nothing with refreshes. +** *`routing` (Optional, string)*: A custom value used to route operations to a specific shard. +** *`timeout` (Optional, string | -1 | 0)*: The period to wait for active shards. + +This parameter is useful for situations where the primary shard assigned to perform the delete operation might not be available when the delete operation runs. +Some reasons for this might be that the primary shard is currently recovering from a store or undergoing relocation. +By default, the delete operation will wait on the primary shard to become available for up to 1 minute before failing and responding with an error. +** *`version` (Optional, number)*: An explicit version number for concurrency control. +It must match the current version of the document for the request to succeed. +** *`version_type` (Optional, Enum("internal" | "external" | "external_gte" | "force"))*: The version type. 
+** *`wait_for_active_shards` (Optional, number | Enum("all" | "index-setting"))*: The minimum number of shard copies that must be active before proceeding with the operation. +You can set it to `all` or any positive integer up to the total number of shards in the index (`number_of_replicas+1`). +The default value of `1` means it waits for each primary shard to be active. [discrete] === delete_by_query @@ -306,7 +561,24 @@ If no response is received before the timeout expires, the request fails and ret [discrete] === exists Check a document. -Checks if a specified document exists. + +Verify that a document exists. +For example, check to see if a document with the `_id` 0 exists: + +``` +HEAD my-index-000001/_doc/0 +``` + +If the document exists, the API returns a status code of `200 - OK`. +If the document doesn’t exist, the API returns `404 - Not Found`. + +**Versioning support** + +You can use the `version` parameter to check the document only if its current version is equal to the specified one. + +Internally, Elasticsearch has marked the old document as deleted and added an entirely new document. +The old version of the document doesn't disappear immediately, although you won't be able to access it. +Elasticsearch cleans up deleted documents in the background as you continue to index more data. {ref}/docs-get.html[Endpoint documentation] [source,ts] @@ -317,28 +589,47 @@ client.exists({ id, index }) ==== Arguments * *Request (object):* -** *`id` (string)*: Identifier of the document. -** *`index` (string)*: List of data streams, indices, and aliases. -Supports wildcards (`*`). -** *`preference` (Optional, string)*: Specifies the node or shard the operation should be performed on. -Random by default. +** *`id` (string)*: A unique document identifier. +** *`index` (string)*: A list of data streams, indices, and aliases. +It supports wildcards (`*`). +** *`preference` (Optional, string)*: The node or shard the operation should be performed on. +By default, the operation is randomized between the shard replicas. + +If it is set to `_local`, the operation will prefer to be run on a local allocated shard when possible. +If it is set to a custom value, the value is used to guarantee that the same shards will be used for the same custom value. +This can help with "jumping values" when hitting different shards in different refresh states. +A sample value can be something like the web session ID or the user name. ** *`realtime` (Optional, boolean)*: If `true`, the request is real-time as opposed to near-real-time. -** *`refresh` (Optional, boolean)*: If `true`, Elasticsearch refreshes all shards involved in the delete by query after the request completes. -** *`routing` (Optional, string)*: Target the specified primary shard. -** *`_source` (Optional, boolean | string | string[])*: `true` or `false` to return the `_source` field or not, or a list of fields to return. -** *`_source_excludes` (Optional, string | string[])*: A list of source fields to exclude in the response. +** *`refresh` (Optional, boolean)*: If `true`, the request refreshes the relevant shards before retrieving the document. +Setting it to `true` should be done after careful thought and verification that this does not cause a heavy load on the system (and slow down indexing). +** *`routing` (Optional, string)*: A custom value used to route operations to a specific shard. +** *`_source` (Optional, boolean | string | string[])*: Indicates whether to return the `_source` field (`true` or `false`) or lists the fields to return. 
+** *`_source_excludes` (Optional, string | string[])*: A list of source fields to exclude from the response. +You can also use this parameter to exclude fields from the subset specified in the `_source_includes` query parameter. +If the `_source` parameter is `false`, this parameter is ignored. ** *`_source_includes` (Optional, string | string[])*: A list of source fields to include in the response. -** *`stored_fields` (Optional, string | string[])*: List of stored fields to return as part of a hit. +If this parameter is specified, only these source fields are returned. +You can exclude fields from this subset using the `_source_excludes` query parameter. +If the `_source` parameter is `false`, this parameter is ignored. +** *`stored_fields` (Optional, string | string[])*: A list of stored fields to return as part of a hit. If no fields are specified, no stored fields are included in the response. -If this field is specified, the `_source` parameter defaults to false. +If this field is specified, the `_source` parameter defaults to `false`. ** *`version` (Optional, number)*: Explicit version number for concurrency control. The specified version must match the current version of the document for the request to succeed. -** *`version_type` (Optional, Enum("internal" | "external" | "external_gte" | "force"))*: Specific version type: `external`, `external_gte`. +** *`version_type` (Optional, Enum("internal" | "external" | "external_gte" | "force"))*: The version type. [discrete] === exists_source Check for a document source. -Checks if a document's `_source` is stored. + +Check whether a document source exists in an index. +For example: + +``` +HEAD my-index-000001/_source/1 +``` + +A document's source is not available if it is disabled in the mapping. {ref}/docs-get.html[Endpoint documentation] [source,ts] ---- client.existsSource({ id, index }) ---- [discrete] ==== Arguments * *Request (object):* -** *`id` (string)*: Identifier of the document. -** *`index` (string)*: List of data streams, indices, and aliases. -Supports wildcards (`*`). -** *`preference` (Optional, string)*: Specifies the node or shard the operation should be performed on. -Random by default. -** *`realtime` (Optional, boolean)*: If true, the request is real-time as opposed to near-real-time. -** *`refresh` (Optional, boolean)*: If `true`, Elasticsearch refreshes all shards involved in the delete by query after the request completes. -** *`routing` (Optional, string)*: Target the specified primary shard. -** *`_source` (Optional, boolean | string | string[])*: `true` or `false` to return the `_source` field or not, or a list of fields to return. +** *`id` (string)*: A unique identifier for the document. +** *`index` (string)*: A list of data streams, indices, and aliases. +It supports wildcards (`*`). +** *`preference` (Optional, string)*: The node or shard the operation should be performed on. +By default, the operation is randomized between the shard replicas. +** *`realtime` (Optional, boolean)*: If `true`, the request is real-time as opposed to near-real-time. +** *`refresh` (Optional, boolean)*: If `true`, the request refreshes the relevant shards before retrieving the document. +Setting it to `true` should be done after careful thought and verification that this does not cause a heavy load on the system (and slow down indexing). +** *`routing` (Optional, string)*: A custom value used to route operations to a specific shard.
+** *`_source` (Optional, boolean | string | string[])*: Indicates whether to return the `_source` field (`true` or `false`) or lists the fields to return. ** *`_source_excludes` (Optional, string | string[])*: A list of source fields to exclude in the response. ** *`_source_includes` (Optional, string | string[])*: A list of source fields to include in the response. -** *`version` (Optional, number)*: Explicit version number for concurrency control. -The specified version must match the current version of the document for the request to succeed. -** *`version_type` (Optional, Enum("internal" | "external" | "external_gte" | "force"))*: Specific version type: `external`, `external_gte`. +** *`version` (Optional, number)*: The version number for concurrency control. +It must match the current version of the document for the request to succeed. +** *`version_type` (Optional, Enum("internal" | "external" | "external_gte" | "force"))*: The version type. [discrete] === explain Explain a document match result. @@ -434,7 +726,63 @@ targeting `foo*,bar*` returns an error if an index starts with foo but no index [discrete] === get Get a document by its ID. -Retrieves the document with the specified ID from an index. +Get a document and its source or stored fields from an index. + +By default, this API is realtime and is not affected by the refresh rate of the index (when data will become visible for search). +In the case where stored fields are requested with the `stored_fields` parameter and the document has been updated but is not yet refreshed, the API will have to parse and analyze the source to extract the stored fields. +To turn off realtime behavior, set the `realtime` parameter to false. + +**Source filtering** + +By default, the API returns the contents of the `_source` field unless you have used the `stored_fields` parameter or the `_source` field is turned off. +You can turn off `_source` retrieval by using the `_source` parameter: + +``` +GET my-index-000001/_doc/0?_source=false +``` + +If you only need one or two fields from the `_source`, use the `_source_includes` or `_source_excludes` parameters to include or filter out particular fields. +This can be helpful with large documents where partial retrieval can save on network overhead. +Both parameters take a comma-separated list of fields or wildcard expressions. +For example: + +``` +GET my-index-000001/_doc/0?_source_includes=*.id&_source_excludes=entities +``` + +If you only want to specify includes, you can use a shorter notation: + +``` +GET my-index-000001/_doc/0?_source=*.id +``` + +**Routing** + +If routing is used during indexing, the routing value also needs to be specified to retrieve a document. +For example: + +``` +GET my-index-000001/_doc/2?routing=user1 +``` + +This request gets the document with ID 2, but it is routed based on the user. +The document is not fetched if the correct routing is not specified. + +**Distributed** + +The GET operation is hashed into a specific shard ID. +It is then redirected to one of the replicas within that shard ID and returns the result. +The replicas are the primary shard and its replicas within that shard ID group. +This means that the more replicas you have, the better your GET scaling will be. + +**Versioning support** + +You can use the `version` parameter to retrieve the document only if its current version is equal to the specified one. + +Internally, Elasticsearch has marked the old document as deleted and added an entirely new document.
+The old version of the document doesn't disappear immediately, although you won't be able to access it. +Elasticsearch cleans up deleted documents in the background as you continue to index more data. {ref}/docs-get.html[Endpoint documentation] [source,ts] ---- client.get({ id, index }) ---- [discrete] ==== Arguments * *Request (object):* -** *`id` (string)*: Unique identifier of the document. -** *`index` (string)*: Name of the index that contains the document. -** *`force_synthetic_source` (Optional, boolean)*: Should this request force synthetic _source? -Use this to test if the mapping supports synthetic _source and to get a sense of the worst case performance. -Fetches with this enabled will be slower the enabling synthetic source natively in the index. -** *`preference` (Optional, string)*: Specifies the node or shard the operation should be performed on. Random by default. +** *`id` (string)*: A unique document identifier. +** *`index` (string)*: The name of the index that contains the document. +** *`force_synthetic_source` (Optional, boolean)*: Indicates whether the request forces synthetic `_source`. +Use this parameter to test if the mapping supports synthetic `_source` and to get a sense of the worst case performance. +Fetches with this parameter enabled will be slower than enabling synthetic source natively in the index. +** *`preference` (Optional, string)*: The node or shard the operation should be performed on. +By default, the operation is randomized between the shard replicas. + +If it is set to `_local`, the operation will prefer to be run on a local allocated shard when possible. +If it is set to a custom value, the value is used to guarantee that the same shards will be used for the same custom value. +This can help with "jumping values" when hitting different shards in different refresh states. +A sample value can be something like the web session ID or the user name. ** *`realtime` (Optional, boolean)*: If `true`, the request is real-time as opposed to near-real-time. -** *`refresh` (Optional, boolean)*: If true, Elasticsearch refreshes the affected shards to make this operation visible to search. If false, do nothing with refreshes. -** *`routing` (Optional, string)*: Target the specified primary shard. -** *`_source` (Optional, boolean | string | string[])*: True or false to return the _source field or not, or a list of fields to return. -** *`_source_excludes` (Optional, string | string[])*: A list of source fields to exclude in the response. +** *`refresh` (Optional, boolean)*: If `true`, the request refreshes the relevant shards before retrieving the document. +Setting it to `true` should be done after careful thought and verification that this does not cause a heavy load on the system (and slow down indexing). +** *`routing` (Optional, string)*: A custom value used to route operations to a specific shard. +** *`_source` (Optional, boolean | string | string[])*: Indicates whether to return the `_source` field (`true` or `false`) or lists the fields to return. +** *`_source_excludes` (Optional, string | string[])*: A list of source fields to exclude from the response. +You can also use this parameter to exclude fields from the subset specified in the `_source_includes` query parameter. +If the `_source` parameter is `false`, this parameter is ignored. ** *`_source_includes` (Optional, string | string[])*: A list of source fields to include in the response. -** *`stored_fields` (Optional, string | string[])*: List of stored fields to return as part of a hit.
+If this parameter is specified, only these source fields are returned. +You can exclude fields from this subset using the `_source_excludes` query parameter. +If the `_source` parameter is `false`, this parameter is ignored. +** *`stored_fields` (Optional, string | string[])*: A list of stored fields to return as part of a hit. If no fields are specified, no stored fields are included in the response. -If this field is specified, the `_source` parameter defaults to false. -** *`version` (Optional, number)*: Explicit version number for concurrency control. The specified version must match the current version of the document for the request to succeed. -** *`version_type` (Optional, Enum("internal" | "external" | "external_gte" | "force"))*: Specific version type: internal, external, external_gte. +If this field is specified, the `_source` parameter defaults to `false`. +Only leaf fields can be retrieved with the `stored_field` option. +Object fields can't be returned; if specified, the request fails. +** *`version` (Optional, number)*: The version number for concurrency control. +It must match the current version of the document for the request to succeed. +** *`version_type` (Optional, Enum("internal" | "external" | "external_gte" | "force"))*: The version type. [discrete] === get_script @@ -507,7 +870,19 @@ client.getScriptLanguages() [discrete] === get_source Get a document's source. -Returns the source of a document. +Get the source of a document. +For example: + +``` +GET my-index-000001/_source/1 +``` + +You can use the source filtering parameters to control which parts of the `_source` are returned: + +``` +GET my-index-000001/_source/1/?_source_includes=*.id&_source_excludes=entities +``` {ref}/docs-get.html[Endpoint documentation] [source,ts] ---- client.getSource({ id, index }) ---- [discrete] ==== Arguments * *Request (object):* -** *`id` (string)*: Unique identifier of the document. -** *`index` (string)*: Name of the index that contains the document. -** *`preference` (Optional, string)*: Specifies the node or shard the operation should be performed on. Random by default. -** *`realtime` (Optional, boolean)*: Boolean) If true, the request is real-time as opposed to near-real-time. -** *`refresh` (Optional, boolean)*: If true, Elasticsearch refreshes the affected shards to make this operation visible to search. If false, do nothing with refreshes. -** *`routing` (Optional, string)*: Target the specified primary shard. -** *`_source` (Optional, boolean | string | string[])*: True or false to return the _source field or not, or a list of fields to return. +** *`id` (string)*: A unique document identifier. +** *`index` (string)*: The name of the index that contains the document. +** *`preference` (Optional, string)*: The node or shard the operation should be performed on. +By default, the operation is randomized between the shard replicas. +** *`realtime` (Optional, boolean)*: If `true`, the request is real-time as opposed to near-real-time. +** *`refresh` (Optional, boolean)*: If `true`, the request refreshes the relevant shards before retrieving the document. +Setting it to `true` should be done after careful thought and verification that this does not cause a heavy load on the system (and slow down indexing). +** *`routing` (Optional, string)*: A custom value used to route operations to a specific shard. +** *`_source` (Optional, boolean | string | string[])*: Indicates whether to return the `_source` field (`true` or `false`) or lists the fields to return.
** *`_source_excludes` (Optional, string | string[])*: A list of source fields to exclude in the response.
** *`_source_includes` (Optional, string | string[])*: A list of source fields to include in the response.
-** *`stored_fields` (Optional, string | string[])*
-** *`version` (Optional, number)*: Explicit version number for concurrency control. The specified version must match the current version of the document for the request to succeed.
-** *`version_type` (Optional, Enum("internal" | "external" | "external_gte" | "force"))*: Specific version type: internal, external, external_gte.
+** *`stored_fields` (Optional, string | string[])*: A list of stored fields to return as part of a hit.
+** *`version` (Optional, number)*: The version number for concurrency control.
+It must match the current version of the document for the request to succeed.
+** *`version_type` (Optional, Enum("internal" | "external" | "external_gte" | "force"))*: The version type.

[discrete]
=== health_report
@@ -568,10 +946,126 @@ client.healthReport({ ... })

[discrete]
=== index
-Index a document.
-Adds a JSON document to the specified data stream or index and makes it searchable.
+Create or update a document in an index.
+
+Add a JSON document to the specified data stream or index and make it searchable.
If the target is an index and the document already exists, the request updates the document and increments its version.
+NOTE: You cannot use this API to send update requests for existing documents in a data stream.
+
+If the Elasticsearch security features are enabled, you must have the following index privileges for the target data stream, index, or index alias:
+
+* To add or overwrite a document using the `PUT /<target>/_doc/<_id>` request format, you must have the `create`, `index`, or `write` index privilege.
+* To add a document using the `POST /<target>/_doc/` request format, you must have the `create_doc`, `create`, `index`, or `write` index privilege.
+* To automatically create a data stream or index with this API request, you must have the `auto_configure`, `create_index`, or `manage` index privilege.
+
+Automatic data stream creation requires a matching index template with data stream enabled.
+
+NOTE: Replica shards might not all be started when an indexing operation returns successfully.
+By default, only the primary is required. Set `wait_for_active_shards` to change this default behavior.
+
+**Automatically create data streams and indices**
+
+If the request's target doesn't exist and matches an index template with a `data_stream` definition, the index operation automatically creates the data stream.
+
+If the target doesn't exist and doesn't match a data stream template, the operation automatically creates the index and applies any matching index templates.
+
+NOTE: Elasticsearch includes several built-in index templates. To avoid naming collisions with these templates, refer to index pattern documentation.
+
+If no mapping exists, the index operation creates a dynamic mapping.
+By default, new fields and objects are automatically added to the mapping if needed.
+
+Automatic index creation is controlled by the `action.auto_create_index` setting.
+If it is `true`, any index can be created automatically.
+You can modify this setting to explicitly allow or block automatic creation of indices that match specified patterns or set it to `false` to turn off automatic index creation entirely.
+Specify a list of patterns you want to allow or prefix each pattern with `+` or `-` to indicate whether it should be allowed or blocked.
+When a list is specified, the default behavior is to disallow.
+
+NOTE: The `action.auto_create_index` setting affects the automatic creation of indices only.
+It does not affect the creation of data streams.
+
+**Optimistic concurrency control**
+
+Index operations can be made conditional and only be performed if the last modification to the document was assigned the sequence number and primary term specified by the `if_seq_no` and `if_primary_term` parameters.
+If a mismatch is detected, the operation will result in a `VersionConflictException` and a status code of `409`.
+
+**Routing**
+
+By default, shard placement — or routing — is controlled by using a hash of the document's ID value.
+For more explicit control, the value fed into the hash function used by the router can be directly specified on a per-operation basis using the `routing` parameter.
+
+When setting up explicit mapping, you can also use the `_routing` field to direct the index operation to extract the routing value from the document itself.
+This does come at the (very minimal) cost of an additional document parsing pass.
+If the `_routing` mapping is defined and set to be required, the index operation will fail if no routing value is provided or extracted.
+
+NOTE: Data streams do not support custom routing unless they were created with the `allow_custom_routing` setting enabled in the template.
+
+**Distributed**
+
+The index operation is directed to the primary shard based on its route and performed on the actual node containing this shard.
+After the primary shard completes the operation, if needed, the update is distributed to applicable replicas.
+
+**Active shards**
+
+To improve the resiliency of writes to the system, indexing operations can be configured to wait for a certain number of active shard copies before proceeding with the operation.
+If the requisite number of active shard copies are not available, then the write operation must wait and retry, until either the requisite shard copies have started or a timeout occurs.
+By default, write operations only wait for the primary shards to be active before proceeding (that is to say `wait_for_active_shards` is `1`).
+This default can be overridden in the index settings dynamically by setting `index.write.wait_for_active_shards`.
+To alter this behavior per operation, use the `wait_for_active_shards` request parameter.
+
+Valid values are `all` or any positive integer up to the total number of configured copies per shard in the index (which is `number_of_replicas`+1).
+Specifying a negative value or a number greater than the number of shard copies will throw an error.
+
+For example, suppose you have a cluster of three nodes, A, B, and C, and you create an index with the number of replicas set to 3 (resulting in 4 shard copies, one more copy than there are nodes).
+If you attempt an indexing operation, by default the operation will only ensure the primary copy of each shard is available before proceeding.
+This means that even if B and C went down and A hosted the primary shard copies, the indexing operation would still proceed with only one copy of the data.
+If `wait_for_active_shards` is set on the request to `3` (and all three nodes are up), the indexing operation will require 3 active shard copies before proceeding.
+This requirement should be met because there are 3 active nodes in the cluster, each one holding a copy of the shard.
+However, if you set `wait_for_active_shards` to `all` (or to `4`, which is the same in this situation), the indexing operation will not proceed as you do not have all 4 copies of each shard active in the index.
+The operation will time out unless a new node is brought up in the cluster to host the fourth copy of the shard.
+
+It is important to note that this setting greatly reduces the chances of the write operation not writing to the requisite number of shard copies, but it does not completely eliminate the possibility, because this check occurs before the write operation starts.
+After the write operation is underway, it is still possible for replication to fail on any number of shard copies but still succeed on the primary.
+The `_shards` section of the API response reveals the number of shard copies on which replication succeeded and failed.
+
+**No operation (noop) updates**
+
+When updating a document by using this API, a new version of the document is always created even if the document hasn't changed.
+If this isn't acceptable, use the `_update` API with `detect_noop` set to `true`.
+The `detect_noop` option isn't available on this API because it doesn’t fetch the old source and isn't able to compare it against the new source.
+
+There isn't a definitive rule for when noop updates aren't acceptable.
+It's a combination of lots of factors like how frequently your data source sends updates that are actually noops and how many queries per second Elasticsearch runs on the shard receiving the updates.
+
+**Versioning**
+
+Each indexed document is given a version number.
+By default, internal versioning is used that starts at 1 and increments with each update, deletes included.
+Optionally, the version number can be set to an external value (for example, if maintained in a database).
+To enable this functionality, `version_type` should be set to `external`.
+The value provided must be a numeric, long value greater than or equal to 0, and less than around `9.2e+18`.
+
+NOTE: Versioning is completely real time, and is not affected by the near real time aspects of search operations.
+If no version is provided, the operation runs without any version checks.
+
+When using the external version type, the system checks to see if the version number passed to the index request is greater than the version of the currently stored document.
+If true, the document will be indexed and the new version number used.
+If the value provided is less than or equal to the stored document's version number, a version conflict will occur and the index operation will fail. For example:
+
+```
+PUT my-index-000001/_doc/1?version=2&version_type=external
+{
+  "user": {
+    "id": "elkbee"
+  }
+}
+```
+
+In this example, the operation will succeed since the supplied version of 2 is higher than the current document version of 1.
+If the document was already updated and its version was set to 2 or higher, the indexing command will fail and result in a conflict (409 HTTP status code).
+
+A nice side effect is that there is no need to maintain strict ordering of async indexing operations run as a result of changes to a source database, as long as version numbers from the source database are used.
+Even the simple case of updating the Elasticsearch index using data from a database is simplified if external versioning is used, as only the latest version will be used if the index operations arrive out of order.
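+
+As a quick illustration, the same request issued through this client might look like the following sketch (it reuses the index name, document ID, and body from the example above):
+
+```
+const response = await client.index({
+  index: "my-index-000001",
+  id: "1",
+  version: 2,
+  version_type: "external",
+  document: {
+    user: {
+      id: "elkbee",
+    },
+  },
+});
+console.log(response);
+```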
+
{ref}/docs-index_.html[Endpoint documentation]
[source,ts]
----
@@ -581,29 +1075,40 @@ client.index({ index })
==== Arguments

* *Request (object):*
-** *`index` (string)*: Name of the data stream or index to target.
-** *`id` (Optional, string)*: Unique identifier for the document.
+** *`index` (string)*: The name of the data stream or index to target.
+If the target doesn't exist and matches the name or wildcard (`*`) pattern of an index template with a `data_stream` definition, this request creates the data stream.
+If the target doesn't exist and doesn't match a data stream template, this request creates the index.
+You can check for existing targets with the resolve index API.
+** *`id` (Optional, string)*: A unique identifier for the document.
+To automatically generate a document ID, use the `POST /<target>/_doc/` request format and omit this parameter.
** *`document` (Optional, object)*: A document.
** *`if_primary_term` (Optional, number)*: Only perform the operation if the document has this primary term.
** *`if_seq_no` (Optional, number)*: Only perform the operation if the document has this sequence number.
-** *`op_type` (Optional, Enum("index" | "create"))*: Set to create to only index the document if it does not already exist (put if absent).
+** *`op_type` (Optional, Enum("index" | "create"))*: Set to `create` to only index the document if it does not already exist (put if absent).
If a document with the specified `_id` already exists, the indexing operation will fail.
-Same as using the `/_create` endpoint.
-Valid values: `index`, `create`.
-If document id is specified, it defaults to `index`.
+The behavior is the same as using the `<index>/_create` endpoint.
+If a document ID is specified, this parameter defaults to `index`.
Otherwise, it defaults to `create`.
-** *`pipeline` (Optional, string)*: ID of the pipeline to use to preprocess incoming documents.
-If the index has a default ingest pipeline specified, then setting the value to `_none` disables the default ingest pipeline for this request.
-If a final pipeline is configured it will always run, regardless of the value of this parameter.
-** *`refresh` (Optional, Enum(true | false | "wait_for"))*: If `true`, Elasticsearch refreshes the affected shards to make this operation visible to search, if `wait_for` then wait for a refresh to make this operation visible to search, if `false` do nothing with refreshes.
-Valid values: `true`, `false`, `wait_for`.
-** *`routing` (Optional, string)*: Custom value used to route operations to a specific shard.
-** *`timeout` (Optional, string | -1 | 0)*: Period the request waits for the following operations: automatic index creation, dynamic mapping updates, waiting for active shards.
-** *`version` (Optional, number)*: Explicit version number for concurrency control.
-The specified version must match the current version of the document for the request to succeed.
-** *`version_type` (Optional, Enum("internal" | "external" | "external_gte" | "force"))*: Specific version type: `external`, `external_gte`.
+If the request targets a data stream, an `op_type` of `create` is required.
+** *`pipeline` (Optional, string)*: The ID of the pipeline to use to preprocess incoming documents.
+If the index has a default ingest pipeline specified, then setting the value to `_none` disables the default ingest pipeline for this request.
+If a final pipeline is configured it will always run, regardless of the value of this parameter.
+** *`refresh` (Optional, Enum(true | false | "wait_for"))*: If `true`, Elasticsearch refreshes the affected shards to make this operation visible to search. +If `wait_for`, it waits for a refresh to make this operation visible to search. +If `false`, it does nothing with refreshes. +** *`routing` (Optional, string)*: A custom value that is used to route operations to a specific shard. +** *`timeout` (Optional, string | -1 | 0)*: The period the request waits for the following operations: automatic index creation, dynamic mapping updates, waiting for active shards. + +This parameter is useful for situations where the primary shard assigned to perform the operation might not be available when the operation runs. +Some reasons for this might be that the primary shard is currently recovering from a gateway or undergoing relocation. +By default, the operation will wait on the primary shard to become available for at least 1 minute before failing and responding with an error. +The actual wait time could be longer, particularly when multiple waits occur. +** *`version` (Optional, number)*: An explicit version number for concurrency control. +It must be a non-negative long number. +** *`version_type` (Optional, Enum("internal" | "external" | "external_gte" | "force"))*: The version type. ** *`wait_for_active_shards` (Optional, number | Enum("all" | "index-setting"))*: The number of shard copies that must be active before proceeding with the operation. -Set to all or any positive integer up to the total number of shards in the index (`number_of_replicas+1`). +You can set it to `all` or any positive integer up to the total number of shards in the index (`number_of_replicas+1`). +The default value of `1` means it waits for each primary shard to be active. ** *`require_alias` (Optional, boolean)*: If `true`, the destination must be an index alias. [discrete] @@ -903,7 +1408,187 @@ To target all data streams and indices in a cluster, omit this parameter or use [discrete] === reindex Reindex documents. -Copies documents from a source to a destination. The source can be any existing index, alias, or data stream. The destination must differ from the source. For example, you cannot reindex a data stream into itself. + +Copy documents from a source to a destination. +You can copy all documents to the destination index or reindex a subset of the documents. +The source can be any existing index, alias, or data stream. +The destination must differ from the source. +For example, you cannot reindex a data stream into itself. + +IMPORTANT: Reindex requires `_source` to be enabled for all documents in the source. +The destination should be configured as wanted before calling the reindex API. +Reindex does not copy the settings from the source or its associated template. +Mappings, shard counts, and replicas, for example, must be configured ahead of time. + +If the Elasticsearch security features are enabled, you must have the following security privileges: + +* The `read` index privilege for the source data stream, index, or alias. +* The `write` index privilege for the destination data stream, index, or index alias. +* To automatically create a data stream or index with a reindex API request, you must have the `auto_configure`, `create_index`, or `manage` index privilege for the destination data stream, index, or alias. +* If reindexing from a remote cluster, the `source.remote.user` must have the `monitor` cluster privilege and the `read` index privilege for the source data stream, index, or alias. 
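+
+As a minimal sketch with this client, a reindex request might look like the following (the index names are placeholders; the `op_type` and `conflicts` settings are explained below):
+
+```
+const response = await client.reindex({
+  source: {
+    index: "my-index-000001",
+  },
+  dest: {
+    index: "my-new-index-000001",
+    op_type: "create",
+  },
+  conflicts: "proceed",
+});
+console.log(response);
+```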
+
+If reindexing from a remote cluster, you must explicitly allow the remote host in the `reindex.remote.whitelist` setting.
+Automatic data stream creation requires a matching index template with data stream enabled.
+
+The `dest` element can be configured like the index API to control optimistic concurrency control.
+Omitting `version_type` or setting it to `internal` causes Elasticsearch to blindly dump documents into the destination, overwriting any that happen to have the same ID.
+
+Setting `version_type` to `external` causes Elasticsearch to preserve the `version` from the source, create any documents that are missing, and update any documents that have an older version in the destination than they do in the source.
+
+Setting `op_type` to `create` causes the reindex API to create only missing documents in the destination.
+All existing documents will cause a version conflict.
+
+IMPORTANT: Because data streams are append-only, any reindex request to a destination data stream must have an `op_type` of `create`.
+A reindex can only add new documents to a destination data stream.
+It cannot update existing documents in a destination data stream.
+
+By default, version conflicts abort the reindex process.
+To continue reindexing if there are conflicts, set the `conflicts` request body property to `proceed`.
+In this case, the response includes a count of the version conflicts that were encountered.
+Note that the handling of other error types is unaffected by the `conflicts` property.
+Additionally, if you opt to count version conflicts, the operation could attempt to reindex more documents from the source than `max_docs` until it has successfully indexed `max_docs` documents into the target or it has gone through every document in the source query.
+
+NOTE: The reindex API makes no effort to handle ID collisions.
+The last document written will "win" but the order isn't usually predictable so it is not a good idea to rely on this behavior.
+Instead, make sure that IDs are unique by using a script.
+
+**Running reindex asynchronously**
+
+If the request contains `wait_for_completion=false`, Elasticsearch performs some preflight checks, launches the request, and returns a task you can use to cancel or get the status of the task.
+Elasticsearch creates a record of this task as a document at `_tasks/<task_id>`.
+
+**Reindex from multiple sources**
+
+If you have many sources to reindex, it is generally better to reindex them one at a time rather than using a glob pattern to pick up multiple sources.
+That way you can resume the process if there are any errors by removing the partially completed source and starting over.
+It also makes parallelizing the process fairly simple: split the list of sources to reindex and run each list in parallel.
+
+For example, you can use a bash script like this:
+
+```
+for index in i1 i2 i3 i4 i5; do
+  curl -HContent-Type:application/json -XPOST localhost:9200/_reindex?pretty -d'{
+    "source": {
+      "index": "'$index'"
+    },
+    "dest": {
+      "index": "'$index'-reindexed"
+    }
+  }'
+done
+```
+
+**Throttling**
+
+Set `requests_per_second` to any positive decimal number (`1.4`, `6`, `1000`, for example) to throttle the rate at which reindex issues batches of index operations.
+Requests are throttled by padding each batch with a wait time.
+To turn off throttling, set `requests_per_second` to `-1`.
+
+The throttling is done by waiting between batches so that the scroll that reindex uses internally can be given a timeout that takes into account the padding.
+The padding time is the difference between the batch size divided by the `requests_per_second` and the time spent writing. +By default the batch size is `1000`, so if `requests_per_second` is set to `500`: + +``` +target_time = 1000 / 500 per second = 2 seconds +wait_time = target_time - write_time = 2 seconds - .5 seconds = 1.5 seconds +``` + +Since the batch is issued as a single bulk request, large batch sizes cause Elasticsearch to create many requests and then wait for a while before starting the next set. +This is "bursty" instead of "smooth". + +**Slicing** + +Reindex supports sliced scroll to parallelize the reindexing process. +This parallelization can improve efficiency and provide a convenient way to break the request down into smaller parts. + +NOTE: Reindexing from remote clusters does not support manual or automatic slicing. + +You can slice a reindex request manually by providing a slice ID and total number of slices to each request. +You can also let reindex automatically parallelize by using sliced scroll to slice on `_id`. +The `slices` parameter specifies the number of slices to use. + +Adding `slices` to the reindex request just automates the manual process, creating sub-requests which means it has some quirks: + +* You can see these requests in the tasks API. These sub-requests are "child" tasks of the task for the request with slices. +* Fetching the status of the task for the request with `slices` only contains the status of completed slices. +* These sub-requests are individually addressable for things like cancellation and rethrottling. +* Rethrottling the request with `slices` will rethrottle the unfinished sub-request proportionally. +* Canceling the request with `slices` will cancel each sub-request. +* Due to the nature of `slices`, each sub-request won't get a perfectly even portion of the documents. All documents will be addressed, but some slices may be larger than others. Expect larger slices to have a more even distribution. +* Parameters like `requests_per_second` and `max_docs` on a request with `slices` are distributed proportionally to each sub-request. Combine that with the previous point about distribution being uneven and you should conclude that using `max_docs` with `slices` might not result in exactly `max_docs` documents being reindexed. +* Each sub-request gets a slightly different snapshot of the source, though these are all taken at approximately the same time. + +If slicing automatically, setting `slices` to `auto` will choose a reasonable number for most indices. +If slicing manually or otherwise tuning automatic slicing, use the following guidelines. + +Query performance is most efficient when the number of slices is equal to the number of shards in the index. +If that number is large (for example, `500`), choose a lower number as too many slices will hurt performance. +Setting slices higher than the number of shards generally does not improve efficiency and adds overhead. + +Indexing performance scales linearly across available resources with the number of slices. + +Whether query or indexing performance dominates the runtime depends on the documents being reindexed and cluster resources. + +**Modify documents during reindexing** + +Like `_update_by_query`, reindex operations support a script that modifies the document. +Unlike `_update_by_query`, the script is allowed to modify the document's metadata. + +Just as in `_update_by_query`, you can set `ctx.op` to change the operation that is run on the destination. 
+For example, set `ctx.op` to `noop` if your script decides that the document doesn’t have to be indexed in the destination. This "no operation" will be reported in the `noop` counter in the response body.
+Set `ctx.op` to `delete` if your script decides that the document must be deleted from the destination.
+The deletion will be reported in the `deleted` counter in the response body.
+Setting `ctx.op` to anything else will return an error, as will setting any other field in `ctx`.
+
+Think of the possibilities! Just be careful; you are able to change:
+
+* `_id`
+* `_index`
+* `_version`
+* `_routing`
+
+Setting `_version` to `null` or clearing it from the `ctx` map is just like not sending the version in an indexing request.
+It will cause the document to be overwritten in the destination regardless of the version on the target or the version type you use in the reindex API.
+
+**Reindex from remote**
+
+Reindex supports reindexing from a remote Elasticsearch cluster.
+The `host` parameter must contain a scheme, host, port, and optional path.
+The `username` and `password` parameters are optional and, when they are present, the reindex operation will connect to the remote Elasticsearch node using basic authentication.
+Be sure to use HTTPS when using basic authentication or the password will be sent in plain text.
+There are a range of settings available to configure the behavior of the HTTPS connection.
+
+When using Elastic Cloud, it is also possible to authenticate against the remote cluster through the use of a valid API key.
+Remote hosts must be explicitly allowed with the `reindex.remote.whitelist` setting.
+It can be set to a comma-delimited list of allowed remote host and port combinations.
+Scheme is ignored; only the host and port are used.
+For example:
+
+```
+reindex.remote.whitelist: [otherhost:9200, another:9200, 127.0.10.*:9200, localhost:*]
+```
+
+The list of allowed hosts must be configured on any nodes that will coordinate the reindex.
+This feature should work with remote clusters of any version of Elasticsearch.
+This should enable you to upgrade from any version of Elasticsearch to the current version by reindexing from a cluster of the old version.
+
+WARNING: Elasticsearch does not support forward compatibility across major versions.
+For example, you cannot reindex from a 7.x cluster into a 6.x cluster.
+
+To enable queries sent to older versions of Elasticsearch, the `query` parameter is sent directly to the remote host without validation or modification.
+
+NOTE: Reindexing from remote clusters does not support manual or automatic slicing.
+
+Reindexing from a remote server uses an on-heap buffer that defaults to a maximum size of 100mb.
+If the remote index includes very large documents, you'll need to use a smaller batch size.
+It is also possible to set the socket read timeout on the remote connection with the `socket_timeout` field and the connection timeout with the `connect_timeout` field.
+Both default to 30 seconds.
+
+**Configuring SSL parameters**
+
+Reindex from remote supports configurable SSL settings.
+These must be specified in the `elasticsearch.yml` file, with the exception of the secure settings, which you add in the Elasticsearch keystore.
+It is not possible to configure SSL in the body of the reindex request.
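+
+As an illustrative sketch, a reindex from a remote cluster through this client might look like the following (the host, credentials, and index names are placeholders; `socket_timeout` and `connect_timeout` are the optional timeout fields described above):
+
+```
+const response = await client.reindex({
+  source: {
+    remote: {
+      host: "http://otherhost:9200",
+      username: "user",
+      password: "pass",
+      socket_timeout: "1m",
+      connect_timeout: "10s",
+    },
+    index: "my-index-000001",
+  },
+  dest: {
+    index: "my-new-index-000001",
+  },
+});
+console.log(response);
+```
+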
{ref}/docs-reindex.html[Endpoint documentation]
[source,ts]
----
@@ -916,19 +1601,35 @@ client.reindex({ dest, source })
==== Arguments

* *Request (object):*
** *`dest` ({ index, op_type, pipeline, routing, version_type })*: The destination you are copying to.
** *`source` ({ index, query, remote, size, slice, sort, _source, runtime_mappings })*: The source you are copying from.
-** *`conflicts` (Optional, Enum("abort" | "proceed"))*: Set to proceed to continue reindexing even if there are conflicts.
+** *`conflicts` (Optional, Enum("abort" | "proceed"))*: Indicates whether to continue reindexing even when there are conflicts.
** *`max_docs` (Optional, number)*: The maximum number of documents to reindex.
+By default, all documents are reindexed.
+If it is a value less than or equal to `scroll_size`, a scroll will not be used to retrieve the results for the operation.
+
+If `conflicts` is set to `proceed`, the reindex operation could attempt to reindex more documents from the source than `max_docs` until it has successfully indexed `max_docs` documents into the target or it has gone through every document in the source query.
** *`script` (Optional, { source, id, params, lang, options })*: The script to run to update the document source or metadata when reindexing.
** *`size` (Optional, number)*
** *`refresh` (Optional, boolean)*: If `true`, the request refreshes affected shards to make this operation visible to search.
** *`requests_per_second` (Optional, float)*: The throttle for this request in sub-requests per second.
-Defaults to no throttle.
-** *`scroll` (Optional, string | -1 | 0)*: Specifies how long a consistent view of the index should be maintained for scrolled search.
+By default, there is no throttle.
+** *`scroll` (Optional, string | -1 | 0)*: The period of time that a consistent view of the index should be maintained for scrolled search.
** *`slices` (Optional, number | Enum("auto"))*: The number of slices this task should be divided into.
-Defaults to 1 slice, meaning the task isn’t sliced into subtasks.
-** *`timeout` (Optional, string | -1 | 0)*: Period each indexing waits for automatic index creation, dynamic mapping updates, and waiting for active shards.
+It defaults to one slice, which means the task isn't sliced into subtasks.
+
+Reindex supports sliced scroll to parallelize the reindexing process.
+This parallelization can improve efficiency and provide a convenient way to break the request down into smaller parts.
+
+NOTE: Reindexing from remote clusters does not support manual or automatic slicing.
+
+If set to `auto`, Elasticsearch chooses the number of slices to use.
+This setting will use one slice per shard, up to a certain limit.
+If there are multiple sources, it will choose the number of slices based on the index or backing index with the smallest number of shards.
+** *`timeout` (Optional, string | -1 | 0)*: The period each indexing waits for automatic index creation, dynamic mapping updates, and waiting for active shards.
+By default, Elasticsearch waits for at least one minute before failing.
+The actual wait time could be longer, particularly when multiple waits occur.
** *`wait_for_active_shards` (Optional, number | Enum("all" | "index-setting"))*: The number of shard copies that must be active before proceeding with the operation.
-Set to `all` or any positive integer up to the total number of shards in the index (`number_of_replicas+1`).
+Set it to `all` or any positive integer up to the total number of shards in the index (`number_of_replicas+1`).
+The default value is one, which means it waits for each primary shard to be active. ** *`wait_for_completion` (Optional, boolean)*: If `true`, the request blocks until the operation is complete. ** *`require_alias` (Optional, boolean)*: If `true`, the destination must be an index alias. @@ -937,6 +1638,15 @@ Set to `all` or any positive integer up to the total number of shards in the ind Throttle a reindex operation. Change the number of requests per second for a particular reindex operation. +For example: + +``` +POST _reindex/r1A2WoRbTwKZ516z6NEs5A:36619/_rethrottle?requests_per_second=-1 +``` + +Rethrottling that speeds up the query takes effect immediately. +Rethrottling that slows down the query will take effect after completing the current batch. +This behavior prevents scroll timeouts. {ref}/docs-reindex.html[Endpoint documentation] [source,ts] @@ -947,8 +1657,9 @@ client.reindexRethrottle({ task_id }) ==== Arguments * *Request (object):* -** *`task_id` (string)*: Identifier for the task. +** *`task_id` (string)*: The task identifier, which can be found by using the tasks API. ** *`requests_per_second` (Optional, float)*: The throttle for this request in sub-requests per second. +It can be either `-1` to turn off throttling or any decimal number like `1.7` or `12` to throttle to that level. [discrete] === render_search_template @@ -1386,7 +2097,24 @@ Random by default. [discrete] === update Update a document. -Updates a document by running a script or passing a partial document. + +Update a document by running a script or passing a partial document. + +If the Elasticsearch security features are enabled, you must have the `index` or `write` index privilege for the target index or index alias. + +The script can update, delete, or skip modifying the document. +The API also supports passing a partial document, which is merged into the existing document. +To fully replace an existing document, use the index API. +This operation: + +* Gets the document (collocated with the shard) from the index. +* Runs the specified script. +* Indexes the result. + +The document must still be reindexed, but using this API removes some network roundtrips and reduces chances of version conflicts between the GET and the index operation. + +The `_source` field must be enabled to use this API. +In addition to `_source`, you can access the following variables through the `ctx` map: `_index`, `_type`, `_id`, `_version`, `_routing`, and `_now` (the current timestamp). {ref}/docs-update.html[Endpoint documentation] [source,ts] @@ -1397,35 +2125,37 @@ client.update({ id, index }) ==== Arguments * *Request (object):* -** *`id` (string)*: Document ID -** *`index` (string)*: The name of the index -** *`detect_noop` (Optional, boolean)*: Set to false to disable setting 'result' in the response -to 'noop' if no change to the document occurred. +** *`id` (string)*: A unique identifier for the document to be updated. +** *`index` (string)*: The name of the target index. +By default, the index is created automatically if it doesn't exist. +** *`detect_noop` (Optional, boolean)*: If `true`, the `result` in the response is set to `noop` (no operation) when there are no changes to the document. ** *`doc` (Optional, object)*: A partial update to an existing document. -** *`doc_as_upsert` (Optional, boolean)*: Set to true to use the contents of 'doc' as the value of 'upsert' -** *`script` (Optional, { source, id, params, lang, options })*: Script to execute to update the document. 
-** *`scripted_upsert` (Optional, boolean)*: Set to true to execute the script whether or not the document exists. -** *`_source` (Optional, boolean | { excludes, includes })*: Set to false to disable source retrieval. You can also specify a comma-separated -list of the fields you want to retrieve. -** *`upsert` (Optional, object)*: If the document does not already exist, the contents of 'upsert' are inserted as a -new document. If the document exists, the 'script' is executed. +If both `doc` and `script` are specified, `doc` is ignored. +** *`doc_as_upsert` (Optional, boolean)*: If `true`, use the contents of 'doc' as the value of 'upsert'. +NOTE: Using ingest pipelines with `doc_as_upsert` is not supported. +** *`script` (Optional, { source, id, params, lang, options })*: The script to run to update the document. +** *`scripted_upsert` (Optional, boolean)*: If `true`, run the script whether or not the document exists. +** *`_source` (Optional, boolean | { excludes, includes })*: If `false`, turn off source retrieval. +You can also specify a list of the fields you want to retrieve. +** *`upsert` (Optional, object)*: If the document does not already exist, the contents of 'upsert' are inserted as a new document. +If the document exists, the 'script' is run. ** *`if_primary_term` (Optional, number)*: Only perform the operation if the document has this primary term. ** *`if_seq_no` (Optional, number)*: Only perform the operation if the document has this sequence number. ** *`lang` (Optional, string)*: The script language. -** *`refresh` (Optional, Enum(true | false | "wait_for"))*: If 'true', Elasticsearch refreshes the affected shards to make this operation -visible to search, if 'wait_for' then wait for a refresh to make this operation -visible to search, if 'false' do nothing with refreshes. -** *`require_alias` (Optional, boolean)*: If true, the destination must be an index alias. -** *`retry_on_conflict` (Optional, number)*: Specify how many times should the operation be retried when a conflict occurs. -** *`routing` (Optional, string)*: Custom value used to route operations to a specific shard. -** *`timeout` (Optional, string | -1 | 0)*: Period to wait for dynamic mapping updates and active shards. -This guarantees Elasticsearch waits for at least the timeout before failing. +** *`refresh` (Optional, Enum(true | false | "wait_for"))*: If 'true', Elasticsearch refreshes the affected shards to make this operation visible to search. +If 'wait_for', it waits for a refresh to make this operation visible to search. +If 'false', it does nothing with refreshes. +** *`require_alias` (Optional, boolean)*: If `true`, the destination must be an index alias. +** *`retry_on_conflict` (Optional, number)*: The number of times the operation should be retried when a conflict occurs. +** *`routing` (Optional, string)*: A custom value used to route operations to a specific shard. +** *`timeout` (Optional, string | -1 | 0)*: The period to wait for the following operations: dynamic mapping updates and waiting for active shards. +Elasticsearch waits for at least the timeout period before failing. The actual wait time could be longer, particularly when multiple waits occur. -** *`wait_for_active_shards` (Optional, number | Enum("all" | "index-setting"))*: The number of shard copies that must be active before proceeding with the operations. -Set to 'all' or any positive integer up to the total number of shards in the index -(number_of_replicas+1). Defaults to 1 meaning the primary shard. 
-** *`_source_excludes` (Optional, string | string[])*: Specify the source fields you want to exclude. -** *`_source_includes` (Optional, string | string[])*: Specify the source fields you want to retrieve. +** *`wait_for_active_shards` (Optional, number | Enum("all" | "index-setting"))*: The number of copies of each shard that must be active before proceeding with the operation. +Set to 'all' or any positive integer up to the total number of shards in the index (`number_of_replicas`+1). +The default value of `1` means it waits for each primary shard to be active. +** *`_source_excludes` (Optional, string | string[])*: The source fields you want to exclude. +** *`_source_includes` (Optional, string | string[])*: The source fields you want to retrieve. [discrete] === update_by_query @@ -4038,25 +4768,100 @@ client.eql.search({ index, query }) === esql [discrete] ==== async_query -Executes an ESQL request asynchronously +Run an async ES|QL query. +Asynchronously run an ES|QL (Elasticsearch query language) query, monitor its progress, and retrieve results when they become available. + +The API accepts the same parameters and request body as the synchronous query API, along with additional async related properties. {ref}/esql-async-query-api.html[Endpoint documentation] [source,ts] ---- -client.esql.asyncQuery() +client.esql.asyncQuery({ query }) +---- + +[discrete] +==== Arguments + +* *Request (object):* +** *`query` (string)*: The ES|QL query API accepts an ES|QL query string in the query parameter, runs it, and returns the results. +** *`columnar` (Optional, boolean)*: By default, ES|QL returns results as rows. For example, FROM returns each individual document as one row. For the JSON, YAML, CBOR and smile formats, ES|QL can return the results in a columnar fashion where one row represents all the values of a certain column in the results. +** *`filter` (Optional, { bool, boosting, common, combined_fields, constant_score, dis_max, distance_feature, exists, function_score, fuzzy, geo_bounding_box, geo_distance, geo_polygon, geo_shape, has_child, has_parent, ids, intervals, knn, match, match_all, match_bool_prefix, match_none, match_phrase, match_phrase_prefix, more_like_this, multi_match, nested, parent_id, percolate, pinned, prefix, query_string, range, rank_feature, regexp, rule, script, script_score, semantic, shape, simple_query_string, span_containing, span_field_masking, span_first, span_multi, span_near, span_not, span_or, span_term, span_within, sparse_vector, term, terms, terms_set, text_expansion, weighted_tokens, wildcard, wrapper, type })*: Specify a Query DSL query in the filter parameter to filter the set of documents that an ES|QL query runs on. +** *`locale` (Optional, string)* +** *`params` (Optional, number | number | string | boolean | null | User-defined value[])*: To avoid any attempts of hacking or code injection, extract the values in a separate list of parameters. Use question mark placeholders (?) in the query string for each of the parameters. +** *`profile` (Optional, boolean)*: If provided and `true` the response will include an extra `profile` object +with information on how the query was executed. This information is for human debugging +and its format can change at any time but it can give some insight into the performance +of each part of the query. +** *`tables` (Optional, Record>)*: Tables to use with the LOOKUP operation. The top level key is the table +name and the next level key is the column name. 
+** *`delimiter` (Optional, string)*: The character to use between values within a CSV row.
+It is valid only for the CSV format.
+** *`drop_null_columns` (Optional, boolean)*: Indicates whether columns that are entirely `null` will be removed from the `columns` and `values` portion of the results.
+If `true`, the response will include an extra section under the name `all_columns` which has the name of all the columns.
+** *`format` (Optional, Enum("csv" | "json" | "tsv" | "txt" | "yaml" | "cbor" | "smile" | "arrow"))*: A short version of the Accept header, for example `json` or `yaml`.
+** *`keep_alive` (Optional, string | -1 | 0)*: The period for which the query and its results are stored in the cluster.
+The default period is five days.
+When this period expires, the query and its results are deleted, even if the query is still ongoing.
+If the `keep_on_completion` parameter is false, Elasticsearch only stores async queries that do not complete within the period set by the `wait_for_completion_timeout` parameter, regardless of this value.
+** *`keep_on_completion` (Optional, boolean)*: Indicates whether the query and its results are stored in the cluster.
+If false, the query and its results are stored in the cluster only if the request does not complete during the period set by the `wait_for_completion_timeout` parameter.
+** *`wait_for_completion_timeout` (Optional, string | -1 | 0)*: The period to wait for the request to finish.
+By default, the request waits for 1 second for the query results.
+If the query completes during this period, results are returned.
+Otherwise, a query ID is returned that can later be used to retrieve the results.
+
+[discrete]
+==== async_query_delete
+Delete an async ES|QL query.
+If the query is still running, it is cancelled.
+Otherwise, the stored results are deleted.
+
+If the Elasticsearch security features are enabled, only the following users can use this API to delete a query:
+
+* The authenticated user that submitted the original query request
+* Users with the `cancel_task` cluster privilege
+
+{ref}/esql-async-query-delete-api.html[Endpoint documentation]
+[source,ts]
+----
+client.esql.asyncQueryDelete({ id })
+----
+
+[discrete]
+==== Arguments
+
+* *Request (object):*
+** *`id` (string)*: The unique identifier of the query.
+A query ID is provided in the ES|QL async query API response for a query that does not complete in the designated time.
+A query ID is also provided when the request was submitted with the `keep_on_completion` parameter set to `true`.

[discrete]
==== async_query_get
-Retrieves the results of a previously submitted async query request given its ID.
+Get async ES|QL query results.
+Get the current status and available results or stored results for an ES|QL asynchronous query.
+If the Elasticsearch security features are enabled, only the user who first submitted the ES|QL query can retrieve the results using this API.

{ref}/esql-async-query-get-api.html[Endpoint documentation]
[source,ts]
----
-client.esql.asyncQueryGet()
+client.esql.asyncQueryGet({ id })
----

+[discrete]
+==== Arguments
+
+* *Request (object):*
+** *`id` (string)*: The unique identifier of the query.
+A query ID is provided in the ES|QL async query API response for a query that does not complete in the designated time.
+A query ID is also provided when the request was submitted with the `keep_on_completion` parameter set to `true`.
+** *`drop_null_columns` (Optional, boolean)*: Indicates whether columns that are entirely `null` will be removed from the `columns` and `values` portion of the results. +If `true`, the response will include an extra section under the name `all_columns` which has the name of all the columns. +** *`keep_alive` (Optional, string | -1 | 0)*: The period for which the query and its results are stored in the cluster. +When this period expires, the query and its results are deleted, even if the query is still ongoing. +** *`wait_for_completion_timeout` (Optional, string | -1 | 0)*: The period to wait for the request to finish. +By default, the request waits for complete query results. +If the request completes during the period specified in this parameter, complete query results are returned. +Otherwise, the response returns an `is_running` value of `true` and no results. [discrete] ==== query @@ -5379,6 +6184,8 @@ such as open,hidden. ==== get_alias Get aliases. Retrieves information for one or more data stream or index aliases. + +{ref}/indices-get-alias.html[Endpoint documentation] [source,ts] ---- client.indices.getAlias({ ... }) @@ -6782,12 +7589,54 @@ client.inference.put({ inference_id }) [discrete] ==== stream_inference -Perform streaming inference +Perform streaming inference. +Get real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation. +This API works only with the completion task type. + +IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs. + +This API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). You must use a client that supports streaming. + +{ref}/stream-inference-api.html[Endpoint documentation] +[source,ts] +---- +client.inference.streamInference({ inference_id, input }) +---- + +[discrete] +==== Arguments + +* *Request (object):* +** *`inference_id` (string)*: The unique identifier for the inference endpoint. +** *`input` (string | string[])*: The text on which you want to perform the inference task. +It can be a single string or an array. + +NOTE: Inference endpoints for the completion task type currently only support a single string as input. +** *`task_type` (Optional, Enum("sparse_embedding" | "text_embedding" | "rerank" | "completion"))*: The type of task that the model performs. + +[discrete] +==== update +Update an inference endpoint. + +Modify `task_settings`, secrets (within `service_settings`), or `num_allocations` for an inference endpoint, depending on the specific endpoint service and `task_type`. + +IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. +For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. 
+However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.
+
{ref}/update-inference-api.html[Endpoint documentation]
[source,ts]
----
-client.inference.streamInference()
+client.inference.update({ inference_id })
----

+[discrete]
+==== Arguments
+
+* *Request (object):*
+** *`inference_id` (string)*: The unique identifier of the inference endpoint.
+** *`task_type` (Optional, Enum("sparse_embedding" | "text_embedding" | "rerank" | "completion"))*: The type of inference task that the model performs.
+** *`inference_config` (Optional, { service, service_settings, task_settings })*

[discrete]
=== ingest
@@ -10266,6 +11115,17 @@ Activate a user profile.

Create or update a user profile on behalf of another user.

+NOTE: The user profile feature is designed only for use by Kibana and Elastic's Observability, Enterprise Search, and Elastic Security solutions.
+Individual users and external applications should not call this API directly.
+The calling application must have either an `access_token` or a combination of `username` and `password` for the user that the profile document is intended for.
+Elastic reserves the right to change or remove this feature in future releases without prior notice.
+
+This API creates or updates a profile document for end users with information that is extracted from the user's authentication object, including `username`, `full_name`, `roles`, and the authentication realm.
+For example, in the JWT `access_token` case, the profile user's `username` is extracted from the JWT token claim pointed to by the `claims.principal` setting of the JWT realm that authenticated the token.
+
+When updating a profile document, the API enables the document if it was disabled.
+Any updates do not change existing content for either the `labels` or `data` fields.
+
{ref}/security-api-activate-user-profile.html[Endpoint documentation]
[source,ts]
----
client.security.activateUserProfile({ grant_type })
----

==== Arguments

* *Request (object):*
-** *`grant_type` (Enum("password" | "access_token"))*
-** *`access_token` (Optional, string)*
-** *`password` (Optional, string)*
-** *`username` (Optional, string)*
+** *`grant_type` (Enum("password" | "access_token"))*: The type of grant.
+** *`access_token` (Optional, string)*: The user's Elasticsearch access token or JWT.
+Both `access` and `id` JWT token types are supported and they depend on the underlying JWT realm configuration.
+If you specify the `access_token` grant type, this parameter is required.
+It is not valid with other grant types.
+** *`password` (Optional, string)*: The user's password.
+If you specify the `password` grant type, this parameter is required.
+It is not valid with other grant types.
+** *`username` (Optional, string)*: The username that identifies the user.
+If you specify the `password` grant type, this parameter is required.
+It is not valid with other grant types.

[discrete]
==== authenticate
@@ -10740,6 +11607,13 @@ Disable a user profile.

Disable user profiles so that they are not visible in user profile searches.

+NOTE: The user profile feature is designed only for use by Kibana and Elastic's Observability, Enterprise Search, and Elastic Security solutions.
+Individual users and external applications should not call this API directly.
+Elastic reserves the right to change or remove this feature in future releases without prior notice.
+
+When you activate a user profile, it's automatically enabled and visible in user profile searches. You can use the disable user profile API to disable a user profile so it’s not visible in these searches.
+To re-enable a disabled user profile, use the enable user profile API.
+
{ref}/security-api-disable-user-profile.html[Endpoint documentation]
[source,ts]
----
@@ -10751,9 +11625,9 @@ client.security.disableUserProfile({ uid })
* *Request (object):*
** *`uid` (string)*: Unique identifier for the user profile.
-** *`refresh` (Optional, Enum(true | false | "wait_for"))*: If 'true', Elasticsearch refreshes the affected shards to make this operation
-visible to search, if 'wait_for' then wait for a refresh to make this operation
-visible to search, if 'false' do nothing with refreshes.
+** *`refresh` (Optional, Enum(true | false | "wait_for"))*: If 'true', Elasticsearch refreshes the affected shards to make this operation visible to search.
+If 'wait_for', it waits for a refresh to make this operation visible to search.
+If 'false', it does nothing with refreshes.

[discrete]
==== enable_user
@@ -10780,6 +11654,13 @@ Enable a user profile.

Enable user profiles to make them visible in user profile searches.

+NOTE: The user profile feature is designed only for use by Kibana and Elastic's Observability, Enterprise Search, and Elastic Security solutions.
+Individual users and external applications should not call this API directly.
+Elastic reserves the right to change or remove this feature in future releases without prior notice.
+
+When you activate a user profile, it's automatically enabled and visible in user profile searches.
+If you later disable the user profile, you can use the enable user profile API to make the profile visible in these searches again.
+
{ref}/security-api-enable-user-profile.html[Endpoint documentation]
[source,ts]
----
client.security.enableUserProfile({ uid })
----

==== Arguments

* *Request (object):*
-** *`uid` (string)*: Unique identifier for the user profile.
+** *`uid` (string)*: A unique identifier for the user profile.
** *`refresh` (Optional, Enum(true | false | "wait_for"))*: If 'true', Elasticsearch refreshes the affected shards to make this operation
-visible to search, if 'wait_for' then wait for a refresh to make this operation
-visible to search, if 'false' do nothing with refreshes.
+visible to search.
+If 'wait_for', it waits for a refresh to make this operation visible to search.
+If 'false', it does nothing with refreshes.

[discrete]
==== enroll_kibana
@@ -10965,14 +11847,21 @@ client.security.getServiceCredentials({ namespace, service })
[discrete]
==== get_settings
-Retrieve settings for the security system indices
+Get security index settings.
+Get the user-configurable settings for the security internal index (`.security` and associated indices).

{ref}/security-api-get-settings.html[Endpoint documentation]
[source,ts]
----
-client.security.getSettings()
+client.security.getSettings({ ... })
----

+[discrete]
+==== Arguments
+
+* *Request (object):*
+** *`master_timeout` (Optional, string | -1 | 0)*: Period to wait for a connection to the master node.
+If no response is received before the timeout expires, the request fails and returns an error.

[discrete]
==== get_token
@@ -11040,6 +11929,10 @@ Get a user profile.

Get a user's profile using the unique profile ID.

+NOTE: The user profile feature is designed only for use by Kibana and Elastic's Observability, Enterprise Search, and Elastic Security solutions.
+Individual users and external applications should not call this API directly.
+Elastic reserves the right to change or remove this feature in future releases without prior notice.
+
{ref}/security-api-get-user-profile.html[Endpoint documentation]
[source,ts]
----
@@ -11051,9 +11944,9 @@ client.security.getUserProfile({ uid })
* *Request (object):*
** *`uid` (string | string[])*: A unique identifier for the user profile.
-** *`data` (Optional, string | string[])*: List of filters for the `data` field of the profile document.
-To return all content use `data=*`. To return a subset of content
-use `data=<key>` to retrieve content nested under the specified `<key>`.
+** *`data` (Optional, string | string[])*: A list of filters for the `data` field of the profile document.
+To return all content use `data=*`.
+To return a subset of content use `data=<key>` to retrieve content nested under the specified `<key>`.
By default returns no `data` content.

[discrete]
==== has_privileges
@@ -11123,6 +12016,9 @@ Check user profile privileges.

Determine whether the users associated with the specified user profile IDs have all the requested privileges.

+NOTE: The user profile feature is designed only for use by Kibana and Elastic's Observability, Enterprise Search, and Elastic Security solutions. Individual users and external applications should not call this API directly.
+Elastic reserves the right to change or remove this feature in future releases without prior notice.
+
{ref}/security-api-has-privileges-user-profile.html[Endpoint documentation]
[source,ts]
----
client.security.hasPrivilegesUserProfile({ uids, privileges })
----

==== Arguments

* *Request (object):*
** *`uids` (string[])*: A list of profile IDs. The privileges are checked for associated users of the profiles.
-** *`privileges` ({ application, cluster, index })*
+** *`privileges` ({ application, cluster, index })*: An object containing all the privileges to be checked.

[discrete]
==== invalidate_api_key
@@ -11495,7 +12391,20 @@ To page through more hits, use the `search_after` parameter.
==== saml_authenticate
Authenticate SAML.

-Submits a SAML response message to Elasticsearch for consumption.
+Submit a SAML response message to Elasticsearch for consumption.
+
+NOTE: This API is intended for use by custom web applications other than Kibana.
+If you are using Kibana, refer to the documentation for configuring SAML single-sign-on on the Elastic Stack.
+
+The SAML message that is submitted can be:
+
+* A response to a SAML authentication request that was previously created using the SAML prepare authentication API.
+* An unsolicited SAML message in the case of an IdP-initiated single sign-on (SSO) flow.
+
+In either case, the SAML message needs to be a base64 encoded XML document with a root element of `<Response>`.
+
+After successful validation, Elasticsearch responds with an Elasticsearch internal access token and refresh token that can be subsequently used for authentication.
+This API endpoint essentially exchanges SAML responses that indicate successful authentication in the IdP for Elasticsearch access and refresh tokens, which can be used for authentication against Elasticsearch.

{ref}/security-api-saml-authenticate.html[Endpoint documentation]
[source,ts]
----
client.security.samlAuthenticate({ content, ids })
----

==== Arguments

* *Request (object):*
-** *`content` (string)*: The SAML response as it was sent by the user’s browser, usually a Base64 encoded XML document.
-** *`ids` (string | string[])*: A json array with all the valid SAML Request Ids that the caller of the API has for the current user. +** *`content` (string)*: The SAML response as it was sent by the user's browser, usually a Base64 encoded XML document. +** *`ids` (string | string[])*: A JSON array with all the valid SAML Request Ids that the caller of the API has for the current user. ** *`realm` (Optional, string)*: The name of the realm that should authenticate the SAML response. Useful in cases where many SAML realms are defined. [discrete] @@ -11517,6 +12426,15 @@ Logout of SAML completely. Verifies the logout response sent from the SAML IdP. +NOTE: This API is intended for use by custom web applications other than Kibana. +If you are using Kibana, refer to the documentation for configuring SAML single-sign-on on the Elastic Stack. + +The SAML IdP may send a logout response back to the SP after handling the SP-initiated SAML Single Logout. +This API verifies the response by ensuring the content is relevant and validating its signature. +An empty response is returned if the verification process is successful. +The response can be sent by the IdP with either the HTTP-Redirect or the HTTP-Post binding. +The caller of this API must prepare the request accordingly so that this API can handle either of them. + {ref}/security-api-saml-complete-logout.html[Endpoint documentation] [source,ts] ---- @@ -11528,7 +12446,7 @@ client.security.samlCompleteLogout({ realm, ids }) * *Request (object):* ** *`realm` (string)*: The name of the SAML realm in Elasticsearch for which the configuration is used to verify the logout response. -** *`ids` (string | string[])*: A json array with all the valid SAML Request Ids that the caller of the API has for the current user. +** *`ids` (string | string[])*: A JSON array with all the valid SAML Request Ids that the caller of the API has for the current user. ** *`query_string` (Optional, string)*: If the SAML IdP sends the logout response with the HTTP-Redirect binding, this field must be set to the query string of the redirect URI. ** *`content` (Optional, string)*: If the SAML IdP sends the logout response with the HTTP-Post binding, this field must be set to the value of the SAMLResponse form parameter from the logout response. @@ -11536,7 +12454,15 @@ client.security.samlCompleteLogout({ realm, ids }) ==== saml_invalidate Invalidate SAML. -Submits a SAML LogoutRequest message to Elasticsearch for consumption. +Submit a SAML LogoutRequest message to Elasticsearch for consumption. + +NOTE: This API is intended for use by custom web applications other than Kibana. +If you are using Kibana, refer to the documentation for configuring SAML single-sign-on on the Elastic Stack. + +The logout request comes from the SAML IdP during an IdP initiated Single Logout. +The custom web application can use this API to have Elasticsearch process the `LogoutRequest`. +After successful validation of the request, Elasticsearch invalidates the access token and refresh token that corresponds to that specific SAML principal and provides a URL that contains a SAML LogoutResponse message. +Thus the user can be redirected back to their IdP. {ref}/security-api-saml-invalidate.html[Endpoint documentation] [source,ts] @@ -11549,12 +12475,12 @@ client.security.samlInvalidate({ query_string }) * *Request (object):* ** *`query_string` (string)*: The query part of the URL that the user was redirected to by the SAML IdP to initiate the Single Logout. 
-This query should include a single parameter named SAMLRequest that contains a SAML logout request that is deflated and Base64 encoded.
-If the SAML IdP has signed the logout request, the URL should include two extra parameters named SigAlg and Signature that contain the algorithm used for the signature and the signature value itself.
-In order for Elasticsearch to be able to verify the IdP’s signature, the value of the query_string field must be an exact match to the string provided by the browser.
+This query should include a single parameter named `SAMLRequest` that contains a SAML logout request that is deflated and Base64 encoded.
+If the SAML IdP has signed the logout request, the URL should include two extra parameters named `SigAlg` and `Signature` that contain the algorithm used for the signature and the signature value itself.
+In order for Elasticsearch to be able to verify the IdP's signature, the value of the `query_string` field must be an exact match to the string provided by the browser.
The client application must not attempt to parse or process the string in any way.
-** *`acs` (Optional, string)*: The Assertion Consumer Service URL that matches the one of the SAML realm in Elasticsearch that should be used. You must specify either this parameter or the realm parameter.
-** *`realm` (Optional, string)*: The name of the SAML realm in Elasticsearch the configuration. You must specify either this parameter or the acs parameter.
+** *`acs` (Optional, string)*: The Assertion Consumer Service URL that matches the one of the SAML realm in Elasticsearch that should be used. You must specify either this parameter or the `realm` parameter.
+** *`realm` (Optional, string)*: The name of the SAML realm in Elasticsearch for which the configuration is used. You must specify either this parameter or the `acs` parameter.

[discrete]
==== saml_logout
Logout of SAML.

Submits a request to invalidate an access token and refresh token.

+NOTE: This API is intended for use by custom web applications other than Kibana.
+If you are using Kibana, refer to the documentation for configuring SAML single-sign-on on the Elastic Stack.
+
+This API invalidates the tokens that were generated for a user by the SAML authenticate API.
+If the SAML realm in Elasticsearch is configured accordingly and the SAML IdP supports this, the Elasticsearch response contains a URL to redirect the user to the IdP that contains a SAML logout request (starting an SP-initiated SAML Single Logout).
+
{ref}/security-api-saml-logout.html[Endpoint documentation]
[source,ts]
----
@@ -11573,7 +12505,7 @@ client.security.samlLogout({ token })

* *Request (object):*
** *`token` (string)*: The access token that was returned as a response to calling the SAML authenticate API.
-Alternatively, the most recent token that was received after refreshing the original one by using a refresh_token.
+Alternatively, the most recent token that was received after refreshing the original one by using a `refresh_token`.
** *`refresh_token` (Optional, string)*: The refresh token that was returned as a response to calling the SAML authenticate API.
Alternatively, the most recent refresh token that was received after refreshing the original access token.

@@ -11581,7 +12513,18 @@
==== saml_prepare_authentication
Prepare SAML authentication.
-Creates a SAML authentication request (``) as a URL string, based on the configuration of the respective SAML realm in Elasticsearch. +Create a SAML authentication request (``) as a URL string based on the configuration of the respective SAML realm in Elasticsearch. + +NOTE: This API is intended for use by custom web applications other than Kibana. +If you are using Kibana, refer to the documentation for configuring SAML single-sign-on on the Elastic Stack. + +This API returns a URL pointing to the SAML Identity Provider. +You can use the URL to redirect the browser of the user in order to continue the authentication process. +The URL includes a single parameter named `SAMLRequest`, which contains a SAML Authentication request that is deflated and Base64 encoded. +If the configuration dictates that SAML authentication requests should be signed, the URL has two extra parameters named `SigAlg` and `Signature`. +These parameters contain the algorithm used for the signature and the signature value itself. +It also returns a random string that uniquely identifies this SAML Authentication request. +The caller of this API needs to store this identifier as it needs to be used in a following step of the authentication process. {ref}/security-api-saml-prepare-authentication.html[Endpoint documentation] [source,ts] @@ -11594,10 +12537,10 @@ client.security.samlPrepareAuthentication({ ... }) * *Request (object):* ** *`acs` (Optional, string)*: The Assertion Consumer Service URL that matches the one of the SAML realms in Elasticsearch. -The realm is used to generate the authentication request. You must specify either this parameter or the realm parameter. +The realm is used to generate the authentication request. You must specify either this parameter or the `realm` parameter. ** *`realm` (Optional, string)*: The name of the SAML realm in Elasticsearch for which the configuration is used to generate the authentication request. -You must specify either this parameter or the acs parameter. -** *`relay_state` (Optional, string)*: A string that will be included in the redirect URL that this API returns as the RelayState query parameter. +You must specify either this parameter or the `acs` parameter. +** *`relay_state` (Optional, string)*: A string that will be included in the redirect URL that this API returns as the `RelayState` query parameter. If the Authentication Request is signed, this value is used as part of the signature computation. [discrete] @@ -11606,6 +12549,9 @@ Create SAML service provider metadata. Generate SAML metadata for a SAML 2.0 Service Provider. +The SAML 2.0 specification provides a mechanism for Service Providers to describe their capabilities and configuration using a metadata file. +This API generates Service Provider metadata based on the configuration of a SAML realm in Elasticsearch. + {ref}/security-api-saml-sp-metadata.html[Endpoint documentation] [source,ts] ---- @@ -11624,6 +12570,10 @@ Suggest a user profile. Get suggestions for user profiles that match specified search criteria. +NOTE: The user profile feature is designed only for use by Kibana and Elastic's Observability, Enterprise Search, and Elastic Security solutions. +Individual users and external applications should not call this API directly. +Elastic reserves the right to change or remove this feature in future releases without prior notice. + {ref}/security-api-suggest-user-profile.html[Endpoint documentation] [source,ts] ---- @@ -11634,17 +12584,17 @@ client.security.suggestUserProfiles({ ... 
})
==== Arguments

* *Request (object):*
-** *`name` (Optional, string)*: Query string used to match name-related fields in user profile documents.
+** *`name` (Optional, string)*: A query string used to match name-related fields in user profile documents.
Name-related fields are the user's `username`, `full_name`, and `email`.
-** *`size` (Optional, number)*: Number of profiles to return.
-** *`data` (Optional, string | string[])*: List of filters for the `data` field of the profile document.
-To return all content use `data=*`. To return a subset of content
-use `data=` to retrieve content nested under the specified ``.
-By default returns no `data` content.
+** *`size` (Optional, number)*: The number of profiles to return.
+** *`data` (Optional, string | string[])*: A list of filters for the `data` field of the profile document.
+To return all content use `data=*`.
+To return a subset of content, use `data=` to retrieve content nested under the specified ``.
+By default, the API returns no `data` content.
+It is an error to specify `data` as both the query parameter and the request body field.
** *`hint` (Optional, { uids, labels })*: Extra search criteria to improve relevance of the suggestion result.
Profiles matching the specified hint are ranked higher in the response.
-Profiles not matching the hint don't exclude the profile from the response
-as long as the profile matches the `name` field query.
+Profiles not matching the hint aren't excluded from the response as long as the profile matches the `name` field query.

[discrete]
==== update_api_key
@@ -11685,6 +12635,20 @@ Update a cross-cluster API key.

Update the attributes of an existing cross-cluster API key, which is used for API key based remote cluster access.

+To use this API, you must have at least the `manage_security` cluster privilege.
+Users can only update API keys that they created.
+To update another user's API key, use the `run_as` feature to submit a request on behalf of another user.
+
+IMPORTANT: It's not possible to use an API key as the authentication credential for this API.
+To update an API key, the owner user's credentials are required.
+
+It's not possible to update expired API keys, or API keys that have been invalidated by the invalidate API key API.
+
+This API supports updates to an API key's access scope, metadata, and expiration.
+The owner user's information, such as the `username` and `realm`, is also updated automatically on every call.
+
+NOTE: This API cannot update REST API keys, which should be updated by either the update API key or bulk update API keys API.
+
{ref}/security-api-update-cross-cluster-api-key.html[Endpoint documentation]
[source,ts]
----
@@ -11700,7 +12664,7 @@ client.security.updateCrossClusterApiKey({ id, access })
The access is composed of permissions for cross cluster search and cross cluster replication.
At least one of them must be specified.
When specified, the new access assignment fully replaces the previously assigned access.
-** *`expiration` (Optional, string | -1 | 0)*: Expiration time for the API key.
+** *`expiration` (Optional, string | -1 | 0)*: The expiration time for the API key.
By default, API keys never expire. This property can be omitted to leave the value unchanged.
** *`metadata` (Optional, Record)*: Arbitrary metadata that you want to associate with the API key. It supports a nested data structure.
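+
+For orientation only, a minimal call through this client might look like the following sketch; the key ID, index pattern, and metadata values are illustrative placeholders, not values from this change:
+
+[source,js]
+----
+const response = await client.security.updateCrossClusterApiKey({
+  id: "VuaCfGcBCdbkQm-e5aOx", // hypothetical cross-cluster API key ID
+  access: {
+    // fully replaces the previously assigned access
+    search: [{ names: ["logs*"] }],
+  },
+  metadata: { application: "search" },
+});
+console.log(response);
+----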
@@ -11709,14 +12673,29 @@ When specified, this information fully replaces metadata previously associated w [discrete] ==== update_settings -Update settings for the security system index +Update security index settings. + +Update the user-configurable settings for the security internal index (`.security` and associated indices). Only a subset of settings are allowed to be modified, for example `index.auto_expand_replicas` and `index.number_of_replicas`. + +If a specific index is not in use on the system and settings are provided for it, the request will be rejected. This API does not yet support configuring the settings for indices before they are in use. {ref}/security-api-update-settings.html[Endpoint documentation] [source,ts] ---- -client.security.updateSettings() +client.security.updateSettings({ ... }) ---- +[discrete] +==== Arguments + +* *Request (object):* +** *`security` (Optional, { index })*: Settings for the index used for most security configuration, including native realm users and roles configured with the API. +** *`security-profile` (Optional, { index })*: Settings for the index used to store profile information. +** *`security-tokens` (Optional, { index })*: Settings for the index used to store tokens. +** *`master_timeout` (Optional, string | -1 | 0)*: The period to wait for a connection to the master node. +If no response is received before the timeout expires, the request fails and returns an error. +** *`timeout` (Optional, string | -1 | 0)*: The period to wait for a response. +If no response is received before the timeout expires, the request fails and returns an error. [discrete] ==== update_user_profile_data @@ -11724,6 +12703,21 @@ Update user profile data. Update specific data for the user profile that is associated with a unique ID. +NOTE: The user profile feature is designed only for use by Kibana and Elastic's Observability, Enterprise Search, and Elastic Security solutions. +Individual users and external applications should not call this API directly. +Elastic reserves the right to change or remove this feature in future releases without prior notice. + +To use this API, you must have one of the following privileges: + +* The `manage_user_profile` cluster privilege. +* The `update_profile_data` global privilege for the namespaces that are referenced in the request. + +This API updates the `labels` and `data` fields of an existing user profile document with JSON objects. +New keys and their values are added to the profile document and conflicting keys are replaced by data that's included in the request. + +For both labels and data, content is namespaced by the top-level fields. +The `update_profile_data` global privilege grants privileges for updating only the allowed namespaces. + {ref}/security-api-update-user-profile-data.html[Endpoint documentation] [source,ts] ---- @@ -11735,15 +12729,19 @@ client.security.updateUserProfileData({ uid }) * *Request (object):* ** *`uid` (string)*: A unique identifier for the user profile. -** *`labels` (Optional, Record)*: Searchable data that you want to associate with the user profile. This -field supports a nested data structure. +** *`labels` (Optional, Record)*: Searchable data that you want to associate with the user profile. +This field supports a nested data structure. +Within the labels object, top-level keys cannot begin with an underscore (`_`) or contain a period (`.`). ** *`data` (Optional, Record)*: Non-searchable data that you want to associate with the user profile. 
This field supports a nested data structure. +Within the `data` object, top-level keys cannot begin with an underscore (`_`) or contain a period (`.`). +The data object is not searchable, but can be retrieved with the get user profile API. ** *`if_seq_no` (Optional, number)*: Only perform the operation if the document has this sequence number. ** *`if_primary_term` (Optional, number)*: Only perform the operation if the document has this primary term. ** *`refresh` (Optional, Enum(true | false | "wait_for"))*: If 'true', Elasticsearch refreshes the affected shards to make this operation -visible to search, if 'wait_for' then wait for a refresh to make this operation -visible to search, if 'false' do nothing with refreshes. +visible to search. +If 'wait_for', it waits for a refresh to make this operation visible to search. +If 'false', nothing is done with refreshes. [discrete] === shutdown @@ -12287,14 +13285,132 @@ client.snapshot.getRepository({ ... }) [discrete] ==== repository_analyze -Analyzes a repository for correctness and performance +Analyze a snapshot repository. +Analyze the performance characteristics and any incorrect behaviour found in a repository. -{ref}/modules-snapshots.html[Endpoint documentation] +The response exposes implementation details of the analysis which may change from version to version. +The response body format is therefore not considered stable and may be different in newer versions. + +There are a large number of third-party storage systems available, not all of which are suitable for use as a snapshot repository by Elasticsearch. +Some storage systems behave incorrectly, or perform poorly, especially when accessed concurrently by multiple clients as the nodes of an Elasticsearch cluster do. This API performs a collection of read and write operations on your repository which are designed to detect incorrect behaviour and to measure the performance characteristics of your storage system. + +The default values for the parameters are deliberately low to reduce the impact of running an analysis inadvertently and to provide a sensible starting point for your investigations. +Run your first analysis with the default parameter values to check for simple problems. +If successful, run a sequence of increasingly large analyses until you encounter a failure or you reach a `blob_count` of at least `2000`, a `max_blob_size` of at least `2gb`, a `max_total_data_size` of at least `1tb`, and a `register_operation_count` of at least `100`. +Always specify a generous timeout, possibly `1h` or longer, to allow time for each analysis to run to completion. +Perform the analyses using a multi-node cluster of a similar size to your production cluster so that it can detect any problems that only arise when the repository is accessed by many nodes at once. + +If the analysis fails, Elasticsearch detected that your repository behaved unexpectedly. +This usually means you are using a third-party storage system with an incorrect or incompatible implementation of the API it claims to support. +If so, this storage system is not suitable for use as a snapshot repository. +You will need to work with the supplier of your storage system to address the incompatibilities that Elasticsearch detects. + +If the analysis is successful, the API returns details of the testing process, optionally including how long each operation took. +You can use this information to determine the performance of your storage system. +If any operation fails or returns an incorrect result, the API returns an error. 
+If the API returns an error, it may not have removed all the data it wrote to the repository. +The error will indicate the location of any leftover data and this path is also recorded in the Elasticsearch logs. +You should verify that this location has been cleaned up correctly. +If there is still leftover data at the specified location, you should manually remove it. + +If the connection from your client to Elasticsearch is closed while the client is waiting for the result of the analysis, the test is cancelled. +Some clients are configured to close their connection if no response is received within a certain timeout. +An analysis takes a long time to complete so you might need to relax any such client-side timeouts. +On cancellation the analysis attempts to clean up the data it was writing, but it may not be able to remove it all. +The path to the leftover data is recorded in the Elasticsearch logs. +You should verify that this location has been cleaned up correctly. +If there is still leftover data at the specified location, you should manually remove it. + +If the analysis is successful then it detected no incorrect behaviour, but this does not mean that correct behaviour is guaranteed. +The analysis attempts to detect common bugs but it does not offer 100% coverage. +Additionally, it does not test the following: + +* Your repository must perform durable writes. Once a blob has been written it must remain in place until it is deleted, even after a power loss or similar disaster. +* Your repository must not suffer from silent data corruption. Once a blob has been written, its contents must remain unchanged until it is deliberately modified or deleted. +* Your repository must behave correctly even if connectivity from the cluster is disrupted. Reads and writes may fail in this case, but they must not return incorrect results. + +IMPORTANT: An analysis writes a substantial amount of data to your repository and then reads it back again. +This consumes bandwidth on the network between the cluster and the repository, and storage space and I/O bandwidth on the repository itself. +You must ensure this load does not affect other users of these systems. +Analyses respect the repository settings `max_snapshot_bytes_per_sec` and `max_restore_bytes_per_sec` if available and the cluster setting `indices.recovery.max_bytes_per_sec` which you can use to limit the bandwidth they consume. + +NOTE: This API is intended for exploratory use by humans. You should expect the request parameters and the response format to vary in future versions. + +NOTE: Different versions of Elasticsearch may perform different checks for repository compatibility, with newer versions typically being stricter than older ones. +A storage system that passes repository analysis with one version of Elasticsearch may fail with a different version. +This indicates it behaves incorrectly in ways that the former version did not detect. +You must work with the supplier of your storage system to address the incompatibilities detected by the repository analysis API in any version of Elasticsearch. + +NOTE: This API may not work correctly in a mixed-version cluster. + +*Implementation details* + +NOTE: This section of documentation describes how the repository analysis API works in this version of Elasticsearch, but you should expect the implementation to vary between versions. The request parameters and response format depend on details of the implementation so may also be different in newer versions. 
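+
+As a concrete starting point, a cautious first analysis through this client might look like the following sketch; the repository name and parameter values are illustrative placeholders rather than recommendations from this change:
+
+[source,js]
+----
+const response = await client.snapshot.repositoryAnalyze({
+  repository: "my_repository", // hypothetical repository name
+  blob_count: 10, // a deliberately small first run
+  max_blob_size: "1mb",
+  timeout: "120s", // be generous; an analysis can take a long time
+});
+console.log(response);
+----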
+
+The analysis comprises a number of blob-level tasks, as set by the `blob_count` parameter, and a number of compare-and-exchange operations on linearizable registers, as set by the `register_operation_count` parameter.
+These tasks are distributed over the data and master-eligible nodes in the cluster for execution.
+
+For most blob-level tasks, the executing node first writes a blob to the repository and then instructs some of the other nodes in the cluster to attempt to read the data it just wrote.
+The size of the blob is chosen randomly, according to the `max_blob_size` and `max_total_data_size` parameters.
+If any of these reads fails then the repository does not implement the necessary read-after-write semantics that Elasticsearch requires.
+
+For some blob-level tasks, the executing node will instruct some of its peers to attempt to read the data before the writing process completes.
+These reads are permitted to fail, but must not return partial data.
+If any read returns partial data then the repository does not implement the necessary atomicity semantics that Elasticsearch requires.
+
+For some blob-level tasks, the executing node will overwrite the blob while its peers are reading it.
+In this case the data read may come from either the original or the overwritten blob, but the read operation must not return partial data or a mix of data from the two blobs.
+If any of these reads returns partial data or a mix of the two blobs then the repository does not implement the necessary atomicity semantics that Elasticsearch requires for overwrites.
+
+The executing node will use a variety of different methods to write the blob.
+For instance, where applicable, it will use both single-part and multi-part uploads.
+Similarly, the reading nodes will use a variety of different methods to read the data back again.
+For instance they may read the entire blob from start to end or may read only a subset of the data.
+
+For some blob-level tasks, the executing node will cancel the write before it is complete.
+In this case, it still instructs some of the other nodes in the cluster to attempt to read the blob but all of these reads must fail to find the blob.
+
+Linearizable registers are special blobs that Elasticsearch manipulates using an atomic compare-and-exchange operation.
+This operation ensures correct and strongly-consistent behavior even when the blob is accessed by multiple nodes at the same time.
+The detailed implementation of the compare-and-exchange operation on linearizable registers varies by repository type.
+Repository analysis verifies that uncontended compare-and-exchange operations on a linearizable register blob always succeed.
+Repository analysis also verifies that contended operations either succeed or report the contention but do not return incorrect results.
+If an operation fails due to contention, Elasticsearch retries the operation until it succeeds.
+Most of the compare-and-exchange operations performed by repository analysis atomically increment a counter which is represented as an 8-byte blob.
+Some operations also verify the behavior on small blobs with sizes other than 8 bytes.
+
+{ref}/repo-analysis-api.html[Endpoint documentation]
[source,ts]
----
-client.snapshot.repositoryAnalyze()
+client.snapshot.repositoryAnalyze({ repository })
----
+[discrete]
+==== Arguments
+
+* *Request (object):*
+** *`repository` (string)*: The name of the repository.
+** *`blob_count` (Optional, number)*: The total number of blobs to write to the repository during the test. +For realistic experiments, you should set it to at least `2000`. +** *`concurrency` (Optional, number)*: The number of operations to run concurrently during the test. +** *`detailed` (Optional, boolean)*: Indicates whether to return detailed results, including timing information for every operation performed during the analysis. +If false, it returns only a summary of the analysis. +** *`early_read_node_count` (Optional, number)*: The number of nodes on which to perform an early read operation while writing each blob. +Early read operations are only rarely performed. +** *`max_blob_size` (Optional, number | string)*: The maximum size of a blob to be written during the test. +For realistic experiments, you should set it to at least `2gb`. +** *`max_total_data_size` (Optional, number | string)*: An upper limit on the total size of all the blobs written during the test. +For realistic experiments, you should set it to at least `1tb`. +** *`rare_action_probability` (Optional, number)*: The probability of performing a rare action such as an early read, an overwrite, or an aborted write on each blob. +** *`rarely_abort_writes` (Optional, boolean)*: Indicates whether to rarely cancel writes before they complete. +** *`read_node_count` (Optional, number)*: The number of nodes on which to read a blob after writing. +** *`register_operation_count` (Optional, number)*: The minimum number of linearizable register operations to perform in total. +For realistic experiments, you should set it to at least `100`. +** *`seed` (Optional, number)*: The seed for the pseudo-random number generator used to generate the list of operations performed during the test. +To repeat the same set of operations in multiple experiments, use the same seed in each experiment. +Note that the operations are performed concurrently so might not always happen in the same order on each run. +** *`timeout` (Optional, string | -1 | 0)*: The period of time to wait for the test to complete. +If no response is received before the timeout expires, the test is cancelled and returns an error. [discrete] ==== restore @@ -12733,6 +13849,10 @@ client.synonyms.putSynonymRule({ set_id, rule_id, synonyms }) [discrete] ==== cancel Cancel a task. + +WARNING: The task management API is new and should still be considered a beta feature. +The API may change in ways that are not backwards compatible. + A task may continue to run for some time after it has been cancelled because it may not be able to safely stop its current activity straight away. It is also possible that Elasticsearch must complete its work on other tasks before it can process the cancellation. The get task information API will continue to list these cancelled tasks until they complete. @@ -12751,17 +13871,22 @@ client.tasks.cancel({ ... }) ==== Arguments * *Request (object):* -** *`task_id` (Optional, string | number)*: ID of the task. -** *`actions` (Optional, string | string[])*: List or wildcard expression of actions used to limit the request. -** *`nodes` (Optional, string[])*: List of node IDs or names used to limit the request. -** *`parent_task_id` (Optional, string)*: Parent task ID used to limit the tasks. -** *`wait_for_completion` (Optional, boolean)*: Should the request block until the cancellation of the task and its descendant tasks is completed. Defaults to false +** *`task_id` (Optional, string | number)*: The task identifier. 
+** *`actions` (Optional, string | string[])*: A list or wildcard expression of actions that is used to limit the request.
+** *`nodes` (Optional, string[])*: A list of node IDs or names that is used to limit the request.
+** *`parent_task_id` (Optional, string)*: A parent task ID that is used to limit the tasks.
+** *`wait_for_completion` (Optional, boolean)*: If `true`, the request blocks until all found tasks are complete.

[discrete]
==== get
Get task information.
Get information about a task currently running in the cluster.

+WARNING: The task management API is new and should still be considered a beta feature.
+The API may change in ways that are not backwards compatible.
+
+If the task identifier is not found, a 404 response code indicates that there are no resources that match the request.
+
{ref}/tasks.html[Endpoint documentation]
[source,ts]
----
@@ -12772,8 +13897,8 @@ client.tasks.get({ task_id })

==== Arguments

* *Request (object):*
-** *`task_id` (string)*: ID of the task.
-** *`timeout` (Optional, string | -1 | 0)*: Period to wait for a response.
+** *`task_id` (string)*: The task identifier.
+** *`timeout` (Optional, string | -1 | 0)*: The period to wait for a response.
If no response is received before the timeout expires, the request fails and returns an error.
** *`wait_for_completion` (Optional, boolean)*: If `true`, the request blocks until the task has completed.

@@ -12782,6 +13907,63 @@ If no response is received before the timeout expires, the request fails and ret
Get all tasks.
Get information about the tasks currently running on one or more nodes in the cluster.

+WARNING: The task management API is new and should still be considered a beta feature.
+The API may change in ways that are not backwards compatible.
+
+**Identifying running tasks**
+
+The `X-Opaque-Id` header, when provided on the HTTP request, is returned as a header in the response as well as in the `headers` field of the task information.
+This enables you to track certain calls or associate certain tasks with the client that started them.
+For example:
+
+```
+curl -i -H "X-Opaque-Id: 123456" "http://localhost:9200/_tasks?group_by=parents"
+```
+
+The API returns the following result:
+
+```
+HTTP/1.1 200 OK
+X-Opaque-Id: 123456
+content-type: application/json; charset=UTF-8
+content-length: 831
+
+{
+  "tasks" : {
+    "u5lcZHqcQhu-rUoFaqDphA:45" : {
+      "node" : "u5lcZHqcQhu-rUoFaqDphA",
+      "id" : 45,
+      "type" : "transport",
+      "action" : "cluster:monitor/tasks/lists",
+      "start_time_in_millis" : 1513823752749,
+      "running_time_in_nanos" : 293139,
+      "cancellable" : false,
+      "headers" : {
+        "X-Opaque-Id" : "123456"
+      },
+      "children" : [
+        {
+          "node" : "u5lcZHqcQhu-rUoFaqDphA",
+          "id" : 46,
+          "type" : "direct",
+          "action" : "cluster:monitor/tasks/lists[n]",
+          "start_time_in_millis" : 1513823752750,
+          "running_time_in_nanos" : 92133,
+          "cancellable" : false,
+          "parent_task_id" : "u5lcZHqcQhu-rUoFaqDphA:45",
+          "headers" : {
+            "X-Opaque-Id" : "123456"
+          }
+        }
+      ]
+    }
+  }
+}
+```
+In this example, `X-Opaque-Id: 123456` is the ID returned as part of the response header.
+The `X-Opaque-Id` in the task `headers` is the ID for the task that was initiated by the REST request.
+The `X-Opaque-Id` in the children `headers` is the ID for the child task of the task that was initiated by the REST request.
+
{ref}/tasks.html[Endpoint documentation]
[source,ts]
----
@@ -12792,14 +13974,21 @@ client.tasks.list({ ...
})
==== Arguments

* *Request (object):*
-** *`actions` (Optional, string | string[])*: List or wildcard expression of actions used to limit the request.
-** *`detailed` (Optional, boolean)*: If `true`, the response includes detailed information about shard recoveries.
+** *`actions` (Optional, string | string[])*: A list or wildcard expression of actions used to limit the request.
+For example, you can use `cluster:*` to retrieve all cluster-related tasks.
+** *`detailed` (Optional, boolean)*: If `true`, the response includes detailed information about the running tasks.
This information is useful to distinguish tasks from each other but is more costly to run.
-** *`group_by` (Optional, Enum("nodes" | "parents" | "none"))*: Key used to group tasks in the response.
-** *`nodes` (Optional, string | string[])*: List of node IDs or names used to limit returned information.
-** *`parent_task_id` (Optional, string)*: Parent task ID used to limit returned information. To return all tasks, omit this parameter or use a value of `-1`.
-** *`master_timeout` (Optional, string | -1 | 0)*: Period to wait for a connection to the master node. If no response is received before the timeout expires, the request fails and returns an error.
-** *`timeout` (Optional, string | -1 | 0)*: Period to wait for a response. If no response is received before the timeout expires, the request fails and returns an error.
+** *`group_by` (Optional, Enum("nodes" | "parents" | "none"))*: A key that is used to group tasks in the response.
+The task lists can be grouped either by nodes or by parent tasks.
+** *`nodes` (Optional, string | string[])*: A list of node IDs or names that is used to limit the returned information.
+** *`parent_task_id` (Optional, string)*: A parent task identifier that is used to limit returned information.
+To return all tasks, omit this parameter or use a value of `-1`.
+If the parent task is not found, the API does not return a 404 response code.
+** *`master_timeout` (Optional, string | -1 | 0)*: The period to wait for a connection to the master node.
+If no response is received before the timeout expires, the request fails and returns an error.
+** *`timeout` (Optional, string | -1 | 0)*: The period to wait for each node to respond.
+If a node does not respond before its timeout expires, the response does not include its information.
+However, timed out nodes are included in the `node_failures` property.
** *`wait_for_completion` (Optional, boolean)*: If `true`, the request blocks until the operation is complete.

[discrete]
diff --git a/src/api/api/bulk.ts b/src/api/api/bulk.ts
index e8d496be1..4803aaf75 100644
--- a/src/api/api/bulk.ts
+++ b/src/api/api/bulk.ts
@@ -39,7 +39,7 @@ import * as TB from '../typesWithBodyKey'
interface That { transport: Transport }

/**
- * Bulk index or delete documents. Performs multiple indexing or delete operations in a single API call. This reduces overhead and can greatly increase indexing speed.
+ * Bulk index or delete documents. Perform multiple `index`, `create`, `delete`, and `update` actions in a single request. This reduces overhead and can greatly increase indexing speed. If the Elasticsearch security features are enabled, you must have the following index privileges for the target data stream, index, or index alias: * To use the `create` action, you must have the `create_doc`, `create`, `index`, or `write` index privilege. Data streams support only the `create` action.
* To use the `index` action, you must have the `create`, `index`, or `write` index privilege. * To use the `delete` action, you must have the `delete` or `write` index privilege. * To use the `update` action, you must have the `index` or `write` index privilege. * To automatically create a data stream or index with a bulk API request, you must have the `auto_configure`, `create_index`, or `manage` index privilege. * To make the result of a bulk operation visible to search using the `refresh` parameter, you must have the `maintenance` or `manage` index privilege. Automatic data stream creation requires a matching index template with data stream enabled. The actions are specified in the request body using a newline delimited JSON (NDJSON) structure: ``` action_and_meta_data\n optional_source\n action_and_meta_data\n optional_source\n .... action_and_meta_data\n optional_source\n ``` The `index` and `create` actions expect a source on the next line and have the same semantics as the `op_type` parameter in the standard index API. A `create` action fails if a document with the same ID already exists in the target. An `index` action adds or replaces a document as necessary. NOTE: Data streams support only the `create` action. To update or delete a document in a data stream, you must target the backing index containing the document. An `update` action expects that the partial doc, upsert, and script and its options are specified on the next line. A `delete` action does not expect a source on the next line and has the same semantics as the standard delete API. NOTE: The final line of data must end with a newline character (`\n`). Each newline character may be preceded by a carriage return (`\r`). When sending NDJSON data to the `_bulk` endpoint, use a `Content-Type` header of `application/json` or `application/x-ndjson`. Because this format uses literal newline characters (`\n`) as delimiters, make sure that the JSON actions and sources are not pretty printed. If you provide a target in the request path, it is used for any actions that don't explicitly specify an `_index` argument. A note on the format: the idea here is to make processing as fast as possible. As some of the actions are redirected to other shards on other nodes, only `action_meta_data` is parsed on the receiving node side. Client libraries using this protocol should strive to do something similar on the client side, and reduce buffering as much as possible. There is no "correct" number of actions to perform in a single bulk request. Experiment with different settings to find the optimal size for your particular workload. Note that Elasticsearch limits the maximum size of an HTTP request to 100mb by default so clients must ensure that no request exceeds this size. It is not possible to index a single document that exceeds the size limit, so you must pre-process any such documents into smaller pieces before sending them to Elasticsearch. For instance, split documents into pages or chapters before indexing them, or store raw binary data in a system outside Elasticsearch and replace the raw data with a link to the external system in the documents that you send to Elasticsearch.
**Client support for bulk requests** Some of the officially supported clients provide helpers to assist with bulk requests and reindexing: * Go: Check out `esutil.BulkIndexer` * Perl: Check out `Search::Elasticsearch::Client::5_0::Bulk` and `Search::Elasticsearch::Client::5_0::Scroll` * Python: Check out `elasticsearch.helpers.*` * JavaScript: Check out `client.helpers.*` * .NET: Check out `BulkAllObservable` * PHP: Check out bulk indexing. **Submitting bulk requests with cURL** If you're providing text file input to `curl`, you must use the `--data-binary` flag instead of plain `-d`. The latter doesn't preserve newlines. For example: ``` $ cat requests { "index" : { "_index" : "test", "_id" : "1" } } { "field1" : "value1" } $ curl -s -H "Content-Type: application/x-ndjson" -XPOST localhost:9200/_bulk --data-binary "@requests"; echo {"took":7, "errors": false, "items":[{"index":{"_index":"test","_id":"1","_version":1,"result":"created","forced_refresh":false}}]} ``` **Optimistic concurrency control** Each `index` and `delete` action within a bulk API call may include the `if_seq_no` and `if_primary_term` parameters in their respective action and meta data lines. The `if_seq_no` and `if_primary_term` parameters control how operations are run, based on the last modification to existing documents. See Optimistic concurrency control for more details. **Versioning** Each bulk item can include the version value using the `version` field. It automatically follows the behavior of the index or delete operation based on the `_version` mapping. It also supports the `version_type`. **Routing** Each bulk item can include the routing value using the `routing` field. It automatically follows the behavior of the index or delete operation based on the `_routing` mapping. NOTE: Data streams do not support custom routing unless they were created with the `allow_custom_routing` setting enabled in the template. **Wait for active shards** When making bulk calls, you can set the `wait_for_active_shards` parameter to require a minimum number of shard copies to be active before starting to process the bulk request. **Refresh** Control when the changes made by this request are visible to search. NOTE: Only the shards that receive the bulk request will be affected by refresh. Imagine a `_bulk?refresh=wait_for` request with three documents in it that happen to be routed to different shards in an index with five shards. The request will only wait for those three shards to refresh. The other two shards that make up the index do not participate in the `_bulk` request at all. * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/docs-bulk.html | Elasticsearch API documentation} */ export default async function BulkApi (this: That, params: T.BulkRequest | TB.BulkRequest, options?: TransportRequestOptionsWithOutMeta): Promise
You can run a single count API search across multiple data streams and indices. The operation is broadcast across all shards. For each shard ID group, a replica is chosen and the search is run against it. This means that replicas increase the scalability of the count. * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/search-count.html | Elasticsearch API documentation} */ export default async function CountApi (this: That, params?: T.CountRequest | TB.CountRequest, options?: TransportRequestOptionsWithOutMeta): Promise
diff --git a/src/api/api/create.ts b/src/api/api/create.ts
index e960470b5..d89b29d56 100644
--- a/src/api/api/create.ts
+++ b/src/api/api/create.ts
@@ -39,7 +39,7 @@ import * as TB from '../typesWithBodyKey'
interface That { transport: Transport }

/**
- * Index a document. Adds a JSON document to the specified data stream or index and makes it searchable. If the target is an index and the document already exists, the request updates the document and increments its version.
+ * Create a new document in the index. You can index a new JSON document with the `//_doc/` or `//_create/<_id>` APIs. Using `_create` guarantees that the document is indexed only if it does not already exist. It returns a 409 response when a document with the same ID already exists in the index. To update an existing document, you must use the `//_doc/` API. If the Elasticsearch security features are enabled, you must have the following index privileges for the target data stream, index, or index alias: * To add a document using the `PUT //_create/<_id>` or `POST //_create/<_id>` request formats, you must have the `create_doc`, `create`, `index`, or `write` index privilege. * To automatically create a data stream or index with this API request, you must have the `auto_configure`, `create_index`, or `manage` index privilege. Automatic data stream creation requires a matching index template with data stream enabled. **Automatically create data streams and indices** If the request's target doesn't exist and matches an index template with a `data_stream` definition, the index operation automatically creates the data stream. If the target doesn't exist and doesn't match a data stream template, the operation automatically creates the index and applies any matching index templates. NOTE: Elasticsearch includes several built-in index templates. To avoid naming collisions with these templates, refer to index pattern documentation. If no mapping exists, the index operation creates a dynamic mapping. By default, new fields and objects are automatically added to the mapping if needed. Automatic index creation is controlled by the `action.auto_create_index` setting. If it is `true`, any index can be created automatically. You can modify this setting to explicitly allow or block automatic creation of indices that match specified patterns or set it to `false` to turn off automatic index creation entirely. Specify a comma-separated list of patterns you want to allow or prefix each pattern with `+` or `-` to indicate whether it should be allowed or blocked. When a list is specified, the default behaviour is to disallow. NOTE: The `action.auto_create_index` setting affects the automatic creation of indices only. It does not affect the creation of data streams. **Routing** By default, shard placement — or routing — is controlled by using a hash of the document's ID value.
For more explicit control, the value fed into the hash function used by the router can be directly specified on a per-operation basis using the `routing` parameter. When setting up explicit mapping, you can also use the `_routing` field to direct the index operation to extract the routing value from the document itself. This does come at the (very minimal) cost of an additional document parsing pass. If the `_routing` mapping is defined and set to be required, the index operation will fail if no routing value is provided or extracted. NOTE: Data streams do not support custom routing unless they were created with the `allow_custom_routing` setting enabled in the template. **Distributed** The index operation is directed to the primary shard based on its route and performed on the actual node containing this shard. After the primary shard completes the operation, if needed, the update is distributed to applicable replicas. **Active shards** To improve the resiliency of writes to the system, indexing operations can be configured to wait for a certain number of active shard copies before proceeding with the operation. If the requisite number of active shard copies are not available, then the write operation must wait and retry, until either the requisite shard copies have started or a timeout occurs. By default, write operations only wait for the primary shards to be active before proceeding (that is to say `wait_for_active_shards` is `1`). This default can be overridden in the index settings dynamically by setting `index.write.wait_for_active_shards`. To alter this behavior per operation, use the `wait_for_active_shards` request parameter. Valid values are `all` or any positive integer up to the total number of configured copies per shard in the index (which is `number_of_replicas`+1). Specifying a negative value or a number greater than the number of shard copies will throw an error. For example, suppose you have a cluster of three nodes, A, B, and C, and you create an index with the number of replicas set to 3 (resulting in 4 shard copies, one more copy than there are nodes). If you attempt an indexing operation, by default the operation will only ensure the primary copy of each shard is available before proceeding. This means that even if B and C went down and A hosted the primary shard copies, the indexing operation would still proceed with only one copy of the data. If `wait_for_active_shards` is set on the request to `3` (and all three nodes are up), the indexing operation will require 3 active shard copies before proceeding. This requirement should be met because there are 3 active nodes in the cluster, each one holding a copy of the shard. However, if you set `wait_for_active_shards` to `all` (or to `4`, which is the same in this situation), the indexing operation will not proceed as you do not have all 4 copies of each shard active in the index. The operation will time out unless a new node is brought up in the cluster to host the fourth copy of the shard. It is important to note that this setting greatly reduces the chances of the write operation not writing to the requisite number of shard copies, but it does not completely eliminate the possibility, because this check occurs before the write operation starts. After the write operation is underway, it is still possible for replication to fail on any number of shard copies but still succeed on the primary. The `_shards` section of the API response reveals the number of shard copies on which replication succeeded and failed.
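* For illustration only, a minimal create call with this client might look like the following sketch; the index name, document ID, and document body are hypothetical placeholders: ``` const response = await client.create({ index: "my-index-000001", id: "1", document: { title: "hello" } }) ```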
* @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/docs-index_.html | Elasticsearch API documentation} */ export default async function CreateApi (this: That, params: T.CreateRequest | TB.CreateRequest, options?: TransportRequestOptionsWithOutMeta): Promise diff --git a/src/api/api/delete.ts b/src/api/api/delete.ts index 530940830..ce16b46e2 100644 --- a/src/api/api/delete.ts +++ b/src/api/api/delete.ts @@ -39,7 +39,7 @@ import * as TB from '../typesWithBodyKey' interface That { transport: Transport } /** - * Delete a document. Removes a JSON document from the specified index. + * Delete a document. Remove a JSON document from the specified index. NOTE: You cannot send deletion requests directly to a data stream. To delete a document in a data stream, you must target the backing index containing the document. **Optimistic concurrency control** Delete operations can be made conditional and only be performed if the last modification to the document was assigned the sequence number and primary term specified by the `if_seq_no` and `if_primary_term` parameters. If a mismatch is detected, the operation will result in a `VersionConflictException` and a status code of `409`. **Versioning** Each document indexed is versioned. When deleting a document, the version can be specified to make sure the relevant document you are trying to delete is actually being deleted and it has not changed in the meantime. Every write operation run on a document, deletes included, causes its version to be incremented. The version number of a deleted document remains available for a short time after deletion to allow for control of concurrent operations. The length of time for which a deleted document's version remains available is determined by the `index.gc_deletes` index setting. **Routing** If routing is used during indexing, the routing value also needs to be specified to delete a document. If the `_routing` mapping is set to `required` and no routing value is specified, the delete API throws a `RoutingMissingException` and rejects the request. For example: ``` DELETE /my-index-000001/_doc/1?routing=shard-1 ``` This request deletes the document with ID 1, but it is routed based on the user. The document is not deleted if the correct routing is not specified. **Distributed** The delete operation gets hashed into a specific shard ID. It then gets redirected into the primary shard within that ID group and replicated (if needed) to shard replicas within that ID group. * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/docs-delete.html | Elasticsearch API documentation} */ export default async function DeleteApi (this: That, params: T.DeleteRequest | TB.DeleteRequest, options?: TransportRequestOptionsWithOutMeta): Promise diff --git a/src/api/api/esql.ts b/src/api/api/esql.ts index d486b764c..df9c171f9 100644 --- a/src/api/api/esql.ts +++ b/src/api/api/esql.ts @@ -45,22 +45,34 @@ export default class Esql { } /** - * Executes an ESQL request asynchronously + * Run an async ES|QL query. Asynchronously run an ES|QL (Elasticsearch query language) query, monitor its progress, and retrieve results when they become available. The API accepts the same parameters and request body as the synchronous query API, along with additional async related properties. 
* @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/esql-async-query-api.html | Elasticsearch API documentation} */ - async asyncQuery (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptionsWithOutMeta): Promise - async asyncQuery (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptionsWithMeta): Promise> - async asyncQuery (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptions): Promise - async asyncQuery (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptions): Promise { + async asyncQuery (this: That, params: T.EsqlAsyncQueryRequest | TB.EsqlAsyncQueryRequest, options?: TransportRequestOptionsWithOutMeta): Promise + async asyncQuery (this: That, params: T.EsqlAsyncQueryRequest | TB.EsqlAsyncQueryRequest, options?: TransportRequestOptionsWithMeta): Promise> + async asyncQuery (this: That, params: T.EsqlAsyncQueryRequest | TB.EsqlAsyncQueryRequest, options?: TransportRequestOptions): Promise + async asyncQuery (this: That, params: T.EsqlAsyncQueryRequest | TB.EsqlAsyncQueryRequest, options?: TransportRequestOptions): Promise { const acceptedPath: string[] = [] + const acceptedBody: string[] = ['columnar', 'filter', 'locale', 'params', 'profile', 'query', 'tables'] const querystring: Record = {} - const body = undefined + // @ts-expect-error + const userBody: any = params?.body + let body: Record | string + if (typeof userBody === 'string') { + body = userBody + } else { + body = userBody != null ? { ...userBody } : undefined + } - params = params ?? {} for (const key in params) { - if (acceptedPath.includes(key)) { + if (acceptedBody.includes(key)) { + body = body ?? {} + // @ts-expect-error + body[key] = params[key] + } else if (acceptedPath.includes(key)) { continue } else if (key !== 'body') { + // @ts-expect-error querystring[key] = params[key] } } @@ -74,22 +86,54 @@ export default class Esql { } /** - * Retrieves the results of a previously submitted async query request given its ID. + * Delete an async ES|QL query. If the query is still running, it is cancelled. Otherwise, the stored results are deleted. 
If the Elasticsearch security features are enabled, only the following users can use this API to delete a query: * The authenticated user that submitted the original query request * Users with the `cancel_task` cluster privilege + * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/esql-async-query-delete-api.html | Elasticsearch API documentation} + */ + async asyncQueryDelete (this: That, params: T.EsqlAsyncQueryDeleteRequest | TB.EsqlAsyncQueryDeleteRequest, options?: TransportRequestOptionsWithOutMeta): Promise + async asyncQueryDelete (this: That, params: T.EsqlAsyncQueryDeleteRequest | TB.EsqlAsyncQueryDeleteRequest, options?: TransportRequestOptionsWithMeta): Promise> + async asyncQueryDelete (this: That, params: T.EsqlAsyncQueryDeleteRequest | TB.EsqlAsyncQueryDeleteRequest, options?: TransportRequestOptions): Promise + async asyncQueryDelete (this: That, params: T.EsqlAsyncQueryDeleteRequest | TB.EsqlAsyncQueryDeleteRequest, options?: TransportRequestOptions): Promise { + const acceptedPath: string[] = ['id'] + const querystring: Record = {} + const body = undefined + + for (const key in params) { + if (acceptedPath.includes(key)) { + continue + } else if (key !== 'body') { + // @ts-expect-error + querystring[key] = params[key] + } + } + + const method = 'DELETE' + const path = `/_query/async/${encodeURIComponent(params.id.toString())}` + const meta: TransportRequestMetadata = { + name: 'esql.async_query_delete', + pathParts: { + id: params.id + } + } + return await this.transport.request({ path, method, querystring, body, meta }, options) + } + + /** + * Get async ES|QL query results. Get the current status and available results or stored results for an ES|QL asynchronous query. If the Elasticsearch security features are enabled, only the user who first submitted the ES|QL query can retrieve the results using this API. * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/esql-async-query-get-api.html | Elasticsearch API documentation} */ - async asyncQueryGet (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptionsWithOutMeta): Promise - async asyncQueryGet (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptionsWithMeta): Promise> - async asyncQueryGet (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptions): Promise - async asyncQueryGet (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptions): Promise { + async asyncQueryGet (this: That, params: T.EsqlAsyncQueryGetRequest | TB.EsqlAsyncQueryGetRequest, options?: TransportRequestOptionsWithOutMeta): Promise + async asyncQueryGet (this: That, params: T.EsqlAsyncQueryGetRequest | TB.EsqlAsyncQueryGetRequest, options?: TransportRequestOptionsWithMeta): Promise> + async asyncQueryGet (this: That, params: T.EsqlAsyncQueryGetRequest | TB.EsqlAsyncQueryGetRequest, options?: TransportRequestOptions): Promise + async asyncQueryGet (this: That, params: T.EsqlAsyncQueryGetRequest | TB.EsqlAsyncQueryGetRequest, options?: TransportRequestOptions): Promise { const acceptedPath: string[] = ['id'] const querystring: Record = {} const body = undefined - params = params ?? 
{} for (const key in params) { if (acceptedPath.includes(key)) { continue } else if (key !== 'body') { + // @ts-expect-error querystring[key] = params[key] } } diff --git a/src/api/api/exists.ts b/src/api/api/exists.ts index 38f35591c..c314afd77 100644 --- a/src/api/api/exists.ts +++ b/src/api/api/exists.ts @@ -39,7 +39,7 @@ import * as TB from '../typesWithBodyKey' interface That { transport: Transport } /** - * Check a document. Checks if a specified document exists. + * Check a document. Verify that a document exists. For example, check to see if a document with the `_id` 0 exists: ``` HEAD my-index-000001/_doc/0 ``` If the document exists, the API returns a status code of `200 - OK`. If the document doesn’t exist, the API returns `404 - Not Found`. **Versioning support** You can use the `version` parameter to check the document only if its current version is equal to the specified one. Internally, Elasticsearch has marked the old document as deleted and added an entirely new document. The old version of the document doesn't disappear immediately, although you won't be able to access it. Elasticsearch cleans up deleted documents in the background as you continue to index more data. * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/docs-get.html | Elasticsearch API documentation} */ export default async function ExistsApi (this: That, params: T.ExistsRequest | TB.ExistsRequest, options?: TransportRequestOptionsWithOutMeta): Promise diff --git a/src/api/api/exists_source.ts b/src/api/api/exists_source.ts index 596444c30..836ad9ce8 100644 --- a/src/api/api/exists_source.ts +++ b/src/api/api/exists_source.ts @@ -39,7 +39,7 @@ import * as TB from '../typesWithBodyKey' interface That { transport: Transport } /** - * Check for a document source. Checks if a document's `_source` is stored. + * Check for a document source. Check whether a document source exists in an index. For example: ``` HEAD my-index-000001/_source/1 ``` A document's source is not available if it is disabled in the mapping. * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/docs-get.html | Elasticsearch API documentation} */ export default async function ExistsSourceApi (this: That, params: T.ExistsSourceRequest | TB.ExistsSourceRequest, options?: TransportRequestOptionsWithOutMeta): Promise diff --git a/src/api/api/get.ts b/src/api/api/get.ts index f09229e62..3a2029f40 100644 --- a/src/api/api/get.ts +++ b/src/api/api/get.ts @@ -39,7 +39,7 @@ import * as TB from '../typesWithBodyKey' interface That { transport: Transport } /** - * Get a document by its ID. Retrieves the document with the specified ID from an index. + * Get a document by its ID. Get a document and its source or stored fields from an index. By default, this API is realtime and is not affected by the refresh rate of the index (when data will become visible for search). In the case where stored fields are requested with the `stored_fields` parameter and the document has been updated but is not yet refreshed, the API will have to parse and analyze the source to extract the stored fields. To turn off realtime behavior, set the `realtime` parameter to false. **Source filtering** By default, the API returns the contents of the `_source` field unless you have used the `stored_fields` parameter or the `_source` field is turned off. 
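For the existence checks above, a short usage sketch (index and IDs are placeholders); note that `exists` resolves to a boolean rather than a document body:

```js
// HEAD my-index-000001/_doc/0: true if the document exists.
const docExists = await client.exists({ index: "my-index-000001", id: "0" });

// HEAD my-index-000001/_source/1: true only if the document exists
// and its `_source` is stored (not disabled in the mapping).
const sourceExists = await client.existsSource({
  index: "my-index-000001",
  id: "1",
});
console.log(docExists, sourceExists);
```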
You can turn off `_source` retrieval by using the `_source` parameter: ``` GET my-index-000001/_doc/0?_source=false ``` If you only need one or two fields from the `_source`, use the `_source_includes` or `_source_excludes` parameters to include or filter out particular fields. This can be helpful with large documents where partial retrieval can save on network overhead. Both parameters take a comma-separated list of fields or wildcard expressions. For example: ``` GET my-index-000001/_doc/0?_source_includes=*.id&_source_excludes=entities ``` If you only want to specify includes, you can use a shorter notation: ``` GET my-index-000001/_doc/0?_source=*.id ``` **Routing** If routing is used during indexing, the routing value also needs to be specified to retrieve a document. For example: ``` GET my-index-000001/_doc/2?routing=user1 ``` This request gets the document with ID 2, but it is routed based on the user. The document is not fetched if the correct routing is not specified. **Distributed** The GET operation is hashed into a specific shard ID. It is then redirected to one of the replicas within that shard ID and returns the result. The replicas are the primary shard and its replicas within that shard ID group. This means that the more replicas you have, the better your GET scaling will be. **Versioning support** You can use the `version` parameter to retrieve the document only if its current version is equal to the specified one. Internally, Elasticsearch has marked the old document as deleted and added an entirely new document. The old version of the document doesn't disappear immediately, although you won't be able to access it. Elasticsearch cleans up deleted documents in the background as you continue to index more data. * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/docs-get.html | Elasticsearch API documentation} */ export default async function GetApi (this: That, params: T.GetRequest | TB.GetRequest, options?: TransportRequestOptionsWithOutMeta): Promise> diff --git a/src/api/api/get_source.ts b/src/api/api/get_source.ts index 9aab5f176..a4a68cf2a 100644 --- a/src/api/api/get_source.ts +++ b/src/api/api/get_source.ts @@ -39,7 +39,7 @@ import * as TB from '../typesWithBodyKey' interface That { transport: Transport } /** - * Get a document's source. Returns the source of a document. + * Get a document's source. Get the source of a document. For example: ``` GET my-index-000001/_source/1 ``` You can use the source filtering parameters to control which parts of the `_source` are returned: ``` GET my-index-000001/_source/1/?_source_includes=*.id&_source_excludes=entities ``` * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/docs-get.html | Elasticsearch API documentation} */ export default async function GetSourceApi (this: That, params: T.GetSourceRequest | TB.GetSourceRequest, options?: TransportRequestOptionsWithOutMeta): Promise> diff --git a/src/api/api/index.ts b/src/api/api/index.ts index 9b34ae499..a322691b5 100644 --- a/src/api/api/index.ts +++ b/src/api/api/index.ts @@ -39,7 +39,7 @@ import * as TB from '../typesWithBodyKey' interface That { transport: Transport } /** - * Index a document. Adds a JSON document to the specified data stream or index and makes it searchable. If the target is an index and the document already exists, the request updates the document and increments its version. + * Create or update a document in an index. Add a JSON document to the specified data stream or index and make it searchable.
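The source-filtering options described for the get API above map directly onto client parameters; a hedged sketch using the same placeholder index from the docs:

```js
// GET my-index-000001/_doc/0?_source_includes=*.id&_source_excludes=entities
const response = await client.get({
  index: "my-index-000001",
  id: "0",
  _source_includes: "*.id",
  _source_excludes: "entities",
});
console.log(response._source);
```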
If the target is an index and the document already exists, the request updates the document and increments its version. NOTE: You cannot use this API to send update requests for existing documents in a data stream. If the Elasticsearch security features are enabled, you must have the following index privileges for the target data stream, index, or index alias: * To add or overwrite a document using the `PUT //_doc/<_id>` request format, you must have the `create`, `index`, or `write` index privilege. * To add a document using the `POST //_doc/` request format, you must have the `create_doc`, `create`, `index`, or `write` index privilege. * To automatically create a data stream or index with this API request, you must have the `auto_configure`, `create_index`, or `manage` index privilege. Automatic data stream creation requires a matching index template with data stream enabled. NOTE: Replica shards might not all be started when an indexing operation returns successfully. By default, only the primary is required. Set `wait_for_active_shards` to change this default behavior. **Automatically create data streams and indices** If the request's target doesn't exist and matches an index template with a `data_stream` definition, the index operation automatically creates the data stream. If the target doesn't exist and doesn't match a data stream template, the operation automatically creates the index and applies any matching index templates. NOTE: Elasticsearch includes several built-in index templates. To avoid naming collisions with these templates, refer to index pattern documentation. If no mapping exists, the index operation creates a dynamic mapping. By default, new fields and objects are automatically added to the mapping if needed. Automatic index creation is controlled by the `action.auto_create_index` setting. If it is `true`, any index can be created automatically. You can modify this setting to explicitly allow or block automatic creation of indices that match specified patterns or set it to `false` to turn off automatic index creation entirely. Specify a comma-separated list of patterns you want to allow or prefix each pattern with `+` or `-` to indicate whether it should be allowed or blocked. When a list is specified, the default behaviour is to disallow. NOTE: The `action.auto_create_index` setting affects the automatic creation of indices only. It does not affect the creation of data streams. **Optimistic concurrency control** Index operations can be made conditional and only be performed if the last modification to the document was assigned the sequence number and primary term specified by the `if_seq_no` and `if_primary_term` parameters. If a mismatch is detected, the operation will result in a `VersionConflictException` and a status code of `409`. **Routing** By default, shard placement — or routing — is controlled by using a hash of the document's ID value. For more explicit control, the value fed into the hash function used by the router can be directly specified on a per-operation basis using the `routing` parameter. When setting up explicit mapping, you can also use the `_routing` field to direct the index operation to extract the routing value from the document itself. This does come at the (very minimal) cost of an additional document parsing pass. If the `_routing` mapping is defined and set to be required, the index operation will fail if no routing value is provided or extracted. 
NOTE: Data streams do not support custom routing unless they were created with the `allow_custom_routing` setting enabled in the template. **Distributed** The index operation is directed to the primary shard based on its route and performed on the actual node containing this shard. After the primary shard completes the operation, if needed, the update is distributed to applicable replicas. **Active shards** To improve the resiliency of writes to the system, indexing operations can be configured to wait for a certain number of active shard copies before proceeding with the operation. If the requisite number of active shard copies are not available, then the write operation must wait and retry, until either the requisite shard copies have started or a timeout occurs. By default, write operations only wait for the primary shards to be active before proceeding (that is to say `wait_for_active_shards` is `1`). This default can be overridden in the index settings dynamically by setting `index.write.wait_for_active_shards`. To alter this behavior per operation, use the `wait_for_active_shards` request parameter. Valid values are `all` or any positive integer up to the total number of configured copies per shard in the index (which is `number_of_replicas`+1). Specifying a negative value or a number greater than the number of shard copies will throw an error. For example, suppose you have a cluster of three nodes, A, B, and C, and you create an index named `index` with the number of replicas set to 3 (resulting in 4 shard copies, one more copy than there are nodes). If you attempt an indexing operation, by default the operation will only ensure the primary copy of each shard is available before proceeding. This means that even if B and C went down and A hosted the primary shard copies, the indexing operation would still proceed with only one copy of the data. If `wait_for_active_shards` is set on the request to `3` (and all three nodes are up), the indexing operation will require 3 active shard copies before proceeding. This requirement should be met because there are 3 active nodes in the cluster, each one holding a copy of the shard. However, if you set `wait_for_active_shards` to `all` (or to `4`, which is the same in this situation), the indexing operation will not proceed as you do not have all 4 copies of each shard active in the index. The operation will time out unless a new node is brought up in the cluster to host the fourth copy of the shard. It is important to note that this setting greatly reduces the chances of the write operation not writing to the requisite number of shard copies, but it does not completely eliminate the possibility, because this check occurs before the write operation starts. After the write operation is underway, it is still possible for replication to fail on any number of shard copies but still succeed on the primary. The `_shards` section of the API response reveals the number of shard copies on which replication succeeded and failed. **No operation (noop) updates** When updating a document by using this API, a new version of the document is always created even if the document hasn't changed. If this isn't acceptable, use the `_update` API with `detect_noop` set to `true`. The `detect_noop` option isn't available on this API because it doesn’t fetch the old source and isn't able to compare it against the new source. There isn't a definitive rule for when noop updates aren't acceptable.
It's a combination of lots of factors like how frequently your data source sends updates that are actually noops and how many queries per second Elasticsearch runs on the shard receiving the updates. **Versioning** Each indexed document is given a version number. By default, internal versioning is used that starts at 1 and increments with each update, deletes included. Optionally, the version number can be set to an external value (for example, if maintained in a database). To enable this functionality, `version_type` should be set to `external`. The value provided must be a numeric, long value greater than or equal to 0, and less than around `9.2e+18`. NOTE: Versioning is completely real time, and is not affected by the near real time aspects of search operations. If no version is provided, the operation runs without any version checks. When using the external version type, the system checks to see if the version number passed to the index request is greater than the version of the currently stored document. If true, the document will be indexed and the new version number used. If the value provided is less than or equal to the stored document's version number, a version conflict will occur and the index operation will fail. For example: ``` PUT my-index-000001/_doc/1?version=2&version_type=external { "user": { "id": "elkbee" } } ``` In this example, the operation will succeed since the supplied version of 2 is higher than the current document version of 1. If the document was already updated and its version was set to 2 or higher, the indexing command will fail and result in a conflict (409 HTTP status code). A nice side effect is that there is no need to maintain strict ordering of async indexing operations run as a result of changes to a source database, as long as version numbers from the source database are used. Even the simple case of updating the Elasticsearch index using data from a database is simplified if external versioning is used, as only the latest version will be used if the index operations arrive out of order. * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/docs-index_.html | Elasticsearch API documentation} */ export default async function IndexApi (this: That, params: T.IndexRequest | TB.IndexRequest, options?: TransportRequestOptionsWithOutMeta): Promise diff --git a/src/api/api/indices.ts b/src/api/api/indices.ts index 61e093f40..5138db32b 100644 --- a/src/api/api/indices.ts +++ b/src/api/api/indices.ts @@ -946,6 +946,7 @@ export default class Indices { /** * Get aliases. Retrieves information for one or more data stream or index aliases. + * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/indices-get-alias.html | Elasticsearch API documentation} */ async getAlias (this: That, params?: T.IndicesGetAliasRequest | TB.IndicesGetAliasRequest, options?: TransportRequestOptionsWithOutMeta): Promise async getAlias (this: That, params?: T.IndicesGetAliasRequest | TB.IndicesGetAliasRequest, options?: TransportRequestOptionsWithMeta): Promise> diff --git a/src/api/api/inference.ts b/src/api/api/inference.ts index 65bf2e0ad..65f424975 100644 --- a/src/api/api/inference.ts +++ b/src/api/api/inference.ts @@ -226,22 +226,34 @@ export default class Inference { } /** - * Perform streaming inference - * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/post-stream-inference-api.html | Elasticsearch API documentation} + * Perform streaming inference.
Get real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation. This API works only with the completion task type. IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs. This API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). You must use a client that supports streaming. + * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/stream-inference-api.html | Elasticsearch API documentation} */ - async streamInference (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptionsWithOutMeta): Promise - async streamInference (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptionsWithMeta): Promise> - async streamInference (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptions): Promise - async streamInference (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptions): Promise { + async streamInference (this: That, params: T.InferenceStreamInferenceRequest | TB.InferenceStreamInferenceRequest, options?: TransportRequestOptionsWithOutMeta): Promise + async streamInference (this: That, params: T.InferenceStreamInferenceRequest | TB.InferenceStreamInferenceRequest, options?: TransportRequestOptionsWithMeta): Promise> + async streamInference (this: That, params: T.InferenceStreamInferenceRequest | TB.InferenceStreamInferenceRequest, options?: TransportRequestOptions): Promise + async streamInference (this: That, params: T.InferenceStreamInferenceRequest | TB.InferenceStreamInferenceRequest, options?: TransportRequestOptions): Promise { const acceptedPath: string[] = ['inference_id', 'task_type'] + const acceptedBody: string[] = ['input'] const querystring: Record = {} - const body = undefined + // @ts-expect-error + const userBody: any = params?.body + let body: Record | string + if (typeof userBody === 'string') { + body = userBody + } else { + body = userBody != null ? { ...userBody } : undefined + } - params = params ?? {} for (const key in params) { - if (acceptedPath.includes(key)) { + if (acceptedBody.includes(key)) { + body = body ?? {} + // @ts-expect-error + body[key] = params[key] + } else if (acceptedPath.includes(key)) { continue } else if (key !== 'body') { + // @ts-expect-error querystring[key] = params[key] } } @@ -264,4 +276,49 @@ export default class Inference { } return await this.transport.request({ path, method, querystring, body, meta }, options) } + + /** + * Update an inference endpoint. Modify `task_settings`, secrets (within `service_settings`), or `num_allocations` for an inference endpoint, depending on the specific endpoint service and `task_type`. IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. 
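A hedged sketch of the streaming call this hunk types out; the endpoint ID is hypothetical, and per the note above the response must be consumed with a client that supports streaming:

```js
// `task_type` and `inference_id` are path parts; `input` is the accepted body key.
const events = await client.inference.streamInference({
  inference_id: "openai-completion", // hypothetical endpoint
  task_type: "completion",
  input: "What is Elastic?",
});
console.log(events); // delivered as server-sent events; consume incrementally
```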
For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs. + * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/update-inference-api.html | Elasticsearch API documentation} + */ + async update (this: That, params: T.InferenceUpdateRequest | TB.InferenceUpdateRequest, options?: TransportRequestOptionsWithOutMeta): Promise + async update (this: That, params: T.InferenceUpdateRequest | TB.InferenceUpdateRequest, options?: TransportRequestOptionsWithMeta): Promise> + async update (this: That, params: T.InferenceUpdateRequest | TB.InferenceUpdateRequest, options?: TransportRequestOptions): Promise + async update (this: That, params: T.InferenceUpdateRequest | TB.InferenceUpdateRequest, options?: TransportRequestOptions): Promise { + const acceptedPath: string[] = ['inference_id', 'task_type'] + const acceptedBody: string[] = ['inference_config'] + const querystring: Record = {} + // @ts-expect-error + let body: any = params.body ?? undefined + + for (const key in params) { + if (acceptedBody.includes(key)) { + // @ts-expect-error + body = params[key] + } else if (acceptedPath.includes(key)) { + continue + } else if (key !== 'body') { + // @ts-expect-error + querystring[key] = params[key] + } + } + + let method = '' + let path = '' + if (params.task_type != null && params.inference_id != null) { + method = 'POST' + path = `/_inference/${encodeURIComponent(params.task_type.toString())}/${encodeURIComponent(params.inference_id.toString())}/_update` + } else { + method = 'POST' + path = `/_inference/${encodeURIComponent(params.inference_id.toString())}/_update` + } + const meta: TransportRequestMetadata = { + name: 'inference.update', + pathParts: { + inference_id: params.inference_id, + task_type: params.task_type + } + } + return await this.transport.request({ path, method, querystring, body, meta }, options) + } } diff --git a/src/api/api/reindex.ts b/src/api/api/reindex.ts index e3d23d8a5..10534efd2 100644 --- a/src/api/api/reindex.ts +++ b/src/api/api/reindex.ts @@ -39,7 +39,7 @@ import * as TB from '../typesWithBodyKey' interface That { transport: Transport } /** - * Reindex documents. Copies documents from a source to a destination. The source can be any existing index, alias, or data stream. The destination must differ from the source. For example, you cannot reindex a data stream into itself. + * Reindex documents. Copy documents from a source to a destination. You can copy all documents to the destination index or reindex a subset of the documents. The source can be any existing index, alias, or data stream. The destination must differ from the source. For example, you cannot reindex a data stream into itself. IMPORTANT: Reindex requires `_source` to be enabled for all documents in the source. The destination should be configured as wanted before calling the reindex API. Reindex does not copy the settings from the source or its associated template. Mappings, shard counts, and replicas, for example, must be configured ahead of time. If the Elasticsearch security features are enabled, you must have the following security privileges: * The `read` index privilege for the source data stream, index, or alias. * The `write` index privilege for the destination data stream, index, or index alias. 
* To automatically create a data stream or index with a reindex API request, you must have the `auto_configure`, `create_index`, or `manage` index privilege for the destination data stream, index, or alias. * If reindexing from a remote cluster, the `source.remote.user` must have the `monitor` cluster privilege and the `read` index privilege for the source data stream, index, or alias. If reindexing from a remote cluster, you must explicitly allow the remote host in the `reindex.remote.whitelist` setting. Automatic data stream creation requires a matching index template with data stream enabled. The `dest` element can be configured like the index API to control optimistic concurrency control. Omitting `version_type` or setting it to `internal` causes Elasticsearch to blindly dump documents into the destination, overwriting any that happen to have the same ID. Setting `version_type` to `external` causes Elasticsearch to preserve the `version` from the source, create any documents that are missing, and update any documents that have an older version in the destination than they do in the source. Setting `op_type` to `create` causes the reindex API to create only missing documents in the destination. All existing documents will cause a version conflict. IMPORTANT: Because data streams are append-only, any reindex request to a destination data stream must have an `op_type` of `create`. A reindex can only add new documents to a destination data stream. It cannot update existing documents in a destination data stream. By default, version conflicts abort the reindex process. To continue reindexing if there are conflicts, set the `conflicts` request body property to `proceed`. In this case, the response includes a count of the version conflicts that were encountered. Note that the handling of other error types is unaffected by the `conflicts` property. Additionally, if you opt to count version conflicts, the operation could attempt to reindex more documents from the source than `max_docs` until it has successfully indexed `max_docs` documents into the target or it has gone through every document in the source query. NOTE: The reindex API makes no effort to handle ID collisions. The last document written will "win" but the order isn't usually predictable so it is not a good idea to rely on this behavior. Instead, make sure that IDs are unique by using a script. **Running reindex asynchronously** If the request contains `wait_for_completion=false`, Elasticsearch performs some preflight checks, launches the request, and returns a task you can use to cancel or get the status of the task. Elasticsearch creates a record of this task as a document at `_tasks/`. **Reindex from multiple sources** If you have many sources to reindex it is generally better to reindex them one at a time rather than using a glob pattern to pick up multiple sources. That way you can resume the process if there are any errors by removing the partially completed source and starting over. It also makes parallelizing the process fairly simple: split the list of sources to reindex and run each list in parallel. 
For example, you can use a bash script like this: ``` for index in i1 i2 i3 i4 i5; do curl -HContent-Type:application/json -XPOST localhost:9200/_reindex?pretty -d'{ "source": { "index": "'$index'" }, "dest": { "index": "'$index'-reindexed" } }' done ``` **Throttling** Set `requests_per_second` to any positive decimal number (`1.4`, `6`, `1000`, for example) to throttle the rate at which reindex issues batches of index operations. Requests are throttled by padding each batch with a wait time. To turn off throttling, set `requests_per_second` to `-1`. The throttling is done by waiting between batches so that the scroll that reindex uses internally can be given a timeout that takes into account the padding. The padding time is the difference between the batch size divided by the `requests_per_second` and the time spent writing. By default, the batch size is `1000`, so if `requests_per_second` is set to `500`: ``` target_time = 1000 / 500 per second = 2 seconds wait_time = target_time - write_time = 2 seconds - .5 seconds = 1.5 seconds ``` Since the batch is issued as a single bulk request, large batch sizes cause Elasticsearch to create many requests and then wait for a while before starting the next set. This is "bursty" instead of "smooth". **Slicing** Reindex supports sliced scroll to parallelize the reindexing process. This parallelization can improve efficiency and provide a convenient way to break the request down into smaller parts. NOTE: Reindexing from remote clusters does not support manual or automatic slicing. You can slice a reindex request manually by providing a slice ID and total number of slices to each request. You can also let reindex automatically parallelize by using sliced scroll to slice on `_id`. The `slices` parameter specifies the number of slices to use. Adding `slices` to the reindex request just automates the manual process, creating sub-requests, which means it has some quirks: * You can see these requests in the tasks API. These sub-requests are "child" tasks of the task for the request with `slices`. * Fetching the status of the task for the request with `slices` only contains the status of completed slices. * These sub-requests are individually addressable for things like cancellation and rethrottling. * Rethrottling the request with `slices` will rethrottle the unfinished sub-request proportionally. * Canceling the request with `slices` will cancel each sub-request. * Due to the nature of `slices`, each sub-request won't get a perfectly even portion of the documents. All documents will be addressed, but some slices may be larger than others. Expect larger slices to have a more even distribution. * Parameters like `requests_per_second` and `max_docs` on a request with `slices` are distributed proportionally to each sub-request. Combine that with the previous point about distribution being uneven and you should conclude that using `max_docs` with `slices` might not result in exactly `max_docs` documents being reindexed. * Each sub-request gets a slightly different snapshot of the source, though these are all taken at approximately the same time. If slicing automatically, setting `slices` to `auto` will choose a reasonable number for most indices. If slicing manually or otherwise tuning automatic slicing, use the following guidelines. Query performance is most efficient when the number of slices is equal to the number of shards in the index. If that number is large (for example, `500`), choose a lower number as too many slices will hurt performance.
Setting `slices` higher than the number of shards generally does not improve efficiency and adds overhead. Indexing performance scales linearly across available resources with the number of slices. Whether query or indexing performance dominates the runtime depends on the documents being reindexed and cluster resources. **Modify documents during reindexing** Like `_update_by_query`, reindex operations support a script that modifies the document. Unlike `_update_by_query`, the script is allowed to modify the document's metadata. Just as in `_update_by_query`, you can set `ctx.op` to change the operation that is run on the destination. For example, set `ctx.op` to `noop` if your script decides that the document doesn’t have to be indexed in the destination. This "no operation" will be reported in the `noop` counter in the response body. Set `ctx.op` to `delete` if your script decides that the document must be deleted from the destination. The deletion will be reported in the `deleted` counter in the response body. Setting `ctx.op` to anything else will return an error, as will setting any other field in `ctx`. Think of the possibilities! Just be careful; you are able to change: * `_id` * `_index` * `_version` * `_routing` Setting `_version` to `null` or clearing it from the `ctx` map is just like not sending the version in an indexing request. It will cause the document to be overwritten in the destination regardless of the version on the target or the version type you use in the reindex API. **Reindex from remote** Reindex supports reindexing from a remote Elasticsearch cluster. The `host` parameter must contain a scheme, host, port, and optional path. The `username` and `password` parameters are optional, and when they are present, the reindex operation will connect to the remote Elasticsearch node using basic authentication. Be sure to use HTTPS when using basic authentication or the password will be sent in plain text. There is a range of settings available to configure the behavior of the HTTPS connection. When using Elastic Cloud, it is also possible to authenticate against the remote cluster through the use of a valid API key. Remote hosts must be explicitly allowed with the `reindex.remote.whitelist` setting. It can be set to a comma-delimited list of allowed remote host and port combinations. Scheme is ignored; only the host and port are used. For example: ``` reindex.remote.whitelist: ["otherhost:9200", "another:9200", "127.0.10.*:9200", "localhost:*"] ``` The list of allowed hosts must be configured on any nodes that will coordinate the reindex. This feature should work with remote clusters of any version of Elasticsearch. This should enable you to upgrade from any version of Elasticsearch to the current version by reindexing from a cluster of the old version. WARNING: Elasticsearch does not support forward compatibility across major versions. For example, you cannot reindex from a 7.x cluster into a 6.x cluster. To enable queries sent to older versions of Elasticsearch, the `query` parameter is sent directly to the remote host without validation or modification. NOTE: Reindexing from remote clusters does not support manual or automatic slicing. Reindexing from a remote server uses an on-heap buffer that defaults to a maximum size of 100mb. If the remote index includes very large documents, you'll need to use a smaller batch size.
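Pulling the throttling and slicing options above together, a sketch with the JS client (index names are placeholders); `wait_for_completion`, `requests_per_second`, and `slices` travel as query parameters while `source` and `dest` form the body:

```js
const task = await client.reindex({
  source: { index: "my-index-000001" },
  dest: { index: "my-index-000001-reindexed" },
  wait_for_completion: false, // run async and get back a task ID
  requests_per_second: 500, // throttle batches as described above
  slices: "auto", // let Elasticsearch choose the slice count
});
console.log(task.task); // usable later with reindexRethrottle or the tasks API
```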
It is also possible to set the socket read timeout on the remote connection with the `socket_timeout` field and the connection timeout with the `connect_timeout` field. Both default to 30 seconds. **Configuring SSL parameters** Reindex from remote supports configurable SSL settings. These must be specified in the `elasticsearch.yml` file, with the exception of the secure settings, which you add in the Elasticsearch keystore. It is not possible to configure SSL in the body of the reindex request. * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/docs-reindex.html | Elasticsearch API documentation} */ export default async function ReindexApi (this: That, params: T.ReindexRequest | TB.ReindexRequest, options?: TransportRequestOptionsWithOutMeta): Promise diff --git a/src/api/api/reindex_rethrottle.ts b/src/api/api/reindex_rethrottle.ts index 331555822..e609845a6 100644 --- a/src/api/api/reindex_rethrottle.ts +++ b/src/api/api/reindex_rethrottle.ts @@ -39,7 +39,7 @@ import * as TB from '../typesWithBodyKey' interface That { transport: Transport } /** - * Throttle a reindex operation. Change the number of requests per second for a particular reindex operation. + * Throttle a reindex operation. Change the number of requests per second for a particular reindex operation. For example: ``` POST _reindex/r1A2WoRbTwKZ516z6NEs5A:36619/_rethrottle?requests_per_second=-1 ``` Rethrottling that speeds up the query takes effect immediately. Rethrottling that slows down the query will take effect after completing the current batch. This behavior prevents scroll timeouts. * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/docs-reindex.html | Elasticsearch API documentation} */ export default async function ReindexRethrottleApi (this: That, params: T.ReindexRethrottleRequest | TB.ReindexRethrottleRequest, options?: TransportRequestOptionsWithOutMeta): Promise diff --git a/src/api/api/security.ts b/src/api/api/security.ts index 9ceb151ae..78629ce2b 100644 --- a/src/api/api/security.ts +++ b/src/api/api/security.ts @@ -45,7 +45,7 @@ export default class Security { } /** - * Activate a user profile. Create or update a user profile on behalf of another user. + * Activate a user profile. Create or update a user profile on behalf of another user. NOTE: The user profile feature is designed only for use by Kibana and Elastic's Observability, Enterprise Search, and Elastic Security solutions. Individual users and external applications should not call this API directly. The calling application must have either an `access_token` or a combination of `username` and `password` for the user that the profile document is intended for. Elastic reserves the right to change or remove this feature in future releases without prior notice. This API creates or updates a profile document for end users with information that is extracted from the user's authentication object, including `username`, `full_name`, `roles`, and the authentication realm. For example, in the JWT `access_token` case, the profile user's `username` is extracted from the JWT token claim pointed to by the `claims.principal` setting of the JWT realm that authenticated the token. When updating a profile document, the API enables the document if it was disabled. Any updates do not change existing content for either the `labels` or `data` fields.
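For the activation flow just described, a hedged sketch; the credentials are placeholders, and `grant_type` may be `access_token` instead of `password`:

```js
const profile = await client.security.activateUserProfile({
  grant_type: "password",
  username: "jacknich", // placeholder user
  password: "l0ng-r4nd0m-p@ssw0rd",
});
console.log(profile.uid); // the unique profile ID the other profile APIs expect
```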
* @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/security-api-activate-user-profile.html | Elasticsearch API documentation} */ async activateUserProfile (this: That, params: T.SecurityActivateUserProfileRequest | TB.SecurityActivateUserProfileRequest, options?: TransportRequestOptionsWithOutMeta): Promise @@ -813,7 +813,7 @@ export default class Security { } /** - * Disable a user profile. Disable user profiles so that they are not visible in user profile searches. + * Disable a user profile. Disable user profiles so that they are not visible in user profile searches. NOTE: The user profile feature is designed only for use by Kibana and Elastic's Observability, Enterprise Search, and Elastic Security solutions. Individual users and external applications should not call this API directly. Elastic reserves the right to change or remove this feature in future releases without prior notice. When you activate a user profile, it's automatically enabled and visible in user profile searches. You can use the disable user profile API to disable a user profile so it’s not visible in these searches. To re-enable a disabled user profile, use the enable user profile API. * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/security-api-disable-user-profile.html | Elasticsearch API documentation} */ async disableUserProfile (this: That, params: T.SecurityDisableUserProfileRequest | TB.SecurityDisableUserProfileRequest, options?: TransportRequestOptionsWithOutMeta): Promise @@ -877,7 +877,7 @@ export default class Security { } /** - * Enable a user profile. Enable user profiles to make them visible in user profile searches. + * Enable a user profile. Enable user profiles to make them visible in user profile searches. NOTE: The user profile feature is designed only for use by Kibana and Elastic's Observability, Enterprise Search, and Elastic Security solutions. Individual users and external applications should not call this API directly. Elastic reserves the right to change or remove this feature in future releases without prior notice. When you activate a user profile, it's automatically enabled and visible in user profile searches. If you later disable the user profile, you can use the enable user profile API to make the profile visible in these searches again. * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/security-api-enable-user-profile.html | Elasticsearch API documentation} */ async enableUserProfile (this: That, params: T.SecurityEnableUserProfileRequest | TB.SecurityEnableUserProfileRequest, options?: TransportRequestOptionsWithOutMeta): Promise @@ -1230,13 +1230,13 @@ export default class Security { } /** - * Retrieve settings for the security system indices + * Get security index settings. Get the user-configurable settings for the security internal index (`.security` and associated indices).
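Reading these settings back takes no parameters; a minimal sketch, assuming the response is keyed by internal index as in the update API later in this diff:

```js
const response = await client.security.getSettings();
console.log(response); // e.g. settings under `security`, `security-profile`, `security-tokens`
```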
* @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/security-api-get-settings.html | Elasticsearch API documentation} */ - async getSettings (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptionsWithOutMeta): Promise - async getSettings (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptionsWithMeta): Promise> - async getSettings (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptions): Promise - async getSettings (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptions): Promise { + async getSettings (this: That, params?: T.SecurityGetSettingsRequest | TB.SecurityGetSettingsRequest, options?: TransportRequestOptionsWithOutMeta): Promise + async getSettings (this: That, params?: T.SecurityGetSettingsRequest | TB.SecurityGetSettingsRequest, options?: TransportRequestOptionsWithMeta): Promise> + async getSettings (this: That, params?: T.SecurityGetSettingsRequest | TB.SecurityGetSettingsRequest, options?: TransportRequestOptions): Promise + async getSettings (this: That, params?: T.SecurityGetSettingsRequest | TB.SecurityGetSettingsRequest, options?: TransportRequestOptions): Promise { const acceptedPath: string[] = [] const querystring: Record = {} const body = undefined @@ -1246,6 +1246,7 @@ export default class Security { if (acceptedPath.includes(key)) { continue } else if (key !== 'body') { + // @ts-expect-error querystring[key] = params[key] } } @@ -1371,7 +1372,7 @@ export default class Security { } /** - * Get a user profile. Get a user's profile using the unique profile ID. + * Get a user profile. Get a user's profile using the unique profile ID. NOTE: The user profile feature is designed only for use by Kibana and Elastic's Observability, Enterprise Search, and Elastic Security solutions. Individual users and external applications should not call this API directly. Elastic reserves the right to change or remove this feature in future releases without prior notice. * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/security-api-get-user-profile.html | Elasticsearch API documentation} */ async getUserProfile (this: That, params: T.SecurityGetUserProfileRequest | TB.SecurityGetUserProfileRequest, options?: TransportRequestOptionsWithOutMeta): Promise @@ -1496,7 +1497,7 @@ export default class Security { } /** - * Check user profile privileges. Determine whether the users associated with the specified user profile IDs have all the requested privileges. + * Check user profile privileges. Determine whether the users associated with the specified user profile IDs have all the requested privileges. NOTE: The user profile feature is designed only for use by Kibana and Elastic's Observability, Enterprise Search, and Elastic Security solutions. Individual users and external applications should not call this API directly. Elastic reserves the right to change or remove this feature in future releases without prior notice. * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/security-api-has-privileges-user-profile.html | Elasticsearch API documentation} */ async hasPrivilegesUserProfile (this: That, params: T.SecurityHasPrivilegesUserProfileRequest | TB.SecurityHasPrivilegesUserProfileRequest, options?: TransportRequestOptionsWithOutMeta): Promise @@ -2037,7 +2038,7 @@ export default class Security { } /** - * Authenticate SAML. Submits a SAML response message to Elasticsearch for consumption. + * Authenticate SAML. 
Submit a SAML response message to Elasticsearch for consumption. NOTE: This API is intended for use by custom web applications other than Kibana. If you are using Kibana, refer to the documentation for configuring SAML single-sign-on on the Elastic Stack. The SAML message that is submitted can be: * A response to a SAML authentication request that was previously created using the SAML prepare authentication API. * An unsolicited SAML message in the case of an IdP-initiated single sign-on (SSO) flow. In either case, the SAML message needs to be a base64 encoded XML document with a root element of `<Response>`. After successful validation, Elasticsearch responds with an Elasticsearch internal access token and refresh token that can be subsequently used for authentication. This API endpoint essentially exchanges SAML responses that indicate successful authentication in the IdP for Elasticsearch access and refresh tokens, which can be used for authentication against Elasticsearch. * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/security-api-saml-authenticate.html | Elasticsearch API documentation} */ async samlAuthenticate (this: That, params: T.SecuritySamlAuthenticateRequest | TB.SecuritySamlAuthenticateRequest, options?: TransportRequestOptionsWithOutMeta): Promise @@ -2078,7 +2079,7 @@ export default class Security { } /** - * Logout of SAML completely. Verifies the logout response sent from the SAML IdP. + * Logout of SAML completely. Verifies the logout response sent from the SAML IdP. NOTE: This API is intended for use by custom web applications other than Kibana. If you are using Kibana, refer to the documentation for configuring SAML single-sign-on on the Elastic Stack. The SAML IdP may send a logout response back to the SP after handling the SP-initiated SAML Single Logout. This API verifies the response by ensuring the content is relevant and validating its signature. An empty response is returned if the verification process is successful. The response can be sent by the IdP with either the HTTP-Redirect or the HTTP-Post binding. The caller of this API must prepare the request accordingly so that this API can handle either of them. * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/security-api-saml-complete-logout.html | Elasticsearch API documentation} */ async samlCompleteLogout (this: That, params: T.SecuritySamlCompleteLogoutRequest | TB.SecuritySamlCompleteLogoutRequest, options?: TransportRequestOptionsWithOutMeta): Promise @@ -2119,7 +2120,7 @@ export default class Security { } /** - * Invalidate SAML. Submits a SAML LogoutRequest message to Elasticsearch for consumption. + * Invalidate SAML. Submit a SAML LogoutRequest message to Elasticsearch for consumption. NOTE: This API is intended for use by custom web applications other than Kibana. If you are using Kibana, refer to the documentation for configuring SAML single-sign-on on the Elastic Stack. The logout request comes from the SAML IdP during an IdP initiated Single Logout. The custom web application can use this API to have Elasticsearch process the `LogoutRequest`. After successful validation of the request, Elasticsearch invalidates the access token and refresh token that corresponds to that specific SAML principal and provides a URL that contains a SAML LogoutResponse message. Thus the user can be redirected back to their IdP.
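A hedged sketch of handing such an IdP-initiated `LogoutRequest` to Elasticsearch; the `query_string` is a truncated placeholder and `realm` names a hypothetical SAML realm:

```js
const response = await client.security.samlInvalidate({
  // Pass the query string the IdP sent, verbatim (SAMLRequest plus any signature params).
  query_string: "SAMLRequest=...&SigAlg=...&Signature=...",
  realm: "saml1", // hypothetical realm name
});
console.log(response.redirect); // URL carrying the SAML LogoutResponse for the IdP
```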
* @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/security-api-saml-invalidate.html | Elasticsearch API documentation} */ async samlInvalidate (this: That, params: T.SecuritySamlInvalidateRequest | TB.SecuritySamlInvalidateRequest, options?: TransportRequestOptionsWithOutMeta): Promise @@ -2160,7 +2161,7 @@ export default class Security { } /** - * Logout of SAML. Submits a request to invalidate an access token and refresh token. + * Logout of SAML. Submits a request to invalidate an access token and refresh token. NOTE: This API is intended for use by custom web applications other than Kibana. If you are using Kibana, refer to the documentation for configuring SAML single-sign-on on the Elastic Stack. This API invalidates the tokens that were generated for a user by the SAML authenticate API. If the SAML realm in Elasticsearch is configured accordingly and the SAML IdP supports this, the Elasticsearch response contains a URL to redirect the user to the IdP that contains a SAML logout request (starting an SP-initiated SAML Single Logout). * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/security-api-saml-logout.html | Elasticsearch API documentation} */ async samlLogout (this: That, params: T.SecuritySamlLogoutRequest | TB.SecuritySamlLogoutRequest, options?: TransportRequestOptionsWithOutMeta): Promise @@ -2201,7 +2202,7 @@ export default class Security { } /** - * Prepare SAML authentication. Creates a SAML authentication request (`<AuthnRequest>`) as a URL string, based on the configuration of the respective SAML realm in Elasticsearch. + * Prepare SAML authentication. Create a SAML authentication request (`<AuthnRequest>`) as a URL string based on the configuration of the respective SAML realm in Elasticsearch. NOTE: This API is intended for use by custom web applications other than Kibana. If you are using Kibana, refer to the documentation for configuring SAML single-sign-on on the Elastic Stack. This API returns a URL pointing to the SAML Identity Provider. You can use the URL to redirect the browser of the user in order to continue the authentication process. The URL includes a single parameter named `SAMLRequest`, which contains a SAML Authentication request that is deflated and Base64 encoded. If the configuration dictates that SAML authentication requests should be signed, the URL has two extra parameters named `SigAlg` and `Signature`. These parameters contain the algorithm used for the signature and the signature value itself. It also returns a random string that uniquely identifies this SAML Authentication request. The caller of this API needs to store this identifier as it needs to be used in a following step of the authentication process. * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/security-api-saml-prepare-authentication.html | Elasticsearch API documentation} */ async samlPrepareAuthentication (this: That, params?: T.SecuritySamlPrepareAuthenticationRequest | TB.SecuritySamlPrepareAuthenticationRequest, options?: TransportRequestOptionsWithOutMeta): Promise @@ -2243,7 +2244,7 @@ export default class Security { } /** - * Create SAML service provider metadata. Generate SAML metadata for a SAML 2.0 Service Provider. + * Create SAML service provider metadata. Generate SAML metadata for a SAML 2.0 Service Provider. The SAML 2.0 specification provides a mechanism for Service Providers to describe their capabilities and configuration using a metadata file.
This API generates Service Provider metadata based on the configuration of a SAML realm in Elasticsearch. * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/security-api-saml-sp-metadata.html | Elasticsearch API documentation} */ async samlServiceProviderMetadata (this: That, params: T.SecuritySamlServiceProviderMetadataRequest | TB.SecuritySamlServiceProviderMetadataRequest, options?: TransportRequestOptionsWithOutMeta): Promise @@ -2275,7 +2276,7 @@ export default class Security { } /** - * Suggest a user profile. Get suggestions for user profiles that match specified search criteria. + * Suggest a user profile. Get suggestions for user profiles that match specified search criteria. NOTE: The user profile feature is designed only for use by Kibana and Elastic's Observability, Enterprise Search, and Elastic Security solutions. Individual users and external applications should not call this API directly. Elastic reserves the right to change or remove this feature in future releases without prior notice. * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/security-api-suggest-user-profile.html | Elasticsearch API documentation} */ async suggestUserProfiles (this: That, params?: T.SecuritySuggestUserProfilesRequest | TB.SecuritySuggestUserProfilesRequest, options?: TransportRequestOptionsWithOutMeta): Promise @@ -2361,7 +2362,7 @@ export default class Security { } /** - * Update a cross-cluster API key. Update the attributes of an existing cross-cluster API key, which is used for API key based remote cluster access. + * Update a cross-cluster API key. Update the attributes of an existing cross-cluster API key, which is used for API key based remote cluster access. To use this API, you must have at least the `manage_security` cluster privilege. Users can only update API keys that they created. To update another user's API key, use the `run_as` feature to submit a request on behalf of another user. IMPORTANT: It's not possible to use an API key as the authentication credential for this API. To update an API key, the owner user's credentials are required. It's not possible to update expired API keys, or API keys that have been invalidated by the invalidate API key API. This API supports updates to an API key's access scope, metadata, and expiration. The owner user's information, such as the `username` and `realm`, is also updated automatically on every call. NOTE: This API cannot update REST API keys, which should be updated by either the update API key or bulk update API keys API. * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/security-api-update-cross-cluster-api-key.html | Elasticsearch API documentation} */ async updateCrossClusterApiKey (this: That, params: T.SecurityUpdateCrossClusterApiKeyRequest | TB.SecurityUpdateCrossClusterApiKeyRequest, options?: TransportRequestOptionsWithOutMeta): Promise @@ -2405,22 +2406,35 @@ export default class Security { } /** - * Update settings for the security system index + * Update security index settings. Update the user-configurable settings for the security internal index (`.security` and associated indices). Only a subset of settings are allowed to be modified, for example `index.auto_expand_replicas` and `index.number_of_replicas`. If a specific index is not in use on the system and settings are provided for it, the request will be rejected. This API does not yet support configuring the settings for indices before they are in use. 
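Grounded in the accepted body keys in the hunk that follows (`security`, `security-profile`, `security-tokens`), a sketch that adjusts replica auto-expansion on the main security index; the value is illustrative:

```js
const response = await client.security.updateSettings({
  security: {
    "index.auto_expand_replicas": "0-all", // example of a modifiable setting
  },
});
console.log(response);
```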
   * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/security-api-update-settings.html | Elasticsearch API documentation}
   */
-  async updateSettings (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptionsWithOutMeta): Promise<T.TODO>
-  async updateSettings (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptionsWithMeta): Promise<TransportResult<T.TODO, unknown>>
-  async updateSettings (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptions): Promise<T.TODO>
-  async updateSettings (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptions): Promise<any> {
+  async updateSettings (this: That, params?: T.SecurityUpdateSettingsRequest | TB.SecurityUpdateSettingsRequest, options?: TransportRequestOptionsWithOutMeta): Promise<T.SecurityUpdateSettingsResponse>
+  async updateSettings (this: That, params?: T.SecurityUpdateSettingsRequest | TB.SecurityUpdateSettingsRequest, options?: TransportRequestOptionsWithMeta): Promise<TransportResult<T.SecurityUpdateSettingsResponse, unknown>>
+  async updateSettings (this: That, params?: T.SecurityUpdateSettingsRequest | TB.SecurityUpdateSettingsRequest, options?: TransportRequestOptions): Promise<T.SecurityUpdateSettingsResponse>
+  async updateSettings (this: That, params?: T.SecurityUpdateSettingsRequest | TB.SecurityUpdateSettingsRequest, options?: TransportRequestOptions): Promise<any> {
     const acceptedPath: string[] = []
+    const acceptedBody: string[] = ['security', 'security-profile', 'security-tokens']
     const querystring: Record<string, any> = {}
-    const body = undefined
+    // @ts-expect-error
+    const userBody: any = params?.body
+    let body: Record<string, any> | string
+    if (typeof userBody === 'string') {
+      body = userBody
+    } else {
+      body = userBody != null ? { ...userBody } : undefined
+    }
     params = params ?? {}
     for (const key in params) {
-      if (acceptedPath.includes(key)) {
+      if (acceptedBody.includes(key)) {
+        body = body ?? {}
+        // @ts-expect-error
+        body[key] = params[key]
+      } else if (acceptedPath.includes(key)) {
         continue
       } else if (key !== 'body') {
+        // @ts-expect-error
        querystring[key] = params[key]
      }
    }
@@ -2434,7 +2448,7 @@ export default class Security {
   }

  /**
-   * Update user profile data. Update specific data for the user profile that is associated with a unique ID.
+   * Update user profile data. Update specific data for the user profile that is associated with a unique ID. NOTE: The user profile feature is designed only for use by Kibana and Elastic's Observability, Enterprise Search, and Elastic Security solutions. Individual users and external applications should not call this API directly. Elastic reserves the right to change or remove this feature in future releases without prior notice. To use this API, you must have one of the following privileges: * The `manage_user_profile` cluster privilege. * The `update_profile_data` global privilege for the namespaces that are referenced in the request. This API updates the `labels` and `data` fields of an existing user profile document with JSON objects. New keys and their values are added to the profile document and conflicting keys are replaced by data that's included in the request. For both labels and data, content is namespaced by the top-level fields. The `update_profile_data` global privilege grants privileges for updating only the allowed namespaces.
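A sketch of the call described above; the profile UID is hypothetical, and the `labels` and `data` payloads are illustrative namespaced objects:

[source, js]
----
const response = await client.security.updateUserProfileData({
  uid: "u_hypothetical_profile_uid", // hypothetical profile UID
  labels: {
    direction: "east", // content is namespaced by this top-level field
  },
  data: {
    app1: { theme: "default" },
  },
});
console.log(response);
----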
   * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/security-api-update-user-profile-data.html | Elasticsearch API documentation}
   */
  async updateUserProfileData (this: That, params: T.SecurityUpdateUserProfileDataRequest | TB.SecurityUpdateUserProfileDataRequest, options?: TransportRequestOptionsWithOutMeta): Promise<T.SecurityUpdateUserProfileDataResponse>
diff --git a/src/api/api/snapshot.ts b/src/api/api/snapshot.ts
index ec9517a9e..d08e31d48 100644
--- a/src/api/api/snapshot.ts
+++ b/src/api/api/snapshot.ts
@@ -343,32 +343,32 @@ export default class Snapshot {
   }

  /**
-   * Analyzes a repository for correctness and performance
-   * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/modules-snapshots.html | Elasticsearch API documentation}
+   * Analyze a snapshot repository. Analyze the performance characteristics and any incorrect behaviour found in a repository. The response exposes implementation details of the analysis which may change from version to version. The response body format is therefore not considered stable and may be different in newer versions. There are a large number of third-party storage systems available, not all of which are suitable for use as a snapshot repository by Elasticsearch. Some storage systems behave incorrectly, or perform poorly, especially when accessed concurrently by multiple clients as the nodes of an Elasticsearch cluster do. This API performs a collection of read and write operations on your repository which are designed to detect incorrect behaviour and to measure the performance characteristics of your storage system. The default values for the parameters are deliberately low to reduce the impact of running an analysis inadvertently and to provide a sensible starting point for your investigations. Run your first analysis with the default parameter values to check for simple problems. If successful, run a sequence of increasingly large analyses until you encounter a failure or you reach a `blob_count` of at least `2000`, a `max_blob_size` of at least `2gb`, a `max_total_data_size` of at least `1tb`, and a `register_operation_count` of at least `100`. Always specify a generous timeout, possibly `1h` or longer, to allow time for each analysis to run to completion. Perform the analyses using a multi-node cluster of a similar size to your production cluster so that it can detect any problems that only arise when the repository is accessed by many nodes at once. If the analysis fails, Elasticsearch detected that your repository behaved unexpectedly. This usually means you are using a third-party storage system with an incorrect or incompatible implementation of the API it claims to support. If so, this storage system is not suitable for use as a snapshot repository. You will need to work with the supplier of your storage system to address the incompatibilities that Elasticsearch detects. If the analysis is successful, the API returns details of the testing process, optionally including how long each operation took. You can use this information to determine the performance of your storage system. If any operation fails or returns an incorrect result, the API returns an error. If the API returns an error, it may not have removed all the data it wrote to the repository. The error will indicate the location of any leftover data and this path is also recorded in the Elasticsearch logs. You should verify that this location has been cleaned up correctly. If there is still leftover data at the specified location, you should manually remove it.
If the connection from your client to Elasticsearch is closed while the client is waiting for the result of the analysis, the test is cancelled. Some clients are configured to close their connection if no response is received within a certain timeout. An analysis takes a long time to complete so you might need to relax any such client-side timeouts. On cancellation the analysis attempts to clean up the data it was writing, but it may not be able to remove it all. The path to the leftover data is recorded in the Elasticsearch logs. You should verify that this location has been cleaned up correctly. If there is still leftover data at the specified location, you should manually remove it. If the analysis is successful then it detected no incorrect behaviour, but this does not mean that correct behaviour is guaranteed. The analysis attempts to detect common bugs but it does not offer 100% coverage. Additionally, it does not test the following: * Your repository must perform durable writes. Once a blob has been written it must remain in place until it is deleted, even after a power loss or similar disaster. * Your repository must not suffer from silent data corruption. Once a blob has been written, its contents must remain unchanged until it is deliberately modified or deleted. * Your repository must behave correctly even if connectivity from the cluster is disrupted. Reads and writes may fail in this case, but they must not return incorrect results. IMPORTANT: An analysis writes a substantial amount of data to your repository and then reads it back again. This consumes bandwidth on the network between the cluster and the repository, and storage space and I/O bandwidth on the repository itself. You must ensure this load does not affect other users of these systems. Analyses respect the repository settings `max_snapshot_bytes_per_sec` and `max_restore_bytes_per_sec` if available and the cluster setting `indices.recovery.max_bytes_per_sec` which you can use to limit the bandwidth they consume. NOTE: This API is intended for exploratory use by humans. You should expect the request parameters and the response format to vary in future versions. NOTE: Different versions of Elasticsearch may perform different checks for repository compatibility, with newer versions typically being stricter than older ones. A storage system that passes repository analysis with one version of Elasticsearch may fail with a different version. This indicates it behaves incorrectly in ways that the former version did not detect. You must work with the supplier of your storage system to address the incompatibilities detected by the repository analysis API in any version of Elasticsearch. NOTE: This API may not work correctly in a mixed-version cluster. *Implementation details* NOTE: This section of documentation describes how the repository analysis API works in this version of Elasticsearch, but you should expect the implementation to vary between versions. The request parameters and response format depend on details of the implementation so may also be different in newer versions. The analysis comprises a number of blob-level tasks, as set by the `blob_count` parameter and a number of compare-and-exchange operations on linearizable registers, as set by the `register_operation_count` parameter. These tasks are distributed over the data and master-eligible nodes in the cluster for execution. 
For most blob-level tasks, the executing node first writes a blob to the repository and then instructs some of the other nodes in the cluster to attempt to read the data it just wrote. The size of the blob is chosen randomly, according to the `max_blob_size` and `max_total_data_size` parameters. If any of these reads fails then the repository does not implement the necessary read-after-write semantics that Elasticsearch requires. For some blob-level tasks, the executing node will instruct some of its peers to attempt to read the data before the writing process completes. These reads are permitted to fail, but must not return partial data. If any read returns partial data then the repository does not implement the necessary atomicity semantics that Elasticsearch requires. For some blob-level tasks, the executing node will overwrite the blob while its peers are reading it. In this case the data read may come from either the original or the overwritten blob, but the read operation must not return partial data or a mix of data from the two blobs. If any of these reads returns partial data or a mix of the two blobs then the repository does not implement the necessary atomicity semantics that Elasticsearch requires for overwrites. The executing node will use a variety of different methods to write the blob. For instance, where applicable, it will use both single-part and multi-part uploads. Similarly, the reading nodes will use a variety of different methods to read the data back again. For instance they may read the entire blob from start to end or may read only a subset of the data. For some blob-level tasks, the executing node will cancel the write before it is complete. In this case, it still instructs some of the other nodes in the cluster to attempt to read the blob but all of these reads must fail to find the blob. Linearizable registers are special blobs that Elasticsearch manipulates using an atomic compare-and-exchange operation. This operation ensures correct and strongly consistent behavior even when the blob is accessed by multiple nodes at the same time. The detailed implementation of the compare-and-exchange operation on linearizable registers varies by repository type. Repository analysis verifies that uncontended compare-and-exchange operations on a linearizable register blob always succeed. Repository analysis also verifies that contended operations either succeed or report the contention but do not return incorrect results. If an operation fails due to contention, Elasticsearch retries the operation until it succeeds. Most of the compare-and-exchange operations performed by repository analysis atomically increment a counter, which is represented as an 8-byte blob. Some operations also verify the behavior on small blobs with sizes other than 8 bytes.
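In client terms, a cautious first analysis of an existing repository might look like the sketch below; `my_repository` is a placeholder name, and the parameters and response fields mirror the `SnapshotRepositoryAnalyzeRequest` and `SnapshotRepositoryAnalyzeResponse` types added later in this patch:

[source, js]
----
// A first run with deliberately small values, per the guidance above.
const response = await client.snapshot.repositoryAnalyze({
  name: "my_repository", // placeholder repository name
  blob_count: 10,
  max_blob_size: "1mb",
  timeout: "120s", // be generous; analyses can take a long time
});
console.log(response.summary, response.issues_detected);
----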
+   * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/repo-analysis-api.html | Elasticsearch API documentation}
    */
-  async repositoryAnalyze (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptionsWithOutMeta): Promise<T.TODO>
-  async repositoryAnalyze (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptionsWithMeta): Promise<TransportResult<T.TODO, unknown>>
-  async repositoryAnalyze (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptions): Promise<T.TODO>
-  async repositoryAnalyze (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptions): Promise<any> {
-    const acceptedPath: string[] = ['repository']
+  async repositoryAnalyze (this: That, params: T.SnapshotRepositoryAnalyzeRequest | TB.SnapshotRepositoryAnalyzeRequest, options?: TransportRequestOptionsWithOutMeta): Promise<T.SnapshotRepositoryAnalyzeResponse>
+  async repositoryAnalyze (this: That, params: T.SnapshotRepositoryAnalyzeRequest | TB.SnapshotRepositoryAnalyzeRequest, options?: TransportRequestOptionsWithMeta): Promise<TransportResult<T.SnapshotRepositoryAnalyzeResponse, unknown>>
+  async repositoryAnalyze (this: That, params: T.SnapshotRepositoryAnalyzeRequest | TB.SnapshotRepositoryAnalyzeRequest, options?: TransportRequestOptions): Promise<T.SnapshotRepositoryAnalyzeResponse>
+  async repositoryAnalyze (this: That, params: T.SnapshotRepositoryAnalyzeRequest | TB.SnapshotRepositoryAnalyzeRequest, options?: TransportRequestOptions): Promise<any> {
+    const acceptedPath: string[] = ['name']
     const querystring: Record<string, any> = {}
     const body = undefined
-    params = params ?? {}
     for (const key in params) {
       if (acceptedPath.includes(key)) {
         continue
       } else if (key !== 'body') {
+        // @ts-expect-error
         querystring[key] = params[key]
       }
     }
     const method = 'POST'
-    const path = `/_snapshot/${encodeURIComponent(params.repository.toString())}/_analyze`
+    const path = `/_snapshot/${encodeURIComponent(params.name.toString())}/_analyze`
     const meta: TransportRequestMetadata = {
       name: 'snapshot.repository_analyze',
       pathParts: {
-        repository: params.repository
+        name: params.name
       }
     }
     return await this.transport.request({ path, method, querystring, body, meta }, options)
diff --git a/src/api/api/tasks.ts b/src/api/api/tasks.ts
index 947721b9f..adb4535db 100644
--- a/src/api/api/tasks.ts
+++ b/src/api/api/tasks.ts
@@ -45,7 +45,7 @@ export default class Tasks {
   }

  /**
-   * Cancel a task. A task may continue to run for some time after it has been cancelled because it may not be able to safely stop its current activity straight away. It is also possible that Elasticsearch must complete its work on other tasks before it can process the cancellation. The get task information API will continue to list these cancelled tasks until they complete. The cancelled flag in the response indicates that the cancellation command has been processed and the task will stop as soon as possible. To troubleshoot why a cancelled task does not complete promptly, use the get task information API with the `?detailed` parameter to identify the other tasks the system is running. You can also use the node hot threads API to obtain detailed information about the work the system is doing instead of completing the cancelled task.
+   * Cancel a task. WARNING: The task management API is new and should still be considered a beta feature. The API may change in ways that are not backwards compatible. A task may continue to run for some time after it has been cancelled because it may not be able to safely stop its current activity straight away. It is also possible that Elasticsearch must complete its work on other tasks before it can process the cancellation.
The get task information API will continue to list these cancelled tasks until they complete. The cancelled flag in the response indicates that the cancellation command has been processed and the task will stop as soon as possible. To troubleshoot why a cancelled task does not complete promptly, use the get task information API with the `?detailed` parameter to identify the other tasks the system is running. You can also use the node hot threads API to obtain detailed information about the work the system is doing instead of completing the cancelled task.
    * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/tasks.html | Elasticsearch API documentation}
    */
  async cancel (this: That, params?: T.TasksCancelRequest | TB.TasksCancelRequest, options?: TransportRequestOptionsWithOutMeta): Promise<T.TasksCancelResponse>
@@ -85,7 +85,7 @@ export default class Tasks {
   }

  /**
-   * Get task information. Get information about a task currently running in the cluster.
+   * Get task information. Get information about a task currently running in the cluster. WARNING: The task management API is new and should still be considered a beta feature. The API may change in ways that are not backwards compatible. If the task identifier is not found, a 404 response code indicates that there are no resources that match the request.
    * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/tasks.html | Elasticsearch API documentation}
    */
  async get (this: That, params: T.TasksGetRequest | TB.TasksGetRequest, options?: TransportRequestOptionsWithOutMeta): Promise<T.TasksGetResponse>
@@ -117,7 +117,7 @@ export default class Tasks {
   }

  /**
-   * Get all tasks. Get information about the tasks currently running on one or more nodes in the cluster.
+   * Get all tasks. Get information about the tasks currently running on one or more nodes in the cluster. WARNING: The task management API is new and should still be considered a beta feature. The API may change in ways that are not backwards compatible. **Identifying running tasks** The `X-Opaque-Id` header, when provided on the HTTP request, is returned as a header in the response as well as in the `headers` field in the task information. This enables you to track certain calls or associate certain tasks with the client that started them. For example: ``` curl -i -H "X-Opaque-Id: 123456" "http://localhost:9200/_tasks?group_by=parents" ``` The API returns the following result: ``` HTTP/1.1 200 OK X-Opaque-Id: 123456 content-type: application/json; charset=UTF-8 content-length: 831 { "tasks" : { "u5lcZHqcQhu-rUoFaqDphA:45" : { "node" : "u5lcZHqcQhu-rUoFaqDphA", "id" : 45, "type" : "transport", "action" : "cluster:monitor/tasks/lists", "start_time_in_millis" : 1513823752749, "running_time_in_nanos" : 293139, "cancellable" : false, "headers" : { "X-Opaque-Id" : "123456" }, "children" : [ { "node" : "u5lcZHqcQhu-rUoFaqDphA", "id" : 46, "type" : "direct", "action" : "cluster:monitor/tasks/lists[n]", "start_time_in_millis" : 1513823752750, "running_time_in_nanos" : 92133, "cancellable" : false, "parent_task_id" : "u5lcZHqcQhu-rUoFaqDphA:45", "headers" : { "X-Opaque-Id" : "123456" } } ] } } } ``` In this example, `X-Opaque-Id: 123456` is the ID returned as part of the response header. The `X-Opaque-Id` in the task `headers` is the ID for the task that was initiated by the REST request. The `X-Opaque-Id` in the children `headers` identifies the child tasks of the task that was initiated by the REST request.
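The curl example above has a client-side equivalent; in this client the header is typically supplied through the per-request `opaqueId` transport option rather than a raw header (a sketch, assuming default transport settings):

[source, js]
----
const response = await client.tasks.list(
  { group_by: "parents" },
  { opaqueId: "123456" } // sent as the X-Opaque-Id request header
);
console.log(response.tasks);
----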
* @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/tasks.html | Elasticsearch API documentation} */ async list (this: That, params?: T.TasksListRequest | TB.TasksListRequest, options?: TransportRequestOptionsWithOutMeta): Promise diff --git a/src/api/api/update.ts b/src/api/api/update.ts index 0ef23cbd8..b9d9d998b 100644 --- a/src/api/api/update.ts +++ b/src/api/api/update.ts @@ -39,7 +39,7 @@ import * as TB from '../typesWithBodyKey' interface That { transport: Transport } /** - * Update a document. Updates a document by running a script or passing a partial document. + * Update a document. Update a document by running a script or passing a partial document. If the Elasticsearch security features are enabled, you must have the `index` or `write` index privilege for the target index or index alias. The script can update, delete, or skip modifying the document. The API also supports passing a partial document, which is merged into the existing document. To fully replace an existing document, use the index API. This operation: * Gets the document (collocated with the shard) from the index. * Runs the specified script. * Indexes the result. The document must still be reindexed, but using this API removes some network roundtrips and reduces chances of version conflicts between the GET and the index operation. The `_source` field must be enabled to use this API. In addition to `_source`, you can access the following variables through the `ctx` map: `_index`, `_type`, `_id`, `_version`, `_routing`, and `_now` (the current timestamp). * @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.17/docs-update.html | Elasticsearch API documentation} */ export default async function UpdateApi (this: That, params: T.UpdateRequest | TB.UpdateRequest, options?: TransportRequestOptionsWithOutMeta): Promise> diff --git a/src/api/types.ts b/src/api/types.ts index 1a719857f..f80d8d564 100644 --- a/src/api/types.ts +++ b/src/api/types.ts @@ -2829,6 +2829,8 @@ export interface StoredScript { source: string } +export type StreamResult = ArrayBuffer + export type SuggestMode = 'missing' | 'popular' | 'always' export type SuggestionName = string @@ -6904,7 +6906,7 @@ export type CatAllocationResponse = CatAllocationAllocationRecord[] export interface CatComponentTemplatesComponentTemplate { name: string - version: string + version: string | null alias_count: string mapping_count: string settings_count: string @@ -10287,6 +10289,8 @@ export type EqlSearchResponse = EqlEqlSearchResponseBase> +} + +export interface EsqlAsyncQueryResponse { + columns?: EsqlColumns + id?: string + is_running: boolean +} + +export interface EsqlAsyncQueryDeleteRequest extends RequestBase { + id: Id +} + +export type EsqlAsyncQueryDeleteResponse = AcknowledgedResponseBase + +export interface EsqlAsyncQueryGetRequest extends RequestBase { + id: Id + drop_null_columns?: boolean + keep_alive?: Duration + wait_for_completion_timeout?: Duration +} + +export interface EsqlAsyncQueryGetResponse { + columns?: EsqlColumns + is_running: boolean +} export interface EsqlQueryRequest extends RequestBase { - format?: EsqlQueryEsqlFormat + format?: EsqlEsqlFormat delimiter?: string drop_null_columns?: boolean columnar?: boolean @@ -12684,6 +12726,22 @@ export interface InferencePutRequest extends RequestBase { export type InferencePutResponse = InferenceInferenceEndpointInfo +export interface InferenceStreamInferenceRequest extends RequestBase { + inference_id: Id + task_type?: InferenceTaskType + input: string | 
string[] +} + +export type InferenceStreamInferenceResponse = StreamResult + +export interface InferenceUpdateRequest extends RequestBase { + inference_id: Id + task_type?: InferenceTaskType + inference_config?: InferenceInferenceEndpoint +} + +export type InferenceUpdateResponse = InferenceInferenceEndpointInfo + export interface IngestAppendProcessor extends IngestProcessorBase { field: Field value: any | any[] @@ -17702,6 +17760,10 @@ export interface SecuritySearchAccess { allow_restricted_indices?: boolean } +export interface SecuritySecuritySettings { + index?: IndicesIndexSettings +} + export type SecurityTemplateFormat = 'string' | 'json' export interface SecurityUser { @@ -18158,6 +18220,16 @@ export interface SecurityGetServiceCredentialsResponse { nodes_credentials: SecurityGetServiceCredentialsNodesCredentials } +export interface SecurityGetSettingsRequest extends RequestBase { + master_timeout?: Duration +} + +export interface SecurityGetSettingsResponse { + security: SecuritySecuritySettings + 'security-profile': SecuritySecuritySettings + 'security-tokens': SecuritySecuritySettings +} + export type SecurityGetTokenAccessTokenGrantType = 'password' | 'client_credentials' | '_kerberos' | 'refresh_token' export interface SecurityGetTokenAuthenticatedUser extends SecurityUser { @@ -18679,6 +18751,18 @@ export interface SecurityUpdateCrossClusterApiKeyResponse { updated: boolean } +export interface SecurityUpdateSettingsRequest extends RequestBase { + master_timeout?: Duration + timeout?: Duration + security?: SecuritySecuritySettings + 'security-profile'?: SecuritySecuritySettings + 'security-tokens'?: SecuritySecuritySettings +} + +export interface SecurityUpdateSettingsResponse { + acknowledged: boolean +} + export interface SecurityUpdateUserProfileDataRequest extends RequestBase { uid: SecurityUserProfileId if_seq_no?: SequenceNumber @@ -19220,6 +19304,113 @@ export interface SnapshotGetRepositoryRequest extends RequestBase { export type SnapshotGetRepositoryResponse = Record +export interface SnapshotRepositoryAnalyzeBlobDetails { + name: string + overwritten: boolean + read_early: boolean + read_end: long + read_start: long + reads: SnapshotRepositoryAnalyzeReadBlobDetails + size: ByteSize + size_bytes: long +} + +export interface SnapshotRepositoryAnalyzeDetailsInfo { + blob: SnapshotRepositoryAnalyzeBlobDetails + overwrite_elapsed?: Duration + overwrite_elapsed_nanos?: DurationValue + write_elapsed: Duration + write_elapsed_nanos: DurationValue + write_throttled: Duration + write_throttled_nanos: DurationValue + writer_node: SnapshotRepositoryAnalyzeNodeInfo +} + +export interface SnapshotRepositoryAnalyzeNodeInfo { + id: Id + name: Name +} + +export interface SnapshotRepositoryAnalyzeReadBlobDetails { + before_write_complete?: boolean + elapsed?: Duration + elapsed_nanos?: DurationValue + first_byte_time?: Duration + first_byte_time_nanos: DurationValue + found: boolean + node: SnapshotRepositoryAnalyzeNodeInfo + throttled?: Duration + throttled_nanos?: DurationValue +} + +export interface SnapshotRepositoryAnalyzeReadSummaryInfo { + count: integer + max_wait: Duration + max_wait_nanos: DurationValue + total_elapsed: Duration + total_elapsed_nanos: DurationValue + total_size: ByteSize + total_size_bytes: long + total_throttled: Duration + total_throttled_nanos: DurationValue + total_wait: Duration + total_wait_nanos: DurationValue +} + +export interface SnapshotRepositoryAnalyzeRequest extends RequestBase { + name: Name + blob_count?: integer + concurrency?: integer + 
detailed?: boolean + early_read_node_count?: integer + max_blob_size?: ByteSize + max_total_data_size?: ByteSize + rare_action_probability?: double + rarely_abort_writes?: boolean + read_node_count?: integer + register_operation_count?: integer + seed?: integer + timeout?: Duration +} + +export interface SnapshotRepositoryAnalyzeResponse { + blob_count: integer + blob_path: string + concurrency: integer + coordinating_node: SnapshotRepositoryAnalyzeNodeInfo + delete_elapsed: Duration + delete_elapsed_nanos: DurationValue + details: SnapshotRepositoryAnalyzeDetailsInfo + early_read_node_count: integer + issues_detected: string[] + listing_elapsed: Duration + listing_elapsed_nanos: DurationValue + max_blob_size: ByteSize + max_blob_size_bytes: long + max_total_data_size: ByteSize + max_total_data_size_bytes: long + rare_action_probability: double + read_node_count: integer + repository: string + seed: long + summary: SnapshotRepositoryAnalyzeSummaryInfo +} + +export interface SnapshotRepositoryAnalyzeSummaryInfo { + read: SnapshotRepositoryAnalyzeReadSummaryInfo + write: SnapshotRepositoryAnalyzeWriteSummaryInfo +} + +export interface SnapshotRepositoryAnalyzeWriteSummaryInfo { + count: integer + total_elapsed: Duration + total_elapsed_nanos: DurationValue + total_size: ByteSize + total_size_bytes: long + total_throttled: Duration + total_throttled_nanos: long +} + export interface SnapshotRepositoryVerifyIntegrityRequest extends RequestBase { name: Names meta_thread_pool_concurrency?: integer diff --git a/src/api/typesWithBodyKey.ts b/src/api/typesWithBodyKey.ts index e93f6a56c..463d831fa 100644 --- a/src/api/typesWithBodyKey.ts +++ b/src/api/typesWithBodyKey.ts @@ -2905,6 +2905,8 @@ export interface StoredScript { source: string } +export type StreamResult = ArrayBuffer + export type SuggestMode = 'missing' | 'popular' | 'always' export type SuggestionName = string @@ -6984,7 +6986,7 @@ export type CatAllocationResponse = CatAllocationAllocationRecord[] export interface CatComponentTemplatesComponentTemplate { name: string - version: string + version: string | null alias_count: string mapping_count: string settings_count: string @@ -10457,6 +10459,8 @@ export type EqlSearchResponse = EqlEqlSearchResponseBase> + } +} + +export interface EsqlAsyncQueryResponse { + columns?: EsqlColumns + id?: string + is_running: boolean +} + +export interface EsqlAsyncQueryDeleteRequest extends RequestBase { + id: Id +} + +export type EsqlAsyncQueryDeleteResponse = AcknowledgedResponseBase + +export interface EsqlAsyncQueryGetRequest extends RequestBase { + id: Id + drop_null_columns?: boolean + keep_alive?: Duration + wait_for_completion_timeout?: Duration +} + +export interface EsqlAsyncQueryGetResponse { + columns?: EsqlColumns + is_running: boolean +} export interface EsqlQueryRequest extends RequestBase { - format?: EsqlQueryEsqlFormat + format?: EsqlEsqlFormat delimiter?: string drop_null_columns?: boolean /** @deprecated The use of the 'body' key has been deprecated, move the nested keys to the top level object. */ @@ -12924,6 +12969,26 @@ export interface InferencePutRequest extends RequestBase { export type InferencePutResponse = InferenceInferenceEndpointInfo +export interface InferenceStreamInferenceRequest extends RequestBase { + inference_id: Id + task_type?: InferenceTaskType + /** @deprecated The use of the 'body' key has been deprecated, move the nested keys to the top level object. 
*/ + body?: { + input: string | string[] + } +} + +export type InferenceStreamInferenceResponse = StreamResult + +export interface InferenceUpdateRequest extends RequestBase { + inference_id: Id + task_type?: InferenceTaskType + /** @deprecated The use of the 'body' key has been deprecated, use 'inference_config' instead. */ + body?: InferenceInferenceEndpoint +} + +export type InferenceUpdateResponse = InferenceInferenceEndpointInfo + export interface IngestAppendProcessor extends IngestProcessorBase { field: Field value: any | any[] @@ -18100,6 +18165,10 @@ export interface SecuritySearchAccess { allow_restricted_indices?: boolean } +export interface SecuritySecuritySettings { + index?: IndicesIndexSettings +} + export type SecurityTemplateFormat = 'string' | 'json' export interface SecurityUser { @@ -18580,6 +18649,16 @@ export interface SecurityGetServiceCredentialsResponse { nodes_credentials: SecurityGetServiceCredentialsNodesCredentials } +export interface SecurityGetSettingsRequest extends RequestBase { + master_timeout?: Duration +} + +export interface SecurityGetSettingsResponse { + security: SecuritySecuritySettings + 'security-profile': SecuritySecuritySettings + 'security-tokens': SecuritySecuritySettings +} + export type SecurityGetTokenAccessTokenGrantType = 'password' | 'client_credentials' | '_kerberos' | 'refresh_token' export interface SecurityGetTokenAuthenticatedUser extends SecurityUser { @@ -19172,6 +19251,21 @@ export interface SecurityUpdateCrossClusterApiKeyResponse { updated: boolean } +export interface SecurityUpdateSettingsRequest extends RequestBase { + master_timeout?: Duration + timeout?: Duration + /** @deprecated The use of the 'body' key has been deprecated, move the nested keys to the top level object. */ + body?: { + security?: SecuritySecuritySettings + 'security-profile'?: SecuritySecuritySettings + 'security-tokens'?: SecuritySecuritySettings + } +} + +export interface SecurityUpdateSettingsResponse { + acknowledged: boolean +} + export interface SecurityUpdateUserProfileDataRequest extends RequestBase { uid: SecurityUserProfileId if_seq_no?: SequenceNumber @@ -19732,6 +19826,113 @@ export interface SnapshotGetRepositoryRequest extends RequestBase { export type SnapshotGetRepositoryResponse = Record +export interface SnapshotRepositoryAnalyzeBlobDetails { + name: string + overwritten: boolean + read_early: boolean + read_end: long + read_start: long + reads: SnapshotRepositoryAnalyzeReadBlobDetails + size: ByteSize + size_bytes: long +} + +export interface SnapshotRepositoryAnalyzeDetailsInfo { + blob: SnapshotRepositoryAnalyzeBlobDetails + overwrite_elapsed?: Duration + overwrite_elapsed_nanos?: DurationValue + write_elapsed: Duration + write_elapsed_nanos: DurationValue + write_throttled: Duration + write_throttled_nanos: DurationValue + writer_node: SnapshotRepositoryAnalyzeNodeInfo +} + +export interface SnapshotRepositoryAnalyzeNodeInfo { + id: Id + name: Name +} + +export interface SnapshotRepositoryAnalyzeReadBlobDetails { + before_write_complete?: boolean + elapsed?: Duration + elapsed_nanos?: DurationValue + first_byte_time?: Duration + first_byte_time_nanos: DurationValue + found: boolean + node: SnapshotRepositoryAnalyzeNodeInfo + throttled?: Duration + throttled_nanos?: DurationValue +} + +export interface SnapshotRepositoryAnalyzeReadSummaryInfo { + count: integer + max_wait: Duration + max_wait_nanos: DurationValue + total_elapsed: Duration + total_elapsed_nanos: DurationValue + total_size: ByteSize + total_size_bytes: long + 
total_throttled: Duration + total_throttled_nanos: DurationValue + total_wait: Duration + total_wait_nanos: DurationValue +} + +export interface SnapshotRepositoryAnalyzeRequest extends RequestBase { + name: Name + blob_count?: integer + concurrency?: integer + detailed?: boolean + early_read_node_count?: integer + max_blob_size?: ByteSize + max_total_data_size?: ByteSize + rare_action_probability?: double + rarely_abort_writes?: boolean + read_node_count?: integer + register_operation_count?: integer + seed?: integer + timeout?: Duration +} + +export interface SnapshotRepositoryAnalyzeResponse { + blob_count: integer + blob_path: string + concurrency: integer + coordinating_node: SnapshotRepositoryAnalyzeNodeInfo + delete_elapsed: Duration + delete_elapsed_nanos: DurationValue + details: SnapshotRepositoryAnalyzeDetailsInfo + early_read_node_count: integer + issues_detected: string[] + listing_elapsed: Duration + listing_elapsed_nanos: DurationValue + max_blob_size: ByteSize + max_blob_size_bytes: long + max_total_data_size: ByteSize + max_total_data_size_bytes: long + rare_action_probability: double + read_node_count: integer + repository: string + seed: long + summary: SnapshotRepositoryAnalyzeSummaryInfo +} + +export interface SnapshotRepositoryAnalyzeSummaryInfo { + read: SnapshotRepositoryAnalyzeReadSummaryInfo + write: SnapshotRepositoryAnalyzeWriteSummaryInfo +} + +export interface SnapshotRepositoryAnalyzeWriteSummaryInfo { + count: integer + total_elapsed: Duration + total_elapsed_nanos: DurationValue + total_size: ByteSize + total_size_bytes: long + total_throttled: Duration + total_throttled_nanos: long +} + export interface SnapshotRepositoryVerifyIntegrityRequest extends RequestBase { name: Names meta_thread_pool_concurrency?: integer