diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index d8aca419dda5..040b4ea51de2 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -115,6 +115,7 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff] - Ensure Elasticsearch output can always recover from network errors {pull}40794[40794] - Add `translate_ldap_attribute` processor. {pull}41472[41472] - Remove unnecessary debug logs during idle connection teardown {issue}40824[40824] +- Fix incorrect cloud provider identification in add_cloud_metadata processor using provider priority mechanism {pull}41636[41636] *Auditbeat* diff --git a/libbeat/processors/add_cloud_metadata/docs/add_cloud_metadata.asciidoc b/libbeat/processors/add_cloud_metadata/docs/add_cloud_metadata.asciidoc index 1e05e1d2c24a..852b3b187d1e 100644 --- a/libbeat/processors/add_cloud_metadata/docs/add_cloud_metadata.asciidoc +++ b/libbeat/processors/add_cloud_metadata/docs/add_cloud_metadata.asciidoc @@ -21,7 +21,10 @@ The following cloud providers are supported: - Openstack Nova - Hetzner Cloud -NOTE: `huawei` is an alias for `openstack`. Huawei cloud runs on OpenStack platform, and when +[float] +==== Special notes + +`huawei` is an alias for `openstack`. Huawei cloud runs on OpenStack platform, and when viewed from a metadata API standpoint, it is impossible to differentiate it from OpenStack. If you know that your deployments run on Huawei Cloud exclusively, and you wish to have `cloud.provider` value as `huawei`, you can achieve this by overwriting the value using an `add_fields` processor. @@ -30,6 +33,16 @@ The Alibaba Cloud and Tencent cloud providers are disabled by default, because they require to access a remote host. The `providers` setting allows users to select a list of default providers to query. +Cloud providers tend to maintain metadata services compliant with other cloud providers. +For example, Openstack supports https://docs.openstack.org/nova/latest/user/metadata.html#ec2-compatible-metadata[EC2 compliant metadat service]. +This makes it impossible to differentiate cloud provider (`cloud.provider` property) with auto discovery (when `providers` configuration is omitted). +The processor implementation incorporates a priority mechanism where priority is given to some providers over others when there are multiple successful metadata results. +Currently, `aws/ec2` and `azure` have priority over any other provider as their metadata retrival rely on SDKs. +The expectation here is that SDK methods should fail if run in an environment not configured accordingly (ex:- missing configurations or credentials). + +[float] +==== Configurations + The simple configuration below enables the processor. [source,yaml] @@ -71,6 +84,16 @@ List of names the `providers` setting supports: - "tencent", or "qcloud" for Tencent Cloud (disabled by default). - "hetzner" for Hetzner Cloud (enabled by default). +For example, configuration below only utilize `aws` metadata retrival mechanism, + +[source,yaml] +------------------------------------------------------------------------------- +processors: + - add_cloud_metadata: + providers: + aws +------------------------------------------------------------------------------- + The third optional configuration setting is `overwrite`. When `overwrite` is `true`, `add_cloud_metadata` overwrites existing `cloud.*` fields (`false` by default). @@ -78,6 +101,9 @@ default). The `add_cloud_metadata` processor supports SSL options to configure the http client used to query cloud metadata. See <> for more information. +[float] +==== Provided metadata + The metadata that is added to events varies by hosting provider. Below are examples for each of the supported providers. diff --git a/libbeat/processors/add_cloud_metadata/provider_alibaba_cloud.go b/libbeat/processors/add_cloud_metadata/provider_alibaba_cloud.go index 9bdce314ba8e..754cf82007f8 100644 --- a/libbeat/processors/add_cloud_metadata/provider_alibaba_cloud.go +++ b/libbeat/processors/add_cloud_metadata/provider_alibaba_cloud.go @@ -27,7 +27,7 @@ import ( var alibabaCloudMetadataFetcher = provider{ Name: "alibaba-ecs", - Local: false, + DefaultEnabled: false, Create: func(_ string, c *conf.C) (metadataFetcher, error) { ecsMetadataHost := "100.100.100.200" diff --git a/libbeat/processors/add_cloud_metadata/provider_aws_ec2.go b/libbeat/processors/add_cloud_metadata/provider_aws_ec2.go index ae7dfbf9865d..1c364f8e6a9a 100644 --- a/libbeat/processors/add_cloud_metadata/provider_aws_ec2.go +++ b/libbeat/processors/add_cloud_metadata/provider_aws_ec2.go @@ -65,7 +65,7 @@ var NewEC2Client func(cfg awssdk.Config) EC2Client = func(cfg awssdk.Config) EC2 var ec2MetadataFetcher = provider{ Name: "aws-ec2", - Local: true, + DefaultEnabled: true, Create: func(_ string, config *conf.C) (metadataFetcher, error) { ec2Schema := func(m map[string]interface{}) mapstr.M { diff --git a/libbeat/processors/add_cloud_metadata/provider_azure_vm.go b/libbeat/processors/add_cloud_metadata/provider_azure_vm.go index 788519352a86..e39b97b7031d 100644 --- a/libbeat/processors/add_cloud_metadata/provider_azure_vm.go +++ b/libbeat/processors/add_cloud_metadata/provider_azure_vm.go @@ -62,7 +62,7 @@ var NewClusterClient func(clientFactory *armcontainerservice.ClientFactory) *arm var azureVMMetadataFetcher = provider{ Name: "azure-compute", - Local: true, + DefaultEnabled: true, Create: func(_ string, config *conf.C) (metadataFetcher, error) { azMetadataURI := "/metadata/instance/compute?api-version=2021-02-01" diff --git a/libbeat/processors/add_cloud_metadata/provider_digital_ocean.go b/libbeat/processors/add_cloud_metadata/provider_digital_ocean.go index bfe282f70744..afc8cd3020dc 100644 --- a/libbeat/processors/add_cloud_metadata/provider_digital_ocean.go +++ b/libbeat/processors/add_cloud_metadata/provider_digital_ocean.go @@ -28,7 +28,7 @@ import ( var doMetadataFetcher = provider{ Name: "digitalocean", - Local: true, + DefaultEnabled: true, Create: func(provider string, config *conf.C) (metadataFetcher, error) { doSchema := func(m map[string]interface{}) mapstr.M { diff --git a/libbeat/processors/add_cloud_metadata/provider_google_gce.go b/libbeat/processors/add_cloud_metadata/provider_google_gce.go index 1eb8525b2f3e..cebfb747bfe5 100644 --- a/libbeat/processors/add_cloud_metadata/provider_google_gce.go +++ b/libbeat/processors/add_cloud_metadata/provider_google_gce.go @@ -45,7 +45,7 @@ type Server struct { var gceMetadataFetcher = provider{ Name: "google-gce", - Local: true, + DefaultEnabled: true, Create: func(provider string, config *conf.C) (metadataFetcher, error) { gceMetadataURI := "/computeMetadata/v1/?recursive=true&alt=json" diff --git a/libbeat/processors/add_cloud_metadata/provider_hetzner_cloud.go b/libbeat/processors/add_cloud_metadata/provider_hetzner_cloud.go index 4c231174c001..3bfea2c119a8 100644 --- a/libbeat/processors/add_cloud_metadata/provider_hetzner_cloud.go +++ b/libbeat/processors/add_cloud_metadata/provider_hetzner_cloud.go @@ -32,8 +32,8 @@ const ( // Hetzner Cloud Metadata Service // Document https://docs.hetzner.cloud/#server-metadata var hetznerMetadataFetcher = provider{ - Name: "hetzner-cloud", - Local: true, + Name: "hetzner-cloud", + DefaultEnabled: true, Create: func(_ string, c *conf.C) (metadataFetcher, error) { hetznerSchema := func(m map[string]interface{}) mapstr.M { m["service"] = mapstr.M{ diff --git a/libbeat/processors/add_cloud_metadata/provider_openstack_nova.go b/libbeat/processors/add_cloud_metadata/provider_openstack_nova.go index 824d2651c3a4..40f91be43231 100644 --- a/libbeat/processors/add_cloud_metadata/provider_openstack_nova.go +++ b/libbeat/processors/add_cloud_metadata/provider_openstack_nova.go @@ -33,15 +33,15 @@ const ( // OpenStack Nova Metadata Service // Document https://docs.openstack.org/nova/latest/user/metadata-service.html var openstackNovaMetadataFetcher = provider{ - Name: "openstack-nova", - Local: true, - Create: buildOpenstackNovaCreate("http"), + Name: "openstack-nova", + DefaultEnabled: true, + Create: buildOpenstackNovaCreate("http"), } var openstackNovaSSLMetadataFetcher = provider{ - Name: "openstack-nova-ssl", - Local: true, - Create: buildOpenstackNovaCreate("https"), + Name: "openstack-nova-ssl", + DefaultEnabled: true, + Create: buildOpenstackNovaCreate("https"), } func buildOpenstackNovaCreate(scheme string) func(provider string, c *conf.C) (metadataFetcher, error) { diff --git a/libbeat/processors/add_cloud_metadata/provider_tencent_cloud.go b/libbeat/processors/add_cloud_metadata/provider_tencent_cloud.go index e805ff1bf95c..ca4733edc60d 100644 --- a/libbeat/processors/add_cloud_metadata/provider_tencent_cloud.go +++ b/libbeat/processors/add_cloud_metadata/provider_tencent_cloud.go @@ -27,7 +27,7 @@ import ( var qcloudMetadataFetcher = provider{ Name: "tencent-qcloud", - Local: false, + DefaultEnabled: false, Create: func(_ string, c *conf.C) (metadataFetcher, error) { qcloudMetadataHost := "metadata.tencentyun.com" diff --git a/libbeat/processors/add_cloud_metadata/providers.go b/libbeat/processors/add_cloud_metadata/providers.go index ea56a5e669b3..5d6d64047c44 100644 --- a/libbeat/processors/add_cloud_metadata/providers.go +++ b/libbeat/processors/add_cloud_metadata/providers.go @@ -23,10 +23,12 @@ import ( "net" "net/http" "os" + "slices" "strings" "time" conf "github.com/elastic/elastic-agent-libs/config" + "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/elastic-agent-libs/mapstr" ) @@ -34,8 +36,9 @@ type provider struct { // Name contains a long name of provider and service metadata is fetched from. Name string - // Local Set to true if local IP is accessed only - Local bool + // DefaultEnabled allows to control whether metadata provider should be enabled by default + // Set to true if metadata access is enabled by default for the provider + DefaultEnabled bool // Create returns an actual metadataFetcher Create func(string, *conf.C) (metadataFetcher, error) @@ -70,6 +73,14 @@ var cloudMetaProviders = map[string]provider{ "hetzner": hetznerMetadataFetcher, } +// priorityProviders contains providers which has priority over others. +// Metadata of these are derived using cloud provider SDKs, making them valid over metadata derived over well-known IP +// or other common endpoints. For example, Openstack supports EC2 compliant metadata endpoint. Thus adding possibility to +// conflict metadata between EC2/AWS and Openstack. +var priorityProviders = []string{ + "aws", "ec2", "azure", +} + func selectProviders(configList providerList, providers map[string]provider) map[string]provider { return filterMetaProviders(providersFilter(configList, providers), providers) } @@ -93,7 +104,7 @@ func providersFilter(configList providerList, allProviders map[string]provider) if len(configList) == 0 { return func(name string) bool { ff, ok := allProviders[name] - return ok && ff.Local + return ok && ff.DefaultEnabled } } return func(name string) (ok bool) { @@ -178,22 +189,54 @@ func (p *addCloudMetadata) fetchMetadata() *result { }() } - for i := 0; i < len(p.initData.fetchers); i++ { + var responses []result + + for ctx.Err() == nil { select { case result := <-results: p.logger.Debugf("add_cloud_metadata: received disposition for %v after %v. %v", result.provider, time.Since(start), result) - // Bail out on first success. + if result.err == nil && result.metadata != nil { - return &result - } else if result.err != nil { - p.logger.Errorf("add_cloud_metadata: received error for provider %s: %v", result.provider, result.err) + responses = append(responses, result) + } + + if result.err != nil { + p.logger.Debugf("add_cloud_metadata: received error for provider %s: %v", result.provider, result.err) } case <-ctx.Done(): - p.logger.Debugf("add_cloud_metadata: timed-out waiting for all responses") - return nil + p.logger.Debugf("add_cloud_metadata: timed-out waiting for responses") } } - return nil + return priorityResult(responses, p.logger) +} + +// priorityResult is a helper to extract correct result (if multiple exist) based on priorityProviders +func priorityResult(responses []result, logger *logp.Logger) *result { + if len(responses) == 0 { + return nil + } + + if len(responses) == 1 { + return &responses[0] + } + + logger.Debugf("add_cloud_metadata: multiple responses were received, filtering based on priority") + var prioritizedResponses []result + for _, r := range responses { + if slices.Contains(priorityProviders, r.provider) { + prioritizedResponses = append(prioritizedResponses, r) + } + } + + // simply send the first entry of prioritized response + if len(prioritizedResponses) != 0 { + pr := prioritizedResponses[0] + logger.Debugf("add_cloud_metadata: using provider %s metadata based on priority", pr.provider) + return &pr + } + + // else send the first from bulk of response + return &responses[0] } diff --git a/libbeat/processors/add_cloud_metadata/providers_test.go b/libbeat/processors/add_cloud_metadata/providers_test.go index 85336a4c2b3f..0f39dcf56769 100644 --- a/libbeat/processors/add_cloud_metadata/providers_test.go +++ b/libbeat/processors/add_cloud_metadata/providers_test.go @@ -25,6 +25,7 @@ import ( "github.com/stretchr/testify/assert" conf "github.com/elastic/elastic-agent-libs/config" + "github.com/elastic/elastic-agent-libs/logp" ) func init() { @@ -34,7 +35,7 @@ func init() { func TestProvidersFilter(t *testing.T) { var allLocal []string for name, ff := range cloudMetaProviders { - if ff.Local { + if ff.DefaultEnabled { allLocal = append(allLocal, name) } } @@ -119,3 +120,59 @@ func TestProvidersFilter(t *testing.T) { }) } } + +func Test_priorityResult(t *testing.T) { + tLogger := logp.NewLogger("add_cloud_metadata testing") + awsRsp := result{ + provider: "aws", + metadata: map[string]interface{}{ + "id": "a-1", + }, + } + + openStackRsp := result{ + provider: "openstack", + metadata: map[string]interface{}{ + "id": "o-1", + }, + } + + digitaloceanRsp := result{ + provider: "digitalocean", + metadata: map[string]interface{}{ + "id": "d-1", + }, + } + + tests := []struct { + name string + collected []result + want *result + }{ + { + name: "Empty results returns nil", + collected: []result{}, + want: nil, + }, + { + name: "Single result returns the same", + collected: []result{awsRsp}, + want: &awsRsp, + }, + { + name: "Priority result wins", + collected: []result{openStackRsp, awsRsp}, + want: &awsRsp, + }, + { + name: "For non-priority result, response order wins", + collected: []result{openStackRsp, digitaloceanRsp}, + want: &openStackRsp, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, priorityResult(tt.collected, tLogger)) + }) + } +}