Switch k8s input paths to /var/log/pods/* to ingest rotated container logs #6583

Draft · wants to merge 2 commits into main
@@ -321,13 +321,13 @@ stringData:
- data_stream:
dataset: kubernetes.container_logs
type: logs
id: kubernetes-container-logs-${kubernetes.pod.name}-${kubernetes.container.id}
id: kubernetes-container-logs-${kubernetes.namespace}-${kubernetes.pod.name}-${kubernetes.container.name}
parsers:
- container:
format: auto
stream: all
paths:
- /var/log/containers/*${kubernetes.container.id}.log
- /var/log/pods/${kubernetes.namespace}_${kubernetes.pod.name}_${kubernetes.pod.uid}/${kubernetes.container.name}/*.log
processors:
- add_fields:
fields:
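For context on why the paths change in every manifest below: /var/log/containers/ holds one symlink per container ID, pointing only at that container instance's current log file, while the kubelet keeps the underlying files under /var/log/pods/<namespace>_<pod>_<uid>/<container>/. Globbing the pod directory is therefore independent of the container ID and also matches files left behind by earlier container instances and rotation. A minimal sketch of the resulting stream, with made-up names (exact rotation naming depends on the kubelet and container runtime):

# Illustrative layout on a typical node (names are examples):
#   /var/log/containers/web-0_prod_nginx-<container-id>.log   -> symlink to the current instance's file only
#   /var/log/pods/prod_web-0_<pod-uid>/nginx/0.log             # first container instance
#   /var/log/pods/prod_web-0_<pod-uid>/nginx/1.log             # file created after a restart
- id: kubernetes-container-logs-${kubernetes.namespace}-${kubernetes.pod.name}-${kubernetes.container.name}
  data_stream:
    dataset: kubernetes.container_logs
    type: logs
  parsers:
    - container:
        format: auto
        stream: all
  paths:
    # One glob per container directory, independent of the container ID, so the
    # stream does not have to be re-rendered when the container restarts.
    - /var/log/pods/${kubernetes.namespace}_${kubernetes.pod.name}_${kubernetes.pod.uid}/${kubernetes.container.name}/*.log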
@@ -320,13 +320,13 @@ stringData:
- data_stream:
dataset: kubernetes.container_logs
type: logs
id: kubernetes-container-logs-${kubernetes.pod.name}-${kubernetes.container.id}
id: kubernetes-container-logs-${kubernetes.namespace}-${kubernetes.pod.name}-${kubernetes.container.name}
parsers:
- container:
format: auto
stream: all
paths:
- /var/log/containers/*${kubernetes.container.id}.log
- /var/log/pods/${kubernetes.namespace}_${kubernetes.pod.name}_${kubernetes.pod.uid}/${kubernetes.container.name}/*.log
processors:
- add_fields:
fields:
@@ -888,7 +888,7 @@ spec:
labels:
name: agent-pernode-example
annotations:
checksum/config: 233affcd72143e637a130b5f099c30e194d90042eb00a26512f51c844c65a821
checksum/config: 0a02c8b3783f5e072f6eb62d87c3fefb2df9cdaf8efb7dbda9bcc11050246c3d
spec:
automountServiceAccountToken: true
containers:
@@ -320,13 +320,13 @@ stringData:
- data_stream:
dataset: kubernetes.container_logs
type: logs
id: kubernetes-container-logs-${kubernetes.pod.name}-${kubernetes.container.id}
id: kubernetes-container-logs-${kubernetes.namespace}-${kubernetes.pod.name}-${kubernetes.container.name}
parsers:
- container:
format: auto
stream: all
paths:
- /var/log/containers/*${kubernetes.container.id}.log
- /var/log/pods/${kubernetes.namespace}_${kubernetes.pod.name}_${kubernetes.pod.uid}/${kubernetes.container.name}/*.log
processors:
- add_fields:
fields:
@@ -891,7 +891,7 @@ spec:
labels:
name: agent-pernode-example
annotations:
checksum/config: daca0d998edb3afa587d96e69b0833f6919ca6ba72f58f3a1f83b22d7e5ffaf6
checksum/config: 440922798e9423e250298ce729018751f6a9d3343a03f3e54f93380e932acdfe
spec:
automountServiceAccountToken: true
containers:
@@ -132,13 +132,13 @@ stringData:
- data_stream:
dataset: kubernetes.container_logs
type: logs
id: kubernetes-container-logs-${kubernetes.pod.name}-${kubernetes.container.id}
id: kubernetes-container-logs-${kubernetes.namespace}-${kubernetes.pod.name}-${kubernetes.container.name}
parsers:
- container:
format: auto
stream: all
paths:
- /var/log/containers/*${kubernetes.container.id}.log
- /var/log/pods/${kubernetes.namespace}_${kubernetes.pod.name}_${kubernetes.pod.uid}/${kubernetes.container.name}/*.log
processors:
- add_fields:
fields:
@@ -949,7 +949,7 @@ spec:
labels:
name: agent-pernode-example
annotations:
checksum/config: 05797fdfdd3cdeefb99e39e0f4756a6b812465509b31195ff57ae3925aa5e087
checksum/config: 916336da56b9ea407c8b613a721335c42af4fb6df724f54d0f301e6c193b8e4e
spec:
automountServiceAccountToken: true
containers:
@@ -49,13 +49,13 @@ stringData:
- data_stream:
dataset: kubernetes.container_logs
type: logs
id: kubernetes-container-logs-${kubernetes.pod.name}-${kubernetes.container.id}
id: kubernetes-container-logs-${kubernetes.namespace}-${kubernetes.pod.name}-${kubernetes.container.name}
parsers:
- container:
format: auto
stream: all
paths:
- /var/log/containers/*${kubernetes.container.id}.log
- /var/log/pods/${kubernetes.namespace}_${kubernetes.pod.name}_${kubernetes.pod.uid}/${kubernetes.container.name}/*.log
processors:
- add_fields:
fields:
@@ -223,7 +223,7 @@ spec:
labels:
name: agent-pernode-example
annotations:
checksum/config: 0840dcdf026f64cefb7aa69f420bc923d7e2d7d6e9a239e107fd2684e309d8ae
checksum/config: 7c35f814b93d366f98c472a618bd968083df7c07072b6e510cbb71ad26b0b6ab
spec:
automountServiceAccountToken: true
containers:
@@ -346,13 +346,13 @@ stringData:
- data_stream:
dataset: kubernetes.container_logs
type: logs
id: kubernetes-container-logs-${kubernetes.pod.name}-${kubernetes.container.id}
id: kubernetes-container-logs-${kubernetes.namespace}-${kubernetes.pod.name}-${kubernetes.container.name}
parsers:
- container:
format: auto
stream: all
paths:
- /var/log/containers/*${kubernetes.container.id}.log
- /var/log/pods/${kubernetes.namespace}_${kubernetes.pod.name}_${kubernetes.pod.uid}/${kubernetes.container.name}/*.log
processors:
- add_fields:
fields:
@@ -917,7 +917,7 @@ spec:
labels:
name: agent-pernode-example
annotations:
checksum/config: daca0d998edb3afa587d96e69b0833f6919ca6ba72f58f3a1f83b22d7e5ffaf6
checksum/config: 440922798e9423e250298ce729018751f6a9d3343a03f3e54f93380e932acdfe
spec:
automountServiceAccountToken: true
containers:
@@ -296,13 +296,13 @@ stringData:
- data_stream:
dataset: kubernetes.container_logs
type: logs
id: kubernetes-container-logs-${kubernetes.pod.name}-${kubernetes.container.id}
id: kubernetes-container-logs-${kubernetes.namespace}-${kubernetes.pod.name}-${kubernetes.container.name}
parsers:
- container:
format: auto
stream: all
paths:
- /var/log/containers/*${kubernetes.container.id}.log
- /var/log/pods/${kubernetes.namespace}_${kubernetes.pod.name}_${kubernetes.pod.uid}/${kubernetes.container.name}/*.log
processors:
- add_fields:
fields:
@@ -868,7 +868,7 @@ spec:
labels:
name: agent-pernode-example
annotations:
checksum/config: 233affcd72143e637a130b5f099c30e194d90042eb00a26512f51c844c65a821
checksum/config: 0a02c8b3783f5e072f6eb62d87c3fefb2df9cdaf8efb7dbda9bcc11050246c3d
spec:
automountServiceAccountToken: true
containers:
@@ -16,12 +16,12 @@ Config input for container logs
namespace: {{ .Values.kubernetes.namespace }}
use_output: {{ .Values.kubernetes.output }}
streams:
- id: kubernetes-container-logs-${kubernetes.pod.name}-${kubernetes.container.id}
- id: kubernetes-container-logs-${kubernetes.namespace}-${kubernetes.pod.name}-${kubernetes.container.name}
Contributor

The new ID does not seem to be unique; this will cause problems and lead to data duplication.
Are you sure that ${kubernetes.namespace}-${kubernetes.pod.name}-${kubernetes.container.name} will always be unique and will make this block be rendered only once?

The previous implementation used ${kubernetes.container.id} because that is guaranteed to always be unique.

Suggested change
- id: kubernetes-container-logs-${kubernetes.namespace}-${kubernetes.pod.name}-${kubernetes.container.name}
- id: kubernetes-container-logs-${kubernetes.pod.name}-${kubernetes.container.id}

The key thing we have to ensure here is that there will never be two Filestream inputs started with the same ID.

Contributor

I see this file is in the Helm chart folder; how does the upgrade work here? Is there any chance a user will upgrade and have the input ID change? If so, that will lead to data duplication, because the new ID will make all files be considered new and thus fully re-ingested.

Contributor
@pkoutsovasilis · Jan 28, 2025

@belimawr a few things I have in mind: the combination of ${kubernetes.namespace}-${kubernetes.pod.name}-${kubernetes.container.name} should always be unique in a k8s cluster, right?!

also, about "how does upgrade here work": indeed, when we restart, shouldn't the fingerprint be the same no matter the ID?! That said, if we do have to change it, we should do it now, while the Helm chart is not yet GA?! 🙂

Contributor
@belimawr · Jan 28, 2025

the combination of ${kubernetes.namespace}-${kubernetes.pod.name}-${kubernetes.container.name} should always be unique in a k8s cluster, right?!

I hope so; however, I'm not sure. It would be nice to back it up with some K8s documentation. I know for sure that ${kubernetes.container.id} is always unique, hence it's the one I've always used/recommended.

indeed when we restart shouldn't fingerprint be the same no matter the ID?!

Yes; however, the input ID is part of the state ID, so changing the input ID will affect the final state of the file.

Within a single instance of the Filestream input, files are identified by the file_identity (in this case, fingerprint); however, the Filestream input will only look at states that match its own ID.

Here is an example of a registry (file state) entry:

{
  "k": "filestream::my-filestream-id::native::8787338-65029",
  "v": {
    "ttl": 1800000000000,
    "updated": [
      516104569337,
      1653410356
    ],
    "cursor": {
      "offset": 29
    },
    "meta": {
      "source": "/home/n/go/src/github.com/elastic/beats/filebeat/test.log",
      "identifier_name": "native"
    }
  }
}

Even if the file is the same, changing the input ID will change the key: "k": "filestream::my-filestream-id::native::8787338-65029".

That's actually one of the benefits of Filestream over the Log input: because the input ID is part of the state ID, we can have different instances of the input harvesting the same file, as long as their input IDs are different.
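To make that concrete, here is a rough sketch (reusing the key format from the entry above, with made-up input IDs and identity values) of how the state key changes when only the input ID changes, even though the file itself is untouched:

# Illustrative only: same file, same file_identity value, different input IDs.
old_state_key: "filestream::container-log-web-0-abc123::native::8787338-65029"
new_state_key: "filestream::container-log-prod-web-0-nginx::native::8787338-65029"
# The keys differ, so the second input finds no existing state for the file and
# re-ingests it from offset 0.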

Member Author

Quoting from k8s docs https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/#when-to-use-multiple-namespaces

Namespaces provide a scope for names. Names of resources need to be unique within a namespace, but not across namespaces.
Hence, given a namespace + pod name pair, it's guaranteed to be unique in the cluster (as opposed to the previous ID, which was NOT namespaced).

Within a pod spec, container names must be unique, so once the pod is created the tuple (namespace, pod name, container name) is unique, and the input definition does not change with container restarts.
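A small illustration of that argument, with made-up names: the same pod name can exist in two namespaces, yet the rendered stream IDs stay distinct, and a container restart changes none of the three components.

# Two pods named "web-0", one per namespace, each with a container "nginx":
- namespace: staging
  pod: web-0
  container: nginx
  stream_id: kubernetes-container-logs-staging-web-0-nginx
- namespace: prod
  pod: web-0
  container: nginx
  stream_id: kubernetes-container-logs-prod-web-0-nginx
# A restart only changes ${kubernetes.container.id}, which the new ID no longer references.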

Contributor

Thanks for checking the docs and clarifying, @pchila!!

Then ignore the change I suggested.

The only issue that persists is the ID changing when the Helm chart is upgraded... That will cause re-ingestion of files, even with fingerprint.

data_stream:
dataset: kubernetes.container_logs
type: logs
Member

As long as this translates to a running filestream input and we don't switch from 8.x to 9.x, the change in this PR should cause no re-indexing.

Starting with 9.0 we will have fingerprint as the default file identity, which is based on the file content, not its filesystem metadata.
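For reference, the same behaviour can already be opted into per stream on 8.x; a minimal sketch, assuming the standard filestream fingerprint options are passed through to the stream unchanged (hash offset/length left at their defaults):

- id: kubernetes-container-logs-${kubernetes.namespace}-${kubernetes.pod.name}-${kubernetes.container.name}
  data_stream:
    dataset: kubernetes.container_logs
    type: logs
  # Identify files by a hash of their leading bytes instead of inode + device ID.
  file_identity.fingerprint: ~
  prospector.scanner.fingerprint.enabled: true
  paths:
    - /var/log/pods/${kubernetes.namespace}_${kubernetes.pod.name}_${kubernetes.pod.uid}/${kubernetes.container.name}/*.log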

Contributor

Does this mean that an upgrade from 8.x to 9.x will result in a re-index of all files? That would be very bad.

Contributor

Does this mean that an upgrade from 8.x to 9.x will result in a re-index of all files? That would be very bad.

No, we also implemented a feature to migrate the state from the old default (native, which uses inode + device ID) to the new default, fingerprint. If you're curious, here is the PR.

paths:
- '/var/log/containers/*${kubernetes.container.id}.log'
- '/var/log/pods/${kubernetes.namespace}_${kubernetes.pod.name}_${kubernetes.pod.uid}/${kubernetes.container.name}/*.log'
prospector.scanner.symlinks: {{ dig "vars" "symlinks" true .Values.kubernetes.containers.logs }}
parsers:
- container:
@@ -354,7 +354,7 @@ data:
condition: '${host.platform} == ''windows'''
ignore_older: 72h
# Input ID allowing Elastic Agent to track the state of this input. Must be unique.
- id: container-log-${kubernetes.pod.name}-${kubernetes.container.id}
- id: container-log-${kubernetes.namespace}-${kubernetes.pod.name}-${kubernetes.container.name}
type: filestream
use_output: default
meta:
@@ -366,7 +366,7 @@ data:
streams:
# Stream ID for this data stream allowing Filebeat to track the state of the ingested files. Must be unique.
# Each filestream data stream creates a separate instance of the Filebeat filestream input.
- id: container-log-${kubernetes.pod.name}-${kubernetes.container.id}
- id: container-log-${kubernetes.namespace}-${kubernetes.pod.name}-${kubernetes.container.name}
data_stream:
dataset: kubernetes.container_logs
type: logs
@@ -381,7 +381,7 @@ data:
# negate: true
# match: after
paths:
- /var/log/containers/*${kubernetes.container.id}.log
- /var/log/pods/${kubernetes.namespace}_${kubernetes.pod.name}_${kubernetes.pod.uid}/${kubernetes.container.name}/*.log
- id: audit-log
type: filestream
use_output: default
@@ -122,7 +122,7 @@ data:
dataset: system.system
condition: '${host.platform} == ''windows'''
ignore_older: 72h
- id: container-log-${kubernetes.pod.name}-${kubernetes.container.id}
- id: container-log-${kubernetes.namespace}-${kubernetes.pod.name}-${kubernetes.container.name}
type: filestream
use_output: default
meta:
@@ -146,7 +146,7 @@ data:
# negate: true
# match: after
paths:
- /var/log/containers/*${kubernetes.container.id}.log
- /var/log/pods/${kubernetes.namespace}_${kubernetes.pod.name}_${kubernetes.pod.uid}/${kubernetes.container.name}/*.log
- id: audit-log
type: filestream
use_output: default
@@ -122,7 +122,7 @@ data:
dataset: system.system
condition: '${host.platform} == ''windows'''
ignore_older: 72h
- id: container-log-${kubernetes.pod.name}-${kubernetes.container.id}
- id: container-log-${kubernetes.namespace}-${kubernetes.pod.name}-${kubernetes.container.name}
type: filestream
use_output: default
meta:
@@ -146,7 +146,7 @@ data:
# negate: true
# match: after
paths:
- /var/log/containers/*${kubernetes.container.id}.log
- /var/log/pods/${kubernetes.namespace}_${kubernetes.pod.name}_${kubernetes.pod.uid}/${kubernetes.container.name}/*.log
- id: audit-log
type: filestream
use_output: default
6 changes: 3 additions & 3 deletions deploy/kubernetes/elastic-agent-standalone-kubernetes.yaml
@@ -351,7 +351,7 @@ data:
condition: '${host.platform} == ''windows'''
ignore_older: 72h
# Input ID allowing Elastic Agent to track the state of this input. Must be unique.
- id: container-log-${kubernetes.pod.name}-${kubernetes.container.id}
- id: container-log-${kubernetes.namespace}-${kubernetes.pod.name}-${kubernetes.container.name}
type: filestream
use_output: default
meta:
@@ -363,7 +363,7 @@ data:
streams:
# Stream ID for this data stream allowing Filebeat to track the state of the ingested files. Must be unique.
# Each filestream data stream creates a separate instance of the Filebeat filestream input.
- id: container-log-${kubernetes.pod.name}-${kubernetes.container.id}
- id: container-log-${kubernetes.namespace}-${kubernetes.pod.name}-${kubernetes.container.name}
data_stream:
dataset: kubernetes.container_logs
type: logs
@@ -378,7 +378,7 @@ data:
# negate: true
# match: after
paths:
- /var/log/containers/*${kubernetes.container.id}.log
- /var/log/pods/${kubernetes.namespace}_${kubernetes.pod.name}_${kubernetes.pod.uid}/${kubernetes.container.name}/*.log
- id: audit-log
type: filestream
use_output: default
@@ -351,7 +351,7 @@ data:
condition: '${host.platform} == ''windows'''
ignore_older: 72h
# Input ID allowing Elastic Agent to track the state of this input. Must be unique.
- id: container-log-${kubernetes.pod.name}-${kubernetes.container.id}
- id: container-log-${kubernetes.namespace}-${kubernetes.pod.name}-${kubernetes.container.name}
type: filestream
use_output: default
meta:
@@ -363,7 +363,7 @@ data:
streams:
# Stream ID for this data stream allowing Filebeat to track the state of the ingested files. Must be unique.
# Each filestream data stream creates a separate instance of the Filebeat filestream input.
- id: container-log-${kubernetes.pod.name}-${kubernetes.container.id}
- id: container-log-${kubernetes.namespace}-${kubernetes.pod.name}-${kubernetes.container.name}
data_stream:
dataset: kubernetes.container_logs
type: logs
@@ -378,7 +378,7 @@ data:
# negate: true
# match: after
paths:
- /var/log/containers/*${kubernetes.container.id}.log
- /var/log/pods/${kubernetes.namespace}_${kubernetes.pod.name}_${kubernetes.pod.uid}/${kubernetes.container.name}/*.log
- id: audit-log
type: filestream
use_output: default
@@ -122,7 +122,7 @@ data:
dataset: system.system
condition: '${host.platform} == ''windows'''
ignore_older: 72h
- id: container-log-${kubernetes.pod.name}-${kubernetes.container.id}
- id: container-log-${kubernetes.namespace}-${kubernetes.pod.name}-${kubernetes.container.name}
type: filestream
use_output: default
meta:
@@ -146,7 +146,7 @@ data:
# negate: true
# match: after
paths:
- /var/log/containers/*${kubernetes.container.id}.log
- /var/log/pods/${kubernetes.namespace}_${kubernetes.pod.name}_${kubernetes.pod.uid}/${kubernetes.container.name}/*.log
- id: audit-log
type: filestream
use_output: default
@@ -330,7 +330,7 @@ data:
dataset: system.system
condition: '${host.platform} == ''windows'''
ignore_older: 72h
- id: container-log-${kubernetes.pod.name}-${kubernetes.container.id}
- id: container-log-${kubernetes.namespace}-${kubernetes.pod.name}-${kubernetes.container.name}
type: filestream
use_output: default
meta:
@@ -354,7 +354,7 @@ data:
# negate: true
# match: after
paths:
- /var/log/containers/*${kubernetes.container.id}.log
- /var/log/pods/${kubernetes.namespace}_${kubernetes.pod.name}_${kubernetes.pod.uid}/${kubernetes.container.name}/*.log
- id: audit-log
type: filestream
use_output: default
@@ -243,7 +243,7 @@ inputs:
dataset: system.system
condition: '${host.platform} == ''windows'''
ignore_older: 72h
- id: container-log-${kubernetes.pod.name}-${kubernetes.container.id}
- id: container-log-${kubernetes.namespace}-${kubernetes.pod.name}-${kubernetes.container.name}
type: filestream
use_output: default
meta:
@@ -253,15 +253,15 @@
data_stream:
namespace: default
streams:
- id: container-log-${kubernetes.pod.name}-${kubernetes.container.id}
- id: container-log-${kubernetes.namespace}-${kubernetes.pod.name}-${kubernetes.container.name}
data_stream:
dataset: kubernetes.container_logs
type: logs
prospector.scanner.symlinks: true
parsers:
- container: ~
paths:
- /var/log/containers/*${kubernetes.container.id}.log
- /var/log/pods/${kubernetes.namespace}_${kubernetes.pod.name}_${kubernetes.pod.uid}/${kubernetes.container.name}/*.log
- id: audit-log
type: filestream
use_output: default