Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add registry to Filebeat's diagnostic #41795

Merged
merged 31 commits into from
Jan 23, 2025
Merged
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
862af71
[WIP] Add registry to Filebeat's diagnostic
belimawr Nov 26, 2024
d8d42a4
Move the hook to a better place
belimawr Nov 26, 2024
65fba59
Add integration tests
belimawr Nov 26, 2024
4c83385
Add build tag
belimawr Nov 27, 2024
01774ed
Remove temporary file
belimawr Nov 27, 2024
0e36f68
fix returning errors
belimawr Nov 27, 2024
f741dbe
Merge branch 'main' of github.com:elastic/beats into filestream-regis…
belimawr Nov 27, 2024
a8c5f4f
Improve error handling and fix lint issues
belimawr Nov 27, 2024
df75570
Improve logging
belimawr Nov 27, 2024
d51565a
Fix error handling
belimawr Nov 27, 2024
7a4ce56
Fix error handling
belimawr Nov 27, 2024
74c042a
Add changelog
belimawr Nov 27, 2024
1ddc906
Merge branch 'main' of github.com:elastic/beats into filestream-regis…
belimawr Nov 27, 2024
14c8f99
Merge branch 'main' of github.com:elastic/beats into filestream-regis…
belimawr Dec 2, 2024
9d8b2a1
Merge branch 'main' of github.com:elastic/beats into filestream-regis…
belimawr Dec 19, 2024
2ec1ae6
Ensure we only get the registry files
belimawr Dec 19, 2024
affd18b
Add a 20mb limit and improve tests
belimawr Dec 19, 2024
82b42b8
Merge branch 'main' of github.com:elastic/beats into filestream-regis…
belimawr Dec 19, 2024
464ea06
mage check
belimawr Dec 19, 2024
d44c609
Support windows path separator
belimawr Dec 19, 2024
2e3d85d
Merge branch 'main' of github.com:elastic/beats into filestream-regis…
belimawr Jan 2, 2025
add8e21
Update notice to 2025
belimawr Jan 2, 2025
608d841
Merge branch 'main' of github.com:elastic/beats into filestream-regis…
belimawr Jan 2, 2025
2c45a4c
fix otel API
belimawr Jan 2, 2025
29f7f8e
Merge branch 'main' of github.com:elastic/beats into filestream-regis…
belimawr Jan 3, 2025
f63dc6d
Merge branch 'main' of github.com:elastic/beats into filestream-regis…
belimawr Jan 6, 2025
216a5d1
Disable fingerprint on tests
belimawr Jan 6, 2025
7f3e5ae
Stop using `init()` and generate `registryFileRegExps` once needed
belimawr Jan 7, 2025
44c25ce
Add a test for an "emtpy" registry
belimawr Jan 7, 2025
0400ca9
Merge branch 'main' of github.com:elastic/beats into filestream-regis…
belimawr Jan 22, 2025
5c43e5a
Reformat code and fix logging
belimawr Jan 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff]
- Add ability to remove request trace logs from entityanalytics input. {pull}40004[40004]
- Refactor & cleanup with updates to default values and documentation. {pull}41834[41834]
- Update CEL mito extensions to v1.16.0. {pull}41727[41727]
- Filebeat's registry is now added to the Elastic-Agent diagnostics bundle. {issue}33238[33238] {pull}41795[41795]
- Add `unifiedlogs` input for MacOS. {pull}41791[41791]
- Add evaluation state dump debugging option to CEL input. {pull}41335[41335]
- Added support for retry configuration in GCS input. {issue}11580[11580] {pull}41862[41862]
Expand Down
196 changes: 196 additions & 0 deletions filebeat/beater/diagnostics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package beater

import (
"archive/tar"
"bytes"
"compress/gzip"
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
"regexp"
"strings"

"github.com/elastic/elastic-agent-libs/logp"
"github.com/elastic/elastic-agent-libs/paths"
)

func init() {
preFilesList := [][]string{
[]string{"^registry$"},
[]string{"^registry", "filebeat$"},
[]string{"^registry", "filebeat", "meta\\.json$"},
[]string{"^registry", "filebeat", "log\\.json$"},
[]string{"^registry", "filebeat", "active\\.dat$"},
[]string{"^registry", "filebeat", "[[:digit:]]*\\.json$"},
}

for _, lst := range preFilesList {
var path string
if filepath.Separator == '\\' {
belimawr marked this conversation as resolved.
Show resolved Hide resolved
path = strings.Join(lst, `\\`)
} else {
path = filepath.Join(lst...)
}
registryFileRegExps = append(registryFileRegExps, regexp.MustCompile(path))
}
}

func gzipRegistry() []byte {
logger := logp.L().Named("diagnostics")
buf := bytes.Buffer{}
dataPath := paths.Resolve(paths.Data, "")
registryPath := filepath.Join(dataPath, "registry")
belimawr marked this conversation as resolved.
Show resolved Hide resolved
f, err := os.CreateTemp("", "filebeat-registry-*.tar")
if err != nil {
logger.Errorw("cannot create temporary registry archive", "error.message", err)
}
// Close the file, we just need the empty file created to use it later
f.Close()
defer logger.Debug("finished gziping Filebeat's registry")

defer func() {
if err := os.Remove(f.Name()); err != nil {
logp.L().Named("diagnostics").Warnf("cannot remove temporary registry archive '%s': '%s'", f.Name(), err)
}
}()

logger.Debugf("temporary file '%s' created", f.Name())
if err := tarFolder(logger, registryPath, f.Name()); err != nil {
logger.Errorw(fmt.Sprintf("cannot archive Filebeat's registry at '%s'", f.Name()), "error.message", err)
}

if err := gzipFile(logger, f.Name(), &buf); err != nil {
logger.Errorw("cannot gzip Filebeat's registry", "error.message", err)
}

// if the final file is too large, skip it
if buf.Len() >= 20_000_000 { // 20 Mb
logger.Warnf("registry is too large for diagnostics, %dmb bytes > 20mb", buf.Len()/1_000_000)
return nil
}

return buf.Bytes()
}

// gzipFile gzips the file at src, writing the compressed data to dst.
//
// The gzip header's Name field is set to the base name of src. An error is
// returned if src cannot be opened, if copying the data fails, or if the gzip
// stream cannot be finalised. The logger argument is currently unused.
func gzipFile(logger *logp.Logger, src string, dst io.Writer) (err error) {
	reader, err := os.Open(src)
	if err != nil {
		return fmt.Errorf("cannot open '%s': '%w'", src, err)
	}
	defer reader.Close()

	writer := gzip.NewWriter(dst)
	// Close flushes pending data and writes the gzip trailer, so its error
	// must be reported, otherwise a truncated stream looks like a success.
	defer func() {
		if closeErr := writer.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("cannot finish gzip stream for '%s': '%w'", src, closeErr)
		}
	}()
	writer.Name = filepath.Base(src)

	if _, copyErr := io.Copy(writer, reader); copyErr != nil {
		return fmt.Errorf("cannot gzip file '%s': '%w'", src, copyErr)
	}

	return nil
}

// tarFolder creates a tar archive from the folder src and stores it at dst.
//
// dst must be the full path with extension, e.g: /tmp/foo.tar
// If src is not a folder an error is retruned
func tarFolder(logger *logp.Logger, src, dst string) error {
fullPath, err := filepath.Abs(src)
if err != nil {
return fmt.Errorf("cannot get full path from '%s': '%w'", src, err)
}

tarFile, err := os.Create(dst)
cmacknz marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return fmt.Errorf("cannot create tar file '%s': '%w'", dst, err)
}
defer tarFile.Close()

tarWriter := tar.NewWriter(tarFile)
defer tarWriter.Close()

info, err := os.Stat(fullPath)
if err != nil {
return fmt.Errorf("cannot stat '%s': '%w'", fullPath, err)
}

if !info.IsDir() {
return fmt.Errorf("'%s' is not a directory", fullPath)
}
baseDir := filepath.Base(src)

logger.Debugf("starting to walk '%s'", fullPath)
return filepath.Walk(fullPath, func(path string, info fs.FileInfo, prevErr error) error {
// Stop if there is any errors
if prevErr != nil {
return prevErr
}

pathInTar := filepath.Join(baseDir, strings.TrimPrefix(path, src))
if !matchRegistyFiles(pathInTar) {
return nil
}
header, err := tar.FileInfoHeader(info, info.Name())
if err != nil {
return fmt.Errorf("cannot create tar info header: '%w'", err)
}
header.Name = pathInTar

if err := tarWriter.WriteHeader(header); err != nil {
return fmt.Errorf("cannot write tar header for '%s': '%w'", path, err)
}

if info.IsDir() {
return nil
}

file, err := os.Open(path)
if err != nil {
return fmt.Errorf("cannot open '%s' for reading: '%w", path, err)
}
defer file.Close()

logger.Debugf("adding '%s' to the tar archive", file.Name())
if _, err := io.Copy(tarWriter, file); err != nil {
return fmt.Errorf("cannot read '%s': '%w'", path, err)
}

return nil
})
}

// registryFileRegExps holds the regular expressions a path must match, at
// least one of them, to be considered part of Filebeat's registry.
//
// Regular expressions are used instead of globs because globs cannot express
// the character range needed for the checkpoint file. The slice is filled in
// by the `init` function because Windows path separators need to be escaped.
var registryFileRegExps = []*regexp.Regexp{}

// matchRegistyFiles reports whether path matches any of the expressions in
// registryFileRegExps.
func matchRegistyFiles(path string) bool {
	for i := range registryFileRegExps {
		if registryFileRegExps[i].MatchString(path) {
			return true
		}
	}

	return false
}
62 changes: 62 additions & 0 deletions filebeat/beater/diagnostics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package beater

import (
"fmt"
"path/filepath"
"testing"
)

// TestMatchRegistryFiles checks that matchRegistyFiles accepts the registry
// folders and files and rejects any other path.
func TestMatchRegistryFiles(t *testing.T) {
	testCases := []struct {
		path  string
		match bool
	}{
		// The folders themselves must match so their headers are archived.
		{path: "registry", match: true},
		{path: filepath.Join("registry", "filebeat"), match: true},
		{path: filepath.Join("registry", "filebeat", "49855.json"), match: true},
		{path: filepath.Join("registry", "filebeat", "active.dat"), match: true},
		{path: filepath.Join("registry", "filebeat", "meta.json"), match: true},
		{path: filepath.Join("registry", "filebeat", "log.json"), match: true},

		{path: filepath.Join("registry", "filebeat", "bar.dat"), match: false},
		{path: filepath.Join("registry", "filebeat", "log.txt"), match: false},
		{path: filepath.Join("registry", "42.json"), match: false},
		{path: filepath.Join("nop", "active.dat"), match: false},
	}

	for _, tc := range testCases {
		t.Run(fmt.Sprintf("%s returns %t", tc.path, tc.match), func(t *testing.T) {
			result := matchRegistyFiles(tc.path)
			if result != tc.match {
				t.Errorf(
					"matchRegistyFiles('%s') should return %t, got %t instead",
					tc.path,
					tc.match,
					result)
			}
		})
	}
}
7 changes: 7 additions & 0 deletions filebeat/beater/filebeat.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,13 @@ func newBeater(b *beat.Beat, plugins PluginFactory, rawConfig *conf.C) (beat.Bea
}
return data
})

b.Manager.RegisterDiagnosticHook(
"registry",
"Filebeat's registry",
"registry.tar.gz",
"application/octet-stream",
gzipRegistry)
}

// Add inputs created by the modules
Expand Down
Loading
Loading