diff --git a/NOTICE.txt b/NOTICE.txt index d83e9c9cd1e..9e587b6e433 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -204,6 +204,38 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +-------------------------------------------------------------------------------- +Dependency : github.com/cespare/xxhash/v2 +Version: v2.3.0 +Licence type (autodetected): MIT +-------------------------------------------------------------------------------- + +Contents of probable licence file $GOMODCACHE/github.com/cespare/xxhash/v2@v2.3.0/LICENSE.txt: + +Copyright (c) 2016 Caleb Spare + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + -------------------------------------------------------------------------------- Dependency : github.com/docker/docker Version: v27.2.1+incompatible @@ -22040,38 +22072,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
--------------------------------------------------------------------------------- -Dependency : github.com/cespare/xxhash/v2 -Version: v2.3.0 -Licence type (autodetected): MIT --------------------------------------------------------------------------------- - -Contents of probable licence file $GOMODCACHE/github.com/cespare/xxhash/v2@v2.3.0/LICENSE.txt: - -Copyright (c) 2016 Caleb Spare - -MIT License - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- - -------------------------------------------------------------------------------- Dependency : github.com/chai2010/gettext-go Version: v1.0.2 diff --git a/changelog/fragments/1733238171-use-xxhash-for-ast-hashing.yaml b/changelog/fragments/1733238171-use-xxhash-for-ast-hashing.yaml new file mode 100644 index 00000000000..a6655e8cda6 --- /dev/null +++ b/changelog/fragments/1733238171-use-xxhash-for-ast-hashing.yaml @@ -0,0 +1,32 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: enhancement + +# Change summary; a 80ish characters long description of the change. +summary: Use xxHash for hashing AST nodes + +# Long description; in case the summary is not enough to describe the change +# this field accommodate a description without length limits. +# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment. +#description: + +# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc. +component: elastic-agent + +# PR URL; optional; the PR number that added the changeset. +# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. 
+#pr: https://github.com/owner/repo/1234 + +# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present is automatically filled by the tooling with the issue linked to the PR number. +#issue: https://github.com/owner/repo/1234 diff --git a/go.mod b/go.mod index bec26559475..67b76e3b21f 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,12 @@ require ( github.com/blakesmith/ar v0.0.0-20150311145944-8bd4349a67f2 github.com/cavaliergopher/rpm v1.2.0 github.com/cenkalti/backoff/v4 v4.3.0 +<<<<<<< HEAD github.com/docker/docker v27.2.1+incompatible +======= + github.com/cespare/xxhash/v2 v2.3.0 + github.com/docker/docker v27.3.1+incompatible +>>>>>>> 9c1311073b (Use xxhash instead of sha256 for hashing AST nodes (#6192)) github.com/docker/go-units v0.5.0 github.com/dolmen-go/contextio v0.0.0-20200217195037-68fc5150bcd5 github.com/elastic/elastic-agent-autodiscover v0.9.0 @@ -168,7 +173,6 @@ require ( github.com/aws/aws-sdk-go v1.55.5 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/bmatcuk/doublestar/v4 v4.7.1 // indirect - github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/chai2010/gettext-go v1.0.2 // indirect github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20 // indirect github.com/cockroachdb/errors v1.11.3 // indirect diff --git a/internal/pkg/agent/transpiler/ast.go b/internal/pkg/agent/transpiler/ast.go index 1fae370ce40..75bcf79ca1f 100644 --- a/internal/pkg/agent/transpiler/ast.go +++ b/internal/pkg/agent/transpiler/ast.go @@ -5,7 +5,6 @@ package transpiler import ( - "bytes" "crypto/sha256" "encoding/base64" "fmt" @@ -14,6 +13,8 @@ import ( "strconv" "strings" + "github.com/cespare/xxhash/v2" + "github.com/elastic/elastic-agent/internal/pkg/eql" ) @@ -58,7 +59,18 @@ type Node interface { // Hash compute a sha256 hash of the current node and recursively call any children. Hash() []byte +<<<<<<< HEAD // Apply apply the current vars, returning the new value for the node. 
+======= + // Hash64With recursively computes the given hash for the Node and its children + Hash64With(h *xxhash.Digest) error + + // Vars adds to the array with the variables identified in the node. Returns the array in case + // the capacity of the array had to be changed. + Vars([]string) []string + + // Apply applies the current vars, returning the new value for the node. This does not modify the original Node. +>>>>>>> 9c1311073b (Use xxhash instead of sha256 for hashing AST nodes (#6192)) Apply(*Vars) (Node, error) // Processors returns any attached processors, because of variable substitution. @@ -162,7 +174,30 @@ func (d *Dict) Hash() []byte { return h.Sum(nil) } +<<<<<<< HEAD // Apply applies the vars to all the nodes in the dictionary. +======= +// Hash64With recursively computes the given hash for the Node and its children +func (d *Dict) Hash64With(h *xxhash.Digest) error { + for _, v := range d.value { + if err := v.Hash64With(h); err != nil { + return err + } + } + return nil +} + +// Vars returns a list of all variables referenced in the dictionary. +func (d *Dict) Vars(vars []string) []string { + for _, v := range d.value { + k := v.(*Key) + vars = k.Vars(vars) + } + return vars +} + +// Apply applies the vars to all the nodes in the dictionary. This does not modify the original dictionary. +>>>>>>> 9c1311073b (Use xxhash instead of sha256 for hashing AST nodes (#6192)) func (d *Dict) Apply(vars *Vars) (Node, error) { nodes := make([]Node, 0, len(d.value)) for _, v := range d.value { @@ -277,7 +312,30 @@ func (k *Key) Hash() []byte { return h.Sum(nil) } +<<<<<<< HEAD // Apply applies the vars to the value. +======= +// Hash64With recursively computes the given hash for the Node and its children +func (k *Key) Hash64With(h *xxhash.Digest) error { + if _, err := h.WriteString(k.name); err != nil { + return err + } + if k.value != nil { + return k.value.Hash64With(h) + } + return nil +} + +// Vars returns a list of all variables referenced in the value. 
+func (k *Key) Vars(vars []string) []string { + if k.value == nil { + return vars + } + return k.value.Vars(vars) +} + +// Apply applies the vars to the value. This does not modify the original node. +>>>>>>> 9c1311073b (Use xxhash instead of sha256 for hashing AST nodes (#6192)) func (k *Key) Apply(vars *Vars) (Node, error) { if k.value == nil { return k, nil @@ -352,6 +410,16 @@ func (l *List) Hash() []byte { return h.Sum(nil) } +// Hash64With recursively computes the given hash for the Node and its children +func (l *List) Hash64With(h *xxhash.Digest) error { + for _, v := range l.value { + if err := v.Hash64With(h); err != nil { + return err + } + } + return nil +} + // Find takes an index and return the values at that index. func (l *List) Find(idx string) (Node, bool) { i, err := strconv.Atoi(idx) @@ -472,7 +540,27 @@ func (s *StrVal) Hash() []byte { return []byte(s.value) } +<<<<<<< HEAD // Apply applies the vars to the string value. +======= +// Hash64With recursively computes the given hash for the Node and its children +func (s *StrVal) Hash64With(h *xxhash.Digest) error { + _, err := h.WriteString(s.value) + return err +} + +// Vars returns a list of all variables referenced in the string. +func (s *StrVal) Vars(vars []string) []string { + // errors are ignored (if there is an error determining the vars, computing the policy will also error) + _, _ = replaceVars(s.value, func(variable string) (Node, Processors, bool) { + vars = append(vars, variable) + return nil, nil, false + }, false) + return vars +} + +// Apply applies the vars to the string value. This does not modify the original string. 
+>>>>>>> 9c1311073b (Use xxhash instead of sha256 for hashing AST nodes (#6192)) func (s *StrVal) Apply(vars *Vars) (Node, error) { return vars.Replace(s.value) } @@ -533,6 +621,12 @@ func (s *IntVal) Hash() []byte { return []byte(s.String()) } +// Hash64With recursively computes the given hash for the Node and its children +func (s *IntVal) Hash64With(h *xxhash.Digest) error { + _, err := h.WriteString(s.String()) + return err +} + // Processors returns any linked processors that are now connected because of Apply. func (s *IntVal) Processors() Processors { return s.processors @@ -584,6 +678,20 @@ func (s *UIntVal) Hash() []byte { return []byte(s.String()) } +<<<<<<< HEAD +======= +// Hash64With recursively computes the given hash for the Node and its children +func (s *UIntVal) Hash64With(h *xxhash.Digest) error { + _, err := h.WriteString(s.String()) + return err +} + +// Vars does nothing. Cannot have variable in a UIntVal. +func (s *UIntVal) Vars(vars []string) []string { + return vars +} + +>>>>>>> 9c1311073b (Use xxhash instead of sha256 for hashing AST nodes (#6192)) // Apply does nothing. func (s *UIntVal) Apply(_ *Vars) (Node, error) { return s, nil @@ -638,7 +746,18 @@ func (s *FloatVal) ShallowClone() Node { // Hash return a string representation of the value, we try to return the minimal precision we can. func (s *FloatVal) Hash() []byte { - return []byte(strconv.FormatFloat(s.value, 'f', -1, 64)) + return []byte(s.hashString()) +} + +// Hash64With recursively computes the given hash for the Node and its children +func (s *FloatVal) Hash64With(h *xxhash.Digest) error { + _, err := h.WriteString(s.hashString()) + return err +} + +// hashString returns a string representation of s suitable for hashing. +func (s *FloatVal) hashString() string { + return strconv.FormatFloat(s.value, 'f', -1, 64) } // Apply does nothing. 
@@ -703,6 +822,26 @@ func (s *BoolVal) Hash() []byte { return falseVal } +<<<<<<< HEAD +======= +// Hash64With recursively computes the given hash for the Node and its children +func (s *BoolVal) Hash64With(h *xxhash.Digest) error { + var encodedBool []byte + if s.value { + encodedBool = trueVal + } else { + encodedBool = falseVal + } + _, err := h.Write(encodedBool) + return err +} + +// Vars does nothing. Cannot have variable in a BoolVal. +func (s *BoolVal) Vars(vars []string) []string { + return vars +} + +>>>>>>> 9c1311073b (Use xxhash instead of sha256 for hashing AST nodes (#6192)) // Apply does nothing. func (s *BoolVal) Apply(_ *Vars) (Node, error) { return s, nil @@ -818,6 +957,11 @@ func (a *AST) Hash() []byte { return a.root.Hash() } +// Hash64With recursively computes the given hash for the Node and its children +func (a *AST) Hash64With(h *xxhash.Digest) error { + return a.root.Hash64With(h) +} + // HashStr return the calculated hash as a base64 url encoded string. func (a *AST) HashStr() string { return base64.URLEncoding.EncodeToString(a.root.Hash()) @@ -828,7 +972,13 @@ func (a *AST) Equal(other *AST) bool { if a.root == nil || other.root == nil { return a.root == other.root } - return bytes.Equal(a.Hash(), other.Hash()) + hasher := xxhash.New() + _ = a.Hash64With(hasher) + thisHash := hasher.Sum64() + hasher.Reset() + _ = other.Hash64With(hasher) + otherHash := hasher.Sum64() + return thisHash == otherHash } // Lookup looks for a value from the AST. 
diff --git a/internal/pkg/agent/transpiler/utils.go b/internal/pkg/agent/transpiler/utils.go index 05ee174f4d3..2d2eb41ab3e 100644 --- a/internal/pkg/agent/transpiler/utils.go +++ b/internal/pkg/agent/transpiler/utils.go @@ -7,6 +7,8 @@ package transpiler import ( "errors" "fmt" + + "github.com/cespare/xxhash/v2" ) const ( @@ -23,7 +25,8 @@ func RenderInputs(inputs Node, varsArray []*Vars) (Node, error) { return nil, fmt.Errorf("inputs must be an array") } var nodes []varIDMap - nodesMap := map[string]*Dict{} + nodesMap := map[uint64]*Dict{} + hasher := xxhash.New() for _, vars := range varsArray { for _, node := range l.Value().([]Node) { dict, ok := node.Clone().(*Dict) @@ -55,7 +58,9 @@ func RenderInputs(inputs Node, varsArray []*Vars) (Node, error) { continue } } - hash := string(dict.Hash()) + hasher.Reset() + _ = dict.Hash64With(hasher) + hash := hasher.Sum64() _, exists := nodesMap[hash] if !exists { nodesMap[hash] = dict