diff --git a/NOTICE.txt b/NOTICE.txt index a417faa2f16..9990a73b44f 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -204,6 +204,38 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +-------------------------------------------------------------------------------- +Dependency : github.com/cespare/xxhash/v2 +Version: v2.3.0 +Licence type (autodetected): MIT +-------------------------------------------------------------------------------- + +Contents of probable licence file $GOMODCACHE/github.com/cespare/xxhash/v2@v2.3.0/LICENSE.txt: + +Copyright (c) 2016 Caleb Spare + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + -------------------------------------------------------------------------------- Dependency : github.com/docker/docker Version: v27.3.1+incompatible @@ -33769,38 +33801,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
--------------------------------------------------------------------------------- -Dependency : github.com/cespare/xxhash/v2 -Version: v2.3.0 -Licence type (autodetected): MIT --------------------------------------------------------------------------------- - -Contents of probable licence file $GOMODCACHE/github.com/cespare/xxhash/v2@v2.3.0/LICENSE.txt: - -Copyright (c) 2016 Caleb Spare - -MIT License - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- - -------------------------------------------------------------------------------- Dependency : github.com/chai2010/gettext-go Version: v1.0.2 diff --git a/changelog/fragments/1733238171-use-xxhash-for-ast-hashing.yaml b/changelog/fragments/1733238171-use-xxhash-for-ast-hashing.yaml new file mode 100644 index 00000000000..a6655e8cda6 --- /dev/null +++ b/changelog/fragments/1733238171-use-xxhash-for-ast-hashing.yaml @@ -0,0 +1,32 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: enhancement + +# Change summary; an 80ish-character description of the change. +summary: Use xxHash for hashing AST nodes + +# Long description; in case the summary is not enough to describe the change +# this field accommodates a description without length limits. +# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment. +#description: + +# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc. +component: elastic-agent + +# PR URL; optional; the PR number that added the changeset. +# If not present, it is automatically filled by the tooling, which finds the PR where this changelog fragment was added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. 
+#pr: https://github.com/owner/repo/1234 + +# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present, it is automatically filled by the tooling with the issue linked to the PR number. +#issue: https://github.com/owner/repo/1234 diff --git a/go.mod b/go.mod index cdec9a4dd6e..872783a8ed5 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,7 @@ require ( github.com/blakesmith/ar v0.0.0-20150311145944-8bd4349a67f2 github.com/cavaliergopher/rpm v1.2.0 github.com/cenkalti/backoff/v4 v4.3.0 + github.com/cespare/xxhash/v2 v2.3.0 github.com/docker/docker v27.3.1+incompatible github.com/docker/go-units v0.5.0 github.com/dolmen-go/contextio v0.0.0-20200217195037-68fc5150bcd5 @@ -222,7 +223,6 @@ require ( github.com/aws/smithy-go v1.20.4 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/bmatcuk/doublestar/v4 v4.7.1 // indirect - github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/chai2010/gettext-go v1.0.2 // indirect github.com/cloudfoundry-community/go-cfclient v0.0.0-20190808214049-35bcce23fc5f // indirect github.com/cloudfoundry/noaa v2.1.0+incompatible // indirect diff --git a/internal/pkg/agent/transpiler/ast.go b/internal/pkg/agent/transpiler/ast.go index 149818d502b..c92417aff38 100644 --- a/internal/pkg/agent/transpiler/ast.go +++ b/internal/pkg/agent/transpiler/ast.go @@ -5,7 +5,6 @@ package transpiler import ( - "bytes" "crypto/sha256" "encoding/base64" "fmt" @@ -14,6 +13,8 @@ import ( "strconv" "strings" + "github.com/cespare/xxhash/v2" + "github.com/elastic/elastic-agent/internal/pkg/eql" ) @@ -58,6 +59,9 @@ type Node interface { // Hash compute a sha256 hash of the current node and recursively call any children. Hash() []byte + // Hash64With recursively computes the given hash for the Node and its children + Hash64With(h *xxhash.Digest) error + // Vars adds to the array with the variables identified in the node. Returns the array in-case // the capacity of the array had to be changed. 
Vars([]string) []string @@ -166,6 +170,16 @@ func (d *Dict) Hash() []byte { return h.Sum(nil) } +// Hash64With recursively computes the given hash for the Node and its children +func (d *Dict) Hash64With(h *xxhash.Digest) error { + for _, v := range d.value { + if err := v.Hash64With(h); err != nil { + return err + } + } + return nil +} + // Vars returns a list of all variables referenced in the dictionary. func (d *Dict) Vars(vars []string) []string { for _, v := range d.value { @@ -290,6 +304,17 @@ func (k *Key) Hash() []byte { return h.Sum(nil) } +// Hash64With recursively computes the given hash for the Node and its children +func (k *Key) Hash64With(h *xxhash.Digest) error { + if _, err := h.WriteString(k.name); err != nil { + return err + } + if k.value != nil { + return k.value.Hash64With(h) + } + return nil +} + // Vars returns a list of all variables referenced in the value. func (k *Key) Vars(vars []string) []string { if k.value == nil { @@ -373,6 +398,16 @@ func (l *List) Hash() []byte { return h.Sum(nil) } +// Hash64With recursively computes the given hash for the Node and its children +func (l *List) Hash64With(h *xxhash.Digest) error { + for _, v := range l.value { + if err := v.Hash64With(h); err != nil { + return err + } + } + return nil +} + // Find takes an index and return the values at that index. func (l *List) Find(idx string) (Node, bool) { i, err := strconv.Atoi(idx) @@ -501,6 +536,12 @@ func (s *StrVal) Hash() []byte { return []byte(s.value) } +// Hash64With recursively computes the given hash for the Node and its children +func (s *StrVal) Hash64With(h *xxhash.Digest) error { + _, err := h.WriteString(s.value) + return err +} + // Vars returns a list of all variables referenced in the string. 
func (s *StrVal) Vars(vars []string) []string { // errors are ignored (if there is an error determine the vars it will also error computing the policy) @@ -577,6 +618,12 @@ func (s *IntVal) Hash() []byte { return []byte(s.String()) } +// Hash64With recursively computes the given hash for the Node and its children +func (s *IntVal) Hash64With(h *xxhash.Digest) error { + _, err := h.WriteString(s.String()) + return err +} + // Processors returns any linked processors that are now connected because of Apply. func (s *IntVal) Processors() Processors { return s.processors @@ -628,6 +675,12 @@ func (s *UIntVal) Hash() []byte { return []byte(s.String()) } +// Hash64With recursively computes the given hash for the Node and its children +func (s *UIntVal) Hash64With(h *xxhash.Digest) error { + _, err := h.WriteString(s.String()) + return err +} + // Vars does nothing. Cannot have variable in an UIntVal. func (s *UIntVal) Vars(vars []string) []string { return vars @@ -687,7 +740,18 @@ func (s *FloatVal) ShallowClone() Node { // Hash return a string representation of the value, we try to return the minimal precision we can. func (s *FloatVal) Hash() []byte { - return []byte(strconv.FormatFloat(s.value, 'f', -1, 64)) + return []byte(s.hashString()) +} + +// Hash64With recursively computes the given hash for the Node and its children +func (s *FloatVal) Hash64With(h *xxhash.Digest) error { + _, err := h.WriteString(s.hashString()) + return err +} + +// hashString returns a string representation of s suitable for hashing. +func (s *FloatVal) hashString() string { + return strconv.FormatFloat(s.value, 'f', -1, 64) } // Vars does nothing. Cannot have variable in an FloatVal. 
@@ -757,6 +821,18 @@ func (s *BoolVal) Hash() []byte { return falseVal } +// Hash64With recursively computes the given hash for the Node and its children +func (s *BoolVal) Hash64With(h *xxhash.Digest) error { + var encodedBool []byte + if s.value { + encodedBool = trueVal + } else { + encodedBool = falseVal + } + _, err := h.Write(encodedBool) + return err +} + // Vars does nothing. Cannot have variable in an BoolVal. func (s *BoolVal) Vars(vars []string) []string { return vars @@ -877,6 +953,11 @@ func (a *AST) Hash() []byte { return a.root.Hash() } +// Hash64With recursively computes the given hash for the Node and its children +func (a *AST) Hash64With(h *xxhash.Digest) error { + return a.root.Hash64With(h) +} + // HashStr return the calculated hash as a base64 url encoded string. func (a *AST) HashStr() string { return base64.URLEncoding.EncodeToString(a.root.Hash()) @@ -887,7 +968,13 @@ func (a *AST) Equal(other *AST) bool { if a.root == nil || other.root == nil { return a.root == other.root } - return bytes.Equal(a.Hash(), other.Hash()) + hasher := xxhash.New() + _ = a.Hash64With(hasher) + thisHash := hasher.Sum64() + hasher.Reset() + _ = other.Hash64With(hasher) + otherHash := hasher.Sum64() + return thisHash == otherHash } // Lookup looks for a value from the AST. 
diff --git a/internal/pkg/agent/transpiler/utils.go b/internal/pkg/agent/transpiler/utils.go index 05ee174f4d3..2d2eb41ab3e 100644 --- a/internal/pkg/agent/transpiler/utils.go +++ b/internal/pkg/agent/transpiler/utils.go @@ -7,6 +7,8 @@ package transpiler import ( "errors" "fmt" + + "github.com/cespare/xxhash/v2" ) const ( @@ -23,7 +25,8 @@ func RenderInputs(inputs Node, varsArray []*Vars) (Node, error) { return nil, fmt.Errorf("inputs must be an array") } var nodes []varIDMap - nodesMap := map[string]*Dict{} + nodesMap := map[uint64]*Dict{} + hasher := xxhash.New() for _, vars := range varsArray { for _, node := range l.Value().([]Node) { dict, ok := node.Clone().(*Dict) @@ -55,7 +58,9 @@ func RenderInputs(inputs Node, varsArray []*Vars) (Node, error) { continue } } - hash := string(dict.Hash()) + hasher.Reset() + _ = dict.Hash64With(hasher) + hash := hasher.Sum64() _, exists := nodesMap[hash] if !exists { nodesMap[hash] = dict