From aa82226f9c12bd5124aab8244de77b8c0179c8c8 Mon Sep 17 00:00:00 2001 From: Shawn Hurley Date: Fri, 22 Nov 2024 12:52:56 -0500 Subject: [PATCH] :sparkles: Adding a Scope to the running of the rule engine (#723) The scope will allow for users to call the engine directly will be able to add information to the condition context. Providers are still responsible for restricting based on that condition context. Working off of @jmle awesome work to get the file paths respected in providers Signed-off-by: Shawn Hurley --- .github/workflows/pr-testing.yml | 2 +- demo-output.yaml | 2 +- engine/conditions.go | 12 ++ engine/engine.go | 39 ++++-- engine/scopes.go | 88 ++++++++++++++ .../java_external_provider/service_client.go | 20 ++-- .../yq-external-provider/Dockerfile | 2 +- provider/internal/builtin/service_client.go | 111 ++++++++++++++++-- provider/lib.go | 4 +- provider/provider.go | 13 +- 10 files changed, 261 insertions(+), 32 deletions(-) create mode 100644 engine/scopes.go diff --git a/.github/workflows/pr-testing.yml b/.github/workflows/pr-testing.yml index e8ce1d1e..0b00fb0f 100644 --- a/.github/workflows/pr-testing.yml +++ b/.github/workflows/pr-testing.yml @@ -10,7 +10,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v3 with: - go-version: '1.20' + go-version: '1.21' - name: Test run: go test -v ./... diff --git a/demo-output.yaml b/demo-output.yaml index 44184ab5..5acd5ed2 100644 --- a/demo-output.yaml +++ b/demo-output.yaml @@ -1187,7 +1187,7 @@ errors: error-rule-001: |- unable to get query info: yaml: unmarshal errors: - line 10: cannot unmarshal !!map into string + line 11: cannot unmarshal !!map into string unmatched: - file-002 - lang-ref-002 diff --git a/engine/conditions.go b/engine/conditions.go index 59936c8c..e09130c0 100644 --- a/engine/conditions.go +++ b/engine/conditions.go @@ -3,6 +3,7 @@ package engine import ( "context" "fmt" + "maps" "regexp" "strings" @@ -26,6 +27,17 @@ type ConditionResponse struct { type ConditionContext struct { Tags map[string]interface{} `yaml:"tags"` Template map[string]ChainTemplate `yaml:"template"` + RuleID string `yaml:ruleID` +} + +// This will copy the condition, but this will not copy the ruleID +func (c *ConditionContext) Copy() ConditionContext { + newTags := maps.Clone(c.Tags) + newTemplate := maps.Clone(c.Template) + return ConditionContext{ + Tags: newTags, + Template: newTemplate, + } } type ConditionEntry struct { diff --git a/engine/engine.go b/engine/engine.go index bc9cd276..d5202615 100644 --- a/engine/engine.go +++ b/engine/engine.go @@ -26,6 +26,7 @@ import ( type RuleEngine interface { RunRules(context context.Context, rules []RuleSet, selectors ...RuleSelector) []konveyor.RuleSet + RunRulesScoped(ctx context.Context, ruleSets []RuleSet, scopes Scope, selectors ...RuleSelector) []konveyor.RuleSet Stop() } @@ -33,6 +34,7 @@ type ruleMessage struct { rule Rule ruleSetName string ctx ConditionContext + scope Scope returnChan chan response } @@ -128,7 +130,12 @@ func processRuleWorker(ctx context.Context, ruleMessages chan ruleMessage, logge case m := <-ruleMessages: logger.V(5).Info("taking rule", "ruleset", m.ruleSetName, "rule", m.rule.RuleID) newLogger := logger.WithValues("ruleID", m.rule.RuleID) + //We createa new rule context for a every rule run, here we need to apply the scope m.ctx.Template = make(map[string]ChainTemplate) + if m.scope != nil { + m.scope.AddToContext(&m.ctx) + } + bo, err := processRule(ctx, m.rule, m.ctx, newLogger) logger.V(5).Info("finished rule", "found", len(bo.Incidents), "error", err, "rule", m.rule.RuleID) m.returnChan <- response{ @@ -162,13 +169,32 @@ func (r *ruleEngine) createRuleSet(ruleSet RuleSet) *konveyor.RuleSet { // This will run tagging rules first, synchronously, generating tags to pass on further as context to other rules // then runs remaining rules async, fanning them out, fanning them in, finally generating the results. will block until completed. func (r *ruleEngine) RunRules(ctx context.Context, ruleSets []RuleSet, selectors ...RuleSelector) []konveyor.RuleSet { + return r.RunRulesScoped(ctx, ruleSets, nil, selectors...) +} + +func (r *ruleEngine) RunRulesScoped(ctx context.Context, ruleSets []RuleSet, scopes Scope, selectors ...RuleSelector) []konveyor.RuleSet { // determine if we should run + conditionContext := ConditionContext{ + Tags: make(map[string]interface{}), + Template: make(map[string]ChainTemplate), + } + if scopes != nil { + r.logger.Info("using scopes", "scope", scopes.Name()) + err := scopes.AddToContext(&conditionContext) + if err != nil { + r.logger.Error(err, "unable to apply scopes to ruleContext") + // Call this, even though it is not used, to make sure that + // we don't leak anything. + return []konveyor.RuleSet{} + } + r.logger.Info("added scopes to condition context", "scopes", scopes, "conditionContext", conditionContext) + } ctx, cancelFunc := context.WithCancel(ctx) taggingRules, otherRules, mapRuleSets := r.filterRules(ruleSets, selectors...) - ruleContext := r.runTaggingRules(ctx, taggingRules, mapRuleSets) + ruleContext := r.runTaggingRules(ctx, taggingRules, mapRuleSets, conditionContext) // Need a better name for this thing ret := make(chan response) @@ -241,6 +267,7 @@ func (r *ruleEngine) RunRules(ctx context.Context, ruleSets []RuleSet, selectors wg.Add(1) rule.returnChan = ret rule.ctx = ruleContext + rule.scope = scopes r.ruleProcessing <- rule } r.logger.V(5).Info("All rules added buffer, waiting for engine to complete", "size", len(otherRules)) @@ -318,11 +345,7 @@ func (r *ruleEngine) filterRules(ruleSets []RuleSet, selectors ...RuleSelector) // runTaggingRules filters and runs info rules synchronously // returns list of non-info rules, a context to pass to them -func (r *ruleEngine) runTaggingRules(ctx context.Context, infoRules []ruleMessage, mapRuleSets map[string]*konveyor.RuleSet) ConditionContext { - context := ConditionContext{ - Tags: make(map[string]interface{}), - Template: make(map[string]ChainTemplate), - } +func (r *ruleEngine) runTaggingRules(ctx context.Context, infoRules []ruleMessage, mapRuleSets map[string]*konveyor.RuleSet, context ConditionContext) ConditionContext { // track unique tags per ruleset rulesetTagsCache := map[string]map[string]bool{} for _, ruleMessage := range infoRules { @@ -427,9 +450,11 @@ func processRule(ctx context.Context, rule Rule, ruleCtx ConditionContext, log l ctx, span := tracing.StartNewSpan( ctx, "process-rule", attribute.Key("rule").String(rule.RuleID)) defer span.End() + newContext := ruleCtx.Copy() + newContext.RuleID = rule.RuleID // Here is what a worker should run when getting a rule. // For now, lets not fan out the running of conditions. - return rule.When.Evaluate(ctx, log, ruleCtx) + return rule.When.Evaluate(ctx, log, newContext) } diff --git a/engine/scopes.go b/engine/scopes.go new file mode 100644 index 00000000..108ee286 --- /dev/null +++ b/engine/scopes.go @@ -0,0 +1,88 @@ +package engine + +import ( + "fmt" + + "github.com/go-logr/logr" +) + +const TemplateContextPathScopeKey = "konveyor.io/path-scope" + +// Scopes apply to individual calls to the providers and will add inforamtion to the ConditionContext +// To apply the scope. It is the responsiblity of the provider to use these correctly. +type Scope interface { + Name() string + // For now this is the only place that we are considering adding a scope + // in the future, we could scope other things + AddToContext(*ConditionContext) error +} + +type scopeWrapper struct { + scopes []Scope +} + +func (s *scopeWrapper) Name() string { + name := "" + for i, s := range s.scopes { + if i == 0 { + name = s.Name() + + } else { + name = fmt.Sprintf("%s -- %s", name, s.Name()) + } + } + return name +} + +func (s *scopeWrapper) AddToContext(conditionCTX *ConditionContext) error { + for _, s := range s.scopes { + err := s.AddToContext(conditionCTX) + if err != nil { + return err + } + } + return nil +} + +var _ Scope = &scopeWrapper{} + +func NewScope(scopes ...Scope) Scope { + return &scopeWrapper{scopes: scopes} +} + +type includedPathScope struct { + log logr.Logger + paths []string +} + +var _ Scope = &includedPathScope{} + +func (i *includedPathScope) Name() string { + return "IncludedPathScope" +} + +// This will only update conditionCTX if filepaths is not set. +func (i *includedPathScope) AddToContext(conditionCTX *ConditionContext) error { + // If any chain template has the filepaths set, only use those. + for k, chainTemplate := range conditionCTX.Template { + if chainTemplate.Filepaths != nil && len(chainTemplate.Filepaths) > 0 { + i.log.V(5).Info("includedPathScope not used because filepath set", "filepaths", chainTemplate.Filepaths, "key", k) + return nil + } + } + + // if no As clauses have filepaths, then assume we need to add the special cased filepath for scopes here + conditionCTX.Template[TemplateContextPathScopeKey] = ChainTemplate{ + Filepaths: i.paths, + Extras: nil, + } + return nil + +} + +func IncludedPathsScope(paths []string, log logr.Logger) Scope { + return &includedPathScope{ + paths: paths, + log: log, + } +} diff --git a/external-providers/java-external-provider/pkg/java_external_provider/service_client.go b/external-providers/java-external-provider/pkg/java_external_provider/service_client.go index 12fa4232..df9fee77 100644 --- a/external-providers/java-external-provider/pkg/java_external_provider/service_client.go +++ b/external-providers/java-external-provider/pkg/java_external_provider/service_client.go @@ -14,7 +14,6 @@ import ( "time" "github.com/go-logr/logr" - "github.com/konveyor/analyzer-lsp/engine" "github.com/konveyor/analyzer-lsp/jsonrpc2" "github.com/konveyor/analyzer-lsp/lsp/protocol" "github.com/konveyor/analyzer-lsp/provider" @@ -59,7 +58,7 @@ func (p *javaServiceClient) Evaluate(ctx context.Context, cap string, conditionI cond.Referenced.Filepaths = strings.Split(cond.Referenced.Filepaths[0], " ") } - condCtx := &engine.ConditionContext{} + condCtx := &provider.ProviderContext{} err = yaml.Unmarshal(conditionInfo, condCtx) if err != nil { return provider.ProviderEvaluateResponse{}, fmt.Errorf("unable to get condition context info: %v", err) @@ -68,7 +67,7 @@ func (p *javaServiceClient) Evaluate(ctx context.Context, cap string, conditionI if cond.Referenced.Pattern == "" { return provider.ProviderEvaluateResponse{}, fmt.Errorf("provided query pattern empty") } - symbols, err := p.GetAllSymbols(ctx, *cond) + symbols, err := p.GetAllSymbols(ctx, *cond, condCtx) if err != nil { p.log.Error(err, "unable to get symbols", "symbols", symbols, "cap", cap, "conditionInfo", cond) return provider.ProviderEvaluateResponse{}, err @@ -121,7 +120,7 @@ func (p *javaServiceClient) Evaluate(ctx context.Context, cap string, conditionI }, nil } -func (p *javaServiceClient) GetAllSymbols(ctx context.Context, c javaCondition) ([]protocol.WorkspaceSymbol, error) { +func (p *javaServiceClient) GetAllSymbols(ctx context.Context, c javaCondition, condCTX *provider.ProviderContext) ([]protocol.WorkspaceSymbol, error) { // This command will run the added bundle to the language server. The command over the wire needs too look like this. // in this case the project is hardcoded in the init of the Langauge Server above // workspace/executeCommand '{"command": "io.konveyor.tackle.ruleEntry", "arguments": {"query":"*customresourcedefinition","project": "java"}}' @@ -136,9 +135,14 @@ func (p *javaServiceClient) GetAllSymbols(ctx context.Context, c javaCondition) argumentsMap["annotationQuery"] = c.Referenced.Annotated } - if p.includedPaths != nil && len(p.includedPaths) > 0 { + log := p.log.WithValues("ruleID", condCTX.RuleID) + + if len(p.includedPaths) > 0 { argumentsMap[provider.IncludedPathsConfigKey] = p.includedPaths - p.log.V(5).Info("setting search scope by filepaths", "paths", p.includedPaths) + log.V(8).Info("setting search scope by filepaths", "paths", p.includedPaths) + } else if ok, paths := condCTX.GetScopedFilepaths(); ok { + argumentsMap[provider.IncludedPathsConfigKey] = paths + log.V(8).Info("setting search scope by filepaths", "paths", p.includedPaths, "argumentMap", argumentsMap) } argumentsBytes, _ := json.Marshal(argumentsMap) @@ -155,10 +159,10 @@ func (p *javaServiceClient) GetAllSymbols(ctx context.Context, c javaCondition) err := p.rpc.Call(timeOutCtx, "workspace/executeCommand", wsp, &refs) if err != nil { if jsonrpc2.IsRPCClosed(err) { - p.log.Error(err, "connection to the language server is closed, language server is not running") + log.Error(err, "connection to the language server is closed, language server is not running") return refs, fmt.Errorf("connection to the language server is closed, language server is not running") } else { - p.log.Error(err, "unable to ask for Konveyor rule entry") + log.Error(err, "unable to ask for Konveyor rule entry") return refs, fmt.Errorf("unable to ask for Konveyor rule entry") } } diff --git a/external-providers/yq-external-provider/Dockerfile b/external-providers/yq-external-provider/Dockerfile index 7ef044d7..a6144f84 100644 --- a/external-providers/yq-external-provider/Dockerfile +++ b/external-providers/yq-external-provider/Dockerfile @@ -1,4 +1,4 @@ -FROM golang:1.20 as go-builder +FROM golang:1.21 as go-builder copy / /analyzer-lsp diff --git a/provider/internal/builtin/service_client.go b/provider/internal/builtin/service_client.go index b9ac4ad5..1e81410f 100644 --- a/provider/internal/builtin/service_client.go +++ b/provider/internal/builtin/service_client.go @@ -47,6 +47,8 @@ func (p *builtinServiceClient) Evaluate(ctx context.Context, cap string, conditi if err != nil { return provider.ProviderEvaluateResponse{}, fmt.Errorf("unable to get query info: %v", err) } + log := p.log.WithValues("ruleID", cond.ProviderContext.RuleID) + log.V(5).Info("builtin condition context", "condition", cond, "provider context", cond.ProviderContext) response := provider.ProviderEvaluateResponse{Matched: false} switch cap { case "file": @@ -54,9 +56,29 @@ func (p *builtinServiceClient) Evaluate(ctx context.Context, cap string, conditi if c.Pattern == "" { return response, fmt.Errorf("could not parse provided file pattern as string: %v", conditionInfo) } - matchingFiles, err := findFilesMatchingPattern(p.config.Location, c.Pattern) - if err != nil { - return response, fmt.Errorf("unable to find files using pattern `%s`: %v", c.Pattern, err) + matchingFiles := []string{} + if ok, paths := cond.ProviderContext.GetScopedFilepaths(); ok { + regex, _ := regexp.Compile(c.Pattern) + for _, path := range paths { + matched := false + if regex != nil { + matched = regex.MatchString(path) + } else { + // TODO(fabianvf): is a fileglob style pattern sufficient or do we need regexes? + matched, err = filepath.Match(c.Pattern, path) + if err != nil { + continue + } + } + if matched { + matchingFiles = append(matchingFiles, path) + } + } + } else { + matchingFiles, err = findFilesMatchingPattern(p.config.Location, c.Pattern) + if err != nil { + return response, fmt.Errorf("unable to find files using pattern `%s`: %v", c.Pattern, err) + } } response.TemplateContext = map[string]interface{}{"filepaths": matchingFiles} @@ -84,7 +106,13 @@ func (p *builtinServiceClient) Evaluate(ctx context.Context, cap string, conditi return response, fmt.Errorf("could not parse provided regex pattern as string: %v", conditionInfo) } var outputBytes []byte - grep := exec.Command("grep", "-o", "-n", "-R", "-P", c.Pattern, p.config.Location) + grep := exec.Command("grep", "-o", "-n", "-R", "-P", c.Pattern) + if ok, paths := cond.ProviderContext.GetScopedFilepaths(); ok { + grep.Args = append(grep.Args, paths...) + } else { + grep.Args = append(grep.Args, p.config.Location) + } + outputBytes, err := grep.Output() if err != nil { if exitError, ok := err.(*exec.ExitError); ok && exitError.ExitCode() == 1 { @@ -151,14 +179,42 @@ func (p *builtinServiceClient) Evaluate(ctx context.Context, cap string, conditi if query == nil || err != nil { return response, fmt.Errorf("could not parse provided xpath query '%s': %v", cond.XML.XPath, err) } - xmlFiles, err := findXMLFiles(p.config.Location, cond.XML.Filepaths) + filePaths := []string{} + if ok, paths := cond.ProviderContext.GetScopedFilepaths(); ok { + if len(cond.XML.Filepaths) > 0 { + newPaths := []string{} + // Sometimes rules have hardcoded filepaths + // Or use other searching to get them. If so, then we + // Should respect that added filter on the scoped filepaths + for _, p := range cond.XML.Filepaths { + for _, path := range paths { + if p == path { + newPaths = append(newPaths, path) + } + if filepath.Base(path) == p { + newPaths = append(newPaths, path) + } + } + } + if len(newPaths) == 0 { + // There are no files to search, return. + return response, nil + } + filePaths = newPaths + } else { + filePaths = paths + } + } else if len(cond.XML.Filepaths) > 0 { + filePaths = cond.XML.Filepaths + } + xmlFiles, err := findXMLFiles(p.config.Location, filePaths, log) if err != nil { return response, fmt.Errorf("unable to find XML files: %v", err) } for _, file := range xmlFiles { nodes, err := queryXMLFile(file, query) if err != nil { - p.log.V(5).Error(err, "failed to query xml file", "file", file) + log.V(5).Error(err, "failed to query xml file", "file", file) continue } if len(nodes) != 0 { @@ -204,14 +260,39 @@ func (p *builtinServiceClient) Evaluate(ctx context.Context, cap string, conditi if query == nil || err != nil { return response, fmt.Errorf("could not parse public-id xml query '%s': %v", cond.XML.XPath, err) } - xmlFiles, err := findXMLFiles(p.config.Location, cond.XMLPublicID.Filepaths) + filePaths := []string{} + if ok, paths := cond.ProviderContext.GetScopedFilepaths(); ok { + if len(cond.XML.Filepaths) > 0 { + newPaths := []string{} + // Sometimes rules have hardcoded filepaths + // Or use other searching to get them. If so, then we + // Should respect that added filter on the scoped filepaths + for _, p := range cond.XML.Filepaths { + for _, path := range paths { + if p == path { + newPaths = append(newPaths, path) + } + } + } + if len(newPaths) == 0 { + // There are no files to search, return. + return response, nil + } + filePaths = newPaths + } else { + filePaths = paths + } + } else if len(cond.XML.Filepaths) > 0 { + filePaths = cond.XML.Filepaths + } + xmlFiles, err := findXMLFiles(p.config.Location, filePaths, p.log) if err != nil { return response, fmt.Errorf("unable to find XML files: %v", err) } for _, file := range xmlFiles { nodes, err := queryXMLFile(file, query) if err != nil { - p.log.V(5).Error(err, "failed to query xml file", "file", file) + log.Error(err, "failed to query xml file", "file", file) continue } @@ -250,19 +331,25 @@ func (p *builtinServiceClient) Evaluate(ctx context.Context, cap string, conditi return response, fmt.Errorf("could not parse provided xpath query as string: %v", conditionInfo) } pattern := "*.json" - jsonFiles, err := provider.GetFiles(p.config.Location, cond.JSON.Filepaths, pattern) + filePaths := []string{} + if ok, paths := cond.ProviderContext.GetScopedFilepaths(); ok { + filePaths = paths + } else if len(cond.XML.Filepaths) > 0 { + filePaths = cond.JSON.Filepaths + } + jsonFiles, err := provider.GetFiles(p.config.Location, filePaths, pattern) if err != nil { return response, fmt.Errorf("unable to find files using pattern `%s`: %v", pattern, err) } for _, file := range jsonFiles { f, err := os.Open(file) if err != nil { - p.log.V(5).Error(err, "error opening json file", "file", file) + log.V(5).Error(err, "error opening json file", "file", file) continue } doc, err := jsonquery.Parse(f) if err != nil { - p.log.V(5).Error(err, "error parsing json file", "file", file) + log.V(5).Error(err, "error parsing json file", "file", file) continue } list, err := jsonquery.QueryAll(doc, query) @@ -413,7 +500,7 @@ func findFilesMatchingPattern(root, pattern string) ([]string, error) { return matches, err } -func findXMLFiles(baseLocation string, filePaths []string) ([]string, error) { +func findXMLFiles(baseLocation string, filePaths []string, log logr.Logger) ([]string, error) { patterns := []string{"*.xml", "*.xhtml"} // TODO(fabianvf): how should we scope the files searched here? xmlFiles, err := provider.GetFiles(baseLocation, filePaths, patterns...) diff --git a/provider/lib.go b/provider/lib.go index 0026a6ea..c61ab0d1 100644 --- a/provider/lib.go +++ b/provider/lib.go @@ -73,6 +73,9 @@ func GetFiles(configLocation string, filepaths []string, patterns ...string) ([] // Currently, rendering will render a list as a space separated paths as a single string. patterns := strings.Split(filepaths[0], " ") for _, pattern := range patterns { + if p, err := filepath.Rel(configLocation, pattern); err == nil { + pattern = p + } files, err := FindFilesMatchingPattern(configLocation, pattern) if err != nil { // Something went wrong dealing with the pattern, so we'll assume the user input @@ -83,7 +86,6 @@ func GetFiles(configLocation string, filepaths []string, patterns ...string) ([] } else { xmlFiles = append(xmlFiles, files...) } - // } } else { for _, pattern := range filepaths { diff --git a/provider/provider.go b/provider/provider.go index be51ffe8..81d9576e 100644 --- a/provider/provider.go +++ b/provider/provider.go @@ -330,6 +330,16 @@ type ExternalLinks struct { type ProviderContext struct { Tags map[string]interface{} `yaml:"tags"` Template map[string]engine.ChainTemplate `yaml:"template"` + RuleID string `yaml:ruleID` +} + +func (p *ProviderContext) GetScopedFilepaths() (bool, []string) { + for key, value := range p.Template { + if key == engine.TemplateContextPathScopeKey { + return true, value.Filepaths + } + } + return false, nil } func HasCapability(caps []Capability, name string) bool { @@ -480,6 +490,7 @@ func (p ProviderCondition) Evaluate(ctx context.Context, log logr.Logger, condCt ProviderContext: ProviderContext{ Tags: condCtx.Tags, Template: condCtx.Template, + RuleID: condCtx.RuleID, }, Capability: map[string]interface{}{ p.Capability: p.ConditionInfo, @@ -491,7 +502,7 @@ func (p ProviderCondition) Evaluate(ctx context.Context, log logr.Logger, condCt //TODO(fabianvf) panic(err) } - log = log.WithValues("provider info", "cap", p.Capability, "condInfo", p.ConditionInfo) + log = log.WithValues("provider info", "cap", p.Capability, "condInfo", serializedInfo, "ruleID", condCtx.RuleID) templatedInfo, err := templateCondition(serializedInfo, condCtx.Template) if err != nil { //TODO(fabianvf)