From 89a4432447ecdfee9b6329a3bfb981e53f200b50 Mon Sep 17 00:00:00 2001 From: Michael Chernicoff Date: Wed, 22 Jan 2025 10:22:48 -0500 Subject: [PATCH] feat: Add English language parsing of failure messages Signed-off-by: Michael Chernicoff --- hipcheck/src/policy_exprs/env.rs | 52 ++++--- hipcheck/src/policy_exprs/error.rs | 3 + hipcheck/src/policy_exprs/expr.rs | 1 + hipcheck/src/policy_exprs/mod.rs | 209 +++++++++++++++++++++++++- hipcheck/src/report/mod.rs | 28 +++- hipcheck/src/report/report_builder.rs | 8 +- hipcheck/src/score.rs | 14 +- hipcheck/src/shell/mod.rs | 5 +- plugins/activity/src/main.rs | 2 +- plugins/affiliation/src/main.rs | 3 +- plugins/binary/src/main.rs | 2 +- plugins/churn/src/main.rs | 2 +- plugins/entropy/src/main.rs | 2 +- plugins/fuzz/src/main.rs | 2 +- plugins/review/src/main.rs | 2 +- plugins/typo/src/main.rs | 3 +- 16 files changed, 293 insertions(+), 45 deletions(-) diff --git a/hipcheck/src/policy_exprs/env.rs b/hipcheck/src/policy_exprs/env.rs index 31f0db6c..be33fd3e 100644 --- a/hipcheck/src/policy_exprs/env.rs +++ b/hipcheck/src/policy_exprs/env.rs @@ -363,45 +363,45 @@ impl Env<'_> { let mut env = Env::empty(); // Comparison functions. - env.add_fn("gt", gt, 2, ty_comp); - env.add_fn("lt", lt, 2, ty_comp); - env.add_fn("gte", gte, 2, ty_comp); - env.add_fn("lte", lte, 2, ty_comp); - env.add_fn("eq", eq, 2, ty_comp); - env.add_fn("neq", neq, 2, ty_comp); + env.add_fn("gt", "greater than", gt, 2, ty_comp); + env.add_fn("lt", "less than", lt, 2, ty_comp); + env.add_fn("gte", "greater than or equal to", gte, 2, ty_comp); + env.add_fn("lte", "less than or equal to", lte, 2, ty_comp); + env.add_fn("eq", "equal to", eq, 2, ty_comp); + env.add_fn("neq", "not equal to", neq, 2, ty_comp); // Math functions. - env.add_fn("add", add, 2, ty_arithmetic_binary_ops); - env.add_fn("sub", sub, 2, ty_arithmetic_binary_ops); - env.add_fn("divz", divz, 2, ty_divz); + env.add_fn("add", "plus", add, 2, ty_arithmetic_binary_ops); + env.add_fn("sub", "minus", sub, 2, ty_arithmetic_binary_ops); + env.add_fn("divz", "divided by", divz, 2, ty_divz); // Additional datetime math functions - env.add_fn("duration", duration, 2, ty_duration); + env.add_fn("duration", "minus", duration, 2, ty_duration); // Logical functions. - env.add_fn("and", and, 2, ty_bool_binary); - env.add_fn("or", or, 2, ty_bool_binary); - env.add_fn("not", not, 1, ty_bool_unary); + env.add_fn("and", "and", and, 2, ty_bool_binary); + env.add_fn("or", "or", or, 2, ty_bool_binary); + env.add_fn("not", "not", not, 1, ty_bool_unary); // Array math functions. - env.add_fn("max", max, 1, ty_from_first_arr); - env.add_fn("min", min, 1, ty_from_first_arr); - env.add_fn("avg", avg, 1, ty_avg); - env.add_fn("median", median, 1, ty_from_first_arr); - env.add_fn("count", count, 1, ty_count); + env.add_fn("max", "the maximum of", max, 1, ty_from_first_arr); + env.add_fn("min", "the minimum of", min, 1, ty_from_first_arr); + env.add_fn("avg", "the mean of", avg, 1, ty_avg); + env.add_fn("median", "the median of", median, 1, ty_from_first_arr); + env.add_fn("count", "the number of elements in", count, 1, ty_count); // Array logic functions. - env.add_fn("all", all, 1, ty_higher_order_bool_fn); - env.add_fn("nall", nall, 1, ty_higher_order_bool_fn); - env.add_fn("some", some, 1, ty_higher_order_bool_fn); - env.add_fn("none", none, 1, ty_higher_order_bool_fn); + env.add_fn("all", "all of", all, 1, ty_higher_order_bool_fn); + env.add_fn("nall", "not all of", nall, 1, ty_higher_order_bool_fn); + env.add_fn("some", "some of", some, 1, ty_higher_order_bool_fn); + env.add_fn("none", "none of", none, 1, ty_higher_order_bool_fn); // Array higher-order functions. - env.add_fn("filter", filter, 2, ty_filter); - env.add_fn("foreach", foreach, 2, ty_foreach); + env.add_fn("filter", "filtered on", filter, 2, ty_filter); + env.add_fn("foreach", "each to be", foreach, 2, ty_foreach); // Debugging functions. - env.add_fn("dbg", dbg, 1, ty_inherit_first); + env.add_fn("dbg", "debugging", dbg, 1, ty_inherit_first); env } @@ -423,6 +423,7 @@ impl Env<'_> { pub fn add_fn( &mut self, name: &str, + english: &str, op: Op, expected_args: usize, ty_checker: TypeChecker, @@ -431,6 +432,7 @@ impl Env<'_> { name.to_owned(), Binding::Fn(FunctionDef { name: name.to_owned(), + english: english.to_owned(), expected_args, ty_checker, op, diff --git a/hipcheck/src/policy_exprs/error.rs b/hipcheck/src/policy_exprs/error.rs index 024bb2c1..b86f4041 100644 --- a/hipcheck/src/policy_exprs/error.rs +++ b/hipcheck/src/policy_exprs/error.rs @@ -43,6 +43,9 @@ pub enum Error { #[error("expression returned '{0:?}', not a boolean")] DidNotReturnBool(Expr), + #[error("evaluation of inner expression returned '{0:?}', not a primitive")] + BadReturnType(Expr), + #[error("tried to call unknown function '{0}'")] UnknownFunction(String), diff --git a/hipcheck/src/policy_exprs/expr.rs b/hipcheck/src/policy_exprs/expr.rs index 4d2bbdad..ad088fa7 100644 --- a/hipcheck/src/policy_exprs/expr.rs +++ b/hipcheck/src/policy_exprs/expr.rs @@ -64,6 +64,7 @@ pub type TypeChecker = fn(&[Type]) -> Result; #[derive(Clone, PartialEq, Debug, Eq)] pub struct FunctionDef { pub name: String, + pub english: String, pub expected_args: usize, pub ty_checker: TypeChecker, pub op: Op, diff --git a/hipcheck/src/policy_exprs/mod.rs b/hipcheck/src/policy_exprs/mod.rs index f897e1de..7761355b 100644 --- a/hipcheck/src/policy_exprs/mod.rs +++ b/hipcheck/src/policy_exprs/mod.rs @@ -75,18 +75,17 @@ impl FromStr for Expr { } /// Evaluates `deke` expressions. -#[cfg(test)] pub struct Executor { env: Env<'static>, } -#[cfg(test)] impl Executor { /// Create an `Executor` with the standard set of functions defined. pub fn std() -> Self { Executor { env: Env::std() } } + #[cfg(test)] /// Run a `deke` program. pub fn run(&self, raw_program: &str, context: &Value) -> Result { match self.parse_and_eval(raw_program, context)? { @@ -156,6 +155,212 @@ impl ExprMutator for Env<'_> { } } +/// Return an English language explanation for what a failing plugin was expected to see and what it saw instead +pub fn parse_failing_expr_to_english( + input: &Expr, + message: &str, + value: &Option, +) -> Result { + // Create a standard environment, with its list of functions and their English descriptions + let env = Env::std(); + // Store that environment and the plugin explanation message in a struct for English parsing + let english = English { + env, + message: message.to_string(), + }; + + // Check that the "top level" of the policy expression is a function, then recursively parse that function into an English language description of why the plugin failed + if let Expr::Function(func) = input { + // Recursively parse the top level function to English + let english_expr = english.visit_function(func)?; + + // Get the function's args + let args = &func.args; + + // Confirm that the outermost function has two arguments + if args.len() != 2 { + return Err(Error::MissingArgs); + } + + // Get whichever of the function's arguments is **not** a primitive (i.e. the top level expected value) for evaluation + let inner = match (&args[0], &args[1]) { + (&Expr::Primitive(_), inner) => inner, + (inner, &Expr::Primitive(_)) => inner, + _ => return Err(Error::MissingArgs), + }; + + // Evaluate that argument using the value returned by the plugin to see what the top level operator is comparing the expected value to + let inner_value = match value { + Some(context) => { + format!( + "it was {}", + match Executor::std().parse_and_eval(&inner.to_string(), context)? { + Expr::Primitive(prim) => english.visit_primitive(&prim)?, + _ => return Err(Error::BadReturnType(inner.clone())), + } + ) + } + None => "no value was returned by the query".to_string(), + }; + + return Ok(format!("Expected {english_expr} but {inner_value}")); + } + + Err(Error::MissingIdent) +} + +/// Struct that contains a basic environment, with its English function descriptions, and a plugin explanation message. +pub struct English<'a> { + env: Env<'a>, + message: String, +} + +// Trait implementation to return English descriptions of an Expr +impl ExprVisitor> for English<'_> { + /// Parse a function expression into an English string + fn visit_function(&self, func: &Function) -> Result { + let env = &self.env; + + // Get the function operator from the list of functions in the environment + let ident = &func.ident; + let fn_name = ident.to_string(); + + let function_def = match env.get(&fn_name) { + Some(binding) => match binding { + Binding::Fn(function_def) => function_def, + _ => { + return Err(Error::UnknownFunction(format!( + "Given function name {} is not a function", + fn_name + ))) + } + }, + _ => { + return Err(Error::UnknownFunction(format!( + "Given function name {} not found in list of functions", + fn_name + ))) + } + }; + + // Convert theoperator to English, with additional phrasing specific to comparison operators in a function + let operator = match function_def.name.as_ref() { + "gt" | "lt" | "gte" | "lte" | "eq" | "ne" => format!("to be {}", function_def.english), + _ => function_def.english, + }; + + // Get the number of args the function should have + let expected_args = function_def.expected_args; + + // Get the funciton's args + let args = &func.args; + + // Check for an invalid number of arguments + if args.len() < expected_args { + return Err(Error::NotEnoughArgs { + name: fn_name, + expected: expected_args, + given: args.len(), + }); + } + if args.len() > expected_args { + return Err(Error::TooManyArgs { + name: fn_name, + expected: expected_args, + given: args.len(), + }); + } + + if args.len() == 2 { + // If there are two arguments, parse a function comparing a pair of some combination of primitives, + // JSON pointers, nested functions (including lambdas in the first position), or arrays (in the second position) to English + if matches!(args[0], Expr::Array(_)) || matches!(args[1], Expr::Lambda(_)) { + return Err(Error::BadType("English::visit_function()")); + } + let argument_1 = self.visit_expr(&args[0])?; + let argument_2 = self.visit_expr(&args[1])?; + + Ok(format!("{} {} {}", argument_1, operator, argument_2)) + } else { + // If there is one argument, parse a function operating on an array, JSON pointer, or a nested function to English + if matches!(args[0], Expr::Lambda(_)) { + return Err(Error::BadType("English::visit_function()")); + } + let argument = self.visit_expr(&args[0])?; + + Ok(format!("{} {}", operator, argument)) + } + } + + /// Parse a lambda expression into an English string + fn visit_lambda(&self, func: &Lambda) -> Result { + let env = &self.env; + + // Get the lambda function from the lambda + let function = &func.body; + //Get the lambda's function operator from the list of functions in the environment + let ident = &function.ident; + let fn_name = ident.to_string(); + + let function_def = match env.get(&fn_name) { + Some(binding) => match binding { + Binding::Fn(function_def) => function_def, + _ => { + return Err(Error::UnknownFunction(format!( + "Given function name {} is not a function", + fn_name + ))) + } + }, + _ => { + return Err(Error::UnknownFunction(format!( + "Given function name {} not found in list of functions", + fn_name + ))) + } + }; + + // Convert the operator to English + let operator = function_def.english; + + // Get the lambda function's argument and parse it to English + // Note: The useful arugment for a lambda function is the *second* argument + let args = &function.args; + let argument = self.visit_expr(&args[1])?; + + Ok(format!("\"{} {}\"", operator, argument)) + } + + // Parse a primitive type expression to English + fn visit_primitive(&self, prim: &Primitive) -> Result { + match prim { + Primitive::Bool(true) => Ok("true".to_string()), + Primitive::Bool(false) => Ok("false".to_string()), + Primitive::Int(i) => Ok(i.to_string()), + Primitive::Float(f) => Ok(f.to_string()), + Primitive::DateTime(dt) => Ok(dt.to_string()), + Primitive::Span(span) => Ok(span.to_string()), + _ => Err(Error::BadType("English::visit_primitive()")), + } + } + + // Parse a primitive type array expression to English + fn visit_array(&self, arr: &Array) -> Result { + let english_elts = arr + .elts + .iter() + .map(|p| self.visit_primitive(p).unwrap()) + .collect::>() + .join(","); + Ok(format!("the array [{}]", english_elts)) + } + + // Parse a JSON pointer expression into English by returning the explanation message for the plugin + fn visit_json_pointer(&self, _func: &JsonPointer) -> Result { + Ok(self.message.clone()) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/hipcheck/src/report/mod.rs b/hipcheck/src/report/mod.rs index 64d83982..4b4fcc89 100644 --- a/hipcheck/src/report/mod.rs +++ b/hipcheck/src/report/mod.rs @@ -14,12 +14,13 @@ pub mod report_builder; use crate::{ cli::Format, error::{Context, Error, Result}, - policy_exprs::{std_exec, Expr}, + policy_exprs::{self, std_exec, Expr}, version::VersionQuery, }; use chrono::prelude::*; use schemars::JsonSchema; use serde::{Serialize, Serializer}; +use serde_json::Value; use std::{ default::Default, fmt, @@ -284,6 +285,12 @@ pub struct Analysis { #[schemars(schema_with = "String::json_schema")] policy_expr: Expr, + /// The value returned by the analysis, if any, that was used in computing the policy expression + /// + /// We use this when printing the result to help explain to the user + /// *why* an analysis failed. + value: Option, + /// The default query explanation pulled from RPC with the plugin. message: String, } @@ -294,11 +301,18 @@ pub struct Analysis { // } impl Analysis { - pub fn plugin(name: String, passed: bool, policy_expr: Expr, message: String) -> Self { + pub fn plugin( + name: String, + passed: bool, + policy_expr: Expr, + value: Option, + message: String, + ) -> Self { Analysis { name, passed, policy_expr, + value, message, } } @@ -318,6 +332,16 @@ impl Analysis { pub fn explanation(&self) -> String { self.message.clone() } + + pub fn failing_explanation(&self) -> Result { + let input = &self.policy_expr; + // Split the explanation message into components on commas, and reverse the order so it can serve as a stack + let message = &self.message; + let value = &self.value; + Ok(policy_exprs::parse_failing_expr_to_english( + input, message, value, + )?) + } } /// Value and threshold for counting-based analyses. diff --git a/hipcheck/src/report/report_builder.rs b/hipcheck/src/report/report_builder.rs index 2729757c..f3ad21fc 100644 --- a/hipcheck/src/report/report_builder.rs +++ b/hipcheck/src/report/report_builder.rs @@ -44,7 +44,13 @@ pub fn build_report(session: &Session, scoring: &ScoringResults) -> Result, pub policy: Expr, + pub value: Option, pub passed: bool, } @@ -218,7 +220,7 @@ pub fn score_results(_phase: &SpinnerPhase, db: &dyn ScoringProvider) -> Result< ); // Determine if analysis passed by evaluating policy expr - let passed = { + let (value, passed) = { if let Ok(output) = &response { // by this time, the result cached should have evaluated to a single Value, so // use the first value @@ -231,10 +233,13 @@ pub fn score_results(_phase: &SpinnerPhase, db: &dyn ScoringProvider) -> Result< )); } - std_exec(policy.clone(), Some(output.value.first().unwrap())) - .map_err(|e| hc_error!("{}", e))? + ( + Some(output.value.first().unwrap().clone()), + std_exec(policy.clone(), Some(output.value.first().unwrap())) + .map_err(|e| hc_error!("{}", e))?, + ) } else { - false + (None, false) } }; @@ -244,6 +249,7 @@ pub fn score_results(_phase: &SpinnerPhase, db: &dyn ScoringProvider) -> Result< PluginAnalysisResult { response, policy, + value, passed, }, ); diff --git a/hipcheck/src/shell/mod.rs b/hipcheck/src/shell/mod.rs index f4768e06..761c37c4 100644 --- a/hipcheck/src/shell/mod.rs +++ b/hipcheck/src/shell/mod.rs @@ -382,7 +382,10 @@ fn print_human(report: Report) -> Result<()> { Title::Failed, analysis.statement() ); - macros::println!("{EMPTY:LEFT_COL_WIDTH$} {}", analysis.explanation()); + macros::println!( + "{EMPTY:LEFT_COL_WIDTH$} {}", + analysis.failing_explanation().unwrap() + ); for concern in failing_analysis.concerns() { macros::println!("{EMPTY:LEFT_COL_WIDTH$} {}", concern); diff --git a/plugins/activity/src/main.rs b/plugins/activity/src/main.rs index 11baa04e..65ab77b6 100644 --- a/plugins/activity/src/main.rs +++ b/plugins/activity/src/main.rs @@ -81,7 +81,7 @@ impl Plugin for ActivityPlugin { fn explain_default_query(&self) -> Result> { Ok(Some( - "Span of time that has elapsed since last activity in repo".to_string(), + "span of time that has elapsed since last activity in repo".to_string(), )) } diff --git a/plugins/affiliation/src/main.rs b/plugins/affiliation/src/main.rs index aa7bbd84..54a5b6cb 100644 --- a/plugins/affiliation/src/main.rs +++ b/plugins/affiliation/src/main.rs @@ -528,8 +528,7 @@ impl Plugin for AffiliationPlugin { fn explain_default_query(&self) -> Result> { Ok(Some( - "Returns whether each of the repository's contributors was flagged as affiliated or not" - .to_string(), + "the repository's contributors flagged as affiliated".to_string(), )) } diff --git a/plugins/binary/src/main.rs b/plugins/binary/src/main.rs index 0fdfd72a..931a6f64 100644 --- a/plugins/binary/src/main.rs +++ b/plugins/binary/src/main.rs @@ -113,7 +113,7 @@ impl Plugin for BinaryPlugin { fn explain_default_query(&self) -> Result> { Ok(Some( - "Returns number of detected binary files in a repo".to_owned(), + "the number of detected binary files in a repo".to_owned(), )) } diff --git a/plugins/churn/src/main.rs b/plugins/churn/src/main.rs index c6eef6e9..7249864a 100644 --- a/plugins/churn/src/main.rs +++ b/plugins/churn/src/main.rs @@ -298,7 +298,7 @@ impl Plugin for ChurnPlugin { fn explain_default_query(&self) -> Result> { Ok(Some( - "The churn frequency calculation of each commit in a repo".to_owned(), + "the churn frequency of each commit in the repository".to_owned(), )) } diff --git a/plugins/entropy/src/main.rs b/plugins/entropy/src/main.rs index 26d87722..d4140a8e 100644 --- a/plugins/entropy/src/main.rs +++ b/plugins/entropy/src/main.rs @@ -210,7 +210,7 @@ impl Plugin for EntropyPlugin { fn explain_default_query(&self) -> Result> { Ok(Some( - "The entropy calculation of each commit in a repo".to_owned(), + "The entropy calculation of each commit in the repository".to_owned(), )) } diff --git a/plugins/fuzz/src/main.rs b/plugins/fuzz/src/main.rs index ce517c29..4badcb98 100644 --- a/plugins/fuzz/src/main.rs +++ b/plugins/fuzz/src/main.rs @@ -37,7 +37,7 @@ impl Plugin for FuzzAnalysisPlugin { } fn explain_default_query(&self) -> Result> { - Ok(Some("Does the target repo do fuzzing".to_owned())) + Ok(Some("'Does the target repo do fuzzing?'".to_owned())) } queries! {} diff --git a/plugins/review/src/main.rs b/plugins/review/src/main.rs index 48d730ce..1109ad33 100644 --- a/plugins/review/src/main.rs +++ b/plugins/review/src/main.rs @@ -100,7 +100,7 @@ impl Plugin for ReviewPlugin { fn explain_default_query(&self) -> Result> { Ok(Some( - "Percentage of unreviewed commits to the repo".to_string(), + "commits to the repo indicating review or not".to_string(), )) } diff --git a/plugins/typo/src/main.rs b/plugins/typo/src/main.rs index eff15fa4..a07a34ad 100644 --- a/plugins/typo/src/main.rs +++ b/plugins/typo/src/main.rs @@ -144,8 +144,7 @@ impl Plugin for TypoPlugin { fn explain_default_query(&self) -> Result> { Ok(Some( - "Returns whether each of the repository's package dependencies has a typo in its name" - .to_string(), + "the repository's dependencies flagged as typos".to_string(), )) }