From f7e5c2dda6e8a8d162089e34259308fd84912ce7 Mon Sep 17 00:00:00 2001 From: Lars T Hansen Date: Sun, 8 Dec 2024 14:43:14 +0100 Subject: [PATCH] Fix #714 - compiler / exe engine for query expressions --- code/generate-table/generate-table.go | 120 +++- code/go-utils/gpuset/gpuset.go | 2 +- code/sonalyze/MANUAL.md | 59 +- .../sonalyze/application/parse_format_test.go | 4 +- code/sonalyze/cmd/args.go | 34 + code/sonalyze/cmd/cli.go | 11 +- code/sonalyze/cmd/clusters/cluster-table.go | 20 + code/sonalyze/cmd/clusters/clusters.go | 10 + code/sonalyze/cmd/config.go | 2 +- code/sonalyze/cmd/configs/config-table.go | 55 ++ code/sonalyze/cmd/configs/configs.go | 10 + code/sonalyze/cmd/gpus/gpus-table.go | 70 ++ code/sonalyze/cmd/gpus/gpus.go | 7 +- code/sonalyze/cmd/jobs/jobs-table.go | 420 ++++++++++- code/sonalyze/cmd/jobs/jobs.go | 6 + code/sonalyze/cmd/jobs/perform.go | 45 +- code/sonalyze/cmd/jobs/print.go | 12 +- code/sonalyze/cmd/load/load-table.go | 100 +++ code/sonalyze/cmd/load/perform.go | 32 +- code/sonalyze/cmd/metadata/metadata-table.go | 21 + code/sonalyze/cmd/metadata/metadata.go | 4 + code/sonalyze/cmd/nodes/node-table.go | 49 ++ code/sonalyze/cmd/nodes/nodes.go | 29 +- code/sonalyze/cmd/parse/parse-table.go | 174 ++++- code/sonalyze/cmd/parse/parse.go | 24 +- code/sonalyze/cmd/profile/profile-table.go | 52 ++ code/sonalyze/cmd/profile/profile.go | 3 + code/sonalyze/cmd/sacct/print.go | 4 + code/sonalyze/cmd/sacct/sacct-table.go | 172 +++++ code/sonalyze/cmd/top/top.go | 11 +- code/sonalyze/cmd/uptime/perform.go | 20 +- code/sonalyze/cmd/uptime/print.go | 5 + code/sonalyze/cmd/uptime/uptime-table.go | 31 + code/sonalyze/db/sample.go | 12 +- code/sonalyze/db/sample_test.go | 4 +- code/sonalyze/db/samplefilter.go | 2 +- code/sonalyze/sonarlog/postprocess.go | 26 +- code/sonalyze/sonarlog/synthesize.go | 8 +- code/sonalyze/sonarlog/types.go | 8 +- code/sonalyze/table/.gitignore | 1 + code/sonalyze/table/data.go | 294 ++++++++ code/sonalyze/table/data_test.go 
| 258 ++++++- code/sonalyze/table/hosts.go | 260 +++++++ code/sonalyze/table/hosts_test.go | 160 +++++ code/sonalyze/table/perform.go | 29 + code/sonalyze/table/query.go | 230 ++++++ code/sonalyze/table/query_test.go | 145 ++++ code/sonalyze/table/queryexpr.go | 657 ++++++++++++++++++ code/sonalyze/table/queryexpr.y | 200 ++++++ 49 files changed, 3783 insertions(+), 129 deletions(-) create mode 100644 code/sonalyze/table/.gitignore create mode 100644 code/sonalyze/table/hosts.go create mode 100644 code/sonalyze/table/hosts_test.go create mode 100644 code/sonalyze/table/perform.go create mode 100644 code/sonalyze/table/query.go create mode 100644 code/sonalyze/table/query_test.go create mode 100644 code/sonalyze/table/queryexpr.go create mode 100644 code/sonalyze/table/queryexpr.y diff --git a/code/generate-table/generate-table.go b/code/generate-table/generate-table.go index 8e90c75b..a9ef271b 100644 --- a/code/generate-table/generate-table.go +++ b/code/generate-table/generate-table.go @@ -85,26 +85,38 @@ func main() { // operators apply; if a type is "GpuSet" then some kind of set operators apply (TBD). 
type typeInfo struct { - helpName string // default is the name as given - formatter string // default is Format + helpName string // default is the name as given + comparer string // setType == false: default is cmp.Compare + formatter string // default is Format + parser string // default is CvtString2 + setComparer string // if "", not a set; otherwise a function } var knownTypes = map[string]typeInfo{ + "bool": typeInfo{ + comparer: "CompareBool", + }, "[]string": typeInfo{ helpName: "string list", formatter: "FormatStrings", + parser: "CvtString2Strings", + setComparer: "SetCompareStrings", }, "F64Ceil": typeInfo{ helpName: "int", + parser: "CvtString2Float64", }, "U64Div1M": typeInfo{ helpName: "int", + parser: "CvtString2Uint64", }, "IntOrEmpty": typeInfo{ helpName: "int", + parser: "CvtString2Int", }, "DateTimeValueOrBlank": typeInfo{ helpName: "DateTimeValue", + parser: "CvtString2DateTimeValue", }, "IsoDateTimeOrUnknown": typeInfo{helpName: "IsoDateTimeValue"}, "Ustr": typeInfo{helpName: "string"}, @@ -112,9 +124,45 @@ var knownTypes = map[string]typeInfo{ "gpuset.GpuSet": typeInfo{ helpName: "GpuSet", formatter: "FormatGpuSet", + parser: "CvtString2GpuSet", + setComparer: "SetCompareGpuSets", + }, + "*Hostnames": typeInfo{ + helpName: "Hostnames", + formatter: "FormatHostnames", + parser: "CvtString2Hostnames", + setComparer: "SetCompareHostnames", }, } +func isComparable(ty string) bool { + if probe, found := knownTypes[ty]; found { + return probe.setComparer == "" + } + return true +} + +func fieldComparer(ty string) string { + if probe, found := knownTypes[ty]; found && probe.comparer != "" { + return probe.comparer + } + return "cmp.Compare" +} + +func setComparer(ty string) string { + if probe, found := knownTypes[ty]; found && probe.setComparer != "" { + return probe.setComparer + } + panic("Not a set") +} + +func isSetType(ty string) bool { + if probe, found := knownTypes[ty]; found { + return probe.setComparer != "" + } + return false +} + func 
formatName(ty string) string { if probe := knownTypes[ty]; probe.formatter != "" { return probe.formatter @@ -122,6 +170,13 @@ func formatName(ty string) string { return "Format" + capitalize(ty) } +func parseName(ty string) string { + if probe := knownTypes[ty]; probe.parser != "" { + return probe.parser + } + return "CvtString2" + capitalize(ty) +} + func userFacingTypeName(ty string) string { if probe := knownTypes[ty]; probe.helpName != "" { return probe.helpName @@ -187,6 +242,7 @@ var ( func fieldSection(tableName string, fields *parser.FieldSect) (fieldList []fieldSpec) { fieldList = fieldFormatters(tableName, fields) + fieldPredicates(tableName, fields) return } @@ -270,6 +326,66 @@ func fieldFormatters(tableName string, fields *parser.FieldSect) (fieldList []fi return } +func fieldPredicates(tableName string, fields *parser.FieldSect) { + fmt.Fprintf(output, "// MT: Constant after initialization; immutable\n") + fmt.Fprintf(output, "var %sPredicates = map[string]Predicate[%s]{\n", tableName, fields.Type) + for _, field := range fields.Fields { + attrs := make(map[string]string) + for _, attr := range field.Attrs { + attrs[attr.Name] = attr.Value + } + + actualFieldName := field.Name + if fn, found := attrs["field"]; found { + actualFieldName = fn + } + + // Here: + // + // * If Convert is nil then type must be string and we just use the input string. + // * Compare must not be nil, it extracts the field and then does a straight value + // comparison + // * TODO: For nil pointers, the field always compares less than a concrete value, + // this may not be ideal + // * TODO: Set comparison. For []string and GpuSet, the relationals should be + // set operators: < for strict subset, etc. To select records where `2` is in + // the gpuset S would simply be 'S >= 2', no special inclusion operator required. + // Right now every set compare returns -1. 
+ + fmt.Fprintf(output, "\t\"%s\": Predicate[%s]{\n", field.Name, fields.Type) + if field.Type != "string" { + fmt.Fprintf(output, "\t\tConvert: %s,\n", parseName(field.Type)) + } + switch { + case isComparable(field.Type): + fmt.Fprintf(output, "\t\tCompare: func(d %s, v any) int {\n", fields.Type) + comparator := fieldComparer(field.Type) + if ptrName := attrs["indirect"]; ptrName != "" { + fmt.Fprintf(output, "\t\t\tif (d.%s) != nil {\n", ptrName) + fmt.Fprintf(output, "\t\t\t\treturn %s((d.%s.%s), v.(%s))\n", + comparator, ptrName, actualFieldName, field.Type) + fmt.Fprintf(output, "\t\t\t}\n") + fmt.Fprintf(output, "\t\t\treturn -1\n") + } else { + fmt.Fprintf(output, "\t\t\treturn %s((d.%s), v.(%s))\n", + comparator, actualFieldName, field.Type) + } + fmt.Fprintf(output, "\t\t},\n") + case isSetType(field.Type): + if attrs["indirect"] != "" { + panic("No support for indirection to set types yet") + } + fmt.Fprintf(output, "\t\tSetCompare: func(d %s, v any, op int) bool {\n", fields.Type) + fmt.Fprintf(output, "\t\t\treturn %s((d.%s), v.(%s), op)\n", setComparer(field.Type), actualFieldName, field.Type) + fmt.Fprintf(output, "\t\t},\n") + default: + panic("Unknown case") + } + fmt.Fprintf(output, "\t},\n") + } + fmt.Fprintf(output, "}\n\n") +} + var validAttr = map[string]bool{ "desc": true, "alias": true, diff --git a/code/go-utils/gpuset/gpuset.go b/code/go-utils/gpuset/gpuset.go index a200ef5d..9e0071d0 100644 --- a/code/go-utils/gpuset/gpuset.go +++ b/code/go-utils/gpuset/gpuset.go @@ -65,7 +65,7 @@ func (this GpuSet) HasSubset(that GpuSet, proper bool) bool { if this == unknown || that == unknown { return false } - return this & that == that && (!proper || this != that) + return this&that == that && (!proper || this != that) } func Adjoin(s GpuSet, xs ...uint32) (GpuSet, error) { diff --git a/code/sonalyze/MANUAL.md b/code/sonalyze/MANUAL.md index 3a9d5602..9fcde980 100644 --- a/code/sonalyze/MANUAL.md +++ b/code/sonalyze/MANUAL.md @@ -203,10 +203,13 @@ 
filters. in some ways; see later section. The option can be repeated. -#### Job filtering and aggregation options +#### Aggregation filter options + +##### Job aggregation filtering These are only available with the `jobs` command. All filters are optional. Jobs must pass all -specified filters. +specified filters. The filters generally have an equivalent `-q` variant (see below) but predate +that functionality. `--merge-all`, `--batch` @@ -312,10 +315,11 @@ specified filters. option does not guarantee that a job is observed at different points in time. Use `--min-runtime` if that's what you mean.) -#### Load filtering and aggregation options +##### Load aggregation filtering These are only available with the `load` command. All filters are optional. Records must pass all -specified filters. +specified filters. The filters generally have an equivalent `-q` variant (see below) but predate +that functionality. `--hourly`, `--half-hourly` @@ -334,9 +338,10 @@ specified filters. Sum bucketed/averaged data by time step across all the selected hosts, yielding an aggregate for this group/subcluster of hosts. Requires bucketing other than `--none`. -#### Sacct filtering and aggregation options +##### Sacct aggregation filtering -Since these are not sample records they have their own filtering rules. +Since these are not sample records they have their own filtering rules. The filters generally have +an equivalent `-q` variant (see below) but predate that functionality. The default is to print "regular" jobs, ie, not Array jobs or Het jobs. Select the latter groups with `-array` and `-het`. @@ -403,6 +408,48 @@ with `-array` and `-het`. Select only het jobs (not implemented yet). +##### General (`-q`) aggregation filtering + +This is an experimental facility. The data extraction and aggregation verbs usually allow a `-q` +option whose argument is an expression that is applied to each record in the aggregation result and +determines whether that record is selected. 
+ +`-q expression` + + Select records for which `expression` is true. The expression is formed from these simple elements: + +* FieldName binop String, where binop is one of <, <=, >, >=, or = +* FieldName "=~" Regexp +* expression "and" expression +* expression "or" expression +* "not" expression +* "(" expression ")" + + The string, if it does not look like an identifier, can be quoted with `'`, `"`, `/`, or `` ` ``. + + The field names are the same as the field names available for printing. + + For example: + + ``` + sonalyze jobs -q 'Cmd =~ python and Host =~ /^(gpu-|int-)/ and Job > 2500000' + ``` + + The typing rules are: + +* for relational operators, the field has a type, and the string value is parsed as that type, and + then a comparison is performed on the two values according to type. +* for `=~`, the field is formatted using the standard formatter without modifiers, and the resulting + string is matched against the regular expression. + + Some fields have set-like values (GPU sets, host sets); how we handle them is TBD, but likely the + relational operators will act as set operators (subsets, set equality) and the string value will + be parsed as a set value. + + There is no query optimization. It may be advantageous to apply record filters first, or to + arrange multiple tests so that the most discriminating test comes first. In the example above, + filtering by job ID first will frequently lead to a faster query. 
+ #### Job printing options `--breakdown=` diff --git a/code/sonalyze/application/parse_format_test.go b/code/sonalyze/application/parse_format_test.go index 63c8d38f..c8e11859 100644 --- a/code/sonalyze/application/parse_format_test.go +++ b/code/sonalyze/application/parse_format_test.go @@ -30,7 +30,7 @@ func TestParseOldFieldNames(t *testing.T) { } func TestParseNewFieldNames(t *testing.T) { - fields := "Timestamp,Host,Cores,MemtotalKB,User,Pid,Ppid,Job,Cmd,CpuPct,CpuKB,RssAnonKB," + + fields := "Timestamp,Hostname,Cores,MemtotalKB,User,Pid,Ppid,Job,Cmd,CpuPct,CpuKB,RssAnonKB," + "Gpus,GpuPct,GpuMemPct,GpuKB,GpuFail,CpuTimeSec,Rolledup,Flags,Version" lines := strings.Split(mockitParse(t, fields), "\n") @@ -39,7 +39,7 @@ func TestParseNewFieldNames(t *testing.T) { } // The next line should be the lowest timestamped record, but in the order of fields - expect := "Timestamp=2024-10-31 00:00,Host=ml6.hpc.uio.no,Cores=64,MemtotalKB=263419260," + + expect := "Timestamp=2024-10-31 00:00,Hostname=ml6.hpc.uio.no,Cores=64,MemtotalKB=263419260," + "User=testuser,Pid=2811127,Ppid=1234,Job=1999327,Cmd=testprog.cuda,CpuPct=96.8,CpuKB=9361016," + "RssAnonKB=476264,Gpus=5,GpuPct=85,GpuMemPct=16,GpuKB=581632,GpuFail=3,CpuTimeSec=1454," + "Rolledup=4,Flags=0,Version=0.9.0" diff --git a/code/sonalyze/cmd/args.go b/code/sonalyze/cmd/args.go index 29c88d84..1477bef7 100644 --- a/code/sonalyze/cmd/args.go +++ b/code/sonalyze/cmd/args.go @@ -415,6 +415,32 @@ func (rfa *RecordFilterArgs) DefaultUserFilters() (allUsers, skipSystemUsers, de return } +/////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Query arguments + +type QueryArgs struct { + QueryStmt string + ParsedQuery PNode +} + +func (qa *QueryArgs) Add(fs *CLI) { + fs.Group("query") + fs.StringVar(&qa.QueryStmt, "q", "", "A query expression") +} + +func (qa *QueryArgs) ReifyForRemote(x *ArgReifier) error { + x.String("q", qa.QueryStmt) + return nil +} + +func (qa 
*QueryArgs) Validate() (err error) { + if qa.QueryStmt != "" { + qa.ParsedQuery, err = ParseQuery(qa.QueryStmt) + } + return +} + /////////////////////////////////////////////////////////////////////////////////////////////////// // // Config file @@ -605,6 +631,7 @@ func NewRepeatableString(xs *[]string) *RepeatableString { type SampleAnalysisArgs struct { DevArgs SourceArgs + QueryArgs RecordFilterArgs ConfigFileArgs VerboseArgs @@ -617,6 +644,7 @@ func (sa *SampleAnalysisArgs) SampleAnalysisFlags() *SampleAnalysisArgs { func (s *SampleAnalysisArgs) Add(fs *CLI) { s.DevArgs.Add(fs) s.SourceArgs.Add(fs) + s.QueryArgs.Add(fs) s.RecordFilterArgs.Add(fs) s.ConfigFileArgs.Add(fs) s.VerboseArgs.Add(fs) @@ -628,6 +656,7 @@ func (s *SampleAnalysisArgs) ReifyForRemote(x *ArgReifier) error { return errors.Join( s.DevArgs.ReifyForRemote(x), s.SourceArgs.ReifyForRemote(x), + s.QueryArgs.ReifyForRemote(x), s.RecordFilterArgs.ReifyForRemote(x), s.ConfigFileArgs.ReifyForRemote(x), ) @@ -637,6 +666,7 @@ func (s *SampleAnalysisArgs) Validate() error { return errors.Join( s.DevArgs.Validate(), s.SourceArgs.Validate(), + s.QueryArgs.Validate(), s.RecordFilterArgs.Validate(), s.ConfigFileArgs.Validate(), s.VerboseArgs.Validate(), @@ -652,6 +682,7 @@ func (s *SampleAnalysisArgs) Validate() error { type HostAnalysisArgs struct { DevArgs SourceArgs + QueryArgs HostArgs ConfigFileArgs VerboseArgs @@ -664,6 +695,7 @@ func (sa *HostAnalysisArgs) HostAnalysisFlags() *HostAnalysisArgs { func (s *HostAnalysisArgs) Add(fs *CLI) { s.DevArgs.Add(fs) s.SourceArgs.Add(fs) + s.QueryArgs.Add(fs) s.HostArgs.Add(fs) s.ConfigFileArgs.Add(fs) s.VerboseArgs.Add(fs) @@ -675,6 +707,7 @@ func (s *HostAnalysisArgs) ReifyForRemote(x *ArgReifier) error { return errors.Join( s.DevArgs.ReifyForRemote(x), s.SourceArgs.ReifyForRemote(x), + s.QueryArgs.ReifyForRemote(x), s.HostArgs.ReifyForRemote(x), s.ConfigFileArgs.ReifyForRemote(x), ) @@ -684,6 +717,7 @@ func (s *HostAnalysisArgs) Validate() error { return 
errors.Join( s.DevArgs.Validate(), s.SourceArgs.Validate(), + s.QueryArgs.Validate(), s.HostArgs.Validate(), s.ConfigFileArgs.Validate(), s.VerboseArgs.Validate(), diff --git a/code/sonalyze/cmd/cli.go b/code/sonalyze/cmd/cli.go index 582f1c94..20c225bb 100644 --- a/code/sonalyze/cmd/cli.go +++ b/code/sonalyze/cmd/cli.go @@ -30,11 +30,12 @@ var ( "operation-selection": 1, "aggregation": 2, "job-filter": 3, - "printing": 4, - "record-filter": 5, - "remote-data-source": 6, - "local-data-source": 7, - "development": 8, + "query": 4, + "printing": 5, + "record-filter": 6, + "remote-data-source": 7, + "local-data-source": 8, + "development": 9, } ) diff --git a/code/sonalyze/cmd/clusters/cluster-table.go b/code/sonalyze/cmd/clusters/cluster-table.go index 7f0dec88..8128580e 100644 --- a/code/sonalyze/cmd/clusters/cluster-table.go +++ b/code/sonalyze/cmd/clusters/cluster-table.go @@ -49,6 +49,26 @@ func init() { DefAlias(clusterFormatters, "Aliases", "aliases") } +// MT: Constant after initialization; immutable +var clusterPredicates = map[string]Predicate[*db.ClusterEntry]{ + "Name": Predicate[*db.ClusterEntry]{ + Compare: func(d *db.ClusterEntry, v any) int { + return cmp.Compare((d.Name), v.(string)) + }, + }, + "Description": Predicate[*db.ClusterEntry]{ + Compare: func(d *db.ClusterEntry, v any) int { + return cmp.Compare((d.Description), v.(string)) + }, + }, + "Aliases": Predicate[*db.ClusterEntry]{ + Convert: CvtString2Strings, + SetCompare: func(d *db.ClusterEntry, v any, op int) bool { + return SetCompareStrings((d.Aliases), v.([]string), op) + }, + }, +} + func (c *ClusterCommand) Summary(out io.Writer) { fmt.Fprint(out, `Display information about the clusters and overall cluster configuration. 
diff --git a/code/sonalyze/cmd/clusters/clusters.go b/code/sonalyze/cmd/clusters/clusters.go index f24e5605..bcb3c490 100644 --- a/code/sonalyze/cmd/clusters/clusters.go +++ b/code/sonalyze/cmd/clusters/clusters.go @@ -59,6 +59,7 @@ ELBAT*/ type ClusterCommand struct { DevArgs RemotingArgsNoCluster + QueryArgs VerboseArgs FormatArgs JobanalyzerDir string @@ -67,6 +68,7 @@ type ClusterCommand struct { func (cc *ClusterCommand) Add(fs *CLI) { cc.DevArgs.Add(fs) cc.RemotingArgsNoCluster.Add(fs) + cc.QueryArgs.Add(fs) cc.VerboseArgs.Add(fs) cc.FormatArgs.Add(fs) fs.Group("local-data-source") @@ -76,6 +78,7 @@ func (cc *ClusterCommand) Add(fs *CLI) { func (cc *ClusterCommand) ReifyForRemote(x *ArgReifier) error { return errors.Join( cc.DevArgs.ReifyForRemote(x), + cc.QueryArgs.ReifyForRemote(x), cc.FormatArgs.ReifyForRemote(x), ) } @@ -92,6 +95,7 @@ func (cc *ClusterCommand) Validate() error { cc.DevArgs.Validate(), cc.VerboseArgs.Validate(), cc.RemotingArgsNoCluster.Validate(), + cc.QueryArgs.Validate(), ValidateFormatArgs( &cc.FormatArgs, clusterDefaultFields, @@ -127,6 +131,12 @@ func (cc *ClusterCommand) Perform(_ io.Reader, stdout, stderr io.Writer) error { return cmp.Compare(a.Name, b.Name) }) + printable, err = ApplyQuery( + cc.ParsedQuery, clusterFormatters, clusterPredicates, printable) + if err != nil { + return err + } + FormatData( stdout, cc.PrintFields, diff --git a/code/sonalyze/cmd/config.go b/code/sonalyze/cmd/config.go index ea5b6527..2bb27149 100644 --- a/code/sonalyze/cmd/config.go +++ b/code/sonalyze/cmd/config.go @@ -26,7 +26,7 @@ func EnsureConfigForInputStreams( // Remove streams for which we have no config data. 
bad := make(map[sonarlog.InputStreamKey]bool) for key, stream := range streams { - hn := (*stream)[0].Host.String() + hn := (*stream)[0].Hostname.String() if cfg.LookupHost(hn) == nil { bad[key] = true Log.Infof("Warning: Missing host configuration for %s", hn) diff --git a/code/sonalyze/cmd/configs/config-table.go b/code/sonalyze/cmd/configs/config-table.go index e2c05b78..4c8df7ca 100644 --- a/code/sonalyze/cmd/configs/config-table.go +++ b/code/sonalyze/cmd/configs/config-table.go @@ -91,6 +91,61 @@ func init() { DefAlias(configFormatters, "GpuMemPct", "gpumempct") } +// MT: Constant after initialization; immutable +var configPredicates = map[string]Predicate[*config.NodeConfigRecord]{ + "Timestamp": Predicate[*config.NodeConfigRecord]{ + Compare: func(d *config.NodeConfigRecord, v any) int { + return cmp.Compare((d.Timestamp), v.(string)) + }, + }, + "Hostname": Predicate[*config.NodeConfigRecord]{ + Compare: func(d *config.NodeConfigRecord, v any) int { + return cmp.Compare((d.Hostname), v.(string)) + }, + }, + "Description": Predicate[*config.NodeConfigRecord]{ + Compare: func(d *config.NodeConfigRecord, v any) int { + return cmp.Compare((d.Description), v.(string)) + }, + }, + "CrossNodeJobs": Predicate[*config.NodeConfigRecord]{ + Convert: CvtString2Bool, + Compare: func(d *config.NodeConfigRecord, v any) int { + return CompareBool((d.CrossNodeJobs), v.(bool)) + }, + }, + "CpuCores": Predicate[*config.NodeConfigRecord]{ + Convert: CvtString2Int, + Compare: func(d *config.NodeConfigRecord, v any) int { + return cmp.Compare((d.CpuCores), v.(int)) + }, + }, + "MemGB": Predicate[*config.NodeConfigRecord]{ + Convert: CvtString2Int, + Compare: func(d *config.NodeConfigRecord, v any) int { + return cmp.Compare((d.MemGB), v.(int)) + }, + }, + "GpuCards": Predicate[*config.NodeConfigRecord]{ + Convert: CvtString2Int, + Compare: func(d *config.NodeConfigRecord, v any) int { + return cmp.Compare((d.GpuCards), v.(int)) + }, + }, + "GpuMemGB": 
Predicate[*config.NodeConfigRecord]{ + Convert: CvtString2Int, + Compare: func(d *config.NodeConfigRecord, v any) int { + return cmp.Compare((d.GpuMemGB), v.(int)) + }, + }, + "GpuMemPct": Predicate[*config.NodeConfigRecord]{ + Convert: CvtString2Bool, + Compare: func(d *config.NodeConfigRecord, v any) int { + return CompareBool((d.GpuMemPct), v.(bool)) + }, + }, +} + func (c *ConfigCommand) Summary(out io.Writer) { fmt.Fprint(out, `Display information about nodes in a cluster configuration. diff --git a/code/sonalyze/cmd/configs/configs.go b/code/sonalyze/cmd/configs/configs.go index b388121e..69846509 100644 --- a/code/sonalyze/cmd/configs/configs.go +++ b/code/sonalyze/cmd/configs/configs.go @@ -77,6 +77,7 @@ ELBAT*/ type ConfigCommand struct { DevArgs + QueryArgs HostArgs RemotingArgs VerboseArgs @@ -87,6 +88,7 @@ type ConfigCommand struct { func (cc *ConfigCommand) Add(fs *CLI) { cc.DevArgs.Add(fs) cc.RemotingArgs.Add(fs) + cc.QueryArgs.Add(fs) cc.HostArgs.Add(fs) cc.VerboseArgs.Add(fs) cc.ConfigFileArgs.Add(fs) @@ -101,6 +103,7 @@ func (cc *ConfigCommand) ReifyForRemote(x *ArgReifier) error { return errors.Join( cc.DevArgs.ReifyForRemote(x), cc.ConfigFileArgs.ReifyForRemote(x), + cc.QueryArgs.ReifyForRemote(x), cc.HostArgs.ReifyForRemote(x), cc.FormatArgs.ReifyForRemote(x), ) @@ -115,6 +118,7 @@ func (cc *ConfigCommand) Validate() error { return errors.Join( cc.DevArgs.Validate(), + cc.QueryArgs.Validate(), cc.HostArgs.Validate(), cc.RemotingArgs.Validate(), cc.VerboseArgs.Validate(), @@ -151,6 +155,12 @@ func (cc *ConfigCommand) Perform(_ io.Reader, stdout, _ io.Writer) error { }) } + records, err = ApplyQuery( + cc.ParsedQuery, configFormatters, configPredicates, records) + if err != nil { + return err + } + slices.SortFunc(records, func(a, b *config.NodeConfigRecord) int { return cmp.Compare(a.Hostname, b.Hostname) }) diff --git a/code/sonalyze/cmd/gpus/gpus-table.go b/code/sonalyze/cmd/gpus/gpus-table.go index 49f84086..1853a1f4 100644 --- 
a/code/sonalyze/cmd/gpus/gpus-table.go +++ b/code/sonalyze/cmd/gpus/gpus-table.go @@ -89,6 +89,76 @@ var gpuFormatters = map[string]Formatter[*ReportLine]{ }, } +// MT: Constant after initialization; immutable +var gpuPredicates = map[string]Predicate[*ReportLine]{ + "Timestamp": Predicate[*ReportLine]{ + Convert: CvtString2DateTimeValue, + Compare: func(d *ReportLine, v any) int { + return cmp.Compare((d.Timestamp), v.(DateTimeValue)) + }, + }, + "Hostname": Predicate[*ReportLine]{ + Convert: CvtString2Ustr, + Compare: func(d *ReportLine, v any) int { + return cmp.Compare((d.Hostname), v.(Ustr)) + }, + }, + "Gpu": Predicate[*ReportLine]{ + Convert: CvtString2Int, + Compare: func(d *ReportLine, v any) int { + return cmp.Compare((d.Gpu), v.(int)) + }, + }, + "FanPct": Predicate[*ReportLine]{ + Convert: CvtString2Int, + Compare: func(d *ReportLine, v any) int { + return cmp.Compare((d.FanPct), v.(int)) + }, + }, + "PerfMode": Predicate[*ReportLine]{ + Convert: CvtString2Int, + Compare: func(d *ReportLine, v any) int { + return cmp.Compare((d.PerfMode), v.(int)) + }, + }, + "MemUsedKB": Predicate[*ReportLine]{ + Convert: CvtString2Int64, + Compare: func(d *ReportLine, v any) int { + return cmp.Compare((d.MemUsedKB), v.(int64)) + }, + }, + "TempC": Predicate[*ReportLine]{ + Convert: CvtString2Int, + Compare: func(d *ReportLine, v any) int { + return cmp.Compare((d.TempC), v.(int)) + }, + }, + "PowerDrawW": Predicate[*ReportLine]{ + Convert: CvtString2Int, + Compare: func(d *ReportLine, v any) int { + return cmp.Compare((d.PowerDrawW), v.(int)) + }, + }, + "PowerLimitW": Predicate[*ReportLine]{ + Convert: CvtString2Int, + Compare: func(d *ReportLine, v any) int { + return cmp.Compare((d.PowerLimitW), v.(int)) + }, + }, + "CeClockMHz": Predicate[*ReportLine]{ + Convert: CvtString2Int, + Compare: func(d *ReportLine, v any) int { + return cmp.Compare((d.CeClockMHz), v.(int)) + }, + }, + "MemClockMHz": Predicate[*ReportLine]{ + Convert: CvtString2Int, + Compare: func(d 
*ReportLine, v any) int { + return cmp.Compare((d.MemClockMHz), v.(int)) + }, + }, +} + func (c *GpuCommand) Summary(out io.Writer) { fmt.Fprint(out, `Experimental: Print per-gpu data across time for one or more cards on one or more nodes. `) diff --git a/code/sonalyze/cmd/gpus/gpus.go b/code/sonalyze/cmd/gpus/gpus.go index 3fa64816..5598bf87 100644 --- a/code/sonalyze/cmd/gpus/gpus.go +++ b/code/sonalyze/cmd/gpus/gpus.go @@ -143,7 +143,7 @@ func (gc *GpuCommand) Perform(stdin io.Reader, stdout, stderr io.Writer) error { if gc.Gpu != -1 && i == gc.Gpu { var r ReportLine r.Timestamp = DateTimeValue(d.Time) - r.Hostname = s.Host + r.Hostname = s.Hostname r.Gpu = i r.PerGpuDatum = &gpu reports = append(reports, &r) @@ -152,6 +152,11 @@ func (gc *GpuCommand) Perform(stdin io.Reader, stdout, stderr io.Writer) error { } } + reports, err = ApplyQuery(gc.ParsedQuery, gpuFormatters, gpuPredicates, reports) + if err != nil { + return err + } + FormatData( stdout, gc.PrintFields, diff --git a/code/sonalyze/cmd/jobs/jobs-table.go b/code/sonalyze/cmd/jobs/jobs-table.go index 92ac38d3..4f9325b5 100644 --- a/code/sonalyze/cmd/jobs/jobs-table.go +++ b/code/sonalyze/cmd/jobs/jobs-table.go @@ -219,11 +219,11 @@ var jobsFormatters = map[string]Formatter[*jobSummary]{ }, Help: "(string) The commands invoking the processes of the job", }, - "Host": { + "Hosts": { Fmt: func(d *jobSummary, ctx PrintMods) string { - return FormatString((d.Host), ctx) + return FormatHostnames((d.Hosts), ctx) }, - Help: "(string) List of the host name(s) running the job (first elements of FQDNs, compressed)", + Help: "(Hostnames) List of the host name(s) running the job", }, "Now": { Fmt: func(d *jobSummary, ctx PrintMods) string { @@ -462,13 +462,421 @@ func init() { DefAlias(jobsFormatters, "Gpus", "gpus") DefAlias(jobsFormatters, "GpuFail", "gpufail") DefAlias(jobsFormatters, "Cmd", "cmd") - DefAlias(jobsFormatters, "Host", "host") + DefAlias(jobsFormatters, "Hosts", "host") + DefAlias(jobsFormatters, 
"Hosts", "hosts") DefAlias(jobsFormatters, "Now", "now") DefAlias(jobsFormatters, "Classification", "classification") DefAlias(jobsFormatters, "CpuTime", "cputime") DefAlias(jobsFormatters, "GpuTime", "gputime") } +// MT: Constant after initialization; immutable +var jobsPredicates = map[string]Predicate[*jobSummary]{ + "JobAndMark": Predicate[*jobSummary]{ + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.JobAndMark), v.(string)) + }, + }, + "Job": Predicate[*jobSummary]{ + Convert: CvtString2Uint32, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.JobId), v.(uint32)) + }, + }, + "User": Predicate[*jobSummary]{ + Convert: CvtString2Ustr, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.User), v.(Ustr)) + }, + }, + "Duration": Predicate[*jobSummary]{ + Convert: CvtString2DurationValue, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.Duration), v.(DurationValue)) + }, + }, + "Start": Predicate[*jobSummary]{ + Convert: CvtString2DateTimeValue, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.Start), v.(DateTimeValue)) + }, + }, + "End": Predicate[*jobSummary]{ + Convert: CvtString2DateTimeValue, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.End), v.(DateTimeValue)) + }, + }, + "CpuAvgPct": Predicate[*jobSummary]{ + Convert: CvtString2Float64, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.computed[kCpuPctAvg]), v.(F64Ceil)) + }, + }, + "CpuPeakPct": Predicate[*jobSummary]{ + Convert: CvtString2Float64, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.computed[kCpuPctPeak]), v.(F64Ceil)) + }, + }, + "RelativeCpuAvgPct": Predicate[*jobSummary]{ + Convert: CvtString2Float64, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.computed[kRcpuPctAvg]), v.(F64Ceil)) + }, + }, + "RelativeCpuPeakPct": Predicate[*jobSummary]{ + Convert: CvtString2Float64, + Compare: func(d *jobSummary, v any) int { + return 
cmp.Compare((d.computed[kRcpuPctPeak]), v.(F64Ceil)) + }, + }, + "MemAvgGB": Predicate[*jobSummary]{ + Convert: CvtString2Float64, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.computed[kCpuGBAvg]), v.(F64Ceil)) + }, + }, + "MemPeakGB": Predicate[*jobSummary]{ + Convert: CvtString2Float64, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.computed[kCpuGBPeak]), v.(F64Ceil)) + }, + }, + "RelativeMemAvgPct": Predicate[*jobSummary]{ + Convert: CvtString2Float64, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.computed[kRcpuGBAvg]), v.(F64Ceil)) + }, + }, + "RelativeMemPeakPct": Predicate[*jobSummary]{ + Convert: CvtString2Float64, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.computed[kRcpuGBPeak]), v.(F64Ceil)) + }, + }, + "ResidentMemAvgGB": Predicate[*jobSummary]{ + Convert: CvtString2Float64, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.computed[kRssAnonGBAvg]), v.(F64Ceil)) + }, + }, + "ResidentMemPeakGB": Predicate[*jobSummary]{ + Convert: CvtString2Float64, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.computed[kRssAnonGBPeak]), v.(F64Ceil)) + }, + }, + "RelativeResidentMemAvgPct": Predicate[*jobSummary]{ + Convert: CvtString2Float64, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.computed[kRrssAnonGBAvg]), v.(F64Ceil)) + }, + }, + "RelativeResidentMemPeakPct": Predicate[*jobSummary]{ + Convert: CvtString2Float64, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.computed[kRrssAnonGBPeak]), v.(F64Ceil)) + }, + }, + "GpuAvgPct": Predicate[*jobSummary]{ + Convert: CvtString2Float64, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.computed[kGpuPctAvg]), v.(F64Ceil)) + }, + }, + "GpuPeakPct": Predicate[*jobSummary]{ + Convert: CvtString2Float64, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.computed[kGpuPctPeak]), v.(F64Ceil)) + }, + }, + "RelativeGpuAvgPct": 
Predicate[*jobSummary]{ + Convert: CvtString2Float64, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.computed[kRgpuPctAvg]), v.(F64Ceil)) + }, + }, + "RelativeGpuPeakPct": Predicate[*jobSummary]{ + Convert: CvtString2Float64, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.computed[kRgpuPctPeak]), v.(F64Ceil)) + }, + }, + "OccupiedRelativeGpuAvgPct": Predicate[*jobSummary]{ + Convert: CvtString2Float64, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.computed[kSgpuPctAvg]), v.(F64Ceil)) + }, + }, + "OccupiedRelativeGpuPeakPct": Predicate[*jobSummary]{ + Convert: CvtString2Float64, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.computed[kSgpuPctPeak]), v.(F64Ceil)) + }, + }, + "GpuMemAvgGB": Predicate[*jobSummary]{ + Convert: CvtString2Float64, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.computed[kGpuGBAvg]), v.(F64Ceil)) + }, + }, + "GpuMemPeakGB": Predicate[*jobSummary]{ + Convert: CvtString2Float64, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.computed[kGpuGBPeak]), v.(F64Ceil)) + }, + }, + "RelativeGpuMemAvgPct": Predicate[*jobSummary]{ + Convert: CvtString2Float64, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.computed[kRgpuGBAvg]), v.(F64Ceil)) + }, + }, + "RelativeGpuMemPeakPct": Predicate[*jobSummary]{ + Convert: CvtString2Float64, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.computed[kRgpuGBPeak]), v.(F64Ceil)) + }, + }, + "OccupiedRelativeGpuMemAvgPct": Predicate[*jobSummary]{ + Convert: CvtString2Float64, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.computed[kSgpuGBAvg]), v.(F64Ceil)) + }, + }, + "OccupiedRelativeGpuMemPeakPct": Predicate[*jobSummary]{ + Convert: CvtString2Float64, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.computed[kSgpuGBPeak]), v.(F64Ceil)) + }, + }, + "Gpus": Predicate[*jobSummary]{ + Convert: CvtString2GpuSet, + 
SetCompare: func(d *jobSummary, v any, op int) bool { + return SetCompareGpuSets((d.Gpus), v.(gpuset.GpuSet), op) + }, + }, + "GpuFail": Predicate[*jobSummary]{ + Convert: CvtString2Int, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.GpuFail), v.(int)) + }, + }, + "Cmd": Predicate[*jobSummary]{ + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.Cmd), v.(string)) + }, + }, + "Hosts": Predicate[*jobSummary]{ + Convert: CvtString2Hostnames, + SetCompare: func(d *jobSummary, v any, op int) bool { + return SetCompareHostnames((d.Hosts), v.(*Hostnames), op) + }, + }, + "Now": Predicate[*jobSummary]{ + Convert: CvtString2DateTimeValue, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.Now), v.(DateTimeValue)) + }, + }, + "Classification": Predicate[*jobSummary]{ + Convert: CvtString2Int, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.Classification), v.(int)) + }, + }, + "CpuTime": Predicate[*jobSummary]{ + Convert: CvtString2DurationValue, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.CpuTime), v.(DurationValue)) + }, + }, + "GpuTime": Predicate[*jobSummary]{ + Convert: CvtString2DurationValue, + Compare: func(d *jobSummary, v any) int { + return cmp.Compare((d.GpuTime), v.(DurationValue)) + }, + }, + "SomeGpu": Predicate[*jobSummary]{ + Convert: CvtString2Bool, + Compare: func(d *jobSummary, v any) int { + return CompareBool((d.computedFlags&kUsesGpu != 0), v.(bool)) + }, + }, + "NoGpu": Predicate[*jobSummary]{ + Convert: CvtString2Bool, + Compare: func(d *jobSummary, v any) int { + return CompareBool((d.computedFlags&kDoesNotUseGpu != 0), v.(bool)) + }, + }, + "Running": Predicate[*jobSummary]{ + Convert: CvtString2Bool, + Compare: func(d *jobSummary, v any) int { + return CompareBool((d.computedFlags&kIsLiveAtEnd != 0), v.(bool)) + }, + }, + "Completed": Predicate[*jobSummary]{ + Convert: CvtString2Bool, + Compare: func(d *jobSummary, v any) int { + return 
CompareBool((d.computedFlags&kIsNotLiveAtEnd != 0), v.(bool)) + }, + }, + "Zombie": Predicate[*jobSummary]{ + Convert: CvtString2Bool, + Compare: func(d *jobSummary, v any) int { + return CompareBool((d.computedFlags&kIsZombie != 0), v.(bool)) + }, + }, + "Primordial": Predicate[*jobSummary]{ + Convert: CvtString2Bool, + Compare: func(d *jobSummary, v any) int { + return CompareBool((d.computedFlags&kIsLiveAtStart != 0), v.(bool)) + }, + }, + "BornLater": Predicate[*jobSummary]{ + Convert: CvtString2Bool, + Compare: func(d *jobSummary, v any) int { + return CompareBool((d.computedFlags&kIsNotLiveAtStart != 0), v.(bool)) + }, + }, + "Submit": Predicate[*jobSummary]{ + Convert: CvtString2DateTimeValue, + Compare: func(d *jobSummary, v any) int { + if (d.sacctInfo) != nil { + return cmp.Compare((d.sacctInfo.Submit), v.(DateTimeValue)) + } + return -1 + }, + }, + "JobName": Predicate[*jobSummary]{ + Convert: CvtString2Ustr, + Compare: func(d *jobSummary, v any) int { + if (d.sacctInfo) != nil { + return cmp.Compare((d.sacctInfo.JobName), v.(Ustr)) + } + return -1 + }, + }, + "State": Predicate[*jobSummary]{ + Convert: CvtString2Ustr, + Compare: func(d *jobSummary, v any) int { + if (d.sacctInfo) != nil { + return cmp.Compare((d.sacctInfo.State), v.(Ustr)) + } + return -1 + }, + }, + "Account": Predicate[*jobSummary]{ + Convert: CvtString2Ustr, + Compare: func(d *jobSummary, v any) int { + if (d.sacctInfo) != nil { + return cmp.Compare((d.sacctInfo.Account), v.(Ustr)) + } + return -1 + }, + }, + "Layout": Predicate[*jobSummary]{ + Convert: CvtString2Ustr, + Compare: func(d *jobSummary, v any) int { + if (d.sacctInfo) != nil { + return cmp.Compare((d.sacctInfo.Layout), v.(Ustr)) + } + return -1 + }, + }, + "Reservation": Predicate[*jobSummary]{ + Convert: CvtString2Ustr, + Compare: func(d *jobSummary, v any) int { + if (d.sacctInfo) != nil { + return cmp.Compare((d.sacctInfo.Reservation), v.(Ustr)) + } + return -1 + }, + }, + "Partition": Predicate[*jobSummary]{ + 
Convert: CvtString2Ustr, + Compare: func(d *jobSummary, v any) int { + if (d.sacctInfo) != nil { + return cmp.Compare((d.sacctInfo.Partition), v.(Ustr)) + } + return -1 + }, + }, + "RequestedGpus": Predicate[*jobSummary]{ + Convert: CvtString2Ustr, + Compare: func(d *jobSummary, v any) int { + if (d.sacctInfo) != nil { + return cmp.Compare((d.sacctInfo.ReqGPUS), v.(Ustr)) + } + return -1 + }, + }, + "DiskReadAvgGB": Predicate[*jobSummary]{ + Convert: CvtString2Uint32, + Compare: func(d *jobSummary, v any) int { + if (d.sacctInfo) != nil { + return cmp.Compare((d.sacctInfo.AveDiskRead), v.(uint32)) + } + return -1 + }, + }, + "DiskWriteAvgGB": Predicate[*jobSummary]{ + Convert: CvtString2Uint32, + Compare: func(d *jobSummary, v any) int { + if (d.sacctInfo) != nil { + return cmp.Compare((d.sacctInfo.AveDiskWrite), v.(uint32)) + } + return -1 + }, + }, + "RequestedCpus": Predicate[*jobSummary]{ + Convert: CvtString2Uint32, + Compare: func(d *jobSummary, v any) int { + if (d.sacctInfo) != nil { + return cmp.Compare((d.sacctInfo.ReqCPUS), v.(uint32)) + } + return -1 + }, + }, + "RequestedMemGB": Predicate[*jobSummary]{ + Convert: CvtString2Uint32, + Compare: func(d *jobSummary, v any) int { + if (d.sacctInfo) != nil { + return cmp.Compare((d.sacctInfo.ReqMem), v.(uint32)) + } + return -1 + }, + }, + "RequestedNodes": Predicate[*jobSummary]{ + Convert: CvtString2Uint32, + Compare: func(d *jobSummary, v any) int { + if (d.sacctInfo) != nil { + return cmp.Compare((d.sacctInfo.ReqNodes), v.(uint32)) + } + return -1 + }, + }, + "TimeLimit": Predicate[*jobSummary]{ + Convert: CvtString2U32Duration, + Compare: func(d *jobSummary, v any) int { + if (d.sacctInfo) != nil { + return cmp.Compare((d.sacctInfo.TimelimitRaw), v.(U32Duration)) + } + return -1 + }, + }, + "ExitCode": Predicate[*jobSummary]{ + Convert: CvtString2Uint8, + Compare: func(d *jobSummary, v any) int { + if (d.sacctInfo) != nil { + return cmp.Compare((d.sacctInfo.ExitCode), v.(uint8)) + } + return -1 + }, + }, 
+} + func (c *JobsCommand) Summary(out io.Writer) { fmt.Fprint(out, `Display jobs jobs aggregated from process samples. @@ -517,8 +925,8 @@ var jobsAliases = map[string][]string{ "gpumem": []string{"gpumem-avg", "gpumem-peak"}, "rgpumem": []string{"rgpumem-avg", "rgpumem-peak"}, "sgpumem": []string{"sgpumem-avg", "sgpumem-peak"}, - "All": []string{"JobAndMark", "Job", "User", "Duration", "Duration/sec", "Start", "Start/sec", "End", "End/sec", "CpuAvgPct", "CpuPeakPct", "RelativeCpuAvgPct", "RelativeCpuPeakPct", "MemAvgGB", "MemPeakGB", "RelativeMemAvgPct", "RelativeMemPeakPct", "ResidentMemAvgGB", "ResidentMemPeakGB", "RelativeResidentMemAvgPct", "RelativeResidentMemPeakPct", "GpuAvgPct", "GpuPeakPct", "RelativeGpuAvgPct", "RelativeGpuPeakPct", "OccupiedRelativeGpuAvgPct", "OccupiedRelativeGpuPeakPct", "GpuMemAvgGB", "GpuMemPeakGB", "RelativeGpuMemAvgPct", "RelativeGpuMemPeakPct", "OccupiedRelativeGpuMemAvgPct", "OccupiedRelativeGpuMemPeakPct", "Gpus", "GpuFail", "Cmd", "Host", "Now", "Now/sec", "Classification", "CpuTime/sec", "CpuTime", "GpuTime/sec", "GpuTime", "SomeGpu", "NoGpu", "Running", "Completed", "Zombie", "Primordial", "BornLater"}, - "Std": []string{"JobAndMark", "User", "Duration", "Host"}, + "All": []string{"JobAndMark", "Job", "User", "Duration", "Duration/sec", "Start", "Start/sec", "End", "End/sec", "CpuAvgPct", "CpuPeakPct", "RelativeCpuAvgPct", "RelativeCpuPeakPct", "MemAvgGB", "MemPeakGB", "RelativeMemAvgPct", "RelativeMemPeakPct", "ResidentMemAvgGB", "ResidentMemPeakGB", "RelativeResidentMemAvgPct", "RelativeResidentMemPeakPct", "GpuAvgPct", "GpuPeakPct", "RelativeGpuAvgPct", "RelativeGpuPeakPct", "OccupiedRelativeGpuAvgPct", "OccupiedRelativeGpuPeakPct", "GpuMemAvgGB", "GpuMemPeakGB", "RelativeGpuMemAvgPct", "RelativeGpuMemPeakPct", "OccupiedRelativeGpuMemAvgPct", "OccupiedRelativeGpuMemPeakPct", "Gpus", "GpuFail", "Cmd", "Hosts", "Now", "Now/sec", "Classification", "CpuTime/sec", "CpuTime", "GpuTime/sec", "GpuTime", "SomeGpu", "NoGpu", 
"Running", "Completed", "Zombie", "Primordial", "BornLater"}, + "Std": []string{"JobAndMark", "User", "Duration", "Hosts"}, "Cpu": []string{"CpuAvgPct", "CpuPeakPct"}, "RelativeCpu": []string{"RelativeCpuAvgPct", "RelativeCpuPeakPct"}, "Mem": []string{"MemAvgGB", "MemPeakGB"}, diff --git a/code/sonalyze/cmd/jobs/jobs.go b/code/sonalyze/cmd/jobs/jobs.go index de7c0028..85791d19 100644 --- a/code/sonalyze/cmd/jobs/jobs.go +++ b/code/sonalyze/cmd/jobs/jobs.go @@ -365,6 +365,12 @@ func (jc *JobsCommand) DefaultRecordFilters() ( // `--zombie` implies `--user=-` because the use case for `--zombie` is to hunt across all // users. allUsers, skipSystemUsers = jc.Zombie, false + + // `-q` implies `--user=-`: it's a power user command; the User field may be used in the + // query. + if jc.QueryStmt != "" { + allUsers = true + } } excludeSystemCommands = true excludeHeartbeat = true diff --git a/code/sonalyze/cmd/jobs/perform.go b/code/sonalyze/cmd/jobs/perform.go index 5e33e48c..fa7c5e9c 100644 --- a/code/sonalyze/cmd/jobs/perform.go +++ b/code/sonalyze/cmd/jobs/perform.go @@ -11,7 +11,6 @@ import ( "go-utils/config" "go-utils/gpuset" "go-utils/hostglob" - umaps "go-utils/maps" "go-utils/sonalyze" . 
"sonalyze/cmd" @@ -98,7 +97,7 @@ type jobAggregate struct { computed [numF64Fields]float64 IsZombie bool Cmd string - Host string + Hosts *Hostnames } func (jc *JobsCommand) NeedsBounds() bool { @@ -183,7 +182,7 @@ func (jc *JobsCommand) aggregateAndFilterJobs( } var ( needCmd = false - needHost = false + needHosts = false needJobAndMark = false needSacctInfo = slurmFilter != nil ) @@ -191,8 +190,8 @@ func (jc *JobsCommand) aggregateAndFilterJobs( switch f.Name { case "cmd", "Cmd": needCmd = true - case "host", "Host": - needHost = true + case "host", "hosts", "Hosts": + needHosts = true case "jobm", "JobAndMark": needJobAndMark = true case "Submit", "JobName", "State", "Account", "Layout", "Reservation", @@ -207,13 +206,13 @@ func (jc *JobsCommand) aggregateAndFilterJobs( discarded := 0 for _, job := range jobs { if uint(len(*job)) >= minSamples { - host := (*job)[0].Host + host := (*job)[0].Hostname jobId := (*job)[0].Job user := (*job)[0].User first := (*job)[0].Timestamp last := (*job)[len(*job)-1].Timestamp duration := last - first - aggregate := jc.aggregateJob(cfg, host, *job, needCmd, needHost, jc.Zombie) + aggregate := jc.aggregateJob(cfg, host, *job, needCmd, needHosts, jc.Zombie) aggregate.computed[kDuration] = float64(duration) usesGpu := !aggregate.Gpus.IsEmpty() flags := 0 @@ -436,7 +435,7 @@ func (jc *JobsCommand) aggregateJob( cfg *config.ClusterConfig, host Ustr, job sonarlog.SampleStream, - needCmd, needHost, needZombie bool, + needCmd, needHosts, needZombie bool, ) jobAggregate { gpus := gpuset.EmptyGpuSet() var ( @@ -531,39 +530,19 @@ func (jc *JobsCommand) aggregateJob( } } - hostnames := "" - if needHost { - // TODO: It's not clear any more why len(hosts) would ever be other than 1, and why this - // processing is needed at all. This could be very old code that is no longer relevant. - // The Go code just copies the Rust code here. 
- // - // Names are assumed to be compressed as the set of jobs is always the output of some - // merge process that will compress when appropriate. (If they are not compressed for - // reasons having to do with how the merge was done, and we don't compress them here, - // then there may be substantial redundancy in the output: "c1-10.fox, c1-11.fox", etc, - // instead of the desirable "c1-[10,11].fox", but that should not currently be an issue - // for `jobs`.) Therefore there is no compression here. But even the uniq'ing, sorting - // and joining may be redundant. - hosts := make(map[string]bool) + var hosts *Hostnames + if needHosts { + hosts = NewHostnames() for _, s := range job { - var name string - if jc.PrintOpts.Fixed { - name, _, _ = strings.Cut(s.Host.String(), ".") - } else { - name = s.Host.String() - } - hosts[name] = true + hosts.Add(s.Hostname.String()) } - keys := umaps.Keys(hosts) - slices.Sort(keys) - hostnames = strings.Join(keys, ", ") } n := float64(len(job)) a := jobAggregate{ Gpus: gpus, GpuFail: int(gpuFail), Cmd: cmd, - Host: hostnames, + Hosts: hosts, IsZombie: isZombie, } a.computed[kCpuPctAvg] = cpuPctAvg / n diff --git a/code/sonalyze/cmd/jobs/print.go b/code/sonalyze/cmd/jobs/print.go index 51d741cb..71673061 100644 --- a/code/sonalyze/cmd/jobs/print.go +++ b/code/sonalyze/cmd/jobs/print.go @@ -70,7 +70,7 @@ FIELDS *jobSummary Gpus gpuset.GpuSet desc:"GPU device numbers used by the job, 'none' if none or 'unknown' in error states" alias:"gpus" GpuFail int desc:"Flag indicating GPU status (0=Ok, 1=Failing)" alias:"gpufail" Cmd string desc:"The commands invoking the processes of the job" alias:"cmd" - Host string desc:"List of the host name(s) running the job (first elements of FQDNs, compressed)" alias:"host" + Hosts *Hostnames desc:"List of the host name(s) running the job" alias:"host,hosts" Now DateTimeValue desc:"The current time" alias:"now" Classification int desc:"Bit vector of live-at-start (2) and live-at-end (1) flags" 
alias:"classification" CpuTime DurationValue desc:"Total CPU time of the job across all cores" alias:"cputime" @@ -166,9 +166,9 @@ ALIASES GpuAvgPct,GpuPeakPct,RelativeGpuAvgPct,RelativeGpuPeakPct,OccupiedRelativeGpuAvgPct,\ OccupiedRelativeGpuPeakPct,GpuMemAvgGB,GpuMemPeakGB,RelativeGpuMemAvgPct,\ RelativeGpuMemPeakPct,OccupiedRelativeGpuMemAvgPct,OccupiedRelativeGpuMemPeakPct,Gpus,GpuFail,\ - Cmd,Host,Now,Now/sec,Classification,CpuTime/sec,CpuTime,GpuTime/sec,GpuTime,\ + Cmd,Hosts,Now,Now/sec,Classification,CpuTime/sec,CpuTime,GpuTime/sec,GpuTime,\ SomeGpu,NoGpu,Running,Completed,Zombie,Primordial,BornLater - Std JobAndMark,User,Duration,Host + Std JobAndMark,User,Duration,Hosts Cpu CpuAvgPct,CpuPeakPct RelativeCpu RelativeCpuAvgPct,RelativeCpuPeakPct Mem MemAvgGB,MemPeakGB @@ -194,6 +194,11 @@ DEFAULTS default ELBAT*/ func (jc *JobsCommand) printJobSummaries(out io.Writer, summaries []*jobSummary) error { + summaries, err := ApplyQuery(jc.ParsedQuery, jobsFormatters, jobsPredicates, summaries) + if err != nil { + return err + } + // Sort ascending by lowest beginning timestamp, and if those are equal, by job number. 
slices.SortStableFunc(summaries, func(a, b *jobSummary) int { c := cmp.Compare(a.Start, b.Start) @@ -229,6 +234,7 @@ func (jc *JobsCommand) printJobSummaries(out io.Writer, summaries []*jobSummary) } summaries = slices.DeleteFunc(summaries, func(s *jobSummary) bool { return !s.selected }) + FormatData( out, jc.PrintFields, diff --git a/code/sonalyze/cmd/load/load-table.go b/code/sonalyze/cmd/load/load-table.go index 84738170..0755fcac 100644 --- a/code/sonalyze/cmd/load/load-table.go +++ b/code/sonalyze/cmd/load/load-table.go @@ -143,6 +143,106 @@ func init() { DefAlias(loadFormatters, "Hostname", "host") } +// MT: Constant after initialization; immutable +var loadPredicates = map[string]Predicate[*ReportRecord]{ + "Now": Predicate[*ReportRecord]{ + Convert: CvtString2DateTimeValue, + Compare: func(d *ReportRecord, v any) int { + return cmp.Compare((d.Now), v.(DateTimeValue)) + }, + }, + "DateTime": Predicate[*ReportRecord]{ + Convert: CvtString2DateTimeValue, + Compare: func(d *ReportRecord, v any) int { + return cmp.Compare((d.DateTime), v.(DateTimeValue)) + }, + }, + "Date": Predicate[*ReportRecord]{ + Convert: CvtString2DateValue, + Compare: func(d *ReportRecord, v any) int { + return cmp.Compare((d.Date), v.(DateValue)) + }, + }, + "Time": Predicate[*ReportRecord]{ + Convert: CvtString2TimeValue, + Compare: func(d *ReportRecord, v any) int { + return cmp.Compare((d.Time), v.(TimeValue)) + }, + }, + "Cpu": Predicate[*ReportRecord]{ + Convert: CvtString2Int, + Compare: func(d *ReportRecord, v any) int { + return cmp.Compare((d.Cpu), v.(int)) + }, + }, + "RelativeCpu": Predicate[*ReportRecord]{ + Convert: CvtString2Int, + Compare: func(d *ReportRecord, v any) int { + return cmp.Compare((d.RelativeCpu), v.(int)) + }, + }, + "VirtualGB": Predicate[*ReportRecord]{ + Convert: CvtString2Int, + Compare: func(d *ReportRecord, v any) int { + return cmp.Compare((d.VirtualGB), v.(int)) + }, + }, + "RelativeVirtualMem": Predicate[*ReportRecord]{ + Convert: CvtString2Int, + 
Compare: func(d *ReportRecord, v any) int { + return cmp.Compare((d.RelativeVirtualMem), v.(int)) + }, + }, + "ResidentGB": Predicate[*ReportRecord]{ + Convert: CvtString2Int, + Compare: func(d *ReportRecord, v any) int { + return cmp.Compare((d.ResidentGB), v.(int)) + }, + }, + "RelativeResidentMem": Predicate[*ReportRecord]{ + Convert: CvtString2Int, + Compare: func(d *ReportRecord, v any) int { + return cmp.Compare((d.RelativeResidentMem), v.(int)) + }, + }, + "Gpu": Predicate[*ReportRecord]{ + Convert: CvtString2Int, + Compare: func(d *ReportRecord, v any) int { + return cmp.Compare((d.Gpu), v.(int)) + }, + }, + "RelativeGpu": Predicate[*ReportRecord]{ + Convert: CvtString2Int, + Compare: func(d *ReportRecord, v any) int { + return cmp.Compare((d.RelativeGpu), v.(int)) + }, + }, + "GpuGB": Predicate[*ReportRecord]{ + Convert: CvtString2Int, + Compare: func(d *ReportRecord, v any) int { + return cmp.Compare((d.GpuGB), v.(int)) + }, + }, + "RelativeGpuMem": Predicate[*ReportRecord]{ + Convert: CvtString2Int, + Compare: func(d *ReportRecord, v any) int { + return cmp.Compare((d.RelativeGpuMem), v.(int)) + }, + }, + "Gpus": Predicate[*ReportRecord]{ + Convert: CvtString2GpuSet, + SetCompare: func(d *ReportRecord, v any, op int) bool { + return SetCompareGpuSets((d.Gpus), v.(gpuset.GpuSet), op) + }, + }, + "Hostname": Predicate[*ReportRecord]{ + Convert: CvtString2Ustr, + Compare: func(d *ReportRecord, v any) int { + return cmp.Compare((d.Hostname), v.(Ustr)) + }, + }, +} + type ReportRecord struct { Now DateTimeValue DateTime DateTimeValue diff --git a/code/sonalyze/cmd/load/perform.go b/code/sonalyze/cmd/load/perform.go index f1bc60dc..85a72b4f 100644 --- a/code/sonalyze/cmd/load/perform.go +++ b/code/sonalyze/cmd/load/perform.go @@ -1,8 +1,10 @@ package load import ( + "fmt" "io" "math" + "slices" "time" "go-utils/config" @@ -73,7 +75,7 @@ func (lc *LoadCommand) Perform( if cfg != nil { for _, stream := range mergedStreams { // probe is non-nil by previous 
construction - probe := cfg.LookupHost((*stream)[0].Host.String()) + probe := cfg.LookupHost((*stream)[0].Hostname.String()) if theConf.Description != "" { theConf.Description += "|||" // JSON-compatible separator } @@ -99,21 +101,37 @@ func (lc *LoadCommand) Perform( } } + var queryNeg func(*ReportRecord) bool + if lc.ParsedQuery != nil { + var err error + queryNeg, err = CompileQueryNeg(loadFormatters, loadPredicates, lc.ParsedQuery) + if err != nil { + return fmt.Errorf("Could not compile query: %v", err) + } + } + // Generate data to be printed reports := make([]LoadReport, 0) for _, stream := range mergedStreams { - hostname := (*stream)[0].Host.String() + hostname := (*stream)[0].Hostname.String() conf := mergedConf if conf == nil && cfg != nil { conf = cfg.LookupHost(hostname) } + rs := generateReport(*stream, time.Now().Unix(), conf) + if queryNeg != nil { + rs = slices.DeleteFunc(rs, queryNeg) + } reports = append(reports, LoadReport{ hostname: hostname, - records: generateReport(*stream, time.Now().Unix(), conf), + records: rs, conf: conf, }) } + // The query has already been applied to each report's list of records in the loop above. + // A report whose list becomes empty is still appended; possibly it should disappear entirely. 
+ // And print it lc.printStreams(out, reports) @@ -142,13 +160,13 @@ func (lc *LoadCommand) insertMissingRecords(ss *sonarlog.SampleStream, fromIncl, default: panic("Unexpected case") } - host := (*ss)[0].Host + host := (*ss)[0].Hostname t := trunc(fromIncl) result := make(sonarlog.SampleStream, 0) for _, s := range *ss { for t < s.Timestamp { - newS := sonarlog.Sample{Sample: &db.Sample{Timestamp: t, Host: host}} + newS := sonarlog.Sample{Sample: &db.Sample{Timestamp: t, Hostname: host}} result = append(result, newS) t = step(t) } @@ -157,7 +175,7 @@ func (lc *LoadCommand) insertMissingRecords(ss *sonarlog.SampleStream, fromIncl, } ending := trunc(toIncl) for t <= ending { - newS := sonarlog.Sample{Sample: &db.Sample{Timestamp: t, Host: host}} + newS := sonarlog.Sample{Sample: &db.Sample{Timestamp: t, Hostname: host}} result = append(result, newS) t = step(t) } @@ -208,7 +226,7 @@ func generateReport( GpuGB: int(d.GpuKB / (1024 * 1024)), RelativeGpuMem: relativeGpuMem, Gpus: d.Gpus, - Hostname: d.Host, + Hostname: d.Hostname, }) } return diff --git a/code/sonalyze/cmd/metadata/metadata-table.go b/code/sonalyze/cmd/metadata/metadata-table.go index 57458ff8..320ed56b 100644 --- a/code/sonalyze/cmd/metadata/metadata-table.go +++ b/code/sonalyze/cmd/metadata/metadata-table.go @@ -47,6 +47,27 @@ func init() { DefAlias(metadataFormatters, "Latest", "latest") } +// MT: Constant after initialization; immutable +var metadataPredicates = map[string]Predicate[*metadataItem]{ + "Hostname": Predicate[*metadataItem]{ + Compare: func(d *metadataItem, v any) int { + return cmp.Compare((d.Hostname), v.(string)) + }, + }, + "Earliest": Predicate[*metadataItem]{ + Convert: CvtString2DateTimeValue, + Compare: func(d *metadataItem, v any) int { + return cmp.Compare((d.Earliest), v.(DateTimeValue)) + }, + }, + "Latest": Predicate[*metadataItem]{ + Convert: CvtString2DateTimeValue, + Compare: func(d *metadataItem, v any) int { + return cmp.Compare((d.Latest), v.(DateTimeValue)) + }, + 
}, +} + type metadataItem struct { Hostname string Earliest DateTimeValue diff --git a/code/sonalyze/cmd/metadata/metadata.go b/code/sonalyze/cmd/metadata/metadata.go index 56bdc78d..d4f69776 100644 --- a/code/sonalyze/cmd/metadata/metadata.go +++ b/code/sonalyze/cmd/metadata/metadata.go @@ -178,6 +178,10 @@ func (mdc *MetadataCommand) Perform( } return c }) + items, err := ApplyQuery(mdc.ParsedQuery, metadataFormatters, metadataPredicates, items) + if err != nil { + return err + } FormatData(out, mdc.PrintFields, metadataFormatters, mdc.PrintOpts, items) } diff --git a/code/sonalyze/cmd/nodes/node-table.go b/code/sonalyze/cmd/nodes/node-table.go index f7549544..4e3c74ad 100644 --- a/code/sonalyze/cmd/nodes/node-table.go +++ b/code/sonalyze/cmd/nodes/node-table.go @@ -84,6 +84,55 @@ func init() { DefAlias(nodeFormatters, "GpuMemPct", "gpumempct") } +// MT: Constant after initialization; immutable +var nodePredicates = map[string]Predicate[*config.NodeConfigRecord]{ + "Timestamp": Predicate[*config.NodeConfigRecord]{ + Compare: func(d *config.NodeConfigRecord, v any) int { + return cmp.Compare((d.Timestamp), v.(string)) + }, + }, + "Hostname": Predicate[*config.NodeConfigRecord]{ + Compare: func(d *config.NodeConfigRecord, v any) int { + return cmp.Compare((d.Hostname), v.(string)) + }, + }, + "Description": Predicate[*config.NodeConfigRecord]{ + Compare: func(d *config.NodeConfigRecord, v any) int { + return cmp.Compare((d.Description), v.(string)) + }, + }, + "CpuCores": Predicate[*config.NodeConfigRecord]{ + Convert: CvtString2Int, + Compare: func(d *config.NodeConfigRecord, v any) int { + return cmp.Compare((d.CpuCores), v.(int)) + }, + }, + "MemGB": Predicate[*config.NodeConfigRecord]{ + Convert: CvtString2Int, + Compare: func(d *config.NodeConfigRecord, v any) int { + return cmp.Compare((d.MemGB), v.(int)) + }, + }, + "GpuCards": Predicate[*config.NodeConfigRecord]{ + Convert: CvtString2Int, + Compare: func(d *config.NodeConfigRecord, v any) int { + return 
cmp.Compare((d.GpuCards), v.(int)) + }, + }, + "GpuMemGB": Predicate[*config.NodeConfigRecord]{ + Convert: CvtString2Int, + Compare: func(d *config.NodeConfigRecord, v any) int { + return cmp.Compare((d.GpuMemGB), v.(int)) + }, + }, + "GpuMemPct": Predicate[*config.NodeConfigRecord]{ + Convert: CvtString2Bool, + Compare: func(d *config.NodeConfigRecord, v any) int { + return CompareBool((d.GpuMemPct), v.(bool)) + }, + }, +} + func (c *NodeCommand) Summary(out io.Writer) { fmt.Fprint(out, `Display self-reported information about nodes in a cluster. diff --git a/code/sonalyze/cmd/nodes/nodes.go b/code/sonalyze/cmd/nodes/nodes.go index e06efe1a..3fbe3fb7 100644 --- a/code/sonalyze/cmd/nodes/nodes.go +++ b/code/sonalyze/cmd/nodes/nodes.go @@ -81,6 +81,7 @@ ELBAT*/ type NodeCommand struct { DevArgs SourceArgs + QueryArgs HostArgs VerboseArgs ConfigFileArgs @@ -93,6 +94,7 @@ var _ = (SimpleCommand)((*NodeCommand)(nil)) func (nc *NodeCommand) Add(fs *CLI) { nc.DevArgs.Add(fs) nc.SourceArgs.Add(fs) + nc.QueryArgs.Add(fs) nc.HostArgs.Add(fs) nc.VerboseArgs.Add(fs) nc.ConfigFileArgs.Add(fs) @@ -108,6 +110,7 @@ func (nc *NodeCommand) ReifyForRemote(x *ArgReifier) error { return errors.Join( nc.DevArgs.ReifyForRemote(x), nc.SourceArgs.ReifyForRemote(x), + nc.QueryArgs.ReifyForRemote(x), nc.HostArgs.ReifyForRemote(x), nc.ConfigFileArgs.ReifyForRemote(x), nc.FormatArgs.ReifyForRemote(x), @@ -118,6 +121,7 @@ func (nc *NodeCommand) Validate() error { return errors.Join( nc.DevArgs.Validate(), nc.SourceArgs.Validate(), + nc.QueryArgs.Validate(), nc.HostArgs.Validate(), nc.VerboseArgs.Validate(), nc.ConfigFileArgs.Validate(), @@ -175,7 +179,7 @@ func (nc *NodeCommand) Perform(_ io.Reader, stdout, stderr io.Writer) error { UstrStats(stderr, false) } - hostGlobber, recordFilter, err := nc.buildRecordFilter(nc.Verbose) + hostGlobber, recordFilter, query, err := nc.buildRecordFilter(nc.Verbose) if err != nil { return fmt.Errorf("Failed to create record filter: %v", err) } @@ -189,7 
+193,13 @@ func (nc *NodeCommand) Perform(_ io.Reader, stdout, stderr io.Writer) error { return true } t := parsed.Unix() - return !(t >= recordFilter.From && t <= recordFilter.To) + if !(t >= recordFilter.From && t <= recordFilter.To) { + return true + } + if query != nil && !query(s) { + return true + } + return false }) if nc.Newest { @@ -227,10 +237,10 @@ func (nc *NodeCommand) Perform(_ io.Reader, stdout, stderr io.Writer) error { func (nc *NodeCommand) buildRecordFilter( verbose bool, -) (*hostglob.HostGlobber, *db.SampleFilter, error) { +) (*hostglob.HostGlobber, *db.SampleFilter, func(*config.NodeConfigRecord) bool, error) { includeHosts, err := hostglob.NewGlobber(true, nc.HostArgs.Host) if err != nil { - return nil, nil, err + return nil, nil, nil, err } haveFrom := nc.SourceArgs.HaveFrom @@ -250,5 +260,14 @@ func (nc *NodeCommand) buildRecordFilter( To: to, } - return includeHosts, recordFilter, nil + var query func(*config.NodeConfigRecord) bool + if nc.ParsedQuery != nil { + c, err := CompileQuery(nodeFormatters, nodePredicates, nc.ParsedQuery) + if err != nil { + return nil, nil, nil, fmt.Errorf("Could not compile query: %v", err) + } + query = c + } + + return includeHosts, recordFilter, query, nil } diff --git a/code/sonalyze/cmd/parse/parse-table.go b/code/sonalyze/cmd/parse/parse-table.go index 9ceb6aaa..303aca58 100644 --- a/code/sonalyze/cmd/parse/parse-table.go +++ b/code/sonalyze/cmd/parse/parse-table.go @@ -41,9 +41,9 @@ var parseFormatters = map[string]Formatter[sonarlog.Sample]{ }, Help: "(IsoDateTimeValue) Timestamp of record", }, - "Host": { + "Hostname": { Fmt: func(d sonarlog.Sample, ctx PrintMods) string { - return FormatUstr((d.Host), ctx) + return FormatUstr((d.Hostname), ctx) }, Help: "(string) Host name (FQDN)", }, @@ -191,7 +191,7 @@ func init() { DefAlias(parseFormatters, "Version", "version") DefAlias(parseFormatters, "Version", "v") DefAlias(parseFormatters, "Timestamp", "localtime") - DefAlias(parseFormatters, "Host", "host") 
+ DefAlias(parseFormatters, "Hostname", "host") DefAlias(parseFormatters, "Cores", "cores") DefAlias(parseFormatters, "User", "user") DefAlias(parseFormatters, "Pid", "pid") @@ -214,6 +214,172 @@ func init() { DefAlias(parseFormatters, "CpuUtilPct", "cpu_util_pct") } +// MT: Constant after initialization; immutable +var parsePredicates = map[string]Predicate[sonarlog.Sample]{ + "Version": Predicate[sonarlog.Sample]{ + Convert: CvtString2Ustr, + Compare: func(d sonarlog.Sample, v any) int { + return cmp.Compare((d.Version), v.(Ustr)) + }, + }, + "Timestamp": Predicate[sonarlog.Sample]{ + Convert: CvtString2DateTimeValue, + Compare: func(d sonarlog.Sample, v any) int { + return cmp.Compare((d.Timestamp), v.(DateTimeValue)) + }, + }, + "time": Predicate[sonarlog.Sample]{ + Convert: CvtString2IsoDateTimeValue, + Compare: func(d sonarlog.Sample, v any) int { + return cmp.Compare((d.Timestamp), v.(IsoDateTimeValue)) + }, + }, + "Hostname": Predicate[sonarlog.Sample]{ + Convert: CvtString2Ustr, + Compare: func(d sonarlog.Sample, v any) int { + return cmp.Compare((d.Hostname), v.(Ustr)) + }, + }, + "Cores": Predicate[sonarlog.Sample]{ + Convert: CvtString2Uint32, + Compare: func(d sonarlog.Sample, v any) int { + return cmp.Compare((d.Cores), v.(uint32)) + }, + }, + "MemtotalKB": Predicate[sonarlog.Sample]{ + Convert: CvtString2Uint64, + Compare: func(d sonarlog.Sample, v any) int { + return cmp.Compare((d.MemtotalKB), v.(uint64)) + }, + }, + "memtotal": Predicate[sonarlog.Sample]{ + Convert: CvtString2Uint64, + Compare: func(d sonarlog.Sample, v any) int { + return cmp.Compare((d.MemtotalKB), v.(U64Div1M)) + }, + }, + "User": Predicate[sonarlog.Sample]{ + Convert: CvtString2Ustr, + Compare: func(d sonarlog.Sample, v any) int { + return cmp.Compare((d.User), v.(Ustr)) + }, + }, + "Pid": Predicate[sonarlog.Sample]{ + Convert: CvtString2Uint32, + Compare: func(d sonarlog.Sample, v any) int { + return cmp.Compare((d.Pid), v.(uint32)) + }, + }, + "Ppid": 
Predicate[sonarlog.Sample]{ + Convert: CvtString2Uint32, + Compare: func(d sonarlog.Sample, v any) int { + return cmp.Compare((d.Ppid), v.(uint32)) + }, + }, + "Job": Predicate[sonarlog.Sample]{ + Convert: CvtString2Uint32, + Compare: func(d sonarlog.Sample, v any) int { + return cmp.Compare((d.Job), v.(uint32)) + }, + }, + "Cmd": Predicate[sonarlog.Sample]{ + Convert: CvtString2Ustr, + Compare: func(d sonarlog.Sample, v any) int { + return cmp.Compare((d.Cmd), v.(Ustr)) + }, + }, + "CpuPct": Predicate[sonarlog.Sample]{ + Convert: CvtString2Float32, + Compare: func(d sonarlog.Sample, v any) int { + return cmp.Compare((d.CpuPct), v.(float32)) + }, + }, + "CpuKB": Predicate[sonarlog.Sample]{ + Convert: CvtString2Uint64, + Compare: func(d sonarlog.Sample, v any) int { + return cmp.Compare((d.CpuKB), v.(uint64)) + }, + }, + "mem_gb": Predicate[sonarlog.Sample]{ + Convert: CvtString2Uint64, + Compare: func(d sonarlog.Sample, v any) int { + return cmp.Compare((d.CpuKB), v.(U64Div1M)) + }, + }, + "RssAnonKB": Predicate[sonarlog.Sample]{ + Convert: CvtString2Uint64, + Compare: func(d sonarlog.Sample, v any) int { + return cmp.Compare((d.RssAnonKB), v.(uint64)) + }, + }, + "res_gb": Predicate[sonarlog.Sample]{ + Convert: CvtString2Uint64, + Compare: func(d sonarlog.Sample, v any) int { + return cmp.Compare((d.RssAnonKB), v.(U64Div1M)) + }, + }, + "Gpus": Predicate[sonarlog.Sample]{ + Convert: CvtString2GpuSet, + SetCompare: func(d sonarlog.Sample, v any, op int) bool { + return SetCompareGpuSets((d.Gpus), v.(gpuset.GpuSet), op) + }, + }, + "GpuPct": Predicate[sonarlog.Sample]{ + Convert: CvtString2Float32, + Compare: func(d sonarlog.Sample, v any) int { + return cmp.Compare((d.GpuPct), v.(float32)) + }, + }, + "GpuMemPct": Predicate[sonarlog.Sample]{ + Convert: CvtString2Float32, + Compare: func(d sonarlog.Sample, v any) int { + return cmp.Compare((d.GpuMemPct), v.(float32)) + }, + }, + "GpuKB": Predicate[sonarlog.Sample]{ + Convert: CvtString2Uint64, + Compare: func(d 
sonarlog.Sample, v any) int { + return cmp.Compare((d.GpuKB), v.(uint64)) + }, + }, + "gpumem_gb": Predicate[sonarlog.Sample]{ + Convert: CvtString2Uint64, + Compare: func(d sonarlog.Sample, v any) int { + return cmp.Compare((d.GpuKB), v.(U64Div1M)) + }, + }, + "GpuFail": Predicate[sonarlog.Sample]{ + Convert: CvtString2Uint8, + Compare: func(d sonarlog.Sample, v any) int { + return cmp.Compare((d.GpuFail), v.(uint8)) + }, + }, + "CpuTimeSec": Predicate[sonarlog.Sample]{ + Convert: CvtString2Uint64, + Compare: func(d sonarlog.Sample, v any) int { + return cmp.Compare((d.CpuTimeSec), v.(uint64)) + }, + }, + "Rolledup": Predicate[sonarlog.Sample]{ + Convert: CvtString2Uint32, + Compare: func(d sonarlog.Sample, v any) int { + return cmp.Compare((d.Rolledup), v.(uint32)) + }, + }, + "Flags": Predicate[sonarlog.Sample]{ + Convert: CvtString2Uint8, + Compare: func(d sonarlog.Sample, v any) int { + return cmp.Compare((d.Flags), v.(uint8)) + }, + }, + "CpuUtilPct": Predicate[sonarlog.Sample]{ + Convert: CvtString2Float32, + Compare: func(d sonarlog.Sample, v any) int { + return cmp.Compare((d.CpuUtilPct), v.(float32)) + }, + }, +} + func (c *ParseCommand) Summary(out io.Writer) { fmt.Fprint(out, `Export sample data in various formats, after optional preprocessing. 
@@ -241,7 +407,7 @@ var parseAliases = map[string][]string{ "default": []string{"job", "user", "cmd"}, "Default": []string{"Job", "User", "Cmd"}, "all": []string{"version", "localtime", "host", "cores", "memtotal", "user", "pid", "job", "cmd", "cpu_pct", "mem_gb", "res_gb", "gpus", "gpu_pct", "gpumem_pct", "gpumem_gb", "gpu_status", "cputime_sec", "rolledup", "cpu_util_pct"}, - "All": []string{"Version", "Timestamp", "Host", "Cores", "MemtotalKB", "User", "Pid", "Ppid", "Job", "Cmd", "CpuPct", "CpuKB", "RssAnonKB", "Gpus", "GpuPct", "GpuMemPct", "GpuKB", "GpuFail", "CpuTimeSec", "Rolledup", "CpuUtilPct"}, + "All": []string{"Version", "Timestamp", "Hostname", "Cores", "MemtotalKB", "User", "Pid", "Ppid", "Job", "Cmd", "CpuPct", "CpuKB", "RssAnonKB", "Gpus", "GpuPct", "GpuMemPct", "GpuKB", "GpuFail", "CpuTimeSec", "Rolledup", "CpuUtilPct"}, "roundtrip": []string{"v", "time", "host", "cores", "user", "job", "pid", "cmd", "cpu%", "cpukib", "gpus", "gpu%", "gpumem%", "gpukib", "gpufail", "cputime_sec", "rolledup"}, } diff --git a/code/sonalyze/cmd/parse/parse.go b/code/sonalyze/cmd/parse/parse.go index f80277b1..b3c7a641 100644 --- a/code/sonalyze/cmd/parse/parse.go +++ b/code/sonalyze/cmd/parse/parse.go @@ -34,7 +34,7 @@ FIELDS sonarlog.Sample Version Ustr desc:"Semver string (MAJOR.MINOR.BUGFIX)" alias:"version,v" Timestamp DateTimeValue desc:"Timestamp of record " alias:"localtime" time IsoDateTimeValue desc:"Timestamp of record" field:"Timestamp" - Host Ustr desc:"Host name (FQDN)" alias:"host" + Hostname Ustr desc:"Host name (FQDN)" alias:"host" Cores uint32 desc:"Total number of cores (including hyperthreads)" alias:"cores" MemtotalKB uint64 desc:"Installed main memory" memtotal U64Div1M desc:"Installed main memory (GB)" field:"MemtotalKB" @@ -82,7 +82,7 @@ ALIASES default job,user,cmd Default Job,User,Cmd all version,localtime,host,cores,memtotal,user,pid,job,cmd,cpu_pct,mem_gb,res_gb,gpus,gpu_pct,gpumem_pct,gpumem_gb,gpu_status,cputime_sec,rolledup,cpu_util_pct 
- All Version,Timestamp,Host,Cores,MemtotalKB,User,Pid,Ppid,Job,Cmd,CpuPct,CpuKB,RssAnonKB,Gpus,GpuPct,GpuMemPct,GpuKB,GpuFail,CpuTimeSec,Rolledup,CpuUtilPct + All Version,Timestamp,Hostname,Cores,MemtotalKB,User,Pid,Ppid,Job,Cmd,CpuPct,CpuKB,RssAnonKB,Gpus,GpuPct,GpuMemPct,GpuKB,GpuFail,CpuTimeSec,Rolledup,CpuUtilPct roundtrip v,time,host,cores,user,job,pid,cmd,cpu%,cpukib,gpus,gpu%,gpumem%,gpukib,gpufail,cputime_sec,rolledup DEFAULTS default @@ -196,10 +196,19 @@ func (pc *ParseCommand) Perform( mergedSamples = sonarlog.MergeByHostAndJob(streams) } + var queryNeg func(sonarlog.Sample) bool + if pc.ParsedQuery != nil { + var err error + queryNeg, err = CompileQueryNeg(parseFormatters, parsePredicates, pc.ParsedQuery) + if err != nil { + return fmt.Errorf("Could not compile query: %v", err) + } + } + if mergedSamples != nil { // All elements that are part of the InputStreamKey must be part of the sort key here. slices.SortStableFunc(mergedSamples, func(a, b *sonarlog.SampleStream) int { - c := cmp.Compare((*a)[0].Host.String(), (*b)[0].Host.String()) + c := cmp.Compare((*a)[0].Hostname.String(), (*b)[0].Hostname.String()) if c == 0 { c = cmp.Compare((*a)[0].Timestamp, (*b)[0].Timestamp) if c == 0 { @@ -212,16 +221,23 @@ func (pc *ParseCommand) Perform( return c }) for _, stream := range mergedSamples { + xs := *stream + if queryNeg != nil { + xs = slices.DeleteFunc(xs, queryNeg) + } fmt.Fprintln(out, "*") FormatData( out, pc.PrintFields, parseFormatters, pc.PrintOpts, - *stream, + xs, ) } } else { + if queryNeg != nil { + samples = slices.DeleteFunc(samples, queryNeg) + } FormatData( out, pc.PrintFields, diff --git a/code/sonalyze/cmd/profile/profile-table.go b/code/sonalyze/cmd/profile/profile-table.go index 9c15d462..7734f649 100644 --- a/code/sonalyze/cmd/profile/profile-table.go +++ b/code/sonalyze/cmd/profile/profile-table.go @@ -83,6 +83,58 @@ func init() { DefAlias(profileFormatters, "NumProcs", "nproc") } +// MT: Constant after initialization; immutable 
+var profilePredicates = map[string]Predicate[*fixedLine]{ + "Timestamp": Predicate[*fixedLine]{ + Convert: CvtString2DateTimeValue, + Compare: func(d *fixedLine, v any) int { + return cmp.Compare((d.Timestamp), v.(DateTimeValueOrBlank)) + }, + }, + "CpuUtilPct": Predicate[*fixedLine]{ + Convert: CvtString2Int, + Compare: func(d *fixedLine, v any) int { + return cmp.Compare((d.CpuUtilPct), v.(int)) + }, + }, + "VirtualMemGB": Predicate[*fixedLine]{ + Convert: CvtString2Int, + Compare: func(d *fixedLine, v any) int { + return cmp.Compare((d.VirtualMemGB), v.(int)) + }, + }, + "ResidentMemGB": Predicate[*fixedLine]{ + Convert: CvtString2Int, + Compare: func(d *fixedLine, v any) int { + return cmp.Compare((d.ResidentMemGB), v.(int)) + }, + }, + "Gpu": Predicate[*fixedLine]{ + Convert: CvtString2Int, + Compare: func(d *fixedLine, v any) int { + return cmp.Compare((d.Gpu), v.(int)) + }, + }, + "GpuMemGB": Predicate[*fixedLine]{ + Convert: CvtString2Int, + Compare: func(d *fixedLine, v any) int { + return cmp.Compare((d.GpuMemGB), v.(int)) + }, + }, + "Command": Predicate[*fixedLine]{ + Convert: CvtString2Ustr, + Compare: func(d *fixedLine, v any) int { + return cmp.Compare((d.Command), v.(Ustr)) + }, + }, + "NumProcs": Predicate[*fixedLine]{ + Convert: CvtString2Int, + Compare: func(d *fixedLine, v any) int { + return cmp.Compare((d.NumProcs), v.(IntOrEmpty)) + }, + }, +} + type fixedLine struct { Timestamp DateTimeValueOrBlank CpuUtilPct int diff --git a/code/sonalyze/cmd/profile/profile.go b/code/sonalyze/cmd/profile/profile.go index 53847ea2..0154b104 100644 --- a/code/sonalyze/cmd/profile/profile.go +++ b/code/sonalyze/cmd/profile/profile.go @@ -106,6 +106,9 @@ func (pc *ProfileCommand) DefaultRecordFilters() ( allUsers, skipSystemUsers, determined := pc.RecordFilterArgs.DefaultUserFilters() if !determined { allUsers, skipSystemUsers = false, false + if pc.QueryStmt != "" { + allUsers = true + } } excludeSystemCommands = false excludeHeartbeat = true diff --git 
a/code/sonalyze/cmd/sacct/print.go b/code/sonalyze/cmd/sacct/print.go index d163f814..997c0c1f 100644 --- a/code/sonalyze/cmd/sacct/print.go +++ b/code/sonalyze/cmd/sacct/print.go @@ -117,6 +117,10 @@ func (sc *SacctCommand) printRegularJobs(stdout io.Writer, regular []*sacctSumma ArrayIndex: int(r.Main.ArrayIndex), } } + toPrint, err := ApplyQuery(sc.ParsedQuery, sacctFormatters, sacctPredicates, toPrint) + if err != nil { + return err + } FormatData( stdout, sc.PrintFields, diff --git a/code/sonalyze/cmd/sacct/sacct-table.go b/code/sonalyze/cmd/sacct/sacct-table.go index 0b4c012a..710386ae 100644 --- a/code/sonalyze/cmd/sacct/sacct-table.go +++ b/code/sonalyze/cmd/sacct/sacct-table.go @@ -198,6 +198,178 @@ func init() { DefAlias(sacctFormatters, "RelativeResidentMem", "rmem") } +// MT: Constant after initialization; immutable +var sacctPredicates = map[string]Predicate[*SacctRegular]{ + "Start": Predicate[*SacctRegular]{ + Convert: CvtString2IsoDateTimeOrUnknown, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.Start), v.(IsoDateTimeOrUnknown)) + }, + }, + "End": Predicate[*SacctRegular]{ + Convert: CvtString2IsoDateTimeOrUnknown, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.End), v.(IsoDateTimeOrUnknown)) + }, + }, + "Submit": Predicate[*SacctRegular]{ + Convert: CvtString2IsoDateTimeOrUnknown, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.Submit), v.(IsoDateTimeOrUnknown)) + }, + }, + "RequestedCPU": Predicate[*SacctRegular]{ + Convert: CvtString2Int, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.RequestedCPU), v.(int)) + }, + }, + "UsedCPU": Predicate[*SacctRegular]{ + Convert: CvtString2Int, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.UsedCPU), v.(int)) + }, + }, + "RelativeCPU": Predicate[*SacctRegular]{ + Convert: CvtString2Int, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.RelativeCPU), v.(int)) + }, + }, + 
"RelativeResidentMem": Predicate[*SacctRegular]{ + Convert: CvtString2Int, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.RelativeResidentMem), v.(int)) + }, + }, + "User": Predicate[*SacctRegular]{ + Convert: CvtString2Ustr, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.User), v.(Ustr)) + }, + }, + "JobName": Predicate[*SacctRegular]{ + Convert: CvtString2UstrMax30, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.JobName), v.(UstrMax30)) + }, + }, + "State": Predicate[*SacctRegular]{ + Convert: CvtString2Ustr, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.State), v.(Ustr)) + }, + }, + "Account": Predicate[*SacctRegular]{ + Convert: CvtString2Ustr, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.Account), v.(Ustr)) + }, + }, + "Reservation": Predicate[*SacctRegular]{ + Convert: CvtString2Ustr, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.Reservation), v.(Ustr)) + }, + }, + "Layout": Predicate[*SacctRegular]{ + Convert: CvtString2Ustr, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.Layout), v.(Ustr)) + }, + }, + "NodeList": Predicate[*SacctRegular]{ + Convert: CvtString2Ustr, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.NodeList), v.(Ustr)) + }, + }, + "JobID": Predicate[*SacctRegular]{ + Convert: CvtString2Int, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.JobID), v.(int)) + }, + }, + "MaxRSS": Predicate[*SacctRegular]{ + Convert: CvtString2Int, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.MaxRSS), v.(int)) + }, + }, + "ReqMem": Predicate[*SacctRegular]{ + Convert: CvtString2Int, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.ReqMem), v.(int)) + }, + }, + "ReqCPUS": Predicate[*SacctRegular]{ + Convert: CvtString2Int, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.ReqCPUS), v.(int)) + }, 
+ }, + "ReqGPUS": Predicate[*SacctRegular]{ + Convert: CvtString2Ustr, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.ReqGPUS), v.(Ustr)) + }, + }, + "ReqNodes": Predicate[*SacctRegular]{ + Convert: CvtString2Int, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.ReqNodes), v.(int)) + }, + }, + "Elapsed": Predicate[*SacctRegular]{ + Convert: CvtString2Int, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.Elapsed), v.(int)) + }, + }, + "Suspended": Predicate[*SacctRegular]{ + Convert: CvtString2Int, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.Suspended), v.(int)) + }, + }, + "Timelimit": Predicate[*SacctRegular]{ + Convert: CvtString2Int, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.Timelimit), v.(int)) + }, + }, + "ExitCode": Predicate[*SacctRegular]{ + Convert: CvtString2Int, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.ExitCode), v.(int)) + }, + }, + "Wait": Predicate[*SacctRegular]{ + Convert: CvtString2Int, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.Wait), v.(int)) + }, + }, + "Partition": Predicate[*SacctRegular]{ + Convert: CvtString2Ustr, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.Partition), v.(Ustr)) + }, + }, + "ArrayJobID": Predicate[*SacctRegular]{ + Convert: CvtString2Int, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.ArrayJobID), v.(int)) + }, + }, + "ArrayIndex": Predicate[*SacctRegular]{ + Convert: CvtString2Int, + Compare: func(d *SacctRegular, v any) int { + return cmp.Compare((d.ArrayIndex), v.(int)) + }, + }, +} + type SacctRegular struct { Start IsoDateTimeOrUnknown End IsoDateTimeOrUnknown diff --git a/code/sonalyze/cmd/top/top.go b/code/sonalyze/cmd/top/top.go index 4c92d869..3086efa4 100644 --- a/code/sonalyze/cmd/top/top.go +++ b/code/sonalyze/cmd/top/top.go @@ -18,6 +18,7 @@ package top import ( "bufio" + "cmp" _ "embed" 
"fmt" "io" @@ -102,13 +103,7 @@ func (tc *TopCommand) Perform(stdin io.Reader, stdout, stderr io.Writer) error { hostStreams := umaps.Values(streams) slices.SortFunc(hostStreams, func(a, b *sonarlog.LoadData) int { - if a.Host.String() < b.Host.String() { - return -1 - } - if a.Host.String() > b.Host.String() { - return 1 - } - return 0 + return cmp.Compare(a.Hostname.String(), b.Hostname.String()) }) // Ad-hoc fixed-format output for now @@ -118,7 +113,7 @@ func (tc *TopCommand) Perform(stdin io.Reader, stdout, stderr io.Writer) error { for _, v := range hostStreams { if len(v.Data) > 0 { buf.WriteString("HOST: ") - buf.WriteString(v.Host.String()) + buf.WriteString(v.Hostname.String()) buf.WriteByte('\n') for i := 1; i < len(v.Data); i++ { diff --git a/code/sonalyze/cmd/uptime/perform.go b/code/sonalyze/cmd/uptime/perform.go index c47facc6..89528a3a 100644 --- a/code/sonalyze/cmd/uptime/perform.go +++ b/code/sonalyze/cmd/uptime/perform.go @@ -95,8 +95,8 @@ func (uc *UptimeCommand) computeReports( fromIncl, toIncl := uc.InterpretFromToWithBounds(bounds) slices.SortStableFunc(samples, func(a, b sonarlog.Sample) int { - if a.Host != b.Host { - return cmp.Compare(a.Host.String(), b.Host.String()) + if a.Hostname != b.Hostname { + return cmp.Compare(a.Hostname.String(), b.Hostname.String()) } return cmp.Compare(a.Timestamp, b.Timestamp) }) @@ -118,7 +118,7 @@ func (uc *UptimeCommand) computeReports( if !uc.OnlyUp { reports = append(reports, &UptimeLine{ Device: "host", - Hostname: hostFirst.Host.String(), + Hostname: hostFirst.Hostname.String(), State: "down", Start: DateTimeValue(fromIncl), End: DateTimeValue(hostFirst.Timestamp), @@ -134,7 +134,7 @@ func (uc *UptimeCommand) computeReports( if !uc.OnlyUp { reports = append(reports, &UptimeLine{ Device: "host", - Hostname: hostFirst.Host.String(), + Hostname: hostFirst.Hostname.String(), State: "down", Start: DateTimeValue(hostLast.Timestamp), End: DateTimeValue(toIncl), @@ -165,7 +165,7 @@ func (uc *UptimeCommand) 
computeReports( if !uc.OnlyDown { reports = append(reports, &UptimeLine{ Device: "host", - Hostname: hostFirst.Host.String(), + Hostname: hostFirst.Hostname.String(), State: "up", Start: DateTimeValue(samples[windowStart].Timestamp), End: DateTimeValue(samples[j-1].Timestamp), @@ -188,7 +188,7 @@ func (uc *UptimeCommand) computeReports( if !uc.OnlyUp { reports = append(reports, &UptimeLine{ Device: "host", - Hostname: hostFirst.Host.String(), + Hostname: hostFirst.Hostname.String(), State: "down", Start: DateTimeValue(prevTimestamp), End: DateTimeValue(samples[j].Timestamp), @@ -215,7 +215,7 @@ func (uc *UptimeCommand) computeReports( if !(updown == "up" && uc.OnlyDown) && !(updown == "down" && uc.OnlyUp) { reports = append(reports, &UptimeLine{ Device: "gpu", - Hostname: samples[w.start].Host.String(), + Hostname: samples[w.start].Hostname.String(), State: updown, Start: DateTimeValue(samples[start].Timestamp), End: DateTimeValue(samples[min(w.end, i)].Timestamp), @@ -252,10 +252,10 @@ func (uc *UptimeCommand) computeHostWindows( // Collect the window hostStart := i hostEnd := i - host := samples[hostStart].Host + host := samples[hostStart].Hostname hostStr := host.String() i++ - for i < lim && samples[i].Host == host { + for i < lim && samples[i].Hostname == host { if samples[i].Timestamp <= toIncl { hostEnd = i } @@ -295,7 +295,7 @@ func (uc *UptimeCommand) computeAlwaysDown( hs[StringToUstr(h)] = true } for _, sample := range samples { - delete(hs, sample.Host) + delete(hs, sample.Hostname) } for h := range hs { if !hostGlobber.IsEmpty() && !hostGlobber.Match(h.String()) { diff --git a/code/sonalyze/cmd/uptime/print.go b/code/sonalyze/cmd/uptime/print.go index 93de47a9..e154c1f7 100644 --- a/code/sonalyze/cmd/uptime/print.go +++ b/code/sonalyze/cmd/uptime/print.go @@ -78,6 +78,11 @@ DEFAULTS default ELBAT*/ func (uc *UptimeCommand) printReports(out io.Writer, reports []*UptimeLine) error { + reports, err := ApplyQuery(uc.ParsedQuery, uptimeFormatters, 
uptimePredicates, reports) + if err != nil { + return err + } + slices.SortFunc(reports, func(a, b *UptimeLine) int { c := cmp.Compare(a.Hostname, b.Hostname) if c == 0 { diff --git a/code/sonalyze/cmd/uptime/uptime-table.go b/code/sonalyze/cmd/uptime/uptime-table.go index acf18482..3d98f299 100644 --- a/code/sonalyze/cmd/uptime/uptime-table.go +++ b/code/sonalyze/cmd/uptime/uptime-table.go @@ -61,6 +61,37 @@ func init() { DefAlias(uptimeFormatters, "End", "end") } +// MT: Constant after initialization; immutable +var uptimePredicates = map[string]Predicate[*UptimeLine]{ + "Device": Predicate[*UptimeLine]{ + Compare: func(d *UptimeLine, v any) int { + return cmp.Compare((d.Device), v.(string)) + }, + }, + "Hostname": Predicate[*UptimeLine]{ + Compare: func(d *UptimeLine, v any) int { + return cmp.Compare((d.Hostname), v.(string)) + }, + }, + "State": Predicate[*UptimeLine]{ + Compare: func(d *UptimeLine, v any) int { + return cmp.Compare((d.State), v.(string)) + }, + }, + "Start": Predicate[*UptimeLine]{ + Convert: CvtString2DateTimeValue, + Compare: func(d *UptimeLine, v any) int { + return cmp.Compare((d.Start), v.(DateTimeValue)) + }, + }, + "End": Predicate[*UptimeLine]{ + Convert: CvtString2DateTimeValue, + Compare: func(d *UptimeLine, v any) int { + return cmp.Compare((d.End), v.(DateTimeValue)) + }, + }, +} + type UptimeLine struct { Device string Hostname string diff --git a/code/sonalyze/db/sample.go b/code/sonalyze/db/sample.go index b717bd38..061369d3 100644 --- a/code/sonalyze/db/sample.go +++ b/code/sonalyze/db/sample.go @@ -77,7 +77,7 @@ type Sample struct { CpuTimeSec uint64 Version Ustr Cluster Ustr - Host Ustr + Hostname Ustr Cores uint32 User Ustr Job uint32 @@ -102,7 +102,7 @@ type Sample struct { type LoadDatum struct { Timestamp int64 - Host Ustr + Hostname Ustr Encoded []byte } @@ -110,7 +110,7 @@ type LoadDatum struct { type GpuDatum struct { Timestamp int64 - Host Ustr + Hostname Ustr Encoded []byte } @@ -603,7 +603,7 @@ LineLoop: samples = 
append(samples, &Sample{ Version: version, Timestamp: timestamp, - Host: hostname, + Hostname: hostname, Cores: numCores, MemtotalKB: memTotalKB, User: user, @@ -626,14 +626,14 @@ LineLoop: if load != nil { loadData = append(loadData, &LoadDatum{ Timestamp: timestamp, - Host: hostname, + Hostname: hostname, Encoded: load, }) } if gpuinfo != nil { gpuData = append(gpuData, &GpuDatum{ Timestamp: timestamp, - Host: hostname, + Hostname: hostname, Encoded: gpuinfo, }) } diff --git a/code/sonalyze/db/sample_test.go b/code/sonalyze/db/sample_test.go index 2e7d510d..3eedee44 100644 --- a/code/sonalyze/db/sample_test.go +++ b/code/sonalyze/db/sample_test.go @@ -32,7 +32,7 @@ func TestParseSonarLogTagged(t *testing.T) { t.Errorf("Expected 5 readings, got %d", len(readings)) } x := readings[0] - if x.Host.String() != "ml4.hpc.uio.no" || x.User.String() != "root" || x.Cmd.String() != "tuned" { + if x.Hostname.String() != "ml4.hpc.uio.no" || x.User.String() != "root" || x.Cmd.String() != "tuned" { t.Errorf("First record is bogus: %v", x) } if (x.Flags & FlagHeartbeat) != 0 { @@ -72,7 +72,7 @@ func TestParseSonarLogUntagged(t *testing.T) { t.Errorf("Expected 2 readings, got %d", len(readings)) } x := readings[0] - if x.Host.String() != "ml3.hpc.uio.no" || x.User.String() != "larsbent" || x.Cmd.String() != "python" { + if x.Hostname.String() != "ml3.hpc.uio.no" || x.User.String() != "larsbent" || x.Cmd.String() != "python" { t.Errorf("First record is bogus: %v", x) } if (x.Flags & FlagHeartbeat) != 0 { diff --git a/code/sonalyze/db/samplefilter.go b/code/sonalyze/db/samplefilter.go index 05dd148e..7afa8698 100644 --- a/code/sonalyze/db/samplefilter.go +++ b/code/sonalyze/db/samplefilter.go @@ -254,7 +254,7 @@ func InstantiateSampleFilter(recordFilter *SampleFilter) func(*Sample) bool { return false } case testIncludeHosts: - if !recordFilter.IncludeHosts.Match(e.Host.String()) { + if !recordFilter.IncludeHosts.Match(e.Hostname.String()) { return false } case 
testIncludeSingleCommand: diff --git a/code/sonalyze/sonarlog/postprocess.go b/code/sonalyze/sonarlog/postprocess.go index cb338238..b97d365d 100644 --- a/code/sonalyze/sonarlog/postprocess.go +++ b/code/sonalyze/sonarlog/postprocess.go @@ -65,7 +65,7 @@ func standardSampleRectifier(xs []*db.Sample, cfg *config.ClusterConfig) []*db.S return xs } - conf := cfg.LookupHost(xs[0].Host.String()) + conf := cfg.LookupHost(xs[0].Hostname.String()) if conf == nil { return xs } @@ -108,13 +108,13 @@ func createInputStreams( for _, entries := range entryBlobs { for _, e := range entries { if wantBounds { - if bound, found := bounds[e.Host]; found { - bounds[e.Host] = Timebound{ + if bound, found := bounds[e.Hostname]; found { + bounds[e.Hostname] = Timebound{ Earliest: min(bound.Earliest, e.Timestamp), Latest: max(bound.Latest, e.Timestamp), } } else { - bounds[e.Host] = Timebound{ + bounds[e.Hostname] = Timebound{ Earliest: e.Timestamp, Latest: e.Timestamp, } @@ -125,7 +125,7 @@ func createInputStreams( continue } - key := InputStreamKey{e.Host, streamId(e), e.Cmd} + key := InputStreamKey{e.Hostname, streamId(e), e.Cmd} if stream, found := streams[key]; found { *stream = append(*stream, Sample{Sample: e}) } else { @@ -266,14 +266,14 @@ func rectifyLoadData(dataBlobs [][]*db.LoadDatum) (streams LoadDataSet, bounds T Time: d.Timestamp, Decoded: decoded, } - if stream, found := streams[d.Host]; found { + if stream, found := streams[d.Hostname]; found { stream.Data = append(stream.Data, datum) } else { stream := LoadData{ - Host: d.Host, - Data: []LoadDatum{datum}, + Hostname: d.Hostname, + Data: []LoadDatum{datum}, } - streams[d.Host] = &stream + streams[d.Hostname] = &stream } } } @@ -380,14 +380,14 @@ func rectifyGpuData(dataBlobs [][]*db.GpuDatum) (streams GpuDataSet, bounds Time Time: d.Timestamp, Decoded: decoded, } - if stream, found := streams[d.Host]; found { + if stream, found := streams[d.Hostname]; found { stream.Data = append(stream.Data, datum) } else { stream := 
GpuData{ - Host: d.Host, - Data: []GpuDatum{datum}, + Hostname: d.Hostname, + Data: []GpuDatum{datum}, } - streams[d.Host] = &stream + streams[d.Hostname] = &stream } } } diff --git a/code/sonalyze/sonarlog/synthesize.go b/code/sonalyze/sonarlog/synthesize.go index 1841a6ba..a77a16ee 100644 --- a/code/sonalyze/sonarlog/synthesize.go +++ b/code/sonalyze/sonarlog/synthesize.go @@ -152,7 +152,7 @@ func MergeByJob(streams InputStreamSet, bounds Timebounds) (SampleStreams, Timeb // Initialize the set of new bounds with the zero jobs for _, z := range zero { - hostname := (*z)[0].Host + hostname := (*z)[0].Hostname if _, found := newBounds[hostname]; !found { probe, found := bounds[hostname] if !found { @@ -240,7 +240,7 @@ func MergeAcrossHostsByTime(streams SampleStreams) SampleStreams { return streams } names := slices.Map(streams, func(s *SampleStream) string { - return (*s)[0].Host.String() + return (*s)[0].Hostname.String() }) hostname := StringToUstr(strings.Join(hostglob.CompressHostnames(names), ",")) tmp := mergeStreams( @@ -618,7 +618,7 @@ func sumRecords( Sample: &db.Sample{ Version: version, Timestamp: timestamp, - Host: hostname, + Hostname: hostname, User: username, Job: jobId, Cmd: command, @@ -674,7 +674,7 @@ func foldSamples(samples SampleStream, truncTime func(int64) int64) SampleStream r := sumRecords( v000, t0, - s0.Host, + s0.Hostname, merged, 0, merged, diff --git a/code/sonalyze/sonarlog/types.go b/code/sonalyze/sonarlog/types.go index 887aff55..e159db35 100644 --- a/code/sonalyze/sonarlog/types.go +++ b/code/sonalyze/sonarlog/types.go @@ -65,8 +65,8 @@ type InputStreamSet map[InputStreamKey]*SampleStream // Per-cpu load data, expanded. 
type LoadData struct { - Host Ustr - Data []LoadDatum + Hostname Ustr + Data []LoadDatum } type LoadDatum struct { @@ -98,8 +98,8 @@ type LoadDataSet map[Ustr]*LoadData // Ditto for GPU data type GpuData struct { - Host Ustr - Data []GpuDatum // one per timestamp + Hostname Ustr + Data []GpuDatum // one per timestamp } type PerGpuDatum struct { diff --git a/code/sonalyze/table/.gitignore b/code/sonalyze/table/.gitignore new file mode 100644 index 00000000..26520536 --- /dev/null +++ b/code/sonalyze/table/.gitignore @@ -0,0 +1 @@ +y.output diff --git a/code/sonalyze/table/data.go b/code/sonalyze/table/data.go index cf8ae56e..69dbe7eb 100644 --- a/code/sonalyze/table/data.go +++ b/code/sonalyze/table/data.go @@ -6,12 +6,14 @@ package table import ( "fmt" "math" + "regexp" "slices" "strconv" "strings" "time" "go-utils/gpuset" + "go-utils/hostglob" . "sonalyze/common" ) @@ -258,3 +260,295 @@ func FormatYyyyMmDdHhMmUtc(t int64) string { func FormatIsoUtc(t int64) string { return time.Unix(t, 0).UTC().Format(time.RFC3339) } + +func FormatHostnames(x *Hostnames, ctx PrintMods) string { + if ctx&PrintModFixed != 0 { + return x.FormatBrief() + } + return x.FormatFull() +} + +// This returns error to conform to an interface but never returns non-nil error. 
+func CvtString2Strings(s string) (any, error) { + if s == "" { + return make([]string, 0), nil + } + ss := strings.Split(s, ",") + // Sorted is required by SetCompareStrings + slices.Sort(ss) + return ss, nil +} + +func CvtString2GpuSet(s string) (any, error) { + return gpuset.NewGpuSet(s) +} + +func CvtString2Ustr(s string) (any, error) { + return StringToUstr(s), nil +} + +func CvtString2UstrMax30(s string) (any, error) { + return StringToUstr(s), nil +} + +func CvtString2IsoDateTimeValue(s string) (any, error) { + t, err := time.Parse(time.RFC3339, s) + if err != nil { + return int64(0), err + } + return t.Unix(), nil +} + +func CvtString2IsoDateTimeOrUnknown(s string) (any, error) { + return CvtString2IsoDateTimeValue(s) +} + +var durationRe = regexp.MustCompile(`^((\d+)[wW])?((\d+)[dD])?((\d+)[hH])?((\d+)[mM])?$`) + +// The value is seconds as int64 +func CvtString2DurationValue(s string) (any, error) { + m := durationRe.FindStringSubmatch(s) + if m == nil { + seconds, err := strconv.Atoi(s) + if err != nil { + return 0, fmt.Errorf("Bad duration %s", s) + } + return int64(seconds), nil + } + var weeks, days, hours, minutes int + if m[1] != "" { + weeks, _ = strconv.Atoi(m[2]) + } + if m[3] != "" { + days, _ = strconv.Atoi(m[4]) + } + if m[5] != "" { + hours, _ = strconv.Atoi(m[6]) + } + if m[7] != "" { + minutes, _ = strconv.Atoi(m[8]) + } + return (((int64(weeks)*7+int64(days))*24+int64(hours))*60 + int64(minutes)) * 60, nil +} + +func CvtString2U32Duration(s string) (any, error) { + d, err := CvtString2DurationValue(s) + if err != nil { + return uint32(0), err + } + return uint32(d.(int64)), nil +} + +func CvtString2DateTimeValue(s string) (any, error) { + t, err := time.Parse(time.DateTime, s) + if err != nil { + return int64(0), err + } + return t.Unix(), nil +} + +func CvtString2DateTimeValueOrBlank(s string) (any, error) { + return CvtString2DateTimeValue(s) +} + +func CvtString2DateValue(s string) (any, error) { + t, err := time.Parse(time.DateOnly, s) 
+ if err != nil { + return int64(0), err + } + return t.Unix(), nil +} + +func CvtString2TimeValue(s string) (any, error) { + t, err := time.Parse(time.TimeOnly, s) + if err != nil { + return int64(0), err + } + return t.Unix(), nil +} + +// TODO: Really needs to be case-insensitive +func CvtString2Bool(s string) (any, error) { + switch s { + case "true", "yes", "1": + return true, nil + case "false", "no", "0": + return false, nil + default: + return nil, fmt.Errorf("Not a boolean value: %s", s) + } +} + +func CvtString2Int(s string) (any, error) { + i, err := strconv.ParseInt(s, 10, 64) + if err != nil { + return nil, err + } + return int(i), nil +} + +func CvtString2Int64(s string) (any, error) { + i, err := strconv.ParseInt(s, 10, 64) + if err != nil { + return nil, err + } + return i, nil +} + +func CvtString2Uint8(s string) (any, error) { + i, err := strconv.ParseUint(s, 10, 8) + if err != nil { + return nil, err + } + return uint8(i), nil +} + +func CvtString2Uint32(s string) (any, error) { + i, err := strconv.ParseUint(s, 10, 32) + if err != nil { + return nil, err + } + return uint32(i), nil +} + +func CvtString2Uint64(s string) (any, error) { + i, err := strconv.ParseUint(s, 10, 64) + if err != nil { + return nil, err + } + return i, nil +} + +func CvtString2Float32(s string) (any, error) { + i, err := strconv.ParseFloat(s, 32) + if err != nil { + return nil, err + } + return float32(i), nil +} + +func CvtString2Float64(s string) (any, error) { + i, err := strconv.ParseFloat(s, 64) + if err != nil { + return nil, err + } + return i, nil +} + +func CvtString2Hostnames(s string) (any, error) { + ps, err := hostglob.SplitMultiPattern(s) + if err != nil { + return nil, err + } + xs := make([]string, 0) + for _, p := range ps { + ss, err := hostglob.ExpandPattern(p) + if err != nil { + return nil, err + } + xs = append(xs, ss...) 
+ } + return xs, nil +} + +// true > false +func CompareBool(b1, b2 bool) int { + if b1 == b2 { + return 0 + } + if b1 { + return 1 + } + return -1 +} + +func SetCompareGpuSets(a, b gpuset.GpuSet, op int) bool { + switch op { + case opEq: + return a.Equal(b) + case opLt: + return b.HasSubset(a, true) + case opLe: + return b.HasSubset(a, false) + case opGt: + return a.HasSubset(b, true) + case opGe: + return a.HasSubset(b, false) + default: + panic("Unknown op") + } +} + +// This is special purpose. `a` comes from the data record and is assumed to have (fully) qualified +// host names, while `b` comes from CvtString2Hostnames and will typically have just prefixes. The +// meaning of A= /gpu-[1-3]/ will find jobs that +// use at least gpu-1.fox, gpu-2.fox and gpu-3.fox; Hosts <= /gpu-[1-3]/ will find jobs that use no +// other nodes than those three, and may use just one of them. +// +// When printing to the "brief" form, only "a" is printed even if a.b.c (and thus also a.b) was in +// the set. +// +// When printing to the "full" form, only "a.b.c" is printed, the shorter variants are ignored. +// +// Thus for every name in the set there is a "brief" form (first element) and a "full" form (all +// elements). +// +// Set equality: A = B if every full name of B is found in A and a (possibly improper) prefix of +// every full name in A is found by this comparison. Note A = B does not imply B = A and that if +// A={a.b.c.d,a.b.c.e} and B={a} then A = B since {a} touches both the names in A. +// +// Subset: B < A if every full name of B is found in A. +// +// There is a simple way of thinking about the set relations. Consider each set as a nested map, +// where the first element maps to a set of the suffixes: A={a => {b.c.d, b.c.e}} and B={a => {}} in +// the case above. The nesting continues, so in the end A={a => {b => {c => {d => {}, e => {}}}}}. 
+// +// B < A if at every nesting level in a traversal of A and B every member of B is represented in the +// corresponding member of A, i.e., the members of B are a subset of the members of A at that level, +// and at least one of those subsets is proper. [This is dodgy, elaborate further.] +// +// A = B if, at every nesting level visited, the members of A and B at that level are equal. +// Traversal is driven by the values in B and stops when those are exhausted. +// +// Consider A={a.x, a.c, b.c} and B={a, b}, which is to say +// +// A = {a => {x => {}, c => {}}, b => {c => {}}} +// B = {a => {}, b => {}} +// +// Here the first level of A and the first level of B are the same {a,b} so we continue. None +// of the values in B have nonempty sets so we're done. +// +// More complicated, consider B = {a, b.c}: +// +// A = {a => {x => {}, c => {}}, b => {c => {}}} +// B = {a => {}, b => {c => {}}} +// +// The first levels are the same, we descend to the second level for "b", the sets are the same {c} +// and the values are empty and we're done: they're equal, as desired. +// +// +// Under typical circumstances (queries): +// +// - the lhs set will be created once for each row in the table and then used for a single +// comparison before being discarded +// - the rhs set will be created once for a ton of rows and then used for many comparisons +// - the lhs set will typically be very small, often only one full name +// - the rhs set can be of more variable size, but will often have only brief names +// - the lhs set will be printed at most once +// - the rhs set will not be printed at all +// +// Consider the query `Hostnames <= /gpu-[1-100]/` (to filter jobs that ran only on GPU nodes) as +// somewhat typical though often the rhs will not be that big. 
+ +package table + +import ( + "slices" + "strings" + + umaps "go-utils/maps" +) + +// The data structure to support this is a tree where the sources are all the first elements of all +// the names in the set and the sinks are all the full names. + +type node struct { + me string + back *node + next map[string]*node +} + +func makeNode(me string, back *node) *node { + return &node{ + me: me, + back: back, + next: make(map[string]*node), + } +} + +type set struct { + all []*node // all nodes in the set + sources *node // head node with me == "" and not in all + lazySinks []*node // inserts will clear this, reconstructed as needed +} + +func makeSet() *set { + return &set{ + all: make([]*node, 0), // head node not here + sources: makeNode("", nil), // head node + lazySinks: nil, + } +} + +// Descend in the tree and add the node. Dirty the set and return true if new information was +// added. +func (s *set) add(elements []string) bool { + n := s.sources + added := false + for _, e := range elements { + if probe := n.next[e]; probe != nil { + n = probe + } else { + added = true + x := makeNode(e, n) + s.all = append(s.all, x) + n.next[e] = x + n = x + } + } + if added { + s.lazySinks = nil + } + return added +} + +// Descend in the tree and see if we get to a node, return true if so. 
+func (s *set) lookup(elements []string) bool { + n := s.sources + for _, e := range elements { + probe := n.next[e] + if probe == nil { + return false + } + n = probe + } + return true +} + +func (s *set) sinks() []*node { + if s.lazySinks == nil { + sinks := make([]*node, 0) + for _, n := range s.all { + if len(n.next) == 0 { + sinks = append(sinks, n) + } + } + s.lazySinks = sinks + } + return s.lazySinks +} + +type Hostnames struct { + s *set + serial uint +} + +func NewHostnames() *Hostnames { + return &Hostnames{ + s: makeSet(), + } +} + +func (h *Hostnames) Add(hostname string) { + if h.s.add(h.splitName(hostname)) { + h.serial++ + } +} + +func (h *Hostnames) HasElement(hostname string) bool { + return h.s.lookup(h.splitName(hostname)) +} + +func (this *Hostnames) Equal(that *Hostnames) bool { + return setCompare(this, that) == 0 +} + +// A > B if we can traverse B and A together and reach every node in A that is in B. +// +// A >= B if A > B or A = B. +// +// A = B if we can traverse B and A together and reach every node in A that is in B, and if from the +// terminal nodes of that traversal we can continue to walk down the graph to the sinks and we reach +// all the sinks that way. +// +// This is equivalent to saying that when we at every node of A that is reached by members of B, +// every outgoing edge is touched, and there are no members of B that do not touch an outgoing edge, +// or in other words, the set of first elements of B equals (in the normal sense) the set of first +// elements of A. +// +// Consider A = {a.b, a.c, b.x} and B = {a}. At level 1, A has two outgoing edges, for a and b. B +// touches only one of them, and B is therefore a subset, not equal. +// +// It could be that we just need to be talking about the set of leading elements of a and b... that +// would be a blessing. 

// HasSubset is presumably meant to report whether b is a subset of a, and a proper subset when
// `proper` is true (see the discussion above).
//
// NOTE: this is not implemented yet.  The non-proper case returns false unconditionally, and the
// proper case is delegated to properSubset, which is itself a stub that returns false.  Every call
// therefore yields false at the moment.
func (a *Hostnames) HasSubset(b *Hostnames, proper bool) bool {
	if proper {
		return properSubset(a.s, b.s)
	}
	return false
}

// properSubset is a placeholder: it ignores its arguments and always returns false.
func properSubset(a, b *set) bool {
	return false
}

// B is a subset of A if we can traverse B and A together and reach every node in A that is in B.
//
// B is "equal" to A if  [NOTE: this sentence was left unfinished]

// This checks whether `that` is a subset of `this`, equal to `this`, or neither (superset or
// incomparable).  It returns -1 for the first, 0 for the second, and 1 for the last.
//
// NOTE: that is the intended contract only.  The function is not implemented and panics with "NYI"
// whenever it is called, and so does Hostnames.Equal, which is built on it.

func setCompare(this, that *Hostnames) int {
	panic("NYI")
}

// Returns a string that is a comma-separated list of the first elements of all the hosts in the
// set, in sorted order, without compression.  This is precisely the set of names in the map of
// the head node.

func (h *Hostnames) FormatBrief() string {
	// umaps.Keys presumably returns the map's keys as a slice in unspecified order, hence the sort.
	xs := umaps.Keys(h.s.sources.next)
	slices.Sort(xs)
	return strings.Join(xs, ",")
}

// Returns a string that is a comma-separated list of the hosts in the set, in sorted order,
// without compression.  For this, we need the sinks.  From each sink we walk the graph backward to
// the root, constructing full names as we go.

func (h *Hostnames) FormatFull() string {
	xs := make([]string, 0)
	for _, n := range h.s.sinks() {
		x := ""
		for {
			// Prepend this element; the name is assembled from its last element to its first.
			x = n.me + x
			// The head node is the only node with an empty `me`, so a node whose back link is the
			// head node is a first element and the name is complete.
			if n.back.me == "" {
				break
			}
			x = "." + x
			n = n.back
		}
		xs = append(xs, x)
	}
	slices.Sort(xs)
	return strings.Join(xs, ",")
}

// splitName breaks hostname into its dot-separated elements.  It returns nil if the first element
// is empty, which covers the empty string and names that start with a dot.  Otherwise it returns
// the elements with any empty ones removed, so "a..b" yields the same result as "a.b".  The
// receiver is not used.
func (h *Hostnames) splitName(hostname string) []string {
	elements := strings.Split(hostname, ".")
	if len(elements[0]) == 0 {
		return nil
	}
	elements = slices.DeleteFunc(elements, func(x string) bool {
		return x == ""
	})
	return elements
}
diff --git a/code/sonalyze/table/hosts_test.go b/code/sonalyze/table/hosts_test.go
new file mode 100644
index 00000000..f609c385
--- /dev/null
+++ b/code/sonalyze/table/hosts_test.go
@@ -0,0 +1,160 @@
package table

import (
	"fmt"
	"testing"
)

// Keeps the fmt import alive while no test prints anything.
var (
	_ fmt.Formatter
)

// TestSet exercises the underlying tree directly: insertion, prefix lookup, and sink enumeration.
func TestSet(t *testing.T) {
	s := makeSet()
	s.add([]string{"a", "b"})
	if !s.lookup([]string{"a"}) {
		t.Fatal("lookup")
	}
	if !s.lookup([]string{"a", "b"}) {
		t.Fatal("lookup")
	}
	if s.lookup([]string{"b"}) {
		t.Fatal("lookup")
	}
	s.add([]string{"a"})
	s.add([]string{"a", "b", "c"})
	s.add([]string{"a", "b", "d"})
	if !s.lookup([]string{"a"}) {
		t.Fatal("lookup")
	}
	if !s.lookup([]string{"a", "b"}) {
		t.Fatal("lookup")
	}
	if !s.lookup([]string{"a", "b", "c"}) {
		t.Fatal("lookup")
	}
	if !s.lookup([]string{"a", "b", "d"}) {
		t.Fatal("lookup")
	}
	if s.lookup([]string{"b"}) {
		t.Fatal("lookup")
	}
	if s.lookup([]string{"a", "c"}) {
		t.Fatal("lookup")
	}
	sinks := s.sinks()
	if len(sinks) != 2 {
		t.Fatal("sinks")
	}
	counts := make(map[string]int)
	for _, x := range sinks {
		counts[x.me]++
	}
	if counts["c"] != 1 {
		t.Fatal("count")
	}
	if counts["d"] != 1 {
		t.Fatal("count")
	}
}

// TestHostnames exercises the public API: Add, HasElement and the two formatters.  The set
// comparison tests at the end are disabled because setCompare is not implemented.
func TestHostnames(t *testing.T) {
	// Basic test
	h := NewHostnames()
	h.Add("a.b.c")
	h.Add("x.b.c")
	h.Add("y.c")
	n := h.FormatBrief()
	if n != "a,x,y" {
		t.Fatal(n)
	}
	n = h.FormatFull()
	if n != "a.b.c,x.b.c,y.c" {
		t.Fatal(n)
	}
	for _, x := range []string{"a", "a.b", "a.b.c", "x", "x.b", "x.b.c"} {
		if !h.HasElement(x) {
			t.Fatal(x)
		}
	}

	// Add something that was there, data should not change
	x := h.serial
	h.Add("a.b.c")
	n = h.FormatFull()
	if n != "a.b.c,x.b.c,y.c" {
		t.Fatal(n)
	}
	h.Add("a.b")
	n = h.FormatFull()
	if n != "a.b.c,x.b.c,y.c" {
		t.Fatal(n)
	}
	h.Add("a")
	n = h.FormatFull()
	if n != "a.b.c,x.b.c,y.c" {
		t.Fatal(n)
	}
	if h.serial != x {
		t.Fatal("Changed")
	}

	// Add a longer name, it replaces the existing entry
	x = h.serial
	h.Add("a.b.c.d")
	if h.serial == x {
		t.Fatal("Unchanged")
	}
	n = h.FormatFull()
	if n != "a.b.c.d,x.b.c,y.c" {
		t.Fatal(n)
	}
	if !h.HasElement("a.b.c.d") {
		t.Fatal("a.b.c.d")
	}

	// Add a new name, it creates a new entry
	x = h.serial
	h.Add("a.b.c.e")
	if h.serial == x {
		t.Fatal("Unchanged")
	}
	n = h.FormatFull()
	if n != "a.b.c.d,a.b.c.e,x.b.c,y.c" {
		t.Fatal(n)
	}
	n = h.FormatBrief()
	if n != "a,x,y" {
		t.Fatal(n)
	}
	if !h.HasElement("a.b.c.e") {
		t.Fatal("a.b.c.e")
	}

	/*
		if setCompare(h, h) != 0 {
			t.Fatal("Self-equal")
		}

		briefs := NewHostnames()
		briefs.Add("a")
		briefs.Add("x")
		if setCompare(h, briefs) != -1 {
			t.Fatal("Brief subset")
		}
		briefs.Add("y")
		if setCompare(h, briefs) != 0 {
			println(h.FormatBrief())
			println(h.FormatFull())
			t.Fatal("Brief subset", setCompare(h, briefs))
		}

		sub := NewHostnames()
		sub.Add("a.b")
		sub.Add("x")
		sub.Add("y.c")
		if setCompare(h, sub) != -1 {
			t.Fatal("Subset")
		}
	*/
}
diff --git a/code/sonalyze/table/perform.go b/code/sonalyze/table/perform.go
new file mode 100644
index 00000000..6e86e0b5
--- /dev/null
+++ b/code/sonalyze/table/perform.go
@@ -0,0 +1,29 @@
// Query abstraction

// The query runs on the generated table rows.  The *names* are always relative to the printed table
// rows.  And the *input* to the query predicate is such a table row.  So the query can't be applied
// until those rows have been computed.  This makes most sense from the user's point of view also.
// But it may limit applicability.
+ +package table + +import ( + "fmt" + "slices" +) + +func ApplyQuery[T any]( + q PNode, + formatters map[string]Formatter[T], + predicates map[string]Predicate[T], + records []T, +) ([]T, error) { + if q != nil { + queryNeg, err := CompileQueryNeg(formatters, predicates, q) + if err != nil { + return nil, fmt.Errorf("Could not compile query: %v", err) + } + records = slices.DeleteFunc(records, queryNeg) + } + return records, nil +} diff --git a/code/sonalyze/table/query.go b/code/sonalyze/table/query.go new file mode 100644 index 00000000..b18ad2cc --- /dev/null +++ b/code/sonalyze/table/query.go @@ -0,0 +1,230 @@ +// Simple query compiler. + +package table + +import ( + "fmt" + "regexp" +) + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Predicate table. +// +// The table generator will generate a table of converters and predicates for every field. +// +// The Convert function, if not nil, converts a string value supplied as part of the query text to a +// value of the appropriate type, but represented as an `any`. The value will be converted once, +// during compilation. +// +// The Compare predicate takes a table row of the appropriate type, and the converted value, and +// returns -1, 0, or 1 depending on the value of the field in relation to the argument value. +// +// The SetCompare predicate takes a table row of the appropriate type, and the converted value, and +// an operation from the set =, <, <=, >, >=, encoded as 1, 2, 3, 4, 5 respectively, and returns +// true if the field and the value have the corresponding set relation (equal, proper subset, +// improper subset, proper superset, improper superset). + +type Predicate[T any] struct { + Convert func(d string) (any, error) + Compare func(d T, v any) int + SetCompare func(d T, v any, op int) bool +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Syntax trees. 
+// +// Parsed queries are represented as PNode instances, all of them are tagged with a POpXx. + +const ( + // The value 0 is never a valid opcode. + opEq = 1 + iota + opLt + opLe + opGt + opGe + opMatch + opAnd + opOr + opNot +) + +var pop2op = [...]string{ + "*BAD*", + "=", + "<", + "<=", + ">", + ">=", + "=~", + "and", + "or", + "not", +} + +type PNode fmt.Stringer + +type unaryOp struct { + op int + opd PNode +} + +func (b *unaryOp) String() string { + return fmt.Sprintf("(%s %s)", pop2op[b.op], b.opd) +} + +type logicalOp struct { + op int + lhs, rhs PNode +} + +func (b *logicalOp) String() string { + return fmt.Sprintf("(%s %s %s)", pop2op[b.op], b.lhs, b.rhs) +} + +type binaryOp struct { + op int + field, value string +} + +func (b *binaryOp) String() string { + return fmt.Sprintf("(%s %s %s)", pop2op[b.op], b.field, b.value) +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Query parsing + +func ParseQuery(input string) (PNode, error) { + parser, err := newQueryParser(input) + if err != nil { + return nil, err + } + return parser.Parse() +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Query generator. +// +// The compilers take both predicates and formatters because the =~ operator needs to format the lhs +// to be able to match it against the rhs. + +// The returned predicate returns true iff the test passed. + +func CompileQuery[T any]( + formatters map[string]Formatter[T], + predicates map[string]Predicate[T], + q PNode, +) (func(d T) bool, error) { + return compileQuery(formatters, predicates, q) +} + +// The returned predicate returns false iff the test passed. +// +// TODO: It's possible to avoid wrapping the predicate here. 
+ +func CompileQueryNeg[T any]( + formatters map[string]Formatter[T], + predicates map[string]Predicate[T], + q PNode, +) (func(d T) bool, error) { + query, err := compileQuery(formatters, predicates, q) + if err != nil { + return nil, err + } + return func(d T) bool { return !query(d) }, nil +} + +func compileQuery[T any]( + formatters map[string]Formatter[T], + predicates map[string]Predicate[T], + q PNode, +) (func(d T) bool, error) { + switch l := q.(type) { + case *logicalOp: + lhs, err := compileQuery(formatters, predicates, l.lhs) + if err != nil { + return nil, err + } + rhs, err := compileQuery(formatters, predicates, l.rhs) + if err != nil { + return nil, err + } + switch l.op { + case opAnd: + return func(d T) bool { return lhs(d) && rhs(d) }, nil + case opOr: + return func(d T) bool { return lhs(d) || rhs(d) }, nil + default: + panic("Unknown op") + } + case *unaryOp: + opd, err := compileQuery(formatters, predicates, l.opd) + if err != nil { + return nil, err + } + switch l.op { + case opNot: + return func(d T) bool { return !opd(d) }, nil + default: + panic("Unknown op") + } + case *binaryOp: + if l.op == opMatch { + format, found := formatters[l.field] + if !found { + return nil, fmt.Errorf("Field not found: %s", l.field) + } + re, err := regexp.Compile(l.value) + if err != nil { + return nil, err + } + formatter := format.Fmt + return func(d T) bool { return re.MatchString(formatter(d, 0)) }, nil + } + + p, found := predicates[l.field] + if !found { + return nil, fmt.Errorf("Field not found: %s", l.field) + } + var value any + if p.Convert != nil { + v, err := p.Convert(l.value) + if err != nil { + return nil, err + } + value = v + } else { + value = l.value + } + if p.SetCompare != nil { + setCompare := p.SetCompare + op := l.op + switch op { + case opEq, opLt, opLe, opGt, opGe: + return func(d T) bool { return setCompare(d, value, op) }, nil + default: + panic("Unknown op") + } + } + compare := p.Compare + switch l.op { + case opEq: + return 
func(d T) bool { return compare(d, value) == 0 }, nil + case opLt: + return func(d T) bool { return compare(d, value) < 0 }, nil + case opLe: + return func(d T) bool { return compare(d, value) <= 0 }, nil + case opGt: + return func(d T) bool { return compare(d, value) > 0 }, nil + case opGe: + return func(d T) bool { return compare(d, value) >= 0 }, nil + default: + panic("Unknown op") + } + default: + panic("Bad operator type") + } +} diff --git a/code/sonalyze/table/query_test.go b/code/sonalyze/table/query_test.go new file mode 100644 index 00000000..105bd53a --- /dev/null +++ b/code/sonalyze/table/query_test.go @@ -0,0 +1,145 @@ +package table + +import ( + _ "fmt" + "testing" +) + +func TestLexer(t *testing.T) { + p, err := newQueryParser( + " = <= < >= > =~ (and) or not andor \"and\" 'or' /not/ `zappa` hi1 ho2 " + + "10 10.5 -10.5e+7 -10e8 5w 4d 3h 2m 5w2m 3d2m", + ) + assertNotErr(t, err) + ts := p.tokens + //fmt.Println(ts) + toks := []int{ + tEq, tLe, tLt, tGe, tGt, tMatch, tLparen, tAnd, + tRparen, tOr, tNot, tIdent, tString, tString, tString, tString, tIdent, tIdent, + tString, tString, tString, tString, tString, tString, tString, tString, tString, tString, + } + strs := []string{ + "andor", "and", "or", "not", "zappa", "hi1", "ho2", "10", "10.5", "-10.5e+7", "-10e8", + "5w", "4d", "3h", "2m", "5w2m", "3d2m", + } + j := 0 + assertEq(t, len(toks), len(ts)) + for i := range toks { + assertEq(t, ts[i].tok, toks[i]) + if toks[i] == tString || toks[i] == tIdent { + assertEq(t, ts[i].text, strs[j]) + j++ + } + } +} + +func TestParser(t *testing.T) { + // Basic expr + check that andor is not split as and and or + n, err := ParseQuery(`a=andor`) + assertNotErr(t, err) + bin := n.(*binaryOp) + assertEq(t, bin.op, opEq) + assertEq(t, bin.field, "a") + assertEq(t, bin.value, "andor") + + // Operators + some space and quote stuff + n, err = ParseQuery(`a< 10`) + assertNotErr(t, err) + bin = n.(*binaryOp) + assertEq(t, bin.op, opLt) + assertEq(t, bin.value, "10") + + 
n, err = ParseQuery(`a <= "="`) + assertNotErr(t, err) + bin = n.(*binaryOp) + assertEq(t, bin.op, opLe) + assertEq(t, bin.value, "=") + + // Identifiers are strings, in the right context + n, err = ParseQuery(`a <= abracadabra`) + assertNotErr(t, err) + bin = n.(*binaryOp) + assertEq(t, bin.op, opLe) + assertEq(t, bin.value, "abracadabra") + + n, err = ParseQuery(`abc >'10.('`) + assertNotErr(t, err) + bin = n.(*binaryOp) + assertEq(t, bin.op, opGt) + assertEq(t, bin.field, "abc") + assertEq(t, bin.value, "10.(") + + n, err = ParseQuery(`abc0 >=/hi ho/ `) + assertNotErr(t, err) + bin = n.(*binaryOp) + assertEq(t, bin.op, opGe) + assertEq(t, bin.field, "abc0") + assertEq(t, bin.value, "hi ho") + + n, err = ParseQuery(` abc0 >= 37.5`) + assertNotErr(t, err) + bin = n.(*binaryOp) + assertEq(t, bin.op, opGe) + assertEq(t, bin.field, "abc0") + assertEq(t, bin.value, "37.5") + + // The rightparen is not part of the string literal. The + is required so as to + // not interpret the = as an operator. + n, err = ParseQuery("(ab <= `+=`)") + assertNotErr(t, err) + bin = n.(*binaryOp) + assertEq(t, bin.op, opLe) + assertEq(t, bin.field, "ab") + assertEq(t, bin.value, "+=") + + n, err = ParseQuery(`User =~ /ec-[x-z]*/`) + assertNotErr(t, err) + bin = n.(*binaryOp) + assertEq(t, bin.op, opMatch) + assertEq(t, bin.field, "User") + assertEq(t, bin.value, "ec-[x-z]*") + + // The not binds to the =~ binop and then the and groups that tree and the > binop. 
+ n, err = ParseQuery(`not User =~ /root|toor|zabbix/ and Duration > 1h`) + assertNotErr(t, err) + log := n.(*logicalOp) + assertEq(t, log.op, opAnd) + un := log.lhs.(*unaryOp) + assertEq(t, un.op, opNot) + bin = un.opd.(*binaryOp) + assertEq(t, bin.op, opMatch) + assertEq(t, bin.field, "User") + assertEq(t, bin.value, "root|toor|zabbix") + bin = log.rhs.(*binaryOp) + assertEq(t, bin.op, opGt) + assertEq(t, bin.field, "Duration") + assertEq(t, bin.value, "1h") + + // and binds tighter than or + n, err = ParseQuery(`User = u1 or User = u2 and Duration > 1h`) + assertNotErr(t, err) + log = n.(*logicalOp) + assertEq(t, log.op, opOr) + log = log.rhs.(*logicalOp) + assertEq(t, log.op, opAnd) + + // same + n, err = ParseQuery(`Duration > 1h and User = u1 or User = u2`) + assertNotErr(t, err) + log = n.(*logicalOp) + assertEq(t, log.op, opOr) + log = log.lhs.(*logicalOp) + assertEq(t, log.op, opAnd) +} + +func assertEq[T comparable](t *testing.T, a, b T) { + if a != b { + t.Fatalf("Unequal: %v %v", a, b) + } +} + +func assertNotErr(t *testing.T, err error) { + if err != nil { + t.Fatal(err) + } +} diff --git a/code/sonalyze/table/queryexpr.go b/code/sonalyze/table/queryexpr.go new file mode 100644 index 00000000..706e8f0d --- /dev/null +++ b/code/sonalyze/table/queryexpr.go @@ -0,0 +1,657 @@ +// Code generated by goyacc -o queryexpr.go queryexpr.y. DO NOT EDIT. 
+ +//line queryexpr.y:31 +//go:generate goyacc -o queryexpr.go queryexpr.y + +package table + +import __yyfmt__ "fmt" + +//line queryexpr.y:33 + +import ( + "fmt" + "regexp" + "strings" +) + +//line queryexpr.y:43 +type yySymType struct { + yys int + text string + node PNode +} + +const tIdent = 57346 +const tString = 57347 +const tOr = 57348 +const tAnd = 57349 +const tEq = 57350 +const tLt = 57351 +const tLe = 57352 +const tGt = 57353 +const tGe = 57354 +const tMatch = 57355 +const tNot = 57356 +const tLparen = 57357 +const tRparen = 57358 + +var yyToknames = [...]string{ + "$end", + "error", + "$unk", + "tIdent", + "tString", + "tOr", + "tAnd", + "tEq", + "tLt", + "tLe", + "tGt", + "tGe", + "tMatch", + "tNot", + "tLparen", + "tRparen", +} + +var yyStatenames = [...]string{} + +const yyEofCode = 1 +const yyErrCode = 2 +const yyInitialStackSize = 16 + +//line queryexpr.y:78 + +type token struct { + tok int + text string +} + +type queryParser struct { + input string + tokens []token + errtxt string + expr PNode +} + +func (q *queryParser) Lex(lval *yySymType) (tok int) { + if len(q.tokens) == 0 { + tok = -1 + } else { + tok = q.tokens[0].tok + lval.text = q.tokens[0].text + q.tokens = q.tokens[1:] + } + return +} + +func (q *queryParser) Error(s string) { + if q.errtxt == "" { + q.errtxt = s + } +} + +func (q *queryParser) Parse() (PNode, error) { + r := yyParse(q) + if r != 0 { + return nil, fmt.Errorf("Can't parse %s: %s", q.input, q.errtxt) + } + return q.expr, nil +} + +var tokenRe = regexp.MustCompile( + strings.Join([]string{ + `(\s+)`, + `(<=|<|>=|>|=~|=|and|or|not|\(|\))`, + `([a-zA-Z_][a-zA-Z0-9_]*)`, + `"([^"]*)"`, + `'([^']*)'`, + `/([^/]*)/`, + "`([^`]*)`", + `(\d+[wW](?:\d+[dD])?(?:\d+[hH])?(?:\d+[mM])?)`, + `(\d+[dD](?:\d+[hH])?(?:\d+[mM])?)`, + `(\d+[hH](?:\d+[mM])?)`, + `(\d+[mM])`, + `(-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)`, + `(.)`, + }, "|")) + +func init() { + tokenRe.Longest() +} + +const ( + spaces = 1 + punctuation = 2 + ident = 3 + firstString 
= 4 + lastString = 12 + bad = 13 +) + +var punct = map[string]int{ + "<": tLt, + "<=": tLe, + ">": tGt, + ">=": tGe, + "=": tEq, + "=~": tMatch, + "and": tAnd, + "or": tOr, + "not": tNot, + "(": tLparen, + ")": tRparen, +} + +func newQueryParser(input string) (*queryParser, error) { + m := tokenRe.FindAllStringSubmatch(input, -1) + if m == nil { + // This shouldn't actually happen: the regex should match every possible string. + return nil, fmt.Errorf("Can't lex %s", input) + } + tokens := make([]token, 0) + for _, tm := range m { + var t int + var text string + switch { + case tm[spaces] != "": + continue + case tm[ident] != "": + text = tm[ident] + t = tIdent + case tm[punctuation] != "": + text = tm[punctuation] + t = punct[text] + case tm[bad] != "": + return nil, fmt.Errorf("Bad character: %s", tm[bad]) + default: + for i := firstString; i <= lastString; i++ { + if tm[i] != "" { + text = tm[i] + t = tString + break + } + } + if t == 0 { + panic("Bad match") + } + } + tokens = append(tokens, token{t, text}) + } + return &queryParser{ + input: input, + tokens: tokens, + }, nil +} + +//line yacctab:1 +var yyExca = [...]int8{ + -1, 1, + 1, -1, + -2, 0, +} + +const yyPrivate = 57344 + +const yyLast = 43 + +var yyAct = [...]int8{ + 18, 6, 7, 7, 1, 0, 6, 7, 0, 0, + 0, 24, 25, 26, 27, 28, 29, 20, 19, 21, + 22, 0, 0, 0, 0, 4, 0, 23, 9, 10, + 11, 12, 13, 14, 2, 3, 5, 0, 8, 0, + 15, 16, 17, +} + +var yyPact = [...]int16{ + 21, -1000, -5, 21, 20, 21, 21, 21, -1000, 13, + 13, 13, 13, 13, 13, 0, -4, -1000, -1000, -1000, + -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, +} + +var yyPgo = [...]int8{ + 0, 4, 34, 0, +} + +var yyR1 = [...]int8{ + 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 3, 3, 3, 3, 3, +} + +var yyR2 = [...]int8{ + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 1, 1, 1, 1, 1, +} + +var yyChk = [...]int16{ + -1000, -1, -2, 14, 4, 15, 6, 7, -2, 8, + 9, 10, 11, 12, 13, -2, -2, -2, -3, 5, + 4, 6, 7, 14, -3, -3, -3, -3, -3, 16, +} + +var yyDef = [...]int8{ + 
0, -2, 1, 0, 0, 0, 0, 0, 2, 0, + 0, 0, 0, 0, 0, 0, 3, 4, 5, 12, + 13, 14, 15, 16, 6, 7, 8, 9, 10, 11, +} + +var yyTok1 = [...]int8{ + 1, +} + +var yyTok2 = [...]int8{ + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, +} + +var yyTok3 = [...]int8{ + 0, +} + +var yyErrorMessages = [...]struct { + state int + token int + msg string +}{} + +//line yaccpar:1 + +/* parser for yacc output */ + +var ( + yyDebug = 0 + yyErrorVerbose = false +) + +type yyLexer interface { + Lex(lval *yySymType) int + Error(s string) +} + +type yyParser interface { + Parse(yyLexer) int + Lookahead() int +} + +type yyParserImpl struct { + lval yySymType + stack [yyInitialStackSize]yySymType + char int +} + +func (p *yyParserImpl) Lookahead() int { + return p.char +} + +func yyNewParser() yyParser { + return &yyParserImpl{} +} + +const yyFlag = -1000 + +func yyTokname(c int) string { + if c >= 1 && c-1 < len(yyToknames) { + if yyToknames[c-1] != "" { + return yyToknames[c-1] + } + } + return __yyfmt__.Sprintf("tok-%v", c) +} + +func yyStatname(s int) string { + if s >= 0 && s < len(yyStatenames) { + if yyStatenames[s] != "" { + return yyStatenames[s] + } + } + return __yyfmt__.Sprintf("state-%v", s) +} + +func yyErrorMessage(state, lookAhead int) string { + const TOKSTART = 4 + + if !yyErrorVerbose { + return "syntax error" + } + + for _, e := range yyErrorMessages { + if e.state == state && e.token == lookAhead { + return "syntax error: " + e.msg + } + } + + res := "syntax error: unexpected " + yyTokname(lookAhead) + + // To match Bison, suggest at most four expected tokens. + expected := make([]int, 0, 4) + + // Look for shiftable tokens. 
+ base := int(yyPact[state]) + for tok := TOKSTART; tok-1 < len(yyToknames); tok++ { + if n := base + tok; n >= 0 && n < yyLast && int(yyChk[int(yyAct[n])]) == tok { + if len(expected) == cap(expected) { + return res + } + expected = append(expected, tok) + } + } + + if yyDef[state] == -2 { + i := 0 + for yyExca[i] != -1 || int(yyExca[i+1]) != state { + i += 2 + } + + // Look for tokens that we accept or reduce. + for i += 2; yyExca[i] >= 0; i += 2 { + tok := int(yyExca[i]) + if tok < TOKSTART || yyExca[i+1] == 0 { + continue + } + if len(expected) == cap(expected) { + return res + } + expected = append(expected, tok) + } + + // If the default action is to accept or reduce, give up. + if yyExca[i+1] != 0 { + return res + } + } + + for i, tok := range expected { + if i == 0 { + res += ", expecting " + } else { + res += " or " + } + res += yyTokname(tok) + } + return res +} + +func yylex1(lex yyLexer, lval *yySymType) (char, token int) { + token = 0 + char = lex.Lex(lval) + if char <= 0 { + token = int(yyTok1[0]) + goto out + } + if char < len(yyTok1) { + token = int(yyTok1[char]) + goto out + } + if char >= yyPrivate { + if char < yyPrivate+len(yyTok2) { + token = int(yyTok2[char-yyPrivate]) + goto out + } + } + for i := 0; i < len(yyTok3); i += 2 { + token = int(yyTok3[i+0]) + if token == char { + token = int(yyTok3[i+1]) + goto out + } + } + +out: + if token == 0 { + token = int(yyTok2[1]) /* unknown char */ + } + if yyDebug >= 3 { + __yyfmt__.Printf("lex %s(%d)\n", yyTokname(token), uint(char)) + } + return char, token +} + +func yyParse(yylex yyLexer) int { + return yyNewParser().Parse(yylex) +} + +func (yyrcvr *yyParserImpl) Parse(yylex yyLexer) int { + var yyn int + var yyVAL yySymType + var yyDollar []yySymType + _ = yyDollar // silence set and not used + yyS := yyrcvr.stack[:] + + Nerrs := 0 /* number of errors */ + Errflag := 0 /* error recovery flag */ + yystate := 0 + yyrcvr.char = -1 + yytoken := -1 // yyrcvr.char translated into internal numbering + 
defer func() { + // Make sure we report no lookahead when not parsing. + yystate = -1 + yyrcvr.char = -1 + yytoken = -1 + }() + yyp := -1 + goto yystack + +ret0: + return 0 + +ret1: + return 1 + +yystack: + /* put a state and value onto the stack */ + if yyDebug >= 4 { + __yyfmt__.Printf("char %v in %v\n", yyTokname(yytoken), yyStatname(yystate)) + } + + yyp++ + if yyp >= len(yyS) { + nyys := make([]yySymType, len(yyS)*2) + copy(nyys, yyS) + yyS = nyys + } + yyS[yyp] = yyVAL + yyS[yyp].yys = yystate + +yynewstate: + yyn = int(yyPact[yystate]) + if yyn <= yyFlag { + goto yydefault /* simple state */ + } + if yyrcvr.char < 0 { + yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval) + } + yyn += yytoken + if yyn < 0 || yyn >= yyLast { + goto yydefault + } + yyn = int(yyAct[yyn]) + if int(yyChk[yyn]) == yytoken { /* valid shift */ + yyrcvr.char = -1 + yytoken = -1 + yyVAL = yyrcvr.lval + yystate = yyn + if Errflag > 0 { + Errflag-- + } + goto yystack + } + +yydefault: + /* default state action */ + yyn = int(yyDef[yystate]) + if yyn == -2 { + if yyrcvr.char < 0 { + yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval) + } + + /* look through exception table */ + xi := 0 + for { + if yyExca[xi+0] == -1 && int(yyExca[xi+1]) == yystate { + break + } + xi += 2 + } + for xi += 2; ; xi += 2 { + yyn = int(yyExca[xi+0]) + if yyn < 0 || yyn == yytoken { + break + } + } + yyn = int(yyExca[xi+1]) + if yyn < 0 { + goto ret0 + } + } + if yyn == 0 { + /* error ... attempt to resume parsing */ + switch Errflag { + case 0: /* brand new error */ + yylex.Error(yyErrorMessage(yystate, yytoken)) + Nerrs++ + if yyDebug >= 1 { + __yyfmt__.Printf("%s", yyStatname(yystate)) + __yyfmt__.Printf(" saw %s\n", yyTokname(yytoken)) + } + fallthrough + + case 1, 2: /* incompletely recovered error ... 
try again */ + Errflag = 3 + + /* find a state where "error" is a legal shift action */ + for yyp >= 0 { + yyn = int(yyPact[yyS[yyp].yys]) + yyErrCode + if yyn >= 0 && yyn < yyLast { + yystate = int(yyAct[yyn]) /* simulate a shift of "error" */ + if int(yyChk[yystate]) == yyErrCode { + goto yystack + } + } + + /* the current p has no shift on "error", pop stack */ + if yyDebug >= 2 { + __yyfmt__.Printf("error recovery pops state %d\n", yyS[yyp].yys) + } + yyp-- + } + /* there is no state on the stack with an error shift ... abort */ + goto ret1 + + case 3: /* no shift yet; clobber input char */ + if yyDebug >= 2 { + __yyfmt__.Printf("error recovery discards %s\n", yyTokname(yytoken)) + } + if yytoken == yyEofCode { + goto ret1 + } + yyrcvr.char = -1 + yytoken = -1 + goto yynewstate /* try again in the same state */ + } + } + + /* reduction by production yyn */ + if yyDebug >= 2 { + __yyfmt__.Printf("reduce %v in:\n\t%v\n", yyn, yyStatname(yystate)) + } + + yynt := yyn + yypt := yyp + _ = yypt // guard against "declared and not used" + + yyp -= int(yyR2[yyn]) + // yyp is now the index of $0. Perform the default action. Iff the + // reduced production is ε, $1 is possibly out of range. 
+ if yyp+1 >= len(yyS) { + nyys := make([]yySymType, len(yyS)*2) + copy(nyys, yyS) + yyS = nyys + } + yyVAL = yyS[yyp+1] + + /* consult goto table to find next state */ + yyn = int(yyR1[yyn]) + yyg := int(yyPgo[yyn]) + yyj := yyg + yyS[yyp].yys + 1 + + if yyj >= yyLast { + yystate = int(yyAct[yyg]) + } else { + yystate = int(yyAct[yyj]) + if int(yyChk[yystate]) != -yyn { + yystate = int(yyAct[yyg]) + } + } + // dummy call; replaced with literal code + switch yynt { + + case 1: + yyDollar = yyS[yypt-1 : yypt+1] +//line queryexpr.y:62 + { + yylex.(*queryParser).expr = yyDollar[1].node + } + case 2: + yyDollar = yyS[yypt-2 : yypt+1] +//line queryexpr.y:64 + { + yyVAL.node = &unaryOp{opNot, yyDollar[2].node} + } + case 3: + yyDollar = yyS[yypt-3 : yypt+1] +//line queryexpr.y:65 + { + yyVAL.node = &logicalOp{opOr, yyDollar[1].node, yyDollar[3].node} + } + case 4: + yyDollar = yyS[yypt-3 : yypt+1] +//line queryexpr.y:66 + { + yyVAL.node = &logicalOp{opAnd, yyDollar[1].node, yyDollar[3].node} + } + case 5: + yyDollar = yyS[yypt-3 : yypt+1] +//line queryexpr.y:67 + { + yyVAL.node = &binaryOp{opEq, yyDollar[1].text, yyDollar[3].text} + } + case 6: + yyDollar = yyS[yypt-3 : yypt+1] +//line queryexpr.y:68 + { + yyVAL.node = &binaryOp{opLt, yyDollar[1].text, yyDollar[3].text} + } + case 7: + yyDollar = yyS[yypt-3 : yypt+1] +//line queryexpr.y:69 + { + yyVAL.node = &binaryOp{opLe, yyDollar[1].text, yyDollar[3].text} + } + case 8: + yyDollar = yyS[yypt-3 : yypt+1] +//line queryexpr.y:70 + { + yyVAL.node = &binaryOp{opGt, yyDollar[1].text, yyDollar[3].text} + } + case 9: + yyDollar = yyS[yypt-3 : yypt+1] +//line queryexpr.y:71 + { + yyVAL.node = &binaryOp{opGe, yyDollar[1].text, yyDollar[3].text} + } + case 10: + yyDollar = yyS[yypt-3 : yypt+1] +//line queryexpr.y:72 + { + yyVAL.node = &binaryOp{opMatch, yyDollar[1].text, yyDollar[3].text} + } + case 11: + yyDollar = yyS[yypt-3 : yypt+1] +//line queryexpr.y:73 + { + yyVAL.node = yyDollar[2].node + } + } + goto yystack /* stack 
new state and value */ +} diff --git a/code/sonalyze/table/queryexpr.y b/code/sonalyze/table/queryexpr.y new file mode 100644 index 00000000..5f59f256 --- /dev/null +++ b/code/sonalyze/table/queryexpr.y @@ -0,0 +1,200 @@ +// Parser for query expressions. +// +// The grammar is simple: +// +// expr ::= ident binop string | expr logop expr | unop expr | "(" expr ")" +// +// Idents are the usual [a-zA-Z_][a-zA-Z0-9_]* thing except operator names (and, or, not). Idents +// always denote fields in a table row. +// +// Strings are either idents, operator names, numbers, durations, or quoted things. Strings are always +// literal, idents and operator names never denote fields or operators in a string context. +// +// Numbers are full signed floating-point numbers. Durations are of the form n[wW]m[dD]o[hH]p[mM] +// where all four elements are optional but at least one must be present. +// +// Quoted things can be quoted '...', "...", `...`, or /.../, the quote cannot appear in the quoted +// string. +// +// In primitive binops <, <=, >, >=, =, =~ the first five require the string rhs to be convertible +// to the type of the field given by the ident lhs, and for the last the string is a regular +// expression and the field is formatted(!) to string before matching. The regex is not augmented +// at all; if you want ^ or $ say, you must add them yourself. +// +// Logical ops "and", "or", and "not" combine other expressions; parens override precedence. +// +// TODO: It would have been nice to have an even more permissive string syntax for user convenience. +// This can maybe be done if the parser is made to feed back to the lexer to allow a more permissive +// lexer to lex strings. Another possibility is to expand the character set allowed by identifiers. 
%{
//go:generate goyacc -o queryexpr.go queryexpr.y

package table

import (
	"fmt"
	"regexp"
	"strings"
)

%}

// Semantic values: `text` carries the source text of a token, `node` a parsed subexpression.
%union {
	text string
	node PNode
}

%start Query

// Later declarations bind tighter, so precedence from lowest to highest is: or, and, the
// comparison operators, not.  The operator-name tokens carry their text because the String rule
// below lets them appear as plain literals on the right-hand side of a comparison.
%token <text> tIdent tString
%left <text> tOr
%left <text> tAnd
%nonassoc tEq tLt tLe tGt tGe tMatch
%right <text> tNot
%token tLparen tRparen

%type <node> Expr Query
%type <text> String

%%

// The finished tree is handed back through the lexer object, which doubles as parser state.
Query : Expr { yylex.(*queryParser).expr = $1 } ;

Expr : tNot Expr { $$ = &unaryOp{opNot, $2} }
     | Expr tOr Expr { $$ = &logicalOp{opOr, $1, $3} }
     | Expr tAnd Expr { $$ = &logicalOp{opAnd, $1, $3} }
     | tIdent tEq String { $$ = &binaryOp{opEq, $1, $3} }
     | tIdent tLt String { $$ = &binaryOp{opLt, $1, $3} }
     | tIdent tLe String { $$ = &binaryOp{opLe, $1, $3} }
     | tIdent tGt String { $$ = &binaryOp{opGt, $1, $3} }
     | tIdent tGe String { $$ = &binaryOp{opGe, $1, $3} }
     | tIdent tMatch String { $$ = &binaryOp{opMatch, $1, $3} }
     | tLparen Expr tRparen { $$ = $2 }
     ;

// In string position identifiers and operator names are just text.
String : tString | tIdent | tOr | tAnd | tNot ;

%%

// token is one lexeme: its parser token code and the text it was lexed from (for quoted strings,
// the text between the quotes).
type token struct {
	tok  int
	text string
}

// queryParser is both the lexer handed to yyParse and the holder of the parse result.
type queryParser struct {
	input  string  // the original query text, used in error messages
	tokens []token // tokens not yet consumed by the parser
	errtxt string  // first error reported by the parser, if any
	expr   PNode   // the parsed expression, set by the Query rule on success
}

// Lex hands the next pre-lexed token to the parser, storing its text in lval.  It returns -1 once
// the token list is exhausted.
func (q *queryParser) Lex(lval *yySymType) int {
	if len(q.tokens) == 0 {
		return -1
	}
	next := q.tokens[0]
	q.tokens = q.tokens[1:]
	lval.text = next.text
	return next.tok
}

// Error records a parser error message.  Only the first message is kept; later ones are dropped.
func (q *queryParser) Error(s string) {
	if q.errtxt == "" {
		q.errtxt = s
	}
}

// Parse runs the generated parser over the tokens and returns the resulting expression tree, or an
// error that quotes the original input and the first recorded parser message.
func (q *queryParser) Parse() (PNode, error) {
	r := yyParse(q)
	if r != 0 {
		return nil, fmt.Errorf("Can't parse %s: %s", q.input, q.errtxt)
	}
	return q.expr, nil
}

// tokenRe matches exactly one token.  Every alternative has exactly one capture group, and the
// group numbers are named by the constants below; the two must be kept in sync.  The final
// catch-all alternative guarantees that any character that starts no valid token is still matched
// (and then reported as an error) rather than silently skipped.
var tokenRe = regexp.MustCompile(
	strings.Join([]string{
		`(\s+)`,
		`(<=|<|>=|>|=~|=|and|or|not|\(|\))`,
		`([a-zA-Z_][a-zA-Z0-9_]*)`,
		`"([^"]*)"`,
		`'([^']*)'`,
		`/([^/]*)/`,
		"`([^`]*)`",
		`(\d+[wW](?:\d+[dD])?(?:\d+[hH])?(?:\d+[mM])?)`,
		`(\d+[dD](?:\d+[hH])?(?:\d+[mM])?)`,
		`(\d+[hH](?:\d+[mM])?)`,
		`(\d+[mM])`,
		`(-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)`,
		`(.)`,
	}, "|"))

func init() {
	// Leftmost-longest matching is required so that an identifier that merely starts with an
	// operator name (eg "order", "android") is lexed as one identifier and not as an operator
	// followed by an identifier.
	tokenRe.Longest()
}

// Capture group numbers in tokenRe.
const (
	spaces      = 1
	punctuation = 2
	ident       = 3
	firstString = 4 // first of the string-literal groups: quoted things, durations, numbers
	lastString  = 12
	bad         = 13
)

// punct maps the text of operators and parentheses to their parser token codes.
var punct = map[string]int{
	"<":   tLt,
	"<=":  tLe,
	">":   tGt,
	">=":  tGe,
	"=":   tEq,
	"=~":  tMatch,
	"and": tAnd,
	"or":  tOr,
	"not": tNot,
	"(":   tLparen,
	")":   tRparen,
}

// newQueryParser lexes the input eagerly and returns a parser that is ready for Parse().  It
// returns an error if the input is empty or contains a character that cannot start any token.
func newQueryParser(input string) (*queryParser, error) {
	m := tokenRe.FindAllStringSubmatch(input, -1)
	if m == nil {
		// The regex matches any non-empty input, so this is reached only for the empty string.
		return nil, fmt.Errorf("Can't lex %s", input)
	}
	tokens := make([]token, 0, len(m))
	for _, tm := range m {
		var t int
		var text string
		switch {
		case tm[spaces] != "":
			continue
		case tm[ident] != "":
			text = tm[ident]
			t = tIdent
		case tm[punctuation] != "":
			text = tm[punctuation]
			t = punct[text]
		case tm[bad] != "":
			return nil, fmt.Errorf("Bad character: %s", tm[bad])
		default:
			// Everything else is a string literal.  The groups tested above are never empty
			// when they take part in a match, so getting here means one of the string
			// alternatives matched.  An empty quoted string ("", '', // or ``) leaves every
			// group empty; that is a legitimate empty literal and must not be treated as a
			// lexer failure.
			t = tString
			for i := firstString; i <= lastString; i++ {
				if tm[i] != "" {
					text = tm[i]
					break
				}
			}
		}
		tokens = append(tokens, token{t, text})
	}
	return &queryParser{
		input:  input,
		tokens: tokens,
	}, nil
}