Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #714 - compiler / exe engine for query expressions #719

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 120 additions & 8 deletions code/generate-table/generate-table.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,47 +85,98 @@ func main() {
// operators apply; if a type is "GpuSet" then some kind of set operators apply (TBD).

// typeInfo records the per-type overrides the generator consults when emitting
// formatting, parsing and comparison code for a field.  Every member is optional: an
// empty string selects the default noted on that member.
type typeInfo struct {
	helpName    string // user-facing type name; default is the name as given
	comparer    string // non-set types only: comparison function; default is cmp.Compare
	formatter   string // formatting function; default is Format<Typename>
	parser      string // string-to-value function; default is CvtString2<Typename>
	setComparer string // if "", not a set; otherwise the set comparison function
}

// knownTypes lists the field types that need something other than the default handling.
// A type that is absent from the map, or a member that is left empty, gets the default
// chosen by the lookup helpers (fieldComparer, formatName, parseName, ...).  An entry
// with a non-empty setComparer marks the type as a set type.
var knownTypes = map[string]typeInfo{
	"bool": {
		comparer: "CompareBool",
	},
	"[]string": {
		helpName:    "string list",
		formatter:   "FormatStrings",
		parser:      "CvtString2Strings",
		setComparer: "SetCompareStrings",
	},
	"F64Ceil": {
		helpName: "int",
		parser:   "CvtString2Float64",
	},
	"U64Div1M": {
		helpName: "int",
		parser:   "CvtString2Uint64",
	},
	"IntOrEmpty": {
		helpName: "int",
		parser:   "CvtString2Int",
	},
	"DateTimeValueOrBlank": {
		helpName: "DateTimeValue",
		parser:   "CvtString2DateTimeValue",
	},
	"IsoDateTimeOrUnknown": {helpName: "IsoDateTimeValue"},
	"Ustr":                 {helpName: "string"},
	"UstrMax30":            {helpName: "string"},
	"gpuset.GpuSet": {
		helpName:    "GpuSet",
		formatter:   "FormatGpuSet",
		parser:      "CvtString2GpuSet",
		setComparer: "SetCompareGpuSets",
	},
	"*Hostnames": {
		helpName:    "Hostnames",
		formatter:   "FormatHostnames",
		parser:      "CvtString2Hostnames",
		setComparer: "SetCompareHostnames",
	},
}

// isComparable reports whether values of type ty are compared with an ordinary
// three-way comparison, ie, whether ty has no set comparer registered in knownTypes.
// Types that are not in knownTypes at all are comparable.
func isComparable(ty string) bool {
	// A missing entry yields the zero typeInfo, whose setComparer is "".
	return knownTypes[ty].setComparer == ""
}

// fieldComparer returns the name of the three-way comparison function to emit for
// type ty: the comparer registered in knownTypes if there is one, otherwise
// "cmp.Compare".
func fieldComparer(ty string) string {
	name := knownTypes[ty].comparer
	if name == "" {
		name = "cmp.Compare"
	}
	return name
}

// setComparer returns the name of the set comparison function registered for type ty.
// It panics if ty has no set comparer, so callers should check isSetType first.
func setComparer(ty string) string {
	name := knownTypes[ty].setComparer
	if name == "" {
		panic("Not a set")
	}
	return name
}

// isSetType reports whether type ty has a set comparer registered in knownTypes.
// Types that are not in knownTypes are never set types.
func isSetType(ty string) bool {
	// A missing entry yields the zero typeInfo, whose setComparer is "".
	return knownTypes[ty].setComparer != ""
}

// formatName returns the name of the formatting function to emit for type ty: the
// formatter registered in knownTypes if there is one, otherwise "Format" followed by
// the capitalized type name.
func formatName(ty string) string {
	name := knownTypes[ty].formatter
	if name == "" {
		name = "Format" + capitalize(ty)
	}
	return name
}

// parseName returns the name of the string-to-value conversion function to emit for
// type ty: the parser registered in knownTypes if there is one, otherwise "CvtString2"
// followed by the capitalized type name.
func parseName(ty string) string {
	name := knownTypes[ty].parser
	if name == "" {
		name = "CvtString2" + capitalize(ty)
	}
	return name
}

func userFacingTypeName(ty string) string {
if probe := knownTypes[ty]; probe.helpName != "" {
return probe.helpName
Expand Down Expand Up @@ -191,6 +242,7 @@ var (

// fieldSection emits all generated code for the field section of table tableName: first
// the formatters, then the predicates.  It returns the field list produced by
// fieldFormatters.
func fieldSection(tableName string, fields *parser.FieldSect) (fieldList []fieldSpec) {
	fieldList = fieldFormatters(tableName, fields)

	// The predicate table is written after the formatters and yields no value.
	fieldPredicates(tableName, fields)

	return fieldList
}

Expand Down Expand Up @@ -274,6 +326,66 @@ func fieldFormatters(tableName string, fields *parser.FieldSect) (fieldList []fi
return
}

// fieldPredicates writes to output the declaration
//
//	var <tableName>Predicates = map[string]Predicate[<fields.Type>]{ ... }
//
// with one entry per field, keyed by the field's name.  Each entry is built as follows:
//
//   - Convert is emitted unless the field type is "string"; it names the parser for the
//     field type (see parseName).
//   - For comparable types, Compare is a function that extracts the field and passes it,
//     with the converted value, to the comparer for the type (see fieldComparer).
//   - For set types, SetCompare is emitted instead; it passes the field, the converted
//     value and the operator code on to the set comparer for the type (see setComparer).
//
// Two field attributes are honored: "field" replaces the struct member name that is
// read (the map key remains the field's own name), and "indirect" names a pointer
// member through which the field is reached.
//
// TODO: When the "indirect" pointer is nil the generated Compare returns -1, ie, the
// field always compares less than a concrete value; this may not be ideal.
//
// Indirection to set types is not supported and panics.
func fieldPredicates(tableName string, fields *parser.FieldSect) {
	emit := func(format string, args ...any) {
		fmt.Fprintf(output, format, args...)
	}

	emit("// MT: Constant after initialization; immutable\n")
	emit("var %sPredicates = map[string]Predicate[%s]{\n", tableName, fields.Type)
	for _, field := range fields.Fields {
		attrs := make(map[string]string)
		for _, a := range field.Attrs {
			attrs[a.Name] = a.Value
		}

		// The member that is actually read may differ from the name the user sees.
		member := field.Name
		if override, found := attrs["field"]; found {
			member = override
		}
		indirect := attrs["indirect"]

		emit("\t\"%s\": Predicate[%s]{\n", field.Name, fields.Type)
		if field.Type != "string" {
			emit("\t\tConvert: %s,\n", parseName(field.Type))
		}

		if isComparable(field.Type) {
			emit("\t\tCompare: func(d %s, v any) int {\n", fields.Type)
			cmpFn := fieldComparer(field.Type)
			if indirect == "" {
				emit("\t\t\treturn %s((d.%s), v.(%s))\n", cmpFn, member, field.Type)
			} else {
				// Guard the dereference; a nil pointer sorts below everything.
				emit("\t\t\tif (d.%s) != nil {\n", indirect)
				emit("\t\t\t\treturn %s((d.%s.%s), v.(%s))\n",
					cmpFn, indirect, member, field.Type)
				emit("\t\t\t}\n")
				emit("\t\t\treturn -1\n")
			}
			emit("\t\t},\n")
		} else if isSetType(field.Type) {
			if indirect != "" {
				panic("No support for indirection to set types yet")
			}
			emit("\t\tSetCompare: func(d %s, v any, op int) bool {\n", fields.Type)
			emit("\t\t\treturn %s((d.%s), v.(%s), op)\n", setComparer(field.Type), member, field.Type)
			emit("\t\t},\n")
		} else {
			panic("Unknown case")
		}

		emit("\t},\n")
	}
	emit("}\n\n")
}

var validAttr = map[string]bool{
"desc": true,
"alias": true,
Expand Down
59 changes: 53 additions & 6 deletions code/sonalyze/MANUAL.md
Original file line number Diff line number Diff line change
Expand Up @@ -203,10 +203,13 @@ filters.
in some ways; see later section. The option can be repeated.


#### Job filtering and aggregation options
#### Aggregation filter options

##### Job aggregation filtering

These are only available with the `jobs` command. All filters are optional. Jobs must pass all
specified filters.
specified filters. The filters generally have an equivalent `-q` variant (see below) but predate
that functionality.

`--merge-all`, `--batch`

Expand Down Expand Up @@ -312,10 +315,11 @@ specified filters.
option does not guarantee that a job is observed at different points in time. Use `--min-runtime`
if that's what you mean.)

#### Load filtering and aggregation options
#### Load aggregation filtering

These are only available with the `load` command. All filters are optional. Records must pass all
specified filters.
specified filters. The filters generally have an equivalent `-q` variant (see below) but predate
that functionality.

`--hourly`, `--half-hourly`

Expand All @@ -334,9 +338,10 @@ specified filters.
Sum bucketed/averaged data by time step across all the selected hosts, yielding an aggregate for this
group/subcluster of hosts. Requires bucketing other than `--none`.

#### Sacct filtering and aggregation options
##### Sacct aggregation filtering

Since these are not sample records they have their own filtering rules.
Since these are not sample records they have their own filtering rules. The filters generally have
an equivalent `-q` variant (see below) but predate that functionality.

The default is to print "regular" jobs, ie, not Array jobs or Het jobs. Select the latter groups
with `-array` and `-het`.
Expand Down Expand Up @@ -403,6 +408,48 @@ with `-array` and `-het`.

Select only het jobs (not implemented yet).

##### General (`-q`) aggregation filtering

This is an experimental facility. The data extraction and aggregation verbs usually allow a `-q`
option whose argument is an expression that is evaluated against each record in the aggregation
result to decide whether that record is selected.

`-q expression`

Select records for which `expression` is true. The expression is formed from these simple elements:

* FieldName binop String, where binop is one of `<`, `<=`, `>`, `>=`, or `=`
* FieldName "=~" Regexp
* expression "and" expression
* expression "or" expression
* "not" expression
* "(" expression ")"

The string, if it does not look like an identifier, can be quoted with `'`, `"`, `/`, or <code>`</code>.

The field names are the field names available for printing.

For example:

```
sonalyze jobs -q 'Cmd =~ python and Host =~ /^(gpu-|int-)/ and Job > 2500000'
```

The typing rules are:

* for relational operators, the field has a type, and the string value is parsed as that type, and
then a comparison is performed on the two values according to type.
* for `=~`, the field is formatted using the standard formatter without modifiers, and the resulting
string is matched against the regular expression.

Some fields have set-like values (GPU sets, host sets); how we handle them is TBD, but likely the
relational operators will act as set operators (subsets, set equality) and the string value will
be parsed as a set value.

There is no query optimization. It may be advantageous to apply record filters first, or to
arrange multiple tests so that the most discriminating test comes first. In the example above,
filtering by job ID first will frequently lead to a faster query.

#### Job printing options

`--breakdown=<keywords>`
Expand Down
34 changes: 34 additions & 0 deletions code/sonalyze/cmd/args.go
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,32 @@ func (rfa *RecordFilterArgs) DefaultUserFilters() (allUsers, skipSystemUsers, de
return
}

///////////////////////////////////////////////////////////////////////////////////////////////////
//
// Query arguments

// QueryArgs holds the arguments for the query expression option.
type QueryArgs struct {
	// QueryStmt is the raw text of the `-q` option; "" if the option was not given.
	QueryStmt string
	// ParsedQuery is the result of parsing QueryStmt.  It is assigned by Validate, and
	// only when QueryStmt is not empty.
	ParsedQuery PNode
}

// Add registers the `-q` string option, defaulting to "", in the "query" option group
// of fs.
func (qa *QueryArgs) Add(fs *CLI) {
	const (
		group = "query"
		flag  = "q"
		usage = "A query expression"
	)
	fs.Group(group)
	fs.StringVar(&qa.QueryStmt, flag, "", usage)
}

// ReifyForRemote passes the query text to x as the string option "q" so that it can be
// forwarded to a remote invocation.  It always returns nil.
func (qa *QueryArgs) ReifyForRemote(x *ArgReifier) error {
	// The unparsed statement is forwarded, not ParsedQuery.
	stmt := qa.QueryStmt
	x.String("q", stmt)
	return nil
}

// Validate parses the query text, if any, and stores the result in ParsedQuery.  It
// returns the error from ParseQuery, or nil when no query was given.
func (qa *QueryArgs) Validate() (err error) {
	if qa.QueryStmt == "" {
		// No query: nothing to parse, ParsedQuery is left untouched.
		return nil
	}
	qa.ParsedQuery, err = ParseQuery(qa.QueryStmt)
	return err
}

///////////////////////////////////////////////////////////////////////////////////////////////////
//
// Config file
Expand Down Expand Up @@ -605,6 +631,7 @@ func NewRepeatableString(xs *[]string) *RepeatableString {
type SampleAnalysisArgs struct {
DevArgs
SourceArgs
QueryArgs
RecordFilterArgs
ConfigFileArgs
VerboseArgs
Expand All @@ -617,6 +644,7 @@ func (sa *SampleAnalysisArgs) SampleAnalysisFlags() *SampleAnalysisArgs {
func (s *SampleAnalysisArgs) Add(fs *CLI) {
s.DevArgs.Add(fs)
s.SourceArgs.Add(fs)
s.QueryArgs.Add(fs)
s.RecordFilterArgs.Add(fs)
s.ConfigFileArgs.Add(fs)
s.VerboseArgs.Add(fs)
Expand All @@ -628,6 +656,7 @@ func (s *SampleAnalysisArgs) ReifyForRemote(x *ArgReifier) error {
return errors.Join(
s.DevArgs.ReifyForRemote(x),
s.SourceArgs.ReifyForRemote(x),
s.QueryArgs.ReifyForRemote(x),
s.RecordFilterArgs.ReifyForRemote(x),
s.ConfigFileArgs.ReifyForRemote(x),
)
Expand All @@ -637,6 +666,7 @@ func (s *SampleAnalysisArgs) Validate() error {
return errors.Join(
s.DevArgs.Validate(),
s.SourceArgs.Validate(),
s.QueryArgs.Validate(),
s.RecordFilterArgs.Validate(),
s.ConfigFileArgs.Validate(),
s.VerboseArgs.Validate(),
Expand All @@ -652,6 +682,7 @@ func (s *SampleAnalysisArgs) Validate() error {
type HostAnalysisArgs struct {
DevArgs
SourceArgs
QueryArgs
HostArgs
ConfigFileArgs
VerboseArgs
Expand All @@ -664,6 +695,7 @@ func (sa *HostAnalysisArgs) HostAnalysisFlags() *HostAnalysisArgs {
func (s *HostAnalysisArgs) Add(fs *CLI) {
s.DevArgs.Add(fs)
s.SourceArgs.Add(fs)
s.QueryArgs.Add(fs)
s.HostArgs.Add(fs)
s.ConfigFileArgs.Add(fs)
s.VerboseArgs.Add(fs)
Expand All @@ -675,6 +707,7 @@ func (s *HostAnalysisArgs) ReifyForRemote(x *ArgReifier) error {
return errors.Join(
s.DevArgs.ReifyForRemote(x),
s.SourceArgs.ReifyForRemote(x),
s.QueryArgs.ReifyForRemote(x),
s.HostArgs.ReifyForRemote(x),
s.ConfigFileArgs.ReifyForRemote(x),
)
Expand All @@ -684,6 +717,7 @@ func (s *HostAnalysisArgs) Validate() error {
return errors.Join(
s.DevArgs.Validate(),
s.SourceArgs.Validate(),
s.QueryArgs.Validate(),
s.HostArgs.Validate(),
s.ConfigFileArgs.Validate(),
s.VerboseArgs.Validate(),
Expand Down
11 changes: 6 additions & 5 deletions code/sonalyze/cmd/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,12 @@ var (
"operation-selection": 1,
"aggregation": 2,
"job-filter": 3,
"printing": 4,
"record-filter": 5,
"remote-data-source": 6,
"local-data-source": 7,
"development": 8,
"query": 4,
"printing": 5,
"record-filter": 6,
"remote-data-source": 7,
"local-data-source": 8,
"development": 9,
}
)

Expand Down
Loading