Skip to content

Commit

Permalink
WIP: #59
Browse files Browse the repository at this point in the history
  • Loading branch information
steve-r-west committed Feb 4, 2025
1 parent ab69dea commit 81c0a36
Show file tree
Hide file tree
Showing 3 changed files with 299 additions and 215 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,10 @@ func (l *LowerCaseEmail) VisitEq(first, second string) (*bson.D, error) {

##### Advanced Customization

###### Nested Subqueries

Elasticsearch doesn't natively support arrays, so filters such as `eq(parent[0].id,)` or `text(locale.FR.description,"touté")` cannot easily be supported.


###### Field Types

Expand Down
232 changes: 157 additions & 75 deletions external/epsearchast/v3/es/es_query_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,37 @@ import (
"strings"
)

// MustQuery describes the term clauses that are placed under bool.must of a generated nested query.
type MustQuery struct {
// Term maps a field-name template to the value template that should be matched with a term query.
// Both the key and Value may reference named capture groups from the matching regular expression as $name;
// Value may additionally use $value, which is replaced with the value supplied in the filter.
Term map[string]struct {
Value string
}
}
// NestedReplacement describes how a field that matches a NestedFieldToQuery key is rewritten into a nested query.
type NestedReplacement struct {
// Path is emitted as the "path" of the generated nested query.
Path string
// Must holds the term clauses that are emitted under nested.query.bool.must.
Must MustQuery
}
// DefaultEsQueryBuilder holds the configuration used when turning filter operators (eq, text, in, ...) into query JSON.
type DefaultEsQueryBuilder struct {
// OpenSearch supports Multi-Fields (https://opensearch.org/docs/latest/field-types/supported-field-types/index/#multifields), which allow a single field to be encoded in different ways.
// If you have multiple mappings for a field, this lets that field be used in range, keyword, or text queries.
// The keys here should be field names from the filter (after processing from NestedFieldToQuery); the value gives, for each type of filter query, the field name to use.
OpTypeToFieldNames map[string]*OperatorTypeToMultiFieldName

// https://opensearch.org/docs/latest/field-types/supported-field-types/nested/
// https://opensearch.org/docs/latest/query-dsl/joining/nested/

// NestedFieldToQuery is a map whose keys are regular expressions for an attribute that we should match (e.g., requested by the user, after aliases have been processed).
// The value is information about how to replace it, and allows us to create a nested query (https://opensearch.org/docs/latest/query-dsl/joining/nested/) whose clauses are emitted under nested.query.bool.must.
// The regular expression can have named capture groups that will be used as replacements in the subquery keys and values.
NestedFieldToQuery map[string]NestedReplacement
}

// NestedReplacement describes how a field that matches a NestedFieldToQuery key is rewritten into a nested query.
type NestedReplacement struct {
// The path that will be used in the nested argument (See: https://opensearch.org/docs/latest/query-dsl/joining/nested/#parameters)
Path string

// A map describing the set of subqueries that should be generated; the key is the field to query and the value describes what to search for.
// Named capture groups from the matching regular expression are substituted into the key (e.g., a field pattern ^foo\[(?P<id>\d+)\].bar$ can use $id as a replacement in this string).
Subqueries map[string]Replacement
}

// Replacement describes the value, and optionally the operator, of a single generated subquery.
type Replacement struct {
// The value we should search for. Named capture groups from the parent regex can be used as replacements ($name),
// and the special value $value is replaced with the value supplied in the original filter.
Value string

// By default the subquery is built with the same kind of query as the original filter (e.g., a text filter produces a match query).
// If set to true, an equality (term) query is generated for this subquery instead.
ForceEQ bool
}

func (d *DefaultEsQueryBuilder) Validate() {
//TODO
//TODO
Expand Down Expand Up @@ -99,71 +111,29 @@ func (d DefaultEsQueryBuilder) VisitIn(args ...string) (*JsonObject, error) {
}), nil
}

func (d DefaultEsQueryBuilder) VisitEq(first, second string) (*JsonObject, error) {
var baseQuery = &JsonObject{
"term": map[string]any{
d.getFieldMapping(first).Equality: second,
},
// eqBuilder returns a subquery builder that produces an equality ("term") query.
// The returned function expects the field name as its first argument and the value
// to match as its second; the field name is translated through getFieldMapping.
func (d DefaultEsQueryBuilder) eqBuilder() func(args ...string) *JsonObject {
	return func(args ...string) *JsonObject {
		field, value := args[0], args[1]

		termClause := map[string]any{
			d.getFieldMapping(field).Equality: value,
		}

		query := JsonObject{"term": termClause}
		return &query
	}
}
func (d DefaultEsQueryBuilder) VisitEq(first, second string) (*JsonObject, error) {
b := d.eqBuilder()

numMatches := 0
for k, v := range d.NestedFieldToQuery {
p := regexp.MustCompile(k)

if p.MatchString(first) {
numMatches++

res := p.FindStringSubmatch(first)

// Extract specific named group
groupMap := make(map[string]string)
for i, name := range p.SubexpNames() {
if i != 0 && name != "" { // Skip empty or unnamed groups
groupMap[name] = res[i]
}
}

musts := []*JsonObject{}

// TBD why do I allow multiple terms
for term_key, term_val := range v.Must.Term {

resultValue := term_val.Value
resultKey := term_key

for group, replacement := range groupMap {
resultKey = strings.ReplaceAll(resultKey, "$"+group, replacement)
resultValue = strings.ReplaceAll(resultValue, "$"+group, replacement)
}

resultValue = strings.ReplaceAll(resultValue, `$value`, second)

musts = append(musts, &JsonObject{
"term": map[string]any{
d.getFieldMapping(resultKey).Equality: resultValue,
},
})

}

baseQuery = &JsonObject{
"nested": JsonObject{
"path": v.Path,
"query": JsonObject{
"bool": JsonObject{
"must": musts,
},
},
},
}
nestedQuery, ok, err := d.processNestedFieldToQuery(b, first, second)

}
if err != nil {
return nil, err
}

if numMatches > 1 {
return nil, fmt.Errorf("found more than one nested field for %s", first)
if ok {
return nestedQuery, nil
}
return baseQuery, nil

return b(first, second), nil

}

Expand All @@ -176,11 +146,26 @@ func (d DefaultEsQueryBuilder) VisitContains(first, second string) (*JsonObject,
}

func (d DefaultEsQueryBuilder) VisitText(first, second string) (*JsonObject, error) {
return (*JsonObject)(&map[string]any{
"match": map[string]any{
d.getFieldMapping(first).Text: second,
},
}), nil
b := func(args ...string) *JsonObject {
return (*JsonObject)(&map[string]any{
"match": map[string]any{
d.getFieldMapping(args[0]).Text: args[1],
},
})
}

nestedQuery, ok, err := d.processNestedFieldToQuery(b, first, second)

if err != nil {
return nil, err
}

if ok {
return nestedQuery, nil
}

return b(first, second), nil

}

// Useful doc: https://www.elastic.co/guide/en/elasticsearch/reference/7.17/query-dsl-range-query.html
Expand Down Expand Up @@ -321,4 +306,101 @@ func (d DefaultEsQueryBuilder) EscapeWildcardString(s string) string {
return str
}

// processNestedFieldToQuery converts a request for a field that is embedded in an ES nested object into an AND query
// that indexes into the object by the ID, and then searches the field.
//
// In a nutshell, we can't query eq(field[0].foo, bar); we need to do eq(field.foo, bar):eq(field.id,0). In ES we also
// need to wrap this in a "nested" query (https://opensearch.org/docs/latest/query-dsl/joining/nested/).
//
// builder takes (field, value) and returns the subquery; it decides whether we build a match, term, range or other
// ES query. Subqueries flagged with ForceEQ always use the equality (term) builder instead.
//
// first is the field requested in the filter and second is the value; second is available to subquery values as $value.
//
// Returns:
//   - (query, true, nil) when exactly one key of NestedFieldToQuery matches first,
//   - (nil, false, nil) when no key matches, so the caller should build the plain query,
//   - (nil, false, err) when a key is not a valid regular expression or more than one key matches.
func (d DefaultEsQueryBuilder) processNestedFieldToQuery(builder func(args ...string) *JsonObject, first string, second string) (*JsonObject, bool, error) {
	var nestedQuery *JsonObject

	for pattern, nested := range d.NestedFieldToQuery {
		// The patterns are configuration: report a bad one through the error result rather than panicking mid-query.
		p, err := regexp.Compile(pattern)
		if err != nil {
			return nil, false, fmt.Errorf("compiling nested field pattern %q: %w", pattern, err)
		}

		res := p.FindStringSubmatch(first)
		if res == nil {
			continue
		}

		if nestedQuery != nil {
			return nil, false, fmt.Errorf("found more than one nested field for %s", first)
		}

		// Collect the named capture groups; unnamed groups and the whole match (index 0) are skipped.
		groups := make(map[string]string)
		var groupNames []string
		for i, name := range p.SubexpNames() {
			if i == 0 || name == "" {
				continue
			}
			if _, seen := groups[name]; !seen {
				groupNames = append(groupNames, name)
			}
			groups[name] = res[i]
		}

		// Substitute longer names first so that $id can never clobber the front of $id2.
		insertionSortStrings(groupNames, func(a, b string) bool { return len(a) > len(b) })

		// Walk the subqueries in sorted key order so the generated "must" array is stable between runs.
		subqueryKeys := make([]string, 0, len(nested.Subqueries))
		for key := range nested.Subqueries {
			subqueryKeys = append(subqueryKeys, key)
		}
		insertionSortStrings(subqueryKeys, func(a, b string) bool { return a < b })

		musts := make([]*JsonObject, 0, len(subqueryKeys))

		for _, key := range subqueryKeys {
			sub := nested.Subqueries[key]

			resultKey := key
			resultValue := sub.Value

			for _, name := range groupNames {
				resultKey = strings.ReplaceAll(resultKey, "$"+name, groups[name])
				resultValue = strings.ReplaceAll(resultValue, "$"+name, groups[name])
			}

			// $value is substituted after the capture groups and only in the value, never in the key.
			resultValue = strings.ReplaceAll(resultValue, `$value`, second)

			if sub.ForceEQ {
				musts = append(musts, d.eqBuilder()(resultKey, resultValue))
			} else {
				musts = append(musts, builder(resultKey, resultValue))
			}
		}

		nestedQuery = &JsonObject{
			"nested": JsonObject{
				"path": nested.Path,
				"query": JsonObject{
					"bool": JsonObject{
						"must": musts,
					},
				},
			},
		}
	}

	if nestedQuery == nil {
		return nil, false, nil
	}

	return nestedQuery, true, nil
}

// insertionSortStrings stably sorts s in place according to less. The slices sorted here (capture group names and
// subquery keys) hold only a handful of entries, so a quadratic sort is adequate.
func insertionSortStrings(s []string, less func(a, b string) bool) {
	for i := 1; i < len(s); i++ {
		for j := i; j > 0 && less(s[j], s[j-1]); j-- {
			s[j], s[j-1] = s[j-1], s[j]
		}
	}
}

// Generate an implementation of SemanticReducer[JsonObject] for the Elasticsearch query builder.
Loading

0 comments on commit 81c0a36

Please sign in to comment.