diff --git a/plugin/README.md b/plugin/README.md
index 6bc40b490..91eb9f3f5 100755
--- a/plugin/README.md
+++ b/plugin/README.md
@@ -392,7 +392,7 @@ It transforms `{"server":{"os":"linux","arch":"amd64"}}` into `{"server":"{\"os\
 [More details...](plugin/action/json_encode/README.md)
 
 ## json_extract
-It extracts a field from JSON-encoded event field and adds extracted field to the event root.
+It extracts fields from a JSON-encoded event field and adds them to the event root.
 > If extracted field already exists in the event root, it will be overridden.
 
 [More details...](plugin/action/json_extract/README.md)
diff --git a/plugin/action/README.md b/plugin/action/README.md
index 597068601..a21df64bd 100755
--- a/plugin/action/README.md
+++ b/plugin/action/README.md
@@ -235,7 +235,7 @@ It transforms `{"server":{"os":"linux","arch":"amd64"}}` into `{"server":"{\"os\
 [More details...](plugin/action/json_encode/README.md)
 
 ## json_extract
-It extracts a field from JSON-encoded event field and adds extracted field to the event root.
+It extracts fields from a JSON-encoded event field and adds them to the event root.
 > If extracted field already exists in the event root, it will be overridden.
 
 [More details...](plugin/action/json_extract/README.md)
diff --git a/plugin/action/json_extract/README.idoc.md b/plugin/action/json_extract/README.idoc.md
index 23fff9a10..0dde083b3 100644
--- a/plugin/action/json_extract/README.idoc.md
+++ b/plugin/action/json_extract/README.idoc.md
@@ -1,11 +1,11 @@
 # JSON extract plugin
 @introduction
 
-### Examples
+## Examples
 @examples
 
-### Benchmarks
+## Benchmarks
 @benchmarks
 
-### Config params
+## Config params
 @config-params|description
diff --git a/plugin/action/json_extract/README.md b/plugin/action/json_extract/README.md
index 0de9fd060..99fc219fd 100755
--- a/plugin/action/json_extract/README.md
+++ b/plugin/action/json_extract/README.md
@@ -1,8 +1,8 @@
 # JSON extract plugin
-It extracts a field from JSON-encoded event field and adds extracted field to the event root.
+It extracts fields from a JSON-encoded event field and adds them to the event root.
 > If extracted field already exists in the event root, it will be overridden.
 
-### Examples
+## Examples
 ```yaml
 pipelines:
   example_pipeline:
@@ -10,47 +10,75 @@ pipelines:
     actions:
     - type: json_extract
       field: log
-      extract_field: error.code
+      extract_fields:
+        - error.code
+        - level
+        - meta
+        - flags
     ...
 ```
 The original event:
 ```json
 {
-  "log": "{\"level\":\"error\",\"message\":\"error occurred\",\"service\":\"my-service\",\"error\":{\"code\":2,\"args\":[]}}",
+  "log": "{\"level\":\"error\",\"message\":\"error occurred\",\"error\":{\"code\":2,\"args\":[]},\"meta\":{\"service\":\"my-service\",\"pod\":\"my-service-5c4dfcdcd4-4v5zw\"},\"flags\":[\"flag1\",\"flag2\"]}",
   "time": "2024-03-01T10:49:28.263317941Z"
 }
 ```
 The resulting event:
 ```json
 {
-  "log": "{\"level\":\"error\",\"message\":\"error occurred\",\"service\":\"my-service\",\"error\":{\"code\":2,\"args\":[]}}",
+  "log": "{\"level\":\"error\",\"message\":\"error occurred\",\"error\":{\"code\":2,\"args\":[]},\"meta\":{\"service\":\"my-service\",\"pod\":\"my-service-5c4dfcdcd4-4v5zw\"},\"flags\":[\"flag1\",\"flag2\"]}",
   "time": "2024-03-01T10:49:28.263317941Z",
-  "code": 2
+  "code": 2,
+  "level": "error",
+  "meta": {
+    "service": "my-service",
+    "pod": "my-service-5c4dfcdcd4-4v5zw"
+  },
+  "flags": ["flag1", "flag2"]
 }
 ```
 
-### Benchmarks
+## Benchmarks
 Performance comparison of `json_extract` and `json_decode` plugins.
-`json_extract` on average 3 times faster than `json_decode`.
+`json_extract` is on average 2.5 times faster than `json_decode` and
+doesn't allocate memory during extraction.
 
+### Extract 1 field
 | json (length) | json_extract (time ns) | json_decode (time ns) |
 |---------------|------------------------|-----------------------|
-| 129 | 33 | 176 |
-| 309 | 264 | 520 |
-| 2109 | 2263 | 6778 |
-| 10909 | 11289 | 32205 |
-| 21909 | 23277 | 62819 |
+| 309 | 300 | 560 |
+| 2109 | 2570 | 7250 |
+| 10909 | 13550 | 34250 |
+| 21909 | 26000 | 67940 |
+| 237909 | 262500 | 741530 |
 
-### Config params
+### Extract 5 fields
+| json (length) | json_extract (time ns) | json_decode (time ns) |
+|---------------|------------------------|-----------------------|
+| 309 | 450 | 685 |
+| 2109 | 2990 | 7410 |
+| 10909 | 14540 | 35000 |
+| 21909 | 28340 | 69950 |
+| 237909 | 286600 | 741600 |
+
+## Config params
 **`field`** *`cfg.FieldSelector`* *`required`* 
 
 The event field from which to extract. Must be a string.
 
 <br>
 
-**`extract_field`** *`cfg.FieldSelector`* *`required`* 
+**`extract_field`** *`cfg.FieldSelector`* 
 
 Field to extract.
+> ⚠ DEPRECATED. Use `extract_fields` instead.
+
+<br>
+
+**`extract_fields`** *`[]cfg.FieldSelector`* 
+
+Fields to extract.
 
 <br>
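For completeness, a hedged sketch of a pipeline config that mixes the deprecated `extract_field` with the new `extract_fields` during a migration; it relies on the de-duplication performed in `Start` (shown later in this diff), so `error.code` is extracted only once. The field names are illustrative only:

```yaml
pipelines:
  example_pipeline:
    ...
    actions:
    - type: json_extract
      field: log
      # deprecated param, still honored for backward compatibility
      extract_field: error.code
      # new param; the duplicated selector is added only once
      extract_fields:
        - error.code
        - level
    ...
```
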
diff --git a/plugin/action/json_extract/json_extract.go b/plugin/action/json_extract/json_extract.go
index afebf5fc9..ed11e0c09 100644
--- a/plugin/action/json_extract/json_extract.go
+++ b/plugin/action/json_extract/json_extract.go
@@ -1,8 +1,6 @@
 package json_extract
 
 import (
-	"bytes"
-
 	"github.com/go-faster/jx"
 	"github.com/ozontech/file.d/cfg"
 	"github.com/ozontech/file.d/fd"
@@ -11,7 +9,7 @@ import (
 )
 
 /*{ introduction
-It extracts a field from JSON-encoded event field and adds extracted field to the event root.
+It extracts fields from a JSON-encoded event field and adds them to the event root.
 > If extracted field already exists in the event root, it will be overridden.
 }*/
 
@@ -23,42 +21,65 @@ pipelines:
     actions:
     - type: json_extract
      field: log
-      extract_field: error.code
+      extract_fields:
+        - error.code
+        - level
+        - meta
+        - flags
     ...
 ```
 The original event:
 ```json
 {
-  "log": "{\"level\":\"error\",\"message\":\"error occurred\",\"service\":\"my-service\",\"error\":{\"code\":2,\"args\":[]}}",
+  "log": "{\"level\":\"error\",\"message\":\"error occurred\",\"error\":{\"code\":2,\"args\":[]},\"meta\":{\"service\":\"my-service\",\"pod\":\"my-service-5c4dfcdcd4-4v5zw\"},\"flags\":[\"flag1\",\"flag2\"]}",
   "time": "2024-03-01T10:49:28.263317941Z"
 }
 ```
 The resulting event:
 ```json
 {
-  "log": "{\"level\":\"error\",\"message\":\"error occurred\",\"service\":\"my-service\",\"error\":{\"code\":2,\"args\":[]}}",
+  "log": "{\"level\":\"error\",\"message\":\"error occurred\",\"error\":{\"code\":2,\"args\":[]},\"meta\":{\"service\":\"my-service\",\"pod\":\"my-service-5c4dfcdcd4-4v5zw\"},\"flags\":[\"flag1\",\"flag2\"]}",
   "time": "2024-03-01T10:49:28.263317941Z",
-  "code": 2
+  "code": 2,
+  "level": "error",
+  "meta": {
+    "service": "my-service",
+    "pod": "my-service-5c4dfcdcd4-4v5zw"
+  },
+  "flags": ["flag1", "flag2"]
 }
 ```
 }*/
 
 /*{ benchmarks
 Performance comparison of `json_extract` and `json_decode` plugins.
-`json_extract` on average 3 times faster than `json_decode`.
+`json_extract` is on average 2.5 times faster than `json_decode` and
+doesn't allocate memory during extraction.
+
+### Extract 1 field
+| json (length) | json_extract (time ns) | json_decode (time ns) |
+|---------------|------------------------|-----------------------|
+| 309 | 300 | 560 |
+| 2109 | 2570 | 7250 |
+| 10909 | 13550 | 34250 |
+| 21909 | 26000 | 67940 |
+| 237909 | 262500 | 741530 |
 
+### Extract 5 fields
 | json (length) | json_extract (time ns) | json_decode (time ns) |
 |---------------|------------------------|-----------------------|
-| 129 | 33 | 176 |
-| 309 | 264 | 520 |
-| 2109 | 2263 | 6778 |
-| 10909 | 11289 | 32205 |
-| 21909 | 23277 | 62819 |
+| 309 | 450 | 685 |
+| 2109 | 2990 | 7410 |
+| 10909 | 14540 | 35000 |
+| 21909 | 28340 | 69950 |
+| 237909 | 286600 | 741600 |
 }*/
 
 type Plugin struct {
-	config  *Config
-	decoder *jx.Decoder
+	config *Config
+
+	extractFields *pathTree
+	decoder       *jx.Decoder
 }
 
 // ! config-params
@@ -73,8 +94,14 @@ type Config struct {
 	// > @3@4@5@6
 	// >
 	// > Field to extract.
-	ExtractField  cfg.FieldSelector `json:"extract_field" parse:"selector" required:"true"` // *
+	// >> ⚠ DEPRECATED. Use `extract_fields` instead.
+	ExtractField  cfg.FieldSelector `json:"extract_field" parse:"selector"` // *
 	ExtractField_ []string
+
+	// > @3@4@5@6
+	// >
+	// > Fields to extract.
+	ExtractFields []cfg.FieldSelector `json:"extract_fields" slice:"true"` // *
 }
 
 func init() {
@@ -88,9 +115,25 @@ func factory() (pipeline.AnyPlugin, pipeline.AnyConfig) {
 	return &Plugin{}, &Config{}
 }
 
-func (p *Plugin) Start(config pipeline.AnyConfig, _ *pipeline.ActionPluginParams) {
+func (p *Plugin) Start(config pipeline.AnyConfig, params *pipeline.ActionPluginParams) {
 	p.config = config.(*Config)
 	p.decoder = &jx.Decoder{}
+
+	p.extractFields = newPathTree()
+	dupl := false
+	for _, f := range p.config.ExtractFields {
+		if f == p.config.ExtractField {
+			dupl = true
+		}
+		p.extractFields.add(cfg.ParseFieldSelector(string(f)))
+	}
+	if !dupl {
+		p.extractFields.add(p.config.ExtractField_)
+	}
+
+	if len(p.extractFields.root.children) == 0 {
+		params.Logger.Fatal("extract fields are empty")
+	}
 }
 
 func (p *Plugin) Stop() {}
@@ -102,36 +145,52 @@ func (p *Plugin) Do(event *pipeline.Event) pipeline.ActionResult {
 	}
 
 	p.decoder.ResetBytes(jsonNode.AsBytes())
-	extract(event.Root, p.decoder, p.config.ExtractField_, 0, false)
+	extract(event.Root, p.decoder, p.extractFields.root.children, false)
 	return pipeline.ActionPass
 }
 
-// extract extracts field from decoder and adds it to the root.
-// `skipAddField` flag is required for proper benchmarking.
-func extract(root *insaneJSON.Root, d *jx.Decoder, field []string, depth int, skipAddField bool) {
+// extract extracts fields from the decoder and adds them to the root.
+//
+// [skipAddField] flag is required for proper benchmarking.
+func extract(root *insaneJSON.Root, d *jx.Decoder, fields pathNodes, skipAddField bool) {
 	objIter, err := d.ObjIter()
 	if err != nil {
 		return
 	}
 
+	processed := len(fields)
 	for objIter.Next() {
-		if bytes.Equal(objIter.Key(), pipeline.StringToByteUnsafe(field[depth])) {
-			if depth == len(field)-1 { // add field
-				if skipAddField {
-					_ = d.Skip()
-				} else {
-					addField(root, field[depth], d)
-				}
-			} else { // go deep
-				raw, err := d.Raw()
-				if err != nil {
-					break
-				}
-				d.ResetBytes(raw)
-				extract(root, d, field, depth+1, skipAddField)
+		// find the field at the current depth
+		n := fields.find(string(objIter.Key()))
+		if n == nil {
+			if err = d.Skip(); err != nil {
+				break
 			}
-			break
-		} else if err = d.Skip(); err != nil {
+			continue
+		}
+
+		if len(n.children) == 0 { // last field in path, add to root
+			if skipAddField {
+				_ = d.Skip()
+			} else {
+				addField(root, n.data, d)
+			}
+		} else { // go deep
+			// Capture calls f and then rolls back the decoder to the state before the call
+			_ = d.Capture(func(d *jx.Decoder) error {
+				// recursively extract child fields
+				extract(root, d, n.children, skipAddField)
+				return nil
+			})
+			// skip the current field because we have processed it
+			// and rolled back the state of the decoder
+			if err = d.Skip(); err != nil {
+				break
+			}
+		}
+
+		processed--
+		if processed == 0 {
 			break
 		}
 	}
@@ -154,6 +213,7 @@ func addField(root *insaneJSON.Root, field string, d *jx.Decoder) {
 		s, _ := d.StrBytes()
 		root.AddFieldNoAlloc(root, field).MutateToBytesCopy(root, s)
 	case jx.Null:
+		_ = d.Null()
 		root.AddFieldNoAlloc(root, field).MutateToNull()
 	case jx.Bool:
 		b, _ := d.Bool()
diff --git a/plugin/action/json_extract/json_extract_test.go b/plugin/action/json_extract/json_extract_test.go
index f41d21bae..e0a130348 100644
--- a/plugin/action/json_extract/json_extract_test.go
+++ b/plugin/action/json_extract/json_extract_test.go
@@ -7,6 +7,7 @@ import (
 	"testing"
 
 	"github.com/go-faster/jx"
+	"github.com/ozontech/file.d/cfg"
 	"github.com/ozontech/file.d/pipeline"
 	"github.com/ozontech/file.d/test"
 	insaneJSON "github.com/ozontech/insane-json"
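The rewritten `extract` above relies on `jx.Decoder.Capture` to peek inside a nested value and then `Skip` it, so iteration over the parent object continues from a consistent position. Below is a minimal, self-contained sketch of that peek-then-skip pattern; it is not part of the plugin, and the JSON payload and field names are made up for illustration:

```go
package main

import (
	"fmt"

	"github.com/go-faster/jx"
)

func main() {
	d := jx.DecodeStr(`{"error":{"code":2},"level":"error"}`)

	iter, _ := d.ObjIter()
	for iter.Next() {
		if string(iter.Key()) != "error" {
			// not interesting: consume the value and move on
			_ = d.Skip()
			continue
		}

		// Capture runs the callback and then rolls the decoder back,
		// so the "error" value is not consumed yet
		_ = d.Capture(func(d *jx.Decoder) error {
			inner, err := d.ObjIter()
			if err != nil {
				return err
			}
			for inner.Next() {
				if string(inner.Key()) == "code" {
					code, _ := d.Int()
					fmt.Println("code =", code)
				} else if err := d.Skip(); err != nil {
					return err
				}
			}
			return nil
		})

		// after the rollback the whole "error" value still has to be skipped
		// before the outer iteration can continue
		_ = d.Skip()
	}
}
```

The plugin's `extract` does the same thing, except that the callback recurses with the child `pathNodes` and writes matched values into the event root via `addField`.
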
@@ -18,88 +19,128 @@ func TestJsonExtract(t *testing.T) {
 		name   string
 		config *Config
 		in     string
-		want   string
+		want   map[string]string
 	}{
 		{
-			name: "extract_string",
+			name: "extract_single_old",
 			config: &Config{
 				Field:        "json_field",
 				ExtractField: "extracted",
 			},
-			in:   `{"field1":"value1","json_field":"{\"test\":\"test_value\",\"extracted\":\"text\"}","field3":3}`,
-			want: `{"field1":"value1","json_field":"{\"test\":\"test_value\",\"extracted\":\"text\"}","field3":3,"extracted":"text"}`,
+			in: `{"field1":"value1","json_field":"{\"test\":\"test_value\",\"extracted\":\"text\"}","field3":3}`,
+			want: map[string]string{
+				"extracted": "text",
+			},
 		},
 		{
-			name: "extract_int",
+			name: "extract_single_new",
 			config: &Config{
-				Field:        "json_field",
-				ExtractField: "extracted",
+				Field: "json_field",
+				ExtractFields: []cfg.FieldSelector{
+					"extracted",
+				},
+			},
+			in: `{"field1":"value1","json_field":"{\"test\":\"test_value\",\"extracted\":\"text\"}","field3":3}`,
+			want: map[string]string{
+				"extracted": "text",
 			},
-			in:   `{"field1":"value1","json_field":"{\"extracted\":5,\"test\":\"test_value\"}","field3":3}`,
-			want: `{"field1":"value1","json_field":"{\"extracted\":5,\"test\":\"test_value\"}","field3":3,"extracted":5}`,
 		},
 		{
-			name: "extract_float",
+			name: "extract_single_nested",
 			config: &Config{
-				Field:        "json_field",
-				ExtractField: "extracted",
+				Field: "log.json_field",
+				ExtractFields: []cfg.FieldSelector{
+					"extracted.extracted2",
+				},
+			},
+			in: `{"field1":"value1","log":{"json_field":"{\"test\":\"test_value\",\"extracted\":{\"extracted1\":\"text\",\"extracted2\":15}}","field3":3}}`,
+			want: map[string]string{
+				"extracted2": "15",
 			},
-			in:   `{"field1":"value1","json_field":"{\"test\":\"test_value\",\"extracted\":95.6}","field3":3}`,
-			want: `{"field1":"value1","json_field":"{\"test\":\"test_value\",\"extracted\":95.6}","field3":3,"extracted":95.6}`,
 		},
 		{
-			name: "extract_bool",
+			name: "extract_multi",
 			config: &Config{
-				Field:        "json_field",
-				ExtractField: "extracted",
+				Field: "json_field",
+				ExtractFields: []cfg.FieldSelector{
+					"extracted_str",
+					"extracted_int",
+					"extracted_float",
+					"extracted_bool",
+					"extracted_null",
+					"extracted_obj",
+					"extracted_arr",
+				},
+			},
+			in: `{"field1":"value1","json_field":"{\"test\":\"test_value\",\"extracted_str\":\"str\",\"extracted_int\":10,\"extracted_float\":123.45,\"extracted_bool\":false,\"extracted_null\":null,\"extracted_obj\":{\"ext1\":\"val1\",\"ext2\":25},\"extracted_arr\":[1,2,3,4,5]}","field3":3}`,
+			want: map[string]string{
+				"extracted_str": "str",
+				"extracted_int": "10",
+				"extracted_float": "123.45",
+				"extracted_bool": "false",
+				"extracted_null": "null",
+				"extracted_obj": `{"ext1":"val1","ext2":25}`,
+				"extracted_arr": "[1,2,3,4,5]",
 			},
-			in:   `{"field1":"value1","json_field":"{\"test\":\"test_value\",\"extracted\":true}","field3":3}`,
-			want: `{"field1":"value1","json_field":"{\"test\":\"test_value\",\"extracted\":true}","field3":3,"extracted":true}`,
 		},
 		{
-			name: "extract_null",
+			name: "extract_multi_nested",
 			config: &Config{
-				Field:        "json_field",
-				ExtractField: "extracted",
+				Field: "json_field",
+				ExtractFields: []cfg.FieldSelector{
+					"ext1.ext2.ext3.ext4",
+					"ext1.ext5",
+					"ext6",
+					"ext1.ext2.ext7",
+				},
+			},
+			in: `{"field1":"value1","json_field":"{\"ext1\":{\"ext2\":{\"ext3\":{\"ext4\":\"test4\",\"ext5\":10},\"ext7\":\"test7\"},\"ext5\":\"test5\"},\"ext2\":2,\"ext6\":\"test6\"}","field3":3}`,
+			want: map[string]string{
+				"ext4": "test4",
+				"ext5": "test5",
+				"ext6": "test6",
+				"ext7": "test7",
 			},
`{"field1":"value1","json_field":"{\"extracted\":null,\"test\":\"test_value\"}","field3":3}`, - want: `{"field1":"value1","json_field":"{\"extracted\":null,\"test\":\"test_value\"}","field3":3,"extracted":null}`, }, { - name: "extract_object", + name: "field_not_exists", config: &Config{ - Field: "json_field", - ExtractField: "extracted", + Field: "json_field", + ExtractFields: []cfg.FieldSelector{ + "extracted", + }, + }, + in: `{"field1":"value1","field3":3}`, + want: map[string]string{ + "extracted": "", }, - in: `{"field1":"value1","json_field":"{\"test\":\"test_value\",\"extracted\":{\"ext1\":\"val1\",\"ext2\":25}}","field3":3}`, - want: `{"field1":"value1","json_field":"{\"test\":\"test_value\",\"extracted\":{\"ext1\":\"val1\",\"ext2\":25}}","field3":3,"extracted":{"ext1":"val1","ext2":25}}`, }, { - name: "nested_fields", + name: "extracted_field_not_exists", config: &Config{ - Field: "log.json_field", - ExtractField: "extracted.extracted2", + Field: "json_field", + ExtractFields: []cfg.FieldSelector{ + "extracted", + }, + }, + in: `{"field1":"value1","json_field":"{\"test\":\"test_value\"}","field3":3}`, + want: map[string]string{ + "extracted": "", }, - in: `{"field1":"value1","log":{"json_field":"{\"test\":\"test_value\",\"extracted\":{\"extracted1\":\"text\",\"extracted2\":15}}","field3":3}}`, - want: `{"field1":"value1","log":{"json_field":"{\"test\":\"test_value\",\"extracted\":{\"extracted1\":\"text\",\"extracted2\":15}}","field3":3},"extracted2":15}`, }, { - name: "field_not_exists", + name: "extracted_field_duple", config: &Config{ Field: "json_field", ExtractField: "extracted", + ExtractFields: []cfg.FieldSelector{ + "extracted", + }, }, - in: `{"field1":"value1","field3":3}`, - want: `{"field1":"value1","field3":3}`, - }, - { - name: "extracted_field_not_exists", - config: &Config{ - Field: "test1", - ExtractField: "extracted", + in: `{"field1":"value1","json_field":"{\"test\":\"test_value\",\"extracted\":\"text\"}","field3":3}`, + want: map[string]string{ + "extracted": "text", }, - in: `{"field1":"value1","json_field":"{\"test\":\"test_value\"}","field3":3}`, - want: `{"field1":"value1","json_field":"{\"test\":\"test_value\"}","field3":3}`, }, } for _, tt := range cases { @@ -114,7 +155,16 @@ func TestJsonExtract(t *testing.T) { wg.Add(1) output.SetOutFn(func(e *pipeline.Event) { - assert.Equal(t, tt.want, e.Root.EncodeToString(), "wrong event root") + for k, v := range tt.want { + node := e.Root.Dig(k) + got := "" + if node != nil && v != "" && (v[0] == '[' || v[0] == '{') { + got = node.EncodeToString() + } else { + got = node.AsString() + } + assert.Equal(t, v, got, "wrong event value with key %q", k) + } wg.Done() }) @@ -126,7 +176,7 @@ func TestJsonExtract(t *testing.T) { } } -func genFields(count int) string { +func genBenchFields(count int) string { var sb strings.Builder for i := 0; i < count; i++ { sb.WriteString(fmt.Sprintf(`"field_%d":"val_%d",`, i, i)) @@ -136,54 +186,116 @@ func genFields(count int) string { const extractBenchJsonFormat = `{%s"level":"info","ts":"2024-02-21T08:31:24.621Z","message":"some message","traceID":"123e456e789e0123","rule_name":"simple_trace"}` -var extractedField = []string{"level"} - var extractBenchCases = []struct { - json string + json []byte + extractFields []cfg.FieldSelector }{ { - json: fmt.Sprintf(extractBenchJsonFormat, genFields(0)), + json: []byte(fmt.Sprintf(extractBenchJsonFormat, genBenchFields(10))), + extractFields: []cfg.FieldSelector{"level"}, }, { - json: fmt.Sprintf(extractBenchJsonFormat, genFields(10)), + 
+		json:          []byte(fmt.Sprintf(extractBenchJsonFormat, genBenchFields(100))),
+		extractFields: []cfg.FieldSelector{"level"},
 	},
 	{
-		json: fmt.Sprintf(extractBenchJsonFormat, genFields(100)),
+		json:          []byte(fmt.Sprintf(extractBenchJsonFormat, genBenchFields(500))),
+		extractFields: []cfg.FieldSelector{"level"},
 	},
 	{
-		json: fmt.Sprintf(extractBenchJsonFormat, genFields(500)),
+		json:          []byte(fmt.Sprintf(extractBenchJsonFormat, genBenchFields(1000))),
+		extractFields: []cfg.FieldSelector{"level"},
 	},
 	{
-		json: fmt.Sprintf(extractBenchJsonFormat, genFields(1000)),
+		json:          []byte(fmt.Sprintf(extractBenchJsonFormat, genBenchFields(10000))),
+		extractFields: []cfg.FieldSelector{"level"},
+	},
+	{
+		json:          []byte(fmt.Sprintf(extractBenchJsonFormat, genBenchFields(10))),
+		extractFields: []cfg.FieldSelector{
+			"field3",
+			"field4",
+			"field5",
+			"field6",
+			"field7",
+		},
+	},
+	{
+		json:          []byte(fmt.Sprintf(extractBenchJsonFormat, genBenchFields(100))),
+		extractFields: []cfg.FieldSelector{
+			"field30",
+			"field40",
+			"field50",
+			"field60",
+			"field70",
+		},
+	},
+	{
+		json:          []byte(fmt.Sprintf(extractBenchJsonFormat, genBenchFields(500))),
+		extractFields: []cfg.FieldSelector{
+			"field1",
+			"field100",
+			"field200",
+			"field300",
+			"field400",
+		},
+	},
+	{
+		json:          []byte(fmt.Sprintf(extractBenchJsonFormat, genBenchFields(1000))),
+		extractFields: []cfg.FieldSelector{
+			"field300",
+			"field400",
+			"field500",
+			"field600",
+			"field700",
+		},
+	},
+	{
+		json:          []byte(fmt.Sprintf(extractBenchJsonFormat, genBenchFields(10000))),
+		extractFields: []cfg.FieldSelector{
+			"field3000",
+			"field4000",
+			"field5000",
+			"field6000",
+			"field7000",
+		},
 	},
 }
 
-func BenchmarkExtractObj(b *testing.B) {
+func BenchmarkExtract(b *testing.B) {
 	for _, benchCase := range extractBenchCases {
-		name := fmt.Sprintf("json_length_%d", len(benchCase.json))
+		name := fmt.Sprintf("json_len-%d_ext_fields_count-%d", len(benchCase.json), len(benchCase.extractFields))
+		extractFields := newPathTree()
+		for _, f := range benchCase.extractFields {
+			extractFields.add(cfg.ParseFieldSelector(string(f)))
+		}
 		b.Run(name, func(b *testing.B) {
-			node := insaneJSON.Spawn()
 			d := &jx.Decoder{}
 			for i := 0; i < b.N; i++ {
-				d.ResetBytes(pipeline.StringToByteUnsafe(benchCase.json))
+				d.ResetBytes(benchCase.json)
 				// remove allocs for adding new fields to root by passing `skipAddField` flag for correct benching
-				extract(node, d, extractedField, 0, true)
+				extract(nil, d, extractFields.root.children, true)
 			}
-			insaneJSON.Release(node)
 		})
 	}
 }
 
 func BenchmarkInsaneDecodeDig(b *testing.B) {
 	for _, benchCase := range extractBenchCases {
-		name := fmt.Sprintf("json_length_%d", len(benchCase.json))
+		name := fmt.Sprintf("json_len-%d_ext_fields_count-%d", len(benchCase.json), len(benchCase.extractFields))
+		extractFields := make([][]string, 0, len(benchCase.extractFields))
+		for _, f := range benchCase.extractFields {
+			extractFields = append(extractFields, cfg.ParseFieldSelector(string(f)))
+		}
 		b.Run(name, func(b *testing.B) {
 			node := insaneJSON.Spawn()
 			for i := 0; i < b.N; i++ {
-				_ = node.DecodeString(benchCase.json)
-				node.Dig(extractedField...)
+				_ = node.DecodeBytes(benchCase.json)
+				for _, f := range extractFields {
+					_ = node.Dig(f...)
+				}
 			}
 			insaneJSON.Release(node)
 		})
 	}
diff --git a/plugin/action/json_extract/path_tree.go b/plugin/action/json_extract/path_tree.go
new file mode 100644
index 000000000..13a606bc0
--- /dev/null
+++ b/plugin/action/json_extract/path_tree.go
@@ -0,0 +1,70 @@
+package json_extract
+
+type pathNode struct {
+	data     string
+	children pathNodes
+}
+
+type pathNodes []*pathNode
+
+func (pn pathNodes) find(data string) *pathNode {
+	for _, n := range pn {
+		if n.data == data {
+			return n
+		}
+	}
+	return nil
+}
+
+// pathTree is a multi-way linked list (a tree of path segments).
+//
+// For example, we have a list of paths:
+//
+//   - f1.f2.f3
+//   - f1.f4
+//   - f1.f2.f5
+//   - f6
+//
+// After adding all specified paths to [pathTree], we get the following:
+//
+//   - root's children -> f1, f6
+//   - f1's children -> f2, f4
+//   - f2's children -> f3, f5
+type pathTree struct {
+	root *pathNode
+}
+
+func newPathTree() *pathTree {
+	return &pathTree{
+		root: &pathNode{
+			children: make([]*pathNode, 0),
+		},
+	}
+}
+
+func (l *pathTree) add(path []string) {
+	cur := l.root
+	depth := 0
+	for depth < len(path)-1 {
+		found := false
+		for _, c := range cur.children {
+			if c.data == path[depth] {
+				cur = c
+				depth++
+				found = true
+				break
+			}
+		}
+		if !found {
+			break
+		}
+	}
+	for i := depth; i < len(path); i++ {
+		newNode := &pathNode{
+			data:     path[i],
+			children: make([]*pathNode, 0),
+		}
+		cur.children = append(cur.children, newNode)
+		cur = newNode
+	}
+}
diff --git a/plugin/action/json_extract/path_tree_test.go b/plugin/action/json_extract/path_tree_test.go
new file mode 100644
index 000000000..058f1c070
--- /dev/null
+++ b/plugin/action/json_extract/path_tree_test.go
@@ -0,0 +1,58 @@
+package json_extract
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestPathTree(t *testing.T) {
+	paths := [][]string{
+		{"f1", "f2", "f3", "f4"},
+		{"f1", "f5"},
+		{"f1", "f2", "f6"},
+		{"f7"},
+	}
+	want := &pathNode{
+		children: []*pathNode{
+			{
+				data: "f1",
+				children: []*pathNode{
+					{
+						data: "f2",
+						children: []*pathNode{
+							{
+								data: "f3",
+								children: []*pathNode{
+									{
+										data:     "f4",
+										children: []*pathNode{},
+									},
+								},
+							},
+							{
+								data:     "f6",
+								children: []*pathNode{},
+							},
+						},
+					},
+					{
+						data:     "f5",
+						children: []*pathNode{},
+					},
+				},
+			},
+			{
+				data:     "f7",
+				children: []*pathNode{},
+			},
+		},
+	}
+
+	l := newPathTree()
+	for _, p := range paths {
+		l.add(p)
+	}
+
+	require.Equal(t, want, l.root)
+}
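To make the `pathTree` semantics above easier to follow, here is a hypothetical usage sketch (assumed to live in the same package, since the type is unexported); it builds the tree from the doc comment in `path_tree.go` and walks one of the registered paths:

```go
package json_extract

import "fmt"

func ExamplePathTree() {
	tree := newPathTree()
	tree.add([]string{"f1", "f2", "f3"})
	tree.add([]string{"f1", "f4"})
	tree.add([]string{"f1", "f2", "f5"})
	tree.add([]string{"f6"})

	// root's children -> f1, f6
	for _, n := range tree.root.children {
		fmt.Println(n.data)
	}

	// walk f1 -> f2 -> f5; find returns nil for unregistered segments
	n := tree.root.children.find("f1")
	n = n.children.find("f2")
	n = n.children.find("f5")
	fmt.Println(n.data, len(n.children) == 0) // a leaf marks a field to extract

	// Output:
	// f1
	// f6
	// f5 true
}
```

Leaf nodes (empty `children`) are what `extract` treats as "last field in path": their values are copied to the event root, while non-leaf nodes only steer the recursion deeper.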