diff --git a/plugin/README.md b/plugin/README.md
index 6bc40b490..91eb9f3f5 100755
--- a/plugin/README.md
+++ b/plugin/README.md
@@ -392,7 +392,7 @@ It transforms `{"server":{"os":"linux","arch":"amd64"}}` into `{"server":"{\"os\
[More details...](plugin/action/json_encode/README.md)
## json_extract
-It extracts a field from JSON-encoded event field and adds extracted field to the event root.
+It extracts fields from a JSON-encoded event field and adds the extracted fields to the event root.
> If extracted field already exists in the event root, it will be overridden.
[More details...](plugin/action/json_extract/README.md)
diff --git a/plugin/action/README.md b/plugin/action/README.md
index 597068601..a21df64bd 100755
--- a/plugin/action/README.md
+++ b/plugin/action/README.md
@@ -235,7 +235,7 @@ It transforms `{"server":{"os":"linux","arch":"amd64"}}` into `{"server":"{\"os\
[More details...](plugin/action/json_encode/README.md)
## json_extract
-It extracts a field from JSON-encoded event field and adds extracted field to the event root.
+It extracts fields from a JSON-encoded event field and adds the extracted fields to the event root.
> If extracted field already exists in the event root, it will be overridden.
[More details...](plugin/action/json_extract/README.md)
diff --git a/plugin/action/json_extract/README.idoc.md b/plugin/action/json_extract/README.idoc.md
index 23fff9a10..0dde083b3 100644
--- a/plugin/action/json_extract/README.idoc.md
+++ b/plugin/action/json_extract/README.idoc.md
@@ -1,11 +1,11 @@
# JSON extract plugin
@introduction
-### Examples
+## Examples
@examples
-### Benchmarks
+## Benchmarks
@benchmarks
-### Config params
+## Config params
@config-params|description
diff --git a/plugin/action/json_extract/README.md b/plugin/action/json_extract/README.md
index 0de9fd060..99fc219fd 100755
--- a/plugin/action/json_extract/README.md
+++ b/plugin/action/json_extract/README.md
@@ -1,8 +1,8 @@
# JSON extract plugin
-It extracts a field from JSON-encoded event field and adds extracted field to the event root.
+It extracts fields from a JSON-encoded event field and adds the extracted fields to the event root.
> If extracted field already exists in the event root, it will be overridden.
-### Examples
+## Examples
```yaml
pipelines:
example_pipeline:
@@ -10,47 +10,75 @@ pipelines:
actions:
- type: json_extract
field: log
- extract_field: error.code
+ extract_fields:
+ - error.code
+ - level
+ - meta
+ - flags
...
```
The original event:
```json
{
- "log": "{\"level\":\"error\",\"message\":\"error occurred\",\"service\":\"my-service\",\"error\":{\"code\":2,\"args\":[]}}",
+ "log": "{\"level\":\"error\",\"message\":\"error occurred\",\"error\":{\"code\":2,\"args\":[]},\"meta\":{\"service\":\"my-service\",\"pod\":\"my-service-5c4dfcdcd4-4v5zw\"},\"flags\":[\"flag1\",\"flag2\"]}",
"time": "2024-03-01T10:49:28.263317941Z"
}
```
The resulting event:
```json
{
- "log": "{\"level\":\"error\",\"message\":\"error occurred\",\"service\":\"my-service\",\"error\":{\"code\":2,\"args\":[]}}",
+ "log": "{\"level\":\"error\",\"message\":\"error occurred\",\"error\":{\"code\":2,\"args\":[]},\"meta\":{\"service\":\"my-service\",\"pod\":\"my-service-5c4dfcdcd4-4v5zw\"},\"flags\":[\"flag1\",\"flag2\"]}",
"time": "2024-03-01T10:49:28.263317941Z",
- "code": 2
+ "code": 2,
+ "level": "error",
+ "meta": {
+ "service": "my-service",
+ "pod": "my-service-5c4dfcdcd4-4v5zw"
+ },
+ "flags": ["flag1", "flag2"]
}
```
-### Benchmarks
+## Benchmarks
Performance comparison of `json_extract` and `json_decode` plugins.
-`json_extract` on average 3 times faster than `json_decode`.
+`json_extract` is on average 2.5 times faster than `json_decode` and
+doesn't allocate memory during extraction.
+### Extract 1 field
| json (length) | json_extract (time ns) | json_decode (time ns) |
|---------------|------------------------|-----------------------|
-| 129 | 33 | 176 |
-| 309 | 264 | 520 |
-| 2109 | 2263 | 6778 |
-| 10909 | 11289 | 32205 |
-| 21909 | 23277 | 62819 |
+| 309 | 300 | 560 |
+| 2109 | 2570 | 7250 |
+| 10909 | 13550 | 34250 |
+| 21909 | 26000 | 67940 |
+| 237909 | 262500 | 741530 |
-### Config params
+### Extract 5 fields
+| json (length) | json_extract (time ns) | json_decode (time ns) |
+|---------------|------------------------|-----------------------|
+| 309 | 450 | 685 |
+| 2109 | 2990 | 7410 |
+| 10909 | 14540 | 35000 |
+| 21909 | 28340 | 69950 |
+| 237909 | 286600 | 741600 |
+
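+> These numbers can be reproduced approximately with `go test -bench=. -benchmem` run in
+> `plugin/action/json_extract` (see `BenchmarkExtract` and `BenchmarkInsaneDecodeDig`); absolute values vary by machine.
+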
+## Config params
**`field`** *`cfg.FieldSelector`* *`required`*
The event field from which to extract. Must be a string.
-**`extract_field`** *`cfg.FieldSelector`* *`required`*
+**`extract_field`** *`cfg.FieldSelector`*
Field to extract.
+> ⚠ DEPRECATED. Use `extract_fields` instead.
+
+
+
+**`extract_fields`** *`[]cfg.FieldSelector`*
+
+Fields to extract.
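+
+Since `extract_field` is deprecated, an existing config can be migrated by moving its single
+selector into the `extract_fields` list. A minimal sketch, mirroring the example above:
+```yaml
+pipelines:
+  example_pipeline:
+    ...
+    actions:
+    - type: json_extract
+      field: log
+      # before: extract_field: error.code
+      extract_fields:
+        - error.code
+```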
diff --git a/plugin/action/json_extract/json_extract.go b/plugin/action/json_extract/json_extract.go
index afebf5fc9..ed11e0c09 100644
--- a/plugin/action/json_extract/json_extract.go
+++ b/plugin/action/json_extract/json_extract.go
@@ -1,8 +1,6 @@
package json_extract
import (
- "bytes"
-
"github.com/go-faster/jx"
"github.com/ozontech/file.d/cfg"
"github.com/ozontech/file.d/fd"
@@ -11,7 +9,7 @@ import (
)
/*{ introduction
-It extracts a field from JSON-encoded event field and adds extracted field to the event root.
+It extracts fields from a JSON-encoded event field and adds the extracted fields to the event root.
> If extracted field already exists in the event root, it will be overridden.
}*/
@@ -23,42 +21,65 @@ pipelines:
actions:
- type: json_extract
field: log
- extract_field: error.code
+ extract_fields:
+ - error.code
+ - level
+ - meta
+ - flags
...
```
The original event:
```json
{
- "log": "{\"level\":\"error\",\"message\":\"error occurred\",\"service\":\"my-service\",\"error\":{\"code\":2,\"args\":[]}}",
+ "log": "{\"level\":\"error\",\"message\":\"error occurred\",\"error\":{\"code\":2,\"args\":[]},\"meta\":{\"service\":\"my-service\",\"pod\":\"my-service-5c4dfcdcd4-4v5zw\"},\"flags\":[\"flag1\",\"flag2\"]}",
"time": "2024-03-01T10:49:28.263317941Z"
}
```
The resulting event:
```json
{
- "log": "{\"level\":\"error\",\"message\":\"error occurred\",\"service\":\"my-service\",\"error\":{\"code\":2,\"args\":[]}}",
+ "log": "{\"level\":\"error\",\"message\":\"error occurred\",\"error\":{\"code\":2,\"args\":[]},\"meta\":{\"service\":\"my-service\",\"pod\":\"my-service-5c4dfcdcd4-4v5zw\"},\"flags\":[\"flag1\",\"flag2\"]}",
"time": "2024-03-01T10:49:28.263317941Z",
- "code": 2
+ "code": 2,
+ "level": "error",
+ "meta": {
+ "service": "my-service",
+ "pod": "my-service-5c4dfcdcd4-4v5zw"
+ },
+ "flags": ["flag1", "flag2"]
}
```
}*/
/*{ benchmarks
Performance comparison of `json_extract` and `json_decode` plugins.
-`json_extract` on average 3 times faster than `json_decode`.
+`json_extract` is on average 2.5 times faster than `json_decode` and
+doesn't allocate memory during extraction.
+
+### Extract 1 field
+| json (length) | json_extract (time ns) | json_decode (time ns) |
+|---------------|------------------------|-----------------------|
+| 309 | 300 | 560 |
+| 2109 | 2570 | 7250 |
+| 10909 | 13550 | 34250 |
+| 21909 | 26000 | 67940 |
+| 237909 | 262500 | 741530 |
+### Extract 5 fields
| json (length) | json_extract (time ns) | json_decode (time ns) |
|---------------|------------------------|-----------------------|
-| 129 | 33 | 176 |
-| 309 | 264 | 520 |
-| 2109 | 2263 | 6778 |
-| 10909 | 11289 | 32205 |
-| 21909 | 23277 | 62819 |
+| 309 | 450 | 685 |
+| 2109 | 2990 | 7410 |
+| 10909 | 14540 | 35000 |
+| 21909 | 28340 | 69950 |
+| 237909 | 286600 | 741600 |
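+
+> These numbers can be reproduced approximately with `go test -bench=. -benchmem` run in
+> `plugin/action/json_extract` (see `BenchmarkExtract` and `BenchmarkInsaneDecodeDig`); absolute values vary by machine.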
}*/
type Plugin struct {
- config *Config
- decoder *jx.Decoder
+ config *Config
+
+ extractFields *pathTree
+ decoder *jx.Decoder
}
// ! config-params
@@ -73,8 +94,14 @@ type Config struct {
// > @3@4@5@6
// >
// > Field to extract.
- ExtractField cfg.FieldSelector `json:"extract_field" parse:"selector" required:"true"` // *
+ // >> ⚠ DEPRECATED. Use `extract_fields` instead.
+ ExtractField cfg.FieldSelector `json:"extract_field" parse:"selector"` // *
ExtractField_ []string
+
+ // > @3@4@5@6
+ // >
+ // > Fields to extract.
+ ExtractFields []cfg.FieldSelector `json:"extract_fields" slice:"true"` // *
}
func init() {
@@ -88,9 +115,25 @@ func factory() (pipeline.AnyPlugin, pipeline.AnyConfig) {
return &Plugin{}, &Config{}
}
-func (p *Plugin) Start(config pipeline.AnyConfig, _ *pipeline.ActionPluginParams) {
+func (p *Plugin) Start(config pipeline.AnyConfig, params *pipeline.ActionPluginParams) {
p.config = config.(*Config)
p.decoder = &jx.Decoder{}
+
+ p.extractFields = newPathTree()
+ dupl := false
+ for _, f := range p.config.ExtractFields {
+ if f == p.config.ExtractField {
+ dupl = true
+ }
+ p.extractFields.add(cfg.ParseFieldSelector(string(f)))
+ }
+ if !dupl {
+ p.extractFields.add(p.config.ExtractField_)
+ }
+
+ if len(p.extractFields.root.children) == 0 {
+ params.Logger.Fatal("extract fields are empty")
+ }
}
func (p *Plugin) Stop() {}
@@ -102,36 +145,52 @@ func (p *Plugin) Do(event *pipeline.Event) pipeline.ActionResult {
}
p.decoder.ResetBytes(jsonNode.AsBytes())
- extract(event.Root, p.decoder, p.config.ExtractField_, 0, false)
+ extract(event.Root, p.decoder, p.extractFields.root.children, false)
return pipeline.ActionPass
}
-// extract extracts field from decoder and adds it to the root.
-// `skipAddField` flag is required for proper benchmarking.
-func extract(root *insaneJSON.Root, d *jx.Decoder, field []string, depth int, skipAddField bool) {
+// extract extracts fields from the decoder and adds them to the root.
+//
+// The [skipAddField] flag is required for proper benchmarking.
+func extract(root *insaneJSON.Root, d *jx.Decoder, fields pathNodes, skipAddField bool) {
objIter, err := d.ObjIter()
if err != nil {
return
}
+ processed := len(fields)
for objIter.Next() {
- if bytes.Equal(objIter.Key(), pipeline.StringToByteUnsafe(field[depth])) {
- if depth == len(field)-1 { // add field
- if skipAddField {
- _ = d.Skip()
- } else {
- addField(root, field[depth], d)
- }
- } else { // go deep
- raw, err := d.Raw()
- if err != nil {
- break
- }
- d.ResetBytes(raw)
- extract(root, d, field, depth+1, skipAddField)
+ // find the field at the current depth
+ n := fields.find(string(objIter.Key()))
+ if n == nil {
+ if err = d.Skip(); err != nil {
+ break
}
- break
- } else if err = d.Skip(); err != nil {
+ continue
+ }
+
+ if len(n.children) == 0 { // last field in path, add to root
+ if skipAddField {
+ _ = d.Skip()
+ } else {
+ addField(root, n.data, d)
+ }
+ } else { // go deep
+ // Capture calls f and then rolls back to state before call
+ _ = d.Capture(func(d *jx.Decoder) error {
+ // recursively extract child fields
+ extract(root, d, n.children, skipAddField)
+ return nil
+ })
+ // skip the current field because we have processed it
+ // and rolled back the state of the decoder
+ if err = d.Skip(); err != nil {
+ break
+ }
+ }
+
+ processed--
+ if processed == 0 {
break
}
}
@@ -154,6 +213,7 @@ func addField(root *insaneJSON.Root, field string, d *jx.Decoder) {
s, _ := d.StrBytes()
root.AddFieldNoAlloc(root, field).MutateToBytesCopy(root, s)
case jx.Null:
+ _ = d.Null()
root.AddFieldNoAlloc(root, field).MutateToNull()
case jx.Bool:
b, _ := d.Bool()
diff --git a/plugin/action/json_extract/json_extract_test.go b/plugin/action/json_extract/json_extract_test.go
index f41d21bae..e0a130348 100644
--- a/plugin/action/json_extract/json_extract_test.go
+++ b/plugin/action/json_extract/json_extract_test.go
@@ -7,6 +7,7 @@ import (
"testing"
"github.com/go-faster/jx"
+ "github.com/ozontech/file.d/cfg"
"github.com/ozontech/file.d/pipeline"
"github.com/ozontech/file.d/test"
insaneJSON "github.com/ozontech/insane-json"
@@ -18,88 +19,128 @@ func TestJsonExtract(t *testing.T) {
name string
config *Config
in string
- want string
+ want map[string]string
}{
{
- name: "extract_string",
+ name: "extract_single_old",
config: &Config{
Field: "json_field",
ExtractField: "extracted",
},
- in: `{"field1":"value1","json_field":"{\"test\":\"test_value\",\"extracted\":\"text\"}","field3":3}`,
- want: `{"field1":"value1","json_field":"{\"test\":\"test_value\",\"extracted\":\"text\"}","field3":3,"extracted":"text"}`,
+ in: `{"field1":"value1","json_field":"{\"test\":\"test_value\",\"extracted\":\"text\"}","field3":3}`,
+ want: map[string]string{
+ "extracted": "text",
+ },
},
{
- name: "extract_int",
+ name: "extract_single_new",
config: &Config{
- Field: "json_field",
- ExtractField: "extracted",
+ Field: "json_field",
+ ExtractFields: []cfg.FieldSelector{
+ "extracted",
+ },
+ },
+ in: `{"field1":"value1","json_field":"{\"test\":\"test_value\",\"extracted\":\"text\"}","field3":3}`,
+ want: map[string]string{
+ "extracted": "text",
},
- in: `{"field1":"value1","json_field":"{\"extracted\":5,\"test\":\"test_value\"}","field3":3}`,
- want: `{"field1":"value1","json_field":"{\"extracted\":5,\"test\":\"test_value\"}","field3":3,"extracted":5}`,
},
{
- name: "extract_float",
+ name: "extract_single_nested",
config: &Config{
- Field: "json_field",
- ExtractField: "extracted",
+ Field: "log.json_field",
+ ExtractFields: []cfg.FieldSelector{
+ "extracted.extracted2",
+ },
+ },
+ in: `{"field1":"value1","log":{"json_field":"{\"test\":\"test_value\",\"extracted\":{\"extracted1\":\"text\",\"extracted2\":15}}","field3":3}}`,
+ want: map[string]string{
+ "extracted2": "15",
},
- in: `{"field1":"value1","json_field":"{\"test\":\"test_value\",\"extracted\":95.6}","field3":3}`,
- want: `{"field1":"value1","json_field":"{\"test\":\"test_value\",\"extracted\":95.6}","field3":3,"extracted":95.6}`,
},
{
- name: "extract_bool",
+ name: "extract_multi",
config: &Config{
- Field: "json_field",
- ExtractField: "extracted",
+ Field: "json_field",
+ ExtractFields: []cfg.FieldSelector{
+ "extracted_str",
+ "extracted_int",
+ "extracted_float",
+ "extracted_bool",
+ "extracted_null",
+ "extracted_obj",
+ "extracted_arr",
+ },
+ },
+ in: `{"field1":"value1","json_field":"{\"test\":\"test_value\",\"extracted_str\":\"str\",\"extracted_int\":10,\"extracted_float\":123.45,\"extracted_bool\":false,\"extracted_null\":null,\"extracted_obj\":{\"ext1\":\"val1\",\"ext2\":25},\"extracted_arr\":[1,2,3,4,5]}","field3":3}`,
+ want: map[string]string{
+ "extracted_str": "str",
+ "extracted_int": "10",
+ "extracted_float": "123.45",
+ "extracted_bool": "false",
+ "extracted_null": "null",
+ "extracted_obj": `{"ext1":"val1","ext2":25}`,
+ "extracted_arr": "[1,2,3,4,5]",
},
- in: `{"field1":"value1","json_field":"{\"test\":\"test_value\",\"extracted\":true}","field3":3}`,
- want: `{"field1":"value1","json_field":"{\"test\":\"test_value\",\"extracted\":true}","field3":3,"extracted":true}`,
},
{
- name: "extract_null",
+ name: "extract_multi_nested",
config: &Config{
- Field: "json_field",
- ExtractField: "extracted",
+ Field: "json_field",
+ ExtractFields: []cfg.FieldSelector{
+ "ext1.ext2.ext3.ext4",
+ "ext1.ext5",
+ "ext6",
+ "ext1.ext2.ext7",
+ },
+ },
+ in: `{"field1":"value1","json_field":"{\"ext1\":{\"ext2\":{\"ext3\":{\"ext4\":\"test4\",\"ext5\":10},\"ext7\":\"test7\"},\"ext5\":\"test5\"},\"ext2\":2,\"ext6\":\"test6\"}","field3":3}`,
+ want: map[string]string{
+ "ext4": "test4",
+ "ext5": "test5",
+ "ext6": "test6",
+ "ext7": "test7",
},
- in: `{"field1":"value1","json_field":"{\"extracted\":null,\"test\":\"test_value\"}","field3":3}`,
- want: `{"field1":"value1","json_field":"{\"extracted\":null,\"test\":\"test_value\"}","field3":3,"extracted":null}`,
},
{
- name: "extract_object",
+ name: "field_not_exists",
config: &Config{
- Field: "json_field",
- ExtractField: "extracted",
+ Field: "json_field",
+ ExtractFields: []cfg.FieldSelector{
+ "extracted",
+ },
+ },
+ in: `{"field1":"value1","field3":3}`,
+ want: map[string]string{
+ "extracted": "",
},
- in: `{"field1":"value1","json_field":"{\"test\":\"test_value\",\"extracted\":{\"ext1\":\"val1\",\"ext2\":25}}","field3":3}`,
- want: `{"field1":"value1","json_field":"{\"test\":\"test_value\",\"extracted\":{\"ext1\":\"val1\",\"ext2\":25}}","field3":3,"extracted":{"ext1":"val1","ext2":25}}`,
},
{
- name: "nested_fields",
+ name: "extracted_field_not_exists",
config: &Config{
- Field: "log.json_field",
- ExtractField: "extracted.extracted2",
+ Field: "json_field",
+ ExtractFields: []cfg.FieldSelector{
+ "extracted",
+ },
+ },
+ in: `{"field1":"value1","json_field":"{\"test\":\"test_value\"}","field3":3}`,
+ want: map[string]string{
+ "extracted": "",
},
- in: `{"field1":"value1","log":{"json_field":"{\"test\":\"test_value\",\"extracted\":{\"extracted1\":\"text\",\"extracted2\":15}}","field3":3}}`,
- want: `{"field1":"value1","log":{"json_field":"{\"test\":\"test_value\",\"extracted\":{\"extracted1\":\"text\",\"extracted2\":15}}","field3":3},"extracted2":15}`,
},
{
- name: "field_not_exists",
+ name: "extracted_field_duple",
config: &Config{
Field: "json_field",
ExtractField: "extracted",
+ ExtractFields: []cfg.FieldSelector{
+ "extracted",
+ },
},
- in: `{"field1":"value1","field3":3}`,
- want: `{"field1":"value1","field3":3}`,
- },
- {
- name: "extracted_field_not_exists",
- config: &Config{
- Field: "test1",
- ExtractField: "extracted",
+ in: `{"field1":"value1","json_field":"{\"test\":\"test_value\",\"extracted\":\"text\"}","field3":3}`,
+ want: map[string]string{
+ "extracted": "text",
},
- in: `{"field1":"value1","json_field":"{\"test\":\"test_value\"}","field3":3}`,
- want: `{"field1":"value1","json_field":"{\"test\":\"test_value\"}","field3":3}`,
},
}
for _, tt := range cases {
@@ -114,7 +155,16 @@ func TestJsonExtract(t *testing.T) {
wg.Add(1)
output.SetOutFn(func(e *pipeline.Event) {
- assert.Equal(t, tt.want, e.Root.EncodeToString(), "wrong event root")
+ for k, v := range tt.want {
+ node := e.Root.Dig(k)
+ got := ""
+ if node != nil && v != "" && (v[0] == '[' || v[0] == '{') {
+ got = node.EncodeToString()
+ } else {
+ got = node.AsString()
+ }
+ assert.Equal(t, v, got, "wrong event value with key %q", k)
+ }
wg.Done()
})
@@ -126,7 +176,7 @@ func TestJsonExtract(t *testing.T) {
}
}
-func genFields(count int) string {
+func genBenchFields(count int) string {
var sb strings.Builder
for i := 0; i < count; i++ {
sb.WriteString(fmt.Sprintf(`"field_%d":"val_%d",`, i, i))
@@ -136,54 +186,116 @@ func genFields(count int) string {
const extractBenchJsonFormat = `{%s"level":"info","ts":"2024-02-21T08:31:24.621Z","message":"some message","traceID":"123e456e789e0123","rule_name":"simple_trace"}`
-var extractedField = []string{"level"}
-
var extractBenchCases = []struct {
- json string
+ json []byte
+ extractFields []cfg.FieldSelector
}{
{
- json: fmt.Sprintf(extractBenchJsonFormat, genFields(0)),
+ json: []byte(fmt.Sprintf(extractBenchJsonFormat, genBenchFields(10))),
+ extractFields: []cfg.FieldSelector{"level"},
},
{
- json: fmt.Sprintf(extractBenchJsonFormat, genFields(10)),
+ json: []byte(fmt.Sprintf(extractBenchJsonFormat, genBenchFields(100))),
+ extractFields: []cfg.FieldSelector{"level"},
},
{
- json: fmt.Sprintf(extractBenchJsonFormat, genFields(100)),
+ json: []byte(fmt.Sprintf(extractBenchJsonFormat, genBenchFields(500))),
+ extractFields: []cfg.FieldSelector{"level"},
},
{
- json: fmt.Sprintf(extractBenchJsonFormat, genFields(500)),
+ json: []byte(fmt.Sprintf(extractBenchJsonFormat, genBenchFields(1000))),
+ extractFields: []cfg.FieldSelector{"level"},
},
{
- json: fmt.Sprintf(extractBenchJsonFormat, genFields(1000)),
+ json: []byte(fmt.Sprintf(extractBenchJsonFormat, genBenchFields(10000))),
+ extractFields: []cfg.FieldSelector{"level"},
+ },
+ {
+ json: []byte(fmt.Sprintf(extractBenchJsonFormat, genBenchFields(10))),
+ extractFields: []cfg.FieldSelector{
+ "field3",
+ "field4",
+ "field5",
+ "field6",
+ "field7",
+ },
+ },
+ {
+ json: []byte(fmt.Sprintf(extractBenchJsonFormat, genBenchFields(100))),
+ extractFields: []cfg.FieldSelector{
+ "field30",
+ "field40",
+ "field50",
+ "field60",
+ "field70",
+ },
+ },
+ {
+ json: []byte(fmt.Sprintf(extractBenchJsonFormat, genBenchFields(500))),
+ extractFields: []cfg.FieldSelector{
+ "field1",
+ "field100",
+ "field200",
+ "field300",
+ "field400",
+ },
+ },
+ {
+ json: []byte(fmt.Sprintf(extractBenchJsonFormat, genBenchFields(1000))),
+ extractFields: []cfg.FieldSelector{
+ "field300",
+ "field400",
+ "field500",
+ "field600",
+ "field700",
+ },
+ },
+ {
+ json: []byte(fmt.Sprintf(extractBenchJsonFormat, genBenchFields(10000))),
+ extractFields: []cfg.FieldSelector{
+ "field3000",
+ "field4000",
+ "field5000",
+ "field6000",
+ "field7000",
+ },
},
}
-func BenchmarkExtractObj(b *testing.B) {
+func BenchmarkExtract(b *testing.B) {
for _, benchCase := range extractBenchCases {
- name := fmt.Sprintf("json_length_%d", len(benchCase.json))
+ name := fmt.Sprintf("json_len-%d_ext_fields_count-%d", len(benchCase.json), len(benchCase.extractFields))
+ extractFields := newPathTree()
+ for _, f := range benchCase.extractFields {
+ extractFields.add(cfg.ParseFieldSelector(string(f)))
+ }
b.Run(name, func(b *testing.B) {
- node := insaneJSON.Spawn()
d := &jx.Decoder{}
for i := 0; i < b.N; i++ {
- d.ResetBytes(pipeline.StringToByteUnsafe(benchCase.json))
+ d.ResetBytes(benchCase.json)
// remove allocs for adding new fields to root by passing `skipAddField` flag for correct benching
- extract(node, d, extractedField, 0, true)
+ extract(nil, d, extractFields.root.children, true)
}
- insaneJSON.Release(node)
})
}
}
func BenchmarkInsaneDecodeDig(b *testing.B) {
for _, benchCase := range extractBenchCases {
- name := fmt.Sprintf("json_length_%d", len(benchCase.json))
+ name := fmt.Sprintf("json_len-%d_ext_fields_count-%d", len(benchCase.json), len(benchCase.extractFields))
+ extractFields := make([][]string, 0, len(benchCase.extractFields))
+ for _, f := range benchCase.extractFields {
+ extractFields = append(extractFields, cfg.ParseFieldSelector(string(f)))
+ }
b.Run(name, func(b *testing.B) {
node := insaneJSON.Spawn()
for i := 0; i < b.N; i++ {
- _ = node.DecodeString(benchCase.json)
- node.Dig(extractedField...)
+ _ = node.DecodeBytes(benchCase.json)
+ for _, f := range extractFields {
+ _ = node.Dig(f...)
+ }
}
insaneJSON.Release(node)
})
diff --git a/plugin/action/json_extract/path_tree.go b/plugin/action/json_extract/path_tree.go
new file mode 100644
index 000000000..13a606bc0
--- /dev/null
+++ b/plugin/action/json_extract/path_tree.go
@@ -0,0 +1,70 @@
+package json_extract
+
+type pathNode struct {
+ data string
+ children pathNodes
+}
+
+type pathNodes []*pathNode
+
+func (pn pathNodes) find(data string) *pathNode {
+ for _, n := range pn {
+ if n.data == data {
+ return n
+ }
+ }
+ return nil
+}
+
+// pathTree is a multi-linked list of field paths.
+//
+// For example, we have list of paths:
+//
+// - f1.f2.f3
+// - f1.f4
+// - f1.f2.f5
+// - f6
+//
+// After adding all the paths to the [pathTree], we get the following:
+//
+// - root's children -> f1, f6
+// - f1's children -> f2, f4
+// - f2's children -> f3, f5
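+//
+// A minimal usage sketch (paths are illustrative):
+//
+//	t := newPathTree()
+//	t.add([]string{"f1", "f2", "f3"})
+//	t.add([]string{"f1", "f4"})
+//	n := t.root.children.find("f1") // node "f1" with children "f2" and "f4"
+//	_ = n.children.find("f2")       // node "f2" with child "f3"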
+type pathTree struct {
+ root *pathNode
+}
+
+func newPathTree() *pathTree {
+ return &pathTree{
+ root: &pathNode{
+ children: make([]*pathNode, 0),
+ },
+ }
+}
+
+func (l *pathTree) add(path []string) {
+ cur := l.root
+ depth := 0
+ for depth < len(path)-1 {
+ found := false
+ for _, c := range cur.children {
+ if c.data == path[depth] {
+ cur = c
+ depth++
+ found = true
+ break
+ }
+ }
+ if !found {
+ break
+ }
+ }
+ for i := depth; i < len(path); i++ {
+ newNode := &pathNode{
+ data: path[i],
+ children: make([]*pathNode, 0),
+ }
+ cur.children = append(cur.children, newNode)
+ cur = newNode
+ }
+}
diff --git a/plugin/action/json_extract/path_tree_test.go b/plugin/action/json_extract/path_tree_test.go
new file mode 100644
index 000000000..058f1c070
--- /dev/null
+++ b/plugin/action/json_extract/path_tree_test.go
@@ -0,0 +1,58 @@
+package json_extract
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+func TestPathTree(t *testing.T) {
+ paths := [][]string{
+ {"f1", "f2", "f3", "f4"},
+ {"f1", "f5"},
+ {"f1", "f2", "f6"},
+ {"f7"},
+ }
+ want := &pathNode{
+ children: []*pathNode{
+ {
+ data: "f1",
+ children: []*pathNode{
+ {
+ data: "f2",
+ children: []*pathNode{
+ {
+ data: "f3",
+ children: []*pathNode{
+ {
+ data: "f4",
+ children: []*pathNode{},
+ },
+ },
+ },
+ {
+ data: "f6",
+ children: []*pathNode{},
+ },
+ },
+ },
+ {
+ data: "f5",
+ children: []*pathNode{},
+ },
+ },
+ },
+ {
+ data: "f7",
+ children: []*pathNode{},
+ },
+ },
+ }
+
+ l := newPathTree()
+ for _, p := range paths {
+ l.add(p)
+ }
+
+ require.Equal(t, want, l.root)
+}