Skip to content

Commit

Permalink
Fix operation tracing when encoder is included (#261)
Browse files Browse the repository at this point in the history
Signed-off-by: Pradithya Aria <[email protected]>
  • Loading branch information
aria authored Jun 17, 2022
1 parent da829b9 commit b5d7288
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 1 deletion.
28 changes: 28 additions & 0 deletions api/pkg/transformer/executor/transformer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,34 @@ func TestStandardTransformer_Execute(t *testing.T) {
},
wantResponseByte: []byte(`{"response":{"status":"ok"},"operation_tracing":{"preprocess":[{"input":null,"output":{"customer_id":1111},"spec":{"name":"customer_id","jsonPath":"$.customer.id"},"operation_type":"variable_op"},{"input":null,"output":{"zero":0},"spec":{"name":"zero","literal":{"intValue":"0"}},"operation_type":"variable_op"},{"input":null,"output":{"driver_table":[{"acceptance_rate":0.8,"id":1,"name":"driver-1","rating":4},{"acceptance_rate":0.6,"id":2,"name":"driver-2","rating":3},{"acceptance_rate":0.77,"id":3,"name":"driver-3","rating":3.5},{"acceptance_rate":0.9,"id":4,"name":"driver-4","rating":2.5},{"acceptance_rate":0.88,"id":4,"name":"driver-4","rating":2.5}]},"spec":{"name":"driver_table","baseTable":{"fromJson":{"jsonPath":"$.drivers[*]"}}},"operation_type":"create_table_op"},{"input":{"driver_table":[{"acceptance_rate":0.8,"id":1,"name":"driver-1","rating":4},{"acceptance_rate":0.6,"id":2,"name":"driver-2","rating":3},{"acceptance_rate":0.77,"id":3,"name":"driver-3","rating":3.5},{"acceptance_rate":0.9,"id":4,"name":"driver-4","rating":2.5},{"acceptance_rate":0.88,"id":4,"name":"driver-4","rating":2.5}]},"output":{"transformed_driver_table":[{"acceptance_rate":0.8,"customer_id":1111,"distance_contains_zero":true,"distance_in_km":0,"distance_in_m":0,"distance_is_not_far_away":true,"distance_is_valid":true,"driver_id":1,"driver_performa":6,"name":"driver-1","rating":4},{"acceptance_rate":0.77,"customer_id":1111,"distance_contains_zero":true,"distance_in_km":0.729,"distance_in_m":729,"distance_is_not_far_away":true,"distance_is_valid":true,"driver_id":3,"driver_performa":3.5,"name":"driver-3","rating":3.5}]},"spec":{"inputTable":"driver_table","outputTable":"transformed_driver_table","steps":[{"updateColumns":[{"column":"customer_id","expression":"customer_id"},{"column":"distance_in_km","expression":"map(JsonExtract(\"$.details\", \"$.points[*].distanceInMeter\"), {# * 
0.001})"},{"column":"distance_in_m","expression":"filter(JsonExtract(\"$.details\", \"$.points[*].distanceInMeter\"), {# \u003e= 0})"},{"column":"distance_is_valid","expression":"all(JsonExtract(\"$.details\", \"$.points[*].distanceInMeter\"), {# \u003e= 0})"},{"column":"distance_is_not_far_away","expression":"none(JsonExtract(\"$.details\", \"$.points[*].distanceInMeter\"), {# * 0.001 \u003e 10})"},{"column":"distance_contains_zero","expression":"any(JsonExtract(\"$.details\", \"$.points[*].distanceInMeter\"), {# == 0.0})"},{"column":"driver_performa","conditions":[{"rowSelector":"driver_table.Col(\"rating\") * 2 \u003c= 7","expression":"driver_table.Col(\"rating\") * 1"},{"rowSelector":"driver_table.Col(\"rating\") * 2 \u003e= 8","expression":"driver_table.Col(\"rating\") * 1.5"},{"default":{"expression":"zero"}}]}]},{"filterRow":{"condition":"driver_table.Col(\"acceptance_rate\") \u003e 0.7"}},{"sliceRow":{"start":0,"end":2}},{"renameColumns":{"id":"driver_id"}}]},"operation_type":"table_transform_op"},{"input":null,"output":{"max_performa":6},"spec":{"name":"max_performa","expression":"transformed_driver_table.Col('driver_performa').Max()"},"operation_type":"variable_op"},{"input":null,"output":{"instances":{"columns":["acceptance_rate","driver_id","name","rating","customer_id","distance_contains_zero","distance_in_km","distance_in_m","distance_is_not_far_away","distance_is_valid","driver_performa"],"data":[[0.8,1,"driver-1",4,1111,true,0,0,true,true,6],[0.77,3,"driver-3",3.5,1111,true,0.729,729,true,true,3.5]]},"max_performa":6},"spec":{"jsonTemplate":{"fields":[{"fieldName":"instances","fromTable":{"tableName":"transformed_driver_table","format":"SPLIT"}},{"fieldName":"max_performa","expression":"max_performa"}]}},"operation_type":"json_output_op"}],"postprocess":[]}}`),
},
{
desc: "transformation with enoder",
specYamlPath: "../pipeline/testdata/valid_encoder.yaml",
executorCfg: transformerExecutorConfig{
traceEnabled: true,
logger: logger,
},
modelPredictor: NewMockModelPredictor(types.JSONObject{"status": "ok"}, map[string]string{"Content-Type": "application/json"}),
requestPayload: []byte(`{"drivers":[{"id":1,"name":"driver-1","rating":4,"acceptance_rate":0.8,"vehicle":"mpv","previous_vehicle":"suv"},{"id":2,"name":"driver-2","rating":3,"acceptance_rate":0.6,"vehicle": "mpv","previous_vehicle":"suv"},{"id":3,"name":"driver-3","rating":3.5,"acceptance_rate":0.77,"vehicle": "mpv","previous_vehicle":"suv"},{"id":4,"name":"driver-4","rating":2.5,"acceptance_rate":0.9,"vehicle": "mpv","previous_vehicle":"suv"},{"id":4,"name":"driver-4","rating":2.5,"acceptance_rate":0.88,"vehicle": "mpv","previous_vehicle":"suv"}],"customer":{"id":1111},"details":"{\"points\": [{\"distanceInMeter\": 0.0}, {\"distanceInMeter\": 8976.0}, {\"distanceInMeter\": 729.0}, {\"distanceInMeter\": 8573.0}, {\"distanceInMeter\": 9000.0}]}"}`),
requestHeaders: map[string]string{
"Content-Type": "application/json",
},
wantResponseByte: []byte(`{"response":{"status":"ok"},"operation_tracing":{"preprocess":[{"input":null,"output":{"customer_id":1111},"spec":{"name":"customer_id","jsonPathConfig":{"jsonPath":"$.customer.id"}},"operation_type":"variable_op"},{"input":null,"output":{"driver_table":[{"acceptance_rate":0.8,"id":1,"name":"driver-1","previous_vehicle":"suv","rating":4,"row_number":0,"vehicle":"mpv"},{"acceptance_rate":0.6,"id":2,"name":"driver-2","previous_vehicle":"suv","rating":3,"row_number":1,"vehicle":"mpv"},{"acceptance_rate":0.77,"id":3,"name":"driver-3","previous_vehicle":"suv","rating":3.5,"row_number":2,"vehicle":"mpv"},{"acceptance_rate":0.9,"id":4,"name":"driver-4","previous_vehicle":"suv","rating":2.5,"row_number":3,"vehicle":"mpv"},{"acceptance_rate":0.88,"id":4,"name":"driver-4","previous_vehicle":"suv","rating":2.5,"row_number":4,"vehicle":"mpv"}]},"spec":{"name":"driver_table","baseTable":{"fromJson":{"jsonPath":"$.drivers[*]","addRowNumber":true}}},"operation_type":"create_table_op"},{"input":null,"output":{"vehicle_mapping":"The result of this operation is on the transformer step that use this 
encoder"},"spec":{"name":"vehicle_mapping","ordinalEncoderConfig":{"defaultValue":"0","targetValueType":"INT","mapping":{"mpv":"3","sedan":"2","suv":"1"}}},"operation_type":"json_output_op"},{"input":{"driver_table":[{"acceptance_rate":0.8,"id":1,"name":"driver-1","previous_vehicle":"suv","rating":4,"row_number":0,"vehicle":"mpv"},{"acceptance_rate":0.6,"id":2,"name":"driver-2","previous_vehicle":"suv","rating":3,"row_number":1,"vehicle":"mpv"},{"acceptance_rate":0.77,"id":3,"name":"driver-3","previous_vehicle":"suv","rating":3.5,"row_number":2,"vehicle":"mpv"},{"acceptance_rate":0.9,"id":4,"name":"driver-4","previous_vehicle":"suv","rating":2.5,"row_number":3,"vehicle":"mpv"},{"acceptance_rate":0.88,"id":4,"name":"driver-4","previous_vehicle":"suv","rating":2.5,"row_number":4,"vehicle":"mpv"}]},"output":{"transformed_driver_table":[{"customer_id":1111,"name":"driver-4","previous_vehicle":1,"rank":17.5,"rating":0.375,"vehicle":3},{"customer_id":1111,"name":"driver-4","previous_vehicle":1,"rank":12.5,"rating":0.375,"vehicle":3},{"customer_id":1111,"name":"driver-3","previous_vehicle":1,"rank":7.5,"rating":0.625,"vehicle":3},{"customer_id":1111,"name":"driver-2","previous_vehicle":1,"rank":2.5,"rating":0.5,"vehicle":3},{"customer_id":1111,"name":"driver-1","previous_vehicle":1,"rank":-2.5,"rating":0.75,"vehicle":3}]},"spec":{"inputTable":"driver_table","outputTable":"transformed_driver_table","steps":[{"dropColumns":["id"]},{"sort":[{"column":"row_number","order":"DESC"}]},{"renameColumns":{"row_number":"rank"}},{"updateColumns":[{"column":"customer_id","expression":"customer_id"}]},{"scaleColumns":[{"column":"rank","standardScalerConfig":{"mean":0.5,"std":0.2}}]},{"scaleColumns":[{"column":"rating","minMaxScalerConfig":{"min":1,"max":5}}]},{"encodeColumns":[{"columns":["vehicle","previous_vehicle"],"encoder":"vehicle_mapping"}]},{"selectColumns":["customer_id","name","rank","rating","vehicle","previous_vehicle"]}]},"operation_type":"table_transform_op"},{"input":null
,"output":{"instances":{"columns":["customer_id","name","rank","rating","vehicle","previous_vehicle"],"data":[[1111,"driver-4",17.5,0.375,3,1],[1111,"driver-4",12.5,0.375,3,1],[1111,"driver-3",7.5,0.625,3,1],[1111,"driver-2",2.5,0.5,3,1],[1111,"driver-1",-2.5,0.75,3,1]]}},"spec":{"jsonTemplate":{"fields":[{"fieldName":"instances","fromTable":{"tableName":"transformed_driver_table","format":"SPLIT"}}]}},"operation_type":"json_output_op"}],"postprocess":[]}}`),
},
{
desc: "transformation with table join",
specYamlPath: "../pipeline/testdata/valid_table_join.yaml",
executorCfg: transformerExecutorConfig{
traceEnabled: true,
logger: logger,
},
modelPredictor: NewMockModelPredictor(types.JSONObject{"status": "ok"}, map[string]string{"Content-Type": "application/json"}),
requestPayload: []byte(`{"drivers":[{"id":1,"name":"driver-1"},{"id":2,"name":"driver-2"},{"id":3,"name":"driver-3"},{"id":4,"name":"driver-4"},{"id":4,"name":"driver-4"}],"drivers_features":[{"id":1,"name":"driver-1","rating":4,"acceptance_rate":0.8,"vehicle":"mpv","previous_vehicle":"suv"},{"id":2,"name":"driver-2","rating":3,"acceptance_rate":0.6,"vehicle":"mpv","previous_vehicle":"suv"},{"id":3,"name":"driver-3","rating":3.5,"acceptance_rate":0.77,"vehicle":"mpv","previous_vehicle":"suv"},{"id":4,"name":"driver-4","rating":2.5,"acceptance_rate":0.9,"vehicle":"mpv","previous_vehicle":"suv"},{"id":4,"name":"driver-4","rating":2.5,"acceptance_rate":0.88,"vehicle":"mpv","previous_vehicle":"suv"}]}`),
requestHeaders: map[string]string{
"Content-Type": "application/json",
},
wantResponseByte: []byte(`{"response":{"status":"ok"},"operation_tracing":{"preprocess":[{"input":null,"output":{"driver_table":[{"id":1,"name":"driver-1","row_number":0},{"id":2,"name":"driver-2","row_number":1},{"id":3,"name":"driver-3","row_number":2},{"id":4,"name":"driver-4","row_number":3},{"id":4,"name":"driver-4","row_number":4}]},"spec":{"name":"driver_table","baseTable":{"fromJson":{"jsonPath":"$.drivers[*]","addRowNumber":true}}},"operation_type":"create_table_op"},{"input":null,"output":{"driver_feature_table":[{"acceptance_rate":0.8,"id":1,"name":"driver-1","previous_vehicle":"suv","rating":4,"row_number":0,"vehicle":"mpv"},{"acceptance_rate":0.6,"id":2,"name":"driver-2","previous_vehicle":"suv","rating":3,"row_number":1,"vehicle":"mpv"},{"acceptance_rate":0.77,"id":3,"name":"driver-3","previous_vehicle":"suv","rating":3.5,"row_number":2,"vehicle":"mpv"},{"acceptance_rate":0.9,"id":4,"name":"driver-4","previous_vehicle":"suv","rating":2.5,"row_number":3,"vehicle":"mpv"},{"acceptance_rate":0.88,"id":4,"name":"driver-4","previous_vehicle":"suv","rating":2.5,"row_number":4,"vehicle":"mpv"}]},"spec":{"name":"driver_feature_table","baseTable":{"fromJson":{"jsonPath":"$.drivers_features[*]","addRowNumber":true}}},"operation_type":"create_table_op"},{"input":{"driver_feature_table":[{"acceptance_rate":0.8,"id":1,"name":"driver-1","previous_vehicle":"suv","rating":4,"row_number":0,"vehicle":"mpv"},{"acceptance_rate":0.6,"id":2,"name":"driver-2","previous_vehicle":"suv","rating":3,"row_number":1,"vehicle":"mpv"},{"acceptance_rate":0.77,"id":3,"name":"driver-3","previous_vehicle":"suv","rating":3.5,"row_number":2,"vehicle":"mpv"},{"acceptance_rate":0.9,"id":4,"name":"driver-4","previous_vehicle":"suv","rating":2.5,"row_number":3,"vehicle":"mpv"},{"acceptance_rate":0.88,"id":4,"name":"driver-4","previous_vehicle":"suv","rating":2.5,"row_number":4,"vehicle":"mpv"}],"driver_table":[{"id":1,"name":"driver-1","row_number":0},{"id":2,"name":"driver-2","row_number":1},{
"id":3,"name":"driver-3","row_number":2},{"id":4,"name":"driver-4","row_number":3},{"id":4,"name":"driver-4","row_number":4}]},"output":{"result_table":[{"acceptance_rate":0.8,"id":1,"name_0":"driver-1","name_1":"driver-1","previous_vehicle":"suv","rating":4,"row_number_0":0,"row_number_1":0,"vehicle":"mpv"},{"acceptance_rate":0.6,"id":2,"name_0":"driver-2","name_1":"driver-2","previous_vehicle":"suv","rating":3,"row_number_0":1,"row_number_1":1,"vehicle":"mpv"},{"acceptance_rate":0.77,"id":3,"name_0":"driver-3","name_1":"driver-3","previous_vehicle":"suv","rating":3.5,"row_number_0":2,"row_number_1":2,"vehicle":"mpv"},{"acceptance_rate":0.9,"id":4,"name_0":"driver-4","name_1":"driver-4","previous_vehicle":"suv","rating":2.5,"row_number_0":3,"row_number_1":3,"vehicle":"mpv"},{"acceptance_rate":0.88,"id":4,"name_0":"driver-4","name_1":"driver-4","previous_vehicle":"suv","rating":2.5,"row_number_0":3,"row_number_1":4,"vehicle":"mpv"},{"acceptance_rate":0.9,"id":4,"name_0":"driver-4","name_1":"driver-4","previous_vehicle":"suv","rating":2.5,"row_number_0":4,"row_number_1":3,"vehicle":"mpv"},{"acceptance_rate":0.88,"id":4,"name_0":"driver-4","name_1":"driver-4","previous_vehicle":"suv","rating":2.5,"row_number_0":4,"row_number_1":4,"vehicle":"mpv"}]},"spec":{"leftTable":"driver_table","rightTable":"driver_feature_table","outputTable":"result_table","how":"LEFT","onColumns":["id"]},"operation_type":"table_join_op"},{"input":null,"output":{"instances":{"columns":["id","name_0","row_number_0","acceptance_rate","name_1","previous_vehicle","rating","row_number_1","vehicle"],"data":[[1,"driver-1",0,0.8,"driver-1","suv",4,0,"mpv"],[2,"driver-2",1,0.6,"driver-2","suv",3,1,"mpv"],[3,"driver-3",2,0.77,"driver-3","suv",3.5,2,"mpv"],[4,"driver-4",3,0.9,"driver-4","suv",2.5,3,"mpv"],[4,"driver-4",3,0.88,"driver-4","suv",2.5,4,"mpv"],[4,"driver-4",4,0.9,"driver-4","suv",2.5,3,"mpv"],[4,"driver-4",4,0.88,"driver-4","suv",2.5,4,"mpv"]]}},"spec":{"jsonTemplate":{"fields":[{"fieldName":"
instances","fromTable":{"tableName":"result_table","format":"SPLIT"}}]}},"operation_type":"json_output_op"}],"postprocess":[]}}`),
},
}
for _, tt := range tests {
t.Run(tt.desc, func(t *testing.T) {
Expand Down
7 changes: 6 additions & 1 deletion api/pkg/transformer/pipeline/encoder_op.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"

"github.com/gojek/merlin/pkg/transformer/spec"
"github.com/gojek/merlin/pkg/transformer/types"
enc "github.com/gojek/merlin/pkg/transformer/types/encoder"
"github.com/opentracing/opentracing-go"
)
Expand All @@ -19,7 +20,11 @@ type Encoder interface {
}

// NewEncoderOp creates an EncoderOp for the given encoder specs.
//
// When tracingEnabled is true, the op's OperationTracing is initialised from
// the same specs via NewOperationTracing; otherwise it is left unset.
// NOTE(review): the trace is tagged with types.JsonOutputOpType rather than an
// encoder-specific op type — confirm this is intended.
func NewEncoderOp(encoders []*spec.Encoder, tracingEnabled bool) *EncoderOp {
	encoderOp := &EncoderOp{encoderSpecs: encoders}
	// Tracing must be attached before returning; an early return here would
	// silently ignore tracingEnabled.
	if tracingEnabled {
		encoderOp.OperationTracing = NewOperationTracing(encoders, types.JsonOutputOpType)
	}
	return encoderOp
}

func (e *EncoderOp) Execute(ctx context.Context, env *Environment) error {
Expand Down
69 changes: 69 additions & 0 deletions api/pkg/transformer/pipeline/testdata/valid_encoder.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Transformer spec exercising an encoder in the preprocess pipeline:
#   1. read customer_id from the request via JSONPath,
#   2. build driver_table from $.drivers[*] (with a row_number column),
#   3. declare the ordinal encoder vehicle_mapping,
#   4. transform driver_table into transformed_driver_table,
#   5. emit transformed_driver_table as JSON in SPLIT format.
transformerConfig:
preprocess:
inputs:
- variables:
- name: customer_id
jsonPathConfig:
jsonPath: $.customer.id
- tables:
- name: driver_table
baseTable:
fromJson:
jsonPath: $.drivers[*]
addRowNumber: true
# Encoder declared here is referenced by name from the encodeColumns step below.
# presumably values missing from `mapping` fall back to defaultValue — confirm.
- encoders:
- name: vehicle_mapping
ordinalEncoderConfig:
defaultValue: '0'
targetValueType: INT
mapping:
mpv: '3'
sedan: '2'
suv: '1'
transformations:
- tableTransformation:
inputTable: driver_table
outputTable: transformed_driver_table
# Steps are listed in order: drop, sort, rename, update, scale (x2), encode, select.
steps:
- dropColumns:
- id
- sort:
- column: row_number
order: DESC
- renameColumns:
row_number: rank
- updateColumns:
- column: customer_id
expression: customer_id
- scaleColumns:
- column: rank
standardScalerConfig:
mean: 0.5
std: 0.2
- scaleColumns:
- column: rating
minMaxScalerConfig:
min: 1
max: 5
# Applies vehicle_mapping to both vehicle columns.
- encodeColumns:
- columns:
- vehicle
- previous_vehicle
encoder: vehicle_mapping
- selectColumns:
- customer_id
- name
- rank
- rating
- vehicle
- previous_vehicle
outputs:
- jsonOutput:
jsonTemplate:
fields:
- fieldName: instances
fromTable:
tableName: transformed_driver_table
format: SPLIT
fields: []
postprocess: {}
30 changes: 30 additions & 0 deletions api/pkg/transformer/pipeline/testdata/valid_table_join.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Transformer spec exercising a table join in the preprocess pipeline:
#   1. build driver_table from $.drivers[*] (with a row_number column),
#   2. build driver_feature_table from $.drivers_features[*] (with a row_number column),
#   3. LEFT-join the two tables on `id` into result_table,
#   4. emit result_table as JSON in SPLIT format.
transformerConfig:
preprocess:
inputs:
- tables:
- name: driver_table
baseTable:
fromJson:
jsonPath: $.drivers[*]
addRowNumber: true
- tables:
- name: driver_feature_table
baseTable:
fromJson:
jsonPath: $.drivers_features[*]
addRowNumber: true
transformations:
# driver_table is the left side of the join; driver_feature_table is the right side.
- tableJoin:
leftTable: driver_table
rightTable: driver_feature_table
outputTable: result_table
how: LEFT
onColumns: [id]
outputs:
- jsonOutput:
jsonTemplate:
fields:
- fieldName: instances
fromTable:
tableName: result_table
format: SPLIT

0 comments on commit b5d7288

Please sign in to comment.