-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathinference.go
413 lines (334 loc) · 10.3 KB
/
inference.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
package rknnlite
/*
#include "rknn_api.h"
#include <stdlib.h>
#include <string.h>
*/
import "C"
import (
"fmt"
"gocv.io/x/gocv"
"sync"
"unsafe"
)
// Input mirrors the fields of the C.rknn_input struct and describes a single
// input buffer used for inference. SetInputs copies each field into the
// corresponding C struct.
type Input struct {
	// Index is the input index
	Index uint32
	// Buf is a raw pointer to the input data. Inference sets it to the address
	// of the first element of a gocv Mat's data
	Buf unsafe.Pointer
	// Size is the number of bytes of Buf
	Size uint32
	// PassThrough defines the mode, if True the buf data is passed directly to
	// the input node of the rknn model without any conversion. If False the
	// buf data is converted into an input consistent with the model according
	// to the following Type and Fmt
	PassThrough bool
	// Type is the data type of Buf. This is a required parameter if PassThrough
	// is False
	Type TensorType
	// Fmt is the data format of Buf. This is a required parameter if PassThrough
	// is False
	Fmt TensorFormat
}
// Inference runs the model inference on the given inputs.
//
// Every Mat becomes one RKNN input in NHWC format, indexed by its position in
// mats. The data is passed as float32 when the runtime was configured with
// inputTypeFloat32 and as uint8 otherwise. The inputs are set, the model is
// run and the outputs are fetched using the runtime's output count and
// wantFloat setting.
//
// On failure an empty Outputs is returned together with an error describing
// the step that failed. An empty mats slice or an empty Mat is reported as an
// error.
func (r *Runtime) Inference(mats []gocv.Mat) (*Outputs, error) {
	if len(mats) == 0 {
		return &Outputs{}, fmt.Errorf("error setting inputs: no input Mats provided")
	}

	// convert the cv Mat's into RKNN inputs
	inputs := make([]Input, len(mats))

	for idx, mat := range mats {
		// make mat continuous so its data can be handed over as one buffer
		if !mat.IsContinuous() {
			clone := mat.Clone()
			// The clone owns memory allocated by OpenCV that Go's garbage
			// collector does not manage. It must outlive the inference call,
			// so it is released when this function returns rather than at
			// the end of the loop iteration.
			defer clone.Close()
			mat = clone
		}

		elems := mat.Cols() * mat.Rows() * mat.Channels()

		if r.inputTypeFloat32 {
			// pass data as float32 to RKNN backend
			data, err := mat.DataPtrFloat32()
			if err != nil {
				return &Outputs{}, fmt.Errorf("error getting data pointer to Mat: %w", err)
			}
			if len(data) == 0 {
				// taking the address of the first element would panic
				return &Outputs{}, fmt.Errorf("error getting data pointer to Mat: input %d is empty", idx)
			}

			inputs[idx] = Input{
				Index: uint32(idx),
				Type:  TensorFloat32,
				// multiply by 4 for size of float32
				Size:        uint32(elems * 4),
				Fmt:         TensorNHWC,
				Buf:         unsafe.Pointer(&data[0]),
				PassThrough: false,
			}
			continue
		}

		// pass data as uint8 to RKNN backend
		data, err := mat.DataPtrUint8()
		if err != nil {
			return &Outputs{}, fmt.Errorf("error getting data pointer to Mat: %w", err)
		}
		if len(data) == 0 {
			// taking the address of the first element would panic
			return &Outputs{}, fmt.Errorf("error getting data pointer to Mat: input %d is empty", idx)
		}

		inputs[idx] = Input{
			Index:       uint32(idx),
			Type:        TensorUint8,
			Size:        uint32(elems),
			Fmt:         TensorNHWC,
			Buf:         unsafe.Pointer(&data[0]),
			PassThrough: false,
		}
	}

	// set the Inputs
	if err := r.SetInputs(inputs); err != nil {
		return &Outputs{}, fmt.Errorf("error setting inputs: %w", err)
	}

	// run the model
	if err := r.RunModel(); err != nil {
		return &Outputs{}, fmt.Errorf("error running model: %w", err)
	}

	// get Outputs
	return r.GetOutputs(r.ioNum.NumberOutput, r.wantFloat)
}
// SetInputs wraps C.rknn_inputs_set. Each Input is copied field by field into
// a C.rknn_input and the resulting array is handed to the runtime context.
//
// An error is returned when inputs is empty or when the C call returns a
// non-zero code.
//
// NOTE(review): the rknn_input array is Go memory passed to C, so under the
// cgo pointer passing rules every Buf should reference memory that is not
// managed by the Go garbage collector - confirm against callers.
func (r *Runtime) SetInputs(inputs []Input) error {
	if len(inputs) == 0 {
		// taking the address of the first array element below would panic
		return fmt.Errorf("C.rknn_inputs_set requires at least one input")
	}

	// make a C array of inputs
	cInputs := make([]C.rknn_input, len(inputs))

	for i, input := range inputs {
		passThrough := C.uint8_t(0)
		if input.PassThrough {
			passThrough = C.uint8_t(1)
		}

		cInputs[i].index = C.uint32_t(input.Index)
		cInputs[i].buf = input.Buf
		cInputs[i].size = C.uint32_t(input.Size)
		cInputs[i].pass_through = passThrough
		cInputs[i]._type = C.rknn_tensor_type(input.Type)
		cInputs[i].fmt = C.rknn_tensor_format(input.Fmt)
	}

	ret := C.rknn_inputs_set(r.ctx, C.uint32_t(len(cInputs)), &cInputs[0])
	if ret != 0 {
		return fmt.Errorf("C.rknn_inputs_set failed with code %d, error: %s",
			int(ret), ErrorCodes(ret).String())
	}

	return nil
}
// RunModel wraps C.rknn_run and runs the model on the runtime context. A
// negative return code from the C call is reported as an error carrying the
// code and its ErrorCodes description.
func (r *Runtime) RunModel() error {
	if ret := C.rknn_run(r.ctx, nil); ret < 0 {
		return fmt.Errorf("C.rknn_run failed with code %d, error: %s",
			int(ret), ErrorCodes(ret).String())
	}

	return nil
}
// Output wraps C.rknn_output and holds the Go view of a single output tensor
// as populated by GetOutputs
type Output struct {
	WantFloat  uint8  // 1 when the output data was requested as float, 0 otherwise
	IsPrealloc uint8  // whether buf is pre-allocated
	Index      uint32 // the output index
	// the output buf cast to float32, when WantFloat = 1
	// this is a slice header that points to C memory.
	// When WantFloat = 0 and the output tensor type is TensorFloat16,
	// GetOutputs instead stores a Go allocated float32 copy of the data here
	BufFloat []float32
	// the output buf cast to int8, when WantFloat = 0 and the tensor is not
	// TensorFloat16
	// this is a slice header that points to C memory
	BufInt []int8
	Size   uint32 // the size of output buf in bytes, as reported by the C struct
}
// Outputs is a struct containing Go and C output data. It is created by
// GetOutputs and its C buffers are released by calling Free.
type Outputs struct {
	// Output holds the Go representation of each output tensor
	Output []Output
	// cOutputs is the C array that was filled by C.rknn_outputs_get and is
	// later passed to C.rknn_outputs_release
	cOutputs []C.rknn_output
	// freed is a flag to indicate if the cOutputs have been released from
	// memory or not
	freed bool
	// mutex to lock access to freed variable
	sync.Mutex
	// rknn runtime instance
	rt *Runtime
}
// GetOutputs wraps C.rknn_outputs_get and returns the Output results.
//
// nOutputs is the number of output tensors to fetch. When wantFloat is true
// every output is requested as float and BufFloat is populated. Otherwise
// BufInt is populated, except for outputs whose tensor attribute type is
// TensorFloat16, which are converted to float32 and stored in BufFloat.
//
// Slices that view C memory are capped at the size reported by the runtime so
// they cannot be resliced beyond the underlying buffer. The returned Outputs
// should be released with Free once the data is no longer needed.
//
// An empty Outputs and an error are returned when nOutputs is zero or when
// the C call fails.
func (r *Runtime) GetOutputs(nOutputs uint32, wantFloat bool) (*Outputs, error) {
	if nOutputs == 0 {
		// taking the address of the first array element below would panic
		return &Outputs{}, fmt.Errorf("C.rknn_outputs_get requires at least one output")
	}

	outputs := &Outputs{
		Output:   make([]Output, nOutputs),
		cOutputs: make([]C.rknn_output, nOutputs),
		rt:       r,
	}

	// set want float for all outputs
	useWantFloat := C.uint8_t(0)
	if wantFloat {
		useWantFloat = C.uint8_t(1)
	}

	for idx := range outputs.cOutputs {
		outputs.cOutputs[idx].index = C.uint32_t(idx)
		outputs.cOutputs[idx].want_float = useWantFloat
	}

	// call C function
	ret := C.rknn_outputs_get(r.ctx, C.uint32_t(nOutputs), &outputs.cOutputs[0], nil)
	if ret < 0 {
		return &Outputs{}, fmt.Errorf("C.rknn_outputs_get failed with code %d, error: %s",
			int(ret), ErrorCodes(ret).String())
	}

	// convert C.rknn_output array back to Go Output array
	for i := range outputs.cOutputs {
		c := &outputs.cOutputs[i]

		out := Output{
			WantFloat:  uint8(c.want_float),
			IsPrealloc: uint8(c.is_prealloc),
			Index:      uint32(c.index),
			Size:       uint32(c.size),
		}

		// slicing through a nil buffer pointer would panic, so such an output
		// is returned without any data attached
		if c.buf != nil {
			size := int(c.size)

			switch out.WantFloat {
			case 1:
				// view buffer as []float32, 4 bytes per element
				out.BufFloat = (*[1 << 30]float32)(c.buf)[: size/4 : size/4]

			case 0:
				// yolov8-pose has output tensors of int8 and fp16, so we need to
				// handle the fp16 specially
				if r.outputAttrs[i].Type == TensorFloat16 {
					// convert float16 buffer to []float32, 2 bytes per element
					float16Buf := (*[1 << 30]uint16)(c.buf)[: size/2 : size/2]
					out.BufFloat = convertFloat16BufferToFloat32(float16Buf)
				} else {
					// view buffer as []int8
					out.BufInt = (*[1 << 30]int8)(c.buf)[:size:size]
				}
			}
		}

		outputs.Output[i] = out
	}

	return outputs, nil
}
// convertFloat16BufferToFloat32 returns a newly allocated float32 slice of the
// same length as float16Buf, filled in by float16ToFloat32Buffer. The
// conversion is needed because Go has no support for FP16.
func convertFloat16BufferToFloat32(float16Buf []uint16) []float32 {
	converted := make([]float32, len(float16Buf))

	float16ToFloat32Buffer(float16Buf, converted)

	return converted
}
// Free releases the C memory buffer holding RKNN inference outputs.
//
// It is safe to call more than once; only the first call releases anything.
// Calling it on an Outputs that holds no C buffers, such as the empty value
// returned alongside an error, is a no-op. The embedded mutex is held for the
// duration of the call.
func (o *Outputs) Free() error {
	o.Lock()
	defer o.Unlock()

	if o.freed {
		// C memory already released
		return nil
	}

	o.freed = true

	// nothing was ever obtained from the runtime, so there is nothing to
	// hand back to it
	if o.rt == nil || len(o.cOutputs) == 0 {
		return nil
	}

	return o.rt.releaseOutputs(o.cOutputs)
}
// InputAttribute holds the dimensions of the trained model's input tensor as
// returned by InputAttributes
type InputAttribute struct {
	Width   uint32 // input width dimension
	Height  uint32 // input height dimension
	Channel uint32 // input channel dimension
}
// InputAttributes returns the Input image dimensions taken from the first
// input tensor attribute of the runtime. Dims[1] to Dims[3] are read as
// height, width, channel when the tensor format is TensorNHWC and as
// channel, height, width for any other format.
func (o *Outputs) InputAttributes() InputAttribute {
	attr := o.rt.inputAttrs[0]
	first, second, third := attr.Dims[1], attr.Dims[2], attr.Dims[3]

	if attr.Fmt == TensorNHWC {
		return InputAttribute{
			Width:   second,
			Height:  first,
			Channel: third,
		}
	}

	// default layout is NCHW
	return InputAttribute{
		Width:   third,
		Height:  second,
		Channel: first,
	}
}
// OutputAttribute holds attributes of the trained model's output tensors as
// collected by OutputAttributes. The slice fields hold one entry per output,
// in output order.
type OutputAttribute struct {
	DimForDFL  uint32    // Dims[1] of the first output tensor
	Scales     []float32 // Scale of each output tensor
	ZPs        []int32   // ZP (zero point) of each output tensor
	DimHeights []uint32  // Dims[2] of each output tensor
	DimWidths  []uint32  // Dims[3] of each output tensor
	IONumber   uint32    // number of model outputs
}
// OutputAttributes returns the Model output attribute scales and zero points,
// along with Dims[2] and Dims[3] of every output as heights and widths.
// DimForDFL is read from Dims[1] of the first output and IONumber is the
// runtime's output count.
func (o *Outputs) OutputAttributes() OutputAttribute {
	count := int(o.rt.ioNum.NumberOutput)

	result := OutputAttribute{
		DimForDFL:  o.rt.outputAttrs[0].Dims[1],
		Scales:     make([]float32, 0, count),
		ZPs:        make([]int32, 0, count),
		DimHeights: make([]uint32, 0, count),
		DimWidths:  make([]uint32, 0, count),
		IONumber:   o.rt.ioNum.NumberOutput,
	}

	for idx := 0; idx < count; idx++ {
		attr := o.rt.outputAttrs[idx]

		result.Scales = append(result.Scales, attr.Scale)
		result.ZPs = append(result.ZPs, attr.ZP)
		result.DimHeights = append(result.DimHeights, attr.Dims[2])
		result.DimWidths = append(result.DimWidths, attr.Dims[3])
	}

	return result
}
// releaseOutputs releases the memory allocated for the outputs by the RKNN
// toolkit directly using C rknn_output structs.
//
// cOutputs is the array previously filled by C.rknn_outputs_get. An empty
// slice means there is nothing to release and nil is returned without calling
// into C. A non-zero return code from C.rknn_outputs_release is reported as
// an error.
func (r *Runtime) releaseOutputs(cOutputs []C.rknn_output) error {
	if len(cOutputs) == 0 {
		// taking the address of the first array element below would panic
		return nil
	}

	// call C.rknn_outputs_release with the context and the outputs pointer
	ret := C.rknn_outputs_release(r.ctx, C.uint32_t(len(cOutputs)), &cOutputs[0])
	if ret != 0 {
		return fmt.Errorf("C.rknn_outputs_release failed with code %d, error: %s",
			int(ret), ErrorCodes(ret).String())
	}

	return nil
}
// Probability pairs a class label index with its match probability, as
// returned by GetTop5
type Probability struct {
	LabelIndex  int32   // index of the class label, -1 when GetTop found no match for the rank
	Probability float32 // probability of the match
}
// GetTop5 returns the Top5 matches in the model as a slice of five
// Probability values, each holding a label index and its match probability,
// in descending order from top match.
//
// The ranking is computed by GetTop over the BufFloat data of each Output.
// The same five result slots are rewritten for every Output, so when several
// outputs are passed the result reflects the last one. With no outputs the
// slice holds five zero valued entries.
func GetTop5(outputs []Output) []Probability {
	const topN = 5

	result := make([]Probability, topN)

	for _, out := range outputs {
		var (
			classes [topN]int32
			scores  [topN]float32
		)

		GetTop(out.BufFloat, scores[:], classes[:], int32(len(out.BufFloat)), topN)

		for rank := range result {
			result[rank] = Probability{
				LabelIndex:  classes[rank],
				Probability: scores[rank],
			}
		}
	}

	return result
}
// MAX_TOP_NUM is the largest number of top matches GetTop will produce.
const MAX_TOP_NUM = 20

// GetTop takes outputs and produces a top list of matches by probability.
//
// pfProb holds the probability of each class, indexed by class. The first
// topNum entries of pfMaxProb and pMaxClass receive the highest probabilities
// and their class indexes in descending order. Only probabilities greater
// than 0.000001 are considered; ranks with no qualifying class are left with
// probability 0 and class -1. When probabilities tie, the lower class index
// is ranked first.
//
// outputCount is the number of entries of pfProb to examine and is limited to
// len(pfProb).
//
// It returns 1 on success and 0, without modifying the result slices, when
// topNum exceeds MAX_TOP_NUM or the length of either result slice.
func GetTop(pfProb []float32, pfMaxProb []float32, pMaxClass []int32,
	outputCount int32, topNum int32) int {

	// probabilities at or below this threshold are never ranked
	const minProb = 0.000001

	if topNum > MAX_TOP_NUM {
		return 0
	}

	// the result slices must have room for every requested rank
	if int(topNum) > len(pfMaxProb) || int(topNum) > len(pMaxClass) {
		return 0
	}

	// never read past the end of the probability buffer
	if int(outputCount) > len(pfProb) {
		outputCount = int32(len(pfProb))
	}

	// initialize pfMaxProb with default values, ie: 0
	for j := range pfMaxProb {
		pfMaxProb[j] = 0
	}

	// initialize pMaxClass with default values, ie: -1
	for j := range pMaxClass {
		pMaxClass[j] = -1
	}

	for j := int32(0); j < topNum; j++ {
		// classes already placed in a higher rank; later entries are still -1
		// and the entry for rank j only ever holds an index below the one
		// being examined, so neither can match
		taken := pMaxClass[:j]

	scan:
		for i := int32(0); i < outputCount; i++ {
			// skip if the current class is already in the top list
			for _, class := range taken {
				if class == i {
					continue scan
				}
			}

			// if the current probability is greater than the j'th max
			// probability, update pfMaxProb and pMaxClass
			if pfProb[i] > pfMaxProb[j] && pfProb[i] > minProb {
				pfMaxProb[j] = pfProb[i]
				pMaxClass[j] = i
			}
		}
	}

	return 1
}