From 3ce4874abd2b579dcb0b66399598d38f0291bfde Mon Sep 17 00:00:00 2001 From: DavePearce Date: Thu, 5 Sep 2024 16:40:45 +0100 Subject: [PATCH 1/2] Parallelise reading binary `lt` trace data This now parallelises the process of converting the binary bytes from a given `lt` trace file into arrays of `fr.Element`. This improves the time needed to read large trace files by a fairly considerable amount. --- pkg/schema/schemas.go | 6 +++--- pkg/trace/lt/reader.go | 25 ++++++++++++++++++------- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/pkg/schema/schemas.go b/pkg/schema/schemas.go index 8a06507b..ea0aed74 100644 --- a/pkg/schema/schemas.go +++ b/pkg/schema/schemas.go @@ -75,10 +75,10 @@ func processConstraintBatch(batch uint, batchsize uint, iter util.Iterator[Const // Get ith constraint ith := iter.Next() // Launch checker for constraint - go func(tr tr.Trace) { + go func() { // Send outcome back - c <- ith.Accepts(tr) - }(trace) + c <- ith.Accepts(trace) + }() } // for i := uint(0); i < n; i++ { diff --git a/pkg/trace/lt/reader.go b/pkg/trace/lt/reader.go index adee287d..e86d51d4 100644 --- a/pkg/trace/lt/reader.go +++ b/pkg/trace/lt/reader.go @@ -36,20 +36,31 @@ func FromBytes(data []byte) ([]trace.RawColumn, error) { } // Determine byte slices offset := uint(len(data) - buf.Len()) - + c := make(chan util.Pair[uint, util.Array[fr.Element]], 10) + // Dispatch go-routines for i := uint(0); i < uint(ncols); i++ { ith := headers[i] - // Split qualified column name - mod, col := splitQualifiedColumnName(ith.name) // Calculate length (in bytes) of this column nbytes := ith.width * ith.length - // Read column data - elements := readColumnData(ith, data[offset:offset+nbytes]) - // Construct appropriate slice - columns[i] = trace.RawColumn{Module: mod, Name: col, Data: elements} + // Dispatch go-routine + go func(i uint, offset uint) { + // Read column data + elements := readColumnData(ith, data[offset:offset+nbytes]) + // Package result + c <- 
util.NewPair(i, elements) + }(i, offset) // Update byte offset offset += nbytes } + // Collect results + for i := uint(0); i < uint(ncols); i++ { + // Read packaged result from channel + res := <-c + // Split qualified column name + mod, col := splitQualifiedColumnName(headers[res.Left].name) + // Construct appropriate slice + columns[res.Left] = trace.RawColumn{Module: mod, Name: col, Data: res.Right} + } // Done return columns, nil } From e0b5c5ab73f3b50e174fd5256d94b3f1f4d17fb8 Mon Sep 17 00:00:00 2001 From: DavePearce Date: Thu, 5 Sep 2024 17:45:52 +0100 Subject: [PATCH 2/2] Special case `lt` data readers This puts in place some special case readers for `lt` files. Specifically, when reading columns which are bytes, words, double words and quadwords. This offers a small improvement. --- pkg/trace/lt/reader.go | 69 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/pkg/trace/lt/reader.go b/pkg/trace/lt/reader.go index e86d51d4..3611f5c3 100644 --- a/pkg/trace/lt/reader.go +++ b/pkg/trace/lt/reader.go @@ -36,7 +36,7 @@ func FromBytes(data []byte) ([]trace.RawColumn, error) { } // Determine byte slices offset := uint(len(data) - buf.Len()) - c := make(chan util.Pair[uint, util.Array[fr.Element]], 10) + c := make(chan util.Pair[uint, util.Array[fr.Element]], 100) // Dispatch go-routines for i := uint(0); i < uint(ncols); i++ { ith := headers[i] @@ -108,9 +108,76 @@ func readColumnHeader(buf *bytes.Reader) (columnHeader, error) { func readColumnData(header columnHeader, bytes []byte) util.FrArray { // Construct array data := util.NewFrArray(header.length, header.width*8) + // Handle special cases + switch header.width { + case 1: + return readByteColumnData(data, header, bytes) + case 2: + return readWordColumnData(data, header, bytes) + case 4: + return readDWordColumnData(data, header, bytes) + case 8: + return readQWordColumnData(data, header, bytes) + } + // General case + return readArbitraryColumnData(data, 
header, bytes) +} + +func readByteColumnData(data util.Array[fr.Element], header columnHeader, bytes []byte) util.FrArray { + for i := uint(0); i < header.length; i++ { + // Construct ith field element + data.Set(i, fr.NewElement(uint64(bytes[i]))) + } + // Done + return data +} + +func readWordColumnData(data util.Array[fr.Element], header columnHeader, bytes []byte) util.FrArray { + offset := uint(0) // Assign elements + for i := uint(0); i < header.length; i++ { + ith := binary.BigEndian.Uint16(bytes[offset : offset+2]) + // Construct ith field element + data.Set(i, fr.NewElement(uint64(ith))) + // Move offset to next element + offset += 2 + } + // Done + return data +} + +func readDWordColumnData(data util.Array[fr.Element], header columnHeader, bytes []byte) util.FrArray { offset := uint(0) + // Assign elements + for i := uint(0); i < header.length; i++ { + ith := binary.BigEndian.Uint32(bytes[offset : offset+4]) + // Construct ith field element + data.Set(i, fr.NewElement(uint64(ith))) + // Move offset to next element + offset += 4 + } + // Done + return data +} +func readQWordColumnData(data util.Array[fr.Element], header columnHeader, bytes []byte) util.FrArray { + offset := uint(0) + // Assign elements + for i := uint(0); i < header.length; i++ { + ith := binary.BigEndian.Uint64(bytes[offset : offset+8]) + // Construct ith field element + data.Set(i, fr.NewElement(ith)) + // Move offset to next element + offset += 8 + } + // Done + return data +} + +// Read column data which has arbitrary width +func readArbitraryColumnData(data util.Array[fr.Element], header columnHeader, bytes []byte) util.FrArray { + offset := uint(0) + // Assign elements for i := uint(0); i < header.length; i++ { var ith fr.Element // Calculate position of next element