Skip to content

Commit

Permalink
mixed vec_ops
Browse files Browse the repository at this point in the history
  • Loading branch information
emirsoyturk authored Jan 1, 2025
1 parent 0ed301f commit 290ca67
Show file tree
Hide file tree
Showing 28 changed files with 443 additions and 67 deletions.
57 changes: 29 additions & 28 deletions icicle/backend/cpu/src/field/cpu_vec_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ enum VecOperation {
 * Based on the enum value, the functionality is selected and the worker executes that function for every task
 * dispatched by the manager.
*/
template <typename T>
template <typename T, typename U>
class VectorOpTask : public TaskBase
{
public:
Expand All @@ -58,7 +58,7 @@ class VectorOpTask : public TaskBase
VecOperation operation,
const uint32_t nof_operations,
const T* op_a,
const T* op_b,
const U* op_b,
const uint32_t stride,
T* output)
{
Expand Down Expand Up @@ -202,7 +202,7 @@ class VectorOpTask : public TaskBase
void vector_div()
{
for (uint64_t i = 0; i < m_nof_operations; ++i) {
m_output[i] = m_op_a[i] * T::inverse(m_op_b[i]);
m_output[i] = m_op_a[i] * U::inverse(m_op_b[i]);
}
}
// Single worker functionality to execute conversion from Barrett to Montgomery
Expand Down Expand Up @@ -354,7 +354,7 @@ class VectorOpTask : public TaskBase
VecOperation m_operation; // the operation to execute
uint32_t m_nof_operations; // number of operations to execute for this task
const T* m_op_a; // pointer to operand A. Operand A is a vector, or matrix in case of replace_elements
const T* m_op_b; // pointer to operand B. Operand B is a vector or scalar
const U* m_op_b; // pointer to operand B. Operand B is a vector or scalar
uint64_t m_start_index; // index used in bitreverse operation and out of place matrix transpose
uint64_t m_stop_index; // index used in reduce operations and out of place matrix transpose
uint32_t m_bit_size; // use in bitrev operation
Expand Down Expand Up @@ -386,14 +386,14 @@ int get_nof_workers(const VecOpsConfig& config)
}

// Execute a full task from the type vector = vector (op) vector
template <typename T>
template <typename T, typename U>
eIcicleError
cpu_2vectors_op(VecOperation op, const T* vec_a, const T* vec_b, uint64_t size, const VecOpsConfig& config, T* output)
cpu_2vectors_op(VecOperation op, const T* vec_a, const U* vec_b, uint64_t size, const VecOpsConfig& config, T* output)
{
TasksManager<VectorOpTask<T>> task_manager(get_nof_workers(config) - 1);
TasksManager<VectorOpTask<T, U>> task_manager(get_nof_workers(config) - 1);
const uint64_t total_nof_operations = size * config.batch_size;
for (uint64_t i = 0; i < total_nof_operations; i += NOF_OPERATIONS_PER_TASK) {
VectorOpTask<T>* task_p = task_manager.get_idle_or_completed_task();
VectorOpTask<T, U>* task_p = task_manager.get_idle_or_completed_task();
task_p->send_2ops_task(
op, std::min((uint64_t)NOF_OPERATIONS_PER_TASK, total_nof_operations - i), vec_a + i, vec_b + i, 1, output + i);
}
Expand All @@ -406,12 +406,12 @@ template <typename T>
eIcicleError cpu_scalar_vector_op(
VecOperation op, const T* scalar_a, const T* vec_b, uint64_t size, const VecOpsConfig& config, T* output)
{
TasksManager<VectorOpTask<T>> task_manager(get_nof_workers(config) - 1);
TasksManager<VectorOpTask<T, T>> task_manager(get_nof_workers(config) - 1);
const uint64_t total_nof_operations = size;
const uint32_t stride = config.columns_batch ? config.batch_size : 1;
for (uint32_t idx_in_batch = 0; idx_in_batch < config.batch_size; idx_in_batch++) {
for (uint64_t i = 0; i < total_nof_operations; i += NOF_OPERATIONS_PER_TASK) {
VectorOpTask<T>* task_p = task_manager.get_idle_or_completed_task();
VectorOpTask<T, T>* task_p = task_manager.get_idle_or_completed_task();
task_p->send_2ops_task(
op, std::min((uint64_t)NOF_OPERATIONS_PER_TASK, total_nof_operations - i), scalar_a + idx_in_batch,
config.columns_batch ? vec_b + idx_in_batch + i * config.batch_size : vec_b + idx_in_batch * size + i, stride,
Expand Down Expand Up @@ -455,14 +455,14 @@ eIcicleError cpu_vector_sub(
REGISTER_VECTOR_SUB_BACKEND("CPU", cpu_vector_sub<scalar_t>);

/*********************************** MUL ***********************************/
template <typename T>
template <typename T, typename U>
eIcicleError cpu_vector_mul(
const Device& device, const T* vec_a, const T* vec_b, uint64_t size, const VecOpsConfig& config, T* output)
const Device& device, const T* vec_a, const U* vec_b, uint64_t size, const VecOpsConfig& config, T* output)
{
return cpu_2vectors_op(VecOperation::VECTOR_MUL, vec_a, vec_b, size, config, output);
}

REGISTER_VECTOR_MUL_BACKEND("CPU", cpu_vector_mul<scalar_t>);
REGISTER_VECTOR_MUL_BACKEND("CPU", (cpu_vector_mul<scalar_t, scalar_t>));

/*********************************** DIV ***********************************/
template <typename T>
Expand All @@ -479,10 +479,10 @@ template <typename T>
eIcicleError cpu_convert_montgomery(
const Device& device, const T* input, uint64_t size, bool is_to_montgomery, const VecOpsConfig& config, T* output)
{
TasksManager<VectorOpTask<T>> task_manager(get_nof_workers(config) - 1);
TasksManager<VectorOpTask<T, T>> task_manager(get_nof_workers(config) - 1);
const uint64_t total_nof_operations = size * config.batch_size;
for (uint64_t i = 0; i < total_nof_operations; i += NOF_OPERATIONS_PER_TASK) {
VectorOpTask<T>* task_p = task_manager.get_idle_or_completed_task();
VectorOpTask<T, T>* task_p = task_manager.get_idle_or_completed_task();
task_p->send_1op_task(
(is_to_montgomery ? CONVERT_TO_MONTGOMERY : CONVERT_FROM_MONTGOMERY),
std::min((uint64_t)NOF_OPERATIONS_PER_TASK, total_nof_operations - i), input + i, output + i);
Expand All @@ -499,13 +499,13 @@ REGISTER_CONVERT_MONTGOMERY_BACKEND("CPU", cpu_convert_montgomery<scalar_t>);
template <typename T>
eIcicleError cpu_vector_sum(const Device& device, const T* vec_a, uint64_t size, const VecOpsConfig& config, T* output)
{
TasksManager<VectorOpTask<T>> task_manager(get_nof_workers(config) - 1);
TasksManager<VectorOpTask<T, T>> task_manager(get_nof_workers(config) - 1);
std::vector<bool> output_initialized = std::vector<bool>(config.batch_size, false);
uint64_t vec_a_offset = 0;
uint64_t idx_in_batch = 0;
// run until all vector deployed and all tasks completed
while (true) {
VectorOpTask<T>* task_p =
VectorOpTask<T, T>* task_p =
vec_a_offset < size ? task_manager.get_idle_or_completed_task() : task_manager.get_completed_task();
if (task_p == nullptr) { return eIcicleError::SUCCESS; }
if (task_p->is_completed()) {
Expand Down Expand Up @@ -539,13 +539,13 @@ template <typename T>
eIcicleError
cpu_vector_product(const Device& device, const T* vec_a, uint64_t size, const VecOpsConfig& config, T* output)
{
TasksManager<VectorOpTask<T>> task_manager(get_nof_workers(config) - 1);
TasksManager<VectorOpTask<T, T>> task_manager(get_nof_workers(config) - 1);
std::vector<bool> output_initialized = std::vector<bool>(config.batch_size, false);
uint64_t vec_a_offset = 0;
uint64_t idx_in_batch = 0;
// run until all vector deployed and all tasks completed
while (true) {
VectorOpTask<T>* task_p =
VectorOpTask<T, T>* task_p =
vec_a_offset < size ? task_manager.get_idle_or_completed_task() : task_manager.get_completed_task();
if (task_p == nullptr) { return eIcicleError::SUCCESS; }
if (task_p->is_completed()) {
Expand Down Expand Up @@ -610,7 +610,7 @@ template <typename T>
eIcicleError out_of_place_matrix_transpose(
const Device& device, const T* mat_in, uint32_t nof_rows, uint32_t nof_cols, const VecOpsConfig& config, T* mat_out)
{
TasksManager<VectorOpTask<T>> task_manager(get_nof_workers(config) - 1);
TasksManager<VectorOpTask<T, T>> task_manager(get_nof_workers(config) - 1);
uint32_t stride = config.columns_batch ? config.batch_size : 1;
const uint64_t total_elements_one_mat = static_cast<uint64_t>(nof_rows) * nof_cols;
const uint32_t NOF_ROWS_PER_TASK =
Expand All @@ -620,7 +620,7 @@ eIcicleError out_of_place_matrix_transpose(
T* cur_mat_out = config.columns_batch ? mat_out + idx_in_batch : mat_out + idx_in_batch * total_elements_one_mat;
// Perform the matrix transpose
for (uint32_t i = 0; i < nof_rows; i += NOF_ROWS_PER_TASK) {
VectorOpTask<T>* task_p = task_manager.get_idle_or_completed_task();
VectorOpTask<T, T>* task_p = task_manager.get_idle_or_completed_task();
task_p->send_out_of_place_matrix_transpose_task(
OUT_OF_PLACE_MATRIX_TRANSPOSE, cur_mat_in + stride * i * nof_cols,
std::min((uint64_t)NOF_ROWS_PER_TASK, (uint64_t)nof_rows - i), nof_rows, nof_cols, stride,
Expand Down Expand Up @@ -695,11 +695,11 @@ eIcicleError matrix_transpose_necklaces(
std::vector<uint64_t> start_indices_in_mat; // Collect start indices
gen_necklace<T>(1, 1, k, length, necklace, start_indices_in_mat);

TasksManager<VectorOpTask<T>> task_manager(get_nof_workers(config) - 1);
TasksManager<VectorOpTask<T, T>> task_manager(get_nof_workers(config) - 1);
for (uint64_t i = 0; i < start_indices_in_mat.size(); i += max_nof_operations) {
uint64_t nof_operations = std::min((uint64_t)max_nof_operations, start_indices_in_mat.size() - i);
for (uint64_t idx_in_batch = 0; idx_in_batch < config.batch_size; idx_in_batch++) {
VectorOpTask<T>* task_p = task_manager.get_idle_or_completed_task();
VectorOpTask<T, T>* task_p = task_manager.get_idle_or_completed_task();
task_p->send_replace_elements_task(
REPLACE_ELEMENTS, config.columns_batch ? mat_in + idx_in_batch : mat_in + idx_in_batch * total_elements_one_mat,
nof_operations, start_indices_in_mat, i, log_nof_rows, log_nof_cols,
Expand Down Expand Up @@ -746,10 +746,10 @@ cpu_bit_reverse(const Device& device, const T* vec_in, uint64_t size, const VecO
ICICLE_ASSERT((1ULL << logn) == size) << "Invalid argument - size is not a power of 2";

// Perform the bit reverse
TasksManager<VectorOpTask<T>> task_manager(get_nof_workers(config) - 1);
TasksManager<VectorOpTask<T, T>> task_manager(get_nof_workers(config) - 1);
for (uint64_t idx_in_batch = 0; idx_in_batch < config.batch_size; idx_in_batch++) {
for (uint64_t i = 0; i < size; i += NOF_OPERATIONS_PER_TASK) {
VectorOpTask<T>* task_p = task_manager.get_idle_or_completed_task();
VectorOpTask<T, T>* task_p = task_manager.get_idle_or_completed_task();

task_p->send_bit_reverse_task(
BIT_REVERSE, logn, i, std::min((uint64_t)NOF_OPERATIONS_PER_TASK, size - i),
Expand Down Expand Up @@ -783,10 +783,10 @@ eIcicleError cpu_slice(
ICICLE_ASSERT(vec_in != nullptr && vec_out != nullptr) << "Error: Invalid argument - input or output vector is null";
ICICLE_ASSERT(offset + (size_out - 1) * stride < size_in) << "Error: Invalid argument - slice out of bound";

TasksManager<VectorOpTask<T>> task_manager(get_nof_workers(config) - 1);
TasksManager<VectorOpTask<T, T>> task_manager(get_nof_workers(config) - 1);
for (uint64_t idx_in_batch = 0; idx_in_batch < config.batch_size; idx_in_batch++) {
for (uint64_t i = 0; i < size_out; i += NOF_OPERATIONS_PER_TASK) {
VectorOpTask<T>* task_p = task_manager.get_idle_or_completed_task();
VectorOpTask<T, T>* task_p = task_manager.get_idle_or_completed_task();
task_p->send_slice_task(
SLICE, config.columns_batch ? stride * config.batch_size : stride, config.columns_batch ? config.batch_size : 1,
std::min((uint64_t)NOF_OPERATIONS_PER_TASK, size_out - i),
Expand Down Expand Up @@ -978,7 +978,8 @@ REGISTER_POLYNOMIAL_DIVISION("CPU", cpu_poly_divide<scalar_t>);
REGISTER_VECTOR_ADD_EXT_FIELD_BACKEND("CPU", cpu_vector_add<extension_t>);
REGISTER_VECTOR_ACCUMULATE_EXT_FIELD_BACKEND("CPU", cpu_vector_accumulate<extension_t>);
REGISTER_VECTOR_SUB_EXT_FIELD_BACKEND("CPU", cpu_vector_sub<extension_t>);
REGISTER_VECTOR_MUL_EXT_FIELD_BACKEND("CPU", cpu_vector_mul<extension_t>);
REGISTER_VECTOR_MUL_EXT_FIELD_BACKEND("CPU", (cpu_vector_mul<extension_t, extension_t>));
REGISTER_VECTOR_MIXED_MUL_BACKEND("CPU", (cpu_vector_mul<extension_t, scalar_t>));
REGISTER_VECTOR_DIV_EXT_FIELD_BACKEND("CPU", cpu_vector_div<extension_t>);
REGISTER_CONVERT_MONTGOMERY_EXT_FIELD_BACKEND("CPU", cpu_convert_montgomery<extension_t>);
REGISTER_VECTOR_SUM_EXT_FIELD_BACKEND("CPU", cpu_vector_sum<extension_t>);
Expand Down
26 changes: 18 additions & 8 deletions icicle/include/icicle/backend/vec_ops_backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -269,20 +269,20 @@ namespace icicle {
const VecOpsConfig& config,
extension_t* output)>;

using extFieldVectorOpImplInplaceA = std::function<eIcicleError(
const Device& device, extension_t* vec_a, const extension_t* vec_b, uint64_t size, const VecOpsConfig& config)>;

using extFieldVectorReduceOpImpl = std::function<eIcicleError(
const Device& device, const extension_t* vec_a, uint64_t size, const VecOpsConfig& config, extension_t* output)>;

using extFieldVectorOpImpl = std::function<eIcicleError(
using mixedVectorOpImpl = std::function<eIcicleError(
const Device& device,
const extension_t* scalar_a,
const extension_t* vec_b,
const scalar_t* vec_b,
uint64_t size,
const VecOpsConfig& config,
extension_t* output)>;

using extFieldVectorOpImplInplaceA = std::function<eIcicleError(
const Device& device, extension_t* vec_a, const extension_t* vec_b, uint64_t size, const VecOpsConfig& config)>;

using extFieldVectorReduceOpImpl = std::function<eIcicleError(
const Device& device, const extension_t* vec_a, uint64_t size, const VecOpsConfig& config, extension_t* output)>;

void register_extension_vector_add(const std::string& deviceType, extFieldVectorOpImpl impl);

#define REGISTER_VECTOR_ADD_EXT_FIELD_BACKEND(DEVICE_TYPE, FUNC) \
Expand Down Expand Up @@ -322,6 +322,16 @@ namespace icicle {
}(); \
}

  // Registers FUNC as the backend implementation of mixed
  // (extension-field by base-field) vector multiplication for deviceType.
  void register_extension_vector_mixed_mul(const std::string& deviceType, mixedVectorOpImpl impl);

  // Self-registration hook: expands to a uniquely named static bool whose
  // initializer runs at load time and registers FUNC for DEVICE_TYPE.
#define REGISTER_VECTOR_MIXED_MUL_BACKEND(DEVICE_TYPE, FUNC)                                                           \
  namespace {                                                                                                          \
    static bool UNIQUE(_reg_vec_mixed_mul) = []() -> bool {                                                            \
      register_extension_vector_mixed_mul(DEVICE_TYPE, FUNC);                                                          \
      return true;                                                                                                     \
    }();                                                                                                               \
  }

void register_extension_vector_div(const std::string& deviceType, extFieldVectorOpImpl impl);

#define REGISTER_VECTOR_DIV_EXT_FIELD_BACKEND(DEVICE_TYPE, FUNC) \
Expand Down
20 changes: 20 additions & 0 deletions icicle/include/icicle/vec_ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,26 @@ namespace icicle {
template <typename T>
eIcicleError vector_mul(const T* vec_a, const T* vec_b, uint64_t size, const VecOpsConfig& config, T* output);

  /**
   * @brief Multiplies two vectors element-wise, where the two vectors may have
   *        different element types (e.g. an extension-field vector by a
   *        base-field vector).
   *
   * @tparam T Type of the elements in `vec_a` and in the output vector.
   * @tparam U Type of the elements in `vec_b`.
   * @param vec_a Pointer to the first input vector(s).
   *              - If `config.batch_size > 1`, this should be a concatenated array of vectors.
   *              - The layout depends on `config.columns_batch`:
   *                - If `false`, vectors are stored contiguously in memory.
   *                - If `true`, vectors are stored as columns in a 2D array.
   * @param vec_b Pointer to the second input vector(s).
   *              - The storage layout should match that of `vec_a`.
   * @param size Number of elements in each vector.
   * @param config Configuration for the operation.
   * @param output Pointer to the output vector(s) where the results will be stored.
   *               The output array should have the same storage layout as the input vectors.
   * @return eIcicleError Error code indicating success or failure.
   */
  template <typename T, typename U>
  eIcicleError vector_mul(const T* vec_a, const U* vec_b, uint64_t size, const VecOpsConfig& config, T* output);

/**
* @brief Divides vector `a` by vector `b` element-wise.
*
Expand Down
15 changes: 15 additions & 0 deletions icicle/src/vec_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,21 @@ namespace icicle {
{
return CONCAT_EXPAND(FIELD, extension_vector_mul)(vec_a, vec_b, size, &config, output);
}

  // Dispatcher that routes mixed (extension-field by base-field) element-wise
  // multiplication to the backend registered for the active device.
  ICICLE_DISPATCHER_INST(VectorMixedMulDispatcher, extension_vector_mixed_mul, mixedVectorOpImpl);

  // C API entry point for mixed vector multiplication; forwards to the
  // dispatcher after dereferencing the config pointer.
  extern "C" eIcicleError CONCAT_EXPAND(FIELD, extension_vector_mixed_mul)(
    const extension_t* vec_a, const scalar_t* vec_b, uint64_t size, const VecOpsConfig* config, extension_t* output)
  {
    return VectorMixedMulDispatcher::execute(vec_a, vec_b, size, *config, output);
  }

  // Specialization of the templated vector_mul API for the mixed case:
  // multiplies an extension-field vector by a base-field vector via the
  // C dispatch entry point.
  template <>
  eIcicleError vector_mul(
    const extension_t* vec_a, const scalar_t* vec_b, uint64_t size, const VecOpsConfig& config, extension_t* output)
  {
    return CONCAT_EXPAND(FIELD, extension_vector_mixed_mul)(vec_a, vec_b, size, &config, output);
  }
#endif // EXT_FIELD

/*********************************** DIV ***********************************/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@ int babybear_extension_matrix_transpose(
scalar_t* mat_out
);

/* Element-wise mixed multiplication: result[i] = vec_a[i] * vec_b[i], where
 * vec_a holds extension-field elements and vec_b base-field scalars.
 * NOTE(review): both parameters are declared as scalar_t* here although the
 * C++ side distinguishes extension_t from scalar_t — presumably the C layer
 * treats both as opaque limb buffers; confirm against the C++ signature. */
int babybear_extension_vector_mixed_mul(
  scalar_t* vec_a,
  scalar_t* vec_b,
  int n,
  VecOpsConfig* config,
  scalar_t* result
);

#ifdef __cplusplus
}
#endif
Expand Down
17 changes: 17 additions & 0 deletions wrappers/golang/fields/babybear/extension/vecOps/vec_ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,20 @@ func TransposeMatrix(in, out core.HostOrDeviceSlice, columnSize, rowSize int, co
err := (C.babybear_extension_matrix_transpose(cIn, cRowSize, cColumnSize, cConfig, cOut))
return runtime.EIcicleError(err)
}

// MixedVecOp performs an element-wise operation between an extension-field
// vector `a` and a base-field vector `b`, storing the result in `out`.
// Only core.Mul is currently dispatched to the native backend.
// NOTE(review): for any op other than core.Mul this returns ret's zero value
// without doing any work — consider returning an explicit "unsupported op"
// error code instead; confirm the intended contract.
func MixedVecOp(a, b, out core.HostOrDeviceSlice, config core.VecOpsConfig, op core.VecOps) (ret runtime.EIcicleError) {
	// Validates the slices against the config and unpacks raw pointers for cgo.
	aPointer, bPointer, outPointer, cfgPointer, size := core.VecOpCheck(a, b, out, &config)

	cA := (*C.scalar_t)(aPointer)
	cB := (*C.scalar_t)(bPointer)
	cOut := (*C.scalar_t)(outPointer)
	cConfig := (*C.VecOpsConfig)(cfgPointer)
	cSize := (C.int)(size)

	switch op {
	case core.Mul:
		ret = (runtime.EIcicleError)(C.babybear_extension_vector_mixed_mul(cA, cB, cSize, cConfig, cOut))
	}

	return ret
}
19 changes: 19 additions & 0 deletions wrappers/golang/fields/babybear/tests/extension_vec_ops_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"testing"

"github.com/ingonyama-zk/icicle/v3/wrappers/golang/core"
babybear "github.com/ingonyama-zk/icicle/v3/wrappers/golang/fields/babybear"
babybear_extension "github.com/ingonyama-zk/icicle/v3/wrappers/golang/fields/babybear/extension"
"github.com/ingonyama-zk/icicle/v3/wrappers/golang/fields/babybear/extension/vecOps"
"github.com/stretchr/testify/suite"
Expand Down Expand Up @@ -64,13 +65,31 @@ func testBabybear_extensionTranspose(suite *suite.Suite) {
suite.Equal(matrix, output)
}

// Multiplying an extension-field vector by a base-field vector of ones must be
// the identity; this exercises the mixed (extension * scalar) mul backend.
func testBabybear_extensionMixedVecOps(suite *suite.Suite) {
	testSize := 1 << 14

	a := babybear_extension.GenerateScalars(testSize)
	// Base-field vector filled with the multiplicative identity.
	var scalar babybear.ScalarField
	scalar.One()
	ones := core.HostSliceWithValue(scalar, testSize)

	out := make(core.HostSlice[babybear_extension.ExtensionField], testSize)

	cfg := core.DefaultVecOpsConfig()

	// NOTE(review): the returned error code is ignored — assert it is a
	// success value so a backend failure cannot pass silently.
	vecOps.MixedVecOp(a, ones, out, cfg, core.Mul)

	suite.Equal(a, out)
}

type Babybear_extensionVecOpsTestSuite struct {
suite.Suite
}

func (s *Babybear_extensionVecOpsTestSuite) TestBabybear_extensionVecOps() {
s.Run("TestBabybear_extensionVecOps", testWrapper(&s.Suite, testBabybear_extensionVecOps))
s.Run("TestBabybear_extensionTranspose", testWrapper(&s.Suite, testBabybear_extensionTranspose))
s.Run("TestBabybear_extensionMixedVecOps", testWrapper(&s.Suite, testBabybear_extensionMixedVecOps))
}

func TestSuiteBabybear_extensionVecOps(t *testing.T) {
Expand Down
1 change: 1 addition & 0 deletions wrappers/golang/fields/babybear/tests/vec_ops_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ type BabybearVecOpsTestSuite struct {
func (s *BabybearVecOpsTestSuite) TestBabybearVecOps() {
s.Run("TestBabybearVecOps", testWrapper(&s.Suite, testBabybearVecOps))
s.Run("TestBabybearTranspose", testWrapper(&s.Suite, testBabybearTranspose))

}

func TestSuiteBabybearVecOps(t *testing.T) {
Expand Down
12 changes: 12 additions & 0 deletions wrappers/golang/fields/koalabear/extension/extension_field.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,18 @@ func (f ExtensionField) Sqr() ExtensionField {
return res
}

// Pow raises the field element to the given integer power via the native
// koalabear extension backend and returns the resulting element.
func (f ExtensionField) Pow(exp int) ExtensionField {
	var result ExtensionField

	C.koalabear_extension_pow(
		(*C.scalar_t)(unsafe.Pointer(&f)),
		(C.int)(exp),
		(*C.scalar_t)(unsafe.Pointer(&result)),
	)

	return result
}

func convertScalarsMontgomery(scalars core.HostOrDeviceSlice, isInto bool) runtime.EIcicleError {
defaultCfg := core.DefaultVecOpsConfig()
cValues, _, _, cCfg, cSize := core.VecOpCheck(scalars, scalars, scalars, &defaultCfg)
Expand Down
Loading

0 comments on commit 290ca67

Please sign in to comment.