Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/repo-refactor' into graph-librar…
Browse files Browse the repository at this point in the history
…y-testing
  • Loading branch information
Pietro Max Marsella committed Oct 15, 2024
2 parents 44b32f8 + 1d5140d commit a773b20
Show file tree
Hide file tree
Showing 212 changed files with 4,377 additions and 2,071 deletions.
21 changes: 21 additions & 0 deletions lib/compiler/include/compiler/allowed_machine_views.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#ifndef _FLEXFLOW_COMPILER_ALLOWED_MACHINE_VIEWS_H
#define _FLEXFLOW_COMPILER_ALLOWED_MACHINE_VIEWS_H

#include "pcg/machine_specification.dtg.h"
#include "pcg/machine_view.dtg.h"
#include "pcg/operator_task_space.dtg.h"

namespace FlexFlow {

/**
 * @brief Checks whether a machine view can be used for a task space on a
 * given machine.
 *
 * The implementation maps the maximum coordinate of \p task through \p mv
 * onto \p ms and reports whether that mapping produced a machine space
 * coordinate.
 *
 * @param mv   Machine view to check.
 * @param task Operator task space the view would be applied to.
 * @param ms   Machine the view must fit on.
 * @return true if the maximum task coordinate maps to a machine space
 *         coordinate, false otherwise.
 */
bool is_valid_machine_view(MachineView const &mv,
OperatorTaskSpace const &task,
MachineSpecification const &ms);

/**
 * @brief Enumerates the machine views that may be used for \p task on
 * \p machine_spec.
 *
 * Candidate views are generated internally and only those accepted by
 * `is_valid_machine_view(view, task, machine_spec)` are returned.
 *
 * @param machine_spec Machine to place the task on.
 * @param task         Operator task space to be placed.
 * @param device_type  Device type used for the start coordinates of the
 *                     generated views.
 * @return The set of valid machine views (possibly empty).
 */
std::unordered_set<MachineView>
get_allowed_machine_views(MachineSpecification const &machine_spec,
OperatorTaskSpace const &task,
DeviceType device_type);

} // namespace FlexFlow

#endif
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_PARALLEL_LAYER_GUID_OBLIVIOUS_MACHINE_MAPPING_H

#include "compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.dtg.h"
#include <optional>

namespace FlexFlow {

Expand Down
122 changes: 122 additions & 0 deletions lib/compiler/src/compiler/allowed_machine_views.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#include "compiler/allowed_machine_views.h"
#include "pcg/machine_specification.h"
#include "pcg/machine_view.h"
#include "pcg/multi_dimensional_stride.dtg.h"
#include "pcg/operator_task_space.h"
#include "utils/containers/all_of.h"
#include "utils/containers/cartesian_product.h"
#include "utils/containers/extend.h"
#include "utils/containers/filter.h"
#include "utils/containers/get_all_permutations_with_repetition.h"
#include "utils/containers/map_from_keys_and_values.h"
#include "utils/containers/product.h"
#include "utils/containers/range.h"
#include "utils/containers/replicate.h"
#include "utils/containers/sorted.h"
#include "utils/containers/transform.h"
#include "utils/containers/unordered_multiset_of.h"
#include "utils/containers/unordered_set_of.h"
#include "utils/containers/zip.h"
#include "utils/overload.h"

namespace FlexFlow {

/*
 * Returns true iff the maximum coordinate of `task` can be mapped through
 * `mv` onto a machine space coordinate of `ms`.
 * NOTE(review): presumably if the maximum task coordinate fits on the machine
 * then every smaller coordinate does too -- confirm against
 * `get_machine_space_coordinate`.
 */
bool is_valid_machine_view(MachineView const &mv,
                           OperatorTaskSpace const &task,
                           MachineSpecification const &ms) {
  auto const farthest_task_coord = get_task_space_maximum_coordinate(task);
  return get_machine_space_coordinate(task, mv, farthest_task_coord, ms)
      .has_value();
}

/*
 * Generates a set of candidate `MachineView`s for `task` on `machine_spec`.
 *
 * Every combination of (stride vector, start coordinate, machine-dimension
 * assignment) is turned into a `MachineView`:
 *   - strides:    each task dimension gets a stride in [1, upper_bound]
 *   - starts:     every (node_idx, device_idx) of `device_type` in the machine
 *   - dimensions: every assignment of INTER_NODE / INTRA_NODE to the task dims
 *
 * The returned set is a superset of the views of interest and might contain
 * invalid ones. This function should not be used externally (see
 * `get_allowed_machine_views` instead). There is no guarantee that a non-empty
 * returned set contains a valid machine view (i.e. it's possible for all
 * the returned `MachineView`s to be invalid).
 */
static std::unordered_set<MachineView>
    get_candidate_machine_views(MachineSpecification const &machine_spec,
                                OperatorTaskSpace const &task,
                                DeviceType const &device_type) {

  // Inclusive upper bound on the stride tried along each task dimension.
  auto get_max_stride_upper_bound = [](std::vector<int> const &tensor_dims,
                                       int total_devices) -> int {
    // A dimension of degree d contributes (d - 1) steps. A degree of 1
    // contributes no steps, so it is counted as a factor of 1 rather than 0:
    // this keeps the divisor strictly positive (a zero divisor would be an
    // integer division by zero).
    int min_num_devices_with_full_stride_volume =
        product(transform(tensor_dims, [](int const &num_devices) {
          return (num_devices > 1) ? (num_devices - 1) : 1;
        }));
    // Integer ceiling division. Calling std::ceil on the result of an
    // int / int division is a no-op because the quotient is already truncated.
    return (total_devices + min_num_devices_with_full_stride_volume - 1) /
           min_num_devices_with_full_stride_volume;
  };

  // All stride vectors with one entry per task dimension, each entry in
  // [1, max_stride_upper_bound].
  auto candidate_strides = [&](std::vector<int> const &tensor_dims,
                               int total_devices)
      -> std::unordered_multiset<MultiDimensionalStride> {
    int max_stride_upper_bound =
        get_max_stride_upper_bound(tensor_dims, total_devices);

    std::vector<stride_t> single_stride_range =
        transform(range(1, max_stride_upper_bound + 1),
                  [](int stride) { return stride_t{stride}; });
    std::unordered_multiset<std::vector<stride_t>> raw_stride_vectors =
        cartesian_product(replicate(tensor_dims.size(), single_stride_range));
    std::unordered_multiset<MultiDimensionalStride> strides =
        transform(raw_stride_vectors, [](auto const &stride_vec) {
          return MultiDimensionalStride{stride_vec};
        });
    return strides;
  };

  // Every device of the requested type in the machine is a possible start.
  auto candidate_starts = [](MachineSpecification const &ms,
                             DeviceType const &device_type) {
    std::unordered_set<MachineSpaceCoordinate> result;
    for (int node_idx : range(ms.num_nodes)) {
      for (int device_idx : range(get_num_devices_per_node(ms, device_type))) {
        result.insert(
            MachineSpaceCoordinate{node_idx, device_idx, device_type});
      }
    }
    return result;
  };

  // Every way of assigning INTER_NODE / INTRA_NODE to the task dimensions.
  auto candidate_dimensions = [](OperatorTaskSpace const &task) {
    std::unordered_set<MachineSpecificationDimension> options = {
        MachineSpecificationDimension::INTER_NODE,
        MachineSpecificationDimension::INTRA_NODE};
    return get_all_permutations_with_repetition(options, num_dims(task));
  };

  std::vector<int> tensor_dims = task.degrees;
  int total_devices = get_num_devices(machine_spec, device_type);

  // The three option sets are independent of each other, so build each one
  // once instead of rebuilding the inner ones on every outer iteration.
  auto const stride_options = candidate_strides(tensor_dims, total_devices);
  auto const start_options = candidate_starts(machine_spec, device_type);
  auto const dimension_options = candidate_dimensions(task);

  std::unordered_set<MachineView> machine_views;

  for (MultiDimensionalStride const &strides : stride_options) {
    for (MachineSpaceCoordinate const &start : start_options) {
      for (std::vector<MachineSpecificationDimension> const &dims :
           dimension_options) {
        machine_views.insert(
            machine_view_from_strides_and_machine_spec_dimensions(
                start, strides.raw_strides, dims));
      }
    }
  }
  return machine_views;
}

/*
 * Returns the candidate machine views for `task` on `machine_spec` (using
 * devices of `device_type`) that pass `is_valid_machine_view`.
 */
std::unordered_set<MachineView>
    get_allowed_machine_views(MachineSpecification const &machine_spec,
                              OperatorTaskSpace const &task,
                              DeviceType device_type) {

  auto const fits_on_machine = [&](MachineView const &candidate) {
    return is_valid_machine_view(candidate, task, machine_spec);
  };

  std::unordered_set<MachineView> candidates =
      get_candidate_machine_views(machine_spec, task, device_type);
  return filter(candidates, fits_on_machine);
}

} // namespace FlexFlow
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#include "compiler/machine_mapping/machine_mapping.h"
#include "utils/containers.h"
#include "utils/containers/are_disjoint.h"
#include "utils/containers/keys.h"
#include "utils/containers/merge_maps.h"
Expand Down
104 changes: 104 additions & 0 deletions lib/compiler/test/src/allowed_machine_views.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#include "compiler/allowed_machine_views.h"
#include "doctest/doctest.h"
#include "utils/containers/extend.h"
#include "utils/containers/range.h"
#include "utils/containers/transform.h"
#include "utils/containers/unordered_set_of.h"
#include "utils/containers/zip.h"
#include "utils/fmt/unordered_set.h"

using namespace FlexFlow;

TEST_SUITE(FF_TEST_SUITE) {

  // Compares the full output of get_allowed_machine_views against
  // hand-enumerated sets of GPU machine views.
  TEST_CASE("get_allowed_machine_views") {

    auto const intra = MachineSpecificationDimension::INTRA_NODE;
    auto const inter = MachineSpecificationDimension::INTER_NODE;

    SUBCASE("1 degree of parallelism") {
      // Single node with 5 GPUs; task space with one dimension of degree 3.
      MachineSpecification ms = MachineSpecification{
          /*num_nodes=*/1,
          /*num_cpus_per_node=*/5,
          /*num_gpus_per_node=*/5,
          /*inter_node_bandwidth=*/0,
          /*intra_node_bandwidth=*/0,
      };
      OperatorTaskSpace task = OperatorTaskSpace{{3}};

      // One-dimensional intra-node GPU view starting on node 0.
      auto make_1d_intra_view = [&](int start_device_idx, int stride) {
        return MachineView{
            MachineSpaceCoordinate{
                /*node_idx=*/0, start_device_idx, DeviceType::GPU},
            {MachineViewDimension{stride_t{stride}, intra}},
        };
      };

      std::unordered_set<MachineView> correct = {
          make_1d_intra_view(/*start_device_idx=*/0, /*stride=*/1),
          make_1d_intra_view(/*start_device_idx=*/1, /*stride=*/1),
          make_1d_intra_view(/*start_device_idx=*/2, /*stride=*/1),
          make_1d_intra_view(/*start_device_idx=*/0, /*stride=*/2),
      };

      std::unordered_set<MachineView> result =
          get_allowed_machine_views(ms, task, DeviceType::GPU);

      CHECK(correct == result);
    }

    SUBCASE("2 degrees of parallelism") {
      // 3 nodes with 3 GPUs each; task space with degrees 2 and 3.
      MachineSpecification ms = MachineSpecification{
          /*num_nodes=*/3,
          /*num_cpus_per_node=*/3,
          /*num_gpus_per_node=*/3,
          /*inter_node_bandwidth=*/0,
          /*intra_node_bandwidth=*/0,
      };
      OperatorTaskSpace task = OperatorTaskSpace{{2, 3}};

      // Two-dimensional GPU view with the given start, strides and
      // machine-dimension assignment.
      auto make_2d_view = [&](int start_node_idx,
                              int start_device_idx,
                              int stride1,
                              int stride2,
                              MachineSpecificationDimension m1,
                              MachineSpecificationDimension m2) {
        MachineSpaceCoordinate start = MachineSpaceCoordinate{
            start_node_idx, start_device_idx, DeviceType::GPU};
        return MachineView{
            start,
            {MachineViewDimension{stride_t{stride1}, m1},
             MachineViewDimension{stride_t{stride2}, m2}},
        };
      };

      std::unordered_set<MachineView> correct = {
          // first task dimension across nodes, second within a node
          make_2d_view(0, 0, /*stride1=*/1, /*stride2=*/1, inter, intra),
          make_2d_view(1, 0, /*stride1=*/1, /*stride2=*/1, inter, intra),
          make_2d_view(0, 0, /*stride1=*/2, /*stride2=*/1, inter, intra),

          // first task dimension within a node, second across nodes
          make_2d_view(0, 0, /*stride1=*/1, /*stride2=*/1, intra, inter),
          make_2d_view(0, 1, /*stride1=*/1, /*stride2=*/1, intra, inter),
          make_2d_view(0, 0, /*stride1=*/2, /*stride2=*/1, intra, inter),
      };

      std::unordered_set<MachineView> result =
          get_allowed_machine_views(ms, task, DeviceType::GPU);

      CHECK(correct == result);
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,35 @@ TEST_SUITE(FF_TEST_SUITE) {
};
};

MachineView mv1 = make_1d_machine_view(gpu_id_t(1), gpu_id_t(2));
MachineView mv2 = make_1d_machine_view(gpu_id_t(1), gpu_id_t(3));
MachineView mv1 = MachineView{
/*start=*/MachineSpaceCoordinate{
/*node_idx=*/0,
/*device_idx=*/0,
/*device_type=*/DeviceType::GPU,
},
/*dimensions=*/
{
MachineViewDimension{
stride_t{1},
MachineSpecificationDimension::INTRA_NODE,
},
},
};

MachineView mv2 = MachineView{
/*start=*/MachineSpaceCoordinate{
/*node_idx=*/0,
/*device_idx=*/0,
/*device_type=*/DeviceType::GPU,
},
/*dimensions=*/
{
MachineViewDimension{
stride_t{2},
MachineSpecificationDimension::INTRA_NODE,
},
},
};

MachineSpecification full_machine_spec = MachineSpecification{
/*num_nodes=*/2,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,65 @@ TEST_SUITE(FF_TEST_SUITE) {
ParallelLayerAddedResult relu_2 = add_parallel_layer(
pcg, relu_attrs, {get_only(relu_1.outputs)}, {relu_output_attrs});

MachineView pre_mv1 = make_1d_machine_view(gpu_id_t{0}, gpu_id_t{1});
MachineView pre_mv2 = make_1d_machine_view(gpu_id_t{0}, gpu_id_t{2});
MachineView post_mv1 = make_1d_machine_view(gpu_id_t{0}, gpu_id_t{3});
MachineView post_mv2 = make_1d_machine_view(gpu_id_t{0}, gpu_id_t{4});
MachineView pre_mv1 = MachineView{
/*start=*/MachineSpaceCoordinate{
/*node_idx=*/0,
/*device_idx=*/0,
/*device_type=*/DeviceType::GPU,
},
/*dimensions=*/
{
MachineViewDimension{
stride_t{1},
MachineSpecificationDimension::INTRA_NODE,
},
},
};

MachineView pre_mv2 = MachineView{
/*start=*/MachineSpaceCoordinate{
/*node_idx=*/0,
/*device_idx=*/0,
/*device_type=*/DeviceType::GPU,
},
/*dimensions=*/
{
MachineViewDimension{
stride_t{2},
MachineSpecificationDimension::INTRA_NODE,
},
},
};

MachineView post_mv1 = MachineView{
/*start=*/MachineSpaceCoordinate{
/*node_idx=*/0,
/*device_idx=*/0,
/*device_type=*/DeviceType::GPU,
},
/*dimensions=*/
{
MachineViewDimension{
stride_t{3},
MachineSpecificationDimension::INTRA_NODE,
},
},
};

MachineView post_mv2 = MachineView{
/*start=*/MachineSpaceCoordinate{
/*node_idx=*/0,
/*device_idx=*/0,
/*device_type=*/DeviceType::GPU,
},
/*dimensions=*/
{
MachineViewDimension{
stride_t{4},
MachineSpecificationDimension::INTRA_NODE,
},
},
};

SUBCASE("single edge across split") {
PCGBinarySeriesSplit split = PCGBinarySeriesSplit{
Expand Down
Loading

0 comments on commit a773b20

Please sign in to comment.