-
Notifications
You must be signed in to change notification settings - Fork 234
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'origin/repo-refactor' into graph-library-testing
- Loading branch information
Showing
212 changed files
with
4,377 additions
and
2,071 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#ifndef _FLEXFLOW_COMPILER_ALLOWED_MACHINE_VIEWS_H | ||
#define _FLEXFLOW_COMPILER_ALLOWED_MACHINE_VIEWS_H | ||
|
||
#include "pcg/machine_specification.dtg.h" | ||
#include "pcg/machine_view.dtg.h" | ||
#include "pcg/operator_task_space.dtg.h" | ||
|
||
namespace FlexFlow { | ||
|
||
/**
 * @brief Checks whether machine view `mv` can be used for `task` on machine
 * `ms`.
 *
 * As implemented in the accompanying source file, the view is reported valid
 * when the maximum task-space coordinate of `task` can be mapped through `mv`
 * to a machine-space coordinate for `ms` (i.e. the lookup yields a value).
 *
 * @param mv   Machine view to check.
 * @param task Task space of the operator being placed.
 * @param ms   Specification of the machine the view is checked against.
 * @return true if the mapping of the maximum task coordinate has a value,
 *         false otherwise.
 */
bool is_valid_machine_view(MachineView const &mv, | ||
OperatorTaskSpace const &task, | ||
MachineSpecification const &ms); | ||
|
||
/**
 * @brief Returns the machine views that are valid for `task` on
 * `machine_spec` for the given device type.
 *
 * As implemented in the accompanying source file, a set of candidate views is
 * generated and then filtered with `is_valid_machine_view`, so every returned
 * view satisfies that predicate.
 *
 * @param machine_spec Specification of the target machine.
 * @param task         Task space of the operator being placed.
 * @param device_type  Device type used for the start coordinates of the views.
 * @return Set of valid machine views (possibly empty).
 */
std::unordered_set<MachineView> | ||
get_allowed_machine_views(MachineSpecification const &machine_spec, | ||
OperatorTaskSpace const &task, | ||
DeviceType device_type); | ||
|
||
} // namespace FlexFlow | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
#include "compiler/allowed_machine_views.h" | ||
#include "pcg/machine_specification.h" | ||
#include "pcg/machine_view.h" | ||
#include "pcg/multi_dimensional_stride.dtg.h" | ||
#include "pcg/operator_task_space.h" | ||
#include "utils/containers/all_of.h" | ||
#include "utils/containers/cartesian_product.h" | ||
#include "utils/containers/extend.h" | ||
#include "utils/containers/filter.h" | ||
#include "utils/containers/get_all_permutations_with_repetition.h" | ||
#include "utils/containers/map_from_keys_and_values.h" | ||
#include "utils/containers/product.h" | ||
#include "utils/containers/range.h" | ||
#include "utils/containers/replicate.h" | ||
#include "utils/containers/sorted.h" | ||
#include "utils/containers/transform.h" | ||
#include "utils/containers/unordered_multiset_of.h" | ||
#include "utils/containers/unordered_set_of.h" | ||
#include "utils/containers/zip.h" | ||
#include "utils/overload.h" | ||
|
||
namespace FlexFlow { | ||
|
||
// A machine view is valid for a task on a machine when the task's maximum
// task-space coordinate can be mapped to a machine-space coordinate. The
// lookup returns an empty optional when no such coordinate exists.
bool is_valid_machine_view(MachineView const &mv,
                           OperatorTaskSpace const &task,
                           MachineSpecification const &ms) {
  auto const max_task_coord = get_task_space_maximum_coordinate(task);
  std::optional<MachineSpaceCoordinate> const mapped =
      get_machine_space_coordinate(task, mv, max_task_coord, ms);
  return mapped != std::nullopt;
}
|
||
/* | ||
* Generates a set of candidate `MachineView`s. | ||
* The returned set includes all valid machine views, and might contain invalid | ||
* ones. This function should not be used externally (see | ||
* `get_allowed_machine_views` instead). There is no guarantee that a non-empty | ||
* returned set contains a valid machine view (i.e. it's possible for all | ||
* the returned `MachineView`s to be invalid) | ||
*/ | ||
static std::unordered_set<MachineView> | ||
get_candidate_machine_views(MachineSpecification const &machine_spec, | ||
OperatorTaskSpace const &task, | ||
DeviceType const &device_type) { | ||
|
||
auto get_max_stride_upper_bound = [](std::vector<int> const &tensor_dims, | ||
int total_devices) -> int { | ||
int min_num_devices_with_full_stride_volume = product(transform( | ||
tensor_dims, [](int const &num_devices) { return num_devices - 1; })); | ||
return std::ceil(total_devices / min_num_devices_with_full_stride_volume); | ||
}; | ||
|
||
auto candidate_strides = [&](std::vector<int> const &tensor_dims, | ||
int total_devices) | ||
-> std::unordered_multiset<MultiDimensionalStride> { | ||
int max_stride_upper_bound = | ||
get_max_stride_upper_bound(tensor_dims, total_devices); | ||
|
||
std::vector<stride_t> single_stride_range = | ||
transform(range(1, max_stride_upper_bound + 1), | ||
[](int stride) { return stride_t{stride}; }); | ||
std::unordered_multiset<std::vector<stride_t>> raw_stride_vectors = | ||
cartesian_product(replicate(tensor_dims.size(), single_stride_range)); | ||
std::unordered_multiset<MultiDimensionalStride> strides = | ||
transform(raw_stride_vectors, [](auto const &stride_vec) { | ||
return MultiDimensionalStride{stride_vec}; | ||
}); | ||
return strides; | ||
}; | ||
|
||
auto candidate_starts = [](MachineSpecification const &ms, | ||
DeviceType const &device_type) { | ||
std::unordered_set<MachineSpaceCoordinate> result; | ||
for (int node_idx : range(ms.num_nodes)) { | ||
for (int device_idx : range(get_num_devices_per_node(ms, device_type))) { | ||
result.insert( | ||
MachineSpaceCoordinate{node_idx, device_idx, device_type}); | ||
} | ||
} | ||
return result; | ||
}; | ||
|
||
auto candidate_dimensions = [](OperatorTaskSpace const &task) { | ||
std::unordered_set<MachineSpecificationDimension> options = { | ||
MachineSpecificationDimension::INTER_NODE, | ||
MachineSpecificationDimension::INTRA_NODE}; | ||
return get_all_permutations_with_repetition(options, num_dims(task)); | ||
}; | ||
|
||
std::vector<int> tensor_dims = task.degrees; | ||
int total_devices = get_num_devices(machine_spec, device_type); | ||
|
||
std::unordered_set<MachineView> machine_views; | ||
|
||
for (MultiDimensionalStride const &strides : | ||
candidate_strides(tensor_dims, total_devices)) { | ||
for (MachineSpaceCoordinate start : | ||
candidate_starts(machine_spec, device_type)) { | ||
for (std::vector<MachineSpecificationDimension> const &dims : | ||
candidate_dimensions(task)) { | ||
machine_views.insert( | ||
machine_view_from_strides_and_machine_spec_dimensions( | ||
start, strides.raw_strides, dims)); | ||
} | ||
} | ||
} | ||
return machine_views; | ||
} | ||
|
||
std::unordered_set<MachineView> | ||
get_allowed_machine_views(MachineSpecification const &machine_spec, | ||
OperatorTaskSpace const &task, | ||
DeviceType device_type) { | ||
|
||
std::unordered_set<MachineView> views = | ||
get_candidate_machine_views(machine_spec, task, device_type); | ||
return filter(views, [&](MachineView const &mv) { | ||
return is_valid_machine_view(mv, task, machine_spec); | ||
}); | ||
} | ||
|
||
} // namespace FlexFlow |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
#include "compiler/allowed_machine_views.h" | ||
#include "doctest/doctest.h" | ||
#include "utils/containers/extend.h" | ||
#include "utils/containers/range.h" | ||
#include "utils/containers/transform.h" | ||
#include "utils/containers/unordered_set_of.h" | ||
#include "utils/containers/zip.h" | ||
#include "utils/fmt/unordered_set.h" | ||
|
||
using namespace FlexFlow; | ||
|
||
TEST_SUITE(FF_TEST_SUITE) {

  // Checks get_allowed_machine_views against hand-enumerated expected sets
  // for a 1-D and a 2-D operator task space on GPU devices.
  TEST_CASE("get_allowed_machine_views") {

    auto const intra = MachineSpecificationDimension::INTRA_NODE;
    auto const inter = MachineSpecificationDimension::INTER_NODE;

    SUBCASE("1 degree of parallelism") {
      // Single node with 5 CPUs and 5 GPUs.
      MachineSpecification ms{
          /*num_nodes=*/1,
          /*num_cpus_per_node=*/5,
          /*num_gpus_per_node=*/5,
          /*inter_node_bandwidth=*/0,
          /*intra_node_bandwidth=*/0,
      };

      OperatorTaskSpace task{{3}};

      // Builds a 1-D intra-node GPU view on node 0.
      auto view_1d = [&](int start_device_idx, int stride) {
        MachineSpaceCoordinate start{
            /*node_idx=*/0, /*device_idx=*/start_device_idx, DeviceType::GPU};
        return MachineView{
            start,
            {MachineViewDimension{stride_t{stride}, intra}},
        };
      };

      std::unordered_set<MachineView> correct = {
          view_1d(/*start_device_idx=*/0, /*stride=*/1),
          view_1d(/*start_device_idx=*/1, /*stride=*/1),
          view_1d(/*start_device_idx=*/2, /*stride=*/1),
          view_1d(/*start_device_idx=*/0, /*stride=*/2),
      };

      std::unordered_set<MachineView> result =
          get_allowed_machine_views(ms, task, DeviceType::GPU);

      CHECK(correct == result);
    }

    SUBCASE("2 degrees of parallelism") {
      // Three nodes, each with 3 CPUs and 3 GPUs.
      MachineSpecification ms{
          /*num_nodes=*/3,
          /*num_cpus_per_node=*/3,
          /*num_gpus_per_node=*/3,
          /*inter_node_bandwidth=*/0,
          /*intra_node_bandwidth=*/0,
      };

      OperatorTaskSpace task{{2, 3}};

      // Builds a 2-D GPU view from a start coordinate plus, per task
      // dimension, a stride and the machine dimension it is projected onto.
      auto view_2d = [](int start_node_idx,
                        int start_device_idx,
                        int stride1,
                        int stride2,
                        MachineSpecificationDimension m1,
                        MachineSpecificationDimension m2) {
        MachineSpaceCoordinate start{
            start_node_idx, start_device_idx, DeviceType::GPU};
        MachineViewDimension first_dim{stride_t{stride1}, m1};
        MachineViewDimension second_dim{stride_t{stride2}, m2};
        return MachineView{start, {first_dim, second_dim}};
      };

      std::unordered_set<MachineView> correct = {
          // first task dimension across nodes, second within a node
          view_2d(0, 0, /*stride1=*/1, /*stride2=*/1, inter, intra),
          view_2d(1, 0, /*stride1=*/1, /*stride2=*/1, inter, intra),
          view_2d(0, 0, /*stride1=*/2, /*stride2=*/1, inter, intra),

          // first task dimension within a node, second across nodes
          view_2d(0, 0, /*stride1=*/1, /*stride2=*/1, intra, inter),
          view_2d(0, 1, /*stride1=*/1, /*stride2=*/1, intra, inter),
          view_2d(0, 0, /*stride1=*/2, /*stride2=*/1, intra, inter),
      };

      std::unordered_set<MachineView> result =
          get_allowed_machine_views(ms, task, DeviceType::GPU);

      CHECK(correct == result);
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.