Skip to content

Commit

Permalink
Memory optimization algorithm (#1523)
Browse files Browse the repository at this point in the history
* initial implementation of memory algorithm

* fmt

* pass existing tests

* initialize memory algorithm

* fix tests & format

* minimum tests for memory algorithm

* renaming

* fmt

* fix

* rename single machine mapping

* format

---------

Co-authored-by: Colin Unger <[email protected]>
  • Loading branch information
wmdi and lockshaw authored Jan 9, 2025
1 parent 186e8eb commit 030bfd6
Show file tree
Hide file tree
Showing 19 changed files with 1,551 additions and 19 deletions.
5 changes: 3 additions & 2 deletions lib/compiler/include/compiler/cost_estimator/cost_estimator.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_COST_ESTIMATOR_COST_ESTIMATOR_H

#include "compiler/cost_estimator/op_cost_estimate_key.dtg.h"
#include "compiler/cost_estimator/op_cost_metrics.dtg.h"
#include "compiler/cost_estimator/tensor_set_movement.dtg.h"
#include "op-attrs/parallel_tensor_shape.dtg.h"
#include "op-attrs/pcg_operator_attrs.dtg.h"
Expand All @@ -11,7 +12,7 @@
namespace FlexFlow {

struct ICostEstimator {
virtual float estimate_cost(OpCostEstimateKey const &) const = 0;
virtual OpCostMetrics estimate_cost(OpCostEstimateKey const &) const = 0;
virtual float estimate_cost(TensorSetMovement const &) const = 0;

ICostEstimator() = default;
Expand All @@ -23,7 +24,7 @@ struct ICostEstimator {
CHECK_RC_COPY_VIRTUAL_COMPLIANT(ICostEstimator);

struct CostEstimator {
float estimate_cost(OpCostEstimateKey const &k) const;
OpCostMetrics estimate_cost(OpCostEstimateKey const &) const;
float estimate_cost(TensorSetMovement const &m) const;

template <typename T, typename... Args>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Struct spec for FlexFlow::OpCostMetrics: a cost record with a runtime
# component and a memory component.
# NOTE(review): presumably the source for op_cost_metrics.dtg.h, which
# cost_estimator.h includes - confirm against the build setup.
namespace = "FlexFlow"
name = "OpCostMetrics"
# Capabilities requested for the struct: equality, formatting and hashing.
features = [
"eq",
"fmt",
"hash",
]

# No extra headers are requested for the field types.
includes = [
]

# Runtime component of the cost.
# NOTE(review): units are not stated in this spec - TODO confirm.
[[fields]]
name = "runtime"
type = "float"

# Memory component of the cost.
# NOTE(review): presumably a byte count; not stated in this spec - TODO confirm.
[[fields]]
name = "memory"
type = "size_t"
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#ifndef _FLEXFLOW_COMPILER_MACHINE_MAPPING_MEMORY_OPTIMIZATION_GET_OPTIMAL_MACHINE_MAPPING_WITH_MEMORY_H
#define _FLEXFLOW_COMPILER_MACHINE_MAPPING_MEMORY_OPTIMIZATION_GET_OPTIMAL_MACHINE_MAPPING_WITH_MEMORY_H

#include "compiler/machine_mapping/machine_mapping_cache.dtg.h"
#include "compiler/machine_mapping/machine_mapping_constraints.dtg.h"
#include "compiler/machine_mapping/machine_mapping_context.dtg.h"
#include "compiler/machine_mapping/machine_mapping_problem_tree/machine_mapping_problem_tree.dtg.h"
#include "compiler/machine_mapping/machine_mapping_problem_tree/mm_problem_tree_parallel_split.dtg.h"
#include "compiler/machine_mapping/machine_mapping_problem_tree/mm_problem_tree_series_split.dtg.h"
#include "compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_cache.dtg.h"
#include "compiler/machine_mapping/parallel_split_transformation.dtg.h"
#include "pcg/machine_specification.dtg.h"

namespace FlexFlow {

// Overload set producing a MachineMappingWithMemoryResult for a machine
// mapping problem. Every overload takes:
//   result_cache - MachineMappingWithMemoryCache, passed by mutable reference
//   context      - MachineMappingContext, read-only
//   resources    - MachineSpecification, read-only
//   constraints  - MachineMappingConstraints, read-only
// The third parameter's type selects the overload (whole tree, series split,
// parallel split, or leaf).
// NOTE(review): std::optional is used below, but <optional> is not among this
// header's direct includes; presumably it arrives transitively - confirm.

// Overload taking a complete MachineMappingProblemTree.
// NOTE(review): presumably the entry point that dispatches to the
// node-specific overloads below - confirm against the implementation.
MachineMappingWithMemoryResult get_optimal_machine_mapping_with_memory(
MachineMappingWithMemoryCache &result_cache,
MachineMappingContext const &context,
MachineMappingProblemTree const &problem_tree,
MachineSpecification const &resources,
MachineMappingConstraints const &constraints);

// Overload for a series-split node. This is the only overload that also
// takes an optional ParallelSplitTransformation.
// NOTE(review): the meaning of std::nullopt for that argument is not visible
// in this header - confirm against the implementation.
MachineMappingWithMemoryResult get_optimal_machine_mapping_with_memory(
MachineMappingWithMemoryCache &result_cache,
MachineMappingContext const &context,
MMProblemTreeSeriesSplit const &series_split,
MachineSpecification const &resources,
MachineMappingConstraints const &constraints,
std::optional<ParallelSplitTransformation> const
&parallel_split_transformation);

// Overload for a parallel-split node.
MachineMappingWithMemoryResult get_optimal_machine_mapping_with_memory(
MachineMappingWithMemoryCache &result_cache,
MachineMappingContext const &context,
MMProblemTreeParallelSplit const &parallel_split,
MachineSpecification const &resources,
MachineMappingConstraints const &constraints);

// Overload for a leaf, given as an UnmappedOpCostEstimateKey. The
// MachineMappingContext parameter is left unnamed in this declaration.
MachineMappingWithMemoryResult get_optimal_machine_mapping_with_memory(
MachineMappingWithMemoryCache &result_cache,
MachineMappingContext const &,
UnmappedOpCostEstimateKey const &leaf,
MachineSpecification const &resources,
MachineMappingConstraints const &constraints);

} // namespace FlexFlow

#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Struct spec for FlexFlow::MachineMappingForSingleLayer: pairs a cost
# (OpCostMetrics) with a ParallelLayerGuidObliviousMachineMapping.
namespace = "FlexFlow"
name = "MachineMappingForSingleLayer"
# Capabilities requested for the struct: equality, hashing and formatting.
features = [
"eq",
"hash",
"fmt",
]

# Headers declaring the two field types below.
includes = [
"compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.dtg.h",
"compiler/cost_estimator/op_cost_metrics.dtg.h",
]

# Cost associated with the mapping stored in `machine_mapping`.
[[fields]]
name = "cost"
type = "::FlexFlow::OpCostMetrics"

# The machine mapping that the cost above belongs to.
[[fields]]
name = "machine_mapping"
type = "::FlexFlow::ParallelLayerGuidObliviousMachineMapping"
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_MEMORY_OPTIMIZATION_MACHINE_MAPPING_CACHE_WITH_MEMORY_H
#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_MEMORY_OPTIMIZATION_MACHINE_MAPPING_CACHE_WITH_MEMORY_H

#include "compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_cache.dtg.h"

namespace FlexFlow {

// Helpers for MachineMappingWithMemoryCache. Entries are addressed by a
// MachineMappingState and hold a MachineMappingWithMemoryResult.

// Returns a MachineMappingWithMemoryCache.
// NOTE(review): by name, one with no entries - confirm against the
// implementation.
MachineMappingWithMemoryCache empty_machine_mapping_with_memory_cache();
// Looks up the given state in the (read-only) cache.
// NOTE(review): presumably returns std::nullopt when the state has no entry -
// confirm against the implementation.
std::optional<MachineMappingWithMemoryResult>
machine_mapping_with_memory_cache_load(
MachineMappingWithMemoryCache const &, MachineMappingState const &);
// Records a result for the given state in the cache, which is taken by
// mutable reference.
// NOTE(review): behavior when the state is already present is not visible in
// this header - confirm against the implementation.
void machine_mapping_with_memory_cache_save(
MachineMappingWithMemoryCache &,
MachineMappingState const &,
MachineMappingWithMemoryResult const &);

} // namespace FlexFlow

#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Struct spec for FlexFlow::MachineMappingWithMemoryCache: wraps a single
# unordered map from MachineMappingState to MachineMappingWithMemoryResult.
namespace = "FlexFlow"
name = "MachineMappingWithMemoryCache"
# Capabilities requested for the struct: equality, hashing and formatting.
features = [
"eq",
"hash",
"fmt",
]

# Headers needed by the field type: the container plus its key and value types.
includes = [
"<unordered_map>",
"compiler/machine_mapping/machine_mapping_state.dtg.h",
"compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_result.dtg.h",
]

# NOTE(review): presumably these supply fmt/hash support for
# std::unordered_map in the generated source file - confirm.
src_includes = [
"utils/fmt/unordered_map.h",
"utils/hash/unordered_map.h",
]

# The underlying map, keyed by MachineMappingState.
[[fields]]
name = "raw_map"
type = "std::unordered_map<::FlexFlow::MachineMappingState, ::FlexFlow::MachineMappingWithMemoryResult>"
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#ifndef _FLEXFLOW_COMPILER_MACHINE_MAPPING_MEMORY_OPTIMIZATION_MACHINE_MAPPING_RESULT_WITH_MEMORY_H
#define _FLEXFLOW_COMPILER_MACHINE_MAPPING_MEMORY_OPTIMIZATION_MACHINE_MAPPING_RESULT_WITH_MEMORY_H

#include "compiler/machine_mapping/memory_optimization/machine_mapping_with_memory_result.dtg.h"
#include "compiler/machine_mapping/parallel_split_transformation.dtg.h"
#include <optional>

namespace FlexFlow {

// Helpers for building and combining MachineMappingWithMemoryResult values.
// All of them are [[nodiscard]]: they hand back a value rather than
// modifying an argument (every argument is taken by value or const ref).
// NOTE(review): std::unordered_set, OpCostMetrics and MachineView are used
// below but are not directly included by this header; presumably they arrive
// through the included .dtg.h files - confirm.

// Returns a MachineMappingWithMemoryResult.
// NOTE(review): by name, one containing no mappings - confirm.
[[nodiscard]] MachineMappingWithMemoryResult
empty_machine_mapping_with_memory_result();
// Predicate over a result.
// NOTE(review): presumably true when the result holds no mappings - confirm.
[[nodiscard]] bool is_empty(MachineMappingWithMemoryResult const &);

// Selects one result out of a set of candidate results.
// NOTE(review): by name, the candidate with the lowest runtime; behavior for
// an empty set is not visible in this header - confirm.
[[nodiscard]] MachineMappingWithMemoryResult get_mapping_with_minimal_runtime(
std::unordered_set<MachineMappingWithMemoryResult> const &);

// Returns a filtered copy of the given result.
// NOTE(review): by name, keeps only mappings that are Pareto-optimal
// (presumably over runtime and memory) - confirm.
[[nodiscard]] MachineMappingWithMemoryResult
remove_non_pareto_optimal_machine_mapping_result(
MachineMappingWithMemoryResult const &);

// Combines the results of the two halves of a series split, given the
// communication cost between them and an optional
// ParallelSplitTransformation.
// NOTE(review): how comm_cost and the transformation enter the combined cost
// is not visible in this header - confirm.
[[nodiscard]] MachineMappingWithMemoryResult
series_combine(float comm_cost,
MachineMappingWithMemoryResult const &pre_result,
MachineMappingWithMemoryResult const &post_result,
std::optional<ParallelSplitTransformation> const
&parallel_split_transformation);
// Combines the results of the two branches of a parallel split.
[[nodiscard]] MachineMappingWithMemoryResult
parallel_combine(MachineMappingWithMemoryResult const &lhs_result,
MachineMappingWithMemoryResult const &rhs_result);

// Combines two results into one.
// NOTE(review): by name, favors lower runtime; whether mappings from both
// inputs are merged or only one input is kept is not visible here - confirm.
[[nodiscard]] MachineMappingWithMemoryResult
minimize_runtime(MachineMappingWithMemoryResult const &m1,
MachineMappingWithMemoryResult const &m2);

// Builds a result from one cost and one machine view.
// NOTE(review): by name, the result holds exactly one mapping - confirm.
[[nodiscard]] MachineMappingWithMemoryResult
make_singleton_machine_mapping_with_memory_result(
OpCostMetrics cost, MachineView const &machine_view);

} // namespace FlexFlow

#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Struct spec for FlexFlow::MachineMappingWithMemoryResult: wraps a single
# unordered set of MachineMappingForSingleLayer entries.
namespace = "FlexFlow"
name = "MachineMappingWithMemoryResult"
# Capabilities requested for the struct: equality, hashing and formatting.
features = [
"eq",
"hash",
"fmt",
]

# Headers needed by the field type. <unordered_set> is listed explicitly so
# the header does not rely on a transitive include for the container; this
# mirrors the cache spec, which lists <unordered_map> for its map field.
includes = [
"<unordered_set>",
"compiler/machine_mapping/memory_optimization/machine_mapping_for_single_layer.dtg.h",
]

# NOTE(review): presumably these supply hash/fmt support for
# std::unordered_set in the generated source file - confirm.
src_includes = [
"utils/hash/unordered_set.h",
"utils/fmt/unordered_set.h",
]

# The candidate mappings, each carrying its own cost.
[[fields]]
name = "machine_mappings"
type = "std::unordered_set<::FlexFlow::MachineMappingForSingleLayer>"
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Struct spec for FlexFlow::MachineMemoryConstraints: a single memory limit.
namespace = "FlexFlow"
name = "MachineMemoryConstraints"
# Capabilities requested for the struct: equality, hashing and formatting.
features = [
"eq",
"hash",
"fmt",
]

# No extra headers are requested for the field type.
includes = []

# Upper bound on memory.
# NOTE(review): presumably in bytes and per device; neither is stated in this
# spec - TODO confirm.
[[fields]]
name = "memory_limit"
type = "size_t"
2 changes: 1 addition & 1 deletion lib/compiler/src/compiler/cost_estimator/cost_estimator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ namespace FlexFlow {
CostEstimator::CostEstimator(std::shared_ptr<ICostEstimator> implementation_ptr)
: implementation_ptr(implementation_ptr) {}

float CostEstimator::estimate_cost(OpCostEstimateKey const &k) const {
OpCostMetrics CostEstimator::estimate_cost(OpCostEstimateKey const &k) const {
return this->implementation_ptr->estimate_cost(k);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ MachineMappingResult
auto get_mapping_result = [&](MachineView const &machine_view) {
OpCostEstimateKey mapped =
map_unmapped_op_cost_estimate_key(leaf, machine_view);
float cost = context.cost_estimator.estimate_cost(mapped);
float cost = context.cost_estimator.estimate_cost(mapped).runtime;

return make_singleton_machine_mapping_result(cost, machine_view);
};
Expand Down
Loading

0 comments on commit 030bfd6

Please sign in to comment.