Skip to content

Commit

Permalink
[XLA:GPU] Move collectives related code under transforms/collectives/.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 723108068
  • Loading branch information
allanrenucci authored and Google-ML-Automation committed Feb 4, 2025
1 parent 604738e commit f7c2280
Show file tree
Hide file tree
Showing 15 changed files with 194 additions and 195 deletions.
178 changes: 9 additions & 169 deletions xla/service/gpu/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -1376,16 +1376,12 @@ cc_library(
":flag_utils",
":fusion_dispatch_pipeline",
":fusion_pipeline",
":gpu_all_gather_combiner",
":gpu_all_reduce_combiner",
":gpu_collective_combiner_utils",
":gpu_constants",
":gpu_executable",
":gpu_float_support",
":gpu_hlo_schedule",
":gpu_latency_hiding_scheduler",
":gpu_p2p_pipeliner",
":gpu_reduce_scatter_combiner",
":gpu_spmd_pipeline",
":hlo_fusion_stats",
":ir_emission_utils",
Expand Down Expand Up @@ -1494,6 +1490,10 @@ cc_library(
"//xla/service/gpu/model:gpu_hlo_cost_analysis",
"//xla/service/gpu/model:sol_gpu_cost_model_stats_collection",
"//xla/service/gpu/transforms/collectives:convert_async_collectives_to_sync",
"//xla/service/gpu/transforms/collectives:gpu_all_gather_combiner",
"//xla/service/gpu/transforms/collectives:gpu_all_reduce_combiner",
"//xla/service/gpu/transforms/collectives:gpu_collective_combiner_utils",
"//xla/service/gpu/transforms/collectives:gpu_reduce_scatter_combiner",
"//xla/service/gpu/transforms:algebraic_simplifier",
"//xla/service/gpu/transforms:algorithm_checker",
"//xla/service/gpu/transforms:all_gather_dynamic_slice_simplifier",
Expand Down Expand Up @@ -1546,6 +1546,7 @@ cc_library(
"//xla/service/gpu/transforms:tree_reduction_rewriter",
"//xla/service/gpu/transforms:triton_fusion_numerics_verifier",
"//xla/service/gpu/transforms:windowed_einsum_handler",
"//xla/service/llvm_ir:llvm_command_line_options",
"//xla/service/llvm_ir:llvm_util",
"//xla/service/spmd:collective_permute_motion",
"//xla/service:all_reduce_promotion",
Expand Down Expand Up @@ -1603,6 +1604,9 @@ cc_library(
"//xla/stream_executor:semantic_version",
"//xla/stream_executor:stream_executor_h",
"//xla/tsl/lib/monitoring:counter",
"//xla/tsl/platform:env",
"//xla/tsl/platform:errors",
"//xla/tsl/platform:statusor",
"//xla:autotune_results_proto_cc",
"//xla:debug_options_flags",
"//xla:shape_util",
Expand All @@ -1626,12 +1630,7 @@ cc_library(
]) + xla_internal(["service:export_hlo"]) + if_google([
"//xla/hlo/experimental/auto_sharding",
"//xla/hlo/experimental/auto_sharding:auto_sharding_option",
]) + [
"//xla/service/llvm_ir:llvm_command_line_options",
"//xla/tsl/platform:env",
"//xla/tsl/platform:errors",
"//xla/tsl/platform:statusor",
],
]),
)

xla_test(
Expand Down Expand Up @@ -3089,165 +3088,6 @@ xla_cc_test(
],
)

# Library target compiled from gpu_collective_combiner_utils.cc and exposing
# gpu_collective_combiner_utils.h. Local (":") deps come first, followed by
# in-repo ("//xla/...") labels and then external ("@...") labels.
cc_library(
    name = "gpu_collective_combiner_utils",
    srcs = ["gpu_collective_combiner_utils.cc"],
    hdrs = ["gpu_collective_combiner_utils.h"],
    deps = [
        ":backend_configs_cc",
        ":gpu_hlo_schedule",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/pass:hlo_pass_pipeline",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:collective_ops_utils",
        "//xla/service:collective_utils",
        "//xla/service/gpu/transforms/collectives:convert_async_collectives_to_sync",
        "//xla/stream_executor:device_description",
        "//xla/tsl/platform:errors",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings",
    ],
)

# Test target built from gpu_collective_combiner_utils_test.cc; it links the
# :gpu_collective_combiner_utils library together with the gtest main.
xla_cc_test(
    name = "gpu_collective_combiner_utils_test",
    srcs = ["gpu_collective_combiner_utils_test.cc"],
    deps = [
        ":backend_configs_cc",
        ":gpu_collective_combiner_utils",
        "//xla:util",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/pass:hlo_pass",
        "//xla/hlo/pass:hlo_pass_pipeline",
        "//xla/hlo/transforms/simplifiers:hlo_dce",
        "//xla/hlo/utils:hlo_query",
        "//xla/service:collective_pipeliner",
        "//xla/service:hlo_module_config",
        "//xla/stream_executor:device_description",
        "//xla/tests:hlo_test_base",
        "//xla/tsl/platform:status_matchers",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest_main",
    ],
)

# Library target compiled from all_gather_combiner.cc / all_gather_combiner.h.
# It depends on :gpu_collective_combiner_utils and on the
# //xla/hlo/transforms/collectives:all_gather_combiner target.
cc_library(
    name = "gpu_all_gather_combiner",
    srcs = ["all_gather_combiner.cc"],
    hdrs = ["all_gather_combiner.h"],
    deps = [
        ":backend_configs_cc",
        ":gpu_collective_combiner_utils",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/pass:hlo_pass",
        "//xla/hlo/transforms/collectives:all_gather_combiner",
        "//xla/service:hlo_domain_map",
        "//xla/stream_executor:device_description",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
    ],
)

# Test target built from all_gather_combiner_test.cc against
# :gpu_all_gather_combiner.
# NOTE(review): statusor/test are taken from @tsl here, while the library
# targets in this file use //xla/tsl/platform — confirm which headers the
# test source actually includes before changing either label.
xla_cc_test(
    name = "gpu_all_gather_combiner_test",
    srcs = ["all_gather_combiner_test.cc"],
    deps = [
        ":gpu_all_gather_combiner",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:filecheck",
        "//xla/service:collective_utils",
        "//xla/stream_executor:device_description",
        "//xla/tests:hlo_test_base",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest_main",
        "@tsl//tsl/platform:statusor",
        "@tsl//tsl/platform:test",
    ],
)

# Library target compiled from reduce_scatter_combiner.cc /
# reduce_scatter_combiner.h. It depends on :gpu_collective_combiner_utils and
# on the //xla/service:reduce_scatter_combiner target.
cc_library(
    name = "gpu_reduce_scatter_combiner",
    srcs = ["reduce_scatter_combiner.cc"],
    hdrs = ["reduce_scatter_combiner.h"],
    deps = [
        ":backend_configs_cc",
        ":gpu_collective_combiner_utils",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/pass:hlo_pass",
        "//xla/service:hlo_domain_map",
        "//xla/service:reduce_scatter_combiner",
        "//xla/stream_executor:device_description",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
    ],
)

# Test target built from reduce_scatter_combiner_test.cc against
# :gpu_reduce_scatter_combiner.
# NOTE(review): unlike the sibling gpu_* targets, this test's name carries no
# "gpu_" prefix — confirm whether that is intentional.
xla_cc_test(
    name = "reduce_scatter_combiner_test",
    srcs = ["reduce_scatter_combiner_test.cc"],
    deps = [
        ":gpu_reduce_scatter_combiner",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:filecheck",
        "//xla/service:collective_utils",
        "//xla/stream_executor:device_description",
        "//xla/tests:hlo_test_base",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest_main",
        "@tsl//tsl/platform:statusor",
    ],
)

# Library target compiled from all_reduce_combiner.cc / all_reduce_combiner.h.
# It depends on :gpu_collective_combiner_utils and on the
# //xla/hlo/transforms/collectives:all_reduce_combiner target.
cc_library(
    name = "gpu_all_reduce_combiner",
    srcs = ["all_reduce_combiner.cc"],
    hdrs = ["all_reduce_combiner.h"],
    deps = [
        ":backend_configs_cc",
        ":gpu_collective_combiner_utils",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/pass:hlo_pass",
        "//xla/hlo/transforms/collectives:all_reduce_combiner",
        "//xla/service:hlo_domain_map",
        "//xla/stream_executor:device_description",
        "//xla/tsl/platform:statusor",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/status:statusor",
        "@com_google_absl//absl/strings:string_view",
    ],
)

# Test target built from all_reduce_combiner_test.cc against
# :gpu_all_reduce_combiner.
# NOTE(review): unlike the sibling gpu_* targets, this test's name carries no
# "gpu_" prefix — confirm whether that is intentional.
xla_cc_test(
    name = "all_reduce_combiner_test",
    srcs = ["all_reduce_combiner_test.cc"],
    deps = [
        ":gpu_all_reduce_combiner",
        "//xla/hlo/ir:hlo",
        "//xla/hlo/testlib:filecheck",
        "//xla/service:collective_utils",
        "//xla/stream_executor:device_description",
        "//xla/tests:hlo_test_base",
        "@com_google_absl//absl/log",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_googletest//:gtest_main",
        "@tsl//tsl/platform:statusor",
    ],
)

cc_library(
name = "ptx_compile_options_from_debug_options",
srcs = ["ptx_compile_options_from_debug_options.cc"],
Expand Down
8 changes: 4 additions & 4 deletions xla/service/gpu/gpu_compiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,6 @@ limitations under the License.
#include "xla/service/export_hlo.h"
#include "xla/service/float_support.h"
#include "xla/service/gather_expander.h"
#include "xla/service/gpu/all_gather_combiner.h"
#include "xla/service/gpu/all_reduce_combiner.h"
#include "xla/service/gpu/autotuning/autotuner_util.h"
#include "xla/service/gpu/autotuning/custom_kernel_fusion_autotuner.h"
#include "xla/service/gpu/compile_module_to_llvm_ir.h"
Expand All @@ -155,7 +153,6 @@ limitations under the License.
#include "xla/service/gpu/flag_utils.h"
#include "xla/service/gpu/fusion_dispatch_pipeline.h"
#include "xla/service/gpu/fusion_pipeline.h"
#include "xla/service/gpu/gpu_collective_combiner_utils.h"
#include "xla/service/gpu/gpu_executable.h"
#include "xla/service/gpu/gpu_float_support.h"
#include "xla/service/gpu/gpu_hlo_schedule.h"
Expand All @@ -173,7 +170,6 @@ limitations under the License.
#include "xla/service/gpu/model/gpu_hlo_cost_analysis.h"
#include "xla/service/gpu/model/sol_gpu_cost_model_stats_collection.h"
#include "xla/service/gpu/prepare_hlo_for_ir_emitting_pipeline.h"
#include "xla/service/gpu/reduce_scatter_combiner.h"
#include "xla/service/gpu/reduction_utils.h"
#include "xla/service/gpu/runtime_intrinsics.h"
#include "xla/service/gpu/stream_executor_util.h"
Expand All @@ -187,7 +183,11 @@ limitations under the License.
#include "xla/service/gpu/transforms/collective_permute_cycle_decomposer.h"
#include "xla/service/gpu/transforms/collective_permute_valid_iteration_annotator.h"
#include "xla/service/gpu/transforms/collective_select_folder.h"
#include "xla/service/gpu/transforms/collectives/all_gather_combiner.h"
#include "xla/service/gpu/transforms/collectives/all_reduce_combiner.h"
#include "xla/service/gpu/transforms/collectives/convert_async_collectives_to_sync.h"
#include "xla/service/gpu/transforms/collectives/gpu_collective_combiner_utils.h"
#include "xla/service/gpu/transforms/collectives/reduce_scatter_combiner.h"
#include "xla/service/gpu/transforms/command_buffer_scheduling.h"
#include "xla/service/gpu/transforms/conv_rewriter.h"
#include "xla/service/gpu/transforms/cudnn_custom_call_converter.h"
Expand Down
Loading

0 comments on commit f7c2280

Please sign in to comment.