Commit 6bf6dd8
[XLA:GPU] Rename `xla_gpu_enable_experimental_pipeline_parallelism_opt` to `xla_gpu_experimental_enable_pipeline_parallelism_opt`.

This is to follow the agreed-upon flag nomenclature.

PiperOrigin-RevId: 717879485
bchetioui authored and Google-ML-Automation committed Jan 23, 2025
1 parent 13125fb commit 6bf6dd8
Showing 6 changed files with 14 additions and 15 deletions.
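
For downstream users, the rename is purely mechanical: the proto field number (351) and the default value (false) are unchanged, but any script passing the old name via XLA_FLAGS, and any code calling the old accessors, must switch to the new spelling. A minimal sketch of setting the renamed option programmatically, mirroring the test setup in this commit (include paths assumed from the XLA source tree):

#include "xla/service/hlo_module_config.h"
#include "xla/xla.pb.h"

// Enable the renamed experimental option on a module configuration.
xla::HloModuleConfig MakeConfigWithPipelineParallelismOpt() {
  xla::DebugOptions debug_options;
  debug_options.set_xla_gpu_experimental_enable_pipeline_parallelism_opt(true);

  xla::HloModuleConfig config;
  config.set_debug_options(debug_options);
  return config;
}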
xla/debug_options_flags.cc (8 changes: 4 additions & 4 deletions)

@@ -205,7 +205,7 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() {
 
   opts.set_xla_gpu_collective_permute_decomposer_threshold(
       std::numeric_limits<int64_t>::max());
-  opts.set_xla_gpu_enable_experimental_pipeline_parallelism_opt(false);
+  opts.set_xla_gpu_experimental_enable_pipeline_parallelism_opt(false);
 
   opts.set_xla_cpu_enable_mlir_tiling_and_fusion(true);
   opts.set_xla_cpu_enable_custom_matmul_tiling(false);

@@ -1713,11 +1713,11 @@ void MakeDebugOptionsFlags(std::vector<tsl::Flag>* flag_list,
       debug_options->xla_gpu_collective_permute_decomposer_threshold(),
       "Collective permute decomposer threshold."));
   flag_list->push_back(tsl::Flag(
-      "xla_gpu_enable_experimental_pipeline_parallelism_opt",
+      "xla_gpu_experimental_enable_pipeline_parallelism_opt",
       bool_setter_for(
           &DebugOptions::
-              set_xla_gpu_enable_experimental_pipeline_parallelism_opt),
-      debug_options->xla_gpu_enable_experimental_pipeline_parallelism_opt(),
+              set_xla_gpu_experimental_enable_pipeline_parallelism_opt),
+      debug_options->xla_gpu_experimental_enable_pipeline_parallelism_opt(),
       "Experimental optimizations for SPMD-based pipeline parallelism on "
       "GPU."));
   flag_list->push_back(tsl::Flag(
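
The registration above passes bool_setter_for a pointer to the renamed proto setter. As a rough illustration of the pattern (a sketch, not the actual helper in debug_options_flags.cc), such a binder can be written as:

#include <functional>

#include "xla/xla.pb.h"

// Sketch: bind a DebugOptions member setter so the flag parser can write
// parsed boolean values straight into the proto instance.
std::function<bool(bool)> BoolSetterFor(
    xla::DebugOptions* debug_options,
    void (xla::DebugOptions::*setter)(bool)) {
  return [debug_options, setter](bool value) {
    (debug_options->*setter)(value);
    return true;  // Report to the parser that the value was consumed.
  };
}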
xla/service/gpu/gpu_compiler.cc (6 changes: 3 additions & 3 deletions)

@@ -954,7 +954,7 @@ absl::Status RunCollectiveOptimizationPasses(
 
   if (hlo_module->config()
           .debug_options()
-          .xla_gpu_enable_experimental_pipeline_parallelism_opt()) {
+          .xla_gpu_experimental_enable_pipeline_parallelism_opt()) {
     collectives_pipeline.AddPass<CollectiveSelectFolder>();
   }
 
@@ -971,7 +971,7 @@ absl::Status RunCollectiveOptimizationPasses(
         collectives_pipeline,
         hlo_module->config()
             .debug_options()
-            .xla_gpu_enable_experimental_pipeline_parallelism_opt());
+            .xla_gpu_experimental_enable_pipeline_parallelism_opt());
   }
 
   // Run algebraic simplifier to reshape(broadcast) into a broadcast when

@@ -2669,7 +2669,7 @@ absl::Status GpuCompiler::RunPostSchedulingPipelines(
 
   if (!module->config()
            .debug_options()
-           .xla_gpu_enable_experimental_pipeline_parallelism_opt() &&
+           .xla_gpu_experimental_enable_pipeline_parallelism_opt() &&
       (module->config()
            .debug_options()
            .xla_gpu_enable_pipelined_collectives() ||
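
The same accessor chain now appears spelled out at three call sites in this file. If more call sites accumulate, a small helper could centralize the spelling; a hypothetical sketch (the helper is invented for illustration, the include path assumed from the XLA source tree):

#include "xla/hlo/ir/hlo_module.h"

// Hypothetical helper (not part of this commit): centralize the accessor
// chain used at the three call sites above.
inline bool PipelineParallelismOptEnabled(const xla::HloModule& module) {
  return module.config()
      .debug_options()
      .xla_gpu_experimental_enable_pipeline_parallelism_opt();
}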
xla/service/gpu/gpu_latency_hiding_scheduler.cc (4 changes: 2 additions & 2 deletions)

@@ -272,13 +272,13 @@ void GpuAsyncTrackerBase::PostProcessScheduleGraph(
   // Schedule partially pipelined send/recv instructions late so that they can
   // overlap with compute. Schedule send/recv late and, when unblocked,
   // schedule send-done/recv-done early.
-  if (debug_options.xla_gpu_enable_experimental_pipeline_parallelism_opt() &&
+  if (debug_options.xla_gpu_experimental_enable_pipeline_parallelism_opt() &&
       IsPartiallyPipelinedSendRecv(inst)) {
     HloGraphNode& node = schedule_graph->GetNode(inst);
     node.SetForceDelay(true);
     VLOG(5) << "Setting force delay for instruction: " << inst->ToString();
   }
-  if (debug_options.xla_gpu_enable_experimental_pipeline_parallelism_opt() &&
+  if (debug_options.xla_gpu_experimental_enable_pipeline_parallelism_opt() &&
       IsPartiallyPipelinedSendRecvDone(inst)) {
     HloGraphNode& node = schedule_graph->GetNode(inst);
     node.SetForceEarly(true);
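
For readers skimming the hunk above: the two guarded blocks are complementary halves of the overlap strategy the comment describes. A condensed restatement (illustration only, not part of the commit; it assumes an instruction matches at most one of the two predicates):

// Condensed form of the two blocks above: starts are pushed late,
// completions pulled early, widening the window in which compute can
// overlap the in-flight communication.
if (debug_options.xla_gpu_experimental_enable_pipeline_parallelism_opt()) {
  if (IsPartiallyPipelinedSendRecv(inst)) {
    // Issue the send/recv as late as possible.
    schedule_graph->GetNode(inst).SetForceDelay(true);
  } else if (IsPartiallyPipelinedSendRecvDone(inst)) {
    // Retire the matching *-done as early as possible.
    schedule_graph->GetNode(inst).SetForceEarly(true);
  }
}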
xla/service/gpu/gpu_latency_hiding_scheduler_test.cc (2 changes: 1 addition & 1 deletion)

@@ -84,7 +84,7 @@ class GpuLatencyHidingSchedulerBaseTest : public HloTestBase {
     HloModuleConfig config;
     DebugOptions debug_options = GetDebugOptionsForTest();
     debug_options.set_xla_gpu_enable_latency_hiding_scheduler(true);
-    debug_options.set_xla_gpu_enable_experimental_pipeline_parallelism_opt(
+    debug_options.set_xla_gpu_experimental_enable_pipeline_parallelism_opt(
        enable_experimental_pipeline_parallelism_opt);
     config.set_debug_options(debug_options);
     config.set_fdo_profile(fdo_profile);
xla/tests/collective_pipeline_parallelism_test.cc (2 changes: 1 addition & 1 deletion)

@@ -56,7 +56,7 @@ class CollectivePipelineParallelismTest
 
     // Set debug options.
     DebugOptions debug_options = GetDebugOptionsForTest();
-    debug_options.set_xla_gpu_enable_experimental_pipeline_parallelism_opt(
+    debug_options.set_xla_gpu_experimental_enable_pipeline_parallelism_opt(
        GetParam());
     config.set_debug_options(debug_options);
 
xla/xla.proto (7 changes: 3 additions & 4 deletions)

@@ -386,10 +386,6 @@ message DebugOptions {
   // dynamic-update-slice operations around library calls.
   bool xla_gpu_enable_dynamic_slice_fusion = 105;
 
-  // Experimental optimizations for SPMD-based pipeline parallelism on GPU.
-  // TODO(bchetioui): adjust this name to follow the naming convention.
-  bool xla_gpu_enable_experimental_pipeline_parallelism_opt = 351;
-
   // When true we lower the Minimum and Maximum hlos in the GPU backend such
   // that Min(NotNaN, NaN) = Min(NaN, NotNaN) = NotNaN. In other words, if flag
   // this is true we don't propagate NaNs through Min and Max.

@@ -513,6 +509,9 @@ message DebugOptions {
   // Pre-existing block-level fusions are left unmodified.
   bool xla_gpu_experimental_enable_fusion_block_level_rewriter = 334;
 
+  // Experimental optimizations for SPMD-based pipeline parallelism on GPU.
+  bool xla_gpu_experimental_enable_pipeline_parallelism_opt = 351;
+
   // When enabled, the PriorityFusion pass will try to make Triton fusions first
   // and foremost where it is possible.
   //
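
Relocating the field places it alongside the other xla_gpu_experimental_* fields. Because the field number 351 and the type are unchanged, binary-serialized DebugOptions remain wire-compatible; only the generated accessor names and the text-format spelling change. A sketch of the accessors generated after this commit (the wrapper function is illustrative):

#include "xla/xla.pb.h"

// Generated C++ accessors after the rename; every call site in this
// commit switches to these spellings.
void Example() {
  xla::DebugOptions opts;
  opts.set_xla_gpu_experimental_enable_pipeline_parallelism_opt(true);
  const bool enabled =
      opts.xla_gpu_experimental_enable_pipeline_parallelism_opt();
  (void)enabled;
}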
