diff --git a/third_party/tsl/tsl/profiler/lib/BUILD b/third_party/tsl/tsl/profiler/lib/BUILD index 1a38c776f87627..be941c5d93bd99 100644 --- a/third_party/tsl/tsl/profiler/lib/BUILD +++ b/third_party/tsl/tsl/profiler/lib/BUILD @@ -1,4 +1,4 @@ -load("@xla//xla/tsl:tsl.bzl", "if_not_android", "if_oss", "internal_visibility", "nvtx_headers") +load("@xla//xla/tsl:tsl.bzl", "if_google", "if_not_android", "if_oss", "internal_visibility", "nvtx_headers") load("@xla//xla/tsl:tsl.default.bzl", "filegroup", "get_compatible_with_portable") load("@xla//xla/tsl/platform:build_config.bzl", "tsl_cc_test") load("@xla//xla/tsl/platform:build_config_root.bzl", "if_static") @@ -278,14 +278,17 @@ cc_library( cc_library( name = "nvtx_utils_impl", - srcs = if_cuda_is_configured( - ["nvtx_utils.cc"], + srcs = if_google( + if_cuda_is_configured( + ["nvtx_utils.cc"], + ["nvtx_utils_stub.cc"], + ), ["nvtx_utils_stub.cc"], ), hdrs = ["nvtx_utils.h"], local_defines = if_oss(["NVTX_VERSION_3_1=1"]), visibility = ["//visibility:public"], - deps = if_cuda_is_configured(nvtx_headers()), + deps = if_google(if_cuda_is_configured(nvtx_headers())), ) cc_library( diff --git a/xla/backends/profiler/BUILD b/xla/backends/profiler/BUILD index 65f5da2138f0f9..0c309e67520445 100644 --- a/xla/backends/profiler/BUILD +++ b/xla/backends/profiler/BUILD @@ -18,6 +18,8 @@ package_group( tsl_gpu_library( name = "profiler_backends", + add_gpu_deps_for_oss = False, + # copybara:uncomment compatible_with = ["//buildenv/target:non_prod"], visibility = internal_visibility(["//xla:internal"]), deps = [ "//xla/backends/profiler/cpu:host_tracer", diff --git a/xla/python/BUILD b/xla/python/BUILD index 0a4e7c39279b89..884884eae4c833 100644 --- a/xla/python/BUILD +++ b/xla/python/BUILD @@ -330,9 +330,9 @@ cc_library( "-fexceptions", "-fno-strict-aliasing", ], - defines = if_cuda(["GOOGLE_CUDA=1"]) + if_rocm([ + defines = if_google(if_cuda(["GOOGLE_CUDA=1"]) + if_rocm([ "TENSORFLOW_USE_ROCM=1", - ]), + ])), features = ["-use_header_modules"], deps = [ ":aggregate_profile", @@ -438,9 +438,9 @@ cc_library( # keep sorted "@local_config_rocm//rocm:hip", "@local_config_rocm//rocm:rocm_headers", - ]) + if_cuda_or_rocm([ + ]) + if_google(if_cuda_or_rocm([ ":py_client_gpu", # TODO(b/337876408): remove after migration to plugin - ]) + if_google(["@com_google_protobuf//:any_cc_proto"]), + ]) + ["@com_google_protobuf//:any_cc_proto"]), ) cc_library( @@ -1244,18 +1244,6 @@ tsl_pybind_extension( "-fno-strict-aliasing", ], features = ["-use_header_modules"], - linkopts = select({ - ":use_jax_cuda_pip_rpaths": [ - "-Wl,-rpath,$$ORIGIN/../nvidia/cuda_cupti/lib", - "-Wl,-rpath,$$ORIGIN/../nvidia/cuda_runtime/lib", - "-Wl,-rpath,$$ORIGIN/../nvidia/cublas/lib", - "-Wl,-rpath,$$ORIGIN/../nvidia/cufft/lib", - "-Wl,-rpath,$$ORIGIN/../nvidia/cudnn/lib", - "-Wl,-rpath,$$ORIGIN/../nvidia/cusolver/lib", - "-Wl,-rpath,$$ORIGIN/../nvidia/nccl/lib", - ], - "//conditions:default": [], - }), pytype_deps = [ "//third_party/py/numpy", ], diff --git a/xla/tsl/distributed_runtime/coordination/BUILD b/xla/tsl/distributed_runtime/coordination/BUILD index c9e12db657a6a1..a244fc992e5253 100644 --- a/xla/tsl/distributed_runtime/coordination/BUILD +++ b/xla/tsl/distributed_runtime/coordination/BUILD @@ -70,6 +70,7 @@ cc_library( tsl_gpu_library( name = "coordination_service_impl", srcs = ["coordination_service.cc"], + add_gpu_deps_for_oss = False, deps = [ ":coordination_client", ":coordination_service", @@ -141,6 +142,7 @@ tsl_gpu_library( name = "coordination_service_agent", srcs = ["coordination_service_agent.cc"], hdrs = ["coordination_service_agent.h"], + add_gpu_deps_for_oss = False, deps = [ ":coordination_client", ":coordination_service_error_util", diff --git a/xla/tsl/tsl.bzl b/xla/tsl/tsl.bzl index b28d3fe53e94f4..4a9f34ad0487eb 100644 --- a/xla/tsl/tsl.bzl +++ b/xla/tsl/tsl.bzl @@ -357,7 +357,12 @@ def tf_openmp_copts(): "//conditions:default": [], }) -def tsl_gpu_library(deps = None, cuda_deps = None, copts = tsl_copts(), **kwargs): +def tsl_gpu_library( + deps = None, + cuda_deps = None, + copts = tsl_copts(), + add_gpu_deps_for_oss = True, + **kwargs): """Generate a cc_library with a conditional set of CUDA dependencies. When the library is built with --config=cuda: @@ -373,6 +378,7 @@ def tsl_gpu_library(deps = None, cuda_deps = None, copts = tsl_copts(), **kwargs '--config=cuda' is passed to the bazel command line. deps: dependencies which will always be linked. copts: copts always passed to the cc_library. + add_gpu_deps_for_oss: Whether to add gpu deps for OSS too. **kwargs: Any other argument to cc_library. """ if not deps: @@ -381,19 +387,24 @@ def tsl_gpu_library(deps = None, cuda_deps = None, copts = tsl_copts(), **kwargs cuda_deps = [] kwargs["features"] = kwargs.get("features", []) + ["-use_header_modules"] - deps = deps + if_cuda(cuda_deps) + deps = deps + (if_cuda(cuda_deps) if add_gpu_deps_for_oss else if_google(if_cuda(cuda_deps))) if "default_copts" in kwargs: copts = kwargs["default_copts"] + copts kwargs.pop("default_copts", None) + all_cuda_deps = if_cuda([ + clean_dep("//xla/tsl/cuda:cudart"), + "@local_config_cuda//cuda:cuda_headers", + ]) + if_rocm([ + "@local_config_rocm//rocm:hip", + "@local_config_rocm//rocm:rocm_headers", + ]) + all_cuda_copts = if_cuda(["-DGOOGLE_CUDA=1", "-DNV_CUDNN_DISABLE_EXCEPTION"]) + if_rocm(["-DTENSORFLOW_USE_ROCM=1"]) + if not add_gpu_deps_for_oss: + all_cuda_deps = if_google(all_cuda_deps) + all_cuda_copts = if_google(all_cuda_copts) cc_library( - deps = deps + if_cuda([ - clean_dep("//xla/tsl/cuda:cudart"), - "@local_config_cuda//cuda:cuda_headers", - ]) + if_rocm([ - "@local_config_rocm//rocm:hip", - "@local_config_rocm//rocm:rocm_headers", - ]), - copts = (copts + if_cuda(["-DGOOGLE_CUDA=1", "-DNV_CUDNN_DISABLE_EXCEPTION"]) + if_rocm(["-DTENSORFLOW_USE_ROCM=1"]) + if_xla_available(["-DTENSORFLOW_USE_XLA=1"]) + if_mkl(["-DINTEL_MKL=1"]) + if_enable_mkl(["-DENABLE_MKL"]) + if_tensorrt(["-DGOOGLE_TENSORRT=1"])), + deps = deps + all_cuda_deps, + copts = (copts + all_cuda_copts + if_xla_available(["-DTENSORFLOW_USE_XLA=1"]) + if_mkl(["-DINTEL_MKL=1"]) + if_enable_mkl(["-DENABLE_MKL"]) + if_tensorrt(["-DGOOGLE_TENSORRT=1"])), **kwargs )