Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added support for compiling the CUDA stubs on Windows. #15518

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 56 additions & 41 deletions xla/tsl/cuda/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Description:
# Stubs for dynamically loading CUDA.

load("@bazel_skylib//lib:selects.bzl", "selects")
load(
"@tsl//tsl/platform:rules_cc.bzl",
"cc_library",
Expand All @@ -16,17 +17,26 @@ package(
licenses = ["notice"],
)

selects.config_setting_group(
name = "linux_with_cuda_enabled",
match_all = [
"@local_config_cuda//:is_cuda_enabled",
"@platforms//os:linux",
],
)

cuda_stub(
name = "cublas",
srcs = ["cublas.symbols"],
)

cc_library(
name = "cublas", # buildifier: disable=duplicated-name
srcs = if_cuda_is_configured([
"cublas_stub.cc",
"cublas.tramp.S",
]),
srcs = select({
":linux_with_cuda_enabled": ["cublas_stub.cc", "cublas.tramp.S"],
"@local_config_cuda//:is_cuda_enabled": ["cublas_stub.cc"],
"//conditions:default": [],
}),
linkopts = if_cuda_is_configured(cuda_rpath_flags(
"nvidia/cublas/lib",
)),
Expand All @@ -51,10 +61,11 @@ cuda_stub(

cc_library(
name = "cublas_lt",
srcs = if_cuda_is_configured([
"cublasLt_stub.cc",
"cublasLt.tramp.S",
]),
srcs = select({
":linux_with_cuda_enabled": ["cublasLt_stub.cc", "cublasL.tramp.S"],
"@local_config_cuda//:is_cuda_enabled": ["cublasLt_stub.cc"],
"//conditions:default": [],
}),
local_defines = [
"IMPLIB_EXPORT_SHIMS=1",
],
Expand All @@ -75,10 +86,11 @@ cuda_stub(

cc_library(
name = "cuda", # buildifier: disable=duplicated-name
srcs = if_cuda_is_configured([
"cuda_stub.cc",
"cuda.tramp.S",
]),
srcs = select({
":linux_with_cuda_enabled": ["cuda_stub.cc", "cuda.tramp.S"],
"@local_config_cuda//:is_cuda_enabled": ["cuda_stub.cc"],
"//conditions:default": [],
}),
local_defines = [
"IMPLIB_EXPORT_SHIMS=1",
],
Expand All @@ -100,11 +112,8 @@ cuda_stub(
cc_library(
name = "cudart", # buildifier: disable=duplicated-name
srcs = select({
# include dynamic loading implementation only when if_cuda_is_configured and build dynamically
"@xla//xla/tsl:is_cuda_enabled_and_oss": [
"cudart.tramp.S",
"cudart_stub.cc",
],
":linux_with_cuda_enabled": ["cudart_stub.cc", "cudart.tramp.S"],
"@local_config_cuda//:is_cuda_enabled": ["cudart_stub.cc"],
"//conditions:default": [],
}),
linkopts = select({
Expand Down Expand Up @@ -136,10 +145,11 @@ cuda_stub(

cc_library(
name = "cudnn", # buildifier: disable=duplicated-name
srcs = if_cuda_is_configured([
"cudnn_stub.cc",
"cudnn.tramp.S",
]),
srcs = select({
":linux_with_cuda_enabled": ["cudnn_stub.cc", "cudnn.tramp.S"],
"@local_config_cuda//:is_cuda_enabled": ["cudnn_stub.cc"],
"//conditions:default": [],
}),
linkopts = if_cuda_is_configured(cuda_rpath_flags("nvidia/cudnn/lib")),
local_defines = [
"IMPLIB_EXPORT_SHIMS=1",
Expand Down Expand Up @@ -174,10 +184,11 @@ cuda_stub(

cc_library(
name = "cufft", # buildifier: disable=duplicated-name
srcs = if_cuda_is_configured([
"cufft_stub.cc",
"cufft.tramp.S",
]),
srcs = select({
":linux_with_cuda_enabled": ["cufft_stub.cc", "cufft.tramp.S"],
"@local_config_cuda//:is_cuda_enabled": ["cufft_stub.cc"],
"//conditions:default": [],
}),
linkopts = if_cuda_is_configured(cuda_rpath_flags("nvidia/cufft/lib")),
local_defines = [
"IMPLIB_EXPORT_SHIMS=1",
Expand All @@ -199,10 +210,11 @@ cuda_stub(

cc_library(
name = "cupti", # buildifier: disable=duplicated-name
srcs = if_cuda_is_configured([
"cupti_stub.cc",
"cupti.tramp.S",
]),
srcs = select({
":linux_with_cuda_enabled": ["cupti_stub.cc", "cupti.tramp.S"],
"@local_config_cuda//:is_cuda_enabled": ["cupti_stub.cc"],
"//conditions:default": [],
}),
data = if_cuda_is_configured(["@local_config_cuda//cuda:cupti_dsos"]),
linkopts = if_cuda_is_configured(cuda_rpath_flags("nvidia/cuda_cupti/lib")),
local_defines = [
Expand All @@ -226,10 +238,11 @@ cuda_stub(

cc_library(
name = "cusolver", # buildifier: disable=duplicated-name
srcs = if_cuda_is_configured([
"cusolver_stub.cc",
"cusolver.tramp.S",
]),
srcs = select({
":linux_with_cuda_enabled": ["cusolver_stub.cc", "cusolver.tramp.S"],
"@local_config_cuda//:is_cuda_enabled": ["cusolver_stub.cc"],
"//conditions:default": [],
}),
linkopts = if_cuda_is_configured(cuda_rpath_flags("nvidia/cusolver/lib")),
local_defines = [
"IMPLIB_EXPORT_SHIMS=1",
Expand All @@ -251,10 +264,11 @@ cuda_stub(

cc_library(
name = "cusparse", # buildifier: disable=duplicated-name
srcs = if_cuda_is_configured([
"cusparse_stub.cc",
"cusparse.tramp.S",
]),
srcs = select({
":linux_with_cuda_enabled": ["cusparse_stub.cc", "cusparse.tramp.S"],
"@local_config_cuda//:is_cuda_enabled": ["cusparse_stub.cc"],
"//conditions:default": [],
}),
linkopts = if_cuda_is_configured(cuda_rpath_flags("nvidia/cusparse/lib")),
local_defines = [
"IMPLIB_EXPORT_SHIMS=1",
Expand All @@ -277,10 +291,11 @@ cuda_stub(

cc_library(
name = "nccl_stub",
srcs = if_cuda_is_configured([
"nccl_stub.cc",
"nccl.tramp.S",
]),
srcs = select({
":linux_with_cuda_enabled": ["nccl_stub.cc", "nccl.tramp.S"],
"@local_config_cuda//:is_cuda_enabled": ["nccl_stub.cc"],
"//conditions:default": [],
}),
linkopts = if_cuda_is_configured(cuda_rpath_flags("nvidia/nccl/lib")),
local_defines = [
"IMPLIB_EXPORT_SHIMS=1",
Expand Down
4 changes: 4 additions & 0 deletions xla/tsl/cuda/cublasLt_stub.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ limitations under the License.

// Implements the cuBLASLt API by forwarding to cuBLASLt loaded from the DSO.

// Note that we do not need this for MSVC because it already uses lazy loading.
#if !defined(_MSC_VER)

namespace {
// Returns DSO handle or null if loading the DSO fails.
void* GetDsoHandle() {
Expand Down Expand Up @@ -67,3 +70,4 @@ void _cublasLt_tramp_resolve(int i) {
}

} // extern "C"
#endif
4 changes: 4 additions & 0 deletions xla/tsl/cuda/cublas_stub.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ limitations under the License.
// Implements the cuBLAS API by forwarding to cuBLAS loaded from the DSO.
// Note that it does not implement the v1 interface.

// Note that we do not need this for MSVC because it already uses lazy loading.
#if !defined(_MSC_VER)

namespace {
// Returns DSO handle or null if loading the DSO fails.
void *GetDsoHandle() {
Expand Down Expand Up @@ -244,3 +247,4 @@ void _cublas_tramp_resolve(int i) {
}

} // extern "C"
#endif
4 changes: 4 additions & 0 deletions xla/tsl/cuda/cuda_stub.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ limitations under the License.

// Implements the CUDA driver API by forwarding to CUDA loaded from the DSO.

// Note that we do not need this for MSVC because it already uses lazy loading.
#if !defined(_MSC_VER)

namespace {
// Returns DSO handle or null if loading the DSO fails.
void* GetDsoHandle() {
Expand Down Expand Up @@ -70,3 +73,4 @@ void _cuda_tramp_resolve(int i) {
}

} // extern "C"
#endif
4 changes: 4 additions & 0 deletions xla/tsl/cuda/cudart_stub.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ limitations under the License.
#include "tsl/platform/load_library.h"
#include "tsl/platform/logging.h"

// Note that we do not need this for MSVC because it already uses lazy loading.
#if !defined(_MSC_VER)

namespace {
void *GetDsoHandle() {
static auto handle = []() -> void * {
Expand Down Expand Up @@ -89,3 +92,4 @@ void _cudart_tramp_resolve(int i) {
}

} // extern "C"
#endif
5 changes: 5 additions & 0 deletions xla/tsl/cuda/cudnn_stub.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ limitations under the License.

// Implements the cuDNN API by forwarding to cuDNN loaded from the DSO.

// Note that we do not need this for MSVC because it already uses lazy loading.
#if !defined(_MSC_VER)

namespace {
// Returns DSO handle or null if loading the DSO fails.
void* GetDsoHandle() {
Expand Down Expand Up @@ -94,3 +97,5 @@ void _cudnn_tramp_resolve(int i) {
}

} // extern "C"

#endif
4 changes: 4 additions & 0 deletions xla/tsl/cuda/cufft_stub.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ limitations under the License.

// Implements the cuFFT API by forwarding to cuFFT loaded from the DSO.

// Note that we do not need this for MSVC because it already uses lazy loading.
#if !defined(_MSC_VER)

namespace {
// Returns DSO handle or null if loading the DSO fails.
void* GetDsoHandle() {
Expand Down Expand Up @@ -69,3 +72,4 @@ void _cufft_tramp_resolve(int i) {
}

} // extern "C"
#endif
4 changes: 4 additions & 0 deletions xla/tsl/cuda/cupti_stub.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ limitations under the License.

// Implements the CUPTI API by forwarding to CUPTI loaded from the DSO.

// Note that we do not need this for MSVC because it already uses lazy loading.
#if !defined(_MSC_VER)

namespace {
// Returns DSO handle or null if loading the DSO fails.
void* GetDsoHandle() {
Expand Down Expand Up @@ -70,3 +73,4 @@ void _cupti_tramp_resolve(int i) {
}

} // extern "C"
#endif
4 changes: 4 additions & 0 deletions xla/tsl/cuda/cusolver_stub.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ limitations under the License.

// Implements the cusolver API by forwarding to cusolver loaded from the DSO.

// Note that we do not need this for MSVC because it already uses lazy loading.
#if !defined(_MSC_VER)

namespace {
// Returns DSO handle or null if loading the DSO fails.
void* GetDsoHandle() {
Expand Down Expand Up @@ -72,3 +75,4 @@ void _cusolver_tramp_resolve(int i) {
}

} // extern "C"
#endif
4 changes: 4 additions & 0 deletions xla/tsl/cuda/cusparse_stub.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ limitations under the License.

// Implements the cusparse API by forwarding to cusparse loaded from the DSO.

// Note that we do not need this for MSVC because it already uses lazy loading.
#if !defined(_MSC_VER)

namespace {
// Returns DSO handle or null if loading the DSO fails.
void* GetDsoHandle() {
Expand Down Expand Up @@ -92,3 +95,4 @@ void _cusparse_tramp_resolve(int i) {
}

} // extern "C"
#endif
4 changes: 4 additions & 0 deletions xla/tsl/cuda/nccl_stub.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ limitations under the License.

// Implements the nccl API by forwarding to nccl loaded from a DSO.

// Note that we do not need this for MSVC because it already uses lazy loading.
#if !defined(_MSC_VER)

namespace {
// Returns DSO handle or null if loading the DSO fails.
void* GetDsoHandle() {
Expand Down Expand Up @@ -91,3 +94,4 @@ void _nccl_tramp_resolve(int i) {
}

} // extern "C"
#endif
1 change: 1 addition & 0 deletions xla/tsl/cuda/stub.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def cuda_stub(name, srcs):
"@xla//xla/tsl:linux_aarch64": "$(location //third_party/implib_so:make_stub) $< --outdir $(RULEDIR) --target aarch64",
"@xla//xla/tsl:linux_x86_64": "$(location //third_party/implib_so:make_stub) $< --outdir $(RULEDIR) --target x86_64",
"@xla//xla/tsl:linux_ppc64le": "$(location //third_party/implib_so:make_stub) $< --outdir $(RULEDIR) --target powerpc64le",
"@xla//xla/tsl:windows_x86_64": "$(location //third_party/implib_so:make_stub) $< --outdir $(RULEDIR) --target x86_64",
"//conditions:default": "NOT_IMPLEMENTED_FOR_THIS_PLATFORM_OR_ARCHITECTURE",
}),
)
Loading