-
Notifications
You must be signed in to change notification settings - Fork 486
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fixed some issues around compiling on Windows. #15444
Changes from 11 commits
eacee95
b12e4cf
e23ef17
bdae19b
2f90e6b
5700979
a978b1f
d7fe81d
fc40d91
326aec3
a7603b7
edcc97a
cec2448
28342ae
df3eb22
8997345
219a9f1
73f3cd7
d266259
d5c3cce
d584004
091793f
6037149
866e013
e289dff
57c98d8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -186,18 +186,18 @@ void AddGraphTraceActivityEvent(CuptiEventCollectorDelegate &collector, | |
AnnotationMap::AnnotationInfo info = collector.annotation_map.LookUp( | ||
graph_trace->deviceId, graph_trace->correlationId); | ||
collector.receive(CuptiTracerEvent{ | ||
.type = CuptiTracerEventType::CudaGraph, | ||
.source = CuptiTracerEventSource::Activity, | ||
.name = absl::StrCat("CudaGraphExec:", graph_trace->graphId), | ||
.annotation = info.annotation, | ||
.nvtx_range = info.nvtx_range, | ||
.start_time_ns = graph_trace->start, | ||
.end_time_ns = graph_trace->end, | ||
.device_id = graph_trace->deviceId, | ||
.correlation_id = graph_trace->correlationId, | ||
.context_id = graph_trace->contextId, | ||
.stream_id = graph_trace->streamId, | ||
.graph_id = graph_trace->graphId, | ||
/* .type = */ CuptiTracerEventType::CudaGraph, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. XLA is configured to build using C++ 17. However, this is a C++ 20 feature, resulting in the following error when trying to compile on Windows:
|
||
/* .source = */ CuptiTracerEventSource::Activity, | ||
/* .name = */ absl::StrCat("CudaGraphExec:", graph_trace->graphId), | ||
/* .annotation = */ info.annotation, | ||
/* .nvtx_range = */ info.nvtx_range, | ||
/* .start_time_ns = */ graph_trace->start, | ||
/* .end_time_ns = */ graph_trace->end, | ||
/* .device_id = */ graph_trace->deviceId, | ||
/* .correlation_id = */ graph_trace->correlationId, | ||
/* .context_id = */ graph_trace->contextId, | ||
/* .stream_id = */ graph_trace->streamId, | ||
/* .graph_id = */ graph_trace->graphId, | ||
}); | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -56,7 +56,7 @@ struct MemcpyDetails { | |
int8_t dst_mem_kind; | ||
|
||
// ID of the hardware channel on which this operation ran. | ||
uint32_t channel_id = -1; | ||
uint32_t channel_id = static_cast<uint32_t>(-1); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This resulted in an implicit type narrowing error (I believe it was C2397). The explicit static cast fixes it. |
||
// CUpti_ChannelType of the channel above. | ||
int8_t channel_type = 0; // CUPTI_CHANNEL_TYPE_INVALID | ||
}; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -199,13 +199,17 @@ StreamExecutorGpuCompiler::Compile(CompileOptions options, | |
#endif | ||
} | ||
|
||
#if TENSORFLOW_USE_ROCM | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The nested macro seems to not be supported by MSVC. Pushing the inner ifdef outside the other macro seems to work and doesn't change the behavior/functionality of the code here. |
||
STREAM_EXECUTOR_REGISTER_MODULE_INITIALIZER(pjrt_register_se_gpu_compiler, { | ||
PjRtRegisterCompiler( | ||
#if TENSORFLOW_USE_ROCM | ||
RocmName(), | ||
RocmName(), | ||
std::make_unique<StreamExecutorGpuCompiler>()); | ||
}); | ||
#else | ||
CudaName(), | ||
#endif | ||
std::make_unique<StreamExecutorGpuCompiler>()); | ||
STREAM_EXECUTOR_REGISTER_MODULE_INITIALIZER(pjrt_register_se_gpu_compiler, { | ||
PjRtRegisterCompiler( | ||
CudaName(), | ||
std::make_unique<StreamExecutorGpuCompiler>()); | ||
}); | ||
#endif | ||
} // namespace xla |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,7 +41,7 @@ void EigenConv2DImpl( | |
Eigen::Index padding_y_after, Eigen::Index lhs_x_dilation, | ||
Eigen::Index lhs_y_dilation, Eigen::Index rhs_x_dilation, | ||
Eigen::Index rhs_y_dilation, Eigen::Index feature_group_count, | ||
std::optional<std::function<void()>> done_callback = std::nullopt) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This resulted in this error:
I just removed a couple default arguments that were causing this error and propagated them at call sites where they were missing. |
||
std::optional<std::function<void()>> done_callback) { | ||
const Eigen::TensorMap<Eigen::Tensor<const ScalarType, 4, Eigen::RowMajor>, | ||
Eigen::Aligned> | ||
input(lhs, input_batch, input_x, input_y, input_channels); | ||
|
@@ -129,7 +129,7 @@ void EigenConv3DImpl( | |
Eigen::Index lhs_z_dilation, Eigen::Index rhs_x_dilation, | ||
Eigen::Index rhs_y_dilation, Eigen::Index rhs_z_dilation, | ||
Eigen::Index feature_group_count, | ||
std::optional<std::function<void()>> done_callback = std::nullopt) { | ||
std::optional<std::function<void()>> done_callback) { | ||
using ConstTType = | ||
Eigen::TensorMap<Eigen::Tensor<const ScalarType, 5, Eigen::RowMajor>, | ||
Eigen::Aligned>; | ||
|
@@ -223,7 +223,7 @@ void EigenConv3DImpl( | |
Eigen::Index padding_y_after, Eigen::Index lhs_x_dilation, \ | ||
Eigen::Index lhs_y_dilation, Eigen::Index rhs_x_dilation, \ | ||
Eigen::Index rhs_y_dilation, Eigen::Index feature_group_count, \ | ||
std::optional<std::function<void()>> done_callback = std::nullopt) | ||
std::optional<std::function<void()>> done_callback) | ||
|
||
CONV2D_EXTERN_TEMPLATE(Eigen::DefaultDevice, Eigen::half); | ||
CONV2D_EXTERN_TEMPLATE(Eigen::DefaultDevice, float); | ||
|
@@ -249,7 +249,7 @@ CONV2D_EXTERN_TEMPLATE(Eigen::ThreadPoolDevice, float); | |
Eigen::Index lhs_z_dilation, Eigen::Index rhs_x_dilation, \ | ||
Eigen::Index rhs_y_dilation, Eigen::Index rhs_z_dilation, \ | ||
Eigen::Index feature_group_count, \ | ||
std::optional<std::function<void()>> done_callback = std::nullopt) | ||
std::optional<std::function<void()>> done_callback) | ||
|
||
CONV3D_EXTERN_TEMPLATE(Eigen::DefaultDevice, Eigen::half); | ||
CONV3D_EXTERN_TEMPLATE(Eigen::DefaultDevice, float); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -101,7 +101,11 @@ KernelArgsPacking ArgsPacking(int32_t m, int32_t n, int32_t k, | |
// object constructed in the storage. For now we ignore it, and it's textbook | ||
// definition of UB, but for CUTLASS kernels we use today it's perfectly safe. | ||
struct Params { | ||
#if defined(_MSC_VER) | ||
alignas(64) std::byte storage[1024]; | ||
#else | ||
alignas(128) std::byte storage[1024]; | ||
#endif | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This results in the following error on Windows:
cc @dimvar who previously made the change from |
||
}; | ||
|
||
return [=](const se::Kernel& kernel, const se::KernelArgs& args) -> Packed { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is mostly self-explanatory. In Windows, the CUDA version looks something like
64_121
. There are other parts of the build that already handle such version numbers but it was not being handled properly here.