Commit 8490225

Merge branch 'master' into autocast_global_tensor
ShawnXuan authored Dec 31, 2024
2 parents 411a930 + 9358ac7 commit 8490225
Showing 9 changed files with 48 additions and 404 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/community_release.yml
@@ -7,7 +7,7 @@ on:
   schedule:
     # beijing: 6 pm.
     # utc: 10 am.
-    - cron: "0 10 * * *"
+    - cron: "0 10 * * sat"
   workflow_dispatch:
     inputs:
       priv_branch:
2 changes: 1 addition & 1 deletion .github/workflows/priv_release.yml
@@ -7,7 +7,7 @@ on:
   schedule:
     # beijing: 12 pm.
     # utc: 4 am.
-    - cron: "0 4 * * *"
+    - cron: "0 4 * * sun"
   workflow_dispatch:
     inputs:
       priv_branch:
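
Taken together, the two workflow edits keep each job's time of day but move it from daily to weekly: in the five-field cron syntax (minute hour day-of-month month day-of-week), "0 10 * * *" fires every day at 10:00 UTC, while "0 10 * * sat" fires only on Saturdays; likewise "0 4 * * sun" runs only on Sundays at 04:00 UTC. The community release therefore becomes a weekly Saturday build and the priv release a weekly Sunday build.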
10 changes: 10 additions & 0 deletions cmake/cuda.cmake
@@ -89,6 +89,16 @@ if(BUILD_CUDA)
     # GeForce RTX 30xx
     list(APPEND CMAKE_CUDA_ARCHITECTURES 86-real)
   endif()
+
+  if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8")
+    # GeForce RTX 40xx
+    list(APPEND CMAKE_CUDA_ARCHITECTURES 89-real)
+  endif()
+
+  if(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
+    # H100, H20
+    list(APPEND CMAKE_CUDA_ARCHITECTURES 90-real)
+  endif()
 endif()
 
 foreach(CUDA_ARCH ${CMAKE_CUDA_ARCHITECTURES})
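
The two new guards extend the default build targets to newer GPU generations: compute capability 8.9 (GeForce RTX 40xx) can only be compiled for with CUDA 11.8+, and 9.0 (H100, H20) with CUDA 12.0+. As a quick way to see which of these -real architectures a given machine actually needs, here is a minimal sketch using only the CUDA runtime API (not part of this commit):

#include <cstdio>
#include <cuda_runtime.h>

// Print each visible GPU's compute capability so it can be checked against
// the architectures appended in cmake/cuda.cmake (86-real, 89-real, 90-real, ...).
int main() {
  int count = 0;
  if (cudaGetDeviceCount(&count) != cudaSuccess) { return 1; }
  for (int i = 0; i < count; ++i) {
    cudaDeviceProp prop;
    if (cudaGetDeviceProperties(&prop, i) != cudaSuccess) { continue; }
    // e.g. sm_86 -> RTX 30xx, sm_89 -> RTX 40xx (CUDA >= 11.8), sm_90 -> H100/H20 (CUDA >= 12.0)
    std::printf("device %d: %s, sm_%d%d\n", i, prop.name, prop.major, prop.minor);
  }
  return 0;
}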
9 changes: 8 additions & 1 deletion cmake/third_party/FindCUDNN.cmake
@@ -66,7 +66,14 @@ if(CUDNN_FOUND)
 
   set(CUDNN_INCLUDE_DIRS ${CUDNN_INCLUDE_DIR})
 
-  if(NOT CUDNN_STATIC AND CUDNN_VERSION_MAJOR GREATER_EQUAL 8)
+  if(NOT CUDNN_STATIC AND CUDNN_VERSION_MAJOR GREATER_EQUAL 9)
+    # skipping: libcudnn_adv_infer.so libcudnn_adv_train.so
+    set(CUDNN_DYNAMIC_NAMES libcudnn_cnn.so libcudnn_ops.so)
+    get_filename_component(CUDNN_LIBRARY_DIRECTORY ${CUDNN_LIBRARY} DIRECTORY)
+    foreach(CUDNN_DYNAMIC_NAME ${CUDNN_DYNAMIC_NAMES})
+      list(APPEND CUDNN_LIBRARIES ${CUDNN_LIBRARY_DIRECTORY}/${CUDNN_DYNAMIC_NAME})
+    endforeach()
+  elseif(NOT CUDNN_STATIC AND CUDNN_VERSION_MAJOR GREATER_EQUAL 8)
     # skipping: libcudnn_adv_infer.so libcudnn_adv_train.so
     set(CUDNN_DYNAMIC_NAMES libcudnn_cnn_infer.so libcudnn_cnn_train.so libcudnn_ops_infer.so
         libcudnn_ops_train.so)
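
cuDNN 9 merged the per-phase libraries into single libcudnn_cnn.so and libcudnn_ops.so, which is why the new branch selects a shorter name list before reusing the same directory-join loop; the pre-9 branch keeps the split _infer/_train names. A self-contained C++ sketch of that version-to-name-list mapping (a hypothetical helper, not code from this commit):

#include <string>
#include <vector>

// Mirrors the FindCUDNN.cmake branches: pick the dynamic-library names to
// link for a given cuDNN major version (the adv_* libraries stay skipped).
std::vector<std::string> CudnnDynamicNames(int cudnn_major) {
  if (cudnn_major >= 9) { return {"libcudnn_cnn.so", "libcudnn_ops.so"}; }
  if (cudnn_major >= 8) {
    return {"libcudnn_cnn_infer.so", "libcudnn_cnn_train.so",
            "libcudnn_ops_infer.so", "libcudnn_ops_train.so"};
  }
  return {};  // cuDNN 7 and earlier ship a single monolithic libcudnn.so
}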
21 changes: 18 additions & 3 deletions oneflow/core/graph/task_graph.cpp
@@ -880,14 +880,16 @@ DEFINE_BLD_SUB_TASK_GRAPH_METHOD(BldSubTskGphByBoxing) {
     if (device_type != DeviceType::kCPU
         && device_type2sub_tsk_gph_builder_.find(device_type)
                != device_type2sub_tsk_gph_builder_.end()) {
-      status = CHECK_JUST(                                                       // NOLINT
+      auto maybe_status =                                                        // NOLINT
           device_type2sub_tsk_gph_builder_                                       // NOLINT
               .at(device_type)                                                   // NOLINT
               ->Build(sub_tsk_gph_builder_ctx_.get(), in_nodes, &out_nodes,      // NOLINT
                       &sorted_ctrl_tasks, src_parallel_desc, dst_parallel_desc, lbi,  // NOLINT
                       blob_desc, src_nd_sbp, dst_nd_sbp,                         // NOLINT
-                      *(CHECK_JUST(src_op_node->op().GetOpTimeShape()).get()))); // NOLINT
-    } else {
+                      *(CHECK_JUST(src_op_node->op().GetOpTimeShape()).get()));  // NOLINT
+      if (maybe_status.IsOk()) { status = CHECK_JUST(maybe_status); }
+    }
+    if (!status) {
       status = CHECK_JUST(hierarchical_sub_tsk_gph_builder_->Build(
           sub_tsk_gph_builder_ctx_.get(), in_nodes, &out_nodes, &sorted_ctrl_tasks,
           src_parallel_desc, dst_parallel_desc, lbi, blob_desc, src_nd_sbp, dst_nd_sbp,
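
The rewritten branch changes failure handling: previously CHECK_JUST would abort the process if the device-specific builder returned a failed Maybe, whereas now the result is unwrapped only when IsOk(), and an empty status falls through to the hierarchical builder below. A toy sketch of that control flow, with std::optional standing in for oneflow's Maybe (all names hypothetical):

#include <iostream>
#include <optional>
#include <string>

// Toy stand-ins; the real builders return Maybe<SubTskGphBuilderStatus>.
std::optional<std::string> TryDeviceSpecificBuild(bool can_handle) {
  if (can_handle) { return std::string("device-specific boxing plan"); }
  return std::nullopt;  // the builder rejected this src/dst placement combination
}

std::string HierarchicalBuild() { return "generic hierarchical boxing plan"; }

int main() {
  std::optional<std::string> status;
  auto maybe_status = TryDeviceSpecificBuild(/*can_handle=*/false);
  if (maybe_status.has_value()) { status = maybe_status; }  // unwrap only on success
  if (!status) { status = HierarchicalBuild(); }            // fall back instead of aborting
  std::cout << *status << "\n";
  return 0;
}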
@@ -1052,6 +1054,12 @@ Maybe<void> GlobalTaskGraph::Init() {
   OpGraph* op_graph = Singleton<OpGraph>::Get();
   sub_tsk_gph_builder_ctx_.reset(new SubTskGphBuilderCtx(this));
   boxing_logger_ = CreateBoxingLogger();
+  // Register the corresponding task graph builder for each device type and store it in the map
+  const auto* global_device_type_create_sub_tsk_gph_builder_fn =
+      GlobalDeviceType2CreateSubTskGphBuilderFn();
+  for (const auto& pair : *global_device_type_create_sub_tsk_gph_builder_fn) {
+    device_type2sub_tsk_gph_builder_.emplace(pair.first, pair.second());
+  }
   hierarchical_sub_tsk_gph_builder_.reset(new DispatchHierarchicalSubTskGphBuilder());
   HashMap<const OpNode*, std::vector<CompTaskNode*>> op_node2sorted_comp_tasks;

@@ -1088,6 +1096,13 @@ Maybe<void> BoxingTaskGraph::Init(
   OpGraph* op_graph = Singleton<OpGraph>::Get();
   sub_tsk_gph_builder_ctx_.reset(new SubTskGphBuilderCtx(this));
   boxing_logger_ = CreateBoxingLogger();
+  // Register the corresponding task graph builder for each device type and store it in the map
+  const auto* global_device_type_create_sub_tsk_gph_builder_fn =
+      GlobalDeviceType2CreateSubTskGphBuilderFn();
+  for (const auto& pair : *global_device_type_create_sub_tsk_gph_builder_fn) {
+    device_type2sub_tsk_gph_builder_.emplace(pair.first, pair.second());
+  }
+
   hierarchical_sub_tsk_gph_builder_.reset(new DispatchHierarchicalSubTskGphBuilder());
 
   const auto& TryCreateSortedCompTaskNodes = [&](const OpNode* op_node) {
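
Both Init() paths now populate device_type2sub_tsk_gph_builder_ from a process-global registry of factory functions, so a new device backend can contribute its own boxing builder without editing task_graph.cpp. A self-contained sketch of that factory-map pattern (all names here are hypothetical, not oneflow's API):

#include <functional>
#include <iostream>
#include <memory>
#include <unordered_map>

enum class DeviceType { kCPU, kCUDA };

struct SubTskGphBuilder {  // toy interface
  virtual ~SubTskGphBuilder() = default;
  virtual const char* Name() const = 0;
};

struct CudaBoxingBuilder : SubTskGphBuilder {
  const char* Name() const override { return "cuda boxing builder"; }
};

using CreateBuilderFn = std::function<std::unique_ptr<SubTskGphBuilder>()>;

// Global device-type -> factory registry; in the real code this is what
// GlobalDeviceType2CreateSubTskGphBuilderFn() returns, filled at registration time.
const std::unordered_map<DeviceType, CreateBuilderFn>* GlobalRegistry() {
  static const std::unordered_map<DeviceType, CreateBuilderFn> registry{
      {DeviceType::kCUDA, [] { return std::make_unique<CudaBoxingBuilder>(); }}};
  return &registry;
}

int main() {
  // Mirrors the new Init() loop: instantiate one builder per registered device type.
  std::unordered_map<DeviceType, std::unique_ptr<SubTskGphBuilder>> builders;
  for (const auto& pair : *GlobalRegistry()) { builders.emplace(pair.first, pair.second()); }
  std::cout << builders.at(DeviceType::kCUDA)->Name() << "\n";
  return 0;
}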
3 changes: 3 additions & 0 deletions oneflow/user/ops/group_norm_op.cpp
@@ -19,9 +19,12 @@ limitations under the License.
 
 namespace oneflow {
 
+DEFINE_ENV_BOOL(ONEFLOW_GROUP_NORM_USE_FP16_DIRECTLY, false);
+
 namespace {
 
 oneflow::DataType InferGnParamDataType(const DataType x_data_type) {
+  if (EnvBool<ONEFLOW_GROUP_NORM_USE_FP16_DIRECTLY>()) { return x_data_type; }
   return (x_data_type == DataType::kFloat16 || x_data_type == DataType::kBFloat16)
              ? DataType::kFloat
              : x_data_type;
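
By default GroupNorm's parameter dtype is promoted to float32 when the input is fp16 or bf16; setting ONEFLOW_GROUP_NORM_USE_FP16_DIRECTLY=1 keeps it in the input dtype, trading numerical headroom for memory and bandwidth. A standalone sketch of the inference rule (toy enum, not oneflow's DataType):

#include <cassert>

enum class DataType { kFloat16, kBFloat16, kFloat };

// Same rule as InferGnParamDataType: half-precision inputs get float32
// parameters unless the escape hatch is enabled.
DataType InferGnParamDataType(DataType x, bool use_fp16_directly) {
  if (use_fp16_directly) { return x; }
  return (x == DataType::kFloat16 || x == DataType::kBFloat16) ? DataType::kFloat : x;
}

int main() {
  assert(InferGnParamDataType(DataType::kFloat16, false) == DataType::kFloat);   // default: promote
  assert(InferGnParamDataType(DataType::kFloat16, true) == DataType::kFloat16);  // flag set: keep fp16
  assert(InferGnParamDataType(DataType::kFloat, false) == DataType::kFloat);     // fp32 unaffected
  return 0;
}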
7 changes: 7 additions & 0 deletions oneflow/user/ops/layer_norm_op.cpp
@@ -18,6 +18,8 @@ limitations under the License.
 
 namespace oneflow {
 
+DEFINE_ENV_BOOL(ONEFLOW_LAYER_NORM_PARAM_KEEP_DIM, false);
+
 namespace {
 
 int64_t ShiftNegativeAxisIfNeed(const Shape& shape, int64_t axis) {

@@ -31,6 +33,11 @@ Shape InferBnParamShape(const Shape& x_shape, const int64_t begin_norm_axis) {
   DimVector bn_param_shape_dim_vec;
   bn_param_shape_dim_vec.insert(bn_param_shape_dim_vec.end(), x_shape.dim_vec().cbegin(),
                                 x_shape.dim_vec().cbegin() + begin_norm_axis);
+  if (EnvBool<ONEFLOW_LAYER_NORM_PARAM_KEEP_DIM>()) {
+    while (bn_param_shape_dim_vec.size() < x_shape.dim_vec().size()) {
+      bn_param_shape_dim_vec.push_back(1);
+    }
+  }
   const Shape bn_param_shape(bn_param_shape_dim_vec);
   return bn_param_shape;
 }
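
With ONEFLOW_LAYER_NORM_PARAM_KEEP_DIM=1 the mean/variance shape keeps the input's rank by padding trailing 1s instead of stopping at begin_norm_axis: for x of shape (2, 8, 16, 4) with begin_norm_axis = 2, the default gives (2, 8) while the flag gives (2, 8, 1, 1), which broadcasts directly against x. A standalone sketch of the shape rule (std::vector in place of oneflow's Shape/DimVector):

#include <cassert>
#include <cstdint>
#include <vector>

// Same rule as InferBnParamShape: take the dims before begin_norm_axis and,
// when keep_dim is requested, pad with 1s back up to the input rank.
std::vector<int64_t> InferBnParamShape(const std::vector<int64_t>& x_shape,
                                       int64_t begin_norm_axis, bool keep_dim) {
  std::vector<int64_t> out(x_shape.begin(), x_shape.begin() + begin_norm_axis);
  if (keep_dim) {
    while (out.size() < x_shape.size()) { out.push_back(1); }
  }
  return out;
}

int main() {
  const std::vector<int64_t> x = {2, 8, 16, 4};
  assert((InferBnParamShape(x, 2, false) == std::vector<int64_t>{2, 8}));        // default
  assert((InferBnParamShape(x, 2, true) == std::vector<int64_t>{2, 8, 1, 1}));   // keep-dim
  return 0;
}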
266 changes: 0 additions & 266 deletions python/oneflow/test/modules/test_normal.py

This file was deleted.
