Add SELU activation function
- Now, users can use the SELU activation function, as in PyTorch or TensorFlow.
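
As a concrete illustration, a model built through the C++ API could now request the new activation roughly like this (a minimal sketch: createModel, addLayer, Input, and FullyConnected are the usual ccapi boilerplate and are assumed here; only the SELU() helper comes from this commit):

#include <layer.h>
#include <model.h>

// Hedged sketch: wires the new SELU activation into a small model.
// Everything except the SELU() helper itself is assumed ccapi boilerplate.
void build_selu_example() {
  auto model = ml::train::createModel(ml::train::ModelType::NEURAL_NET);
  model->addLayer(ml::train::layer::Input({"input_shape=1:1:10"}));
  model->addLayer(ml::train::layer::FullyConnected({"unit=4"}));
  model->addLayer(ml::train::layer::SELU()); // helper added by this commit
}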

**Self evaluation:**
1. Build test:     [X]Passed [ ]Failed [ ]Skipped
2. Run test:     [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: kimhan0515 <[email protected]>
kimhan0515 committed Apr 29, 2024
1 parent f9a4cd4 commit 65a9eec
Showing 4 changed files with 90 additions and 0 deletions.
8 changes: 8 additions & 0 deletions api/ccapi/include/layer.h
@@ -602,6 +602,14 @@ ELU(const std::vector<std::string> &properties = {}) {
return Activation("Activation=elu", properties);
}

/**
* @brief Helper function to create selu activation layer
*/
inline std::unique_ptr<Layer>
SELU(const std::vector<std::string> &properties = {}) {
return Activation("Activation=selu", properties);
}

/**
* @brief Helper function to create mish activation layer
*/
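As the definition above shows, the new helper mirrors the existing ELU one and simply forwards to the generic Activation factory, so the two calls below should be interchangeable (a sketch; the ml::train::layer namespace and the Activation helper's default-argument form are assumed from the surrounding header rather than shown in this diff):

// Two equivalent ways to obtain the layer, per the helper definition above:
auto a = ml::train::layer::SELU();                        // new helper
auto b = ml::train::layer::Activation("Activation=selu"); // what it forwards to
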
29 changes: 29 additions & 0 deletions nntrainer/layers/acti_func.h
@@ -81,6 +81,9 @@ class ActiFunc {
case ActivationType::ACT_ELU:
this->setActivation<T>(elu<T>, eluPrime<T>);
break;
case ActivationType::ACT_SELU:
this->setActivation<T>(selu<T>, seluPrime<T>);
break;
case ActivationType::ACT_SOFTPLUS:
this->setActivation<T>(softplus<T>, softplusPrime<T>);
break;
@@ -483,6 +486,30 @@ class ActiFunc {
: static_cast<T>(alpha * exp(x));
}

/**
* @brief selu function
* @tparam T type of an input/output
* @param x input
* @return T type output
*/
template <typename T = float> static T selu(T x) {
return x > static_cast<T>(0.0)
? static_cast<T>(selu_scale * x)
: static_cast<T>(selu_scale * selu_alpha * (exp(x) - 1));
}

/**
* @brief selu prime function
* @tparam T type of an input/output
* @param x input
* @return T type output
*/
template <typename T = float> static T seluPrime(T x) {
return x > static_cast<T>(0.0)
? static_cast<T>(selu_scale)
: static_cast<T>(selu_scale * selu_alpha * exp(x));
}

/**
* @brief mish activation function
* @param[in] x input
@@ -667,6 +694,8 @@ class ActiFunc {
private:
constexpr static inline float alpha = 1.0f; /**< alpha for elu */
constexpr static inline float beta = 1.0f; /**< beta for Softplus */
constexpr static inline float selu_alpha = 1.67326324f; /**< alpha for selu */
constexpr static inline float selu_scale = 1.05070098f; /**< scale for selu */

std::function<Tensor &(Tensor const &, Tensor &)> _act_fn;
std::function<Tensor &(Tensor const &, Tensor &, Tensor &, Tensor const &)>
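In formula form, the code added above implements

    selu(x)  = scale * x                        for x > 0
             = scale * alpha * (exp(x) - 1)     for x <= 0

    selu'(x) = scale                            for x > 0
             = scale * alpha * exp(x)           for x <= 0

with scale = 1.05070098 and alpha = 1.67326324, the self-normalizing constants from Klambauer et al., "Self-Normalizing Neural Networks" (2017); the same defaults are used by torch.nn.SELU and tf.keras.
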
1 change: 1 addition & 0 deletions nntrainer/layers/common_properties.h
@@ -39,6 +39,7 @@ enum class ActivationType {
ACT_SOFTPLUS, /**< softplus */
ACT_LEAKY_RELU, /**< Leaky ReLU */
ACT_ELU, /**< ELU */
ACT_SELU, /**< SELU */
ACT_MISH, /**< Mish */
ACT_NONE, /**< no op */
ACT_UNKNOWN /**< unknown */
52 changes: 52 additions & 0 deletions test/unittest/unittest_nntrainer_activations.cpp
@@ -517,6 +517,58 @@ TEST(nntrainer_activation, eluPrime_01_p) {
}
}

TEST(nntrainer_activation, selu_01_p) {
int batch = 3;
int channel = 1;
int height = 1;
int width = 10;

float answer[30] = {
-0.57961011, -0.45566735, -0.31868932, -0.16730525, 0.00000000,
0.10507011, 0.21014021, 0.31521031, 0.42028043, 0.52535051,
-0.96813440, -0.79323399, -0.57961011, -0.31868932, 0.00000000,
0.21014021, 0.42028043, 0.63042063, 0.84056085, 1.05070102,
-1.22856998, -1.04330945, -0.79323399, -0.45566735, 0.00000000,
0.31521031, 0.63042063, 0.94563091, 1.26084125, 1.57605147};

nntrainer::Tensor input(batch, channel, height, width);
GEN_TEST_INPUT(input, (l - 4) * 0.1 * (i + 1));

nntrainer::Tensor selu_result =
input.apply<float>(nntrainer::ActiFunc::selu<float>);
float *data = selu_result.getData();
ASSERT_NE(nullptr, data);

for (int i = 0; i < batch * height * width; ++i) {
EXPECT_NEAR(data[i], answer[i], tolerance);
}
}

TEST(nntrainer_activation, seluPrime_01_p) {
int batch = 3;
int channel = 1;
int height = 1;
int width = 10;
float answer[30] = {
1.17848921, 1.30243194, 1.43940997, 1.59079409, 1.75809932, 1.05070102,
1.05070102, 1.05070102, 1.05070102, 1.05070102, 0.78996491, 0.96486533,
1.17848921, 1.43940997, 1.75809932, 1.05070102, 1.05070102, 1.05070102,
1.05070102, 1.05070102, 0.52952927, 0.71478987, 0.96486533, 1.30243194,
1.75809932, 1.05070102, 1.05070102, 1.05070102, 1.05070102, 1.05070102};

nntrainer::Tensor input(batch, channel, height, width);
GEN_TEST_INPUT(input, (l - 4) * 0.1 * (i + 1));

nntrainer::Tensor selu_prime_result =
input.apply<float>(nntrainer::ActiFunc::seluPrime<float>);
float *data = selu_prime_result.getData();
ASSERT_NE(nullptr, data);

for (int i = 0; i < batch * height * width; ++i) {
EXPECT_NEAR(data[i], answer[i], tolerance);
}
}

TEST(nntrainer_activation, mish_01_p) {
int batch = 3;
int channel = 1;
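For reference, the expected "answer" arrays in the two tests above can be reproduced with a short standalone program (a sketch, not part of the commit; it assumes GEN_TEST_INPUT(input, (l - 4) * 0.1 * (i + 1)) fills each element from the batch index i = 0..2 and the width index l = 0..9):

#include <cmath>
#include <cstdio>

int main() {
  const float scale = 1.05070098f; // selu_scale in acti_func.h
  const float alpha = 1.67326324f; // selu_alpha in acti_func.h

  for (int i = 0; i < 3; ++i) {    // batch index
    for (int l = 0; l < 10; ++l) { // width index
      float x = (l - 4) * 0.1f * (i + 1);
      // Same piecewise definitions as ActiFunc::selu / ActiFunc::seluPrime.
      float y = x > 0.0f ? scale * x : scale * alpha * (std::exp(x) - 1.0f);
      float dy = x > 0.0f ? scale : scale * alpha * std::exp(x);
      std::printf("x=% .2f  selu=% .8f  seluPrime=% .8f\n", x, y, dy);
    }
  }
  return 0;
}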
