soda-inria · Vincent-Maladiere · Jan 15, 2025 · Jul 3, 2024 · Jul 3, 2024 · Jan 14, 2025
diff --git a/hazardous/data/_competing_weibull.py b/hazardous/data/_competing_weibull.py
@@ -20,6 +20,25 @@
 
 
 def _censor(y, relative_scale, random_state=None):
+    """Censoring a population based on a relative scale.
+
+    Individuals are censored by sampling a censoring time from
+    a Weibull distribution with shape 1 and scale equal to
+    the mean duration of the target event times the
+    ``relative_scale``.
+
+    Parameters
+    ----------
+    y: ndarray
+        The target population.
+    relative_scale: float
+        Relative scale of the censoring. Setting it to 0 or None
+        disables censoring, setting it to a small value (e.g. 0.5
+        instead of 1.5) will result in a larger fraction of
+        censored individuals.
+
+    """
+
     if relative_scale == 0 or relative_scale is None:
         return y
 
@@ -52,20 +71,57 @@ def make_synthetic_competing_weibull(
 
     Then we sample event durations for each event type from the corresponding
     Weibull distribution parametrized by the sampled shape and scale
-    parameters.
-
-    The shape and scale parameters are returned as features. For each
-    individual, the event type with the shortest duration is kept as the target
-    event (competing risks setting) and its event identifier and duration are
-    returned as the target dataframe.
-
-    A fraction of the individuals are censored by sampling a censoring time
-    from a Weibull distribution with shape 1 and scale equal to the mean
-    duration of the target event times the ``censoring_relative_scale``.
+    parameters. The shape and scale parameters are returned as features.
+
+    Then, we apply the same procedure to sample the duration of the censoring
+    event (event = 0), unless ``censoring_relative_scale`` is None or 0.
+
+    For each individual, the event type with the shortest duration is kept as
+    the target event (competing risks setting) and its event identifier and
+    duration are returned as the target dataframe.
+
+    Parameters
+    ----------
+    n_events: int, default=3
+        Number of events of interest.
+    n_samples: int, default=3000
+        Number of individuals in the population.
+    return_X_y: bool, default=False
+        If True, returns ``(data, target)`` instead of a Bunch object.
+    feature_rounding: int or None, default=2
+        Round the feature values. If None, no rounding will be applied.
+    target_rounding: int or None, default=1
+        Round the column duration of the target. If None, no rounding will
+        be applied.
+    shape_ranges: tuple of shape (n_events, 2)
+        The lower and upper bounds of the shape parameter of each event:
+        `n_samples` shape values per event are drawn uniformly in between.
+    scale_ranges: tuple of shape (n_events, 2)
+        The lower and upper bounds of the scale parameter of each event:
+        `n_samples` scale values per event are drawn uniformly in between.
+    base_scale: int, default=1000
+        Scaling parameter of the ``scale_ranges``.
+    censoring_relative_scale: float, default=1.5
+        Relative scale of the censoring level. Individuals are censored by
+        sampling a censoring time from a Weibull distribution with shape 1
+        and scale equal to the mean duration of the target event times
+        the ``censoring_relative_scale``.
+        Setting ``censoring_relative_scale`` to 0 or None disables censoring.
+        Setting it to a small value (e.g. 0.5 instead of 1.5) will result in a
+        larger fraction of censored individuals.
+    random_state : int, RandomState instance or None, default=None
+        Controls the randomness of the uniform time sampler.
+
+    Returns
+    -------
+    (data, target): tuple if ``return_X_y`` is True
+        A tuple of two dataframes. The first has shape
+        (n_samples, n_features), with each row representing one sample
+        and each column representing one feature. The second dataframe
+        has shape (n_samples, 2) and contains the targets: its first
+        column holds the event identifier (event = 0 represents the censoring
+        event) and its second column holds the duration of the target event.
 
-    Setting ``censoring_relative_scale`` to 0 or None disables censoring.
-    Setting it to a small value (e.g. 0.5 instead of 1.5) will result in a
-    larger fraction of censored individuals.
     """
     rng = check_random_state(random_state)
     all_features = []