[SCALED_RANKING] performance optimization and make the method default

simple-crypto · Feb 23, 2025 · a29064a · a29064a
1 parent f0eb7d8
commit a29064a
Show file tree

Hide file tree

Showing 5 changed files with 81 additions and 68 deletions.
diff --git a/setup.py b/setup.py
@@ -70,7 +70,7 @@ def env_true(env):
 
 print(f"Build config: {noflags=} {portable=} {x86_64_v3=} {rustflags=}.")
 
-scalib_features = ["pyo3/abi3"]
+scalib_features = ["pyo3/abi3", "ntl"]
 
 if env_true("SCALIB_BLIS"):
     scalib_features.append("blis")

diff --git a/src/scalib/postprocessing/rankestimation.py b/src/scalib/postprocessing/rankestimation.py
@@ -97,7 +97,7 @@ def rank_nbin(costs, key, nbins, method="hist"):
         )
 
 
-def rank_accuracy(costs, key, acc_bit=1.0, method="hist", max_nb_bin=2**26):
+def rank_accuracy(costs, key, acc_bit=1.0, method="scaledhist", max_nb_bin=2**26):
     r"""Estimate the rank of the full keys based on scores based on histograms.
 
     Parameters
@@ -118,7 +118,8 @@ def rank_accuracy(costs, key, acc_bit=1.0, method="hist", max_nb_bin=2**26):
     method : string
         Method used to estimate the rank. Can be the following:
 
-        * "hist": using histograms (default).
+        * "scaledhist": using scaled histograms (default).
+        * "hist": using histograms.
         * "ntl": using NTL library, allows better precision.
     max_nb_bin : int, default: 2**26
         Maximum number of bins to use.

diff --git a/src/scalib_ext/ranklib/src/histogram.rs b/src/scalib_ext/ranklib/src/histogram.rs
@@ -18,7 +18,7 @@ pub enum HistogramType {
 
 pub trait Histogram {
     fn new(size: usize) -> Self;
-    fn convolve(&self, other: &Self) -> Self;
+    fn convolve(&mut self, other: &mut Self) -> Self;
     fn coefs_f64(&self) -> Vec<f64>;
     fn coefs_f64_upper(&self) -> Vec<f64>;
     fn from_elems(size: usize, iter: impl Iterator<Item = usize>) -> Self;
@@ -54,7 +54,7 @@ impl Histogram for F64Hist {
             ifft: planner.plan_fft_inverse(2 * size),
         }
     }
-    fn convolve(&self, other: &Self) -> Self {
+    fn convolve(&mut self, other: &mut Self) -> Self {
         assert_eq!(self.state.len(), other.state.len());
         let mut self_tr = {
             let mut tr = self.fft.make_output_vec();
@@ -126,50 +126,50 @@ impl Histogram for ScaledF64Hist {
     /// The _lower and _upper histograms are convolved separatley
     /// Multiply the scales of the histogram objects to keep track of the compression ratio
     /// other: the histogram we want to convolve with
-    fn convolve(&self, other: &Self) -> Self {
+    fn convolve(&mut self, other: &mut Self) -> Self {
         assert_eq!(self.state_lower.len(), other.state_lower.len());
         assert_eq!(self.state_upper.len(), other.state_upper.len());
-        let first_histogram = self.rescale();
-        let second_histogram = other.rescale();
+        self.rescale();
+        other.rescale();
 
         let mut self_tr = {
-            let mut tr = first_histogram.fft.make_output_vec();
-            let mut input = vec![0.0; 2 * first_histogram.state_lower.len()];
+            let mut tr = self.fft.make_output_vec();
+            let mut input = vec![0.0; 2 * self.state_lower.len()];
             input
                 .iter_mut()
-                .zip(first_histogram.state_lower.iter())
+                .zip(self.state_lower.iter())
                 .for_each(|(i, s)| *i = *s);
-            first_histogram.fft.process(&mut input, &mut tr).unwrap();
+            self.fft.process(&mut input, &mut tr).unwrap();
             tr
         };
         let mut self_tr_upper = {
-            let mut tr = first_histogram.fft.make_output_vec();
-            let mut input = vec![0.0; 2 * first_histogram.state_upper.len()];
+            let mut tr = self.fft.make_output_vec();
+            let mut input = vec![0.0; 2 * self.state_upper.len()];
             input
                 .iter_mut()
-                .zip(first_histogram.state_upper.iter())
+                .zip(self.state_upper.iter())
                 .for_each(|(i, s)| *i = *s);
-            first_histogram.fft.process(&mut input, &mut tr).unwrap();
+            self.fft.process(&mut input, &mut tr).unwrap();
             tr
         };
         let other_tr = {
-            let mut tr = first_histogram.fft.make_output_vec();
-            let mut input = vec![0.0; 2 * first_histogram.state_lower.len()];
+            let mut tr = self.fft.make_output_vec();
+            let mut input = vec![0.0; 2 * self.state_lower.len()];
             input
                 .iter_mut()
-                .zip(second_histogram.state_lower.iter())
-                .for_each(|(i, s)| *i = *s / (first_histogram.state_lower.len() as f64 * 2.0));
-            first_histogram.fft.process(&mut input, &mut tr).unwrap();
+                .zip(other.state_lower.iter())
+                .for_each(|(i, s)| *i = *s / (self.state_lower.len() as f64 * 2.0));
+            self.fft.process(&mut input, &mut tr).unwrap();
             tr
         };
         let other_tr_upper = {
-            let mut tr = first_histogram.fft.make_output_vec();
-            let mut input = vec![0.0; 2 * first_histogram.state_upper.len()];
+            let mut tr = self.fft.make_output_vec();
+            let mut input = vec![0.0; 2 * self.state_upper.len()];
             input
                 .iter_mut()
-                .zip(second_histogram.state_upper.iter())
-                .for_each(|(i, s)| *i = *s / (first_histogram.state_upper.len() as f64 * 2.0));
-            first_histogram.fft.process(&mut input, &mut tr).unwrap();
+                .zip(other.state_upper.iter())
+                .for_each(|(i, s)| *i = *s / (self.state_upper.len() as f64 * 2.0));
+            self.fft.process(&mut input, &mut tr).unwrap();
             tr
         };
         self_tr
@@ -181,28 +181,24 @@ impl Histogram for ScaledF64Hist {
             .zip(other_tr_upper.iter())
             .for_each(|(s, o)| *s *= *o);
 
-        let mut res = vec![0.0; 2 * first_histogram.state_lower.len()];
-        let mut res_upper = vec![0.0; 2 * first_histogram.state_upper.len()];
-        first_histogram
-            .ifft
-            .process(&mut self_tr, &mut res)
-            .unwrap();
-        first_histogram
-            .ifft
+        let mut res = vec![0.0; 2 * self.state_lower.len()];
+        let mut res_upper = vec![0.0; 2 * self.state_upper.len()];
+        self.ifft.process(&mut self_tr, &mut res).unwrap();
+        self.ifft
             .process(&mut self_tr_upper, &mut res_upper)
             .unwrap();
         return Self {
-            state_lower: res[..first_histogram.state_lower.len()]
+            state_lower: res[..self.state_lower.len()]
                 .iter()
                 .map(|x| x.round())
                 .collect(),
-            fft: first_histogram.fft.clone(),
-            ifft: first_histogram.ifft.clone(),
-            state_upper: res_upper[..first_histogram.state_upper.len()]
+            fft: self.fft.clone(),
+            ifft: self.ifft.clone(),
+            state_upper: res_upper[..self.state_upper.len()]
                 .iter()
                 .map(|x| x.round())
                 .collect(),
-            scale: first_histogram.scale * second_histogram.scale,
+            scale: self.scale * other.scale,
         };
     }
     fn coefs_f64(&self) -> Vec<f64> {
@@ -242,33 +238,20 @@ impl ScaledF64Hist {
     /// Check whether the sum of the bin counts exceeds the predefined limit for safe convolution
     /// If exceeds, adjust the state values by a scaling factor to ensure they remain within the convolution limit
     /// Multiply the original scale by the new scaling factor to accurately track the total compression of the histogram
-    fn rescale(&self) -> Self {
+    fn rescale(&mut self) {
         let sum: f64 = self.state_upper.iter().sum();
-        let scaler = if sum > CONV_LIMIT {
-            sum / CONV_LIMIT
-        } else {
-            1.0
-        };
-        let new_scale = self.scale * scaler;
-        let temp_scaler: f64 = 1.0 / scaler;
-        let new_state: Vec<f64> = self
-            .state_lower
-            .iter()
-            .map(|s| (s * temp_scaler).floor())
-            .collect();
-        let new_upper_state: Vec<f64> = self
-            .state_upper
-            .iter()
-            .map(|s| (s * temp_scaler).ceil())
-            .collect();
-
-        return Self {
-            state_lower: new_state,
-            fft: self.fft.clone(),
-            ifft: self.ifft.clone(),
-            state_upper: new_upper_state,
-            scale: new_scale,
-        };
+        if sum > CONV_LIMIT {
+            let scaler: f64 = sum / CONV_LIMIT;
+            let temp_scaler: f64 = 1.0 / scaler;
+            self.scale *= scaler;
+            self.state_lower
+                .iter_mut()
+                .zip(self.state_upper.iter_mut())
+                .for_each(|(low, up)| {
+                    *low = (*low * temp_scaler).floor();
+                    *up = (*up * temp_scaler).ceil();
+                });
+        }
     }
 }
 
@@ -280,7 +263,7 @@ impl Histogram for BigNumHist {
     fn new(nb_bins: usize) -> Self {
         return Self(unsafe { bnp::bnp_new_ZZX() }, nb_bins);
     }
-    fn convolve(&self, other: &Self) -> Self {
+    fn convolve(&mut self, other: &mut Self) -> Self {
         assert_eq!(self.1, other.1);
         let res = Self::new(self.1);
         unsafe {

diff --git a/src/scalib_ext/ranklib/src/rank.rs b/src/scalib_ext/ranklib/src/rank.rs
@@ -187,8 +187,8 @@ impl RankProblem {
                         .filter(|bin| *bin < nb_bins),
                 )
             })
-            .fold(None, |acc: Option<H>, hist| {
-                acc.map(|x| x.convolve(&hist)).or(Some(hist))
+            .fold(None, |acc: Option<H>, mut hist| {
+                acc.map(|mut x| x.convolve(&mut hist)).or(Some(hist))
             })
             .expect("Some subkey");
         return Ok((hist, bin_size));

diff --git a/tests/test_postprocessing.py b/tests/test_postprocessing.py
@@ -38,6 +38,9 @@ def test_rank_accuracy():
     assert np.log2(rmax) - np.log2(rmin) <= acc
 
 
+import time
+
+
 # Compare the ntl and scaled histogram implementation with a normal probability distribution
 def test_rank_accuracy_scaled_vs_ntl():
     nc = 256
@@ -61,6 +64,32 @@ def test_rank_accuracy_scaled_vs_ntl():
     assert np.abs(lrmax - lrmax_scaled) <= max_error
 
 
+# Compare the ntl and scaled histogram implementation in rand edge cases
+# The normal histogram implementation would most likely return negative ranks
+def test_rank_accuracy_scaled_edge_cases():
+    nc = 256
+    nsubkeys = 16
+    max_error = 3.0
+    k_probs = np.zeros((nsubkeys, nc))
+    secret_key = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+
+    for j in range(nsubkeys):
+        for i in range(nc):
+            if i < 6:
+                k_probs[j][i] = 1 / random.randint(2, 5)
+            else:
+                k_probs[j][i] = 1 / 16
+
+    rmin, r, rmax = rank_accuracy(-np.log10(k_probs), secret_key, method="histbignum")
+    lrmin, lr, lrmax = (np.log2(rmin), np.log2(r), np.log2(rmax))
+    rmin, r, rmax = rank_accuracy(
+        -np.log10(k_probs), secret_key, method="scaledhist", acc_bit=7.0
+    )
+    lrmin_scaled, lr_scaled, lrmax_scaled = (np.log2(rmin), np.log2(r), np.log2(rmax))
+    rmin, r, rmax = rank_accuracy(-np.log10(k_probs), secret_key)
+    assert np.abs(lrmin - lrmin_scaled) <= max_error
+
+
 # Compare the ntl and scaled histogram implementation in a known edge case
 # The normal histogram implementation would return negative ranks
 def test_rank_accuracy_scaled_edge_case():