Various small changes

- Added calculation of minimum false positive risk from p-values in AOVSTAT output of the `bootlm` function - Added calculation of minimum false positive risk from p-values calculated in `randtest2` and return them as the 3rd output argument. PERMSTAT is now the 4th (not 3rd output argument of `randtest2` - Made change in `bootlm` to compute minimum false positive risk from multiplicity-adjusted p-values (not the original p-values) after posthoc tests - Added calculation of minimum false positive risk from p-values in the `boot1way` function and add them as 10th column to the C output argument. Note that the order of columns in C has changed; the t-ratio, multiplicity-adjusted p-value and the minimum false positive risk are the last three columns.
gnu-octave · Dec 26, 2023 · cb962ed · cb962ed
1 parent fe06a1f
commit cb962ed
Show file tree

Hide file tree

Showing 4 changed files with 172 additions and 67 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 name: statistics-resampling
-version: 5.4.4
-date: 2023-12-24
+version: 5.5.0
+date: 2023-12-26
 author: Andrew Penn <[email protected]>
 maintainer: Andrew Penn <[email protected]>
 title: A statistics package with a variety of resampling tools

diff --git a/inst/boot1way.m b/inst/boot1way.m
@@ -80,15 +80,16 @@
 %
 %     '[PVAL, C] = boot1way (DATA, GROUP, ...)' also returns a 9 column matrix
 %     that summarises multiple comparison test results. The columns of C are:
-%       - column 1: test GROUP number
-%       - column 2: reference GROUP number
-%       - column 3: value of BOOTFUN evaluated for the test GROUP
-%       - column 4: value of BOOTFUN evaluated for the reference GROUP
-%       - column 5: the difference between the groups (column 3 minus column 4)
-%       - column 6: t-ratio
-%       - column 7: multiplicity-adjusted p-value
-%       - column 8: LOWER bound of the 100*(1-ALPHA)% bootstrap-t CI
-%       - column 9: UPPER bound of the 100*(1-ALPHA)% bootstrap-t CI
+%       - column 1:  test GROUP number
+%       - column 2:  reference GROUP number
+%       - column 3:  value of BOOTFUN evaluated for the test GROUP
+%       - column 4:  value of BOOTFUN evaluated for the reference GROUP
+%       - column 5:  the difference between the groups (column 3 minus column 4)
+%       - column 6:  LOWER bound of the 100*(1-ALPHA)% bootstrap-t CI
+%       - column 7:  UPPER bound of the 100*(1-ALPHA)% bootstrap-t CI
+%       - column 8:  t-ratio
+%       - column 9:  multiplicity-adjusted p-value
+%       - column 10: minimum false positive risk for the p-value
 %
 %     '[PVAL, C, STATS] = boot1way (DATA, GROUP, ...)' also returns a structure 
 %     containing additional statistics. The stats structure contains the 
@@ -100,11 +101,13 @@
 %       ref      - index of the reference group
 %       groups   - group index and BOOTFUN for each group with sample size,
 %                  standard error and CI, which start to overlap at a
-%                  multiplicity adjusted p-value of approximately 0.05
+%                  multiplicity-adjusted p-value of approximately 0.05
 %       Var      - weighted mean (pooled) sampling variance
-%       nboot    - number of bootstrap resamples
+%       nboot    - number of bootstrap resamples (1st and 2nd resampling layers)
 %       alpha    - two-tailed significance level for the CI reported in C.
-%       bootstat - test statistic computed for each bootstrap resample 
+%
+%     '[PVAL, C, STATS, BOOTSTAT] = boot1way (DATA, GROUP, ...)' also returns
+%     the maximum test statistic computed for each bootstrap resample
 %
 %     '[...] = boot1way (..., 'display', DISPLAYOPT)' a logical value (true 
 %      or false) used to specify whether to display the results and plot the
@@ -115,7 +118,7 @@
 %        New York, NY: Chapman & Hall
 %
 %  boot1way (version 2023.10.04)
-%  Bootstrap Null Hypothesis Significance Test
+%  Bootstrap tests for comparing independent groups in a one-way layout
 %  Author: Andrew Charles Penn
 %  https://www.researchgate.net/profile/Andrew_Penn/
 %
@@ -134,7 +137,7 @@
 %  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 
-function [pval, c, stats] = boot1way (data, group, varargin)
+function [pval, c, stats, Q] = boot1way (data, group, varargin)
 
   % Evaluate the number of function arguments
   if (nargin < 2)
@@ -149,7 +152,7 @@
   % Check if running in Octave (else assume Matlab)
   info = ver; 
   ISOCTAVE = any (ismember ({info.Name}, 'Octave'));
-  
+
   % Apply defaults
   bootfun = 'mean';
   nboot = [999,99];
@@ -268,8 +271,8 @@
   if (~ isempty (ref) && strcmpi (ref,'pairwise'))
     ref = [];
   end
-  if (nargout > 3)
-    error ('boot1way: Only supports up to 3 output arguments')
+  if (nargout > 4)
+    error ('boot1way: Only supports up to 4 output arguments')
   end
   if (~ islogical (DisplayOpt) || (numel (DisplayOpt) > 1))
     error ('boot1way: The value DISPLAYOPT must be a logical scalar value')
@@ -485,47 +488,49 @@
     ridx = (M(:,1) == 0);
     M(ridx, :) = [];
     n = size (M, 1);
-    c = zeros (n, 9);
+    c = zeros (n, 10);
     c(:,1:2) = M;
     for i = 1:n
       c(i,3) = theta(c(i,1));
       c(i,4) = theta(c(i,2));
       c(i,5) = c(i,3) - c(i,4);
       SED = sqrt (Var * (w(c(i,1)) + w(c(i,2))));
-      c(i,6) = c(i,5) / SED;
-      if (c(i,6) < Q(1))
-        c(i,7) = interp1 (Q, P, abs (c(i,6)), 'linear', 1);
+      c(i,6) = c(i,5) - SED * interp1 (F, Q, 1 - alpha, 'linear', max (Q));
+      c(i,7) = c(i,5) + SED * interp1 (F, Q, 1 - alpha, 'linear', max (Q));
+      c(i,8) = c(i,5) / SED;
+      if (c(i,8) < Q(1))
+        c(i,9) = interp1 (Q, P, abs (c(i,8)), 'linear', 1);
       else
-        c(i,7) = interp1 (Q, P, abs (c(i,6)), 'linear', res_lim);
+        c(i,9) = interp1 (Q, P, abs (c(i,8)), 'linear', res_lim);
       end
-      c(i,8) = c(i,5) - SED * interp1 (F, Q, 1 - alpha, 'linear', max (Q));
-      c(i,9) = c(i,5) + SED * interp1 (F, Q, 1 - alpha, 'linear', max (Q));
     end
+    c(:,10) = pval2fpr (c(:,9));
   else
     % Single-step maxT procedure for treatment vs control comparisons is
     % a resampling version of Dunnett's test
-    c = zeros (k, 9);
+    c = zeros (k, 10);
     c(:,2) = ref;
     c(:,4) = theta (ref);
     for j = 1:k
       c(j,1) = gk(j);
       c(j,3) = theta(c(j,1));
       c(j,5) = c(j,3) - c(j,4); 
       SED = sqrt (Var * (w(c(j,1)) + w(c(j,2))));
-      c(j,6) = c(j,5) / SED;
-      if (c(j,6) < Q(1))
-        c(j,7) = interp1 (Q, P, abs (c(j,6)), 'linear', 1);
+      c(j,6) = c(j,5) - SED * interp1 (F, Q, 1 - alpha, 'linear', max (Q));
+      c(j,7) = c(j,5) + SED * interp1 (F, Q, 1 - alpha, 'linear', max (Q));
+      c(j,8) = c(j,5) / SED;
+      if (c(j,8) < Q(1))
+        c(j,9) = interp1 (Q, P, abs (c(j,8)), 'linear', 1);
       else
-        c(j,7) = interp1 (Q, P, abs (c(j,6)), 'linear', res_lim);
+        c(j,9) = interp1 (Q, P, abs (c(j,8)), 'linear', res_lim);
       end
-      c(j,8) = c(j,5) - SED * interp1 (F, Q, 1 - alpha, 'linear', max (Q));
-      c(j,9) = c(j,5) + SED * interp1 (F, Q, 1 - alpha, 'linear', max (Q));
     end
+    c(:,10) = pval2fpr (c(:,9));
     c(ref,:) = [];
   end
 
   % Assign the calculated p-values to the return value
-  pval = c(:,7);
+  pval = c(:,9);
 
   % Prepare stats output structure
   stats = struct;
@@ -544,11 +549,10 @@
   stats.Var = Var;
   stats.nboot = nboot;
   stats.alpha = alpha;
-  stats.bootstat = Q;
 
   % Print output and plot graph with confidence intervals if no output
   % arguments are requested
-  cols = [1,2,5,6,7]; % columns in c that we want to print data for
+  cols = [1,2,5,8,9]; % columns in c that we want to print data for
   if ((nargout == 0) || (DisplayOpt == true))
     if (~ iscellstr (gnames))
       gnames = cellstr (num2str (gnames));
@@ -586,20 +590,20 @@
         for i = 1:n
           tmp = num2cell (c(i, cols));
           tmp{end} = round (tmp{end} * 1000);
-          if (c(i,7) <= 0.001)
+          if (c(i,9) <= 0.001)
             tmp(end) = [];
             fprintf (cat (2, '| %10u | %10u | %10u | %#+10.4g | %+10.2f |', ...
                              '    <.001 |***\n'), i, tmp{:});
-          elseif (c(i,7) > 0.999)
+          elseif (c(i,9) > 0.999)
             tmp(end) = [];
             fprintf (cat (2, '| %10u | %10u | %10u | %#+10.4g | %+10.2f |', ...
                              '    1.000 |\n'), i, tmp{:});
           else
             fprintf (cat (2, '| %10u | %10u | %10u | %#+10.4g | %+10.2f |', ...
                              '     .%03u |'), i, tmp{:});
-            if (c(i,7) < 0.01)
+            if (c(i,9) < 0.01)
               fprintf ('**\n')
-            elseif (c(i,7) < 0.05)
+            elseif (c(i,9) < 0.05)
               fprintf ('*\n')
             else
               fprintf ('\n')
@@ -610,20 +614,20 @@
         for j = 1:k-1
           tmp = num2cell (c(j, cols));
           tmp{end} = round (tmp{end} * 1000);
-          if (c(j,7) <= 0.001)
+          if (c(j,9) <= 0.001)
             tmp(end) = [];
             fprintf (cat (2, '| %10u | %10u | %10u | %#+10.4g | %+10.2f |', ...
                              '    <.001 |***\n'), j, tmp{:});
-          elseif (c(j,7) > 0.999)
+          elseif (c(j,9) > 0.999)
             tmp(end) = [];
             fprintf (cat (2, '| %10u | %10u | %10u | %#+10.4g | %+10.2f |', ...
                              '    1.000 |\n'), j, tmp{:});
           else
             fprintf (cat (2, '| %10u | %10u | %10u | %#+10.4g | %+10.2f |', ...
                              '     .%03u |'), j, tmp{:});
-            if (c(j,7) < 0.01)
+            if (c(j,9) < 0.01)
               fprintf ('**\n')
-            elseif (c(j,7) < 0.05)
+            elseif (c(j,9) < 0.05)
               fprintf ('*\n')
             else
               fprintf ('\n')
@@ -652,16 +656,16 @@
     ylim ([0.5, nc + 0.5]);            % Set y-axis limits
     hold on
     for i = 1 : nc
-      if (c(i,7) < 0.05)
+      if (c(i,9) < 0.05)
         % Plot marker for the difference estimate
         plot (c(i, 5), i, 'or', 'MarkerFaceColor', 'r');
         % Plot line for each confidence interval
-        plot ([c(i, 8), c(i, 9)], i * ones (2, 1), 'r-');
+        plot ([c(i, 6), c(i, 7)], i * ones (2, 1), 'r-');
       else
         % Plot marker for the difference estimate
         plot (c(i,5), i, 'ob', 'MarkerFaceColor', 'b');
         % Plot line for each confidence interval 
-        plot ([c(i,8), c(i,9)], i * ones (2, 1), 'b-');   
+        plot ([c(i,6), c(i,7)], i * ones (2, 1), 'b-');   
       end
     end
     hold off
@@ -756,6 +760,35 @@
 
 %--------------------------------------------------------------------------
 
+% FUNCTION TO COMPUTE MINIMUM FALSE POSITIVE RISK (FPR)
+
+function fpr = pval2fpr (p)
+
+  % Subfunction to compute minimum false positive risk. These are calculated
+  % from a Bayes factor based on the sampling distributions of the p-value and
+  % that H0 and H1 have equal prior probabilities. This is called the Sellke-
+  % Berger approach.
+  % 
+  % References:
+  %  Held and Ott (2018) On p-Values and Bayes Factors. 
+  %    Annu. Rev. of Stat. Appl. 5:393-419
+  %  David Colquhoun (2019) The False Positive Risk: A Proposal 
+  %    Concerning What to Do About p-Values, The American Statistician, 
+  %    73:sup1, 192-201, DOI: 10.1080/00031305.2018.1529622 
+
+  % Calculate minimum Bayes Factor (P(H0) / P(H1)) by the Sellke-Berger method 
+  logp = min (log (p), -1);
+  minBF = exp (1 + logp + log (-logp));
+
+  % Calculate the false-positive risk from the minumum Bayes Factor
+  L10 = 1 ./ minBF;      % Convert to Maximum Likelihood ratio L10 (P(H1)/P(H0))
+  fpr = max (0, 1 ./ (1 + L10));  % Calculate minimum false positive risk 
+  fpr(isnan(p)) = NaN; 
+
+end
+
+%--------------------------------------------------------------------------
+
 %!demo
 %!
 %! ## EXAMPLE 1A: 
@@ -978,4 +1011,4 @@
 %! g = [zeros(10, 1); ones(10, 1)];
 %! func = @(M) subsref (M(:,2:end) \ M(:,1), ...
 %!                      struct ('type', '()', 'subs', {{2}}));
-%! p = boot1way ([y, X], g, 'bootfun', func, 'DisplayOpt', false);
+%! p = boot1way ([y, X], g, 'bootfun', func, 'DisplayOpt', false);
diff --git a/inst/bootlm.m b/inst/bootlm.m
@@ -334,18 +334,19 @@
 %       - 'estimate': The value of the estimates
 %       - 'CI_lower': The lower bound(s) of the confidence/credible interval(s)
 %       - 'CI_upper': The upper bound(s) of the confidence/credible interval(s)
-%       - 'pval': The p-value(s) for the hypothesis that the estimate(s) = 0
-%       - 'fpr': The false positive risk 
+%       - 'pval': The p-value(s) for the hypothesis that the estimate(s) == 0
+%       - 'fpr': The minimum false positive risk (FPR) for each p-value
 %       - 'N': The number of independnet sampling units used to compute CIs
 %       - 'prior': The prior used for Bayesian bootstrap
 %       - 'levels': A cell array with the levels of each predictor.
 %       - 'contrasts': A cell array of contrasts associated with each predictor.
 %
 %          Note that the p-values returned are truncated at the resolution
 %          limit determined by the number of bootstrap replicates, specifically 
-%          1 / (NBOOT + 1). The following fields are only returned when
-%          'estimate' corresponds to the model regression coefficients:
-%          'levels' and 'contrasts'.
+%          1 / (NBOOT + 1). Values for the minumum false positive risk are
+%          computed using the Sellke-Berger approach. The following fields are
+%          only returned when 'estimate' corresponds to model regression
+%          coefficients: 'levels' and 'contrasts'.
 %
 %     '[STATS, BOOTSTAT] = bootlm (...)' also returns a P x NBOOT matrix of
 %     bootstrap statistics for the estimated parameters, where P is the number
@@ -364,6 +365,7 @@
 %       - 'MS': Mean-squares
 %       - 'F': F-Statistic
 %       - 'PVAL': p-values
+%       - 'FPR': The minimum false positive risk for each p-value
 %       - 'SSE': Sum-of-Squared Error
 %       - 'DFE': Degrees of Freedom for Error
 %       - 'MSE': Mean Squared Error
@@ -1049,6 +1051,8 @@
                                             L, ISOCTAVE);
               % Control the type 1 error rate across multiple comparisons
               STATS.pval = holm (STATS.pval);
+              % Update minimum false positive risk after multiple comparisons
+              STATS.fpr = pval2fpr (STATS.pval);
               % Clean-up
               STATS = rmfield (STATS, {'std_err', 'tstat', 'sse'});
               STATS.prior = [];
@@ -1718,6 +1722,35 @@
 
 %--------------------------------------------------------------------------
 
+% FUNCTION TO COMPUTE MINIMUM FALSE POSITIVE RISK (FPR)
+
+function fpr = pval2fpr (p)
+
+  % Subfunction to compute minimum false positive risk. These are calculated
+  % from a Bayes factor based on the sampling distributions of the p-value and
+  % that H0 and H1 have equal prior probabilities. This is called the Sellke-
+  % Berger approach.
+  % 
+  % References:
+  %  Held and Ott (2018) On p-Values and Bayes Factors. 
+  %    Annu. Rev. of Stat. Appl. 5:393-419
+  %  David Colquhoun (2019) The False Positive Risk: A Proposal 
+  %    Concerning What to Do About p-Values, The American Statistician, 
+  %    73:sup1, 192-201, DOI: 10.1080/00031305.2018.1529622 
+
+  % Calculate minimum Bayes Factor (P(H0) / P(H1)) by the Sellke-Berger method 
+  logp = min (log (p), -1);
+  minBF = exp (1 + logp + log (-logp));
+
+  % Calculate the false-positive risk from the minumum Bayes Factor
+  L10 = 1 ./ minBF;      % Convert to Maximum Likelihood ratio L10 (P(H1)/P(H0))
+  fpr = max (0, 1 ./ (1 + L10));  % Calculate minimum false positive risk 
+  fpr(isnan(p)) = NaN; 
+
+end
+
+%--------------------------------------------------------------------------
+
 % FUNCTION TO PERFORM ANOVA
 
 function AOVSTAT = bootanova (Y, X, DF, DFE, DEP, NBOOT, ALPHA, SEED, ...
@@ -1780,10 +1813,13 @@
     end
   end
 
+  % Compute minimum false positive risk
+  FPR = pval2fpr (PVAL);
+
   % Prepare output
   AOVSTAT = struct ('MODEL', [], 'SS', SS, 'DF', DF(2:end), 'MS', MS, 'F', ...
-                     F, 'PVAL', PVAL, 'SSE', SSE{end}, 'DFE', DFE, ...
-                    'MSE', MSE);
+                     F, 'PVAL', PVAL, 'FPR', FPR, 'SSE', SSE{end}, ...
+                    'DFE', DFE, 'MSE', MSE);
 
 end
 
@@ -2239,9 +2275,7 @@
 
 %!demo
 %!
-%! ## Unbalanced one-way design with custom, orthogonal contrasts. The
-%! ## statistics relating to the contrasts are shown in the table of model
-%! ## parameters, and can be retrieved from the STATS.coeffs output. Data from
+%! ## Unbalanced one-way design with custom, orthogonal contrasts. Data from
 %! ## www.uvm.edu/~statdhtx/StatPages/Unequal-ns/Unequal_n%27s_contrasts.html
 %!
 %! dv =  [ 8.706 10.362 11.552  6.941 10.983 10.092  6.421 14.943 15.931 ...
@@ -2756,9 +2790,9 @@
 %! assert (stats.pval(1), 0.02381212481394462, 1e-09);
 %! assert (stats.pval(2), 0.009547350172112052, 1e-09);
 %! assert (stats.pval(3), 0.1541408530918242, 1e-09);
-%! assert (stats.fpr(1), 0.1254120362272493, 1e-09);
-%! assert (stats.fpr(2), 0.04738586302975815, 1e-09);
-%! assert (stats.fpr(3), 0.4392984660114189, 1e-09);
+%! assert (stats.fpr(1), 0.1947984337990365, 1e-09);
+%! assert (stats.fpr(2), 0.1077143325366211, 1e-09);
+%! assert (stats.fpr(3), 0.4392984660114188, 1e-09);
 
 %!test
 %!