From cb962edc6450951ecb1fa071247d8c49a1a7ae1c Mon Sep 17 00:00:00 2001 From: acp29 Date: Tue, 26 Dec 2023 20:59:19 +0000 Subject: [PATCH] Various small changes - Added calculation of minimum false positive risk from p-values in AOVSTAT output of the `bootlm` function - Added calculation of minimum false positive risk from p-values calculated in `randtest2` and return them as the 3rd output argument. PERMSTAT is now the 4th (not 3rd output argument of `randtest2` - Made change in `bootlm` to compute minimum false positive risk from multiplicity-adjusted p-values (not the original p-values) after posthoc tests - Added calculation of minimum false positive risk from p-values in the `boot1way` function and add them as 10th column to the C output argument. Note that the order of columns in C has changed; the t-ratio, multiplicity-adjusted p-value and the minimum false positive risk are the last three columns. --- DESCRIPTION | 4 +- inst/boot1way.m | 125 ++++++++++++++++++++++++++++++----------------- inst/bootlm.m | 60 ++++++++++++++++++----- inst/randtest2.m | 50 ++++++++++++++++--- 4 files changed, 172 insertions(+), 67 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 51454f0c..321100f9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ name: statistics-resampling -version: 5.4.4 -date: 2023-12-24 +version: 5.5.0 +date: 2023-12-26 author: Andrew Penn maintainer: Andrew Penn title: A statistics package with a variety of resampling tools diff --git a/inst/boot1way.m b/inst/boot1way.m index cfddcc2e..ca9a1b3c 100644 --- a/inst/boot1way.m +++ b/inst/boot1way.m @@ -80,15 +80,16 @@ % % '[PVAL, C] = boot1way (DATA, GROUP, ...)' also returns a 9 column matrix % that summarises multiple comparison test results. The columns of C are: -% - column 1: test GROUP number -% - column 2: reference GROUP number -% - column 3: value of BOOTFUN evaluated for the test GROUP -% - column 4: value of BOOTFUN evaluated for the reference GROUP -% - column 5: the difference between the groups (column 3 minus column 4) -% - column 6: t-ratio -% - column 7: multiplicity-adjusted p-value -% - column 8: LOWER bound of the 100*(1-ALPHA)% bootstrap-t CI -% - column 9: UPPER bound of the 100*(1-ALPHA)% bootstrap-t CI +% - column 1: test GROUP number +% - column 2: reference GROUP number +% - column 3: value of BOOTFUN evaluated for the test GROUP +% - column 4: value of BOOTFUN evaluated for the reference GROUP +% - column 5: the difference between the groups (column 3 minus column 4) +% - column 6: LOWER bound of the 100*(1-ALPHA)% bootstrap-t CI +% - column 7: UPPER bound of the 100*(1-ALPHA)% bootstrap-t CI +% - column 8: t-ratio +% - column 9: multiplicity-adjusted p-value +% - column 10: minimum false positive risk for the p-value % % '[PVAL, C, STATS] = boot1way (DATA, GROUP, ...)' also returns a structure % containing additional statistics. The stats structure contains the @@ -100,11 +101,13 @@ % ref - index of the reference group % groups - group index and BOOTFUN for each group with sample size, % standard error and CI, which start to overlap at a -% multiplicity adjusted p-value of approximately 0.05 +% multiplicity-adjusted p-value of approximately 0.05 % Var - weighted mean (pooled) sampling variance -% nboot - number of bootstrap resamples +% nboot - number of bootstrap resamples (1st and 2nd resampling layers) % alpha - two-tailed significance level for the CI reported in C. -% bootstat - test statistic computed for each bootstrap resample +% +% '[PVAL, C, STATS, BOOTSTAT] = boot1way (DATA, GROUP, ...)' also returns +% the maximum test statistic computed for each bootstrap resample % % '[...] = boot1way (..., 'display', DISPLAYOPT)' a logical value (true % or false) used to specify whether to display the results and plot the @@ -115,7 +118,7 @@ % New York, NY: Chapman & Hall % % boot1way (version 2023.10.04) -% Bootstrap Null Hypothesis Significance Test +% Bootstrap tests for comparing independent groups in a one-way layout % Author: Andrew Charles Penn % https://www.researchgate.net/profile/Andrew_Penn/ % @@ -134,7 +137,7 @@ % along with this program. If not, see . -function [pval, c, stats] = boot1way (data, group, varargin) +function [pval, c, stats, Q] = boot1way (data, group, varargin) % Evaluate the number of function arguments if (nargin < 2) @@ -149,7 +152,7 @@ % Check if running in Octave (else assume Matlab) info = ver; ISOCTAVE = any (ismember ({info.Name}, 'Octave')); - + % Apply defaults bootfun = 'mean'; nboot = [999,99]; @@ -268,8 +271,8 @@ if (~ isempty (ref) && strcmpi (ref,'pairwise')) ref = []; end - if (nargout > 3) - error ('boot1way: Only supports up to 3 output arguments') + if (nargout > 4) + error ('boot1way: Only supports up to 4 output arguments') end if (~ islogical (DisplayOpt) || (numel (DisplayOpt) > 1)) error ('boot1way: The value DISPLAYOPT must be a logical scalar value') @@ -485,26 +488,27 @@ ridx = (M(:,1) == 0); M(ridx, :) = []; n = size (M, 1); - c = zeros (n, 9); + c = zeros (n, 10); c(:,1:2) = M; for i = 1:n c(i,3) = theta(c(i,1)); c(i,4) = theta(c(i,2)); c(i,5) = c(i,3) - c(i,4); SED = sqrt (Var * (w(c(i,1)) + w(c(i,2)))); - c(i,6) = c(i,5) / SED; - if (c(i,6) < Q(1)) - c(i,7) = interp1 (Q, P, abs (c(i,6)), 'linear', 1); + c(i,6) = c(i,5) - SED * interp1 (F, Q, 1 - alpha, 'linear', max (Q)); + c(i,7) = c(i,5) + SED * interp1 (F, Q, 1 - alpha, 'linear', max (Q)); + c(i,8) = c(i,5) / SED; + if (c(i,8) < Q(1)) + c(i,9) = interp1 (Q, P, abs (c(i,8)), 'linear', 1); else - c(i,7) = interp1 (Q, P, abs (c(i,6)), 'linear', res_lim); + c(i,9) = interp1 (Q, P, abs (c(i,8)), 'linear', res_lim); end - c(i,8) = c(i,5) - SED * interp1 (F, Q, 1 - alpha, 'linear', max (Q)); - c(i,9) = c(i,5) + SED * interp1 (F, Q, 1 - alpha, 'linear', max (Q)); end + c(:,10) = pval2fpr (c(:,9)); else % Single-step maxT procedure for treatment vs control comparisons is % a resampling version of Dunnett's test - c = zeros (k, 9); + c = zeros (k, 10); c(:,2) = ref; c(:,4) = theta (ref); for j = 1:k @@ -512,20 +516,21 @@ c(j,3) = theta(c(j,1)); c(j,5) = c(j,3) - c(j,4); SED = sqrt (Var * (w(c(j,1)) + w(c(j,2)))); - c(j,6) = c(j,5) / SED; - if (c(j,6) < Q(1)) - c(j,7) = interp1 (Q, P, abs (c(j,6)), 'linear', 1); + c(j,6) = c(j,5) - SED * interp1 (F, Q, 1 - alpha, 'linear', max (Q)); + c(j,7) = c(j,5) + SED * interp1 (F, Q, 1 - alpha, 'linear', max (Q)); + c(j,8) = c(j,5) / SED; + if (c(j,8) < Q(1)) + c(j,9) = interp1 (Q, P, abs (c(j,8)), 'linear', 1); else - c(j,7) = interp1 (Q, P, abs (c(j,6)), 'linear', res_lim); + c(j,9) = interp1 (Q, P, abs (c(j,8)), 'linear', res_lim); end - c(j,8) = c(j,5) - SED * interp1 (F, Q, 1 - alpha, 'linear', max (Q)); - c(j,9) = c(j,5) + SED * interp1 (F, Q, 1 - alpha, 'linear', max (Q)); end + c(:,10) = pval2fpr (c(:,9)); c(ref,:) = []; end % Assign the calculated p-values to the return value - pval = c(:,7); + pval = c(:,9); % Prepare stats output structure stats = struct; @@ -544,11 +549,10 @@ stats.Var = Var; stats.nboot = nboot; stats.alpha = alpha; - stats.bootstat = Q; % Print output and plot graph with confidence intervals if no output % arguments are requested - cols = [1,2,5,6,7]; % columns in c that we want to print data for + cols = [1,2,5,8,9]; % columns in c that we want to print data for if ((nargout == 0) || (DisplayOpt == true)) if (~ iscellstr (gnames)) gnames = cellstr (num2str (gnames)); @@ -586,20 +590,20 @@ for i = 1:n tmp = num2cell (c(i, cols)); tmp{end} = round (tmp{end} * 1000); - if (c(i,7) <= 0.001) + if (c(i,9) <= 0.001) tmp(end) = []; fprintf (cat (2, '| %10u | %10u | %10u | %#+10.4g | %+10.2f |', ... ' <.001 |***\n'), i, tmp{:}); - elseif (c(i,7) > 0.999) + elseif (c(i,9) > 0.999) tmp(end) = []; fprintf (cat (2, '| %10u | %10u | %10u | %#+10.4g | %+10.2f |', ... ' 1.000 |\n'), i, tmp{:}); else fprintf (cat (2, '| %10u | %10u | %10u | %#+10.4g | %+10.2f |', ... ' .%03u |'), i, tmp{:}); - if (c(i,7) < 0.01) + if (c(i,9) < 0.01) fprintf ('**\n') - elseif (c(i,7) < 0.05) + elseif (c(i,9) < 0.05) fprintf ('*\n') else fprintf ('\n') @@ -610,20 +614,20 @@ for j = 1:k-1 tmp = num2cell (c(j, cols)); tmp{end} = round (tmp{end} * 1000); - if (c(j,7) <= 0.001) + if (c(j,9) <= 0.001) tmp(end) = []; fprintf (cat (2, '| %10u | %10u | %10u | %#+10.4g | %+10.2f |', ... ' <.001 |***\n'), j, tmp{:}); - elseif (c(j,7) > 0.999) + elseif (c(j,9) > 0.999) tmp(end) = []; fprintf (cat (2, '| %10u | %10u | %10u | %#+10.4g | %+10.2f |', ... ' 1.000 |\n'), j, tmp{:}); else fprintf (cat (2, '| %10u | %10u | %10u | %#+10.4g | %+10.2f |', ... ' .%03u |'), j, tmp{:}); - if (c(j,7) < 0.01) + if (c(j,9) < 0.01) fprintf ('**\n') - elseif (c(j,7) < 0.05) + elseif (c(j,9) < 0.05) fprintf ('*\n') else fprintf ('\n') @@ -652,16 +656,16 @@ ylim ([0.5, nc + 0.5]); % Set y-axis limits hold on for i = 1 : nc - if (c(i,7) < 0.05) + if (c(i,9) < 0.05) % Plot marker for the difference estimate plot (c(i, 5), i, 'or', 'MarkerFaceColor', 'r'); % Plot line for each confidence interval - plot ([c(i, 8), c(i, 9)], i * ones (2, 1), 'r-'); + plot ([c(i, 6), c(i, 7)], i * ones (2, 1), 'r-'); else % Plot marker for the difference estimate plot (c(i,5), i, 'ob', 'MarkerFaceColor', 'b'); % Plot line for each confidence interval - plot ([c(i,8), c(i,9)], i * ones (2, 1), 'b-'); + plot ([c(i,6), c(i,7)], i * ones (2, 1), 'b-'); end end hold off @@ -756,6 +760,35 @@ %-------------------------------------------------------------------------- +% FUNCTION TO COMPUTE MINIMUM FALSE POSITIVE RISK (FPR) + +function fpr = pval2fpr (p) + + % Subfunction to compute minimum false positive risk. These are calculated + % from a Bayes factor based on the sampling distributions of the p-value and + % that H0 and H1 have equal prior probabilities. This is called the Sellke- + % Berger approach. + % + % References: + % Held and Ott (2018) On p-Values and Bayes Factors. + % Annu. Rev. of Stat. Appl. 5:393-419 + % David Colquhoun (2019) The False Positive Risk: A Proposal + % Concerning What to Do About p-Values, The American Statistician, + % 73:sup1, 192-201, DOI: 10.1080/00031305.2018.1529622 + + % Calculate minimum Bayes Factor (P(H0) / P(H1)) by the Sellke-Berger method + logp = min (log (p), -1); + minBF = exp (1 + logp + log (-logp)); + + % Calculate the false-positive risk from the minumum Bayes Factor + L10 = 1 ./ minBF; % Convert to Maximum Likelihood ratio L10 (P(H1)/P(H0)) + fpr = max (0, 1 ./ (1 + L10)); % Calculate minimum false positive risk + fpr(isnan(p)) = NaN; + +end + +%-------------------------------------------------------------------------- + %!demo %! %! ## EXAMPLE 1A: @@ -978,4 +1011,4 @@ %! g = [zeros(10, 1); ones(10, 1)]; %! func = @(M) subsref (M(:,2:end) \ M(:,1), ... %! struct ('type', '()', 'subs', {{2}})); -%! p = boot1way ([y, X], g, 'bootfun', func, 'DisplayOpt', false); \ No newline at end of file +%! p = boot1way ([y, X], g, 'bootfun', func, 'DisplayOpt', false); diff --git a/inst/bootlm.m b/inst/bootlm.m index 1b4ca9a4..f8418d40 100644 --- a/inst/bootlm.m +++ b/inst/bootlm.m @@ -334,8 +334,8 @@ % - 'estimate': The value of the estimates % - 'CI_lower': The lower bound(s) of the confidence/credible interval(s) % - 'CI_upper': The upper bound(s) of the confidence/credible interval(s) -% - 'pval': The p-value(s) for the hypothesis that the estimate(s) = 0 -% - 'fpr': The false positive risk +% - 'pval': The p-value(s) for the hypothesis that the estimate(s) == 0 +% - 'fpr': The minimum false positive risk (FPR) for each p-value % - 'N': The number of independnet sampling units used to compute CIs % - 'prior': The prior used for Bayesian bootstrap % - 'levels': A cell array with the levels of each predictor. @@ -343,9 +343,10 @@ % % Note that the p-values returned are truncated at the resolution % limit determined by the number of bootstrap replicates, specifically -% 1 / (NBOOT + 1). The following fields are only returned when -% 'estimate' corresponds to the model regression coefficients: -% 'levels' and 'contrasts'. +% 1 / (NBOOT + 1). Values for the minumum false positive risk are +% computed using the Sellke-Berger approach. The following fields are +% only returned when 'estimate' corresponds to model regression +% coefficients: 'levels' and 'contrasts'. % % '[STATS, BOOTSTAT] = bootlm (...)' also returns a P x NBOOT matrix of % bootstrap statistics for the estimated parameters, where P is the number @@ -364,6 +365,7 @@ % - 'MS': Mean-squares % - 'F': F-Statistic % - 'PVAL': p-values +% - 'FPR': The minimum false positive risk for each p-value % - 'SSE': Sum-of-Squared Error % - 'DFE': Degrees of Freedom for Error % - 'MSE': Mean Squared Error @@ -1049,6 +1051,8 @@ L, ISOCTAVE); % Control the type 1 error rate across multiple comparisons STATS.pval = holm (STATS.pval); + % Update minimum false positive risk after multiple comparisons + STATS.fpr = pval2fpr (STATS.pval); % Clean-up STATS = rmfield (STATS, {'std_err', 'tstat', 'sse'}); STATS.prior = []; @@ -1718,6 +1722,35 @@ %-------------------------------------------------------------------------- +% FUNCTION TO COMPUTE MINIMUM FALSE POSITIVE RISK (FPR) + +function fpr = pval2fpr (p) + + % Subfunction to compute minimum false positive risk. These are calculated + % from a Bayes factor based on the sampling distributions of the p-value and + % that H0 and H1 have equal prior probabilities. This is called the Sellke- + % Berger approach. + % + % References: + % Held and Ott (2018) On p-Values and Bayes Factors. + % Annu. Rev. of Stat. Appl. 5:393-419 + % David Colquhoun (2019) The False Positive Risk: A Proposal + % Concerning What to Do About p-Values, The American Statistician, + % 73:sup1, 192-201, DOI: 10.1080/00031305.2018.1529622 + + % Calculate minimum Bayes Factor (P(H0) / P(H1)) by the Sellke-Berger method + logp = min (log (p), -1); + minBF = exp (1 + logp + log (-logp)); + + % Calculate the false-positive risk from the minumum Bayes Factor + L10 = 1 ./ minBF; % Convert to Maximum Likelihood ratio L10 (P(H1)/P(H0)) + fpr = max (0, 1 ./ (1 + L10)); % Calculate minimum false positive risk + fpr(isnan(p)) = NaN; + +end + +%-------------------------------------------------------------------------- + % FUNCTION TO PERFORM ANOVA function AOVSTAT = bootanova (Y, X, DF, DFE, DEP, NBOOT, ALPHA, SEED, ... @@ -1780,10 +1813,13 @@ end end + % Compute minimum false positive risk + FPR = pval2fpr (PVAL); + % Prepare output AOVSTAT = struct ('MODEL', [], 'SS', SS, 'DF', DF(2:end), 'MS', MS, 'F', ... - F, 'PVAL', PVAL, 'SSE', SSE{end}, 'DFE', DFE, ... - 'MSE', MSE); + F, 'PVAL', PVAL, 'FPR', FPR, 'SSE', SSE{end}, ... + 'DFE', DFE, 'MSE', MSE); end @@ -2239,9 +2275,7 @@ %!demo %! -%! ## Unbalanced one-way design with custom, orthogonal contrasts. The -%! ## statistics relating to the contrasts are shown in the table of model -%! ## parameters, and can be retrieved from the STATS.coeffs output. Data from +%! ## Unbalanced one-way design with custom, orthogonal contrasts. Data from %! ## www.uvm.edu/~statdhtx/StatPages/Unequal-ns/Unequal_n%27s_contrasts.html %! %! dv = [ 8.706 10.362 11.552 6.941 10.983 10.092 6.421 14.943 15.931 ... @@ -2756,9 +2790,9 @@ %! assert (stats.pval(1), 0.02381212481394462, 1e-09); %! assert (stats.pval(2), 0.009547350172112052, 1e-09); %! assert (stats.pval(3), 0.1541408530918242, 1e-09); -%! assert (stats.fpr(1), 0.1254120362272493, 1e-09); -%! assert (stats.fpr(2), 0.04738586302975815, 1e-09); -%! assert (stats.fpr(3), 0.4392984660114189, 1e-09); +%! assert (stats.fpr(1), 0.1947984337990365, 1e-09); +%! assert (stats.fpr(2), 0.1077143325366211, 1e-09); +%! assert (stats.fpr(3), 0.4392984660114188, 1e-09); %!test %! diff --git a/inst/randtest2.m b/inst/randtest2.m index 4d4e7509..70d475a1 100644 --- a/inst/randtest2.m +++ b/inst/randtest2.m @@ -6,7 +6,8 @@ % -- Function File: PVAL = randtest2 (X, Y, PAIRED, NREPS, FUNC, SEED) % -- Function File: PVAL = randtest2 ([X, GX], [Y, GY], ...) % -- Function File: [PVAL, STAT] = randtest (...) -% -- Function File: [PVAL, STAT, PERMSTATS] = randtest (...) +% -- Function File: [PVAL, STAT, FPR] = randtest (...) +% -- Function File: [PVAL, STAT, FPR, PERMSTAT] = randtest (...) % % 'PVAL = randtest2 (X, Y)' performs a randomization (a.k.a. permutation) % test to ascertain whether data samples X and Y come from populations with @@ -67,8 +68,12 @@ % % '[PVAL, STAT] = randtest2 (...)' also returns the test statistic. % -% '[PVAL, STAT, PERMSTATS] = randtest2 (...)' also returns the statistics -% of the permutation distribution. +% '[PVAL, STAT, FPR] = randtest2 (...)' also returns the minimum false +% positive risk (FPR) calculated for the p-value, computed using the +% Sellke-Berger approach. +% +% '[PVAL, STAT, FPR, PERMSTAT] = randtest2 (...)' also returns the +% statistics of the permutation distribution. % % Bibliography: % [1] Dowd (2020) A New ECDF Two-Sample Test Statistic. arXiv. @@ -93,7 +98,7 @@ % You should have received a copy of the GNU General Public License % along with this program. If not, see . -function [pval, stat, STATS] = randtest2 (x, y, paired, nreps, func, seed) +function [pval, stat, fpr, STATS] = randtest2 (x, y, paired, nreps, func, seed) % Check if we are running Octave or Matlab info = ver; @@ -128,7 +133,7 @@ if (nargin > 6) error ('randtest2: Too many input arguments') end - if (nargout > 3) + if (nargout > 4) error ('randtest2: Too many output arguments') end @@ -332,6 +337,10 @@ else pval = interp1 (x, P, abs (stat), 'linear', res_lim); end + if (nargout > 2) + % Compute minimum false positive risk + fpr = pval2fpr (pval); + end end @@ -401,6 +410,35 @@ %-------------------------------------------------------------------------- +% FUNCTION TO COMPUTE MINIMUM FALSE POSITIVE RISK (FPR) + +function fpr = pval2fpr (p) + + % Subfunction to compute minimum false positive risk. These are calculated + % from a Bayes factor based on the sampling distributions of the p-value and + % that H0 and H1 have equal prior probabilities. This is called the Sellke- + % Berger approach. + % + % References: + % Held and Ott (2018) On p-Values and Bayes Factors. + % Annu. Rev. of Stat. Appl. 5:393-419 + % David Colquhoun (2019) The False Positive Risk: A Proposal + % Concerning What to Do About p-Values, The American Statistician, + % 73:sup1, 192-201, DOI: 10.1080/00031305.2018.1529622 + + % Calculate minimum Bayes Factor (P(H0) / P(H1)) by the Sellke-Berger method + logp = min (log (p), -1); + minBF = exp (1 + logp + log (-logp)); + + % Calculate the false-positive risk from the minumum Bayes Factor + L10 = 1 ./ minBF; % Convert to Maximum Likelihood ratio L10 (P(H1)/P(H0)) + fpr = max (0, 1 ./ (1 + L10)); % Calculate minimum false positive risk + fpr(isnan(p)) = NaN; + +end + +%-------------------------------------------------------------------------- + %!demo %! %! % Mouse data from Table 2 (page 11) of Efron and Tibshirani (1993) @@ -497,4 +535,4 @@ %! pval5 = randtest2 (X, Y, false, 5000); %! pval5 = randtest2 (X, Y, false, [], [], 1); %! pval6 = randtest2 (X, Y, false, [], @(X, Y) mean (X) - mean (Y), 1); -%! pval7 = randtest2 (X, Y, false, [], @(A, B) log (var (A) ./ var (B)), 1); \ No newline at end of file +%! pval7 = randtest2 (X, Y, false, [], @(A, B) log (var (A) ./ var (B)), 1);