Skip to content

Commit

Permalink
Made BOOTSAM a documented input argument
Browse files Browse the repository at this point in the history
- Made BOOTSAM a documented input argument
- Minor tweaks to coding style and documentation
  • Loading branch information
acp29 committed Jan 22, 2023
1 parent 2dca3ca commit 331e862
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 60 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: statistics-bootstrap
version: 5.1.5
date: 2023-01-21
version: 5.1.6
date: 2023-01-22
author: Andrew Penn <[email protected]>
maintainer: Andrew Penn <[email protected]>
title: A statistics package with a variety of bootstrap resampling tools
Expand Down
4 changes: 2 additions & 2 deletions inst/bootci.m
Original file line number Diff line number Diff line change
Expand Up @@ -324,9 +324,9 @@
% estimate of the standard error using bootknife resampling
szx = cellfun (@(x) size (x, 2), data);
data = [data{:}];
cellfunc = @(bootsam) bootknife (mat2cell (data (bootsam,:), n, szx), nbootstd, bootfun, NaN, [], 0, [], ISOCTAVE);
cellfunc = @(bootsam) bootknife (mat2cell (data (bootsam,:), n, szx), nbootstd, bootfun, NaN, [], 0, [], [], ISOCTAVE);
else
cellfunc = @(bootsam) bootknife (data (bootsam,:), nbootstd, bootfun, NaN, [], 0, [], ISOCTAVE);
cellfunc = @(bootsam) bootknife (data (bootsam,:), nbootstd, bootfun, NaN, [], 0, [], [], ISOCTAVE);
end
if (ncpus > 1)
if (ISOCTAVE)
Expand Down
100 changes: 56 additions & 44 deletions inst/bootknife.m
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
% reduce Monte Carlo error [2,3].
%
% If single bootstrap is requested, the confidence intervals are obtained from
% the quantiles of a kernel density estimate of bootstrap statistics (with
% the quantiles of a kernel density estimate of the bootstrap statistics (with
% shrinkage correction). By default, the confidence intervals are bias-
% corrected and accelerated (BCa) [4-5]. BCa intervals are fast to compute and
% have good coverage and correctness when combined with bootknife resampling
Expand All @@ -20,7 +20,7 @@
% If double bootstrap is requested, the algorithm uses calibration to improve
% the accuracy (of the bias and standard error) and coverage of the confidence
% intervals [6-12], which are obtained from the empirical distribution of the
% bootstrap statistics by linear interpolation.
% bootstrap statistics by linear interpolation [9].
%
% STATS = bootknife (DATA)
% STATS = bootknife ({DATA})
Expand All @@ -30,6 +30,7 @@
% STATS = bootknife (DATA, NBOOT, ..., ALPHA)
% STATS = bootknife (DATA, NBOOT, ..., ALPHA, STRATA)
% STATS = bootknife (DATA, NBOOT, ..., ALPHA, STRATA, NPROC)
% STATS = bootknife (DATA, NBOOT, ..., ALPHA, STRATA, NPROC, BOOTSAM)
% [STATS, BOOTSTAT] = bootknife (...)
% [STATS, BOOTSTAT] = bootknife (...)
% [STATS, BOOTSTAT, BOOTSAM] = bootknife (...)
Expand Down Expand Up @@ -155,6 +156,11 @@
% feature requires the Parallel package (in Octave), or the Parallel
% Computing Toolbox (in Matlab).
%
% STATS = bootknife (DATA, NBOOT, ..., ALPHA, STRATA, NPROC, BOOTSAM) uses
% bootstrap resampling indices provided in BOOTSAM. BOOTSAM should be a
% matrix with the same number of rows as the data. When BOOTSAM is provided,
% the first element of NBOOT is ignored and the number of bootstrap resamples
% is instead set to the number of columns in BOOTSAM.
%
% [STATS, BOOTSTAT] = bootknife (...) also returns BOOTSTAT, a vector of
% bootstrap statistics calculated over the (first, or outer layer of)
% bootstrap resamples.
Expand Down Expand Up @@ -187,7 +193,7 @@
% Bootstrap. New York, NY: Chapman & Hall
% [6] Beran (1987). Prepivoting to Reduce Level Error of Confidence Sets.
% Biometrika, 74(3), 457–468.
% [7] Lee and Young (1999) The effectt of Monte Carlo approximation on coverage
% [7] Lee and Young (1999) The effect of Monte Carlo approximation on coverage
% error of double-bootstrap confidence intervals. J R Statist Soc B.
% 61:353-366.
% [8] Booth J. and Presnell B. (1998) Allocation of Monte Carlo Resources for
Expand Down Expand Up @@ -225,11 +231,11 @@
% along with this program. If not, see <http://www.gnu.org/licenses/>.


function [stats, bootstat, BOOTSAM] = bootknife (x, nboot, bootfun, alpha, ...
strata, ncpus, REF, ISOCTAVE, BOOTSAM, ERRCHK)
function [stats, bootstat, bootsam] = bootknife (x, nboot, bootfun, alpha, ...
strata, ncpus, bootsam, REF, ISOCTAVE, ERRCHK)

% Input argument names in all-caps are for internal use only
% REF, ISOCTAVE, BOOTSAM, ERRCHK are undocumented input arguments required
% REF, ISOCTAVE and ERRCHK are undocumented input arguments required
% for some of the functionalities of bootknife

% Store local functions in a structure for parallel processes
Expand Down Expand Up @@ -343,7 +349,7 @@
error ('bootknife: NPROC must be a scalar value');
end
end
if ((nargin < 8) || isempty (ISOCTAVE))
if ((nargin < 9) || isempty (ISOCTAVE))
% Check if running in Octave (else assume Matlab)
info = ver;
ISOCTAVE = any (ismember ({info.Name}, 'Octave'));
Expand Down Expand Up @@ -511,21 +517,21 @@

% Perform balanced bootknife resampling
unbiased = true; % Set to true for bootknife resampling
if ((nargin < 9) || isempty (BOOTSAM))
if ((nargin < 7) || isempty (bootsam))
if (~ isempty (strata))
if (nvar > 1) || (nargout > 2)
% We can save some memory by making BOOTSAM an int32 datatype
BOOTSAM = zeros (n, B, 'int32');
% We can save some memory by making bootsam an int32 datatype
bootsam = zeros (n, B, 'int32');
for k = 1:K
if ((sum (g(:, k))) > 1)
BOOTSAM(g(:, k), :) = boot (find (g(:, k)), B, unbiased);
bootsam(g(:, k), :) = boot (find (g(:, k)), B, unbiased);
else
BOOTSAM(g(:, k), :) = find (g(:, k)) * ones (1, B);
bootsam(g(:, k), :) = find (g(:, k)) * ones (1, B);
end
end
else
% For more efficiency, if we don't need BOOTSAM, we can directly resample values of x
BOOTSAM = [];
% For more efficiency, if we don't need bootsam, we can directly resample values of x
bootsam = [];
X = zeros (n, B);
for k = 1:K
if ((sum (g(:, k))) > 1)
Expand All @@ -537,19 +543,25 @@
end
else
if (nvar > 1) || (nargout > 2)
% We can save some memory by making BOOTSAM an int32 datatype
BOOTSAM = zeros (n, B, 'int32');
BOOTSAM(:, :) = boot (n, B, unbiased);
% We can save some memory by making bootsam an int32 datatype
bootsam = zeros (n, B, 'int32');
bootsam(:, :) = boot (n, B, unbiased);
else
% For more efficiency, if we don't need BOOTSAM, we can directly resample values of x
BOOTSAM = [];
% For more efficiency, if we don't need bootsam, we can directly resample values of x
bootsam = [];
X = boot (x, B, unbiased);
end
end
else
if (size (bootsam, 1) ~= n)
error ('bootknife: BOOTSAM must have the same number of rows as X')
end
nboot(1) = size (bootsam, 2);
B = nboot(1);
end

% Evaluate bootfun on each bootstrap resample
if (isempty (BOOTSAM))
if (isempty (bootsam))
if (vectorized)
% Vectorized evaluation of bootfun on the DATA resamples
bootstat = bootfun (X);
Expand All @@ -570,35 +582,35 @@
end
else
if (vectorized)
% DATA resampling (using BOOTSAM) and vectorized evaluation of bootfun on
% DATA resampling (using bootsam) and vectorized evaluation of bootfun on
% the DATA resamples
if (nvar > 1)
% Multivariate
% Perform DATA sampling
X = cell2mat (cellfun (@(i) reshape (x(BOOTSAM, i), n, B), ...
X = cell2mat (cellfun (@(i) reshape (x(bootsam, i), n, B), ...
num2cell (1:nvar, 1), 'UniformOutput', false));
else
% Univariate
% Perform DATA sampling
X = x(BOOTSAM);
X = x(bootsam);
end
% Function evaluation on bootknife samples
bootstat = bootfun (X);
else
cellfunc = @(BOOTSAM) bootfun (x(BOOTSAM, :));
cellfunc = @(bootsam) bootfun (x(bootsam, :));
if (ncpus > 1)
% Evaluate bootfun on each bootstrap resample in PARALLEL
if (ISOCTAVE)
% OCTAVE
bootstat = parcellfun (ncpus, cellfunc, num2cell (BOOTSAM, 1), 'UniformOutput', false);
bootstat = parcellfun (ncpus, cellfunc, num2cell (bootsam, 1), 'UniformOutput', false);
else
% MATLAB
bootstat = cell (1, B);
parfor b = 1:B; bootstat{b} = cellfunc (BOOTSAM(:, b)); end
parfor b = 1:B; bootstat{b} = cellfunc (bootsam(:, b)); end
end
else
% Evaluate bootfun on each bootstrap resample in SERIAL
bootstat = cellfun (cellfunc, num2cell (BOOTSAM, 1), 'UniformOutput', false);
bootstat = cellfun (cellfunc, num2cell (bootsam, 1), 'UniformOutput', false);
end
end
end
Expand All @@ -607,14 +619,14 @@
end

% Remove bootstrap statistics that contain NaN, along with their associated
% DATA resamples in X or BOOTSAM
% DATA resamples in X or bootsam
ridx = any (isnan (bootstat), 1);
bootstat_all = bootstat;
bootstat(:, ridx) = [];
if (isempty (BOOTSAM))
if (isempty (bootsam))
X(:, ridx) = [];
else
BOOTSAM(:, ridx) = [];
bootsam(:, ridx) = [];
end
if (isempty (bootstat))
error ('bootknife: BOOTFUN returned NaN for every bootstrap resamples')
Expand All @@ -630,12 +642,12 @@
% OCTAVE
% Set unique random seed for each parallel thread
pararrayfun (ncpus, @boot, 1, 1, false, 1:ncpus);
if (vectorized && isempty (BOOTSAM))
cellfunc = @(x) bootknife (x, C, bootfun, NaN, strata, 0, T0, ISOCTAVE, [], false);
if (vectorized && isempty (bootsam))
cellfunc = @(x) bootknife (x, C, bootfun, NaN, strata, 0, [], T0, ISOCTAVE, false);
bootout = parcellfun (ncpus, cellfunc, num2cell (X, 1), 'UniformOutput', false);
else
cellfunc = @(BOOTSAM) bootknife (x(BOOTSAM, :), C, bootfun, NaN, strata, 0, T0, ISOCTAVE, [], false);
bootout = parcellfun (ncpus, cellfunc, num2cell (BOOTSAM, 1), 'UniformOutput', false);
cellfunc = @(bootsam) bootknife (x(bootsam, :), C, bootfun, NaN, strata, 0, [], T0, ISOCTAVE, false);
bootout = parcellfun (ncpus, cellfunc, num2cell (bootsam, 1), 'UniformOutput', false);
end
else
% MATLAB
Expand All @@ -644,22 +656,22 @@
% Perform inner layer of resampling
% Preallocate structure array
bootout = cell (1, B);
if (vectorized && isempty (BOOTSAM))
cellfunc = @(x) bootknife (x, C, bootfun, NaN, strata, 0, T0, ISOCTAVE, [], false);
if (vectorized && isempty (bootsam))
cellfunc = @(x) bootknife (x, C, bootfun, NaN, strata, 0, [], T0, ISOCTAVE, false);
parfor b = 1:B; bootout{b} = cellfunc (X(:, b)); end
else
cellfunc = @(BOOTSAM) bootknife (x(BOOTSAM, :), C, bootfun, NaN, strata, 0, T0, ISOCTAVE, [], false);
parfor b = 1:B; bootout{b} = cellfunc (BOOTSAM(:, b)); end
cellfunc = @(bootsam) bootknife (x(bootsam, :), C, bootfun, NaN, strata, 0, [], T0, ISOCTAVE, false);
parfor b = 1:B; bootout{b} = cellfunc (bootsam(:, b)); end
end
end
else
% SERIAL execution of inner layer resampling for double bootstrap
if (vectorized && isempty (BOOTSAM))
cellfunc = @(x) bootknife (x, C, bootfun, NaN, strata, 0, T0, ISOCTAVE, [], false);
if (vectorized && isempty (bootsam))
cellfunc = @(x) bootknife (x, C, bootfun, NaN, strata, 0, [], T0, ISOCTAVE, false);
bootout = cellfun (cellfunc, num2cell (X, 1), 'UniformOutput', false);
else
cellfunc = @(BOOTSAM) bootknife (x(BOOTSAM, :), C, bootfun, NaN, strata, 0, T0, ISOCTAVE, [], false);
bootout = cellfun (cellfunc, num2cell (BOOTSAM, 1), 'UniformOutput', false);
cellfunc = @(bootsam) bootknife (x(bootsam, :), C, bootfun, NaN, strata, 0, [], T0, ISOCTAVE, false);
bootout = cellfun (cellfunc, num2cell (bootsam, 1), 'UniformOutput', false);
end
end
% Double bootstrap bias estimation
Expand Down Expand Up @@ -807,7 +819,7 @@
stats.CI_lower = ci(:, 1);
stats.CI_upper = ci(:, 2);
% Use quick interpolation to find the proportion (Pr) of bootstat <= REF
if ((nargin > 6) && ~ isempty (REF))
if ((nargin > 7) && ~ isempty (REF))
I = bsxfun (@le, bootstat, REF);
pr = sum (I, 2);
t = cell2mat (arrayfun (@(j) ...
Expand All @@ -828,7 +840,7 @@
if (nargout == 0)
print_output (stats, nboot, alpha, l, m, bootfun_str, strata);
else
if (isempty (BOOTSAM))
if (isempty (bootsam))
[warnmsg, warnID] = lastwarn;
if (ismember (warnID, {'bootknife:biasfail','bootknife:jackfail'}))
warning ('bootknife:lastwarn', warnmsg);
Expand Down
2 changes: 1 addition & 1 deletion inst/bootmode.m
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@
h = criticalBandwidth;

% Random resampling with replacement from a smooth estimate of the distribution
idx = boot (n,B,false);
idx = boot (n, B, false);
Y = x(idx);
xvar = var (x, 1); % calculate sample variance
Ymean = ones (n, 1) * mean(Y);
Expand Down
8 changes: 4 additions & 4 deletions inst/bootnhst.m
Original file line number Diff line number Diff line change
Expand Up @@ -717,9 +717,9 @@
boot (1, 1, false, 1); % set random seed to make bootstrap resampling deterministic
% Use newer, faster and balanced (less biased) resampling functions (boot and bootknife)
if (paropt.UseParallel)
[null, Q] = bootknife (data, nboot(1), func, NaN, [], paropt.nproc, [], ISOCTAVE);
[null, Q] = bootknife (data, nboot(1), func, NaN, [], paropt.nproc, [], [], ISOCTAVE);
else
[null, Q] = bootknife (data, nboot(1), func, NaN, [], 0, [], ISOCTAVE);
[null, Q] = bootknife (data, nboot(1), func, NaN, [], 0, [], [], ISOCTAVE);
end

% Compute the estimate (theta) and its pooled (weighted mean) sampling variance
Expand Down Expand Up @@ -747,7 +747,7 @@
% Bootknife resampling involves less computation than Jackknife when sample sizes get larger
theta(j) = bootfun (data(g == gk(j), :));
nk(j) = sum (g == gk(j));
bootout = bootknife (data(g == gk(j), :), [nboot(2), 0], bootfun, NaN, [], 0, [], ISOCTAVE, [], false);
bootout = bootknife (data(g == gk(j), :), [nboot(2), 0], bootfun, NaN, [], 0, [], [], ISOCTAVE, false);
SE(j) = bootout.std_error;
if (j==1); se_method = 'Balanced, bootknife resampling'; end;
end
Expand Down Expand Up @@ -1017,7 +1017,7 @@
% Bootknife resampling involves less computation than Jackknife when sample sizes get larger
theta(j) = bootfun (Y(g == gk(j), :));
nk(j) = sum (g == gk(j));
bootout = bootknife(Y(g == gk(j), :), [nboot, 0], bootfun, NaN, [], 0, [], ISOCTAVE, [], false);
bootout = bootknife (Y(g == gk(j), :), [nboot, 0], bootfun, NaN, [], 0, [], [], ISOCTAVE, false);
SE(j) = bootout.std_error;
end
Var(j) = ((nk(j) - 1) / (N - k)) * SE(j)^2;
Expand Down
14 changes: 7 additions & 7 deletions test/test_script.m
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@
% warning ('off', 'Octave:nearly-singular-matrix')
% warning ('off', 'Octave:broadcast')
%else
warning ('off', 'MATLAB:rankDeficientMatrix')
warning ('off', 'MATLAB:divideByZero')
% warning ('off', 'MATLAB:rankDeficientMatrix')
% warning ('off', 'MATLAB:divideByZero')
%end

%try
try
% boot
boot (3, 20);
boot (3, 20, false, 1);
Expand Down Expand Up @@ -173,21 +173,21 @@
func = @(M) subsref (M(:,2:end) \ M(:,1), struct ('type', '()', 'subs', {{2}}));
p = bootnhst ([y, X], g, 'bootfun', func, 'DisplayOpt', false);

%catch exception
catch exception

% Turn warnings back on
warning ('on', 'bootknife:parallel')
%if ISOCTAVE
% warning ('on', 'Octave:divide-by-zero')
% warning ('on', 'Octave:nearly-singular-matrix')
%else
warning ('on', 'MATLAB:rankDeficientMatrix')
warning ('on', 'MATLAB:divideByZero')
% warning ('on', 'MATLAB:rankDeficientMatrix')
% warning ('on', 'MATLAB:divideByZero')
%end
%rethrow (exception)


%end
end

% Turn warnings back on
warning ('on', 'bootknife:parallel');
Expand Down

0 comments on commit 331e862

Please sign in to comment.