From 1bd781540de78ba71467cd2108aed9ee90d2f720 Mon Sep 17 00:00:00 2001
From: "Philip B. Stark"
Date: Mon, 5 Aug 2019 14:38:28 -0700
Subject: [PATCH 1/2] DOC: fix documentation of calling signature for test statistic in core

---
 permute/core.py | 36 +++++++++++++++++++-----------------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/permute/core.py b/permute/core.py
index de11a7c..fe4aa7c 100644
--- a/permute/core.py
+++ b/permute/core.py
@@ -201,8 +201,7 @@ def two_sample(x, y, reps=10**5, stat='mean', alternative="greater",
             "new" x and the second argument is the "new" y.
             For instance, if the test statistic is the Kolmogorov-Smirnov distance
             between the empirical distributions of the two samples,
-            $\max_t |F_x(t) - F_y(t)|$,
-            the test statistic could be written:
+            $\max_t |F_x(t) - F_y(t)|$, the test statistic could be written:
 
             f = lambda u, v: np.max( \
                 [abs(sum(u<=v)/len(u)-sum(v<=val)/len(v)) for val in np.concatenate([u, v])]\
                 )
@@ -299,16 +298,17 @@ def two_sample_shift(x, y, reps=10**5, stat='mean', alternative="greater",
             approximating the permutation distribution.
             The t-statistic is computed using scipy.stats.ttest_ind
         (c) If stat is a function (a callable object), the test statistic is
-            that function. The function should take a permutation of the pooled
-            data and compute the test function from it. For instance, if the
-            test statistic is the Kolmogorov-Smirnov distance between the
-            empirical distributions of the two samples, $\max_t |F_x(t) - F_y(t)|$,
-            the test statistic could be written:
+            that function.The function should take two arguments:
+            given a permutation of the pooled data, the first argument is the
+            "new" x and the second argument is the "new" y.
+            For instance, if the test statistic is the Kolmogorov-Smirnov distance
+            between the empirical distributions of the two samples,
+            $\max_t |F_x(t) - F_y(t)|$, the test statistic could be written:
 
-            f = lambda u: np.max( \
-                [abs(sum(u[:len(x)]<=v)/len(x)-sum(u[len(x):]<=v)/len(y)) for v in u]\
+            f = lambda u, v: np.max( \
+                [abs(sum(u<=v)/len(u)-sum(v<=val)/len(v)) for val in np.concatenate([u, v])]\
                 )
-
+
     alternative : {'greater', 'less', 'two-sided'}
         The alternative hypothesis to test
     keep_dist : bool
@@ -414,15 +414,17 @@ def two_sample_conf_int(x, y, cl=0.95, alternative="two-sided", seed=None,
             approximating the permutation distribution.
             The t-statistic is computed using scipy.stats.ttest_ind
         (c) If stat is a function (a callable object), the test statistic is
-            that function. The function should take a permutation of the pooled
-            data and compute the test function from it. For instance, if the
-            test statistic is the Kolmogorov-Smirnov distance between the
-            empirical distributions of the two samples, $\max_t |F_x(t) - F_y(t)|$,
-            the test statistic could be written:
+            that function.The function should take two arguments:
+            given a permutation of the pooled data, the first argument is the
+            "new" x and the second argument is the "new" y.
+            For instance, if the test statistic is the Kolmogorov-Smirnov distance
+            between the empirical distributions of the two samples,
+            $\max_t |F_x(t) - F_y(t)|$, the test statistic could be written:
 
-            f = lambda u: np.max( \
-                [abs(sum(u[:len(x)]<=v)/len(x)-sum(u[len(x):]<=v)/len(y)) for v in u]\
+            f = lambda u, v: np.max( \
+                [abs(sum(u<=v)/len(u)-sum(v<=val)/len(v)) for val in np.concatenate([u, v])]\
                 )
+
     shift : float
         The relationship between x and y under the null hypothesis.
Stark" Date: Tue, 6 Aug 2019 16:46:30 -0700 Subject: [PATCH 2/2] DOC: fix typos --- permute/core.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/permute/core.py b/permute/core.py index fe4aa7c..4569acb 100644 --- a/permute/core.py +++ b/permute/core.py @@ -196,7 +196,7 @@ def two_sample(x, y, reps=10**5, stat='mean', alternative="greater", approximating the permutation distribution. The t-statistic is computed using scipy.stats.ttest_ind (c) If stat is a function (a callable object), the test statistic is - that function. The function should take two arguments: + that function. The function should take two arguments: given a permutation of the pooled data, the first argument is the "new" x and the second argument is the "new" y. For instance, if the test statistic is the Kolmogorov-Smirnov distance @@ -204,7 +204,7 @@ def two_sample(x, y, reps=10**5, stat='mean', alternative="greater", $\max_t |F_x(t) - F_y(t)|$, the test statistic could be written: f = lambda u, v: np.max( \ - [abs(sum(u<=v)/len(u)-sum(v<=val)/len(v)) for val in np.concatenate([u, v])]\ + [abs(sum(u<=val)/len(u)-sum(v<=val)/len(v)) for val in np.concatenate([u, v])]\ ) alternative : {'greater', 'less', 'two-sided'} @@ -298,7 +298,7 @@ def two_sample_shift(x, y, reps=10**5, stat='mean', alternative="greater", approximating the permutation distribution. The t-statistic is computed using scipy.stats.ttest_ind (c) If stat is a function (a callable object), the test statistic is - that function.The function should take two arguments: + that function. The function should take two arguments: given a permutation of the pooled data, the first argument is the "new" x and the second argument is the "new" y. For instance, if the test statistic is the Kolmogorov-Smirnov distance @@ -306,7 +306,7 @@ def two_sample_shift(x, y, reps=10**5, stat='mean', alternative="greater", $\max_t |F_x(t) - F_y(t)|$, the test statistic could be written: f = lambda u, v: np.max( \ - [abs(sum(u<=v)/len(u)-sum(v<=val)/len(v)) for val in np.concatenate([u, v])]\ + [abs(sum(u<=val)/len(u)-sum(v<=val)/len(v)) for val in np.concatenate([u, v])]\ ) alternative : {'greater', 'less', 'two-sided'} @@ -414,7 +414,7 @@ def two_sample_conf_int(x, y, cl=0.95, alternative="two-sided", seed=None, approximating the permutation distribution. The t-statistic is computed using scipy.stats.ttest_ind (c) If stat is a function (a callable object), the test statistic is - that function.The function should take two arguments: + that function. The function should take two arguments: given a permutation of the pooled data, the first argument is the "new" x and the second argument is the "new" y. For instance, if the test statistic is the Kolmogorov-Smirnov distance @@ -422,7 +422,7 @@ def two_sample_conf_int(x, y, cl=0.95, alternative="two-sided", seed=None, $\max_t |F_x(t) - F_y(t)|$, the test statistic could be written: f = lambda u, v: np.max( \ - [abs(sum(u<=v)/len(u)-sum(v<=val)/len(v)) for val in np.concatenate([u, v])]\ + [abs(sum(u<=val)/len(u)-sum(v<=val)/len(v)) for val in np.concatenate([u, v])]\ ) shift : float