Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes re List::Util::uniqint #108

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 74 additions & 18 deletions ListUtil.xs
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@
# include "ppport.h"
#endif

/* Detect "DoubleDouble" nvtype */
#if defined(USE_LONG_DOUBLE) && LDBL_MANT_DIG == 106
# define NV_IS_DOUBLEDOUBLE
#endif

/* For uniqnum, define ACTUAL_NVSIZE to be the number *
* of bytes that are actually used to store the NV */

Expand All @@ -24,10 +29,22 @@
# define ACTUAL_NVSIZE NVSIZE
#endif

/* Detect "DoubleDouble" nvtype */
/* Define some additional symbols for uniqint */
#if ACTUAL_NVSIZE == 8
# define NV_DEC_PREC 17
# define NV_BUF_SIZE 32

#if defined(USE_LONG_DOUBLE) && LDBL_MANT_DIG == 106
# define NV_IS_DOUBLEDOUBLE
#elif ACTUAL_NVSIZE == 10
# define NV_DEC_PREC 21
# define NV_BUF_SIZE 32

#elif defined(NV_IS_DOUBLEDOUBLE)
# define NV_DEC_PREC 33
# define NV_BUF_SIZE 48

#else
# define NV_DEC_PREC 36
# define NV_BUF_SIZE 48
#endif

#ifndef PERL_VERSION_DECIMAL
Expand Down Expand Up @@ -1360,23 +1377,62 @@ CODE:
#if PERL_VERSION >= 8
/* int_amg only appeared in perl 5.8.0 */
if(SvAMAGIC(arg) && (arg = AMG_CALLun(arg, int)))
; /* nothing to do */
; /* nothing to do */
else
#endif
if(!SvOK(arg) || SvNOK(arg) || SvPOK(arg))
{
/* Convert undef, NVs and PVs into a well-behaved int */
/* Convert undef, NVs and PVs to an integral value */
NV nv = SvNV(arg);
char buffer[NV_BUF_SIZE]; /* for when additional precision is needed */

if(nv > (NV)UV_MAX)
/* Too positive for UV - use NV */
arg = newSVnv(Perl_floor(nv));
else if(nv < (NV)IV_MIN)
/* Too negative for IV - use NV */
arg = newSVnv(Perl_ceil(nv));
else if(nv > 0 && (UV)nv > (UV)IV_MAX)
if(nv != nv) {
/* Leave all NaN values as NaNs. Use newSVnv(nv) */
arg = newSVnv(nv);
}
else if(nv >= (NV)UV_MAX) {
/* Too big for IV - use NV */
if(nv > NV_MAX) {
/* If nv > NV_MAX, then nv is +Inf. */
arg = newSVnv(nv);
}
else {
/* For each NV type, NV_DEC_PREC provides sufficient precision *
* as to allow uniqint to determine if floored NVs are unique. *
* (Perl itself does not always provide sufficient precision.) *
* NV_DEC_PREC is defined near the start of this file. */
sprintf(buffer, "%.*" NVgf, NV_DEC_PREC, Perl_floor(nv));
arg = newSVpv(buffer, 0);
}
}
else if(nv <= (NV)IV_MIN) {
/* Too negative for UV - use NV */
if(nv < -NV_MAX) {
/* If nv < -NV_MAX, then nv is -Inf. Use newSVnv(nv) */
arg = newSVnv(nv);
}
else {
/* For each NV type, NV_DEC_PREC provides sufficient precision *
* as to allow uniqint to determine if ceiled NVs are unique. *
* (Perl itself does not always provide sufficient precision.) *
* NV_DEC_PREC is defined near the start of this file. */
sprintf(buffer, "%.*" NVgf, NV_DEC_PREC, Perl_ceil(nv));
arg = newSVpv(buffer, 0);
}
}
else if(nv > 0 && (UV)nv > (UV)IV_MAX) {
/* Too positive for IV - use UV */
#if defined(_MSC_VER) && _MSC_VER < 1900 && IVSIZE == 8 && NVSIZE == 8

/* Less recent versions of Visual Studio make a mess of casting NVs *
* that are greater than 2 ** 63 and less than 2 ** 64 to the *
* correct UV. Hence, we use this workaround. */

arg = newSVuv( 9223372036854775808 + (UV)(nv - 9223372036854775808.0) );
#else
arg = newSVuv(nv);
#endif
}
else
/* Must now fit into IV */
arg = newSViv(nv);
Expand Down Expand Up @@ -1449,7 +1505,7 @@ CODE:
#endif
}
#if NVSIZE > IVSIZE /* $Config{nvsize} > $Config{ivsize} */
/* Avoid altering arg's flags */
/* Avoid altering arg's flags */
if(SvUOK(arg)) nv_arg = (NV)SvUV(arg);
else if(SvIOK(arg)) nv_arg = (NV)SvIV(arg);
else nv_arg = SvNV(arg);
Expand All @@ -1474,9 +1530,9 @@ CODE:
* that are allocated but never used. (It is only the 10-byte *
* extended precision long double that allocates bytes that are *
* never used. For all other NV types ACTUAL_NVSIZE == sizeof(NV). */
sv_setpvn(keysv, (char *) &nv_arg, ACTUAL_NVSIZE);
sv_setpvn(keysv, (char *) &nv_arg, ACTUAL_NVSIZE);
}
#else /* $Config{nvsize} == $Config{ivsize} == 8 */
#else /* $Config{nvsize} == $Config{ivsize} == 8 */
if( SvIOK(arg) || !SvOK(arg) ) {

/* It doesn't matter if SvUOK(arg) is TRUE */
Expand Down Expand Up @@ -1506,7 +1562,7 @@ CODE:
* Then subtract 1 so that all of the ("allowed") bits below the set bit *
* are 1 && all other ("disallowed") bits are set to 0. *
* (If the value prior to subtraction was 0, then subtracting 1 will set *
* all bits - which is also fine.) */
* all bits - which is also fine.) */
UV valid_bits = (lowest_set << 53) - 1;

/* The value of arg can be exactly represented by a double unless one *
Expand All @@ -1515,9 +1571,9 @@ CODE:
* by -1 prior to performing that '&' operation - so multiply iv by sign.*/
if( !((iv * sign) & (~valid_bits)) ) {
/* Avoid altering arg's flags */
nv_arg = uok ? (NV)SvUV(arg) : (NV)SvIV(arg);
nv_arg = uok ? (NV)SvUV(arg) : (NV)SvIV(arg);
sv_setpvn(keysv, (char *) &nv_arg, 8);
}
}
else {
/* Read in the bytes, rather than the numeric value of the IV/UV as *
* this is more efficient, despite having to sv_catpvn an extra byte.*/
Expand Down
5 changes: 5 additions & 0 deletions lib/List/Util.pm
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -575,6 +575,11 @@ are enabled (C<use warnings 'uninitialized';>). In addition, an C<undef> in
the returned list is coerced into a numerical zero, so that the entire list of
values returned by C<uniqint> are well-behaved as integers.

Note also that, for perls whose NV is the IBM DoubleDouble, NVs whose absolute
values are greater than 2**106 might not be handled correctly by uniqint.
They will be handled correctly by uniqnum (see below) but of course, unlike
uniqint, uniqnum does not truncate fractional values to integers.

=head2 uniqnum

my @subset = uniqnum @values
Expand Down
186 changes: 185 additions & 1 deletion t/uniq.t
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
use strict;
use warnings;
use Config; # to determine ivsize
use Test::More tests => 31;
use Test::More tests => 42;
use List::Util qw( uniqstr uniqint uniq );

use Tie::Array;
Expand Down Expand Up @@ -83,6 +83,190 @@ is_deeply( [ uniqint 6.1, 6.2, 6.3 ],
[ 6 ],
'uniqint compares as and returns integers' );

my $ls = 31; # maximum left shift for 32-bit unity

if( $Config{ivsize} == 8 ) {
$ls = 63; # maximum left shift for 64-bit unity
}

# Populate @in with UV-NV pairs of equivalent values.
# Each of these values is exactly representable as
# either a UV or an NV.

my @in = (1 << $ls, 2 ** $ls,
1 << ($ls - 3), 2 ** ($ls - 3),
5 << ($ls - 3), 5 * (2 ** ($ls - 3)));

my @correct = (1 << $ls, 1 << ($ls - 3), 5 << ($ls -3));

if( $Config{ivsize} == 8 && $Config{nvsize} == 8 ) {

# Add some more IV-NV pairs of equivalent values. Each of these
# values is exactly representable as either an IV or an NV, and
# they are samples of values that were problematic with respect
# to uniqnum. We include them for completeness. These IV-NV pairs
# also represent values whose absolutes are less than ~0 and can
# be expressed either as $num << $shift or $num * (2 ** $shift),
# where $num is less than 1 << 53 (ie less than 9007199254740992).


push @in, ( 9007199254740991, 9.007199254740991e+15, # 9007199254740991 << 0
9007199254740992, 9.007199254740992e+15, # 1 << 53
9223372036854774784, 9.223372036854774784e+18, # 9007199254740991 << 10
100000000000262144, 1.00000000000262144e+17, # 762939453127 << 17
100000000001310720, 1.0000000000131072e+17, # 762939453135 << 17
144115188075593728, 1.44115188075593728e+17, # 549755813887 << 18
-9007199254740991, -9.007199254740991e+15, # -(9007199254740991 << 0 )
-9007199254740992, -9.007199254740992e+15, # -(1 << 53)
-9223372036854774784, -9.223372036854774784e+18, # -(9007199254740991 << 10)
-100000000000262144, -1.00000000000262144e+17, # -(762939453127 << 17)
-100000000001310720, -1.0000000000131072e+17, # -(762939453135 << 17)
-144115188075593728, -1.44115188075593728e+17 ); # -(549755813887 << 18)

push @correct, ( 9007199254740991,
9007199254740992,
9223372036854774784,
100000000000262144,
100000000001310720,
144115188075593728,
-9007199254740991,
-9007199254740992,
-9223372036854774784,
-100000000000262144,
-100000000001310720,
-144115188075593728 );
}

# uniqint should discard each of the NVs as being a
# duplicate of the preceding IV.

is_deeply( [ uniqint @in],
[ @correct],
'uniqint correctly compares IVs that don\'t overflow NVs' );

# This test did not always pass.
# The 2 input values are NVs of distinct values.
# the 2 expected values are UVs with (respectively) the same values as the NVs.
is_deeply( [ uniqint ((2 ** $ls) + (2 ** ($ls - 1)),
(2 ** $ls) + (2 ** ($ls - 2))) ],
[ (3 << ($ls - 1), 5 << ($ls - 2)) ],
'uniqint correctly compares UVs that don\'t overflow NVs' );

my ( $nv1, $nv2, $nv3, $nv4, $uniq_count );

# Assign large integer values to $nv1 and $nv2 that differ by only 1 ULP and check that
# uniqint recognizes them as being unique. Both $nv1 and $nv2 are evaluated with full
# NV precision.
# Perl stringifies $nv1 and $nv2 to the same string - hence our interest in checking
# that $nv1 and $nv2 are, indeed, being recognized as unique.
#
# Same goes for $nv3 and $nv4, who also differ by only 1 ULP.

if( $Config{nvsize} == 8 ) {
# NV is either 'double' or 8-byte 'long double'
$nv1 = 3.6893488147419095e19; # 0x1.ffffffffffffep+64 == ((1 << 53) - 2) * (2 ** 12)
$nv2 = 3.6893488147419099e19; # 0x1.fffffffffffffp+64 == ((1 << 53) - 1) * (2 ** 12)

if($Config{ivsize} == 4) {
$nv3 = 9.007199254740992e+15; # 0x1.0000000000000p+53 == 2 ** 53
$nv4 = 9.007199254740994e+15; # 0x1.0000000000001p+53 == (2 ** 53) + 2
}
else {
$nv3 = 1.8446744073709552e+19; # 0x1.0000000000000p+64 == 2 ** 64
$nv4 = 1.8446744073709556e+19; # 0x1.0000000000001p+64 == (2 ** 64) + (2 ** 12)
}
}
elsif(length(sqrt(2)) > 25) {
# NV is either IEEE 'long double' or '__float128' or doubledouble

if(1 + (2 ** -1074) != 1) {
# NV is doubledouble
$nv1 = (2 ** 70) + 1; # 0x1p+70 + 1
$nv2 = (2 ** 70) + 2; # 0x1p+70 + 2

$nv3 = 8.1129638414606681695789005144064e+31; # 2 ** 106
$nv4 = 8.1129638414606681695789005144066e+31; # (2 ** 106) + 2
}
else {
# NV is either IEEE 'long double' or '__float128'
$nv1 = 2.72225893536750770770699685945414517e+39; #0x1.fffffffffffffffffffffffffffep130
$nv2 = 2.72225893536750770770699685945414543e+39; #0x1.ffffffffffffffffffffffffffffp130

$nv3 = 1.0384593717069655257060992658440192e+34;
# 0x2.0000000000000000000000000000p+112 == 2 ** 113
$nv4 = 1.0384593717069655257060992658440194e+34; #
# 0x2.0000000000000000000000000002p+112 == (2 ** 113) + 2
}
}
else {
# NV is extended precision 'long double'
$nv1 = 3.6893488147419103228e+19; # 0x0.fffffffffffffffep+65
$nv2 = 3.689348814741910323e+19; # 0x0.ffffffffffffffffp+65

$nv3 = 1.8446744073709551616e+19; # 0x8.000000000000000p+61 == 2 ** 64
$nv4 = 1.8446744073709551618e+19; # 0x8.000000000000001p+61 == (2 ** 64) + 2
}

SKIP: {
# $nv1 and $nv2 should have been assigned different values, but perl could be buggy:
skip ( 'perl incorrectly assigned identical values to both test variables', 2 ) if $nv1 == $nv2;

$uniq_count = uniqint( $nv1, $nv2 );
is( $uniq_count, 2, 'uniqint detects uniqueness of Nvs that differ by 1 ULP (1st test)' );

# Also check the negatives.
$uniq_count = uniqint( -$nv1, -$nv2 );
is( $uniq_count, 2, 'uniqint detects uniqueness of Nvs that differ by 1 ULP (1st -ve test)' );
}

SKIP: {
# $nv3 and $nv4 should have been assigned different values, but perl could be buggy:
skip ( 'perl incorrectly assigned identical values to both test variables', 2 ) if $nv3 == $nv4;

$uniq_count = uniqint( $nv3, $nv4 );
is( $uniq_count, 2, 'uniqint detects uniqueness of Nvs that differ by 1 ULP (2nd test)' );

# Also check the negatives.
$uniq_count = uniqint( -$nv3, -$nv4 );
is( $uniq_count, 2, 'uniqint detects uniqueness of Nvs that differ by 1 ULP (2nd -ve test)' );
}

# Hard to know for sure what an Inf is going to be. Lets make one
my $Inf = 0 + 1E1000;
my $NaN;
$Inf **= 1000 while ( $NaN = $Inf - $Inf ) == $NaN;

is_deeply( [ uniqint 1 << $ls, -(1 << $ls), 0, 1, 12345, $Inf, -$Inf, $NaN, 0, $Inf, $NaN, -$Inf ],
[ 1 << $ls, -(1 << $ls), 0, 1, 12345, $Inf, -$Inf, $NaN ],
'uniqint handles the special values of +-Inf and Nan' );

# The next 2 tests did not always pass.
# Increment $ls to one greater than maximum allowed left shift
$ls++;

my @u = uniqint(2 ** $ls, -(2 ** $ls));

cmp_ok($u[0], '==', 2 ** $ls, "uniqint handles 2 ** $ls correctly");
cmp_ok($u[1], '==', -(2 ** $ls), "uniqint handles -(2 ** $ls) correctly");

# Another test that did not always pass:
# $nv1 == $nv2 if nvsize == 8.
# Else $nv1 != $nv2.
$nv1 = 2 ** 64;
$nv2 = 1.8446744073709551615e+19;

my $uniq_count1 = uniqint($nv1, $nv2);
my $uniq_count2 = uniqint(-$nv1, -$nv2);

if($nv1 == $nv2) {
is( $uniq_count1, 1, 'uniqint detects that 2 ** 64 == 1.8446744073709551615e+19' );
is( $uniq_count2, 1, 'uniqint detects that -(2 ** 64) == -1.8446744073709551615e+19' );
}
else {
is( $uniq_count1, 2, 'uniqint detects that 2 ** 64 != 1.8446744073709551615e+19' );
is( $uniq_count2, 2, 'uniqint detects that -(2 ** 64) != -1.8446744073709551615e+19' );
}

{
my $warnings = "";
local $SIG{__WARN__} = sub { $warnings .= join "", @_ };
Expand Down
Loading