Dual-Life · sisyphus · May 7, 2020 · May 7, 2020 · May 7, 2020 · May 7, 2020
diff --git a/ListUtil.xs b/ListUtil.xs
@@ -15,6 +15,11 @@
 #  include "ppport.h"
 #endif
 
+/* Detect "DoubleDouble" nvtype */
+#if defined(USE_LONG_DOUBLE) && LDBL_MANT_DIG == 106
+#  define NV_IS_DOUBLEDOUBLE
+#endif
+
 /* For uniqnum, define ACTUAL_NVSIZE to be the number *
  * of bytes that are actually used to store the NV    */
 
@@ -24,10 +29,22 @@
 #  define ACTUAL_NVSIZE NVSIZE
 #endif
 
-/* Detect "DoubleDouble" nvtype */
+/* Define some additional symbols for uniqint */
+#if ACTUAL_NVSIZE == 8
+#  define NV_DEC_PREC 17
+#  define NV_BUF_SIZE 32
 
-#if defined(USE_LONG_DOUBLE) && LDBL_MANT_DIG == 106
-#  define NV_IS_DOUBLEDOUBLE
+#elif ACTUAL_NVSIZE == 10
+#  define NV_DEC_PREC 21
+#  define NV_BUF_SIZE 32
+
+#elif defined(NV_IS_DOUBLEDOUBLE)
+#  define NV_DEC_PREC 33
+#  define NV_BUF_SIZE 48
+
+#else
+#  define NV_DEC_PREC 36
+#  define NV_BUF_SIZE 48
 #endif
 
 #ifndef PERL_VERSION_DECIMAL
@@ -1360,23 +1377,62 @@ CODE:
 #if PERL_VERSION >= 8
             /* int_amg only appeared in perl 5.8.0 */
             if(SvAMAGIC(arg) && (arg = AMG_CALLun(arg, int)))
-                ; /* nothing to do */
+              ; /* nothing to do */
             else
 #endif
             if(!SvOK(arg) || SvNOK(arg) || SvPOK(arg))
             {
-                /* Convert undef, NVs and PVs into a well-behaved int */
+                /* Convert undef, NVs and PVs to an integral value */
                 NV nv = SvNV(arg);
+                char buffer[NV_BUF_SIZE]; /* for when additional precision is needed    */
 
-                if(nv > (NV)UV_MAX)
-                    /* Too positive for UV - use NV */
-                    arg = newSVnv(Perl_floor(nv));
-                else if(nv < (NV)IV_MIN)
-                    /* Too negative for IV - use NV */
-                    arg = newSVnv(Perl_ceil(nv));
-                else if(nv > 0 && (UV)nv > (UV)IV_MAX)
+                if(nv != nv) {
+                    /* Leave all NaN values as NaNs. Use newSVnv(nv)                    */
+                    arg = newSVnv(nv);
+                }
+                else if(nv >= (NV)UV_MAX) {
+                    /* Too big for IV - use NV */
+                    if(nv > NV_MAX) {
+                        /* If nv > NV_MAX, then nv is +Inf.                              */
+                        arg = newSVnv(nv);
+                    }
+                    else {
+                        /* For each NV type, NV_DEC_PREC provides sufficient precision   *
+                         * as to allow uniqint to determine if floored NVs are unique.   *
+                         * (Perl itself does not always provide sufficient precision.)   *
+                         * NV_DEC_PREC is defined near the start of this file.           */
+                        sprintf(buffer, "%.*" NVgf, NV_DEC_PREC, Perl_floor(nv));
+                        arg = newSVpv(buffer, 0);
+                    }
+                }
+                else if(nv <= (NV)IV_MIN) {
+                    /* Too negative for UV - use NV */
+                    if(nv < -NV_MAX) {
+                        /* If nv < -NV_MAX, then nv is -Inf. Use newSVnv(nv)              */
+                        arg = newSVnv(nv);
+                    }
+                    else {
+                        /* For each NV type, NV_DEC_PREC provides sufficient precision   *
+                         * as to allow uniqint to determine if ceiled NVs are unique.    *
+                         * (Perl itself does not always provide sufficient precision.)   *
+                         * NV_DEC_PREC is defined near the start of this file.           */
+                        sprintf(buffer, "%.*" NVgf, NV_DEC_PREC, Perl_ceil(nv));
+                        arg = newSVpv(buffer, 0);
+                    }
+                }
+                else if(nv > 0 && (UV)nv > (UV)IV_MAX) {
                     /* Too positive for IV - use UV */
+#if defined(_MSC_VER) && _MSC_VER < 1900 && IVSIZE == 8 && NVSIZE == 8
+
+                    /* Less recent versions of Visual Studio make a mess of casting NVs  *
+                     * that are greater than 2 ** 63 and less than 2 ** 64 to the        *
+                     * correct UV. Hence, we use this workaround.                        */
+
+                    arg = newSVuv( 9223372036854775808 + (UV)(nv - 9223372036854775808.0) );
+#else 
                     arg = newSVuv(nv);
+#endif
+                }
                 else
                     /* Must now fit into IV */
                     arg = newSViv(nv);
@@ -1449,7 +1505,7 @@ CODE:
 #endif
         }
 #if NVSIZE > IVSIZE                          /* $Config{nvsize} > $Config{ivsize} */
-        /* Avoid altering arg's flags */
+        /* Avoid altering arg's flags */ 
         if(SvUOK(arg))      nv_arg = (NV)SvUV(arg);
         else if(SvIOK(arg)) nv_arg = (NV)SvIV(arg);
         else                nv_arg = SvNV(arg);
@@ -1474,9 +1530,9 @@ CODE:
              * that are allocated but never used. (It is only the 10-byte      *
              * extended precision long double that allocates bytes that are    *
              * never used. For all other NV types ACTUAL_NVSIZE == sizeof(NV). */
-            sv_setpvn(keysv, (char *) &nv_arg, ACTUAL_NVSIZE);
+            sv_setpvn(keysv, (char *) &nv_arg, ACTUAL_NVSIZE);  
         }
-#else                                    /* $Config{nvsize} == $Config{ivsize} == 8 */
+#else                                    /* $Config{nvsize} == $Config{ivsize} == 8 */ 
         if( SvIOK(arg) || !SvOK(arg) ) {
 
             /* It doesn't matter if SvUOK(arg) is TRUE */
@@ -1506,7 +1562,7 @@ CODE:
                  * Then subtract 1 so that all of the ("allowed") bits below the set bit *
                  * are 1 && all other ("disallowed") bits are set to 0.                  *
                  * (If the value prior to subtraction was 0, then subtracting 1 will set *
-                 * all bits - which is also fine.)                                       */
+                 * all bits - which is also fine.)                                       */ 
                 UV valid_bits = (lowest_set << 53) - 1;
 
                 /* The value of arg can be exactly represented by a double unless one    *
@@ -1515,9 +1571,9 @@ CODE:
                  * by -1 prior to performing that '&' operation - so multiply iv by sign.*/
                 if( !((iv * sign) & (~valid_bits)) ) {
                     /* Avoid altering arg's flags */
-                    nv_arg = uok ? (NV)SvUV(arg) : (NV)SvIV(arg);
+                    nv_arg = uok ? (NV)SvUV(arg) : (NV)SvIV(arg); 
                     sv_setpvn(keysv, (char *) &nv_arg, 8);
-                }
+                }          
                 else {
                     /* Read in the bytes, rather than the numeric value of the IV/UV as  *
                      * this is more efficient, despite having to sv_catpvn an extra byte.*/

diff --git a/lib/List/Util.pm b/lib/List/Util.pm
@@ -575,6 +575,11 @@ are enabled (C<use warnings 'uninitialized';>). In addition, an C<undef> in
 the returned list is coerced into a numerical zero, so that the entire list of
 values returned by C<uniqint> are well-behaved as integers.
 
+Note also that, for perls whose NV is the IBM DoubleDouble, NVs whose absolute
+values are greater than 2**106 might not be handled correctly by uniqint.
+They will be handled correctly by uniqnum (see below) but of course, unlike
+uniqint, uniqnum does not truncate fractional values to integers.
+
 =head2 uniqnum
 
     my @subset = uniqnum @values

diff --git a/t/uniq.t b/t/uniq.t
@@ -3,7 +3,7 @@
 use strict;
 use warnings;
 use Config; # to determine ivsize
-use Test::More tests => 31;
+use Test::More tests => 42;
 use List::Util qw( uniqstr uniqint uniq );
 
 use Tie::Array;
@@ -83,6 +83,190 @@ is_deeply( [ uniqint 6.1, 6.2, 6.3 ],
            [ 6 ],
            'uniqint compares as and returns integers' );
 
+my $ls = 31;      # maximum left shift for 32-bit unity
+
+if( $Config{ivsize} == 8 ) {
+  $ls       = 63; # maximum left shift for 64-bit unity
+}
+
+# Populate @in with UV-NV pairs of equivalent values.
+# Each of these values is exactly representable as 
+# either a UV or an NV.
+
+my @in = (1 << $ls, 2 ** $ls,
+          1 << ($ls - 3), 2 ** ($ls - 3),
+          5 << ($ls - 3), 5 * (2 ** ($ls - 3)));
+
+my @correct = (1 << $ls, 1 << ($ls - 3), 5 << ($ls -3));
+
+if( $Config{ivsize} == 8 && $Config{nvsize} == 8 ) {
+
+     # Add some more IV-NV pairs of equivalent values. Each of these
+     # values is exactly representable as either an IV or an NV, and
+     # they are samples of values that were problematic with respect
+     # to uniqnum. We include them for completeness. These IV-NV pairs
+     # also represent values whose absolutes are less than ~0 and can
+     # be expressed either as $num << $shift or $num * (2 ** $shift),
+     # where $num is less than 1 << 53 (ie less than 9007199254740992).
+
+
+     push @in, ( 9007199254740991,     9.007199254740991e+15,       #   9007199254740991 << 0
+                 9007199254740992,     9.007199254740992e+15,       #   1                << 53
+                 9223372036854774784,  9.223372036854774784e+18,    #   9007199254740991 << 10
+                 100000000000262144,   1.00000000000262144e+17,     #   762939453127     << 17
+                 100000000001310720,   1.0000000000131072e+17,      #   762939453135     << 17
+                 144115188075593728,   1.44115188075593728e+17,     #   549755813887     << 18
+                 -9007199254740991,     -9.007199254740991e+15,     # -(9007199254740991 << 0 )
+                 -9007199254740992,     -9.007199254740992e+15,     # -(1                << 53)
+                 -9223372036854774784,  -9.223372036854774784e+18,  # -(9007199254740991 << 10)
+                 -100000000000262144,   -1.00000000000262144e+17,   # -(762939453127     << 17)
+                 -100000000001310720,   -1.0000000000131072e+17,    # -(762939453135     << 17)
+                 -144115188075593728,   -1.44115188075593728e+17 ); # -(549755813887     << 18)
+
+     push @correct, ( 9007199254740991,
+                      9007199254740992,
+                      9223372036854774784,
+                      100000000000262144,
+                      100000000001310720,
+                      144115188075593728,
+                      -9007199254740991,
+                      -9007199254740992,
+                      -9223372036854774784,
+                      -100000000000262144,
+                      -100000000001310720,
+                      -144115188075593728 );
+}
+
+# uniqint should discard each of the NVs as being a
+# duplicate of the preceding IV. 
+
+is_deeply( [ uniqint @in],
+           [ @correct],
+           'uniqint correctly compares IVs that don\'t overflow NVs' );
+
+# This test did not always pass.
+# The 2 input values are NVs of distinct values.
+# the 2 expected values are UVs with (respectively) the same values as the NVs.
+is_deeply( [ uniqint ((2 ** $ls) + (2 ** ($ls - 1)),
+                      (2 ** $ls) + (2 ** ($ls - 2))) ],
+           [ (3 << ($ls - 1), 5 << ($ls - 2)) ],
+            'uniqint correctly compares UVs that don\'t overflow NVs' );
+
+my ( $nv1, $nv2, $nv3, $nv4, $uniq_count );
+
+# Assign large integer values to $nv1 and $nv2 that differ by only 1 ULP and check that
+# uniqint recognizes them as being unique. Both $nv1 and $nv2 are evaluated with full
+# NV precision.
+# Perl stringifies $nv1 and $nv2 to the same string - hence our interest in checking
+# that $nv1 and $nv2 are, indeed, being recognized as unique.
+#
+# Same goes for $nv3 and $nv4, who also differ by only 1 ULP. 
+
+if( $Config{nvsize} == 8 ) {
+    # NV is either 'double' or 8-byte 'long double'
+    $nv1 = 3.6893488147419095e19; # 0x1.ffffffffffffep+64 == ((1 << 53) - 2) * (2 ** 12)
+    $nv2 = 3.6893488147419099e19; # 0x1.fffffffffffffp+64 == ((1 << 53) - 1) * (2 ** 12)
+
+    if($Config{ivsize} == 4) {
+        $nv3 = 9.007199254740992e+15; # 0x1.0000000000000p+53 ==  2 ** 53
+        $nv4 = 9.007199254740994e+15; # 0x1.0000000000001p+53 == (2 ** 53) + 2
+    }
+    else {
+        $nv3 = 1.8446744073709552e+19; # 0x1.0000000000000p+64 ==  2 ** 64
+        $nv4 = 1.8446744073709556e+19; # 0x1.0000000000001p+64 == (2 ** 64) + (2 ** 12)
+    }
+}
+elsif(length(sqrt(2)) > 25) {
+    # NV is either IEEE 'long double' or '__float128' or doubledouble
+
+    if(1 + (2 ** -1074) != 1) {
+        # NV is doubledouble
+        $nv1 = (2 ** 70) + 1; # 0x1p+70 + 1
+        $nv2 = (2 ** 70) + 2; # 0x1p+70 + 2
+
+        $nv3 = 8.1129638414606681695789005144064e+31; #  2 ** 106
+        $nv4 = 8.1129638414606681695789005144066e+31; # (2 ** 106) + 2
+    }
+    else {
+        # NV is either IEEE 'long double' or '__float128'
+        $nv1 = 2.72225893536750770770699685945414517e+39; #0x1.fffffffffffffffffffffffffffep130
+        $nv2 = 2.72225893536750770770699685945414543e+39; #0x1.ffffffffffffffffffffffffffffp130
+
+        $nv3 = 1.0384593717069655257060992658440192e+34;
+                                            # 0x2.0000000000000000000000000000p+112 ==  2 ** 113
+        $nv4 = 1.0384593717069655257060992658440194e+34; #
+                                            # 0x2.0000000000000000000000000002p+112 == (2 ** 113) + 2
+    }
+}
+else {
+    # NV is extended precision 'long double'
+    $nv1 = 3.6893488147419103228e+19; # 0x0.fffffffffffffffep+65
+    $nv2 = 3.689348814741910323e+19;  # 0x0.ffffffffffffffffp+65
+
+    $nv3 = 1.8446744073709551616e+19; # 0x8.000000000000000p+61 ==  2 ** 64
+    $nv4 = 1.8446744073709551618e+19; # 0x8.000000000000001p+61 == (2 ** 64) + 2
+}
+
+SKIP: {
+    # $nv1 and $nv2 should have been assigned different values, but perl could be buggy:
+    skip ( 'perl incorrectly assigned identical values to both test variables', 2 ) if $nv1 == $nv2;
+
+    $uniq_count = uniqint( $nv1, $nv2 );
+    is( $uniq_count, 2, 'uniqint detects uniqueness of Nvs that differ by 1 ULP (1st test)' );
+
+    # Also check the negatives.
+    $uniq_count = uniqint( -$nv1, -$nv2 );
+    is( $uniq_count, 2, 'uniqint detects uniqueness of Nvs that differ by 1 ULP (1st -ve test)' );
+}
+
+SKIP: {
+    # $nv3 and $nv4 should have been assigned different values, but perl could be buggy:
+    skip ( 'perl incorrectly assigned identical values to both test variables', 2 ) if $nv3 == $nv4;
+
+    $uniq_count = uniqint( $nv3, $nv4 );
+    is( $uniq_count, 2, 'uniqint detects uniqueness of Nvs that differ by 1 ULP (2nd test)' );
+
+    # Also check the negatives.
+    $uniq_count = uniqint( -$nv3, -$nv4 );
+    is( $uniq_count, 2, 'uniqint detects uniqueness of Nvs that differ by 1 ULP (2nd -ve test)' );
+}
+
+# Hard to know for sure what an Inf is going to be. Lets make one
+my $Inf = 0 + 1E1000;
+my $NaN;
+$Inf **= 1000 while ( $NaN = $Inf - $Inf ) == $NaN;
+
+is_deeply( [ uniqint 1 << $ls, -(1 << $ls), 0, 1, 12345, $Inf, -$Inf, $NaN, 0, $Inf, $NaN, -$Inf ],
+           [ 1 << $ls, -(1 << $ls), 0, 1, 12345, $Inf, -$Inf, $NaN ],
+           'uniqint handles the special values of +-Inf and Nan' );
+
+# The next 2 tests did not always pass.
+# Increment $ls to one greater than maximum allowed left shift 
+$ls++;
+
+my @u = uniqint(2 ** $ls, -(2 ** $ls));
+
+cmp_ok($u[0], '==', 2 ** $ls,    "uniqint handles 2 ** $ls correctly");
+cmp_ok($u[1], '==', -(2 ** $ls), "uniqint handles -(2 ** $ls) correctly");
+
+# Another test that did not always pass:
+# $nv1 == $nv2 if nvsize == 8.
+# Else $nv1 != $nv2.
+$nv1 = 2 ** 64;
+$nv2 = 1.8446744073709551615e+19;
+
+my $uniq_count1 = uniqint($nv1, $nv2);
+my $uniq_count2 = uniqint(-$nv1, -$nv2);
+
+if($nv1 == $nv2) {
+    is( $uniq_count1, 1, 'uniqint detects that 2 ** 64 == 1.8446744073709551615e+19' );
+    is( $uniq_count2, 1, 'uniqint detects that -(2 ** 64) == -1.8446744073709551615e+19' );
+}
+else {
+    is( $uniq_count1, 2, 'uniqint detects that 2 ** 64 != 1.8446744073709551615e+19' );
+    is( $uniq_count2, 2, 'uniqint detects that -(2 ** 64) != -1.8446744073709551615e+19' );
+}
+
 {
     my $warnings = "";
     local $SIG{__WARN__} = sub { $warnings .= join "", @_ };