JuliaData · bkamins · Jan 25, 2024 · Jan 19, 2024 · Jan 19, 2024
diff --git a/NEWS.md b/NEWS.md
@@ -28,6 +28,8 @@
   ([#3393](https://github.com/JuliaData/DataFrames.jl/pull/3393))
 * Correctly index `eachrow` and `eachcol` with `CartesianIndex`
   ([#3413](https://github.com/JuliaData/DataFrames.jl/issues/3413))
+* Correctly handle non-standard integers when converting them to `BigInt`
+  ([#3419](https://github.com/JuliaData/DataFrames.jl/issues/3419))
 
 
 # DataFrames.jl v1.6.1 Release Notes

diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl
@@ -1486,7 +1486,7 @@ function fillcombinations(df::AbstractDataFrame, indexcols;
     end
 
     # make sure we do not overflow in the target data frame size
-    target_rows = Int(prod(x -> big(length(x)), uniquevals))
+    target_rows = Int(prod(x -> BigInt(length(x)), uniquevals))
     if iszero(target_rows)
         @assert iszero(nrow(df))
         cdf = copy(df)

diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl
@@ -1546,7 +1546,7 @@ function allcombinations(::Type{DataFrame}, pairs::Pair{Symbol, <:Any}...)
     @assert length(colvalues) == length(colnames)
     @assert all(x -> x isa AbstractVector, colvalues)
 
-    target_rows = Int(prod(x -> big(length(x)), colvalues))
+    target_rows = Int(prod(x -> BigInt(length(x)), colvalues))
     out_df = DataFrame()
     inner = 1
     for (val, cname) in zip(colvalues, colnames)
@@ -1563,4 +1563,3 @@ function allcombinations(::Type{DataFrame}, pairs::Pair{Symbol, <:Any}...)
 end
 
 _try_select_no_copy(df::DataFrame, cols) = select(df, cols, copycols=false)
-
diff --git a/src/groupeddataframe/utils.jl b/src/groupeddataframe/utils.jl
@@ -156,7 +156,7 @@ function refpool_and_array(x::AbstractArray)
         else
             minval, maxval = extrema(x)
         end
-        ngroups = big(maxval) - big(minval) + 1
+        ngroups = BigInt(maxval) - BigInt(minval) + 1
         # Threshold chosen with the same rationale as the row_group_slots! refpool method:
         # refpool approach is faster but we should not allocate too much memory either
         # We also have to avoid overflow, including with ngroups + 1 for missing values

diff --git a/src/join/core.jl b/src/join/core.jl
@@ -328,7 +328,7 @@ function _innerjoin_unsorted_int(left::AbstractVector{<:Union{Integer, Missing}}
                                  right::AbstractVector{<:Union{Integer, Missing}})
     minv, maxv = extrema_missing(right)
 
-    val_range = big(maxv) - big(minv)
+    val_range = BigInt(maxv) - BigInt(minv)
     if val_range > typemax(Int) - 3 || val_range ÷ 2 > max(64, length(right)) ||
        minv < typemin(Int) + 2 || maxv > typemax(Int) - 3
        return _innerjoin_unsorted(left, right)
@@ -648,7 +648,7 @@ function _semijoin_unsorted_int(left::AbstractVector{<:Union{Integer, Missing}},
                                 right_shorter::Bool)
     minv, maxv = extrema_missing(right)
 
-    val_range = big(maxv) - big(minv)
+    val_range = BigInt(maxv) - BigInt(minv)
     if val_range > typemax(Int) - 3 || val_range ÷ 2 > max(64, length(right)) ||
        minv < typemin(Int) + 2 || maxv > typemax(Int) - 3
        return _semijoin_unsorted(left, right, seen_rows, right_shorter)