From 69eeee336eb216a990ad5eb2b4ded93b547594b1 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Sun, 2 Feb 2025 13:20:13 +0100 Subject: [PATCH] Update rand requirement from 0.8 to 0.9 (#7045) * Update rand requirement from 0.8 to 0.9 Updates the requirements on [rand](https://github.com/rust-random/rand) to permit the latest version. - [Release notes](https://github.com/rust-random/rand/releases) - [Changelog](https://github.com/rust-random/rand/blob/master/CHANGELOG.md) - [Commits](https://github.com/rust-random/rand/compare/0.8.0...0.9.0) --- updated-dependencies: - dependency-name: rand dependency-type: direct:production ... Signed-off-by: dependabot[bot] * Migrate to 0.9 * Add missing feature, also bump `rand` in `object_store` * Rustfmt * Name `UniformUsize` instance `uusize` instead of `usize` * Rename another use of `usize` --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- arrow-array/Cargo.toml | 2 +- arrow-array/benches/fixed_size_list_array.rs | 6 +- arrow-array/benches/occupancy.rs | 8 +- arrow-array/benches/union_array.rs | 12 +-- arrow-array/src/array/boolean_array.rs | 8 +- arrow-array/src/array/run_array.rs | 10 +-- arrow-array/src/run_iterator.rs | 6 +- arrow-avro/Cargo.toml | 3 +- arrow-buffer/Cargo.toml | 2 +- arrow-buffer/benches/i256.rs | 8 +- arrow-buffer/benches/offset.rs | 2 +- arrow-buffer/src/bigint/mod.rs | 12 +-- arrow-buffer/src/builder/boolean.rs | 2 +- arrow-buffer/src/util/bit_chunk_iterator.rs | 17 ++-- arrow-buffer/src/util/bit_mask.rs | 14 +-- arrow-buffer/src/util/bit_util.rs | 8 +- arrow-cast/Cargo.toml | 2 +- arrow-cast/src/base64.rs | 10 +-- arrow-json/Cargo.toml | 3 +- arrow-json/benches/serde.rs | 20 +++-- arrow-ord/Cargo.toml | 2 +- arrow-ord/src/sort.rs | 2 +- arrow-row/Cargo.toml | 3 +- arrow-row/src/lib.rs | 78 ++++++++-------- arrow-select/Cargo.toml | 2 +- arrow-select/src/filter.rs | 48 +++++----- arrow-select/src/nullif.rs | 14 +-- arrow/Cargo.toml | 4 +- arrow/benches/aggregate_kernels.rs | 4 +- arrow/benches/array_from_vec.rs | 8 +- arrow/benches/boolean_append_packed.rs | 10 +-- arrow/benches/buffer_create.rs | 6 +- arrow/benches/builder.rs | 12 +-- arrow/benches/cast_kernels.rs | 16 ++-- arrow/benches/comparison_kernels.rs | 4 +- arrow/benches/csv_reader.rs | 20 ++--- arrow/benches/decimal_validate.rs | 8 +- arrow/benches/interleave_kernels.rs | 4 +- arrow/benches/json_writer.rs | 4 +- arrow/benches/mutable_array.rs | 4 +- arrow/benches/partition_kernels.rs | 4 +- arrow/benches/primitive_run_take.rs | 4 +- arrow/benches/string_dictionary_builder.rs | 8 +- arrow/benches/string_run_iterator.rs | 6 +- arrow/benches/take_kernels.rs | 4 +- arrow/src/util/bench_util.rs | 89 ++++++++++--------- arrow/src/util/data_gen.rs | 14 +-- arrow/src/util/test_util.rs | 2 +- object_store/Cargo.toml | 4 +- object_store/src/aws/dynamo.rs | 6 +- object_store/src/azure/client.rs | 2 +- object_store/src/client/backoff.rs | 8 +- object_store/src/integration.rs | 10 +-- object_store/src/upload.rs | 6 +- object_store/src/util.rs | 14 +-- parquet/Cargo.toml | 2 +- parquet/benches/arrow_reader.rs | 35 ++++---- parquet/benches/compression.rs | 6 +- parquet/benches/encoding.rs | 8 +- parquet/benches/row_selector.rs | 4 +- .../src/arrow/array_reader/byte_view_array.rs | 4 +- .../src/arrow/array_reader/primitive_array.rs | 2 +- parquet/src/arrow/arrow_reader/mod.rs | 37 ++++---- parquet/src/arrow/arrow_reader/selection.rs | 10 +-- parquet/src/arrow/async_reader/mod.rs | 14 +-- parquet/src/arrow/buffer/bit_util.rs | 10 +-- .../arrow/record_reader/definition_levels.rs | 20 ++--- parquet/src/column/reader.rs | 2 +- parquet/src/column/reader/decoder.rs | 10 +-- parquet/src/column/writer/mod.rs | 2 +- parquet/src/encodings/rle.rs | 11 ++- parquet/src/util/bit_util.rs | 4 +- parquet/src/util/test_common/rand_gen.rs | 42 +++++---- 73 files changed, 423 insertions(+), 389 deletions(-) diff --git a/arrow-array/Cargo.toml b/arrow-array/Cargo.toml index 6eae8e24677d..ce66f8a9f96c 100644 --- a/arrow-array/Cargo.toml +++ b/arrow-array/Cargo.toml @@ -55,7 +55,7 @@ ffi = ["arrow-schema/ffi", "arrow-data/ffi"] force_validate = [] [dev-dependencies] -rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] } +rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] } criterion = { version = "0.5", default-features = false } [build-dependencies] diff --git a/arrow-array/benches/fixed_size_list_array.rs b/arrow-array/benches/fixed_size_list_array.rs index 5270a4a5def3..f9b862f88086 100644 --- a/arrow-array/benches/fixed_size_list_array.rs +++ b/arrow-array/benches/fixed_size_list_array.rs @@ -18,13 +18,13 @@ use arrow_array::{Array, FixedSizeListArray, Int32Array}; use arrow_schema::Field; use criterion::*; -use rand::{thread_rng, Rng}; +use rand::{rng, Rng}; use std::sync::Arc; fn gen_fsl(len: usize, value_len: usize) -> FixedSizeListArray { - let mut rng = thread_rng(); + let mut rng = rng(); let values = Arc::new(Int32Array::from( - (0..len).map(|_| rng.gen::()).collect::>(), + (0..len).map(|_| rng.random::()).collect::>(), )); let field = Arc::new(Field::new_list_field(values.data_type().clone(), true)); FixedSizeListArray::new(field, value_len as i32, values, None) diff --git a/arrow-array/benches/occupancy.rs b/arrow-array/benches/occupancy.rs index ed4b94351c28..fd334b613257 100644 --- a/arrow-array/benches/occupancy.rs +++ b/arrow-array/benches/occupancy.rs @@ -19,7 +19,7 @@ use arrow_array::types::Int32Type; use arrow_array::{DictionaryArray, Int32Array}; use arrow_buffer::NullBuffer; use criterion::*; -use rand::{thread_rng, Rng}; +use rand::{rng, Rng}; use std::sync::Arc; fn gen_dict( @@ -28,11 +28,11 @@ fn gen_dict( occupancy: f64, null_percent: f64, ) -> DictionaryArray { - let mut rng = thread_rng(); + let mut rng = rng(); let values = Int32Array::from(vec![0; values_len]); let max_key = (values_len as f64 * occupancy) as i32; - let keys = (0..len).map(|_| rng.gen_range(0..max_key)).collect(); - let nulls = (0..len).map(|_| !rng.gen_bool(null_percent)).collect(); + let keys = (0..len).map(|_| rng.random_range(0..max_key)).collect(); + let nulls = (0..len).map(|_| !rng.random_bool(null_percent)).collect(); let keys = Int32Array::new(keys, Some(NullBuffer::new(nulls))); DictionaryArray::new(keys, Arc::new(values)) diff --git a/arrow-array/benches/union_array.rs b/arrow-array/benches/union_array.rs index c5b2ec0f7752..753cc8148eef 100644 --- a/arrow-array/benches/union_array.rs +++ b/arrow-array/benches/union_array.rs @@ -24,17 +24,17 @@ use arrow_array::{Array, ArrayRef, Int32Array, UnionArray}; use arrow_buffer::{NullBuffer, ScalarBuffer}; use arrow_schema::{DataType, Field, UnionFields}; use criterion::*; -use rand::{thread_rng, Rng}; +use rand::{rng, Rng}; fn array_with_nulls() -> ArrayRef { - let mut rng = thread_rng(); + let mut rng = rng(); - let values = ScalarBuffer::from_iter(repeat_with(|| rng.gen()).take(4096)); + let values = ScalarBuffer::from_iter(repeat_with(|| rng.random()).take(4096)); // nulls with at least one null and one valid let nulls: NullBuffer = [true, false] .into_iter() - .chain(repeat_with(|| rng.gen())) + .chain(repeat_with(|| rng.random())) .take(4096) .collect(); @@ -42,9 +42,9 @@ fn array_with_nulls() -> ArrayRef { } fn array_without_nulls() -> ArrayRef { - let mut rng = thread_rng(); + let mut rng = rng(); - let values = ScalarBuffer::from_iter(repeat_with(|| rng.gen()).take(4096)); + let values = ScalarBuffer::from_iter(repeat_with(|| rng.random()).take(4096)); Arc::new(Int32Array::new(values.clone(), None)) } diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs index 9c2d4af8c454..fcebf5a0f718 100644 --- a/arrow-array/src/array/boolean_array.rs +++ b/arrow-array/src/array/boolean_array.rs @@ -479,7 +479,7 @@ impl From for BooleanArray { mod tests { use super::*; use arrow_buffer::Buffer; - use rand::{thread_rng, Rng}; + use rand::{rng, Rng}; #[test] fn test_boolean_fmt_debug() { @@ -667,11 +667,11 @@ mod tests { #[test] #[cfg_attr(miri, ignore)] // Takes too long fn test_true_false_count() { - let mut rng = thread_rng(); + let mut rng = rng(); for _ in 0..10 { // No nulls - let d: Vec<_> = (0..2000).map(|_| rng.gen_bool(0.5)).collect(); + let d: Vec<_> = (0..2000).map(|_| rng.random_bool(0.5)).collect(); let b = BooleanArray::from(d.clone()); let expected_true = d.iter().filter(|x| **x).count(); @@ -680,7 +680,7 @@ mod tests { // With nulls let d: Vec<_> = (0..2000) - .map(|_| rng.gen_bool(0.5).then(|| rng.gen_bool(0.5))) + .map(|_| rng.random_bool(0.5).then(|| rng.random_bool(0.5))) .collect(); let b = BooleanArray::from(d.clone()); diff --git a/arrow-array/src/array/run_array.rs b/arrow-array/src/array/run_array.rs index b340bf9a9065..6d909cbeb9a2 100644 --- a/arrow-array/src/array/run_array.rs +++ b/arrow-array/src/array/run_array.rs @@ -662,8 +662,8 @@ where #[cfg(test)] mod tests { + use rand::rng; use rand::seq::SliceRandom; - use rand::thread_rng; use rand::Rng; use super::*; @@ -691,7 +691,7 @@ mod tests { ]; let mut result: Vec> = Vec::with_capacity(size); let mut ix = 0; - let mut rng = thread_rng(); + let mut rng = rng(); // run length can go up to 8. Cap the max run length for smaller arrays to size / 2. let max_run_length = 8_usize.min(1_usize.max(size / 2)); while result.len() < size { @@ -700,7 +700,7 @@ mod tests { seed.shuffle(&mut rng); } // repeat the items between 1 and 8 times. Cap the length for smaller sized arrays - let num = max_run_length.min(rand::thread_rng().gen_range(1..=max_run_length)); + let num = max_run_length.min(rand::rng().random_range(1..=max_run_length)); for _ in 0..num { result.push(seed[ix]); } @@ -1000,7 +1000,7 @@ mod tests { let mut logical_indices: Vec = (0_u32..(logical_len as u32)).collect(); // add same indices once more logical_indices.append(&mut logical_indices.clone()); - let mut rng = thread_rng(); + let mut rng = rng(); logical_indices.shuffle(&mut rng); let physical_indices = run_array.get_physical_indices(&logical_indices).unwrap(); @@ -1036,7 +1036,7 @@ mod tests { let mut logical_indices: Vec = (0_u32..(slice_len as u32)).collect(); // add same indices once more logical_indices.append(&mut logical_indices.clone()); - let mut rng = thread_rng(); + let mut rng = rng(); logical_indices.shuffle(&mut rng); // test for offset = 0 and slice length = slice_len diff --git a/arrow-array/src/run_iterator.rs b/arrow-array/src/run_iterator.rs index 2922bf04dd2f..f758f749fcb5 100644 --- a/arrow-array/src/run_iterator.rs +++ b/arrow-array/src/run_iterator.rs @@ -172,7 +172,7 @@ where #[cfg(test)] mod tests { - use rand::{seq::SliceRandom, thread_rng, Rng}; + use rand::{rng, seq::SliceRandom, Rng}; use crate::{ array::{Int32Array, StringArray}, @@ -200,7 +200,7 @@ mod tests { ]; let mut result: Vec> = Vec::with_capacity(size); let mut ix = 0; - let mut rng = thread_rng(); + let mut rng = rng(); // run length can go up to 8. Cap the max run length for smaller arrays to size / 2. let max_run_length = 8_usize.min(1_usize.max(size / 2)); while result.len() < size { @@ -209,7 +209,7 @@ mod tests { seed.shuffle(&mut rng); } // repeat the items between 1 and 8 times. Cap the length for smaller sized arrays - let num = max_run_length.min(rand::thread_rng().gen_range(1..=max_run_length)); + let num = max_run_length.min(rand::rng().random_range(1..=max_run_length)); for _ in 0..num { result.push(seed[ix]); } diff --git a/arrow-avro/Cargo.toml b/arrow-avro/Cargo.toml index c103c2ecc0f3..b1391559f292 100644 --- a/arrow-avro/Cargo.toml +++ b/arrow-avro/Cargo.toml @@ -51,5 +51,4 @@ crc = { version = "3.0", optional = true } [dev-dependencies] -rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] } - +rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] } diff --git a/arrow-buffer/Cargo.toml b/arrow-buffer/Cargo.toml index 68bfe8ddf732..69029759dee1 100644 --- a/arrow-buffer/Cargo.toml +++ b/arrow-buffer/Cargo.toml @@ -40,7 +40,7 @@ half = { version = "2.1", default-features = false } [dev-dependencies] criterion = { version = "0.5", default-features = false } -rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] } +rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] } [build-dependencies] diff --git a/arrow-buffer/benches/i256.rs b/arrow-buffer/benches/i256.rs index ebb45e793bd0..f94ca6951ef9 100644 --- a/arrow-buffer/benches/i256.rs +++ b/arrow-buffer/benches/i256.rs @@ -47,8 +47,8 @@ fn criterion_benchmark(c: &mut Criterion) { let numerators: Vec<_> = (0..SIZE) .map(|_| { - let high = rng.gen_range(1000..i128::MAX); - let low = rng.gen(); + let high = rng.random_range(1000..i128::MAX); + let low = rng.random(); i256::from_parts(low, high) }) .collect(); @@ -56,7 +56,7 @@ fn criterion_benchmark(c: &mut Criterion) { let divisors: Vec<_> = numerators .iter() .map(|n| { - let quotient = rng.gen_range(1..100_i32); + let quotient = rng.random_range(1..100_i32); n.wrapping_div(i256::from(quotient)) }) .collect(); @@ -70,7 +70,7 @@ fn criterion_benchmark(c: &mut Criterion) { }); let divisors: Vec<_> = (0..SIZE) - .map(|_| i256::from(rng.gen_range(1..100_i32))) + .map(|_| i256::from(rng.random_range(1..100_i32))) .collect(); c.bench_function("i256_div_rem small divisor", |b| { diff --git a/arrow-buffer/benches/offset.rs b/arrow-buffer/benches/offset.rs index 1aea5024fbd1..12c00a60c53a 100644 --- a/arrow-buffer/benches/offset.rs +++ b/arrow-buffer/benches/offset.rs @@ -24,7 +24,7 @@ const SIZE: usize = 1024; fn criterion_benchmark(c: &mut Criterion) { let mut rng = StdRng::seed_from_u64(42); - let lengths: Vec = black_box((0..SIZE).map(|_| rng.gen_range(0..40)).collect()); + let lengths: Vec = black_box((0..SIZE).map(|_| rng.random_range(0..40)).collect()); c.bench_function("OffsetBuffer::from_lengths", |b| { b.iter(|| OffsetBuffer::::from_lengths(lengths.iter().copied())); diff --git a/arrow-buffer/src/bigint/mod.rs b/arrow-buffer/src/bigint/mod.rs index f5fab75dc5ef..b4f2e2bddf54 100644 --- a/arrow-buffer/src/bigint/mod.rs +++ b/arrow-buffer/src/bigint/mod.rs @@ -840,7 +840,7 @@ impl ToPrimitive for i256 { mod tests { use super::*; use num::Signed; - use rand::{thread_rng, Rng}; + use rand::{rng, Rng}; #[test] fn test_signed_cmp() { @@ -1091,16 +1091,16 @@ mod tests { #[test] #[cfg_attr(miri, ignore)] fn test_i256_fuzz() { - let mut rng = thread_rng(); + let mut rng = rng(); for _ in 0..1000 { let mut l = [0_u8; 32]; - let len = rng.gen_range(0..32); - l.iter_mut().take(len).for_each(|x| *x = rng.gen()); + let len = rng.random_range(0..32); + l.iter_mut().take(len).for_each(|x| *x = rng.random()); let mut r = [0_u8; 32]; - let len = rng.gen_range(0..32); - r.iter_mut().take(len).for_each(|x| *x = rng.gen()); + let len = rng.random_range(0..32); + r.iter_mut().take(len).for_each(|x| *x = rng.random()); test_ops(i256::from_le_bytes(l), i256::from_le_bytes(r)) } diff --git a/arrow-buffer/src/builder/boolean.rs b/arrow-buffer/src/builder/boolean.rs index 0424e128d2db..da8fb06430e2 100644 --- a/arrow-buffer/src/builder/boolean.rs +++ b/arrow-buffer/src/builder/boolean.rs @@ -399,7 +399,7 @@ mod tests { let mut buffer = BooleanBufferBuilder::new(12); let mut all_bools = vec![]; - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let src_len = 32; let (src, compacted_src) = { diff --git a/arrow-buffer/src/util/bit_chunk_iterator.rs b/arrow-buffer/src/util/bit_chunk_iterator.rs index 54995314c49b..0562c7ad5998 100644 --- a/arrow-buffer/src/util/bit_chunk_iterator.rs +++ b/arrow-buffer/src/util/bit_chunk_iterator.rs @@ -371,7 +371,10 @@ impl ExactSizeIterator for BitChunkIterator<'_> { #[cfg(test)] mod tests { + use rand::distr::uniform::UniformSampler; + use rand::distr::uniform::UniformUsize; use rand::prelude::*; + use rand::rng; use crate::buffer::Buffer; use crate::util::bit_chunk_iterator::UnalignedBitChunk; @@ -624,21 +627,25 @@ mod tests { #[test] #[cfg_attr(miri, ignore)] fn fuzz_unaligned_bit_chunk_iterator() { - let mut rng = thread_rng(); + let mut rng = rng(); + let uusize = UniformUsize::new(usize::MIN, usize::MAX).unwrap(); for _ in 0..100 { - let mask_len = rng.gen_range(0..1024); - let bools: Vec<_> = std::iter::from_fn(|| Some(rng.gen())) + let mask_len = rng.random_range(0..1024); + let bools: Vec<_> = std::iter::from_fn(|| Some(rng.random())) .take(mask_len) .collect(); let buffer = Buffer::from_iter(bools.iter().cloned()); let max_offset = 64.min(mask_len); - let offset = rng.gen::().checked_rem(max_offset).unwrap_or(0); + let offset = uusize.sample(&mut rng).checked_rem(max_offset).unwrap_or(0); let max_truncate = 128.min(mask_len - offset); - let truncate = rng.gen::().checked_rem(max_truncate).unwrap_or(0); + let truncate = uusize + .sample(&mut rng) + .checked_rem(max_truncate) + .unwrap_or(0); let unaligned = UnalignedBitChunk::new(buffer.as_slice(), offset, mask_len - offset - truncate); diff --git a/arrow-buffer/src/util/bit_mask.rs b/arrow-buffer/src/util/bit_mask.rs index 97be7e006dec..0d694d13ec75 100644 --- a/arrow-buffer/src/util/bit_mask.rs +++ b/arrow-buffer/src/util/bit_mask.rs @@ -164,7 +164,7 @@ mod tests { use super::*; use crate::bit_util::{get_bit, set_bit, unset_bit}; use rand::prelude::StdRng; - use rand::{Fill, Rng, SeedableRng}; + use rand::{Rng, SeedableRng, TryRngCore}; use std::fmt::Display; #[test] @@ -322,20 +322,20 @@ mod tests { // -------------------+-----------------+------- // length of data to copy - let len = rng.gen_range(0..=200); + let len = rng.random_range(0..=200); // randomly pick where we will write to - let offset_write_bits = rng.gen_range(0..=200); + let offset_write_bits = rng.random_range(0..=200); let offset_write_bytes = if offset_write_bits % 8 == 0 { offset_write_bits / 8 } else { (offset_write_bits / 8) + 1 }; - let extra_write_data_bytes = rng.gen_range(0..=5); // ensure 0 shows up often + let extra_write_data_bytes = rng.random_range(0..=5); // ensure 0 shows up often // randomly decide where we will read from - let extra_read_data_bytes = rng.gen_range(0..=5); // make sure 0 shows up often - let offset_read_bits = rng.gen_range(0..=200); + let extra_read_data_bytes = rng.random_range(0..=5); // make sure 0 shows up often + let offset_read_bits = rng.random_range(0..=200); let offset_read_bytes = if offset_read_bits % 8 != 0 { (offset_read_bits / 8) + 1 } else { @@ -356,7 +356,7 @@ mod tests { self.data .resize(offset_read_bytes + len + extra_read_data_bytes, 0); // fill source data with random bytes - self.data.try_fill(rng).unwrap(); + rng.try_fill_bytes(self.data.as_mut_slice()).unwrap(); self.offset_read = offset_read_bits; self.len = len; diff --git a/arrow-buffer/src/util/bit_util.rs b/arrow-buffer/src/util/bit_util.rs index ed5d363d607f..f39cb69c314d 100644 --- a/arrow-buffer/src/util/bit_util.rs +++ b/arrow-buffer/src/util/bit_util.rs @@ -153,7 +153,7 @@ mod tests { let mut expected = vec![]; let mut rng = seedable_rng(); for i in 0..8 * NUM_BYTE { - let b = rng.gen_bool(0.5); + let b = rng.random_bool(0.5); expected.push(b); if b { set_bit(&mut buf[..], i) @@ -197,7 +197,7 @@ mod tests { let mut expected = vec![]; let mut rng = seedable_rng(); for i in 0..8 * NUM_BYTE { - let b = rng.gen_bool(0.5); + let b = rng.random_bool(0.5); expected.push(b); if b { unsafe { @@ -221,7 +221,7 @@ mod tests { let mut expected = vec![]; let mut rng = seedable_rng(); for i in 0..8 * NUM_BYTE { - let b = rng.gen_bool(0.5); + let b = rng.random_bool(0.5); expected.push(b); if !b { unsafe { @@ -247,7 +247,7 @@ mod tests { let mut v = HashSet::new(); let mut rng = seedable_rng(); for _ in 0..NUM_SETS { - let offset = rng.gen_range(0..8 * NUM_BYTES); + let offset = rng.random_range(0..8 * NUM_BYTES); v.insert(offset); set_bit(&mut buffer[..], offset); } diff --git a/arrow-cast/Cargo.toml b/arrow-cast/Cargo.toml index 4046f5226094..22e446ce313b 100644 --- a/arrow-cast/Cargo.toml +++ b/arrow-cast/Cargo.toml @@ -58,7 +58,7 @@ ryu = "1.0.16" [dev-dependencies] criterion = { version = "0.5", default-features = false } half = { version = "2.1", default-features = false } -rand = "0.8" +rand = "0.9" [build-dependencies] diff --git a/arrow-cast/src/base64.rs b/arrow-cast/src/base64.rs index 534b21878c56..e7bb84ebe24c 100644 --- a/arrow-cast/src/base64.rs +++ b/arrow-cast/src/base64.rs @@ -90,7 +90,7 @@ pub fn b64_decode( mod tests { use super::*; use arrow_array::BinaryArray; - use rand::{thread_rng, Rng}; + use rand::{rng, Rng}; fn test_engine(e: &E, a: &BinaryArray) { let encoded = b64_encode(e, a); @@ -105,12 +105,12 @@ mod tests { #[test] fn test_b64() { - let mut rng = thread_rng(); - let len = rng.gen_range(1024..1050); + let mut rng = rng(); + let len = rng.random_range(1024..1050); let data: BinaryArray = (0..len) .map(|_| { - let len = rng.gen_range(0..16); - Some((0..len).map(|_| rng.gen()).collect::>()) + let len = rng.random_range(0..16); + Some((0..len).map(|_| rng.random()).collect::>()) }) .collect(); diff --git a/arrow-json/Cargo.toml b/arrow-json/Cargo.toml index 564cb9433b3d..3a2eca2183c5 100644 --- a/arrow-json/Cargo.toml +++ b/arrow-json/Cargo.toml @@ -54,9 +54,8 @@ futures = "0.3" tokio = { version = "1.27", default-features = false, features = ["io-util"] } bytes = "1.4" criterion = { version = "0.5", default-features = false } -rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] } +rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] } [[bench]] name = "serde" harness = false - diff --git a/arrow-json/benches/serde.rs b/arrow-json/benches/serde.rs index 7636b9c9dff9..7baaac458f86 100644 --- a/arrow-json/benches/serde.rs +++ b/arrow-json/benches/serde.rs @@ -18,7 +18,7 @@ use arrow_json::ReaderBuilder; use arrow_schema::{DataType, Field, Schema}; use criterion::*; -use rand::{thread_rng, Rng}; +use rand::{rng, Rng}; use serde::Serialize; use std::sync::Arc; @@ -35,26 +35,28 @@ fn do_bench(c: &mut Criterion, name: &str, rows: &[R], schema: &Sc } fn criterion_benchmark(c: &mut Criterion) { - let mut rng = thread_rng(); + let mut rng = rng(); let schema = Schema::new(vec![Field::new("i32", DataType::Int32, false)]); - let v: Vec = (0..2048).map(|_| rng.gen_range(0..10000)).collect(); + let v: Vec = (0..2048).map(|_| rng.random_range(0..10000)).collect(); do_bench(c, "small_i32", &v, &schema); - let v: Vec = (0..2048).map(|_| rng.gen()).collect(); + let v: Vec = (0..2048).map(|_| rng.random()).collect(); do_bench(c, "large_i32", &v, &schema); let schema = Schema::new(vec![Field::new("i64", DataType::Int64, false)]); - let v: Vec = (0..2048).map(|_| rng.gen_range(0..10000)).collect(); + let v: Vec = (0..2048).map(|_| rng.random_range(0..10000)).collect(); do_bench(c, "small_i64", &v, &schema); - let v: Vec = (0..2048).map(|_| rng.gen_range(0..i32::MAX as _)).collect(); + let v: Vec = (0..2048) + .map(|_| rng.random_range(0..i32::MAX as _)) + .collect(); do_bench(c, "medium_i64", &v, &schema); - let v: Vec = (0..2048).map(|_| rng.gen()).collect(); + let v: Vec = (0..2048).map(|_| rng.random()).collect(); do_bench(c, "large_i64", &v, &schema); let schema = Schema::new(vec![Field::new("f32", DataType::Float32, false)]); - let v: Vec = (0..2048).map(|_| rng.gen_range(0.0..10000.)).collect(); + let v: Vec = (0..2048).map(|_| rng.random_range(0.0..10000.)).collect(); do_bench(c, "small_f32", &v, &schema); - let v: Vec = (0..2048).map(|_| rng.gen_range(0.0..f32::MAX)).collect(); + let v: Vec = (0..2048).map(|_| rng.random_range(0.0..f32::MAX)).collect(); do_bench(c, "large_f32", &v, &schema); } diff --git a/arrow-ord/Cargo.toml b/arrow-ord/Cargo.toml index 8d74d2f97d72..560b19b3f609 100644 --- a/arrow-ord/Cargo.toml +++ b/arrow-ord/Cargo.toml @@ -42,4 +42,4 @@ arrow-select = { workspace = true } [dev-dependencies] half = { version = "2.1", default-features = false, features = ["num-traits"] } -rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] } +rand = { version = "0.9", default-features = false, features = ["std", "std_rng"] } diff --git a/arrow-ord/src/sort.rs b/arrow-ord/src/sort.rs index fa5e2b8b2f7e..92ee93e1b656 100644 --- a/arrow-ord/src/sort.rs +++ b/arrow-ord/src/sort.rs @@ -4281,7 +4281,7 @@ mod tests { fn test_partial_rand_sort() { let size = 1000u32; let mut rng = StdRng::seed_from_u64(42); - let mut before: Vec = (0..size).map(|_| rng.gen::()).collect(); + let mut before: Vec = (0..size).map(|_| rng.random::()).collect(); let mut d = before.clone(); let last = (rng.next_u32() % size) as usize; d.sort_unstable(); diff --git a/arrow-row/Cargo.toml b/arrow-row/Cargo.toml index 90d99684d265..2e97abbbc582 100644 --- a/arrow-row/Cargo.toml +++ b/arrow-row/Cargo.toml @@ -44,7 +44,6 @@ half = { version = "2.1", default-features = false } [dev-dependencies] arrow-cast = { workspace = true } arrow-ord = { workspace = true } -rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] } +rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] } [features] - diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs index d0fad12210db..0e075d587454 100644 --- a/arrow-row/src/lib.rs +++ b/arrow-row/src/lib.rs @@ -1433,9 +1433,9 @@ unsafe fn decode_column( #[cfg(test)] mod tests { - use rand::distributions::uniform::SampleUniform; - use rand::distributions::{Distribution, Standard}; - use rand::{thread_rng, Rng}; + use rand::distr::uniform::SampleUniform; + use rand::distr::{Distribution, StandardUniform}; + use rand::{rng, Rng}; use arrow_array::builder::*; use arrow_array::types::*; @@ -2193,11 +2193,11 @@ mod tests { fn generate_primitive_array(len: usize, valid_percent: f64) -> PrimitiveArray where K: ArrowPrimitiveType, - Standard: Distribution, + StandardUniform: Distribution, { - let mut rng = thread_rng(); + let mut rng = rng(); (0..len) - .map(|_| rng.gen_bool(valid_percent).then(|| rng.gen())) + .map(|_| rng.random_bool(valid_percent).then(|| rng.random())) .collect() } @@ -2205,12 +2205,12 @@ mod tests { len: usize, valid_percent: f64, ) -> GenericStringArray { - let mut rng = thread_rng(); + let mut rng = rng(); (0..len) .map(|_| { - rng.gen_bool(valid_percent).then(|| { - let len = rng.gen_range(0..100); - let bytes = (0..len).map(|_| rng.gen_range(0..128)).collect(); + rng.random_bool(valid_percent).then(|| { + let len = rng.random_range(0..100); + let bytes = (0..len).map(|_| rng.random_range(0..128)).collect(); String::from_utf8(bytes).unwrap() }) }) @@ -2218,12 +2218,12 @@ mod tests { } fn generate_string_view(len: usize, valid_percent: f64) -> StringViewArray { - let mut rng = thread_rng(); + let mut rng = rng(); (0..len) .map(|_| { - rng.gen_bool(valid_percent).then(|| { - let len = rng.gen_range(0..100); - let bytes = (0..len).map(|_| rng.gen_range(0..128)).collect(); + rng.random_bool(valid_percent).then(|| { + let len = rng.random_range(0..100); + let bytes = (0..len).map(|_| rng.random_range(0..128)).collect(); String::from_utf8(bytes).unwrap() }) }) @@ -2231,12 +2231,12 @@ mod tests { } fn generate_byte_view(len: usize, valid_percent: f64) -> BinaryViewArray { - let mut rng = thread_rng(); + let mut rng = rng(); (0..len) .map(|_| { - rng.gen_bool(valid_percent).then(|| { - let len = rng.gen_range(0..100); - let bytes: Vec<_> = (0..len).map(|_| rng.gen_range(0..128)).collect(); + rng.random_bool(valid_percent).then(|| { + let len = rng.random_range(0..100); + let bytes: Vec<_> = (0..len).map(|_| rng.random_range(0..128)).collect(); bytes }) }) @@ -2252,13 +2252,13 @@ mod tests { K: ArrowDictionaryKeyType, K::Native: SampleUniform, { - let mut rng = thread_rng(); + let mut rng = rng(); let min_key = K::Native::from_usize(0).unwrap(); let max_key = K::Native::from_usize(values.len()).unwrap(); let keys: PrimitiveArray = (0..len) .map(|_| { - rng.gen_bool(valid_percent) - .then(|| rng.gen_range(min_key..max_key)) + rng.random_bool(valid_percent) + .then(|| rng.random_range(min_key..max_key)) }) .collect(); @@ -2277,15 +2277,15 @@ mod tests { } fn generate_fixed_size_binary(len: usize, valid_percent: f64) -> FixedSizeBinaryArray { - let mut rng = thread_rng(); - let width = rng.gen_range(0..20); + let mut rng = rng(); + let width = rng.random_range(0..20); let mut builder = FixedSizeBinaryBuilder::new(width); let mut b = vec![0; width as usize]; for _ in 0..len { - match rng.gen_bool(valid_percent) { + match rng.random_bool(valid_percent) { true => { - b.iter_mut().for_each(|x| *x = rng.gen()); + b.iter_mut().for_each(|x| *x = rng.random()); builder.append_value(&b).unwrap(); } false => builder.append_null(), @@ -2296,8 +2296,8 @@ mod tests { } fn generate_struct(len: usize, valid_percent: f64) -> StructArray { - let mut rng = thread_rng(); - let nulls = NullBuffer::from_iter((0..len).map(|_| rng.gen_bool(valid_percent))); + let mut rng = rng(); + let nulls = NullBuffer::from_iter((0..len).map(|_| rng.random_bool(valid_percent))); let a = generate_primitive_array::(len, valid_percent); let b = generate_strings::(len, valid_percent); let fields = Fields::from(vec![ @@ -2312,18 +2312,18 @@ mod tests { where F: FnOnce(usize) -> ArrayRef, { - let mut rng = thread_rng(); - let offsets = OffsetBuffer::::from_lengths((0..len).map(|_| rng.gen_range(0..10))); + let mut rng = rng(); + let offsets = OffsetBuffer::::from_lengths((0..len).map(|_| rng.random_range(0..10))); let values_len = offsets.last().unwrap().to_usize().unwrap(); let values = values(values_len); - let nulls = NullBuffer::from_iter((0..len).map(|_| rng.gen_bool(valid_percent))); + let nulls = NullBuffer::from_iter((0..len).map(|_| rng.random_bool(valid_percent))); let field = Arc::new(Field::new_list_field(values.data_type().clone(), true)); ListArray::new(field, offsets, values, Some(nulls)) } fn generate_column(len: usize) -> ArrayRef { - let mut rng = thread_rng(); - match rng.gen_range(0..16) { + let mut rng = rng(); + match rng.random_range(0..16) { 0 => Arc::new(generate_primitive_array::(len, 0.8)), 1 => Arc::new(generate_primitive_array::(len, 0.8)), 2 => Arc::new(generate_primitive_array::(len, 0.8)), @@ -2333,14 +2333,14 @@ mod tests { 6 => Arc::new(generate_strings::(len, 0.8)), 7 => Arc::new(generate_dictionary::( // Cannot test dictionaries containing null values because of #2687 - Arc::new(generate_strings::(rng.gen_range(1..len), 1.0)), + Arc::new(generate_strings::(rng.random_range(1..len), 1.0)), len, 0.8, )), 8 => Arc::new(generate_dictionary::( // Cannot test dictionaries containing null values because of #2687 Arc::new(generate_primitive_array::( - rng.gen_range(1..len), + rng.random_range(1..len), 1.0, )), len, @@ -2390,15 +2390,15 @@ mod tests { #[cfg_attr(miri, ignore)] fn fuzz_test() { for _ in 0..100 { - let mut rng = thread_rng(); - let num_columns = rng.gen_range(1..5); - let len = rng.gen_range(5..100); + let mut rng = rng(); + let num_columns = rng.random_range(1..5); + let len = rng.random_range(5..100); let arrays: Vec<_> = (0..num_columns).map(|_| generate_column(len)).collect(); let options: Vec<_> = (0..num_columns) .map(|_| SortOptions { - descending: rng.gen_bool(0.5), - nulls_first: rng.gen_bool(0.5), + descending: rng.random_bool(0.5), + nulls_first: rng.random_bool(0.5), }) .collect(); diff --git a/arrow-select/Cargo.toml b/arrow-select/Cargo.toml index 023788799c94..0d412d15126b 100644 --- a/arrow-select/Cargo.toml +++ b/arrow-select/Cargo.toml @@ -45,4 +45,4 @@ ahash = { version = "0.8", default-features = false} default = [] [dev-dependencies] -rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] } +rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] } diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs index c91732848653..7bb140d37f51 100644 --- a/arrow-select/src/filter.rs +++ b/arrow-select/src/filter.rs @@ -864,8 +864,10 @@ mod tests { use arrow_array::builder::*; use arrow_array::cast::as_run_array; use arrow_array::types::*; - use rand::distributions::{Alphanumeric, Standard}; + use rand::distr::uniform::{UniformSampler, UniformUsize}; + use rand::distr::{Alphanumeric, StandardUniform}; use rand::prelude::*; + use rand::rng; use super::*; @@ -1475,9 +1477,9 @@ mod tests { } fn test_slices_fuzz(mask_len: usize, offset: usize, truncate: usize) { - let mut rng = thread_rng(); + let mut rng = rng(); - let bools: Vec = std::iter::from_fn(|| Some(rng.gen())) + let bools: Vec = std::iter::from_fn(|| Some(rng.random())) .take(mask_len) .collect(); @@ -1516,15 +1518,19 @@ mod tests { #[test] #[cfg_attr(miri, ignore)] fn fuzz_test_slices_iterator() { - let mut rng = thread_rng(); + let mut rng = rng(); + let uusize = UniformUsize::new(usize::MIN, usize::MAX).unwrap(); for _ in 0..100 { - let mask_len = rng.gen_range(0..1024); + let mask_len = rng.random_range(0..1024); let max_offset = 64.min(mask_len); - let offset = rng.gen::().checked_rem(max_offset).unwrap_or(0); + let offset = uusize.sample(&mut rng).checked_rem(max_offset).unwrap_or(0); let max_truncate = 128.min(mask_len - offset); - let truncate = rng.gen::().checked_rem(max_truncate).unwrap_or(0); + let truncate = uusize + .sample(&mut rng) + .checked_rem(max_truncate) + .unwrap_or(0); test_slices_fuzz(mask_len, offset, truncate); } @@ -1549,11 +1555,11 @@ mod tests { /// Generates an array of length `len` with `valid_percent` non-null values fn gen_primitive(len: usize, valid_percent: f64) -> Vec> where - Standard: Distribution, + StandardUniform: Distribution, { - let mut rng = thread_rng(); + let mut rng = rng(); (0..len) - .map(|_| rng.gen_bool(valid_percent).then(|| rng.gen())) + .map(|_| rng.random_bool(valid_percent).then(|| rng.random())) .collect() } @@ -1563,11 +1569,11 @@ mod tests { valid_percent: f64, str_len_range: std::ops::Range, ) -> Vec> { - let mut rng = thread_rng(); + let mut rng = rng(); (0..len) .map(|_| { - rng.gen_bool(valid_percent).then(|| { - let len = rng.gen_range(str_len_range.clone()); + rng.random_bool(valid_percent).then(|| { + let len = rng.random_range(str_len_range.clone()); (0..len) .map(|_| char::from(rng.sample(Alphanumeric))) .collect() @@ -1584,24 +1590,24 @@ mod tests { #[test] #[cfg_attr(miri, ignore)] fn fuzz_filter() { - let mut rng = thread_rng(); + let mut rng = rng(); for i in 0..100 { let filter_percent = match i { 0..=4 => 1., 5..=10 => 0., - _ => rng.gen_range(0.0..1.0), + _ => rng.random_range(0.0..1.0), }; - let valid_percent = rng.gen_range(0.0..1.0); + let valid_percent = rng.random_range(0.0..1.0); - let array_len = rng.gen_range(32..256); - let array_offset = rng.gen_range(0..10); + let array_len = rng.random_range(32..256); + let array_offset = rng.random_range(0..10); // Construct a predicate - let filter_offset = rng.gen_range(0..10); - let filter_truncate = rng.gen_range(0..10); - let bools: Vec<_> = std::iter::from_fn(|| Some(rng.gen_bool(filter_percent))) + let filter_offset = rng.random_range(0..10); + let filter_truncate = rng.random_range(0..10); + let bools: Vec<_> = std::iter::from_fn(|| Some(rng.random_bool(filter_percent))) .take(array_len + filter_offset - filter_truncate) .collect(); diff --git a/arrow-select/src/nullif.rs b/arrow-select/src/nullif.rs index 4b90114a4bbc..dc729da7e6c3 100644 --- a/arrow-select/src/nullif.rs +++ b/arrow-select/src/nullif.rs @@ -120,7 +120,7 @@ mod tests { use arrow_array::{Int32Array, NullArray, StringArray, StructArray}; use arrow_data::ArrayData; use arrow_schema::{Field, Fields}; - use rand::{thread_rng, Rng}; + use rand::{rng, Rng}; #[test] fn test_nullif_int_array() { @@ -497,11 +497,13 @@ mod tests { #[test] fn nullif_fuzz() { - let mut rng = thread_rng(); + let mut rng = rng(); let arrays = [ Int32Array::from(vec![0; 128]), - (0..128).map(|_| rng.gen_bool(0.5).then_some(0)).collect(), + (0..128) + .map(|_| rng.random_bool(0.5).then_some(0)) + .collect(), ]; for a in arrays { @@ -511,11 +513,11 @@ mod tests { let a = a.slice(a_offset, a_length); for i in 1..65 { - let b_start_offset = rng.gen_range(0..i); - let b_end_offset = rng.gen_range(0..i); + let b_start_offset = rng.random_range(0..i); + let b_end_offset = rng.random_range(0..i); let b: BooleanArray = (0..a_length + b_start_offset + b_end_offset) - .map(|_| rng.gen_bool(0.5).then(|| rng.gen_bool(0.5))) + .map(|_| rng.random_bool(0.5).then(|| rng.random_bool(0.5))) .collect(); let b = b.slice(b_start_offset, a_length); diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index 88231b7f6160..f621e9d3bd6e 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -53,7 +53,7 @@ arrow-schema = { workspace = true } arrow-select = { workspace = true } arrow-string = { workspace = true } -rand = { version = "0.8", default-features = false, features = ["std", "std_rng"], optional = true } +rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"], optional = true } pyo3 = { version = "0.23", default-features = false, optional = true } half = { version = "2.1", default-features = false, optional = true } @@ -86,7 +86,7 @@ canonical_extension_types = ["arrow-schema/canonical_extension_types"] chrono = { workspace = true } criterion = { version = "0.5", default-features = false } half = { version = "2.1", default-features = false } -rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] } +rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] } serde = { version = "1.0", default-features = false, features = ["derive"] } # used in examples memmap2 = "0.9.3" diff --git a/arrow/benches/aggregate_kernels.rs b/arrow/benches/aggregate_kernels.rs index 6e224a48c4e6..25dbe3548496 100644 --- a/arrow/benches/aggregate_kernels.rs +++ b/arrow/benches/aggregate_kernels.rs @@ -18,7 +18,7 @@ #[macro_use] extern crate criterion; use criterion::{Criterion, Throughput}; -use rand::distributions::{Distribution, Standard}; +use rand::distr::{Distribution, StandardUniform}; extern crate arrow; @@ -31,7 +31,7 @@ const BATCH_SIZE: usize = 64 * 1024; fn primitive_benchmark(c: &mut Criterion, name: &str) where - Standard: Distribution, + StandardUniform: Distribution, { let nonnull_array = create_primitive_array::(BATCH_SIZE, 0.0); let nullable_array = create_primitive_array::(BATCH_SIZE, 0.5); diff --git a/arrow/benches/array_from_vec.rs b/arrow/benches/array_from_vec.rs index fd83ad5c2a10..c256d1523b06 100644 --- a/arrow/benches/array_from_vec.rs +++ b/arrow/benches/array_from_vec.rs @@ -99,10 +99,10 @@ fn decimal_benchmark(c: &mut Criterion) { // bench decimal128 array // create option array let size: usize = 1 << 15; - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let mut array = vec![]; for _ in 0..size { - array.push(Some(rng.gen_range::(0..9999999999))); + array.push(Some(rng.random_range::(0..9999999999))); } c.bench_function("decimal128_array_from_vec 32768", |b| { b.iter(|| decimal128_array_from_vec(array.as_slice())) @@ -112,9 +112,9 @@ fn decimal_benchmark(c: &mut Criterion) { // create option> array let size = 1 << 10; let mut array = vec![]; - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); for _ in 0..size { - let decimal = i256::from_i128(rng.gen_range::(0..9999999999999)); + let decimal = i256::from_i128(rng.random_range::(0..9999999999999)); array.push(Some(decimal)); } diff --git a/arrow/benches/boolean_append_packed.rs b/arrow/benches/boolean_append_packed.rs index 40873422dbd5..508720eb346f 100644 --- a/arrow/benches/boolean_append_packed.rs +++ b/arrow/benches/boolean_append_packed.rs @@ -17,22 +17,22 @@ use arrow::array::BooleanBufferBuilder; use criterion::{criterion_group, criterion_main, Criterion}; -use rand::{thread_rng, Rng}; +use rand::{rng, Rng}; fn rand_bytes(len: usize) -> Vec { - let mut rng = thread_rng(); + let mut rng = rng(); let mut buf = vec![0_u8; len]; rng.fill(buf.as_mut_slice()); buf } fn boolean_append_packed(c: &mut Criterion) { - let mut rng = thread_rng(); + let mut rng = rng(); let source = rand_bytes(1024); let ranges: Vec<_> = (0..100) .map(|_| { - let start: usize = rng.gen_range(0..1024 * 8); - let end: usize = rng.gen_range(start..1024 * 8); + let start: usize = rng.random_range(0..1024 * 8); + let end: usize = rng.random_range(start..1024 * 8); start..end }) .collect(); diff --git a/arrow/benches/buffer_create.rs b/arrow/benches/buffer_create.rs index e7d24c2166d7..f6199ccab55c 100644 --- a/arrow/benches/buffer_create.rs +++ b/arrow/benches/buffer_create.rs @@ -19,7 +19,7 @@ extern crate criterion; use arrow::util::test_util::seedable_rng; use criterion::Criterion; -use rand::distributions::Uniform; +use rand::distr::Uniform; use rand::Rng; extern crate arrow; @@ -110,7 +110,7 @@ fn from_slice(data: &[Vec], capacity: usize) -> Buffer { fn create_data(size: usize) -> Vec> { let rng = &mut seedable_rng(); - let range = Uniform::new(0, 33); + let range = Uniform::new(0, 33).unwrap(); (0..size) .map(|_| { @@ -125,7 +125,7 @@ fn create_data(size: usize) -> Vec> { fn create_data_bool(size: usize) -> Vec> { let rng = &mut seedable_rng(); - let range = Uniform::new(0, 33); + let range = Uniform::new(0, 33).unwrap(); (0..size) .map(|_| { diff --git a/arrow/benches/builder.rs b/arrow/benches/builder.rs index 87a02e7ad1fd..2776924d8ee9 100644 --- a/arrow/benches/builder.rs +++ b/arrow/benches/builder.rs @@ -22,7 +22,7 @@ extern crate rand; use std::mem::size_of; use criterion::*; -use rand::distributions::Standard; +use rand::distr::StandardUniform; use arrow::array::*; use arrow::util::test_util::seedable_rng; @@ -68,7 +68,7 @@ fn bench_primitive_nulls(c: &mut Criterion) { fn bench_bool(c: &mut Criterion) { let data: Vec = seedable_rng() - .sample_iter(&Standard) + .sample_iter(&StandardUniform) .take(BATCH_SIZE) .collect(); let data_len = data.len(); @@ -110,10 +110,10 @@ fn bench_string(c: &mut Criterion) { fn bench_decimal128(c: &mut Criterion) { c.bench_function("bench_decimal128_builder", |b| { b.iter(|| { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let mut decimal_builder = Decimal128Builder::with_capacity(BATCH_SIZE); for _ in 0..BATCH_SIZE { - decimal_builder.append_value(rng.gen_range::(0..9999999999)); + decimal_builder.append_value(rng.random_range::(0..9999999999)); } black_box( decimal_builder @@ -128,11 +128,11 @@ fn bench_decimal128(c: &mut Criterion) { fn bench_decimal256(c: &mut Criterion) { c.bench_function("bench_decimal128_builder", |b| { b.iter(|| { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let mut decimal_builder = Decimal256Builder::with_capacity(BATCH_SIZE); for _ in 0..BATCH_SIZE { decimal_builder - .append_value(i256::from_i128(rng.gen_range::(0..99999999999))); + .append_value(i256::from_i128(rng.random_range::(0..99999999999))); } black_box( decimal_builder diff --git a/arrow/benches/cast_kernels.rs b/arrow/benches/cast_kernels.rs index 5c4fcff13dee..9c3063f20759 100644 --- a/arrow/benches/cast_kernels.rs +++ b/arrow/benches/cast_kernels.rs @@ -18,7 +18,7 @@ #[macro_use] extern crate criterion; use criterion::Criterion; -use rand::distributions::{Distribution, Standard, Uniform}; +use rand::distr::{Distribution, StandardUniform, Uniform}; use rand::Rng; use chrono::DateTime; @@ -34,7 +34,7 @@ use arrow::util::test_util::seedable_rng; fn build_array(size: usize) -> ArrayRef where - Standard: Distribution, + StandardUniform: Distribution, { let array = create_primitive_array::(size, 0.1); Arc::new(array) @@ -46,10 +46,10 @@ fn build_utf8_date_array(size: usize, with_nulls: bool) -> ArrayRef { // use random numbers to avoid spurious compiler optimizations wrt to branching let mut rng = seedable_rng(); let mut builder = StringBuilder::new(); - let range = Uniform::new(0, 737776); + let range = Uniform::new(0, 737776).unwrap(); for _ in 0..size { - if with_nulls && rng.gen::() > 0.8 { + if with_nulls && rng.random::() > 0.8 { builder.append_null(); } else { let string = NaiveDate::from_num_days_from_ce_opt(rng.sample(range)) @@ -66,10 +66,10 @@ fn build_utf8_date_time_array(size: usize, with_nulls: bool) -> ArrayRef { // use random numbers to avoid spurious compiler optimizations wrt to branching let mut rng = seedable_rng(); let mut builder = StringBuilder::new(); - let range = Uniform::new(0, 1608071414123); + let range = Uniform::new(0, 1608071414123).unwrap(); for _ in 0..size { - if with_nulls && rng.gen::() > 0.8 { + if with_nulls && rng.random::() > 0.8 { builder.append_null(); } else { let string = DateTime::from_timestamp(rng.sample(range), 0) @@ -87,7 +87,7 @@ fn build_decimal128_array(size: usize, precision: u8, scale: i8) -> ArrayRef { let mut builder = Decimal128Builder::with_capacity(size); for _ in 0..size { - builder.append_value(rng.gen_range::(0..1000000000)); + builder.append_value(rng.random_range::(0..1000000000)); } Arc::new( builder @@ -102,7 +102,7 @@ fn build_decimal256_array(size: usize, precision: u8, scale: i8) -> ArrayRef { let mut builder = Decimal256Builder::with_capacity(size); let mut bytes = [0; 32]; for _ in 0..size { - let num = rng.gen_range::(0..1000000000); + let num = rng.random_range::(0..1000000000); bytes[0..16].clone_from_slice(&num.to_le_bytes()); builder.append_value(i256::from_le_bytes(bytes)); } diff --git a/arrow/benches/comparison_kernels.rs b/arrow/benches/comparison_kernels.rs index 4c4a63a775a7..84fd47acc1b2 100644 --- a/arrow/benches/comparison_kernels.rs +++ b/arrow/benches/comparison_kernels.rs @@ -72,8 +72,8 @@ fn bench_string_regexp_is_match_scalar(arr_a: &StringArray, value_b: &str) { fn make_string_array(size: usize, rng: &mut StdRng) -> impl Iterator> + '_ { (0..size).map(|_| { - let len = rng.gen_range(0..64); - let bytes = (0..len).map(|_| rng.gen_range(0..128)).collect(); + let len = rng.random_range(0..64); + let bytes = (0..len).map(|_| rng.random_range(0..128)).collect(); Some(String::from_utf8(bytes).unwrap()) }) } diff --git a/arrow/benches/csv_reader.rs b/arrow/benches/csv_reader.rs index 74a47ef892e0..331ff9edd5b9 100644 --- a/arrow/benches/csv_reader.rs +++ b/arrow/benches/csv_reader.rs @@ -61,45 +61,45 @@ fn criterion_benchmark(c: &mut Criterion) { let mut rng = seedable_rng(); // Single Primitive Column tests - let values = Int32Array::from_iter_values((0..4096).map(|_| rng.gen_range(0..1024))); + let values = Int32Array::from_iter_values((0..4096).map(|_| rng.random_range(0..1024))); let cols = vec![Arc::new(values) as ArrayRef]; do_bench(c, "4096 i32_small(0)", cols); - let values = Int32Array::from_iter_values((0..4096).map(|_| rng.gen())); + let values = Int32Array::from_iter_values((0..4096).map(|_| rng.random())); let cols = vec![Arc::new(values) as ArrayRef]; do_bench(c, "4096 i32(0)", cols); - let values = UInt64Array::from_iter_values((0..4096).map(|_| rng.gen_range(0..1024))); + let values = UInt64Array::from_iter_values((0..4096).map(|_| rng.random_range(0..1024))); let cols = vec![Arc::new(values) as ArrayRef]; do_bench(c, "4096 u64_small(0)", cols); - let values = UInt64Array::from_iter_values((0..4096).map(|_| rng.gen())); + let values = UInt64Array::from_iter_values((0..4096).map(|_| rng.random())); let cols = vec![Arc::new(values) as ArrayRef]; do_bench(c, "4096 u64(0)", cols); - let values = Int64Array::from_iter_values((0..4096).map(|_| rng.gen_range(0..1024) - 512)); + let values = Int64Array::from_iter_values((0..4096).map(|_| rng.random_range(0..1024) - 512)); let cols = vec![Arc::new(values) as ArrayRef]; do_bench(c, "4096 i64_small(0)", cols); - let values = Int64Array::from_iter_values((0..4096).map(|_| rng.gen())); + let values = Int64Array::from_iter_values((0..4096).map(|_| rng.random())); let cols = vec![Arc::new(values) as ArrayRef]; do_bench(c, "4096 i64(0)", cols); let cols = vec![Arc::new(Float32Array::from_iter_values( - (0..4096).map(|_| rng.gen_range(0..1024000) as f32 / 1000.), + (0..4096).map(|_| rng.random_range(0..1024000) as f32 / 1000.), )) as _]; do_bench(c, "4096 f32_small(0)", cols); - let values = Float32Array::from_iter_values((0..4096).map(|_| rng.gen())); + let values = Float32Array::from_iter_values((0..4096).map(|_| rng.random())); let cols = vec![Arc::new(values) as ArrayRef]; do_bench(c, "4096 f32(0)", cols); let cols = vec![Arc::new(Float64Array::from_iter_values( - (0..4096).map(|_| rng.gen_range(0..1024000) as f64 / 1000.), + (0..4096).map(|_| rng.random_range(0..1024000) as f64 / 1000.), )) as _]; do_bench(c, "4096 f64_small(0)", cols); - let values = Float64Array::from_iter_values((0..4096).map(|_| rng.gen())); + let values = Float64Array::from_iter_values((0..4096).map(|_| rng.random())); let cols = vec![Arc::new(values) as ArrayRef]; do_bench(c, "4096 f64(0)", cols); diff --git a/arrow/benches/decimal_validate.rs b/arrow/benches/decimal_validate.rs index be812a225ca2..dfa4f5992023 100644 --- a/arrow/benches/decimal_validate.rs +++ b/arrow/benches/decimal_validate.rs @@ -35,11 +35,11 @@ fn validate_decimal256_array(array: Decimal256Array) { } fn validate_decimal128_benchmark(c: &mut Criterion) { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let size: i128 = 20000; let mut decimal_builder = Decimal128Builder::with_capacity(size as usize); for _ in 0..size { - decimal_builder.append_value(rng.gen_range::(0..999999999999)); + decimal_builder.append_value(rng.random_range::(0..999999999999)); } let decimal_array = decimal_builder .finish() @@ -55,11 +55,11 @@ fn validate_decimal128_benchmark(c: &mut Criterion) { } fn validate_decimal256_benchmark(c: &mut Criterion) { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let size: i128 = 20000; let mut decimal_builder = Decimal256Builder::with_capacity(size as usize); for _ in 0..size { - let v = rng.gen_range::(0..999999999999999); + let v = rng.random_range::(0..999999999999999); let decimal = i256::from_i128(v); decimal_builder.append_value(decimal); } diff --git a/arrow/benches/interleave_kernels.rs b/arrow/benches/interleave_kernels.rs index 0941f1e3fd33..ed7ac12379d4 100644 --- a/arrow/benches/interleave_kernels.rs +++ b/arrow/benches/interleave_kernels.rs @@ -54,8 +54,8 @@ fn bench_values(c: &mut Criterion, name: &str, len: usize, values: &[&dyn Array] let mut rng = seedable_rng(); let indices: Vec<_> = (0..len) .map(|_| { - let array_idx = rng.gen_range(0..values.len()); - let value_idx = rng.gen_range(0..values[array_idx].len()); + let array_idx = rng.random_range(0..values.len()); + let value_idx = rng.random_range(0..values[array_idx].len()); (array_idx, value_idx) }) .collect(); diff --git a/arrow/benches/json_writer.rs b/arrow/benches/json_writer.rs index 48be0bccb462..ff76ecdd6253 100644 --- a/arrow/benches/json_writer.rs +++ b/arrow/benches/json_writer.rs @@ -61,7 +61,7 @@ fn create_mixed(len: usize) -> RecordBatch { fn create_nulls(len: usize) -> NullBuffer { let mut rng = seedable_rng(); - BooleanBuffer::from_iter((0..len).map(|_| rng.gen_bool(0.2))).into() + BooleanBuffer::from_iter((0..len).map(|_| rng.random_bool(0.2))).into() } fn create_offsets(len: usize) -> (usize, OffsetBuffer) { @@ -70,7 +70,7 @@ fn create_offsets(len: usize) -> (usize, OffsetBuffer) { let mut offsets = Vec::with_capacity(len + 1); offsets.push(0); for _ in 0..len { - let len = rng.gen_range(0..10); + let len = rng.random_range(0..10); offsets.push(last_offset + len); last_offset += len; } diff --git a/arrow/benches/mutable_array.rs b/arrow/benches/mutable_array.rs index b04e5cd84926..67591194ae6d 100644 --- a/arrow/benches/mutable_array.rs +++ b/arrow/benches/mutable_array.rs @@ -31,8 +31,8 @@ fn create_slices(size: usize) -> Vec<(usize, usize)> { (0..size) .map(|_| { - let start = rng.gen_range(0..size / 2); - let end = rng.gen_range(start + 1..size); + let start = rng.random_range(0..size / 2); + let end = rng.random_range(start + 1..size); (start, end) }) .collect() diff --git a/arrow/benches/partition_kernels.rs b/arrow/benches/partition_kernels.rs index fce8634a10a0..e6a067def292 100644 --- a/arrow/benches/partition_kernels.rs +++ b/arrow/benches/partition_kernels.rs @@ -27,12 +27,12 @@ use arrow::{ datatypes::{Float64Type, UInt8Type}, }; use arrow_ord::partition::partition; -use rand::distributions::{Distribution, Standard}; +use rand::distr::{Distribution, StandardUniform}; use std::iter; fn create_array(size: usize, with_nulls: bool) -> ArrayRef where - Standard: Distribution, + StandardUniform: Distribution, { let null_density = if with_nulls { 0.5 } else { 0.0 }; let array = create_primitive_array::(size, null_density); diff --git a/arrow/benches/primitive_run_take.rs b/arrow/benches/primitive_run_take.rs index c10c16bfee3a..cabf9c118f97 100644 --- a/arrow/benches/primitive_run_take.rs +++ b/arrow/benches/primitive_run_take.rs @@ -28,10 +28,10 @@ fn create_random_index(size: usize, null_density: f32, max_value: usize) -> UInt let mut rng = seedable_rng(); let mut builder = UInt32Builder::with_capacity(size); for _ in 0..size { - if rng.gen::() < null_density { + if rng.random::() < null_density { builder.append_null(); } else { - let value = rng.gen_range::(0u32..max_value as u32); + let value = rng.random_range::(0u32..max_value as u32); builder.append_value(value); } } diff --git a/arrow/benches/string_dictionary_builder.rs b/arrow/benches/string_dictionary_builder.rs index 424400674cd8..a39fd5d03847 100644 --- a/arrow/benches/string_dictionary_builder.rs +++ b/arrow/benches/string_dictionary_builder.rs @@ -18,17 +18,17 @@ use arrow::array::StringDictionaryBuilder; use arrow::datatypes::Int32Type; use criterion::{criterion_group, criterion_main, Criterion}; -use rand::{thread_rng, Rng}; +use rand::{rng, Rng}; /// Note: this is best effort, not all keys are necessarily present or unique fn build_strings(dict_size: usize, total_size: usize, key_len: usize) -> Vec { - let mut rng = thread_rng(); + let mut rng = rng(); let values: Vec = (0..dict_size) - .map(|_| (0..key_len).map(|_| rng.gen::()).collect()) + .map(|_| (0..key_len).map(|_| rng.random::()).collect()) .collect(); (0..total_size) - .map(|_| values[rng.gen_range(0..dict_size)].clone()) + .map(|_| values[rng.random_range(0..dict_size)].clone()) .collect() } diff --git a/arrow/benches/string_run_iterator.rs b/arrow/benches/string_run_iterator.rs index ac5cf7838408..32088573dc25 100644 --- a/arrow/benches/string_run_iterator.rs +++ b/arrow/benches/string_run_iterator.rs @@ -18,17 +18,17 @@ use arrow::array::{Int32RunArray, StringArray, StringRunBuilder}; use arrow::datatypes::Int32Type; use criterion::{criterion_group, criterion_main, Criterion}; -use rand::{thread_rng, Rng}; +use rand::{rng, Rng}; fn build_strings_runs( physical_array_len: usize, logical_array_len: usize, string_len: usize, ) -> Int32RunArray { - let mut rng = thread_rng(); + let mut rng = rng(); let run_len = logical_array_len / physical_array_len; let mut values: Vec = (0..physical_array_len) - .map(|_| (0..string_len).map(|_| rng.gen::()).collect()) + .map(|_| (0..string_len).map(|_| rng.random::()).collect()) .flat_map(|s| std::iter::repeat(s).take(run_len)) .collect(); while values.len() < logical_array_len { diff --git a/arrow/benches/take_kernels.rs b/arrow/benches/take_kernels.rs index 77ec54c97bc5..a09064839f8a 100644 --- a/arrow/benches/take_kernels.rs +++ b/arrow/benches/take_kernels.rs @@ -32,10 +32,10 @@ fn create_random_index(size: usize, null_density: f32) -> UInt32Array { let mut rng = seedable_rng(); let mut builder = UInt32Builder::with_capacity(size); for _ in 0..size { - if rng.gen::() < null_density { + if rng.random::() < null_density { builder.append_null(); } else { - let value = rng.gen_range::(0u32..size as u32); + let value = rng.random_range::(0u32..size as u32); builder.append_value(value); } } diff --git a/arrow/src/util/bench_util.rs b/arrow/src/util/bench_util.rs index 53e01034122b..387d9b973a9c 100644 --- a/arrow/src/util/bench_util.rs +++ b/arrow/src/util/bench_util.rs @@ -22,12 +22,12 @@ use crate::datatypes::*; use crate::util::test_util::seedable_rng; use arrow_buffer::{Buffer, IntervalMonthDayNano}; use half::f16; -use rand::distributions::uniform::SampleUniform; -use rand::thread_rng; +use rand::distr::uniform::SampleUniform; +use rand::rng; use rand::Rng; use rand::SeedableRng; use rand::{ - distributions::{Alphanumeric, Distribution, Standard}, + distr::{Alphanumeric, Distribution, StandardUniform}, prelude::StdRng, }; use std::ops::Range; @@ -36,16 +36,16 @@ use std::ops::Range; pub fn create_primitive_array(size: usize, null_density: f32) -> PrimitiveArray where T: ArrowPrimitiveType, - Standard: Distribution, + StandardUniform: Distribution, { let mut rng = seedable_rng(); (0..size) .map(|_| { - if rng.gen::() < null_density { + if rng.random::() < null_density { None } else { - Some(rng.gen()) + Some(rng.random()) } }) .collect() @@ -60,16 +60,16 @@ pub fn create_primitive_array_with_seed( ) -> PrimitiveArray where T: ArrowPrimitiveType, - Standard: Distribution, + StandardUniform: Distribution, { let mut rng = StdRng::seed_from_u64(seed); (0..size) .map(|_| { - if rng.gen::() < null_density { + if rng.random::() < null_density { None } else { - Some(rng.gen()) + Some(rng.random()) } }) .collect() @@ -86,10 +86,14 @@ pub fn create_month_day_nano_array_with_seed( (0..size) .map(|_| { - if rng.gen::() < null_density { + if rng.random::() < null_density { None } else { - Some(IntervalMonthDayNano::new(rng.gen(), rng.gen(), rng.gen())) + Some(IntervalMonthDayNano::new( + rng.random(), + rng.random(), + rng.random(), + )) } }) .collect() @@ -98,15 +102,15 @@ pub fn create_month_day_nano_array_with_seed( /// Creates a random (but fixed-seeded) array of a given size and null density pub fn create_boolean_array(size: usize, null_density: f32, true_density: f32) -> BooleanArray where - Standard: Distribution, + StandardUniform: Distribution, { let mut rng = seedable_rng(); (0..size) .map(|_| { - if rng.gen::() < null_density { + if rng.random::() < null_density { None } else { - let value = rng.gen::() < true_density; + let value = rng.random::() < true_density; Some(value) } }) @@ -134,10 +138,10 @@ fn create_string_array_with_max_len( let rng = &mut seedable_rng(); (0..size) .map(|_| { - if rng.gen::() < null_density { + if rng.random::() < null_density { None } else { - let str_len = rng.gen_range(0..max_str_len); + let str_len = rng.random_range(0..max_str_len); let value = rng.sample_iter(&Alphanumeric).take(str_len).collect(); let value = String::from_utf8(value).unwrap(); Some(value) @@ -156,7 +160,7 @@ pub fn create_string_array_with_len( (0..size) .map(|_| { - if rng.gen::() < null_density { + if rng.random::() < null_density { None } else { let value = rng.sample_iter(&Alphanumeric).take(str_len).collect(); @@ -183,10 +187,10 @@ fn create_string_view_array_with_max_len( let rng = &mut seedable_rng(); (0..size) .map(|_| { - if rng.gen::() < null_density { + if rng.random::() < null_density { None } else { - let str_len = rng.gen_range(0..max_str_len); + let str_len = rng.random_range(0..max_str_len); let value = rng.sample_iter(&Alphanumeric).take(str_len).collect(); let value = String::from_utf8(value).unwrap(); Some(value) @@ -209,10 +213,10 @@ pub fn create_string_view_array_with_len( // if mixed, we creates first half that string length small than 12 bytes and second half large than 12 bytes if mixed { for _ in 0..size / 2 { - lengths.push(rng.gen_range(1..12)); + lengths.push(rng.random_range(1..12)); } for _ in size / 2..size { - lengths.push(rng.gen_range(12..=std::cmp::max(30, str_len))); + lengths.push(rng.random_range(12..=std::cmp::max(30, str_len))); } } else { lengths.resize(size, str_len); @@ -221,7 +225,7 @@ pub fn create_string_view_array_with_len( lengths .into_iter() .map(|len| { - if rng.gen::() < null_density { + if rng.random::() < null_density { None } else { let value: Vec = rng.sample_iter(&Alphanumeric).take(len).collect(); @@ -242,7 +246,7 @@ pub fn create_string_dict_array( let data: Vec<_> = (0..size) .map(|_| { - if rng.gen::() < null_density { + if rng.random::() < null_density { None } else { let value = rng.sample_iter(&Alphanumeric).take(str_len).collect(); @@ -296,7 +300,7 @@ pub fn create_string_array_for_runs( string_len: usize, ) -> Vec { assert!(logical_array_len >= physical_array_len); - let mut rng = thread_rng(); + let mut rng = rng(); // typical length of each run let run_len = logical_array_len / physical_array_len; @@ -305,7 +309,7 @@ pub fn create_string_array_for_runs( let mut run_len_extra = logical_array_len % physical_array_len; let mut values: Vec = (0..physical_array_len) - .map(|_| (0..string_len).map(|_| rng.gen::()).collect()) + .map(|_| (0..string_len).map(|_| rng.random::()).collect()) .flat_map(|s| { let mut take_len = run_len; if run_len_extra > 0 { @@ -332,12 +336,12 @@ pub fn create_binary_array( (0..size) .map(|_| { - if rng.gen::() < null_density { + if rng.random::() < null_density { None } else { let value = rng - .sample_iter::(Standard) - .take(range_rng.gen_range(0..8)) + .sample_iter::(StandardUniform) + .take(range_rng.random_range(0..8)) .collect::>(); Some(value) } @@ -351,11 +355,11 @@ pub fn create_fsb_array(size: usize, null_density: f32, value_len: usize) -> Fix FixedSizeBinaryArray::try_from_sparse_iter_with_size( (0..size).map(|_| { - if rng.gen::() < null_density { + if rng.random::() < null_density { None } else { let value = rng - .sample_iter::(Standard) + .sample_iter::(StandardUniform) .take(value_len) .collect::>(); Some(value) @@ -375,7 +379,7 @@ pub fn create_dict_from_values( ) -> DictionaryArray where K: ArrowDictionaryKeyType, - Standard: Distribution, + StandardUniform: Distribution, K::Native: SampleUniform, { let min_key = K::Native::from_usize(0).unwrap(); @@ -393,7 +397,7 @@ pub fn create_sparse_dict_from_values( ) -> DictionaryArray where K: ArrowDictionaryKeyType, - Standard: Distribution, + StandardUniform: Distribution, K::Native: SampleUniform, { let mut rng = seedable_rng(); @@ -401,11 +405,14 @@ where DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(values.data_type().clone())); let keys: Buffer = (0..size) - .map(|_| rng.gen_range(key_range.clone())) + .map(|_| rng.random_range(key_range.clone())) .collect(); - let nulls: Option = - (null_density != 0.).then(|| (0..size).map(|_| rng.gen_bool(null_density as _)).collect()); + let nulls: Option = (null_density != 0.).then(|| { + (0..size) + .map(|_| rng.random_bool(null_density as _)) + .collect() + }); let data = ArrayDataBuilder::new(data_type) .len(size) @@ -424,10 +431,10 @@ pub fn create_f16_array(size: usize, nan_density: f32) -> Float16Array { (0..size) .map(|_| { - if rng.gen::() < nan_density { + if rng.random::() < nan_density { Some(f16::NAN) } else { - Some(f16::from_f32(rng.gen())) + Some(f16::from_f32(rng.random())) } }) .collect() @@ -439,10 +446,10 @@ pub fn create_f32_array(size: usize, nan_density: f32) -> Float32Array { (0..size) .map(|_| { - if rng.gen::() < nan_density { + if rng.random::() < nan_density { Some(f32::NAN) } else { - Some(rng.gen()) + Some(rng.random()) } }) .collect() @@ -454,10 +461,10 @@ pub fn create_f64_array(size: usize, nan_density: f32) -> Float64Array { (0..size) .map(|_| { - if rng.gen::() < nan_density { + if rng.random::() < nan_density { Some(f64::NAN) } else { - Some(rng.gen()) + Some(rng.random()) } }) .collect() diff --git a/arrow/src/util/data_gen.rs b/arrow/src/util/data_gen.rs index 5f63812e51c0..ee5350363289 100644 --- a/arrow/src/util/data_gen.rs +++ b/arrow/src/util/data_gen.rs @@ -19,8 +19,10 @@ use std::sync::Arc; -use rand::distributions::uniform::SampleRange; -use rand::{distributions::uniform::SampleUniform, Rng}; +use rand::{ + distr::uniform::{SampleRange, SampleUniform}, + Rng, +}; use crate::array::*; use crate::error::{ArrowError, Result}; @@ -370,7 +372,7 @@ fn create_random_offsets( offsets.push(current_offset); (0..size).for_each(|_| { - current_offset += rng.gen_range(min..max); + current_offset += rng.random_range(min..max); offsets.push(current_offset); }); @@ -383,7 +385,7 @@ fn create_random_null_buffer(size: usize, null_density: f32) -> Buffer { { let mut_slice = mut_buf.as_slice_mut(); (0..size).for_each(|i| { - if rng.gen::() >= null_density { + if rng.random::() >= null_density { bit_util::set_bit(mut_slice, i) } }) @@ -402,7 +404,7 @@ pub trait RandomTemporalValue: ArrowTemporalType { where Self::Native: SampleUniform, { - rng.gen_range(Self::value_range()) + rng.random_range(Self::value_range()) } /// Generate a random value of the type @@ -503,7 +505,7 @@ where (0..size) .map(|_| { - if rng.gen::() < null_density { + if rng.random::() < null_density { None } else { Some(T::random(&mut rng)) diff --git a/arrow/src/util/test_util.rs b/arrow/src/util/test_util.rs index 2d718d392baf..566ccc6ab536 100644 --- a/arrow/src/util/test_util.rs +++ b/arrow/src/util/test_util.rs @@ -25,7 +25,7 @@ pub fn random_bytes(n: usize) -> Vec { let mut result = vec![]; let mut rng = seedable_rng(); for _ in 0..n { - result.push(rng.gen_range(0..255)); + result.push(rng.random_range(0..255)); } result } diff --git a/object_store/Cargo.toml b/object_store/Cargo.toml index 992ae6662cdb..168d2eb6ae39 100644 --- a/object_store/Cargo.toml +++ b/object_store/Cargo.toml @@ -49,7 +49,7 @@ hyper = { version = "1.2", default-features = false, optional = true } quick-xml = { version = "0.37.0", features = ["serialize", "overlapped-lists"], optional = true } serde = { version = "1.0", default-features = false, features = ["derive"], optional = true } serde_json = { version = "1.0", default-features = false, optional = true } -rand = { version = "0.8", default-features = false, features = ["std", "std_rng"], optional = true } +rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"], optional = true } reqwest = { version = "0.12", default-features = false, features = ["rustls-tls-native-roots", "http2"], optional = true } ring = { version = "0.17", default-features = false, features = ["std"], optional = true } rustls-pemfile = { version = "2.0", default-features = false, features = ["std"], optional = true } @@ -76,7 +76,7 @@ futures-test = "0.3" hyper = { version = "1.2", features = ["server"] } hyper-util = "0.1" http-body-util = "0.1" -rand = "0.8" +rand = "0.9" tempfile = "3.1.0" regex = "1.11.1" # The "gzip" feature for reqwest is enabled for an integration test. diff --git a/object_store/src/aws/dynamo.rs b/object_store/src/aws/dynamo.rs index 6283e76c1f87..a66a343be75b 100644 --- a/object_store/src/aws/dynamo.rs +++ b/object_store/src/aws/dynamo.rs @@ -527,8 +527,8 @@ mod tests { use super::*; use crate::aws::AmazonS3; use crate::ObjectStore; - use rand::distributions::Alphanumeric; - use rand::{thread_rng, Rng}; + use rand::distr::Alphanumeric; + use rand::{rng, Rng}; #[test] fn test_attribute_serde() { @@ -571,7 +571,7 @@ mod tests { _ => panic!("Should conflict"), } - let rng = thread_rng(); + let rng = rng(); let etag = String::from_utf8(rng.sample_iter(Alphanumeric).take(32).collect()).unwrap(); let t = Some(etag.as_str()); diff --git a/object_store/src/azure/client.rs b/object_store/src/azure/client.rs index 2c2e27ea4179..7195729af327 100644 --- a/object_store/src/azure/client.rs +++ b/object_store/src/azure/client.rs @@ -561,7 +561,7 @@ impl AzureClient { _part_idx: usize, payload: PutPayload, ) -> Result { - let part_idx = u128::from_be_bytes(rand::thread_rng().gen()); + let part_idx = u128::from_be_bytes(rand::rng().random()); let content_id = format!("{part_idx:032x}"); let block_id = BASE64_STANDARD.encode(&content_id); diff --git a/object_store/src/client/backoff.rs b/object_store/src/client/backoff.rs index 8382a2e16110..8193e8bca424 100644 --- a/object_store/src/client/backoff.rs +++ b/object_store/src/client/backoff.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use rand::prelude::*; +use rand::{prelude::*, rng}; use std::time::Duration; /// Exponential backoff with decorrelated jitter algorithm @@ -78,7 +78,7 @@ impl Backoff { /// Creates a new `Backoff` with the optional `rng` /// - /// Used [`rand::thread_rng()`] if no rng provided + /// Used [`rand::rng()`] if no rng provided pub(crate) fn new_with_rng( config: &BackoffConfig, rng: Option>, @@ -98,8 +98,8 @@ impl Backoff { let range = self.init_backoff..(self.next_backoff_secs * self.base); let rand_backoff = match self.rng.as_mut() { - Some(rng) => rng.gen_range(range), - None => thread_rng().gen_range(range), + Some(rng) => rng.random_range(range), + None => rng().random_range(range), }; let next_backoff = self.max_backoff_secs.min(rand_backoff); diff --git a/object_store/src/integration.rs b/object_store/src/integration.rs index 25a929459ef9..5f9a92b06db3 100644 --- a/object_store/src/integration.rs +++ b/object_store/src/integration.rs @@ -35,8 +35,8 @@ use crate::{ use bytes::Bytes; use futures::stream::FuturesUnordered; use futures::{StreamExt, TryStreamExt}; -use rand::distributions::Alphanumeric; -use rand::{thread_rng, Rng}; +use rand::distr::Alphanumeric; +use rand::{rng, Rng}; pub(crate) async fn flatten_list_stream( storage: &DynObjectStore, @@ -633,7 +633,7 @@ pub async fn put_opts(storage: &dyn ObjectStore, supports_update: bool) { // As a result each conditional operation will need to wait for the lease to timeout before proceeding // One solution would be to clear DynamoDB before each test, but this would require non-trivial additional code // so we instead just generate a random suffix for the filenames - let rng = thread_rng(); + let rng = rng(); let suffix = String::from_utf8(rng.sample_iter(Alphanumeric).take(32).collect()).unwrap(); delete_fixtures(storage).await; @@ -742,10 +742,10 @@ pub async fn put_opts(storage: &dyn ObjectStore, supports_update: bool) { /// Returns a chunk of length `chunk_length` fn get_chunk(chunk_length: usize) -> Bytes { let mut data = vec![0_u8; chunk_length]; - let mut rng = thread_rng(); + let mut rng = rng(); // Set a random selection of bytes for _ in 0..1000 { - data[rng.gen_range(0..chunk_length)] = rng.gen(); + data[rng.random_range(0..chunk_length)] = rng.random(); } data.into() } diff --git a/object_store/src/upload.rs b/object_store/src/upload.rs index 4df4d8fd46ad..af5975a74428 100644 --- a/object_store/src/upload.rs +++ b/object_store/src/upload.rs @@ -312,11 +312,11 @@ mod tests { let mut expected = Vec::with_capacity(1024); for _ in 0..50 { - let chunk_size = rng.gen_range(0..30); - let data: Vec<_> = (0..chunk_size).map(|_| rng.gen()).collect(); + let chunk_size = rng.random_range(0..30); + let data: Vec<_> = (0..chunk_size).map(|_| rng.random()).collect(); expected.extend_from_slice(&data); - match rng.gen_bool(method) { + match rng.random_bool(method) { true => write.put(data.into()), false => write.write(&data), } diff --git a/object_store/src/util.rs b/object_store/src/util.rs index 17a7a8cad4c9..f46c959ba575 100644 --- a/object_store/src/util.rs +++ b/object_store/src/util.rs @@ -329,7 +329,7 @@ mod tests { use crate::Error; use super::*; - use rand::{thread_rng, Rng}; + use rand::{rng, Rng}; use std::ops::Range; /// Calls coalesce_ranges and validates the returned data is correct @@ -395,20 +395,20 @@ mod tests { #[tokio::test] async fn test_coalesce_fuzz() { - let mut rand = thread_rng(); + let mut rand = rng(); for _ in 0..100 { - let object_len = rand.gen_range(10..250); - let range_count = rand.gen_range(0..10); + let object_len = rand.random_range(10..250); + let range_count = rand.random_range(0..10); let ranges: Vec<_> = (0..range_count) .map(|_| { - let start = rand.gen_range(0..object_len); + let start = rand.random_range(0..object_len); let max_len = 20.min(object_len - start); - let len = rand.gen_range(0..max_len); + let len = rand.random_range(0..max_len); start..start + len }) .collect(); - let coalesce = rand.gen_range(1..5); + let coalesce = rand.random_range(1..5); let fetches = do_fetch(ranges.clone(), coalesce).await; for fetch in fetches.windows(2) { diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml index 00d4c5b750f8..75cd867ede54 100644 --- a/parquet/Cargo.toml +++ b/parquet/Cargo.toml @@ -83,7 +83,7 @@ zstd = { version = "0.13", default-features = false } serde_json = { version = "1.0", features = ["std"], default-features = false } arrow = { workspace = true, features = ["ipc", "test_utils", "prettyprint", "json"] } tokio = { version = "1.0", default-features = false, features = ["macros", "rt-multi-thread", "io-util", "fs"] } -rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] } +rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] } object_store = { version = "0.11.0", default-features = false, features = ["azure"] } # TODO: temporary to fix parquet wasm build diff --git a/parquet/benches/arrow_reader.rs b/parquet/benches/arrow_reader.rs index e5165fee212c..0e887c31c594 100644 --- a/parquet/benches/arrow_reader.rs +++ b/parquet/benches/arrow_reader.rs @@ -37,7 +37,7 @@ use parquet::{ data_type::{ByteArrayType, Int32Type, Int64Type}, schema::types::{ColumnDescPtr, SchemaDescPtr}, }; -use rand::distributions::uniform::SampleUniform; +use rand::distr::uniform::SampleUniform; use rand::{rngs::StdRng, Rng, SeedableRng}; use std::{collections::VecDeque, sync::Arc}; @@ -119,14 +119,14 @@ where let mut values = Vec::with_capacity(VALUES_PER_PAGE); let mut def_levels = Vec::with_capacity(VALUES_PER_PAGE); for _k in 0..VALUES_PER_PAGE { - let def_level = if rng.gen::() < null_density { + let def_level = if rng.random::() < null_density { max_def_level - 1 } else { max_def_level }; if def_level == max_def_level { // create the Float16 value - let value = f16::from_f32(rng.gen_range(min..max)); + let value = f16::from_f32(rng.random_range(min..max)); // Float16 in parquet is stored little-endian let bytes = match column_desc.physical_type() { Type::FIXED_LEN_BYTE_ARRAY => { @@ -177,14 +177,14 @@ where let mut values = Vec::with_capacity(VALUES_PER_PAGE); let mut def_levels = Vec::with_capacity(VALUES_PER_PAGE); for _k in 0..VALUES_PER_PAGE { - let def_level = if rng.gen::() < null_density { + let def_level = if rng.random::() < null_density { max_def_level - 1 } else { max_def_level }; if def_level == max_def_level { // create the decimal value - let value = rng.gen_range(min..max); + let value = rng.random_range(min..max); // decimal of parquet use the big-endian to store let bytes = match column_desc.physical_type() { Type::BYTE_ARRAY => { @@ -235,14 +235,14 @@ fn build_encoded_flba_bytes_page_iterator( let mut values = Vec::with_capacity(VALUES_PER_PAGE); let mut def_levels = Vec::with_capacity(VALUES_PER_PAGE); for _k in 0..VALUES_PER_PAGE { - let def_level = if rng.gen::() < null_density { + let def_level = if rng.random::() < null_density { max_def_level - 1 } else { max_def_level }; if def_level == max_def_level { // create the FLBA(BYTE_LENGTH) value - let value = (0..BYTE_LENGTH).map(|_| rng.gen()).collect::>(); + let value = (0..BYTE_LENGTH).map(|_| rng.random()).collect::>(); let value = ::T::from(value); values.push(value); @@ -284,13 +284,13 @@ where let mut values = Vec::with_capacity(VALUES_PER_PAGE); let mut def_levels = Vec::with_capacity(VALUES_PER_PAGE); for _k in 0..VALUES_PER_PAGE { - let def_level = if rng.gen::() < null_density { + let def_level = if rng.random::() < null_density { max_def_level - 1 } else { max_def_level }; if def_level == max_def_level { - let value = FromPrimitive::from_usize(rng.gen_range(min..max)).unwrap(); + let value = FromPrimitive::from_usize(rng.random_range(min..max)).unwrap(); values.push(value); } def_levels.push(def_level); @@ -336,14 +336,14 @@ where let mut values = Vec::with_capacity(VALUES_PER_PAGE); let mut def_levels = Vec::with_capacity(VALUES_PER_PAGE); for _k in 0..VALUES_PER_PAGE { - let def_level = if rng.gen::() < null_density { + let def_level = if rng.random::() < null_density { max_def_level - 1 } else { max_def_level }; if def_level == max_def_level { // select random value from list of unique values - let value = unique_values[rng.gen_range(0..NUM_UNIQUE_VALUES)]; + let value = unique_values[rng.random_range(0..NUM_UNIQUE_VALUES)]; values.push(value); } def_levels.push(def_level); @@ -393,7 +393,7 @@ fn build_plain_encoded_byte_array_page_iterator_inner( let mut values = Vec::with_capacity(VALUES_PER_PAGE); let mut def_levels = Vec::with_capacity(VALUES_PER_PAGE); for k in 0..VALUES_PER_PAGE { - let def_level = if rng.gen::() < null_density { + let def_level = if rng.random::() < null_density { max_def_level - 1 } else { max_def_level @@ -452,14 +452,15 @@ fn build_dictionary_encoded_string_page_iterator( let mut values = Vec::with_capacity(VALUES_PER_PAGE); let mut def_levels = Vec::with_capacity(VALUES_PER_PAGE); for _k in 0..VALUES_PER_PAGE { - let def_level = if rng.gen::() < null_density { + let def_level = if rng.random::() < null_density { max_def_level - 1 } else { max_def_level }; if def_level == max_def_level { // select random value from list of unique values - let string_value = unique_values[rng.gen_range(0..NUM_UNIQUE_VALUES)].as_str(); + let string_value = + unique_values[rng.random_range(0..NUM_UNIQUE_VALUES)].as_str(); values.push(parquet::data_type::ByteArray::from(string_value)); } def_levels.push(def_level); @@ -512,12 +513,12 @@ fn build_string_list_page_iterator( let mut rep_levels = Vec::with_capacity(VALUES_PER_PAGE * MAX_LIST_LEN); for k in 0..VALUES_PER_PAGE { rep_levels.push(0); - if rng.gen::() < null_density { + if rng.random::() < null_density { // Null list def_levels.push(0); continue; } - let len = rng.gen_range(0..MAX_LIST_LEN); + let len = rng.random_range(0..MAX_LIST_LEN); if len == 0 { // Empty list def_levels.push(1); @@ -527,7 +528,7 @@ fn build_string_list_page_iterator( (1..len).for_each(|_| rep_levels.push(1)); for l in 0..len { - if rng.gen::() < null_density { + if rng.random::() < null_density { // Null element def_levels.push(2); } else { diff --git a/parquet/benches/compression.rs b/parquet/benches/compression.rs index 2275a89405d9..5c9b0d1f7549 100644 --- a/parquet/benches/compression.rs +++ b/parquet/benches/compression.rs @@ -18,7 +18,7 @@ use criterion::*; use parquet::basic::{BrotliLevel, Compression, GzipLevel, ZstdLevel}; use parquet::compression::create_codec; -use rand::distributions::Alphanumeric; +use rand::distr::Alphanumeric; use rand::prelude::*; fn do_bench(c: &mut Criterion, name: &str, uncompressed: &[u8]) { @@ -76,7 +76,7 @@ fn criterion_benchmark(c: &mut Criterion) { // Create a collection of 64 words let words: Vec> = (0..64) .map(|_| { - let len = rng.gen_range(1..12); + let len = rng.random_range(1..12); rng.sample_iter(&Alphanumeric).take(len).collect() }) .collect(); @@ -84,7 +84,7 @@ fn criterion_benchmark(c: &mut Criterion) { // Build data by concatenating these words randomly together let mut uncompressed = Vec::with_capacity(DATA_SIZE); while uncompressed.len() < DATA_SIZE { - let word = &words[rng.gen_range(0..words.len())]; + let word = &words[rng.random_range(0..words.len())]; uncompressed.extend_from_slice(&word[..word.len().min(DATA_SIZE - uncompressed.len())]) } assert_eq!(uncompressed.len(), DATA_SIZE); diff --git a/parquet/benches/encoding.rs b/parquet/benches/encoding.rs index 8e61666e6345..68f215d4ea78 100644 --- a/parquet/benches/encoding.rs +++ b/parquet/benches/encoding.rs @@ -86,12 +86,12 @@ fn criterion_benchmark(c: &mut Criterion) { let mut d128s = Vec::new(); for _ in 0..n { f16s.push(FixedLenByteArray::from( - f16::from_f32(rng.gen::()).to_le_bytes().to_vec(), + f16::from_f32(rng.random::()).to_le_bytes().to_vec(), )); - f32s.push(rng.gen::()); - f64s.push(rng.gen::()); + f32s.push(rng.random::()); + f64s.push(rng.random::()); d128s.push(FixedLenByteArray::from( - rng.gen::().to_be_bytes().to_vec(), + rng.random::().to_be_bytes().to_vec(), )); } diff --git a/parquet/benches/row_selector.rs b/parquet/benches/row_selector.rs index 32f0d6a56064..9fa634d09dbc 100644 --- a/parquet/benches/row_selector.rs +++ b/parquet/benches/row_selector.rs @@ -31,9 +31,9 @@ use rand::Rng; /// /// * A `BooleanArray` instance with randomly selected rows based on the provided ratio. fn generate_random_row_selection(total_rows: usize, selection_ratio: f64) -> BooleanArray { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let bools: Vec = (0..total_rows) - .map(|_| rng.gen_bool(selection_ratio)) + .map(|_| rng.random_bool(selection_ratio)) .collect(); BooleanArray::from(bools) } diff --git a/parquet/src/arrow/array_reader/byte_view_array.rs b/parquet/src/arrow/array_reader/byte_view_array.rs index 8df659060040..6d6bbdc7b804 100644 --- a/parquet/src/arrow/array_reader/byte_view_array.rs +++ b/parquet/src/arrow/array_reader/byte_view_array.rs @@ -329,7 +329,7 @@ impl ByteViewArrayDecoderPlain { let to_read = len.min(self.max_remaining_values); - let buf = self.buf.as_ref(); + let buf: &[u8] = self.buf.as_ref(); let mut read = 0; output.views.reserve(to_read); @@ -405,7 +405,7 @@ impl ByteViewArrayDecoderPlain { pub fn skip(&mut self, to_skip: usize) -> Result { let to_skip = to_skip.min(self.max_remaining_values); let mut skip = 0; - let buf = self.buf.as_ref(); + let buf: &[u8] = self.buf.as_ref(); while self.offset < self.buf.len() && skip != to_skip { if self.offset + 4 > buf.len() { diff --git a/parquet/src/arrow/array_reader/primitive_array.rs b/parquet/src/arrow/array_reader/primitive_array.rs index 709d0f8bb16e..c76c41db312d 100644 --- a/parquet/src/arrow/array_reader/primitive_array.rs +++ b/parquet/src/arrow/array_reader/primitive_array.rs @@ -361,7 +361,7 @@ mod tests { use arrow_array::{Array, Date32Array, PrimitiveArray}; use arrow::datatypes::DataType::{Date32, Decimal128}; - use rand::distributions::uniform::SampleUniform; + use rand::distr::uniform::SampleUniform; use std::collections::VecDeque; #[allow(clippy::too_many_arguments)] diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index 6eba04c86f91..acdf078b2787 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -317,7 +317,7 @@ impl ArrowReaderOptions { /// /// // Create the reader and read the data using the supplied schema. /// let mut reader = builder.build().unwrap(); - /// let _batch = reader.next().unwrap().unwrap(); + /// let _batch = reader.next().unwrap().unwrap(); /// ``` pub fn with_schema(self, schema: SchemaRef) -> Self { Self { @@ -926,7 +926,7 @@ mod tests { use bytes::Bytes; use half::f16; use num::PrimInt; - use rand::{thread_rng, Rng, RngCore}; + use rand::{rng, Rng, RngCore}; use tempfile::tempfile; use arrow_array::builder::*; @@ -1403,7 +1403,7 @@ mod tests { impl RandGen for RandFixedLenGen { fn gen(len: i32) -> FixedLenByteArray { let mut v = vec![0u8; len as usize]; - thread_rng().fill_bytes(&mut v); + rng().fill_bytes(&mut v); ByteArray::from(v).into() } } @@ -2058,10 +2058,13 @@ mod tests { fn with_row_selections(self) -> Self { assert!(self.row_filter.is_none(), "Must set row selection first"); - let mut rng = thread_rng(); - let step = rng.gen_range(self.record_batch_size..self.num_rows); - let row_selections = - create_test_selection(step, self.num_row_groups * self.num_rows, rng.gen::()); + let mut rng = rng(); + let step = rng.random_range(self.record_batch_size..self.num_rows); + let row_selections = create_test_selection( + step, + self.num_row_groups * self.num_rows, + rng.random::(), + ); Self { row_selections: Some(row_selections), ..self @@ -2074,9 +2077,9 @@ mod tests { None => self.num_row_groups * self.num_rows, }; - let mut rng = thread_rng(); + let mut rng = rng(); Self { - row_filter: Some((0..row_count).map(|_| rng.gen_bool(0.9)).collect()), + row_filter: Some((0..row_count).map(|_| rng.random_bool(0.9)).collect()), ..self } } @@ -2290,7 +2293,7 @@ mod tests { //according to null_percent generate def_levels let (repetition, def_levels) = match opts.null_percent.as_ref() { Some(null_percent) => { - let mut rng = thread_rng(); + let mut rng = rng(); let def_levels: Vec> = (0..opts.num_row_groups) .map(|_| { @@ -4134,7 +4137,7 @@ mod tests { #[test] fn test_list_selection_fuzz() { - let mut rng = thread_rng(); + let mut rng = rng(); let schema = Arc::new(Schema::new(vec![Field::new_list( "list", Field::new_list( @@ -4150,26 +4153,26 @@ mod tests { let mut list_a_builder = ListBuilder::new(ListBuilder::new(Int32Builder::new())); for _ in 0..2048 { - if rng.gen_bool(0.2) { + if rng.random_bool(0.2) { list_a_builder.append(false); continue; } - let list_a_len = rng.gen_range(0..10); + let list_a_len = rng.random_range(0..10); let list_b_builder = list_a_builder.values(); for _ in 0..list_a_len { - if rng.gen_bool(0.2) { + if rng.random_bool(0.2) { list_b_builder.append(false); continue; } - let list_b_len = rng.gen_range(0..10); + let list_b_len = rng.random_range(0..10); let int_builder = list_b_builder.values(); for _ in 0..list_b_len { - match rng.gen_bool(0.2) { + match rng.random_bool(0.2) { true => int_builder.append_null(), - false => int_builder.append_value(rng.gen()), + false => int_builder.append_value(rng.random()), } } list_b_builder.append(true) diff --git a/parquet/src/arrow/arrow_reader/selection.rs b/parquet/src/arrow/arrow_reader/selection.rs index 378d2253f19a..ffcf39df0e23 100644 --- a/parquet/src/arrow/arrow_reader/selection.rs +++ b/parquet/src/arrow/arrow_reader/selection.rs @@ -641,7 +641,7 @@ fn union_row_selections(left: &[RowSelector], right: &[RowSelector]) -> RowSelec mod tests { use super::*; use crate::format::PageLocation; - use rand::{thread_rng, Rng}; + use rand::{rng, Rng}; #[test] fn test_from_filters() { @@ -1013,14 +1013,14 @@ mod tests { #[test] fn test_and_fuzz() { - let mut rand = thread_rng(); + let mut rand = rng(); for _ in 0..100 { - let a_len = rand.gen_range(10..100); - let a_bools: Vec<_> = (0..a_len).map(|_| rand.gen_bool(0.2)).collect(); + let a_len = rand.random_range(10..100); + let a_bools: Vec<_> = (0..a_len).map(|_| rand.random_bool(0.2)).collect(); let a = RowSelection::from_filters(&[BooleanArray::from(a_bools.clone())]); let b_len: usize = a_bools.iter().map(|x| *x as usize).sum(); - let b_bools: Vec<_> = (0..b_len).map(|_| rand.gen_bool(0.8)).collect(); + let b_bools: Vec<_> = (0..b_len).map(|_| rand.random_bool(0.8)).collect(); let b = RowSelection::from_filters(&[BooleanArray::from(b_bools.clone())]); let mut expected_bools = vec![false; a_len]; diff --git a/parquet/src/arrow/async_reader/mod.rs b/parquet/src/arrow/async_reader/mod.rs index 2c8a59399de1..25fdb0266b70 100644 --- a/parquet/src/arrow/async_reader/mod.rs +++ b/parquet/src/arrow/async_reader/mod.rs @@ -1070,7 +1070,7 @@ mod tests { }; use arrow_schema::{DataType, Field, Schema}; use futures::{StreamExt, TryStreamExt}; - use rand::{thread_rng, Rng}; + use rand::{rng, Rng}; use std::collections::HashMap; use std::sync::{Arc, Mutex}; use tempfile::tempfile; @@ -1400,7 +1400,7 @@ mod tests { assert_eq!(metadata.num_row_groups(), 1); - let mut rand = thread_rng(); + let mut rand = rng(); for _ in 0..100 { let mut expected_rows = 0; @@ -1409,7 +1409,7 @@ mod tests { let mut selectors = vec![]; while total_rows < 7300 { - let row_count: usize = rand.gen_range(1..100); + let row_count: usize = rand.random_range(1..100); let row_count = row_count.min(7300 - total_rows); @@ -1436,7 +1436,7 @@ mod tests { .await .unwrap(); - let col_idx: usize = rand.gen_range(0..13); + let col_idx: usize = rand.random_range(0..13); let mask = ProjectionMask::leaves(builder.parquet_schema(), vec![col_idx]); let stream = builder @@ -1467,7 +1467,7 @@ mod tests { assert_eq!(metadata.num_row_groups(), 1); - let mut rand = thread_rng(); + let mut rand = rng(); let mut expected_rows = 0; let mut total_rows = 0; @@ -1480,7 +1480,7 @@ mod tests { }); while total_rows < 7300 { - let row_count: usize = rand.gen_range(1..100); + let row_count: usize = rand.random_range(1..100); let row_count = row_count.min(7300 - total_rows); @@ -1507,7 +1507,7 @@ mod tests { .await .unwrap(); - let col_idx: usize = rand.gen_range(0..13); + let col_idx: usize = rand.random_range(0..13); let mask = ProjectionMask::leaves(builder.parquet_schema(), vec![col_idx]); let stream = builder diff --git a/parquet/src/arrow/buffer/bit_util.rs b/parquet/src/arrow/buffer/bit_util.rs index e7aea56a7f05..1d2c953abcbb 100644 --- a/parquet/src/arrow/buffer/bit_util.rs +++ b/parquet/src/arrow/buffer/bit_util.rs @@ -65,12 +65,12 @@ pub fn sign_extend_be(b: &[u8]) -> [u8; N] { mod tests { use super::*; use arrow_array::builder::BooleanBufferBuilder; - use rand::prelude::*; + use rand::{prelude::*, rng}; #[test] fn test_bit_fns() { - let mut rng = thread_rng(); - let mask_length = rng.gen_range(1..1024); + let mut rng = rng(); + let mask_length = rng.random_range(1..1024); let bools: Vec<_> = std::iter::from_fn(|| Some(rng.next_u32() & 1 == 0)) .take(mask_length) .collect(); @@ -92,8 +92,8 @@ mod tests { assert_eq!(count_set_bits(&[0xFF], 1..1), 0); for _ in 0..20 { - let start = rng.gen_range(0..bools.len()); - let end = rng.gen_range(start..bools.len()); + let start = rng.random_range(0..bools.len()); + let end = rng.random_range(start..bools.len()); let actual = count_set_bits(nulls.as_slice(), start..end); let expected = bools[start..end].iter().filter(|x| **x).count(); diff --git a/parquet/src/arrow/record_reader/definition_levels.rs b/parquet/src/arrow/record_reader/definition_levels.rs index fcd04fbb9bbe..a90b3c4ec795 100644 --- a/parquet/src/arrow/record_reader/definition_levels.rs +++ b/parquet/src/arrow/record_reader/definition_levels.rs @@ -351,17 +351,17 @@ mod tests { use super::*; use crate::encodings::rle::RleEncoder; - use rand::{thread_rng, Rng}; + use rand::{rng, Rng}; #[test] fn test_packed_decoder() { - let mut rng = thread_rng(); - let len: usize = rng.gen_range(512..1024); + let mut rng = rng(); + let len: usize = rng.random_range(512..1024); let mut expected = BooleanBufferBuilder::new(len); let mut encoder = RleEncoder::new(1, 1024); for _ in 0..len { - let bool = rng.gen_bool(0.8); + let bool = rng.random_bool(0.8); encoder.put(bool as u64); expected.append(bool); } @@ -379,7 +379,7 @@ mod tests { break; } - let to_read = rng.gen_range(1..=remaining); + let to_read = rng.random_range(1..=remaining); decoder.read(&mut decoded, to_read).unwrap(); } @@ -389,15 +389,15 @@ mod tests { #[test] fn test_packed_decoder_skip() { - let mut rng = thread_rng(); - let len: usize = rng.gen_range(512..1024); + let mut rng = rng(); + let len: usize = rng.random_range(512..1024); let mut expected = BooleanBufferBuilder::new(len); let mut encoder = RleEncoder::new(1, 1024); let mut total_value = 0; for _ in 0..len { - let bool = rng.gen_bool(0.8); + let bool = rng.random_bool(0.8); encoder.put(bool as u64); expected.append(bool); if bool { @@ -421,8 +421,8 @@ mod tests { if remaining_levels == 0 { break; } - let to_read_or_skip_level = rng.gen_range(1..=remaining_levels); - if rng.gen_bool(0.5) { + let to_read_or_skip_level = rng.random_range(1..=remaining_levels); + if rng.random_bool(0.5) { let (skip_val_num, skip_level_num) = decoder.skip(to_read_or_skip_level).unwrap(); skip_value += skip_val_num; skip_level += skip_level_num diff --git a/parquet/src/column/reader.rs b/parquet/src/column/reader.rs index cb68351d06fb..b6998057845d 100644 --- a/parquet/src/column/reader.rs +++ b/parquet/src/column/reader.rs @@ -583,7 +583,7 @@ fn parse_v1_level( mod tests { use super::*; - use rand::distributions::uniform::SampleUniform; + use rand::distr::uniform::SampleUniform; use std::{collections::VecDeque, sync::Arc}; use crate::basic::Type as PhysicalType; diff --git a/parquet/src/column/reader/decoder.rs b/parquet/src/column/reader/decoder.rs index afd58b3cd195..a8766e82114b 100644 --- a/parquet/src/column/reader/decoder.rs +++ b/parquet/src/column/reader/decoder.rs @@ -484,7 +484,7 @@ impl RepetitionLevelDecoder for RepetitionLevelDecoderImpl { mod tests { use super::*; use crate::encodings::rle::RleEncoder; - use rand::prelude::*; + use rand::{prelude::*, rng}; #[test] fn test_skip_padding() { @@ -509,9 +509,9 @@ mod tests { #[test] fn test_skip_rep_levels() { for _ in 0..10 { - let mut rng = thread_rng(); + let mut rng = rng(); let total_len = 10000_usize; - let mut encoded: Vec = (0..total_len).map(|_| rng.gen_range(0..5)).collect(); + let mut encoded: Vec = (0..total_len).map(|_| rng.random_range(0..5)).collect(); encoded[0] = 0; let mut encoder = RleEncoder::new(3, 1024); for v in &encoded { @@ -526,8 +526,8 @@ mod tests { let mut remaining_records = total_records; let mut remaining_levels = encoded.len(); loop { - let skip = rng.gen_bool(0.5); - let records = rng.gen_range(1..=remaining_records.min(5)); + let skip = rng.random_bool(0.5); + let records = rng.random_range(1..=remaining_records.min(5)); let (records_read, levels_read) = if skip { decoder.skip_rep_levels(records, remaining_levels).unwrap() } else { diff --git a/parquet/src/column/writer/mod.rs b/parquet/src/column/writer/mod.rs index 5f34f34cbb7a..ae418237515e 100644 --- a/parquet/src/column/writer/mod.rs +++ b/parquet/src/column/writer/mod.rs @@ -1528,7 +1528,7 @@ mod tests { schema::parser::parse_message_type, }; use core::str; - use rand::distributions::uniform::SampleUniform; + use rand::distr::uniform::SampleUniform; use std::{fs::File, sync::Arc}; use crate::column::{ diff --git a/parquet/src/encodings/rle.rs b/parquet/src/encodings/rle.rs index d089ba7836e1..3fb0ebb84ac0 100644 --- a/parquet/src/encodings/rle.rs +++ b/parquet/src/encodings/rle.rs @@ -528,7 +528,7 @@ mod tests { use super::*; use crate::util::bit_util::ceil; - use rand::{self, distributions::Standard, thread_rng, Rng, SeedableRng}; + use rand::{self, distr::StandardUniform, rng, Rng, SeedableRng}; const MAX_WIDTH: usize = 32; @@ -1019,15 +1019,18 @@ mod tests { for _ in 0..niters { values.clear(); - let rng = thread_rng(); - let seed_vec: Vec = rng.sample_iter::(&Standard).take(seed_len).collect(); + let rng = rng(); + let seed_vec: Vec = rng + .sample_iter::(&StandardUniform) + .take(seed_len) + .collect(); let mut seed = [0u8; 32]; seed.copy_from_slice(&seed_vec[0..seed_len]); let mut gen = rand::rngs::StdRng::from_seed(seed); let mut parity = false; for _ in 0..ngroups { - let mut group_size = gen.gen_range(1..20); + let mut group_size = gen.random_range(1..20); if group_size > max_group_size { group_size = 1; } diff --git a/parquet/src/util/bit_util.rs b/parquet/src/util/bit_util.rs index b4c929ce0186..8f6c2d8f8184 100644 --- a/parquet/src/util/bit_util.rs +++ b/parquet/src/util/bit_util.rs @@ -716,7 +716,7 @@ mod tests { use super::*; use crate::util::test_common::rand_gen::random_numbers; - use rand::distributions::{Distribution, Standard}; + use rand::distr::{Distribution, StandardUniform}; use std::fmt::Debug; #[test] @@ -1066,7 +1066,7 @@ mod tests { fn test_put_aligned_rand_numbers(total: usize, num_bits: usize) where T: Copy + FromBytes + AsBytes + Debug + PartialEq, - Standard: Distribution, + StandardUniform: Distribution, { assert!(num_bits <= 32); assert!(total % 2 == 0); diff --git a/parquet/src/util/test_common/rand_gen.rs b/parquet/src/util/test_common/rand_gen.rs index ec80d3a593ae..36a7e4b52a1c 100644 --- a/parquet/src/util/test_common/rand_gen.rs +++ b/parquet/src/util/test_common/rand_gen.rs @@ -19,8 +19,8 @@ use crate::basic::Encoding; use crate::column::page::Page; use bytes::Bytes; use rand::{ - distributions::{uniform::SampleUniform, Distribution, Standard}, - thread_rng, Rng, + distr::{uniform::SampleUniform, Distribution, StandardUniform}, + rng, Rng, }; use std::collections::VecDeque; @@ -44,51 +44,55 @@ pub trait RandGen { impl RandGen for BoolType { fn gen(_: i32) -> bool { - thread_rng().gen::() + rng().random::() } } impl RandGen for Int32Type { fn gen(_: i32) -> i32 { - thread_rng().gen::() + rng().random::() } } impl RandGen for Int64Type { fn gen(_: i32) -> i64 { - thread_rng().gen::() + rng().random::() } } impl RandGen for Int96Type { fn gen(_: i32) -> Int96 { - let mut rng = thread_rng(); + let mut rng = rng(); let mut result = Int96::new(); - result.set_data(rng.gen::(), rng.gen::(), rng.gen::()); + result.set_data( + rng.random::(), + rng.random::(), + rng.random::(), + ); result } } impl RandGen for FloatType { fn gen(_: i32) -> f32 { - thread_rng().gen::() + rng().random::() } } impl RandGen for DoubleType { fn gen(_: i32) -> f64 { - thread_rng().gen::() + rng().random::() } } impl RandGen for ByteArrayType { fn gen(_: i32) -> ByteArray { - let mut rng = thread_rng(); + let mut rng = rng(); let mut result = ByteArray::new(); let mut value = vec![]; - let len = rng.gen_range(0..128); + let len = rng.random_range(0..128); for _ in 0..len { - value.push(rng.gen_range(0..255)); + value.push(rng.random_range(0..255)); } result.set_data(Bytes::from(value)); result @@ -105,28 +109,28 @@ impl RandGen for FixedLenByteArrayType { pub fn random_bytes(n: usize) -> Vec { let mut result = vec![]; - let mut rng = thread_rng(); + let mut rng = rng(); for _ in 0..n { - result.push(rng.gen_range(0..255)); + result.push(rng.random_range(0..255)); } result } pub fn random_numbers(n: usize) -> Vec where - Standard: Distribution, + StandardUniform: Distribution, { - let mut rng = thread_rng(); - Standard.sample_iter(&mut rng).take(n).collect() + let mut rng = rng(); + StandardUniform.sample_iter(&mut rng).take(n).collect() } pub fn random_numbers_range(n: usize, low: T, high: T, result: &mut Vec) where T: PartialOrd + SampleUniform + Copy, { - let mut rng = thread_rng(); + let mut rng = rng(); for _ in 0..n { - result.push(rng.gen_range(low..high)); + result.push(rng.random_range(low..high)); } }