diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 442e6e4009f6..06db092d6fc8 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -37,4 +37,4 @@ If there are user-facing changes then we may require documentation to be updated \ No newline at end of file +--> diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 1f5088a1e6ce..cc23e99e8cba 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -43,7 +43,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-node@v4 with: - node-version: "14" + node-version: "20" - name: Prettier check run: | # if you encounter error, rerun the command below and commit the changes diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 55f6cecf54aa..6c3d60bd4399 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -527,7 +527,7 @@ jobs: rust-version: stable - uses: actions/setup-node@v4 with: - node-version: "14" + node-version: "20" - name: Check if configs.md has been modified run: | # If you encounter an error, run './dev/update_config_docs.sh' and commit diff --git a/Cargo.toml b/Cargo.toml index 71088e7fc7ad..1a2f4a84af38 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,6 +32,7 @@ members = [ "datafusion/substrait", "datafusion/wasmtest", "datafusion-examples", + "docs", "test-utils", "benchmarks", ] @@ -53,9 +54,41 @@ arrow-array = { version = "48.0.0", default-features = false, features = ["chron arrow-buffer = { version = "48.0.0", default-features = false } arrow-flight = { version = "48.0.0", features = ["flight-sql-experimental"] } arrow-schema = { version = "48.0.0", default-features = false } +async-trait = "0.1.73" +bigdecimal = "0.4.1" +bytes = "1.4" +ctor = "0.2.0" +datafusion = { path = "datafusion/core" } +datafusion-common = { path = "datafusion/common" } +datafusion-expr = { path = "datafusion/expr" } +datafusion-sql = { path = "datafusion/sql" } +datafusion-optimizer = { path = "datafusion/optimizer" } +datafusion-physical-expr = { path = "datafusion/physical-expr" } +datafusion-physical-plan = { path = "datafusion/physical-plan" } +datafusion-execution = { path = "datafusion/execution" } +datafusion-proto = { path = "datafusion/proto" } +datafusion-sqllogictest = { path = "datafusion/sqllogictest" } +datafusion-substrait = { path = "datafusion/substrait" } +dashmap = "5.4.0" +doc-comment = "0.3" +env_logger = "0.10" +futures = "0.3" +half = "2.2.1" +indexmap = "2.0.0" +itertools = "0.11" +log = "^0.4" +num_cpus = "1.13.0" +object_store = "0.7.0" +parking_lot = "0.12" parquet = { version = "48.0.0", features = ["arrow", "async", "object_store"] } -sqlparser = { version = "0.38.0", features = ["visitor"] } +rand = "0.8" +rstest = "0.18.0" +serde_json = "1" +sqlparser = { version = "0.39.0", features = ["visitor"] } +tempfile = "3" +thiserror = "1.0.44" chrono = { version = "0.4.31", default-features = false } +url = "2.2" [profile.release] codegen-units = 1 @@ -74,3 +107,4 @@ opt-level = 3 overflow-checks = false panic = 'unwind' rpath = false + diff --git a/README.md b/README.md index 2f10812f9a59..1997a6f73dd5 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,7 @@ Default features: - `compression`: reading files compressed with `xz2`, `bzip2`, `flate2`, and `zstd` - `crypto_expressions`: cryptographic functions such as `md5` and `sha256` - `encoding_expressions`: `encode` and `decode` functions +- `parquet`: support for reading the [Apache Parquet] format - `regex_expressions`: 
regular expression functions, such as `regexp_match` - `unicode_expressions`: Include unicode aware functions such as `character_length` @@ -59,6 +60,7 @@ Optional features: - `simd`: enable arrow-rs's manual `SIMD` kernels (requires Rust `nightly`) [apache avro]: https://avro.apache.org/ +[apache parquet]: https://parquet.apache.org/ ## Rust Version Compatibility diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 0def335521ce..ce0a4267fc28 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -36,14 +36,14 @@ snmalloc = ["snmalloc-rs"] arrow = { workspace = true } datafusion = { path = "../datafusion/core", version = "32.0.0" } datafusion-common = { path = "../datafusion/common", version = "32.0.0" } -env_logger = "0.10" -futures = "0.3" -log = "^0.4" +env_logger = { workspace = true } +futures = { workspace = true } +log = { workspace = true } mimalloc = { version = "0.1", optional = true, default-features = false } -num_cpus = "1.13.0" +num_cpus = { workspace = true } parquet = { workspace = true } serde = { version = "1.0.136", features = ["derive"] } -serde_json = "1.0.78" +serde_json = { workspace = true } snmalloc-rs = { version = "0.3", optional = true } structopt = { version = "0.3", default-features = false } test-utils = { path = "../test-utils/", version = "0.1.0" } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index b83088f94c57..dc828f018fd5 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -25,15 +25,16 @@ checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" [[package]] name = "ahash" -version = "0.8.3" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" +checksum = "91429305e9f0a25f6205c5b8e0d2db09e0708a7a6df0f42212bb56c32c8ac97a" dependencies = [ "cfg-if", "const-random", "getrandom", "once_cell", "version_check", + "zerocopy", ] [[package]] @@ -106,8 +107,8 @@ dependencies = [ "serde", "serde_json", "snap", - "strum 0.25.0", - "strum_macros 0.25.2", + "strum", + "strum_macros", "thiserror", "typed-builder", "uuid", @@ -177,7 +178,7 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.14.1", + "hashbrown 0.14.2", "num", ] @@ -302,7 +303,7 @@ dependencies = [ "arrow-data", "arrow-schema", "half", - "hashbrown 0.14.1", + "hashbrown 0.14.2", ] [[package]] @@ -358,9 +359,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb42b2197bf15ccb092b62c74515dbd8b86d0effd934795f6687c93b6e679a2c" +checksum = "f658e2baef915ba0f26f1f7c42bfb8e12f532a01f449a090ded75ae7a07e9ba2" dependencies = [ "bzip2", "flate2", @@ -370,15 +371,15 @@ dependencies = [ "pin-project-lite", "tokio", "xz2", - "zstd 0.12.4", - "zstd-safe 6.0.6", + "zstd 0.13.0", + "zstd-safe 7.0.0", ] [[package]] name = "async-trait" -version = "0.1.73" +version = "0.1.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0" +checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9" dependencies = [ "proc-macro2", "quote", @@ -709,9 +710,9 @@ dependencies = [ [[package]] name = "base64" -version = "0.21.4" +version = "0.21.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ba43ea6f343b788c8764558649e08df62f86c6ef251fdaeb1ffd010a9ae50a2" +checksum = 
"35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" [[package]] name = "base64-simd" @@ -731,9 +732,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.4.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" +checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" [[package]] name = "blake2" @@ -779,9 +780,9 @@ dependencies = [ [[package]] name = "brotli-decompressor" -version = "2.5.0" +version = "2.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da74e2b81409b1b743f8f0c62cc6254afefb8b8e50bbfe3735550f7aeefa3448" +checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -878,9 +879,9 @@ dependencies = [ [[package]] name = "chrono-tz" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1369bc6b9e9a7dfdae2055f6ec151fe9c554a9d23d357c0237cee2e25eaabb7" +checksum = "e23185c0e21df6ed832a12e2bda87c7d1def6842881fb634a8511ced741b0d76" dependencies = [ "chrono", "chrono-tz-build", @@ -889,9 +890,9 @@ dependencies = [ [[package]] name = "chrono-tz-build" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2f5ebdc942f57ed96d560a6d1a459bae5851102a25d5bf89dc04ae453e31ecf" +checksum = "433e39f13c9a060046954e0592a8d0a4bcb1040125cbf91cb8ee58964cfb350f" dependencies = [ "parse-zoneinfo", "phf", @@ -950,34 +951,32 @@ dependencies = [ [[package]] name = "comfy-table" -version = "7.0.1" +version = "7.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ab77dbd8adecaf3f0db40581631b995f312a8a5ae3aa9993188bb8f23d83a5b" +checksum = "7c64043d6c7b7a4c58e39e7efccfdea7b93d885a795d0c054a69dbbf4dd52686" dependencies = [ - "strum 0.24.1", - "strum_macros 0.24.3", + "strum", + "strum_macros", "unicode-width", ] [[package]] name = "const-random" -version = "0.1.15" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368a7a772ead6ce7e1de82bfb04c485f3db8ec744f72925af5735e29a22cc18e" +checksum = "11df32a13d7892ec42d51d3d175faba5211ffe13ed25d4fb348ac9e9ce835593" dependencies = [ "const-random-macro", - "proc-macro-hack", ] [[package]] name = "const-random-macro" -version = "0.1.15" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d7d6ab3c3a2282db210df5f02c4dab6e0a7057af0fb7ebd4070f30fe05c0ddb" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ "getrandom", "once_cell", - "proc-macro-hack", "tiny-keccak", ] @@ -1014,9 +1013,9 @@ dependencies = [ [[package]] name = "cpufeatures" -version = "0.2.9" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1" +checksum = "ce420fe07aecd3e67c5f910618fe65e94158f6dcc0adf44e00d69ce2bdfe0fd0" dependencies = [ "libc", ] @@ -1090,7 +1089,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" dependencies = [ "cfg-if", - "hashbrown 0.14.1", + "hashbrown 0.14.2", "lock_api", "once_cell", "parking_lot_core", @@ -1122,7 +1121,7 @@ dependencies = [ "futures", "glob", "half", - 
"hashbrown 0.14.1", + "hashbrown 0.14.2", "indexmap 2.0.2", "itertools", "log", @@ -1131,7 +1130,6 @@ dependencies = [ "object_store", "parking_lot", "parquet", - "percent-encoding", "pin-project-lite", "rand", "sqlparser", @@ -1197,7 +1195,7 @@ dependencies = [ "datafusion-common", "datafusion-expr", "futures", - "hashbrown 0.14.1", + "hashbrown 0.14.2", "log", "object_store", "parking_lot", @@ -1215,8 +1213,8 @@ dependencies = [ "arrow-array", "datafusion-common", "sqlparser", - "strum 0.25.0", - "strum_macros 0.25.2", + "strum", + "strum_macros", ] [[package]] @@ -1229,7 +1227,7 @@ dependencies = [ "datafusion-common", "datafusion-expr", "datafusion-physical-expr", - "hashbrown 0.14.1", + "hashbrown 0.14.2", "itertools", "log", "regex-syntax", @@ -1251,7 +1249,7 @@ dependencies = [ "datafusion-common", "datafusion-expr", "half", - "hashbrown 0.14.1", + "hashbrown 0.14.2", "hex", "indexmap 2.0.2", "itertools", @@ -1284,7 +1282,7 @@ dependencies = [ "datafusion-physical-expr", "futures", "half", - "hashbrown 0.14.1", + "hashbrown 0.14.2", "indexmap 2.0.2", "itertools", "log", @@ -1310,9 +1308,12 @@ dependencies = [ [[package]] name = "deranged" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2696e8a945f658fd14dc3b87242e6b80cd0f36ff04ea560fa39082368847946" +checksum = "0f32d04922c60427da6f9fef14d042d9edddef64cb9d4ce0d64d0685fbeb1fd3" +dependencies = [ + "powerfmt", +] [[package]] name = "difflib" @@ -1482,9 +1483,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.27" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6c98ee8095e9d1dcbf2fcc6d95acccb90d1c81db1e44725c6a984b1dbdfb010" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" dependencies = [ "crc32fast", "miniz_oxide", @@ -1516,9 +1517,9 @@ dependencies = [ [[package]] name = "futures" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" +checksum = "da0290714b38af9b4a7b094b8a37086d1b4e61f2df9122c3cad2577669145335" dependencies = [ "futures-channel", "futures-core", @@ -1531,9 +1532,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" +checksum = "ff4dd66668b557604244583e3e1e1eada8c5c2e96a6d0d6653ede395b78bbacb" dependencies = [ "futures-core", "futures-sink", @@ -1541,15 +1542,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" +checksum = "eb1d22c66e66d9d72e1758f0bd7d4fd0bee04cad842ee34587d68c07e45d088c" [[package]] name = "futures-executor" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" +checksum = "0f4fb8693db0cf099eadcca0efe2a5a22e4550f98ed16aba6c48700da29597bc" dependencies = [ "futures-core", "futures-task", @@ -1558,15 +1559,15 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" +checksum = "8bf34a163b5c4c52d0478a4d757da8fb65cabef42ba90515efee0f6f9fa45aaa" [[package]] name = "futures-macro" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" +checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb" dependencies = [ "proc-macro2", "quote", @@ -1575,15 +1576,15 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" +checksum = "e36d3378ee38c2a36ad710c5d30c2911d752cb941c00c72dbabfb786a7970817" [[package]] name = "futures-task" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" +checksum = "efd193069b0ddadc69c46389b740bbccdd97203899b48d09c5f7969591d6bae2" [[package]] name = "futures-timer" @@ -1593,9 +1594,9 @@ checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c" [[package]] name = "futures-util" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" +checksum = "a19526d624e703a3179b3d322efec918b6246ea0fa51d41124525f00f1cc8104" dependencies = [ "futures-channel", "futures-core", @@ -1689,9 +1690,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.14.1" +version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dfda62a12f55daeae5015f81b0baea145391cb4520f86c248fc615d72640d12" +checksum = "f93e7192158dbcda357bdec5fb5788eebf8bbac027f3f33e719d29135ae84156" dependencies = [ "ahash", "allocator-api2", @@ -1790,7 +1791,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2 0.4.9", + "socket2 0.4.10", "tokio", "tower-service", "tracing", @@ -1814,30 +1815,30 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.24.1" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d78e1e73ec14cf7375674f74d7dde185c8206fd9dea6fb6295e8a98098aaa97" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" dependencies = [ "futures-util", "http", "hyper", - "rustls 0.21.7", + "rustls 0.21.8", "tokio", "tokio-rustls 0.24.1", ] [[package]] name = "iana-time-zone" -version = "0.1.57" +version = "0.1.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613" +checksum = "8326b86b6cff230b97d0d312a6c40a60726df3332e721f72a1b035f451663b20" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", "wasm-bindgen", - "windows", + "windows-core", ] [[package]] @@ -1876,7 +1877,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8adf3ddd720272c6ea8bf59463c04e0f93d0bbf7c5439b691bca2987e0270897" dependencies = [ "equivalent", - "hashbrown 0.14.1", + "hashbrown 0.14.2", ] [[package]] @@ -1896,9 +1897,9 @@ checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" [[package]] name = "ipnet" -version = "2.8.0" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6" +checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" [[package]] name = "itertools" @@ -2057,9 +2058,9 @@ checksum = "da2479e8c062e40bf0066ffa0bc823de0a9368974af99c9f6df941d2c231e03f" [[package]] name = "lock_api" -version = "0.4.10" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16" +checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" dependencies = [ "autocfg", "scopeguard", @@ -2133,9 +2134,9 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.8" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2" +checksum = "3dce281c5e46beae905d4de1870d8b1509a9142b62eedf18b443b011ca8343d0" dependencies = [ "libc", "wasi", @@ -2317,9 +2318,9 @@ dependencies = [ [[package]] name = "os_str_bytes" -version = "6.5.1" +version = "6.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d5d9eb14b174ee9aa2ef96dc2b94637a2d4b6e7cb873c7e171f0c20c6cf3eac" +checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" [[package]] name = "outref" @@ -2339,13 +2340,13 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.8" +version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" +checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.3.5", + "redox_syscall 0.4.1", "smallvec", "windows-targets", ] @@ -2370,7 +2371,7 @@ dependencies = [ "chrono", "flate2", "futures", - "hashbrown 0.14.1", + "hashbrown 0.14.2", "lz4_flex", "num", "num-bigint", @@ -2491,6 +2492,12 @@ version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -2552,12 +2559,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "proc-macro-hack" -version = "0.5.20+deprecated" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" - [[package]] name = "proc-macro2" version = "1.0.69" @@ -2643,9 +2644,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.3.5" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" dependencies = [ "bitflags 1.3.2", ] @@ -2663,9 +2664,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.0" +version = "1.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d119d7c7ca818f8a53c300863d4f87566aac09943aef5b355bb83969dae75d87" +checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" dependencies = [ "aho-corasick", "memchr", @@ -2675,9 +2676,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.1" +version = 
"0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "465c6fc0621e4abc4187a2bda0937bfd4f722c2730b29562e19689ea796c9a4b" +checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" dependencies = [ "aho-corasick", "memchr", @@ -2686,15 +2687,15 @@ dependencies = [ [[package]] name = "regex-lite" -version = "0.1.3" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a6ebcd15653947e6140f59a9811a06ed061d18a5c35dfca2e2e4c5525696878" +checksum = "30b661b2f27137bdbc16f00eda72866a92bb28af1753ffbd56744fb6e2e9cd8e" [[package]] name = "regex-syntax" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56d84fdd47036b038fc80dd333d10b6aab10d5d31f4a366e20014def75328d33" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "reqwest" @@ -2711,7 +2712,7 @@ dependencies = [ "http", "http-body", "hyper", - "hyper-rustls 0.24.1", + "hyper-rustls 0.24.2", "ipnet", "js-sys", "log", @@ -2719,7 +2720,7 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls 0.21.7", + "rustls 0.21.8", "rustls-pemfile", "serde", "serde_json", @@ -2755,9 +2756,9 @@ dependencies = [ [[package]] name = "ring" -version = "0.17.3" +version = "0.17.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9babe80d5c16becf6594aa32ad2be8fe08498e7ae60b77de8df700e67f191d7e" +checksum = "fb0205304757e5d899b9c2e448b867ffd03ae7f988002e47cd24954391394d0b" dependencies = [ "cc", "getrandom", @@ -2816,11 +2817,11 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.18" +version = "0.38.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a74ee2d7c2581cd139b42447d7d9389b889bdaad3a73f1ebb16f2a3237bb19c" +checksum = "2b426b0506e5d50a7d8dafcf2e81471400deb602392c7dd110815afb4eaf02a3" dependencies = [ - "bitflags 2.4.0", + "bitflags 2.4.1", "errno", "libc", "linux-raw-sys", @@ -2841,12 +2842,12 @@ dependencies = [ [[package]] name = "rustls" -version = "0.21.7" +version = "0.21.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd8d6c9f025a446bc4d18ad9632e69aec8f287aa84499ee335599fabd20c3fd8" +checksum = "446e14c5cda4f3f30fe71863c34ec70f5ac79d6087097ad0bb433e1be5edf04c" dependencies = [ "log", - "ring 0.16.20", + "ring 0.17.5", "rustls-webpki", "sct", ] @@ -2874,12 +2875,12 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.101.6" +version = "0.101.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c7d5dece342910d9ba34d259310cae3e0154b873b35408b787b59bce53d34fe" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" dependencies = [ - "ring 0.16.20", - "untrusted 0.7.1", + "ring 0.17.5", + "untrusted 0.9.0", ] [[package]] @@ -2943,12 +2944,12 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "sct" -version = "0.7.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" dependencies = [ - "ring 0.16.20", - "untrusted 0.7.1", + "ring 0.17.5", + "untrusted 0.9.0", ] [[package]] @@ -2988,18 +2989,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.188" 
+version = "1.0.190" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" +checksum = "91d3c334ca1ee894a2c6f6ad698fe8c435b76d504b13d436f0685d648d6d96f7" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.188" +version = "1.0.190" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" +checksum = "67c5609f394e5c2bd7fc51efda478004ea80ef42fee983d5c67a65e34f32c0e3" dependencies = [ "proc-macro2", "quote", @@ -3008,9 +3009,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.107" +version = "1.0.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65" +checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" dependencies = [ "itoa", "ryu", @@ -3091,9 +3092,9 @@ checksum = "5e9f0ab6ef7eb7353d9119c170a436d1bf248eea575ac42d19d12f4e34130831" [[package]] name = "socket2" -version = "0.4.9" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" +checksum = "9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d" dependencies = [ "libc", "winapi", @@ -3101,9 +3102,9 @@ dependencies = [ [[package]] name = "socket2" -version = "0.5.4" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4031e820eb552adee9295814c0ced9e5cf38ddf1e8b7d566d6de8e2538ea989e" +checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9" dependencies = [ "libc", "windows-sys", @@ -3123,9 +3124,9 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "sqlparser" -version = "0.38.0" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0272b7bb0a225320170c99901b4b5fb3a4384e255a7f2cc228f61e2ba3893e75" +checksum = "743b4dc2cbde11890ccb254a8fc9d537fa41b36da00de2a1c5e9848c9bc42bd7" dependencies = [ "log", "sqlparser_derive", @@ -3160,39 +3161,20 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" -[[package]] -name = "strum" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" - [[package]] name = "strum" version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" dependencies = [ - "strum_macros 0.25.2", -] - -[[package]] -name = "strum_macros" -version = "0.24.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn 1.0.109", + "strum_macros", ] [[package]] name = "strum_macros" -version = "0.25.2" +version = "0.25.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad8d03b598d3d0fff69bf533ee3ef19b8eeb342729596df84bcc7e1f96ec4059" +checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" dependencies = [ "heck", "proc-macro2", @@ -3252,13 +3234,13 @@ dependencies = [ [[package]] name = 
"tempfile" -version = "3.8.0" +version = "3.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb94d2f3cc536af71caac6b6fcebf65860b347e7ce0cc9ebe8f70d3e521054ef" +checksum = "7ef1adac450ad7f4b3c28589471ade84f25f731a7a0fe30d71dfa9f60fd808e5" dependencies = [ "cfg-if", "fastrand 2.0.1", - "redox_syscall 0.3.5", + "redox_syscall 0.4.1", "rustix", "windows-sys", ] @@ -3286,18 +3268,18 @@ checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" [[package]] name = "thiserror" -version = "1.0.49" +version = "1.0.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1177e8c6d7ede7afde3585fd2513e611227efd6481bd78d2e82ba1ce16557ed4" +checksum = "f9a7210f5c9a7156bb50aa36aed4c95afb51df0df00713949448cf9e97d382d2" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.49" +version = "1.0.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10712f02019e9288794769fba95cd6847df9874d49d871d062172f9dd41bc4cc" +checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" dependencies = [ "proc-macro2", "quote", @@ -3317,11 +3299,12 @@ dependencies = [ [[package]] name = "time" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "426f806f4089c493dcac0d24c29c01e2c38baf8e30f1b716ee37e83d200b18fe" +checksum = "c4a34ab300f2dee6e562c10a046fc05e358b29f9bf92277f30c3c8d82275f6f5" dependencies = [ "deranged", + "powerfmt", "serde", "time-core", "time-macros", @@ -3379,7 +3362,7 @@ dependencies = [ "num_cpus", "parking_lot", "pin-project-lite", - "socket2 0.5.4", + "socket2 0.5.5", "tokio-macros", "windows-sys", ] @@ -3412,7 +3395,7 @@ version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" dependencies = [ - "rustls 0.21.7", + "rustls 0.21.8", "tokio", ] @@ -3429,9 +3412,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.9" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d68074620f57a0b21594d9735eb2e98ab38b17f80d3fcb189fca266771ca60d" +checksum = "5419f34732d9eb6ee4c3578b7989078579b7f039cbbb9ca2c4da015749371e15" dependencies = [ "bytes", "futures-core", @@ -3471,11 +3454,10 @@ checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" [[package]] name = "tracing" -version = "0.1.37" +version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ - "cfg-if", "log", "pin-project-lite", "tracing-attributes", @@ -3484,9 +3466,9 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", @@ -3495,9 +3477,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.31" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" 
dependencies = [ "once_cell", ] @@ -3614,9 +3596,9 @@ checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "uuid" -version = "1.4.1" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79daa5ed5740825c40b389c5e50312b9c86df53fccd33f281df655642b43869d" +checksum = "88ad59a7560b41a70d191093a945f0b87bc1deeda46fb237479708a1d6b6cdfc" dependencies = [ "getrandom", "serde", @@ -3763,7 +3745,7 @@ version = "0.22.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed63aea5ce73d0ff405984102c42de94fc55a6b75765d621c65262469b3c9b53" dependencies = [ - "ring 0.17.3", + "ring 0.17.5", "untrusted 0.9.0", ] @@ -3805,10 +3787,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] -name = "windows" -version = "0.48.0" +name = "windows-core" +version = "0.51.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +checksum = "f1f8cf84f35d2db49a46868f947758c7a1138116f7fac3bc844f43ade1292e64" dependencies = [ "windows-targets", ] @@ -3904,6 +3886,26 @@ dependencies = [ "lzma-sys", ] +[[package]] +name = "zerocopy" +version = "0.7.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd66a62464e3ffd4e37bd09950c2b9dd6c4f8767380fabba0d523f9a775bc85a" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "255c4596d41e6916ced49cfafea18727b24d67878fa180ddfd69b9df34fd1726" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.38", +] + [[package]] name = "zeroize" version = "1.6.0" diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index e5146c7fd94e..57691520a401 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -20,9 +20,9 @@ name = "datafusion-examples" description = "DataFusion usage examples" keywords = ["arrow", "query", "sql"] publish = false +readme = "README.md" version = { workspace = true } edition = { workspace = true } -readme = { workspace = true } homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } @@ -33,26 +33,26 @@ rust-version = { workspace = true } arrow = { workspace = true } arrow-flight = { workspace = true } arrow-schema = { workspace = true } -async-trait = "0.1.41" -bytes = "1.4" -dashmap = "5.4" -datafusion = { path = "../datafusion/core" } +async-trait = { workspace = true } +bytes = { workspace = true } +dashmap = { workspace = true } +datafusion = { path = "../datafusion/core", features = ["avro"] } datafusion-common = { path = "../datafusion/common" } datafusion-expr = { path = "../datafusion/expr" } datafusion-optimizer = { path = "../datafusion/optimizer" } datafusion-sql = { path = "../datafusion/sql" } -env_logger = "0.10" -futures = "0.3" -log = "0.4" +env_logger = { workspace = true } +futures = { workspace = true } +log = { workspace = true } mimalloc = { version = "0.1", default-features = false } -num_cpus = "1.13.0" +num_cpus = { workspace = true } object_store = { version = "0.7.0", features = ["aws", "http"] } prost = { version = "0.12", default-features = false } prost-derive = { version = "0.11", default-features = false } serde = { version = "1.0.136", features = ["derive"] } -serde_json = "1.0.82" 
-tempfile = "3" +serde_json = { workspace = true } +tempfile = { workspace = true } tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot"] } tonic = "0.10" -url = "2.2" +url = { workspace = true } uuid = "1.2" diff --git a/datafusion-examples/examples/dataframe.rs b/datafusion-examples/examples/dataframe.rs index 26fddcd226a9..ea01c53b1c62 100644 --- a/datafusion-examples/examples/dataframe.rs +++ b/datafusion-examples/examples/dataframe.rs @@ -18,7 +18,9 @@ use datafusion::arrow::datatypes::{DataType, Field, Schema}; use datafusion::error::Result; use datafusion::prelude::*; -use std::fs; +use std::fs::File; +use std::io::Write; +use tempfile::tempdir; /// This example demonstrates executing a simple query against an Arrow data source (Parquet) and /// fetching results, using the DataFrame trait @@ -41,12 +43,19 @@ async fn main() -> Result<()> { // print the results df.show().await?; + // create a csv file waiting to be written + let dir = tempdir()?; + let file_path = dir.path().join("example.csv"); + let file = File::create(&file_path)?; + write_csv_file(file); + // Reading CSV file with inferred schema example - let csv_df = example_read_csv_file_with_inferred_schema().await; + let csv_df = + example_read_csv_file_with_inferred_schema(file_path.to_str().unwrap()).await; csv_df.show().await?; // Reading CSV file with defined schema - let csv_df = example_read_csv_file_with_schema().await; + let csv_df = example_read_csv_file_with_schema(file_path.to_str().unwrap()).await; csv_df.show().await?; // Reading PARQUET file and print describe @@ -59,31 +68,28 @@ async fn main() -> Result<()> { } // Function to create an test CSV file -fn create_csv_file(path: String) { +fn write_csv_file(mut file: File) { // Create the data to put into the csv file with headers let content = r#"id,time,vote,unixtime,rating a1,"10 6, 2013",3,1381017600,5.0 a2,"08 9, 2013",2,1376006400,4.5"#; // write the data - fs::write(path, content).expect("Problem with writing file!"); + file.write_all(content.as_ref()) + .expect("Problem with writing file!"); } // Example to read data from a csv file with inferred schema -async fn example_read_csv_file_with_inferred_schema() -> DataFrame { - let path = "example.csv"; - // Create a csv file using the predefined function - create_csv_file(path.to_string()); +async fn example_read_csv_file_with_inferred_schema(file_path: &str) -> DataFrame { // Create a session context let ctx = SessionContext::new(); // Register a lazy DataFrame using the context - ctx.read_csv(path, CsvReadOptions::default()).await.unwrap() + ctx.read_csv(file_path, CsvReadOptions::default()) + .await + .unwrap() } // Example to read csv file with a defined schema for the csv file -async fn example_read_csv_file_with_schema() -> DataFrame { - let path = "example.csv"; - // Create a csv file using the predefined function - create_csv_file(path.to_string()); +async fn example_read_csv_file_with_schema(file_path: &str) -> DataFrame { // Create a session context let ctx = SessionContext::new(); // Define the schema @@ -101,5 +107,5 @@ async fn example_read_csv_file_with_schema() -> DataFrame { ..Default::default() }; // Register a lazy DataFrame by using the context and option provider - ctx.read_csv(path, csv_read_option).await.unwrap() + ctx.read_csv(file_path, csv_read_option).await.unwrap() } diff --git a/datafusion-examples/examples/dataframe_subquery.rs b/datafusion-examples/examples/dataframe_subquery.rs index 94049e59b3ab..9fb61008b9f6 100644 --- 
a/datafusion-examples/examples/dataframe_subquery.rs +++ b/datafusion-examples/examples/dataframe_subquery.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use arrow_schema::DataType; use std::sync::Arc; use datafusion::error::Result; @@ -38,7 +39,7 @@ async fn main() -> Result<()> { Ok(()) } -//select c1,c2 from t1 where (select avg(t2.c2) from t2 where t1.c1 = t2.c1)>0 limit 10; +//select c1,c2 from t1 where (select avg(t2.c2) from t2 where t1.c1 = t2.c1)>0 limit 3; async fn where_scalar_subquery(ctx: &SessionContext) -> Result<()> { ctx.table("t1") .await? @@ -46,7 +47,7 @@ async fn where_scalar_subquery(ctx: &SessionContext) -> Result<()> { scalar_subquery(Arc::new( ctx.table("t2") .await? - .filter(col("t1.c1").eq(col("t2.c1")))? + .filter(out_ref_col(DataType::Utf8, "t1.c1").eq(col("t2.c1")))? .aggregate(vec![], vec![avg(col("t2.c2"))])? .select(vec![avg(col("t2.c2"))])? .into_unoptimized_plan(), @@ -60,7 +61,7 @@ async fn where_scalar_subquery(ctx: &SessionContext) -> Result<()> { Ok(()) } -//SELECT t1.c1, t1.c2 FROM t1 WHERE t1.c2 in (select max(t2.c2) from t2 where t2.c1 > 0 ) limit 10 +//SELECT t1.c1, t1.c2 FROM t1 WHERE t1.c2 in (select max(t2.c2) from t2 where t2.c1 > 0 ) limit 3; async fn where_in_subquery(ctx: &SessionContext) -> Result<()> { ctx.table("t1") .await? @@ -82,14 +83,14 @@ async fn where_in_subquery(ctx: &SessionContext) -> Result<()> { Ok(()) } -//SELECT t1.c1, t1.c2 FROM t1 WHERE EXISTS (select t2.c2 from t2 where t1.c1 = t2.c1) limit 10 +//SELECT t1.c1, t1.c2 FROM t1 WHERE EXISTS (select t2.c2 from t2 where t1.c1 = t2.c1) limit 3; async fn where_exist_subquery(ctx: &SessionContext) -> Result<()> { ctx.table("t1") .await? .filter(exists(Arc::new( ctx.table("t2") .await? - .filter(col("t1.c1").eq(col("t2.c1")))? + .filter(out_ref_col(DataType::Utf8, "t1.c1").eq(col("t2.c1")))? .select(vec![col("t2.c2")])? .into_unoptimized_plan(), )))? diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 490fbeacad85..d04db86b7830 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -19,9 +19,9 @@ name = "datafusion-common" description = "Common functionality for DataFusion query engine" keywords = ["arrow", "query", "sql"] +readme = "README.md" version = { workspace = true } edition = { workspace = true } -readme = { workspace = true } homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } @@ -46,7 +46,7 @@ arrow-buffer = { workspace = true } arrow-schema = { workspace = true } chrono = { workspace = true } half = { version = "2.1", default-features = false } -num_cpus = "1.13.0" +num_cpus = { workspace = true } object_store = { version = "0.7.0", default-features = false, optional = true } parquet = { workspace = true, optional = true } pyo3 = { version = "0.20.0", optional = true } diff --git a/datafusion/common/README.md b/datafusion/common/README.md index 9bccf3f18b7f..524ab4420d2a 100644 --- a/datafusion/common/README.md +++ b/datafusion/common/README.md @@ -19,7 +19,7 @@ # DataFusion Common -[DataFusion](df) is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. This crate is a submodule of DataFusion that provides common data types and utilities. 
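For readers skimming the `dataframe_subquery.rs` changes above: correlated references to the outer table are now built with `out_ref_col` (which carries the column's `DataType`) instead of a plain `col`, so the subquery plan records them as outer references. The following is a minimal sketch of the resulting pattern, mirroring the patched `where_scalar_subquery` example; it assumes `t1` and `t2` are already registered in the `SessionContext` with a Utf8 column `c1` and a numeric column `c2`, as in that example.

```rust
use std::sync::Arc;

use datafusion::arrow::datatypes::DataType;
use datafusion::error::Result;
use datafusion::prelude::*;

// SELECT t1.c1, t1.c2 FROM t1
// WHERE (SELECT avg(t2.c2) FROM t2 WHERE t1.c1 = t2.c1) > 0 LIMIT 3
//
// `t1.c1` inside the subquery refers to the outer relation, so it is
// expressed with `out_ref_col` (carrying its DataType) rather than `col`.
async fn correlated_scalar_subquery(ctx: &SessionContext) -> Result<DataFrame> {
    let subquery = Arc::new(
        ctx.table("t2")
            .await?
            .filter(out_ref_col(DataType::Utf8, "t1.c1").eq(col("t2.c1")))?
            .aggregate(vec![], vec![avg(col("t2.c2"))])?
            .select(vec![avg(col("t2.c2"))])?
            .into_unoptimized_plan(),
    );

    ctx.table("t1")
        .await?
        .filter(scalar_subquery(subquery).gt(lit(0)))?
        .select(vec![col("t1.c1"), col("t1.c2")])?
        .limit(0, Some(3))
}
```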
diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index e16acbfedc81..d8cd103a4777 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -391,11 +391,33 @@ impl DFSchema { }) } + /// Returns true if the two schemas have the same qualified named + /// fields with logically equivalent data types. Returns false otherwise. + /// + /// Use [DFSchema]::equivalent_names_and_types for stricter semantic type + /// equivalence checking. + pub fn logically_equivalent_names_and_types(&self, other: &Self) -> bool { + if self.fields().len() != other.fields().len() { + return false; + } + let self_fields = self.fields().iter(); + let other_fields = other.fields().iter(); + self_fields.zip(other_fields).all(|(f1, f2)| { + f1.qualifier() == f2.qualifier() + && f1.name() == f2.name() + && Self::datatype_is_logically_equal(f1.data_type(), f2.data_type()) + }) + } + /// Returns true if the two schemas have the same qualified named /// fields with the same data types. Returns false otherwise. /// /// This is a specialized version of Eq that ignores differences /// in nullability and metadata. + /// + /// Use [DFSchema]::logically_equivalent_names_and_types for a weaker + /// logical type checking, which for example would consider a dictionary + /// encoded UTF8 array to be equivalent to a plain UTF8 array. pub fn equivalent_names_and_types(&self, other: &Self) -> bool { if self.fields().len() != other.fields().len() { return false; @@ -409,6 +431,46 @@ impl DFSchema { }) } + /// Checks if two [`DataType`]s are logically equal. This is a notably weaker constraint + /// than datatype_is_semantically_equal in that a Dictionary type is logically + /// equal to a plain V type, but not semantically equal. Dictionary is also + /// logically equal to Dictionary. + fn datatype_is_logically_equal(dt1: &DataType, dt2: &DataType) -> bool { + // check nested fields + match (dt1, dt2) { + (DataType::Dictionary(_, v1), DataType::Dictionary(_, v2)) => { + v1.as_ref() == v2.as_ref() + } + (DataType::Dictionary(_, v1), othertype) => v1.as_ref() == othertype, + (othertype, DataType::Dictionary(_, v1)) => v1.as_ref() == othertype, + (DataType::List(f1), DataType::List(f2)) + | (DataType::LargeList(f1), DataType::LargeList(f2)) + | (DataType::FixedSizeList(f1, _), DataType::FixedSizeList(f2, _)) + | (DataType::Map(f1, _), DataType::Map(f2, _)) => { + Self::field_is_logically_equal(f1, f2) + } + (DataType::Struct(fields1), DataType::Struct(fields2)) => { + let iter1 = fields1.iter(); + let iter2 = fields2.iter(); + fields1.len() == fields2.len() && + // all fields have to be the same + iter1 + .zip(iter2) + .all(|(f1, f2)| Self::field_is_logically_equal(f1, f2)) + } + (DataType::Union(fields1, _), DataType::Union(fields2, _)) => { + let iter1 = fields1.iter(); + let iter2 = fields2.iter(); + fields1.len() == fields2.len() && + // all fields have to be the same + iter1 + .zip(iter2) + .all(|((t1, f1), (t2, f2))| t1 == t2 && Self::field_is_logically_equal(f1, f2)) + } + _ => dt1 == dt2, + } + } + /// Returns true of two [`DataType`]s are semantically equal (same /// name and type), ignoring both metadata and nullability. 
/// @@ -456,6 +518,11 @@ impl DFSchema { } } + fn field_is_logically_equal(f1: &Field, f2: &Field) -> bool { + f1.name() == f2.name() + && Self::datatype_is_logically_equal(f1.data_type(), f2.data_type()) + } + fn field_is_semantically_equal(f1: &Field, f2: &Field) -> bool { f1.name() == f2.name() && Self::datatype_is_semantically_equal(f1.data_type(), f2.data_type()) @@ -786,6 +853,13 @@ pub trait SchemaExt { /// /// It works the same as [`DFSchema::equivalent_names_and_types`]. fn equivalent_names_and_types(&self, other: &Self) -> bool; + + /// Returns true if the two schemas have the same qualified named + /// fields with logically equivalent data types. Returns false otherwise. + /// + /// Use [DFSchema]::equivalent_names_and_types for stricter semantic type + /// equivalence checking. + fn logically_equivalent_names_and_types(&self, other: &Self) -> bool; } impl SchemaExt for Schema { @@ -805,6 +879,23 @@ impl SchemaExt for Schema { ) }) } + + fn logically_equivalent_names_and_types(&self, other: &Self) -> bool { + if self.fields().len() != other.fields().len() { + return false; + } + + self.fields() + .iter() + .zip(other.fields().iter()) + .all(|(f1, f2)| { + f1.name() == f2.name() + && DFSchema::datatype_is_logically_equal( + f1.data_type(), + f2.data_type(), + ) + }) + } } #[cfg(test)] diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index adf58e282ed9..9114c669ab8b 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -369,7 +369,7 @@ impl From for io::Error { } impl DataFusionError { - const BACK_TRACE_SEP: &str = "\n\nbacktrace: "; + const BACK_TRACE_SEP: &'static str = "\n\nbacktrace: "; /// Get deepest underlying [`DataFusionError`] /// diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index be24e2b933b5..0d701eaad283 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -30,7 +30,7 @@ use crate::cast::{ }; use crate::error::{DataFusionError, Result, _internal_err, _not_impl_err}; use crate::hash_utils::create_hashes; -use crate::utils::wrap_into_list_array; +use crate::utils::array_into_list_array; use arrow::buffer::{NullBuffer, OffsetBuffer}; use arrow::compute::kernels::numeric::*; use arrow::datatypes::{i256, FieldRef, Fields, SchemaBuilder}; @@ -600,117 +600,6 @@ macro_rules! typed_cast { }}; } -macro_rules! build_timestamp_list { - ($TIME_UNIT:expr, $TIME_ZONE:expr, $VALUES:expr, $SIZE:expr) => {{ - match $VALUES { - // the return on the macro is necessary, to short-circuit and return ArrayRef - None => { - return new_null_array( - &DataType::List(Arc::new(Field::new( - "item", - DataType::Timestamp($TIME_UNIT, $TIME_ZONE), - true, - ))), - $SIZE, - ) - } - Some(values) => match $TIME_UNIT { - TimeUnit::Second => { - build_values_list_tz!( - TimestampSecondBuilder, - TimestampSecond, - values, - $SIZE, - $TIME_ZONE - ) - } - TimeUnit::Millisecond => build_values_list_tz!( - TimestampMillisecondBuilder, - TimestampMillisecond, - values, - $SIZE, - $TIME_ZONE - ), - TimeUnit::Microsecond => build_values_list_tz!( - TimestampMicrosecondBuilder, - TimestampMicrosecond, - values, - $SIZE, - $TIME_ZONE - ), - TimeUnit::Nanosecond => build_values_list_tz!( - TimestampNanosecondBuilder, - TimestampNanosecond, - values, - $SIZE, - $TIME_ZONE - ), - }, - } - }}; -} - -macro_rules! 
new_builder { - (StringBuilder, $len:expr) => { - StringBuilder::new() - }; - (LargeStringBuilder, $len:expr) => { - LargeStringBuilder::new() - }; - ($el:ident, $len:expr) => {{ - <$el>::with_capacity($len) - }}; -} - -macro_rules! build_values_list { - ($VALUE_BUILDER_TY:ident, $SCALAR_TY:ident, $VALUES:expr, $SIZE:expr) => {{ - let builder = new_builder!($VALUE_BUILDER_TY, $VALUES.len()); - let mut builder = ListBuilder::new(builder); - - for _ in 0..$SIZE { - for scalar_value in $VALUES { - match scalar_value { - ScalarValue::$SCALAR_TY(Some(v)) => { - builder.values().append_value(v.clone()); - } - ScalarValue::$SCALAR_TY(None) => { - builder.values().append_null(); - } - _ => panic!("Incompatible ScalarValue for list"), - }; - } - builder.append(true); - } - - builder.finish() - }}; -} - -macro_rules! build_values_list_tz { - ($VALUE_BUILDER_TY:ident, $SCALAR_TY:ident, $VALUES:expr, $SIZE:expr, $TIME_ZONE:expr) => {{ - let mut builder = ListBuilder::new( - $VALUE_BUILDER_TY::with_capacity($VALUES.len()).with_timezone_opt($TIME_ZONE), - ); - - for _ in 0..$SIZE { - for scalar_value in $VALUES { - match scalar_value { - ScalarValue::$SCALAR_TY(Some(v), _) => { - builder.values().append_value(v.clone()); - } - ScalarValue::$SCALAR_TY(None, _) => { - builder.values().append_null(); - } - _ => panic!("Incompatible ScalarValue for list"), - }; - } - builder.append(true); - } - - builder.finish() - }}; -} - macro_rules! build_array_from_option { ($DATA_TYPE:ident, $ARRAY_TYPE:ident, $EXPR:expr, $SIZE:expr) => {{ match $EXPR { @@ -1198,7 +1087,8 @@ impl ScalarValue { } /// Converts an iterator of references [`ScalarValue`] into an [`ArrayRef`] - /// corresponding to those values. For example, + /// corresponding to those values. For example, an iterator of + /// [`ScalarValue::Int32`] would be converted to an [`Int32Array`]. /// /// Returns an error if the iterator is empty or if the /// [`ScalarValue`]s are not all the same type @@ -1312,10 +1202,11 @@ impl ScalarValue { Arc::new(ListArray::from_iter_primitive::<$ARRAY_TY, _, _>( scalars.into_iter().map(|x| match x { ScalarValue::List(arr) => { - if arr.as_any().downcast_ref::().is_some() { + // `ScalarValue::List` contains a single element `ListArray`. + let list_arr = as_list_array(&arr); + if list_arr.is_null(0) { None } else { - let list_arr = as_list_array(&arr); let primitive_arr = list_arr.values().as_primitive::<$ARRAY_TY>(); Some( @@ -1339,12 +1230,14 @@ impl ScalarValue { for scalar in scalars.into_iter() { match scalar { ScalarValue::List(arr) => { - if arr.as_any().downcast_ref::().is_some() { + // `ScalarValue::List` contains a single element `ListArray`. + let list_arr = as_list_array(&arr); + + if list_arr.is_null(0) { builder.append(false); continue; } - let list_arr = as_list_array(&arr); let string_arr = $STRING_ARRAY(list_arr.values()); for v in string_arr.iter() { @@ -1654,41 +1547,6 @@ impl ScalarValue { Ok(array) } - /// This function does not contains nulls but empty array instead. 
- fn iter_to_array_list_without_nulls( - values: &[ScalarValue], - data_type: &DataType, - ) -> Result> { - let mut elements: Vec = vec![]; - let mut offsets = vec![]; - - if values.is_empty() { - offsets.push(0); - } else { - let arr = ScalarValue::iter_to_array(values.to_vec())?; - offsets.push(arr.len()); - elements.push(arr); - } - - // Concatenate element arrays to create single flat array - let flat_array = if elements.is_empty() { - new_empty_array(data_type) - } else { - let element_arrays: Vec<&dyn Array> = - elements.iter().map(|a| a.as_ref()).collect(); - arrow::compute::concat(&element_arrays)? - }; - - let list_array = ListArray::new( - Arc::new(Field::new("item", flat_array.data_type().to_owned(), true)), - OffsetBuffer::::from_lengths(offsets), - flat_array, - None, - ); - - Ok(list_array) - } - /// This function build with nulls with nulls buffer. fn iter_to_array_list( scalars: impl IntoIterator, @@ -1699,15 +1557,16 @@ impl ScalarValue { for scalar in scalars { if let ScalarValue::List(arr) = scalar { - // i.e. NullArray(1) - if arr.as_any().downcast_ref::().is_some() { + // `ScalarValue::List` contains a single element `ListArray`. + let list_arr = as_list_array(&arr); + + if list_arr.is_null(0) { // Repeat previous offset index offsets.push(0); // Element is null valid.append(false); } else { - let list_arr = as_list_array(&arr); let arr = list_arr.values().to_owned(); offsets.push(arr.len()); elements.push(arr); @@ -1776,7 +1635,8 @@ impl ScalarValue { .unwrap() } - /// Converts `Vec` to ListArray, simplified version of ScalarValue::to_array + /// Converts `Vec` where each element has type corresponding to + /// `data_type`, to a [`ListArray`]. /// /// Example /// ``` @@ -1802,52 +1662,12 @@ impl ScalarValue { /// assert_eq!(result, &expected); /// ``` pub fn new_list(values: &[ScalarValue], data_type: &DataType) -> ArrayRef { - Arc::new(match data_type { - DataType::Boolean => build_values_list!(BooleanBuilder, Boolean, values, 1), - DataType::Int8 => build_values_list!(Int8Builder, Int8, values, 1), - DataType::Int16 => build_values_list!(Int16Builder, Int16, values, 1), - DataType::Int32 => build_values_list!(Int32Builder, Int32, values, 1), - DataType::Int64 => build_values_list!(Int64Builder, Int64, values, 1), - DataType::UInt8 => build_values_list!(UInt8Builder, UInt8, values, 1), - DataType::UInt16 => build_values_list!(UInt16Builder, UInt16, values, 1), - DataType::UInt32 => build_values_list!(UInt32Builder, UInt32, values, 1), - DataType::UInt64 => build_values_list!(UInt64Builder, UInt64, values, 1), - DataType::Utf8 => build_values_list!(StringBuilder, Utf8, values, 1), - DataType::LargeUtf8 => { - build_values_list!(LargeStringBuilder, LargeUtf8, values, 1) - } - DataType::Float32 => build_values_list!(Float32Builder, Float32, values, 1), - DataType::Float64 => build_values_list!(Float64Builder, Float64, values, 1), - DataType::Timestamp(unit, tz) => { - let values = Some(values); - build_timestamp_list!(unit.clone(), tz.clone(), values, 1) - } - DataType::List(_) | DataType::Struct(_) => { - ScalarValue::iter_to_array_list_without_nulls(values, data_type).unwrap() - } - DataType::Decimal128(precision, scale) => { - let mut vals = vec![]; - for value in values.iter() { - if let ScalarValue::Decimal128(v, _, _) = value { - vals.push(v.to_owned()) - } - } - - let arr = Decimal128Array::from(vals) - .with_precision_and_scale(*precision, *scale) - .unwrap(); - wrap_into_list_array(Arc::new(arr)) - } - - DataType::Null => { - let arr = 
new_null_array(&DataType::Null, values.len()); - wrap_into_list_array(arr) - } - _ => panic!( - "Unsupported data type {:?} for ScalarValue::list_to_array", - data_type - ), - }) + let values = if values.is_empty() { + new_empty_array(data_type) + } else { + Self::iter_to_array(values.iter().cloned()).unwrap() + }; + Arc::new(array_into_list_array(values)) } /// Converts a scalar value into an array of `size` rows. @@ -2234,28 +2054,20 @@ impl ScalarValue { } DataType::Utf8 => typed_cast!(array, index, StringArray, Utf8), DataType::LargeUtf8 => typed_cast!(array, index, LargeStringArray, LargeUtf8), - DataType::List(nested_type) => { + DataType::List(_) => { let list_array = as_list_array(array); - let arr = match list_array.is_null(index) { - true => new_null_array(nested_type.data_type(), 0), - false => { - let nested_array = list_array.value(index); - Arc::new(wrap_into_list_array(nested_array)) - } - }; + let nested_array = list_array.value(index); + // Produces a single element `ListArray` with the value at `index`. + let arr = Arc::new(array_into_list_array(nested_array)); ScalarValue::List(arr) } // TODO: There is no test for FixedSizeList now, add it later - DataType::FixedSizeList(nested_type, _len) => { + DataType::FixedSizeList(_, _) => { let list_array = as_fixed_size_list_array(array)?; - let arr = match list_array.is_null(index) { - true => new_null_array(nested_type.data_type(), 0), - false => { - let nested_array = list_array.value(index); - Arc::new(wrap_into_list_array(nested_array)) - } - }; + let nested_array = list_array.value(index); + // Produces a single element `ListArray` with the value at `index`. + let arr = Arc::new(array_into_list_array(nested_array)); ScalarValue::List(arr) } @@ -2944,8 +2756,15 @@ impl TryFrom<&DataType> for ScalarValue { index_type.clone(), Box::new(value_type.as_ref().try_into()?), ), - DataType::List(_) => ScalarValue::List(new_null_array(&DataType::Null, 0)), - + // `ScalaValue::List` contains single element `ListArray`. 
+ DataType::List(field) => ScalarValue::List(new_null_array( + &DataType::List(Arc::new(Field::new( + "item", + field.data_type().clone(), + true, + ))), + 1, + )), DataType::Struct(fields) => ScalarValue::Struct(None, fields.clone()), DataType::Null => ScalarValue::Null, _ => { @@ -3233,7 +3052,7 @@ mod tests { let array = ScalarValue::new_list(scalars.as_slice(), &DataType::Utf8); - let expected = wrap_into_list_array(Arc::new(StringArray::from(vec![ + let expected = array_into_list_array(Arc::new(StringArray::from(vec![ "rust", "arrow", "data-fusion", @@ -3272,9 +3091,9 @@ mod tests { #[test] fn iter_to_array_string_test() { let arr1 = - wrap_into_list_array(Arc::new(StringArray::from(vec!["foo", "bar", "baz"]))); + array_into_list_array(Arc::new(StringArray::from(vec!["foo", "bar", "baz"]))); let arr2 = - wrap_into_list_array(Arc::new(StringArray::from(vec!["rust", "world"]))); + array_into_list_array(Arc::new(StringArray::from(vec!["rust", "world"]))); let scalars = vec![ ScalarValue::List(Arc::new(arr1)), @@ -3885,6 +3704,78 @@ mod tests { ); } + #[test] + fn scalar_try_from_array_list_array_null() { + let list = ListArray::from_iter_primitive::(vec![ + Some(vec![Some(1), Some(2)]), + None, + ]); + + let non_null_list_scalar = ScalarValue::try_from_array(&list, 0).unwrap(); + let null_list_scalar = ScalarValue::try_from_array(&list, 1).unwrap(); + + let data_type = + DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + + assert_eq!(non_null_list_scalar.data_type(), data_type.clone()); + assert_eq!(null_list_scalar.data_type(), data_type); + } + + #[test] + fn scalar_try_from_list() { + let data_type = + DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let data_type = &data_type; + let scalar: ScalarValue = data_type.try_into().unwrap(); + + let expected = ScalarValue::List(new_null_array( + &DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), + 1, + )); + + assert_eq!(expected, scalar) + } + + #[test] + fn scalar_try_from_list_of_list() { + let data_type = DataType::List(Arc::new(Field::new( + "item", + DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), + true, + ))); + let data_type = &data_type; + let scalar: ScalarValue = data_type.try_into().unwrap(); + + let expected = ScalarValue::List(new_null_array( + &DataType::List(Arc::new(Field::new( + "item", + DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), + true, + ))), + 1, + )); + + assert_eq!(expected, scalar) + } + + #[test] + fn scalar_try_from_not_equal_list_nested_list() { + let list_data_type = + DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let data_type = &list_data_type; + let list_scalar: ScalarValue = data_type.try_into().unwrap(); + + let nested_list_data_type = DataType::List(Arc::new(Field::new( + "item", + DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), + true, + ))); + let data_type = &nested_list_data_type; + let nested_list_scalar: ScalarValue = data_type.try_into().unwrap(); + + assert_ne!(list_scalar, nested_list_scalar); + } + #[test] fn scalar_try_from_dict_datatype() { let data_type = @@ -4444,13 +4335,13 @@ mod tests { // Define list-of-structs scalars let nl0_array = ScalarValue::iter_to_array(vec![s0.clone(), s1.clone()]).unwrap(); - let nl0 = ScalarValue::List(Arc::new(wrap_into_list_array(nl0_array))); + let nl0 = ScalarValue::List(Arc::new(array_into_list_array(nl0_array))); let nl1_array = ScalarValue::iter_to_array(vec![s2.clone()]).unwrap(); - let nl1 = 
ScalarValue::List(Arc::new(wrap_into_list_array(nl1_array))); + let nl1 = ScalarValue::List(Arc::new(array_into_list_array(nl1_array))); let nl2_array = ScalarValue::iter_to_array(vec![s1.clone()]).unwrap(); - let nl2 = ScalarValue::List(Arc::new(wrap_into_list_array(nl2_array))); + let nl2 = ScalarValue::List(Arc::new(array_into_list_array(nl2_array))); // iter_to_array for list-of-struct let array = ScalarValue::iter_to_array(vec![nl0, nl1, nl2]).unwrap(); diff --git a/datafusion/common/src/utils.rs b/datafusion/common/src/utils.rs index b2f71e86f21e..f031f7880436 100644 --- a/datafusion/common/src/utils.rs +++ b/datafusion/common/src/utils.rs @@ -17,6 +17,7 @@ //! This module provides the bisect function, which implements binary search. +use crate::error::_internal_err; use crate::{DataFusionError, Result, ScalarValue}; use arrow::array::{ArrayRef, PrimitiveArray}; use arrow::buffer::OffsetBuffer; @@ -24,7 +25,7 @@ use arrow::compute; use arrow::compute::{partition, SortColumn, SortOptions}; use arrow::datatypes::{Field, SchemaRef, UInt32Type}; use arrow::record_batch::RecordBatch; -use arrow_array::ListArray; +use arrow_array::{Array, ListArray}; use sqlparser::ast::Ident; use sqlparser::dialect::GenericDialect; use sqlparser::parser::Parser; @@ -338,7 +339,7 @@ pub fn longest_consecutive_prefix>( /// Wrap an array into a single element `ListArray`. /// For example `[1, 2, 3]` would be converted into `[[1, 2, 3]]` -pub fn wrap_into_list_array(arr: ArrayRef) -> ListArray { +pub fn array_into_list_array(arr: ArrayRef) -> ListArray { let offsets = OffsetBuffer::from_lengths([arr.len()]); ListArray::new( Arc::new(Field::new("item", arr.data_type().to_owned(), true)), @@ -348,6 +349,47 @@ pub fn wrap_into_list_array(arr: ArrayRef) -> ListArray { ) } +/// Wrap arrays into a single element `ListArray`. +/// +/// Example: +/// ``` +/// use arrow::array::{Int32Array, ListArray, ArrayRef}; +/// use arrow::datatypes::{Int32Type, Field}; +/// use std::sync::Arc; +/// +/// let arr1 = Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef; +/// let arr2 = Arc::new(Int32Array::from(vec![4, 5, 6])) as ArrayRef; +/// +/// let list_arr = datafusion_common::utils::arrays_into_list_array([arr1, arr2]).unwrap(); +/// +/// let expected = ListArray::from_iter_primitive::( +/// vec![ +/// Some(vec![Some(1), Some(2), Some(3)]), +/// Some(vec![Some(4), Some(5), Some(6)]), +/// ] +/// ); +/// +/// assert_eq!(list_arr, expected); +pub fn arrays_into_list_array( + arr: impl IntoIterator, +) -> Result { + let arr = arr.into_iter().collect::>(); + if arr.is_empty() { + return _internal_err!("Cannot wrap empty array into list array"); + } + + let lens = arr.iter().map(|x| x.len()).collect::>(); + // Assume data type is consistent + let data_type = arr[0].data_type().to_owned(); + let values = arr.iter().map(|x| x.as_ref()).collect::>(); + Ok(ListArray::new( + Arc::new(Field::new("item", data_type, true)), + OffsetBuffer::from_lengths(lens), + arrow::compute::concat(values.as_slice())?, + None, + )) +} + /// An extension trait for smart pointers. Provides an interface to get a /// raw pointer to the data (with metadata stripped away). 
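Taken together, the `scalar.rs` and `utils.rs` changes above mean that `ScalarValue::new_list` now concatenates its inputs with `iter_to_array` and wraps the result using the renamed `array_into_list_array` helper. The following is a minimal sketch of the resulting behaviour, illustrative only and assuming the `datafusion-common`, `arrow`, and `arrow-array` crates from this workspace:

```rust
use std::sync::Arc;

use arrow::datatypes::DataType;
use arrow_array::{Array, ListArray, StringArray};
use datafusion_common::utils::array_into_list_array;
use datafusion_common::ScalarValue;

fn main() {
    // Two Utf8 scalars are concatenated into one flat StringArray and then
    // wrapped into a single-row ListArray: ["rust", "arrow"] -> [["rust", "arrow"]].
    let scalars = vec![ScalarValue::from("rust"), ScalarValue::from("arrow")];
    let array = ScalarValue::new_list(&scalars, &DataType::Utf8);

    let expected =
        array_into_list_array(Arc::new(StringArray::from(vec!["rust", "arrow"])));
    let actual = array.as_any().downcast_ref::<ListArray>().unwrap();
    assert_eq!(actual, &expected);
}
```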
/// diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 5f9d28bd620b..4015ba439e67 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -20,9 +20,9 @@ name = "datafusion" description = "DataFusion is an in-memory query engine that uses Apache Arrow as the memory model" keywords = ["arrow", "query", "sql"] include = ["benches/*.rs", "src/**/*.rs", "Cargo.toml"] +readme = "README.md" version = { workspace = true } edition = { workspace = true } -readme = { workspace = true } homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } @@ -57,62 +57,61 @@ arrow = { workspace = true } arrow-array = { workspace = true } arrow-schema = { workspace = true } async-compression = { version = "0.4.0", features = ["bzip2", "gzip", "xz", "zstd", "futures-io", "tokio"], optional = true } -async-trait = "0.1.73" -bytes = "1.4" +async-trait = { workspace = true } +bytes = { workspace = true } bzip2 = { version = "0.4.3", optional = true } chrono = { workspace = true } -dashmap = "5.4.0" +dashmap = { workspace = true } datafusion-common = { path = "../common", version = "32.0.0", features = ["object_store"], default-features = false } -datafusion-execution = { path = "../execution", version = "32.0.0" } -datafusion-expr = { path = "../expr", version = "32.0.0" } +datafusion-execution = { workspace = true } +datafusion-expr = { workspace = true } datafusion-optimizer = { path = "../optimizer", version = "32.0.0", default-features = false } datafusion-physical-expr = { path = "../physical-expr", version = "32.0.0", default-features = false } -datafusion-physical-plan = { path = "../physical-plan", version = "32.0.0", default-features = false } -datafusion-sql = { path = "../sql", version = "32.0.0" } +datafusion-physical-plan = { workspace = true } +datafusion-sql = { workspace = true } flate2 = { version = "1.0.24", optional = true } -futures = "0.3" +futures = { workspace = true } glob = "0.3.0" half = { version = "2.1", default-features = false } hashbrown = { version = "0.14", features = ["raw"] } -indexmap = "2.0.0" -itertools = "0.11" -log = "^0.4" +indexmap = { workspace = true } +itertools = { workspace = true } +log = { workspace = true } num-traits = { version = "0.2", optional = true } -num_cpus = "1.13.0" -object_store = "0.7.0" -parking_lot = "0.12" +num_cpus = { workspace = true } +object_store = { workspace = true } +parking_lot = { workspace = true } parquet = { workspace = true, optional = true } -percent-encoding = "2.2.0" pin-project-lite = "^0.2.7" -rand = "0.8" +rand = { workspace = true } sqlparser = { workspace = true } -tempfile = "3" +tempfile = { workspace = true } tokio = { version = "1.28", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] } tokio-util = { version = "0.7.4", features = ["io"] } -url = "2.2" +url = { workspace = true } uuid = { version = "1.0", features = ["v4"] } xz2 = { version = "0.1", optional = true } zstd = { version = "0.13", optional = true, default-features = false } [dev-dependencies] -async-trait = "0.1.53" -bigdecimal = "0.4.1" +async-trait = { workspace = true } +bigdecimal = { workspace = true } criterion = { version = "0.5", features = ["async_tokio"] } csv = "1.1.6" -ctor = "0.2.0" -doc-comment = "0.3" -env_logger = "0.10" -half = "2.2.1" +ctor = { workspace = true } +doc-comment = { workspace = true } +env_logger = { workspace = true } +half = { workspace = true } postgres-protocol = "0.6.4" postgres-types = { version = "0.2.4", features = 
["derive", "with-chrono-0_4"] } rand = { version = "0.8", features = ["small_rng"] } rand_distr = "0.4.3" regex = "1.5.4" -rstest = "0.18.0" +rstest = { workspace = true } rust_decimal = { version = "1.27.0", features = ["tokio-pg"] } -serde_json = "1" +serde_json = { workspace = true } test-utils = { path = "../../test-utils" } -thiserror = "1.0.37" +thiserror = { workspace = true } tokio-postgres = "0.7.7" [target.'cfg(not(target_os = "windows"))'.dev-dependencies] nix = { version = "0.27.1", features = ["fs"] } diff --git a/datafusion/core/README.md b/datafusion/core/README.md new file mode 100644 index 000000000000..5a9493d086cd --- /dev/null +++ b/datafusion/core/README.md @@ -0,0 +1,26 @@ + + +# DataFusion Common + +[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. + +This crate contains the main entrypoints and high level DataFusion APIs such as SessionContext, and DataFrame and ListingTable. + +[df]: https://crates.io/crates/datafusion diff --git a/datafusion/core/src/catalog/mod.rs b/datafusion/core/src/catalog/mod.rs index fe5bdc0ec6a9..ce27d57da00d 100644 --- a/datafusion/core/src/catalog/mod.rs +++ b/datafusion/core/src/catalog/mod.rs @@ -93,12 +93,6 @@ impl CatalogList for MemoryCatalogList { } } -impl Default for MemoryCatalogProvider { - fn default() -> Self { - Self::new() - } -} - /// Represents a catalog, comprising a number of named schemas. pub trait CatalogProvider: Sync + Send { /// Returns the catalog provider as [`Any`] @@ -161,6 +155,12 @@ impl MemoryCatalogProvider { } } +impl Default for MemoryCatalogProvider { + fn default() -> Self { + Self::new() + } +} + impl CatalogProvider for MemoryCatalogProvider { fn as_any(&self) -> &dyn Any { self diff --git a/datafusion/core/src/datasource/default_table_source.rs b/datafusion/core/src/datasource/default_table_source.rs index f93faa50a9b9..00a9c123ceee 100644 --- a/datafusion/core/src/datasource/default_table_source.rs +++ b/datafusion/core/src/datasource/default_table_source.rs @@ -26,10 +26,12 @@ use arrow::datatypes::SchemaRef; use datafusion_common::{internal_err, Constraints, DataFusionError}; use datafusion_expr::{Expr, TableProviderFilterPushDown, TableSource}; -/// DataFusion default table source, wrapping TableProvider +/// DataFusion default table source, wrapping TableProvider. /// /// This structure adapts a `TableProvider` (physical plan trait) to the `TableSource` -/// (logical plan trait) +/// (logical plan trait) and is necessary because the logical plan is contained in +/// the `datafusion_expr` crate, and is not aware of table providers, which exist in +/// the core `datafusion` crate. pub struct DefaultTableSource { /// table provider pub table_provider: Arc, diff --git a/datafusion/core/src/datasource/file_format/arrow.rs b/datafusion/core/src/datasource/file_format/arrow.rs index 16ae4411d1bf..a9bd7d0e27bb 100644 --- a/datafusion/core/src/datasource/file_format/arrow.rs +++ b/datafusion/core/src/datasource/file_format/arrow.rs @@ -20,7 +20,7 @@ //! 
Works with files following the [Arrow IPC format](https://arrow.apache.org/docs/format/Columnar.html#ipc-file-format) use std::any::Any; -use std::io::{Read, Seek}; +use std::borrow::Cow; use std::sync::Arc; use crate::datasource::file_format::FileFormat; @@ -29,13 +29,18 @@ use crate::error::Result; use crate::execution::context::SessionState; use crate::physical_plan::ExecutionPlan; +use arrow::ipc::convert::fb_to_schema; use arrow::ipc::reader::FileReader; -use arrow_schema::{Schema, SchemaRef}; +use arrow::ipc::root_as_message; +use arrow_schema::{ArrowError, Schema, SchemaRef}; +use bytes::Bytes; use datafusion_common::{FileType, Statistics}; use datafusion_physical_expr::PhysicalExpr; use async_trait::async_trait; +use futures::stream::BoxStream; +use futures::StreamExt; use object_store::{GetResultPayload, ObjectMeta, ObjectStore}; /// Arrow `FileFormat` implementation. @@ -59,13 +64,11 @@ impl FileFormat for ArrowFormat { let r = store.as_ref().get(&object.location).await?; let schema = match r.payload { GetResultPayload::File(mut file, _) => { - read_arrow_schema_from_reader(&mut file)? + let reader = FileReader::try_new(&mut file, None)?; + reader.schema() } - GetResultPayload::Stream(_) => { - // TODO: Fetching entire file to get schema is potentially wasteful - let data = r.bytes().await?; - let mut cursor = std::io::Cursor::new(&data); - read_arrow_schema_from_reader(&mut cursor)? + GetResultPayload::Stream(stream) => { + infer_schema_from_file_stream(stream).await? } }; schemas.push(schema.as_ref().clone()); @@ -99,7 +102,179 @@ impl FileFormat for ArrowFormat { } } -fn read_arrow_schema_from_reader(reader: R) -> Result { - let reader = FileReader::try_new(reader, None)?; - Ok(reader.schema()) +const ARROW_MAGIC: [u8; 6] = [b'A', b'R', b'R', b'O', b'W', b'1']; +const CONTINUATION_MARKER: [u8; 4] = [0xff; 4]; + +/// Custom implementation of inferring schema. Should eventually be moved upstream to arrow-rs. +/// See +async fn infer_schema_from_file_stream( + mut stream: BoxStream<'static, object_store::Result>, +) -> Result { + // Expected format: + // - 6 bytes + // - 2 bytes + // - 4 bytes, not present below v0.15.0 + // - 4 bytes + // + // + + // So in first read we need at least all known sized sections, + // which is 6 + 2 + 4 + 4 = 16 bytes. 
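+    // As a purely illustrative example, a post-v0.15.0 file could begin with:
+    //   41 52 52 4f 57 31        b"ARROW1" magic
+    //   00 00                    2 bytes of padding
+    //   ff ff ff ff              continuation marker
+    //   10 02 00 00              metadata size = 0x210 = 528 (little-endian i32)
+    // in which case the flatbuffer Schema message starts at offset 16 and the
+    // next 528 bytes are handed to `root_as_message`.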
+ let bytes = collect_at_least_n_bytes(&mut stream, 16, None).await?; + + // Files should start with these magic bytes + if bytes[0..6] != ARROW_MAGIC { + return Err(ArrowError::ParseError( + "Arrow file does not contian correct header".to_string(), + ))?; + } + + // Since continuation marker bytes added in later versions + let (meta_len, rest_of_bytes_start_index) = if bytes[8..12] == CONTINUATION_MARKER { + (&bytes[12..16], 16) + } else { + (&bytes[8..12], 12) + }; + + let meta_len = [meta_len[0], meta_len[1], meta_len[2], meta_len[3]]; + let meta_len = i32::from_le_bytes(meta_len); + + // Read bytes for Schema message + let block_data = if bytes[rest_of_bytes_start_index..].len() < meta_len as usize { + // Need to read more bytes to decode Message + let mut block_data = Vec::with_capacity(meta_len as usize); + // In case we had some spare bytes in our initial read chunk + block_data.extend_from_slice(&bytes[rest_of_bytes_start_index..]); + let size_to_read = meta_len as usize - block_data.len(); + let block_data = + collect_at_least_n_bytes(&mut stream, size_to_read, Some(block_data)).await?; + Cow::Owned(block_data) + } else { + // Already have the bytes we need + let end_index = meta_len as usize + rest_of_bytes_start_index; + let block_data = &bytes[rest_of_bytes_start_index..end_index]; + Cow::Borrowed(block_data) + }; + + // Decode Schema message + let message = root_as_message(&block_data).map_err(|err| { + ArrowError::ParseError(format!("Unable to read IPC message as metadata: {err:?}")) + })?; + let ipc_schema = message.header_as_schema().ok_or_else(|| { + ArrowError::IpcError("Unable to read IPC message as schema".to_string()) + })?; + let schema = fb_to_schema(ipc_schema); + + Ok(Arc::new(schema)) +} + +async fn collect_at_least_n_bytes( + stream: &mut BoxStream<'static, object_store::Result>, + n: usize, + extend_from: Option>, +) -> Result> { + let mut buf = extend_from.unwrap_or_else(|| Vec::with_capacity(n)); + // If extending existing buffer then ensure we read n additional bytes + let n = n + buf.len(); + while let Some(bytes) = stream.next().await.transpose()? 
{ + buf.extend_from_slice(&bytes); + if buf.len() >= n { + break; + } + } + if buf.len() < n { + return Err(ArrowError::ParseError( + "Unexpected end of byte stream for Arrow IPC file".to_string(), + ))?; + } + Ok(buf) +} + +#[cfg(test)] +mod tests { + use chrono::DateTime; + use object_store::{chunked::ChunkedStore, memory::InMemory, path::Path}; + + use crate::execution::context::SessionContext; + + use super::*; + + #[tokio::test] + async fn test_infer_schema_stream() -> Result<()> { + let mut bytes = std::fs::read("tests/data/example.arrow")?; + bytes.truncate(bytes.len() - 20); // mangle end to show we don't need to read whole file + let location = Path::parse("example.arrow")?; + let in_memory_store: Arc = Arc::new(InMemory::new()); + in_memory_store.put(&location, bytes.into()).await?; + + let session_ctx = SessionContext::new(); + let state = session_ctx.state(); + let object_meta = ObjectMeta { + location, + last_modified: DateTime::default(), + size: usize::MAX, + e_tag: None, + }; + + let arrow_format = ArrowFormat {}; + let expected = vec!["f0: Int64", "f1: Utf8", "f2: Boolean"]; + + // Test chunk sizes where too small so we keep having to read more bytes + // And when large enough that first read contains all we need + for chunk_size in [7, 3000] { + let store = Arc::new(ChunkedStore::new(in_memory_store.clone(), chunk_size)); + let inferred_schema = arrow_format + .infer_schema( + &state, + &(store.clone() as Arc), + &[object_meta.clone()], + ) + .await?; + let actual_fields = inferred_schema + .fields() + .iter() + .map(|f| format!("{}: {:?}", f.name(), f.data_type())) + .collect::>(); + assert_eq!(expected, actual_fields); + } + + Ok(()) + } + + #[tokio::test] + async fn test_infer_schema_short_stream() -> Result<()> { + let mut bytes = std::fs::read("tests/data/example.arrow")?; + bytes.truncate(20); // should cause error that file shorter than expected + let location = Path::parse("example.arrow")?; + let in_memory_store: Arc = Arc::new(InMemory::new()); + in_memory_store.put(&location, bytes.into()).await?; + + let session_ctx = SessionContext::new(); + let state = session_ctx.state(); + let object_meta = ObjectMeta { + location, + last_modified: DateTime::default(), + size: usize::MAX, + e_tag: None, + }; + + let arrow_format = ArrowFormat {}; + + let store = Arc::new(ChunkedStore::new(in_memory_store.clone(), 7)); + let err = arrow_format + .infer_schema( + &state, + &(store.clone() as Arc), + &[object_meta.clone()], + ) + .await; + + assert!(err.is_err()); + assert_eq!( + "Arrow error: Parser error: Unexpected end of byte stream for Arrow IPC file", + err.unwrap_err().to_string() + ); + + Ok(()) + } } diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index 62867c0e2b38..2cba474e559e 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -36,7 +36,7 @@ use tokio::sync::mpsc::{self, Receiver, Sender}; use tokio::task::{JoinHandle, JoinSet}; use crate::datasource::file_format::file_compression_type::FileCompressionType; -use crate::datasource::statistics::create_max_min_accs; +use crate::datasource::statistics::{create_max_min_accs, get_col_stats}; use arrow::datatypes::SchemaRef; use arrow::datatypes::{Fields, Schema}; use bytes::{BufMut, BytesMut}; @@ -63,11 +63,9 @@ use crate::arrow::array::{ use crate::arrow::datatypes::DataType; use crate::config::ConfigOptions; -use crate::datasource::get_col_stats; use 
crate::datasource::physical_plan::{ FileGroupDisplay, FileMeta, FileSinkConfig, ParquetExec, SchemaAdapter, }; - use crate::error::Result; use crate::execution::context::SessionState; use crate::physical_plan::expressions::{MaxAccumulator, MinAccumulator}; diff --git a/datafusion/core/src/datasource/file_format/write/demux.rs b/datafusion/core/src/datasource/file_format/write/demux.rs index 67dd1f940676..27c65dd459ec 100644 --- a/datafusion/core/src/datasource/file_format/write/demux.rs +++ b/datafusion/core/src/datasource/file_format/write/demux.rs @@ -29,7 +29,7 @@ use crate::physical_plan::SendableRecordBatchStream; use arrow_array::builder::UInt64Builder; use arrow_array::cast::AsArray; -use arrow_array::{RecordBatch, StructArray}; +use arrow_array::{downcast_dictionary_array, RecordBatch, StringArray, StructArray}; use arrow_schema::{DataType, Schema}; use datafusion_common::cast::as_string_array; use datafusion_common::DataFusionError; @@ -338,6 +338,22 @@ fn compute_partition_keys_by_row<'a>( partition_values.push(array.value(i)); } } + DataType::Dictionary(_, _) => { + downcast_dictionary_array!( + col_array => { + let array = col_array.downcast_dict::() + .ok_or(DataFusionError::Execution(format!("it is not yet supported to write to hive partitions with datatype {}", + dtype)))?; + + for val in array.values() { + partition_values.push( + val.ok_or(DataFusionError::Execution(format!("Cannot partition by null value for column {}", col)))? + ); + } + }, + _ => unreachable!(), + ) + } _ => { return Err(DataFusionError::NotImplemented(format!( "it is not yet supported to write to hive partitions with datatype {}", diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 822a78a5522a..d26d417bd8b2 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -824,7 +824,10 @@ impl TableProvider for ListingTable { overwrite: bool, ) -> Result> { // Check that the schema of the plan matches the schema of this table. - if !self.schema().equivalent_names_and_types(&input.schema()) { + if !self + .schema() + .logically_equivalent_names_and_types(&input.schema()) + { return plan_err!( // Return an error if schema of the input query does not match with the table schema. "Inserting query must have the same schema with the table." diff --git a/datafusion/core/src/datasource/listing/url.rs b/datafusion/core/src/datasource/listing/url.rs index 4d1ca4853a73..9197e37adbd5 100644 --- a/datafusion/core/src/datasource/listing/url.rs +++ b/datafusion/core/src/datasource/listing/url.rs @@ -27,7 +27,6 @@ use itertools::Itertools; use log::debug; use object_store::path::Path; use object_store::{ObjectMeta, ObjectStore}; -use percent_encoding; use std::sync::Arc; use url::Url; @@ -46,6 +45,16 @@ pub struct ListingTableUrl { impl ListingTableUrl { /// Parse a provided string as a `ListingTableUrl` /// + /// # URL Encoding + /// + /// URL paths are expected to be URL-encoded. That is, the URL for a file named `bar%2Efoo` + /// would be `file:///bar%252Efoo`, as per the [URL] specification. + /// + /// It should be noted that some tools, such as the AWS CLI, take a different approach and + /// instead interpret the URL path verbatim. For example the object `bar%2Efoo` would be + /// addressed as `s3://BUCKET/bar%252Efoo` using [`ListingTableUrl`] but `s3://BUCKET/bar%2Efoo` + /// when using the aws-cli. 
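+    /// As concrete decodings (see the tests below), `file:///foo/bar%2Efoo` yields
+    /// the prefix `foo/bar.foo`, while `file:///foo/bar%252Ffoo` yields
+    /// `foo/bar%2Ffoo`.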
+ /// /// # Paths without a Scheme /// /// If no scheme is provided, or the string is an absolute filesystem path @@ -77,6 +86,7 @@ impl ListingTableUrl { /// filter when listing files from object storage /// /// [file URI]: https://en.wikipedia.org/wiki/File_URI_scheme + /// [URL]: https://url.spec.whatwg.org/ pub fn parse(s: impl AsRef) -> Result { let s = s.as_ref(); @@ -86,7 +96,7 @@ impl ListingTableUrl { } match Url::parse(s) { - Ok(url) => Ok(Self::new(url, None)), + Ok(url) => Self::try_new(url, None), Err(url::ParseError::RelativeUrlWithoutBase) => Self::parse_path(s), Err(e) => Err(DataFusionError::External(Box::new(e))), } @@ -138,15 +148,13 @@ impl ListingTableUrl { .map_err(|_| DataFusionError::Internal(format!("Can not open path: {s}")))?; // TODO: Currently we do not have an IO-related error variant that accepts () // or a string. Once we have such a variant, change the error type above. - Ok(Self::new(url, glob)) + Self::try_new(url, glob) } /// Creates a new [`ListingTableUrl`] from a url and optional glob expression - fn new(url: Url, glob: Option) -> Self { - let decoded_path = - percent_encoding::percent_decode_str(url.path()).decode_utf8_lossy(); - let prefix = Path::from(decoded_path.as_ref()); - Self { url, prefix, glob } + fn try_new(url: Url, glob: Option) -> Result { + let prefix = Path::from_url_path(url.path())?; + Ok(Self { url, prefix, glob }) } /// Returns the URL scheme @@ -286,6 +294,7 @@ fn split_glob_expression(path: &str) -> Option<(&str, &str)> { #[cfg(test)] mod tests { use super::*; + use tempfile::tempdir; #[test] fn test_prefix_path() { @@ -317,8 +326,27 @@ mod tests { let url = ListingTableUrl::parse("file:///foo/bar?").unwrap(); assert_eq!(url.prefix.as_ref(), "foo/bar"); - let url = ListingTableUrl::parse("file:///foo/😺").unwrap(); - assert_eq!(url.prefix.as_ref(), "foo/%F0%9F%98%BA"); + let err = ListingTableUrl::parse("file:///foo/😺").unwrap_err(); + assert_eq!(err.to_string(), "Object Store error: Encountered object with invalid path: Error parsing Path \"/foo/😺\": Encountered illegal character sequence \"😺\" whilst parsing path segment \"😺\""); + + let url = ListingTableUrl::parse("file:///foo/bar%2Efoo").unwrap(); + assert_eq!(url.prefix.as_ref(), "foo/bar.foo"); + + let url = ListingTableUrl::parse("file:///foo/bar%2Efoo").unwrap(); + assert_eq!(url.prefix.as_ref(), "foo/bar.foo"); + + let url = ListingTableUrl::parse("file:///foo/bar%252Ffoo").unwrap(); + assert_eq!(url.prefix.as_ref(), "foo/bar%2Ffoo"); + + let url = ListingTableUrl::parse("file:///foo/a%252Fb.txt").unwrap(); + assert_eq!(url.prefix.as_ref(), "foo/a%2Fb.txt"); + + let dir = tempdir().unwrap(); + let path = dir.path().join("bar%2Ffoo"); + std::fs::File::create(&path).unwrap(); + + let url = ListingTableUrl::parse(path.to_str().unwrap()).unwrap(); + assert!(url.prefix.as_ref().ends_with("bar%2Ffoo"), "{}", url.prefix); } #[test] diff --git a/datafusion/core/src/datasource/memory.rs b/datafusion/core/src/datasource/memory.rs index a2f8e225e121..6bcaa97a408f 100644 --- a/datafusion/core/src/datasource/memory.rs +++ b/datafusion/core/src/datasource/memory.rs @@ -209,7 +209,10 @@ impl TableProvider for MemTable { ) -> Result> { // Create a physical plan from the logical plan. // Check that the schema of the plan matches the schema of this table. - if !self.schema().equivalent_names_and_types(&input.schema()) { + if !self + .schema() + .logically_equivalent_names_and_types(&input.schema()) + { return plan_err!( "Inserting query must have the same schema with the table." 
); diff --git a/datafusion/core/src/datasource/mod.rs b/datafusion/core/src/datasource/mod.rs index 3ace2c239852..48e9d6992124 100644 --- a/datafusion/core/src/datasource/mod.rs +++ b/datafusion/core/src/datasource/mod.rs @@ -42,6 +42,4 @@ pub use self::memory::MemTable; pub use self::provider::TableProvider; pub use self::view::ViewTable; pub use crate::logical_expr::TableType; -#[cfg(feature = "parquet")] -pub(crate) use statistics::get_col_stats; pub use statistics::get_statistics_with_limit; diff --git a/datafusion/core/src/datasource/physical_plan/arrow_file.rs b/datafusion/core/src/datasource/physical_plan/arrow_file.rs index e00e8aea0a04..30b55db28491 100644 --- a/datafusion/core/src/datasource/physical_plan/arrow_file.rs +++ b/datafusion/core/src/datasource/physical_plan/arrow_file.rs @@ -32,10 +32,7 @@ use crate::physical_plan::{ use arrow_schema::SchemaRef; use datafusion_common::Statistics; use datafusion_execution::TaskContext; -use datafusion_physical_expr::{ - ordering_equivalence_properties_helper, LexOrdering, OrderingEquivalenceProperties, - PhysicalSortExpr, -}; +use datafusion_physical_expr::{EquivalenceProperties, LexOrdering, PhysicalSortExpr}; use futures::StreamExt; use object_store::{GetResultPayload, ObjectStore}; @@ -106,8 +103,8 @@ impl ExecutionPlan for ArrowExec { .map(|ordering| ordering.as_slice()) } - fn ordering_equivalence_properties(&self) -> OrderingEquivalenceProperties { - ordering_equivalence_properties_helper( + fn equivalence_properties(&self) -> EquivalenceProperties { + EquivalenceProperties::new_with_orderings( self.schema(), &self.projected_output_ordering, ) diff --git a/datafusion/core/src/datasource/physical_plan/avro.rs b/datafusion/core/src/datasource/physical_plan/avro.rs index 237772eb8360..b97f162fd2f5 100644 --- a/datafusion/core/src/datasource/physical_plan/avro.rs +++ b/datafusion/core/src/datasource/physical_plan/avro.rs @@ -31,9 +31,7 @@ use crate::physical_plan::{ use arrow::datatypes::SchemaRef; use datafusion_execution::TaskContext; -use datafusion_physical_expr::{ - ordering_equivalence_properties_helper, LexOrdering, OrderingEquivalenceProperties, -}; +use datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; /// Execution plan for scanning Avro data source #[derive(Debug, Clone)] @@ -101,8 +99,8 @@ impl ExecutionPlan for AvroExec { .map(|ordering| ordering.as_slice()) } - fn ordering_equivalence_properties(&self) -> OrderingEquivalenceProperties { - ordering_equivalence_properties_helper( + fn equivalence_properties(&self) -> EquivalenceProperties { + EquivalenceProperties::new_with_orderings( self.schema(), &self.projected_output_ordering, ) diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs b/datafusion/core/src/datasource/physical_plan/csv.rs index 8117e101ea99..75aa343ffbfc 100644 --- a/datafusion/core/src/datasource/physical_plan/csv.rs +++ b/datafusion/core/src/datasource/physical_plan/csv.rs @@ -41,11 +41,10 @@ use crate::physical_plan::{ use arrow::csv; use arrow::datatypes::SchemaRef; use datafusion_execution::TaskContext; -use datafusion_physical_expr::{ - ordering_equivalence_properties_helper, LexOrdering, OrderingEquivalenceProperties, -}; +use datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; use bytes::{Buf, Bytes}; +use datafusion_common::config::ConfigOptions; use futures::{ready, StreamExt, TryStreamExt}; use object_store::{GetOptions, GetResultPayload, ObjectStore}; use tokio::io::AsyncWriteExt; @@ -117,34 +116,6 @@ impl CsvExec { pub fn escape(&self) -> Option { 
self.escape } - - /// Redistribute files across partitions according to their size - /// See comments on `repartition_file_groups()` for more detail. - /// - /// Return `None` if can't get repartitioned(empty/compressed file). - pub fn get_repartitioned( - &self, - target_partitions: usize, - repartition_file_min_size: usize, - ) -> Option { - // Parallel execution on compressed CSV file is not supported yet. - if self.file_compression_type.is_compressed() { - return None; - } - - let repartitioned_file_groups_option = FileScanConfig::repartition_file_groups( - self.base_config.file_groups.clone(), - target_partitions, - repartition_file_min_size, - ); - - if let Some(repartitioned_file_groups) = repartitioned_file_groups_option { - let mut new_plan = self.clone(); - new_plan.base_config.file_groups = repartitioned_file_groups; - return Some(new_plan); - } - None - } } impl DisplayAs for CsvExec { @@ -186,8 +157,8 @@ impl ExecutionPlan for CsvExec { .map(|ordering| ordering.as_slice()) } - fn ordering_equivalence_properties(&self) -> OrderingEquivalenceProperties { - ordering_equivalence_properties_helper( + fn equivalence_properties(&self) -> EquivalenceProperties { + EquivalenceProperties::new_with_orderings( self.schema(), &self.projected_output_ordering, ) @@ -205,6 +176,35 @@ impl ExecutionPlan for CsvExec { Ok(self) } + /// Redistribute files across partitions according to their size + /// See comments on `repartition_file_groups()` for more detail. + /// + /// Return `None` if can't get repartitioned(empty/compressed file). + fn repartitioned( + &self, + target_partitions: usize, + config: &ConfigOptions, + ) -> Result>> { + let repartition_file_min_size = config.optimizer.repartition_file_min_size; + // Parallel execution on compressed CSV file is not supported yet. 
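+        // (returning `Ok(None)` tells the caller that this plan cannot be
+        // repartitioned; `ensure_distribution` then keeps the original child,
+        // while a rewritten `CsvExec` is returned as `Ok(Some(..))` below)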
+ if self.file_compression_type.is_compressed() { + return Ok(None); + } + + let repartitioned_file_groups_option = FileScanConfig::repartition_file_groups( + self.base_config.file_groups.clone(), + target_partitions, + repartition_file_min_size, + ); + + if let Some(repartitioned_file_groups) = repartitioned_file_groups_option { + let mut new_plan = self.clone(); + new_plan.base_config.file_groups = repartitioned_file_groups; + return Ok(Some(Arc::new(new_plan))); + } + Ok(None) + } + fn execute( &self, partition: usize, diff --git a/datafusion/core/src/datasource/physical_plan/json.rs b/datafusion/core/src/datasource/physical_plan/json.rs index 1ba8e47a523c..73dcb32ac81f 100644 --- a/datafusion/core/src/datasource/physical_plan/json.rs +++ b/datafusion/core/src/datasource/physical_plan/json.rs @@ -40,9 +40,7 @@ use crate::physical_plan::{ use arrow::json::ReaderBuilder; use arrow::{datatypes::SchemaRef, json}; use datafusion_execution::TaskContext; -use datafusion_physical_expr::{ - ordering_equivalence_properties_helper, LexOrdering, OrderingEquivalenceProperties, -}; +use datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; use bytes::{Buf, Bytes}; use futures::{ready, stream, StreamExt, TryStreamExt}; @@ -122,8 +120,8 @@ impl ExecutionPlan for NdJsonExec { .map(|ordering| ordering.as_slice()) } - fn ordering_equivalence_properties(&self) -> OrderingEquivalenceProperties { - ordering_equivalence_properties_helper( + fn equivalence_properties(&self) -> EquivalenceProperties { + EquivalenceProperties::new_with_orderings( self.schema(), &self.projected_output_ordering, ) diff --git a/datafusion/core/src/datasource/physical_plan/mod.rs b/datafusion/core/src/datasource/physical_plan/mod.rs index 3f84f87eb5d5..ea0a9698ff5c 100644 --- a/datafusion/core/src/datasource/physical_plan/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/mod.rs @@ -30,8 +30,6 @@ pub(crate) use self::csv::plan_to_csv; pub use self::csv::{CsvConfig, CsvExec, CsvOpener}; pub(crate) use self::json::plan_to_json; #[cfg(feature = "parquet")] -pub(crate) use self::parquet::plan_to_parquet; -#[cfg(feature = "parquet")] pub use self::parquet::{ParquetExec, ParquetFileMetrics, ParquetFileReaderFactory}; pub use arrow_file::ArrowExec; @@ -529,6 +527,7 @@ mod tests { }; use arrow_schema::Field; use chrono::Utc; + use datafusion_common::config::ConfigOptions; use crate::physical_plan::{DefaultDisplay, VerboseDisplay}; @@ -830,11 +829,7 @@ mod tests { None, ); - let partitioned_file = parquet_exec - .get_repartitioned(4, 0) - .base_config() - .file_groups - .clone(); + let partitioned_file = repartition_with_size(&parquet_exec, 4, 0); assert!(partitioned_file[0][0].range.is_none()); } @@ -895,13 +890,8 @@ mod tests { None, ); - let actual = file_groups_to_vec( - parquet_exec - .get_repartitioned(n_partition, 10) - .base_config() - .file_groups - .clone(), - ); + let actual = + repartition_with_size_to_vec(&parquet_exec, n_partition, 10); assert_eq!(expected, &actual); } @@ -929,13 +919,7 @@ mod tests { None, ); - let actual = file_groups_to_vec( - parquet_exec - .get_repartitioned(4, 10) - .base_config() - .file_groups - .clone(), - ); + let actual = repartition_with_size_to_vec(&parquet_exec, 4, 10); let expected = vec![ (0, "a".to_string(), 0, 31), (1, "a".to_string(), 31, 62), @@ -966,13 +950,7 @@ mod tests { None, ); - let actual = file_groups_to_vec( - parquet_exec - .get_repartitioned(96, 5) - .base_config() - .file_groups - .clone(), - ); + let actual = repartition_with_size_to_vec(&parquet_exec, 96, 5); 
let expected = vec![ (0, "a".to_string(), 0, 1), (1, "a".to_string(), 1, 2), @@ -1009,13 +987,7 @@ mod tests { None, ); - let actual = file_groups_to_vec( - parquet_exec - .get_repartitioned(3, 10) - .base_config() - .file_groups - .clone(), - ); + let actual = repartition_with_size_to_vec(&parquet_exec, 3, 10); let expected = vec![ (0, "a".to_string(), 0, 34), (1, "a".to_string(), 34, 40), @@ -1048,13 +1020,7 @@ mod tests { None, ); - let actual = file_groups_to_vec( - parquet_exec - .get_repartitioned(2, 10) - .base_config() - .file_groups - .clone(), - ); + let actual = repartition_with_size_to_vec(&parquet_exec, 2, 10); let expected = vec![ (0, "a".to_string(), 0, 40), (0, "b".to_string(), 0, 10), @@ -1088,11 +1054,7 @@ mod tests { None, ); - let actual = parquet_exec - .get_repartitioned(65, 10) - .base_config() - .file_groups - .clone(); + let actual = repartition_with_size(&parquet_exec, 65, 10); assert_eq!(2, actual.len()); } @@ -1117,17 +1079,47 @@ mod tests { None, ); - let actual = parquet_exec - .get_repartitioned(65, 500) + let actual = repartition_with_size(&parquet_exec, 65, 500); + assert_eq!(1, actual.len()); + } + + /// Calls `ParquetExec.repartitioned` with the specified + /// `target_partitions` and `repartition_file_min_size`, returning the + /// resulting `PartitionedFile`s + fn repartition_with_size( + parquet_exec: &ParquetExec, + target_partitions: usize, + repartition_file_min_size: usize, + ) -> Vec> { + let mut config = ConfigOptions::new(); + config.optimizer.repartition_file_min_size = repartition_file_min_size; + + parquet_exec + .repartitioned(target_partitions, &config) + .unwrap() // unwrap Result + .unwrap() // unwrap Option + .as_any() + .downcast_ref::() + .unwrap() .base_config() .file_groups - .clone(); - assert_eq!(1, actual.len()); + .clone() } - fn file_groups_to_vec( - file_groups: Vec>, + /// Calls `repartition_with_size` and returns a tuple for each output `PartitionedFile`: + /// + /// `(partition index, file path, start, end)` + fn repartition_with_size_to_vec( + parquet_exec: &ParquetExec, + target_partitions: usize, + repartition_file_min_size: usize, ) -> Vec<(usize, String, i64, i64)> { + let file_groups = repartition_with_size( + parquet_exec, + target_partitions, + repartition_file_min_size, + ); + file_groups .iter() .enumerate() diff --git a/datafusion/core/src/datasource/physical_plan/parquet.rs b/datafusion/core/src/datasource/physical_plan/parquet.rs index 3a2459bec817..960b2ec7337d 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet.rs @@ -45,8 +45,7 @@ use crate::{ use arrow::datatypes::{DataType, SchemaRef}; use arrow::error::ArrowError; use datafusion_physical_expr::{ - ordering_equivalence_properties_helper, LexOrdering, OrderingEquivalenceProperties, - PhysicalExpr, PhysicalSortExpr, + EquivalenceProperties, LexOrdering, PhysicalExpr, PhysicalSortExpr, }; use bytes::Bytes; @@ -259,26 +258,6 @@ impl ParquetExec { self.enable_bloom_filter .unwrap_or(config_options.execution.parquet.bloom_filter_enabled) } - - /// Redistribute files across partitions according to their size - /// See comments on `get_file_groups_repartitioned()` for more detail. 
- pub fn get_repartitioned( - &self, - target_partitions: usize, - repartition_file_min_size: usize, - ) -> Self { - let repartitioned_file_groups_option = FileScanConfig::repartition_file_groups( - self.base_config.file_groups.clone(), - target_partitions, - repartition_file_min_size, - ); - - let mut new_plan = self.clone(); - if let Some(repartitioned_file_groups) = repartitioned_file_groups_option { - new_plan.base_config.file_groups = repartitioned_file_groups; - } - new_plan - } } impl DisplayAs for ParquetExec { @@ -335,8 +314,8 @@ impl ExecutionPlan for ParquetExec { .map(|ordering| ordering.as_slice()) } - fn ordering_equivalence_properties(&self) -> OrderingEquivalenceProperties { - ordering_equivalence_properties_helper( + fn equivalence_properties(&self) -> EquivalenceProperties { + EquivalenceProperties::new_with_orderings( self.schema(), &self.projected_output_ordering, ) @@ -349,6 +328,27 @@ impl ExecutionPlan for ParquetExec { Ok(self) } + /// Redistribute files across partitions according to their size + /// See comments on `get_file_groups_repartitioned()` for more detail. + fn repartitioned( + &self, + target_partitions: usize, + config: &ConfigOptions, + ) -> Result>> { + let repartition_file_min_size = config.optimizer.repartition_file_min_size; + let repartitioned_file_groups_option = FileScanConfig::repartition_file_groups( + self.base_config.file_groups.clone(), + target_partitions, + repartition_file_min_size, + ); + + let mut new_plan = self.clone(); + if let Some(repartitioned_file_groups) = repartitioned_file_groups_option { + new_plan.base_config.file_groups = repartitioned_file_groups; + } + Ok(Some(Arc::new(new_plan))) + } + fn execute( &self, partition_index: usize, diff --git a/datafusion/core/src/execution/context/parquet.rs b/datafusion/core/src/execution/context/parquet.rs index b02576c6a868..dc202b9903f5 100644 --- a/datafusion/core/src/execution/context/parquet.rs +++ b/datafusion/core/src/execution/context/parquet.rs @@ -17,10 +17,9 @@ use std::sync::Arc; +use crate::datasource::physical_plan::parquet::plan_to_parquet; use parquet::file::properties::WriterProperties; -use crate::datasource::physical_plan::plan_to_parquet; - use super::super::options::{ParquetReadOptions, ReadOptions}; use super::{DataFilePaths, DataFrame, ExecutionPlan, Result, SessionContext}; diff --git a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs index 838ae613683e..2c4e929788df 100644 --- a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs +++ b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs @@ -93,7 +93,7 @@ impl PhysicalOptimizerRule for CombinePartialFinalAggregate { input_agg_exec.filter_expr().to_vec(), input_agg_exec.order_by_expr().to_vec(), input_agg_exec.input().clone(), - input_agg_exec.input_schema().clone(), + input_agg_exec.input_schema(), ) .ok() .map(Arc::new) diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index fcf4a1985fd6..cc775bd7946b 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -26,9 +26,6 @@ use std::fmt::Formatter; use std::sync::Arc; use crate::config::ConfigOptions; -use crate::datasource::physical_plan::CsvExec; -#[cfg(feature = "parquet")] -use crate::datasource::physical_plan::ParquetExec; use crate::error::Result; use 
crate::physical_optimizer::utils::{ add_sort_above, get_children_exectrees, get_plan_string, is_coalesce_partitions, @@ -52,12 +49,11 @@ use crate::physical_plan::{ use arrow::compute::SortOptions; use datafusion_common::tree_node::{Transformed, TreeNode, VisitRecursion}; use datafusion_expr::logical_plan::JoinType; -use datafusion_physical_expr::equivalence::EquivalenceProperties; use datafusion_physical_expr::expressions::{Column, NoOp}; -use datafusion_physical_expr::utils::{ - map_columns_before_projection, ordering_satisfy_requirement_concrete, +use datafusion_physical_expr::utils::map_columns_before_projection; +use datafusion_physical_expr::{ + physical_exprs_equal, EquivalenceProperties, PhysicalExpr, }; -use datafusion_physical_expr::{expr_list_eq_strict_order, PhysicalExpr}; use datafusion_physical_plan::unbounded_output; use datafusion_physical_plan::windows::{get_best_fitting_window, BoundedWindowAggExec}; @@ -501,7 +497,7 @@ fn reorder_aggregate_keys( if parent_required.len() != output_exprs.len() || !agg_exec.group_by().null_expr().is_empty() - || expr_list_eq_strict_order(&output_exprs, parent_required) + || physical_exprs_equal(&output_exprs, parent_required) { Ok(PlanWithKeyRequirements::new(agg_plan)) } else { @@ -554,7 +550,7 @@ fn reorder_aggregate_keys( agg_exec.filter_expr().to_vec(), agg_exec.order_by_expr().to_vec(), partial_agg, - agg_exec.input_schema().clone(), + agg_exec.input_schema(), )?); // Need to create a new projection to change the expr ordering back @@ -567,13 +563,11 @@ fn reorder_aggregate_keys( Arc::new(Column::new( name, agg_schema.index_of(name).unwrap(), - )) - as Arc, + )) as _, name.to_owned(), ) }) .collect::>(); - let agg_schema = new_final_agg.schema(); let agg_fields = agg_schema.fields(); for (idx, field) in agg_fields.iter().enumerate().skip(output_columns.len()) @@ -709,10 +703,9 @@ pub(crate) fn reorder_join_keys_to_inputs( ) { if !new_positions.is_empty() { let new_join_on = new_join_conditions(&left_keys, &right_keys); - let mut new_sort_options = vec![]; - for idx in 0..sort_options.len() { - new_sort_options.push(sort_options[new_positions[idx]]) - } + let new_sort_options = (0..sort_options.len()) + .map(|idx| sort_options[new_positions[idx]]) + .collect(); return Ok(Arc::new(SortMergeJoinExec::try_new( left.clone(), right.clone(), @@ -760,39 +753,40 @@ fn try_reorder( expected: &[Arc], equivalence_properties: &EquivalenceProperties, ) -> Option<(JoinKeyPairs, Vec)> { + let eq_groups = equivalence_properties.eq_group(); let mut normalized_expected = vec![]; let mut normalized_left_keys = vec![]; let mut normalized_right_keys = vec![]; if join_keys.left_keys.len() != expected.len() { return None; } - if expr_list_eq_strict_order(expected, &join_keys.left_keys) - || expr_list_eq_strict_order(expected, &join_keys.right_keys) + if physical_exprs_equal(expected, &join_keys.left_keys) + || physical_exprs_equal(expected, &join_keys.right_keys) { return Some((join_keys, vec![])); - } else if !equivalence_properties.classes().is_empty() { + } else if !equivalence_properties.eq_group().is_empty() { normalized_expected = expected .iter() - .map(|e| equivalence_properties.normalize_expr(e.clone())) + .map(|e| eq_groups.normalize_expr(e.clone())) .collect::>(); assert_eq!(normalized_expected.len(), expected.len()); normalized_left_keys = join_keys .left_keys .iter() - .map(|e| equivalence_properties.normalize_expr(e.clone())) + .map(|e| eq_groups.normalize_expr(e.clone())) .collect::>(); assert_eq!(join_keys.left_keys.len(), 
normalized_left_keys.len()); normalized_right_keys = join_keys .right_keys .iter() - .map(|e| equivalence_properties.normalize_expr(e.clone())) + .map(|e| eq_groups.normalize_expr(e.clone())) .collect::>(); assert_eq!(join_keys.right_keys.len(), normalized_right_keys.len()); - if expr_list_eq_strict_order(&normalized_expected, &normalized_left_keys) - || expr_list_eq_strict_order(&normalized_expected, &normalized_right_keys) + if physical_exprs_equal(&normalized_expected, &normalized_left_keys) + || physical_exprs_equal(&normalized_expected, &normalized_right_keys) { return Some((join_keys, vec![])); } @@ -873,7 +867,7 @@ fn new_join_conditions( r_key.as_any().downcast_ref::().unwrap().clone(), ) }) - .collect::>() + .collect() } /// Updates `dist_onward` such that, to keep track of @@ -938,9 +932,9 @@ fn add_roundrobin_on_top( let should_preserve_ordering = input.output_ordering().is_some(); let partitioning = Partitioning::RoundRobinBatch(n_target); - let repartition = RepartitionExec::try_new(input, partitioning)? - .with_preserve_order(should_preserve_ordering); - let new_plan = Arc::new(repartition) as Arc; + let repartition = RepartitionExec::try_new(input, partitioning)?; + let new_plan = Arc::new(repartition.with_preserve_order(should_preserve_ordering)) + as Arc; // update distribution onward with new operator update_distribution_onward(new_plan.clone(), dist_onward, input_idx); @@ -1014,9 +1008,9 @@ fn add_hash_on_top( input }; let partitioning = Partitioning::Hash(hash_exprs, n_target); - let repartition = RepartitionExec::try_new(new_plan, partitioning)? - .with_preserve_order(should_preserve_ordering); - new_plan = Arc::new(repartition) as _; + let repartition = RepartitionExec::try_new(new_plan, partitioning)?; + new_plan = + Arc::new(repartition.with_preserve_order(should_preserve_ordering)) as _; // update distribution onward with new operator update_distribution_onward(new_plan.clone(), dist_onward, input_idx); @@ -1188,7 +1182,6 @@ fn ensure_distribution( // When `false`, round robin repartition will not be added to increase parallelism let enable_round_robin = config.optimizer.enable_round_robin_repartition; let repartition_file_scans = config.optimizer.repartition_file_scans; - let repartition_file_min_size = config.optimizer.repartition_file_min_size; let batch_size = config.execution.batch_size; let is_unbounded = unbounded_output(&dist_context.plan); // Use order preserving variants either of the conditions true @@ -1265,25 +1258,13 @@ fn ensure_distribution( // Unless partitioning doesn't increase the partition count, it is not beneficial: && child.output_partitioning().partition_count() < target_partitions { - // When `repartition_file_scans` is set, leverage source operators - // (`ParquetExec`, `CsvExec` etc.) to increase parallelism at the source. + // When `repartition_file_scans` is set, attempt to increase + // parallelism at the source. if repartition_file_scans { - #[cfg(feature = "parquet")] - if let Some(parquet_exec) = - child.as_any().downcast_ref::() + if let Some(new_child) = + child.repartitioned(target_partitions, config)? 
{ - child = Arc::new(parquet_exec.get_repartitioned( - target_partitions, - repartition_file_min_size, - )); - } - if let Some(csv_exec) = child.as_any().downcast_ref::() { - if let Some(csv_exec) = csv_exec.get_repartitioned( - target_partitions, - repartition_file_min_size, - ) { - child = Arc::new(csv_exec); - } + child = new_child; } } // Increase parallelism by adding round-robin repartitioning @@ -1318,16 +1299,12 @@ fn ensure_distribution( // There is an ordering requirement of the operator: if let Some(required_input_ordering) = required_input_ordering { - let existing_ordering = child.output_ordering().unwrap_or(&[]); // Either: // - Ordering requirement cannot be satisfied by preserving ordering through repartitions, or // - using order preserving variant is not desirable. - let ordering_satisfied = ordering_satisfy_requirement_concrete( - existing_ordering, - required_input_ordering, - || child.equivalence_properties(), - || child.ordering_equivalence_properties(), - ); + let ordering_satisfied = child + .equivalence_properties() + .ordering_satisfy_requirement(required_input_ordering); if !ordering_satisfied || !order_preserving_variants_desirable { replace_order_preserving_variants(&mut child, dist_onward)?; // If ordering requirements were satisfied before repartitioning, @@ -1634,6 +1611,8 @@ impl TreeNode for PlanWithKeyRequirements { } } +/// Since almost all of these tests explicitly use `ParquetExec` they only run with the parquet feature flag on +#[cfg(feature = "parquet")] #[cfg(test)] mod tests { use std::ops::Deref; @@ -1642,9 +1621,8 @@ mod tests { use crate::datasource::file_format::file_compression_type::FileCompressionType; use crate::datasource::listing::PartitionedFile; use crate::datasource::object_store::ObjectStoreUrl; - use crate::datasource::physical_plan::FileScanConfig; - #[cfg(feature = "parquet")] use crate::datasource::physical_plan::ParquetExec; + use crate::datasource::physical_plan::{CsvExec, FileScanConfig}; use crate::physical_optimizer::enforce_sorting::EnforceSorting; use crate::physical_optimizer::output_requirements::OutputRequirements; use crate::physical_plan::aggregates::{ @@ -1783,12 +1761,10 @@ mod tests { ])) } - #[cfg(feature = "parquet")] fn parquet_exec() -> Arc { parquet_exec_with_sort(vec![]) } - #[cfg(feature = "parquet")] fn parquet_exec_with_sort( output_ordering: Vec>, ) -> Arc { @@ -1809,13 +1785,11 @@ mod tests { )) } - #[cfg(feature = "parquet")] fn parquet_exec_multiple() -> Arc { parquet_exec_multiple_sorted(vec![]) } // Created a sorted parquet exec with multiple files - #[cfg(feature = "parquet")] fn parquet_exec_multiple_sorted( output_ordering: Vec>, ) -> Arc { @@ -2170,7 +2144,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn multi_hash_joins() -> Result<()> { let left = parquet_exec(); let alias_pairs: Vec<(String, String)> = vec![ @@ -2333,7 +2306,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn multi_joins_after_alias() -> Result<()> { let left = parquet_exec(); let right = parquet_exec(); @@ -2413,7 +2385,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn multi_joins_after_multi_alias() -> Result<()> { let left = parquet_exec(); let right = parquet_exec(); @@ -2469,7 +2440,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn join_after_agg_alias() -> Result<()> { // group by (a as a1) let left = aggregate_exec_with_alias( @@ -2509,7 +2479,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn hash_join_key_ordering() -> Result<()> { // group by (a as a1, b as b1) let left 
= aggregate_exec_with_alias( @@ -2562,7 +2531,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn multi_hash_join_key_ordering() -> Result<()> { let left = parquet_exec(); let alias_pairs: Vec<(String, String)> = vec![ @@ -2679,7 +2647,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn reorder_join_keys_to_left_input() -> Result<()> { let left = parquet_exec(); let alias_pairs: Vec<(String, String)> = vec![ @@ -2810,7 +2777,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn reorder_join_keys_to_right_input() -> Result<()> { let left = parquet_exec(); let alias_pairs: Vec<(String, String)> = vec![ @@ -2936,7 +2902,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn multi_smj_joins() -> Result<()> { let left = parquet_exec(); let alias_pairs: Vec<(String, String)> = vec![ @@ -3210,7 +3175,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn smj_join_key_ordering() -> Result<()> { // group by (a as a1, b as b1) let left = aggregate_exec_with_alias( @@ -3306,7 +3270,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn merge_does_not_need_sort() -> Result<()> { // see https://github.com/apache/arrow-datafusion/issues/4331 let schema = schema(); @@ -3347,7 +3310,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn union_to_interleave() -> Result<()> { // group by (a as a1) let left = aggregate_exec_with_alias( @@ -3389,7 +3351,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn added_repartition_to_single_partition() -> Result<()> { let alias = vec![("a".to_string(), "a".to_string())]; let plan = aggregate_exec_with_alias(parquet_exec(), alias); @@ -3408,7 +3369,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn repartition_deepest_node() -> Result<()> { let alias = vec![("a".to_string(), "a".to_string())]; let plan = aggregate_exec_with_alias(filter_exec(parquet_exec()), alias); @@ -3428,7 +3388,7 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] + fn repartition_unsorted_limit() -> Result<()> { let plan = limit_exec(filter_exec(parquet_exec())); @@ -3448,7 +3408,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn repartition_sorted_limit() -> Result<()> { let schema = schema(); let sort_key = vec![PhysicalSortExpr { @@ -3471,7 +3430,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn repartition_sorted_limit_with_filter() -> Result<()> { let schema = schema(); let sort_key = vec![PhysicalSortExpr { @@ -3497,7 +3455,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn repartition_ignores_limit() -> Result<()> { let alias = vec![("a".to_string(), "a".to_string())]; let plan = aggregate_exec_with_alias( @@ -3528,7 +3485,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn repartition_ignores_union() -> Result<()> { let plan = union_exec(vec![parquet_exec(); 5]); @@ -3548,7 +3504,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn repartition_through_sort_preserving_merge() -> Result<()> { // sort preserving merge with non-sorted input let schema = schema(); @@ -3571,7 +3526,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn repartition_ignores_sort_preserving_merge() -> Result<()> { // sort preserving merge already sorted input, let schema = schema(); @@ -3603,7 +3557,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn repartition_ignores_sort_preserving_merge_with_union() -> Result<()> { // 2 sorted parquet files unioned (partitions are concatenated, sort is preserved) let schema = schema(); @@ -3636,7 +3589,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] 
fn repartition_does_not_destroy_sort() -> Result<()> { // SortRequired // Parquet(sorted) @@ -3662,7 +3614,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn repartition_does_not_destroy_sort_more_complex() -> Result<()> { // model a more complicated scenario where one child of a union can be repartitioned for performance // but the other can not be @@ -3701,7 +3652,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn repartition_transitively_with_projection() -> Result<()> { let schema = schema(); let proj_exprs = vec![( @@ -3744,7 +3694,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn repartition_ignores_transitively_with_projection() -> Result<()> { let schema = schema(); let sort_key = vec![PhysicalSortExpr { @@ -3775,7 +3724,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn repartition_transitively_past_sort_with_projection() -> Result<()> { let schema = schema(); let sort_key = vec![PhysicalSortExpr { @@ -3805,18 +3753,17 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn repartition_transitively_past_sort_with_filter() -> Result<()> { let schema = schema(); let sort_key = vec![PhysicalSortExpr { - expr: col("c", &schema).unwrap(), + expr: col("a", &schema).unwrap(), options: SortOptions::default(), }]; let plan = sort_exec(sort_key, filter_exec(parquet_exec()), false); let expected = &[ - "SortPreservingMergeExec: [c@2 ASC]", - "SortExec: expr=[c@2 ASC]", + "SortPreservingMergeExec: [a@0 ASC]", + "SortExec: expr=[a@0 ASC]", // Expect repartition on the input to the sort (as it can benefit from additional parallelism) "FilterExec: c@2 = 0", "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", @@ -3826,7 +3773,7 @@ mod tests { assert_optimized!(expected, plan.clone(), true); let expected_first_sort_enforcement = &[ - "SortExec: expr=[c@2 ASC]", + "SortExec: expr=[a@0 ASC]", "CoalescePartitionsExec", "FilterExec: c@2 = 0", // Expect repartition on the input of the filter (as it can benefit from additional parallelism) @@ -3880,7 +3827,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn parallelization_single_partition() -> Result<()> { let alias = vec![("a".to_string(), "a".to_string())]; let plan_parquet = aggregate_exec_with_alias(parquet_exec(), alias.clone()); @@ -3969,7 +3915,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn parallelization_two_partitions() -> Result<()> { let alias = vec![("a".to_string(), "a".to_string())]; let plan_parquet = @@ -3997,7 +3942,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn parallelization_two_partitions_into_four() -> Result<()> { let alias = vec![("a".to_string(), "a".to_string())]; let plan_parquet = @@ -4025,7 +3969,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn parallelization_sorted_limit() -> Result<()> { let schema = schema(); let sort_key = vec![PhysicalSortExpr { @@ -4058,7 +4001,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn parallelization_limit_with_filter() -> Result<()> { let schema = schema(); let sort_key = vec![PhysicalSortExpr { @@ -4104,7 +4046,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn parallelization_ignores_limit() -> Result<()> { let alias = vec![("a".to_string(), "a".to_string())]; let plan_parquet = aggregate_exec_with_alias( @@ -4155,7 +4096,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn parallelization_union_inputs() -> Result<()> { let plan_parquet = union_exec(vec![parquet_exec(); 5]); let plan_csv = union_exec(vec![csv_exec(); 5]); @@ -4185,7 +4125,6 @@ mod tests { } 
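The hunks above all drop per-test `#[cfg(feature = "parquet")]` attributes; the module-level comment and attribute added earlier in this diff gate the whole test module instead. A minimal sketch of that gating pattern (module and test names here are illustrative, not taken from the diff):

```rust
// Gate the entire test module once instead of annotating every test.
// `parquet` is the crate feature referenced above; the test body is a stub.
#[cfg(feature = "parquet")]
#[cfg(test)]
mod tests {
    #[test]
    fn exercises_parquet_exec() {
        // Every test in this module may assume ParquetExec is compiled in,
        // so no per-test `#[cfg(feature = "parquet")]` attribute is needed.
    }
}
```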
#[test] - #[cfg(feature = "parquet")] fn parallelization_prior_to_sort_preserving_merge() -> Result<()> { let schema = schema(); let sort_key = vec![PhysicalSortExpr { @@ -4216,7 +4155,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn parallelization_sort_preserving_merge_with_union() -> Result<()> { let schema = schema(); let sort_key = vec![PhysicalSortExpr { @@ -4251,7 +4189,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn parallelization_does_not_benefit() -> Result<()> { let schema = schema(); let sort_key = vec![PhysicalSortExpr { @@ -4280,7 +4217,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn parallelization_ignores_transitively_with_projection_parquet() -> Result<()> { // sorted input let schema = schema(); @@ -4361,7 +4297,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn remove_redundant_roundrobins() -> Result<()> { let input = parquet_exec(); let repartition = repartition_exec(repartition_exec(input)); @@ -4412,40 +4347,63 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn do_not_preserve_ordering_through_repartition() -> Result<()> { let schema = schema(); let sort_key = vec![PhysicalSortExpr { - expr: col("c", &schema).unwrap(), + expr: col("a", &schema).unwrap(), options: SortOptions::default(), }]; let input = parquet_exec_multiple_sorted(vec![sort_key.clone()]); let physical_plan = sort_preserving_merge_exec(sort_key, filter_exec(input)); let expected = &[ - "SortPreservingMergeExec: [c@2 ASC]", - "SortExec: expr=[c@2 ASC]", + "SortPreservingMergeExec: [a@0 ASC]", + "SortExec: expr=[a@0 ASC]", "FilterExec: c@2 = 0", "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", - "ParquetExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", + "ParquetExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC]", ]; assert_optimized!(expected, physical_plan.clone(), true); let expected = &[ - "SortExec: expr=[c@2 ASC]", + "SortExec: expr=[a@0 ASC]", + "CoalescePartitionsExec", + "FilterExec: c@2 = 0", + "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", + "ParquetExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC]", + ]; + assert_optimized!(expected, physical_plan, false); + + Ok(()) + } + + #[test] + fn no_need_for_sort_after_filter() -> Result<()> { + let schema = schema(); + let sort_key = vec![PhysicalSortExpr { + expr: col("c", &schema).unwrap(), + options: SortOptions::default(), + }]; + let input = parquet_exec_multiple_sorted(vec![sort_key.clone()]); + let physical_plan = sort_preserving_merge_exec(sort_key, filter_exec(input)); + + let expected = &[ + // After CoalescePartitionsExec c is still constant. Hence c@2 ASC ordering is already satisfied. "CoalescePartitionsExec", + // Since after this stage c is constant. c@2 ASC ordering is already satisfied. 
"FilterExec: c@2 = 0", "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", "ParquetExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", ]; + + assert_optimized!(expected, physical_plan.clone(), true); assert_optimized!(expected, physical_plan, false); Ok(()) } #[test] - #[cfg(feature = "parquet")] fn do_not_preserve_ordering_through_repartition2() -> Result<()> { let schema = schema(); let sort_key = vec![PhysicalSortExpr { @@ -4484,7 +4442,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn do_not_preserve_ordering_through_repartition3() -> Result<()> { let schema = schema(); let sort_key = vec![PhysicalSortExpr { @@ -4507,7 +4464,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn do_not_put_sort_when_input_is_invalid() -> Result<()> { let schema = schema(); let sort_key = vec![PhysicalSortExpr { @@ -4546,7 +4502,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn put_sort_when_input_is_valid() -> Result<()> { let schema = schema(); let sort_key = vec![PhysicalSortExpr { @@ -4589,7 +4544,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn do_not_add_unnecessary_hash() -> Result<()> { let schema = schema(); let sort_key = vec![PhysicalSortExpr { @@ -4645,7 +4599,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn optimize_away_unnecessary_repartition() -> Result<()> { let physical_plan = coalesce_partitions_exec(repartition_exec(parquet_exec())); let expected = &[ @@ -4665,7 +4618,6 @@ mod tests { } #[test] - #[cfg(feature = "parquet")] fn optimize_away_unnecessary_repartition2() -> Result<()> { let physical_plan = filter_exec(repartition_exec(coalesce_partitions_exec( filter_exec(repartition_exec(parquet_exec())), diff --git a/datafusion/core/src/physical_optimizer/enforce_sorting.rs b/datafusion/core/src/physical_optimizer/enforce_sorting.rs index 822a224d236a..4779ced44f1a 100644 --- a/datafusion/core/src/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/src/physical_optimizer/enforce_sorting.rs @@ -51,18 +51,16 @@ use crate::physical_plan::coalesce_partitions::CoalescePartitionsExec; use crate::physical_plan::sorts::sort::SortExec; use crate::physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; use crate::physical_plan::windows::{ - get_best_fitting_window, BoundedWindowAggExec, PartitionSearchMode, WindowAggExec, + get_best_fitting_window, BoundedWindowAggExec, WindowAggExec, }; use crate::physical_plan::{with_new_children_if_necessary, Distribution, ExecutionPlan}; use datafusion_common::tree_node::{Transformed, TreeNode, VisitRecursion}; use datafusion_common::{plan_err, DataFusionError}; -use datafusion_physical_expr::utils::{ - ordering_satisfy, ordering_satisfy_requirement_concrete, -}; use datafusion_physical_expr::{PhysicalSortExpr, PhysicalSortRequirement}; use datafusion_physical_plan::repartition::RepartitionExec; +use datafusion_physical_plan::windows::PartitionSearchMode; use itertools::izip; /// This rule inspects [`SortExec`]'s in the given physical plan and removes the @@ -451,13 +449,11 @@ fn ensure_sorting( { let physical_ordering = child.output_ordering(); match (required_ordering, physical_ordering) { - (Some(required_ordering), Some(physical_ordering)) => { - if !ordering_satisfy_requirement_concrete( - physical_ordering, - &required_ordering, - || child.equivalence_properties(), - || child.ordering_equivalence_properties(), - ) { + (Some(required_ordering), Some(_)) => { + if !child + .equivalence_properties() + 
.ordering_satisfy_requirement(&required_ordering) + { // Make sure we preserve the ordering requirements: update_child_to_remove_unnecessary_sort(child, sort_onwards, &plan)?; add_sort_above(child, &required_ordering, None); @@ -516,13 +512,12 @@ fn analyze_immediate_sort_removal( ) -> Option { if let Some(sort_exec) = plan.as_any().downcast_ref::() { let sort_input = sort_exec.input().clone(); + // If this sort is unnecessary, we should remove it: - if ordering_satisfy( - sort_input.output_ordering(), - sort_exec.output_ordering(), - || sort_input.equivalence_properties(), - || sort_input.ordering_equivalence_properties(), - ) { + if sort_input + .equivalence_properties() + .ordering_satisfy(sort_exec.output_ordering().unwrap_or(&[])) + { // Since we know that a `SortExec` has exactly one child, // we can use the zero index safely: return Some( diff --git a/datafusion/core/src/physical_optimizer/output_requirements.rs b/datafusion/core/src/physical_optimizer/output_requirements.rs index f5eacd5ee60c..d9cdc292dd56 100644 --- a/datafusion/core/src/physical_optimizer/output_requirements.rs +++ b/datafusion/core/src/physical_optimizer/output_requirements.rs @@ -33,7 +33,7 @@ use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{Result, Statistics}; use datafusion_physical_expr::{ - Distribution, LexOrderingReq, PhysicalSortExpr, PhysicalSortRequirement, + Distribution, LexRequirement, PhysicalSortExpr, PhysicalSortRequirement, }; use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; @@ -90,14 +90,14 @@ enum RuleMode { #[derive(Debug)] struct OutputRequirementExec { input: Arc, - order_requirement: Option, + order_requirement: Option, dist_requirement: Distribution, } impl OutputRequirementExec { fn new( input: Arc, - requirements: Option, + requirements: Option, dist_requirement: Distribution, ) -> Self { Self { diff --git a/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs b/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs index ea394915a62c..5e4a0a02e61b 100644 --- a/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs +++ b/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs @@ -31,7 +31,6 @@ use super::utils::is_repartition; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNode, VisitRecursion}; -use datafusion_physical_expr::utils::ordering_satisfy; use datafusion_physical_plan::unbounded_output; /// For a given `plan`, this object carries the information one needs from its @@ -258,12 +257,10 @@ pub(crate) fn replace_with_order_preserving_variants( is_spm_better || use_order_preserving_variant, )?; // If this sort is unnecessary, we should remove it and update the plan: - if ordering_satisfy( - updated_sort_input.output_ordering(), - plan.output_ordering(), - || updated_sort_input.equivalence_properties(), - || updated_sort_input.ordering_equivalence_properties(), - ) { + if updated_sort_input + .equivalence_properties() + .ordering_satisfy(plan.output_ordering().unwrap_or(&[])) + { return Ok(Transformed::Yes(OrderPreservationContext { plan: updated_sort_input, ordering_onwards: vec![None], @@ -278,21 +275,21 @@ pub(crate) fn replace_with_order_preserving_variants( mod tests { use super::*; - use crate::prelude::SessionConfig; - use crate::datasource::file_format::file_compression_type::FileCompressionType; use 
crate::datasource::listing::PartitionedFile; use crate::datasource::physical_plan::{CsvExec, FileScanConfig}; use crate::physical_plan::coalesce_batches::CoalesceBatchesExec; use crate::physical_plan::coalesce_partitions::CoalescePartitionsExec; - use crate::physical_plan::filter::FilterExec; use crate::physical_plan::joins::{HashJoinExec, PartitionMode}; use crate::physical_plan::repartition::RepartitionExec; use crate::physical_plan::sorts::sort::SortExec; use crate::physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; use crate::physical_plan::{displayable, Partitioning}; + use crate::prelude::SessionConfig; + use arrow::compute::SortOptions; + use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion_common::tree_node::TreeNode; use datafusion_common::{Result, Statistics}; use datafusion_execution::object_store::ObjectStoreUrl; @@ -300,9 +297,6 @@ mod tests { use datafusion_physical_expr::expressions::{self, col, Column}; use datafusion_physical_expr::PhysicalSortExpr; - use arrow::compute::SortOptions; - use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; - /// Runs the `replace_with_order_preserving_variants` sub-rule and asserts the plan /// against the original and expected plans. /// diff --git a/datafusion/core/src/physical_optimizer/sort_pushdown.rs b/datafusion/core/src/physical_optimizer/sort_pushdown.rs index a99399592f15..b9502d92ac12 100644 --- a/datafusion/core/src/physical_optimizer/sort_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/sort_pushdown.rs @@ -29,15 +29,12 @@ use crate::physical_plan::sorts::sort::SortExec; use crate::physical_plan::{with_new_children_if_necessary, ExecutionPlan}; use datafusion_common::tree_node::{Transformed, TreeNode, VisitRecursion}; -use datafusion_common::{ - plan_datafusion_err, plan_err, DataFusionError, JoinSide, Result, -}; +use datafusion_common::{plan_err, DataFusionError, JoinSide, Result}; use datafusion_expr::JoinType; use datafusion_physical_expr::expressions::Column; -use datafusion_physical_expr::utils::{ - ordering_satisfy, ordering_satisfy_requirement, requirements_compatible, +use datafusion_physical_expr::{ + LexRequirementRef, PhysicalSortExpr, PhysicalSortRequirement, }; -use datafusion_physical_expr::{PhysicalSortExpr, PhysicalSortRequirement}; use itertools::izip; @@ -127,29 +124,27 @@ pub(crate) fn pushdown_sorts( requirements: SortPushDown, ) -> Result> { let plan = &requirements.plan; - let parent_required = requirements.required_ordering.as_deref(); - const ERR_MSG: &str = "Expects parent requirement to contain something"; - let err = || plan_datafusion_err!("{}", ERR_MSG); + let parent_required = requirements.required_ordering.as_deref().unwrap_or(&[]); if let Some(sort_exec) = plan.as_any().downcast_ref::() { - let mut new_plan = plan.clone(); - if !ordering_satisfy_requirement( - plan.output_ordering(), - parent_required, - || plan.equivalence_properties(), - || plan.ordering_equivalence_properties(), - ) { + let new_plan = if !plan + .equivalence_properties() + .ordering_satisfy_requirement(parent_required) + { // If the current plan is a SortExec, modify it to satisfy parent requirements: - let parent_required_expr = parent_required.ok_or_else(err)?; - new_plan = sort_exec.input().clone(); - add_sort_above(&mut new_plan, parent_required_expr, sort_exec.fetch()); + let mut new_plan = sort_exec.input().clone(); + add_sort_above(&mut new_plan, parent_required, sort_exec.fetch()); + new_plan + } else { + requirements.plan }; let required_ordering = new_plan 
.output_ordering() - .map(PhysicalSortRequirement::from_sort_exprs); + .map(PhysicalSortRequirement::from_sort_exprs) + .unwrap_or_default(); // Since new_plan is a SortExec, we can safely get the 0th index. let child = new_plan.children().swap_remove(0); if let Some(adjusted) = - pushdown_requirement_to_children(&child, required_ordering.as_deref())? + pushdown_requirement_to_children(&child, &required_ordering)? { // Can push down requirements Ok(Transformed::Yes(SortPushDown { @@ -163,12 +158,10 @@ pub(crate) fn pushdown_sorts( } } else { // Executors other than SortExec - if ordering_satisfy_requirement( - plan.output_ordering(), - parent_required, - || plan.equivalence_properties(), - || plan.ordering_equivalence_properties(), - ) { + if plan + .equivalence_properties() + .ordering_satisfy_requirement(parent_required) + { // Satisfies parent requirements, immediately return. return Ok(Transformed::Yes(SortPushDown { required_ordering: None, @@ -184,9 +177,8 @@ pub(crate) fn pushdown_sorts( })) } else { // Can not push down requirements, add new SortExec: - let parent_required_expr = parent_required.ok_or_else(err)?; let mut new_plan = requirements.plan; - add_sort_above(&mut new_plan, parent_required_expr, None); + add_sort_above(&mut new_plan, parent_required, None); Ok(Transformed::Yes(SortPushDown::init(new_plan))) } } @@ -194,18 +186,21 @@ pub(crate) fn pushdown_sorts( fn pushdown_requirement_to_children( plan: &Arc, - parent_required: Option<&[PhysicalSortRequirement]>, + parent_required: LexRequirementRef, ) -> Result>>>> { - const ERR_MSG: &str = "Expects parent requirement to contain something"; - let err = || plan_datafusion_err!("{}", ERR_MSG); let maintains_input_order = plan.maintains_input_order(); if is_window(plan) { let required_input_ordering = plan.required_input_ordering(); - let request_child = required_input_ordering[0].as_deref(); + let request_child = required_input_ordering[0].as_deref().unwrap_or(&[]); let child_plan = plan.children().swap_remove(0); match determine_children_requirement(parent_required, request_child, child_plan) { RequirementsCompatibility::Satisfy => { - Ok(Some(vec![request_child.map(|r| r.to_vec())])) + let req = if request_child.is_empty() { + None + } else { + Some(request_child.to_vec()) + }; + Ok(Some(vec![req])) } RequirementsCompatibility::Compatible(adjusted) => Ok(Some(vec![adjusted])), RequirementsCompatibility::NonCompatible => Ok(None), @@ -213,16 +208,17 @@ fn pushdown_requirement_to_children( } else if is_union(plan) { // UnionExec does not have real sort requirements for its input. 
Here we change the adjusted_request_ordering to UnionExec's output ordering and // propagate the sort requirements down to correct the unnecessary descendant SortExec under the UnionExec - Ok(Some(vec![ - parent_required.map(|elem| elem.to_vec()); - plan.children().len() - ])) + let req = if parent_required.is_empty() { + None + } else { + Some(parent_required.to_vec()) + }; + Ok(Some(vec![req; plan.children().len()])) } else if let Some(smj) = plan.as_any().downcast_ref::() { // If the current plan is SortMergeJoinExec let left_columns_len = smj.left().schema().fields().len(); - let parent_required_expr = PhysicalSortRequirement::to_sort_exprs( - parent_required.ok_or_else(err)?.iter().cloned(), - ); + let parent_required_expr = + PhysicalSortRequirement::to_sort_exprs(parent_required.iter().cloned()); let expr_source_side = expr_source_sides(&parent_required_expr, smj.join_type(), left_columns_len); match expr_source_side { @@ -236,10 +232,9 @@ fn pushdown_requirement_to_children( let right_offset = smj.schema().fields.len() - smj.right().schema().fields.len(); let new_right_required = - shift_right_required(parent_required.ok_or_else(err)?, right_offset)?; - let new_right_required_expr = PhysicalSortRequirement::to_sort_exprs( - new_right_required.iter().cloned(), - ); + shift_right_required(parent_required, right_offset)?; + let new_right_required_expr = + PhysicalSortRequirement::to_sort_exprs(new_right_required); try_pushdown_requirements_to_join( smj, parent_required, @@ -260,34 +255,43 @@ fn pushdown_requirement_to_children( || plan.as_any().is::() || is_limit(plan) || plan.as_any().is::() - // Do not push-down through SortPreservingMergeExec when - // ordering requirement invalidates requirement of sort preserving merge exec. - || (is_sort_preserving_merge(plan) && !ordering_satisfy( - parent_required - .map(|req| PhysicalSortRequirement::to_sort_exprs(req.to_vec())) - .as_deref(), - plan.output_ordering(), - || plan.equivalence_properties(), - || plan.ordering_equivalence_properties(), - ) - ) { // If the current plan is a leaf node or can not maintain any of the input ordering, can not pushed down requirements. // For RepartitionExec, we always choose to not push down the sort requirements even the RepartitionExec(input_partition=1) could maintain input ordering. // Pushing down is not beneficial Ok(None) + } else if is_sort_preserving_merge(plan) { + let new_ordering = + PhysicalSortRequirement::to_sort_exprs(parent_required.to_vec()); + let mut spm_eqs = plan.equivalence_properties(); + // Sort preserving merge will have new ordering, one requirement above is pushed down to its below. + spm_eqs = spm_eqs.with_reorder(new_ordering); + // Do not push-down through SortPreservingMergeExec when + // ordering requirement invalidates requirement of sort preserving merge exec. + if !spm_eqs.ordering_satisfy(plan.output_ordering().unwrap_or(&[])) { + Ok(None) + } else { + // Can push-down through SortPreservingMergeExec, because parent requirement is finer + // than SortPreservingMergeExec output ordering. 
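The `SortPreservingMergeExec` branch above decides pushdown by treating the parent requirement as the merge's new input ordering and re-checking the merge's own output ordering. A condensed sketch of that check, assuming only the `equivalence_properties`, `with_reorder`, and `ordering_satisfy` methods that appear in this diff (the helper function itself is hypothetical):

```rust
use std::sync::Arc;

use datafusion_physical_expr::{LexRequirementRef, PhysicalSortRequirement};
use datafusion_physical_plan::ExecutionPlan;

/// Returns true if `parent_required` can be pushed below a sort preserving
/// merge without invalidating the ordering the merge itself promises.
fn can_push_through_spm(
    spm: &Arc<dyn ExecutionPlan>,
    parent_required: LexRequirementRef,
) -> bool {
    // Treat the pushed-down requirement as the ordering the merge would see
    // from its input once the requirement is applied below it.
    let new_ordering = PhysicalSortRequirement::to_sort_exprs(parent_required.to_vec());
    let spm_eqs = spm.equivalence_properties().with_reorder(new_ordering);
    // Pushing down is safe only if the merge's advertised output ordering
    // is still satisfied under that assumption.
    spm_eqs.ordering_satisfy(spm.output_ordering().unwrap_or(&[]))
}
```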
+ let req = if parent_required.is_empty() { + None + } else { + Some(parent_required.to_vec()) + }; + Ok(Some(vec![req])) + } } else { Ok(Some( maintains_input_order - .iter() + .into_iter() .map(|flag| { - if *flag { - parent_required.map(|elem| elem.to_vec()) + if flag && !parent_required.is_empty() { + Some(parent_required.to_vec()) } else { None } }) - .collect::>(), + .collect(), )) } // TODO: Add support for Projection push down @@ -298,26 +302,26 @@ fn pushdown_requirement_to_children( /// If the the parent requirements are more specific, push down the parent requirements /// If they are not compatible, need to add Sort. fn determine_children_requirement( - parent_required: Option<&[PhysicalSortRequirement]>, - request_child: Option<&[PhysicalSortRequirement]>, + parent_required: LexRequirementRef, + request_child: LexRequirementRef, child_plan: Arc, ) -> RequirementsCompatibility { - if requirements_compatible( - request_child, - parent_required, - || child_plan.ordering_equivalence_properties(), - || child_plan.equivalence_properties(), - ) { + if child_plan + .equivalence_properties() + .requirements_compatible(request_child, parent_required) + { // request child requirements are more specific, no need to push down the parent requirements RequirementsCompatibility::Satisfy - } else if requirements_compatible( - parent_required, - request_child, - || child_plan.ordering_equivalence_properties(), - || child_plan.equivalence_properties(), - ) { + } else if child_plan + .equivalence_properties() + .requirements_compatible(parent_required, request_child) + { // parent requirements are more specific, adjust the request child requirements and push down the new requirements - let adjusted = parent_required.map(|r| r.to_vec()); + let adjusted = if parent_required.is_empty() { + None + } else { + Some(parent_required.to_vec()) + }; RequirementsCompatibility::Compatible(adjusted) } else { RequirementsCompatibility::NonCompatible @@ -325,7 +329,7 @@ fn determine_children_requirement( } fn try_pushdown_requirements_to_join( smj: &SortMergeJoinExec, - parent_required: Option<&[PhysicalSortRequirement]>, + parent_required: LexRequirementRef, sort_expr: Vec, push_side: JoinSide, ) -> Result>>>> { @@ -335,22 +339,22 @@ fn try_pushdown_requirements_to_join( JoinSide::Left => (sort_expr.as_slice(), right_ordering), JoinSide::Right => (left_ordering, sort_expr.as_slice()), }; + let join_type = smj.join_type(); + let probe_side = SortMergeJoinExec::probe_side(&join_type); let new_output_ordering = calculate_join_output_ordering( new_left_ordering, new_right_ordering, - smj.join_type(), + join_type, smj.on(), smj.left().schema().fields.len(), &smj.maintains_input_order(), - Some(SortMergeJoinExec::probe_side(&smj.join_type())), + Some(probe_side), ); - Ok(ordering_satisfy_requirement( - new_output_ordering.as_deref(), - parent_required, - || smj.equivalence_properties(), - || smj.ordering_equivalence_properties(), - ) - .then(|| { + let mut smj_eqs = smj.equivalence_properties(); + // smj will have this ordering when its input changes. 
+ smj_eqs = smj_eqs.with_reorder(new_output_ordering.unwrap_or_default()); + let should_pushdown = smj_eqs.ordering_satisfy_requirement(parent_required); + Ok(should_pushdown.then(|| { let mut required_input_ordering = smj.required_input_ordering(); let new_req = Some(PhysicalSortRequirement::from_sort_exprs(&sort_expr)); match push_side { @@ -414,7 +418,7 @@ fn expr_source_sides( } fn shift_right_required( - parent_required: &[PhysicalSortRequirement], + parent_required: LexRequirementRef, left_columns_len: usize, ) -> Result> { let new_right_required: Vec = parent_required diff --git a/datafusion/core/src/physical_optimizer/topk_aggregation.rs b/datafusion/core/src/physical_optimizer/topk_aggregation.rs index 572e796a8ba7..e0a8da82e35f 100644 --- a/datafusion/core/src/physical_optimizer/topk_aggregation.rs +++ b/datafusion/core/src/physical_optimizer/topk_aggregation.rs @@ -75,7 +75,7 @@ impl TopKAggregation { aggr.filter_expr().to_vec(), aggr.order_by_expr().to_vec(), aggr.input().clone(), - aggr.input_schema().clone(), + aggr.input_schema(), ) .expect("Unable to copy Aggregate!") .with_limit(Some(limit)); diff --git a/datafusion/core/src/physical_optimizer/utils.rs b/datafusion/core/src/physical_optimizer/utils.rs index 403af4b16ec7..530df374ca7c 100644 --- a/datafusion/core/src/physical_optimizer/utils.rs +++ b/datafusion/core/src/physical_optimizer/utils.rs @@ -30,8 +30,7 @@ use crate::physical_plan::union::UnionExec; use crate::physical_plan::windows::{BoundedWindowAggExec, WindowAggExec}; use crate::physical_plan::{displayable, ExecutionPlan}; -use datafusion_physical_expr::utils::ordering_satisfy_requirement; -use datafusion_physical_expr::PhysicalSortRequirement; +use datafusion_physical_expr::{LexRequirementRef, PhysicalSortRequirement}; /// This object implements a tree that we use while keeping track of paths /// leading to [`SortExec`]s. @@ -100,16 +99,14 @@ pub(crate) fn get_children_exectrees( /// given ordering requirements while preserving the original partitioning. pub fn add_sort_above( node: &mut Arc, - sort_requirement: &[PhysicalSortRequirement], + sort_requirement: LexRequirementRef, fetch: Option, ) { // If the ordering requirement is already satisfied, do not add a sort. - if !ordering_satisfy_requirement( - node.output_ordering(), - Some(sort_requirement), - || node.equivalence_properties(), - || node.ordering_equivalence_properties(), - ) { + if !node + .equivalence_properties() + .ordering_satisfy_requirement(sort_requirement) + { let sort_expr = PhysicalSortRequirement::to_sort_exprs(sort_requirement.to_vec()); let new_sort = SortExec::new(sort_expr, node.clone()).with_fetch(fetch); diff --git a/datafusion/core/src/prelude.rs b/datafusion/core/src/prelude.rs index 7689468e5d13..5cd8b3870f81 100644 --- a/datafusion/core/src/prelude.rs +++ b/datafusion/core/src/prelude.rs @@ -13,7 +13,7 @@ // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations -// under the License.pub}, +// under the License. //! DataFusion "prelude" to simplify importing common types. //! 
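The `add_sort_above` hunk above shows the shape that recurs throughout this diff: instead of calling the old free functions with an output ordering plus two closures, callers now ask the node's `EquivalenceProperties` directly. A minimal sketch of the new call pattern (the wrapper function is hypothetical):

```rust
use std::sync::Arc;

use datafusion_physical_expr::LexRequirementRef;
use datafusion_physical_plan::ExecutionPlan;

/// Does `node` already deliver the given sort requirement?
fn satisfies_requirement(
    node: &Arc<dyn ExecutionPlan>,
    requirement: LexRequirementRef,
) -> bool {
    // Previously: ordering_satisfy_requirement(node.output_ordering(), Some(requirement),
    //   || node.equivalence_properties(), || node.ordering_equivalence_properties()).
    // Now the equivalence properties own the check.
    node.equivalence_properties()
        .ordering_satisfy_requirement(requirement)
}
```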
diff --git a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs index 50d3610deed3..821f236af87b 100644 --- a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs @@ -35,38 +35,33 @@ use datafusion_physical_expr::expressions::{col, Sum}; use datafusion_physical_expr::{AggregateExpr, PhysicalSortExpr}; use test_utils::add_empty_batches; -#[cfg(test)] -mod tests { - use super::*; - - #[tokio::test(flavor = "multi_thread", worker_threads = 8)] - async fn aggregate_test() { - let test_cases = vec![ - vec!["a"], - vec!["b", "a"], - vec!["c", "a"], - vec!["c", "b", "a"], - vec!["d", "a"], - vec!["d", "b", "a"], - vec!["d", "c", "a"], - vec!["d", "c", "b", "a"], - ]; - let n = 300; - let distincts = vec![10, 20]; - for distinct in distincts { - let mut handles = Vec::new(); - for i in 0..n { - let test_idx = i % test_cases.len(); - let group_by_columns = test_cases[test_idx].clone(); - let job = tokio::spawn(run_aggregate_test( - make_staggered_batches::(1000, distinct, i as u64), - group_by_columns, - )); - handles.push(job); - } - for job in handles { - job.await.unwrap(); - } +#[tokio::test(flavor = "multi_thread", worker_threads = 8)] +async fn aggregate_test() { + let test_cases = vec![ + vec!["a"], + vec!["b", "a"], + vec!["c", "a"], + vec!["c", "b", "a"], + vec!["d", "a"], + vec!["d", "b", "a"], + vec!["d", "c", "a"], + vec!["d", "c", "b", "a"], + ]; + let n = 300; + let distincts = vec![10, 20]; + for distinct in distincts { + let mut handles = Vec::new(); + for i in 0..n { + let test_idx = i % test_cases.len(); + let group_by_columns = test_cases[test_idx].clone(); + let job = tokio::spawn(run_aggregate_test( + make_staggered_batches::(1000, distinct, i as u64), + group_by_columns, + )); + handles.push(job); + } + for job in handles { + job.await.unwrap(); } } } diff --git a/datafusion/core/tests/fuzz_cases/window_fuzz.rs b/datafusion/core/tests/fuzz_cases/window_fuzz.rs index db940a9794a1..af96063ffb5f 100644 --- a/datafusion/core/tests/fuzz_cases/window_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/window_fuzz.rs @@ -22,7 +22,6 @@ use arrow::compute::{concat_batches, SortOptions}; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; use arrow::util::pretty::pretty_format_batches; - use datafusion::physical_plan::memory::MemoryExec; use datafusion::physical_plan::sorts::sort::SortExec; use datafusion::physical_plan::windows::{ @@ -44,107 +43,102 @@ use hashbrown::HashMap; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; -#[cfg(test)] -mod tests { - use super::*; - - use datafusion_physical_plan::windows::PartitionSearchMode::{ - Linear, PartiallySorted, Sorted, - }; +use datafusion_physical_plan::windows::PartitionSearchMode::{ + Linear, PartiallySorted, Sorted, +}; - #[tokio::test(flavor = "multi_thread", worker_threads = 16)] - async fn window_bounded_window_random_comparison() -> Result<()> { - // make_staggered_batches gives result sorted according to a, b, c - // In the test cases first entry represents partition by columns - // Second entry represents order by columns. - // Third entry represents search mode. 
- // In sorted mode physical plans are in the form for WindowAggExec - //``` - // WindowAggExec - // MemoryExec] - // ``` - // and in the form for BoundedWindowAggExec - // ``` - // BoundedWindowAggExec - // MemoryExec - // ``` - // In Linear and PartiallySorted mode physical plans are in the form for WindowAggExec - //``` - // WindowAggExec - // SortExec(required by window function) - // MemoryExec] - // ``` - // and in the form for BoundedWindowAggExec - // ``` - // BoundedWindowAggExec - // MemoryExec - // ``` - let test_cases = vec![ - (vec!["a"], vec!["a"], Sorted), - (vec!["a"], vec!["b"], Sorted), - (vec!["a"], vec!["a", "b"], Sorted), - (vec!["a"], vec!["b", "c"], Sorted), - (vec!["a"], vec!["a", "b", "c"], Sorted), - (vec!["b"], vec!["a"], Linear), - (vec!["b"], vec!["a", "b"], Linear), - (vec!["b"], vec!["a", "c"], Linear), - (vec!["b"], vec!["a", "b", "c"], Linear), - (vec!["c"], vec!["a"], Linear), - (vec!["c"], vec!["a", "b"], Linear), - (vec!["c"], vec!["a", "c"], Linear), - (vec!["c"], vec!["a", "b", "c"], Linear), - (vec!["b", "a"], vec!["a"], Sorted), - (vec!["b", "a"], vec!["b"], Sorted), - (vec!["b", "a"], vec!["c"], Sorted), - (vec!["b", "a"], vec!["a", "b"], Sorted), - (vec!["b", "a"], vec!["b", "c"], Sorted), - (vec!["b", "a"], vec!["a", "c"], Sorted), - (vec!["b", "a"], vec!["a", "b", "c"], Sorted), - (vec!["c", "b"], vec!["a"], Linear), - (vec!["c", "b"], vec!["a", "b"], Linear), - (vec!["c", "b"], vec!["a", "c"], Linear), - (vec!["c", "b"], vec!["a", "b", "c"], Linear), - (vec!["c", "a"], vec!["a"], PartiallySorted(vec![1])), - (vec!["c", "a"], vec!["b"], PartiallySorted(vec![1])), - (vec!["c", "a"], vec!["c"], PartiallySorted(vec![1])), - (vec!["c", "a"], vec!["a", "b"], PartiallySorted(vec![1])), - (vec!["c", "a"], vec!["b", "c"], PartiallySorted(vec![1])), - (vec!["c", "a"], vec!["a", "c"], PartiallySorted(vec![1])), - ( - vec!["c", "a"], - vec!["a", "b", "c"], - PartiallySorted(vec![1]), - ), - (vec!["c", "b", "a"], vec!["a"], Sorted), - (vec!["c", "b", "a"], vec!["b"], Sorted), - (vec!["c", "b", "a"], vec!["c"], Sorted), - (vec!["c", "b", "a"], vec!["a", "b"], Sorted), - (vec!["c", "b", "a"], vec!["b", "c"], Sorted), - (vec!["c", "b", "a"], vec!["a", "c"], Sorted), - (vec!["c", "b", "a"], vec!["a", "b", "c"], Sorted), - ]; - let n = 300; - let n_distincts = vec![10, 20]; - for n_distinct in n_distincts { - let mut handles = Vec::new(); - for i in 0..n { - let idx = i % test_cases.len(); - let (pb_cols, ob_cols, search_mode) = test_cases[idx].clone(); - let job = tokio::spawn(run_window_test( - make_staggered_batches::(1000, n_distinct, i as u64), - i as u64, - pb_cols, - ob_cols, - search_mode, - )); - handles.push(job); - } - for job in handles { - job.await.unwrap()?; - } +#[tokio::test(flavor = "multi_thread", worker_threads = 16)] +async fn window_bounded_window_random_comparison() -> Result<()> { + // make_staggered_batches gives result sorted according to a, b, c + // In the test cases first entry represents partition by columns + // Second entry represents order by columns. + // Third entry represents search mode. 
+ // In sorted mode physical plans are in the form for WindowAggExec + //``` + // WindowAggExec + // MemoryExec] + // ``` + // and in the form for BoundedWindowAggExec + // ``` + // BoundedWindowAggExec + // MemoryExec + // ``` + // In Linear and PartiallySorted mode physical plans are in the form for WindowAggExec + //``` + // WindowAggExec + // SortExec(required by window function) + // MemoryExec] + // ``` + // and in the form for BoundedWindowAggExec + // ``` + // BoundedWindowAggExec + // MemoryExec + // ``` + let test_cases = vec![ + (vec!["a"], vec!["a"], Sorted), + (vec!["a"], vec!["b"], Sorted), + (vec!["a"], vec!["a", "b"], Sorted), + (vec!["a"], vec!["b", "c"], Sorted), + (vec!["a"], vec!["a", "b", "c"], Sorted), + (vec!["b"], vec!["a"], Linear), + (vec!["b"], vec!["a", "b"], Linear), + (vec!["b"], vec!["a", "c"], Linear), + (vec!["b"], vec!["a", "b", "c"], Linear), + (vec!["c"], vec!["a"], Linear), + (vec!["c"], vec!["a", "b"], Linear), + (vec!["c"], vec!["a", "c"], Linear), + (vec!["c"], vec!["a", "b", "c"], Linear), + (vec!["b", "a"], vec!["a"], Sorted), + (vec!["b", "a"], vec!["b"], Sorted), + (vec!["b", "a"], vec!["c"], Sorted), + (vec!["b", "a"], vec!["a", "b"], Sorted), + (vec!["b", "a"], vec!["b", "c"], Sorted), + (vec!["b", "a"], vec!["a", "c"], Sorted), + (vec!["b", "a"], vec!["a", "b", "c"], Sorted), + (vec!["c", "b"], vec!["a"], Linear), + (vec!["c", "b"], vec!["a", "b"], Linear), + (vec!["c", "b"], vec!["a", "c"], Linear), + (vec!["c", "b"], vec!["a", "b", "c"], Linear), + (vec!["c", "a"], vec!["a"], PartiallySorted(vec![1])), + (vec!["c", "a"], vec!["b"], PartiallySorted(vec![1])), + (vec!["c", "a"], vec!["c"], PartiallySorted(vec![1])), + (vec!["c", "a"], vec!["a", "b"], PartiallySorted(vec![1])), + (vec!["c", "a"], vec!["b", "c"], PartiallySorted(vec![1])), + (vec!["c", "a"], vec!["a", "c"], PartiallySorted(vec![1])), + ( + vec!["c", "a"], + vec!["a", "b", "c"], + PartiallySorted(vec![1]), + ), + (vec!["c", "b", "a"], vec!["a"], Sorted), + (vec!["c", "b", "a"], vec!["b"], Sorted), + (vec!["c", "b", "a"], vec!["c"], Sorted), + (vec!["c", "b", "a"], vec!["a", "b"], Sorted), + (vec!["c", "b", "a"], vec!["b", "c"], Sorted), + (vec!["c", "b", "a"], vec!["a", "c"], Sorted), + (vec!["c", "b", "a"], vec!["a", "b", "c"], Sorted), + ]; + let n = 300; + let n_distincts = vec![10, 20]; + for n_distinct in n_distincts { + let mut handles = Vec::new(); + for i in 0..n { + let idx = i % test_cases.len(); + let (pb_cols, ob_cols, search_mode) = test_cases[idx].clone(); + let job = tokio::spawn(run_window_test( + make_staggered_batches::(1000, n_distinct, i as u64), + i as u64, + pb_cols, + ob_cols, + search_mode, + )); + handles.push(job); + } + for job in handles { + job.await.unwrap()?; } - Ok(()) } + Ok(()) } fn get_random_function( diff --git a/datafusion/core/tests/sql/expr.rs b/datafusion/core/tests/sql/expr.rs index 1995a040158d..7d41ad4a881c 100644 --- a/datafusion/core/tests/sql/expr.rs +++ b/datafusion/core/tests/sql/expr.rs @@ -639,7 +639,7 @@ async fn test_uuid_expression() -> Result<()> { async fn test_extract_date_part() -> Result<()> { test_expression!("date_part('YEAR', CAST('2000-01-01' AS DATE))", "2000.0"); test_expression!( - "EXTRACT(year FROM to_timestamp('2020-09-08T12:00:00+00:00'))", + "EXTRACT(year FROM timestamp '2020-09-08T12:00:00+00:00')", "2020.0" ); test_expression!("date_part('QUARTER', CAST('2000-01-01' AS DATE))", "1.0"); @@ -686,35 +686,35 @@ async fn test_extract_date_part() -> Result<()> { "12.0" ); test_expression!( - "EXTRACT(second FROM 
to_timestamp('2020-09-08T12:00:12.12345678+00:00'))", + "EXTRACT(second FROM timestamp '2020-09-08T12:00:12.12345678+00:00')", "12.12345678" ); test_expression!( - "EXTRACT(millisecond FROM to_timestamp('2020-09-08T12:00:12.12345678+00:00'))", + "EXTRACT(millisecond FROM timestamp '2020-09-08T12:00:12.12345678+00:00')", "12123.45678" ); test_expression!( - "EXTRACT(microsecond FROM to_timestamp('2020-09-08T12:00:12.12345678+00:00'))", + "EXTRACT(microsecond FROM timestamp '2020-09-08T12:00:12.12345678+00:00')", "12123456.78" ); test_expression!( - "EXTRACT(nanosecond FROM to_timestamp('2020-09-08T12:00:12.12345678+00:00'))", + "EXTRACT(nanosecond FROM timestamp '2020-09-08T12:00:12.12345678+00:00')", "1.212345678e10" ); test_expression!( - "date_part('second', to_timestamp('2020-09-08T12:00:12.12345678+00:00'))", + "date_part('second', timestamp '2020-09-08T12:00:12.12345678+00:00')", "12.12345678" ); test_expression!( - "date_part('millisecond', to_timestamp('2020-09-08T12:00:12.12345678+00:00'))", + "date_part('millisecond', timestamp '2020-09-08T12:00:12.12345678+00:00')", "12123.45678" ); test_expression!( - "date_part('microsecond', to_timestamp('2020-09-08T12:00:12.12345678+00:00'))", + "date_part('microsecond', timestamp '2020-09-08T12:00:12.12345678+00:00')", "12123456.78" ); test_expression!( - "date_part('nanosecond', to_timestamp('2020-09-08T12:00:12.12345678+00:00'))", + "date_part('nanosecond', timestamp '2020-09-08T12:00:12.12345678+00:00')", "1.212345678e10" ); diff --git a/datafusion/core/tests/sql/timestamp.rs b/datafusion/core/tests/sql/timestamp.rs index ada66503a181..a18e6831b615 100644 --- a/datafusion/core/tests/sql/timestamp.rs +++ b/datafusion/core/tests/sql/timestamp.rs @@ -742,7 +742,7 @@ async fn test_arrow_typeof() -> Result<()> { "+-----------------------------------------------------------------------+", "| arrow_typeof(date_trunc(Utf8(\"microsecond\"),to_timestamp(Int64(61)))) |", "+-----------------------------------------------------------------------+", - "| Timestamp(Nanosecond, None) |", + "| Timestamp(Second, None) |", "+-----------------------------------------------------------------------+", ]; assert_batches_eq!(expected, &actual); diff --git a/datafusion/execution/Cargo.toml b/datafusion/execution/Cargo.toml index 6ae8bccdae38..e9bb87e9f8ac 100644 --- a/datafusion/execution/Cargo.toml +++ b/datafusion/execution/Cargo.toml @@ -19,9 +19,9 @@ name = "datafusion-execution" description = "Execution configuration support for DataFusion query engine" keywords = ["arrow", "query", "sql"] +readme = "README.md" version = { workspace = true } edition = { workspace = true } -readme = { workspace = true } homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } @@ -35,14 +35,14 @@ path = "src/lib.rs" [dependencies] arrow = { workspace = true } chrono = { version = "0.4", default-features = false } -dashmap = "5.4.0" -datafusion-common = { path = "../common", version = "32.0.0" } -datafusion-expr = { path = "../expr", version = "32.0.0" } -futures = "0.3" +dashmap = { workspace = true } +datafusion-common = { workspace = true } +datafusion-expr = { workspace = true } +futures = { workspace = true } hashbrown = { version = "0.14", features = ["raw"] } -log = "^0.4" -object_store = "0.7.0" -parking_lot = "0.12" -rand = "0.8" -tempfile = "3" -url = "2.2" +log = { workspace = true } +object_store = { workspace = true } +parking_lot = { workspace = true } +rand = { workspace = true } +tempfile = { workspace = true } +url = { 
workspace = true } diff --git a/datafusion/execution/README.md b/datafusion/execution/README.md new file mode 100644 index 000000000000..67aac6be82b3 --- /dev/null +++ b/datafusion/execution/README.md @@ -0,0 +1,26 @@ + + +# DataFusion Common + +[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. + +This crate is a submodule of DataFusion that provides execution runtime such as the memory pools and disk manager. + +[df]: https://crates.io/crates/datafusion diff --git a/datafusion/execution/src/memory_pool/mod.rs b/datafusion/execution/src/memory_pool/mod.rs index bbd8f4be4f1c..55555014f2ef 100644 --- a/datafusion/execution/src/memory_pool/mod.rs +++ b/datafusion/execution/src/memory_pool/mod.rs @@ -157,6 +157,11 @@ impl MemoryReservation { self.size } + /// Returns [MemoryConsumer] for this [MemoryReservation] + pub fn consumer(&self) -> &MemoryConsumer { + &self.registration.consumer + } + /// Frees all bytes from this reservation back to the underlying /// pool, returning the number of bytes freed. pub fn free(&mut self) -> usize { @@ -230,7 +235,7 @@ impl MemoryReservation { } } - /// Returns a new empty [`MemoryReservation`] with the same [`MemoryConsumer`] + /// Returns a new empty [`MemoryReservation`] with the same [`MemoryConsumer`] pub fn new_empty(&self) -> Self { Self { size: 0, diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml index c5cf6a1ac11f..5b1b42153877 100644 --- a/datafusion/expr/Cargo.toml +++ b/datafusion/expr/Cargo.toml @@ -19,9 +19,9 @@ name = "datafusion-expr" description = "Logical plan and expression representation for DataFusion query engine" keywords = ["datafusion", "logical", "plan", "expressions"] +readme = "README.md" version = { workspace = true } edition = { workspace = true } -readme = { workspace = true } homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } @@ -38,11 +38,11 @@ path = "src/lib.rs" ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] } arrow = { workspace = true } arrow-array = { workspace = true } -datafusion-common = { path = "../common", version = "32.0.0", default-features = false } +datafusion-common = { workspace = true } sqlparser = { workspace = true } strum = { version = "0.25.0", features = ["derive"] } strum_macros = "0.25.0" [dev-dependencies] -ctor = "0.2.0" -env_logger = "0.10" +ctor = { workspace = true } +env_logger = { workspace = true } diff --git a/datafusion/expr/README.md b/datafusion/expr/README.md index bcce30be39d9..b086f930e871 100644 --- a/datafusion/expr/README.md +++ b/datafusion/expr/README.md @@ -19,7 +19,7 @@ # DataFusion Logical Plan and Expressions -[DataFusion](df) is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. This crate is a submodule of DataFusion that provides data types and utilities for logical plans and expressions. 
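The `memory_pool` hunk above adds a `consumer()` accessor to `MemoryReservation`. A small, hypothetical usage sketch (the pool size and consumer name are made up; the other calls are existing `datafusion-execution` APIs, to the best of my knowledge):

```rust
use std::sync::Arc;

use datafusion_common::Result;
use datafusion_execution::memory_pool::{GreedyMemoryPool, MemoryConsumer, MemoryPool};

fn main() -> Result<()> {
    // A pool with a 1 MiB budget and one registered consumer.
    let pool: Arc<dyn MemoryPool> = Arc::new(GreedyMemoryPool::new(1024 * 1024));
    let mut reservation = MemoryConsumer::new("example_operator").register(&pool);
    reservation.try_grow(4096)?;

    // New in this diff: look up which consumer a reservation belongs to.
    println!(
        "{} currently holds {} bytes",
        reservation.consumer().name(),
        reservation.size()
    );
    Ok(())
}
```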
diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 16554133d828..4db565abfcf7 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -266,6 +266,8 @@ pub enum BuiltinScalarFunction { ToTimestampMillis, /// to_timestamp_micros ToTimestampMicros, + /// to_timestamp_nanos + ToTimestampNanos, /// to_timestamp_seconds ToTimestampSeconds, /// from_unixtime @@ -444,6 +446,7 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ToTimestamp => Volatility::Immutable, BuiltinScalarFunction::ToTimestampMillis => Volatility::Immutable, BuiltinScalarFunction::ToTimestampMicros => Volatility::Immutable, + BuiltinScalarFunction::ToTimestampNanos => Volatility::Immutable, BuiltinScalarFunction::ToTimestampSeconds => Volatility::Immutable, BuiltinScalarFunction::Translate => Volatility::Immutable, BuiltinScalarFunction::Trim => Volatility::Immutable, @@ -752,9 +755,13 @@ impl BuiltinScalarFunction { return plan_err!("The to_hex function can only accept integers."); } }), - BuiltinScalarFunction::ToTimestamp => Ok(Timestamp(Nanosecond, None)), + BuiltinScalarFunction::ToTimestamp => Ok(match &input_expr_types[0] { + Int64 => Timestamp(Second, None), + _ => Timestamp(Nanosecond, None), + }), BuiltinScalarFunction::ToTimestampMillis => Ok(Timestamp(Millisecond, None)), BuiltinScalarFunction::ToTimestampMicros => Ok(Timestamp(Microsecond, None)), + BuiltinScalarFunction::ToTimestampNanos => Ok(Timestamp(Nanosecond, None)), BuiltinScalarFunction::ToTimestampSeconds => Ok(Timestamp(Second, None)), BuiltinScalarFunction::FromUnixtime => Ok(Timestamp(Second, None)), BuiltinScalarFunction::Now => { @@ -995,6 +1002,18 @@ impl BuiltinScalarFunction { ], self.volatility(), ), + BuiltinScalarFunction::ToTimestampNanos => Signature::uniform( + 1, + vec![ + Int64, + Timestamp(Nanosecond, None), + Timestamp(Microsecond, None), + Timestamp(Millisecond, None), + Timestamp(Second, None), + Utf8, + ], + self.volatility(), + ), BuiltinScalarFunction::ToTimestampSeconds => Signature::uniform( 1, vec![ @@ -1431,6 +1450,7 @@ fn aliases(func: &BuiltinScalarFunction) -> &'static [&'static str] { BuiltinScalarFunction::ToTimestampMillis => &["to_timestamp_millis"], BuiltinScalarFunction::ToTimestampMicros => &["to_timestamp_micros"], BuiltinScalarFunction::ToTimestampSeconds => &["to_timestamp_seconds"], + BuiltinScalarFunction::ToTimestampNanos => &["to_timestamp_nanos"], BuiltinScalarFunction::FromUnixtime => &["from_unixtime"], // hashing functions diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 5368a2d8a22c..5a60c2470c95 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -834,6 +834,12 @@ scalar_expr!( date, "converts a string to a `Timestamp(Microseconds, None)`" ); +scalar_expr!( + ToTimestampNanos, + to_timestamp_nanos, + date, + "converts a string to a `Timestamp(Nanoseconds, None)`" +); scalar_expr!( ToTimestampSeconds, to_timestamp_seconds, diff --git a/datafusion/optimizer/Cargo.toml b/datafusion/optimizer/Cargo.toml index bf786686f474..797dd17a26b5 100644 --- a/datafusion/optimizer/Cargo.toml +++ b/datafusion/optimizer/Cargo.toml @@ -19,9 +19,9 @@ name = "datafusion-optimizer" description = "DataFusion Query Optimizer" keywords = [ "datafusion", "query", "optimizer" ] +readme = "README.md" version = { workspace = true } edition = { workspace = true } -readme = { workspace = true } homepage = { workspace = true } repository = { workspace = true } license = 
{ workspace = true } @@ -40,17 +40,17 @@ unicode_expressions = ["datafusion-physical-expr/unicode_expressions"] [dependencies] arrow = { workspace = true } -async-trait = "0.1.41" +async-trait = { workspace = true } chrono = { workspace = true } -datafusion-common = { path = "../common", version = "32.0.0", default-features = false } -datafusion-expr = { path = "../expr", version = "32.0.0" } +datafusion-common = { workspace = true } +datafusion-expr = { workspace = true } datafusion-physical-expr = { path = "../physical-expr", version = "32.0.0", default-features = false } hashbrown = { version = "0.14", features = ["raw"] } -itertools = "0.11" -log = "^0.4" +itertools = { workspace = true } +log = { workspace = true } regex-syntax = "0.8.0" [dev-dependencies] -ctor = "0.2.0" +ctor = { workspace = true } datafusion-sql = { path = "../sql", version = "32.0.0" } env_logger = "0.10.0" diff --git a/datafusion/optimizer/README.md b/datafusion/optimizer/README.md index c8baae03efa2..b8e5b93e6692 100644 --- a/datafusion/optimizer/README.md +++ b/datafusion/optimizer/README.md @@ -19,7 +19,7 @@ # DataFusion Query Optimizer -[DataFusion](df) is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory +[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. DataFusion has modular design, allowing individual crates to be re-used in other projects. diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index 8c2eb96a48d8..ae986b3c84dd 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -12,7 +12,8 @@ // specific language governing permissions and limitations // under the License. -//! Push Down Filter optimizer rule ensures that filters are applied as early as possible in the plan +//! [`PushDownFilter`] Moves filters so they are applied as early as possible in +//! the plan. use crate::optimizer::ApplyOrder; use crate::utils::{conjunction, split_conjunction, split_conjunction_owned}; @@ -33,31 +34,93 @@ use itertools::Itertools; use std::collections::{HashMap, HashSet}; use std::sync::Arc; -/// Push Down Filter optimizer rule pushes filter clauses down the plan +/// Optimizer rule for pushing (moving) filter expressions down in a plan so +/// they are applied as early as possible. +/// /// # Introduction -/// A filter-commutative operation is an operation whose result of filter(op(data)) = op(filter(data)). -/// An example of a filter-commutative operation is a projection; a counter-example is `limit`. /// -/// The filter-commutative property is column-specific. An aggregate grouped by A on SUM(B) -/// can commute with a filter that depends on A only, but does not commute with a filter that depends -/// on SUM(B). +/// The goal of this rule is to improve query performance by eliminating +/// redundant work. +/// +/// For example, given a plan that sorts all values where `a > 10`: +/// +/// ```text +/// Filter (a > 10) +/// Sort (a, b) +/// ``` +/// +/// A better plan is to filter the data *before* the Sort, which sorts fewer +/// rows and therefore does less work overall: +/// +/// ```text +/// Sort (a, b) +/// Filter (a > 10) <-- Filter is moved before the sort +/// ``` +/// +/// However it is not always possible to push filters down. 
For example, given a +/// plan that finds the top 3 values and then keeps only those that are greater +/// than 10, if the filter is pushed below the limit it would produce a +/// different result. +/// +/// ```text +/// Filter (a > 10) <-- can not move this Filter before the limit +/// Limit (fetch=3) +/// Sort (a, b) +/// ``` +/// +/// +/// More formally, a filter-commutative operation is an operation `op` that +/// satisfies `filter(op(data)) = op(filter(data))`. +/// +/// The filter-commutative property is plan and column-specific. A filter on `a` +/// can be pushed through an `Aggregate(group_by = [a], agg=[SUM(b)])`. However, a +/// filter on `SUM(b)` can not be pushed through the same aggregate. +/// +/// # Handling Conjunctions +/// +/// It is possible to only push down **part** of a filter expression if it is +/// connected with `AND`s (more formally if it is a "conjunction"). +/// +/// For example, given the following plan: +/// +/// ```text +/// Filter(a > 10 AND SUM(b) < 5) +/// Aggregate(group_by = [a], agg = [SUM(b)]) +/// ``` +/// +/// The `a > 10` is commutative with the `Aggregate` but `SUM(b) < 5` is not. +/// Therefore it is possible to only push part of the expression, resulting in: +/// +/// ```text +/// Filter(SUM(b) < 5) +/// Aggregate(group_by = [a], agg = [SUM(b)]) +/// Filter(a > 10) +/// ``` +/// +/// # Handling Column Aliases /// -/// This optimizer commutes filters with filter-commutative operations to push the filters -/// the closest possible to the scans, re-writing the filter expressions by every -/// projection that changes the filter's expression. +/// This optimizer must sometimes handle re-writing filter expressions when they are +/// pushed, for example if there is a projection that aliases `a+1` to `"b"`: /// -/// Filter: b Gt Int64(10) -/// Projection: a AS b +/// ```text +/// Filter (b > 10) +/// Projection: [a+1 AS "b"] <-- changes the name of `a+1` to `b` +/// ``` /// -/// is optimized to +/// To apply the filter prior to the `Projection`, all references to `b` must be +/// rewritten to `a+1`: /// -/// Projection: a AS b -/// Filter: a Gt Int64(10) <--- changed from b to a +/// ```text +/// Projection: a + 1 AS "b" +/// Filter: (a + 1 > 10) <--- changed from b to a + 1 +/// ``` +/// # Implementation Notes /// -/// This performs a single pass through the plan. When it passes through a filter, it stores that filter, -/// and when it reaches a node that does not commute with it, it adds the filter to that place. -/// When it passes through a projection, it re-writes the filter's expression taking into account that projection. -/// When multiple filters would have been written, it `AND` their expressions into a single expression.
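One way to observe the behaviour the rewritten documentation above describes is `EXPLAIN`, which prints the plan after the optimizer has run; the filter over the sorted subquery should end up applied before the sort. The table and query here are made up for illustration:

```rust
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    // Two-column table with generated column names `column1` and `column2`.
    ctx.sql("CREATE TABLE t AS VALUES (1, 2), (20, 3)").await?;
    // The optimized plan should show the filter pushed below the ORDER BY.
    ctx.sql(
        "EXPLAIN SELECT column1 \
         FROM (SELECT column1, column2 FROM t ORDER BY column2) s \
         WHERE column1 > 10",
    )
    .await?
    .show()
    .await?;
    Ok(())
}
```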
#[derive(Default)] pub struct PushDownFilter {} diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index cb3f13a51ec4..04fdcca0a994 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -1501,7 +1501,7 @@ mod tests { test_evaluate(expr, lit("foobarbaz")); // Check non string arguments - // to_timestamp("2020-09-08T12:00:00+00:00") --> timestamp(1599566400000000000i64) + // to_timestamp("2020-09-08T12:00:00+00:00") --> timestamp(1599566400i64) let expr = call_fn("to_timestamp", vec![lit("2020-09-08T12:00:00+00:00")]).unwrap(); test_evaluate(expr, lit_timestamp_nano(1599566400000000000i64)); diff --git a/datafusion/optimizer/src/simplify_expressions/utils.rs b/datafusion/optimizer/src/simplify_expressions/utils.rs index 28c61427c5ef..17e5d97c3006 100644 --- a/datafusion/optimizer/src/simplify_expressions/utils.rs +++ b/datafusion/optimizer/src/simplify_expressions/utils.rs @@ -525,8 +525,8 @@ pub fn simpl_concat_ws(delimiter: &Expr, args: &[Expr]) -> Result { d => Ok(concat_ws( d.clone(), args.iter() + .filter(|&x| !is_null(x)) .cloned() - .filter(|x| !is_null(x)) .collect::>(), )), } diff --git a/datafusion/optimizer/src/single_distinct_to_groupby.rs b/datafusion/optimizer/src/single_distinct_to_groupby.rs index 8e0f93cb5781..be76c069f0b7 100644 --- a/datafusion/optimizer/src/single_distinct_to_groupby.rs +++ b/datafusion/optimizer/src/single_distinct_to_groupby.rs @@ -74,7 +74,7 @@ fn is_single_distinct_agg(plan: &LogicalPlan) -> Result { distinct_count += 1; } for e in args { - fields_set.insert(e.display_name()?); + fields_set.insert(e.canonical_name()); } } } diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml index 6269f27310a6..4be625e384b9 100644 --- a/datafusion/physical-expr/Cargo.toml +++ b/datafusion/physical-expr/Cargo.toml @@ -19,9 +19,9 @@ name = "datafusion-physical-expr" description = "Physical expression implementation for DataFusion query engine" keywords = ["arrow", "query", "sql"] +readme = "README.md" version = { workspace = true } edition = { workspace = true } -readme = { workspace = true } homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } @@ -49,19 +49,19 @@ base64 = { version = "0.21", optional = true } blake2 = { version = "^0.10.2", optional = true } blake3 = { version = "1.0", optional = true } chrono = { workspace = true } -datafusion-common = { path = "../common", version = "32.0.0", default-features = false } -datafusion-expr = { path = "../expr", version = "32.0.0" } +datafusion-common = { workspace = true } +datafusion-expr = { workspace = true } half = { version = "2.1", default-features = false } hashbrown = { version = "0.14", features = ["raw"] } hex = { version = "0.4", optional = true } -indexmap = "2.0.0" +indexmap = { workspace = true } itertools = { version = "0.11", features = ["use_std"] } libc = "0.2.140" -log = "^0.4" +log = { workspace = true } md-5 = { version = "^0.10.0", optional = true } paste = "^1.0" petgraph = "0.6.2" -rand = "0.8" +rand = { workspace = true } regex = { version = "1.8", optional = true } sha2 = { version = "^0.10.1", optional = true } unicode-segmentation = { version = "^1.7.1", optional = true } @@ -69,8 +69,8 @@ uuid = { version = "^1.2", features = ["v4"] } [dev-dependencies] criterion = "0.5" -rand = "0.8" -rstest = "0.18.0" +rand = { workspace = 
true } +rstest = { workspace = true } [[bench]] harness = false diff --git a/datafusion/physical-expr/README.md b/datafusion/physical-expr/README.md index a887d3eb29fe..424256c77e7e 100644 --- a/datafusion/physical-expr/README.md +++ b/datafusion/physical-expr/README.md @@ -19,7 +19,7 @@ # DataFusion Physical Expressions -[DataFusion](df) is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. This crate is a submodule of DataFusion that provides data types and utilities for physical expressions. diff --git a/datafusion/physical-expr/src/aggregate/array_agg.rs b/datafusion/physical-expr/src/aggregate/array_agg.rs index 834925b8d554..4dccbfef07f8 100644 --- a/datafusion/physical-expr/src/aggregate/array_agg.rs +++ b/datafusion/physical-expr/src/aggregate/array_agg.rs @@ -24,7 +24,7 @@ use arrow::array::ArrayRef; use arrow::datatypes::{DataType, Field}; use arrow_array::Array; use datafusion_common::cast::as_list_array; -use datafusion_common::utils::wrap_into_list_array; +use datafusion_common::utils::array_into_list_array; use datafusion_common::Result; use datafusion_common::ScalarValue; use datafusion_expr::Accumulator; @@ -161,7 +161,7 @@ impl Accumulator for ArrayAggAccumulator { } let concated_array = arrow::compute::concat(&element_arrays)?; - let list_array = wrap_into_list_array(concated_array); + let list_array = array_into_list_array(concated_array); Ok(ScalarValue::List(Arc::new(list_array))) } diff --git a/datafusion/physical-expr/src/aggregate/array_agg_distinct.rs b/datafusion/physical-expr/src/aggregate/array_agg_distinct.rs index 21143ce54a20..9b391b0c42cf 100644 --- a/datafusion/physical-expr/src/aggregate/array_agg_distinct.rs +++ b/datafusion/physical-expr/src/aggregate/array_agg_distinct.rs @@ -185,7 +185,7 @@ mod tests { use arrow_array::types::Int32Type; use arrow_array::{Array, ListArray}; use arrow_buffer::OffsetBuffer; - use datafusion_common::utils::wrap_into_list_array; + use datafusion_common::utils::array_into_list_array; use datafusion_common::{internal_err, DataFusionError}; // arrow::compute::sort cann't sort ListArray directly, so we need to sort the inner primitive array and wrap it back into ListArray. 
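Not part of the diff: the test comment above describes sorting a `ListArray` by sorting its inner primitive values and re-wrapping them. A short sketch of that pattern, using the same `arrow::compute::sort` call and the `array_into_list_array` helper this PR renames `wrap_into_list_array` to (crate paths are assumptions of this sketch):

```rust
use std::sync::Arc;

use arrow_array::{Array, ArrayRef, Int32Array};
use datafusion_common::utils::array_into_list_array;

fn main() {
    // Sort the inner primitive values first ...
    let values: ArrayRef = Arc::new(Int32Array::from(vec![3, 1, 2]));
    let sorted =
        arrow::compute::sort(&values, None).expect("primitive arrays are sortable");
    // ... then wrap the sorted values back into a single-row ListArray.
    let list = array_into_list_array(sorted);
    assert_eq!(list.len(), 1); // one row containing [1, 2, 3]
}
```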
@@ -201,7 +201,7 @@ mod tests { }; let arr = arrow::compute::sort(&arr, None).unwrap(); - let list_arr = wrap_into_list_array(arr); + let list_arr = array_into_list_array(arr); ScalarValue::List(Arc::new(list_arr)) } diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index af4612272676..687502e79fed 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -25,12 +25,14 @@ use arrow::buffer::OffsetBuffer; use arrow::compute; use arrow::datatypes::{DataType, Field, UInt64Type}; use arrow_buffer::NullBuffer; -use datafusion_common::cast::{as_generic_string_array, as_int64_array, as_list_array}; -use datafusion_common::utils::wrap_into_list_array; + +use datafusion_common::cast::{ + as_generic_string_array, as_int64_array, as_list_array, as_string_array, +}; +use datafusion_common::utils::array_into_list_array; use datafusion_common::{ exec_err, internal_err, not_impl_err, plan_err, DataFusionError, Result, }; -use datafusion_expr::ColumnarValue; use itertools::Itertools; @@ -395,21 +397,10 @@ fn array_array(args: &[ArrayRef], data_type: DataType) -> Result { Ok(res) } -/// Convert one or more [`ColumnarValue`] of the same type into a -/// `ListArray` -/// -/// See [`array_array`] for more details. -fn array(values: &[ColumnarValue]) -> Result { - let arrays: Vec = values - .iter() - .map(|x| match x { - ColumnarValue::Array(array) => array.clone(), - ColumnarValue::Scalar(scalar) => scalar.to_array(), - }) - .collect(); - +/// `make_array` SQL function +pub fn make_array(arrays: &[ArrayRef]) -> Result { let mut data_type = DataType::Null; - for arg in &arrays { + for arg in arrays { let arg_data_type = arg.data_type(); if !arg_data_type.equals_datatype(&DataType::Null) { data_type = arg_data_type.clone(); @@ -421,21 +412,12 @@ fn array(values: &[ColumnarValue]) -> Result { // Either an empty array or all nulls: DataType::Null => { let array = new_null_array(&DataType::Null, arrays.len()); - Ok(Arc::new(wrap_into_list_array(array))) + Ok(Arc::new(array_into_list_array(array))) } - data_type => array_array(arrays.as_slice(), data_type), + data_type => array_array(arrays, data_type), } } -/// `make_array` SQL function -pub fn make_array(arrays: &[ArrayRef]) -> Result { - let values: Vec = arrays - .iter() - .map(|x| ColumnarValue::Array(x.clone())) - .collect(); - array(values.as_slice()) -} - fn return_empty(return_null: bool, data_type: DataType) -> Arc { if return_null { new_null_array(&data_type, 1) @@ -654,7 +636,7 @@ pub fn array_append(args: &[ArrayRef]) -> Result { check_datatypes("array_append", &[arr.values(), element])?; let res = match arr.value_type() { DataType::List(_) => concat_internal(args)?, - DataType::Null => return array(&[ColumnarValue::Array(args[1].clone())]), + DataType::Null => return make_array(&[element.to_owned()]), data_type => { macro_rules! array_function { ($ARRAY_TYPE:ident) => { @@ -728,7 +710,7 @@ pub fn array_prepend(args: &[ArrayRef]) -> Result { check_datatypes("array_prepend", &[element, arr.values()])?; let res = match arr.value_type() { DataType::List(_) => concat_internal(args)?, - DataType::Null => return array(&[ColumnarValue::Array(args[0].clone())]), + DataType::Null => return make_array(&[element.to_owned()]), data_type => { macro_rules! 
array_function { ($ARRAY_TYPE:ident) => { @@ -743,35 +725,31 @@ pub fn array_prepend(args: &[ArrayRef]) -> Result { } fn align_array_dimensions(args: Vec) -> Result> { - // Find the maximum number of dimensions - let max_ndim: u64 = (*args - .iter() - .map(|arr| compute_array_ndims(Some(arr.clone()))) - .collect::>>>()? + let args_ndim = args .iter() - .max() - .unwrap()) - .unwrap(); + .map(|arg| compute_array_ndims(Some(arg.to_owned()))) + .collect::>>()? + .into_iter() + .map(|x| x.unwrap_or(0)) + .collect::>(); + let max_ndim = args_ndim.iter().max().unwrap_or(&0); // Align the dimensions of the arrays let aligned_args: Result> = args .into_iter() - .map(|array| { - let ndim = compute_array_ndims(Some(array.clone()))?.unwrap(); + .zip(args_ndim.iter()) + .map(|(array, ndim)| { if ndim < max_ndim { let mut aligned_array = array.clone(); for _ in 0..(max_ndim - ndim) { - let data_type = aligned_array.as_ref().data_type().clone(); - let offsets: Vec = - (0..downcast_arg!(aligned_array, ListArray).offsets().len()) - .map(|i| i as i32) - .collect(); - let field = Arc::new(Field::new("item", data_type, true)); + let data_type = aligned_array.data_type().to_owned(); + let array_lengths = vec![1; aligned_array.len()]; + let offsets = OffsetBuffer::::from_lengths(array_lengths); aligned_array = Arc::new(ListArray::try_new( - field, - OffsetBuffer::new(offsets.into()), - Arc::new(aligned_array.clone()), + Arc::new(Field::new("item", data_type, true)), + offsets, + aligned_array, None, )?) } @@ -829,7 +807,7 @@ fn concat_internal(args: &[ArrayRef]) -> Result { } } // Assume all arrays have the same data type - let data_type = list_arrays[0].value_type().clone(); + let data_type = list_arrays[0].value_type(); let buffer = valid.finish(); let elements = arrays @@ -1479,15 +1457,13 @@ macro_rules! to_string { pub fn array_to_string(args: &[ArrayRef]) -> Result { let arr = &args[0]; - let delimiters = as_generic_string_array::(&args[1])?; + let delimiters = as_string_array(&args[1])?; let delimiters: Vec> = delimiters.iter().collect(); let mut null_string = String::from(""); let mut with_null_string = false; if args.len() == 3 { - null_string = as_generic_string_array::(&args[2])? 
- .value(0) - .to_string(); + null_string = as_string_array(&args[2])?.value(0).to_string(); with_null_string = true; } @@ -1941,29 +1917,64 @@ pub fn string_to_array(args: &[ArrayRef]) -> Result(vec![ + Some(vec![Some(1), Some(2), Some(3)]), + Some(vec![Some(4), Some(5)]), + ])); + let array1d_2 = + Arc::new(ListArray::from_iter_primitive::(vec![ + Some(vec![Some(6), Some(7), Some(8)]), + ])); + + let array2d_1 = Arc::new(array_into_list_array(array1d_1.clone())) as ArrayRef; + let array2d_2 = Arc::new(array_into_list_array(array1d_2.clone())) as ArrayRef; + + let res = + align_array_dimensions(vec![array1d_1.to_owned(), array2d_2.to_owned()]) + .unwrap(); + + let expected = as_list_array(&array2d_1).unwrap(); + let expected_dim = compute_array_ndims(Some(array2d_1.to_owned())).unwrap(); + assert_ne!(as_list_array(&res[0]).unwrap(), expected); + assert_eq!( + compute_array_ndims(Some(res[0].clone())).unwrap(), + expected_dim + ); + + let array3d_1 = Arc::new(array_into_list_array(array2d_1)) as ArrayRef; + let array3d_2 = array_into_list_array(array2d_2.to_owned()); + let res = + align_array_dimensions(vec![array1d_1, Arc::new(array3d_2.clone())]).unwrap(); + + let expected = as_list_array(&array3d_1).unwrap(); + let expected_dim = compute_array_ndims(Some(array3d_1.to_owned())).unwrap(); + assert_ne!(as_list_array(&res[0]).unwrap(), expected); + assert_eq!( + compute_array_ndims(Some(res[0].clone())).unwrap(), + expected_dim + ); + } #[test] fn test_array() { // make_array(1, 2, 3) = [1, 2, 3] let args = [ - ColumnarValue::Scalar(ScalarValue::Int64(Some(1))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(2))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(3))), + Arc::new(Int64Array::from(vec![1])) as ArrayRef, + Arc::new(Int64Array::from(vec![2])), + Arc::new(Int64Array::from(vec![3])), ]; - let array = array(&args).expect("failed to initialize function array"); + let array = make_array(&args).expect("failed to initialize function array"); let result = as_list_array(&array).expect("failed to initialize function array"); assert_eq!(result.len(), 1); assert_eq!( &[1, 2, 3], - result - .value(0) - .as_any() - .downcast_ref::() - .unwrap() + as_int64_array(&result.value(0)) + .expect("failed to cast to primitive array") .values() ) } @@ -1972,29 +1983,23 @@ mod tests { fn test_nested_array() { // make_array([1, 3, 5], [2, 4, 6]) = [[1, 3, 5], [2, 4, 6]] let args = [ - ColumnarValue::Array(Arc::new(Int64Array::from(vec![1, 2]))), - ColumnarValue::Array(Arc::new(Int64Array::from(vec![3, 4]))), - ColumnarValue::Array(Arc::new(Int64Array::from(vec![5, 6]))), + Arc::new(Int64Array::from(vec![1, 2])) as ArrayRef, + Arc::new(Int64Array::from(vec![3, 4])), + Arc::new(Int64Array::from(vec![5, 6])), ]; - let array = array(&args).expect("failed to initialize function array"); + let array = make_array(&args).expect("failed to initialize function array"); let result = as_list_array(&array).expect("failed to initialize function array"); assert_eq!(result.len(), 2); assert_eq!( &[1, 3, 5], - result - .value(0) - .as_any() - .downcast_ref::() - .unwrap() + as_int64_array(&result.value(0)) + .expect("failed to cast to primitive array") .values() ); assert_eq!( &[2, 4, 6], - result - .value(1) - .as_any() - .downcast_ref::() - .unwrap() + as_int64_array(&result.value(1)) + .expect("failed to cast to primitive array") .values() ); } @@ -2002,7 +2007,7 @@ mod tests { #[test] fn test_array_element() { // array_element([1, 2, 3, 4], 1) = 1 - let list_array = return_array().into_array(1); + let list_array = 
return_array(); let arr = array_element(&[list_array, Arc::new(Int64Array::from_value(1, 1))]) .expect("failed to initialize function array_element"); let result = @@ -2011,7 +2016,7 @@ mod tests { assert_eq!(result, &Int64Array::from_value(1, 1)); // array_element([1, 2, 3, 4], 3) = 3 - let list_array = return_array().into_array(1); + let list_array = return_array(); let arr = array_element(&[list_array, Arc::new(Int64Array::from_value(3, 1))]) .expect("failed to initialize function array_element"); let result = @@ -2020,7 +2025,7 @@ mod tests { assert_eq!(result, &Int64Array::from_value(3, 1)); // array_element([1, 2, 3, 4], 0) = NULL - let list_array = return_array().into_array(1); + let list_array = return_array(); let arr = array_element(&[list_array, Arc::new(Int64Array::from_value(0, 1))]) .expect("failed to initialize function array_element"); let result = @@ -2029,7 +2034,7 @@ mod tests { assert_eq!(result, &Int64Array::from(vec![None])); // array_element([1, 2, 3, 4], NULL) = NULL - let list_array = return_array().into_array(1); + let list_array = return_array(); let arr = array_element(&[list_array, Arc::new(Int64Array::from(vec![None]))]) .expect("failed to initialize function array_element"); let result = @@ -2038,7 +2043,7 @@ mod tests { assert_eq!(result, &Int64Array::from(vec![None])); // array_element([1, 2, 3, 4], -1) = 4 - let list_array = return_array().into_array(1); + let list_array = return_array(); let arr = array_element(&[list_array, Arc::new(Int64Array::from_value(-1, 1))]) .expect("failed to initialize function array_element"); let result = @@ -2047,7 +2052,7 @@ mod tests { assert_eq!(result, &Int64Array::from_value(4, 1)); // array_element([1, 2, 3, 4], -3) = 2 - let list_array = return_array().into_array(1); + let list_array = return_array(); let arr = array_element(&[list_array, Arc::new(Int64Array::from_value(-3, 1))]) .expect("failed to initialize function array_element"); let result = @@ -2056,7 +2061,7 @@ mod tests { assert_eq!(result, &Int64Array::from_value(2, 1)); // array_element([1, 2, 3, 4], 10) = NULL - let list_array = return_array().into_array(1); + let list_array = return_array(); let arr = array_element(&[list_array, Arc::new(Int64Array::from_value(10, 1))]) .expect("failed to initialize function array_element"); let result = @@ -2068,7 +2073,7 @@ mod tests { #[test] fn test_nested_array_element() { // array_element([[1, 2, 3, 4], [5, 6, 7, 8]], 2) = [5, 6, 7, 8] - let list_array = return_nested_array().into_array(1); + let list_array = return_nested_array(); let arr = array_element(&[list_array, Arc::new(Int64Array::from_value(2, 1))]) .expect("failed to initialize function array_element"); let result = @@ -2088,7 +2093,7 @@ mod tests { #[test] fn test_array_pop_back() { // array_pop_back([1, 2, 3, 4]) = [1, 2, 3] - let list_array = return_array().into_array(1); + let list_array = return_array(); let arr = array_pop_back(&[list_array]) .expect("failed to initialize function array_pop_back"); let result = @@ -2167,7 +2172,7 @@ mod tests { ); // array_pop_back([1, NULL, 3, NULL]) = [1, NULL, 3] - let list_array = return_array_with_nulls().into_array(1); + let list_array = return_array_with_nulls(); let arr = array_pop_back(&[list_array]) .expect("failed to initialize function array_pop_back"); let result = @@ -2185,7 +2190,7 @@ mod tests { #[test] fn test_nested_array_pop_back() { // array_pop_back([[1, 2, 3, 4], [5, 6, 7, 8]]) = [[1, 2, 3, 4]] - let list_array = return_nested_array().into_array(1); + let list_array = return_nested_array(); let 
arr = array_pop_back(&[list_array]) .expect("failed to initialize function array_slice"); let result = @@ -2233,7 +2238,7 @@ mod tests { #[test] fn test_array_slice() { // array_slice([1, 2, 3, 4], 1, 3) = [1, 2, 3] - let list_array = return_array().into_array(1); + let list_array = return_array(); let arr = array_slice(&[ list_array, Arc::new(Int64Array::from_value(1, 1)), @@ -2254,7 +2259,7 @@ mod tests { ); // array_slice([1, 2, 3, 4], 2, 2) = [2] - let list_array = return_array().into_array(1); + let list_array = return_array(); let arr = array_slice(&[ list_array, Arc::new(Int64Array::from_value(2, 1)), @@ -2275,7 +2280,7 @@ mod tests { ); // array_slice([1, 2, 3, 4], 0, 0) = [] - let list_array = return_array().into_array(1); + let list_array = return_array(); let arr = array_slice(&[ list_array, Arc::new(Int64Array::from_value(0, 1)), @@ -2293,7 +2298,7 @@ mod tests { .is_empty()); // array_slice([1, 2, 3, 4], 0, 6) = [1, 2, 3, 4] - let list_array = return_array().into_array(1); + let list_array = return_array(); let arr = array_slice(&[ list_array, Arc::new(Int64Array::from_value(0, 1)), @@ -2314,7 +2319,7 @@ mod tests { ); // array_slice([1, 2, 3, 4], -2, -2) = [] - let list_array = return_array().into_array(1); + let list_array = return_array(); let arr = array_slice(&[ list_array, Arc::new(Int64Array::from_value(-2, 1)), @@ -2332,7 +2337,7 @@ mod tests { .is_empty()); // array_slice([1, 2, 3, 4], -3, -1) = [2, 3] - let list_array = return_array().into_array(1); + let list_array = return_array(); let arr = array_slice(&[ list_array, Arc::new(Int64Array::from_value(-3, 1)), @@ -2353,7 +2358,7 @@ mod tests { ); // array_slice([1, 2, 3, 4], -3, 2) = [2] - let list_array = return_array().into_array(1); + let list_array = return_array(); let arr = array_slice(&[ list_array, Arc::new(Int64Array::from_value(-3, 1)), @@ -2374,7 +2379,7 @@ mod tests { ); // array_slice([1, 2, 3, 4], 2, 11) = [2, 3, 4] - let list_array = return_array().into_array(1); + let list_array = return_array(); let arr = array_slice(&[ list_array, Arc::new(Int64Array::from_value(2, 1)), @@ -2395,7 +2400,7 @@ mod tests { ); // array_slice([1, 2, 3, 4], 3, 1) = [] - let list_array = return_array().into_array(1); + let list_array = return_array(); let arr = array_slice(&[ list_array, Arc::new(Int64Array::from_value(3, 1)), @@ -2413,7 +2418,7 @@ mod tests { .is_empty()); // array_slice([1, 2, 3, 4], -7, -2) = NULL - let list_array = return_array().into_array(1); + let list_array = return_array(); let arr = array_slice(&[ list_array, Arc::new(Int64Array::from_value(-7, 1)), @@ -2434,7 +2439,7 @@ mod tests { #[test] fn test_nested_array_slice() { // array_slice([[1, 2, 3, 4], [5, 6, 7, 8]], 1, 1) = [[1, 2, 3, 4]] - let list_array = return_nested_array().into_array(1); + let list_array = return_nested_array(); let arr = array_slice(&[ list_array, Arc::new(Int64Array::from_value(1, 1)), @@ -2459,7 +2464,7 @@ mod tests { ); // array_slice([[1, 2, 3, 4], [5, 6, 7, 8]], -1, -1) = [] - let list_array = return_nested_array().into_array(1); + let list_array = return_nested_array(); let arr = array_slice(&[ list_array, Arc::new(Int64Array::from_value(-1, 1)), @@ -2477,7 +2482,7 @@ mod tests { .is_empty()); // array_slice([[1, 2, 3, 4], [5, 6, 7, 8]], -1, 2) = [[5, 6, 7, 8]] - let list_array = return_nested_array().into_array(1); + let list_array = return_nested_array(); let arr = array_slice(&[ list_array, Arc::new(Int64Array::from_value(-1, 1)), @@ -2588,7 +2593,7 @@ mod tests { #[test] fn test_nested_array_concat() { // 
array_concat([1, 2, 3, 4], [1, 2, 3, 4]) = [1, 2, 3, 4, 1, 2, 3, 4] - let list_array = return_array().into_array(1); + let list_array = return_array(); let arr = array_concat(&[list_array.clone(), list_array.clone()]) .expect("failed to initialize function array_concat"); let result = @@ -2605,8 +2610,8 @@ mod tests { ); // array_concat([[1, 2, 3, 4], [5, 6, 7, 8]], [1, 2, 3, 4]) = [[1, 2, 3, 4], [5, 6, 7, 8], [1, 2, 3, 4]] - let list_nested_array = return_nested_array().into_array(1); - let list_array = return_array().into_array(1); + let list_nested_array = return_nested_array(); + let list_array = return_array(); let arr = array_concat(&[list_nested_array, list_array]) .expect("failed to initialize function array_concat"); let result = @@ -2630,7 +2635,7 @@ mod tests { #[test] fn test_array_position() { // array_position([1, 2, 3, 4], 3) = 3 - let list_array = return_array().into_array(1); + let list_array = return_array(); let array = array_position(&[list_array, Arc::new(Int64Array::from_value(3, 1))]) .expect("failed to initialize function array_position"); let result = as_uint64_array(&array) @@ -2642,7 +2647,7 @@ mod tests { #[test] fn test_array_positions() { // array_positions([1, 2, 3, 4], 3) = [3] - let list_array = return_array().into_array(1); + let list_array = return_array(); let array = array_positions(&[list_array, Arc::new(Int64Array::from_value(3, 1))]) .expect("failed to initialize function array_position"); @@ -2664,7 +2669,7 @@ mod tests { #[test] fn test_array_remove() { // array_remove([3, 1, 2, 3, 2, 3], 3) = [1, 2, 3, 2, 3] - let list_array = return_array_with_repeating_elements().into_array(1); + let list_array = return_array_with_repeating_elements(); let array = array_remove(&[list_array, Arc::new(Int64Array::from_value(3, 1))]) .expect("failed to initialize function array_remove"); let result = @@ -2688,8 +2693,8 @@ mod tests { // [[1, 2, 3, 4], [5, 6, 7, 8], [1, 2, 3, 4], [9, 10, 11, 12], [5, 6, 7, 8]], // [1, 2, 3, 4], // ) = [[5, 6, 7, 8], [1, 2, 3, 4], [9, 10, 11, 12], [5, 6, 7, 8]] - let list_array = return_nested_array_with_repeating_elements().into_array(1); - let element_array = return_array().into_array(1); + let list_array = return_nested_array_with_repeating_elements(); + let element_array = return_array(); let array = array_remove(&[list_array, element_array]) .expect("failed to initialize function array_remove"); let result = @@ -2717,7 +2722,7 @@ mod tests { #[test] fn test_array_remove_n() { // array_remove_n([3, 1, 2, 3, 2, 3], 3, 2) = [1, 2, 2, 3] - let list_array = return_array_with_repeating_elements().into_array(1); + let list_array = return_array_with_repeating_elements(); let array = array_remove_n(&[ list_array, Arc::new(Int64Array::from_value(3, 1)), @@ -2746,8 +2751,8 @@ mod tests { // [1, 2, 3, 4], // 3, // ) = [[5, 6, 7, 8], [9, 10, 11, 12], [5, 6, 7, 8]] - let list_array = return_nested_array_with_repeating_elements().into_array(1); - let element_array = return_array().into_array(1); + let list_array = return_nested_array_with_repeating_elements(); + let element_array = return_array(); let array = array_remove_n(&[ list_array, element_array, @@ -2778,7 +2783,7 @@ mod tests { #[test] fn test_array_remove_all() { // array_remove_all([3, 1, 2, 3, 2, 3], 3) = [1, 2, 2] - let list_array = return_array_with_repeating_elements().into_array(1); + let list_array = return_array_with_repeating_elements(); let array = array_remove_all(&[list_array, Arc::new(Int64Array::from_value(3, 1))]) .expect("failed to initialize function 
array_remove_all"); @@ -2803,8 +2808,8 @@ mod tests { // [[1, 2, 3, 4], [5, 6, 7, 8], [1, 2, 3, 4], [9, 10, 11, 12], [5, 6, 7, 8]], // [1, 2, 3, 4], // ) = [[5, 6, 7, 8], [9, 10, 11, 12], [5, 6, 7, 8]] - let list_array = return_nested_array_with_repeating_elements().into_array(1); - let element_array = return_array().into_array(1); + let list_array = return_nested_array_with_repeating_elements(); + let element_array = return_array(); let array = array_remove_all(&[list_array, element_array]) .expect("failed to initialize function array_remove_all"); let result = as_list_array(&array) @@ -2831,7 +2836,7 @@ mod tests { #[test] fn test_array_replace() { // array_replace([3, 1, 2, 3, 2, 3], 3, 4) = [4, 1, 2, 3, 2, 3] - let list_array = return_array_with_repeating_elements().into_array(1); + let list_array = return_array_with_repeating_elements(); let array = array_replace(&[ list_array, Arc::new(Int64Array::from_value(3, 1)), @@ -2860,9 +2865,9 @@ mod tests { // [1, 2, 3, 4], // [11, 12, 13, 14], // ) = [[11, 12, 13, 14], [5, 6, 7, 8], [1, 2, 3, 4], [9, 10, 11, 12], [5, 6, 7, 8]] - let list_array = return_nested_array_with_repeating_elements().into_array(1); - let from_array = return_array().into_array(1); - let to_array = return_extra_array().into_array(1); + let list_array = return_nested_array_with_repeating_elements(); + let from_array = return_array(); + let to_array = return_extra_array(); let array = array_replace(&[list_array, from_array, to_array]) .expect("failed to initialize function array_replace"); let result = @@ -2891,7 +2896,7 @@ mod tests { #[test] fn test_array_replace_n() { // array_replace_n([3, 1, 2, 3, 2, 3], 3, 4, 2) = [4, 1, 2, 4, 2, 3] - let list_array = return_array_with_repeating_elements().into_array(1); + let list_array = return_array_with_repeating_elements(); let array = array_replace_n(&[ list_array, Arc::new(Int64Array::from_value(3, 1)), @@ -2922,9 +2927,9 @@ mod tests { // [11, 12, 13, 14], // 2, // ) = [[11, 12, 13, 14], [5, 6, 7, 8], [11, 12, 13, 14], [9, 10, 11, 12], [5, 6, 7, 8]] - let list_array = return_nested_array_with_repeating_elements().into_array(1); - let from_array = return_array().into_array(1); - let to_array = return_extra_array().into_array(1); + let list_array = return_nested_array_with_repeating_elements(); + let from_array = return_array(); + let to_array = return_extra_array(); let array = array_replace_n(&[ list_array, from_array, @@ -2958,7 +2963,7 @@ mod tests { #[test] fn test_array_replace_all() { // array_replace_all([3, 1, 2, 3, 2, 3], 3, 4) = [4, 1, 2, 4, 2, 4] - let list_array = return_array_with_repeating_elements().into_array(1); + let list_array = return_array_with_repeating_elements(); let array = array_replace_all(&[ list_array, Arc::new(Int64Array::from_value(3, 1)), @@ -2987,9 +2992,9 @@ mod tests { // [1, 2, 3, 4], // [11, 12, 13, 14], // ) = [[11, 12, 13, 14], [5, 6, 7, 8], [11, 12, 13, 14], [9, 10, 11, 12], [5, 6, 7, 8]] - let list_array = return_nested_array_with_repeating_elements().into_array(1); - let from_array = return_array().into_array(1); - let to_array = return_extra_array().into_array(1); + let list_array = return_nested_array_with_repeating_elements(); + let from_array = return_array(); + let to_array = return_extra_array(); let array = array_replace_all(&[list_array, from_array, to_array]) .expect("failed to initialize function array_replace_all"); let result = as_list_array(&array) @@ -3041,7 +3046,7 @@ mod tests { #[test] fn test_nested_array_repeat() { // array_repeat([1, 2, 3, 4], 3) = [[1, 2, 3, 4], 
[1, 2, 3, 4], [1, 2, 3, 4]] - let element = return_array().into_array(1); + let element = return_array(); let array = array_repeat(&[element, Arc::new(Int64Array::from_value(3, 1))]) .expect("failed to initialize function array_repeat"); let result = @@ -3067,25 +3072,25 @@ mod tests { #[test] fn test_array_to_string() { // array_to_string([1, 2, 3, 4], ',') = 1,2,3,4 - let list_array = return_array().into_array(1); + let list_array = return_array(); let array = array_to_string(&[list_array, Arc::new(StringArray::from(vec![Some(",")]))]) .expect("failed to initialize function array_to_string"); - let result = as_generic_string_array::(&array) + let result = as_string_array(&array) .expect("failed to initialize function array_to_string"); assert_eq!(result.len(), 1); assert_eq!("1,2,3,4", result.value(0)); // array_to_string([1, NULL, 3, NULL], ',', '*') = 1,*,3,* - let list_array = return_array_with_nulls().into_array(1); + let list_array = return_array_with_nulls(); let array = array_to_string(&[ list_array, Arc::new(StringArray::from(vec![Some(",")])), Arc::new(StringArray::from(vec![Some("*")])), ]) .expect("failed to initialize function array_to_string"); - let result = as_generic_string_array::(&array) + let result = as_string_array(&array) .expect("failed to initialize function array_to_string"); assert_eq!(result.len(), 1); @@ -3095,25 +3100,25 @@ mod tests { #[test] fn test_nested_array_to_string() { // array_to_string([[1, 2, 3, 4], [5, 6, 7, 8]], '-') = 1-2-3-4-5-6-7-8 - let list_array = return_nested_array().into_array(1); + let list_array = return_nested_array(); let array = array_to_string(&[list_array, Arc::new(StringArray::from(vec![Some("-")]))]) .expect("failed to initialize function array_to_string"); - let result = as_generic_string_array::(&array) + let result = as_string_array(&array) .expect("failed to initialize function array_to_string"); assert_eq!(result.len(), 1); assert_eq!("1-2-3-4-5-6-7-8", result.value(0)); // array_to_string([[1, NULL, 3, NULL], [NULL, 6, 7, NULL]], '-', '*') = 1-*-3-*-*-6-7-* - let list_array = return_nested_array_with_nulls().into_array(1); + let list_array = return_nested_array_with_nulls(); let array = array_to_string(&[ list_array, Arc::new(StringArray::from(vec![Some("-")])), Arc::new(StringArray::from(vec![Some("*")])), ]) .expect("failed to initialize function array_to_string"); - let result = as_generic_string_array::(&array) + let result = as_string_array(&array) .expect("failed to initialize function array_to_string"); assert_eq!(result.len(), 1); @@ -3123,7 +3128,7 @@ mod tests { #[test] fn test_cardinality() { // cardinality([1, 2, 3, 4]) = 4 - let list_array = return_array().into_array(1); + let list_array = return_array(); let arr = cardinality(&[list_array]) .expect("failed to initialize function cardinality"); let result = @@ -3135,7 +3140,7 @@ mod tests { #[test] fn test_nested_cardinality() { // cardinality([[1, 2, 3, 4], [5, 6, 7, 8]]) = 8 - let list_array = return_nested_array().into_array(1); + let list_array = return_nested_array(); let arr = cardinality(&[list_array]) .expect("failed to initialize function cardinality"); let result = @@ -3147,7 +3152,7 @@ mod tests { #[test] fn test_array_length() { // array_length([1, 2, 3, 4]) = 4 - let list_array = return_array().into_array(1); + let list_array = return_array(); let arr = array_length(&[list_array.clone()]) .expect("failed to initialize function array_ndims"); let result = @@ -3166,7 +3171,7 @@ mod tests { #[test] fn test_nested_array_length() { - let list_array = 
return_nested_array().into_array(1); + let list_array = return_nested_array(); // array_length([[1, 2, 3, 4], [5, 6, 7, 8]]) = 2 let arr = array_length(&[list_array.clone()]) @@ -3206,7 +3211,7 @@ mod tests { #[test] fn test_array_dims() { // array_dims([1, 2, 3, 4]) = [4] - let list_array = return_array().into_array(1); + let list_array = return_array(); let array = array_dims(&[list_array]).expect("failed to initialize function array_dims"); @@ -3227,7 +3232,7 @@ mod tests { #[test] fn test_nested_array_dims() { // array_dims([[1, 2, 3, 4], [5, 6, 7, 8]]) = [2, 4] - let list_array = return_nested_array().into_array(1); + let list_array = return_nested_array(); let array = array_dims(&[list_array]).expect("failed to initialize function array_dims"); @@ -3248,7 +3253,7 @@ mod tests { #[test] fn test_array_ndims() { // array_ndims([1, 2, 3, 4]) = 1 - let list_array = return_array().into_array(1); + let list_array = return_array(); let array = array_ndims(&[list_array]) .expect("failed to initialize function array_ndims"); @@ -3261,7 +3266,7 @@ mod tests { #[test] fn test_nested_array_ndims() { // array_ndims([[1, 2, 3, 4], [5, 6, 7, 8]]) = 2 - let list_array = return_nested_array().into_array(1); + let list_array = return_nested_array(); let array = array_ndims(&[list_array]) .expect("failed to initialize function array_ndims"); @@ -3285,152 +3290,137 @@ mod tests { assert_eq!(array.unwrap_err().strip_backtrace(), "Error during planning: array_append received incompatible types: '[Int64, Utf8]'."); } - fn return_array() -> ColumnarValue { + fn return_array() -> ArrayRef { // Returns: [1, 2, 3, 4] let args = [ - ColumnarValue::Scalar(ScalarValue::Int64(Some(1))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(2))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(3))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(4))), + Arc::new(Int64Array::from(vec![Some(1)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(2)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(3)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(4)])) as ArrayRef, ]; - let result = array(&args).expect("failed to initialize function array"); - ColumnarValue::Array(result.clone()) + make_array(&args).expect("failed to initialize function array") } - fn return_extra_array() -> ColumnarValue { + fn return_extra_array() -> ArrayRef { // Returns: [11, 12, 13, 14] let args = [ - ColumnarValue::Scalar(ScalarValue::Int64(Some(11))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(12))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(13))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(14))), + Arc::new(Int64Array::from(vec![Some(11)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(12)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(13)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(14)])) as ArrayRef, ]; - let result = array(&args).expect("failed to initialize function array"); - ColumnarValue::Array(result.clone()) + make_array(&args).expect("failed to initialize function array") } - fn return_nested_array() -> ColumnarValue { + fn return_nested_array() -> ArrayRef { // Returns: [[1, 2, 3, 4], [5, 6, 7, 8]] let args = [ - ColumnarValue::Scalar(ScalarValue::Int64(Some(1))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(2))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(3))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(4))), + Arc::new(Int64Array::from(vec![Some(1)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(2)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(3)])) as 
ArrayRef, + Arc::new(Int64Array::from(vec![Some(4)])) as ArrayRef, ]; - let arr1 = array(&args).expect("failed to initialize function array"); + let arr1 = make_array(&args).expect("failed to initialize function array"); let args = [ - ColumnarValue::Scalar(ScalarValue::Int64(Some(5))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(6))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(7))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(8))), + Arc::new(Int64Array::from(vec![Some(5)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(6)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(7)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(8)])) as ArrayRef, ]; - let arr2 = array(&args).expect("failed to initialize function array"); + let arr2 = make_array(&args).expect("failed to initialize function array"); - let args = [ColumnarValue::Array(arr1), ColumnarValue::Array(arr2)]; - let result = array(&args).expect("failed to initialize function array"); - ColumnarValue::Array(result.clone()) + make_array(&[arr1, arr2]).expect("failed to initialize function array") } - fn return_array_with_nulls() -> ColumnarValue { + fn return_array_with_nulls() -> ArrayRef { // Returns: [1, NULL, 3, NULL] let args = [ - ColumnarValue::Scalar(ScalarValue::Int64(Some(1))), - ColumnarValue::Scalar(ScalarValue::Null), - ColumnarValue::Scalar(ScalarValue::Int64(Some(3))), - ColumnarValue::Scalar(ScalarValue::Null), + Arc::new(Int64Array::from(vec![Some(1)])) as ArrayRef, + Arc::new(Int64Array::from(vec![None])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(3)])) as ArrayRef, + Arc::new(Int64Array::from(vec![None])) as ArrayRef, ]; - let result = array(&args).expect("failed to initialize function array"); - ColumnarValue::Array(result.clone()) + make_array(&args).expect("failed to initialize function array") } - fn return_nested_array_with_nulls() -> ColumnarValue { + fn return_nested_array_with_nulls() -> ArrayRef { // Returns: [[1, NULL, 3, NULL], [NULL, 6, 7, NULL]] let args = [ - ColumnarValue::Scalar(ScalarValue::Int64(Some(1))), - ColumnarValue::Scalar(ScalarValue::Null), - ColumnarValue::Scalar(ScalarValue::Int64(Some(3))), - ColumnarValue::Scalar(ScalarValue::Null), + Arc::new(Int64Array::from(vec![Some(1)])) as ArrayRef, + Arc::new(Int64Array::from(vec![None])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(3)])) as ArrayRef, + Arc::new(Int64Array::from(vec![None])) as ArrayRef, ]; - let arr1 = array(&args).expect("failed to initialize function array"); + let arr1 = make_array(&args).expect("failed to initialize function array"); let args = [ - ColumnarValue::Scalar(ScalarValue::Null), - ColumnarValue::Scalar(ScalarValue::Int64(Some(6))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(7))), - ColumnarValue::Scalar(ScalarValue::Null), + Arc::new(Int64Array::from(vec![None])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(6)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(7)])) as ArrayRef, + Arc::new(Int64Array::from(vec![None])) as ArrayRef, ]; - let arr2 = array(&args).expect("failed to initialize function array"); + let arr2 = make_array(&args).expect("failed to initialize function array"); - let args = [ColumnarValue::Array(arr1), ColumnarValue::Array(arr2)]; - let result = array(&args).expect("failed to initialize function array"); - ColumnarValue::Array(result.clone()) + make_array(&[arr1, arr2]).expect("failed to initialize function array") } - fn return_array_with_repeating_elements() -> ColumnarValue { + fn return_array_with_repeating_elements() -> ArrayRef { // 
Returns: [3, 1, 2, 3, 2, 3] let args = [ - ColumnarValue::Scalar(ScalarValue::Int64(Some(3))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(1))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(2))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(3))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(2))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(3))), + Arc::new(Int64Array::from(vec![Some(3)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(1)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(2)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(3)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(2)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(3)])) as ArrayRef, ]; - let result = array(&args).expect("failed to initialize function array"); - ColumnarValue::Array(result.clone()) + make_array(&args).expect("failed to initialize function array") } - fn return_nested_array_with_repeating_elements() -> ColumnarValue { + fn return_nested_array_with_repeating_elements() -> ArrayRef { // Returns: [[1, 2, 3, 4], [5, 6, 7, 8], [1, 2, 3, 4], [9, 10, 11, 12], [5, 6, 7, 8]] let args = [ - ColumnarValue::Scalar(ScalarValue::Int64(Some(1))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(2))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(3))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(4))), + Arc::new(Int64Array::from(vec![Some(1)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(2)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(3)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(4)])) as ArrayRef, ]; - let arr1 = array(&args).expect("failed to initialize function array"); + let arr1 = make_array(&args).expect("failed to initialize function array"); let args = [ - ColumnarValue::Scalar(ScalarValue::Int64(Some(5))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(6))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(7))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(8))), + Arc::new(Int64Array::from(vec![Some(5)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(6)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(7)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(8)])) as ArrayRef, ]; - let arr2 = array(&args).expect("failed to initialize function array"); + let arr2 = make_array(&args).expect("failed to initialize function array"); let args = [ - ColumnarValue::Scalar(ScalarValue::Int64(Some(1))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(2))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(3))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(4))), + Arc::new(Int64Array::from(vec![Some(1)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(2)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(3)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(4)])) as ArrayRef, ]; - let arr3 = array(&args).expect("failed to initialize function array"); + let arr3 = make_array(&args).expect("failed to initialize function array"); let args = [ - ColumnarValue::Scalar(ScalarValue::Int64(Some(9))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(10))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(11))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(12))), + Arc::new(Int64Array::from(vec![Some(9)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(10)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(11)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(12)])) as ArrayRef, ]; - let arr4 = array(&args).expect("failed to initialize function array"); + let arr4 = make_array(&args).expect("failed to initialize function 
array"); let args = [ - ColumnarValue::Scalar(ScalarValue::Int64(Some(5))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(6))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(7))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(8))), + Arc::new(Int64Array::from(vec![Some(5)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(6)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(7)])) as ArrayRef, + Arc::new(Int64Array::from(vec![Some(8)])) as ArrayRef, ]; - let arr5 = array(&args).expect("failed to initialize function array"); + let arr5 = make_array(&args).expect("failed to initialize function array"); - let args = [ - ColumnarValue::Array(arr1), - ColumnarValue::Array(arr2), - ColumnarValue::Array(arr3), - ColumnarValue::Array(arr4), - ColumnarValue::Array(arr5), - ]; - let result = array(&args).expect("failed to initialize function array"); - ColumnarValue::Array(result.clone()) + make_array(&[arr1, arr2, arr3, arr4, arr5]) + .expect("failed to initialize function array") } } diff --git a/datafusion/physical-expr/src/datetime_expressions.rs b/datafusion/physical-expr/src/datetime_expressions.rs index 5cf1c21df5c2..bb8720cb8d00 100644 --- a/datafusion/physical-expr/src/datetime_expressions.rs +++ b/datafusion/physical-expr/src/datetime_expressions.rs @@ -154,6 +154,15 @@ pub fn to_timestamp_micros(args: &[ColumnarValue]) -> Result { ) } +/// to_timestamp_nanos SQL function +pub fn to_timestamp_nanos(args: &[ColumnarValue]) -> Result { + handle::( + args, + string_to_timestamp_nanos_shim, + "to_timestamp_nanos", + ) +} + /// to_timestamp_seconds SQL function pub fn to_timestamp_seconds(args: &[ColumnarValue]) -> Result { handle::( @@ -962,7 +971,7 @@ mod tests { let mut string_builder = StringBuilder::with_capacity(2, 1024); let mut ts_builder = TimestampNanosecondArray::builder(2); - string_builder.append_value("2020-09-08T13:42:29.190855Z"); + string_builder.append_value("2020-09-08T13:42:29.190855"); ts_builder.append_value(1599572549190855000); string_builder.append_null(); diff --git a/datafusion/physical-expr/src/equivalence.rs b/datafusion/physical-expr/src/equivalence.rs index 4fce6854138d..d8aa09b90460 100644 --- a/datafusion/physical-expr/src/equivalence.rs +++ b/datafusion/physical-expr/src/equivalence.rs @@ -15,148 +15,257 @@ // specific language governing permissions and limitations // under the License. -use crate::expressions::{CastExpr, Column}; -use crate::utils::{collect_columns, merge_vectors}; +use std::collections::HashSet; +use std::hash::Hash; +use std::sync::Arc; + +use crate::expressions::Column; +use crate::physical_expr::{deduplicate_physical_exprs, have_common_entries}; +use crate::sort_properties::{ExprOrdering, SortProperties}; use crate::{ - LexOrdering, LexOrderingRef, LexOrderingReq, PhysicalExpr, PhysicalSortExpr, - PhysicalSortRequirement, + physical_exprs_contains, LexOrdering, LexOrderingRef, LexRequirement, + LexRequirementRef, PhysicalExpr, PhysicalSortExpr, PhysicalSortRequirement, }; use arrow::datatypes::SchemaRef; -use arrow_schema::Fields; - +use arrow_schema::SortOptions; use datafusion_common::tree_node::{Transformed, TreeNode}; -use datafusion_common::{JoinSide, JoinType}; -use itertools::izip; -use std::collections::{HashMap, HashSet}; -use std::hash::Hash; -use std::ops::Range; -use std::sync::Arc; +use datafusion_common::{JoinSide, JoinType, Result}; -/// Represents a collection of [`EquivalentClass`] (equivalences -/// between columns in relations) -/// -/// This is used to represent: -/// -/// 1. 
Equality conditions (like `A=B`), when `T` = [`Column`] +use indexmap::map::Entry; +use indexmap::IndexMap; + +/// An `EquivalenceClass` is a set of [`Arc`]s that are known +/// to have the same value for all tuples in a relation. These are generated by +/// equality predicates, typically equi-join conditions and equality conditions +/// in filters. +pub type EquivalenceClass = Vec>; + +/// Stores the mapping between source expressions and target expressions for a +/// projection. #[derive(Debug, Clone)] -pub struct EquivalenceProperties { - classes: Vec>, - schema: SchemaRef, +pub struct ProjectionMapping { + /// `(source expression)` --> `(target expression)` + /// Indices in the vector corresponds to the indices after projection. + inner: Vec<(Arc, Arc)>, } -impl EquivalenceProperties { - pub fn new(schema: SchemaRef) -> Self { - EquivalenceProperties { - classes: vec![], - schema, +impl ProjectionMapping { + /// Constructs the mapping between a projection's input and output + /// expressions. + /// + /// For example, given the input projection expressions (`a+b`, `c+d`) + /// and an output schema with two columns `"c+d"` and `"a+b"` + /// the projection mapping would be + /// ```text + /// [0]: (c+d, col("c+d")) + /// [1]: (a+b, col("a+b")) + /// ``` + /// where `col("c+d")` means the column named "c+d". + pub fn try_new( + expr: &[(Arc, String)], + input_schema: &SchemaRef, + ) -> Result { + // Construct a map from the input expressions to the output expression of the projection: + let mut inner = vec![]; + for (expr_idx, (expression, name)) in expr.iter().enumerate() { + let target_expr = Arc::new(Column::new(name, expr_idx)) as _; + + let source_expr = expression.clone().transform_down(&|e| match e + .as_any() + .downcast_ref::( + ) { + Some(col) => { + // Sometimes, expression and its name in the input_schema doesn't match. + // This can cause problems. Hence in here we make sure that expression name + // matches with the name in the inout_schema. + // Conceptually, source_expr and expression should be same. + let idx = col.index(); + let matching_input_field = input_schema.field(idx); + let matching_input_column = + Column::new(matching_input_field.name(), idx); + Ok(Transformed::Yes(Arc::new(matching_input_column))) + } + None => Ok(Transformed::No(e)), + })?; + + inner.push((source_expr, target_expr)); } + Ok(Self { inner }) } - /// return the set of equivalences - pub fn classes(&self) -> &[EquivalentClass] { - &self.classes + /// Iterate over pairs of (source, target) expressions + pub fn iter( + &self, + ) -> impl Iterator, Arc)> + '_ { + self.inner.iter() } +} - pub fn schema(&self) -> SchemaRef { - self.schema.clone() +/// An `EquivalenceGroup` is a collection of `EquivalenceClass`es where each +/// class represents a distinct equivalence class in a relation. +#[derive(Debug, Clone)] +pub struct EquivalenceGroup { + classes: Vec, +} + +impl EquivalenceGroup { + /// Creates an empty equivalence group. + fn empty() -> Self { + Self { classes: vec![] } } - /// Add the [`EquivalentClass`] from `iter` to this list - pub fn extend>>(&mut self, iter: I) { - for ec in iter { - self.classes.push(ec) - } + /// Creates an equivalence group from the given equivalence classes. + fn new(classes: Vec) -> Self { + let mut result = EquivalenceGroup { classes }; + result.remove_redundant_entries(); + result } - /// Adds new equal conditions into the EquivalenceProperties. New equal - /// conditions usually come from equality predicates in a join/filter. 
- pub fn add_equal_conditions(&mut self, new_conditions: (&Column, &Column)) { - let mut idx1: Option = None; - let mut idx2: Option = None; - for (idx, class) in self.classes.iter_mut().enumerate() { - let contains_first = class.contains(new_conditions.0); - let contains_second = class.contains(new_conditions.1); - match (contains_first, contains_second) { - (true, false) => { - class.insert(new_conditions.1.clone()); - idx1 = Some(idx); - } - (false, true) => { - class.insert(new_conditions.0.clone()); - idx2 = Some(idx); - } - (true, true) => { - idx1 = Some(idx); - idx2 = Some(idx); - break; - } - (false, false) => {} + /// Returns how many equivalence classes there are in this group. + fn len(&self) -> usize { + self.classes.len() + } + + /// Checks whether this equivalence group is empty. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns an iterator over the equivalence classes in this group. + fn iter(&self) -> impl Iterator { + self.classes.iter() + } + + /// Adds the equality `left` = `right` to this equivalence group. + /// New equality conditions often arise after steps like `Filter(a = b)`, + /// `Alias(a, a as b)` etc. + fn add_equal_conditions( + &mut self, + left: &Arc, + right: &Arc, + ) { + let mut first_class = None; + let mut second_class = None; + for (idx, cls) in self.classes.iter().enumerate() { + if physical_exprs_contains(cls, left) { + first_class = Some(idx); + } + if physical_exprs_contains(cls, right) { + second_class = Some(idx); } } - - match (idx1, idx2) { - (Some(idx_1), Some(idx_2)) if idx_1 != idx_2 => { - // need to merge the two existing EquivalentClasses - let second_eq_class = self.classes.get(idx_2).unwrap().clone(); - let first_eq_class = self.classes.get_mut(idx_1).unwrap(); - for prop in second_eq_class.iter() { - if !first_eq_class.contains(prop) { - first_eq_class.insert(prop.clone()); + match (first_class, second_class) { + (Some(mut first_idx), Some(mut second_idx)) => { + // If the given left and right sides belong to different classes, + // we should unify/bridge these classes. + if first_idx != second_idx { + // By convention make sure second_idx is larger than first_idx. + if first_idx > second_idx { + (first_idx, second_idx) = (second_idx, first_idx); } + // Remove second_idx from self.classes then merge its values with class at first_idx. + // Convention above makes sure that first_idx is still valid after second_idx removal. + let other_class = self.classes.swap_remove(second_idx); + self.classes[first_idx].extend(other_class); } - self.classes.remove(idx_2); + } + (Some(group_idx), None) => { + // Right side is new, extend left side's class: + self.classes[group_idx].push(right.clone()); + } + (None, Some(group_idx)) => { + // Left side is new, extend right side's class: + self.classes[group_idx].push(left.clone()); } (None, None) => { - // adding new pairs - self.classes.push(EquivalentClass::::new( - new_conditions.0.clone(), - vec![new_conditions.1.clone()], - )); + // None of the expressions is among existing classes. + // Create a new equivalence class and extend the group. + self.classes.push(vec![left.clone(), right.clone()]); + } + } + } + + /// Removes redundant entries from this group. + fn remove_redundant_entries(&mut self) { + // Remove duplicate entries from each equivalence class: + self.classes.retain_mut(|cls| { + // Keep groups that have at least two entries as singleton class is + // meaningless (i.e. 
it contains no non-trivial information): + deduplicate_physical_exprs(cls); + cls.len() > 1 + }); + // Unify/bridge groups that have common expressions: + self.bridge_classes() + } + + /// This utility function unifies/bridges classes that have common expressions. + /// For example, assume that we have [`EquivalenceClass`]es `[a, b]` and `[b, c]`. + /// Since both classes contain `b`, columns `a`, `b` and `c` are actually all + /// equal and belong to one class. This utility converts merges such classes. + fn bridge_classes(&mut self) { + let mut idx = 0; + while idx < self.classes.len() { + let mut next_idx = idx + 1; + let start_size = self.classes[idx].len(); + while next_idx < self.classes.len() { + if have_common_entries(&self.classes[idx], &self.classes[next_idx]) { + let extension = self.classes.swap_remove(next_idx); + self.classes[idx].extend(extension); + } else { + next_idx += 1; + } + } + if self.classes[idx].len() > start_size { + deduplicate_physical_exprs(&mut self.classes[idx]); + if self.classes[idx].len() > start_size { + continue; + } } - _ => {} + idx += 1; } } - /// Normalizes physical expression according to `EquivalentClass`es inside `self.classes`. - /// expression is replaced with `EquivalentClass::head` expression if it is among `EquivalentClass::others`. + /// Extends this equivalence group with the `other` equivalence group. + fn extend(&mut self, other: Self) { + self.classes.extend(other.classes); + self.remove_redundant_entries(); + } + + /// Normalizes the given physical expression according to this group. + /// The expression is replaced with the first expression in the equivalence + /// class it matches with (if any). pub fn normalize_expr(&self, expr: Arc) -> Arc { expr.clone() .transform(&|expr| { - let normalized_form = - expr.as_any().downcast_ref::().and_then(|column| { - for class in &self.classes { - if class.contains(column) { - return Some(Arc::new(class.head().clone()) as _); - } - } - None - }); - Ok(if let Some(normalized_form) = normalized_form { - Transformed::Yes(normalized_form) - } else { - Transformed::No(expr) - }) + for cls in self.iter() { + if physical_exprs_contains(cls, &expr) { + return Ok(Transformed::Yes(cls[0].clone())); + } + } + Ok(Transformed::No(expr)) }) .unwrap_or(expr) } - /// This function applies the \[`normalize_expr`] - /// function for all expression in `exprs` and returns a vector of - /// normalized physical expressions. - pub fn normalize_exprs( + /// Normalizes the given sort expression according to this group. + /// The underlying physical expression is replaced with the first expression + /// in the equivalence class it matches with (if any). If the underlying + /// expression does not belong to any equivalence class in this group, returns + /// the sort expression as is. + pub fn normalize_sort_expr( &self, - exprs: &[Arc], - ) -> Vec> { - exprs - .iter() - .map(|expr| self.normalize_expr(expr.clone())) - .collect::>() + mut sort_expr: PhysicalSortExpr, + ) -> PhysicalSortExpr { + sort_expr.expr = self.normalize_expr(sort_expr.expr); + sort_expr } - /// This function normalizes `sort_requirement` according to `EquivalenceClasses` in the `self`. - /// If the given sort requirement doesn't belong to equivalence set inside - /// `self`, it returns `sort_requirement` as is. + /// Normalizes the given sort requirement according to this group. + /// The underlying physical expression is replaced with the first expression + /// in the equivalence class it matches with (if any). 
If the underlying + /// expression does not belong to any equivalence class in this group, returns + /// the given sort requirement as is. pub fn normalize_sort_requirement( &self, mut sort_requirement: PhysicalSortRequirement, @@ -165,1069 +274,1146 @@ impl EquivalenceProperties { sort_requirement } - /// This function applies the \[`normalize_sort_requirement`] - /// function for all sort requirements in `sort_reqs` and returns a vector of - /// normalized sort expressions. - pub fn normalize_sort_requirements( + /// This function applies the `normalize_expr` function for all expressions + /// in `exprs` and returns the corresponding normalized physical expressions. + pub fn normalize_exprs( &self, - sort_reqs: &[PhysicalSortRequirement], - ) -> Vec { - let normalized_sort_reqs = sort_reqs - .iter() - .map(|sort_req| self.normalize_sort_requirement(sort_req.clone())) - .collect::>(); - collapse_vec(normalized_sort_reqs) + exprs: impl IntoIterator>, + ) -> Vec> { + exprs + .into_iter() + .map(|expr| self.normalize_expr(expr)) + .collect() } - /// Similar to the \[`normalize_sort_requirements`] this function normalizes - /// sort expressions in `sort_exprs` and returns a vector of - /// normalized sort expressions. - pub fn normalize_sort_exprs( - &self, - sort_exprs: &[PhysicalSortExpr], - ) -> Vec { - let sort_requirements = - PhysicalSortRequirement::from_sort_exprs(sort_exprs.iter()); - let normalized_sort_requirement = - self.normalize_sort_requirements(&sort_requirements); - PhysicalSortRequirement::to_sort_exprs(normalized_sort_requirement) + /// This function applies the `normalize_sort_expr` function for all sort + /// expressions in `sort_exprs` and returns the corresponding normalized + /// sort expressions. + pub fn normalize_sort_exprs(&self, sort_exprs: LexOrderingRef) -> LexOrdering { + // Convert sort expressions to sort requirements: + let sort_reqs = PhysicalSortRequirement::from_sort_exprs(sort_exprs.iter()); + // Normalize the requirements: + let normalized_sort_reqs = self.normalize_sort_requirements(&sort_reqs); + // Convert sort requirements back to sort expressions: + PhysicalSortRequirement::to_sort_exprs(normalized_sort_reqs) } -} - -/// `OrderingEquivalenceProperties` keeps track of columns that describe the -/// global ordering of the schema. These columns are not necessarily same; e.g. -/// ```text -/// ┌-------┐ -/// | a | b | -/// |---|---| -/// | 1 | 9 | -/// | 2 | 8 | -/// | 3 | 7 | -/// | 5 | 5 | -/// └---┴---┘ -/// ``` -/// where both `a ASC` and `b DESC` can describe the table ordering. With -/// `OrderingEquivalenceProperties`, we can keep track of these equivalences -/// and treat `a ASC` and `b DESC` as the same ordering requirement. -#[derive(Debug, Clone)] -pub struct OrderingEquivalenceProperties { - oeq_class: Option, - /// Keeps track of expressions that have constant value. - constants: Vec>, - schema: SchemaRef, -} -impl OrderingEquivalenceProperties { - /// Create an empty `OrderingEquivalenceProperties` - pub fn new(schema: SchemaRef) -> Self { - Self { - oeq_class: None, - constants: vec![], - schema, - } + /// This function applies the `normalize_sort_requirement` function for all + /// requirements in `sort_reqs` and returns the corresponding normalized + /// sort requirements. 
+ pub fn normalize_sort_requirements( + &self, + sort_reqs: LexRequirementRef, + ) -> LexRequirement { + collapse_lex_req( + sort_reqs + .iter() + .map(|sort_req| self.normalize_sort_requirement(sort_req.clone())) + .collect(), + ) } - /// Extends `OrderingEquivalenceProperties` by adding ordering inside the `other` - /// to the `self.oeq_class`. - pub fn extend(&mut self, other: Option) { - if let Some(other) = other { - if let Some(class) = &mut self.oeq_class { - class.others.insert(other.head); - class.others.extend(other.others); - } else { - self.oeq_class = Some(other); + /// Projects `expr` according to the given projection mapping. + /// If the resulting expression is invalid after projection, returns `None`. + fn project_expr( + &self, + mapping: &ProjectionMapping, + expr: &Arc, + ) -> Option> { + let children = expr.children(); + if children.is_empty() { + for (source, target) in mapping.iter() { + // If we match the source, or an equivalent expression to source, + // then we can project. For example, if we have the mapping + // (a as a1, a + c) and the equivalence class (a, b), expression + // b also projects to a1. + if source.eq(expr) + || self + .get_equivalence_class(source) + .map_or(false, |group| physical_exprs_contains(group, expr)) + { + return Some(target.clone()); + } } } - } - - pub fn oeq_class(&self) -> Option<&OrderingEquivalentClass> { - self.oeq_class.as_ref() - } - - /// Adds new equal conditions into the EquivalenceProperties. New equal - /// conditions usually come from equality predicates in a join/filter. - pub fn add_equal_conditions(&mut self, new_conditions: (&LexOrdering, &LexOrdering)) { - if let Some(class) = &mut self.oeq_class { - class.insert(new_conditions.0.clone()); - class.insert(new_conditions.1.clone()); - } else { - let head = new_conditions.0.clone(); - let others = vec![new_conditions.1.clone()]; - self.oeq_class = Some(OrderingEquivalentClass::new(head, others)) + // Project a non-leaf expression by projecting its children. + else if let Some(children) = children + .into_iter() + .map(|child| self.project_expr(mapping, &child)) + .collect::>>() + { + return Some(expr.clone().with_new_children(children).unwrap()); } + // Arriving here implies the expression was invalid after projection. + None } - /// Add physical expression that have constant value to the `self.constants` - pub fn with_constants(mut self, constants: Vec>) -> Self { - constants.into_iter().for_each(|constant| { - if !physical_exprs_contains(&self.constants, &constant) { - self.constants.push(constant); - } - }); - self - } - - pub fn schema(&self) -> SchemaRef { - self.schema.clone() - } - - /// This function normalizes `sort_reqs` by - /// - removing expressions that have constant value from requirement - /// - replacing sections that are in the `self.oeq_class.others` with `self.oeq_class.head` - /// - removing sections that satisfies global ordering that are in the post fix of requirement - pub fn normalize_sort_requirements( + /// Projects `ordering` according to the given projection mapping. + /// If the resulting ordering is invalid after projection, returns `None`. 
+ fn project_ordering( &self, - sort_reqs: &[PhysicalSortRequirement], - ) -> Vec { - let normalized_sort_reqs = - prune_sort_reqs_with_constants(sort_reqs, &self.constants); - let mut normalized_sort_reqs = collapse_lex_req(normalized_sort_reqs); - if let Some(oeq_class) = &self.oeq_class { - for item in oeq_class.others() { - let item = PhysicalSortRequirement::from_sort_exprs(item); - let item = prune_sort_reqs_with_constants(&item, &self.constants); - let ranges = get_compatible_ranges(&normalized_sort_reqs, &item); - let mut offset: i64 = 0; - for Range { start, end } in ranges { - let head = PhysicalSortRequirement::from_sort_exprs(oeq_class.head()); - let mut head = prune_sort_reqs_with_constants(&head, &self.constants); - let updated_start = (start as i64 + offset) as usize; - let updated_end = (end as i64 + offset) as usize; - let range = end - start; - offset += head.len() as i64 - range as i64; - let all_none = normalized_sort_reqs[updated_start..updated_end] - .iter() - .all(|req| req.options.is_none()); - if all_none { - for req in head.iter_mut() { - req.options = None; - } - } - normalized_sort_reqs.splice(updated_start..updated_end, head); - } - } - normalized_sort_reqs = simplify_lex_req(normalized_sort_reqs, oeq_class); - } - collapse_lex_req(normalized_sort_reqs) + mapping: &ProjectionMapping, + ordering: LexOrderingRef, + ) -> Option { + // If any sort expression is invalid after projection, rest of the + // ordering shouldn't be projected either. For example, if input ordering + // is [a ASC, b ASC, c ASC], and column b is not valid after projection, + // the result should be [a ASC], not [a ASC, c ASC], even if column c is + // valid after projection. + let result = ordering + .iter() + .map_while(|sort_expr| { + self.project_expr(mapping, &sort_expr.expr) + .map(|expr| PhysicalSortExpr { + expr, + options: sort_expr.options, + }) + }) + .collect::>(); + (!result.is_empty()).then_some(result) } - /// Checks whether `leading_ordering` is contained in any of the ordering - /// equivalence classes. - pub fn satisfies_leading_ordering( - &self, - leading_ordering: &PhysicalSortExpr, - ) -> bool { - if let Some(oeq_class) = &self.oeq_class { - for ordering in oeq_class - .others + /// Projects this equivalence group according to the given projection mapping. + pub fn project(&self, mapping: &ProjectionMapping) -> Self { + let projected_classes = self.iter().filter_map(|cls| { + let new_class = cls .iter() - .chain(std::iter::once(&oeq_class.head)) + .filter_map(|expr| self.project_expr(mapping, expr)) + .collect::>(); + (new_class.len() > 1).then_some(new_class) + }); + // TODO: Convert the algorithm below to a version that uses `HashMap`. + // once `Arc` can be stored in `HashMap`. + // See issue: https://github.com/apache/arrow-datafusion/issues/8027 + let mut new_classes = vec![]; + for (source, target) in mapping.iter() { + if new_classes.is_empty() { + new_classes.push((source, vec![target.clone()])); + } + if let Some((_, values)) = + new_classes.iter_mut().find(|(key, _)| key.eq(source)) { - if ordering[0].eq(leading_ordering) { - return true; + if !physical_exprs_contains(values, target) { + values.push(target.clone()); } } } - false - } -} - -/// EquivalentClass is a set of [`Column`]s or [`PhysicalSortExpr`]s that are known -/// to have the same value in all tuples in a relation. `EquivalentClass` -/// is generated by equality predicates, typically equijoin conditions and equality -/// conditions in filters. 
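Editor's note: the `project_ordering` method above truncates an ordering at the first expression that does not survive the projection. The standalone sketch below shows the same `map_while` pattern over a plain rename map; the `HashMap`-based mapping and column names are hypothetical illustrations, not the crate's `ProjectionMapping` type.

```rust
use std::collections::HashMap;

/// Projects an ordering through a rename map, stopping at the first column
/// that does not survive the projection. This mirrors how `[a ASC, b ASC, c ASC]`
/// becomes `[a ASC]` when `b` is projected away, even if `c` is still valid.
fn project_ordering(
    mapping: &HashMap<&str, &str>,
    ordering: &[&str],
) -> Option<Vec<String>> {
    let result: Vec<String> = ordering
        .iter()
        .map_while(|col| mapping.get(col).map(|target| target.to_string()))
        .collect();
    (!result.is_empty()).then_some(result)
}

fn main() {
    let mapping = HashMap::from([("a", "a1"), ("c", "c1")]);
    // `b` is not in the projection, so everything after `a` is dropped.
    assert_eq!(
        project_ordering(&mapping, &["a", "b", "c"]),
        Some(vec!["a1".to_string()])
    );
    assert_eq!(project_ordering(&mapping, &["b"]), None);
    println!("ok");
}
```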
`EquivalentClass` is generated by the -/// `ROW_NUMBER` window function. -#[derive(Debug, Clone)] -pub struct EquivalentClass { - /// First element in the EquivalentClass - head: T, - /// Other equal columns - others: HashSet, -} - -impl EquivalentClass { - pub fn new(head: T, others: Vec) -> EquivalentClass { - EquivalentClass { - head, - others: HashSet::from_iter(others), - } - } - - pub fn head(&self) -> &T { - &self.head - } - - pub fn others(&self) -> &HashSet { - &self.others - } - - pub fn contains(&self, col: &T) -> bool { - self.head == *col || self.others.contains(col) + // Only add equivalence classes with at least two members as singleton + // equivalence classes are meaningless. + let new_classes = new_classes + .into_iter() + .filter_map(|(_, values)| (values.len() > 1).then_some(values)); + let classes = projected_classes.chain(new_classes).collect(); + Self::new(classes) } - pub fn insert(&mut self, col: T) -> bool { - self.head != col && self.others.insert(col) + /// Returns the equivalence class that contains `expr`. + /// If none of the equivalence classes contains `expr`, returns `None`. + fn get_equivalence_class( + &self, + expr: &Arc, + ) -> Option<&[Arc]> { + self.iter() + .map(|cls| cls.as_slice()) + .find(|cls| physical_exprs_contains(cls, expr)) } - pub fn remove(&mut self, col: &T) -> bool { - let removed = self.others.remove(col); - // If we are removing the head, adjust others so that its first entry becomes the new head. - if !removed && *col == self.head { - if let Some(col) = self.others.iter().next().cloned() { - let removed = self.others.remove(&col); - self.head = col; - removed - } else { - // We don't allow empty equivalence classes, reject removal if one tries removing - // the only element in an equivalence class. - false + /// Combine equivalence groups of the given join children. + pub fn join( + &self, + right_equivalences: &Self, + join_type: &JoinType, + left_size: usize, + on: &[(Column, Column)], + ) -> Self { + match join_type { + JoinType::Inner | JoinType::Left | JoinType::Full | JoinType::Right => { + let mut result = Self::new( + self.iter() + .cloned() + .chain(right_equivalences.iter().map(|item| { + item.iter() + .cloned() + .map(|expr| add_offset_to_expr(expr, left_size)) + .collect() + })) + .collect(), + ); + // In we have an inner join, expressions in the "on" condition + // are equal in the resulting table. + if join_type == &JoinType::Inner { + for (lhs, rhs) in on.iter() { + let index = rhs.index() + left_size; + let new_lhs = Arc::new(lhs.clone()) as _; + let new_rhs = Arc::new(Column::new(rhs.name(), index)) as _; + result.add_equal_conditions(&new_lhs, &new_rhs); + } + } + result } - } else { - removed + JoinType::LeftSemi | JoinType::LeftAnti => self.clone(), + JoinType::RightSemi | JoinType::RightAnti => right_equivalences.clone(), } } +} - pub fn iter(&self) -> impl Iterator { - std::iter::once(&self.head).chain(self.others.iter()) - } - - pub fn len(&self) -> usize { - self.others.len() + 1 - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 +/// This function constructs a duplicate-free `LexOrderingReq` by filtering out +/// duplicate entries that have same physical expression inside. For example, +/// `vec![a Some(Asc), a Some(Desc)]` collapses to `vec![a Some(Asc)]`. 
+pub fn collapse_lex_req(input: LexRequirement) -> LexRequirement { + let mut output = Vec::::new(); + for item in input { + if !output.iter().any(|req| req.expr.eq(&item.expr)) { + output.push(item); + } } + output } -/// `LexOrdering` stores the lexicographical ordering for a schema. -/// OrderingEquivalentClass keeps track of different alternative orderings than can -/// describe the schema. -/// For instance, for the table below +/// An `OrderingEquivalenceClass` object keeps track of different alternative +/// orderings than can describe a schema. For example, consider the following table: +/// +/// ```text /// |a|b|c|d| /// |1|4|3|1| /// |2|3|3|2| /// |3|1|2|2| /// |3|2|1|3| -/// both `vec![a ASC, b ASC]` and `vec![c DESC, d ASC]` describe the ordering of the table. -/// For this case, we say that `vec![a ASC, b ASC]`, and `vec![c DESC, d ASC]` are ordering equivalent. -pub type OrderingEquivalentClass = EquivalentClass; - -/// Update each expression in `ordering` with alias expressions. Assume -/// `ordering` is `a ASC, b ASC` and `c` is alias of `b`. Then, the result -/// will be `a ASC, c ASC`. -fn update_with_alias( - mut ordering: LexOrdering, - oeq_alias_map: &[(Column, Column)], -) -> LexOrdering { - for (source_col, target_col) in oeq_alias_map { - let source_col: Arc = Arc::new(source_col.clone()); - // Replace invalidated columns with its alias in the ordering expression. - let target_col: Arc = Arc::new(target_col.clone()); - for item in ordering.iter_mut() { - if item.expr.eq(&source_col) { - // Change the corresponding entry with alias expression - item.expr = target_col.clone(); - } - } - } - ordering +/// ``` +/// +/// Here, both `vec![a ASC, b ASC]` and `vec![c DESC, d ASC]` describe the table +/// ordering. In this case, we say that these orderings are equivalent. +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub struct OrderingEquivalenceClass { + orderings: Vec, } -impl OrderingEquivalentClass { - /// This function updates ordering equivalences with alias information. - /// For instance, assume columns `a` and `b` are aliases (a as b), and - /// orderings `a ASC` and `c DESC` are equivalent. Here, we replace column - /// `a` with `b` in ordering equivalence expressions. After this function, - /// `a ASC`, `c DESC` will be converted to the `b ASC`, `c DESC`. - fn update_with_aliases( - &mut self, - oeq_alias_map: &[(Column, Column)], - fields: &Fields, - ) { - let is_head_invalid = self.head.iter().any(|sort_expr| { - collect_columns(&sort_expr.expr) - .iter() - .any(|col| is_column_invalid_in_new_schema(col, fields)) - }); - // If head is invalidated, update head with alias expressions - if is_head_invalid { - self.head = update_with_alias(self.head.clone(), oeq_alias_map); - } else { - let new_oeq_expr = update_with_alias(self.head.clone(), oeq_alias_map); - self.insert(new_oeq_expr); - } - for ordering in self.others.clone().into_iter() { - self.insert(update_with_alias(ordering, oeq_alias_map)); - } +impl OrderingEquivalenceClass { + /// Creates new empty ordering equivalence class. + fn empty() -> Self { + Self { orderings: vec![] } } - /// Adds `offset` value to the index of each expression inside `self.head` and `self.others`. 
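Editor's note: the `collapse_lex_req` helper above keeps only the first requirement seen for each expression. A self-contained sketch of the same order-preserving deduplication, with a hypothetical `SortReq` struct standing in for `PhysicalSortRequirement`:

```rust
#[derive(Debug, Clone, PartialEq)]
struct SortReq {
    expr: String,
    descending: Option<bool>, // `None` models "any direction is acceptable"
}

/// Keeps only the first requirement seen for each expression, so that
/// `[a ASC, a DESC, b]` collapses to `[a ASC, b]`.
fn collapse(input: Vec<SortReq>) -> Vec<SortReq> {
    let mut output: Vec<SortReq> = Vec::new();
    for item in input {
        if !output.iter().any(|req| req.expr == item.expr) {
            output.push(item);
        }
    }
    output
}

fn main() {
    let reqs = vec![
        SortReq { expr: "a".into(), descending: Some(false) },
        SortReq { expr: "a".into(), descending: Some(true) },
        SortReq { expr: "b".into(), descending: None },
    ];
    println!("{:?}", collapse(reqs)); // the duplicate `a` requirement is dropped
}
```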
- pub fn add_offset(&self, offset: usize) -> OrderingEquivalentClass { - let head = add_offset_to_lex_ordering(self.head(), offset); - let others = self - .others() - .iter() - .map(|ordering| add_offset_to_lex_ordering(ordering, offset)) - .collect::>(); - OrderingEquivalentClass::new(head, others) + /// Clears (empties) this ordering equivalence class. + pub fn clear(&mut self) { + self.orderings.clear(); } - /// This function normalizes `OrderingEquivalenceProperties` according to `eq_properties`. - /// More explicitly, it makes sure that expressions in `oeq_class` are head entries - /// in `eq_properties`, replacing any non-head entries with head entries if necessary. - pub fn normalize_with_equivalence_properties( - &self, - eq_properties: &EquivalenceProperties, - ) -> OrderingEquivalentClass { - let head = eq_properties.normalize_sort_exprs(self.head()); - - let others = self - .others() - .iter() - .map(|other| eq_properties.normalize_sort_exprs(other)) - .collect(); - - EquivalentClass::new(head, others) + /// Creates new ordering equivalence class from the given orderings. + pub fn new(orderings: Vec) -> Self { + let mut result = Self { orderings }; + result.remove_redundant_entries(); + result } - /// Prefix with existing ordering. - pub fn prefix_ordering_equivalent_class_with_existing_ordering( - &self, - existing_ordering: &[PhysicalSortExpr], - eq_properties: &EquivalenceProperties, - ) -> OrderingEquivalentClass { - let existing_ordering = eq_properties.normalize_sort_exprs(existing_ordering); - let normalized_head = eq_properties.normalize_sort_exprs(self.head()); - let updated_head = merge_vectors(&existing_ordering, &normalized_head); - let updated_others = self - .others() - .iter() - .map(|ordering| { - let normalized_ordering = eq_properties.normalize_sort_exprs(ordering); - merge_vectors(&existing_ordering, &normalized_ordering) - }) - .collect(); - OrderingEquivalentClass::new(updated_head, updated_others) + /// Checks whether `ordering` is a member of this equivalence class. + pub fn contains(&self, ordering: &LexOrdering) -> bool { + self.orderings.contains(ordering) } -} -/// This is a builder object facilitating incremental construction -/// for ordering equivalences. -pub struct OrderingEquivalenceBuilder { - eq_properties: EquivalenceProperties, - ordering_eq_properties: OrderingEquivalenceProperties, - existing_ordering: Vec, - schema: SchemaRef, -} + /// Adds `ordering` to this equivalence class. + #[allow(dead_code)] + fn push(&mut self, ordering: LexOrdering) { + self.orderings.push(ordering); + // Make sure that there are no redundant orderings: + self.remove_redundant_entries(); + } -impl OrderingEquivalenceBuilder { - pub fn new(schema: SchemaRef) -> Self { - let eq_properties = EquivalenceProperties::new(schema.clone()); - let ordering_eq_properties = OrderingEquivalenceProperties::new(schema.clone()); - Self { - eq_properties, - ordering_eq_properties, - existing_ordering: vec![], - schema, - } + /// Checks whether this ordering equivalence class is empty. + pub fn is_empty(&self) -> bool { + self.len() == 0 } - pub fn extend( - mut self, - new_ordering_eq_properties: OrderingEquivalenceProperties, - ) -> Self { - self.ordering_eq_properties - .extend(new_ordering_eq_properties.oeq_class().cloned()); - self + /// Returns an iterator over the equivalent orderings in this class. 
+ pub fn iter(&self) -> impl Iterator { + self.orderings.iter() } - pub fn with_existing_ordering( - mut self, - existing_ordering: Option>, - ) -> Self { - if let Some(existing_ordering) = existing_ordering { - self.existing_ordering = existing_ordering; - } - self + /// Returns how many equivalent orderings there are in this class. + pub fn len(&self) -> usize { + self.orderings.len() } - pub fn with_equivalences(mut self, new_eq_properties: EquivalenceProperties) -> Self { - self.eq_properties = new_eq_properties; - self + /// Extend this ordering equivalence class with the `other` class. + pub fn extend(&mut self, other: Self) { + self.orderings.extend(other.orderings); + // Make sure that there are no redundant orderings: + self.remove_redundant_entries(); } - pub fn add_equal_conditions( + /// Adds new orderings into this ordering equivalence class. + pub fn add_new_orderings( &mut self, - new_equivalent_ordering: Vec, + orderings: impl IntoIterator, ) { - let mut normalized_out_ordering = vec![]; - for item in &self.existing_ordering { - // To account for ordering equivalences, first normalize the expression: - let normalized = self.eq_properties.normalize_expr(item.expr.clone()); - normalized_out_ordering.push(PhysicalSortExpr { - expr: normalized, - options: item.options, - }); - } - // If there is an existing ordering, add new ordering as an equivalence: - if !normalized_out_ordering.is_empty() { - self.ordering_eq_properties.add_equal_conditions(( - &normalized_out_ordering, - &new_equivalent_ordering, - )); - } - } - - /// Return a reference to the schema with which this builder was constructed with - pub fn schema(&self) -> &SchemaRef { - &self.schema - } - - /// Return a reference to the existing ordering - pub fn existing_ordering(&self) -> &LexOrdering { - &self.existing_ordering - } - - pub fn build(self) -> OrderingEquivalenceProperties { - self.ordering_eq_properties - } -} - -/// Checks whether column is still valid after projection. -fn is_column_invalid_in_new_schema(column: &Column, fields: &Fields) -> bool { - let idx = column.index(); - idx >= fields.len() || fields[idx].name() != column.name() -} - -/// Gets first aliased version of `col` found in `alias_map`. -fn get_alias_column( - col: &Column, - alias_map: &HashMap>, -) -> Option { - alias_map - .iter() - .find_map(|(column, columns)| column.eq(col).then(|| columns[0].clone())) -} - -/// This function applies the given projection to the given equivalence -/// properties to compute the resulting (projected) equivalence properties; e.g. -/// 1) Adding an alias, which can introduce additional equivalence properties, -/// as in Projection(a, a as a1, a as a2). -/// 2) Truncate the [`EquivalentClass`]es that are not in the output schema. 
-pub fn project_equivalence_properties( - input_eq: EquivalenceProperties, - alias_map: &HashMap>, - output_eq: &mut EquivalenceProperties, -) { - // Get schema and fields of projection output - let schema = output_eq.schema(); - let fields = schema.fields(); - - let mut eq_classes = input_eq.classes().to_vec(); - for (column, columns) in alias_map { - let mut find_match = false; - for class in eq_classes.iter_mut() { - // If `self.head` is invalidated in the new schema, update head - // with this change `self.head` is not randomly assigned by one of the entries from `self.others` - if is_column_invalid_in_new_schema(&class.head, fields) { - if let Some(alias_col) = get_alias_column(&class.head, alias_map) { - class.head = alias_col; + self.orderings.extend(orderings); + // Make sure that there are no redundant orderings: + self.remove_redundant_entries(); + } + + /// Removes redundant orderings from this equivalence class. + /// For instance, If we already have the ordering [a ASC, b ASC, c DESC], + /// then there is no need to keep ordering [a ASC, b ASC] in the state. + fn remove_redundant_entries(&mut self) { + let mut idx = 0; + while idx < self.orderings.len() { + let mut removal = false; + for (ordering_idx, ordering) in self.orderings[0..idx].iter().enumerate() { + if let Some(right_finer) = finer_side(ordering, &self.orderings[idx]) { + if right_finer { + self.orderings.swap(ordering_idx, idx); + } + removal = true; + break; } } - if class.contains(column) { - for col in columns { - class.insert(col.clone()); - } - find_match = true; - break; + if removal { + self.orderings.swap_remove(idx); + } else { + idx += 1; } } - if !find_match { - eq_classes.push(EquivalentClass::new(column.clone(), columns.clone())); - } } - // Prune columns that are no longer in the schema from equivalences. - for class in eq_classes.iter_mut() { - let columns_to_remove = class - .iter() - .filter(|column| is_column_invalid_in_new_schema(column, fields)) - .cloned() - .collect::>(); - for column in columns_to_remove { - class.remove(&column); - } + /// Gets the first ordering entry in this ordering equivalence class. + /// This is one of the many valid orderings (if there are multiple). + pub fn output_ordering(&self) -> Option { + self.orderings.first().cloned() } - eq_classes.retain(|props| { - props.len() > 1 - && - // A column should not give an equivalence with itself. - !(props.len() == 2 && props.head.eq(props.others().iter().next().unwrap())) - }); - - output_eq.extend(eq_classes); -} - -/// This function applies the given projection to the given ordering -/// equivalence properties to compute the resulting (projected) ordering -/// equivalence properties; e.g. -/// 1) Adding an alias, which can introduce additional ordering equivalence -/// properties, as in Projection(a, a as a1, a as a2) extends global ordering -/// of a to a1 and a2. -/// 2) Truncate the [`OrderingEquivalentClass`]es that are not in the output schema. 
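Editor's note: `remove_redundant_entries` above discards any ordering that is a prefix of a finer ordering already in the class. A standalone sketch of that pruning over plain string orderings (the tie-breaking detail for exact duplicates is an assumption of this sketch, not a description of the crate's exact behavior):

```rust
/// Drops any ordering that is a prefix of another ordering in the set,
/// e.g. keeps `[a ASC, b ASC, c DESC]` and discards `[a ASC, b ASC]`.
fn remove_redundant(mut orderings: Vec<Vec<String>>) -> Vec<Vec<String>> {
    let mut idx = 0;
    while idx < orderings.len() {
        let current = orderings[idx].clone();
        let redundant = orderings.iter().enumerate().any(|(other_idx, other)| {
            other_idx != idx
                && other.len() >= current.len()
                && other[..current.len()] == current[..]
                // For exact duplicates, keep exactly one copy.
                && (other.len() > current.len() || other_idx < idx)
        });
        if redundant {
            orderings.swap_remove(idx);
        } else {
            idx += 1;
        }
    }
    orderings
}

fn main() {
    let orderings = vec![
        vec!["a ASC".to_string(), "b ASC".to_string()],
        vec!["a ASC".to_string(), "b ASC".to_string(), "c DESC".to_string()],
        vec!["d ASC".to_string()],
    ];
    // The two-column ordering is a prefix of the three-column one, so it goes away.
    println!("{:?}", remove_redundant(orderings));
}
```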
-pub fn project_ordering_equivalence_properties( - input_eq: OrderingEquivalenceProperties, - columns_map: &HashMap>, - output_eq: &mut OrderingEquivalenceProperties, -) { - // Get schema and fields of projection output - let schema = output_eq.schema(); - let fields = schema.fields(); - - let oeq_class = input_eq.oeq_class(); - let mut oeq_class = if let Some(oeq_class) = oeq_class { - oeq_class.clone() - } else { - return; - }; - let mut oeq_alias_map = vec![]; - for (column, columns) in columns_map { - if is_column_invalid_in_new_schema(column, fields) { - oeq_alias_map.push((column.clone(), columns[0].clone())); + // Append orderings in `other` to all existing orderings in this equivalence + // class. + pub fn join_suffix(mut self, other: &Self) -> Self { + for ordering in other.iter() { + for idx in 0..self.orderings.len() { + self.orderings[idx].extend(ordering.iter().cloned()); + } } + self } - oeq_class.update_with_aliases(&oeq_alias_map, fields); - - // Prune columns that no longer is in the schema from from the OrderingEquivalenceProperties. - let sort_exprs_to_remove = oeq_class - .iter() - .filter(|sort_exprs| { - sort_exprs.iter().any(|sort_expr| { - let cols_in_expr = collect_columns(&sort_expr.expr); - // If any one of the columns, used in Expression is invalid, remove expression - // from ordering equivalences - cols_in_expr - .iter() - .any(|col| is_column_invalid_in_new_schema(col, fields)) - }) - }) - .cloned() - .collect::>(); - for sort_exprs in sort_exprs_to_remove { - oeq_class.remove(&sort_exprs); - } - if oeq_class.len() > 1 { - output_eq.extend(Some(oeq_class)); + + /// Adds `offset` value to the index of each expression inside this + /// ordering equivalence class. + pub fn add_offset(&mut self, offset: usize) { + for ordering in self.orderings.iter_mut() { + for sort_expr in ordering { + sort_expr.expr = add_offset_to_expr(sort_expr.expr.clone(), offset); + } + } } -} -/// Update `ordering` if it contains cast expression with target column -/// after projection, if there is no cast expression among `ordering` expressions, -/// returns `None`. -fn update_with_cast_exprs( - cast_exprs: &[(CastExpr, Column)], - mut ordering: LexOrdering, -) -> Option { - let mut is_changed = false; - for sort_expr in ordering.iter_mut() { - for (cast_expr, target_col) in cast_exprs.iter() { - if sort_expr.expr.eq(cast_expr.expr()) { - sort_expr.expr = Arc::new(target_col.clone()) as _; - is_changed = true; + /// Gets sort options associated with this expression if it is a leading + /// ordering expression. Otherwise, returns `None`. + fn get_options(&self, expr: &Arc) -> Option { + for ordering in self.iter() { + let leading_ordering = &ordering[0]; + if leading_ordering.expr.eq(expr) { + return Some(leading_ordering.options); } } + None } - is_changed.then_some(ordering) } -/// Update cast expressions inside ordering equivalence -/// properties with its target column after projection -pub fn update_ordering_equivalence_with_cast( - cast_exprs: &[(CastExpr, Column)], - input_oeq: &mut OrderingEquivalenceProperties, -) { - if let Some(cls) = &mut input_oeq.oeq_class { - for ordering in - std::iter::once(cls.head().clone()).chain(cls.others().clone().into_iter()) - { - if let Some(updated_ordering) = update_with_cast_exprs(cast_exprs, ordering) { - cls.insert(updated_ordering); - } - } - } +/// Adds the `offset` value to `Column` indices inside `expr`. This function is +/// generally used during the update of the right table schema in join operations. 
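Editor's note: `add_offset` above (and the free function that follows) re-index right-side columns so they point into the combined join schema. A minimal sketch of that re-indexing, with a hypothetical `SortKey` struct instead of `PhysicalSortExpr`:

```rust
#[derive(Debug, Clone, PartialEq)]
struct SortKey {
    name: String,
    index: usize,
    descending: bool,
}

/// Shifts the column index of every sort key by the width of the left table,
/// since the join output schema is `left columns ++ right columns`.
fn add_offset(orderings: &mut Vec<Vec<SortKey>>, offset: usize) {
    for ordering in orderings.iter_mut() {
        for key in ordering.iter_mut() {
            key.index += offset;
        }
    }
}

fn main() {
    // Right-table ordering `b@0 ASC` with a 3-column left table becomes `b@3 ASC`.
    let mut orderings =
        vec![vec![SortKey { name: "b".into(), index: 0, descending: false }]];
    add_offset(&mut orderings, 3);
    assert_eq!(orderings[0][0].index, 3);
    println!("{:?}", orderings);
}
```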
+pub fn add_offset_to_expr( + expr: Arc, + offset: usize, +) -> Arc { + expr.transform_down(&|e| match e.as_any().downcast_ref::() { + Some(col) => Ok(Transformed::Yes(Arc::new(Column::new( + col.name(), + offset + col.index(), + )))), + None => Ok(Transformed::No(e)), + }) + .unwrap() + // Note that we can safely unwrap here since our transform always returns + // an `Ok` value. +} + +/// Returns `true` if the ordering `rhs` is strictly finer than the ordering `rhs`, +/// `false` if the ordering `lhs` is at least as fine as the ordering `lhs`, and +/// `None` otherwise (i.e. when given orderings are incomparable). +fn finer_side(lhs: LexOrderingRef, rhs: LexOrderingRef) -> Option { + let all_equal = lhs.iter().zip(rhs.iter()).all(|(lhs, rhs)| lhs.eq(rhs)); + all_equal.then_some(lhs.len() < rhs.len()) } -/// Retrieves the ordering equivalence properties for a given schema and output ordering. -pub fn ordering_equivalence_properties_helper( +/// A `EquivalenceProperties` object stores useful information related to a schema. +/// Currently, it keeps track of: +/// - Equivalent expressions, e.g expressions that have same value. +/// - Valid sort expressions (orderings) for the schema. +/// - Constants expressions (e.g expressions that are known to have constant values). +/// +/// Consider table below: +/// +/// ```text +/// ┌-------┐ +/// | a | b | +/// |---|---| +/// | 1 | 9 | +/// | 2 | 8 | +/// | 3 | 7 | +/// | 5 | 5 | +/// └---┴---┘ +/// ``` +/// +/// where both `a ASC` and `b DESC` can describe the table ordering. With +/// `EquivalenceProperties`, we can keep track of these different valid sort +/// expressions and treat `a ASC` and `b DESC` on an equal footing. +/// +/// Similarly, consider the table below: +/// +/// ```text +/// ┌-------┐ +/// | a | b | +/// |---|---| +/// | 1 | 1 | +/// | 2 | 2 | +/// | 3 | 3 | +/// | 5 | 5 | +/// └---┴---┘ +/// ``` +/// +/// where columns `a` and `b` always have the same value. We keep track of such +/// equivalences inside this object. With this information, we can optimize +/// things like partitioning. For example, if the partition requirement is +/// `Hash(a)` and output partitioning is `Hash(b)`, then we can deduce that +/// the existing partitioning satisfies the requirement. +#[derive(Debug, Clone)] +pub struct EquivalenceProperties { + /// Collection of equivalence classes that store expressions with the same + /// value. + eq_group: EquivalenceGroup, + /// Equivalent sort expressions for this table. + oeq_class: OrderingEquivalenceClass, + /// Expressions whose values are constant throughout the table. + /// TODO: We do not need to track constants separately, they can be tracked + /// inside `eq_groups` as `Literal` expressions. + constants: Vec>, + /// Schema associated with this object. schema: SchemaRef, - eq_orderings: &[LexOrdering], -) -> OrderingEquivalenceProperties { - let mut oep = OrderingEquivalenceProperties::new(schema); - let first_ordering = if let Some(first) = eq_orderings.first() { - first - } else { - // Return an empty OrderingEquivalenceProperties: - return oep; - }; - // First entry among eq_orderings is the head, skip it: - for ordering in eq_orderings.iter().skip(1) { - if !ordering.is_empty() { - oep.add_equal_conditions((first_ordering, ordering)) - } - } - oep } -/// This function constructs a duplicate-free vector by filtering out duplicate -/// entries inside the given vector `input`. 
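Editor's note: the `finer_side` helper above compares two orderings entry by entry; if one is a prefix of the other, the longer one is the finer one, otherwise they are incomparable. A standalone sketch of that comparison over plain string slices, matching the `all_equal.then_some(lhs.len() < rhs.len())` logic shown in the patch:

```rust
/// Returns `Some(true)` when `rhs` is strictly finer than `lhs`,
/// `Some(false)` when `lhs` is at least as fine as `rhs`, and `None`
/// when the two orderings are incomparable.
fn finer_side(lhs: &[&str], rhs: &[&str]) -> Option<bool> {
    let all_equal = lhs.iter().zip(rhs.iter()).all(|(l, r)| l == r);
    all_equal.then_some(lhs.len() < rhs.len())
}

fn main() {
    assert_eq!(finer_side(&["a ASC"], &["a ASC", "b ASC"]), Some(true));
    assert_eq!(finer_side(&["a ASC", "b ASC"], &["a ASC"]), Some(false));
    assert_eq!(finer_side(&["a ASC"], &["b ASC"]), None);
    println!("ok");
}
```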
-fn collapse_vec(input: Vec) -> Vec { - let mut output = vec![]; - for item in input { - if !output.contains(&item) { - output.push(item); +impl EquivalenceProperties { + /// Creates an empty `EquivalenceProperties` object. + pub fn new(schema: SchemaRef) -> Self { + Self { + eq_group: EquivalenceGroup::empty(), + oeq_class: OrderingEquivalenceClass::empty(), + constants: vec![], + schema, } } - output -} -/// This function constructs a duplicate-free `LexOrderingReq` by filtering out duplicate -/// entries that have same physical expression inside the given vector `input`. -/// `vec![a Some(Asc), a Some(Desc)]` is collapsed to the `vec![a Some(Asc)]`. Since -/// when same expression is already seen before, following expressions are redundant. -fn collapse_lex_req(input: LexOrderingReq) -> LexOrderingReq { - let mut output = vec![]; - for item in input { - if !lex_req_contains(&output, &item) { - output.push(item); + /// Creates a new `EquivalenceProperties` object with the given orderings. + pub fn new_with_orderings(schema: SchemaRef, orderings: &[LexOrdering]) -> Self { + Self { + eq_group: EquivalenceGroup::empty(), + oeq_class: OrderingEquivalenceClass::new(orderings.to_vec()), + constants: vec![], + schema, } } - output -} -/// Check whether `sort_req.expr` is among the expressions of `lex_req`. -fn lex_req_contains( - lex_req: &[PhysicalSortRequirement], - sort_req: &PhysicalSortRequirement, -) -> bool { - for constant in lex_req { - if constant.expr.eq(&sort_req.expr) { - return true; - } + /// Returns the associated schema. + pub fn schema(&self) -> &SchemaRef { + &self.schema } - false -} -/// This function simplifies lexicographical ordering requirement -/// inside `input` by removing postfix lexicographical requirements -/// that satisfy global ordering (occurs inside the ordering equivalent class) -fn simplify_lex_req( - input: LexOrderingReq, - oeq_class: &OrderingEquivalentClass, -) -> LexOrderingReq { - let mut section = &input[..]; - loop { - let n_prune = prune_last_n_that_is_in_oeq(section, oeq_class); - // Cannot prune entries from the end of requirement - if n_prune == 0 { - break; - } - section = §ion[0..section.len() - n_prune]; + /// Returns a reference to the ordering equivalence class within. + pub fn oeq_class(&self) -> &OrderingEquivalenceClass { + &self.oeq_class } - if section.is_empty() { - PhysicalSortRequirement::from_sort_exprs(oeq_class.head()) - } else { - section.to_vec() + + /// Returns a reference to the equivalence group within. + pub fn eq_group(&self) -> &EquivalenceGroup { + &self.eq_group } -} -/// Determines how many entries from the end can be deleted. -/// Last n entry satisfies global ordering, hence having them -/// as postfix in the lexicographical requirement is unnecessary. -/// Assume requirement is [a ASC, b ASC, c ASC], also assume that -/// existing ordering is [c ASC, d ASC]. In this case, since [c ASC] -/// is satisfied by the existing ordering (e.g corresponding section is global ordering), -/// [c ASC] can be pruned from the requirement: [a ASC, b ASC, c ASC]. 
In this case, -/// this function will return 1, to indicate last element can be removed from the requirement -fn prune_last_n_that_is_in_oeq( - input: &[PhysicalSortRequirement], - oeq_class: &OrderingEquivalentClass, -) -> usize { - let input_len = input.len(); - for ordering in std::iter::once(oeq_class.head()).chain(oeq_class.others().iter()) { - let mut search_range = std::cmp::min(ordering.len(), input_len); - while search_range > 0 { - let req_section = &input[input_len - search_range..]; - // let given_section = &ordering[0..search_range]; - if req_satisfied(ordering, req_section) { - return search_range; - } else { - search_range -= 1; - } - } + /// Returns the normalized version of the ordering equivalence class within. + /// Normalization removes constants and duplicates as well as standardizing + /// expressions according to the equivalence group within. + pub fn normalized_oeq_class(&self) -> OrderingEquivalenceClass { + OrderingEquivalenceClass::new( + self.oeq_class + .iter() + .map(|ordering| self.normalize_sort_exprs(ordering)) + .collect(), + ) + } + + /// Extends this `EquivalenceProperties` with the `other` object. + pub fn extend(mut self, other: Self) -> Self { + self.eq_group.extend(other.eq_group); + self.oeq_class.extend(other.oeq_class); + self.add_constants(other.constants) + } + + /// Clears (empties) the ordering equivalence class within this object. + /// Call this method when existing orderings are invalidated. + pub fn clear_orderings(&mut self) { + self.oeq_class.clear(); + } + + /// Extends this `EquivalenceProperties` by adding the orderings inside the + /// ordering equivalence class `other`. + pub fn add_ordering_equivalence_class(&mut self, other: OrderingEquivalenceClass) { + self.oeq_class.extend(other); + } + + /// Adds new orderings into the existing ordering equivalence class. + pub fn add_new_orderings( + &mut self, + orderings: impl IntoIterator, + ) { + self.oeq_class.add_new_orderings(orderings); + } + + /// Incorporates the given equivalence group to into the existing + /// equivalence group within. + pub fn add_equivalence_group(&mut self, other_eq_group: EquivalenceGroup) { + self.eq_group.extend(other_eq_group); + } + + /// Adds a new equality condition into the existing equivalence group. + /// If the given equality defines a new equivalence class, adds this new + /// equivalence class to the equivalence group. + pub fn add_equal_conditions( + &mut self, + left: &Arc, + right: &Arc, + ) { + self.eq_group.add_equal_conditions(left, right); } - 0 -} -/// Checks whether given section satisfies req. -fn req_satisfied(given: LexOrderingRef, req: &[PhysicalSortRequirement]) -> bool { - for (given, req) in izip!(given.iter(), req.iter()) { - let PhysicalSortRequirement { expr, options } = req; - if let Some(options) = options { - if options != &given.options || !expr.eq(&given.expr) { - return false; + /// Track/register physical expressions with constant values. + pub fn add_constants( + mut self, + constants: impl IntoIterator>, + ) -> Self { + for expr in self.eq_group.normalize_exprs(constants) { + if !physical_exprs_contains(&self.constants, &expr) { + self.constants.push(expr); } - } else if !expr.eq(&given.expr) { - return false; } + self } - true -} -/// Combine equivalence properties of the given join inputs. 
-pub fn combine_join_equivalence_properties( - join_type: JoinType, - left_properties: EquivalenceProperties, - right_properties: EquivalenceProperties, - left_columns_len: usize, - on: &[(Column, Column)], - schema: SchemaRef, -) -> EquivalenceProperties { - let mut new_properties = EquivalenceProperties::new(schema); - match join_type { - JoinType::Inner | JoinType::Left | JoinType::Full | JoinType::Right => { - new_properties.extend(left_properties.classes().to_vec()); - let new_right_properties = right_properties - .classes() + /// Updates the ordering equivalence group within assuming that the table + /// is re-sorted according to the argument `sort_exprs`. Note that constants + /// and equivalence classes are unchanged as they are unaffected by a re-sort. + pub fn with_reorder(mut self, sort_exprs: Vec) -> Self { + // TODO: In some cases, existing ordering equivalences may still be valid add this analysis. + self.oeq_class = OrderingEquivalenceClass::new(vec![sort_exprs]); + self + } + + /// Normalizes the given sort expressions (i.e. `sort_exprs`) using the + /// equivalence group and the ordering equivalence class within. + /// + /// Assume that `self.eq_group` states column `a` and `b` are aliases. + /// Also assume that `self.oeq_class` states orderings `d ASC` and `a ASC, c ASC` + /// are equivalent (in the sense that both describe the ordering of the table). + /// If the `sort_exprs` argument were `vec![b ASC, c ASC, a ASC]`, then this + /// function would return `vec![a ASC, c ASC]`. Internally, it would first + /// normalize to `vec![a ASC, c ASC, a ASC]` and end up with the final result + /// after deduplication. + fn normalize_sort_exprs(&self, sort_exprs: LexOrderingRef) -> LexOrdering { + // Convert sort expressions to sort requirements: + let sort_reqs = PhysicalSortRequirement::from_sort_exprs(sort_exprs.iter()); + // Normalize the requirements: + let normalized_sort_reqs = self.normalize_sort_requirements(&sort_reqs); + // Convert sort requirements back to sort expressions: + PhysicalSortRequirement::to_sort_exprs(normalized_sort_reqs) + } + + /// Normalizes the given sort requirements (i.e. `sort_reqs`) using the + /// equivalence group and the ordering equivalence class within. It works by: + /// - Removing expressions that have a constant value from the given requirement. + /// - Replacing sections that belong to some equivalence class in the equivalence + /// group with the first entry in the matching equivalence class. + /// + /// Assume that `self.eq_group` states column `a` and `b` are aliases. + /// Also assume that `self.oeq_class` states orderings `d ASC` and `a ASC, c ASC` + /// are equivalent (in the sense that both describe the ordering of the table). + /// If the `sort_reqs` argument were `vec![b ASC, c ASC, a ASC]`, then this + /// function would return `vec![a ASC, c ASC]`. Internally, it would first + /// normalize to `vec![a ASC, c ASC, a ASC]` and end up with the final result + /// after deduplication. 
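Editor's note: the requirement normalization just described combines three steps: rewrite each expression to its equivalence-class head, drop expressions known to be constant, and de-duplicate while preserving order. The sketch below strings those steps together over plain `&str` expressions; it is an illustration of the algorithm, not the crate's API.

```rust
/// Normalize a lexicographic requirement: substitute class heads, drop
/// constants, and keep only the first occurrence of each expression.
fn normalize_reqs(
    reqs: &[&str],
    classes: &[Vec<&str>],
    constants: &[&str],
) -> Vec<String> {
    let mut output: Vec<String> = Vec::new();
    for req in reqs {
        // Replace with the first member of the matching equivalence class.
        let normalized = classes
            .iter()
            .find(|class| class.contains(req))
            .map_or(*req, |class| class[0]);
        // Skip constants and duplicates.
        if !constants.contains(&normalized)
            && !output.iter().any(|o| o.as_str() == normalized)
        {
            output.push(normalized.to_string());
        }
    }
    output
}

fn main() {
    let classes = vec![vec!["a", "b"]];
    // With `a = b` and `c` constant, `[b, c, a]` normalizes to `[a]`.
    println!("{:?}", normalize_reqs(&["b", "c", "a"], &classes, &["c"]));
}
```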
+ fn normalize_sort_requirements( + &self, + sort_reqs: LexRequirementRef, + ) -> LexRequirement { + let normalized_sort_reqs = self.eq_group.normalize_sort_requirements(sort_reqs); + let constants_normalized = self.eq_group.normalize_exprs(self.constants.clone()); + // Prune redundant sections in the requirement: + collapse_lex_req( + normalized_sort_reqs .iter() - .map(|prop| { - let new_head = Column::new( - prop.head().name(), - left_columns_len + prop.head().index(), - ); - let new_others = prop - .others() + .filter(|&order| { + !physical_exprs_contains(&constants_normalized, &order.expr) + }) + .cloned() + .collect(), + ) + } + + /// Checks whether the given ordering is satisfied by any of the existing + /// orderings. + pub fn ordering_satisfy(&self, given: LexOrderingRef) -> bool { + // Convert the given sort expressions to sort requirements: + let sort_requirements = PhysicalSortRequirement::from_sort_exprs(given.iter()); + self.ordering_satisfy_requirement(&sort_requirements) + } + + /// Checks whether the given sort requirements are satisfied by any of the + /// existing orderings. + pub fn ordering_satisfy_requirement(&self, reqs: LexRequirementRef) -> bool { + // First, standardize the given requirement: + let normalized_reqs = self.normalize_sort_requirements(reqs); + if normalized_reqs.is_empty() { + // Requirements are tautologically satisfied if empty. + return true; + } + let mut indices = HashSet::new(); + for ordering in self.normalized_oeq_class().iter() { + let match_indices = ordering + .iter() + .map(|sort_expr| { + normalized_reqs .iter() - .map(|col| { - Column::new(col.name(), left_columns_len + col.index()) - }) - .collect::>(); - EquivalentClass::new(new_head, new_others) + .position(|sort_req| sort_expr.satisfy(sort_req, &self.schema)) }) .collect::>(); - - new_properties.extend(new_right_properties); - } - JoinType::LeftSemi | JoinType::LeftAnti => { - new_properties.extend(left_properties.classes().to_vec()) - } - JoinType::RightSemi | JoinType::RightAnti => { - new_properties.extend(right_properties.classes().to_vec()) + // Find the largest contiguous increasing sequence starting from the first index: + if let Some(&Some(first)) = match_indices.first() { + indices.insert(first); + let mut iter = match_indices.windows(2); + while let Some([Some(current), Some(next)]) = iter.next() { + if next > current { + indices.insert(*next); + } else { + break; + } + } + } } + indices.len() == normalized_reqs.len() } - if join_type == JoinType::Inner { - on.iter().for_each(|(column1, column2)| { - let new_column2 = - Column::new(column2.name(), left_columns_len + column2.index()); - new_properties.add_equal_conditions((column1, &new_column2)) - }) + /// Checks whether the `given`` sort requirements are equal or more specific + /// than the `reference` sort requirements. + pub fn requirements_compatible( + &self, + given: LexRequirementRef, + reference: LexRequirementRef, + ) -> bool { + let normalized_given = self.normalize_sort_requirements(given); + let normalized_reference = self.normalize_sort_requirements(reference); + + (normalized_reference.len() <= normalized_given.len()) + && normalized_reference + .into_iter() + .zip(normalized_given) + .all(|(reference, given)| given.compatible(&reference)) + } + + /// Returns the finer ordering among the orderings `lhs` and `rhs`, breaking + /// any ties by choosing `lhs`. + /// + /// The finer ordering is the ordering that satisfies both of the orderings. + /// If the orderings are incomparable, returns `None`. 
+ /// + /// For example, the finer ordering among `[a ASC]` and `[a ASC, b ASC]` is + /// the latter. + pub fn get_finer_ordering( + &self, + lhs: LexOrderingRef, + rhs: LexOrderingRef, + ) -> Option { + // Convert the given sort expressions to sort requirements: + let lhs = PhysicalSortRequirement::from_sort_exprs(lhs); + let rhs = PhysicalSortRequirement::from_sort_exprs(rhs); + let finer = self.get_finer_requirement(&lhs, &rhs); + // Convert the chosen sort requirements back to sort expressions: + finer.map(PhysicalSortRequirement::to_sort_exprs) + } + + /// Returns the finer ordering among the requirements `lhs` and `rhs`, + /// breaking any ties by choosing `lhs`. + /// + /// The finer requirements are the ones that satisfy both of the given + /// requirements. If the requirements are incomparable, returns `None`. + /// + /// For example, the finer requirements among `[a ASC]` and `[a ASC, b ASC]` + /// is the latter. + pub fn get_finer_requirement( + &self, + req1: LexRequirementRef, + req2: LexRequirementRef, + ) -> Option { + let mut lhs = self.normalize_sort_requirements(req1); + let mut rhs = self.normalize_sort_requirements(req2); + lhs.iter_mut() + .zip(rhs.iter_mut()) + .all(|(lhs, rhs)| { + lhs.expr.eq(&rhs.expr) + && match (lhs.options, rhs.options) { + (Some(lhs_opt), Some(rhs_opt)) => lhs_opt == rhs_opt, + (Some(options), None) => { + rhs.options = Some(options); + true + } + (None, Some(options)) => { + lhs.options = Some(options); + true + } + (None, None) => true, + } + }) + .then_some(if lhs.len() >= rhs.len() { lhs } else { rhs }) + } + + /// Calculates the "meet" of the given orderings (`lhs` and `rhs`). + /// The meet of a set of orderings is the finest ordering that is satisfied + /// by all the orderings in that set. For details, see: + /// + /// + /// + /// If there is no ordering that satisfies both `lhs` and `rhs`, returns + /// `None`. As an example, the meet of orderings `[a ASC]` and `[a ASC, b ASC]` + /// is `[a ASC]`. + pub fn get_meet_ordering( + &self, + lhs: LexOrderingRef, + rhs: LexOrderingRef, + ) -> Option { + let lhs = self.normalize_sort_exprs(lhs); + let rhs = self.normalize_sort_exprs(rhs); + let mut meet = vec![]; + for (lhs, rhs) in lhs.into_iter().zip(rhs.into_iter()) { + if lhs.eq(&rhs) { + meet.push(lhs); + } else { + break; + } + } + (!meet.is_empty()).then_some(meet) + } + + /// Projects argument `expr` according to `projection_mapping`, taking + /// equivalences into account. + /// + /// For example, assume that columns `a` and `c` are always equal, and that + /// `projection_mapping` encodes following mapping: + /// + /// ```text + /// a -> a1 + /// b -> b1 + /// ``` + /// + /// Then, this function projects `a + b` to `Some(a1 + b1)`, `c + b` to + /// `Some(a1 + b1)` and `d` to `None`, meaning that it cannot be projected. + pub fn project_expr( + &self, + expr: &Arc, + projection_mapping: &ProjectionMapping, + ) -> Option> { + self.eq_group.project_expr(projection_mapping, expr) } - new_properties -} -/// Calculate equivalence properties for the given cross join operation. 
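Editor's note: `get_meet_ordering` above computes the longest common prefix of two (already normalized) orderings, which is the finest ordering both of them satisfy. A short standalone sketch of that prefix computation:

```rust
/// The "meet" of two orderings is their longest common prefix; returns
/// `None` when the orderings share no prefix at all.
fn meet_ordering(lhs: &[&str], rhs: &[&str]) -> Option<Vec<String>> {
    let meet: Vec<String> = lhs
        .iter()
        .zip(rhs.iter())
        .take_while(|(l, r)| l == r)
        .map(|(l, _)| l.to_string())
        .collect();
    (!meet.is_empty()).then_some(meet)
}

fn main() {
    // The meet of `[a ASC]` and `[a ASC, b ASC]` is `[a ASC]`.
    assert_eq!(
        meet_ordering(&["a ASC"], &["a ASC", "b ASC"]),
        Some(vec!["a ASC".to_string()])
    );
    assert_eq!(meet_ordering(&["a ASC"], &["b ASC"]), None);
    println!("ok");
}
```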
-pub fn cross_join_equivalence_properties( - left_properties: EquivalenceProperties, - right_properties: EquivalenceProperties, - left_columns_len: usize, - schema: SchemaRef, -) -> EquivalenceProperties { - let mut new_properties = EquivalenceProperties::new(schema); - new_properties.extend(left_properties.classes().to_vec()); - let new_right_properties = right_properties - .classes() - .iter() - .map(|prop| { - let new_head = - Column::new(prop.head().name(), left_columns_len + prop.head().index()); - let new_others = prop - .others() - .iter() - .map(|col| Column::new(col.name(), left_columns_len + col.index())) - .collect::>(); - EquivalentClass::new(new_head, new_others) - }) - .collect::>(); - new_properties.extend(new_right_properties); - new_properties -} + /// Projects the equivalences within according to `projection_mapping` + /// and `output_schema`. + pub fn project( + &self, + projection_mapping: &ProjectionMapping, + output_schema: SchemaRef, + ) -> Self { + let mut projected_orderings = self + .oeq_class + .iter() + .filter_map(|order| self.eq_group.project_ordering(projection_mapping, order)) + .collect::>(); + for (source, target) in projection_mapping.iter() { + let expr_ordering = ExprOrdering::new(source.clone()) + .transform_up(&|expr| update_ordering(expr, self)) + .unwrap(); + if let SortProperties::Ordered(options) = expr_ordering.state { + // Push new ordering to the state. + projected_orderings.push(vec![PhysicalSortExpr { + expr: target.clone(), + options, + }]); + } + } + Self { + eq_group: self.eq_group.project(projection_mapping), + oeq_class: OrderingEquivalenceClass::new(projected_orderings), + constants: vec![], + schema: output_schema, + } + } -/// Update right table ordering equivalences so that: -/// - They point to valid indices at the output of the join schema, and -/// - They are normalized with respect to equivalence columns. -/// -/// To do so, we increment column indices by the size of the left table when -/// join schema consists of a combination of left and right schema (Inner, -/// Left, Full, Right joins). Then, we normalize the sort expressions of -/// ordering equivalences one by one. We make sure that each expression in the -/// ordering equivalence is either: -/// - The head of the one of the equivalent classes, or -/// - Doesn't have an equivalent column. -/// -/// This way; once we normalize an expression according to equivalence properties, -/// it can thereafter safely be used for ordering equivalence normalization. -fn get_updated_right_ordering_equivalent_class( - join_type: &JoinType, - right_oeq_class: &OrderingEquivalentClass, - left_columns_len: usize, - join_eq_properties: &EquivalenceProperties, -) -> OrderingEquivalentClass { - match join_type { - // In these modes, indices of the right schema should be offset by - // the left table size. - JoinType::Inner | JoinType::Left | JoinType::Full | JoinType::Right => { - let right_oeq_class = right_oeq_class.add_offset(left_columns_len); - return right_oeq_class - .normalize_with_equivalence_properties(join_eq_properties); + /// Returns the longest (potentially partial) permutation satisfying the + /// existing ordering. For example, if we have the equivalent orderings + /// `[a ASC, b ASC]` and `[c DESC]`, with `exprs` containing `[c, b, a, d]`, + /// then this function returns `([a ASC, b ASC, c DESC], [2, 1, 0])`. 
+ /// This means that the specification `[a ASC, b ASC, c DESC]` is satisfied + /// by the existing ordering, and `[a, b, c]` resides at indices: `2, 1, 0` + /// inside the argument `exprs` (respectively). For the mathematical + /// definition of "partial permutation", see: + /// + /// + pub fn find_longest_permutation( + &self, + exprs: &[Arc], + ) -> (LexOrdering, Vec) { + let normalized_exprs = self.eq_group.normalize_exprs(exprs.to_vec()); + // Use a map to associate expression indices with sort options: + let mut ordered_exprs = IndexMap::::new(); + for ordering in self.normalized_oeq_class().iter() { + for sort_expr in ordering { + if let Some(idx) = normalized_exprs + .iter() + .position(|expr| sort_expr.expr.eq(expr)) + { + if let Entry::Vacant(e) = ordered_exprs.entry(idx) { + e.insert(sort_expr.options); + } + } else { + // We only consider expressions that correspond to a prefix + // of one of the equivalent orderings we have. + break; + } + } } - _ => {} - }; - right_oeq_class.normalize_with_equivalence_properties(join_eq_properties) + // Construct the lexicographical ordering according to the permutation: + ordered_exprs + .into_iter() + .map(|(idx, options)| { + ( + PhysicalSortExpr { + expr: exprs[idx].clone(), + options, + }, + idx, + ) + }) + .unzip() + } } /// Calculate ordering equivalence properties for the given join operation. -pub fn combine_join_ordering_equivalence_properties( +pub fn join_equivalence_properties( + left: EquivalenceProperties, + right: EquivalenceProperties, join_type: &JoinType, - left_oeq_properties: &OrderingEquivalenceProperties, - right_oeq_properties: &OrderingEquivalenceProperties, - schema: SchemaRef, + join_schema: SchemaRef, maintains_input_order: &[bool], probe_side: Option, - join_eq_properties: EquivalenceProperties, -) -> OrderingEquivalenceProperties { - let mut new_properties = OrderingEquivalenceProperties::new(schema); - let left_columns_len = left_oeq_properties.schema().fields().len(); - // All joins have 2 children - assert_eq!(maintains_input_order.len(), 2); - let left_maintains = maintains_input_order[0]; - let right_maintains = maintains_input_order[1]; - match (left_maintains, right_maintains) { - (true, true) => { - unreachable!("Cannot maintain ordering of both sides"); - } - (true, false) => { - // In this special case, right side ordering can be prefixed with left side ordering. - if let ( - Some(JoinSide::Left), - JoinType::Inner, - Some(left_oeq_class), - Some(right_oeq_class), - ) = ( - probe_side, - join_type, - left_oeq_properties.oeq_class(), - right_oeq_properties.oeq_class(), - ) { - let updated_right_oeq = get_updated_right_ordering_equivalent_class( + on: &[(Column, Column)], +) -> EquivalenceProperties { + let left_size = left.schema.fields.len(); + let mut result = EquivalenceProperties::new(join_schema); + result.add_equivalence_group(left.eq_group().join( + right.eq_group(), + join_type, + left_size, + on, + )); + + let left_oeq_class = left.oeq_class; + let mut right_oeq_class = right.oeq_class; + match maintains_input_order { + [true, false] => { + // In this special case, right side ordering can be prefixed with + // the left side ordering. 
+ if let (Some(JoinSide::Left), JoinType::Inner) = (probe_side, join_type) { + updated_right_ordering_equivalence_class( + &mut right_oeq_class, join_type, - right_oeq_class, - left_columns_len, - &join_eq_properties, + left_size, ); - // Right side ordering equivalence properties should be prepended with - // those of the left side while constructing output ordering equivalence - // properties since stream side is the left side. + // Right side ordering equivalence properties should be prepended + // with those of the left side while constructing output ordering + // equivalence properties since stream side is the left side. // - // If the right table ordering equivalences contain `b ASC`, and the output - // ordering of the left table is `a ASC`, then the ordering equivalence `b ASC` - // for the right table should be converted to `a ASC, b ASC` before it is added - // to the ordering equivalences of the join. - let mut orderings = vec![]; - for left_ordering in left_oeq_class.iter() { - for right_ordering in updated_right_oeq.iter() { - let mut ordering = left_ordering.to_vec(); - ordering.extend(right_ordering.to_vec()); - let ordering_normalized = - join_eq_properties.normalize_sort_exprs(&ordering); - orderings.push(ordering_normalized); - } - } - if !orderings.is_empty() { - let head = orderings.swap_remove(0); - let new_oeq_class = OrderingEquivalentClass::new(head, orderings); - new_properties.extend(Some(new_oeq_class)); - } + // For example, if the right side ordering equivalences contain + // `b ASC`, and the left side ordering equivalences contain `a ASC`, + // then we should add `a ASC, b ASC` to the ordering equivalences + // of the join output. + let out_oeq_class = left_oeq_class.join_suffix(&right_oeq_class); + result.add_ordering_equivalence_class(out_oeq_class); } else { - new_properties.extend(left_oeq_properties.oeq_class().cloned()); + result.add_ordering_equivalence_class(left_oeq_class); } } - (false, true) => { - let updated_right_oeq = - right_oeq_properties.oeq_class().map(|right_oeq_class| { - get_updated_right_ordering_equivalent_class( - join_type, - right_oeq_class, - left_columns_len, - &join_eq_properties, - ) - }); - // In this special case, left side ordering can be prefixed with right side ordering. - if let ( - Some(JoinSide::Right), - JoinType::Inner, - Some(left_oeq_class), - Some(right_oeg_class), - ) = ( - probe_side, + [false, true] => { + updated_right_ordering_equivalence_class( + &mut right_oeq_class, join_type, - left_oeq_properties.oeq_class(), - &updated_right_oeq, - ) { - // Left side ordering equivalence properties should be prepended with - // those of the right side while constructing output ordering equivalence - // properties since stream side is the right side. + left_size, + ); + // In this special case, left side ordering can be prefixed with + // the right side ordering. + if let (Some(JoinSide::Right), JoinType::Inner) = (probe_side, join_type) { + // Left side ordering equivalence properties should be prepended + // with those of the right side while constructing output ordering + // equivalence properties since stream side is the right side. // - // If the right table ordering equivalences contain `b ASC`, and the output - // ordering of the left table is `a ASC`, then the ordering equivalence `b ASC` - // for the right table should be converted to `a ASC, b ASC` before it is added - // to the ordering equivalences of the join. 
- let mut orderings = vec![]; - for right_ordering in right_oeg_class.iter() { - for left_ordering in left_oeq_class.iter() { - let mut ordering = right_ordering.to_vec(); - ordering.extend(left_ordering.to_vec()); - let ordering_normalized = - join_eq_properties.normalize_sort_exprs(&ordering); - orderings.push(ordering_normalized); - } - } - if !orderings.is_empty() { - let head = orderings.swap_remove(0); - let new_oeq_class = OrderingEquivalentClass::new(head, orderings); - new_properties.extend(Some(new_oeq_class)); - } + // For example, if the left side ordering equivalences contain + // `a ASC`, and the right side ordering equivalences contain `b ASC`, + // then we should add `b ASC, a ASC` to the ordering equivalences + // of the join output. + let out_oeq_class = right_oeq_class.join_suffix(&left_oeq_class); + result.add_ordering_equivalence_class(out_oeq_class); } else { - new_properties.extend(updated_right_oeq); + result.add_ordering_equivalence_class(right_oeq_class); } } - (false, false) => {} + [false, false] => {} + [true, true] => unreachable!("Cannot maintain ordering of both sides"), + _ => unreachable!("Join operators can not have more than two children"), } - new_properties -} - -/// This function searches for the slice `section` inside the slice `given`. -/// It returns each range where `section` is compatible with the corresponding -/// slice in `given`. -fn get_compatible_ranges( - given: &[PhysicalSortRequirement], - section: &[PhysicalSortRequirement], -) -> Vec> { - let n_section = section.len(); - let n_end = if given.len() >= n_section { - given.len() - n_section + 1 - } else { - 0 - }; - (0..n_end) - .filter_map(|idx| { - let end = idx + n_section; - given[idx..end] - .iter() - .zip(section) - .all(|(req, given)| given.compatible(req)) - .then_some(Range { start: idx, end }) - }) - .collect() -} - -/// It is similar to contains method of vector. -/// Finds whether `expr` is among `physical_exprs`. -pub fn physical_exprs_contains( - physical_exprs: &[Arc], - expr: &Arc, -) -> bool { - physical_exprs - .iter() - .any(|physical_expr| physical_expr.eq(expr)) + result } -/// Remove ordering requirements that have constant value -fn prune_sort_reqs_with_constants( - ordering: &[PhysicalSortRequirement], - constants: &[Arc], -) -> Vec { - ordering - .iter() - .filter(|&order| !physical_exprs_contains(constants, &order.expr)) - .cloned() - .collect() -} - -/// Adds the `offset` value to `Column` indices inside `expr`. This function is -/// generally used during the update of the right table schema in join operations. -pub fn add_offset_to_expr( - expr: Arc, - offset: usize, -) -> Arc { - expr.transform_down(&|e| match e.as_any().downcast_ref::() { - Some(col) => Ok(Transformed::Yes(Arc::new(Column::new( - col.name(), - offset + col.index(), - )))), - None => Ok(Transformed::No(e)), - }) - .unwrap() - // Note that we can safely unwrap here since our transform always returns - // an `Ok` value. -} - -/// Adds the `offset` value to `Column` indices inside `sort_expr.expr`. -pub(crate) fn add_offset_to_sort_expr( - sort_expr: &PhysicalSortExpr, - offset: usize, -) -> PhysicalSortExpr { - PhysicalSortExpr { - expr: add_offset_to_expr(sort_expr.expr.clone(), offset), - options: sort_expr.options, +/// In the context of a join, update the right side `OrderingEquivalenceClass` +/// so that they point to valid indices in the join output schema. 
+/// +/// To do so, we increment column indices by the size of the left table when +/// join schema consists of a combination of the left and right schemas. This +/// is the case for `Inner`, `Left`, `Full` and `Right` joins. For other cases, +/// indices do not change. +fn updated_right_ordering_equivalence_class( + right_oeq_class: &mut OrderingEquivalenceClass, + join_type: &JoinType, + left_size: usize, +) { + if matches!( + join_type, + JoinType::Inner | JoinType::Left | JoinType::Full | JoinType::Right + ) { + right_oeq_class.add_offset(left_size); } } -/// Adds the `offset` value to `Column` indices for each `sort_expr.expr` -/// inside `sort_exprs`. -pub fn add_offset_to_lex_ordering( - sort_exprs: LexOrderingRef, - offset: usize, -) -> LexOrdering { - sort_exprs - .iter() - .map(|sort_expr| add_offset_to_sort_expr(sort_expr, offset)) - .collect() +/// Calculates the [`SortProperties`] of a given [`ExprOrdering`] node. +/// The node can either be a leaf node, or an intermediate node: +/// - If it is a leaf node, we directly find the order of the node by looking +/// at the given sort expression and equivalence properties if it is a `Column` +/// leaf, or we mark it as unordered. In the case of a `Literal` leaf, we mark +/// it as singleton so that it can cooperate with all ordered columns. +/// - If it is an intermediate node, the children states matter. Each `PhysicalExpr` +/// and operator has its own rules on how to propagate the children orderings. +/// However, before we engage in recursion, we check whether this intermediate +/// node directly matches with the sort expression. If there is a match, the +/// sort expression emerges at that node immediately, discarding the recursive +/// result coming from its children. +fn update_ordering( + mut node: ExprOrdering, + eq_properties: &EquivalenceProperties, +) -> Result> { + if !node.expr.children().is_empty() { + // We have an intermediate (non-leaf) node, account for its children: + node.state = node.expr.get_ordering(&node.children_states); + Ok(Transformed::Yes(node)) + } else if node.expr.as_any().is::() { + // We have a Column, which is one of the two possible leaf node types: + let eq_group = &eq_properties.eq_group; + let normalized_expr = eq_group.normalize_expr(node.expr.clone()); + let oeq_class = &eq_properties.oeq_class; + if let Some(options) = oeq_class.get_options(&normalized_expr) { + node.state = SortProperties::Ordered(options); + Ok(Transformed::Yes(node)) + } else { + Ok(Transformed::No(node)) + } + } else { + // We have a Literal, which is the other possible leaf node type: + node.state = node.expr.get_ordering(&[]); + Ok(Transformed::Yes(node)) + } } #[cfg(test)] mod tests { + use std::ops::Not; + use std::sync::Arc; + use super::*; - use crate::expressions::Column; + use crate::expressions::{col, lit, BinaryExpr, Column}; + use crate::physical_expr::{physical_exprs_bag_equal, physical_exprs_equal}; + + use arrow::compute::{lexsort_to_indices, SortColumn}; use arrow::datatypes::{DataType, Field, Schema}; + use arrow_array::{ArrayRef, RecordBatch, UInt32Array, UInt64Array}; + use arrow_schema::{Fields, SortOptions}; use datafusion_common::Result; + use datafusion_expr::Operator; + + use itertools::{izip, Itertools}; + use rand::rngs::StdRng; + use rand::seq::SliceRandom; + use rand::{Rng, SeedableRng}; + + // Generate a schema which consists of 8 columns (a, b, c, d, e, f, g, h) + fn create_test_schema() -> Result { + let a = Field::new("a", DataType::Int32, true); + let b = Field::new("b", DataType::Int32, 
true); + let c = Field::new("c", DataType::Int32, true); + let d = Field::new("d", DataType::Int32, true); + let e = Field::new("e", DataType::Int32, true); + let f = Field::new("f", DataType::Int32, true); + let g = Field::new("g", DataType::Int32, true); + let h = Field::new("h", DataType::Int32, true); + let schema = Arc::new(Schema::new(vec![a, b, c, d, e, f, g, h])); + + Ok(schema) + } + + /// Construct a schema with following properties + /// Schema satisfies following orderings: + /// [a ASC], [d ASC, b ASC], [e DESC, f ASC, g ASC] + /// and + /// Column [a=c] (e.g they are aliases). + fn create_test_params() -> Result<(SchemaRef, EquivalenceProperties)> { + let test_schema = create_test_schema()?; + let col_a = &col("a", &test_schema)?; + let col_b = &col("b", &test_schema)?; + let col_c = &col("c", &test_schema)?; + let col_d = &col("d", &test_schema)?; + let col_e = &col("e", &test_schema)?; + let col_f = &col("f", &test_schema)?; + let col_g = &col("g", &test_schema)?; + let mut eq_properties = EquivalenceProperties::new(test_schema.clone()); + eq_properties.add_equal_conditions(col_a, col_c); + + let option_asc = SortOptions { + descending: false, + nulls_first: false, + }; + let option_desc = SortOptions { + descending: true, + nulls_first: true, + }; + let orderings = vec![ + // [a ASC] + vec![(col_a, option_asc)], + // [d ASC, b ASC] + vec![(col_d, option_asc), (col_b, option_asc)], + // [e DESC, f ASC, g ASC] + vec![ + (col_e, option_desc), + (col_f, option_asc), + (col_g, option_asc), + ], + ]; + let orderings = convert_to_orderings(&orderings); + eq_properties.add_new_orderings(orderings); + Ok((test_schema, eq_properties)) + } + + // Generate a schema which consists of 6 columns (a, b, c, d, e, f) + fn create_test_schema_2() -> Result { + let a = Field::new("a", DataType::Int32, true); + let b = Field::new("b", DataType::Int32, true); + let c = Field::new("c", DataType::Int32, true); + let d = Field::new("d", DataType::Int32, true); + let e = Field::new("e", DataType::Int32, true); + let f = Field::new("f", DataType::Int32, true); + let schema = Arc::new(Schema::new(vec![a, b, c, d, e, f])); + + Ok(schema) + } + + /// Construct a schema with random ordering + /// among column a, b, c, d + /// where + /// Column [a=f] (e.g they are aliases). + /// Column e is constant. + fn create_random_schema(seed: u64) -> Result<(SchemaRef, EquivalenceProperties)> { + let test_schema = create_test_schema_2()?; + let col_a = &col("a", &test_schema)?; + let col_b = &col("b", &test_schema)?; + let col_c = &col("c", &test_schema)?; + let col_d = &col("d", &test_schema)?; + let col_e = &col("e", &test_schema)?; + let col_f = &col("f", &test_schema)?; + let col_exprs = [col_a, col_b, col_c, col_d, col_e, col_f]; + + let mut eq_properties = EquivalenceProperties::new(test_schema.clone()); + // Define a and f are aliases + eq_properties.add_equal_conditions(col_a, col_f); + // Column e has constant value. 
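Since column e is declared constant in this helper, it is worth recalling why constants matter: a sort requirement on a constant column is trivially satisfied and can be pruned, which is essentially what the removed prune_sort_reqs_with_constants helper above did. A minimal standalone sketch of that rule over plain column names (prune_constant_requirements is a hypothetical name):

fn prune_constant_requirements(required: &[&str], constants: &[&str]) -> Vec<String> {
    required
        .iter()
        // A requirement on a constant column conveys no ordering information.
        .filter(|col| !constants.contains(*col))
        .map(|col| col.to_string())
        .collect()
}

fn main() {
    // With `e` constant, a requirement [e ASC, a ASC] reduces to [a ASC].
    assert_eq!(
        prune_constant_requirements(&["e", "a"], &["e"]),
        vec!["a".to_string()]
    );
}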
+ eq_properties = eq_properties.add_constants([col_e.clone()]); + + // Randomly order columns for sorting + let mut rng = StdRng::seed_from_u64(seed); + let mut remaining_exprs = col_exprs[0..4].to_vec(); // only a, b, c, d are sorted + + let options_asc = SortOptions { + descending: false, + nulls_first: false, + }; - use arrow_schema::SortOptions; - use std::sync::Arc; + while !remaining_exprs.is_empty() { + let n_sort_expr = rng.gen_range(0..remaining_exprs.len() + 1); + remaining_exprs.shuffle(&mut rng); + + let ordering = remaining_exprs + .drain(0..n_sort_expr) + .map(|expr| PhysicalSortExpr { + expr: expr.clone(), + options: options_asc, + }) + .collect(); + + eq_properties.add_new_orderings([ordering]); + } + + Ok((test_schema, eq_properties)) + } - fn convert_to_requirement( - in_data: &[(&Column, Option)], + // Convert each tuple to PhysicalSortRequirement + fn convert_to_sort_reqs( + in_data: &[(&Arc, Option)], ) -> Vec { in_data .iter() - .map(|(col, options)| { - PhysicalSortRequirement::new(Arc::new((*col).clone()) as _, *options) + .map(|(expr, options)| { + PhysicalSortRequirement::new((*expr).clone(), *options) + }) + .collect::>() + } + + // Convert each tuple to PhysicalSortExpr + fn convert_to_sort_exprs( + in_data: &[(&Arc, SortOptions)], + ) -> Vec { + in_data + .iter() + .map(|(expr, options)| PhysicalSortExpr { + expr: (*expr).clone(), + options: *options, }) .collect::>() } + // Convert each inner tuple to PhysicalSortExpr + fn convert_to_orderings( + orderings: &[Vec<(&Arc, SortOptions)>], + ) -> Vec> { + orderings + .iter() + .map(|sort_exprs| convert_to_sort_exprs(sort_exprs)) + .collect() + } + #[test] fn add_equal_conditions_test() -> Result<()> { let schema = Arc::new(Schema::new(vec![ @@ -1239,38 +1425,49 @@ mod tests { ])); let mut eq_properties = EquivalenceProperties::new(schema); - let new_condition = (&Column::new("a", 0), &Column::new("b", 1)); - eq_properties.add_equal_conditions(new_condition); - assert_eq!(eq_properties.classes().len(), 1); - - let new_condition = (&Column::new("b", 1), &Column::new("a", 0)); - eq_properties.add_equal_conditions(new_condition); - assert_eq!(eq_properties.classes().len(), 1); - assert_eq!(eq_properties.classes()[0].len(), 2); - assert!(eq_properties.classes()[0].contains(&Column::new("a", 0))); - assert!(eq_properties.classes()[0].contains(&Column::new("b", 1))); - - let new_condition = (&Column::new("b", 1), &Column::new("c", 2)); - eq_properties.add_equal_conditions(new_condition); - assert_eq!(eq_properties.classes().len(), 1); - assert_eq!(eq_properties.classes()[0].len(), 3); - assert!(eq_properties.classes()[0].contains(&Column::new("a", 0))); - assert!(eq_properties.classes()[0].contains(&Column::new("b", 1))); - assert!(eq_properties.classes()[0].contains(&Column::new("c", 2))); - - let new_condition = (&Column::new("x", 3), &Column::new("y", 4)); - eq_properties.add_equal_conditions(new_condition); - assert_eq!(eq_properties.classes().len(), 2); - - let new_condition = (&Column::new("x", 3), &Column::new("a", 0)); - eq_properties.add_equal_conditions(new_condition); - assert_eq!(eq_properties.classes().len(), 1); - assert_eq!(eq_properties.classes()[0].len(), 5); - assert!(eq_properties.classes()[0].contains(&Column::new("a", 0))); - assert!(eq_properties.classes()[0].contains(&Column::new("b", 1))); - assert!(eq_properties.classes()[0].contains(&Column::new("c", 2))); - assert!(eq_properties.classes()[0].contains(&Column::new("x", 3))); - assert!(eq_properties.classes()[0].contains(&Column::new("y", 4))); + 
let col_a_expr = Arc::new(Column::new("a", 0)) as Arc; + let col_b_expr = Arc::new(Column::new("b", 1)) as Arc; + let col_c_expr = Arc::new(Column::new("c", 2)) as Arc; + let col_x_expr = Arc::new(Column::new("x", 3)) as Arc; + let col_y_expr = Arc::new(Column::new("y", 4)) as Arc; + + // a and b are aliases + eq_properties.add_equal_conditions(&col_a_expr, &col_b_expr); + assert_eq!(eq_properties.eq_group().len(), 1); + + // This new entry is redundant, size shouldn't increase + eq_properties.add_equal_conditions(&col_b_expr, &col_a_expr); + assert_eq!(eq_properties.eq_group().len(), 1); + let eq_groups = &eq_properties.eq_group().classes[0]; + assert_eq!(eq_groups.len(), 2); + assert!(physical_exprs_contains(eq_groups, &col_a_expr)); + assert!(physical_exprs_contains(eq_groups, &col_b_expr)); + + // b and c are aliases. Exising equivalence class should expand, + // however there shouldn't be any new equivalence class + eq_properties.add_equal_conditions(&col_b_expr, &col_c_expr); + assert_eq!(eq_properties.eq_group().len(), 1); + let eq_groups = &eq_properties.eq_group().classes[0]; + assert_eq!(eq_groups.len(), 3); + assert!(physical_exprs_contains(eq_groups, &col_a_expr)); + assert!(physical_exprs_contains(eq_groups, &col_b_expr)); + assert!(physical_exprs_contains(eq_groups, &col_c_expr)); + + // This is a new set of equality. Hence equivalent class count should be 2. + eq_properties.add_equal_conditions(&col_x_expr, &col_y_expr); + assert_eq!(eq_properties.eq_group().len(), 2); + + // This equality bridges distinct equality sets. + // Hence equivalent class count should decrease from 2 to 1. + eq_properties.add_equal_conditions(&col_x_expr, &col_a_expr); + assert_eq!(eq_properties.eq_group().len(), 1); + let eq_groups = &eq_properties.eq_group().classes[0]; + assert_eq!(eq_groups.len(), 5); + assert!(physical_exprs_contains(eq_groups, &col_a_expr)); + assert!(physical_exprs_contains(eq_groups, &col_b_expr)); + assert!(physical_exprs_contains(eq_groups, &col_c_expr)); + assert!(physical_exprs_contains(eq_groups, &col_x_expr)); + assert!(physical_exprs_contains(eq_groups, &col_y_expr)); Ok(()) } @@ -1283,11 +1480,8 @@ mod tests { Field::new("c", DataType::Int64, true), ])); - let mut input_properties = EquivalenceProperties::new(input_schema); - let new_condition = (&Column::new("a", 0), &Column::new("b", 1)); - input_properties.add_equal_conditions(new_condition); - let new_condition = (&Column::new("b", 1), &Column::new("c", 2)); - input_properties.add_equal_conditions(new_condition); + let input_properties = EquivalenceProperties::new(input_schema.clone()); + let col_a = col("a", &input_schema)?; let out_schema = Arc::new(Schema::new(vec![ Field::new("a1", DataType::Int64, true), @@ -1296,105 +1490,558 @@ mod tests { Field::new("a4", DataType::Int64, true), ])); - let mut alias_map = HashMap::new(); - alias_map.insert( - Column::new("a", 0), - vec![ - Column::new("a1", 0), - Column::new("a2", 1), - Column::new("a3", 2), - Column::new("a4", 3), + // a as a1, a as a2, a as a3, a as a3 + let col_a1 = &col("a1", &out_schema)?; + let col_a2 = &col("a2", &out_schema)?; + let col_a3 = &col("a3", &out_schema)?; + let col_a4 = &col("a4", &out_schema)?; + let projection_mapping = ProjectionMapping { + inner: vec![ + (col_a.clone(), col_a1.clone()), + (col_a.clone(), col_a2.clone()), + (col_a.clone(), col_a3.clone()), + (col_a.clone(), col_a4.clone()), ], - ); - let mut out_properties = EquivalenceProperties::new(out_schema); + }; + let out_properties = 
input_properties.project(&projection_mapping, out_schema); - project_equivalence_properties(input_properties, &alias_map, &mut out_properties); - assert_eq!(out_properties.classes().len(), 1); - assert_eq!(out_properties.classes()[0].len(), 4); - assert!(out_properties.classes()[0].contains(&Column::new("a1", 0))); - assert!(out_properties.classes()[0].contains(&Column::new("a2", 1))); - assert!(out_properties.classes()[0].contains(&Column::new("a3", 2))); - assert!(out_properties.classes()[0].contains(&Column::new("a4", 3))); + // At the output a1=a2=a3=a4 + assert_eq!(out_properties.eq_group().len(), 1); + let eq_class = &out_properties.eq_group().classes[0]; + assert_eq!(eq_class.len(), 4); + assert!(physical_exprs_contains(eq_class, col_a1)); + assert!(physical_exprs_contains(eq_class, col_a2)); + assert!(physical_exprs_contains(eq_class, col_a3)); + assert!(physical_exprs_contains(eq_class, col_a4)); Ok(()) } #[test] - fn test_collapse_vec() -> Result<()> { - assert_eq!(collapse_vec(vec![1, 2, 3]), vec![1, 2, 3]); - assert_eq!(collapse_vec(vec![1, 2, 3, 2, 3]), vec![1, 2, 3]); - assert_eq!(collapse_vec(vec![3, 1, 2, 3, 2, 3]), vec![3, 1, 2]); + fn test_ordering_satisfy() -> Result<()> { + let crude = vec![PhysicalSortExpr { + expr: Arc::new(Column::new("a", 0)), + options: SortOptions::default(), + }]; + let finer = vec![ + PhysicalSortExpr { + expr: Arc::new(Column::new("a", 0)), + options: SortOptions::default(), + }, + PhysicalSortExpr { + expr: Arc::new(Column::new("b", 1)), + options: SortOptions::default(), + }, + ]; + // finer ordering satisfies, crude ordering should return true + let empty_schema = &Arc::new(Schema::empty()); + let mut eq_properties_finer = EquivalenceProperties::new(empty_schema.clone()); + eq_properties_finer.oeq_class.push(finer.clone()); + assert!(eq_properties_finer.ordering_satisfy(&crude)); + + // Crude ordering doesn't satisfy finer ordering. should return false + let mut eq_properties_crude = EquivalenceProperties::new(empty_schema.clone()); + eq_properties_crude.oeq_class.push(crude.clone()); + assert!(!eq_properties_crude.ordering_satisfy(&finer)); Ok(()) } #[test] - fn test_get_compatible_ranges() -> Result<()> { - let col_a = &Column::new("a", 0); - let col_b = &Column::new("b", 1); - let option1 = SortOptions { + fn test_ordering_satisfy_with_equivalence() -> Result<()> { + // Schema satisfies following orderings: + // [a ASC], [d ASC, b ASC], [e DESC, f ASC, g ASC] + // and + // Column [a=c] (e.g they are aliases). 
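The crude/finer behaviour verified in test_ordering_satisfy above reduces to a prefix check; here is a minimal standalone sketch under that simplification (string labels only; ordering_is_prefix_satisfied is a hypothetical name, and the real ordering_satisfy additionally normalizes through equivalences and constants):

fn ordering_is_prefix_satisfied(required: &[&str], existing: &[&str]) -> bool {
    // A requirement is satisfied when it is a leading prefix of an existing ordering.
    required.len() <= existing.len()
        && required.iter().zip(existing).all(|(req, have)| req == have)
}

fn main() {
    let crude = ["a ASC"];
    let finer = ["a ASC", "b ASC"];
    // The finer ordering satisfies the crude requirement ...
    assert!(ordering_is_prefix_satisfied(&crude, &finer));
    // ... but the crude ordering cannot satisfy the finer one.
    assert!(!ordering_is_prefix_satisfied(&finer, &crude));
}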
+ let (test_schema, eq_properties) = create_test_params()?; + let col_a = &col("a", &test_schema)?; + let col_b = &col("b", &test_schema)?; + let col_c = &col("c", &test_schema)?; + let col_d = &col("d", &test_schema)?; + let col_e = &col("e", &test_schema)?; + let col_f = &col("f", &test_schema)?; + let col_g = &col("g", &test_schema)?; + let option_asc = SortOptions { descending: false, nulls_first: false, }; - let test_data = vec![ + let option_desc = SortOptions { + descending: true, + nulls_first: true, + }; + let table_data_with_properties = + generate_table_for_eq_properties(&eq_properties, 625, 5)?; + + // First element in the tuple stores vector of requirement, second element is the expected return value for ordering_satisfy function + let requirements = vec![ + // `a ASC NULLS LAST`, expects `ordering_satisfy` to be `true`, since existing ordering `a ASC NULLS LAST, b ASC NULLS LAST` satisfies it + (vec![(col_a, option_asc)], true), + (vec![(col_a, option_desc)], false), + // Test whether equivalence works as expected + (vec![(col_c, option_asc)], true), + (vec![(col_c, option_desc)], false), + // Test whether ordering equivalence works as expected + (vec![(col_d, option_asc)], true), + (vec![(col_d, option_asc), (col_b, option_asc)], true), + (vec![(col_d, option_desc), (col_b, option_asc)], false), ( - vec![(col_a, Some(option1)), (col_b, Some(option1))], - vec![(col_a, Some(option1))], - vec![(0, 1)], + vec![ + (col_e, option_desc), + (col_f, option_asc), + (col_g, option_asc), + ], + true, ), + (vec![(col_e, option_desc), (col_f, option_asc)], true), + (vec![(col_e, option_asc), (col_f, option_asc)], false), + (vec![(col_e, option_desc), (col_b, option_asc)], false), + (vec![(col_e, option_asc), (col_b, option_asc)], false), ( - vec![(col_a, None), (col_b, Some(option1))], - vec![(col_a, Some(option1))], - vec![(0, 1)], + vec![ + (col_d, option_asc), + (col_b, option_asc), + (col_d, option_asc), + (col_b, option_asc), + ], + true, ), ( vec![ - (col_a, None), - (col_b, Some(option1)), - (col_a, Some(option1)), + (col_d, option_asc), + (col_b, option_asc), + (col_e, option_desc), + (col_f, option_asc), ], - vec![(col_a, Some(option1))], - vec![(0, 1), (2, 3)], + true, + ), + ( + vec![ + (col_d, option_asc), + (col_b, option_asc), + (col_e, option_desc), + (col_b, option_asc), + ], + true, + ), + ( + vec![ + (col_d, option_asc), + (col_b, option_asc), + (col_d, option_desc), + (col_b, option_asc), + ], + true, + ), + ( + vec![ + (col_d, option_asc), + (col_b, option_asc), + (col_e, option_asc), + (col_f, option_asc), + ], + false, + ), + ( + vec![ + (col_d, option_asc), + (col_b, option_asc), + (col_e, option_asc), + (col_b, option_asc), + ], + false, + ), + (vec![(col_d, option_asc), (col_e, option_desc)], true), + ( + vec![ + (col_d, option_asc), + (col_c, option_asc), + (col_b, option_asc), + ], + true, + ), + ( + vec![ + (col_d, option_asc), + (col_e, option_desc), + (col_f, option_asc), + (col_b, option_asc), + ], + true, + ), + ( + vec![ + (col_d, option_asc), + (col_e, option_desc), + (col_c, option_asc), + (col_b, option_asc), + ], + true, + ), + ( + vec![ + (col_d, option_asc), + (col_e, option_desc), + (col_b, option_asc), + (col_f, option_asc), + ], + true, + ), + ]; + + for (cols, expected) in requirements { + let err_msg = format!("Error in test case:{cols:?}"); + let required = cols + .into_iter() + .map(|(expr, options)| PhysicalSortExpr { + expr: expr.clone(), + options, + }) + .collect::>(); + + // Check expected result with experimental result. 
+ assert_eq!( + is_table_same_after_sort( + required.clone(), + table_data_with_properties.clone() + )?, + expected + ); + assert_eq!( + eq_properties.ordering_satisfy(&required), + expected, + "{err_msg}" + ); + } + Ok(()) + } + + #[test] + fn test_ordering_satisfy_with_equivalence_random() -> Result<()> { + const N_RANDOM_SCHEMA: usize = 5; + const N_ELEMENTS: usize = 125; + const N_DISTINCT: usize = 5; + const SORT_OPTIONS: SortOptions = SortOptions { + descending: false, + nulls_first: false, + }; + + for seed in 0..N_RANDOM_SCHEMA { + // Create a random schema with random properties + let (test_schema, eq_properties) = create_random_schema(seed as u64)?; + // Generate a data that satisfies properties given + let table_data_with_properties = + generate_table_for_eq_properties(&eq_properties, N_ELEMENTS, N_DISTINCT)?; + let col_exprs = vec![ + col("a", &test_schema)?, + col("b", &test_schema)?, + col("c", &test_schema)?, + col("d", &test_schema)?, + col("e", &test_schema)?, + col("f", &test_schema)?, + ]; + + for n_req in 0..=col_exprs.len() { + for exprs in col_exprs.iter().combinations(n_req) { + let requirement = exprs + .into_iter() + .map(|expr| PhysicalSortExpr { + expr: expr.clone(), + options: SORT_OPTIONS, + }) + .collect::>(); + let expected = is_table_same_after_sort( + requirement.clone(), + table_data_with_properties.clone(), + )?; + let err_msg = format!( + "Error in test case requirement:{:?}, expected: {:?}", + requirement, expected + ); + // Check whether ordering_satisfy API result and + // experimental result matches. + assert_eq!( + eq_properties.ordering_satisfy(&requirement), + expected, + "{}", + err_msg + ); + } + } + } + + Ok(()) + } + + #[test] + fn test_ordering_satisfy_different_lengths() -> Result<()> { + let test_schema = create_test_schema()?; + let col_a = &col("a", &test_schema)?; + let col_b = &col("b", &test_schema)?; + let col_c = &col("c", &test_schema)?; + let col_d = &col("d", &test_schema)?; + let col_e = &col("e", &test_schema)?; + let col_f = &col("f", &test_schema)?; + let options = SortOptions { + descending: false, + nulls_first: false, + }; + // a=c (e.g they are aliases). + let mut eq_properties = EquivalenceProperties::new(test_schema); + eq_properties.add_equal_conditions(col_a, col_c); + + let orderings = vec![ + vec![(col_a, options)], + vec![(col_e, options)], + vec![(col_d, options), (col_f, options)], + ]; + let orderings = convert_to_orderings(&orderings); + + // Column [a ASC], [e ASC], [d ASC, f ASC] are all valid orderings for the schema. + eq_properties.add_new_orderings(orderings); + + // First entry in the tuple is required ordering, second entry is the expected flag + // that indicates whether this required ordering is satisfied. + // ([a ASC], true) indicate a ASC requirement is already satisfied by existing orderings. 
+ let test_cases = vec![ + // [c ASC, a ASC, e ASC], expected represents this requirement is satisfied + ( + vec![(col_c, options), (col_a, options), (col_e, options)], + true, + ), + (vec![(col_c, options), (col_b, options)], false), + (vec![(col_c, options), (col_d, options)], true), + ( + vec![(col_d, options), (col_f, options), (col_b, options)], + false, + ), + (vec![(col_d, options), (col_f, options)], true), + ]; + + for (reqs, expected) in test_cases { + let err_msg = + format!("error in test reqs: {:?}, expected: {:?}", reqs, expected,); + let reqs = convert_to_sort_exprs(&reqs); + assert_eq!( + eq_properties.ordering_satisfy(&reqs), + expected, + "{}", + err_msg + ); + } + + Ok(()) + } + + #[test] + fn test_bridge_groups() -> Result<()> { + // First entry in the tuple is argument, second entry is the bridged result + let test_cases = vec![ + // ------- TEST CASE 1 -----------// + ( + vec![vec![1, 2, 3], vec![2, 4, 5], vec![11, 12, 9], vec![7, 6, 5]], + // Expected is compared with set equality. Order of the specific results may change. + vec![vec![1, 2, 3, 4, 5, 6, 7], vec![9, 11, 12]], + ), + // ------- TEST CASE 2 -----------// + ( + vec![vec![1, 2, 3], vec![3, 4, 5], vec![9, 8, 7], vec![7, 6, 5]], + // Expected + vec![vec![1, 2, 3, 4, 5, 6, 7, 8, 9]], ), ]; - for (searched, to_search, expected) in test_data { - let searched = convert_to_requirement(&searched); - let to_search = convert_to_requirement(&to_search); + for (entries, expected) in test_cases { + let entries = entries + .into_iter() + .map(|entry| entry.into_iter().map(lit).collect::>()) + .collect::>(); let expected = expected .into_iter() - .map(|(start, end)| Range { start, end }) + .map(|entry| entry.into_iter().map(lit).collect::>()) .collect::>(); - assert_eq!(get_compatible_ranges(&searched, &to_search), expected); + let mut eq_groups = EquivalenceGroup::new(entries.clone()); + eq_groups.bridge_classes(); + let eq_groups = eq_groups.classes; + let err_msg = format!( + "error in test entries: {:?}, expected: {:?}, actual:{:?}", + entries, expected, eq_groups + ); + assert_eq!(eq_groups.len(), expected.len(), "{}", err_msg); + for idx in 0..eq_groups.len() { + assert!( + physical_exprs_bag_equal(&eq_groups[idx], &expected[idx]), + "{}", + err_msg + ); + } } Ok(()) } + #[test] + fn test_remove_redundant_entries_eq_group() -> Result<()> { + let entries = vec![ + vec![lit(1), lit(1), lit(2)], + // This group is meaningless should be removed + vec![lit(3), lit(3)], + vec![lit(4), lit(5), lit(6)], + ]; + // Given equivalences classes are not in succinct form. + // Expected form is the most plain representation that is functionally same. 
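The bridging behaviour exercised by test_bridge_groups amounts to repeatedly merging classes that share at least one member until a fixed point is reached; a rough standalone sketch over integer sets (the bridge helper is hypothetical and only mirrors the intent of bridge_classes):

use std::collections::BTreeSet;

// Repeatedly merge groups that share a member until no further merge is possible.
fn bridge(mut groups: Vec<BTreeSet<i32>>) -> Vec<BTreeSet<i32>> {
    let mut changed = true;
    while changed {
        changed = false;
        'outer: for i in 0..groups.len() {
            for j in (i + 1)..groups.len() {
                if !groups[i].is_disjoint(&groups[j]) {
                    let merged = groups.swap_remove(j);
                    groups[i].extend(merged);
                    changed = true;
                    break 'outer;
                }
            }
        }
    }
    groups
}

fn main() {
    let groups = vec![
        BTreeSet::from([1, 2, 3]),
        BTreeSet::from([2, 4, 5]),
        BTreeSet::from([11, 12, 9]),
        BTreeSet::from([7, 6, 5]),
    ];
    // {1,2,3}, {2,4,5} and {7,6,5} collapse into one class; {9,11,12} stays separate.
    let bridged = bridge(groups);
    assert_eq!(bridged.len(), 2);
}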
+ let expected = vec![vec![lit(1), lit(2)], vec![lit(4), lit(5), lit(6)]]; + let mut eq_groups = EquivalenceGroup::new(entries); + eq_groups.remove_redundant_entries(); + + let eq_groups = eq_groups.classes; + assert_eq!(eq_groups.len(), expected.len()); + assert_eq!(eq_groups.len(), 2); + + assert!(physical_exprs_equal(&eq_groups[0], &expected[0])); + assert!(physical_exprs_equal(&eq_groups[1], &expected[1])); + Ok(()) + } + + #[test] + fn test_remove_redundant_entries_oeq_class() -> Result<()> { + let schema = create_test_schema()?; + let col_a = &col("a", &schema)?; + let col_b = &col("b", &schema)?; + let col_c = &col("c", &schema)?; + + let option_asc = SortOptions { + descending: false, + nulls_first: false, + }; + let option_desc = SortOptions { + descending: true, + nulls_first: true, + }; + + // First entry in the tuple is the given orderings for the table + // Second entry is the simplest version of the given orderings that is functionally equivalent. + let test_cases = vec![ + // ------- TEST CASE 1 --------- + ( + // ORDERINGS GIVEN + vec![ + // [a ASC, b ASC] + vec![(col_a, option_asc), (col_b, option_asc)], + ], + // EXPECTED orderings that is succinct. + vec![ + // [a ASC, b ASC] + vec![(col_a, option_asc), (col_b, option_asc)], + ], + ), + // ------- TEST CASE 2 --------- + ( + // ORDERINGS GIVEN + vec![ + // [a ASC, b ASC] + vec![(col_a, option_asc), (col_b, option_asc)], + // [a ASC, b ASC, c ASC] + vec![ + (col_a, option_asc), + (col_b, option_asc), + (col_c, option_asc), + ], + ], + // EXPECTED orderings that is succinct. + vec![ + // [a ASC, b ASC, c ASC] + vec![ + (col_a, option_asc), + (col_b, option_asc), + (col_c, option_asc), + ], + ], + ), + // ------- TEST CASE 3 --------- + ( + // ORDERINGS GIVEN + vec![ + // [a ASC, b DESC] + vec![(col_a, option_asc), (col_b, option_desc)], + // [a ASC] + vec![(col_a, option_asc)], + // [a ASC, c ASC] + vec![(col_a, option_asc), (col_c, option_asc)], + ], + // EXPECTED orderings that is succinct. + vec![ + // [a ASC, b DESC] + vec![(col_a, option_asc), (col_b, option_desc)], + // [a ASC, c ASC] + vec![(col_a, option_asc), (col_c, option_asc)], + ], + ), + // ------- TEST CASE 4 --------- + ( + // ORDERINGS GIVEN + vec![ + // [a ASC, b ASC] + vec![(col_a, option_asc), (col_b, option_asc)], + // [a ASC, b ASC, c ASC] + vec![ + (col_a, option_asc), + (col_b, option_asc), + (col_c, option_asc), + ], + // [a ASC] + vec![(col_a, option_asc)], + ], + // EXPECTED orderings that is succinct. 
+ vec![ + // [a ASC, b ASC, c ASC] + vec![ + (col_a, option_asc), + (col_b, option_asc), + (col_c, option_asc), + ], + ], + ), + ]; + for (orderings, expected) in test_cases { + let orderings = convert_to_orderings(&orderings); + let expected = convert_to_orderings(&expected); + let actual = OrderingEquivalenceClass::new(orderings.clone()); + let actual = actual.orderings; + let err_msg = format!( + "orderings: {:?}, expected: {:?}, actual :{:?}", + orderings, expected, actual + ); + assert_eq!(actual.len(), expected.len(), "{}", err_msg); + for elem in actual { + assert!(expected.contains(&elem), "{}", err_msg); + } + } + + Ok(()) + } + #[test] fn test_get_updated_right_ordering_equivalence_properties() -> Result<()> { let join_type = JoinType::Inner; - - let options = SortOptions::default(); - let right_oeq_class = OrderingEquivalentClass::new( - vec![ - PhysicalSortExpr { - expr: Arc::new(Column::new("x", 0)), - options, - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("y", 1)), - options, - }, - ], - vec![vec![ - PhysicalSortExpr { - expr: Arc::new(Column::new("z", 2)), - options, - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("w", 3)), - options, - }, - ]], - ); + // Join right child schema + let child_fields: Fields = ["x", "y", "z", "w"] + .into_iter() + .map(|name| Field::new(name, DataType::Int32, true)) + .collect(); + let child_schema = Schema::new(child_fields); + let col_x = &col("x", &child_schema)?; + let col_y = &col("y", &child_schema)?; + let col_z = &col("z", &child_schema)?; + let col_w = &col("w", &child_schema)?; + let option_asc = SortOptions { + descending: false, + nulls_first: false, + }; + // [x ASC, y ASC], [z ASC, w ASC] + let orderings = vec![ + vec![(col_x, option_asc), (col_y, option_asc)], + vec![(col_z, option_asc), (col_w, option_asc)], + ]; + let orderings = convert_to_orderings(&orderings); + // Right child ordering equivalences + let mut right_oeq_class = OrderingEquivalenceClass::new(orderings); let left_columns_len = 4; @@ -1403,45 +2050,808 @@ mod tests { .map(|name| Field::new(name, DataType::Int32, true)) .collect(); - let mut join_eq_properties = - EquivalenceProperties::new(Arc::new(Schema::new(fields))); - join_eq_properties - .add_equal_conditions((&Column::new("a", 0), &Column::new("x", 4))); - join_eq_properties - .add_equal_conditions((&Column::new("d", 3), &Column::new("w", 7))); - - let result = get_updated_right_ordering_equivalent_class( + // Join Schema + let schema = Schema::new(fields); + let col_a = &col("a", &schema)?; + let col_d = &col("d", &schema)?; + let col_x = &col("x", &schema)?; + let col_y = &col("y", &schema)?; + let col_z = &col("z", &schema)?; + let col_w = &col("w", &schema)?; + + let mut join_eq_properties = EquivalenceProperties::new(Arc::new(schema)); + // a=x and d=w + join_eq_properties.add_equal_conditions(col_a, col_x); + join_eq_properties.add_equal_conditions(col_d, col_w); + + updated_right_ordering_equivalence_class( + &mut right_oeq_class, &join_type, - &right_oeq_class, left_columns_len, - &join_eq_properties, ); + join_eq_properties.add_ordering_equivalence_class(right_oeq_class); + let result = join_eq_properties.oeq_class().clone(); + + // [x ASC, y ASC], [z ASC, w ASC] + let orderings = vec![ + vec![(col_x, option_asc), (col_y, option_asc)], + vec![(col_z, option_asc), (col_w, option_asc)], + ]; + let orderings = convert_to_orderings(&orderings); + let expected = OrderingEquivalenceClass::new(orderings); + + assert_eq!(result, expected); + + Ok(()) + } + + /// Checks if the table 
(RecordBatch) remains unchanged when sorted according to the provided `required_ordering`. + /// + /// The function works by adding a unique column of ascending integers to the original table. This column ensures + /// that rows that are otherwise indistinguishable (e.g., if they have the same values in all other columns) can + /// still be differentiated. When sorting the extended table, the unique column acts as a tie-breaker to produce + /// deterministic sorting results. + /// + /// If the table remains the same after sorting with the added unique column, it indicates that the table was + /// already sorted according to `required_ordering` to begin with. + fn is_table_same_after_sort( + mut required_ordering: Vec, + batch: RecordBatch, + ) -> Result { + // Clone the original schema and columns + let original_schema = batch.schema(); + let mut columns = batch.columns().to_vec(); + + // Create a new unique column + let n_row = batch.num_rows() as u64; + let unique_col = Arc::new(UInt64Array::from_iter_values(0..n_row)) as ArrayRef; + columns.push(unique_col.clone()); + + // Create a new schema with the added unique column + let unique_col_name = "unique"; + let unique_field = Arc::new(Field::new(unique_col_name, DataType::UInt64, false)); + let fields: Vec<_> = original_schema + .fields() + .iter() + .cloned() + .chain(std::iter::once(unique_field)) + .collect(); + let schema = Arc::new(Schema::new(fields)); + + // Create a new batch with the added column + let new_batch = RecordBatch::try_new(schema.clone(), columns)?; - let expected = OrderingEquivalentClass::new( + // Add the unique column to the required ordering to ensure deterministic results + required_ordering.push(PhysicalSortExpr { + expr: Arc::new(Column::new(unique_col_name, original_schema.fields().len())), + options: Default::default(), + }); + + // Convert the required ordering to a list of SortColumn + let sort_columns: Vec<_> = required_ordering + .iter() + .filter_map(|order_expr| { + let col = order_expr.expr.as_any().downcast_ref::()?; + let col_index = schema.column_with_name(col.name())?.0; + Some(SortColumn { + values: new_batch.column(col_index).clone(), + options: Some(order_expr.options), + }) + }) + .collect(); + + // Check if the indices after sorting match the initial ordering + let sorted_indices = lexsort_to_indices(&sort_columns, None)?; + let original_indices = UInt32Array::from_iter_values(0..n_row as u32); + + Ok(sorted_indices == original_indices) + } + + // If we already generated a random result for one of the + // expressions in the equivalence classes. For other expressions in the same + // equivalence class use same result. This util gets already calculated result, when available. + fn get_representative_arr( + eq_group: &[Arc], + existing_vec: &[Option], + schema: SchemaRef, + ) -> Option { + for expr in eq_group.iter() { + let col = expr.as_any().downcast_ref::().unwrap(); + let (idx, _field) = schema.column_with_name(col.name()).unwrap(); + if let Some(res) = &existing_vec[idx] { + return Some(res.clone()); + } + } + None + } + + // Generate a table that satisfies the given equivalence properties; i.e. + // equivalences, ordering equivalences, and constants. 
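As a rough standalone preview of the strategy the generate_table_for_eq_properties helper below implements for ordered columns, assuming the rand 0.8 APIs already used in this module: draw values with a bounded number of distinct elements, then sort them so the column satisfies an ascending ordering by construction (generate_sorted_column is a hypothetical single-column analogue).

use rand::{rngs::StdRng, Rng, SeedableRng};

fn generate_sorted_column(n_elem: usize, n_distinct: u64, seed: u64) -> Vec<u64> {
    let mut rng = StdRng::seed_from_u64(seed);
    // Bounding the number of distinct values produces duplicates, exercising tie handling.
    let mut values: Vec<u64> =
        (0..n_elem).map(|_| rng.gen_range(0..n_distinct)).collect();
    // Sorting afterwards guarantees the generated column is ASC by construction.
    values.sort_unstable();
    values
}

fn main() {
    let col = generate_sorted_column(10, 3, 23);
    assert!(col.windows(2).all(|w| w[0] <= w[1]));
}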
+ fn generate_table_for_eq_properties( + eq_properties: &EquivalenceProperties, + n_elem: usize, + n_distinct: usize, + ) -> Result { + let mut rng = StdRng::seed_from_u64(23); + + let schema = eq_properties.schema(); + let mut schema_vec = vec![None; schema.fields.len()]; + + // Utility closure to generate random array + let mut generate_random_array = |num_elems: usize, max_val: usize| -> ArrayRef { + let values: Vec = (0..num_elems) + .map(|_| rng.gen_range(0..max_val) as u64) + .collect(); + Arc::new(UInt64Array::from_iter_values(values)) + }; + + // Fill constant columns + for constant in &eq_properties.constants { + let col = constant.as_any().downcast_ref::().unwrap(); + let (idx, _field) = schema.column_with_name(col.name()).unwrap(); + let arr = + Arc::new(UInt64Array::from_iter_values(vec![0; n_elem])) as ArrayRef; + schema_vec[idx] = Some(arr); + } + + // Fill columns based on ordering equivalences + for ordering in eq_properties.oeq_class.iter() { + let (sort_columns, indices): (Vec<_>, Vec<_>) = ordering + .iter() + .map(|PhysicalSortExpr { expr, options }| { + let col = expr.as_any().downcast_ref::().unwrap(); + let (idx, _field) = schema.column_with_name(col.name()).unwrap(); + let arr = generate_random_array(n_elem, n_distinct); + ( + SortColumn { + values: arr, + options: Some(*options), + }, + idx, + ) + }) + .unzip(); + + let sort_arrs = arrow::compute::lexsort(&sort_columns, None)?; + for (idx, arr) in izip!(indices, sort_arrs) { + schema_vec[idx] = Some(arr); + } + } + + // Fill columns based on equivalence groups + for eq_group in eq_properties.eq_group.iter() { + let representative_array = + get_representative_arr(eq_group, &schema_vec, schema.clone()) + .unwrap_or_else(|| generate_random_array(n_elem, n_distinct)); + + for expr in eq_group { + let col = expr.as_any().downcast_ref::().unwrap(); + let (idx, _field) = schema.column_with_name(col.name()).unwrap(); + schema_vec[idx] = Some(representative_array.clone()); + } + } + + let res: Vec<_> = schema_vec + .into_iter() + .zip(schema.fields.iter()) + .map(|(elem, field)| { + ( + field.name(), + // Generate random values for columns that do not occur in any of the groups (equivalence, ordering equivalence, constants) + elem.unwrap_or_else(|| generate_random_array(n_elem, n_distinct)), + ) + }) + .collect(); + + Ok(RecordBatch::try_from_iter(res)?) + } + + #[test] + fn test_schema_normalize_expr_with_equivalence() -> Result<()> { + let col_a = &Column::new("a", 0); + let col_b = &Column::new("b", 1); + let col_c = &Column::new("c", 2); + // Assume that column a and c are aliases. + let (_test_schema, eq_properties) = create_test_params()?; + + let col_a_expr = Arc::new(col_a.clone()) as Arc; + let col_b_expr = Arc::new(col_b.clone()) as Arc; + let col_c_expr = Arc::new(col_c.clone()) as Arc; + // Test cases for equivalence normalization, + // First entry in the tuple is argument, second entry is expected result after normalization. + let expressions = vec![ + // Normalized version of the column a and c should go to a + // (by convention all the expressions inside equivalence class are mapped to the first entry + // in this case a is the first entry in the equivalence class.) 
+ (&col_a_expr, &col_a_expr), + (&col_c_expr, &col_a_expr), + // Cannot normalize column b + (&col_b_expr, &col_b_expr), + ]; + let eq_group = eq_properties.eq_group(); + for (expr, expected_eq) in expressions { + assert!( + expected_eq.eq(&eq_group.normalize_expr(expr.clone())), + "error in test: expr: {expr:?}" + ); + } + + Ok(()) + } + + #[test] + fn test_schema_normalize_sort_requirement_with_equivalence() -> Result<()> { + let option1 = SortOptions { + descending: false, + nulls_first: false, + }; + // Assume that column a and c are aliases. + let (test_schema, eq_properties) = create_test_params()?; + let col_a = &col("a", &test_schema)?; + let col_c = &col("c", &test_schema)?; + let col_d = &col("d", &test_schema)?; + + // Test cases for equivalence normalization + // First entry in the tuple is PhysicalSortRequirement, second entry in the tuple is + // expected PhysicalSortRequirement after normalization. + let test_cases = vec![ + (vec![(col_a, Some(option1))], vec![(col_a, Some(option1))]), + // In the normalized version column c should be replace with column a + (vec![(col_c, Some(option1))], vec![(col_a, Some(option1))]), + (vec![(col_c, None)], vec![(col_a, None)]), + (vec![(col_d, Some(option1))], vec![(col_d, Some(option1))]), + ]; + for (reqs, expected) in test_cases.into_iter() { + let reqs = convert_to_sort_reqs(&reqs); + let expected = convert_to_sort_reqs(&expected); + + let normalized = eq_properties.normalize_sort_requirements(&reqs); + assert!( + expected.eq(&normalized), + "error in test: reqs: {reqs:?}, expected: {expected:?}, normalized: {normalized:?}" + ); + } + + Ok(()) + } + + #[test] + fn test_normalize_sort_reqs() -> Result<()> { + // Schema satisfies following properties + // a=c + // and following orderings are valid + // [a ASC], [d ASC, b ASC], [e DESC, f ASC, g ASC] + let (test_schema, eq_properties) = create_test_params()?; + let col_a = &col("a", &test_schema)?; + let col_b = &col("b", &test_schema)?; + let col_c = &col("c", &test_schema)?; + let col_d = &col("d", &test_schema)?; + let col_e = &col("e", &test_schema)?; + let col_f = &col("f", &test_schema)?; + let option_asc = SortOptions { + descending: false, + nulls_first: false, + }; + let option_desc = SortOptions { + descending: true, + nulls_first: true, + }; + // First element in the tuple stores vector of requirement, second element is the expected return value for ordering_satisfy function + let requirements = vec![ + ( + vec![(col_a, Some(option_asc))], + vec![(col_a, Some(option_asc))], + ), + ( + vec![(col_a, Some(option_desc))], + vec![(col_a, Some(option_desc))], + ), + (vec![(col_a, None)], vec![(col_a, None)]), + // Test whether equivalence works as expected + ( + vec![(col_c, Some(option_asc))], + vec![(col_a, Some(option_asc))], + ), + (vec![(col_c, None)], vec![(col_a, None)]), + // Test whether ordering equivalence works as expected + ( + vec![(col_d, Some(option_asc)), (col_b, Some(option_asc))], + vec![(col_d, Some(option_asc)), (col_b, Some(option_asc))], + ), + ( + vec![(col_d, None), (col_b, None)], + vec![(col_d, None), (col_b, None)], + ), + ( + vec![(col_e, Some(option_desc)), (col_f, Some(option_asc))], + vec![(col_e, Some(option_desc)), (col_f, Some(option_asc))], + ), + // We should be able to normalize in compatible requirements also (not exactly equal) + ( + vec![(col_e, Some(option_desc)), (col_f, None)], + vec![(col_e, Some(option_desc)), (col_f, None)], + ), + ( + vec![(col_e, None), (col_f, None)], + vec![(col_e, None), (col_f, None)], + ), + ]; + + for (reqs, 
expected_normalized) in requirements.into_iter() { + let req = convert_to_sort_reqs(&reqs); + let expected_normalized = convert_to_sort_reqs(&expected_normalized); + + assert_eq!( + eq_properties.normalize_sort_requirements(&req), + expected_normalized + ); + } + + Ok(()) + } + + #[test] + fn test_get_finer() -> Result<()> { + let schema = create_test_schema()?; + let col_a = &col("a", &schema)?; + let col_b = &col("b", &schema)?; + let col_c = &col("c", &schema)?; + let eq_properties = EquivalenceProperties::new(schema); + let option_asc = SortOptions { + descending: false, + nulls_first: false, + }; + let option_desc = SortOptions { + descending: true, + nulls_first: true, + }; + // First entry, and second entry are the physical sort requirement that are argument for get_finer_requirement. + // Third entry is the expected result. + let tests_cases = vec![ + // Get finer requirement between [a Some(ASC)] and [a None, b Some(ASC)] + // result should be [a Some(ASC), b Some(ASC)] + ( + vec![(col_a, Some(option_asc))], + vec![(col_a, None), (col_b, Some(option_asc))], + Some(vec![(col_a, Some(option_asc)), (col_b, Some(option_asc))]), + ), + // Get finer requirement between [a Some(ASC), b Some(ASC), c Some(ASC)] and [a Some(ASC), b Some(ASC)] + // result should be [a Some(ASC), b Some(ASC), c Some(ASC)] + ( + vec![ + (col_a, Some(option_asc)), + (col_b, Some(option_asc)), + (col_c, Some(option_asc)), + ], + vec![(col_a, Some(option_asc)), (col_b, Some(option_asc))], + Some(vec![ + (col_a, Some(option_asc)), + (col_b, Some(option_asc)), + (col_c, Some(option_asc)), + ]), + ), + // Get finer requirement between [a Some(ASC), b Some(ASC)] and [a Some(ASC), b Some(DESC)] + // result should be None + ( + vec![(col_a, Some(option_asc)), (col_b, Some(option_asc))], + vec![(col_a, Some(option_asc)), (col_b, Some(option_desc))], + None, + ), + ]; + for (lhs, rhs, expected) in tests_cases { + let lhs = convert_to_sort_reqs(&lhs); + let rhs = convert_to_sort_reqs(&rhs); + let expected = expected.map(|expected| convert_to_sort_reqs(&expected)); + let finer = eq_properties.get_finer_requirement(&lhs, &rhs); + assert_eq!(finer, expected) + } + + Ok(()) + } + + #[test] + fn test_get_meet_ordering() -> Result<()> { + let schema = create_test_schema()?; + let col_a = &col("a", &schema)?; + let col_b = &col("b", &schema)?; + let eq_properties = EquivalenceProperties::new(schema); + let option_asc = SortOptions { + descending: false, + nulls_first: false, + }; + let option_desc = SortOptions { + descending: true, + nulls_first: true, + }; + let tests_cases = vec![ + // Get meet ordering between [a ASC] and [a ASC, b ASC] + // result should be [a ASC] + ( + vec![(col_a, option_asc)], + vec![(col_a, option_asc), (col_b, option_asc)], + Some(vec![(col_a, option_asc)]), + ), + // Get meet ordering between [a ASC] and [a DESC] + // result should be None. + (vec![(col_a, option_asc)], vec![(col_a, option_desc)], None), + // Get meet ordering between [a ASC, b ASC] and [a ASC, b DESC] + // result should be [a ASC]. 
+ ( + vec![(col_a, option_asc), (col_b, option_asc)], + vec![(col_a, option_asc), (col_b, option_desc)], + Some(vec![(col_a, option_asc)]), + ), + ]; + for (lhs, rhs, expected) in tests_cases { + let lhs = convert_to_sort_exprs(&lhs); + let rhs = convert_to_sort_exprs(&rhs); + let expected = expected.map(|expected| convert_to_sort_exprs(&expected)); + let finer = eq_properties.get_meet_ordering(&lhs, &rhs); + assert_eq!(finer, expected) + } + + Ok(()) + } + + #[test] + fn test_find_longest_permutation() -> Result<()> { + // Schema satisfies following orderings: + // [a ASC], [d ASC, b ASC], [e DESC, f ASC, g ASC] + // and + // Column [a=c] (e.g they are aliases). + // At below we add [d ASC, h DESC] also, for test purposes + let (test_schema, mut eq_properties) = create_test_params()?; + let col_a = &col("a", &test_schema)?; + let col_b = &col("b", &test_schema)?; + let col_c = &col("c", &test_schema)?; + let col_d = &col("d", &test_schema)?; + let col_e = &col("e", &test_schema)?; + let col_h = &col("h", &test_schema)?; + + let option_asc = SortOptions { + descending: false, + nulls_first: false, + }; + let option_desc = SortOptions { + descending: true, + nulls_first: true, + }; + // [d ASC, h ASC] also satisfies schema. + eq_properties.add_new_orderings([vec![ + PhysicalSortExpr { + expr: col_d.clone(), + options: option_asc, + }, + PhysicalSortExpr { + expr: col_h.clone(), + options: option_desc, + }, + ]]); + let test_cases = vec![ + // TEST CASE 1 + (vec![col_a], vec![(col_a, option_asc)]), + // TEST CASE 2 + (vec![col_c], vec![(col_c, option_asc)]), + // TEST CASE 3 + ( + vec![col_d, col_e, col_b], + vec![ + (col_d, option_asc), + (col_b, option_asc), + (col_e, option_desc), + ], + ), + // TEST CASE 4 + (vec![col_b], vec![]), + // TEST CASE 5 + (vec![col_d], vec![(col_d, option_asc)]), + ]; + for (exprs, expected) in test_cases { + let exprs = exprs.into_iter().cloned().collect::>(); + let expected = convert_to_sort_exprs(&expected); + let (actual, _) = eq_properties.find_longest_permutation(&exprs); + assert_eq!(actual, expected); + } + + Ok(()) + } + + #[test] + fn test_update_ordering() -> Result<()> { + let schema = Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + Field::new("c", DataType::Int32, true), + Field::new("d", DataType::Int32, true), + ]); + + let mut eq_properties = EquivalenceProperties::new(Arc::new(schema.clone())); + let col_a = &col("a", &schema)?; + let col_b = &col("b", &schema)?; + let col_c = &col("c", &schema)?; + let col_d = &col("d", &schema)?; + let option_asc = SortOptions { + descending: false, + nulls_first: false, + }; + // b=a (e.g they are aliases) + eq_properties.add_equal_conditions(col_b, col_a); + // [b ASC], [d ASC] + eq_properties.add_new_orderings(vec![ + vec![PhysicalSortExpr { + expr: col_b.clone(), + options: option_asc, + }], + vec![PhysicalSortExpr { + expr: col_d.clone(), + options: option_asc, + }], + ]); + + let test_cases = vec![ + // d + b + ( + Arc::new(BinaryExpr::new( + col_d.clone(), + Operator::Plus, + col_b.clone(), + )) as Arc, + SortProperties::Ordered(option_asc), + ), + // b + (col_b.clone(), SortProperties::Ordered(option_asc)), + // a + (col_a.clone(), SortProperties::Ordered(option_asc)), + // a + c + ( + Arc::new(BinaryExpr::new( + col_a.clone(), + Operator::Plus, + col_c.clone(), + )), + SortProperties::Unordered, + ), + ]; + for (expr, expected) in test_cases { + let expr_ordering = ExprOrdering::new(expr.clone()); + let expr_ordering = expr_ordering + 
.transform_up(&|expr| update_ordering(expr, &eq_properties))?; + let err_msg = format!( + "expr:{:?}, expected: {:?}, actual: {:?}", + expr, expected, expr_ordering.state + ); + assert_eq!(expr_ordering.state, expected, "{}", err_msg); + } + + Ok(()) + } + + #[test] + fn test_get_indices_of_matching_sort_exprs_with_order_eq() -> Result<()> { + let sort_options = SortOptions::default(); + let sort_options_not = SortOptions::default().not(); + + let schema = Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + ]); + let col_a = &col("a", &schema)?; + let col_b = &col("b", &schema)?; + let required_columns = [col_b.clone(), col_a.clone()]; + let mut eq_properties = EquivalenceProperties::new(Arc::new(schema)); + eq_properties.add_new_orderings([vec![ + PhysicalSortExpr { + expr: Arc::new(Column::new("b", 1)), + options: sort_options_not, + }, + PhysicalSortExpr { + expr: Arc::new(Column::new("a", 0)), + options: sort_options, + }, + ]]); + let (result, idxs) = eq_properties.find_longest_permutation(&required_columns); + assert_eq!(idxs, vec![0, 1]); + assert_eq!( + result, vec![ PhysicalSortExpr { - expr: Arc::new(Column::new("a", 0)), - options, + expr: col_b.clone(), + options: sort_options_not }, PhysicalSortExpr { - expr: Arc::new(Column::new("y", 5)), - options, + expr: col_a.clone(), + options: sort_options + } + ] + ); + + let schema = Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + Field::new("c", DataType::Int32, true), + ]); + let col_a = &col("a", &schema)?; + let col_b = &col("b", &schema)?; + let required_columns = [col_b.clone(), col_a.clone()]; + let mut eq_properties = EquivalenceProperties::new(Arc::new(schema)); + eq_properties.add_new_orderings([ + vec![PhysicalSortExpr { + expr: Arc::new(Column::new("c", 2)), + options: sort_options, + }], + vec![ + PhysicalSortExpr { + expr: Arc::new(Column::new("b", 1)), + options: sort_options_not, }, - ], - vec![vec![ PhysicalSortExpr { - expr: Arc::new(Column::new("z", 6)), - options, + expr: Arc::new(Column::new("a", 0)), + options: sort_options, }, + ], + ]); + let (result, idxs) = eq_properties.find_longest_permutation(&required_columns); + assert_eq!(idxs, vec![0, 1]); + assert_eq!( + result, + vec![ PhysicalSortExpr { - expr: Arc::new(Column::new("d", 3)), - options, + expr: col_b.clone(), + options: sort_options_not }, - ]], + PhysicalSortExpr { + expr: col_a.clone(), + options: sort_options + } + ] + ); + + let required_columns = [ + Arc::new(Column::new("b", 1)) as _, + Arc::new(Column::new("a", 0)) as _, + ]; + let schema = Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + Field::new("c", DataType::Int32, true), + ]); + let mut eq_properties = EquivalenceProperties::new(Arc::new(schema)); + + // not satisfied orders + eq_properties.add_new_orderings([vec![ + PhysicalSortExpr { + expr: Arc::new(Column::new("b", 1)), + options: sort_options_not, + }, + PhysicalSortExpr { + expr: Arc::new(Column::new("c", 2)), + options: sort_options, + }, + PhysicalSortExpr { + expr: Arc::new(Column::new("a", 0)), + options: sort_options, + }, + ]]); + let (_, idxs) = eq_properties.find_longest_permutation(&required_columns); + assert_eq!(idxs, vec![0]); + + Ok(()) + } + + #[test] + fn test_normalize_ordering_equivalence_classes() -> Result<()> { + let sort_options = SortOptions::default(); + + let schema = Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", 
DataType::Int32, true), + Field::new("c", DataType::Int32, true), + ]); + let col_a_expr = col("a", &schema)?; + let col_b_expr = col("b", &schema)?; + let col_c_expr = col("c", &schema)?; + let mut eq_properties = EquivalenceProperties::new(Arc::new(schema.clone())); + + eq_properties.add_equal_conditions(&col_a_expr, &col_c_expr); + let others = vec![ + vec![PhysicalSortExpr { + expr: col_b_expr.clone(), + options: sort_options, + }], + vec![PhysicalSortExpr { + expr: col_c_expr.clone(), + options: sort_options, + }], + ]; + eq_properties.add_new_orderings(others); + + let mut expected_eqs = EquivalenceProperties::new(Arc::new(schema)); + expected_eqs.add_new_orderings([ + vec![PhysicalSortExpr { + expr: col_b_expr.clone(), + options: sort_options, + }], + vec![PhysicalSortExpr { + expr: col_c_expr.clone(), + options: sort_options, + }], + ]); + + let oeq_class = eq_properties.oeq_class().clone(); + let expected = expected_eqs.oeq_class(); + assert!(oeq_class.eq(expected)); + + Ok(()) + } + + #[test] + fn project_empty_output_ordering() -> Result<()> { + let schema = Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + Field::new("c", DataType::Int32, true), + ]); + let mut eq_properties = EquivalenceProperties::new(Arc::new(schema.clone())); + let ordering = vec![PhysicalSortExpr { + expr: Arc::new(Column::new("b", 1)), + options: SortOptions::default(), + }]; + eq_properties.add_new_orderings([ordering]); + let projection_mapping = ProjectionMapping { + inner: vec![ + ( + Arc::new(Column::new("b", 1)) as _, + Arc::new(Column::new("b_new", 0)) as _, + ), + ( + Arc::new(Column::new("a", 0)) as _, + Arc::new(Column::new("a_new", 1)) as _, + ), + ], + }; + let projection_schema = Arc::new(Schema::new(vec![ + Field::new("b_new", DataType::Int32, true), + Field::new("a_new", DataType::Int32, true), + ])); + let orderings = eq_properties + .project(&projection_mapping, projection_schema) + .oeq_class() + .output_ordering() + .unwrap_or_default(); + + assert_eq!( + vec![PhysicalSortExpr { + expr: Arc::new(Column::new("b_new", 0)), + options: SortOptions::default(), + }], + orderings ); - assert_eq!(result.head(), expected.head()); - assert_eq!(result.others(), expected.others()); + let schema = Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + Field::new("c", DataType::Int32, true), + ]); + let eq_properties = EquivalenceProperties::new(Arc::new(schema)); + let projection_mapping = ProjectionMapping { + inner: vec![ + ( + Arc::new(Column::new("c", 2)) as _, + Arc::new(Column::new("c_new", 0)) as _, + ), + ( + Arc::new(Column::new("b", 1)) as _, + Arc::new(Column::new("b_new", 1)) as _, + ), + ], + }; + let projection_schema = Arc::new(Schema::new(vec![ + Field::new("c_new", DataType::Int32, true), + Field::new("b_new", DataType::Int32, true), + ])); + let projected = eq_properties.project(&projection_mapping, projection_schema); + // After projection there is no ordering. 
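A rough standalone sketch of what the project calls in this test do to an ordering, heavily simplified to plain column names (map_ordering_through_projection is hypothetical; the real EquivalenceProperties::project works on a ProjectionMapping of physical expressions): a sort column carried through the mapping is renamed, while an ordering that references an unprojected column does not survive.

use std::collections::HashMap;

// Rewrite each sort column through the projection mapping; if any column in the
// ordering is not projected, the whole ordering is lost at the output.
fn map_ordering_through_projection(
    ordering: &[&str],
    mapping: &HashMap<&str, &str>,
) -> Option<Vec<String>> {
    ordering
        .iter()
        .map(|col| mapping.get(col).map(|out| out.to_string()))
        .collect()
}

fn main() {
    let mapping = HashMap::from([("b", "b_new"), ("a", "a_new")]);
    // [b ASC] survives the projection as [b_new ASC].
    assert_eq!(
        map_ordering_through_projection(&["b"], &mapping),
        Some(vec!["b_new".to_string()])
    );
    // An ordering on a column that is not projected does not survive.
    assert_eq!(map_ordering_through_projection(&["c"], &mapping), None);
}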
+ assert!(projected.oeq_class().output_ordering().is_none()); Ok(()) } diff --git a/datafusion/physical-expr/src/expressions/column.rs b/datafusion/physical-expr/src/expressions/column.rs index b7b5895db6d3..62da8ff9ed44 100644 --- a/datafusion/physical-expr/src/expressions/column.rs +++ b/datafusion/physical-expr/src/expressions/column.rs @@ -28,7 +28,6 @@ use arrow::{ datatypes::{DataType, Schema}, record_batch::RecordBatch, }; -use datafusion_common::plan_err; use datafusion_common::{internal_err, DataFusionError, Result}; use datafusion_expr::ColumnarValue; @@ -176,7 +175,7 @@ impl PhysicalExpr for UnKnownColumn { /// Evaluate the expression fn evaluate(&self, _batch: &RecordBatch) -> Result { - plan_err!("UnKnownColumn::evaluate() should not be called") + internal_err!("UnKnownColumn::evaluate() should not be called") } fn children(&self) -> Vec> { diff --git a/datafusion/physical-expr/src/expressions/in_list.rs b/datafusion/physical-expr/src/expressions/in_list.rs index 643bbfd820a6..8d55fb70bd9e 100644 --- a/datafusion/physical-expr/src/expressions/in_list.rs +++ b/datafusion/physical-expr/src/expressions/in_list.rs @@ -22,8 +22,7 @@ use std::fmt::Debug; use std::hash::{Hash, Hasher}; use std::sync::Arc; -use crate::physical_expr::down_cast_any_ref; -use crate::utils::expr_list_eq_any_order; +use crate::physical_expr::{down_cast_any_ref, physical_exprs_bag_equal}; use crate::PhysicalExpr; use arrow::array::*; @@ -410,7 +409,7 @@ impl PartialEq for InListExpr { .downcast_ref::() .map(|x| { self.expr.eq(&x.expr) - && expr_list_eq_any_order(&self.list, &x.list) + && physical_exprs_bag_equal(&self.list, &x.list) && self.negated == x.negated }) .unwrap_or(false) diff --git a/datafusion/physical-expr/src/expressions/no_op.rs b/datafusion/physical-expr/src/expressions/no_op.rs index 497fb42fe4df..95e6879a6c2d 100644 --- a/datafusion/physical-expr/src/expressions/no_op.rs +++ b/datafusion/physical-expr/src/expressions/no_op.rs @@ -28,7 +28,7 @@ use arrow::{ use crate::physical_expr::down_cast_any_ref; use crate::PhysicalExpr; -use datafusion_common::{plan_err, DataFusionError, Result}; +use datafusion_common::{internal_err, DataFusionError, Result}; use datafusion_expr::ColumnarValue; /// A place holder expression, can not be evaluated. @@ -65,7 +65,7 @@ impl PhysicalExpr for NoOp { } fn evaluate(&self, _batch: &RecordBatch) -> Result { - plan_err!("NoOp::evaluate() should not be called") + internal_err!("NoOp::evaluate() should not be called") } fn children(&self) -> Vec> { diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index f23b45e26a03..b66bac41014d 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -74,15 +74,20 @@ pub fn create_physical_expr( // so we don't have to pay a per-array/batch cost. 
BuiltinScalarFunction::ToTimestamp => { Arc::new(match input_phy_exprs[0].data_type(input_schema) { - Ok(DataType::Int64) | Ok(DataType::Timestamp(_, None)) => { - |col_values: &[ColumnarValue]| { - cast_column( - &col_values[0], - &DataType::Timestamp(TimeUnit::Nanosecond, None), - None, - ) - } - } + Ok(DataType::Int64) => |col_values: &[ColumnarValue]| { + cast_column( + &col_values[0], + &DataType::Timestamp(TimeUnit::Second, None), + None, + ) + }, + Ok(DataType::Timestamp(_, None)) => |col_values: &[ColumnarValue]| { + cast_column( + &col_values[0], + &DataType::Timestamp(TimeUnit::Nanosecond, None), + None, + ) + }, Ok(DataType::Utf8) => datetime_expressions::to_timestamp, other => { return internal_err!( @@ -129,6 +134,25 @@ pub fn create_physical_expr( } }) } + BuiltinScalarFunction::ToTimestampNanos => { + Arc::new(match input_phy_exprs[0].data_type(input_schema) { + Ok(DataType::Int64) | Ok(DataType::Timestamp(_, None)) => { + |col_values: &[ColumnarValue]| { + cast_column( + &col_values[0], + &DataType::Timestamp(TimeUnit::Nanosecond, None), + None, + ) + } + } + Ok(DataType::Utf8) => datetime_expressions::to_timestamp_nanos, + other => { + return internal_err!( + "Unsupported data type {other:?} for function to_timestamp_nanos" + ); + } + }) + } BuiltinScalarFunction::ToTimestampSeconds => Arc::new({ match input_phy_exprs[0].data_type(input_schema) { Ok(DataType::Int64) | Ok(DataType::Timestamp(_, None)) => { @@ -333,6 +357,8 @@ where ColumnarValue::Array(a) => Some(a.len()), }); + let is_scalar = len.is_none(); + let inferred_length = len.unwrap_or(1); let args = args .iter() @@ -349,7 +375,14 @@ where .collect::>(); let result = (inner)(&args); - result.map(ColumnarValue::Array) + + if is_scalar { + // If all inputs are scalar, keeps output as scalar + let result = result.and_then(|arr| ScalarValue::try_from_array(&arr, 0)); + result.map(ColumnarValue::Scalar) + } else { + result.map(ColumnarValue::Array) + } }) } diff --git a/datafusion/physical-expr/src/lib.rs b/datafusion/physical-expr/src/lib.rs index 977542bd8e66..fffa8f602d87 100644 --- a/datafusion/physical-expr/src/lib.rs +++ b/datafusion/physical-expr/src/lib.rs @@ -53,23 +53,16 @@ pub use aggregate::groups_accumulator::{ }; pub use aggregate::AggregateExpr; pub use analysis::{analyze, AnalysisContext, ExprBoundaries}; -pub use equivalence::{ - add_offset_to_expr, add_offset_to_lex_ordering, - ordering_equivalence_properties_helper, project_equivalence_properties, - project_ordering_equivalence_properties, EquivalenceProperties, EquivalentClass, - OrderingEquivalenceProperties, OrderingEquivalentClass, -}; - +pub use equivalence::EquivalenceProperties; pub use partitioning::{Distribution, Partitioning}; -pub use physical_expr::{physical_exprs_contains, PhysicalExpr, PhysicalExprRef}; +pub use physical_expr::{ + physical_exprs_bag_equal, physical_exprs_contains, physical_exprs_equal, + PhysicalExpr, PhysicalExprRef, +}; pub use planner::create_physical_expr; pub use scalar_function::ScalarFunctionExpr; pub use sort_expr::{ - LexOrdering, LexOrderingRef, LexOrderingReq, PhysicalSortExpr, + LexOrdering, LexOrderingRef, LexRequirement, LexRequirementRef, PhysicalSortExpr, PhysicalSortRequirement, }; -pub use sort_properties::update_ordering; -pub use utils::{ - expr_list_eq_any_order, expr_list_eq_strict_order, - normalize_out_expr_with_columns_map, reverse_order_bys, split_conjunction, -}; +pub use utils::{reverse_order_bys, split_conjunction}; diff --git a/datafusion/physical-expr/src/partitioning.rs 
b/datafusion/physical-expr/src/partitioning.rs index 773eac40dc8a..6a8fca4a1543 100644 --- a/datafusion/physical-expr/src/partitioning.rs +++ b/datafusion/physical-expr/src/partitioning.rs @@ -20,7 +20,7 @@ use std::fmt; use std::sync::Arc; -use crate::{expr_list_eq_strict_order, EquivalenceProperties, PhysicalExpr}; +use crate::{physical_exprs_equal, EquivalenceProperties, PhysicalExpr}; /// Partitioning schemes supported by operators. #[derive(Debug, Clone)] @@ -66,7 +66,7 @@ impl Partitioning { pub fn satisfy EquivalenceProperties>( &self, required: Distribution, - equal_properties: F, + eq_properties: F, ) -> bool { match required { Distribution::UnspecifiedDistribution => true, @@ -78,31 +78,28 @@ impl Partitioning { // then we need to have the partition count and hash functions validation. Partitioning::Hash(partition_exprs, _) => { let fast_match = - expr_list_eq_strict_order(&required_exprs, partition_exprs); + physical_exprs_equal(&required_exprs, partition_exprs); // If the required exprs do not match, need to leverage the eq_properties provided by the child - // and normalize both exprs based on the eq_properties + // and normalize both exprs based on the equivalent groups. if !fast_match { - let eq_properties = equal_properties(); - let eq_classes = eq_properties.classes(); - if !eq_classes.is_empty() { + let eq_properties = eq_properties(); + let eq_groups = eq_properties.eq_group(); + if !eq_groups.is_empty() { let normalized_required_exprs = required_exprs .iter() - .map(|e| eq_properties.normalize_expr(e.clone())) + .map(|e| eq_groups.normalize_expr(e.clone())) .collect::>(); let normalized_partition_exprs = partition_exprs .iter() - .map(|e| eq_properties.normalize_expr(e.clone())) + .map(|e| eq_groups.normalize_expr(e.clone())) .collect::>(); - expr_list_eq_strict_order( + return physical_exprs_equal( &normalized_required_exprs, &normalized_partition_exprs, - ) - } else { - fast_match + ); } - } else { - fast_match } + fast_match } _ => false, } @@ -120,7 +117,7 @@ impl PartialEq for Partitioning { Partitioning::RoundRobinBatch(count2), ) if count1 == count2 => true, (Partitioning::Hash(exprs1, count1), Partitioning::Hash(exprs2, count2)) - if expr_list_eq_strict_order(exprs1, exprs2) && (count1 == count2) => + if physical_exprs_equal(exprs1, exprs2) && (count1 == count2) => { true } @@ -158,15 +155,13 @@ impl Distribution { #[cfg(test)] mod tests { - use crate::expressions::Column; + use std::sync::Arc; use super::*; - use arrow::datatypes::DataType; - use arrow::datatypes::Field; - use arrow::datatypes::Schema; - use datafusion_common::Result; + use crate::expressions::Column; - use std::sync::Arc; + use arrow::datatypes::{DataType, Field, Schema}; + use datafusion_common::Result; #[test] fn partitioning_satisfy_distribution() -> Result<()> { diff --git a/datafusion/physical-expr/src/physical_expr.rs b/datafusion/physical-expr/src/physical_expr.rs index 11fa6c899621..79cbe6828b64 100644 --- a/datafusion/physical-expr/src/physical_expr.rs +++ b/datafusion/physical-expr/src/physical_expr.rs @@ -32,6 +32,8 @@ use datafusion_common::utils::DataPtr; use datafusion_common::{internal_err, not_impl_err, DataFusionError, Result}; use datafusion_expr::ColumnarValue; +use itertools::izip; + /// Expression that can be evaluated against a RecordBatch /// A Physical expression knows its type, nullability and how to evaluate itself. 
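Editor's note: the `Partitioning::satisfy` rewrite above keeps the fast path (exact, order-sensitive comparison via `physical_exprs_equal`) and, on a miss, normalizes both expression lists through the equivalence groups before comparing again. A minimal self-contained sketch of that shape, with strings standing in for physical expressions and a plain map standing in for the equivalence group (hypothetical helper names, not DataFusion API):

```rust
use std::collections::HashMap;

/// Does a hash partitioning on `partition_exprs` satisfy a hash distribution
/// requirement on `required`, given an equivalence group mapping each expression
/// to its canonical representative?
fn hash_partitioning_satisfies(
    required: &[&str],
    partition_exprs: &[&str],
    eq_group: &HashMap<&str, &str>,
) -> bool {
    // Fast path: exact, order-sensitive equality (cf. `physical_exprs_equal`).
    if required == partition_exprs {
        return true;
    }
    // Slow path: map every expression to its canonical form and compare again.
    let canon = |e: &&str| *eq_group.get(e).unwrap_or(e);
    let req: Vec<_> = required.iter().map(canon).collect();
    let par: Vec<_> = partition_exprs.iter().map(canon).collect();
    req == par
}

fn main() {
    // `a` and `c` are in the same equivalence group with representative `a`.
    let eq: HashMap<&str, &str> = HashMap::from([("c", "a")]);
    assert!(hash_partitioning_satisfies(&["a", "b"], &["c", "b"], &eq));
    // Order still matters for hash partitioning expressions.
    assert!(!hash_partitioning_satisfies(&["a", "b"], &["b", "a"], &eq));
}
```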
pub trait PhysicalExpr: Send + Sync + Display + Debug + PartialEq { @@ -226,17 +228,79 @@ pub fn physical_exprs_contains( .any(|physical_expr| physical_expr.eq(expr)) } +/// Checks whether the given slices have any common entries. +pub fn have_common_entries( + lhs: &[Arc], + rhs: &[Arc], +) -> bool { + lhs.iter().any(|expr| physical_exprs_contains(rhs, expr)) +} + +/// Checks whether the given physical expression slices are equal. +pub fn physical_exprs_equal( + lhs: &[Arc], + rhs: &[Arc], +) -> bool { + lhs.len() == rhs.len() && izip!(lhs, rhs).all(|(lhs, rhs)| lhs.eq(rhs)) +} + +/// Checks whether the given physical expression slices are equal in the sense +/// of bags (multi-sets), disregarding their orderings. +pub fn physical_exprs_bag_equal( + lhs: &[Arc], + rhs: &[Arc], +) -> bool { + // TODO: Once we can use `HashMap`s with `Arc`, this + // function should use a `HashMap` to reduce computational complexity. + if lhs.len() == rhs.len() { + let mut rhs_vec = rhs.to_vec(); + for expr in lhs { + if let Some(idx) = rhs_vec.iter().position(|e| expr.eq(e)) { + rhs_vec.swap_remove(idx); + } else { + return false; + } + } + true + } else { + false + } +} + +/// This utility function removes duplicates from the given `exprs` vector. +/// Note that this function does not necessarily preserve its input ordering. +pub fn deduplicate_physical_exprs(exprs: &mut Vec>) { + // TODO: Once we can use `HashSet`s with `Arc`, this + // function should use a `HashSet` to reduce computational complexity. + // See issue: https://github.com/apache/arrow-datafusion/issues/8027 + let mut idx = 0; + while idx < exprs.len() { + let mut rest_idx = idx + 1; + while rest_idx < exprs.len() { + if exprs[idx].eq(&exprs[rest_idx]) { + exprs.swap_remove(rest_idx); + } else { + rest_idx += 1; + } + } + idx += 1; + } +} + #[cfg(test)] mod tests { use std::sync::Arc; use crate::expressions::{Column, Literal}; - use crate::physical_expr::{physical_exprs_contains, PhysicalExpr}; + use crate::physical_expr::{ + deduplicate_physical_exprs, have_common_entries, physical_exprs_bag_equal, + physical_exprs_contains, physical_exprs_equal, PhysicalExpr, + }; - use datafusion_common::{Result, ScalarValue}; + use datafusion_common::ScalarValue; #[test] - fn test_physical_exprs_contains() -> Result<()> { + fn test_physical_exprs_contains() { let lit_true = Arc::new(Literal::new(ScalarValue::Boolean(Some(true)))) as Arc; let lit_false = Arc::new(Literal::new(ScalarValue::Boolean(Some(false)))) @@ -268,6 +332,142 @@ mod tests { // below expressions are not inside physical_exprs assert!(!physical_exprs_contains(&physical_exprs, &col_c_expr)); assert!(!physical_exprs_contains(&physical_exprs, &lit1)); - Ok(()) + } + + #[test] + fn test_have_common_entries() { + let lit_true = Arc::new(Literal::new(ScalarValue::Boolean(Some(true)))) + as Arc; + let lit_false = Arc::new(Literal::new(ScalarValue::Boolean(Some(false)))) + as Arc; + let lit2 = + Arc::new(Literal::new(ScalarValue::Int32(Some(2)))) as Arc; + let lit1 = + Arc::new(Literal::new(ScalarValue::Int32(Some(1)))) as Arc; + let col_b_expr = Arc::new(Column::new("b", 1)) as Arc; + + let vec1 = vec![lit_true.clone(), lit_false.clone()]; + let vec2 = vec![lit_true.clone(), col_b_expr.clone()]; + let vec3 = vec![lit2.clone(), lit1.clone()]; + + // lit_true is common + assert!(have_common_entries(&vec1, &vec2)); + // there is no common entry + assert!(!have_common_entries(&vec1, &vec3)); + assert!(!have_common_entries(&vec2, &vec3)); + } + + #[test] + fn test_physical_exprs_equal() { + let 
lit_true = Arc::new(Literal::new(ScalarValue::Boolean(Some(true)))) + as Arc; + let lit_false = Arc::new(Literal::new(ScalarValue::Boolean(Some(false)))) + as Arc; + let lit1 = + Arc::new(Literal::new(ScalarValue::Int32(Some(1)))) as Arc; + let lit2 = + Arc::new(Literal::new(ScalarValue::Int32(Some(2)))) as Arc; + let col_b_expr = Arc::new(Column::new("b", 1)) as Arc; + + let vec1 = vec![lit_true.clone(), lit_false.clone()]; + let vec2 = vec![lit_true.clone(), col_b_expr.clone()]; + let vec3 = vec![lit2.clone(), lit1.clone()]; + let vec4 = vec![lit_true.clone(), lit_false.clone()]; + + // these vectors are same + assert!(physical_exprs_equal(&vec1, &vec1)); + assert!(physical_exprs_equal(&vec1, &vec4)); + assert!(physical_exprs_bag_equal(&vec1, &vec1)); + assert!(physical_exprs_bag_equal(&vec1, &vec4)); + + // these vectors are different + assert!(!physical_exprs_equal(&vec1, &vec2)); + assert!(!physical_exprs_equal(&vec1, &vec3)); + assert!(!physical_exprs_bag_equal(&vec1, &vec2)); + assert!(!physical_exprs_bag_equal(&vec1, &vec3)); + } + + #[test] + fn test_physical_exprs_set_equal() { + let list1: Vec> = vec![ + Arc::new(Column::new("a", 0)), + Arc::new(Column::new("a", 0)), + Arc::new(Column::new("b", 1)), + ]; + let list2: Vec> = vec![ + Arc::new(Column::new("b", 1)), + Arc::new(Column::new("b", 1)), + Arc::new(Column::new("a", 0)), + ]; + assert!(!physical_exprs_bag_equal( + list1.as_slice(), + list2.as_slice() + )); + assert!(!physical_exprs_bag_equal( + list2.as_slice(), + list1.as_slice() + )); + assert!(!physical_exprs_equal(list1.as_slice(), list2.as_slice())); + assert!(!physical_exprs_equal(list2.as_slice(), list1.as_slice())); + + let list3: Vec> = vec![ + Arc::new(Column::new("a", 0)), + Arc::new(Column::new("b", 1)), + Arc::new(Column::new("c", 2)), + Arc::new(Column::new("a", 0)), + Arc::new(Column::new("b", 1)), + ]; + let list4: Vec> = vec![ + Arc::new(Column::new("b", 1)), + Arc::new(Column::new("b", 1)), + Arc::new(Column::new("a", 0)), + Arc::new(Column::new("c", 2)), + Arc::new(Column::new("a", 0)), + ]; + assert!(physical_exprs_bag_equal(list3.as_slice(), list4.as_slice())); + assert!(physical_exprs_bag_equal(list4.as_slice(), list3.as_slice())); + assert!(physical_exprs_bag_equal(list3.as_slice(), list3.as_slice())); + assert!(physical_exprs_bag_equal(list4.as_slice(), list4.as_slice())); + assert!(!physical_exprs_equal(list3.as_slice(), list4.as_slice())); + assert!(!physical_exprs_equal(list4.as_slice(), list3.as_slice())); + assert!(physical_exprs_bag_equal(list3.as_slice(), list3.as_slice())); + assert!(physical_exprs_bag_equal(list4.as_slice(), list4.as_slice())); + } + + #[test] + fn test_deduplicate_physical_exprs() { + let lit_true = &(Arc::new(Literal::new(ScalarValue::Boolean(Some(true)))) + as Arc); + let lit_false = &(Arc::new(Literal::new(ScalarValue::Boolean(Some(false)))) + as Arc); + let lit4 = &(Arc::new(Literal::new(ScalarValue::Int32(Some(4)))) + as Arc); + let lit2 = &(Arc::new(Literal::new(ScalarValue::Int32(Some(2)))) + as Arc); + let col_a_expr = &(Arc::new(Column::new("a", 0)) as Arc); + let col_b_expr = &(Arc::new(Column::new("b", 1)) as Arc); + + // First vector in the tuple is arguments, second one is the expected value. 
+ let test_cases = vec![ + // ---------- TEST CASE 1----------// + ( + vec![ + lit_true, lit_false, lit4, lit2, col_a_expr, col_a_expr, col_b_expr, + lit_true, lit2, + ], + vec![lit_true, lit_false, lit4, lit2, col_a_expr, col_b_expr], + ), + // ---------- TEST CASE 2----------// + ( + vec![lit_true, lit_true, lit_false, lit4], + vec![lit_true, lit4, lit_false], + ), + ]; + for (exprs, expected) in test_cases { + let mut exprs = exprs.into_iter().cloned().collect::>(); + let expected = expected.into_iter().cloned().collect::>(); + deduplicate_physical_exprs(&mut exprs); + assert!(physical_exprs_equal(&exprs, &expected)); + } } } diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs index 9a74c2ca64d1..64c1d0be0455 100644 --- a/datafusion/physical-expr/src/planner.rs +++ b/datafusion/physical-expr/src/planner.rs @@ -448,3 +448,37 @@ pub fn create_physical_expr( } } } + +#[cfg(test)] +mod tests { + use super::*; + use arrow_array::{ArrayRef, BooleanArray, RecordBatch, StringArray}; + use arrow_schema::{DataType, Field, Schema}; + use datafusion_common::{DFSchema, Result}; + use datafusion_expr::{col, left, Literal}; + + #[test] + fn test_create_physical_expr_scalar_input_output() -> Result<()> { + let expr = col("letter").eq(left("APACHE".lit(), 1i64.lit())); + + let schema = Schema::new(vec![Field::new("letter", DataType::Utf8, false)]); + let df_schema = DFSchema::try_from_qualified_schema("data", &schema)?; + let p = create_physical_expr(&expr, &df_schema, &schema, &ExecutionProps::new())?; + + let batch = RecordBatch::try_new( + Arc::new(schema), + vec![Arc::new(StringArray::from_iter_values(vec![ + "A", "B", "C", "D", + ]))], + )?; + let result = p.evaluate(&batch)?; + let result = result.into_array(4); + + assert_eq!( + &result, + &(Arc::new(BooleanArray::from(vec![true, false, false, false,])) as ArrayRef) + ); + + Ok(()) + } +} diff --git a/datafusion/physical-expr/src/scalar_function.rs b/datafusion/physical-expr/src/scalar_function.rs index 43598ce56489..5acd5dcf2336 100644 --- a/datafusion/physical-expr/src/scalar_function.rs +++ b/datafusion/physical-expr/src/scalar_function.rs @@ -30,15 +30,13 @@ //! to a function that supports f64, it is coerced to f64. 
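Editor's note: the new planner test above (`test_create_physical_expr_scalar_input_output`) appears to exercise the change made earlier in `functions.rs`: when every argument is a `ColumnarValue::Scalar`, the wrapped kernel's single-row result is now folded back into a scalar instead of being returned as a length-one array. A self-contained sketch of that wrapper logic, using toy types rather than DataFusion's `ColumnarValue`:

```rust
/// Minimal stand-ins for `ColumnarValue` and the scalar-preserving wrapper.
#[derive(Clone, Debug, PartialEq)]
enum Value {
    Scalar(i64),
    Array(Vec<i64>),
}

fn apply(inner: impl Fn(&[Vec<i64>]) -> Vec<i64>, args: &[Value]) -> Value {
    // Infer the batch length from the first array argument, if any.
    let len = args.iter().find_map(|v| match v {
        Value::Array(a) => Some(a.len()),
        Value::Scalar(_) => None,
    });
    let is_scalar = len.is_none();
    let n = len.unwrap_or(1);
    // Broadcast scalars to arrays of the inferred length.
    let expanded: Vec<Vec<i64>> = args
        .iter()
        .map(|v| match v {
            Value::Array(a) => a.clone(),
            Value::Scalar(s) => vec![*s; n],
        })
        .collect();
    let result = inner(expanded.as_slice());
    if is_scalar {
        // All inputs were scalars: collapse the single-row result back to a scalar.
        Value::Scalar(result[0])
    } else {
        Value::Array(result)
    }
}

fn main() {
    let add = |cols: &[Vec<i64>]| -> Vec<i64> {
        cols[0].iter().zip(&cols[1]).map(|(a, b)| a + b).collect()
    };
    assert_eq!(apply(add, &[Value::Scalar(1), Value::Scalar(2)]), Value::Scalar(3));
    assert_eq!(
        apply(add, &[Value::Array(vec![1, 2]), Value::Scalar(10)]),
        Value::Array(vec![11, 12])
    );
}
```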
use std::any::Any; -use std::fmt::Debug; -use std::fmt::{self, Formatter}; +use std::fmt::{self, Debug, Formatter}; use std::hash::{Hash, Hasher}; use std::sync::Arc; use crate::functions::out_ordering; -use crate::physical_expr::down_cast_any_ref; +use crate::physical_expr::{down_cast_any_ref, physical_exprs_equal}; use crate::sort_properties::SortProperties; -use crate::utils::expr_list_eq_strict_order; use crate::PhysicalExpr; use arrow::datatypes::{DataType, Schema}; @@ -195,7 +193,7 @@ impl PartialEq for ScalarFunctionExpr { .downcast_ref::() .map(|x| { self.name == x.name - && expr_list_eq_strict_order(&self.args, &x.args) + && physical_exprs_equal(&self.args, &x.args) && self.return_type == x.return_type }) .unwrap_or(false) diff --git a/datafusion/physical-expr/src/sort_expr.rs b/datafusion/physical-expr/src/sort_expr.rs index 74179ba5947c..664a6b65b7f7 100644 --- a/datafusion/physical-expr/src/sort_expr.rs +++ b/datafusion/physical-expr/src/sort_expr.rs @@ -25,8 +25,8 @@ use crate::PhysicalExpr; use arrow::compute::kernels::sort::{SortColumn, SortOptions}; use arrow::record_batch::RecordBatch; -use datafusion_common::plan_err; -use datafusion_common::{DataFusionError, Result}; +use arrow_schema::Schema; +use datafusion_common::{exec_err, DataFusionError, Result}; use datafusion_expr::ColumnarValue; /// Represents Sort operation for a column in a RecordBatch @@ -66,7 +66,7 @@ impl PhysicalSortExpr { let array_to_sort = match value_to_sort { ColumnarValue::Array(array) => array, ColumnarValue::Scalar(scalar) => { - return plan_err!( + return exec_err!( "Sort operation is not applicable to scalar value {scalar}" ); } @@ -77,18 +77,26 @@ impl PhysicalSortExpr { }) } - /// Check whether sort expression satisfies [`PhysicalSortRequirement`]. - /// - /// If sort options is Some in `PhysicalSortRequirement`, `expr` - /// and `options` field are compared for equality. - /// - /// If sort options is None in `PhysicalSortRequirement`, only - /// `expr` is compared for equality. - pub fn satisfy(&self, requirement: &PhysicalSortRequirement) -> bool { + /// Checks whether this sort expression satisfies the given `requirement`. + /// If sort options are unspecified in `requirement`, only expressions are + /// compared for inequality. + pub fn satisfy( + &self, + requirement: &PhysicalSortRequirement, + schema: &Schema, + ) -> bool { + // If the column is not nullable, NULLS FIRST/LAST is not important. + let nullable = self.expr.nullable(schema).unwrap_or(true); self.expr.eq(&requirement.expr) - && requirement - .options - .map_or(true, |opts| self.options == opts) + && if nullable { + requirement + .options + .map_or(true, |opts| self.options == opts) + } else { + requirement + .options + .map_or(true, |opts| self.options.descending == opts.descending) + } } /// Returns a [`Display`]able list of `PhysicalSortExpr`. @@ -248,11 +256,18 @@ fn to_str(options: &SortOptions) -> &str { } } -///`LexOrdering` is a type alias for lexicographical ordering definition`Vec` +///`LexOrdering` is an alias for the type `Vec`, which represents +/// a lexicographical ordering. pub type LexOrdering = Vec; -///`LexOrderingRef` is a type alias for lexicographical ordering reference &`[PhysicalSortExpr]` +///`LexOrderingRef` is an alias for the type &`[PhysicalSortExpr]`, which represents +/// a reference to a lexicographical ordering. 
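Editor's note: the reworked `PhysicalSortExpr::satisfy` above now takes the schema so it can ignore `NULLS FIRST/LAST` for non-nullable expressions: with no NULLs to place, only the sort direction matters. A small self-contained sketch of that rule, with simplified stand-in types rather than the DataFusion definitions:

```rust
#[derive(Clone, Copy, PartialEq, Debug)]
struct SortOptions {
    descending: bool,
    nulls_first: bool,
}

/// Does an expression sorted with `provided` options satisfy a requirement with
/// optional `required` options, given whether the expression is nullable?
fn satisfies(provided: SortOptions, required: Option<SortOptions>, nullable: bool) -> bool {
    match required {
        // An unspecified requirement is satisfied by any ordering of the expression.
        None => true,
        Some(req) if nullable => provided == req,
        // Non-nullable: NULLS FIRST/LAST is irrelevant, compare direction only.
        Some(req) => provided.descending == req.descending,
    }
}

fn main() {
    let asc_nulls_last = SortOptions { descending: false, nulls_first: false };
    let asc_nulls_first = SortOptions { descending: false, nulls_first: true };
    // Same direction, different NULL placement: fine for a NOT NULL column...
    assert!(satisfies(asc_nulls_last, Some(asc_nulls_first), false));
    // ...but not for a nullable one.
    assert!(!satisfies(asc_nulls_last, Some(asc_nulls_first), true));
}
```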
pub type LexOrderingRef<'a> = &'a [PhysicalSortExpr]; -///`LexOrderingReq` is a type alias for lexicographical ordering requirement definition`Vec` -pub type LexOrderingReq = Vec; +///`LexRequirement` is an alias for the type `Vec`, which +/// represents a lexicographical ordering requirement. +pub type LexRequirement = Vec; + +///`LexRequirementRef` is an alias for the type &`[PhysicalSortRequirement]`, which +/// represents a reference to a lexicographical ordering requirement. +pub type LexRequirementRef<'a> = &'a [PhysicalSortRequirement]; diff --git a/datafusion/physical-expr/src/sort_properties.rs b/datafusion/physical-expr/src/sort_properties.rs index 097f491cb979..a3b201f84e9d 100644 --- a/datafusion/physical-expr/src/sort_properties.rs +++ b/datafusion/physical-expr/src/sort_properties.rs @@ -17,14 +17,10 @@ use std::{ops::Neg, sync::Arc}; -use crate::expressions::Column; -use crate::utils::get_indices_of_matching_sort_exprs_with_order_eq; -use crate::{ - EquivalenceProperties, OrderingEquivalenceProperties, PhysicalExpr, PhysicalSortExpr, -}; +use crate::PhysicalExpr; use arrow_schema::SortOptions; -use datafusion_common::tree_node::{Transformed, TreeNode, VisitRecursion}; +use datafusion_common::tree_node::{TreeNode, VisitRecursion}; use datafusion_common::Result; use itertools::Itertools; @@ -155,37 +151,36 @@ impl Neg for SortProperties { #[derive(Debug)] pub struct ExprOrdering { pub expr: Arc, - pub state: Option, - pub children_states: Option>, + pub state: SortProperties, + pub children_states: Vec, } impl ExprOrdering { + /// Creates a new [`ExprOrdering`] with [`SortProperties::Unordered`] states + /// for `expr` and its children. pub fn new(expr: Arc) -> Self { + let size = expr.children().len(); Self { expr, - state: None, - children_states: None, + state: SortProperties::Unordered, + children_states: vec![SortProperties::Unordered; size], } } - pub fn children(&self) -> Vec { + /// Updates this [`ExprOrdering`]'s children states with the given states. + pub fn with_new_children(mut self, children_states: Vec) -> Self { + self.children_states = children_states; + self + } + + /// Creates new [`ExprOrdering`] objects for each child of the expression. + pub fn children_expr_orderings(&self) -> Vec { self.expr .children() .into_iter() .map(ExprOrdering::new) .collect() } - - pub fn new_with_children( - children_states: Vec, - parent_expr: Arc, - ) -> Self { - Self { - expr: parent_expr, - state: None, - children_states: Some(children_states), - } - } } impl TreeNode for ExprOrdering { @@ -193,7 +188,7 @@ impl TreeNode for ExprOrdering { where F: FnMut(&Self) -> Result, { - for child in self.children() { + for child in self.children_expr_orderings() { match op(&child)? { VisitRecursion::Continue => {} VisitRecursion::Skip => return Ok(VisitRecursion::Continue), @@ -207,71 +202,21 @@ impl TreeNode for ExprOrdering { where F: FnMut(Self) -> Result, { - let children = self.children(); - if children.is_empty() { + if self.children_states.is_empty() { Ok(self) } else { - Ok(ExprOrdering::new_with_children( - children + let child_expr_orderings = self.children_expr_orderings(); + // After mapping over the children, the function `F` applies to the + // current object and updates its state. + Ok(self.with_new_children( + child_expr_orderings .into_iter() + // Update children states after this transformation: .map(transform) - .map_ok(|c| c.state.unwrap_or(SortProperties::Unordered)) + // Extract the state (i.e. 
sort properties) information: + .map_ok(|c| c.state) .collect::>>()?, - self.expr, )) } } } - -/// Calculates the [`SortProperties`] of a given [`ExprOrdering`] node. -/// The node is either a leaf node, or an intermediate node: -/// - If it is a leaf node, the children states are `None`. We directly find -/// the order of the node by looking at the given sort expression and equivalence -/// properties if it is a `Column` leaf, or we mark it as unordered. In the case -/// of a `Literal` leaf, we mark it as singleton so that it can cooperate with -/// some ordered columns at the upper steps. -/// - If it is an intermediate node, the children states matter. Each `PhysicalExpr` -/// and operator has its own rules about how to propagate the children orderings. -/// However, before the children order propagation, it is checked that whether -/// the intermediate node can be directly matched with the sort expression. If there -/// is a match, the sort expression emerges at that node immediately, discarding -/// the order coming from the children. -pub fn update_ordering( - mut node: ExprOrdering, - sort_expr: &PhysicalSortExpr, - equal_properties: &EquivalenceProperties, - ordering_equal_properties: &OrderingEquivalenceProperties, -) -> Result> { - // If we can directly match a sort expr with the current node, we can set - // its state and return early. - // TODO: If there is a PhysicalExpr other than a Column at this node (e.g. - // a BinaryExpr like a + b), and there is an ordering equivalence of - // it (let's say like c + d), we actually can find it at this step. - if sort_expr.expr.eq(&node.expr) { - node.state = Some(SortProperties::Ordered(sort_expr.options)); - return Ok(Transformed::Yes(node)); - } - - if let Some(children_sort_options) = &node.children_states { - // We have an intermediate (non-leaf) node, account for its children: - node.state = Some(node.expr.get_ordering(children_sort_options)); - } else if let Some(column) = node.expr.as_any().downcast_ref::() { - // We have a Column, which is one of the two possible leaf node types: - node.state = get_indices_of_matching_sort_exprs_with_order_eq( - &[sort_expr.clone()], - &[column.clone()], - equal_properties, - ordering_equal_properties, - ) - .map(|(sort_options, _)| { - SortProperties::Ordered(SortOptions { - descending: sort_options[0].descending, - nulls_first: sort_options[0].nulls_first, - }) - }); - } else { - // We have a Literal, which is the other possible leaf node type: - node.state = Some(node.expr.get_ordering(&[])); - } - Ok(Transformed::Yes(node)) -} diff --git a/datafusion/physical-expr/src/utils.rs b/datafusion/physical-expr/src/utils.rs index b2a6bb5ca6d2..2f4ee89463a8 100644 --- a/datafusion/physical-expr/src/utils.rs +++ b/datafusion/physical-expr/src/utils.rs @@ -15,62 +15,25 @@ // specific language governing permissions and limitations // under the License. 
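Editor's note: the `ExprOrdering` changes above replace the `Option`-wrapped states with plain `SortProperties`, defaulting every node to `Unordered` and letting a bottom-up transform fill in each parent from its children's states. The toy example below shows that propagation pattern on a tiny expression tree; the rules are deliberately simplified and are not DataFusion's actual `get_ordering` logic:

```rust
#[derive(Clone, Copy, Debug, PartialEq)]
enum SortProperties {
    Ordered { descending: bool },
    Unordered,
    Singleton, // e.g. a literal: compatible with any ordering
}

enum Expr {
    Column(SortProperties),    // leaf whose ordering is known from the input
    Literal,                   // leaf constant
    Add(Box<Expr>, Box<Expr>), // example of an order-preserving binary op
}

/// Bottom-up: compute the children first, then derive the parent's state.
fn ordering(expr: &Expr) -> SortProperties {
    match expr {
        Expr::Column(state) => *state,
        Expr::Literal => SortProperties::Singleton,
        Expr::Add(l, r) => match (ordering(l), ordering(r)) {
            // constant + constant is still a constant
            (SortProperties::Singleton, SortProperties::Singleton) => SortProperties::Singleton,
            // ordered + constant keeps the ordering
            (s @ SortProperties::Ordered { .. }, SortProperties::Singleton)
            | (SortProperties::Singleton, s @ SortProperties::Ordered { .. }) => s,
            // ordered + ordered in the same direction keeps that direction
            (SortProperties::Ordered { descending: a }, SortProperties::Ordered { descending: b })
                if a == b =>
            {
                SortProperties::Ordered { descending: a }
            }
            _ => SortProperties::Unordered,
        },
    }
}

fn main() {
    let asc = SortProperties::Ordered { descending: false };
    let expr = Expr::Add(Box::new(Expr::Column(asc)), Box::new(Expr::Literal));
    assert_eq!(ordering(&expr), asc);
}
```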
-use crate::equivalence::{EquivalenceProperties, OrderingEquivalenceProperties}; -use crate::expressions::{BinaryExpr, Column, UnKnownColumn}; -use crate::sort_properties::{ExprOrdering, SortProperties}; -use crate::update_ordering; -use crate::{PhysicalExpr, PhysicalSortExpr, PhysicalSortRequirement}; +use std::borrow::Borrow; +use std::collections::{HashMap, HashSet}; +use std::sync::Arc; + +use crate::expressions::{BinaryExpr, Column}; +use crate::{PhysicalExpr, PhysicalSortExpr}; use arrow::array::{make_array, Array, ArrayRef, BooleanArray, MutableArrayData}; use arrow::compute::{and_kleene, is_not_null, SlicesIterator}; use arrow::datatypes::SchemaRef; -use arrow_schema::SortOptions; use datafusion_common::tree_node::{ Transformed, TreeNode, TreeNodeRewriter, VisitRecursion, }; -use datafusion_common::utils::longest_consecutive_prefix; use datafusion_common::Result; use datafusion_expr::Operator; use itertools::Itertools; use petgraph::graph::NodeIndex; use petgraph::stable_graph::StableGraph; -use std::borrow::Borrow; -use std::collections::HashMap; -use std::collections::HashSet; -use std::sync::Arc; - -/// Compare the two expr lists are equal no matter the order. -/// For example two InListExpr can be considered to be equals no matter the order: -/// -/// In('a','b','c') == In('c','b','a') -pub fn expr_list_eq_any_order( - list1: &[Arc], - list2: &[Arc], -) -> bool { - if list1.len() == list2.len() { - let mut expr_vec1 = list1.to_vec(); - let mut expr_vec2 = list2.to_vec(); - while let Some(expr1) = expr_vec1.pop() { - if let Some(idx) = expr_vec2.iter().position(|expr2| expr1.eq(expr2)) { - expr_vec2.swap_remove(idx); - } else { - break; - } - } - expr_vec1.is_empty() && expr_vec2.is_empty() - } else { - false - } -} - -/// Strictly compare the two expr lists are equal in the given order. -pub fn expr_list_eq_strict_order( - list1: &[Arc], - list2: &[Arc], -) -> bool { - list1.len() == list2.len() && list1.iter().zip(list2.iter()).all(|(e1, e2)| e1.eq(e2)) -} /// Assume the predicate is in the form of CNF, split the predicate to a Vec of PhysicalExprs. /// @@ -103,219 +66,6 @@ fn split_conjunction_impl<'a>( } } -/// Normalize the output expressions based on Columns Map. -/// -/// If there is a mapping in Columns Map, replace the Column in the output expressions with the 1st Column in the Columns Map. -/// Otherwise, replace the Column with a place holder of [UnKnownColumn] -/// -pub fn normalize_out_expr_with_columns_map( - expr: Arc, - columns_map: &HashMap>, -) -> Arc { - expr.clone() - .transform(&|expr| { - let normalized_form = match expr.as_any().downcast_ref::() { - Some(column) => columns_map - .get(column) - .map(|c| Arc::new(c[0].clone()) as _) - .or_else(|| Some(Arc::new(UnKnownColumn::new(column.name())) as _)), - None => None, - }; - Ok(if let Some(normalized_form) = normalized_form { - Transformed::Yes(normalized_form) - } else { - Transformed::No(expr) - }) - }) - .unwrap_or(expr) -} - -/// Transform `sort_exprs` vector, to standardized version using `eq_properties` and `ordering_eq_properties` -/// Assume `eq_properties` states that `Column a` and `Column b` are aliases. -/// Also assume `ordering_eq_properties` states that ordering `vec![d ASC]` and `vec![a ASC, c ASC]` are -/// ordering equivalent (in the sense that both describe the ordering of the table). 
-/// If the `sort_exprs` input to this function were `vec![b ASC, c ASC]`, -/// This function converts `sort_exprs` `vec![b ASC, c ASC]` to first `vec![a ASC, c ASC]` after considering `eq_properties` -/// Then converts `vec![a ASC, c ASC]` to `vec![d ASC]` after considering `ordering_eq_properties`. -/// Standardized version `vec![d ASC]` is used in subsequent operations. -fn normalize_sort_exprs( - sort_exprs: &[PhysicalSortExpr], - eq_properties: &EquivalenceProperties, - ordering_eq_properties: &OrderingEquivalenceProperties, -) -> Vec { - let sort_requirements = PhysicalSortRequirement::from_sort_exprs(sort_exprs.iter()); - let normalized_exprs = normalize_sort_requirements( - &sort_requirements, - eq_properties, - ordering_eq_properties, - ); - PhysicalSortRequirement::to_sort_exprs(normalized_exprs) -} - -/// Transform `sort_reqs` vector, to standardized version using `eq_properties` and `ordering_eq_properties` -/// Assume `eq_properties` states that `Column a` and `Column b` are aliases. -/// Also assume `ordering_eq_properties` states that ordering `vec![d ASC]` and `vec![a ASC, c ASC]` are -/// ordering equivalent (in the sense that both describe the ordering of the table). -/// If the `sort_reqs` input to this function were `vec![b Some(ASC), c None]`, -/// This function converts `sort_exprs` `vec![b Some(ASC), c None]` to first `vec![a Some(ASC), c None]` after considering `eq_properties` -/// Then converts `vec![a Some(ASC), c None]` to `vec![d Some(ASC)]` after considering `ordering_eq_properties`. -/// Standardized version `vec![d Some(ASC)]` is used in subsequent operations. -fn normalize_sort_requirements( - sort_reqs: &[PhysicalSortRequirement], - eq_properties: &EquivalenceProperties, - ordering_eq_properties: &OrderingEquivalenceProperties, -) -> Vec { - let normalized_sort_reqs = eq_properties.normalize_sort_requirements(sort_reqs); - ordering_eq_properties.normalize_sort_requirements(&normalized_sort_reqs) -} - -/// Checks whether given ordering requirements are satisfied by provided [PhysicalSortExpr]s. -pub fn ordering_satisfy< - F: FnOnce() -> EquivalenceProperties, - F2: FnOnce() -> OrderingEquivalenceProperties, ->( - provided: Option<&[PhysicalSortExpr]>, - required: Option<&[PhysicalSortExpr]>, - equal_properties: F, - ordering_equal_properties: F2, -) -> bool { - match (provided, required) { - (_, None) => true, - (None, Some(_)) => false, - (Some(provided), Some(required)) => ordering_satisfy_concrete( - provided, - required, - equal_properties, - ordering_equal_properties, - ), - } -} - -/// Checks whether the required [`PhysicalSortExpr`]s are satisfied by the -/// provided [`PhysicalSortExpr`]s. -pub fn ordering_satisfy_concrete< - F: FnOnce() -> EquivalenceProperties, - F2: FnOnce() -> OrderingEquivalenceProperties, ->( - provided: &[PhysicalSortExpr], - required: &[PhysicalSortExpr], - equal_properties: F, - ordering_equal_properties: F2, -) -> bool { - let oeq_properties = ordering_equal_properties(); - let eq_properties = equal_properties(); - let required_normalized = - normalize_sort_exprs(required, &eq_properties, &oeq_properties); - let provided_normalized = - normalize_sort_exprs(provided, &eq_properties, &oeq_properties); - if required_normalized.len() > provided_normalized.len() { - return false; - } - required_normalized - .into_iter() - .zip(provided_normalized) - .all(|(req, given)| given == req) -} - -/// Checks whether the given [`PhysicalSortRequirement`]s are satisfied by the -/// provided [`PhysicalSortExpr`]s. 
-pub fn ordering_satisfy_requirement< - F: FnOnce() -> EquivalenceProperties, - F2: FnOnce() -> OrderingEquivalenceProperties, ->( - provided: Option<&[PhysicalSortExpr]>, - required: Option<&[PhysicalSortRequirement]>, - equal_properties: F, - ordering_equal_properties: F2, -) -> bool { - match (provided, required) { - (_, None) => true, - (None, Some(_)) => false, - (Some(provided), Some(required)) => ordering_satisfy_requirement_concrete( - provided, - required, - equal_properties, - ordering_equal_properties, - ), - } -} - -/// Checks whether the given [`PhysicalSortRequirement`]s are satisfied by the -/// provided [`PhysicalSortExpr`]s. -pub fn ordering_satisfy_requirement_concrete< - F: FnOnce() -> EquivalenceProperties, - F2: FnOnce() -> OrderingEquivalenceProperties, ->( - provided: &[PhysicalSortExpr], - required: &[PhysicalSortRequirement], - equal_properties: F, - ordering_equal_properties: F2, -) -> bool { - let oeq_properties = ordering_equal_properties(); - let eq_properties = equal_properties(); - let required_normalized = - normalize_sort_requirements(required, &eq_properties, &oeq_properties); - let provided_normalized = - normalize_sort_exprs(provided, &eq_properties, &oeq_properties); - if required_normalized.len() > provided_normalized.len() { - return false; - } - required_normalized - .into_iter() - .zip(provided_normalized) - .all(|(req, given)| given.satisfy(&req)) -} - -/// Checks whether the given [`PhysicalSortRequirement`]s are equal or more -/// specific than the provided [`PhysicalSortRequirement`]s. -pub fn requirements_compatible< - F: FnOnce() -> OrderingEquivalenceProperties, - F2: FnOnce() -> EquivalenceProperties, ->( - provided: Option<&[PhysicalSortRequirement]>, - required: Option<&[PhysicalSortRequirement]>, - ordering_equal_properties: F, - equal_properties: F2, -) -> bool { - match (provided, required) { - (_, None) => true, - (None, Some(_)) => false, - (Some(provided), Some(required)) => requirements_compatible_concrete( - provided, - required, - ordering_equal_properties, - equal_properties, - ), - } -} - -/// Checks whether the given [`PhysicalSortRequirement`]s are equal or more -/// specific than the provided [`PhysicalSortRequirement`]s. -fn requirements_compatible_concrete< - F: FnOnce() -> OrderingEquivalenceProperties, - F2: FnOnce() -> EquivalenceProperties, ->( - provided: &[PhysicalSortRequirement], - required: &[PhysicalSortRequirement], - ordering_equal_properties: F, - equal_properties: F2, -) -> bool { - let oeq_properties = ordering_equal_properties(); - let eq_properties = equal_properties(); - - let required_normalized = - normalize_sort_requirements(required, &eq_properties, &oeq_properties); - let provided_normalized = - normalize_sort_requirements(provided, &eq_properties, &oeq_properties); - if required_normalized.len() > provided_normalized.len() { - return false; - } - required_normalized - .into_iter() - .zip(provided_normalized) - .all(|(req, given)| given.compatible(&req)) -} - /// This function maps back requirement after ProjectionExec /// to the Executor for its input. // Specifically, `ProjectionExec` changes index of `Column`s in the schema of its input executor. @@ -358,19 +108,6 @@ pub fn convert_to_expr>( .collect() } -/// This function finds the indices of `targets` within `items`, taking into -/// account equivalences according to `equal_properties`. 
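Editor's note: the `ordering_satisfy*` helpers deleted here (and the `requirements_compatible*` ones below) all reduced to the same core check after normalization: the required ordering must match a prefix of the provided one, position by position. Presumably that logic now lives on `EquivalenceProperties`; the diff does not show the replacement, so the sketch below only restates the prefix check itself, with strings standing in for physical expressions:

```rust
#[derive(Clone, PartialEq, Debug)]
struct SortExpr {
    expr: String,
    descending: bool,
}

/// After normalization, `required` is satisfied when it matches a prefix of `provided`.
fn ordering_satisfy(provided: &[SortExpr], required: &[SortExpr]) -> bool {
    required.len() <= provided.len()
        && required.iter().zip(provided).all(|(req, given)| req == given)
}

fn main() {
    let asc = |e: &str| SortExpr { expr: e.to_string(), descending: false };
    // provided `a ASC, b ASC` satisfies required `a ASC`...
    assert!(ordering_satisfy(&[asc("a"), asc("b")], &[asc("a")]));
    // ...but not the other way around.
    assert!(!ordering_satisfy(&[asc("a")], &[asc("a"), asc("b")]));
}
```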
-pub fn get_indices_of_matching_exprs EquivalenceProperties>( - targets: &[Arc], - items: &[Arc], - equal_properties: F, -) -> Vec { - let eq_properties = equal_properties(); - let normalized_items = eq_properties.normalize_exprs(items); - let normalized_targets = eq_properties.normalize_exprs(targets); - get_indices_of_exprs_strict(normalized_targets, &normalized_items) -} - /// This function finds the indices of `targets` within `items` using strict /// equality. pub fn get_indices_of_exprs_strict>>( @@ -567,31 +304,6 @@ pub fn reverse_order_bys(order_bys: &[PhysicalSortExpr]) -> Vec EquivalenceProperties, - F2: Fn() -> OrderingEquivalenceProperties, ->( - req1: &'a [PhysicalSortExpr], - req2: &'a [PhysicalSortExpr], - eq_properties: F, - ordering_eq_properties: F2, -) -> Option<&'a [PhysicalSortExpr]> { - if ordering_satisfy_concrete(req1, req2, &eq_properties, &ordering_eq_properties) { - // Finer requirement is `provided`, since it satisfies the other: - return Some(req1); - } - if ordering_satisfy_concrete(req2, req1, &eq_properties, &ordering_eq_properties) { - // Finer requirement is `req`, since it satisfies the other: - return Some(req2); - } - // Neither `provided` nor `req` satisfies one another, they are incompatible. - None -} - /// Scatter `truthy` array by boolean mask. When the mask evaluates `true`, next values of `truthy` /// are taken, when the mask evaluates `false` values null values are filled. /// @@ -635,159 +347,6 @@ pub fn scatter(mask: &BooleanArray, truthy: &dyn Array) -> Result { Ok(make_array(data)) } -/// Return indices of each item in `required_exprs` inside `provided_exprs`. -/// All the items should be found inside `provided_exprs`. Found indices will -/// be a permutation of the range 0, 1, ..., N. For example, \[2,1,0\] is valid -/// (\[0,1,2\] is consecutive), but \[3,1,0\] is not valid (\[0,1,3\] is not -/// consecutive). -fn get_lexicographical_match_indices( - required_exprs: &[Arc], - provided_exprs: &[Arc], -) -> Option> { - let indices_of_equality = get_indices_of_exprs_strict(required_exprs, provided_exprs); - let mut ordered_indices = indices_of_equality.clone(); - ordered_indices.sort(); - let n_match = indices_of_equality.len(); - let first_n = longest_consecutive_prefix(ordered_indices); - (n_match == required_exprs.len() && first_n == n_match && n_match > 0) - .then_some(indices_of_equality) -} - -/// Attempts to find a full match between the required columns to be ordered (lexicographically), and -/// the provided sort options (lexicographically), while considering equivalence properties. -/// -/// It starts by normalizing members of both the required columns and the provided sort options. -/// If a full match is found, returns the sort options and indices of the matches. If no full match is found, -/// the function proceeds to check against ordering equivalence properties. If still no full match is found, -/// the function returns `None`. 
-pub fn get_indices_of_matching_sort_exprs_with_order_eq( - provided_sorts: &[PhysicalSortExpr], - required_columns: &[Column], - eq_properties: &EquivalenceProperties, - order_eq_properties: &OrderingEquivalenceProperties, -) -> Option<(Vec, Vec)> { - // Create a vector of `PhysicalSortRequirement`s from the required columns: - let sort_requirement_on_requirements = required_columns - .iter() - .map(|required_column| PhysicalSortRequirement { - expr: Arc::new(required_column.clone()) as _, - options: None, - }) - .collect::>(); - - let normalized_required = normalize_sort_requirements( - &sort_requirement_on_requirements, - eq_properties, - &OrderingEquivalenceProperties::new(order_eq_properties.schema()), - ); - let normalized_provided = normalize_sort_requirements( - &PhysicalSortRequirement::from_sort_exprs(provided_sorts.iter()), - eq_properties, - &OrderingEquivalenceProperties::new(order_eq_properties.schema()), - ); - - let provided_sorts = normalized_provided - .iter() - .map(|req| req.expr.clone()) - .collect::>(); - - let normalized_required_expr = normalized_required - .iter() - .map(|req| req.expr.clone()) - .collect::>(); - - if let Some(indices_of_equality) = - get_lexicographical_match_indices(&normalized_required_expr, &provided_sorts) - { - return Some(( - indices_of_equality - .iter() - .filter_map(|index| normalized_provided[*index].options) - .collect(), - indices_of_equality, - )); - } - - // We did not find all the expressions, consult ordering equivalence properties: - if let Some(oeq_class) = order_eq_properties.oeq_class() { - let head = oeq_class.head(); - for ordering in oeq_class.others().iter().chain(std::iter::once(head)) { - let order_eq_class_exprs = convert_to_expr(ordering); - if let Some(indices_of_equality) = get_lexicographical_match_indices( - &normalized_required_expr, - &order_eq_class_exprs, - ) { - return Some(( - indices_of_equality - .iter() - .map(|index| ordering[*index].options) - .collect(), - indices_of_equality, - )); - } - } - } - // If no match found, return `None`: - None -} - -/// Calculates the output orderings for a set of expressions within the context of a given -/// execution plan. The resulting orderings are all in the type of [`Column`], since these -/// expressions become [`Column`] after the projection step. The expressions having an alias -/// are renamed with those aliases in the returned [`PhysicalSortExpr`]'s. If an expression -/// is found to be unordered, the corresponding entry in the output vector is `None`. -/// -/// # Arguments -/// -/// * `expr` - A slice of tuples containing expressions and their corresponding aliases. -/// -/// * `input_output_ordering` - Output ordering of the input plan. -/// -/// * `input_equal_properties` - Equivalence properties of the columns in the input plan. -/// -/// * `input_ordering_equal_properties` - Ordering equivalence properties of the columns in the input plan. -/// -/// # Returns -/// -/// A `Result` containing a vector of optional [`PhysicalSortExpr`]'s. Each element of the -/// vector corresponds to an expression from the input slice. If an expression can be ordered, -/// the corresponding entry is `Some(PhysicalSortExpr)`. If an expression cannot be ordered, -/// the entry is `None`. 
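Editor's note: `find_orderings_of_exprs`, removed just below, was the old path for computing output orderings across a projection; the new `project_empty_output_ordering` test earlier in this diff covers the same scenario through `EquivalenceProperties::project`. A toy illustration of the idea only (hypothetical helper, not DataFusion API), with strings standing in for expressions and a map standing in for the projection mapping:

```rust
use std::collections::HashMap;

/// An output ordering survives a projection only while its leading expressions are
/// still present, possibly under new names; it is cut off at the first expression
/// that was projected away.
fn project_ordering(
    ordering: &[&str],
    mapping: &HashMap<&str, &str>, // source expression -> projected name
) -> Vec<String> {
    ordering
        .iter()
        .map(|e| mapping.get(e).copied())
        .take_while(|m| m.is_some()) // stop at the first missing column
        .flatten()
        .map(str::to_string)
        .collect()
}

fn main() {
    let mapping = HashMap::from([("b", "b_new"), ("a", "a_new")]);
    // Input ordered by `b`: the projection keeps that ordering as `b_new`.
    assert_eq!(project_ordering(&["b"], &mapping), vec!["b_new".to_string()]);
    // Input ordered by `c` first: `c` is gone, so no output ordering remains.
    assert!(project_ordering(&["c", "b"], &mapping).is_empty());
}
```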
-pub fn find_orderings_of_exprs( - expr: &[(Arc, String)], - input_output_ordering: Option<&[PhysicalSortExpr]>, - input_equal_properties: EquivalenceProperties, - input_ordering_equal_properties: OrderingEquivalenceProperties, -) -> Result>> { - let mut orderings: Vec> = vec![]; - if let Some(leading_ordering) = - input_output_ordering.and_then(|output_ordering| output_ordering.first()) - { - for (index, (expression, name)) in expr.iter().enumerate() { - let initial_expr = ExprOrdering::new(expression.clone()); - let transformed = initial_expr.transform_up(&|expr| { - update_ordering( - expr, - leading_ordering, - &input_equal_properties, - &input_ordering_equal_properties, - ) - })?; - if let Some(SortProperties::Ordered(sort_options)) = transformed.state { - orderings.push(Some(PhysicalSortExpr { - expr: Arc::new(Column::new(name, index)), - options: sort_options, - })); - } else { - orderings.push(None); - } - } - } else { - orderings.extend(expr.iter().map(|_| None)); - } - Ok(orderings) -} - /// Merge left and right sort expressions, checking for duplicates. pub fn merge_vectors( left: &[PhysicalSortExpr], @@ -803,15 +362,12 @@ pub fn merge_vectors( #[cfg(test)] mod tests { use std::fmt::{Display, Formatter}; - use std::ops::Not; use std::sync::Arc; use super::*; - use crate::equivalence::OrderingEquivalenceProperties; use crate::expressions::{binary, cast, col, in_list, lit, Column, Literal}; - use crate::{OrderingEquivalentClass, PhysicalSortExpr}; + use crate::PhysicalSortExpr; - use arrow::compute::SortOptions; use arrow_array::Int32Array; use arrow_schema::{DataType, Field, Schema}; use datafusion_common::cast::{as_boolean_array, as_int32_array}; @@ -858,86 +414,6 @@ mod tests { } } - // Generate a schema which consists of 5 columns (a, b, c, d, e) - fn create_test_schema() -> Result { - let a = Field::new("a", DataType::Int32, true); - let b = Field::new("b", DataType::Int32, true); - let c = Field::new("c", DataType::Int32, true); - let d = Field::new("d", DataType::Int32, true); - let e = Field::new("e", DataType::Int32, true); - let f = Field::new("f", DataType::Int32, true); - let schema = Arc::new(Schema::new(vec![a, b, c, d, e, f])); - - Ok(schema) - } - - fn create_test_params() -> Result<( - SchemaRef, - EquivalenceProperties, - OrderingEquivalenceProperties, - )> { - // Assume schema satisfies ordering a ASC NULLS LAST - // and d ASC NULLS LAST, b ASC NULLS LAST and e DESC NULLS FIRST, f ASC NULLS LAST, g ASC NULLS LAST - // Assume that column a and c are aliases. 
- let col_a = &Column::new("a", 0); - let col_b = &Column::new("b", 1); - let col_c = &Column::new("c", 2); - let col_d = &Column::new("d", 3); - let col_e = &Column::new("e", 4); - let col_f = &Column::new("f", 5); - let col_g = &Column::new("g", 6); - let option1 = SortOptions { - descending: false, - nulls_first: false, - }; - let option2 = SortOptions { - descending: true, - nulls_first: true, - }; - let test_schema = create_test_schema()?; - let mut eq_properties = EquivalenceProperties::new(test_schema.clone()); - eq_properties.add_equal_conditions((col_a, col_c)); - let mut ordering_eq_properties = - OrderingEquivalenceProperties::new(test_schema.clone()); - ordering_eq_properties.add_equal_conditions(( - &vec![PhysicalSortExpr { - expr: Arc::new(col_a.clone()), - options: option1, - }], - &vec![ - PhysicalSortExpr { - expr: Arc::new(col_d.clone()), - options: option1, - }, - PhysicalSortExpr { - expr: Arc::new(col_b.clone()), - options: option1, - }, - ], - )); - ordering_eq_properties.add_equal_conditions(( - &vec![PhysicalSortExpr { - expr: Arc::new(col_a.clone()), - options: option1, - }], - &vec![ - PhysicalSortExpr { - expr: Arc::new(col_e.clone()), - options: option2, - }, - PhysicalSortExpr { - expr: Arc::new(col_f.clone()), - options: option1, - }, - PhysicalSortExpr { - expr: Arc::new(col_g.clone()), - options: option1, - }, - ], - )); - Ok((test_schema, eq_properties, ordering_eq_properties)) - } - #[test] fn test_build_dag() -> Result<()> { let schema = Schema::new(vec![ @@ -1016,9 +492,7 @@ mod tests { } #[test] - fn test_get_indices_of_matching_exprs() { - let empty_schema = &Arc::new(Schema::empty()); - let equal_properties = || EquivalenceProperties::new(empty_schema.clone()); + fn test_get_indices_of_exprs_strict() { let list1: Vec> = vec![ Arc::new(Column::new("a", 0)), Arc::new(Column::new("b", 1)), @@ -1030,313 +504,8 @@ mod tests { Arc::new(Column::new("c", 2)), Arc::new(Column::new("a", 0)), ]; - assert_eq!( - get_indices_of_matching_exprs(&list1, &list2, equal_properties), - vec![2, 0, 1] - ); - assert_eq!( - get_indices_of_matching_exprs(&list2, &list1, equal_properties), - vec![1, 2, 0] - ); - } - - #[test] - fn expr_list_eq_test() -> Result<()> { - let list1: Vec> = vec![ - Arc::new(Column::new("a", 0)), - Arc::new(Column::new("a", 0)), - Arc::new(Column::new("b", 1)), - ]; - let list2: Vec> = vec![ - Arc::new(Column::new("b", 1)), - Arc::new(Column::new("b", 1)), - Arc::new(Column::new("a", 0)), - ]; - assert!(!expr_list_eq_any_order(list1.as_slice(), list2.as_slice())); - assert!(!expr_list_eq_any_order(list2.as_slice(), list1.as_slice())); - - assert!(!expr_list_eq_strict_order( - list1.as_slice(), - list2.as_slice() - )); - assert!(!expr_list_eq_strict_order( - list2.as_slice(), - list1.as_slice() - )); - - let list3: Vec> = vec![ - Arc::new(Column::new("a", 0)), - Arc::new(Column::new("b", 1)), - Arc::new(Column::new("c", 2)), - Arc::new(Column::new("a", 0)), - Arc::new(Column::new("b", 1)), - ]; - let list4: Vec> = vec![ - Arc::new(Column::new("b", 1)), - Arc::new(Column::new("b", 1)), - Arc::new(Column::new("a", 0)), - Arc::new(Column::new("c", 2)), - Arc::new(Column::new("a", 0)), - ]; - assert!(expr_list_eq_any_order(list3.as_slice(), list4.as_slice())); - assert!(expr_list_eq_any_order(list4.as_slice(), list3.as_slice())); - assert!(expr_list_eq_any_order(list3.as_slice(), list3.as_slice())); - assert!(expr_list_eq_any_order(list4.as_slice(), list4.as_slice())); - - assert!(!expr_list_eq_strict_order( - list3.as_slice(), - list4.as_slice() - )); - 
assert!(!expr_list_eq_strict_order( - list4.as_slice(), - list3.as_slice() - )); - assert!(expr_list_eq_any_order(list3.as_slice(), list3.as_slice())); - assert!(expr_list_eq_any_order(list4.as_slice(), list4.as_slice())); - - Ok(()) - } - - #[test] - fn test_ordering_satisfy() -> Result<()> { - let crude = vec![PhysicalSortExpr { - expr: Arc::new(Column::new("a", 0)), - options: SortOptions::default(), - }]; - let crude = Some(&crude[..]); - let finer = vec![ - PhysicalSortExpr { - expr: Arc::new(Column::new("a", 0)), - options: SortOptions::default(), - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("b", 1)), - options: SortOptions::default(), - }, - ]; - let finer = Some(&finer[..]); - let empty_schema = &Arc::new(Schema::empty()); - assert!(ordering_satisfy( - finer, - crude, - || { EquivalenceProperties::new(empty_schema.clone()) }, - || { OrderingEquivalenceProperties::new(empty_schema.clone()) }, - )); - assert!(!ordering_satisfy( - crude, - finer, - || { EquivalenceProperties::new(empty_schema.clone()) }, - || { OrderingEquivalenceProperties::new(empty_schema.clone()) }, - )); - Ok(()) - } - - #[test] - fn test_ordering_satisfy_with_equivalence() -> Result<()> { - let col_a = &Column::new("a", 0); - let col_b = &Column::new("b", 1); - let col_c = &Column::new("c", 2); - let col_d = &Column::new("d", 3); - let col_e = &Column::new("e", 4); - let col_f = &Column::new("f", 5); - let col_g = &Column::new("g", 6); - let option1 = SortOptions { - descending: false, - nulls_first: false, - }; - let option2 = SortOptions { - descending: true, - nulls_first: true, - }; - // The schema is ordered by a ASC NULLS LAST, b ASC NULLS LAST - let provided = vec![ - PhysicalSortExpr { - expr: Arc::new(col_a.clone()), - options: option1, - }, - PhysicalSortExpr { - expr: Arc::new(col_b.clone()), - options: option1, - }, - ]; - let provided = Some(&provided[..]); - let (_test_schema, eq_properties, ordering_eq_properties) = create_test_params()?; - // First element in the tuple stores vector of requirement, second element is the expected return value for ordering_satisfy function - let requirements = vec![ - // `a ASC NULLS LAST`, expects `ordering_satisfy` to be `true`, since existing ordering `a ASC NULLS LAST, b ASC NULLS LAST` satisfies it - (vec![(col_a, option1)], true), - (vec![(col_a, option2)], false), - // Test whether equivalence works as expected - (vec![(col_c, option1)], true), - (vec![(col_c, option2)], false), - // Test whether ordering equivalence works as expected - (vec![(col_d, option1)], true), - (vec![(col_d, option1), (col_b, option1)], true), - (vec![(col_d, option2), (col_b, option1)], false), - ( - vec![(col_e, option2), (col_f, option1), (col_g, option1)], - true, - ), - (vec![(col_e, option2), (col_f, option1)], true), - (vec![(col_e, option1), (col_f, option1)], false), - (vec![(col_e, option2), (col_b, option1)], false), - (vec![(col_e, option1), (col_b, option1)], false), - ( - vec![ - (col_d, option1), - (col_b, option1), - (col_d, option1), - (col_b, option1), - ], - true, - ), - ( - vec![ - (col_d, option1), - (col_b, option1), - (col_e, option2), - (col_f, option1), - ], - true, - ), - ( - vec![ - (col_d, option1), - (col_b, option1), - (col_e, option2), - (col_b, option1), - ], - true, - ), - ( - vec![ - (col_d, option1), - (col_b, option1), - (col_d, option2), - (col_b, option1), - ], - true, - ), - ( - vec![ - (col_d, option1), - (col_b, option1), - (col_e, option1), - (col_f, option1), - ], - false, - ), - ( - vec![ - (col_d, option1), - (col_b, option1), - 
(col_e, option1), - (col_b, option1), - ], - false, - ), - (vec![(col_d, option1), (col_e, option2)], true), - ]; - - for (cols, expected) in requirements { - let err_msg = format!("Error in test case:{cols:?}"); - let required = cols - .into_iter() - .map(|(col, options)| PhysicalSortExpr { - expr: Arc::new(col.clone()), - options, - }) - .collect::>(); - - let required = Some(&required[..]); - assert_eq!( - ordering_satisfy( - provided, - required, - || eq_properties.clone(), - || ordering_eq_properties.clone(), - ), - expected, - "{err_msg}" - ); - } - Ok(()) - } - - fn convert_to_requirement( - in_data: &[(&Column, Option)], - ) -> Vec { - in_data - .iter() - .map(|(col, options)| { - PhysicalSortRequirement::new(Arc::new((*col).clone()) as _, *options) - }) - .collect::>() - } - - #[test] - fn test_normalize_sort_reqs() -> Result<()> { - let col_a = &Column::new("a", 0); - let col_b = &Column::new("b", 1); - let col_c = &Column::new("c", 2); - let col_d = &Column::new("d", 3); - let col_e = &Column::new("e", 4); - let col_f = &Column::new("f", 5); - let option1 = SortOptions { - descending: false, - nulls_first: false, - }; - let option2 = SortOptions { - descending: true, - nulls_first: true, - }; - // First element in the tuple stores vector of requirement, second element is the expected return value for ordering_satisfy function - let requirements = vec![ - (vec![(col_a, Some(option1))], vec![(col_a, Some(option1))]), - (vec![(col_a, Some(option2))], vec![(col_a, Some(option2))]), - (vec![(col_a, None)], vec![(col_a, Some(option1))]), - // Test whether equivalence works as expected - (vec![(col_c, Some(option1))], vec![(col_a, Some(option1))]), - (vec![(col_c, None)], vec![(col_a, Some(option1))]), - // Test whether ordering equivalence works as expected - ( - vec![(col_d, Some(option1)), (col_b, Some(option1))], - vec![(col_a, Some(option1))], - ), - ( - vec![(col_d, None), (col_b, None)], - vec![(col_a, Some(option1))], - ), - ( - vec![(col_e, Some(option2)), (col_f, Some(option1))], - vec![(col_a, Some(option1))], - ), - // We should be able to normalize in compatible requirements also (not exactly equal) - ( - vec![(col_e, Some(option2)), (col_f, None)], - vec![(col_a, Some(option1))], - ), - ( - vec![(col_e, None), (col_f, None)], - vec![(col_a, Some(option1))], - ), - ]; - - let (_test_schema, eq_properties, ordering_eq_properties) = create_test_params()?; - for (reqs, expected_normalized) in requirements.into_iter() { - let req = convert_to_requirement(&reqs); - let expected_normalized = convert_to_requirement(&expected_normalized); - - assert_eq!( - normalize_sort_requirements( - &req, - &eq_properties, - &ordering_eq_properties, - ), - expected_normalized - ); - } - Ok(()) + assert_eq!(get_indices_of_exprs_strict(&list1, &list2), vec![2, 0, 1]); + assert_eq!(get_indices_of_exprs_strict(&list2, &list1), vec![1, 2, 0]); } #[test] @@ -1376,174 +545,6 @@ mod tests { assert_eq!(actual.as_ref(), expected.as_any()); } - #[test] - fn test_normalize_expr_with_equivalence() -> Result<()> { - let col_a = &Column::new("a", 0); - let col_b = &Column::new("b", 1); - let col_c = &Column::new("c", 2); - let _col_d = &Column::new("d", 3); - let _col_e = &Column::new("e", 4); - // Assume that column a and c are aliases. 
- let (_test_schema, eq_properties, _ordering_eq_properties) = - create_test_params()?; - - let col_a_expr = Arc::new(col_a.clone()) as Arc; - let col_b_expr = Arc::new(col_b.clone()) as Arc; - let col_c_expr = Arc::new(col_c.clone()) as Arc; - // Test cases for equivalence normalization, - // First entry in the tuple is argument, second entry is expected result after normalization. - let expressions = vec![ - // Normalized version of the column a and c should go to a (since a is head) - (&col_a_expr, &col_a_expr), - (&col_c_expr, &col_a_expr), - // Cannot normalize column b - (&col_b_expr, &col_b_expr), - ]; - for (expr, expected_eq) in expressions { - assert!( - expected_eq.eq(&eq_properties.normalize_expr(expr.clone())), - "error in test: expr: {expr:?}" - ); - } - - Ok(()) - } - - #[test] - fn test_normalize_sort_requirement_with_equivalence() -> Result<()> { - let col_a = &Column::new("a", 0); - let _col_b = &Column::new("b", 1); - let col_c = &Column::new("c", 2); - let col_d = &Column::new("d", 3); - let _col_e = &Column::new("e", 4); - let option1 = SortOptions { - descending: false, - nulls_first: false, - }; - // Assume that column a and c are aliases. - let (_test_schema, eq_properties, _ordering_eq_properties) = - create_test_params()?; - - // Test cases for equivalence normalization - // First entry in the tuple is PhysicalExpr, second entry is its ordering, third entry is result after normalization. - let expressions = vec![ - (&col_a, Some(option1), &col_a, Some(option1)), - (&col_c, Some(option1), &col_a, Some(option1)), - (&col_c, None, &col_a, None), - // Cannot normalize column d, since it is not in equivalence properties. - (&col_d, Some(option1), &col_d, Some(option1)), - ]; - for (expr, sort_options, expected_col, expected_options) in - expressions.into_iter() - { - let expected = PhysicalSortRequirement::new( - Arc::new((*expected_col).clone()) as _, - expected_options, - ); - let arg = PhysicalSortRequirement::new( - Arc::new((*expr).clone()) as _, - sort_options, - ); - assert!( - expected.eq(&eq_properties.normalize_sort_requirement(arg.clone())), - "error in test: expr: {expr:?}, sort_options: {sort_options:?}" - ); - } - - Ok(()) - } - - #[test] - fn test_ordering_satisfy_different_lengths() -> Result<()> { - let col_a = &Column::new("a", 0); - let col_b = &Column::new("b", 1); - let col_c = &Column::new("c", 2); - let col_d = &Column::new("d", 3); - let col_e = &Column::new("e", 4); - let test_schema = create_test_schema()?; - let option1 = SortOptions { - descending: false, - nulls_first: false, - }; - // Column a and c are aliases. - let mut eq_properties = EquivalenceProperties::new(test_schema.clone()); - eq_properties.add_equal_conditions((col_a, col_c)); - - // Column a and e are ordering equivalent (e.g global ordering of the table can be described both as a ASC and e ASC.) 
- let mut ordering_eq_properties = OrderingEquivalenceProperties::new(test_schema); - ordering_eq_properties.add_equal_conditions(( - &vec![PhysicalSortExpr { - expr: Arc::new(col_a.clone()), - options: option1, - }], - &vec![PhysicalSortExpr { - expr: Arc::new(col_e.clone()), - options: option1, - }], - )); - let sort_req_a = PhysicalSortExpr { - expr: Arc::new((col_a).clone()) as _, - options: option1, - }; - let sort_req_b = PhysicalSortExpr { - expr: Arc::new((col_b).clone()) as _, - options: option1, - }; - let sort_req_c = PhysicalSortExpr { - expr: Arc::new((col_c).clone()) as _, - options: option1, - }; - let sort_req_d = PhysicalSortExpr { - expr: Arc::new((col_d).clone()) as _, - options: option1, - }; - let sort_req_e = PhysicalSortExpr { - expr: Arc::new((col_e).clone()) as _, - options: option1, - }; - - assert!(ordering_satisfy_concrete( - // After normalization would be a ASC, b ASC, d ASC - &[sort_req_a.clone(), sort_req_b.clone(), sort_req_d.clone()], - // After normalization would be a ASC, b ASC, d ASC - &[ - sort_req_c.clone(), - sort_req_b.clone(), - sort_req_a.clone(), - sort_req_d.clone(), - sort_req_e.clone(), - ], - || eq_properties.clone(), - || ordering_eq_properties.clone(), - )); - - assert!(!ordering_satisfy_concrete( - // After normalization would be a ASC, b ASC - &[sort_req_a.clone(), sort_req_b.clone()], - // After normalization would be a ASC, b ASC, d ASC - &[ - sort_req_c.clone(), - sort_req_b.clone(), - sort_req_a.clone(), - sort_req_d.clone(), - sort_req_e.clone(), - ], - || eq_properties.clone(), - || ordering_eq_properties.clone(), - )); - - assert!(!ordering_satisfy_concrete( - // After normalization would be a ASC, b ASC, d ASC - &[sort_req_a.clone(), sort_req_b.clone(), sort_req_d.clone()], - // After normalization would be a ASC, d ASC, b ASC - &[sort_req_c, sort_req_d, sort_req_a, sort_req_b, sort_req_e,], - || eq_properties.clone(), - || ordering_eq_properties.clone(), - )); - - Ok(()) - } - #[test] fn test_collect_columns() -> Result<()> { let expr1 = Arc::new(Column::new("col1", 2)) as _; @@ -1629,211 +630,4 @@ mod tests { assert_eq!(&expected, result); Ok(()) } - - #[test] - fn test_get_indices_of_matching_sort_exprs_with_order_eq() -> Result<()> { - let sort_options = SortOptions::default(); - let sort_options_not = SortOptions::default().not(); - - let provided_sorts = [ - PhysicalSortExpr { - expr: Arc::new(Column::new("b", 1)), - options: sort_options_not, - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("a", 0)), - options: sort_options, - }, - ]; - let required_columns = [Column::new("b", 1), Column::new("a", 0)]; - let schema = Schema::new(vec![ - Field::new("a", DataType::Int32, true), - Field::new("b", DataType::Int32, true), - ]); - let equal_properties = EquivalenceProperties::new(Arc::new(schema.clone())); - let ordering_equal_properties = - OrderingEquivalenceProperties::new(Arc::new(schema)); - assert_eq!( - get_indices_of_matching_sort_exprs_with_order_eq( - &provided_sorts, - &required_columns, - &equal_properties, - &ordering_equal_properties, - ), - Some((vec![sort_options_not, sort_options], vec![0, 1])) - ); - - // required columns are provided in the equivalence classes - let provided_sorts = [PhysicalSortExpr { - expr: Arc::new(Column::new("c", 2)), - options: sort_options, - }]; - let required_columns = [Column::new("b", 1), Column::new("a", 0)]; - let schema = Schema::new(vec![ - Field::new("a", DataType::Int32, true), - Field::new("b", DataType::Int32, true), - Field::new("c", DataType::Int32, true), - ]); - 
let equal_properties = EquivalenceProperties::new(Arc::new(schema.clone())); - let mut ordering_equal_properties = - OrderingEquivalenceProperties::new(Arc::new(schema)); - ordering_equal_properties.add_equal_conditions(( - &vec![PhysicalSortExpr { - expr: Arc::new(Column::new("c", 2)), - options: sort_options, - }], - &vec![ - PhysicalSortExpr { - expr: Arc::new(Column::new("b", 1)), - options: sort_options_not, - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("a", 0)), - options: sort_options, - }, - ], - )); - assert_eq!( - get_indices_of_matching_sort_exprs_with_order_eq( - &provided_sorts, - &required_columns, - &equal_properties, - &ordering_equal_properties, - ), - Some((vec![sort_options_not, sort_options], vec![0, 1])) - ); - - // not satisfied orders - let provided_sorts = [ - PhysicalSortExpr { - expr: Arc::new(Column::new("b", 1)), - options: sort_options_not, - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("c", 2)), - options: sort_options, - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("a", 0)), - options: sort_options, - }, - ]; - let required_columns = [Column::new("b", 1), Column::new("a", 0)]; - let schema = Schema::new(vec![ - Field::new("a", DataType::Int32, true), - Field::new("b", DataType::Int32, true), - Field::new("c", DataType::Int32, true), - ]); - let equal_properties = EquivalenceProperties::new(Arc::new(schema.clone())); - let ordering_equal_properties = - OrderingEquivalenceProperties::new(Arc::new(schema)); - assert_eq!( - get_indices_of_matching_sort_exprs_with_order_eq( - &provided_sorts, - &required_columns, - &equal_properties, - &ordering_equal_properties, - ), - None - ); - - Ok(()) - } - - #[test] - fn test_normalize_ordering_equivalence_classes() -> Result<()> { - let sort_options = SortOptions::default(); - - let schema = Schema::new(vec![ - Field::new("a", DataType::Int32, true), - Field::new("b", DataType::Int32, true), - Field::new("c", DataType::Int32, true), - ]); - let mut equal_properties = EquivalenceProperties::new(Arc::new(schema.clone())); - let mut expected_oeq = OrderingEquivalenceProperties::new(Arc::new(schema)); - - equal_properties - .add_equal_conditions((&Column::new("a", 0), &Column::new("c", 2))); - let head = vec![PhysicalSortExpr { - expr: Arc::new(Column::new("b", 1)), - options: sort_options, - }]; - let others = vec![vec![PhysicalSortExpr { - expr: Arc::new(Column::new("c", 2)), - options: sort_options, - }]]; - let oeq_class = OrderingEquivalentClass::new(head, others); - - expected_oeq.add_equal_conditions(( - &vec![PhysicalSortExpr { - expr: Arc::new(Column::new("b", 1)), - options: sort_options, - }], - &vec![PhysicalSortExpr { - expr: Arc::new(Column::new("a", 0)), - options: sort_options, - }], - )); - - let normalized_oeq_class = - oeq_class.normalize_with_equivalence_properties(&equal_properties); - let expected = expected_oeq.oeq_class().unwrap(); - assert!( - normalized_oeq_class.head().eq(expected.head()) - && normalized_oeq_class.others().eq(expected.others()) - ); - - Ok(()) - } - - #[test] - fn project_empty_output_ordering() -> Result<()> { - let schema = Schema::new(vec![ - Field::new("a", DataType::Int32, true), - Field::new("b", DataType::Int32, true), - Field::new("c", DataType::Int32, true), - ]); - let orderings = find_orderings_of_exprs( - &[ - (Arc::new(Column::new("b", 1)), "b_new".to_string()), - (Arc::new(Column::new("a", 0)), "a_new".to_string()), - ], - Some(&[PhysicalSortExpr { - expr: Arc::new(Column::new("b", 1)), - options: SortOptions::default(), - }]), - 
EquivalenceProperties::new(Arc::new(schema.clone())), - OrderingEquivalenceProperties::new(Arc::new(schema.clone())), - )?; - - assert_eq!( - vec![ - Some(PhysicalSortExpr { - expr: Arc::new(Column::new("b_new", 0)), - options: SortOptions::default(), - }), - None, - ], - orderings - ); - - let schema = Schema::new(vec![ - Field::new("a", DataType::Int32, true), - Field::new("b", DataType::Int32, true), - Field::new("c", DataType::Int32, true), - ]); - let orderings = find_orderings_of_exprs( - &[ - (Arc::new(Column::new("c", 2)), "c_new".to_string()), - (Arc::new(Column::new("b", 1)), "b_new".to_string()), - ], - Some(&[]), - EquivalenceProperties::new(Arc::new(schema.clone())), - OrderingEquivalenceProperties::new(Arc::new(schema)), - )?; - - assert_eq!(vec![None, None], orderings); - - Ok(()) - } } diff --git a/datafusion/physical-expr/src/window/built_in.rs b/datafusion/physical-expr/src/window/built_in.rs index a00d32e201fb..665ceb70d658 100644 --- a/datafusion/physical-expr/src/window/built_in.rs +++ b/datafusion/physical-expr/src/window/built_in.rs @@ -21,22 +21,19 @@ use std::any::Any; use std::ops::Range; use std::sync::Arc; -use super::BuiltInWindowFunctionExpr; -use super::WindowExpr; -use crate::equivalence::OrderingEquivalenceBuilder; +use super::{BuiltInWindowFunctionExpr, WindowExpr}; use crate::expressions::PhysicalSortExpr; -use crate::utils::{convert_to_expr, get_indices_of_matching_exprs}; use crate::window::window_expr::{get_orderby_values, WindowFn}; use crate::window::{PartitionBatches, PartitionWindowAggStates, WindowState}; use crate::{reverse_order_bys, EquivalenceProperties, PhysicalExpr}; + use arrow::array::{new_empty_array, ArrayRef}; use arrow::compute::SortOptions; use arrow::datatypes::Field; use arrow::record_batch::RecordBatch; use datafusion_common::utils::evaluate_partition_ranges; use datafusion_common::{Result, ScalarValue}; -use datafusion_expr::window_state::WindowAggState; -use datafusion_expr::window_state::WindowFrameContext; +use datafusion_expr::window_state::{WindowAggState, WindowFrameContext}; use datafusion_expr::WindowFrame; /// A window expr that takes the form of a [`BuiltInWindowFunctionExpr`]. @@ -75,16 +72,12 @@ impl BuiltInWindowExpr { /// If `self.expr` doesn't have an ordering, ordering equivalence properties /// are not updated. Otherwise, ordering equivalence properties are updated /// by the ordering of `self.expr`. - pub fn add_equal_orderings EquivalenceProperties>( - &self, - builder: &mut OrderingEquivalenceBuilder, - equal_properties: F, - ) { - let schema = builder.schema(); + pub fn add_equal_orderings(&self, eq_properties: &mut EquivalenceProperties) { + let schema = eq_properties.schema(); if let Some(fn_res_ordering) = self.expr.get_result_ordering(schema) { if self.partition_by.is_empty() { // In the absence of a PARTITION BY, ordering of `self.expr` is global: - builder.add_equal_conditions(vec![fn_res_ordering]); + eq_properties.add_new_orderings([vec![fn_res_ordering]]); } else { // If we have a PARTITION BY, built-in functions can not introduce // a global ordering unless the existing ordering is compatible @@ -92,23 +85,11 @@ impl BuiltInWindowExpr { // expressions and existing ordering expressions are equal (w.r.t. // set equality), we can prefix the ordering of `self.expr` with // the existing ordering. 
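The comment above restates the rule that the rewritten `add_equal_orderings` implements via `find_longest_permutation`: with a PARTITION BY present, the window function's result ordering can only be appended when the PARTITION BY columns and the established input ordering cover the same columns. A self-contained sketch of that set-equality check, using plain column names instead of physical expressions (illustrative only, not the crate's API):

```rust
/// If the PARTITION BY columns and the established input ordering contain
/// the same columns (set equality), the window function's result column can
/// be appended to the established ordering to form a new valid ordering.
fn prefixed_ordering(
    partition_by: &[&str],
    established_ordering: &[&str],
    fn_result_col: &str,
) -> Option<Vec<String>> {
    let covered = established_ordering
        .iter()
        .copied()
        .filter(|col| partition_by.contains(col))
        .count();
    // Every established ordering column must be a PARTITION BY column and
    // every PARTITION BY column must be ordered.
    if covered == partition_by.len() && covered == established_ordering.len() {
        let mut ordering: Vec<String> =
            established_ordering.iter().map(|c| c.to_string()).collect();
        ordering.push(fn_result_col.to_string());
        Some(ordering)
    } else {
        None
    }
}

fn main() {
    // Input is globally ordered by (b, a) and we PARTITION BY a, b: the sets
    // match, so the function's result column extends the ordering (b, a).
    assert_eq!(
        prefixed_ordering(&["a", "b"], &["b", "a"], "rn"),
        Some(vec!["b".into(), "a".into(), "rn".into()])
    );
    // An ordering on (b, a, c) is not covered by PARTITION BY a, b alone.
    assert_eq!(prefixed_ordering(&["a", "b"], &["b", "a", "c"], "rn"), None);
}
```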
- let existing_ordering = builder.existing_ordering(); - let existing_ordering_exprs = convert_to_expr(existing_ordering); - // Get indices of the PARTITION BY expressions among input ordering expressions: - let pb_indices = get_indices_of_matching_exprs( - &self.partition_by, - &existing_ordering_exprs, - equal_properties, - ); - // Existing ordering should match exactly with PARTITION BY expressions. - // There should be no missing/extra entries in the existing ordering. - // Otherwise, prefixing wouldn't work. - if pb_indices.len() == self.partition_by.len() - && pb_indices.len() == existing_ordering.len() - { - let mut new_ordering = existing_ordering.to_vec(); - new_ordering.push(fn_res_ordering); - builder.add_equal_conditions(new_ordering); + let (mut ordering, _) = + eq_properties.find_longest_permutation(&self.partition_by); + if ordering.len() == self.partition_by.len() { + ordering.push(fn_res_ordering); + eq_properties.add_new_orderings([ordering]); } } } diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml index 2dfcf12e350a..82c8f49a764f 100644 --- a/datafusion/physical-plan/Cargo.toml +++ b/datafusion/physical-plan/Cargo.toml @@ -19,9 +19,9 @@ name = "datafusion-physical-plan" description = "Physical (ExecutionPlan) implementations for DataFusion query engine" keywords = ["arrow", "query", "sql"] +readme = "README.md" version = { workspace = true } edition = { workspace = true } -readme = { workspace = true } homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } @@ -38,26 +38,26 @@ arrow = { workspace = true } arrow-array = { workspace = true } arrow-buffer = { workspace = true } arrow-schema = { workspace = true } -async-trait = "0.1.41" +async-trait = { workspace = true } chrono = { version = "0.4.23", default-features = false } -datafusion-common = { path = "../common", version = "32.0.0", default-features = false } -datafusion-execution = { path = "../execution", version = "32.0.0" } -datafusion-expr = { path = "../expr", version = "32.0.0" } -datafusion-physical-expr = { path = "../physical-expr", version = "32.0.0" } -futures = "0.3" +datafusion-common = { workspace = true } +datafusion-execution = { workspace = true } +datafusion-expr = { workspace = true } +datafusion-physical-expr = { workspace = true } +futures = { workspace = true } half = { version = "2.1", default-features = false } hashbrown = { version = "0.14", features = ["raw"] } -indexmap = "2.0.0" +indexmap = { workspace = true } itertools = { version = "0.11", features = ["use_std"] } -log = "^0.4" +log = { workspace = true } once_cell = "1.18.0" -parking_lot = "0.12" +parking_lot = { workspace = true } pin-project-lite = "^0.2.7" -rand = "0.8" +rand = { workspace = true } tokio = { version = "1.28", features = ["sync", "fs", "parking_lot"] } uuid = { version = "^1.2", features = ["v4"] } [dev-dependencies] -rstest = "0.18.0" +rstest = { workspace = true } termtree = "0.4.1" tokio = { version = "1.28", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] } diff --git a/datafusion/physical-plan/README.md b/datafusion/physical-plan/README.md new file mode 100644 index 000000000000..366a6b555150 --- /dev/null +++ b/datafusion/physical-plan/README.md @@ -0,0 +1,27 @@ + + +# DataFusion Common + +[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. 
+ +This crate is a submodule of DataFusion that contains the `ExecutionPlan` trait and the various implementations of that +trait for built in operators such as filters, projections, joins, aggregations, etc. + +[df]: https://crates.io/crates/datafusion diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 4c612223178c..8dab38bc5f0e 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -18,7 +18,6 @@ //! Aggregates functionalities use std::any::Any; -use std::collections::HashMap; use std::sync::Arc; use super::DisplayAs; @@ -26,9 +25,13 @@ use crate::aggregates::{ no_grouping::AggregateStream, row_hash::GroupedHashAggregateStream, topk_stream::GroupedTopKAggregateStream, }; + use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet}; +use crate::windows::{ + get_ordered_partition_by_indices, get_window_mode, PartitionSearchMode, +}; use crate::{ - DisplayFormatType, Distribution, EquivalenceProperties, ExecutionPlan, Partitioning, + DisplayFormatType, Distribution, ExecutionPlan, Partitioning, SendableRecordBatchStream, Statistics, }; @@ -36,24 +39,18 @@ use arrow::array::ArrayRef; use arrow::datatypes::{Field, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use datafusion_common::stats::Precision; -use datafusion_common::utils::longest_consecutive_prefix; use datafusion_common::{not_impl_err, plan_err, DataFusionError, Result}; use datafusion_execution::TaskContext; use datafusion_expr::Accumulator; -use datafusion_physical_expr::utils::{ - convert_to_expr, get_finer_ordering, get_indices_of_matching_exprs, - ordering_satisfy_requirement_concrete, -}; use datafusion_physical_expr::{ aggregate::is_order_sensitive, - equivalence::project_equivalence_properties, - expressions::{Column, Max, Min}, - normalize_out_expr_with_columns_map, physical_exprs_contains, reverse_order_bys, - AggregateExpr, LexOrdering, LexOrderingReq, OrderingEquivalenceProperties, - PhysicalExpr, PhysicalSortExpr, PhysicalSortRequirement, + equivalence::collapse_lex_req, + expressions::{Column, Max, Min, UnKnownColumn}, + physical_exprs_contains, reverse_order_bys, AggregateExpr, EquivalenceProperties, + LexOrdering, LexRequirement, PhysicalExpr, PhysicalSortExpr, PhysicalSortRequirement, }; -use itertools::Itertools; +use itertools::{izip, Itertools}; mod group_values; mod no_grouping; @@ -63,6 +60,7 @@ mod topk; mod topk_stream; pub use datafusion_expr::AggregateFunction; +use datafusion_physical_expr::equivalence::ProjectionMapping; pub use datafusion_physical_expr::expressions::create_aggregate_expr; /// Hash aggregate modes @@ -271,18 +269,6 @@ impl From for SendableRecordBatchStream { } } -/// This object encapsulates ordering-related information on GROUP BY columns. -#[derive(Debug, Clone)] -pub(crate) struct AggregationOrdering { - /// Specifies whether the GROUP BY columns are partially or fully ordered. - mode: GroupByOrderMode, - /// Stores indices such that when we iterate with these indices, GROUP BY - /// expressions match input ordering. - order_indices: Vec, - /// Actual ordering information of the GROUP BY columns. - ordering: LexOrdering, -} - /// Hash aggregate execution plan #[derive(Debug)] pub struct AggregateExec { @@ -308,90 +294,14 @@ pub struct AggregateExec { /// We need the input schema of partial aggregate to be able to deserialize aggregate /// expressions from protobuf for final aggregate. 
pub input_schema: SchemaRef, - /// The columns map used to normalize out expressions like Partitioning and PhysicalSortExpr - /// The key is the column from the input schema and the values are the columns from the output schema - columns_map: HashMap>, - /// Execution Metrics + /// The mapping used to normalize expressions like Partitioning and + /// PhysicalSortExpr that maps input to output + projection_mapping: ProjectionMapping, + /// Execution metrics metrics: ExecutionPlanMetricsSet, - /// Stores mode and output ordering information for the `AggregateExec`. - aggregation_ordering: Option, - required_input_ordering: Option, -} - -/// Calculates the working mode for `GROUP BY` queries. -/// - If no GROUP BY expression has an ordering, returns `None`. -/// - If some GROUP BY expressions have an ordering, returns `Some(GroupByOrderMode::PartiallyOrdered)`. -/// - If all GROUP BY expressions have orderings, returns `Some(GroupByOrderMode::Ordered)`. -fn get_working_mode( - input: &Arc, - group_by: &PhysicalGroupBy, -) -> Option<(GroupByOrderMode, Vec)> { - if !group_by.is_single() { - // We do not currently support streaming execution if we have more - // than one group (e.g. we have grouping sets). - return None; - }; - - let output_ordering = input.output_ordering().unwrap_or(&[]); - // Since direction of the ordering is not important for GROUP BY columns, - // we convert PhysicalSortExpr to PhysicalExpr in the existing ordering. - let ordering_exprs = convert_to_expr(output_ordering); - let groupby_exprs = group_by.input_exprs(); - // Find where each expression of the GROUP BY clause occurs in the existing - // ordering (if it occurs): - let mut ordered_indices = - get_indices_of_matching_exprs(&groupby_exprs, &ordering_exprs, || { - input.equivalence_properties() - }); - ordered_indices.sort(); - // Find out how many expressions of the existing ordering define ordering - // for expressions in the GROUP BY clause. For example, if the input is - // ordered by a, b, c, d and we group by b, a, d; the result below would be. - // 2, meaning 2 elements (a, b) among the GROUP BY columns define ordering. - let first_n = longest_consecutive_prefix(ordered_indices); - if first_n == 0 { - // No GROUP by columns are ordered, we can not do streaming execution. - return None; - } - let ordered_exprs = ordering_exprs[0..first_n].to_vec(); - // Find indices for the GROUP BY expressions such that when we iterate with - // these indices, we would match existing ordering. For the example above, - // this would produce 1, 0; meaning 1st and 0th entries (a, b) among the - // GROUP BY expressions b, a, d match input ordering. - let ordered_group_by_indices = - get_indices_of_matching_exprs(&ordered_exprs, &groupby_exprs, || { - input.equivalence_properties() - }); - Some(if first_n == group_by.expr.len() { - (GroupByOrderMode::FullyOrdered, ordered_group_by_indices) - } else { - (GroupByOrderMode::PartiallyOrdered, ordered_group_by_indices) - }) -} - -/// This function gathers the ordering information for the GROUP BY columns. 
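The removed `get_working_mode` hinges on one computation: collect the positions of the GROUP BY expressions inside the existing input ordering and measure the longest run of those positions starting at index 0, which tells how many GROUP BY columns are effectively pre-ordered. A standalone illustration of that prefix computation, mirroring the idea rather than DataFusion's exact helper:

```rust
/// Length of the longest prefix 0, 1, 2, ... that is fully contained in
/// `indices`, where `indices` are the positions of GROUP BY columns inside
/// the existing input ordering.
fn longest_consecutive_prefix(mut indices: Vec<usize>) -> usize {
    indices.sort_unstable();
    indices.dedup();
    let mut expected = 0;
    for idx in indices {
        if idx != expected {
            break;
        }
        expected += 1;
    }
    expected
}

fn main() {
    // Input ordered by (a, b, c, d); GROUP BY b, a, d hits ordering
    // positions {1, 0, 3}. Positions 0 and 1 form a consecutive prefix,
    // so two GROUP BY columns (a, b) are already ordered.
    assert_eq!(longest_consecutive_prefix(vec![1, 0, 3]), 2);
    // GROUP BY c, b hits positions {2, 1}: no prefix starts at 0.
    assert_eq!(longest_consecutive_prefix(vec![2, 1]), 0);
}
```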
-fn calc_aggregation_ordering( - input: &Arc, - group_by: &PhysicalGroupBy, -) -> Option { - get_working_mode(input, group_by).map(|(mode, order_indices)| { - let existing_ordering = input.output_ordering().unwrap_or(&[]); - let out_group_expr = group_by.output_exprs(); - // Calculate output ordering information for the operator: - let out_ordering = order_indices - .iter() - .zip(existing_ordering) - .map(|(idx, input_col)| PhysicalSortExpr { - expr: out_group_expr[*idx].clone(), - options: input_col.options, - }) - .collect::>(); - AggregationOrdering { - mode, - order_indices, - ordering: out_ordering, - } - }) + required_input_ordering: Option, + partition_search_mode: PartitionSearchMode, + output_ordering: Option, } /// This function returns the ordering requirement of the first non-reversible @@ -420,46 +330,57 @@ fn get_init_req( /// This function gets the finest ordering requirement among all the aggregation /// functions. If requirements are conflicting, (i.e. we can not compute the /// aggregations in a single [`AggregateExec`]), the function returns an error. -fn get_finest_requirement< - F: Fn() -> EquivalenceProperties, - F2: Fn() -> OrderingEquivalenceProperties, ->( +fn get_finest_requirement( aggr_expr: &mut [Arc], order_by_expr: &mut [Option], - eq_properties: F, - ordering_eq_properties: F2, + eq_properties: &EquivalenceProperties, ) -> Result> { + // First, we check if all the requirements are satisfied by the existing + // ordering. If so, we return `None` to indicate this. + let mut all_satisfied = true; + for (aggr_expr, fn_req) in aggr_expr.iter_mut().zip(order_by_expr.iter_mut()) { + if eq_properties.ordering_satisfy(fn_req.as_deref().unwrap_or(&[])) { + continue; + } + if let Some(reverse) = aggr_expr.reverse_expr() { + let reverse_req = fn_req.as_ref().map(|item| reverse_order_bys(item)); + if eq_properties.ordering_satisfy(reverse_req.as_deref().unwrap_or(&[])) { + // We need to update `aggr_expr` with its reverse since only its + // reverse requirement is compatible with the existing requirements: + *aggr_expr = reverse; + *fn_req = reverse_req; + continue; + } + } + // Requirement is not satisfied: + all_satisfied = false; + } + if all_satisfied { + // All of the requirements are already satisfied. 
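The rewritten `get_finest_requirement` proceeds in two steps: it first checks whether the existing ordering (or its reverse, for reversible aggregates such as FIRST_VALUE and LAST_VALUE) already satisfies every per-aggregate requirement, and otherwise merges the requirements by repeatedly keeping the finer of two compatible orderings, where one ordering is finer than another if the latter is its prefix. A small self-contained sketch of that merge rule; the `SortKey` tuple and helper names are illustrative, not the crate's API:

```rust
/// A sort key: column name and descending flag.
type SortKey = (String, bool);

/// Return the finer (longer) of two orderings if one is a prefix of the
/// other, or None if they conflict.
fn get_finer(lhs: &[SortKey], rhs: &[SortKey]) -> Option<Vec<SortKey>> {
    let (short, long) = if lhs.len() <= rhs.len() { (lhs, rhs) } else { (rhs, lhs) };
    if long.starts_with(short) {
        Some(long.to_vec())
    } else {
        None
    }
}

/// Reverse the direction of every sort key, as `reverse_order_bys` does for
/// reversible aggregates.
fn reverse(ordering: &[SortKey]) -> Vec<SortKey> {
    ordering.iter().map(|(col, desc)| (col.clone(), !*desc)).collect()
}

fn main() {
    let a_asc = ("a".to_string(), false);
    let b_asc = ("b".to_string(), false);
    // [a ASC] and [a ASC, b ASC] are compatible; the finer one wins.
    assert_eq!(
        get_finer(&[a_asc.clone()], &[a_asc.clone(), b_asc.clone()]),
        Some(vec![a_asc.clone(), b_asc.clone()])
    );
    // [a ASC] and [a DESC] conflict directly, but the reversed form of the
    // second requirement is compatible with the first.
    let a_desc = ("a".to_string(), true);
    assert_eq!(get_finer(&[a_asc.clone()], &[a_desc.clone()]), None);
    assert_eq!(get_finer(&[a_asc.clone()], &reverse(&[a_desc])), Some(vec![a_asc]));
}
```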
+ return Ok(None); + } let mut finest_req = get_init_req(aggr_expr, order_by_expr); for (aggr_expr, fn_req) in aggr_expr.iter_mut().zip(order_by_expr.iter_mut()) { - let fn_req = if let Some(fn_req) = fn_req { - fn_req - } else { + let Some(fn_req) = fn_req else { continue; }; + if let Some(finest_req) = &mut finest_req { - if let Some(finer) = get_finer_ordering( - finest_req, - fn_req, - &eq_properties, - &ordering_eq_properties, - ) { - *finest_req = finer.to_vec(); + if let Some(finer) = eq_properties.get_finer_ordering(finest_req, fn_req) { + *finest_req = finer; continue; } // If an aggregate function is reversible, analyze whether its reverse // direction is compatible with existing requirements: if let Some(reverse) = aggr_expr.reverse_expr() { let fn_req_reverse = reverse_order_bys(fn_req); - if let Some(finer) = get_finer_ordering( - finest_req, - &fn_req_reverse, - &eq_properties, - &ordering_eq_properties, - ) { + if let Some(finer) = + eq_properties.get_finer_ordering(finest_req, &fn_req_reverse) + { // We need to update `aggr_expr` with its reverse, since only its // reverse requirement is compatible with existing requirements: *aggr_expr = reverse; - *finest_req = finer.to_vec(); + *finest_req = finer; *fn_req = fn_req_reverse; continue; } @@ -477,113 +398,46 @@ fn get_finest_requirement< Ok(finest_req) } -/// Calculate the required input ordering for the [`AggregateExec`] by considering -/// ordering requirements of order-sensitive aggregation functions. -fn calc_required_input_ordering( +/// Calculates search_mode for the aggregation +fn get_aggregate_search_mode( + group_by: &PhysicalGroupBy, input: &Arc, - aggr_exprs: &mut [Arc], - order_by_exprs: &mut [Option], - aggregator_reqs: LexOrderingReq, - aggregator_reverse_reqs: Option, - aggregation_ordering: &mut Option, - mode: &AggregateMode, -) -> Result> { - let mut required_input_ordering = vec![]; - // Boolean shows that whether `required_input_ordering` stored comes from - // `aggregator_reqs` or `aggregator_reverse_reqs` - let mut reverse_req = false; - // If reverse aggregator is None, there is no way to run aggregators in reverse mode. Hence ignore it during analysis - let aggregator_requirements = - if let Some(aggregator_reverse_reqs) = aggregator_reverse_reqs { - // If existing ordering doesn't satisfy requirement, we should do calculations - // on naive requirement (by convention, otherwise the final plan will be unintuitive), - // even if reverse ordering is possible. - // Hence, while iterating consider naive requirement last, by this way - // we prioritize naive requirement over reverse requirement, when - // reverse requirement is not helpful with removing SortExec from the plan. - vec![(true, aggregator_reverse_reqs), (false, aggregator_reqs)] - } else { - vec![(false, aggregator_reqs)] - }; - for (is_reverse, aggregator_requirement) in aggregator_requirements.into_iter() { - if let Some(AggregationOrdering { - // If the mode is FullyOrdered or PartiallyOrdered (i.e. we are - // running with bounded memory, without breaking the pipeline), - // then we append the aggregator ordering requirement to the existing - // ordering. This way, we can still run with bounded memory. - mode: GroupByOrderMode::FullyOrdered | GroupByOrderMode::PartiallyOrdered, - order_indices, - .. 
- }) = aggregation_ordering - { - // Get the section of the input ordering that enables us to run in - // FullyOrdered or PartiallyOrdered modes: - let requirement_prefix = - if let Some(existing_ordering) = input.output_ordering() { - &existing_ordering[0..order_indices.len()] - } else { - &[] - }; - let mut requirement = - PhysicalSortRequirement::from_sort_exprs(requirement_prefix.iter()); - for req in aggregator_requirement { - // Final and FinalPartitioned modes don't enforce ordering - // requirements since order-sensitive aggregators handle such - // requirements during merging. - if mode.is_first_stage() - && requirement.iter().all(|item| req.expr.ne(&item.expr)) - { - requirement.push(req); - } - } - required_input_ordering = requirement; - } else if mode.is_first_stage() { - required_input_ordering = aggregator_requirement; - } - // Keep track of the direction from which required_input_ordering is constructed: - reverse_req = is_reverse; - // If all the order-sensitive aggregate functions are reversible (e.g. all the - // order-sensitive aggregators are either FIRST_VALUE or LAST_VALUE), then we can - // run aggregate expressions either in the given required ordering, (i.e. finest - // requirement that satisfies every aggregate function requirement) or its reverse - // (opposite) direction. We analyze these two possibilities, and use the version that - // satisfies existing ordering. This enables us to avoid an extra sort step in the final - // plan. If neither version satisfies the existing ordering, we use the given ordering - // requirement. In short, if running aggregators in reverse order help us to avoid a - // sorting step, we do so. Otherwise, we use the aggregators as is. - let existing_ordering = input.output_ordering().unwrap_or(&[]); - if ordering_satisfy_requirement_concrete( - existing_ordering, - &required_input_ordering, - || input.equivalence_properties(), - || input.ordering_equivalence_properties(), - ) { - break; - } + aggr_expr: &mut [Arc], + order_by_expr: &mut [Option], + ordering_req: &mut Vec, +) -> Result { + let groupby_exprs = group_by + .expr + .iter() + .map(|(item, _)| item.clone()) + .collect::>(); + let mut partition_search_mode = PartitionSearchMode::Linear; + if !group_by.is_single() || groupby_exprs.is_empty() { + return Ok(partition_search_mode); } - // If `required_input_ordering` is constructed using the reverse requirement, we - // should reverse each `aggr_expr` in order to correctly calculate their results - // in reverse order. - if reverse_req { - aggr_exprs - .iter_mut() - .zip(order_by_exprs.iter_mut()) - .map(|(aggr_expr, ob_expr)| { - if is_order_sensitive(aggr_expr) { - if let Some(reverse) = aggr_expr.reverse_expr() { - *aggr_expr = reverse; - *ob_expr = ob_expr.as_ref().map(|obs| reverse_order_bys(obs)); + + if let Some((should_reverse, mode)) = + get_window_mode(&groupby_exprs, ordering_req, input)? 
+ { + let all_reversible = aggr_expr + .iter() + .all(|expr| !is_order_sensitive(expr) || expr.reverse_expr().is_some()); + if should_reverse && all_reversible { + izip!(aggr_expr.iter_mut(), order_by_expr.iter_mut()).for_each( + |(aggr, order_by)| { + if let Some(reverse) = aggr.reverse_expr() { + *aggr = reverse; } else { - return plan_err!( - "Aggregate expression should have a reverse expression" - ); + unreachable!(); } - } - Ok(()) - }) - .collect::>>()?; + *order_by = order_by.as_ref().map(|ob| reverse_order_bys(ob)); + }, + ); + *ordering_req = reverse_order_bys(ordering_req); + } + partition_search_mode = mode; } - Ok((!required_input_ordering.is_empty()).then_some(required_input_ordering)) + Ok(partition_search_mode) } /// Check whether group by expression contains all of the expression inside `requirement` @@ -647,57 +501,50 @@ impl AggregateExec { }) }) .collect::>(); - let mut aggregator_reverse_reqs = None; - // Currently we support order-sensitive aggregation only in `Single` mode. - // For `Final` and `FinalPartitioned` modes, we cannot guarantee they will receive - // data according to ordering requirements. As long as we cannot produce correct result - // in `Final` mode, it is not important to produce correct result in `Partial` mode. - // We only support `Single` mode, where we are sure that output produced is final, and it - // is produced in a single step. - let requirement = get_finest_requirement( &mut aggr_expr, &mut order_by_expr, - || input.equivalence_properties(), - || input.ordering_equivalence_properties(), + &input.equivalence_properties(), )?; - let aggregator_requirement = requirement - .as_ref() - .map(|exprs| PhysicalSortRequirement::from_sort_exprs(exprs.iter())); - let aggregator_reqs = aggregator_requirement.unwrap_or(vec![]); - // If all aggregate expressions are reversible, also consider reverse - // requirement(s). The reason is that existing ordering may satisfy the - // given requirement or its reverse. By considering both, we can generate better plans. - if aggr_expr - .iter() - .all(|expr| !is_order_sensitive(expr) || expr.reverse_expr().is_some()) - { - aggregator_reverse_reqs = requirement.map(|reqs| { - PhysicalSortRequirement::from_sort_exprs(reverse_order_bys(&reqs).iter()) - }); - } - - // construct a map from the input columns to the output columns of the Aggregation - let mut columns_map: HashMap> = HashMap::new(); - for (expression, name) in group_by.expr.iter() { - if let Some(column) = expression.as_any().downcast_ref::() { - let new_col_idx = schema.index_of(name)?; - let entry = columns_map.entry(column.clone()).or_default(); - entry.push(Column::new(name, new_col_idx)); - }; - } - - let mut aggregation_ordering = calc_aggregation_ordering(&input, &group_by); - let required_input_ordering = calc_required_input_ordering( + let mut ordering_req = requirement.unwrap_or(vec![]); + let partition_search_mode = get_aggregate_search_mode( + &group_by, &input, &mut aggr_expr, &mut order_by_expr, - aggregator_reqs, - aggregator_reverse_reqs, - &mut aggregation_ordering, - &mode, + &mut ordering_req, )?; + // Get GROUP BY expressions: + let groupby_exprs = group_by.input_exprs(); + // If existing ordering satisfies a prefix of the GROUP BY expressions, + // prefix requirements with this section. In this case, aggregation will + // work more efficiently. 
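The code just below builds the aggregation's input requirement in three steps: prefix it with the GROUP BY columns that the input already orders, append the ordering requirement of the order-sensitive aggregates, and collapse duplicate expressions (what `collapse_lex_req` does) so each expression is required only once. A schematic sketch of that construction, with column names standing in for physical sort requirements (an assumed simplification):

```rust
use std::collections::HashSet;

/// Keep only the first occurrence of each column in a lexicographic
/// requirement; later occurrences are already implied by the earlier one.
fn collapse_requirement(req: Vec<String>) -> Vec<String> {
    let mut seen = HashSet::new();
    req.into_iter().filter(|col| seen.insert(col.clone())).collect()
}

fn main() {
    // GROUP BY columns (a, b) are already ordered by the input, and the
    // order-sensitive aggregate additionally asks for (b, c).
    let ordered_group_by = vec!["a".to_string(), "b".to_string()];
    let aggregate_req = vec!["b".to_string(), "c".to_string()];

    let mut requirement = ordered_group_by;
    requirement.extend(aggregate_req);
    // (a, b, b, c) collapses to (a, b, c).
    assert_eq!(collapse_requirement(requirement), vec!["a", "b", "c"]);
}
```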
+ let indices = get_ordered_partition_by_indices(&groupby_exprs, &input); + let mut new_requirement = indices + .into_iter() + .map(|idx| PhysicalSortRequirement { + expr: groupby_exprs[idx].clone(), + options: None, + }) + .collect::>(); + // Postfix ordering requirement of the aggregation to the requirement. + let req = PhysicalSortRequirement::from_sort_exprs(&ordering_req); + new_requirement.extend(req); + new_requirement = collapse_lex_req(new_requirement); + + // construct a map from the input expression to the output expression of the Aggregation group by + let projection_mapping = + ProjectionMapping::try_new(&group_by.expr, &input.schema())?; + + let required_input_ordering = + (!new_requirement.is_empty()).then_some(new_requirement); + + let aggregate_eqs = input + .equivalence_properties() + .project(&projection_mapping, schema.clone()); + let output_ordering = aggregate_eqs.oeq_class().output_ordering(); + Ok(AggregateExec { mode, group_by, @@ -707,11 +554,12 @@ impl AggregateExec { input, schema, input_schema, - columns_map, + projection_mapping, metrics: ExecutionPlanMetricsSet::new(), - aggregation_ordering, required_input_ordering, limit: None, + partition_search_mode, + output_ordering, }) } @@ -870,8 +718,8 @@ impl DisplayAs for AggregateExec { write!(f, ", lim=[{limit}]")?; } - if let Some(aggregation_ordering) = &self.aggregation_ordering { - write!(f, ", ordering_mode={:?}", aggregation_ordering.mode)?; + if self.partition_search_mode != PartitionSearchMode::Linear { + write!(f, ", ordering_mode={:?}", self.partition_search_mode)?; } } } @@ -893,23 +741,28 @@ impl ExecutionPlan for AggregateExec { fn output_partitioning(&self) -> Partitioning { let input_partition = self.input.output_partitioning(); if self.mode.is_first_stage() { + // First stage aggregation will not change the output partitioning, + // but needs to respect aliases (e.g. mapping in the GROUP BY + // expression). + let input_eq_properties = self.input.equivalence_properties(); // First stage Aggregation will not change the output partitioning but need to respect the Alias let input_partition = self.input.output_partitioning(); if let Partitioning::Hash(exprs, part) = input_partition { let normalized_exprs = exprs .into_iter() .map(|expr| { - normalize_out_expr_with_columns_map(expr, &self.columns_map) + input_eq_properties + .project_expr(&expr, &self.projection_mapping) + .unwrap_or_else(|| { + Arc::new(UnKnownColumn::new(&expr.to_string())) + }) }) - .collect::>(); - Partitioning::Hash(normalized_exprs, part) - } else { - input_partition + .collect(); + return Partitioning::Hash(normalized_exprs, part); } - } else { - // Final Aggregation's output partitioning is the same as its real input - input_partition } + // Final Aggregation's output partitioning is the same as its real input + input_partition } /// Specifies whether this plan generates an infinite stream of records. @@ -917,7 +770,7 @@ impl ExecutionPlan for AggregateExec { /// infinite, returns an error to indicate this. fn unbounded_output(&self, children: &[bool]) -> Result { if children[0] { - if self.aggregation_ordering.is_none() { + if self.partition_search_mode == PartitionSearchMode::Linear { // Cannot run without breaking pipeline. plan_err!( "Aggregate Error: `GROUP BY` clauses with columns without ordering and GROUPING SETS are not supported for unbounded inputs." 
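When the first aggregation stage projects a `Partitioning::Hash` through the GROUP BY mapping (as in `output_partitioning` above), each hash expression that survives the projection is rewritten to its output-side form, and anything that does not survive is replaced by an `UnKnownColumn` placeholder so the hash distribution is no longer claimed for it. A schematic version using string names and a hypothetical `OutExpr` enum rather than the real physical expression types:

```rust
use std::collections::HashMap;

#[derive(Debug, PartialEq)]
enum OutExpr {
    Column(String),
    Unknown(String),
}

/// Map input-side hash partitioning expressions to output-side ones.
/// Expressions not covered by the projection become Unknown placeholders.
fn project_hash_exprs(exprs: &[&str], mapping: &HashMap<&str, &str>) -> Vec<OutExpr> {
    exprs
        .iter()
        .map(|e| match mapping.get(e) {
            Some(out) => OutExpr::Column((*out).to_string()),
            None => OutExpr::Unknown((*e).to_string()),
        })
        .collect()
}

fn main() {
    // GROUP BY a AS a_new, b AS b_new; the input was hash-partitioned on (a, c).
    let mapping = HashMap::from([("a", "a_new"), ("b", "b_new")]);
    assert_eq!(
        project_hash_exprs(&["a", "c"], &mapping),
        vec![
            OutExpr::Column("a_new".to_string()),
            // `c` is not part of the output, so the hash distribution on it
            // can no longer be expressed in terms of output columns.
            OutExpr::Unknown("c".to_string()),
        ]
    );
}
```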
@@ -931,9 +784,7 @@ impl ExecutionPlan for AggregateExec { } fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { - self.aggregation_ordering - .as_ref() - .map(|item: &AggregationOrdering| item.ordering.as_slice()) + self.output_ordering.as_deref() } fn required_input_distribution(&self) -> Vec { @@ -950,18 +801,14 @@ impl ExecutionPlan for AggregateExec { } } - fn required_input_ordering(&self) -> Vec> { + fn required_input_ordering(&self) -> Vec> { vec![self.required_input_ordering.clone()] } fn equivalence_properties(&self) -> EquivalenceProperties { - let mut new_properties = EquivalenceProperties::new(self.schema()); - project_equivalence_properties( - self.input.equivalence_properties(), - &self.columns_map, - &mut new_properties, - ); - new_properties + self.input + .equivalence_properties() + .project(&self.projection_mapping, self.schema()) } fn children(&self) -> Vec> { @@ -1287,19 +1134,21 @@ pub(crate) fn evaluate_group_by( #[cfg(test)] mod tests { + use std::any::Any; + use std::sync::Arc; + use std::task::{Context, Poll}; + use super::*; - use crate::aggregates::GroupByOrderMode::{FullyOrdered, PartiallyOrdered}; use crate::aggregates::{ - get_finest_requirement, get_working_mode, AggregateExec, AggregateMode, - PhysicalGroupBy, + get_finest_requirement, AggregateExec, AggregateMode, PhysicalGroupBy, }; use crate::coalesce_batches::CoalesceBatchesExec; use crate::coalesce_partitions::CoalescePartitionsExec; use crate::common; use crate::expressions::{col, Avg}; use crate::memory::MemoryExec; + use crate::test::assert_is_pending; use crate::test::exec::{assert_strong_count_converges_to_zero, BlockingExec}; - use crate::test::{assert_is_pending, mem_exec}; use crate::{ DisplayAs, ExecutionPlan, Partitioning, RecordBatchStream, SendableRecordBatchStream, Statistics, @@ -1313,20 +1162,15 @@ mod tests { assert_batches_eq, assert_batches_sorted_eq, internal_err, DataFusionError, Result, ScalarValue, }; + use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; use datafusion_physical_expr::expressions::{ - lit, ApproxDistinct, Column, Count, FirstValue, LastValue, Median, + lit, ApproxDistinct, Count, FirstValue, LastValue, Median, }; use datafusion_physical_expr::{ - AggregateExpr, EquivalenceProperties, OrderingEquivalenceProperties, - PhysicalExpr, PhysicalSortExpr, + AggregateExpr, EquivalenceProperties, PhysicalExpr, PhysicalSortExpr, }; - use std::any::Any; - use std::sync::Arc; - use std::task::{Context, Poll}; - - use datafusion_execution::config::SessionConfig; use futures::{FutureExt, Stream}; // Generate a schema which consists of 5 columns (a, b, c, d, e) @@ -1341,80 +1185,6 @@ mod tests { Ok(schema) } - /// make PhysicalSortExpr with default options - fn sort_expr(name: &str, schema: &Schema) -> PhysicalSortExpr { - sort_expr_options(name, schema, SortOptions::default()) - } - - /// PhysicalSortExpr with specified options - fn sort_expr_options( - name: &str, - schema: &Schema, - options: SortOptions, - ) -> PhysicalSortExpr { - PhysicalSortExpr { - expr: col(name, schema).unwrap(), - options, - } - } - - #[tokio::test] - async fn test_get_working_mode() -> Result<()> { - let test_schema = create_test_schema()?; - // Source is sorted by a ASC NULLS FIRST, b ASC NULLS FIRST, c ASC NULLS FIRST - // Column d, e is not ordered. 
- let sort_exprs = vec![ - sort_expr("a", &test_schema), - sort_expr("b", &test_schema), - sort_expr("c", &test_schema), - ]; - let input = mem_exec(1).with_sort_information(vec![sort_exprs]); - let input = Arc::new(input) as _; - - // test cases consists of vector of tuples. Where each tuple represents a single test case. - // First field in the tuple is Vec where each element in the vector represents GROUP BY columns - // For instance `vec!["a", "b"]` corresponds to GROUP BY a, b - // Second field in the tuple is Option, which corresponds to expected algorithm mode. - // None represents that existing ordering is not sufficient to run executor with any one of the algorithms - // (We need to add SortExec to be able to run it). - // Some(GroupByOrderMode) represents, we can run algorithm with existing ordering; and algorithm should work in - // GroupByOrderMode. - let test_cases = vec![ - (vec!["a"], Some((FullyOrdered, vec![0]))), - (vec!["b"], None), - (vec!["c"], None), - (vec!["b", "a"], Some((FullyOrdered, vec![1, 0]))), - (vec!["c", "b"], None), - (vec!["c", "a"], Some((PartiallyOrdered, vec![1]))), - (vec!["c", "b", "a"], Some((FullyOrdered, vec![2, 1, 0]))), - (vec!["d", "a"], Some((PartiallyOrdered, vec![1]))), - (vec!["d", "b"], None), - (vec!["d", "c"], None), - (vec!["d", "b", "a"], Some((PartiallyOrdered, vec![2, 1]))), - (vec!["d", "c", "b"], None), - (vec!["d", "c", "a"], Some((PartiallyOrdered, vec![2]))), - ( - vec!["d", "c", "b", "a"], - Some((PartiallyOrdered, vec![3, 2, 1])), - ), - ]; - for (case_idx, test_case) in test_cases.iter().enumerate() { - let (group_by_columns, expected) = &test_case; - let mut group_by_exprs = vec![]; - for col_name in group_by_columns { - group_by_exprs.push((col(col_name, &test_schema)?, col_name.to_string())); - } - let group_bys = PhysicalGroupBy::new_single(group_by_exprs); - let res = get_working_mode(&input, &group_bys); - assert_eq!( - res, *expected, - "Unexpected result for in unbounded test case#: {case_idx:?}, case: {test_case:?}" - ); - } - - Ok(()) - } - /// some mock data to aggregates fn some_data() -> (Arc, Vec) { // define a schema. @@ -2155,7 +1925,7 @@ mod tests { spill: bool, ) -> Result<()> { let task_ctx = if spill { - new_spill_ctx(2, 2812) + new_spill_ctx(2, 2886) } else { Arc::new(TaskContext::default()) }; @@ -2268,69 +2038,72 @@ mod tests { descending: true, nulls_first: true, }; - let mut eq_properties = EquivalenceProperties::new(test_schema.clone()); - let col_a = Column::new("a", 0); - let col_b = Column::new("b", 1); - let col_c = Column::new("c", 2); - let col_d = Column::new("d", 3); - eq_properties.add_equal_conditions((&col_a, &col_b)); - let mut ordering_eq_properties = OrderingEquivalenceProperties::new(test_schema); - ordering_eq_properties.add_equal_conditions(( - &vec![PhysicalSortExpr { - expr: Arc::new(col_a.clone()) as _, - options: options1, - }], - &vec![PhysicalSortExpr { - expr: Arc::new(col_c.clone()) as _, - options: options2, - }], - )); + let col_a = &col("a", &test_schema)?; + let col_b = &col("b", &test_schema)?; + let col_c = &col("c", &test_schema)?; + let mut eq_properties = EquivalenceProperties::new(test_schema); + // Columns a and b are equal. 
+ eq_properties.add_equal_conditions(col_a, col_b); + // Aggregate requirements are + // [None], [a ASC], [a ASC, b ASC, c ASC], [a ASC, b ASC] respectively let mut order_by_exprs = vec![ None, Some(vec![PhysicalSortExpr { - expr: Arc::new(col_a.clone()), + expr: col_a.clone(), options: options1, }]), - Some(vec![PhysicalSortExpr { - expr: Arc::new(col_b.clone()), - options: options1, - }]), - Some(vec![PhysicalSortExpr { - expr: Arc::new(col_c), - options: options2, - }]), Some(vec![ PhysicalSortExpr { - expr: Arc::new(col_a.clone()), + expr: col_a.clone(), + options: options1, + }, + PhysicalSortExpr { + expr: col_b.clone(), options: options1, }, PhysicalSortExpr { - expr: Arc::new(col_d), + expr: col_c.clone(), + options: options1, + }, + ]), + Some(vec![ + PhysicalSortExpr { + expr: col_a.clone(), + options: options1, + }, + PhysicalSortExpr { + expr: col_b.clone(), options: options1, }, ]), // Since aggregate expression is reversible (FirstValue), we should be able to resolve below // contradictory requirement by reversing it. Some(vec![PhysicalSortExpr { - expr: Arc::new(col_b.clone()), + expr: col_b.clone(), options: options2, }]), ]; + let common_requirement = Some(vec![ + PhysicalSortExpr { + expr: col_a.clone(), + options: options1, + }, + PhysicalSortExpr { + expr: col_c.clone(), + options: options1, + }, + ]); let aggr_expr = Arc::new(FirstValue::new( - Arc::new(col_a.clone()), + col_a.clone(), "first1", DataType::Int32, vec![], vec![], )) as _; let mut aggr_exprs = vec![aggr_expr; order_by_exprs.len()]; - let res = get_finest_requirement( - &mut aggr_exprs, - &mut order_by_exprs, - || eq_properties.clone(), - || ordering_eq_properties.clone(), - )?; - assert_eq!(res, order_by_exprs[4]); + let res = + get_finest_requirement(&mut aggr_exprs, &mut order_by_exprs, &eq_properties)?; + assert_eq!(res, common_requirement); Ok(()) } } diff --git a/datafusion/physical-plan/src/aggregates/order/mod.rs b/datafusion/physical-plan/src/aggregates/order/mod.rs index f0b49872b1c5..f72d2f06e459 100644 --- a/datafusion/physical-plan/src/aggregates/order/mod.rs +++ b/datafusion/physical-plan/src/aggregates/order/mod.rs @@ -18,13 +18,12 @@ use arrow_array::ArrayRef; use arrow_schema::Schema; use datafusion_common::Result; -use datafusion_physical_expr::EmitTo; - -use super::{AggregationOrdering, GroupByOrderMode}; +use datafusion_physical_expr::{EmitTo, PhysicalSortExpr}; mod full; mod partial; +use crate::windows::PartitionSearchMode; pub(crate) use full::GroupOrderingFull; pub(crate) use partial::GroupOrderingPartial; @@ -43,24 +42,19 @@ impl GroupOrdering { /// Create a `GroupOrdering` for the the specified ordering pub fn try_new( input_schema: &Schema, - ordering: &AggregationOrdering, + mode: &PartitionSearchMode, + ordering: &[PhysicalSortExpr], ) -> Result { - let AggregationOrdering { - mode, - order_indices, - ordering, - } = ordering; - - Ok(match mode { - GroupByOrderMode::PartiallyOrdered => { - let partial = - GroupOrderingPartial::try_new(input_schema, order_indices, ordering)?; - GroupOrdering::Partial(partial) + match mode { + PartitionSearchMode::Linear => Ok(GroupOrdering::None), + PartitionSearchMode::PartiallySorted(order_indices) => { + GroupOrderingPartial::try_new(input_schema, order_indices, ordering) + .map(GroupOrdering::Partial) } - GroupByOrderMode::FullyOrdered => { - GroupOrdering::Full(GroupOrderingFull::new()) + PartitionSearchMode::Sorted => { + Ok(GroupOrdering::Full(GroupOrderingFull::new())) } - }) + } } // How many groups be emitted, or None if no data can be 
emitted diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs index d773533ad6a3..7cee4a3e7cfc 100644 --- a/datafusion/physical-plan/src/aggregates/row_hash.rs +++ b/datafusion/physical-plan/src/aggregates/row_hash.rs @@ -17,17 +17,10 @@ //! Hash aggregation -use datafusion_physical_expr::{ - AggregateExpr, EmitTo, GroupsAccumulator, GroupsAccumulatorAdapter, PhysicalSortExpr, -}; -use log::debug; use std::sync::Arc; use std::task::{Context, Poll}; use std::vec; -use futures::ready; -use futures::stream::{Stream, StreamExt}; - use crate::aggregates::group_values::{new_group_values, GroupValues}; use crate::aggregates::order::GroupOrderingFull; use crate::aggregates::{ @@ -39,8 +32,9 @@ use crate::metrics::{BaselineMetrics, RecordOutput}; use crate::sorts::sort::{read_spill_as_stream, sort_batch}; use crate::sorts::streaming_merge; use crate::stream::RecordBatchStreamAdapter; -use crate::{aggregates, PhysicalExpr}; +use crate::{aggregates, ExecutionPlan, PhysicalExpr}; use crate::{RecordBatchStream, SendableRecordBatchStream}; + use arrow::array::*; use arrow::{datatypes::SchemaRef, record_batch::RecordBatch}; use arrow_schema::SortOptions; @@ -50,7 +44,14 @@ use datafusion_execution::memory_pool::proxy::VecAllocExt; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_execution::TaskContext; -use datafusion_physical_expr::expressions::col; +use datafusion_physical_expr::expressions::Column; +use datafusion_physical_expr::{ + AggregateExpr, EmitTo, GroupsAccumulator, GroupsAccumulatorAdapter, PhysicalSortExpr, +}; + +use futures::ready; +use futures::stream::{Stream, StreamExt}; +use log::debug; #[derive(Debug, Clone)] /// This object tracks the aggregation phase (input/output) @@ -321,24 +322,25 @@ impl GroupedHashAggregateStream { let spill_expr = group_schema .fields .into_iter() - .map(|field| PhysicalSortExpr { - expr: col(field.name(), &group_schema).unwrap(), + .enumerate() + .map(|(idx, field)| PhysicalSortExpr { + expr: Arc::new(Column::new(field.name().as_str(), idx)) as _, options: SortOptions::default(), }) .collect(); let name = format!("GroupedHashAggregateStream[{partition}]"); - let reservation = MemoryConsumer::new(name).register(context.memory_pool()); - - let group_ordering = agg - .aggregation_ordering - .as_ref() - .map(|aggregation_ordering| { - GroupOrdering::try_new(&group_schema, aggregation_ordering) - }) - // return error if any - .transpose()? 
- .unwrap_or(GroupOrdering::None); + let reservation = MemoryConsumer::new(name) + .with_can_spill(true) + .register(context.memory_pool()); + let (ordering, _) = agg + .equivalence_properties() + .find_longest_permutation(&agg_group_by.output_exprs()); + let group_ordering = GroupOrdering::try_new( + &group_schema, + &agg.partition_search_mode, + ordering.as_slice(), + )?; let group_values = new_group_values(group_schema)?; timer.done(); @@ -416,8 +418,7 @@ impl Stream for GroupedHashAggregateStream { let elapsed_compute = self.baseline_metrics.elapsed_compute().clone(); loop { - let exec_state = self.exec_state.clone(); - match exec_state { + match &self.exec_state { ExecutionState::ReadingInput => { match ready!(self.input.poll_next_unpin(cx)) { // new batch to aggregate @@ -451,14 +452,14 @@ impl Stream for GroupedHashAggregateStream { self.input_done = true; self.group_ordering.input_done(); let timer = elapsed_compute.timer(); - if self.spill_state.spills.is_empty() { + self.exec_state = if self.spill_state.spills.is_empty() { let batch = extract_ok!(self.emit(EmitTo::All, false)); - self.exec_state = ExecutionState::ProducingOutput(batch); + ExecutionState::ProducingOutput(batch) } else { // If spill files exist, stream-merge them. extract_ok!(self.update_merged_stream()); - self.exec_state = ExecutionState::ReadingInput; - } + ExecutionState::ReadingInput + }; timer.done(); } } @@ -466,19 +467,24 @@ impl Stream for GroupedHashAggregateStream { ExecutionState::ProducingOutput(batch) => { // slice off a part of the batch, if needed - let output_batch = if batch.num_rows() <= self.batch_size { - if self.input_done { - self.exec_state = ExecutionState::Done; - } else { - self.exec_state = ExecutionState::ReadingInput - } - batch + let output_batch; + let size = self.batch_size; + (self.exec_state, output_batch) = if batch.num_rows() <= size { + ( + if self.input_done { + ExecutionState::Done + } else { + ExecutionState::ReadingInput + }, + batch.clone(), + ) } else { // output first batch_size rows - let num_remaining = batch.num_rows() - self.batch_size; - let remaining = batch.slice(self.batch_size, num_remaining); - self.exec_state = ExecutionState::ProducingOutput(remaining); - batch.slice(0, self.batch_size) + let size = self.batch_size; + let num_remaining = batch.num_rows() - size; + let remaining = batch.slice(size, num_remaining); + let output = batch.slice(0, size); + (ExecutionState::ProducingOutput(remaining), output) }; return Poll::Ready(Some(Ok( output_batch.record_output(&self.baseline_metrics) @@ -673,7 +679,16 @@ impl GroupedHashAggregateStream { let spillfile = self.runtime.disk_manager.create_tmp_file("HashAggSpill")?; let mut writer = IPCWriter::new(spillfile.path(), &emit.schema())?; // TODO: slice large `sorted` and write to multiple files in parallel - writer.write(&sorted)?; + let mut offset = 0; + let total_rows = sorted.num_rows(); + + while offset < total_rows { + let length = std::cmp::min(total_rows - offset, self.batch_size); + let batch = sorted.slice(offset, length); + offset += batch.num_rows(); + writer.write(&batch)?; + } + writer.finish()?; self.spill_state.spills.push(spillfile); Ok(()) diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index df9e8a8a2b8c..09d1ea87ca37 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -27,8 +27,8 @@ use super::expressions::PhysicalSortExpr; use super::metrics::{BaselineMetrics, 
ExecutionPlanMetricsSet, MetricsSet}; use super::{DisplayAs, Statistics}; use crate::{ - DisplayFormatType, EquivalenceProperties, ExecutionPlan, Partitioning, - RecordBatchStream, SendableRecordBatchStream, + DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream, + SendableRecordBatchStream, }; use arrow::datatypes::SchemaRef; @@ -36,7 +36,7 @@ use arrow::error::Result as ArrowResult; use arrow::record_batch::RecordBatch; use datafusion_common::Result; use datafusion_execution::TaskContext; -use datafusion_physical_expr::OrderingEquivalenceProperties; +use datafusion_physical_expr::EquivalenceProperties; use futures::stream::{Stream, StreamExt}; use log::trace; @@ -138,10 +138,6 @@ impl ExecutionPlan for CoalesceBatchesExec { self.input.equivalence_properties() } - fn ordering_equivalence_properties(&self) -> OrderingEquivalenceProperties { - self.input.ordering_equivalence_properties() - } - fn with_new_children( self: Arc, children: Vec>, @@ -228,17 +224,17 @@ impl CoalesceBatchesStream { let _timer = cloned_time.timer(); match input_batch { Poll::Ready(x) => match x { - Some(Ok(ref batch)) => { + Some(Ok(batch)) => { if batch.num_rows() >= self.target_batch_size && self.buffer.is_empty() { - return Poll::Ready(Some(Ok(batch.clone()))); + return Poll::Ready(Some(Ok(batch))); } else if batch.num_rows() == 0 { // discard empty batches } else { // add to the buffered batches - self.buffer.push(batch.clone()); self.buffered_rows += batch.num_rows(); + self.buffer.push(batch); // check to see if we have enough batches yet if self.buffered_rows >= self.target_batch_size { // combine the batches and return @@ -300,14 +296,14 @@ pub fn concat_batches( batches.len(), row_count ); - let b = arrow::compute::concat_batches(schema, batches)?; - Ok(b) + arrow::compute::concat_batches(schema, batches) } #[cfg(test)] mod tests { use super::*; use crate::{memory::MemoryExec, repartition::RepartitionExec}; + use arrow::datatypes::{DataType, Field, Schema}; use arrow_array::UInt32Array; diff --git a/datafusion/physical-plan/src/coalesce_partitions.rs b/datafusion/physical-plan/src/coalesce_partitions.rs index f09b33e3f3b5..bfcff2853538 100644 --- a/datafusion/physical-plan/src/coalesce_partitions.rs +++ b/datafusion/physical-plan/src/coalesce_partitions.rs @@ -26,11 +26,12 @@ use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use super::stream::{ObservedStream, RecordBatchReceiverStream}; use super::{DisplayAs, SendableRecordBatchStream, Statistics}; -use crate::{DisplayFormatType, EquivalenceProperties, ExecutionPlan, Partitioning}; +use crate::{DisplayFormatType, ExecutionPlan, Partitioning}; use arrow::datatypes::SchemaRef; use datafusion_common::{internal_err, DataFusionError, Result}; use datafusion_execution::TaskContext; +use datafusion_physical_expr::EquivalenceProperties; /// Merge execution plan executes partitions in parallel and combines them into a single /// partition. No guarantees are made about the order of the resulting partition. @@ -101,7 +102,10 @@ impl ExecutionPlan for CoalescePartitionsExec { } fn equivalence_properties(&self) -> EquivalenceProperties { - self.input.equivalence_properties() + let mut output_eq = self.input.equivalence_properties(); + // Coalesce partitions loses existing orderings. 
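`CoalesceBatchesStream`, whose polling loop is touched above, buffers incoming batches until the accumulated row count reaches the target batch size and then concatenates them, while already-large batches pass through untouched; the spill path added to `row_hash.rs` earlier in this diff applies the mirror-image idea by slicing one large sorted batch into target-sized chunks before writing. A tiny standalone sketch of the buffering side, with `Vec<i64>` standing in for a `RecordBatch` (an assumption made purely for brevity):

```rust
/// Combine small "batches" until at least `target` rows are buffered,
/// then emit the concatenation.
struct Coalescer {
    target: usize,
    buffer: Vec<i64>,
}

impl Coalescer {
    fn new(target: usize) -> Self {
        Self { target, buffer: Vec::new() }
    }

    /// Push a batch; returns a combined batch once the threshold is met.
    fn push(&mut self, batch: Vec<i64>) -> Option<Vec<i64>> {
        if batch.len() >= self.target && self.buffer.is_empty() {
            // Large batches pass through untouched.
            return Some(batch);
        }
        self.buffer.extend(batch);
        if self.buffer.len() >= self.target {
            Some(std::mem::take(&mut self.buffer))
        } else {
            None
        }
    }

    /// Flush whatever is left at end of input.
    fn finish(&mut self) -> Option<Vec<i64>> {
        (!self.buffer.is_empty()).then(|| std::mem::take(&mut self.buffer))
    }
}

fn main() {
    let mut c = Coalescer::new(4);
    assert_eq!(c.push(vec![1, 2]), None); // only 2 rows buffered so far
    assert_eq!(c.push(vec![3, 4]), Some(vec![1, 2, 3, 4])); // threshold reached
    assert_eq!(c.push(vec![5]), None);
    assert_eq!(c.finish(), Some(vec![5]));
}
```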
+ output_eq.clear_orderings(); + output_eq } fn benefits_from_input_partitioning(&self) -> Vec { diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index 7b046ebdf398..1c98a1319cb6 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -30,7 +30,7 @@ use super::{ use crate::{ metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}, - Column, DisplayFormatType, EquivalenceProperties, ExecutionPlan, Partitioning, + Column, DisplayFormatType, ExecutionPlan, Partitioning, }; use arrow::compute::filter_record_batch; @@ -42,13 +42,12 @@ use datafusion_common::{plan_err, project_schema, DataFusionError, Result}; use datafusion_execution::TaskContext; use datafusion_expr::Operator; use datafusion_physical_expr::expressions::BinaryExpr; -use datafusion_physical_expr::{ - analyze, split_conjunction, AnalysisContext, ExprBoundaries, - OrderingEquivalenceProperties, PhysicalExpr, -}; - use datafusion_physical_expr::intervals::utils::check_support; use datafusion_physical_expr::utils::collect_columns; +use datafusion_physical_expr::{ + analyze, split_conjunction, AnalysisContext, EquivalenceProperties, ExprBoundaries, + PhysicalExpr, +}; use futures::stream::{Stream, StreamExt}; use log::trace; @@ -154,38 +153,33 @@ impl ExecutionPlan for FilterExec { } fn equivalence_properties(&self) -> EquivalenceProperties { + let stats = self.statistics().unwrap(); // Combine the equal predicates with the input equivalence properties - let mut input_properties = self.input.equivalence_properties(); - let (equal_pairs, _ne_pairs) = collect_columns_from_predicate(&self.predicate); - for new_condition in equal_pairs { - input_properties.add_equal_conditions(new_condition) + let mut result = self.input.equivalence_properties(); + let (equal_pairs, _) = collect_columns_from_predicate(&self.predicate); + for (lhs, rhs) in equal_pairs { + let lhs_expr = Arc::new(lhs.clone()) as _; + let rhs_expr = Arc::new(rhs.clone()) as _; + result.add_equal_conditions(&lhs_expr, &rhs_expr) } - input_properties - } - - fn ordering_equivalence_properties(&self) -> OrderingEquivalenceProperties { - let stats = self - .statistics() - .expect("Ordering equivalences need to handle the error case of statistics"); // Add the columns that have only one value (singleton) after filtering to constants. 
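`FilterExec::equivalence_properties` now folds predicate knowledge directly into the returned properties: `a = b` conjuncts make the two columns equivalent, and columns whose post-filter statistics collapse to a single value are registered as constants. A sketch of that conjunct scan, modeling the statistics-singleton case as a direct `col = literal` conjunct for simplicity; the `Predicate` enum is hypothetical:

```rust
/// A very small predicate model: either column = column, or column = literal.
enum Predicate {
    ColEqCol(String, String),
    ColEqLit(String, i64),
}

/// Split a conjunctive filter into (equivalent column pairs, constant columns),
/// mirroring what FilterExec feeds into its equivalence properties.
fn analyze_conjuncts(conjuncts: &[Predicate]) -> (Vec<(String, String)>, Vec<String>) {
    let mut equal_pairs = Vec::new();
    let mut constants = Vec::new();
    for p in conjuncts {
        match p {
            Predicate::ColEqCol(l, r) => equal_pairs.push((l.clone(), r.clone())),
            // After filtering on col = literal, the column has a single value.
            Predicate::ColEqLit(col, _) => constants.push(col.clone()),
        }
    }
    (equal_pairs, constants)
}

fn main() {
    // WHERE a = b AND c = 5
    let conjuncts = vec![
        Predicate::ColEqCol("a".into(), "b".into()),
        Predicate::ColEqLit("c".into(), 5),
    ];
    let (pairs, constants) = analyze_conjuncts(&conjuncts);
    assert_eq!(pairs, vec![("a".to_string(), "b".to_string())]);
    assert_eq!(constants, vec!["c".to_string()]);
}
```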
let constants = collect_columns(self.predicate()) .into_iter() .filter(|column| stats.column_statistics[column.index()].is_singleton()) - .map(|column| Arc::new(column) as Arc) - .collect::>(); - let filter_oeq = self.input.ordering_equivalence_properties(); - filter_oeq.with_constants(constants) + .map(|column| Arc::new(column) as _); + result.add_constants(constants) } fn with_new_children( self: Arc, - children: Vec>, + mut children: Vec>, ) -> Result> { - Ok(Arc::new(FilterExec::try_new( + FilterExec::try_new( self.predicate.clone(), self.projection.clone(), - children[0].clone(), - )?)) + children.swap_remove(0), + ) + .map(|e| Arc::new(e) as _) } fn execute( @@ -372,17 +366,16 @@ impl RecordBatchStream for FilterExecStream { /// Return the equals Column-Pairs and Non-equals Column-Pairs fn collect_columns_from_predicate(predicate: &Arc) -> EqualAndNonEqual { - let mut eq_predicate_columns: Vec<(&Column, &Column)> = Vec::new(); - let mut ne_predicate_columns: Vec<(&Column, &Column)> = Vec::new(); + let mut eq_predicate_columns = Vec::<(&Column, &Column)>::new(); + let mut ne_predicate_columns = Vec::<(&Column, &Column)>::new(); let predicates = split_conjunction(predicate); predicates.into_iter().for_each(|p| { if let Some(binary) = p.as_any().downcast_ref::() { - let left = binary.left(); - let right = binary.right(); - if left.as_any().is::() && right.as_any().is::() { - let left_column = left.as_any().downcast_ref::().unwrap(); - let right_column = right.as_any().downcast_ref::().unwrap(); + if let (Some(left_column), Some(right_column)) = ( + binary.left().as_any().downcast_ref::(), + binary.right().as_any().downcast_ref::(), + ) { match binary.op() { Operator::Eq => { eq_predicate_columns.push((left_column, right_column)) diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index d8c8064e2ac1..102f0c42e90c 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -27,23 +27,22 @@ use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use crate::DisplayAs; use crate::{ coalesce_batches::concat_batches, coalesce_partitions::CoalescePartitionsExec, - ColumnStatistics, DisplayFormatType, Distribution, EquivalenceProperties, - ExecutionPlan, Partitioning, PhysicalSortExpr, RecordBatchStream, - SendableRecordBatchStream, Statistics, + ColumnStatistics, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, + PhysicalSortExpr, RecordBatchStream, SendableRecordBatchStream, Statistics, }; use arrow::datatypes::{Fields, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use arrow_array::RecordBatchOptions; use datafusion_common::stats::Precision; -use datafusion_common::{plan_err, DataFusionError, Result, ScalarValue}; +use datafusion_common::{plan_err, DataFusionError, JoinType, Result, ScalarValue}; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_execution::TaskContext; +use datafusion_physical_expr::equivalence::join_equivalence_properties; +use datafusion_physical_expr::EquivalenceProperties; use async_trait::async_trait; -use datafusion_physical_expr::equivalence::cross_join_equivalence_properties; -use futures::{ready, StreamExt}; -use futures::{Stream, TryStreamExt}; +use futures::{ready, Stream, StreamExt, TryStreamExt}; /// Data of the left side type JoinLeftData = (RecordBatch, MemoryReservation); @@ -106,12 +105,11 @@ async fn load_left_input( reservation: MemoryReservation, ) -> Result { // merge 
all left parts into a single stream - let merge = { - if left.output_partitioning().partition_count() != 1 { - Arc::new(CoalescePartitionsExec::new(left.clone())) - } else { - left.clone() - } + let left_schema = left.schema(); + let merge = if left.output_partitioning().partition_count() != 1 { + Arc::new(CoalescePartitionsExec::new(left)) + } else { + left }; let stream = merge.execute(0, context)?; @@ -136,7 +134,7 @@ async fn load_left_input( ) .await?; - let merged_batch = concat_batches(&left.schema(), &batches, num_rows)?; + let merged_batch = concat_batches(&left_schema, &batches, num_rows)?; Ok((merged_batch, reservation)) } @@ -217,12 +215,14 @@ impl ExecutionPlan for CrossJoinExec { } fn equivalence_properties(&self) -> EquivalenceProperties { - let left_columns_len = self.left.schema().fields.len(); - cross_join_equivalence_properties( + join_equivalence_properties( self.left.equivalence_properties(), self.right.equivalence_properties(), - left_columns_len, + &JoinType::Full, self.schema(), + &[false, false], + None, + &[], ) } diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index 9aa776fe054c..0e2d552b543b 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! Defines the join plan for executing partitions in parallel and then joining the results -//! into a set of partitions. +//! [`HashJoinExec`] Partitioned Hash Join Operator use std::fmt; use std::mem::size_of; @@ -43,8 +42,8 @@ use crate::{ BuildProbeJoinMetrics, ColumnIndex, JoinFilter, JoinOn, }, metrics::{ExecutionPlanMetricsSet, MetricsSet}, - DisplayFormatType, Distribution, EquivalenceProperties, ExecutionPlan, Partitioning, - PhysicalExpr, RecordBatchStream, SendableRecordBatchStream, Statistics, + DisplayFormatType, Distribution, ExecutionPlan, Partitioning, PhysicalExpr, + RecordBatchStream, SendableRecordBatchStream, Statistics, }; use super::{ @@ -56,6 +55,7 @@ use arrow::array::{ Array, ArrayRef, BooleanArray, BooleanBufferBuilder, PrimitiveArray, UInt32Array, UInt32BufferBuilder, UInt64Array, UInt64BufferBuilder, }; +use arrow::compute::kernels::cmp::{eq, not_distinct}; use arrow::compute::{and, take, FilterBuilder}; use arrow::datatypes::{Schema, SchemaRef}; use arrow::record_batch::RecordBatch; @@ -67,40 +67,148 @@ use datafusion_common::{ }; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_execution::TaskContext; -use datafusion_physical_expr::OrderingEquivalenceProperties; +use datafusion_physical_expr::equivalence::join_equivalence_properties; +use datafusion_physical_expr::EquivalenceProperties; use ahash::RandomState; -use arrow::compute::kernels::cmp::{eq, not_distinct}; -use datafusion_physical_expr::equivalence::{ - combine_join_equivalence_properties, combine_join_ordering_equivalence_properties, -}; use futures::{ready, Stream, StreamExt, TryStreamExt}; type JoinLeftData = (JoinHashMap, RecordBatch, MemoryReservation); -/// Join execution plan executes partitions in parallel and combines them into a set of -/// partitions. +/// Join execution plan: Evaluates eqijoin predicates in parallel on multiple +/// partitions using a hash table and an optional filter list to apply post +/// join. 
+/// +/// # Join Expressions +/// +/// This implementation is optimized for evaluating eqijoin predicates ( +/// ` = `) expressions, which are represented as a list of `Columns` +/// in [`Self::on`]. +/// +/// Non-equality predicates, which can not pushed down to a join inputs (e.g. +/// ` != `) are known as "filter expressions" and are evaluated +/// after the equijoin predicates. +/// +/// # "Build Side" vs "Probe Side" +/// +/// HashJoin takes two inputs, which are referred to as the "build" and the +/// "probe". The build side is the first child, and the probe side is the second +/// child. +/// +/// The two inputs are treated differently and it is VERY important that the +/// *smaller* input is placed on the build side to minimize the work of creating +/// the hash table. +/// +/// ```text +/// ┌───────────┐ +/// │ HashJoin │ +/// │ │ +/// └───────────┘ +/// │ │ +/// ┌─────┘ └─────┐ +/// ▼ ▼ +/// ┌────────────┐ ┌─────────────┐ +/// │ Input │ │ Input │ +/// │ [0] │ │ [1] │ +/// └────────────┘ └─────────────┘ +/// +/// "build side" "probe side" +/// ``` +/// +/// Execution proceeds in 2 stages: +/// +/// 1. the **build phase** where a hash table is created from the tuples of the +/// build side. /// -/// Filter expression expected to contain non-equality predicates that can not be pushed -/// down to any of join inputs. -/// In case of outer join, filter applied to only matched rows. +/// 2. the **probe phase** where the tuples of the probe side are streamed +/// through, checking for matches of the join keys in the hash table. +/// +/// ```text +/// ┌────────────────┐ ┌────────────────┐ +/// │ ┌─────────┐ │ │ ┌─────────┐ │ +/// │ │ Hash │ │ │ │ Hash │ │ +/// │ │ Table │ │ │ │ Table │ │ +/// │ │(keys are│ │ │ │(keys are│ │ +/// │ │equi join│ │ │ │equi join│ │ Stage 2: batches from +/// Stage 1: the │ │columns) │ │ │ │columns) │ │ the probe side are +/// *entire* build │ │ │ │ │ │ │ │ streamed through, and +/// side is read │ └─────────┘ │ │ └─────────┘ │ checked against the +/// into the hash │ ▲ │ │ ▲ │ contents of the hash +/// table │ HashJoin │ │ HashJoin │ table +/// └──────┼─────────┘ └──────────┼─────┘ +/// ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ +/// │ │ +/// +/// │ │ +/// ┌────────────┐ ┌────────────┐ +/// │RecordBatch │ │RecordBatch │ +/// └────────────┘ └────────────┘ +/// ┌────────────┐ ┌────────────┐ +/// │RecordBatch │ │RecordBatch │ +/// └────────────┘ └────────────┘ +/// ... ... +/// ┌────────────┐ ┌────────────┐ +/// │RecordBatch │ │RecordBatch │ +/// └────────────┘ └────────────┘ +/// +/// build side probe side +/// +/// ``` +/// +/// # Example "Optimal" Plans +/// +/// The differences in the inputs means that for classic "Star Schema Query", +/// the optimal plan will be a **"Right Deep Tree"** . A Star Schema Query is +/// one where there is one large table and several smaller "dimension" tables, +/// joined on `Foreign Key = Primary Key` predicates. 
+/// +/// A "Right Deep Tree" looks like this large table as the probe side on the +/// lowest join: +/// +/// ```text +/// ┌───────────┐ +/// │ HashJoin │ +/// │ │ +/// └───────────┘ +/// │ │ +/// ┌───────┘ └──────────┐ +/// ▼ ▼ +/// ┌───────────────┐ ┌───────────┐ +/// │ small table 1 │ │ HashJoin │ +/// │ "dimension" │ │ │ +/// └───────────────┘ └───┬───┬───┘ +/// ┌──────────┘ └───────┐ +/// │ │ +/// ▼ ▼ +/// ┌───────────────┐ ┌───────────┐ +/// │ small table 2 │ │ HashJoin │ +/// │ "dimension" │ │ │ +/// └───────────────┘ └───┬───┬───┘ +/// ┌────────┘ └────────┐ +/// │ │ +/// ▼ ▼ +/// ┌───────────────┐ ┌───────────────┐ +/// │ small table 3 │ │ large table │ +/// │ "dimension" │ │ "fact" │ +/// └───────────────┘ └───────────────┘ +/// ``` #[derive(Debug)] pub struct HashJoinExec { /// left (build) side which gets hashed pub left: Arc, /// right (probe) side which are filtered by the hash table pub right: Arc, - /// Set of common columns used to join on + /// Set of equijoin columns from the relations: `(left_col, right_col)` pub on: Vec<(Column, Column)>, /// Filters which are applied while finding matching rows pub filter: Option, - /// How the join is performed + /// How the join is performed (`OUTER`, `INNER`, etc) pub join_type: JoinType, - /// The schema once the join is applied + /// The output schema for the join schema: SchemaRef, /// Build-side data left_fut: OnceAsync, - /// Shares the `RandomState` for the hashing algorithm + /// Shared the `RandomState` for the hashing algorithm random_state: RandomState, /// Output order output_order: Option>, @@ -110,12 +218,16 @@ pub struct HashJoinExec { metrics: ExecutionPlanMetricsSet, /// Information of index and left / right placement of columns column_indices: Vec, - /// If null_equals_null is true, null == null else null != null + /// Null matching behavior: If `null_equals_null` is true, rows that have + /// `null`s in both left and right equijoin columns will be matched. + /// Otherwise, rows that have `null`s in the join columns will not be + /// matched and thus will not appear in the output. pub null_equals_null: bool, } impl HashJoinExec { /// Tries to create a new [HashJoinExec]. + /// /// # Error /// This function errors when it is not possible to join the left and right sides on keys `on`. 
pub fn try_new( @@ -368,26 +480,14 @@ impl ExecutionPlan for HashJoinExec { } fn equivalence_properties(&self) -> EquivalenceProperties { - let left_columns_len = self.left.schema().fields.len(); - combine_join_equivalence_properties( - self.join_type, + join_equivalence_properties( self.left.equivalence_properties(), self.right.equivalence_properties(), - left_columns_len, - self.on(), - self.schema(), - ) - } - - fn ordering_equivalence_properties(&self) -> OrderingEquivalenceProperties { - combine_join_ordering_equivalence_properties( &self.join_type, - &self.left.ordering_equivalence_properties(), - &self.right.ordering_equivalence_properties(), self.schema(), &self.maintains_input_order(), Some(Self::probe_side()), - self.equivalence_properties(), + self.on(), ) } @@ -419,6 +519,7 @@ impl ExecutionPlan for HashJoinExec { let on_right = self.on.iter().map(|on| on.1.clone()).collect::>(); let left_partitions = self.left.output_partitioning().partition_count(); let right_partitions = self.right.output_partitioning().partition_count(); + if self.mode == PartitionMode::Partitioned && left_partitions != right_partitions { return internal_err!( @@ -521,16 +622,10 @@ async fn collect_left_input( let (left_input, left_input_partition) = if let Some(partition) = partition { (left, partition) + } else if left.output_partitioning().partition_count() != 1 { + (Arc::new(CoalescePartitionsExec::new(left)) as _, 0) } else { - let merge = { - if left.output_partitioning().partition_count() != 1 { - Arc::new(CoalescePartitionsExec::new(left)) - } else { - left - } - }; - - (merge, 0) + (left, 0) }; // Depending on partition argument load single partition or whole left side in memory @@ -1061,24 +1156,22 @@ impl Stream for HashJoinStream { mod tests { use std::sync::Arc; - use arrow::array::{ArrayRef, Date32Array, Int32Array, UInt32Builder, UInt64Builder}; - use arrow::datatypes::{DataType, Field, Schema}; - - use datafusion_common::{assert_batches_sorted_eq, assert_contains, ScalarValue}; - use datafusion_expr::Operator; - use datafusion_physical_expr::expressions::Literal; - use hashbrown::raw::RawTable; - + use super::*; use crate::{ common, expressions::Column, hash_utils::create_hashes, joins::hash_join::build_equal_condition_join_indices, memory::MemoryExec, repartition::RepartitionExec, test::build_table_i32, test::exec::MockExec, }; + + use arrow::array::{ArrayRef, Date32Array, Int32Array, UInt32Builder, UInt64Builder}; + use arrow::datatypes::{DataType, Field, Schema}; + use datafusion_common::{assert_batches_sorted_eq, assert_contains, ScalarValue}; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; - use datafusion_physical_expr::expressions::BinaryExpr; + use datafusion_expr::Operator; + use datafusion_physical_expr::expressions::{BinaryExpr, Literal}; - use super::*; + use hashbrown::raw::RawTable; fn build_table( a: (&str, &Vec), diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index a113066e39d1..6951642ff801 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -48,9 +48,9 @@ use datafusion_common::{exec_err, DataFusionError, JoinSide, Result, Statistics} use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_execution::TaskContext; use datafusion_expr::JoinType; +use datafusion_physical_expr::equivalence::join_equivalence_properties; 
use datafusion_physical_expr::{EquivalenceProperties, PhysicalSortExpr}; -use datafusion_physical_expr::equivalence::combine_join_equivalence_properties; use futures::{ready, Stream, StreamExt, TryStreamExt}; /// Data of the inner table side @@ -192,14 +192,15 @@ impl ExecutionPlan for NestedLoopJoinExec { } fn equivalence_properties(&self) -> EquivalenceProperties { - let left_columns_len = self.left.schema().fields.len(); - combine_join_equivalence_properties( - self.join_type, + join_equivalence_properties( self.left.equivalence_properties(), self.right.equivalence_properties(), - left_columns_len, - &[], // empty join keys + &self.join_type, self.schema(), + &self.maintains_input_order(), + None, + // No on columns in nested loop join + &[], ) } diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index 759149a64d9f..f6fdc6d77c0c 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -37,9 +37,8 @@ use crate::joins::utils::{ }; use crate::metrics::{ExecutionPlanMetricsSet, MetricBuilder, MetricsSet}; use crate::{ - metrics, DisplayAs, DisplayFormatType, Distribution, EquivalenceProperties, - ExecutionPlan, Partitioning, PhysicalExpr, RecordBatchStream, - SendableRecordBatchStream, Statistics, + metrics, DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, + PhysicalExpr, RecordBatchStream, SendableRecordBatchStream, Statistics, }; use arrow::array::*; @@ -52,11 +51,9 @@ use datafusion_common::{ }; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_execution::TaskContext; -use datafusion_physical_expr::{OrderingEquivalenceProperties, PhysicalSortRequirement}; +use datafusion_physical_expr::equivalence::join_equivalence_properties; +use datafusion_physical_expr::{EquivalenceProperties, PhysicalSortRequirement}; -use datafusion_physical_expr::equivalence::{ - combine_join_equivalence_properties, combine_join_ordering_equivalence_properties, -}; use futures::{Stream, StreamExt}; /// join execution plan executes partitions in parallel and combines them into a set of @@ -285,26 +282,14 @@ impl ExecutionPlan for SortMergeJoinExec { } fn equivalence_properties(&self) -> EquivalenceProperties { - let left_columns_len = self.left.schema().fields.len(); - combine_join_equivalence_properties( - self.join_type, + join_equivalence_properties( self.left.equivalence_properties(), self.right.equivalence_properties(), - left_columns_len, - self.on(), - self.schema(), - ) - } - - fn ordering_equivalence_properties(&self) -> OrderingEquivalenceProperties { - combine_join_ordering_equivalence_properties( &self.join_type, - &self.left.ordering_equivalence_properties(), - &self.right.ordering_equivalence_properties(), self.schema(), &self.maintains_input_order(), Some(Self::probe_side(&self.join_type)), - self.equivalence_properties(), + self.on(), ) } diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index 00d43aead434..3617893a1c61 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -63,10 +63,10 @@ use datafusion_common::{ }; use datafusion_execution::memory_pool::MemoryConsumer; use datafusion_execution::TaskContext; +use datafusion_physical_expr::equivalence::join_equivalence_properties; use 
datafusion_physical_expr::intervals::ExprIntervalGraph; use ahash::RandomState; -use datafusion_physical_expr::equivalence::combine_join_equivalence_properties; use futures::stream::{select, BoxStream}; use futures::{Stream, StreamExt}; use hashbrown::HashSet; @@ -430,14 +430,15 @@ impl ExecutionPlan for SymmetricHashJoinExec { } fn equivalence_properties(&self) -> EquivalenceProperties { - let left_columns_len = self.left.schema().fields.len(); - combine_join_equivalence_properties( - self.join_type, + join_equivalence_properties( self.left.equivalence_properties(), self.right.equivalence_properties(), - left_columns_len, - self.on(), + &self.join_type, self.schema(), + &self.maintains_input_order(), + // Has alternating probe side + None, + self.on(), ) } diff --git a/datafusion/physical-plan/src/joins/utils.rs b/datafusion/physical-plan/src/joins/utils.rs index cf150ddf575f..c91dc92fbc7a 100644 --- a/datafusion/physical-plan/src/joins/utils.rs +++ b/datafusion/physical-plan/src/joins/utils.rs @@ -40,12 +40,12 @@ use datafusion_common::{ plan_datafusion_err, plan_err, DataFusionError, JoinSide, JoinType, Result, SharedResult, }; +use datafusion_physical_expr::equivalence::add_offset_to_expr; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::intervals::{ExprIntervalGraph, Interval, IntervalBound}; use datafusion_physical_expr::utils::merge_vectors; use datafusion_physical_expr::{ - add_offset_to_expr, add_offset_to_lex_ordering, LexOrdering, LexOrderingRef, - PhysicalExpr, PhysicalSortExpr, + LexOrdering, LexOrderingRef, PhysicalExpr, PhysicalSortExpr, }; use futures::future::{BoxFuture, Shared}; @@ -91,8 +91,8 @@ fn check_join_set_is_valid( if !left_missing.is_empty() | !right_missing.is_empty() { return plan_err!( - "The left or right side of the join does not have all columns on \"on\": \nMissing on the left: {left_missing:?}\nMissing on the right: {right_missing:?}" - ); + "The left or right side of the join does not have all columns on \"on\": \nMissing on the left: {left_missing:?}\nMissing on the right: {right_missing:?}" + ); }; Ok(()) @@ -133,7 +133,7 @@ pub fn adjust_right_output_partitioning( let new_exprs = exprs .into_iter() .map(|expr| add_offset_to_expr(expr, left_columns_len)) - .collect::>(); + .collect(); Partitioning::Hash(new_exprs, size) } } @@ -169,23 +169,22 @@ pub fn calculate_join_output_ordering( maintains_input_order: &[bool], probe_side: Option, ) -> Option { - // All joins have 2 children: - assert_eq!(maintains_input_order.len(), 2); - let left_maintains = maintains_input_order[0]; - let right_maintains = maintains_input_order[1]; let mut right_ordering = match join_type { // In the case below, right ordering should be offseted with the left // side length, since we append the right table to the left table. JoinType::Inner | JoinType::Left | JoinType::Right | JoinType::Full => { - add_offset_to_lex_ordering(right_ordering, left_columns_len) + right_ordering + .iter() + .map(|sort_expr| PhysicalSortExpr { + expr: add_offset_to_expr(sort_expr.expr.clone(), left_columns_len), + options: sort_expr.options, + }) + .collect() } _ => right_ordering.to_vec(), }; - let output_ordering = match (left_maintains, right_maintains) { - (true, true) => { - unreachable!("Cannot maintain ordering of both sides"); - } - (true, false) => { + let output_ordering = match maintains_input_order { + [true, false] => { // Special case, we can prefix ordering of right side with the ordering of left side. 
if join_type == JoinType::Inner && probe_side == Some(JoinSide::Left) { replace_on_columns_of_right_ordering( @@ -198,7 +197,7 @@ pub fn calculate_join_output_ordering( left_ordering.to_vec() } } - (false, true) => { + [false, true] => { // Special case, we can prefix ordering of left side with the ordering of right side. if join_type == JoinType::Inner && probe_side == Some(JoinSide::Right) { replace_on_columns_of_right_ordering( @@ -212,7 +211,9 @@ pub fn calculate_join_output_ordering( } } // Doesn't maintain ordering, output ordering is None. - (false, false) => return None, + [false, false] => return None, + [true, true] => unreachable!("Cannot maintain ordering of both sides"), + _ => unreachable!("Join operators can not have more than two children"), }; (!output_ordering.is_empty()).then_some(output_ordering) } diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index b2f81579f8e8..9519f6a5a1dd 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ -33,7 +33,6 @@ use datafusion_common::tree_node::Transformed; use datafusion_common::utils::DataPtr; use datafusion_common::{plan_err, DataFusionError, Result}; use datafusion_execution::TaskContext; -use datafusion_physical_expr::equivalence::OrderingEquivalenceProperties; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::{ EquivalenceProperties, PhysicalSortExpr, PhysicalSortRequirement, @@ -76,30 +75,41 @@ pub use crate::metrics::Metric; pub use crate::topk::TopK; pub use crate::visitor::{accept, visit_execution_plan, ExecutionPlanVisitor}; +use datafusion_common::config::ConfigOptions; pub use datafusion_common::hash_utils; pub use datafusion_common::utils::project_schema; pub use datafusion_common::{internal_err, ColumnStatistics, Statistics}; pub use datafusion_expr::{Accumulator, ColumnarValue}; pub use datafusion_physical_expr::window::WindowExpr; pub use datafusion_physical_expr::{ - expressions, functions, ordering_equivalence_properties_helper, udf, AggregateExpr, - Distribution, Partitioning, PhysicalExpr, + expressions, functions, udf, AggregateExpr, Distribution, Partitioning, PhysicalExpr, }; // Backwards compatibility pub use crate::stream::EmptyRecordBatchStream; pub use datafusion_execution::{RecordBatchStream, SendableRecordBatchStream}; -/// `ExecutionPlan` represent nodes in the DataFusion Physical Plan. +/// Represent nodes in the DataFusion Physical Plan. /// -/// Each `ExecutionPlan` is partition-aware and is responsible for -/// creating the actual `async` [`SendableRecordBatchStream`]s -/// of [`RecordBatch`] that incrementally compute the operator's -/// output from its input partition. +/// Calling [`execute`] produces an `async` [`SendableRecordBatchStream`] of +/// [`RecordBatch`] that incrementally computes a partition of the +/// `ExecutionPlan`'s output from its input. See [`Partitioning`] for more +/// details on partitioning. +/// +/// Methods such as [`schema`] and [`output_partitioning`] communicate +/// properties of this output to the DataFusion optimizer, and methods such as +/// [`required_input_distribution`] and [`required_input_ordering`] express +/// requirements of the `ExecutionPlan` from its input. /// /// [`ExecutionPlan`] can be displayed in a simplified form using the /// return value from [`displayable`] in addition to the (normally /// quite verbose) `Debug` output. 
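As a hedged illustration of the one-stream-per-partition model described above (the helper name `execute_all_partitions` is an assumption for this sketch, not part of the crate), a caller might start every output partition like this:

```rust
use std::sync::Arc;

use datafusion_common::Result;
use datafusion_execution::{SendableRecordBatchStream, TaskContext};
use datafusion_physical_plan::ExecutionPlan;

/// Hypothetical helper: start every output partition of a plan, returning one
/// lazily evaluated stream per partition. No work happens until the returned
/// streams are actually polled.
fn execute_all_partitions(
    plan: &Arc<dyn ExecutionPlan>,
    ctx: &Arc<TaskContext>,
) -> Result<Vec<SendableRecordBatchStream>> {
    (0..plan.output_partitioning().partition_count())
        .map(|partition| plan.execute(partition, Arc::clone(ctx)))
        .collect()
}
```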
+/// +/// [`execute`]: ExecutionPlan::execute +/// [`schema`]: ExecutionPlan::schema +/// [`output_partitioning`]: ExecutionPlan::output_partitioning +/// [`required_input_distribution`]: ExecutionPlan::required_input_distribution +/// [`required_input_ordering`]: ExecutionPlan::required_input_ordering pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { /// Returns the execution plan as [`Any`] so that it can be /// downcast to a specific implementation. @@ -108,7 +118,8 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { /// Get the schema for this execution plan fn schema(&self) -> SchemaRef; - /// Specifies the output partitioning scheme of this plan + /// Specifies how the output of this `ExecutionPlan` is split into + /// partitions. fn output_partitioning(&self) -> Partitioning; /// Specifies whether this plan generates an infinite stream of records. @@ -122,7 +133,7 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { } } - /// If the output of this operator within each partition is sorted, + /// If the output of this `ExecutionPlan` within each partition is sorted, /// returns `Some(keys)` with the description of how it was sorted. /// /// For example, Sort, (obviously) produces sorted output as does @@ -130,17 +141,19 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { /// produces sorted output if its input was sorted as it does not /// reorder the input rows, /// - /// It is safe to return `None` here if your operator does not + /// It is safe to return `None` here if your `ExecutionPlan` does not /// have any particular output order here fn output_ordering(&self) -> Option<&[PhysicalSortExpr]>; /// Specifies the data distribution requirements for all the - /// children for this operator, By default it's [[Distribution::UnspecifiedDistribution]] for each child, + /// children for this `ExecutionPlan`, By default it's [[Distribution::UnspecifiedDistribution]] for each child, fn required_input_distribution(&self) -> Vec { vec![Distribution::UnspecifiedDistribution; self.children().len()] } - /// Specifies the ordering requirements for all of the children + /// Specifies the ordering required for all of the children of this + /// `ExecutionPlan`. + /// /// For each child, it's the local ordering requirement within /// each partition rather than the global ordering /// @@ -151,7 +164,7 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { vec![None; self.children().len()] } - /// Returns `false` if this operator's implementation may reorder + /// Returns `false` if this `ExecutionPlan`'s implementation may reorder /// rows within or between partitions. /// /// For example, Projection, Filter, and Limit maintain the order @@ -165,19 +178,21 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { /// The default implementation returns `false` /// /// WARNING: if you override this default, you *MUST* ensure that - /// the operator's maintains the ordering invariant or else + /// the `ExecutionPlan`'s maintains the ordering invariant or else /// DataFusion may produce incorrect results. fn maintains_input_order(&self) -> Vec { vec![false; self.children().len()] } - /// Specifies whether the operator benefits from increased parallelization - /// at its input for each child. If set to `true`, this indicates that the - /// operator would benefit from partitioning its corresponding child - /// (and thus from more parallelism). 
For operators that do very little work - /// the overhead of extra parallelism may outweigh any benefits + /// Specifies whether the `ExecutionPlan` benefits from increased + /// parallelization at its input for each child. /// - /// The default implementation returns `true` unless this operator + /// If returns `true`, the `ExecutionPlan` would benefit from partitioning + /// its corresponding child (and thus from more parallelism). For + /// `ExecutionPlan` that do very little work the overhead of extra + /// parallelism may outweigh any benefits + /// + /// The default implementation returns `true` unless this `ExecutionPlan` /// has signalled it requires a single child input partition. fn benefits_from_input_partitioning(&self) -> Vec { // By default try to maximize parallelism with more CPUs if @@ -188,28 +203,181 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { .collect() } - /// Get the EquivalenceProperties within the plan + /// Get the [`EquivalenceProperties`] within the plan fn equivalence_properties(&self) -> EquivalenceProperties { EquivalenceProperties::new(self.schema()) } - /// Get the OrderingEquivalenceProperties within the plan - fn ordering_equivalence_properties(&self) -> OrderingEquivalenceProperties { - OrderingEquivalenceProperties::new(self.schema()) - } - - /// Get a list of child execution plans that provide the input for this plan. The returned list - /// will be empty for leaf nodes, will contain a single value for unary nodes, or two - /// values for binary nodes (such as joins). + /// Get a list of children `ExecutionPlan`s that act as inputs to this plan. + /// The returned list will be empty for leaf nodes such as scans, will contain + /// a single value for unary nodes, or two values for binary nodes (such as + /// joins). fn children(&self) -> Vec>; - /// Returns a new plan where all children were replaced by new plans. + /// Returns a new `ExecutionPlan` where all existing children were replaced + /// by the `children`, oi order fn with_new_children( self: Arc, children: Vec>, ) -> Result>; - /// creates an iterator + /// If supported, attempt to increase the partitioning of this `ExecutionPlan` to + /// produce `target_partitions` partitions. + /// + /// If the `ExecutionPlan` does not support changing its partitioning, + /// returns `Ok(None)` (the default). + /// + /// It is the `ExecutionPlan` can increase its partitioning, but not to the + /// `target_partitions`, it may return an ExecutionPlan with fewer + /// partitions. This might happen, for example, if each new partition would + /// be too small to be efficiently processed individually. + /// + /// The DataFusion optimizer attempts to use as many threads as possible by + /// repartitioning its inputs to match the target number of threads + /// available (`target_partitions`). Some data sources, such as the built in + /// CSV and Parquet readers, implement this method as they are able to read + /// from their input files in parallel, regardless of how the source data is + /// split amongst files. + fn repartitioned( + &self, + _target_partitions: usize, + _config: &ConfigOptions, + ) -> Result>> { + Ok(None) + } + + /// Begin execution of `partition`, returning a [`Stream`] of + /// [`RecordBatch`]es. + /// + /// # Notes + /// + /// The `execute` method itself is not `async` but it returns an `async` + /// [`futures::stream::Stream`]. This `Stream` should incrementally compute + /// the output, `RecordBatch` by `RecordBatch` (in a streaming fashion). 
+ /// Most `ExecutionPlan`s should not do any work before the first + /// `RecordBatch` is requested from the stream. + /// + /// [`RecordBatchStreamAdapter`] can be used to convert an `async` + /// [`Stream`] into a [`SendableRecordBatchStream`]. + /// + /// Using `async` `Streams` allows for network I/O during execution and + /// takes advantage of Rust's built in support for `async` continuations and + /// crate ecosystem. + /// + /// [`Stream`]: futures::stream::Stream + /// [`StreamExt`]: futures::stream::StreamExt + /// [`TryStreamExt`]: futures::stream::TryStreamExt + /// [`RecordBatchStreamAdapter`]: crate::stream::RecordBatchStreamAdapter + /// + /// # Implementation Examples + /// + /// While `async` `Stream`s have a non trivial learning curve, the + /// [`futures`] crate provides [`StreamExt`] and [`TryStreamExt`] + /// which help simplify many common operations. + /// + /// Here are some common patterns: + /// + /// ## Return Precomputed `RecordBatch` + /// + /// We can return a precomputed `RecordBatch` as a `Stream`: + /// + /// ``` + /// # use std::sync::Arc; + /// # use arrow_array::RecordBatch; + /// # use arrow_schema::SchemaRef; + /// # use datafusion_common::Result; + /// # use datafusion_execution::{SendableRecordBatchStream, TaskContext}; + /// # use datafusion_physical_plan::memory::MemoryStream; + /// # use datafusion_physical_plan::stream::RecordBatchStreamAdapter; + /// struct MyPlan { + /// batch: RecordBatch, + /// } + /// + /// impl MyPlan { + /// fn execute( + /// &self, + /// partition: usize, + /// context: Arc + /// ) -> Result { + /// // use functions from futures crate convert the batch into a stream + /// let fut = futures::future::ready(Ok(self.batch.clone())); + /// let stream = futures::stream::once(fut); + /// Ok(Box::pin(RecordBatchStreamAdapter::new(self.batch.schema(), stream))) + /// } + /// } + /// ``` + /// + /// ## Lazily (async) Compute `RecordBatch` + /// + /// We can also lazily compute a `RecordBatch` when the returned `Stream` is polled + /// + /// ``` + /// # use std::sync::Arc; + /// # use arrow_array::RecordBatch; + /// # use arrow_schema::SchemaRef; + /// # use datafusion_common::Result; + /// # use datafusion_execution::{SendableRecordBatchStream, TaskContext}; + /// # use datafusion_physical_plan::memory::MemoryStream; + /// # use datafusion_physical_plan::stream::RecordBatchStreamAdapter; + /// struct MyPlan { + /// schema: SchemaRef, + /// } + /// + /// /// Returns a single batch when the returned stream is polled + /// async fn get_batch() -> Result { + /// todo!() + /// } + /// + /// impl MyPlan { + /// fn execute( + /// &self, + /// partition: usize, + /// context: Arc + /// ) -> Result { + /// let fut = get_batch(); + /// let stream = futures::stream::once(fut); + /// Ok(Box::pin(RecordBatchStreamAdapter::new(self.schema.clone(), stream))) + /// } + /// } + /// ``` + /// + /// ## Lazily (async) create a Stream + /// + /// If you need to to create the return `Stream` using an `async` function, + /// you can do so by flattening the result: + /// + /// ``` + /// # use std::sync::Arc; + /// # use arrow_array::RecordBatch; + /// # use arrow_schema::SchemaRef; + /// # use futures::TryStreamExt; + /// # use datafusion_common::Result; + /// # use datafusion_execution::{SendableRecordBatchStream, TaskContext}; + /// # use datafusion_physical_plan::memory::MemoryStream; + /// # use datafusion_physical_plan::stream::RecordBatchStreamAdapter; + /// struct MyPlan { + /// schema: SchemaRef, + /// } + /// + /// /// async function that 
returns a stream + /// async fn get_batch_stream() -> Result { + /// todo!() + /// } + /// + /// impl MyPlan { + /// fn execute( + /// &self, + /// partition: usize, + /// context: Arc + /// ) -> Result { + /// // A future that yields a stream + /// let fut = get_batch_stream(); + /// // Use TryStreamExt::try_flatten to flatten the stream of streams + /// let stream = futures::stream::once(fut).try_flatten(); + /// Ok(Box::pin(RecordBatchStreamAdapter::new(self.schema.clone(), stream))) + /// } + /// } + /// ``` fn execute( &self, partition: usize, @@ -217,7 +385,7 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { ) -> Result; /// Return a snapshot of the set of [`Metric`]s for this - /// [`ExecutionPlan`]. + /// [`ExecutionPlan`]. If no `Metric`s are available, return None. /// /// While the values of the metrics in the returned /// [`MetricsSet`]s may change as execution progresses, the @@ -242,7 +410,7 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { /// Indicate whether a data exchange is needed for the input of `plan`, which will be very helpful /// especially for the distributed engine to judge whether need to deal with shuffling. /// Currently there are 3 kinds of execution plan which needs data exchange -/// 1. RepartitionExec for changing the partition number between two operators +/// 1. RepartitionExec for changing the partition number between two `ExecutionPlan`s /// 2. CoalescePartitionsExec for collapsing all of the partitions into one without ordering guarantee /// 3. SortPreservingMergeExec for collapsing all of the sorted partitions into one with ordering guarantee pub fn need_data_exchange(plan: Arc) -> bool { diff --git a/datafusion/physical-plan/src/limit.rs b/datafusion/physical-plan/src/limit.rs index 9fe1eb0763ba..945dad16b794 100644 --- a/datafusion/physical-plan/src/limit.rs +++ b/datafusion/physical-plan/src/limit.rs @@ -35,7 +35,6 @@ use arrow::record_batch::{RecordBatch, RecordBatchOptions}; use datafusion_common::stats::Precision; use datafusion_common::{internal_err, DataFusionError, Result}; use datafusion_execution::TaskContext; -use datafusion_physical_expr::OrderingEquivalenceProperties; use futures::stream::{Stream, StreamExt}; use log::trace; @@ -138,10 +137,6 @@ impl ExecutionPlan for GlobalLimitExec { self.input.equivalence_properties() } - fn ordering_equivalence_properties(&self) -> OrderingEquivalenceProperties { - self.input.ordering_equivalence_properties() - } - fn with_new_children( self: Arc, children: Vec>, @@ -327,10 +322,6 @@ impl ExecutionPlan for LocalLimitExec { self.input.equivalence_properties() } - fn ordering_equivalence_properties(&self) -> OrderingEquivalenceProperties { - self.input.ordering_equivalence_properties() - } - fn unbounded_output(&self, _children: &[bool]) -> Result { Ok(false) } diff --git a/datafusion/physical-plan/src/memory.rs b/datafusion/physical-plan/src/memory.rs index b53500579266..5f1660a225b9 100644 --- a/datafusion/physical-plan/src/memory.rs +++ b/datafusion/physical-plan/src/memory.rs @@ -27,13 +27,12 @@ use super::{ common, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream, SendableRecordBatchStream, Statistics, }; -use crate::ordering_equivalence_properties_helper; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; use datafusion_common::{internal_err, project_schema, DataFusionError, Result}; use datafusion_execution::TaskContext; -use datafusion_physical_expr::{LexOrdering, OrderingEquivalenceProperties}; +use 
datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; use futures::Stream; @@ -122,8 +121,8 @@ impl ExecutionPlan for MemoryExec { .map(|ordering| ordering.as_slice()) } - fn ordering_equivalence_properties(&self) -> OrderingEquivalenceProperties { - ordering_equivalence_properties_helper(self.schema(), &self.sort_information) + fn equivalence_properties(&self) -> EquivalenceProperties { + EquivalenceProperties::new_with_orderings(self.schema(), &self.sort_information) } fn with_new_children( @@ -179,7 +178,7 @@ impl MemoryExec { } /// A memory table can be ordered by multiple expressions simultaneously. - /// `OrderingEquivalenceProperties` keeps track of expressions that describe the + /// [`EquivalenceProperties`] keeps track of expressions that describe the /// global ordering of the schema. These columns are not necessarily same; e.g. /// ```text /// ┌-------┐ @@ -192,10 +191,8 @@ impl MemoryExec { /// └---┴---┘ /// ``` /// where both `a ASC` and `b DESC` can describe the table ordering. With - /// `OrderingEquivalenceProperties`, we can keep track of these equivalences - /// and treat `a ASC` and `b DESC` as the same ordering requirement - /// by outputting the `a ASC` from output_ordering API - /// and add `b DESC` into `OrderingEquivalenceProperties` + /// [`EquivalenceProperties`], we can keep track of these equivalences + /// and treat `a ASC` and `b DESC` as the same ordering requirement. pub fn with_sort_information(mut self, sort_information: Vec) -> Self { self.sort_information = sort_information; self @@ -303,11 +300,8 @@ mod tests { .with_sort_information(sort_information); assert_eq!(mem_exec.output_ordering().unwrap(), expected_output_order); - let order_eq = mem_exec.ordering_equivalence_properties(); - assert!(order_eq - .oeq_class() - .map(|class| class.contains(&expected_order_eq)) - .unwrap_or(false)); + let eq_properties = mem_exec.equivalence_properties(); + assert!(eq_properties.oeq_class().contains(&expected_order_eq)); Ok(()) } } diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs index a374154c995c..c5d94b08e0e1 100644 --- a/datafusion/physical-plan/src/projection.rs +++ b/datafusion/physical-plan/src/projection.rs @@ -30,8 +30,7 @@ use super::expressions::{Column, PhysicalSortExpr}; use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use super::{DisplayAs, RecordBatchStream, SendableRecordBatchStream, Statistics}; use crate::{ - ColumnStatistics, DisplayFormatType, EquivalenceProperties, ExecutionPlan, - Partitioning, PhysicalExpr, + ColumnStatistics, DisplayFormatType, ExecutionPlan, Partitioning, PhysicalExpr, }; use arrow::datatypes::{Field, Schema, SchemaRef}; @@ -40,12 +39,9 @@ use datafusion_common::stats::Precision; use datafusion_common::Result; use datafusion_execution::TaskContext; use datafusion_physical_expr::expressions::{Literal, UnKnownColumn}; -use datafusion_physical_expr::utils::find_orderings_of_exprs; -use datafusion_physical_expr::{ - normalize_out_expr_with_columns_map, project_equivalence_properties, - project_ordering_equivalence_properties, OrderingEquivalenceProperties, -}; +use datafusion_physical_expr::EquivalenceProperties; +use datafusion_physical_expr::equivalence::ProjectionMapping; use futures::stream::{Stream, StreamExt}; use log::trace; @@ -60,15 +56,11 @@ pub struct ProjectionExec { input: Arc, /// The output ordering output_ordering: Option>, - /// The columns map used to normalize out expressions like Partitioning and PhysicalSortExpr - /// 
The key is the column from the input schema and the values are the columns from the output schema - columns_map: HashMap>, + /// The mapping used to normalize expressions like Partitioning and + /// PhysicalSortExpr that maps input to output + projection_mapping: ProjectionMapping, /// Execution metrics metrics: ExecutionPlanMetricsSet, - /// Expressions' normalized orderings (as given by the output ordering API - /// and normalized with respect to equivalence classes of input plan). The - /// projected expressions are mapped by their indices to this vector. - orderings: Vec>, } impl ProjectionExec { @@ -100,63 +92,20 @@ impl ProjectionExec { input_schema.metadata().clone(), )); - // construct a map from the input columns to the output columns of the Projection - let mut columns_map: HashMap> = HashMap::new(); - for (expr_idx, (expression, name)) in expr.iter().enumerate() { - if let Some(column) = expression.as_any().downcast_ref::() { - // For some executors, logical and physical plan schema fields - // are not the same. The information in a `Column` comes from - // the logical plan schema. Therefore, to produce correct results - // we use the field in the input schema with the same index. This - // corresponds to the physical plan `Column`. - let idx = column.index(); - let matching_input_field = input_schema.field(idx); - let matching_input_column = Column::new(matching_input_field.name(), idx); - let entry = columns_map.entry(matching_input_column).or_default(); - entry.push(Column::new(name, expr_idx)); - }; - } - - // Output Ordering need to respect the alias - let child_output_ordering = input.output_ordering(); - let output_ordering = match child_output_ordering { - Some(sort_exprs) => { - let normalized_exprs = sort_exprs - .iter() - .map(|sort_expr| { - let expr = normalize_out_expr_with_columns_map( - sort_expr.expr.clone(), - &columns_map, - ); - PhysicalSortExpr { - expr, - options: sort_expr.options, - } - }) - .collect::>(); - Some(normalized_exprs) - } - None => None, - }; - - let orderings = find_orderings_of_exprs( - &expr, - input.output_ordering(), - input.equivalence_properties(), - input.ordering_equivalence_properties(), - )?; + // construct a map from the input expressions to the output expression of the Projection + let projection_mapping = ProjectionMapping::try_new(&expr, &input_schema)?; - let output_ordering = - validate_output_ordering(output_ordering, &orderings, &expr); + let input_eqs = input.equivalence_properties(); + let project_eqs = input_eqs.project(&projection_mapping, schema.clone()); + let output_ordering = project_eqs.oeq_class().output_ordering(); Ok(Self { expr, schema, input, output_ordering, - columns_map, + projection_mapping, metrics: ExecutionPlanMetricsSet::new(), - orderings, }) } @@ -224,11 +173,18 @@ impl ExecutionPlan for ProjectionExec { fn output_partitioning(&self) -> Partitioning { // Output partition need to respect the alias let input_partition = self.input.output_partitioning(); + let input_eq_properties = self.input.equivalence_properties(); if let Partitioning::Hash(exprs, part) = input_partition { let normalized_exprs = exprs .into_iter() - .map(|expr| normalize_out_expr_with_columns_map(expr, &self.columns_map)) - .collect::>(); + .map(|expr| { + input_eq_properties + .project_expr(&expr, &self.projection_mapping) + .unwrap_or_else(|| { + Arc::new(UnKnownColumn::new(&expr.to_string())) + }) + }) + .collect(); Partitioning::Hash(normalized_exprs, part) } else { input_partition @@ -245,58 +201,17 @@ impl ExecutionPlan for 
ProjectionExec { } fn equivalence_properties(&self) -> EquivalenceProperties { - let mut new_properties = EquivalenceProperties::new(self.schema()); - project_equivalence_properties( - self.input.equivalence_properties(), - &self.columns_map, - &mut new_properties, - ); - new_properties - } - - fn ordering_equivalence_properties(&self) -> OrderingEquivalenceProperties { - let mut new_properties = OrderingEquivalenceProperties::new(self.schema()); - if self.output_ordering.is_none() { - // If there is no output ordering, return an "empty" equivalence set: - return new_properties; - } - - let input_oeq = self.input().ordering_equivalence_properties(); - - project_ordering_equivalence_properties( - input_oeq, - &self.columns_map, - &mut new_properties, - ); - - if let Some(leading_ordering) = self - .output_ordering - .as_ref() - .map(|output_ordering| &output_ordering[0]) - { - for order in self.orderings.iter().flatten() { - if !order.eq(leading_ordering) - && !new_properties.satisfies_leading_ordering(order) - { - new_properties.add_equal_conditions(( - &vec![leading_ordering.clone()], - &vec![order.clone()], - )); - } - } - } - - new_properties + self.input + .equivalence_properties() + .project(&self.projection_mapping, self.schema()) } fn with_new_children( self: Arc, - children: Vec>, + mut children: Vec>, ) -> Result> { - Ok(Arc::new(ProjectionExec::try_new( - self.expr.clone(), - children[0].clone(), - )?)) + ProjectionExec::try_new(self.expr.clone(), children.swap_remove(0)) + .map(|p| Arc::new(p) as _) } fn benefits_from_input_partitioning(&self) -> Vec { @@ -336,40 +251,6 @@ impl ExecutionPlan for ProjectionExec { } } -/// This function takes the current `output_ordering`, the `orderings` based on projected expressions, -/// and the `expr` representing the projected expressions themselves. It aims to ensure that the output -/// ordering is valid and correctly corresponds to the projected columns. -/// -/// If the leading expression in the `output_ordering` is an [`UnKnownColumn`], it indicates that the column -/// referenced in the ordering is not found among the projected expressions. In such cases, this function -/// attempts to create a new output ordering by referring to valid columns from the leftmost side of the -/// expressions that have an ordering specified. -fn validate_output_ordering( - output_ordering: Option>, - orderings: &[Option], - expr: &[(Arc, String)], -) -> Option> { - output_ordering.and_then(|ordering| { - // If the leading expression is invalid column, change output - // ordering of the projection so that it refers to valid columns if - // possible. - if ordering[0].expr.as_any().is::() { - for (idx, order) in orderings.iter().enumerate() { - if let Some(sort_expr) = order { - let (_, col_name) = &expr[idx]; - return Some(vec![PhysicalSortExpr { - expr: Arc::new(Column::new(col_name, idx)), - options: sort_expr.options, - }]); - } - } - None - } else { - Some(ordering) - } - }) -} - /// If e is a direct column reference, returns the field level /// metadata for that field, if any. 
Otherwise returns None fn get_field_metadata( @@ -486,6 +367,7 @@ mod tests { use crate::common::collect; use crate::expressions; use crate::test; + use arrow_schema::DataType; use datafusion_common::ScalarValue; diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index 82801bcdfdb3..66f7037e5c2d 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -29,9 +29,7 @@ use crate::hash_utils::create_hashes; use crate::metrics::BaselineMetrics; use crate::repartition::distributor_channels::{channels, partition_aware_channels}; use crate::sorts::streaming_merge; -use crate::{ - DisplayFormatType, EquivalenceProperties, ExecutionPlan, Partitioning, Statistics, -}; +use crate::{DisplayFormatType, ExecutionPlan, Partitioning, Statistics}; use self::distributor_channels::{DistributionReceiver, DistributionSender}; @@ -46,7 +44,7 @@ use arrow::record_batch::RecordBatch; use datafusion_common::{not_impl_err, DataFusionError, Result}; use datafusion_execution::memory_pool::MemoryConsumer; use datafusion_execution::TaskContext; -use datafusion_physical_expr::{OrderingEquivalenceProperties, PhysicalExpr}; +use datafusion_physical_expr::{EquivalenceProperties, PhysicalExpr}; use futures::stream::Stream; use futures::{FutureExt, StreamExt}; @@ -427,12 +425,11 @@ impl ExecutionPlan for RepartitionExec { fn with_new_children( self: Arc, - children: Vec>, + mut children: Vec>, ) -> Result> { let repartition = - RepartitionExec::try_new(children[0].clone(), self.partitioning.clone())? - .with_preserve_order(self.preserve_order); - Ok(Arc::new(repartition)) + RepartitionExec::try_new(children.swap_remove(0), self.partitioning.clone()); + repartition.map(|r| Arc::new(r.with_preserve_order(self.preserve_order)) as _) } /// Specifies whether this plan generates an infinite stream of records. @@ -468,11 +465,15 @@ impl ExecutionPlan for RepartitionExec { } fn equivalence_properties(&self) -> EquivalenceProperties { - self.input.equivalence_properties() - } - - fn ordering_equivalence_properties(&self) -> OrderingEquivalenceProperties { - self.input.ordering_equivalence_properties() + let mut result = self.input.equivalence_properties(); + // If the ordering is lost, reset the ordering equivalence class. + if !self.maintains_input_order()[0] { + result.clear_orderings(); + } + if self.preserve_order { + result = result.with_reorder(self.sort_exprs().unwrap_or_default().to_vec()) + } + result } fn execute( diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index c7d676493f04..2d8237011fff 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -735,7 +735,13 @@ impl SortExec { self } - /// Whether this `SortExec` preserves partitioning of the children + /// Modify how many rows to include in the result + /// + /// If None, then all rows will be returned, in sorted order. + /// If Some, then only the top `fetch` rows will be returned. + /// This can reduce the memory pressure required by the sort + /// operation since rows that are not going to be included + /// can be dropped. 
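A minimal standalone sketch of the fetch semantics (operating on a plain `Vec<i64>` rather than record batches; `sort_with_fetch` is a hypothetical name, not how `SortExec` is actually implemented):

```rust
/// Conceptual sketch: a fetch limit means only the first `fetch` rows of the
/// sorted output are ever produced, so rows beyond the limit can be dropped
/// early instead of being buffered by the sort.
fn sort_with_fetch(mut rows: Vec<i64>, fetch: Option<usize>) -> Vec<i64> {
    rows.sort_unstable();
    if let Some(fetch) = fetch {
        rows.truncate(fetch);
    }
    rows
}

fn main() {
    assert_eq!(sort_with_fetch(vec![5, 1, 4, 2], Some(2)), vec![1, 2]);
    assert_eq!(sort_with_fetch(vec![5, 1, 4, 2], None), vec![1, 2, 4, 5]);
}
```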
pub fn with_fetch(mut self, fetch: Option) -> Self { self.fetch = fetch; self @@ -829,7 +835,10 @@ impl ExecutionPlan for SortExec { } fn equivalence_properties(&self) -> EquivalenceProperties { - self.input.equivalence_properties() + // Reset the ordering equivalence class with the new ordering: + self.input + .equivalence_properties() + .with_reorder(self.expr.to_vec()) } fn with_new_children( diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs index ee044d9172fb..65cd8e41480e 100644 --- a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs +++ b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs @@ -33,9 +33,7 @@ use arrow::datatypes::SchemaRef; use datafusion_common::{internal_err, DataFusionError, Result}; use datafusion_execution::memory_pool::MemoryConsumer; use datafusion_execution::TaskContext; -use datafusion_physical_expr::{ - EquivalenceProperties, OrderingEquivalenceProperties, PhysicalSortRequirement, -}; +use datafusion_physical_expr::{EquivalenceProperties, PhysicalSortRequirement}; use log::{debug, trace}; @@ -176,11 +174,8 @@ impl ExecutionPlan for SortPreservingMergeExec { } fn equivalence_properties(&self) -> EquivalenceProperties { - self.input.equivalence_properties() - } - - fn ordering_equivalence_properties(&self) -> OrderingEquivalenceProperties { - self.input.ordering_equivalence_properties() + let output_oeq = self.input.equivalence_properties(); + output_oeq.with_reorder(self.expr.to_vec()) } fn children(&self) -> Vec> { @@ -271,6 +266,8 @@ impl ExecutionPlan for SortPreservingMergeExec { #[cfg(test)] mod tests { + use std::iter::FromIterator; + use super::*; use crate::coalesce_partitions::CoalescePartitionsExec; use crate::expressions::col; @@ -281,8 +278,8 @@ mod tests { use crate::test::exec::{assert_strong_count_converges_to_zero, BlockingExec}; use crate::test::{self, assert_is_pending, make_partition}; use crate::{collect, common}; - use arrow::array::ArrayRef; - use arrow::array::{Int32Array, StringArray, TimestampNanosecondArray}; + + use arrow::array::{ArrayRef, Int32Array, StringArray, TimestampNanosecondArray}; use arrow::compute::SortOptions; use arrow::datatypes::{DataType, Field, Schema}; use arrow::record_batch::RecordBatch; @@ -290,7 +287,6 @@ mod tests { use datafusion_execution::config::SessionConfig; use futures::{FutureExt, StreamExt}; - use std::iter::FromIterator; #[tokio::test] async fn test_merge_interleave() { diff --git a/datafusion/physical-plan/src/streaming.rs b/datafusion/physical-plan/src/streaming.rs index 27f03b727c29..1923a5f3abad 100644 --- a/datafusion/physical-plan/src/streaming.rs +++ b/datafusion/physical-plan/src/streaming.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! Execution plan for streaming [`PartitionStream`] +//! 
Generic plans for deferred execution: [`StreamingTableExec`] and [`PartitionStream`] use std::any::Any; use std::sync::Arc; @@ -28,13 +28,17 @@ use crate::{ExecutionPlan, Partitioning, SendableRecordBatchStream}; use arrow::datatypes::SchemaRef; use datafusion_common::{internal_err, plan_err, DataFusionError, Result}; use datafusion_execution::TaskContext; -use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr}; +use datafusion_physical_expr::{EquivalenceProperties, LexOrdering, PhysicalSortExpr}; use async_trait::async_trait; use futures::stream::StreamExt; use log::debug; /// A partition that can be converted into a [`SendableRecordBatchStream`] +/// +/// Combined with [`StreamingTableExec`], you can use this trait to implement +/// [`ExecutionPlan`] for a custom source with less boiler plate than +/// implementing `ExecutionPlan` directly for many use cases. pub trait PartitionStream: Send + Sync { /// Returns the schema of this partition fn schema(&self) -> &SchemaRef; @@ -43,7 +47,10 @@ pub trait PartitionStream: Send + Sync { fn execute(&self, ctx: Arc) -> SendableRecordBatchStream; } -/// An [`ExecutionPlan`] for [`PartitionStream`] +/// An [`ExecutionPlan`] for one or more [`PartitionStream`]s. +/// +/// If your source can be represented as one or more [`PartitionStream`]s, you can +/// use this struct to implement [`ExecutionPlan`]. pub struct StreamingTableExec { partitions: Vec>, projection: Option>, @@ -156,6 +163,14 @@ impl ExecutionPlan for StreamingTableExec { self.projected_output_ordering.as_deref() } + fn equivalence_properties(&self) -> EquivalenceProperties { + let mut result = EquivalenceProperties::new(self.schema()); + if let Some(ordering) = &self.projected_output_ordering { + result.add_new_orderings([ordering.clone()]) + } + result + } + fn children(&self) -> Vec> { vec![] } diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index 2727a6e161be..9700605ce406 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -40,6 +40,7 @@ use arrow::record_batch::RecordBatch; use datafusion_common::stats::Precision; use datafusion_common::{exec_err, internal_err, DFSchemaRef, DataFusionError, Result}; use datafusion_execution::TaskContext; +use datafusion_physical_expr::EquivalenceProperties; use futures::Stream; use itertools::Itertools; @@ -222,6 +223,46 @@ impl ExecutionPlan for UnionExec { } } + fn equivalence_properties(&self) -> EquivalenceProperties { + // TODO: In some cases, we should be able to preserve some equivalence + // classes and constants. Add support for such cases. + let children_eqs = self + .inputs + .iter() + .map(|child| child.equivalence_properties()) + .collect::>(); + let mut result = EquivalenceProperties::new(self.schema()); + // Use the ordering equivalence class of the first child as the seed: + let mut meets = children_eqs[0] + .oeq_class() + .iter() + .map(|item| item.to_vec()) + .collect::>(); + // Iterate over all the children: + for child_eqs in &children_eqs[1..] { + // Compute meet orderings of the current meets and the new ordering + // equivalence class. 
+ let mut idx = 0; + while idx < meets.len() { + // Find all the meets of `current_meet` with this child's orderings: + let valid_meets = child_eqs.oeq_class().iter().filter_map(|ordering| { + child_eqs.get_meet_ordering(ordering, &meets[idx]) + }); + // Use the longest of these meets as others are redundant: + if let Some(next_meet) = valid_meets.max_by_key(|m| m.len()) { + meets[idx] = next_meet; + idx += 1; + } else { + meets.swap_remove(idx); + } + } + } + // We know have all the valid orderings after union, remove redundant + // entries (implicitly) and return: + result.add_new_orderings(meets); + result + } + fn with_new_children( self: Arc, children: Vec>, @@ -596,10 +637,41 @@ fn stats_union(mut left: Statistics, right: Statistics) -> Statistics { mod tests { use super::*; use crate::collect; + use crate::memory::MemoryExec; use crate::test; use arrow::record_batch::RecordBatch; + use arrow_schema::{DataType, SortOptions}; use datafusion_common::ScalarValue; + use datafusion_physical_expr::expressions::col; + use datafusion_physical_expr::PhysicalExpr; + + // Generate a schema which consists of 7 columns (a, b, c, d, e, f, g) + fn create_test_schema() -> Result { + let a = Field::new("a", DataType::Int32, true); + let b = Field::new("b", DataType::Int32, true); + let c = Field::new("c", DataType::Int32, true); + let d = Field::new("d", DataType::Int32, true); + let e = Field::new("e", DataType::Int32, true); + let f = Field::new("f", DataType::Int32, true); + let g = Field::new("g", DataType::Int32, true); + let schema = Arc::new(Schema::new(vec![a, b, c, d, e, f, g])); + + Ok(schema) + } + + // Convert each tuple to PhysicalSortExpr + fn convert_to_sort_exprs( + in_data: &[(&Arc, SortOptions)], + ) -> Vec { + in_data + .iter() + .map(|(expr, options)| PhysicalSortExpr { + expr: (*expr).clone(), + options: *options, + }) + .collect::>() + } #[tokio::test] async fn test_union_partitions() -> Result<()> { @@ -712,4 +784,105 @@ mod tests { assert_eq!(result, expected); } + + #[tokio::test] + async fn test_union_equivalence_properties() -> Result<()> { + let schema = create_test_schema()?; + let col_a = &col("a", &schema)?; + let col_b = &col("b", &schema)?; + let col_c = &col("c", &schema)?; + let col_d = &col("d", &schema)?; + let col_e = &col("e", &schema)?; + let col_f = &col("f", &schema)?; + let options = SortOptions::default(); + let test_cases = vec![ + //-----------TEST CASE 1----------// + ( + // First child orderings + vec![ + // [a ASC, b ASC, f ASC] + vec![(col_a, options), (col_b, options), (col_f, options)], + ], + // Second child orderings + vec![ + // [a ASC, b ASC, c ASC] + vec![(col_a, options), (col_b, options), (col_c, options)], + // [a ASC, b ASC, f ASC] + vec![(col_a, options), (col_b, options), (col_f, options)], + ], + // Union output orderings + vec![ + // [a ASC, b ASC, f ASC] + vec![(col_a, options), (col_b, options), (col_f, options)], + ], + ), + //-----------TEST CASE 2----------// + ( + // First child orderings + vec![ + // [a ASC, b ASC, f ASC] + vec![(col_a, options), (col_b, options), (col_f, options)], + // d ASC + vec![(col_d, options)], + ], + // Second child orderings + vec![ + // [a ASC, b ASC, c ASC] + vec![(col_a, options), (col_b, options), (col_c, options)], + // [e ASC] + vec![(col_e, options)], + ], + // Union output orderings + vec![ + // [a ASC, b ASC] + vec![(col_a, options), (col_b, options)], + ], + ), + ]; + + for ( + test_idx, + (first_child_orderings, second_child_orderings, union_orderings), + ) in test_cases.iter().enumerate() + 
{ + let first_orderings = first_child_orderings + .iter() + .map(|ordering| convert_to_sort_exprs(ordering)) + .collect::>(); + let second_orderings = second_child_orderings + .iter() + .map(|ordering| convert_to_sort_exprs(ordering)) + .collect::>(); + let union_expected_orderings = union_orderings + .iter() + .map(|ordering| convert_to_sort_exprs(ordering)) + .collect::>(); + let child1 = Arc::new( + MemoryExec::try_new(&[], schema.clone(), None)? + .with_sort_information(first_orderings), + ); + let child2 = Arc::new( + MemoryExec::try_new(&[], schema.clone(), None)? + .with_sort_information(second_orderings), + ); + + let union = UnionExec::new(vec![child1, child2]); + let union_eq_properties = union.equivalence_properties(); + let union_actual_orderings = union_eq_properties.oeq_class(); + let err_msg = format!( + "Error in test id: {:?}, test case: {:?}", + test_idx, test_cases[test_idx] + ); + assert_eq!( + union_actual_orderings.len(), + union_expected_orderings.len(), + "{}", + err_msg + ); + for expected in &union_expected_orderings { + assert!(union_actual_orderings.contains(expected), "{}", err_msg); + } + } + Ok(()) + } } diff --git a/datafusion/physical-plan/src/unnest.rs b/datafusion/physical-plan/src/unnest.rs index 30f109953cbb..c9f3fb76c2e5 100644 --- a/datafusion/physical-plan/src/unnest.rs +++ b/datafusion/physical-plan/src/unnest.rs @@ -23,9 +23,8 @@ use std::{any::Any, sync::Arc}; use super::DisplayAs; use crate::{ - expressions::Column, DisplayFormatType, Distribution, EquivalenceProperties, - ExecutionPlan, Partitioning, PhysicalExpr, PhysicalSortExpr, RecordBatchStream, - SendableRecordBatchStream, + expressions::Column, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, + PhysicalExpr, PhysicalSortExpr, RecordBatchStream, SendableRecordBatchStream, }; use arrow::array::{ @@ -136,10 +135,6 @@ impl ExecutionPlan for UnnestExec { None } - fn equivalence_properties(&self) -> EquivalenceProperties { - self.input.equivalence_properties() - } - fn execute( &self, partition: usize, diff --git a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs index f6ffe2e26795..fb679b013863 100644 --- a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs @@ -30,8 +30,8 @@ use std::task::{Context, Poll}; use crate::expressions::PhysicalSortExpr; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use crate::windows::{ - calc_requirements, get_ordered_partition_by_indices, window_ordering_equivalence, - PartitionSearchMode, + calc_requirements, get_ordered_partition_by_indices, get_partition_by_sort_exprs, + window_equivalence_properties, PartitionSearchMode, }; use crate::{ ColumnStatistics, DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, @@ -58,8 +58,7 @@ use datafusion_physical_expr::window::{ PartitionBatches, PartitionKey, PartitionWindowAggStates, WindowState, }; use datafusion_physical_expr::{ - EquivalenceProperties, OrderingEquivalenceProperties, PhysicalExpr, - PhysicalSortRequirement, + EquivalenceProperties, PhysicalExpr, PhysicalSortRequirement, }; use ahash::RandomState; @@ -150,9 +149,12 @@ impl BoundedWindowAggExec { // Hence returned `PhysicalSortExpr` corresponding to `PARTITION BY` columns can be used safely // to calculate partition separation points pub fn partition_by_sort_keys(&self) -> Result> { - // Partition by sort keys indices are stored in 
self.ordered_partition_by_indices. - let sort_keys = self.input.output_ordering().unwrap_or(&[]); - get_at_indices(sort_keys, &self.ordered_partition_by_indices) + let partition_by = self.window_expr()[0].partition_by(); + get_partition_by_sort_exprs( + &self.input, + partition_by, + &self.ordered_partition_by_indices, + ) } /// Initializes the appropriate [`PartitionSearcher`] implementation from @@ -264,13 +266,9 @@ impl ExecutionPlan for BoundedWindowAggExec { } } + /// Get the [`EquivalenceProperties`] within the plan fn equivalence_properties(&self) -> EquivalenceProperties { - self.input().equivalence_properties() - } - - /// Get the OrderingEquivalenceProperties within the plan - fn ordering_equivalence_properties(&self) -> OrderingEquivalenceProperties { - window_ordering_equivalence(&self.schema, &self.input, &self.window_expr) + window_equivalence_properties(&self.schema, &self.input, &self.window_expr) } fn maintains_input_order(&self) -> Vec { diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index aff936499a5e..b6ed6e482ff5 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -32,24 +32,18 @@ use crate::{ use arrow::datatypes::Schema; use arrow_schema::{DataType, Field, SchemaRef}; -use datafusion_common::utils::{ - find_indices, get_at_indices, is_sorted, longest_consecutive_prefix, - merge_and_order_indices, set_difference, -}; -use datafusion_common::{DataFusionError, Result, ScalarValue}; +use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue}; use datafusion_expr::{ window_function::{BuiltInWindowFunction, WindowFunction}, PartitionEvaluator, WindowFrame, WindowUDF, }; +use datafusion_physical_expr::equivalence::collapse_lex_req; use datafusion_physical_expr::{ - equivalence::OrderingEquivalenceBuilder, - utils::{convert_to_expr, get_indices_of_matching_exprs}, + reverse_order_bys, window::{BuiltInWindowFunctionExpr, SlidingAggregateWindowExpr}, - AggregateExpr, OrderingEquivalenceProperties, PhysicalSortRequirement, + AggregateExpr, EquivalenceProperties, LexOrdering, PhysicalSortRequirement, }; -use itertools::{izip, Itertools}; - mod bounded_window_agg_exec; mod window_agg_exec; @@ -61,13 +55,26 @@ pub use datafusion_physical_expr::window::{ }; #[derive(Debug, Clone, PartialEq)] -/// Specifies partition column properties in terms of input ordering +/// Specifies aggregation grouping and/or window partitioning properties of a +/// set of expressions in terms of the existing ordering. +/// For example, if the existing ordering is `[a ASC, b ASC, c ASC]`: +/// - A `PARTITION BY b` clause will result in `Linear` mode. +/// - A `PARTITION BY a, c` or a `PARTITION BY c, a` clause will result in +/// `PartiallySorted([0])` or `PartiallySorted([1])` modes, respectively. +/// The vector stores the index of `a` in the respective PARTITION BY expression. +/// - A `PARTITION BY a, b` or a `PARTITION BY b, a` clause will result in +/// `Sorted` mode. +/// Note that the examples above are applicable for `GROUP BY` clauses too. pub enum PartitionSearchMode { - /// None of the columns among the partition columns is ordered. + /// There is no partial permutation of the expressions satisfying the + /// existing ordering. Linear, - /// Some columns of the partition columns are ordered but not all + /// There is a partial permutation of the expressions satisfying the + /// existing ordering. 
Indices describing the longest partial permutation + /// are stored in the vector. PartiallySorted(Vec), - /// All Partition columns are ordered (Also empty case) + /// There is a (full) permutation of the expressions satisfying the + /// existing ordering. Sorted, } @@ -331,45 +338,51 @@ pub(crate) fn get_ordered_partition_by_indices( partition_by_exprs: &[Arc], input: &Arc, ) -> Vec { - let input_ordering = input.output_ordering().unwrap_or(&[]); - let input_ordering_exprs = convert_to_expr(input_ordering); - let equal_properties = || input.equivalence_properties(); - let input_places = get_indices_of_matching_exprs( - &input_ordering_exprs, - partition_by_exprs, - equal_properties, - ); - let mut partition_places = get_indices_of_matching_exprs( - partition_by_exprs, - &input_ordering_exprs, - equal_properties, - ); - partition_places.sort(); - let first_n = longest_consecutive_prefix(partition_places); - input_places[0..first_n].to_vec() + let (_, indices) = input + .equivalence_properties() + .find_longest_permutation(partition_by_exprs); + indices } -pub(crate) fn window_ordering_equivalence( +pub(crate) fn get_partition_by_sort_exprs( + input: &Arc, + partition_by_exprs: &[Arc], + ordered_partition_by_indices: &[usize], +) -> Result { + let ordered_partition_exprs = ordered_partition_by_indices + .iter() + .map(|idx| partition_by_exprs[*idx].clone()) + .collect::>(); + // Make sure ordered section doesn't move over the partition by expression + assert!(ordered_partition_by_indices.len() <= partition_by_exprs.len()); + let (ordering, _) = input + .equivalence_properties() + .find_longest_permutation(&ordered_partition_exprs); + if ordering.len() == ordered_partition_exprs.len() { + Ok(ordering) + } else { + exec_err!("Expects PARTITION BY expression to be ordered") + } +} + +pub(crate) fn window_equivalence_properties( schema: &SchemaRef, input: &Arc, window_expr: &[Arc], -) -> OrderingEquivalenceProperties { +) -> EquivalenceProperties { // We need to update the schema, so we can not directly use - // `input.ordering_equivalence_properties()`. - let mut builder = OrderingEquivalenceBuilder::new(schema.clone()) - .with_equivalences(input.equivalence_properties()) - .with_existing_ordering(input.output_ordering().map(|elem| elem.to_vec())) - .extend(input.ordering_equivalence_properties()); + // `input.equivalence_properties()`. + let mut window_eq_properties = + EquivalenceProperties::new(schema.clone()).extend(input.equivalence_properties()); for expr in window_expr { if let Some(builtin_window_expr) = expr.as_any().downcast_ref::() { - builtin_window_expr - .add_equal_orderings(&mut builder, || input.equivalence_properties()); + builtin_window_expr.add_equal_orderings(&mut window_eq_properties); } } - builder.build() + window_eq_properties } /// Constructs the best-fitting windowing operator (a `WindowAggExec` or a @@ -396,7 +409,7 @@ pub fn get_best_fitting_window( let orderby_keys = window_exprs[0].order_by(); let (should_reverse, partition_search_mode) = if let Some((should_reverse, partition_search_mode)) = - can_skip_sort(partitionby_exprs, orderby_keys, input)? + get_window_mode(partitionby_exprs, orderby_keys, input)? { (should_reverse, partition_search_mode) } else { @@ -459,149 +472,41 @@ pub fn get_best_fitting_window( /// The `bool` field in the return value represents whether we should reverse window /// operator to remove `SortExec` before it. 
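As a rough illustration of how the partition search mode is derived, here is a standalone sketch using plain column names; the actual `get_window_mode` below works on `PhysicalExpr`s via `EquivalenceProperties::find_longest_permutation` and also considers reversed ORDER BY requirements.

```rust
#[derive(Debug, PartialEq)]
enum Mode {
    Linear,
    PartiallySorted(Vec<usize>),
    Sorted,
}

/// Walk the existing ordering from the left and greedily match each ordered
/// column against the PARTITION BY list; the collected indices point back into
/// the PARTITION BY expressions, as in the enum documentation above.
fn classify(ordering: &[&str], partition_by: &[&str]) -> Mode {
    let mut indices = Vec::new();
    for col in ordering {
        match partition_by.iter().position(|pb| pb == col) {
            Some(idx) if !indices.contains(&idx) => indices.push(idx),
            _ => break,
        }
    }
    if indices.len() == partition_by.len() {
        Mode::Sorted
    } else if indices.is_empty() {
        Mode::Linear
    } else {
        Mode::PartiallySorted(indices)
    }
}

fn main() {
    // Existing ordering: [a ASC, b ASC, c ASC], as in the enum documentation.
    let ordering = ["a", "b", "c"];
    assert_eq!(classify(&ordering, &["b"]), Mode::Linear);
    assert_eq!(classify(&ordering, &["a", "c"]), Mode::PartiallySorted(vec![0]));
    assert_eq!(classify(&ordering, &["c", "a"]), Mode::PartiallySorted(vec![1]));
    assert_eq!(classify(&ordering, &["b", "a"]), Mode::Sorted);
}
```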
The `PartitionSearchMode` field represents /// the mode this window operator should work in to accomodate the existing ordering. -fn can_skip_sort( +pub fn get_window_mode( partitionby_exprs: &[Arc], orderby_keys: &[PhysicalSortExpr], input: &Arc, ) -> Result> { - let physical_ordering = if let Some(physical_ordering) = input.output_ordering() { - physical_ordering - } else { - // If there is no physical ordering, there is no way to remove a - // sort, so immediately return. - return Ok(None); - }; - let orderby_exprs = convert_to_expr(orderby_keys); - let physical_ordering_exprs = convert_to_expr(physical_ordering); - let equal_properties = || input.equivalence_properties(); - // Get the indices of the ORDER BY expressions among input ordering expressions: - let ob_indices = get_indices_of_matching_exprs( - &orderby_exprs, - &physical_ordering_exprs, - equal_properties, - ); - if ob_indices.len() != orderby_exprs.len() { - // If all order by expressions are not in the input ordering, - // there is no way to remove a sort -- immediately return: - return Ok(None); - } - // Get the indices of the PARTITION BY expressions among input ordering expressions: - let pb_indices = get_indices_of_matching_exprs( - partitionby_exprs, - &physical_ordering_exprs, - equal_properties, - ); - let ordered_merged_indices = merge_and_order_indices(&pb_indices, &ob_indices); - // Get the indices of the ORDER BY columns that don't appear in the - // PARTITION BY clause; i.e. calculate (ORDER BY columns) ∖ (PARTITION - // BY columns) where `∖` represents set difference. - let unique_ob_indices = set_difference(&ob_indices, &pb_indices); - if !is_sorted(&unique_ob_indices) { - // ORDER BY indices should be ascending ordered - return Ok(None); - } - let first_n = longest_consecutive_prefix(ordered_merged_indices); - let furthest_ob_index = *unique_ob_indices.last().unwrap_or(&0); - // Cannot skip sort if last order by index is not within consecutive prefix. - // For instance, if input is ordered by a, b, c, d for the expression - // `PARTITION BY a, ORDER BY b, d`, then `first_n` would be 2 (meaning a, b defines a - // prefix for input ordering). However, `furthest_ob_index` would be 3 as column d - // occurs at the 3rd index of the existing ordering. Hence, existing ordering would - // not be sufficient to run the current operator. - // However, for expression `PARTITION BY a, ORDER BY b, c, d`, `first_n` would be 4 (meaning - // a, b, c, d defines a prefix for input ordering). Similarly, `furthest_ob_index` would be - // 3 as column d occurs at the 3rd index of the existing ordering. Therefore, the existing - // ordering would be sufficient to run the current operator. - if first_n <= furthest_ob_index { - return Ok(None); - } - let input_orderby_columns = get_at_indices(physical_ordering, &unique_ob_indices)?; - let expected_orderby_columns = - get_at_indices(orderby_keys, find_indices(&ob_indices, &unique_ob_indices)?)?; - let should_reverse = if let Some(should_reverse) = check_alignments( - &input.schema(), - &input_orderby_columns, - &expected_orderby_columns, - )? { - should_reverse - } else { - // If ordering directions are not aligned, we cannot calculate the - // result without changing existing ordering. - return Ok(None); - }; - - let ordered_pb_indices = pb_indices.iter().copied().sorted().collect::>(); - // Determine how many elements in the PARTITION BY columns defines a consecutive range from zero. 
- let first_n = longest_consecutive_prefix(&ordered_pb_indices); - let mode = if first_n == partitionby_exprs.len() { - // All of the PARTITION BY columns defines a consecutive range from zero. - PartitionSearchMode::Sorted - } else if first_n > 0 { - // All of the PARTITION BY columns defines a consecutive range from zero. - let ordered_range = &ordered_pb_indices[0..first_n]; - let input_pb_exprs = get_at_indices(&physical_ordering_exprs, ordered_range)?; - let partially_ordered_indices = get_indices_of_matching_exprs( - &input_pb_exprs, - partitionby_exprs, - equal_properties, - ); - PartitionSearchMode::PartiallySorted(partially_ordered_indices) - } else { - // None of the PARTITION BY columns defines a consecutive range from zero. - PartitionSearchMode::Linear - }; - - Ok(Some((should_reverse, mode))) -} - -/// Compares all the orderings in `physical_ordering` and `required`, decides -/// whether alignments match. A `None` return value indicates that current -/// column is not aligned. A `Some(bool)` value indicates otherwise, and signals -/// whether we should reverse the window expression in order to avoid sorting. -fn check_alignments( - schema: &SchemaRef, - physical_ordering: &[PhysicalSortExpr], - required: &[PhysicalSortExpr], -) -> Result> { - let result = izip!(physical_ordering, required) - .map(|(lhs, rhs)| check_alignment(schema, lhs, rhs)) - .collect::>>>()?; - Ok(if let Some(res) = result { - if !res.is_empty() { - let first = res[0]; - let all_same = res.into_iter().all(|elem| elem == first); - all_same.then_some(first) - } else { - Some(false) - } - } else { - // Cannot skip some of the requirements in the input. - None - }) -} - -/// Compares `physical_ordering` and `required` ordering, decides whether -/// alignments match. A `None` return value indicates that current column is -/// not aligned. A `Some(bool)` value indicates otherwise, and signals whether -/// we should reverse the window expression in order to avoid sorting. -fn check_alignment( - input_schema: &SchemaRef, - physical_ordering: &PhysicalSortExpr, - required: &PhysicalSortExpr, -) -> Result> { - Ok(if required.expr.eq(&physical_ordering.expr) { - let physical_opts = physical_ordering.options; - let required_opts = required.options; - if required.expr.nullable(input_schema)? { - let reverse = physical_opts == !required_opts; - (reverse || physical_opts == required_opts).then_some(reverse) - } else { - // If the column is not nullable, NULLS FIRST/LAST is not important. - Some(physical_opts.descending != required_opts.descending) + let input_eqs = input.equivalence_properties(); + let mut partition_by_reqs: Vec = vec![]; + let (_, indices) = input_eqs.find_longest_permutation(partitionby_exprs); + partition_by_reqs.extend(indices.iter().map(|&idx| PhysicalSortRequirement { + expr: partitionby_exprs[idx].clone(), + options: None, + })); + // Treat partition by exprs as constants when analyzing whether the ordering requirements are satisfied.
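The constants trick can be illustrated with a small standalone sketch (column names only; the real check goes through `ordering_satisfy_requirement` on `EquivalenceProperties` and keeps the partition-by requirements with `options: None`): columns that are constant within one window partition can simply be skipped when matching the remaining requirement against the existing ordering.

```rust
/// Simplified model: `required` is satisfied by `existing` if, after dropping
/// columns that are constant within a partition (the PARTITION BY columns),
/// the remaining requirement is a prefix of the remaining existing ordering.
fn satisfied(existing: &[&str], required: &[&str], constants: &[&str]) -> bool {
    let mut remaining = existing.iter().copied().filter(|c| !constants.contains(c));
    required
        .iter()
        .copied()
        .filter(|c| !constants.contains(c))
        .all(|req| remaining.next() == Some(req))
}

fn main() {
    // Input ordered by [a, b]: `PARTITION BY a ORDER BY b` needs no extra sort,
    // because `a` does not vary inside a partition.
    assert!(satisfied(&["a", "b"], &["b"], &["a"]));
    // `ORDER BY c` is not implied by an [a, b] ordering.
    assert!(!satisfied(&["a", "b"], &["c"], &["a"]));
}
```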
+ let partition_by_eqs = input_eqs.add_constants(partitionby_exprs.iter().cloned()); + let order_by_reqs = PhysicalSortRequirement::from_sort_exprs(orderby_keys); + let reverse_order_by_reqs = + PhysicalSortRequirement::from_sort_exprs(&reverse_order_bys(orderby_keys)); + for (should_swap, order_by_reqs) in + [(false, order_by_reqs), (true, reverse_order_by_reqs)] + { + let req = [partition_by_reqs.clone(), order_by_reqs].concat(); + let req = collapse_lex_req(req); + if partition_by_eqs.ordering_satisfy_requirement(&req) { + // Window can be run with existing ordering + let mode = if indices.len() == partitionby_exprs.len() { + PartitionSearchMode::Sorted + } else if indices.is_empty() { + PartitionSearchMode::Linear + } else { + PartitionSearchMode::PartiallySorted(indices) + }; + return Ok(Some((should_swap, mode))); } - } else { - None - }) + } + Ok(None) } #[cfg(test)] @@ -613,7 +518,6 @@ mod tests { use crate::streaming::StreamingTableExec; use crate::test::assert_is_pending; use crate::test::exec::{assert_strong_count_converges_to_zero, BlockingExec}; - use crate::windows::PartitionSearchMode::{Linear, PartiallySorted, Sorted}; use arrow::compute::SortOptions; use arrow::datatypes::{DataType, Field, SchemaRef}; @@ -621,6 +525,8 @@ mod tests { use futures::FutureExt; + use PartitionSearchMode::{Linear, PartiallySorted, Sorted}; + fn create_test_schema() -> Result { let nullable_column = Field::new("nullable_col", DataType::Int32, true); let non_nullable_column = Field::new("non_nullable_col", DataType::Int32, false); @@ -781,15 +687,16 @@ mod tests { } #[tokio::test] - async fn test_is_column_aligned_nullable() -> Result<()> { + async fn test_satisfiy_nullable() -> Result<()> { let schema = create_test_schema()?; let params = vec![ - ((true, true), (false, false), Some(true)), - ((true, true), (false, true), None), - ((true, true), (true, false), None), - ((true, false), (false, true), Some(true)), - ((true, false), (false, false), None), - ((true, false), (true, true), None), + ((true, true), (false, false), false), + ((true, true), (false, true), false), + ((true, true), (true, false), false), + ((true, false), (false, true), false), + ((true, false), (false, false), false), + ((true, false), (true, true), false), + ((true, false), (true, false), true), ]; for ( (physical_desc, physical_nulls_first), @@ -811,7 +718,7 @@ mod tests { nulls_first: req_nulls_first, }, }; - let res = check_alignment(&schema, &physical_ordering, &required_ordering)?; + let res = physical_ordering.satisfy(&required_ordering.into(), &schema); assert_eq!(res, expected); } @@ -819,16 +726,17 @@ mod tests { } #[tokio::test] - async fn test_is_column_aligned_non_nullable() -> Result<()> { + async fn test_satisfy_non_nullable() -> Result<()> { let schema = create_test_schema()?; let params = vec![ - ((true, true), (false, false), Some(true)), - ((true, true), (false, true), Some(true)), - ((true, true), (true, false), Some(false)), - ((true, false), (false, true), Some(true)), - ((true, false), (false, false), Some(true)), - ((true, false), (true, true), Some(false)), + ((true, true), (false, false), false), + ((true, true), (false, true), false), + ((true, true), (true, false), true), + ((true, false), (false, true), false), + ((true, false), (false, false), false), + ((true, false), (true, true), true), + ((true, false), (true, false), true), ]; for ( (physical_desc, physical_nulls_first), @@ -850,7 +758,7 @@ mod tests { nulls_first: req_nulls_first, }, }; - let res = check_alignment(&schema, 
&physical_ordering, &required_ordering)?; + let res = physical_ordering.satisfy(&required_ordering.into(), &schema); assert_eq!(res, expected); } @@ -858,7 +766,7 @@ mod tests { } #[tokio::test] - async fn test_can_skip_ordering_exhaustive() -> Result<()> { + async fn test_get_window_mode_exhaustive() -> Result<()> { let test_schema = create_test_schema3()?; // Columns a,c are nullable whereas b,d are not nullable. // Source is sorted by a ASC NULLS FIRST, b ASC NULLS FIRST, c ASC NULLS FIRST, d ASC NULLS FIRST @@ -891,7 +799,7 @@ mod tests { (vec!["a"], vec!["a", "c"], None), (vec!["a"], vec!["a", "b", "c"], Some(Sorted)), (vec!["b"], vec!["a"], Some(Linear)), - (vec!["b"], vec!["b"], None), + (vec!["b"], vec!["b"], Some(Linear)), (vec!["b"], vec!["c"], None), (vec!["b"], vec!["a", "b"], Some(Linear)), (vec!["b"], vec!["b", "c"], None), @@ -899,7 +807,7 @@ mod tests { (vec!["b"], vec!["a", "b", "c"], Some(Linear)), (vec!["c"], vec!["a"], Some(Linear)), (vec!["c"], vec!["b"], None), - (vec!["c"], vec!["c"], None), + (vec!["c"], vec!["c"], Some(Linear)), (vec!["c"], vec!["a", "b"], Some(Linear)), (vec!["c"], vec!["b", "c"], None), (vec!["c"], vec!["a", "c"], Some(Linear)), @@ -912,10 +820,10 @@ mod tests { (vec!["b", "a"], vec!["a", "c"], Some(Sorted)), (vec!["b", "a"], vec!["a", "b", "c"], Some(Sorted)), (vec!["c", "b"], vec!["a"], Some(Linear)), - (vec!["c", "b"], vec!["b"], None), - (vec!["c", "b"], vec!["c"], None), + (vec!["c", "b"], vec!["b"], Some(Linear)), + (vec!["c", "b"], vec!["c"], Some(Linear)), (vec!["c", "b"], vec!["a", "b"], Some(Linear)), - (vec!["c", "b"], vec!["b", "c"], None), + (vec!["c", "b"], vec!["b", "c"], Some(Linear)), (vec!["c", "b"], vec!["a", "c"], Some(Linear)), (vec!["c", "b"], vec!["a", "b", "c"], Some(Linear)), (vec!["c", "a"], vec!["a"], Some(PartiallySorted(vec![1]))), @@ -965,7 +873,7 @@ mod tests { order_by_exprs.push(PhysicalSortExpr { expr, options }); } let res = - can_skip_sort(&partition_by_exprs, &order_by_exprs, &exec_unbounded)?; + get_window_mode(&partition_by_exprs, &order_by_exprs, &exec_unbounded)?; // Since reversibility is not important in this test. Convert Option<(bool, PartitionSearchMode)> to Option let res = res.map(|(_, mode)| mode); assert_eq!( @@ -978,7 +886,7 @@ mod tests { } #[tokio::test] - async fn test_can_skip_ordering() -> Result<()> { + async fn test_get_window_mode() -> Result<()> { let test_schema = create_test_schema3()?; // Columns a,c are nullable whereas b,d are not nullable. 
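The rule that the rewritten `test_satisfiy_nullable`/`test_satisfy_non_nullable` assertions above encode can be summarized with a small sketch; each ordering option is modeled as a `(descending, nulls_first)` pair, while the real check is `PhysicalSortExpr::satisfy`, which takes a sort requirement and the schema.

```rust
/// For a nullable column both the direction and the NULLS FIRST/LAST placement
/// must match; for a non-nullable column only the direction matters, since no
/// null values can ever appear.
fn satisfy(provided: (bool, bool), required: (bool, bool), nullable: bool) -> bool {
    if nullable {
        provided == required
    } else {
        provided.0 == required.0
    }
}

fn main() {
    // Nullable column: DESC NULLS FIRST does not satisfy DESC NULLS LAST.
    assert!(!satisfy((true, true), (true, false), true));
    // Non-nullable column: the same pair is acceptable, only DESC has to match.
    assert!(satisfy((true, true), (true, false), false));
}
```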
// Source is sorted by a ASC NULLS FIRST, b ASC NULLS FIRST, c ASC NULLS FIRST, d ASC NULLS FIRST @@ -1129,7 +1037,7 @@ mod tests { } assert_eq!( - can_skip_sort(&partition_by_exprs, &order_by_exprs, &exec_unbounded)?, + get_window_mode(&partition_by_exprs, &order_by_exprs, &exec_unbounded)?, *expected, "Unexpected result for in unbounded test case#: {case_idx:?}, case: {test_case:?}" ); diff --git a/datafusion/physical-plan/src/windows/window_agg_exec.rs b/datafusion/physical-plan/src/windows/window_agg_exec.rs index 4873778425d3..6c245f65ba4f 100644 --- a/datafusion/physical-plan/src/windows/window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/window_agg_exec.rs @@ -26,12 +26,13 @@ use crate::common::transpose; use crate::expressions::PhysicalSortExpr; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use crate::windows::{ - calc_requirements, get_ordered_partition_by_indices, window_ordering_equivalence, + calc_requirements, get_ordered_partition_by_indices, get_partition_by_sort_exprs, + window_equivalence_properties, }; use crate::{ - ColumnStatistics, DisplayAs, DisplayFormatType, Distribution, EquivalenceProperties, - ExecutionPlan, Partitioning, PhysicalExpr, RecordBatchStream, - SendableRecordBatchStream, Statistics, WindowExpr, + ColumnStatistics, DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, + Partitioning, PhysicalExpr, RecordBatchStream, SendableRecordBatchStream, Statistics, + WindowExpr, }; use arrow::compute::{concat, concat_batches}; @@ -43,10 +44,10 @@ use arrow::{ record_batch::RecordBatch, }; use datafusion_common::stats::Precision; -use datafusion_common::utils::{evaluate_partition_ranges, get_at_indices}; +use datafusion_common::utils::evaluate_partition_ranges; use datafusion_common::{internal_err, plan_err, DataFusionError, Result}; use datafusion_execution::TaskContext; -use datafusion_physical_expr::{OrderingEquivalenceProperties, PhysicalSortRequirement}; +use datafusion_physical_expr::{EquivalenceProperties, PhysicalSortRequirement}; use futures::stream::Stream; use futures::{ready, StreamExt}; @@ -107,9 +108,12 @@ impl WindowAggExec { // Hence returned `PhysicalSortExpr` corresponding to `PARTITION BY` columns can be used safely // to calculate partition separation points pub fn partition_by_sort_keys(&self) -> Result> { - // Partition by sort keys indices are stored in self.ordered_partition_by_indices. 
- let sort_keys = self.input.output_ordering().unwrap_or(&[]); - get_at_indices(sort_keys, &self.ordered_partition_by_indices) + let partition_by = self.window_expr()[0].partition_by(); + get_partition_by_sort_exprs( + &self.input, + partition_by, + &self.ordered_partition_by_indices, + ) } } @@ -206,13 +210,9 @@ impl ExecutionPlan for WindowAggExec { } } + /// Get the [`EquivalenceProperties`] within the plan fn equivalence_properties(&self) -> EquivalenceProperties { - self.input().equivalence_properties() - } - - /// Get the OrderingEquivalenceProperties within the plan - fn ordering_equivalence_properties(&self) -> OrderingEquivalenceProperties { - window_ordering_equivalence(&self.schema, &self.input, &self.window_expr) + window_equivalence_properties(&self.schema, &self.input, &self.window_expr) } fn with_new_children( diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml index 32e10e58a7d7..72a4df66ebd7 100644 --- a/datafusion/proto/Cargo.toml +++ b/datafusion/proto/Cargo.toml @@ -19,9 +19,9 @@ name = "datafusion-proto" description = "Protobuf serialization of DataFusion logical plan expressions" keywords = ["arrow", "query", "sql"] +readme = "README.md" version = { workspace = true } edition = { workspace = true } -readme = { workspace = true } homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } @@ -44,14 +44,14 @@ parquet = ["datafusion/parquet", "datafusion-common/parquet"] arrow = { workspace = true } chrono = { workspace = true } datafusion = { path = "../core", version = "32.0.0" } -datafusion-common = { path = "../common", version = "32.0.0", default-features = false } -datafusion-expr = { path = "../expr", version = "32.0.0" } +datafusion-common = { workspace = true } +datafusion-expr = { workspace = true } object_store = { version = "0.7.0" } pbjson = { version = "0.5", optional = true } prost = "0.12.0" serde = { version = "1.0", optional = true } -serde_json = { version = "1.0", optional = true } +serde_json = { workspace = true, optional = true } [dev-dependencies] -doc-comment = "0.3" +doc-comment = { workspace = true } tokio = "1.18" diff --git a/datafusion/proto/README.md b/datafusion/proto/README.md index fd66d54aa2de..171aadb744d6 100644 --- a/datafusion/proto/README.md +++ b/datafusion/proto/README.md @@ -19,7 +19,7 @@ # DataFusion Proto -[DataFusion](df) is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. This crate is a submodule of DataFusion that provides a protocol buffer format for representing query plans and expressions. 
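As background for the proto changes that follow, the crate's typical round-trip usage looks roughly like the sketch below; it assumes the `Serializeable` bytes API with `to_bytes`/`from_bytes`, so treat it as a sketch rather than version-exact code.

```rust
use datafusion_expr::{col, lit, Expr};
use datafusion_proto::bytes::Serializeable;

fn main() -> datafusion_common::Result<()> {
    // Build a logical expression: a < 5
    let expr = col("a").lt(lit(5i32));

    // Serialize it into the opaque protobuf byte representation ...
    let bytes = expr.to_bytes()?;

    // ... and decode it back into an equivalent `Expr`.
    let decoded = Expr::from_bytes(&bytes)?;
    assert_eq!(expr, decoded);
    Ok(())
}
```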
diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index f4ab582593e0..9b6a0448f810 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -620,6 +620,7 @@ enum ScalarFunction { ArrayEmpty = 115; ArrayPopBack = 116; StringToArray = 117; + ToTimestampNanos = 118; } message ScalarFunctionNode { diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index e9e2fd0c0461..3eeb060f8d01 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -19772,6 +19772,7 @@ impl serde::Serialize for ScalarFunction { Self::ArrayEmpty => "ArrayEmpty", Self::ArrayPopBack => "ArrayPopBack", Self::StringToArray => "StringToArray", + Self::ToTimestampNanos => "ToTimestampNanos", }; serializer.serialize_str(variant) } @@ -19901,6 +19902,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "ArrayEmpty", "ArrayPopBack", "StringToArray", + "ToTimestampNanos", ]; struct GeneratedVisitor; @@ -20059,6 +20061,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "ArrayEmpty" => Ok(ScalarFunction::ArrayEmpty), "ArrayPopBack" => Ok(ScalarFunction::ArrayPopBack), "StringToArray" => Ok(ScalarFunction::StringToArray), + "ToTimestampNanos" => Ok(ScalarFunction::ToTimestampNanos), _ => Err(serde::de::Error::unknown_variant(value, FIELDS)), } } diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 1c821708a971..d18bacfb3bcc 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -2465,6 +2465,7 @@ pub enum ScalarFunction { ArrayEmpty = 115, ArrayPopBack = 116, StringToArray = 117, + ToTimestampNanos = 118, } impl ScalarFunction { /// String value of the enum field names used in the ProtoBuf definition. @@ -2591,6 +2592,7 @@ impl ScalarFunction { ScalarFunction::ArrayEmpty => "ArrayEmpty", ScalarFunction::ArrayPopBack => "ArrayPopBack", ScalarFunction::StringToArray => "StringToArray", + ScalarFunction::ToTimestampNanos => "ToTimestampNanos", } } /// Creates an enum from field names used in the ProtoBuf definition. 
@@ -2714,6 +2716,7 @@ impl ScalarFunction { "ArrayEmpty" => Some(Self::ArrayEmpty), "ArrayPopBack" => Some(Self::ArrayPopBack), "StringToArray" => Some(Self::StringToArray), + "ToTimestampNanos" => Some(Self::ToTimestampNanos), _ => None, } } diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index c87882ca72fc..26bd0163d0a3 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -54,7 +54,8 @@ use datafusion_expr::{ random, regexp_match, regexp_replace, repeat, replace, reverse, right, round, rpad, rtrim, sha224, sha256, sha384, sha512, signum, sin, sinh, split_part, sqrt, starts_with, strpos, substr, substring, tan, tanh, to_hex, to_timestamp_micros, - to_timestamp_millis, to_timestamp_seconds, translate, trim, trunc, upper, uuid, + to_timestamp_millis, to_timestamp_nanos, to_timestamp_seconds, translate, trim, + trunc, upper, uuid, window_frame::regularize, AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction, BuiltinScalarFunction, Case, Cast, Expr, GetFieldAccess, GetIndexedField, GroupingSet, @@ -521,6 +522,7 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::Substr => Self::Substr, ScalarFunction::ToHex => Self::ToHex, ScalarFunction::ToTimestampMicros => Self::ToTimestampMicros, + ScalarFunction::ToTimestampNanos => Self::ToTimestampNanos, ScalarFunction::ToTimestampSeconds => Self::ToTimestampSeconds, ScalarFunction::Now => Self::Now, ScalarFunction::CurrentDate => Self::CurrentDate, @@ -1592,6 +1594,9 @@ pub fn parse_expr( ScalarFunction::ToTimestampMicros => { Ok(to_timestamp_micros(parse_expr(&args[0], registry)?)) } + ScalarFunction::ToTimestampNanos => { + Ok(to_timestamp_nanos(parse_expr(&args[0], registry)?)) + } ScalarFunction::ToTimestampSeconds => { Ok(to_timestamp_seconds(parse_expr(&args[0], registry)?)) } diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 125ced032e20..687b73cfc886 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -1522,6 +1522,7 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::Substr => Self::Substr, BuiltinScalarFunction::ToHex => Self::ToHex, BuiltinScalarFunction::ToTimestampMicros => Self::ToTimestampMicros, + BuiltinScalarFunction::ToTimestampNanos => Self::ToTimestampNanos, BuiltinScalarFunction::ToTimestampSeconds => Self::ToTimestampSeconds, BuiltinScalarFunction::Now => Self::Now, BuiltinScalarFunction::CurrentDate => Self::CurrentDate, diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index a00a7f021352..b91a2ac1fbd7 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -19,9 +19,9 @@ name = "datafusion-sql" description = "DataFusion SQL Query Planner" keywords = ["datafusion", "sql", "parser", "planner"] +readme = "README.md" version = { workspace = true } edition = { workspace = true } -readme = { workspace = true } homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } @@ -39,13 +39,13 @@ unicode_expressions = [] [dependencies] arrow = { workspace = true } arrow-schema = { workspace = true } -datafusion-common = { path = "../common", version = "32.0.0", default-features = false } -datafusion-expr = { path = "../expr", version = "32.0.0" } -log = "^0.4" +datafusion-common = { workspace = true } +datafusion-expr = { workspace = true 
} +log = { workspace = true } sqlparser = { workspace = true } [dev-dependencies] -ctor = "0.2.0" -env_logger = "0.10" +ctor = { workspace = true } +env_logger = { workspace = true } paste = "^1.0" rstest = "0.18" diff --git a/datafusion/sql/README.md b/datafusion/sql/README.md index 2ad994e4eba5..256fa774b410 100644 --- a/datafusion/sql/README.md +++ b/datafusion/sql/README.md @@ -20,7 +20,7 @@ # DataFusion SQL Query Planner This crate provides a general purpose SQL query planner that can parse SQL and translate queries into logical -plans. Although this crate is used by the [DataFusion](df) query engine, it was designed to be easily usable from any +plans. Although this crate is used by the [DataFusion][df] query engine, it was designed to be easily usable from any project that requires a SQL query planner and does not make any assumptions about how the resulting logical plan will be translated to a physical plan. For example, there is no concept of row-based versus columnar execution in the logical plan. diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index 373388277351..c58b8319ceb7 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -36,44 +36,57 @@ use super::arrow_cast::ARROW_CAST_NAME; impl<'a, S: ContextProvider> SqlToRel<'a, S> { pub(super) fn sql_function_to_expr( &self, - mut function: SQLFunction, + function: SQLFunction, schema: &DFSchema, planner_context: &mut PlannerContext, ) -> Result { - let name = if function.name.0.len() > 1 { + let SQLFunction { + name, + args, + over, + distinct, + filter, + null_treatment, + special: _, // true if not called with trailing parens + order_by, + } = function; + + if let Some(null_treatment) = null_treatment { + return not_impl_err!("Null treatment in aggregate functions is not supported: {null_treatment}"); + } + + let name = if name.0.len() > 1 { // DF doesn't handle compound identifiers // (e.g. "foo.bar") for function names yet - function.name.to_string() + name.to_string() } else { - crate::utils::normalize_ident(function.name.0[0].clone()) + crate::utils::normalize_ident(name.0[0].clone()) }; // user-defined function (UDF) should have precedence in case it has the same name as a scalar built-in function if let Some(fm) = self.context_provider.get_function_meta(&name) { - let args = - self.function_args_to_expr(function.args, schema, planner_context)?; + let args = self.function_args_to_expr(args, schema, planner_context)?; return Ok(Expr::ScalarUDF(ScalarUDF::new(fm, args))); } // next, scalar built-in if let Ok(fun) = BuiltinScalarFunction::from_str(&name) { - let args = - self.function_args_to_expr(function.args, schema, planner_context)?; + let args = self.function_args_to_expr(args, schema, planner_context)?; return Ok(Expr::ScalarFunction(ScalarFunction::new(fun, args))); }; // If function is a window function (it has an OVER clause), // it shouldn't have ordering requirement as function argument // required ordering should be defined in OVER clause. 
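The FILTER clause support introduced further down in this hunk leans on the `map`/`transpose` idiom to run a fallible planner call on an optional SQL expression. A standalone sketch of that idiom, with a toy `convert` standing in for `sql_expr_to_logical_expr`:

```rust
use std::num::ParseIntError;

/// Stand-in for a fallible conversion such as `sql_expr_to_logical_expr`.
fn convert(input: &str) -> Result<i64, ParseIntError> {
    input.parse()
}

/// `None` stays `None`, a conversion error propagates, and a successful
/// conversion is boxed -- mirroring `Option<SQLExpr>` -> `Option<Box<Expr>>`.
fn convert_opt(input: Option<&str>) -> Result<Option<Box<i64>>, ParseIntError> {
    Ok(input
        .map(convert) // Option<Result<i64, _>>
        .transpose()? // early-return on error, otherwise Option<i64>
        .map(Box::new)) // Option<Box<i64>>
}

fn main() {
    assert_eq!(convert_opt(None).unwrap(), None);
    assert_eq!(convert_opt(Some("42")).unwrap(), Some(Box::new(42)));
    assert!(convert_opt(Some("not a number")).is_err());
}
```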
- let is_function_window = function.over.is_some(); - if !function.order_by.is_empty() && is_function_window { + let is_function_window = over.is_some(); + if !order_by.is_empty() && is_function_window { return plan_err!( "Aggregate ORDER BY is not implemented for window functions" ); } // then, window function - if let Some(WindowType::WindowSpec(window)) = function.over.take() { + if let Some(WindowType::WindowSpec(window)) = over { let partition_by = window .partition_by .into_iter() @@ -97,11 +110,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { if let Ok(fun) = self.find_window_func(&name) { let expr = match fun { WindowFunction::AggregateFunction(aggregate_fun) => { - let args = self.function_args_to_expr( - function.args, - schema, - planner_context, - )?; + let args = + self.function_args_to_expr(args, schema, planner_context)?; Expr::WindowFunction(expr::WindowFunction::new( WindowFunction::AggregateFunction(aggregate_fun), @@ -113,11 +123,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } _ => Expr::WindowFunction(expr::WindowFunction::new( fun, - self.function_args_to_expr( - function.args, - schema, - planner_context, - )?, + self.function_args_to_expr(args, schema, planner_context)?, partition_by, order_by, window_frame, @@ -128,8 +134,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } else { // User defined aggregate functions (UDAF) have precedence in case it has the same name as a scalar built-in function if let Some(fm) = self.context_provider.get_aggregate_meta(&name) { - let args = - self.function_args_to_expr(function.args, schema, planner_context)?; + let args = self.function_args_to_expr(args, schema, planner_context)?; return Ok(Expr::AggregateUDF(expr::AggregateUDF::new( fm, args, None, None, ))); @@ -137,25 +142,23 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // next, aggregate built-ins if let Ok(fun) = AggregateFunction::from_str(&name) { - let distinct = function.distinct; - let order_by = self.order_by_to_sort_expr( - &function.order_by, - schema, - planner_context, - )?; + let order_by = + self.order_by_to_sort_expr(&order_by, schema, planner_context)?; let order_by = (!order_by.is_empty()).then_some(order_by); - let args = - self.function_args_to_expr(function.args, schema, planner_context)?; + let args = self.function_args_to_expr(args, schema, planner_context)?; + let filter: Option> = filter + .map(|e| self.sql_expr_to_logical_expr(*e, schema, planner_context)) + .transpose()? + .map(Box::new); return Ok(Expr::AggregateFunction(expr::AggregateFunction::new( - fun, args, distinct, None, order_by, + fun, args, distinct, filter, order_by, ))); }; // Special case arrow_cast (as its type is dependent on its argument value) if name == ARROW_CAST_NAME { - let args = - self.function_args_to_expr(function.args, schema, planner_context)?; + let args = self.function_args_to_expr(args, schema, planner_context)?; return super::arrow_cast::create_arrow_cast(args, schema); } } diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 26184834874a..1cf0fc133f04 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -222,7 +222,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { planner_context, ), - SQLExpr::Cast { expr, data_type } => Ok(Expr::Cast(Cast::new( + SQLExpr::Cast { + expr, data_type, .. 
+ } => Ok(Expr::Cast(Cast::new( Box::new(self.sql_expr_to_logical_expr( *expr, schema, @@ -231,7 +233,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { self.convert_data_type(&data_type)?, ))), - SQLExpr::TryCast { expr, data_type } => Ok(Expr::TryCast(TryCast::new( + SQLExpr::TryCast { + expr, data_type, .. + } => Ok(Expr::TryCast(TryCast::new( Box::new(self.sql_expr_to_logical_expr( *expr, schema, @@ -412,6 +416,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { expr, trim_where, trim_what, + .. } => self.sql_trim_to_expr( *expr, trim_where, @@ -477,10 +482,36 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { self.parse_array_agg(array_agg, schema, planner_context) } + SQLExpr::Struct { values, fields } => { + self.parse_struct(values, fields, schema, planner_context) + } + _ => not_impl_err!("Unsupported ast node in sqltorel: {sql:?}"), } } + fn parse_struct( + &self, + values: Vec, + fields: Vec, + input_schema: &DFSchema, + planner_context: &mut PlannerContext, + ) -> Result { + if !fields.is_empty() { + return not_impl_err!("Struct fields are not supported yet"); + } + let args = values + .into_iter() + .map(|value| { + self.sql_expr_to_logical_expr(value, input_schema, planner_context) + }) + .collect::>>()?; + Ok(Expr::ScalarFunction(ScalarFunction::new( + BuiltinScalarFunction::Struct, + args, + ))) + } + fn parse_array_agg( &self, array_agg: ArrayAgg, diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index f7d8307d33a0..ca5e260aee05 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -24,8 +24,8 @@ use arrow_schema::*; use datafusion_common::field_not_found; use datafusion_common::internal_err; use datafusion_expr::WindowUDF; -use sqlparser::ast::ExactNumberInfo; use sqlparser::ast::TimezoneInfo; +use sqlparser::ast::{ArrayElemTypeDef, ExactNumberInfo}; use sqlparser::ast::{ColumnDef as SQLColumnDef, ColumnOption}; use sqlparser::ast::{DataType as SQLDataType, Ident, ObjectName, TableAlias}; @@ -297,14 +297,15 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { pub(crate) fn convert_data_type(&self, sql_type: &SQLDataType) -> Result { match sql_type { - SQLDataType::Array(Some(inner_sql_type)) => { + SQLDataType::Array(ArrayElemTypeDef::AngleBracket(inner_sql_type)) + | SQLDataType::Array(ArrayElemTypeDef::SquareBracket(inner_sql_type)) => { let data_type = self.convert_simple_data_type(inner_sql_type)?; Ok(DataType::List(Arc::new(Field::new( "field", data_type, true, )))) } - SQLDataType::Array(None) => { + SQLDataType::Array(ArrayElemTypeDef::None) => { not_impl_err!("Arrays with unspecified type is not supported") } other => self.convert_simple_data_type(other), @@ -330,7 +331,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SQLDataType::Char(_) | SQLDataType::Varchar(_) | SQLDataType::Text - | SQLDataType::String => Ok(DataType::Utf8), + | SQLDataType::String(_) => Ok(DataType::Utf8), SQLDataType::Timestamp(None, tz_info) => { let tz = if matches!(tz_info, TimezoneInfo::Tz) || matches!(tz_info, TimezoneInfo::WithTimeZone) @@ -400,7 +401,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { | SQLDataType::Dec(_) | SQLDataType::BigNumeric(_) | SQLDataType::BigDecimal(_) - | SQLDataType::Clob(_) => not_impl_err!( + | SQLDataType::Clob(_) + | SQLDataType::Bytes(_) + | SQLDataType::Int64 + | SQLDataType::Float64 + | SQLDataType::Struct(_) + => not_impl_err!( "Unsupported SQL type {sql_type:?}" ), } diff --git a/datafusion/sql/src/set_expr.rs b/datafusion/sql/src/set_expr.rs index e771a5ba3de4..7300d49be0f5 100644 --- 
a/datafusion/sql/src/set_expr.rs +++ b/datafusion/sql/src/set_expr.rs @@ -44,6 +44,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { SetQuantifier::AllByName => { return not_impl_err!("UNION ALL BY NAME not implemented") } + SetQuantifier::DistinctByName => { + return not_impl_err!("UNION DISTINCT BY NAME not implemented") + } }; let left_plan = self.set_expr_to_plan(*left, planner_context)?; diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index f8504a487a66..9d9c55361a5e 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -31,12 +31,11 @@ use arrow_schema::DataType; use datafusion_common::file_options::StatementOptions; use datafusion_common::parsers::CompressionTypeVariant; use datafusion_common::{ - not_impl_err, plan_datafusion_err, plan_err, unqualified_field_not_found, Column, - Constraints, DFField, DFSchema, DFSchemaRef, DataFusionError, ExprSchema, - OwnedTableReference, Result, SchemaReference, TableReference, ToDFSchema, + not_impl_err, plan_datafusion_err, plan_err, unqualified_field_not_found, + Constraints, DFField, DFSchema, DFSchemaRef, DataFusionError, OwnedTableReference, + Result, SchemaReference, TableReference, ToDFSchema, }; use datafusion_expr::dml::{CopyOptions, CopyTo}; -use datafusion_expr::expr::Placeholder; use datafusion_expr::expr_rewriter::normalize_col_with_schemas_and_ambiguity_check; use datafusion_expr::logical_plan::builder::project; use datafusion_expr::logical_plan::DdlStatement; @@ -431,6 +430,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { table, on, returning, + ignore, } => { if or.is_some() { plan_err!("Inserts with or clauses not supported")?; @@ -450,6 +450,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { if returning.is_some() { plan_err!("Insert-returning clause not supported")?; } + if ignore { + plan_err!("Insert-ignore clause not supported")?; + } let _ = into; // optional keyword doesn't change behavior self.insert_to_plan(table_name, columns, source, overwrite) } @@ -472,6 +475,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { selection, returning, from, + order_by, + limit, } => { if !tables.is_empty() { plan_err!("DELETE not supported")?; @@ -484,6 +489,15 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { if returning.is_some() { plan_err!("Delete-returning clause not yet supported")?; } + + if !order_by.is_empty() { + plan_err!("Delete-order-by clause not yet supported")?; + } + + if limit.is_some() { + plan_err!("Delete-limit clause not yet supported")?; + } + let table_name = self.get_delete_target(from)?; self.delete_to_plan(table_name, selection) } @@ -964,17 +978,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // Do a table lookup to verify the table exists let table_name = self.object_name_to_table_reference(table_name)?; let table_source = self.context_provider.get_table_source(table_name.clone())?; - let arrow_schema = (*table_source.schema()).clone(); let table_schema = Arc::new(DFSchema::try_from_qualified_schema( table_name.clone(), - &arrow_schema, + &table_source.schema(), )?); - let values = table_schema.fields().iter().map(|f| { - ( - f.name().clone(), - ast::Expr::Identifier(ast::Ident::from(f.name().as_str())), - ) - }); // Overwrite with assignment expressions let mut planner_context = PlannerContext::new(); @@ -992,17 +999,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { }) .collect::>>()?; - let values = values - .into_iter() - .map(|(k, v)| { - let val = assign_map.remove(&k).unwrap_or(v); - (k, val) - }) - .collect::>(); - - // 
Build scan - let from = from.unwrap_or(table); - let scan = self.plan_from_tables(vec![from], &mut planner_context)?; + // Build scan, join with from table if it exists. + let mut input_tables = vec![table]; + input_tables.extend(from); + let scan = self.plan_from_tables(input_tables, &mut planner_context)?; // Filter let source = match predicate_expr { @@ -1010,43 +1010,49 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { Some(predicate_expr) => { let filter_expr = self.sql_to_expr( predicate_expr, - &table_schema, + scan.schema(), &mut planner_context, )?; let mut using_columns = HashSet::new(); expr_to_columns(&filter_expr, &mut using_columns)?; let filter_expr = normalize_col_with_schemas_and_ambiguity_check( filter_expr, - &[&[&table_schema]], + &[&[&scan.schema()]], &[using_columns], )?; LogicalPlan::Filter(Filter::try_new(filter_expr, Arc::new(scan))?) } }; - // Projection - let mut exprs = vec![]; - for (col_name, expr) in values.into_iter() { - let expr = self.sql_to_expr(expr, &table_schema, &mut planner_context)?; - let expr = match expr { - datafusion_expr::Expr::Placeholder(Placeholder { - ref id, - ref data_type, - }) => match data_type { - None => { - let dt = table_schema.data_type(&Column::from_name(&col_name))?; - datafusion_expr::Expr::Placeholder(Placeholder::new( - id.clone(), - Some(dt.clone()), - )) + // Build updated values for each column, using the previous value if not modified + let exprs = table_schema + .fields() + .iter() + .map(|field| { + let expr = match assign_map.remove(field.name()) { + Some(new_value) => { + let mut expr = self.sql_to_expr( + new_value, + source.schema(), + &mut planner_context, + )?; + // Update placeholder's datatype to the type of the target column + if let datafusion_expr::Expr::Placeholder(placeholder) = &mut expr + { + placeholder.data_type = placeholder + .data_type + .take() + .or_else(|| Some(field.data_type().clone())); + } + // Cast to target column type, if necessary + expr.cast_to(field.data_type(), source.schema())? 
} - Some(_) => expr, - }, - _ => expr, - }; - let expr = expr.alias(col_name); - exprs.push(expr); - } + None => datafusion_expr::Expr::Column(field.qualified_column()), + }; + Ok(expr.alias(field.name())) + }) + .collect::>>()?; + let source = project(source, exprs)?; let plan = LogicalPlan::Dml(DmlStatement { diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 2446ee0a5841..ff6dca7eef2a 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -1287,6 +1287,16 @@ fn select_simple_aggregate_repeated_aggregate_with_unique_aliases() { ); } +#[test] +fn select_simple_aggregate_respect_nulls() { + let sql = "SELECT MIN(age) RESPECT NULLS FROM person"; + let err = logical_plan(sql).expect_err("query should have failed"); + + assert_contains!( + err.strip_backtrace(), + "This feature is not implemented: Null treatment in aggregate functions is not supported: RESPECT NULLS" + ); +} #[test] fn select_from_typed_string_values() { quick_test( diff --git a/datafusion/sqllogictest/Cargo.toml b/datafusion/sqllogictest/Cargo.toml index 454f99942f52..07debf179529 100644 --- a/datafusion/sqllogictest/Cargo.toml +++ b/datafusion/sqllogictest/Cargo.toml @@ -21,7 +21,7 @@ edition = { workspace = true } homepage = { workspace = true } license = { workspace = true } name = "datafusion-sqllogictest" -readme = { workspace = true } +readme = "README.md" repository = { workspace = true } rust-version = { workspace = true } version = { workspace = true } @@ -32,24 +32,24 @@ path = "src/lib.rs" [dependencies] arrow = { workspace = true } -async-trait = "0.1.41" -bigdecimal = "0.4.1" +async-trait = { workspace = true } +bigdecimal = { workspace = true } bytes = { version = "1.4.0", optional = true } chrono = { workspace = true, optional = true } datafusion = { path = "../core", version = "32.0.0" } -datafusion-common = { path = "../common", version = "32.0.0", default-features = false } +datafusion-common = { workspace = true } futures = { version = "0.3.28" } -half = "2.2.1" -itertools = "0.11" -log = "^0.4" -object_store = "0.7.0" +half = { workspace = true } +itertools = { workspace = true } +log = { workspace = true } +object_store = { workspace = true } postgres-protocol = { version = "0.6.4", optional = true } postgres-types = { version = "0.2.4", optional = true } rust_decimal = { version = "1.27.0" } sqllogictest = "0.17.0" sqlparser = { workspace = true } -tempfile = "3" -thiserror = "1.0.44" +tempfile = { workspace = true } +thiserror = { workspace = true } tokio = { version = "1.0" } tokio-postgres = { version = "0.7.7", optional = true } @@ -58,8 +58,8 @@ avro = ["datafusion/avro"] postgres = ["bytes", "chrono", "tokio-postgres", "postgres-types", "postgres-protocol"] [dev-dependencies] -env_logger = "0.10" -num_cpus = "1.13.0" +env_logger = { workspace = true } +num_cpus = { workspace = true } [[test]] harness = false diff --git a/datafusion/sqllogictest/README.md b/datafusion/sqllogictest/README.md index 3e94859d35a7..0349ed852f46 100644 --- a/datafusion/sqllogictest/README.md +++ b/datafusion/sqllogictest/README.md @@ -17,19 +17,26 @@ under the License. --> -#### Overview +# DataFusion sqllogictest -This is the Datafusion implementation of [sqllogictest](https://www.sqlite.org/sqllogictest/doc/trunk/about.wiki). We -use [sqllogictest-rs](https://github.com/risinglightdb/sqllogictest-rs) as a parser/runner of `.slt` files -in [`test_files`](test_files). 
+[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. -#### Testing setup +This crate is a submodule of DataFusion that contains an implementation of [sqllogictest](https://www.sqlite.org/sqllogictest/doc/trunk/about.wiki). + +[df]: https://crates.io/crates/datafusion + +## Overview + +This crate uses [sqllogictest-rs](https://github.com/risinglightdb/sqllogictest-rs) to parse and run `.slt` files in the +[`test_files`](test_files) directory of this crate. + +## Testing setup 1. `rustup update stable` DataFusion uses the latest stable release of rust 2. `git submodule init` 3. `git submodule update` -#### Running tests: TLDR Examples +## Running tests: TLDR Examples ```shell # Run all tests @@ -56,7 +63,7 @@ cargo test --test sqllogictests -- ddl --complete RUST_LOG=debug cargo test --test sqllogictests -- ddl ``` -#### Cookbook: Adding Tests +## Cookbook: Adding Tests 1. Add queries @@ -95,11 +102,11 @@ SELECT * from foo; Assuming it looks good, check it in! -#### Reference +# Reference -#### Running tests: Validation Mode +## Running tests: Validation Mode -In this model, `sqllogictests` runs the statements and queries in a `.slt` file, comparing the expected output in the +In this mode, `sqllogictests` runs the statements and queries in a `.slt` file, comparing the expected output in the file to the output produced by that run. For example, to run all tests suites in validation mode @@ -115,10 +122,10 @@ sqllogictests also supports `cargo test` style substring matches on file names t cargo test --test sqllogictests -- information ``` -#### Running tests: Postgres compatibility +## Running tests: Postgres compatibility Test files that start with prefix `pg_compat_` verify compatibility -with Postgres by running the same script files both with DataFusion and with Posgres +with Postgres by running the same script files both with DataFusion and with Postgres In order to run the sqllogictests running against a previously running Postgres instance, do: @@ -145,7 +152,7 @@ docker run \ postgres ``` -#### Running Tests: `tpch` +## Running Tests: `tpch` Test files in `tpch` directory runs against the `TPCH` data set (SF = 0.1), which must be generated before running. You can use following @@ -165,7 +172,7 @@ Then you need to add `INCLUDE_TPCH=true` to run tpch tests: INCLUDE_TPCH=true cargo test --test sqllogictests ``` -#### Updating tests: Completion Mode +## Updating tests: Completion Mode In test script completion mode, `sqllogictests` reads a prototype script and runs the statements and queries against the database engine. The output is a full script that is a copy of the prototype script with result inserted. @@ -177,7 +184,7 @@ You can update the tests / generate expected output by passing the `--complete` cargo test --test sqllogictests -- ddl --complete ``` -#### Running tests: `scratchdir` +## Running tests: `scratchdir` The DataFusion sqllogictest runner automatically creates a directory named `test_files/scratch/`, creating it if needed and @@ -190,7 +197,7 @@ Tests that need to write temporary files should write (only) to this directory to ensure they do not interfere with others concurrently running tests. -#### `.slt` file format +## `.slt` file format [`sqllogictest`] was originally written for SQLite to verify the correctness of SQL queries against the SQLite engine. 
The format is designed @@ -247,7 +254,7 @@ query > :warning: It is encouraged to either apply `order by`, or use `rowsort` for queries without explicit `order by` > clauses. -##### Example +### Example ```sql # group_by_distinct diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 777b634e93b1..6217f12279a9 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -2020,14 +2020,6 @@ statement ok drop table t; - - -statement error DataFusion error: Execution error: Table 't_source' doesn't exist\. -drop table t_source; - -statement error DataFusion error: Execution error: Table 't' doesn't exist\. -drop table t; - query I select median(a) from (select 1 as a where 1=0); ---- @@ -2199,6 +2191,26 @@ NULL 1 10.1 10.1 10.1 10.1 0 NULL statement ok set datafusion.sql_parser.dialect = 'Generic'; +## Multiple distinct aggregates and dictionaries +statement ok +create table dict_test as values (1, arrow_cast('foo', 'Dictionary(Int32, Utf8)')), (2, arrow_cast('bar', 'Dictionary(Int32, Utf8)')); + +query I? +select * from dict_test; +---- +1 foo +2 bar + +query II +select count(distinct column1), count(distinct column2) from dict_test group by column1; +---- +1 1 +1 1 + +statement ok +drop table dict_test; + + # Prepare the table with dictionary values for testing statement ok CREATE TABLE value(x bigint) AS VALUES (1), (2), (3), (1), (3), (4), (5), (2); @@ -2282,6 +2294,13 @@ select max(x_dict) from value_dict group by x_dict % 2 order by max(x_dict); 4 5 +statement ok +drop table value + +statement ok +drop table value_dict + + # bool aggregation statement ok CREATE TABLE value_bool(x boolean, g int) AS VALUES (NULL, 0), (false, 0), (true, 0), (false, 1), (true, 2), (NULL, 3); diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 621cb4a8f4c0..b5601a22226c 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -209,6 +209,17 @@ AS VALUES (make_array([28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]), [28, 29, 30], [37, 38, 39], 10) ; +query TTT +select arrow_typeof(column1), arrow_typeof(column2), arrow_typeof(column3) from arrays; +---- +List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) +List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) +List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: 
Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) +List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) +List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) +List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) +List(Field { name: "item", data_type: List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) List(Field { name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) + # arrays table query ??? 
select column1, column2, column3 from arrays; diff --git a/datafusion/sqllogictest/test_files/groupby.slt b/datafusion/sqllogictest/test_files/groupby.slt index 5cb3ac2f8135..cb0b0b7c76a5 100644 --- a/datafusion/sqllogictest/test_files/groupby.slt +++ b/datafusion/sqllogictest/test_files/groupby.slt @@ -2019,7 +2019,7 @@ SortPreservingMergeExec: [col0@0 ASC NULLS LAST] ------AggregateExec: mode=FinalPartitioned, gby=[col0@0 as col0, col1@1 as col1, col2@2 as col2], aggr=[LAST_VALUE(r.col1)] --------CoalesceBatchesExec: target_batch_size=8192 ----------RepartitionExec: partitioning=Hash([col0@0, col1@1, col2@2], 4), input_partitions=4 -------------AggregateExec: mode=Partial, gby=[col0@0 as col0, col1@1 as col1, col2@2 as col2], aggr=[LAST_VALUE(r.col1)], ordering_mode=PartiallyOrdered +------------AggregateExec: mode=Partial, gby=[col0@0 as col0, col1@1 as col1, col2@2 as col2], aggr=[LAST_VALUE(r.col1)], ordering_mode=PartiallySorted([0]) --------------SortExec: expr=[col0@3 ASC NULLS LAST] ----------------CoalesceBatchesExec: target_batch_size=8192 ------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(col0@0, col0@0)] @@ -2118,7 +2118,7 @@ Projection: annotated_data_infinite2.a, annotated_data_infinite2.b, SUM(annotate ----TableScan: annotated_data_infinite2 projection=[a, b, c] physical_plan ProjectionExec: expr=[a@1 as a, b@0 as b, SUM(annotated_data_infinite2.c)@2 as summation1] ---AggregateExec: mode=Single, gby=[b@1 as b, a@0 as a], aggr=[SUM(annotated_data_infinite2.c)], ordering_mode=FullyOrdered +--AggregateExec: mode=Single, gby=[b@1 as b, a@0 as a], aggr=[SUM(annotated_data_infinite2.c)], ordering_mode=Sorted ----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true @@ -2149,7 +2149,7 @@ Projection: annotated_data_infinite2.a, annotated_data_infinite2.d, SUM(annotate ----TableScan: annotated_data_infinite2 projection=[a, c, d] physical_plan ProjectionExec: expr=[a@1 as a, d@0 as d, SUM(annotated_data_infinite2.c) ORDER BY [annotated_data_infinite2.a DESC NULLS FIRST]@2 as summation1] ---AggregateExec: mode=Single, gby=[d@2 as d, a@0 as a], aggr=[SUM(annotated_data_infinite2.c)], ordering_mode=PartiallyOrdered +--AggregateExec: mode=Single, gby=[d@2 as d, a@0 as a], aggr=[SUM(annotated_data_infinite2.c)], ordering_mode=PartiallySorted([1]) ----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST], has_header=true query III @@ -2182,7 +2182,7 @@ Projection: annotated_data_infinite2.a, annotated_data_infinite2.b, FIRST_VALUE( ----TableScan: annotated_data_infinite2 projection=[a, b, c] physical_plan ProjectionExec: expr=[a@0 as a, b@1 as b, FIRST_VALUE(annotated_data_infinite2.c) ORDER BY [annotated_data_infinite2.a DESC NULLS FIRST]@2 as first_c] ---AggregateExec: mode=Single, gby=[a@0 as a, b@1 as b], aggr=[FIRST_VALUE(annotated_data_infinite2.c)], ordering_mode=FullyOrdered +--AggregateExec: mode=Single, gby=[a@0 as a, b@1 as b], aggr=[FIRST_VALUE(annotated_data_infinite2.c)], ordering_mode=Sorted ----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true query III @@ -2208,7 +2208,7 @@ Projection: 
annotated_data_infinite2.a, annotated_data_infinite2.b, LAST_VALUE(a ----TableScan: annotated_data_infinite2 projection=[a, b, c] physical_plan ProjectionExec: expr=[a@0 as a, b@1 as b, LAST_VALUE(annotated_data_infinite2.c) ORDER BY [annotated_data_infinite2.a DESC NULLS FIRST]@2 as last_c] ---AggregateExec: mode=Single, gby=[a@0 as a, b@1 as b], aggr=[LAST_VALUE(annotated_data_infinite2.c)], ordering_mode=FullyOrdered +--AggregateExec: mode=Single, gby=[a@0 as a, b@1 as b], aggr=[LAST_VALUE(annotated_data_infinite2.c)], ordering_mode=Sorted ----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true query III @@ -2235,7 +2235,7 @@ Projection: annotated_data_infinite2.a, annotated_data_infinite2.b, LAST_VALUE(a ----TableScan: annotated_data_infinite2 projection=[a, b, c] physical_plan ProjectionExec: expr=[a@0 as a, b@1 as b, LAST_VALUE(annotated_data_infinite2.c)@2 as last_c] ---AggregateExec: mode=Single, gby=[a@0 as a, b@1 as b], aggr=[LAST_VALUE(annotated_data_infinite2.c)], ordering_mode=FullyOrdered +--AggregateExec: mode=Single, gby=[a@0 as a, b@1 as b], aggr=[LAST_VALUE(annotated_data_infinite2.c)], ordering_mode=Sorted ----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true query III @@ -2354,9 +2354,9 @@ SELECT ARRAY_AGG(s.amount ORDER BY s.amount DESC) AS amounts, GROUP BY s.country # test_ordering_sensitive_aggregation4 -# If aggregators can work with bounded memory (FullyOrdered or PartiallyOrdered mode), we should append requirement to +# If aggregators can work with bounded memory (Sorted or PartiallySorted mode), we should append requirement to # the existing ordering. This enables us to still work with bounded memory, and also satisfy aggregation requirement. -# This test checks for whether we can satisfy aggregation requirement in FullyOrdered mode. +# This test checks for whether we can satisfy aggregation requirement in Sorted mode. query TT EXPLAIN SELECT s.country, ARRAY_AGG(s.amount ORDER BY s.amount DESC) AS amounts, SUM(s.amount) AS sum1 @@ -2373,7 +2373,7 @@ Projection: s.country, ARRAY_AGG(s.amount) ORDER BY [s.amount DESC NULLS FIRST] --------TableScan: sales_global projection=[country, amount] physical_plan ProjectionExec: expr=[country@0 as country, ARRAY_AGG(s.amount) ORDER BY [s.amount DESC NULLS FIRST]@1 as amounts, SUM(s.amount)@2 as sum1] ---AggregateExec: mode=Single, gby=[country@0 as country], aggr=[ARRAY_AGG(s.amount), SUM(s.amount)], ordering_mode=FullyOrdered +--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[ARRAY_AGG(s.amount), SUM(s.amount)], ordering_mode=Sorted ----SortExec: expr=[country@0 ASC NULLS LAST,amount@1 DESC] ------MemoryExec: partitions=1, partition_sizes=[1] @@ -2390,9 +2390,9 @@ GRC [80.0, 30.0] 110 TUR [100.0, 75.0] 175 # test_ordering_sensitive_aggregation5 -# If aggregators can work with bounded memory (FullyOrdered or PartiallyOrdered mode), we should be append requirement to +# If aggregators can work with bounded memory (Sorted or PartiallySorted mode), we should be append requirement to # the existing ordering. This enables us to still work with bounded memory, and also satisfy aggregation requirement. 
-# This test checks for whether we can satisfy aggregation requirement in PartiallyOrdered mode. +# This test checks for whether we can satisfy aggregation requirement in PartiallySorted mode. query TT EXPLAIN SELECT s.country, s.zip_code, ARRAY_AGG(s.amount ORDER BY s.amount DESC) AS amounts, SUM(s.amount) AS sum1 @@ -2409,7 +2409,7 @@ Projection: s.country, s.zip_code, ARRAY_AGG(s.amount) ORDER BY [s.amount DESC N --------TableScan: sales_global projection=[zip_code, country, amount] physical_plan ProjectionExec: expr=[country@0 as country, zip_code@1 as zip_code, ARRAY_AGG(s.amount) ORDER BY [s.amount DESC NULLS FIRST]@2 as amounts, SUM(s.amount)@3 as sum1] ---AggregateExec: mode=Single, gby=[country@1 as country, zip_code@0 as zip_code], aggr=[ARRAY_AGG(s.amount), SUM(s.amount)], ordering_mode=PartiallyOrdered +--AggregateExec: mode=Single, gby=[country@1 as country, zip_code@0 as zip_code], aggr=[ARRAY_AGG(s.amount), SUM(s.amount)], ordering_mode=PartiallySorted([0]) ----SortExec: expr=[country@1 ASC NULLS LAST,amount@2 DESC] ------MemoryExec: partitions=1, partition_sizes=[1] @@ -2426,7 +2426,7 @@ GRC 0 [80.0, 30.0] 110 TUR 1 [100.0, 75.0] 175 # test_ordering_sensitive_aggregation6 -# If aggregators can work with bounded memory (FullyOrdered or PartiallyOrdered mode), we should be append requirement to +# If aggregators can work with bounded memory (FullySorted or PartiallySorted mode), we should be append requirement to # the existing ordering. When group by expressions contain aggregation requirement, we shouldn't append redundant expression. # Hence in the final plan SortExec should be SortExec: expr=[country@0 DESC] not SortExec: expr=[country@0 ASC NULLS LAST,country@0 DESC] query TT @@ -2445,7 +2445,7 @@ Projection: s.country, ARRAY_AGG(s.amount) ORDER BY [s.country DESC NULLS FIRST] --------TableScan: sales_global projection=[country, amount] physical_plan ProjectionExec: expr=[country@0 as country, ARRAY_AGG(s.amount) ORDER BY [s.country DESC NULLS FIRST]@1 as amounts, SUM(s.amount)@2 as sum1] ---AggregateExec: mode=Single, gby=[country@0 as country], aggr=[ARRAY_AGG(s.amount), SUM(s.amount)], ordering_mode=FullyOrdered +--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[ARRAY_AGG(s.amount), SUM(s.amount)], ordering_mode=Sorted ----SortExec: expr=[country@0 ASC NULLS LAST] ------MemoryExec: partitions=1, partition_sizes=[1] @@ -2480,7 +2480,7 @@ Projection: s.country, ARRAY_AGG(s.amount) ORDER BY [s.country DESC NULLS FIRST, --------TableScan: sales_global projection=[country, amount] physical_plan ProjectionExec: expr=[country@0 as country, ARRAY_AGG(s.amount) ORDER BY [s.country DESC NULLS FIRST, s.amount DESC NULLS FIRST]@1 as amounts, SUM(s.amount)@2 as sum1] ---AggregateExec: mode=Single, gby=[country@0 as country], aggr=[ARRAY_AGG(s.amount), SUM(s.amount)], ordering_mode=FullyOrdered +--AggregateExec: mode=Single, gby=[country@0 as country], aggr=[ARRAY_AGG(s.amount), SUM(s.amount)], ordering_mode=Sorted ----SortExec: expr=[country@0 ASC NULLS LAST,amount@1 DESC] ------MemoryExec: partitions=1, partition_sizes=[1] @@ -3646,6 +3646,18 @@ GROUP BY d; 0 4 0 9 +query III rowsort +SELECT d, FIRST_VALUE(c ORDER BY a DESC, c DESC) as first_a, + LAST_VALUE(c ORDER BY c DESC) as last_c +FROM multiple_ordered_table +GROUP BY d; +---- +0 95 0 +1 90 4 +2 97 1 +3 99 15 +4 98 9 + query TT EXPLAIN SELECT c FROM multiple_ordered_table @@ -3656,6 +3668,49 @@ Sort: multiple_ordered_table.c ASC NULLS LAST --TableScan: multiple_ordered_table projection=[c] physical_plan 
CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], output_ordering=[c@0 ASC NULLS LAST], has_header=true +statement ok +set datafusion.execution.target_partitions = 1; + +query TT +EXPLAIN SELECT LAST_VALUE(l.d ORDER BY l.a) AS amount_usd +FROM multiple_ordered_table AS l +INNER JOIN ( + SELECT *, ROW_NUMBER() OVER (ORDER BY r.a) as row_n FROM multiple_ordered_table AS r +) +ON l.d = r.d AND + l.a >= r.a - 10 +GROUP BY row_n +ORDER BY row_n +---- +logical_plan +Projection: amount_usd +--Sort: row_n ASC NULLS LAST +----Projection: LAST_VALUE(l.d) ORDER BY [l.a ASC NULLS LAST] AS amount_usd, row_n +------Aggregate: groupBy=[[row_n]], aggr=[[LAST_VALUE(l.d) ORDER BY [l.a ASC NULLS LAST]]] +--------Projection: l.a, l.d, row_n +----------Inner Join: l.d = r.d Filter: CAST(l.a AS Int64) >= CAST(r.a AS Int64) - Int64(10) +------------SubqueryAlias: l +--------------TableScan: multiple_ordered_table projection=[a, d] +------------Projection: r.a, r.d, ROW_NUMBER() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS row_n +--------------WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +----------------SubqueryAlias: r +------------------TableScan: multiple_ordered_table projection=[a, d] +physical_plan +ProjectionExec: expr=[amount_usd@0 as amount_usd] +--ProjectionExec: expr=[LAST_VALUE(l.d) ORDER BY [l.a ASC NULLS LAST]@1 as amount_usd, row_n@0 as row_n] +----AggregateExec: mode=Single, gby=[row_n@2 as row_n], aggr=[LAST_VALUE(l.d)], ordering_mode=Sorted +------ProjectionExec: expr=[a@0 as a, d@1 as d, row_n@4 as row_n] +--------CoalesceBatchesExec: target_batch_size=2 +----------HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(d@1, d@1)], filter=CAST(a@0 AS Int64) >= CAST(a@1 AS Int64) - 10 +------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true +------------ProjectionExec: expr=[a@0 as a, d@1 as d, ROW_NUMBER() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as row_n] +--------------BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow }], mode=[Sorted] +----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true + +# reset partition number to 8. 
+statement ok +set datafusion.execution.target_partitions = 8; + # Create an external table with primary key # column c statement ok @@ -3685,11 +3740,11 @@ logical_plan Aggregate: groupBy=[[multiple_ordered_table_with_pk.c, multiple_ordered_table_with_pk.b]], aggr=[[SUM(CAST(multiple_ordered_table_with_pk.d AS Int64))]] --TableScan: multiple_ordered_table_with_pk projection=[b, c, d] physical_plan -AggregateExec: mode=FinalPartitioned, gby=[c@0 as c, b@1 as b], aggr=[SUM(multiple_ordered_table_with_pk.d)], ordering_mode=PartiallyOrdered +AggregateExec: mode=FinalPartitioned, gby=[c@0 as c, b@1 as b], aggr=[SUM(multiple_ordered_table_with_pk.d)], ordering_mode=PartiallySorted([0]) --SortExec: expr=[c@0 ASC NULLS LAST] ----CoalesceBatchesExec: target_batch_size=2 ------RepartitionExec: partitioning=Hash([c@0, b@1], 8), input_partitions=8 ---------AggregateExec: mode=Partial, gby=[c@1 as c, b@0 as b], aggr=[SUM(multiple_ordered_table_with_pk.d)], ordering_mode=PartiallyOrdered +--------AggregateExec: mode=Partial, gby=[c@1 as c, b@0 as b], aggr=[SUM(multiple_ordered_table_with_pk.d)], ordering_mode=PartiallySorted([0]) ----------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 ------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[b, c, d], output_ordering=[c@1 ASC NULLS LAST], has_header=true @@ -3726,10 +3781,68 @@ logical_plan Aggregate: groupBy=[[multiple_ordered_table_with_pk.c, multiple_ordered_table_with_pk.b]], aggr=[[SUM(CAST(multiple_ordered_table_with_pk.d AS Int64))]] --TableScan: multiple_ordered_table_with_pk projection=[b, c, d] physical_plan -AggregateExec: mode=FinalPartitioned, gby=[c@0 as c, b@1 as b], aggr=[SUM(multiple_ordered_table_with_pk.d)], ordering_mode=PartiallyOrdered +AggregateExec: mode=FinalPartitioned, gby=[c@0 as c, b@1 as b], aggr=[SUM(multiple_ordered_table_with_pk.d)], ordering_mode=PartiallySorted([0]) --SortExec: expr=[c@0 ASC NULLS LAST] ----CoalesceBatchesExec: target_batch_size=2 ------RepartitionExec: partitioning=Hash([c@0, b@1], 8), input_partitions=8 ---------AggregateExec: mode=Partial, gby=[c@1 as c, b@0 as b], aggr=[SUM(multiple_ordered_table_with_pk.d)], ordering_mode=PartiallyOrdered +--------AggregateExec: mode=Partial, gby=[c@1 as c, b@0 as b], aggr=[SUM(multiple_ordered_table_with_pk.d)], ordering_mode=PartiallySorted([0]) ----------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 ------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[b, c, d], output_ordering=[c@1 ASC NULLS LAST], has_header=true + +# Tests for single distinct to group by optimization rule +statement ok +CREATE TABLE t(x int) AS VALUES (1), (2), (1); + +statement ok +create table t1(x bigint,y int) as values (9223372036854775807,2), (9223372036854775806,2); + +query II +SELECT SUM(DISTINCT x), MAX(DISTINCT x) from t GROUP BY x ORDER BY x; +---- +1 1 +2 2 + +query II +SELECT MAX(DISTINCT x), SUM(DISTINCT x) from t GROUP BY x ORDER BY x; +---- +1 1 +2 2 + +query TT +EXPLAIN SELECT SUM(DISTINCT CAST(x AS DOUBLE)), MAX(DISTINCT x) FROM t1 GROUP BY y; +---- +logical_plan +Projection: SUM(DISTINCT t1.x), MAX(DISTINCT t1.x) +--Aggregate: groupBy=[[t1.y]], aggr=[[SUM(DISTINCT CAST(t1.x AS Float64)), MAX(DISTINCT t1.x)]] +----TableScan: t1 projection=[x, y] +physical_plan +ProjectionExec: expr=[SUM(DISTINCT t1.x)@1 as SUM(DISTINCT t1.x), MAX(DISTINCT t1.x)@2 as MAX(DISTINCT t1.x)] +--AggregateExec: mode=FinalPartitioned, 
gby=[y@0 as y], aggr=[SUM(DISTINCT t1.x), MAX(DISTINCT t1.x)] +----CoalesceBatchesExec: target_batch_size=2 +------RepartitionExec: partitioning=Hash([y@0], 8), input_partitions=8 +--------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 +----------AggregateExec: mode=Partial, gby=[y@1 as y], aggr=[SUM(DISTINCT t1.x), MAX(DISTINCT t1.x)] +------------MemoryExec: partitions=1, partition_sizes=[1] + +query TT +EXPLAIN SELECT SUM(DISTINCT CAST(x AS DOUBLE)), MAX(DISTINCT CAST(x AS DOUBLE)) FROM t1 GROUP BY y; +---- +logical_plan +Projection: SUM(alias1) AS SUM(DISTINCT t1.x), MAX(alias1) AS MAX(DISTINCT t1.x) +--Aggregate: groupBy=[[t1.y]], aggr=[[SUM(alias1), MAX(alias1)]] +----Aggregate: groupBy=[[t1.y, CAST(t1.x AS Float64)t1.x AS t1.x AS alias1]], aggr=[[]] +------Projection: CAST(t1.x AS Float64) AS CAST(t1.x AS Float64)t1.x, t1.y +--------TableScan: t1 projection=[x, y] +physical_plan +ProjectionExec: expr=[SUM(alias1)@1 as SUM(DISTINCT t1.x), MAX(alias1)@2 as MAX(DISTINCT t1.x)] +--AggregateExec: mode=FinalPartitioned, gby=[y@0 as y], aggr=[SUM(alias1), MAX(alias1)] +----CoalesceBatchesExec: target_batch_size=2 +------RepartitionExec: partitioning=Hash([y@0], 8), input_partitions=8 +--------AggregateExec: mode=Partial, gby=[y@0 as y], aggr=[SUM(alias1), MAX(alias1)] +----------AggregateExec: mode=FinalPartitioned, gby=[y@0 as y, alias1@1 as alias1], aggr=[] +------------CoalesceBatchesExec: target_batch_size=2 +--------------RepartitionExec: partitioning=Hash([y@0, alias1@1], 8), input_partitions=8 +----------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 +------------------AggregateExec: mode=Partial, gby=[y@1 as y, CAST(t1.x AS Float64)t1.x@0 as alias1], aggr=[] +--------------------ProjectionExec: expr=[CAST(x@0 AS Float64) as CAST(t1.x AS Float64)t1.x, y@1 as y] +----------------------MemoryExec: partitions=1, partition_sizes=[1] diff --git a/datafusion/sqllogictest/test_files/insert_to_external.slt b/datafusion/sqllogictest/test_files/insert_to_external.slt index b2206e987864..8b01a14568e7 100644 --- a/datafusion/sqllogictest/test_files/insert_to_external.slt +++ b/datafusion/sqllogictest/test_files/insert_to_external.slt @@ -40,8 +40,44 @@ STORED AS CSV WITH HEADER ROW LOCATION '../../testing/data/csv/aggregate_test_100.csv' -# test_insert_into +statement ok +create table dictionary_encoded_values as values +('a', arrow_cast('foo', 'Dictionary(Int32, Utf8)')), ('b', arrow_cast('bar', 'Dictionary(Int32, Utf8)')); + +query TTT +describe dictionary_encoded_values; +---- +column1 Utf8 YES +column2 Dictionary(Int32, Utf8) YES + +statement ok +CREATE EXTERNAL TABLE dictionary_encoded_parquet_partitioned( + a varchar, + b varchar, +) +STORED AS parquet +LOCATION 'test_files/scratch/insert_to_external/parquet_types_partitioned' +PARTITIONED BY (b) +OPTIONS( +create_local_path 'true', +insert_mode 'append_new_files', +); + +query TT +insert into dictionary_encoded_parquet_partitioned +select * from dictionary_encoded_values +---- +2 + +query TT +select * from dictionary_encoded_parquet_partitioned order by (a); +---- +a foo +b bar + + +# test_insert_into statement ok set datafusion.execution.target_partitions = 8; diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index cc90e6431389..c794c4da4310 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -3283,6 +3283,127 @@ CoalesceBatchesExec: target_batch_size=2 
------BoundedWindowAggExec: wdw=[ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)) }], mode=[Sorted] --------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true +query TT +EXPLAIN SELECT l.a, LAST_VALUE(r.b ORDER BY r.a ASC NULLS FIRST) as last_col1 +FROM annotated_data as l +JOIN annotated_data as r +ON l.a = r.a +GROUP BY l.a, l.b, l.c +ORDER BY l.a ASC NULLS FIRST; +---- +logical_plan +Sort: l.a ASC NULLS FIRST +--Projection: l.a, LAST_VALUE(r.b) ORDER BY [r.a ASC NULLS FIRST] AS last_col1 +----Aggregate: groupBy=[[l.a, l.b, l.c]], aggr=[[LAST_VALUE(r.b) ORDER BY [r.a ASC NULLS FIRST]]] +------Inner Join: l.a = r.a +--------SubqueryAlias: l +----------TableScan: annotated_data projection=[a, b, c] +--------SubqueryAlias: r +----------TableScan: annotated_data projection=[a, b] +physical_plan +ProjectionExec: expr=[a@0 as a, LAST_VALUE(r.b) ORDER BY [r.a ASC NULLS FIRST]@3 as last_col1] +--AggregateExec: mode=Single, gby=[a@0 as a, b@1 as b, c@2 as c], aggr=[LAST_VALUE(r.b)], ordering_mode=PartiallySorted([0]) +----CoalesceBatchesExec: target_batch_size=2 +------HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0)] +--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c], output_ordering=[a@0 ASC, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true +--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], output_ordering=[a@0 ASC, b@1 ASC NULLS LAST], has_header=true + +# create a table where there more than one valid ordering +# that describes table. 
+statement ok +CREATE EXTERNAL TABLE multiple_ordered_table ( + a0 INTEGER, + a INTEGER, + b INTEGER, + c INTEGER, + d INTEGER +) +STORED AS CSV +WITH HEADER ROW +WITH ORDER (a ASC, b ASC) +WITH ORDER (c ASC) +LOCATION '../core/tests/data/window_2.csv'; + +query TT +EXPLAIN SELECT LAST_VALUE(l.d ORDER BY l.a) AS amount_usd +FROM multiple_ordered_table AS l +INNER JOIN ( + SELECT *, ROW_NUMBER() OVER (ORDER BY r.a) as row_n FROM multiple_ordered_table AS r +) +ON l.d = r.d AND + l.a >= r.a - 10 +GROUP BY row_n +ORDER BY row_n +---- +logical_plan +Projection: amount_usd +--Sort: row_n ASC NULLS LAST +----Projection: LAST_VALUE(l.d) ORDER BY [l.a ASC NULLS LAST] AS amount_usd, row_n +------Aggregate: groupBy=[[row_n]], aggr=[[LAST_VALUE(l.d) ORDER BY [l.a ASC NULLS LAST]]] +--------Projection: l.a, l.d, row_n +----------Inner Join: l.d = r.d Filter: CAST(l.a AS Int64) >= CAST(r.a AS Int64) - Int64(10) +------------SubqueryAlias: l +--------------TableScan: multiple_ordered_table projection=[a, d] +------------Projection: r.a, r.d, ROW_NUMBER() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS row_n +--------------WindowAggr: windowExpr=[[ROW_NUMBER() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +----------------SubqueryAlias: r +------------------TableScan: multiple_ordered_table projection=[a, d] +physical_plan +ProjectionExec: expr=[amount_usd@0 as amount_usd] +--ProjectionExec: expr=[LAST_VALUE(l.d) ORDER BY [l.a ASC NULLS LAST]@1 as amount_usd, row_n@0 as row_n] +----AggregateExec: mode=Single, gby=[row_n@2 as row_n], aggr=[LAST_VALUE(l.d)], ordering_mode=Sorted +------ProjectionExec: expr=[a@0 as a, d@1 as d, row_n@4 as row_n] +--------CoalesceBatchesExec: target_batch_size=2 +----------HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(d@1, d@1)], filter=CAST(a@0 AS Int64) >= CAST(a@1 AS Int64) - 10 +------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true +------------ProjectionExec: expr=[a@0 as a, d@1 as d, ROW_NUMBER() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as row_n] +--------------BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow }], mode=[Sorted] +----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true + +# run query above in multiple partitions +statement ok +set datafusion.execution.target_partitions = 2; + +# use bounded variants +statement ok +set datafusion.optimizer.prefer_existing_sort = true; + +query TT +EXPLAIN SELECT l.a, LAST_VALUE(r.b ORDER BY r.a ASC NULLS FIRST) as last_col1 +FROM annotated_data as l +JOIN annotated_data as r +ON l.a = r.a +GROUP BY l.a, l.b, l.c +ORDER BY l.a ASC NULLS FIRST; +---- +logical_plan +Sort: l.a ASC NULLS FIRST +--Projection: l.a, LAST_VALUE(r.b) ORDER BY [r.a ASC NULLS FIRST] AS last_col1 +----Aggregate: groupBy=[[l.a, l.b, l.c]], aggr=[[LAST_VALUE(r.b) ORDER BY [r.a ASC NULLS FIRST]]] +------Inner Join: l.a = r.a 
+--------SubqueryAlias: l +----------TableScan: annotated_data projection=[a, b, c] +--------SubqueryAlias: r +----------TableScan: annotated_data projection=[a, b] +physical_plan +SortPreservingMergeExec: [a@0 ASC] +--SortExec: expr=[a@0 ASC] +----ProjectionExec: expr=[a@0 as a, LAST_VALUE(r.b) ORDER BY [r.a ASC NULLS FIRST]@3 as last_col1] +------AggregateExec: mode=FinalPartitioned, gby=[a@0 as a, b@1 as b, c@2 as c], aggr=[LAST_VALUE(r.b)] +--------CoalesceBatchesExec: target_batch_size=2 +----------RepartitionExec: partitioning=Hash([a@0, b@1, c@2], 2), input_partitions=2 +------------AggregateExec: mode=Partial, gby=[a@0 as a, b@1 as b, c@2 as c], aggr=[LAST_VALUE(r.b)], ordering_mode=PartiallySorted([0]) +--------------CoalesceBatchesExec: target_batch_size=2 +----------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, a@0)] +------------------CoalesceBatchesExec: target_batch_size=2 +--------------------RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2 +----------------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 +------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c], output_ordering=[a@0 ASC, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true +------------------CoalesceBatchesExec: target_batch_size=2 +--------------------SortPreservingRepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2, sort_exprs=a@0 ASC,b@1 ASC NULLS LAST +----------------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 +------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], output_ordering=[a@0 ASC, b@1 ASC NULLS LAST], has_header=true + #### # Config teardown #### @@ -3296,5 +3417,9 @@ set datafusion.optimizer.prefer_hash_join = true; statement ok set datafusion.execution.target_partitions = 2; +statement ok +set datafusion.optimizer.prefer_existing_sort = false; + statement ok drop table annotated_data; + diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt index e5c1a828492a..ecb7fe13fcf4 100644 --- a/datafusion/sqllogictest/test_files/scalar.slt +++ b/datafusion/sqllogictest/test_files/scalar.slt @@ -1878,3 +1878,51 @@ query T SELECT CONCAT('Hello', 'World') ---- HelloWorld + +statement ok +CREATE TABLE simple_string( + letter STRING, + letter2 STRING +) as VALUES + ('A', 'APACHE'), + ('B', 'APACHE'), + ('C', 'APACHE'), + ('D', 'APACHE') +; + +query TT +EXPLAIN SELECT letter, letter = LEFT('APACHE', 1) FROM simple_string; +---- +logical_plan +Projection: simple_string.letter, simple_string.letter = Utf8("A") AS simple_string.letter = left(Utf8("APACHE"),Int64(1)) +--TableScan: simple_string projection=[letter] +physical_plan +ProjectionExec: expr=[letter@0 as letter, letter@0 = A as simple_string.letter = left(Utf8("APACHE"),Int64(1))] +--MemoryExec: partitions=1, partition_sizes=[1] + +query TB +SELECT letter, letter = LEFT('APACHE', 1) FROM simple_string; + ---- +---- +A true +B false +C false +D false + +query TT +EXPLAIN SELECT letter, letter = LEFT(letter2, 1) FROM simple_string; +---- +logical_plan +Projection: simple_string.letter, simple_string.letter = left(simple_string.letter2, Int64(1)) +--TableScan: simple_string projection=[letter, letter2] +physical_plan +ProjectionExec: expr=[letter@0 as letter, letter@0 = left(letter2@1, 1) as simple_string.letter = 
left(simple_string.letter2,Int64(1))] +--MemoryExec: partitions=1, partition_sizes=[1] + +query TB +SELECT letter, letter = LEFT(letter2, 1) FROM simple_string; +---- +A true +B false +C false +D false diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index 1d427479763a..98ea061c731b 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -848,6 +848,26 @@ statement error DataFusion error: Error during planning: EXCLUDE or EXCEPT conta SELECT * EXCLUDE(a, a) FROM table1 +# if EXCEPT all the columns, query should still succeed but return empty +statement ok +SELECT * EXCEPT(a, b, c, d) +FROM table1 + +# EXCLUDE order shouldn't matter +query II +SELECT * EXCLUDE(b, a) +FROM table1 +ORDER BY c +LIMIT 5 +---- +100 1000 +200 2000 + +# EXCLUDE with out of order but duplicate columns should error +statement error DataFusion error: Error during planning: EXCLUDE or EXCEPT contains duplicate column names +SELECT * EXCLUDE(d, b, c, a, a, b, c, d) +FROM table1 + # run below query in multi partitions statement ok set datafusion.execution.target_partitions = 2; diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index fea61b076ebc..e186aa12f7a9 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -217,7 +217,7 @@ SELECT to_timestamp_micros(ts) FROM ts_data_secs LIMIT 3 # to nanos query P -SELECT to_timestamp(ts) FROM ts_data_secs LIMIT 3 +SELECT to_timestamp_nanos(ts) FROM ts_data_secs LIMIT 3 ---- 2020-09-08T13:42:29 2020-09-08T12:42:29 @@ -244,7 +244,7 @@ SELECT to_timestamp_seconds(ts) FROM ts_data_micros LIMIT 3 2020-09-08T11:42:29 -# Original column is micros, convert to nanos and check timestamp +# Original column is micros, convert to seconds and check timestamp query P SELECT to_timestamp(ts) FROM ts_data_micros LIMIT 3 @@ -266,7 +266,7 @@ SELECT from_unixtime(ts / 1000000000) FROM ts_data LIMIT 3; # to_timestamp query I -SELECT COUNT(*) FROM ts_data_nanos where ts > to_timestamp('2020-09-08T12:00:00+00:00') +SELECT COUNT(*) FROM ts_data_nanos where ts > timestamp '2020-09-08T12:00:00+00:00' ---- 2 @@ -375,7 +375,7 @@ set datafusion.optimizer.skip_failed_rules = true query P select to_timestamp(a) from (select to_timestamp(1) as a) A; ---- -1970-01-01T00:00:00.000000001 +1970-01-01T00:00:01 # cast_to_timestamp_seconds_twice query P @@ -383,7 +383,6 @@ select to_timestamp_seconds(a) from (select to_timestamp_seconds(1) as a)A ---- 1970-01-01T00:00:01 - # cast_to_timestamp_millis_twice query P select to_timestamp_millis(a) from (select to_timestamp_millis(1) as a)A; @@ -396,11 +395,17 @@ select to_timestamp_micros(a) from (select to_timestamp_micros(1) as a)A; ---- 1970-01-01T00:00:00.000001 +# cast_to_timestamp_nanos_twice +query P +select to_timestamp_nanos(a) from (select to_timestamp_nanos(1) as a)A; +---- +1970-01-01T00:00:00.000000001 + # to_timestamp_i32 query P select to_timestamp(cast (1 as int)); ---- -1970-01-01T00:00:00.000000001 +1970-01-01T00:00:01 # to_timestamp_micros_i32 query P @@ -408,6 +413,12 @@ select to_timestamp_micros(cast (1 as int)); ---- 1970-01-01T00:00:00.000001 +# to_timestamp_nanos_i32 +query P +select to_timestamp_nanos(cast (1 as int)); +---- +1970-01-01T00:00:00.000000001 + # to_timestamp_millis_i32 query P select to_timestamp_millis(cast (1 as int)); @@ -1776,3 +1787,9 @@ query B SELECT TIMESTAMPTZ '2020-01-01 
00:00:00Z' = TIMESTAMP '2020-01-01' ---- true + +# verify to_timestamp edge cases to be in sync with postgresql +query PPPPP +SELECT to_timestamp(null), to_timestamp(-62125747200), to_timestamp(0), to_timestamp(1926632005177), to_timestamp(1926632005) +---- +NULL 0001-04-25T00:00:00 1970-01-01T00:00:00 +63022-07-16T12:59:37 2031-01-19T23:33:25 diff --git a/datafusion/sqllogictest/test_files/tpch/q17.slt.part b/datafusion/sqllogictest/test_files/tpch/q17.slt.part index 50661b9b10a8..4d4aa4b1395f 100644 --- a/datafusion/sqllogictest/test_files/tpch/q17.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/q17.slt.part @@ -58,21 +58,19 @@ ProjectionExec: expr=[CAST(SUM(lineitem.l_extendedprice)@0 AS Float64) / 7 as av --------ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice] ----------CoalesceBatchesExec: target_batch_size=8192 ------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@2, l_partkey@1)], filter=CAST(l_quantity@0 AS Decimal128(30, 15)) < Float64(0.2) * AVG(lineitem.l_quantity)@1 ---------------CoalesceBatchesExec: target_batch_size=8192 -----------------RepartitionExec: partitioning=Hash([p_partkey@2], 4), input_partitions=4 -------------------ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, p_partkey@3 as p_partkey] +--------------ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, p_partkey@3 as p_partkey] +----------------CoalesceBatchesExec: target_batch_size=8192 +------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_partkey@0, p_partkey@0)] --------------------CoalesceBatchesExec: target_batch_size=8192 -----------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_partkey@0, p_partkey@0)] -------------------------CoalesceBatchesExec: target_batch_size=8192 ---------------------------RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 -----------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_partkey, l_quantity, l_extendedprice], has_header=false -------------------------CoalesceBatchesExec: target_batch_size=8192 ---------------------------RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 -----------------------------ProjectionExec: expr=[p_partkey@0 as p_partkey] -------------------------------CoalesceBatchesExec: target_batch_size=8192 ---------------------------------FilterExec: p_brand@1 = Brand#23 AND p_container@2 = MED BOX -----------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/part.tbl]]}, projection=[p_partkey, p_brand, p_container], has_header=false +----------------------RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 +------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], 
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_partkey, l_quantity, l_extendedprice], has_header=false +--------------------CoalesceBatchesExec: target_batch_size=8192 +----------------------RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 +------------------------ProjectionExec: expr=[p_partkey@0 as p_partkey] +--------------------------CoalesceBatchesExec: target_batch_size=8192 +----------------------------FilterExec: p_brand@1 = Brand#23 AND p_container@2 = MED BOX +------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +--------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/part.tbl]]}, projection=[p_partkey, p_brand, p_container], has_header=false --------------ProjectionExec: expr=[CAST(0.2 * CAST(AVG(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as Float64(0.2) * AVG(lineitem.l_quantity), l_partkey@0 as l_partkey] ----------------AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey], aggr=[AVG(lineitem.l_quantity)] ------------------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/update.slt b/datafusion/sqllogictest/test_files/update.slt new file mode 100644 index 000000000000..cb8c6a4fac28 --- /dev/null +++ b/datafusion/sqllogictest/test_files/update.slt @@ -0,0 +1,79 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +########## +## Update Tests +########## + +statement ok +create table t1(a int, b varchar, c double, d int); + +# Turn off the optimizer to make the logical plan closer to the initial one +statement ok +set datafusion.optimizer.max_passes = 0; + +query TT +explain update t1 set a=1, b=2, c=3.0, d=NULL; +---- +logical_plan +Dml: op=[Update] table=[t1] +--Projection: CAST(Int64(1) AS Int32) AS a, CAST(Int64(2) AS Utf8) AS b, Float64(3) AS c, CAST(NULL AS Int32) AS d +----TableScan: t1 + +query TT +explain update t1 set a=c+1, b=a, c=c+1.0, d=b; +---- +logical_plan +Dml: op=[Update] table=[t1] +--Projection: CAST(t1.c + CAST(Int64(1) AS Float64) AS Int32) AS a, CAST(t1.a AS Utf8) AS b, t1.c + Float64(1) AS c, CAST(t1.b AS Int32) AS d +----TableScan: t1 + +statement ok +create table t2(a int, b varchar, c double, d int); + +## set from subquery +query TT +explain update t1 set b = (select max(b) from t2 where t1.a = t2.a) +---- +logical_plan +Dml: op=[Update] table=[t1] +--Projection: t1.a AS a, () AS b, t1.c AS c, t1.d AS d +----Subquery: +------Projection: MAX(t2.b) +--------Aggregate: groupBy=[[]], aggr=[[MAX(t2.b)]] +----------Filter: outer_ref(t1.a) = t2.a +------------TableScan: t2 +----TableScan: t1 + +# set from other table +query TT +explain update t1 set b = t2.b, c = t2.a, d = 1 from t2 where t1.a = t2.a and t1.b > 'foo' and t2.c > 1.0; +---- +logical_plan +Dml: op=[Update] table=[t1] +--Projection: t1.a AS a, t2.b AS b, CAST(t2.a AS Float64) AS c, CAST(Int64(1) AS Int32) AS d +----Filter: t1.a = t2.a AND t1.b > Utf8("foo") AND t2.c > Float64(1) +------CrossJoin: +--------TableScan: t1 +--------TableScan: t2 + +statement ok +create table t3(a int, b varchar, c double, d int); + +# set from mutiple tables, sqlparser only supports from one table +query error DataFusion error: SQL error: ParserError\("Expected end of statement, found: ,"\) +explain update t1 set b = t2.b, c = t3.a, d = 1 from t2, t3 where t1.a = t2.a and t1.a = t3.a; \ No newline at end of file diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 213f6daaef3e..c7060433d999 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -357,7 +357,7 @@ Sort: d.b ASC NULLS LAST physical_plan SortPreservingMergeExec: [b@0 ASC NULLS LAST] --ProjectionExec: expr=[b@0 as b, MAX(d.a)@1 as max_a, MAX(d.seq)@2 as MAX(d.seq)] -----AggregateExec: mode=SinglePartitioned, gby=[b@2 as b], aggr=[MAX(d.a), MAX(d.seq)], ordering_mode=FullyOrdered +----AggregateExec: mode=SinglePartitioned, gby=[b@2 as b], aggr=[MAX(d.a), MAX(d.seq)], ordering_mode=Sorted ------ProjectionExec: expr=[ROW_NUMBER() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as seq, a@0 as a, b@1 as b] --------BoundedWindowAggExec: wdw=[ROW_NUMBER() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow }], mode=[Sorted] ----------SortExec: expr=[b@1 ASC NULLS LAST,a@0 ASC NULLS LAST] @@ -1957,7 +1957,7 @@ Sort: aggregate_test_100.c1 ASC NULLS LAST ----WindowAggr: windowExpr=[[ROW_NUMBER() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED 
PRECEDING AND UNBOUNDED FOLLOWING]] ------TableScan: aggregate_test_100 projection=[c1] physical_plan -SortPreservingMergeExec: [c1@0 ASC NULLS LAST] +SortPreservingMergeExec: [c1@0 ASC NULLS LAST,rn1@1 ASC NULLS LAST] --ProjectionExec: expr=[c1@0 as c1, ROW_NUMBER() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as rn1] ----BoundedWindowAggExec: wdw=[ROW_NUMBER() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "ROW_NUMBER() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)) }], mode=[Sorted] ------SortExec: expr=[c1@0 ASC NULLS LAST] @@ -2132,15 +2132,12 @@ ProjectionExec: expr=[c9@1 as c9, SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER B ----BoundedWindowAggExec: wdw=[SUM(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted] ------ProjectionExec: expr=[c2@0 as c2, c9@2 as c9, c1_alias@3 as c1_alias, SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@4 as SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING, SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@5 as SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, SUM(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@6 as SUM(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING] --------WindowAggExec: wdw=[SUM(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "SUM(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(NULL)) }] -----------SortExec: expr=[c2@0 ASC NULLS LAST,c1_alias@3 ASC NULLS LAST,c9@2 ASC NULLS LAST,c8@1 ASC NULLS LAST] -------------ProjectionExec: expr=[c2@1 as c2, c8@2 as c8, c9@3 as c9, c1_alias@4 as c1_alias, SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@5 as SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING, SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@6 as 
SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING] ---------------BoundedWindowAggExec: wdw=[SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted] -----------------WindowAggExec: wdw=[SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(NULL)) }] -------------------SortExec: expr=[c1@0 ASC NULLS LAST,c2@1 ASC NULLS LAST,c9@3 ASC NULLS LAST,c8@2 ASC NULLS LAST] ---------------------ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c8@2 as c8, c9@3 as c9, c1@0 as c1_alias] -----------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c8, c9], has_header=true - - +----------ProjectionExec: expr=[c2@1 as c2, c8@2 as c8, c9@3 as c9, c1_alias@4 as c1_alias, SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@5 as SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING, SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@6 as SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING] +------------BoundedWindowAggExec: wdw=[SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted] +--------------WindowAggExec: wdw=[SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(NULL)) }] +----------------SortExec: expr=[c1@0 ASC NULLS LAST,c2@1 ASC NULLS LAST,c9@3 ASC NULLS LAST,c8@2 ASC NULLS LAST] +------------------ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c8@2 as c8, c9@3 as c9, c1@0 as c1_alias] +--------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c8, c9], has_header=true 
query IIIII SELECT c9, @@ -2338,10 +2335,11 @@ Limit: skip=0, fetch=5 ----------TableScan: aggregate_test_100 projection=[c9] physical_plan GlobalLimitExec: skip=0, fetch=5 ---ProjectionExec: expr=[c9@0 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] -----BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted] -------SortExec: expr=[c9@0 DESC] ---------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true +--SortExec: TopK(fetch=5), expr=[rn1@1 ASC NULLS LAST,c9@0 ASC NULLS LAST] +----ProjectionExec: expr=[c9@0 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1] +------BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted] +--------SortExec: expr=[c9@0 DESC] +----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true query II SELECT c9, rn1 FROM (SELECT c9, @@ -3241,7 +3239,7 @@ physical_plan ProjectionExec: expr=[SUM(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as sum1, SUM(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as sum2, SUM(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as sum3, SUM(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as sum4] --BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow }], mode=[Linear] ----CoalesceBatchesExec: target_batch_size=4096 -------SortPreservingRepartitionExec: partitioning=Hash([d@1], 2), input_partitions=2, 
sort_exprs=a@0 ASC NULLS LAST,b ASC NULLS LAST,c ASC NULLS LAST +------SortPreservingRepartitionExec: partitioning=Hash([d@1], 2), input_partitions=2, sort_exprs=a@0 ASC NULLS LAST --------ProjectionExec: expr=[a@0 as a, d@3 as d, SUM(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as SUM(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, SUM(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as SUM(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, SUM(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@6 as SUM(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] ----------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(annotated_data_infinite2.a) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow }], mode=[Sorted] ------------CoalesceBatchesExec: target_batch_size=4096 @@ -3379,6 +3377,96 @@ SELECT ORDER BY C3 LIMIT 5 +# Create a source where there is multiple orderings. +statement ok +CREATE EXTERNAL TABLE multiple_ordered_table ( + a0 INTEGER, + a INTEGER, + b INTEGER, + c INTEGER, + d INTEGER +) +STORED AS CSV +WITH HEADER ROW +WITH ORDER (a ASC, b ASC) +WITH ORDER (c ASC) +LOCATION '../core/tests/data/window_2.csv'; + +# All of the window execs in the physical plan should work in the +# sorted mode. 
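Before the EXPLAIN cases that follow, here is a hedged Rust sketch of the same scenario: it issues the identical DDL through `SessionContext::sql` and explains a window query so the `mode=[Sorted]` behaviour can be checked programmatically. The file location and the tokio runtime are assumptions taken from the test above.

```rust
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();

    // Same DDL as above: two orderings, (a, b) and (c), are declared for one CSV source.
    ctx.sql(
        "CREATE EXTERNAL TABLE multiple_ordered_table ( \
             a0 INTEGER, a INTEGER, b INTEGER, c INTEGER, d INTEGER) \
         STORED AS CSV \
         WITH HEADER ROW \
         WITH ORDER (a ASC, b ASC) \
         WITH ORDER (c ASC) \
         LOCATION '../core/tests/data/window_2.csv'",
    )
    .await?;

    // With both orderings known, the window below needs no extra SortExec and
    // the BoundedWindowAggExec should report mode=[Sorted] in the plan.
    ctx.sql(
        "EXPLAIN SELECT MAX(d) OVER (PARTITION BY b, a ORDER BY c ASC) AS max1 \
         FROM multiple_ordered_table",
    )
    .await?
    .show()
    .await?;

    Ok(())
}
```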
+query TT +EXPLAIN SELECT MIN(d) OVER(ORDER BY c ASC) as min1, + MAX(d) OVER(PARTITION BY b, a ORDER BY c ASC) as max1 +FROM multiple_ordered_table +---- +logical_plan +Projection: MIN(multiple_ordered_table.d) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS min1, MAX(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS max1 +--WindowAggr: windowExpr=[[MIN(multiple_ordered_table.d) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +----Projection: multiple_ordered_table.c, multiple_ordered_table.d, MAX(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +------WindowAggr: windowExpr=[[MAX(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +--------TableScan: multiple_ordered_table projection=[a, b, c, d] +physical_plan +ProjectionExec: expr=[MIN(multiple_ordered_table.d) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as min1, MAX(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as max1] +--BoundedWindowAggExec: wdw=[MIN(multiple_ordered_table.d) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MIN(multiple_ordered_table.d) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow }], mode=[Sorted] +----ProjectionExec: expr=[c@2 as c, d@3 as d, MAX(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as MAX(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] +------BoundedWindowAggExec: wdw=[MAX(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MAX(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.b, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow }], mode=[Sorted] +--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], has_header=true + +query TT +EXPLAIN SELECT MAX(c) OVER(PARTITION BY d ORDER BY c ASC) as max_c +FROM( + SELECT * + FROM 
multiple_ordered_table + WHERE d=0) +---- +logical_plan +Projection: MAX(multiple_ordered_table.c) PARTITION BY [multiple_ordered_table.d] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS max_c +--WindowAggr: windowExpr=[[MAX(multiple_ordered_table.c) PARTITION BY [multiple_ordered_table.d] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +----Filter: multiple_ordered_table.d = Int32(0) +------TableScan: multiple_ordered_table projection=[c, d], partial_filters=[multiple_ordered_table.d = Int32(0)] +physical_plan +ProjectionExec: expr=[MAX(multiple_ordered_table.c) PARTITION BY [multiple_ordered_table.d] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as max_c] +--BoundedWindowAggExec: wdw=[MAX(multiple_ordered_table.c) PARTITION BY [multiple_ordered_table.d] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MAX(multiple_ordered_table.c) PARTITION BY [multiple_ordered_table.d] ORDER BY [multiple_ordered_table.c ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow }], mode=[Sorted] +----CoalesceBatchesExec: target_batch_size=4096 +------FilterExec: d@1 = 0 +--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c, d], output_ordering=[c@0 ASC NULLS LAST], has_header=true + +query TT +explain SELECT SUM(d) OVER(PARTITION BY c ORDER BY a ASC) +FROM multiple_ordered_table; +---- +logical_plan +Projection: SUM(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.c] ORDER BY [multiple_ordered_table.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +--WindowAggr: windowExpr=[[SUM(CAST(multiple_ordered_table.d AS Int64)) PARTITION BY [multiple_ordered_table.c] ORDER BY [multiple_ordered_table.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +----TableScan: multiple_ordered_table projection=[a, c, d] +physical_plan +ProjectionExec: expr=[SUM(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.c] ORDER BY [multiple_ordered_table.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as SUM(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.c] ORDER BY [multiple_ordered_table.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] +--BoundedWindowAggExec: wdw=[SUM(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.c] ORDER BY [multiple_ordered_table.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.c] ORDER BY [multiple_ordered_table.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow }], mode=[Sorted] +----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true + +query TT +explain SELECT SUM(d) OVER(PARTITION BY c, a ORDER BY b ASC) +FROM multiple_ordered_table; +---- +logical_plan +Projection: 
SUM(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.c, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +--WindowAggr: windowExpr=[[SUM(CAST(multiple_ordered_table.d AS Int64)) PARTITION BY [multiple_ordered_table.c, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +----TableScan: multiple_ordered_table projection=[a, b, c, d] +physical_plan +ProjectionExec: expr=[SUM(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.c, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as SUM(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.c, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] +--BoundedWindowAggExec: wdw=[SUM(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.c, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(multiple_ordered_table.d) PARTITION BY [multiple_ordered_table.c, multiple_ordered_table.a] ORDER BY [multiple_ordered_table.b ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow }], mode=[Sorted] +----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], has_header=true + +query I +SELECT SUM(d) OVER(PARTITION BY c, a ORDER BY b ASC) +FROM multiple_ordered_table +LIMIT 5; +---- +0 +2 +0 +0 +1 # simple window query query II diff --git a/datafusion/substrait/Cargo.toml b/datafusion/substrait/Cargo.toml index 7c4ff868cfcd..585cb6e43d15 100644 --- a/datafusion/substrait/Cargo.toml +++ b/datafusion/substrait/Cargo.toml @@ -18,9 +18,9 @@ [package] name = "datafusion-substrait" description = "DataFusion Substrait Producer and Consumer" +readme = "README.md" version = { workspace = true } edition = { workspace = true } -readme = { workspace = true } homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } @@ -30,9 +30,9 @@ rust-version = "1.70" [dependencies] async-recursion = "1.0" chrono = { workspace = true } -datafusion = { version = "32.0.0", path = "../core" } -itertools = "0.11" -object_store = "0.7.0" +datafusion = { workspace = true } +itertools = { workspace = true } +object_store = { workspace = true } prost = "0.12" prost-types = "0.12" substrait = "0.18.0" diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs index ae65a2c7d94a..a15121652452 100644 --- a/datafusion/substrait/src/logical_plan/consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer.rs @@ -177,7 +177,7 @@ fn split_eq_and_noneq_join_predicate_with_nulls_equality( /// Convert Substrait Plan to DataFusion DataFrame pub async fn from_substrait_plan( - ctx: &mut SessionContext, + ctx: &SessionContext, plan: &Plan, ) -> Result { // Register function extension @@ -219,7 +219,7 @@ pub async fn from_substrait_plan( /// Convert Substrait Rel to DataFusion DataFrame #[async_recursion] pub async fn from_substrait_rel( - ctx: &mut SessionContext, 
+ ctx: &SessionContext, rel: &Rel, extensions: &HashMap, ) -> Result { diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs index 757bddf9fe58..e3c6f94d43d5 100644 --- a/datafusion/substrait/src/logical_plan/producer.rs +++ b/datafusion/substrait/src/logical_plan/producer.rs @@ -326,7 +326,7 @@ pub fn to_substrait_rel( left: Some(left), right: Some(right), r#type: join_type as i32, - expression: join_expr.clone(), + expression: join_expr, post_join_filter: None, advanced_extension: None, }))), diff --git a/datafusion/substrait/src/physical_plan/consumer.rs b/datafusion/substrait/src/physical_plan/consumer.rs index 7788ba0a69de..1dab1f9d5e39 100644 --- a/datafusion/substrait/src/physical_plan/consumer.rs +++ b/datafusion/substrait/src/physical_plan/consumer.rs @@ -38,7 +38,7 @@ use substrait::proto::{ /// Convert Substrait Rel to DataFusion ExecutionPlan #[async_recursion] pub async fn from_substrait_rel( - _ctx: &mut SessionContext, + _ctx: &SessionContext, rel: &Rel, _extensions: &HashMap, ) -> Result> { diff --git a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs index 32416125de24..ca2b4d48c460 100644 --- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs @@ -606,7 +606,7 @@ async fn new_test_grammar() -> Result<()> { #[tokio::test] async fn extension_logical_plan() -> Result<()> { - let mut ctx = create_context().await?; + let ctx = create_context().await?; let validation_bytes = "MockUserDefinedLogicalPlan".as_bytes().to_vec(); let ext_plan = LogicalPlan::Extension(Extension { node: Arc::new(MockUserDefinedLogicalPlan { @@ -617,7 +617,7 @@ async fn extension_logical_plan() -> Result<()> { }); let proto = to_substrait_plan(&ext_plan, &ctx)?; - let plan2 = from_substrait_plan(&mut ctx, &proto).await?; + let plan2 = from_substrait_plan(&ctx, &proto).await?; let plan1str = format!("{ext_plan:?}"); let plan2str = format!("{plan2:?}"); @@ -712,11 +712,11 @@ async fn verify_post_join_filter_value(proto: Box) -> Result<()> { } async fn assert_expected_plan(sql: &str, expected_plan_str: &str) -> Result<()> { - let mut ctx = create_context().await?; + let ctx = create_context().await?; let df = ctx.sql(sql).await?; let plan = df.into_optimized_plan()?; let proto = to_substrait_plan(&plan, &ctx)?; - let plan2 = from_substrait_plan(&mut ctx, &proto).await?; + let plan2 = from_substrait_plan(&ctx, &proto).await?; let plan2 = ctx.state().optimize(&plan2)?; let plan2str = format!("{plan2:?}"); assert_eq!(expected_plan_str, &plan2str); @@ -724,11 +724,11 @@ async fn assert_expected_plan(sql: &str, expected_plan_str: &str) -> Result<()> } async fn roundtrip_fill_na(sql: &str) -> Result<()> { - let mut ctx = create_context().await?; + let ctx = create_context().await?; let df = ctx.sql(sql).await?; let plan1 = df.into_optimized_plan()?; let proto = to_substrait_plan(&plan1, &ctx)?; - let plan2 = from_substrait_plan(&mut ctx, &proto).await?; + let plan2 = from_substrait_plan(&ctx, &proto).await?; let plan2 = ctx.state().optimize(&plan2)?; // Format plan string and replace all None's with 0 @@ -743,15 +743,15 @@ async fn test_alias(sql_with_alias: &str, sql_no_alias: &str) -> Result<()> { // Since we ignore the SubqueryAlias in the producer, the result should be // the same as producing a Substrait plan from the same query without aliases // sql_with_alias -> substrait -> logical plan = sql_no_alias 
-> substrait -> logical plan - let mut ctx = create_context().await?; + let ctx = create_context().await?; let df_a = ctx.sql(sql_with_alias).await?; let proto_a = to_substrait_plan(&df_a.into_optimized_plan()?, &ctx)?; - let plan_with_alias = from_substrait_plan(&mut ctx, &proto_a).await?; + let plan_with_alias = from_substrait_plan(&ctx, &proto_a).await?; let df = ctx.sql(sql_no_alias).await?; let proto = to_substrait_plan(&df.into_optimized_plan()?, &ctx)?; - let plan = from_substrait_plan(&mut ctx, &proto).await?; + let plan = from_substrait_plan(&ctx, &proto).await?; println!("{plan_with_alias:#?}"); println!("{plan:#?}"); @@ -763,11 +763,11 @@ async fn test_alias(sql_with_alias: &str, sql_no_alias: &str) -> Result<()> { } async fn roundtrip(sql: &str) -> Result<()> { - let mut ctx = create_context().await?; + let ctx = create_context().await?; let df = ctx.sql(sql).await?; let plan = df.into_optimized_plan()?; let proto = to_substrait_plan(&plan, &ctx)?; - let plan2 = from_substrait_plan(&mut ctx, &proto).await?; + let plan2 = from_substrait_plan(&ctx, &proto).await?; let plan2 = ctx.state().optimize(&plan2)?; println!("{plan:#?}"); @@ -780,11 +780,11 @@ async fn roundtrip(sql: &str) -> Result<()> { } async fn roundtrip_verify_post_join_filter(sql: &str) -> Result<()> { - let mut ctx = create_context().await?; + let ctx = create_context().await?; let df = ctx.sql(sql).await?; let plan = df.into_optimized_plan()?; let proto = to_substrait_plan(&plan, &ctx)?; - let plan2 = from_substrait_plan(&mut ctx, &proto).await?; + let plan2 = from_substrait_plan(&ctx, &proto).await?; let plan2 = ctx.state().optimize(&plan2)?; println!("{plan:#?}"); @@ -799,11 +799,11 @@ async fn roundtrip_verify_post_join_filter(sql: &str) -> Result<()> { } async fn roundtrip_all_types(sql: &str) -> Result<()> { - let mut ctx = create_all_type_context().await?; + let ctx = create_all_type_context().await?; let df = ctx.sql(sql).await?; let plan = df.into_optimized_plan()?; let proto = to_substrait_plan(&plan, &ctx)?; - let plan2 = from_substrait_plan(&mut ctx, &proto).await?; + let plan2 = from_substrait_plan(&ctx, &proto).await?; let plan2 = ctx.state().optimize(&plan2)?; println!("{plan:#?}"); diff --git a/datafusion/substrait/tests/cases/roundtrip_physical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_physical_plan.rs index 3e5e757e4c39..b64dd2c138fc 100644 --- a/datafusion/substrait/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/substrait/tests/cases/roundtrip_physical_plan.rs @@ -62,10 +62,10 @@ async fn parquet_exec() -> Result<()> { let substrait_rel = producer::to_substrait_rel(parquet_exec.as_ref(), &mut extension_info)?; - let mut ctx = SessionContext::new(); + let ctx = SessionContext::new(); let parquet_exec_roundtrip = - consumer::from_substrait_rel(&mut ctx, substrait_rel.as_ref(), &HashMap::new()) + consumer::from_substrait_rel(&ctx, substrait_rel.as_ref(), &HashMap::new()) .await?; let expected = format!("{}", displayable(parquet_exec.as_ref()).indent(true)); diff --git a/datafusion/substrait/tests/cases/serialize.rs b/datafusion/substrait/tests/cases/serialize.rs index d6dc5d7e58f2..f6736ca22279 100644 --- a/datafusion/substrait/tests/cases/serialize.rs +++ b/datafusion/substrait/tests/cases/serialize.rs @@ -30,7 +30,7 @@ mod tests { #[tokio::test] async fn serialize_simple_select() -> Result<()> { - let mut ctx = create_context().await?; + let ctx = create_context().await?; let path = "tests/simple_select.bin"; let sql = "SELECT a, b FROM data"; // Test reference @@ -42,7 +42,7 
@@ mod tests { // Read substrait plan from file let proto = serializer::deserialize(path).await?; // Check plan equality - let plan = from_substrait_plan(&mut ctx, &proto).await?; + let plan = from_substrait_plan(&ctx, &proto).await?; let plan_str_ref = format!("{plan_ref:?}"); let plan_str = format!("{plan:?}"); assert_eq!(plan_str_ref, plan_str); diff --git a/datafusion/wasmtest/Cargo.toml b/datafusion/wasmtest/Cargo.toml index e1a9a5d41a5a..882b02bcc84b 100644 --- a/datafusion/wasmtest/Cargo.toml +++ b/datafusion/wasmtest/Cargo.toml @@ -18,9 +18,9 @@ [package] name = "datafusion-wasmtest" description = "Test library to compile datafusion crates to wasm" +readme = "README.md" version = { workspace = true } edition = { workspace = true } -readme = { workspace = true } homepage = { workspace = true } repository = { workspace = true } license = { workspace = true } @@ -38,11 +38,11 @@ crate-type = ["cdylib", "rlib",] # code size when deploying. console_error_panic_hook = { version = "0.1.1", optional = true } -datafusion-common = { path = "../common", version = "32.0.0", default-features = false } -datafusion-expr = { path = "../expr" } -datafusion-optimizer = { path = "../optimizer" } -datafusion-physical-expr = { path = "../physical-expr" } -datafusion-sql = { path = "../sql" } +datafusion-common = { workspace = true } +datafusion-expr = { workspace = true } +datafusion-optimizer = { workspace = true } +datafusion-physical-expr = { workspace = true } +datafusion-sql = { workspace = true } # getrandom must be compiled with js feature getrandom = { version = "0.2.8", features = ["js"] } diff --git a/datafusion/wasmtest/README.md b/datafusion/wasmtest/README.md index 5dc7bb2de45d..d26369a18ab9 100644 --- a/datafusion/wasmtest/README.md +++ b/datafusion/wasmtest/README.md @@ -17,9 +17,16 @@ under the License. --> -## wasmtest +# DataFusion wasmtest + +[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. + +This crate is a submodule of DataFusion used to verify that various DataFusion crates compile successfully to the +`wasm32-unknown-unknown` target with wasm-pack. -Library crate to verify that various DataFusion crates compile successfully to the `wasm32-unknown-unknown` target with wasm-pack. +[df]: https://crates.io/crates/datafusion + +## wasmtest Some of DataFusion's downstream projects compile to WASM to run in the browser. Doing so requires special care that certain library dependencies are not included in DataFusion. diff --git a/dev/update_datafusion_versions.py b/dev/update_datafusion_versions.py index 7cbe39fdfb66..19701b813671 100755 --- a/dev/update_datafusion_versions.py +++ b/dev/update_datafusion_versions.py @@ -43,6 +43,7 @@ 'datafusion-wasmtest': 'datafusion/wasmtest/Cargo.toml', 'datafusion-benchmarks': 'benchmarks/Cargo.toml', 'datafusion-examples': 'datafusion-examples/Cargo.toml', + 'datafusion-docs': 'docs/Cargo.toml', } def update_workspace_version(new_version: str): diff --git a/docs/Cargo.toml b/docs/Cargo.toml new file mode 100644 index 000000000000..9caa0bde3608 --- /dev/null +++ b/docs/Cargo.toml @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "datafusion-docs-tests" +description = "DataFusion Documentation Tests" +publish = false +version = { workspace = true } +edition = { workspace = true } +readme = { workspace = true } +homepage = { workspace = true } +repository = { workspace = true } +license = { workspace = true } +authors = { workspace = true } +rust-version = "1.70" + +[dependencies] +datafusion = { path = "../datafusion/core", version = "32.0.0", default-features = false } diff --git a/docs/source/library-user-guide/building-logical-plans.md b/docs/source/library-user-guide/building-logical-plans.md index 406f4881129c..fe922d8eaeb1 100644 --- a/docs/source/library-user-guide/building-logical-plans.md +++ b/docs/source/library-user-guide/building-logical-plans.md @@ -19,4 +19,131 @@ # Building Logical Plans -Coming Soon +A logical plan is a structured representation of a database query that describes the high-level operations and +transformations needed to retrieve data from a database or data source. It abstracts away specific implementation +details and focuses on the logical flow of the query, including operations like filtering, sorting, and joining tables. + +This logical plan serves as an intermediate step before generating an optimized physical execution plan. This is +explained in more detail in the [Query Planning and Execution Overview] section of the [Architecture Guide]. + +## Building Logical Plans Manually + +DataFusion's [LogicalPlan] is an enum containing variants representing all the supported operators, and also +contains an `Extension` variant that allows projects building on DataFusion to add custom logical operators. + +It is possible to create logical plans by directly creating instances of the [LogicalPlan] enum as follows, but it is +much easier to use the [LogicalPlanBuilder], which is described in the next section. + +Here is an example of building a logical plan directly: + + + +```rust +// create a logical table source +let schema = Schema::new(vec![ + Field::new("id", DataType::Int32, true), + Field::new("name", DataType::Utf8, true), +]); +let table_source = LogicalTableSource::new(SchemaRef::new(schema)); + +// create a TableScan plan +let projection = None; // optional projection +let filters = vec![]; // optional filters to push down +let fetch = None; // optional LIMIT +let table_scan = LogicalPlan::TableScan(TableScan::try_new( + "person", + Arc::new(table_source), + projection, + filters, + fetch, +)?); + +// create a Filter plan that evaluates `id > 500` and wraps the TableScan +let filter_expr = col("id").gt(lit(500)); +let plan = LogicalPlan::Filter(Filter::try_new(filter_expr, Arc::new(table_scan))?); + +// print the plan +println!("{}", plan.display_indent_schema()); +``` + +This example produces the following plan: + +``` +Filter: person.id > Int32(500) [id:Int32;N, name:Utf8;N] + TableScan: person [id:Int32;N, name:Utf8;N] +``` + +## Building Logical Plans with LogicalPlanBuilder + +DataFusion logical plans can be created using the [LogicalPlanBuilder] struct.
There is also a [DataFrame] API which is +a higher-level API that delegates to [LogicalPlanBuilder]. + +The following associated functions can be used to create a new builder: + +- `empty` - create an empty plan with no fields +- `values` - create a plan from a set of literal values +- `scan` - create a plan representing a table scan +- `scan_with_filters` - create a plan representing a table scan with filters + +Once the builder is created, transformation methods can be called to declare that further operations should be +performed on the plan. Note that all we are doing at this stage is building up the logical plan structure. No query +execution will be performed. + +Here are some examples of transformation methods, but for a full list, refer to the [LogicalPlanBuilder] API documentation. + +- `filter` +- `limit` +- `sort` +- `distinct` +- `join` + +The following example demonstrates building the same simple query plan as the previous example, with a table scan followed by a filter. + + + +```rust +// create a logical table source +let schema = Schema::new(vec![ + Field::new("id", DataType::Int32, true), + Field::new("name", DataType::Utf8, true), +]); +let table_source = LogicalTableSource::new(SchemaRef::new(schema)); + +// optional projection +let projection = None; + +// create a LogicalPlanBuilder for a table scan +let builder = LogicalPlanBuilder::scan("person", Arc::new(table_source), projection)?; + +// perform a filter operation and build the plan +let plan = builder + .filter(col("id").gt(lit(500)))? // WHERE id > 500 + .build()?; + +// print the plan +println!("{}", plan.display_indent_schema()); +``` + +This example produces the following plan: + +``` +Filter: person.id > Int32(500) [id:Int32;N, name:Utf8;N] + TableScan: person [id:Int32;N, name:Utf8;N] +``` + +## Table Sources + +The previous example used a [LogicalTableSource], which is used for tests and documentation in DataFusion, and is also +suitable if you are using DataFusion to build logical plans but do not use DataFusion's physical planner. However, if you +want to use a [TableSource] that can be executed in DataFusion then you will need to use [DefaultTableSource], which is a +wrapper for a [TableProvider]. 
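To make this concrete, here is a short sketch (an editor-added example, not from the guide above) that wraps an in-memory `MemTable` provider in a `DefaultTableSource` so the builder-produced plan can actually be executed. The use of `SessionContext::execute_logical_plan` and the tokio runtime are assumptions about the surrounding application.

```rust
use std::sync::Arc;

use datafusion::arrow::array::{ArrayRef, Int32Array};
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::datasource::default_table_source::DefaultTableSource;
use datafusion::datasource::MemTable;
use datafusion::error::Result;
use datafusion::logical_expr::LogicalPlanBuilder;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    // A real TableProvider backed by an in-memory RecordBatch.
    let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, true)]));
    let batch = RecordBatch::try_new(
        schema.clone(),
        vec![Arc::new(Int32Array::from(vec![100, 501, 1000])) as ArrayRef],
    )?;
    let provider = Arc::new(MemTable::try_new(schema, vec![vec![batch]])?);

    // DefaultTableSource adapts the TableProvider into a TableSource that the
    // physical planner can execute.
    let source = Arc::new(DefaultTableSource::new(provider));

    // Same shape of plan as in the guide: scan + filter on `id > 500`.
    let plan = LogicalPlanBuilder::scan("person", source, None)?
        .filter(col("id").gt(lit(500)))?
        .build()?;

    // Run the hand-built plan and print the results.
    let ctx = SessionContext::new();
    ctx.execute_logical_plan(plan).await?.show().await?;
    Ok(())
}
```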
+ +[query planning and execution overview]: https://docs.rs/datafusion/latest/datafusion/index.html#query-planning-and-execution-overview +[architecture guide]: https://docs.rs/datafusion/latest/datafusion/index.html#architecture +[logicalplan]: https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/enum.LogicalPlan.html +[logicalplanbuilder]: https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/builder/struct.LogicalPlanBuilder.html +[dataframe]: using-the-dataframe-api.md +[logicaltablesource]: https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/builder/struct.LogicalTableSource.html +[defaulttablesource]: https://docs.rs/datafusion/latest/datafusion/datasource/default_table_source/struct.DefaultTableSource.html +[tableprovider]: https://docs.rs/datafusion/latest/datafusion/datasource/provider/trait.TableProvider.html +[tablesource]: https://docs.rs/datafusion-expr/latest/datafusion_expr/trait.TableSource.html diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index d5717b9c2130..be05084fb249 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -1218,6 +1218,7 @@ regexp_replace(str, regexp, replacement, flags) - [to_timestamp_millis](#to_timestamp_millis) - [to_timestamp_micros](#to_timestamp_micros) - [to_timestamp_seconds](#to_timestamp_seconds) +- [to_timestamp_nanos](#to_timestamp_nanos) - [from_unixtime](#from_unixtime) ### `now` @@ -1390,10 +1391,11 @@ extract(field FROM source) ### `to_timestamp` -Converts a value to RFC3339 nanosecond timestamp format (`YYYY-MM-DDT00:00:00.000000000Z`). -Supports timestamp, integer, and unsigned integer types as input. -Integers and unsigned integers are parsed as Unix nanosecond timestamps and -return the corresponding RFC3339 nanosecond timestamp. +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). +Supports strings, integer, and unsigned integer types as input. +Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') +Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`) +return the corresponding timestamp. ``` to_timestamp(expression) @@ -1406,10 +1408,11 @@ to_timestamp(expression) ### `to_timestamp_millis` -Converts a value to RFC3339 millisecond timestamp format (`YYYY-MM-DDT00:00:00.000Z`). -Supports timestamp, integer, and unsigned integer types as input. -Integers and unsigned integers are parsed as Unix nanosecond timestamps and -return the corresponding RFC3339 timestamp. +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). +Supports strings, integer, and unsigned integer types as input. +Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') +Integers and unsigned integers are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`) +return the corresponding timestamp. ``` to_timestamp_millis(expression) @@ -1422,13 +1425,26 @@ to_timestamp_millis(expression) ### `to_timestamp_micros` -Converts a value to RFC3339 microsecond timestamp format (`YYYY-MM-DDT00:00:00.000000Z`). -Supports timestamp, integer, and unsigned integer types as input. -Integers and unsigned integers are parsed as Unix nanosecond timestamps and -return the corresponding RFC3339 timestamp. +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000Z`). +Supports strings, integer, and unsigned integer types as input. +Strings are parsed as RFC3339 (e.g. 
'2023-07-20T05:44:00') +Integers and unsigned integers are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`) +return the corresponding timestamp. ``` -to_timestamp_micros(expression) +to_timestamp_micros(expression) +``` + +### `to_timestamp_nanos` + +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000Z`). +Supports strings, integer, and unsigned integer types as input. +Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') +Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`) +return the corresponding timestamp. + +``` +to_timestamp_nanos(expression) +``` #### Arguments @@ -1438,10 +1454,11 @@ to_timestamp_micros(expression) ### `to_timestamp_seconds` -Converts a value to RFC3339 second timestamp format (`YYYY-MM-DDT00:00:00Z`). -Supports timestamp, integer, and unsigned integer types as input. -Integers and unsigned integers are parsed as Unix nanosecond timestamps and -return the corresponding RFC3339 timestamp. +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). +Supports strings, integer, and unsigned integer types as input. +Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') +Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`) +return the corresponding timestamp. ``` to_timestamp_seconds(expression) @@ -1455,8 +1472,8 @@ to_timestamp_seconds(expression) ### `from_unixtime` Converts an integer to RFC3339 timestamp format (`YYYY-MM-DDT00:00:00.000000000Z`). -Input is parsed as a Unix nanosecond timestamp and returns the corresponding -RFC3339 timestamp. +Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`) +return the corresponding timestamp. ``` from_unixtime(expression) diff --git a/docs/src/lib.rs b/docs/src/lib.rs new file mode 100644 index 000000000000..f73132468ec9 --- /dev/null +++ b/docs/src/lib.rs @@ -0,0 +1,19 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#[cfg(test)] +mod library_logical_plan; diff --git a/docs/src/library_logical_plan.rs b/docs/src/library_logical_plan.rs new file mode 100644 index 000000000000..355003941570 --- /dev/null +++ b/docs/src/library_logical_plan.rs @@ -0,0 +1,78 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License.
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use datafusion::error::Result; +use datafusion::logical_expr::builder::LogicalTableSource; +use datafusion::logical_expr::{Filter, LogicalPlan, LogicalPlanBuilder, TableScan}; +use datafusion::prelude::*; +use std::sync::Arc; + +#[test] +fn plan_1() -> Result<()> { + // create a logical table source + let schema = Schema::new(vec![ + Field::new("id", DataType::Int32, true), + Field::new("name", DataType::Utf8, true), + ]); + let table_source = LogicalTableSource::new(SchemaRef::new(schema)); + + // create a TableScan plan + let projection = None; // optional projection + let filters = vec![]; // optional filters to push down + let fetch = None; // optional LIMIT + let table_scan = LogicalPlan::TableScan(TableScan::try_new( + "person", + Arc::new(table_source), + projection, + filters, + fetch, + )?); + + // create a Filter plan that evaluates `id > 500` and wraps the TableScan + let filter_expr = col("id").gt(lit(500)); + let plan = LogicalPlan::Filter(Filter::try_new(filter_expr, Arc::new(table_scan))?); + + // print the plan + println!("{}", plan.display_indent_schema()); + + Ok(()) +} + +#[test] +fn plan_builder_1() -> Result<()> { + // create a logical table source + let schema = Schema::new(vec![ + Field::new("id", DataType::Int32, true), + Field::new("name", DataType::Utf8, true), + ]); + let table_source = LogicalTableSource::new(SchemaRef::new(schema)); + + // optional projection + let projection = None; + + // create a LogicalPlanBuilder for a table scan + let builder = LogicalPlanBuilder::scan("person", Arc::new(table_source), projection)?; + + // perform a filter that evaluates `id > 500`, and build the plan + let plan = builder.filter(col("id").gt(lit(500)))?.build()?; + + // print the plan + println!("{}", plan.display_indent_schema()); + + Ok(()) +} diff --git a/test-utils/Cargo.toml b/test-utils/Cargo.toml index 5ab10e42cf68..b9c4db17c098 100644 --- a/test-utils/Cargo.toml +++ b/test-utils/Cargo.toml @@ -26,4 +26,4 @@ edition = { workspace = true } arrow = { workspace = true } datafusion-common = { path = "../datafusion/common" } env_logger = "0.10.0" -rand = "0.8" +rand = { workspace = true }
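To close out the substrait signature changes above (taking `&SessionContext` rather than `&mut SessionContext`), here is a hedged end-to-end sketch, not part of the PR, of producing and consuming a Substrait plan with an immutably borrowed context. The table name, CSV path, and tokio runtime are illustrative assumptions.

```rust
use datafusion::error::Result;
use datafusion::prelude::*;
use datafusion_substrait::logical_plan::consumer::from_substrait_plan;
use datafusion_substrait::logical_plan::producer::to_substrait_plan;

#[tokio::main]
async fn main() -> Result<()> {
    // The context no longer needs to be mutable to consume a Substrait plan.
    let ctx = SessionContext::new();
    ctx.register_csv("data", "tests/testdata/data.csv", CsvReadOptions::new())
        .await?;

    let plan = ctx.sql("SELECT a, b FROM data").await?.into_optimized_plan()?;

    // Producer: LogicalPlan -> substrait::proto::Plan
    let proto = to_substrait_plan(&plan, &ctx)?;

    // Consumer: substrait::proto::Plan -> LogicalPlan, now with `&ctx` rather
    // than `&mut ctx`.
    let roundtrip = from_substrait_plan(&ctx, &proto).await?;
    println!("{roundtrip:?}");

    Ok(())
}
```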