diff --git a/app/benches/yuv8/main.rs b/app/benches/yuv8/main.rs index 8ab6394c..cd508220 100644 --- a/app/benches/yuv8/main.rs +++ b/app/benches/yuv8/main.rs @@ -37,9 +37,8 @@ use yuvutils_rs::{ gbr_to_rgba, rgb_to_gbr, rgb_to_yuv400, rgb_to_yuv420, rgb_to_yuv422, rgb_to_yuv444, rgb_to_yuv_nv12, rgb_to_yuv_nv16, rgba_to_yuv420, rgba_to_yuv422, rgba_to_yuv444, yuv400_to_rgba, yuv420_to_rgb, yuv420_to_rgba, yuv422_to_rgba, yuv444_to_rgba, yuv_nv12_to_rgb, - yuv_nv12_to_rgba, yuv_nv16_to_rgb, yuv_nv16_to_rgba, ConversionQuality, YuvBiPlanarImageMut, - YuvChromaSubsampling, YuvGrayImage, YuvGrayImageMut, YuvPlanarImageMut, YuvRange, - YuvStandardMatrix, + yuv_nv12_to_rgba, yuv_nv16_to_rgb, YuvBiPlanarImageMut, YuvChromaSubsampling, YuvGrayImageMut, + YuvPlanarImageMut, YuvRange, YuvStandardMatrix, }; pub fn criterion_benchmark(c: &mut Criterion) { @@ -115,246 +114,245 @@ pub fn criterion_benchmark(c: &mut Criterion) { let fixed_gray = gray_image.to_fixed(); - // c.bench_function("yuvutils GBR -> RGBA Limited", |b| { - // let mut rgb_bytes = vec![0u8; dimensions.0 as usize * 4 * dimensions.1 as usize]; - // b.iter(|| { - // gbr_to_rgba( - // &fixed_gbr, - // &mut rgb_bytes, - // dimensions.0 * 4, - // YuvRange::Limited, - // ) - // .unwrap(); - // }) - // }); - // - // c.bench_function("yuvutils RGB -> YUV 4:2:0", |b| { - // let mut test_planar = YuvPlanarImageMut::::alloc( - // dimensions.0, - // dimensions.1, - // YuvChromaSubsampling::Yuv420, - // ); - // b.iter(|| { - // rgb_to_yuv420( - // &mut test_planar, - // &src_bytes, - // stride as u32, - // YuvRange::Limited, - // YuvStandardMatrix::Bt601, - // ) - // .unwrap(); - // }) - // }); - // - // c.bench_function("libyuv RGB -> YUV 4:2:0", |b| unsafe { - // let layout_rgb = - // Layout::from_size_align(dimensions.0 as usize * dimensions.1 as usize * 3, 16).unwrap(); - // let layout_y = - // Layout::from_size_align(dimensions.0 as usize * dimensions.1 as usize, 16).unwrap(); - // let layout_uv = Layout::from_size_align( - // (dimensions.0 as usize + 1) / 2 * (dimensions.1 as usize + 1) / 2, - // 16, - // ) - // .unwrap(); - // let target_y = std::alloc::alloc(layout_y); - // let target_u = std::alloc::alloc(layout_uv); - // let target_v = std::alloc::alloc(layout_uv); - // let source_rgb = std::alloc::alloc(layout_rgb); - // for (x, src) in src_bytes.iter().enumerate() { - // *source_rgb.add(x) = *src; - // } - // b.iter(|| { - // rs_RGB24ToI420( - // source_rgb, - // stride as i32, - // target_y, - // dimensions.0 as i32, - // target_u, - // (dimensions.0 as i32 + 1) / 2, - // target_v, - // (dimensions.0 as i32 + 1) / 2, - // dimensions.0 as i32, - // dimensions.1 as i32, - // ); - // }); - // std::alloc::dealloc(target_y, layout_y); - // std::alloc::dealloc(target_u, layout_uv); - // std::alloc::dealloc(target_v, layout_uv); - // std::alloc::dealloc(source_rgb, layout_rgb); - // }); - // - // c.bench_function("yuvutils RGBA -> YUV 4:2:0", |b| { - // let mut test_planar = YuvPlanarImageMut::::alloc( - // dimensions.0, - // dimensions.1, - // YuvChromaSubsampling::Yuv420, - // ); - // b.iter(|| { - // rgba_to_yuv420( - // &mut test_planar, - // &rgba_image, - // dimensions.0 * 4, - // YuvRange::Limited, - // YuvStandardMatrix::Bt601, - // ) - // .unwrap(); - // }) - // }); - // - // c.bench_function("libyuv RGBA -> YUV 4:2:0", |b| unsafe { - // let layout_rgba = - // Layout::from_size_align(dimensions.0 as usize * dimensions.1 as usize * 4, 16).unwrap(); - // let layout_y = - // Layout::from_size_align(dimensions.0 as usize * dimensions.1 
as usize, 16).unwrap(); - // let layout_uv = Layout::from_size_align( - // (dimensions.0 as usize + 1) / 2 * (dimensions.1 as usize + 1) / 2, - // 16, - // ) - // .unwrap(); - // let target_y = std::alloc::alloc(layout_y); - // let target_u = std::alloc::alloc(layout_uv); - // let target_v = std::alloc::alloc(layout_uv); - // let source_rgb = std::alloc::alloc(layout_rgba); - // for (x, src) in src_bytes.iter().enumerate() { - // *source_rgb.add(x) = *src; - // } - // b.iter(|| { - // rs_ABGRToI420( - // source_rgb, - // dimensions.0 as i32 * 4i32, - // target_y, - // dimensions.0 as i32, - // target_u, - // (dimensions.0 as i32 + 1) / 2, - // target_v, - // (dimensions.0 as i32 + 1) / 2, - // dimensions.0 as i32, - // dimensions.1 as i32, - // ); - // }); - // std::alloc::dealloc(target_y, layout_y); - // std::alloc::dealloc(target_u, layout_uv); - // std::alloc::dealloc(target_v, layout_uv); - // std::alloc::dealloc(source_rgb, layout_rgba); - // }); - // - // c.bench_function("yuvutils RGBA -> YUV 4:2:2", |b| { - // let mut test_planar = YuvPlanarImageMut::::alloc( - // dimensions.0, - // dimensions.1, - // YuvChromaSubsampling::Yuv422, - // ); - // b.iter(|| { - // rgba_to_yuv422( - // &mut test_planar, - // &rgba_image, - // dimensions.0 * 4, - // YuvRange::Limited, - // YuvStandardMatrix::Bt601, - // ) - // .unwrap(); - // }) - // }); - // - // c.bench_function("libyuv RGBA -> YUV 4:2:2", |b| { - // let mut test_planar = YuvPlanarImageMut::::alloc( - // dimensions.0, - // dimensions.1, - // YuvChromaSubsampling::Yuv422, - // ); - // b.iter(|| unsafe { - // rs_ABGRToJ422( - // rgba_image.as_ptr(), - // dimensions.0 as i32 * 4i32, - // test_planar.y_plane.borrow_mut().as_mut_ptr(), - // test_planar.y_stride as i32, - // test_planar.u_plane.borrow_mut().as_mut_ptr(), - // test_planar.u_stride as i32, - // test_planar.v_plane.borrow_mut().as_mut_ptr(), - // test_planar.v_stride as i32, - // test_planar.width as i32, - // test_planar.height as i32, - // ); - // }) - // }); - // - // c.bench_function("yuvutils RGBA -> YUV 4:4:4", |b| { - // let mut test_planar = YuvPlanarImageMut::::alloc( - // dimensions.0, - // dimensions.1, - // YuvChromaSubsampling::Yuv444, - // ); - // b.iter(|| { - // rgba_to_yuv444( - // &mut test_planar, - // &rgba_image, - // dimensions.0 * 4, - // YuvRange::Limited, - // YuvStandardMatrix::Bt601, - // ) - // .unwrap(); - // }) - // }); - // - // c.bench_function("yuvutils YUV NV16 -> RGB", |b| { - // let mut rgb_bytes = vec![0u8; dimensions.0 as usize * 3 * dimensions.1 as usize]; - // b.iter(|| { - // yuv_nv16_to_rgb( - // &fixed_bi_planar422, - // &mut rgb_bytes, - // dimensions.0 * 3u32, - // YuvRange::Limited, - // YuvStandardMatrix::Bt601, - // ) - // .unwrap(); - // }) - // }); - // - // c.bench_function("yuvutils YUV NV12 -> RGB", |b| { - // let mut rgb_bytes = vec![0u8; dimensions.0 as usize * 3 * dimensions.1 as usize]; - // b.iter(|| { - // yuv_nv12_to_rgb( - // &fixed_bi_planar, - // &mut rgb_bytes, - // dimensions.0 * 3u32, - // YuvRange::Limited, - // YuvStandardMatrix::Bt601, - // ) - // .unwrap(); - // }) - // }); - // - // c.bench_function("yuvutils YUV NV12 -> RGBA", |b| { - // let mut rgb_bytes = vec![0u8; dimensions.0 as usize * 4 * dimensions.1 as usize]; - // b.iter(|| { - // yuv_nv12_to_rgba( - // &fixed_bi_planar, - // &mut rgb_bytes, - // dimensions.0 * 4u32, - // YuvRange::Limited, - // YuvStandardMatrix::Bt601, - // ) - // .unwrap(); - // }) - // }); - // - // // - // c.bench_function("libyuv YUV NV12 -> RGB", |b| { - // let mut 
rgb_bytes = vec![0u8; dimensions.0 as usize * 4 * dimensions.1 as usize]; - // b.iter(|| unsafe { - // rs_NV21ToABGR( - // fixed_bi_planar.y_plane.as_ptr(), - // fixed_bi_planar.y_stride as i32, - // fixed_bi_planar.uv_plane.as_ptr(), - // fixed_bi_planar.uv_stride as i32, - // rgb_bytes.as_mut_ptr(), - // dimensions.0 as i32 * 4, - // fixed_bi_planar.width as i32, - // fixed_bi_planar.height as i32, - // ); - // }) - // }); - // + c.bench_function("yuvutils GBR -> RGBA Limited", |b| { + let mut rgb_bytes = vec![0u8; dimensions.0 as usize * 4 * dimensions.1 as usize]; + b.iter(|| { + gbr_to_rgba( + &fixed_gbr, + &mut rgb_bytes, + dimensions.0 * 4, + YuvRange::Limited, + ) + .unwrap(); + }) + }); + + c.bench_function("yuvutils RGB -> YUV 4:2:0", |b| { + let mut test_planar = YuvPlanarImageMut::::alloc( + dimensions.0, + dimensions.1, + YuvChromaSubsampling::Yuv420, + ); + b.iter(|| { + rgb_to_yuv420( + &mut test_planar, + &src_bytes, + stride as u32, + YuvRange::Limited, + YuvStandardMatrix::Bt601, + ) + .unwrap(); + }) + }); + + c.bench_function("libyuv RGB -> YUV 4:2:0", |b| unsafe { + let layout_rgb = + Layout::from_size_align(dimensions.0 as usize * dimensions.1 as usize * 3, 16).unwrap(); + let layout_y = + Layout::from_size_align(dimensions.0 as usize * dimensions.1 as usize, 16).unwrap(); + let layout_uv = Layout::from_size_align( + (dimensions.0 as usize + 1) / 2 * (dimensions.1 as usize + 1) / 2, + 16, + ) + .unwrap(); + let target_y = std::alloc::alloc(layout_y); + let target_u = std::alloc::alloc(layout_uv); + let target_v = std::alloc::alloc(layout_uv); + let source_rgb = std::alloc::alloc(layout_rgb); + for (x, src) in src_bytes.iter().enumerate() { + *source_rgb.add(x) = *src; + } + b.iter(|| { + rs_RGB24ToI420( + source_rgb, + stride as i32, + target_y, + dimensions.0 as i32, + target_u, + (dimensions.0 as i32 + 1) / 2, + target_v, + (dimensions.0 as i32 + 1) / 2, + dimensions.0 as i32, + dimensions.1 as i32, + ); + }); + std::alloc::dealloc(target_y, layout_y); + std::alloc::dealloc(target_u, layout_uv); + std::alloc::dealloc(target_v, layout_uv); + std::alloc::dealloc(source_rgb, layout_rgb); + }); + + c.bench_function("yuvutils RGBA -> YUV 4:2:0", |b| { + let mut test_planar = YuvPlanarImageMut::::alloc( + dimensions.0, + dimensions.1, + YuvChromaSubsampling::Yuv420, + ); + b.iter(|| { + rgba_to_yuv420( + &mut test_planar, + &rgba_image, + dimensions.0 * 4, + YuvRange::Limited, + YuvStandardMatrix::Bt601, + ) + .unwrap(); + }) + }); + + c.bench_function("libyuv RGBA -> YUV 4:2:0", |b| unsafe { + let layout_rgba = + Layout::from_size_align(dimensions.0 as usize * dimensions.1 as usize * 4, 16).unwrap(); + let layout_y = + Layout::from_size_align(dimensions.0 as usize * dimensions.1 as usize, 16).unwrap(); + let layout_uv = Layout::from_size_align( + (dimensions.0 as usize + 1) / 2 * (dimensions.1 as usize + 1) / 2, + 16, + ) + .unwrap(); + let target_y = std::alloc::alloc(layout_y); + let target_u = std::alloc::alloc(layout_uv); + let target_v = std::alloc::alloc(layout_uv); + let source_rgb = std::alloc::alloc(layout_rgba); + for (x, src) in src_bytes.iter().enumerate() { + *source_rgb.add(x) = *src; + } + b.iter(|| { + rs_ABGRToI420( + source_rgb, + dimensions.0 as i32 * 4i32, + target_y, + dimensions.0 as i32, + target_u, + (dimensions.0 as i32 + 1) / 2, + target_v, + (dimensions.0 as i32 + 1) / 2, + dimensions.0 as i32, + dimensions.1 as i32, + ); + }); + std::alloc::dealloc(target_y, layout_y); + std::alloc::dealloc(target_u, layout_uv); + std::alloc::dealloc(target_v, 
layout_uv); + std::alloc::dealloc(source_rgb, layout_rgba); + }); + + c.bench_function("yuvutils RGBA -> YUV 4:2:2", |b| { + let mut test_planar = YuvPlanarImageMut::::alloc( + dimensions.0, + dimensions.1, + YuvChromaSubsampling::Yuv422, + ); + b.iter(|| { + rgba_to_yuv422( + &mut test_planar, + &rgba_image, + dimensions.0 * 4, + YuvRange::Limited, + YuvStandardMatrix::Bt601, + ) + .unwrap(); + }) + }); + + c.bench_function("libyuv RGBA -> YUV 4:2:2", |b| { + let mut test_planar = YuvPlanarImageMut::::alloc( + dimensions.0, + dimensions.1, + YuvChromaSubsampling::Yuv422, + ); + b.iter(|| unsafe { + rs_ABGRToJ422( + rgba_image.as_ptr(), + dimensions.0 as i32 * 4i32, + test_planar.y_plane.borrow_mut().as_mut_ptr(), + test_planar.y_stride as i32, + test_planar.u_plane.borrow_mut().as_mut_ptr(), + test_planar.u_stride as i32, + test_planar.v_plane.borrow_mut().as_mut_ptr(), + test_planar.v_stride as i32, + test_planar.width as i32, + test_planar.height as i32, + ); + }) + }); + + c.bench_function("yuvutils RGBA -> YUV 4:4:4", |b| { + let mut test_planar = YuvPlanarImageMut::::alloc( + dimensions.0, + dimensions.1, + YuvChromaSubsampling::Yuv444, + ); + b.iter(|| { + rgba_to_yuv444( + &mut test_planar, + &rgba_image, + dimensions.0 * 4, + YuvRange::Limited, + YuvStandardMatrix::Bt601, + ) + .unwrap(); + }) + }); + + c.bench_function("yuvutils YUV NV16 -> RGB", |b| { + let mut rgb_bytes = vec![0u8; dimensions.0 as usize * 3 * dimensions.1 as usize]; + b.iter(|| { + yuv_nv16_to_rgb( + &fixed_bi_planar422, + &mut rgb_bytes, + dimensions.0 * 3u32, + YuvRange::Limited, + YuvStandardMatrix::Bt601, + ) + .unwrap(); + }) + }); + + c.bench_function("yuvutils YUV NV12 -> RGB", |b| { + let mut rgb_bytes = vec![0u8; dimensions.0 as usize * 3 * dimensions.1 as usize]; + b.iter(|| { + yuv_nv12_to_rgb( + &fixed_bi_planar, + &mut rgb_bytes, + dimensions.0 * 3u32, + YuvRange::Limited, + YuvStandardMatrix::Bt601, + ) + .unwrap(); + }) + }); + + c.bench_function("yuvutils YUV NV12 -> RGBA", |b| { + let mut rgb_bytes = vec![0u8; dimensions.0 as usize * 4 * dimensions.1 as usize]; + b.iter(|| { + yuv_nv12_to_rgba( + &fixed_bi_planar, + &mut rgb_bytes, + dimensions.0 * 4u32, + YuvRange::Limited, + YuvStandardMatrix::Bt601, + ) + .unwrap(); + }) + }); + + c.bench_function("libyuv YUV NV12 -> RGB", |b| { + let mut rgb_bytes = vec![0u8; dimensions.0 as usize * 4 * dimensions.1 as usize]; + b.iter(|| unsafe { + rs_NV21ToABGR( + fixed_bi_planar.y_plane.as_ptr(), + fixed_bi_planar.y_stride as i32, + fixed_bi_planar.uv_plane.as_ptr(), + fixed_bi_planar.uv_stride as i32, + rgb_bytes.as_mut_ptr(), + dimensions.0 as i32 * 4, + fixed_bi_planar.width as i32, + fixed_bi_planar.height as i32, + ); + }) + }); + c.bench_function("yuvutils YUV 4:2:0 -> RGB", |b| { let mut rgb_bytes = vec![0u8; dimensions.0 as usize * 3 * dimensions.1 as usize]; b.iter(|| { @@ -364,7 +362,6 @@ pub fn criterion_benchmark(c: &mut Criterion) { dimensions.0 * 3u32, YuvRange::Limited, YuvStandardMatrix::Bt601, - ConversionQuality::Fastest, ) .unwrap(); }) @@ -397,7 +394,6 @@ pub fn criterion_benchmark(c: &mut Criterion) { dimensions.0 * 4u32, YuvRange::Limited, YuvStandardMatrix::Bt601, - ConversionQuality::Fastest, ) .unwrap(); }) @@ -444,7 +440,6 @@ pub fn criterion_benchmark(c: &mut Criterion) { dimensions.0 * 4u32, YuvRange::Limited, YuvStandardMatrix::Bt601, - ConversionQuality::Fastest, ) .unwrap(); }) @@ -491,7 +486,6 @@ pub fn criterion_benchmark(c: &mut Criterion) { dimensions.0 * 4u32, YuvRange::Limited, YuvStandardMatrix::Bt601, - 
ConversionQuality::Fastest,
             )
             .unwrap();
         })
diff --git a/app/src/main.rs b/app/src/main.rs
index 0ec180fd..0068d783 100644
--- a/app/src/main.rs
+++ b/app/src/main.rs
@@ -42,10 +42,9 @@ use yuvutils_rs::{
     yuv420_p16_to_rgb, yuv420_p16_to_rgb16, yuv420_to_rgb, yuv420_to_yuyv422, yuv422_p16_to_rgb,
     yuv422_p16_to_rgb16, yuv422_to_rgb, yuv422_to_rgba, yuv444_p16_to_rgb16, yuv444_to_rgb,
     yuv_nv12_to_rgb, yuv_nv12_to_rgb_p16, yuv_nv12_to_rgba, yuv_nv12_to_rgba_p16, yuv_nv16_to_rgb,
-    yuv_nv24_to_rgb, yuyv422_to_rgb, yuyv422_to_yuv420, BufferStoreMut, ConversionQuality,
-    SharpYuvGammaTransfer, YuvBiPlanarImageMut, YuvBytesPacking, YuvChromaSubsampling,
-    YuvEndianness, YuvGrayImageMut, YuvPackedImage, YuvPackedImageMut, YuvPlanarImageMut, YuvRange,
-    YuvStandardMatrix,
+    yuv_nv24_to_rgb, yuyv422_to_rgb, yuyv422_to_yuv420, SharpYuvGammaTransfer, YuvBiPlanarImageMut,
+    YuvBytesPacking, YuvChromaSubsampling, YuvEndianness, YuvGrayImageMut, YuvPackedImage,
+    YuvPackedImageMut, YuvPlanarImageMut, YuvRange, YuvStandardMatrix,
 };
 
 fn read_file_bytes(file_path: &str) -> Result<Vec<u8>, String> {
@@ -271,7 +270,6 @@ fn main() {
         rgba_stride as u32,
         YuvRange::Limited,
         YuvStandardMatrix::Bt709,
-        ConversionQuality::Fastest,
     )
     .unwrap();
 
diff --git a/fuzz/yuv_to_rgb/yuv_to_rgb.rs b/fuzz/yuv_to_rgb/yuv_to_rgb.rs
index de37b1f5..94b84893 100644
--- a/fuzz/yuv_to_rgb/yuv_to_rgb.rs
+++ b/fuzz/yuv_to_rgb/yuv_to_rgb.rs
@@ -32,68 +32,19 @@ use libfuzzer_sys::fuzz_target;
 use yuvutils_rs::{
     yuv420_to_rgb, yuv420_to_rgba, yuv422_to_rgb, yuv422_to_rgba, yuv444_to_rgb, yuv444_to_rgba,
-    ConversionQuality, YuvPlanarImage, YuvRange, YuvStandardMatrix,
+    YuvPlanarImage, YuvRange, YuvStandardMatrix,
 };
 
 fuzz_target!(|data: (u8, u8, u8, u8, u8, u8)| {
-    fuzz_yuv_420(
-        data.0,
-        data.1,
-        data.2,
-        data.3,
-        data.4,
-        ConversionQuality::Fastest,
-    );
-    fuzz_yuv_420(
-        data.0,
-        data.1,
-        data.2,
-        data.3,
-        data.4,
-        ConversionQuality::Good,
-    );
-    fuzz_yuv_422(
-        data.0,
-        data.1,
-        data.2,
-        data.3,
-        data.4,
-        ConversionQuality::Fastest,
-    );
-    fuzz_yuv_422(
-        data.0,
-        data.1,
-        data.2,
-        data.3,
-        data.4,
-        ConversionQuality::Good,
-    );
-    fuzz_yuv_444(
-        data.0,
-        data.1,
-        data.2,
-        data.3,
-        data.4,
-        ConversionQuality::Fastest,
-    );
-    fuzz_yuv_444(
-        data.0,
-        data.1,
-        data.2,
-        data.3,
-        data.4,
-        ConversionQuality::Good,
-    );
+    fuzz_yuv_420(data.0, data.1, data.2, data.3, data.4);
+    fuzz_yuv_420(data.0, data.1, data.2, data.3, data.4);
+    fuzz_yuv_422(data.0, data.1, data.2, data.3, data.4);
+    fuzz_yuv_422(data.0, data.1, data.2, data.3, data.4);
+    fuzz_yuv_444(data.0, data.1, data.2, data.3, data.4);
+    fuzz_yuv_444(data.0, data.1, data.2, data.3, data.4);
 });
 
-fn fuzz_yuv_420(
-    i_width: u8,
-    i_height: u8,
-    y_value: u8,
-    u_value: u8,
-    v_value: u8,
-    conversion_quality: ConversionQuality,
-) {
+fn fuzz_yuv_420(i_width: u8, i_height: u8, y_value: u8, u_value: u8, v_value: u8) {
     if i_height == 0 || i_width == 0 {
         return;
     }
@@ -120,7 +71,6 @@ fn fuzz_yuv_420(
         i_width as u32 * 3,
         YuvRange::Limited,
         YuvStandardMatrix::Bt601,
-        conversion_quality,
     )
     .unwrap();
 
@@ -132,19 +82,11 @@ fn fuzz_yuv_420(
         i_width as u32 * 4,
         YuvRange::Limited,
         YuvStandardMatrix::Bt601,
-        conversion_quality,
     )
     .unwrap();
 }
 
-fn fuzz_yuv_422(
-    i_width: u8,
-    i_height: u8,
-    y_value: u8,
-    u_value: u8,
-    v_value: u8,
-    conversion_quality: ConversionQuality,
-) {
+fn fuzz_yuv_422(i_width: u8, i_height: u8, y_value: u8, u_value: u8, v_value: u8) {
     if i_height == 0 || i_width == 0 {
         return;
     }
@@ -171,7 +113,6 @@ fn fuzz_yuv_422(
         i_width as u32 * 3,
         YuvRange::Limited,
         YuvStandardMatrix::Bt601,
-        conversion_quality,
     )
     .unwrap();
 
@@ -183,19 +124,11 @@ fn fuzz_yuv_422(
         i_width as u32 * 4,
         YuvRange::Limited,
         YuvStandardMatrix::Bt601,
-        conversion_quality,
     )
     .unwrap();
 }
 
-fn fuzz_yuv_444(
-    i_width: u8,
-    i_height: u8,
-    y_value: u8,
-    u_value: u8,
-    v_value: u8,
-    conversion_quality: ConversionQuality,
-) {
+fn fuzz_yuv_444(i_width: u8, i_height: u8, y_value: u8, u_value: u8, v_value: u8) {
     if i_height == 0 || i_width == 0 {
         return;
     }
@@ -222,7 +155,6 @@ fn fuzz_yuv_444(
         i_width as u32 * 3,
         YuvRange::Limited,
         YuvStandardMatrix::Bt601,
-        conversion_quality,
     )
     .unwrap();
 
@@ -234,7 +166,6 @@ fn fuzz_yuv_444(
         i_width as u32 * 4,
         YuvRange::Limited,
         YuvStandardMatrix::Bt601,
-        conversion_quality,
     )
     .unwrap();
 }
diff --git a/src/avx2/avx2_utils.rs b/src/avx2/avx2_utils.rs
index d01f9e97..7e2239d2 100644
--- a/src/avx2/avx2_utils.rs
+++ b/src/avx2/avx2_utils.rs
@@ -893,11 +893,6 @@ pub(crate) unsafe fn _mm256_affine_v_dot(
     )
 }
 
-#[inline(always)]
-pub(crate) unsafe fn avx2_create(lo: __m128i, hi: __m128i) -> __m256i {
-    _mm256_set_m128i(hi, lo)
-}
-
 #[inline(always)]
 pub(crate) unsafe fn _mm256_expand8_to_10(v: __m256i) -> (__m256i, __m256i) {
     let (v0, v1) = _mm256_interleave_epi8(v, v);
diff --git a/src/avx2/gbr_to_rgb.rs b/src/avx2/gbr_to_rgb.rs
index 00c08648..ad111636 100644
--- a/src/avx2/gbr_to_rgb.rs
+++ b/src/avx2/gbr_to_rgb.rs
@@ -26,7 +26,9 @@
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
-use crate::avx2::avx2_utils::{_mm256_store_interleave_rgb_for_yuv, avx2_pack_u16};
+use crate::avx2::avx2_utils::{
+    _mm256_expand8_to_10, _mm256_store_interleave_rgb_for_yuv, avx2_pack_u16,
+};
 use crate::yuv_support::YuvSourceChannels;
 #[cfg(target_arch = "x86")]
 use std::arch::x86::*;
@@ -119,8 +121,6 @@ unsafe fn avx_yuv_to_rgba_row_limited_impl(
 
     let v_alpha = _mm256_set1_epi8(255u8 as i8);
 
-    const V_SCALE: i32 = 2;
-
     let vy_coeff = _mm256_set1_epi16(y_coeff as i16);
 
     let vy_bias = _mm256_set1_epi8(y_bias as i8);
@@ -138,37 +138,17 @@
             vy_bias,
         );
 
-        let rl_hi = _mm256_mulhrs_epi16(
-            _mm256_slli_epi16::<V_SCALE>(_mm256_cvtepu8_epi16(_mm256_castsi256_si128(r_values0))),
-            vy_coeff,
-        );
-        let gl_hi = _mm256_mulhrs_epi16(
-            _mm256_slli_epi16::<V_SCALE>(_mm256_cvtepu8_epi16(_mm256_castsi256_si128(g_values0))),
-            vy_coeff,
-        );
-        let bl_hi = _mm256_mulhrs_epi16(
-            _mm256_slli_epi16::<V_SCALE>(_mm256_cvtepu8_epi16(_mm256_castsi256_si128(b_values0))),
-            vy_coeff,
-        );
+        let (r_y_lo, r_y_hi) = _mm256_expand8_to_10(r_values0);
+        let (g_y_lo, g_y_hi) = _mm256_expand8_to_10(g_values0);
+        let (b_y_lo, b_y_hi) = _mm256_expand8_to_10(b_values0);
 
-        let rl_lo = _mm256_mulhrs_epi16(
-            _mm256_slli_epi16::<V_SCALE>(_mm256_cvtepu8_epi16(_mm256_extracti128_si256::<1>(
-                r_values0,
-            ))),
-            vy_coeff,
-        );
-        let gl_lo = _mm256_mulhrs_epi16(
-            _mm256_slli_epi16::<V_SCALE>(_mm256_cvtepu8_epi16(_mm256_extracti128_si256::<1>(
-                g_values0,
-            ))),
-            vy_coeff,
-        );
-        let bl_lo = _mm256_mulhrs_epi16(
-            _mm256_slli_epi16::<V_SCALE>(_mm256_cvtepu8_epi16(_mm256_extracti128_si256::<1>(
-                b_values0,
-            ))),
-            vy_coeff,
-        );
+        let rl_hi = _mm256_mulhrs_epi16(r_y_hi, vy_coeff);
+        let gl_hi = _mm256_mulhrs_epi16(g_y_hi, vy_coeff);
+        let bl_hi = _mm256_mulhrs_epi16(b_y_hi, vy_coeff);
+
+        let rl_lo = _mm256_mulhrs_epi16(r_y_lo, vy_coeff);
+        let gl_lo = _mm256_mulhrs_epi16(g_y_lo, vy_coeff);
+        let bl_lo = _mm256_mulhrs_epi16(b_y_lo, vy_coeff);
 
         let r_values = avx2_pack_u16(rl_lo, rl_hi);
         let
g_values = avx2_pack_u16(gl_lo, gl_hi); diff --git a/src/avx2/mod.rs b/src/avx2/mod.rs index b31b15dd..2821082c 100644 --- a/src/avx2/mod.rs +++ b/src/avx2/mod.rs @@ -42,8 +42,6 @@ mod y_to_rgba; mod y_to_rgba_alpha; mod ycgco_to_rgb; mod ycgco_to_rgba_alpha; -mod yuv2rgba420_fast; -mod yuv2rgba_fast; mod yuv_nv_to_rgba; mod yuv_nv_to_rgba420; mod yuv_p16_to_rgb16; @@ -74,8 +72,6 @@ pub(crate) use y_to_rgba::avx2_y_to_rgba_row; pub(crate) use y_to_rgba_alpha::avx2_y_to_rgba_alpha_row; pub(crate) use ycgco_to_rgb::avx2_ycgco_to_rgb_row; pub(crate) use ycgco_to_rgba_alpha::avx2_ycgco_to_rgba_alpha; -pub(crate) use yuv2rgba420_fast::avx2_yuv_to_rgba_fast_row420; -pub(crate) use yuv2rgba_fast::avx2_yuv_to_rgba_fast_row; pub(crate) use yuv_nv_to_rgba::avx2_yuv_nv_to_rgba_row; pub(crate) use yuv_nv_to_rgba420::avx2_yuv_nv_to_rgba_row420; pub(crate) use yuv_p16_to_rgb16::avx_yuv_p16_to_rgba_row; diff --git a/src/avx2/yuv2rgba420_fast.rs b/src/avx2/yuv2rgba420_fast.rs deleted file mode 100644 index 8d96f41a..00000000 --- a/src/avx2/yuv2rgba420_fast.rs +++ /dev/null @@ -1,295 +0,0 @@ -/* - * Copyright (c) Radzivon Bartoshyk, 10/2024. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, - * are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -use crate::avx2::avx2_utils::*; -use crate::internals::{interleaved_epi8, ProcessedOffset}; -use crate::yuv_support::{CbCrInverseTransform, YuvChromaRange, YuvSourceChannels}; -#[cfg(target_arch = "x86")] -use std::arch::x86::*; -#[cfg(target_arch = "x86_64")] -use std::arch::x86_64::*; - -pub(crate) fn avx2_yuv_to_rgba_fast_row420( - range: &YuvChromaRange, - transform: &CbCrInverseTransform, - y_plane0: &[u8], - y_plane1: &[u8], - u_plane: &[u8], - v_plane: &[u8], - rgba0: &mut [u8], - rgba1: &mut [u8], - start_cx: usize, - start_ux: usize, - width: usize, -) -> ProcessedOffset { - unsafe { - avx2_yuv_to_rgba_row_fast_impl420::( - range, transform, y_plane0, y_plane1, u_plane, v_plane, rgba0, rgba1, start_cx, - start_ux, width, - ) - } -} - -#[target_feature(enable = "avx2")] -unsafe fn avx2_yuv_to_rgba_row_fast_impl420( - range: &YuvChromaRange, - transform: &CbCrInverseTransform, - y_plane0: &[u8], - y_plane1: &[u8], - u_plane: &[u8], - v_plane: &[u8], - rgba0: &mut [u8], - rgba1: &mut [u8], - start_cx: usize, - start_ux: usize, - width: usize, -) -> ProcessedOffset { - let destination_channels: YuvSourceChannels = DESTINATION_CHANNELS.into(); - let channels = destination_channels.get_channels_count(); - - let mut cx = start_cx; - let mut uv_x = start_ux; - let u_ptr = u_plane.as_ptr(); - let v_ptr = v_plane.as_ptr(); - - const PRECISION: i32 = 6; - - let y_corr = _mm256_set1_epi8(range.bias_y as i8); - let v_luma_coeff = _mm256_set1_epi16((transform.y_coef as u16 * 256) as i16); - let v_cr_coeff = _mm256_set1_epi16(interleaved_epi8( - transform.cr_coef as i8, - -transform.cr_coef as i8, - )); - let v_cb_coeff = _mm256_set1_epi16(interleaved_epi8( - transform.cb_coef as i8, - -transform.cb_coef as i8, - )); - let v_g_coeff_1 = _mm256_set1_epi16(interleaved_epi8( - transform.g_coeff_1 as i8, - -transform.g_coeff_1 as i8, - )); - let v_g_coeff_2 = _mm256_set1_epi16(interleaved_epi8( - transform.g_coeff_2 as i8, - -transform.g_coeff_2 as i8, - )); - let u_bias_uv = _mm256_set1_epi8(range.bias_uv as i8); - - while cx + 32 < width { - let y_values0 = _mm256_subs_epu8( - _mm256_loadu_si256(y_plane0.get_unchecked(cx..).as_ptr() as *const __m256i), - y_corr, - ); - let y_values1 = _mm256_subs_epu8( - _mm256_loadu_si256(y_plane1.get_unchecked(cx..).as_ptr() as *const __m256i), - y_corr, - ); - - let u_values = _mm_loadu_si128(u_ptr.add(uv_x) as *const __m128i); - let v_values = _mm_loadu_si128(v_ptr.add(uv_x) as *const __m128i); - - let (u_low_u16, u_high_u16) = _mm256_interleave_epi8( - avx2_create( - _mm_unpacklo_epi8(u_values, u_values), - _mm_unpackhi_epi8(u_values, u_values), - ), - u_bias_uv, - ); - - let (v_low_u16, v_high_u16) = _mm256_interleave_epi8( - avx2_create( - _mm_unpacklo_epi8(v_values, v_values), - _mm_unpackhi_epi8(v_values, v_values), - ), - u_bias_uv, - ); - - let y0_10 = _mm256_interleave_epi8(y_values0, y_values0); - let y1_10 = _mm256_interleave_epi8(y_values1, y_values1); - - let y_high0 = _mm256_mulhi_epu16(y0_10.1, v_luma_coeff); - let y_high1 = _mm256_mulhi_epu16(y1_10.1, v_luma_coeff); - - let g_coeff_hi = _mm256_adds_epi16( - _mm256_maddubs_epi16(v_high_u16, v_g_coeff_1), - _mm256_maddubs_epi16(u_high_u16, v_g_coeff_2), - ); - - let r_high0 = _mm256_adds_epi16(y_high0, _mm256_maddubs_epi16(v_high_u16, v_cr_coeff)); - let b_high0 = _mm256_adds_epi16(y_high0, _mm256_maddubs_epi16(u_high_u16, v_cb_coeff)); - let g_high0 = _mm256_subs_epi16(y_high0, g_coeff_hi); - - let r_high1 = _mm256_adds_epi16(y_high1, _mm256_maddubs_epi16(v_high_u16, v_cr_coeff)); - let 
b_high1 = _mm256_adds_epi16(y_high1, _mm256_maddubs_epi16(u_high_u16, v_cb_coeff)); - let g_high1 = _mm256_subs_epi16(y_high1, g_coeff_hi); - - let y_low0 = _mm256_mulhi_epu16(y0_10.0, v_luma_coeff); - let y_low1 = _mm256_mulhi_epu16(y1_10.0, v_luma_coeff); - - let g_coeff_lo = _mm256_adds_epi16( - _mm256_maddubs_epi16(v_low_u16, v_g_coeff_1), - _mm256_maddubs_epi16(u_low_u16, v_g_coeff_2), - ); - - let r_low0 = _mm256_adds_epi16(y_low0, _mm256_maddubs_epi16(v_low_u16, v_cr_coeff)); - let b_low0 = _mm256_adds_epi16(y_low0, _mm256_maddubs_epi16(u_low_u16, v_cb_coeff)); - let g_low0 = _mm256_subs_epi16(y_low0, g_coeff_lo); - - let r_low1 = _mm256_adds_epi16(y_low1, _mm256_maddubs_epi16(v_low_u16, v_cr_coeff)); - let b_low1 = _mm256_adds_epi16(y_low1, _mm256_maddubs_epi16(u_low_u16, v_cb_coeff)); - let g_low1 = _mm256_subs_epi16(y_low1, g_coeff_lo); - - let r_values0 = avx2_pack_u16( - _mm256_srai_epi16::(r_low0), - _mm256_srai_epi16::(r_high0), - ); - let g_values0 = avx2_pack_u16( - _mm256_srai_epi16::(g_low0), - _mm256_srai_epi16::(g_high0), - ); - let b_values0 = avx2_pack_u16( - _mm256_srai_epi16::(b_low0), - _mm256_srai_epi16::(b_high0), - ); - - let r_values1 = avx2_pack_u16( - _mm256_srai_epi16::(r_low1), - _mm256_srai_epi16::(r_high1), - ); - let g_values1 = avx2_pack_u16( - _mm256_srai_epi16::(g_low1), - _mm256_srai_epi16::(g_high1), - ); - let b_values1 = avx2_pack_u16( - _mm256_srai_epi16::(b_low1), - _mm256_srai_epi16::(b_high1), - ); - - let dst_shift = cx * channels; - - let v_alpha = _mm256_set1_epi8(255u8 as i8); - - _mm256_store_interleave_rgb_for_yuv::( - rgba0.get_unchecked_mut(dst_shift..).as_mut_ptr(), - r_values0, - g_values0, - b_values0, - v_alpha, - ); - _mm256_store_interleave_rgb_for_yuv::( - rgba1.get_unchecked_mut(dst_shift..).as_mut_ptr(), - r_values1, - g_values1, - b_values1, - v_alpha, - ); - - cx += 32; - uv_x += 16; - } - - while cx + 16 < width { - let y_values0 = _mm256_subs_epu8( - _mm256_castsi128_si256(_mm_loadu_si128( - y_plane0.get_unchecked(cx..).as_ptr() as *const __m128i - )), - y_corr, - ); - let y_values1 = _mm256_subs_epu8( - _mm256_castsi128_si256(_mm_loadu_si128( - y_plane1.get_unchecked(cx..).as_ptr() as *const __m128i - )), - y_corr, - ); - - let u_values = _mm_loadu_si64(u_ptr.add(uv_x)); - let v_values = _mm_loadu_si64(v_ptr.add(uv_x)); - - let (u_low_u16, _) = _mm256_interleave_epi8( - avx2_create(_mm_unpacklo_epi8(u_values, u_values), _mm_setzero_si128()), - u_bias_uv, - ); - - let (v_low_u16, _) = _mm256_interleave_epi8( - avx2_create(_mm_unpacklo_epi8(v_values, v_values), _mm_setzero_si128()), - u_bias_uv, - ); - - let y0_10 = _mm256_interleave_epi8(y_values0, y_values0); - let y1_10 = _mm256_interleave_epi8(y_values1, y_values1); - - let y_low0 = _mm256_mulhi_epu16(y0_10.0, v_luma_coeff); - let y_low1 = _mm256_mulhi_epu16(y1_10.0, v_luma_coeff); - - let g_coeff_lo = _mm256_adds_epi16( - _mm256_maddubs_epi16(v_low_u16, v_g_coeff_1), - _mm256_maddubs_epi16(u_low_u16, v_g_coeff_2), - ); - - let r_low0 = _mm256_adds_epi16(y_low0, _mm256_maddubs_epi16(v_low_u16, v_cr_coeff)); - let b_low0 = _mm256_adds_epi16(y_low0, _mm256_maddubs_epi16(u_low_u16, v_cb_coeff)); - let g_low0 = _mm256_subs_epi16(y_low0, g_coeff_lo); - - let r_low1 = _mm256_adds_epi16(y_low1, _mm256_maddubs_epi16(v_low_u16, v_cr_coeff)); - let b_low1 = _mm256_adds_epi16(y_low1, _mm256_maddubs_epi16(u_low_u16, v_cb_coeff)); - let g_low1 = _mm256_subs_epi16(y_low1, g_coeff_lo); - - let zeros = _mm256_setzero_si256(); - - let r_values0 = 
avx2_pack_u16(_mm256_srai_epi16::(r_low0), zeros); - let g_values0 = avx2_pack_u16(_mm256_srai_epi16::(g_low0), zeros); - let b_values0 = avx2_pack_u16(_mm256_srai_epi16::(b_low0), zeros); - - let r_values1 = avx2_pack_u16(_mm256_srai_epi16::(r_low1), zeros); - let g_values1 = avx2_pack_u16(_mm256_srai_epi16::(g_low1), zeros); - let b_values1 = avx2_pack_u16(_mm256_srai_epi16::(b_low1), zeros); - - let dst_shift = cx * channels; - - let v_alpha = _mm256_set1_epi8(255u8 as i8); - _mm256_store_interleave_rgb_half_for_yuv::( - rgba0.get_unchecked_mut(dst_shift..).as_mut_ptr(), - r_values0, - g_values0, - b_values0, - v_alpha, - ); - _mm256_store_interleave_rgb_half_for_yuv::( - rgba1.get_unchecked_mut(dst_shift..).as_mut_ptr(), - r_values1, - g_values1, - b_values1, - v_alpha, - ); - - cx += 16; - uv_x += 8; - } - - ProcessedOffset { cx, ux: uv_x } -} diff --git a/src/avx2/yuv2rgba_fast.rs b/src/avx2/yuv2rgba_fast.rs deleted file mode 100644 index 7a908fcc..00000000 --- a/src/avx2/yuv2rgba_fast.rs +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Copyright (c) Radzivon Bartoshyk, 10/2024. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, - * are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -use crate::avx2::avx2_utils::*; -use crate::internals::{interleaved_epi8, ProcessedOffset}; -use crate::yuv_support::{ - CbCrInverseTransform, YuvChromaRange, YuvChromaSubsampling, YuvSourceChannels, -}; -#[cfg(target_arch = "x86")] -use std::arch::x86::*; -#[cfg(target_arch = "x86_64")] -use std::arch::x86_64::*; - -pub(crate) fn avx2_yuv_to_rgba_fast_row( - range: &YuvChromaRange, - transform: &CbCrInverseTransform, - y_plane: &[u8], - u_plane: &[u8], - v_plane: &[u8], - rgba: &mut [u8], - start_cx: usize, - start_ux: usize, - width: usize, -) -> ProcessedOffset { - unsafe { - avx2_yuv_to_rgba_row_fast_impl::( - range, transform, y_plane, u_plane, v_plane, rgba, start_cx, start_ux, width, - ) - } -} - -#[target_feature(enable = "avx2")] -unsafe fn avx2_yuv_to_rgba_row_fast_impl( - range: &YuvChromaRange, - transform: &CbCrInverseTransform, - y_plane: &[u8], - u_plane: &[u8], - v_plane: &[u8], - rgba: &mut [u8], - start_cx: usize, - start_ux: usize, - width: usize, -) -> ProcessedOffset { - let chroma_subsampling: YuvChromaSubsampling = SAMPLING.into(); - let destination_channels: YuvSourceChannels = DESTINATION_CHANNELS.into(); - let channels = destination_channels.get_channels_count(); - - const PRECISION: i32 = 6; - - let mut cx = start_cx; - let mut uv_x = start_ux; - let y_ptr = y_plane.as_ptr(); - let u_ptr = u_plane.as_ptr(); - let v_ptr = v_plane.as_ptr(); - let rgba_ptr = rgba.as_mut_ptr(); - - let y_corr = _mm256_set1_epi8(range.bias_y as i8); - let v_luma_coeff = _mm256_set1_epi16((transform.y_coef as u16 * 256) as i16); - let v_cr_coeff = _mm256_set1_epi16(interleaved_epi8( - transform.cr_coef as i8, - -transform.cr_coef as i8, - )); - let v_cb_coeff = _mm256_set1_epi16(interleaved_epi8( - transform.cb_coef as i8, - -transform.cb_coef as i8, - )); - let v_g_coeff_1 = _mm256_set1_epi16(interleaved_epi8( - transform.g_coeff_1 as i8, - -transform.g_coeff_1 as i8, - )); - let v_g_coeff_2 = _mm256_set1_epi16(interleaved_epi8( - transform.g_coeff_2 as i8, - -transform.g_coeff_2 as i8, - )); - let u_bias_uv = _mm256_set1_epi8(range.bias_uv as i8); - - while cx + 32 < width { - let y_values = - _mm256_subs_epu8(_mm256_loadu_si256(y_ptr.add(cx) as *const __m256i), y_corr); - - let (u_high_u16, v_high_u16, u_low_u16, v_low_u16); - - match chroma_subsampling { - YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => { - let u_values = _mm_loadu_si128(u_ptr.add(uv_x) as *const __m128i); - let v_values = _mm_loadu_si128(v_ptr.add(uv_x) as *const __m128i); - - (u_low_u16, u_high_u16) = _mm256_interleave_epi8( - avx2_create( - _mm_unpacklo_epi8(u_values, u_values), - _mm_unpackhi_epi8(u_values, u_values), - ), - u_bias_uv, - ); - - (v_low_u16, v_high_u16) = _mm256_interleave_epi8( - avx2_create( - _mm_unpacklo_epi8(v_values, v_values), - _mm_unpackhi_epi8(v_values, v_values), - ), - u_bias_uv, - ); - } - YuvChromaSubsampling::Yuv444 => { - let u_values = _mm256_loadu_si256(u_ptr.add(uv_x) as *const __m256i); - let v_values = _mm256_loadu_si256(v_ptr.add(uv_x) as *const __m256i); - - (u_low_u16, u_high_u16) = _mm256_interleave_epi8(u_values, u_bias_uv); - (v_low_u16, v_high_u16) = _mm256_interleave_epi8(v_values, u_bias_uv); - } - } - - let interleaved_y = _mm256_interleave_epi8(y_values, y_values); - - let y_high = _mm256_mulhi_epu16(interleaved_y.1, v_luma_coeff); - - let r_high = _mm256_adds_epi16(y_high, _mm256_maddubs_epi16(v_high_u16, v_cr_coeff)); - let b_high = _mm256_adds_epi16(y_high, _mm256_maddubs_epi16(u_high_u16, v_cb_coeff)); - let g_high = _mm256_subs_epi16( - 
y_high, - _mm256_adds_epi16( - _mm256_maddubs_epi16(v_high_u16, v_g_coeff_1), - _mm256_maddubs_epi16(u_high_u16, v_g_coeff_2), - ), - ); - - let y_low = _mm256_mulhi_epu16(interleaved_y.0, v_luma_coeff); - - let r_low = _mm256_adds_epi16(y_low, _mm256_maddubs_epi16(v_low_u16, v_cr_coeff)); - let b_low = _mm256_adds_epi16(y_low, _mm256_maddubs_epi16(u_low_u16, v_cb_coeff)); - let g_low = _mm256_subs_epi16( - y_low, - _mm256_adds_epi16( - _mm256_maddubs_epi16(v_low_u16, v_g_coeff_1), - _mm256_maddubs_epi16(u_low_u16, v_g_coeff_2), - ), - ); - - let r_values = avx2_pack_u16( - _mm256_srai_epi16::(r_low), - _mm256_srai_epi16::(r_high), - ); - let g_values = avx2_pack_u16( - _mm256_srai_epi16::(g_low), - _mm256_srai_epi16::(g_high), - ); - let b_values = avx2_pack_u16( - _mm256_srai_epi16::(b_low), - _mm256_srai_epi16::(b_high), - ); - - let dst_shift = cx * channels; - - let v_alpha = _mm256_set1_epi8(255u8 as i8); - _mm256_store_interleave_rgb_for_yuv::( - rgba_ptr.add(dst_shift), - r_values, - g_values, - b_values, - v_alpha, - ); - - cx += 32; - - match chroma_subsampling { - YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => { - uv_x += 16; - } - YuvChromaSubsampling::Yuv444 => { - uv_x += 32; - } - } - } - - ProcessedOffset { cx, ux: uv_x } -} diff --git a/src/avx512bw/mod.rs b/src/avx512bw/mod.rs index 57ce8908..f218c42a 100644 --- a/src/avx512bw/mod.rs +++ b/src/avx512bw/mod.rs @@ -39,8 +39,6 @@ mod rgba_to_yuv420; mod y_to_rgb; mod ycgco_to_rgb; mod ycgco_to_rgba_alpha; -mod yuv2rgba420_fast; -mod yuv2rgba_fast; mod yuv_nv_to_rgba; mod yuv_nv_to_rgba420; mod yuv_p16_to_rgb16; @@ -58,8 +56,6 @@ pub(crate) use rgba_to_yuv420::avx512_rgba_to_yuv420; pub(crate) use y_to_rgb::avx512_y_to_rgb_row; pub(crate) use ycgco_to_rgb::avx512_ycgco_to_rgb_row; pub(crate) use ycgco_to_rgba_alpha::avx512_ycgco_to_rgba_alpha; -pub(crate) use yuv2rgba420_fast::avx512_yuv_to_rgba_fast_420; -pub(crate) use yuv2rgba_fast::avx512_yuv_to_fast_rgba; pub(crate) use yuv_nv_to_rgba::avx512_yuv_nv_to_rgba; pub(crate) use yuv_nv_to_rgba420::avx512_yuv_nv_to_rgba420; pub(crate) use yuv_p16_to_rgb16::avx512_yuv_p16_to_rgba16_row; diff --git a/src/avx512bw/yuv2rgba420_fast.rs b/src/avx512bw/yuv2rgba420_fast.rs deleted file mode 100644 index b7d6fb37..00000000 --- a/src/avx512bw/yuv2rgba420_fast.rs +++ /dev/null @@ -1,259 +0,0 @@ -/* - * Copyright (c) Radzivon Bartoshyk, 10/2024. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, - * are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -use crate::avx2::_mm256_interleave_epi8; -use crate::avx512bw::avx512_utils::{ - avx512_create, avx512_pack_u16, avx512_store_rgba_for_yuv_u8, avx512_zip_epi8, -}; -use crate::internals::{interleaved_epi8, ProcessedOffset}; -use crate::yuv_support::{CbCrInverseTransform, YuvChromaRange, YuvSourceChannels}; -#[cfg(target_arch = "x86")] -use std::arch::x86::*; -#[cfg(target_arch = "x86_64")] -use std::arch::x86_64::*; - -pub(crate) fn avx512_yuv_to_rgba_fast_420( - range: &YuvChromaRange, - transform: &CbCrInverseTransform, - y_plane0: &[u8], - y_plane1: &[u8], - u_plane: &[u8], - v_plane: &[u8], - rgba0: &mut [u8], - rgba1: &mut [u8], - start_cx: usize, - start_ux: usize, - width: usize, -) -> ProcessedOffset { - unsafe { - if HAS_VBMI { - avx512_yuv_to_rgba_bmi_fast_impl420::( - range, transform, y_plane0, y_plane1, u_plane, v_plane, rgba0, rgba1, start_cx, - start_ux, width, - ) - } else { - avx512_yuv_to_rgba_def_fast_impl420::( - range, transform, y_plane0, y_plane1, u_plane, v_plane, rgba0, rgba1, start_cx, - start_ux, width, - ) - } - } -} - -#[target_feature(enable = "avx512bw", enable = "avx512f", enable = "avx512vbmi")] -unsafe fn avx512_yuv_to_rgba_bmi_fast_impl420( - range: &YuvChromaRange, - transform: &CbCrInverseTransform, - y_plane0: &[u8], - y_plane1: &[u8], - u_plane: &[u8], - v_plane: &[u8], - rgba0: &mut [u8], - rgba1: &mut [u8], - start_cx: usize, - start_ux: usize, - width: usize, -) -> ProcessedOffset { - avx512_yuv_to_rgba_fast_impl420::( - range, transform, y_plane0, y_plane1, u_plane, v_plane, rgba0, rgba1, start_cx, start_ux, - width, - ) -} - -#[target_feature(enable = "avx512bw", enable = "avx512f")] -unsafe fn avx512_yuv_to_rgba_def_fast_impl420( - range: &YuvChromaRange, - transform: &CbCrInverseTransform, - y_plane0: &[u8], - y_plane1: &[u8], - u_plane: &[u8], - v_plane: &[u8], - rgba0: &mut [u8], - rgba1: &mut [u8], - start_cx: usize, - start_ux: usize, - width: usize, -) -> ProcessedOffset { - avx512_yuv_to_rgba_fast_impl420::( - range, transform, y_plane0, y_plane1, u_plane, v_plane, rgba0, rgba1, start_cx, start_ux, - width, - ) -} - -#[inline(always)] -unsafe fn avx512_yuv_to_rgba_fast_impl420( - range: &YuvChromaRange, - transform: &CbCrInverseTransform, - y_plane0: &[u8], - y_plane1: &[u8], - u_plane: &[u8], - v_plane: &[u8], - rgba0: &mut [u8], - rgba1: &mut [u8], - start_cx: usize, - start_ux: usize, - width: usize, -) -> ProcessedOffset { - let destination_channels: YuvSourceChannels = DESTINATION_CHANNELS.into(); - let channels = destination_channels.get_channels_count(); - - let mut cx = start_cx; - let mut uv_x = start_ux; - let u_ptr = u_plane.as_ptr(); - let v_ptr = v_plane.as_ptr(); - - let v_luma_coeff = _mm512_set1_epi16((transform.y_coef as u16 * 256) as i16); - let v_cr_coeff = _mm512_set1_epi16(interleaved_epi8( - transform.cr_coef as i8, - -transform.cr_coef as i8, - )); - let v_cb_coeff = _mm512_set1_epi16(interleaved_epi8( - transform.cb_coef as i8, - -transform.cb_coef as i8, - )); - let v_g_coeff_1 
= _mm512_set1_epi16(interleaved_epi8( - transform.g_coeff_1 as i8, - -transform.g_coeff_1 as i8, - )); - let v_g_coeff_2 = _mm512_set1_epi16(interleaved_epi8( - transform.g_coeff_2 as i8, - -transform.g_coeff_2 as i8, - )); - let y_corr = _mm512_set1_epi8(range.bias_y as i8); - let u_bias_uv = _mm512_set1_epi8(range.bias_uv as i8); - - while cx + 64 < width { - let y_values0 = _mm512_subs_epu8( - _mm512_loadu_si512(y_plane0.get_unchecked(cx..).as_ptr() as *const i32), - y_corr, - ); - let y_values1 = _mm512_subs_epu8( - _mm512_loadu_si512(y_plane1.get_unchecked(cx..).as_ptr() as *const i32), - y_corr, - ); - - let y0_10 = avx512_zip_epi8::(y_values0, y_values0); - let y1_10 = avx512_zip_epi8::(y_values1, y_values1); - - let u_values = _mm256_loadu_si256(u_ptr.add(uv_x) as *const __m256i); - let v_values = _mm256_loadu_si256(v_ptr.add(uv_x) as *const __m256i); - - let (mu_low0, mu_high0) = _mm256_interleave_epi8(u_values, u_values); - let (u_low0, u_high0) = - avx512_zip_epi8::(avx512_create(mu_low0, mu_high0), u_bias_uv); - let (mv_low0, mv_high0) = _mm256_interleave_epi8(v_values, v_values); - let (v_low0, v_high0) = - avx512_zip_epi8::(avx512_create(mv_low0, mv_high0), u_bias_uv); - - let y_high0 = _mm512_mulhi_epu16(y0_10.1, v_luma_coeff); - let y_high1 = _mm512_mulhi_epu16(y1_10.1, v_luma_coeff); - - let g_coeff_hi = _mm512_adds_epi16( - _mm512_maddubs_epi16(v_high0, v_g_coeff_1), - _mm512_maddubs_epi16(u_high0, v_g_coeff_2), - ); - - let r_high0 = _mm512_adds_epi16(y_high0, _mm512_maddubs_epi16(v_high0, v_cr_coeff)); - let b_high0 = _mm512_adds_epi16(y_high0, _mm512_maddubs_epi16(u_high0, v_cb_coeff)); - let g_high0 = _mm512_subs_epi16(y_high0, g_coeff_hi); - - let r_high1 = _mm512_adds_epi16(y_high1, _mm512_maddubs_epi16(v_high0, v_cr_coeff)); - let b_high1 = _mm512_adds_epi16(y_high1, _mm512_maddubs_epi16(u_high0, v_cb_coeff)); - let g_high1 = _mm512_subs_epi16(y_high1, g_coeff_hi); - - let y_low0 = _mm512_mulhi_epu16(y0_10.0, v_luma_coeff); - let y_low1 = _mm512_mulhi_epu16(y1_10.0, v_luma_coeff); - - let g_coeff_lo = _mm512_adds_epi16( - _mm512_maddubs_epi16(v_low0, v_g_coeff_1), - _mm512_maddubs_epi16(u_low0, v_g_coeff_2), - ); - - let r_low0 = _mm512_adds_epi16(y_low0, _mm512_maddubs_epi16(v_low0, v_cr_coeff)); - let b_low0 = _mm512_adds_epi16(y_low0, _mm512_maddubs_epi16(u_low0, v_cb_coeff)); - let g_low0 = _mm512_subs_epi16(y_low0, g_coeff_lo); - - let r_low1 = _mm512_adds_epi16(y_low1, _mm512_maddubs_epi16(v_low0, v_cr_coeff)); - let b_low1 = _mm512_adds_epi16(y_low1, _mm512_maddubs_epi16(u_low0, v_cb_coeff)); - let g_low1 = _mm512_subs_epi16(y_low1, g_coeff_lo); - - const PREC: u32 = 6; - - let r_values0 = avx512_pack_u16( - _mm512_srai_epi16::(r_low0), - _mm512_srai_epi16::(r_high0), - ); - let g_values0 = avx512_pack_u16( - _mm512_srai_epi16::(g_low0), - _mm512_srai_epi16::(g_high0), - ); - let b_values0 = avx512_pack_u16( - _mm512_srai_epi16::(b_low0), - _mm512_srai_epi16::(b_high0), - ); - - let r_values1 = avx512_pack_u16( - _mm512_srai_epi16::(r_low1), - _mm512_srai_epi16::(r_high1), - ); - let g_values1 = avx512_pack_u16( - _mm512_srai_epi16::(g_low1), - _mm512_srai_epi16::(g_high1), - ); - let b_values1 = avx512_pack_u16( - _mm512_srai_epi16::(b_low1), - _mm512_srai_epi16::(b_high1), - ); - - let dst_shift = cx * channels; - - let v_alpha = _mm512_set1_epi8(255u8 as i8); - - avx512_store_rgba_for_yuv_u8::( - rgba0.get_unchecked_mut(dst_shift..).as_mut_ptr(), - r_values0, - g_values0, - b_values0, - v_alpha, - ); - - avx512_store_rgba_for_yuv_u8::( - 
rgba1.get_unchecked_mut(dst_shift..).as_mut_ptr(), - r_values1, - g_values1, - b_values1, - v_alpha, - ); - - cx += 64; - uv_x += 32; - } - - ProcessedOffset { cx, ux: uv_x } -} diff --git a/src/avx512bw/yuv2rgba_fast.rs b/src/avx512bw/yuv2rgba_fast.rs deleted file mode 100644 index 629cfc9a..00000000 --- a/src/avx512bw/yuv2rgba_fast.rs +++ /dev/null @@ -1,244 +0,0 @@ -/* - * Copyright (c) Radzivon Bartoshyk, 10/2024. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, - * are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -use crate::avx2::_mm256_interleave_epi8; -use crate::avx512bw::avx512_utils::{ - avx512_create, avx512_pack_u16, avx512_store_rgba_for_yuv_u8, avx512_zip_epi8, -}; -use crate::internals::{interleaved_epi8, ProcessedOffset}; -use crate::yuv_support::{ - CbCrInverseTransform, YuvChromaRange, YuvChromaSubsampling, YuvSourceChannels, -}; -#[cfg(target_arch = "x86")] -use std::arch::x86::*; -#[cfg(target_arch = "x86_64")] -use std::arch::x86_64::*; - -pub(crate) fn avx512_yuv_to_fast_rgba< - const DESTINATION_CHANNELS: u8, - const SAMPLING: u8, - const HAS_VBMI: bool, ->( - range: &YuvChromaRange, - transform: &CbCrInverseTransform, - y_plane: &[u8], - u_plane: &[u8], - v_plane: &[u8], - rgba: &mut [u8], - start_cx: usize, - start_ux: usize, - width: usize, -) -> ProcessedOffset { - unsafe { - if HAS_VBMI { - avx512_yuv_to_rgba_bmi_fast_impl::( - range, transform, y_plane, u_plane, v_plane, rgba, start_cx, start_ux, width, - ) - } else { - avx512_yuv_to_rgba_def_fast_impl::( - range, transform, y_plane, u_plane, v_plane, rgba, start_cx, start_ux, width, - ) - } - } -} - -#[target_feature(enable = "avx512bw", enable = "avx512f", enable = "avx512vbmi")] -unsafe fn avx512_yuv_to_rgba_bmi_fast_impl( - range: &YuvChromaRange, - transform: &CbCrInverseTransform, - y_plane: &[u8], - u_plane: &[u8], - v_plane: &[u8], - rgba: &mut [u8], - start_cx: usize, - start_ux: usize, - width: usize, -) -> ProcessedOffset { - avx512_yuv_to_rgba_fast_impl::( - range, transform, y_plane, u_plane, v_plane, rgba, start_cx, start_ux, width, - ) -} - -#[target_feature(enable = "avx512bw", enable = "avx512f")] -unsafe fn avx512_yuv_to_rgba_def_fast_impl( - range: &YuvChromaRange, - transform: &CbCrInverseTransform, - y_plane: &[u8], - u_plane: &[u8], - v_plane: &[u8], - rgba: &mut [u8], - start_cx: usize, - start_ux: usize, - width: usize, -) -> ProcessedOffset { - avx512_yuv_to_rgba_fast_impl::( - range, transform, y_plane, u_plane, v_plane, rgba, start_cx, start_ux, width, - ) -} - -#[inline(always)] -unsafe fn avx512_yuv_to_rgba_fast_impl< - const DESTINATION_CHANNELS: u8, - const SAMPLING: u8, - const HAS_VBMI: bool, ->( - range: &YuvChromaRange, - transform: &CbCrInverseTransform, - y_plane: &[u8], - u_plane: &[u8], - v_plane: &[u8], - rgba: &mut [u8], - start_cx: usize, - start_ux: usize, - width: usize, -) -> ProcessedOffset { - let chroma_subsampling: YuvChromaSubsampling = SAMPLING.into(); - let destination_channels: YuvSourceChannels = DESTINATION_CHANNELS.into(); - let channels = destination_channels.get_channels_count(); - - const PRECISION: u32 = 6; - - let mut cx = start_cx; - let mut uv_x = start_ux; - let y_ptr = y_plane.as_ptr(); - let u_ptr = u_plane.as_ptr(); - let v_ptr = v_plane.as_ptr(); - let rgba_ptr = rgba.as_mut_ptr(); - - let v_luma_coeff = _mm512_set1_epi16((transform.y_coef as u16 * 256) as i16); - let v_cr_coeff = _mm512_set1_epi16(interleaved_epi8( - transform.cr_coef as i8, - -transform.cr_coef as i8, - )); - let v_cb_coeff = _mm512_set1_epi16(interleaved_epi8( - transform.cb_coef as i8, - -transform.cb_coef as i8, - )); - let v_g_coeff_1 = _mm512_set1_epi16(interleaved_epi8( - transform.g_coeff_1 as i8, - -transform.g_coeff_1 as i8, - )); - let v_g_coeff_2 = _mm512_set1_epi16(interleaved_epi8( - transform.g_coeff_2 as i8, - -transform.g_coeff_2 as i8, - )); - let y_corr = _mm512_set1_epi8(range.bias_y as i8); - let u_bias_uv = _mm512_set1_epi8(range.bias_uv as i8); - - while cx + 64 < width { - let y_values = _mm512_subs_epu8(_mm512_loadu_si512(y_ptr.add(cx) as *const i32), 
y_corr); - - let (u_high0, v_high0, u_low0, v_low0); - - match chroma_subsampling { - YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => { - let u_values = _mm256_loadu_si256(u_ptr.add(uv_x) as *const __m256i); - let v_values = _mm256_loadu_si256(v_ptr.add(uv_x) as *const __m256i); - - let (mu_low0, mu_high0) = _mm256_interleave_epi8(u_values, u_values); - (u_low0, u_high0) = - avx512_zip_epi8::(avx512_create(mu_low0, mu_high0), u_bias_uv); - let (mv_low0, mv_high0) = _mm256_interleave_epi8(v_values, v_values); - (v_low0, v_high0) = - avx512_zip_epi8::(avx512_create(mv_low0, mv_high0), u_bias_uv); - } - YuvChromaSubsampling::Yuv444 => { - let u_values = _mm512_loadu_si512(u_ptr.add(uv_x) as *const i32); - let v_values = _mm512_loadu_si512(v_ptr.add(uv_x) as *const i32); - - (u_low0, u_high0) = avx512_zip_epi8::(u_values, u_bias_uv); - (v_low0, v_high0) = avx512_zip_epi8::(v_values, u_bias_uv); - } - } - - let (y_lo, y_hi) = avx512_zip_epi8::(y_values, y_values); - - let y_high = _mm512_mulhi_epu16(y_hi, v_luma_coeff); - - let r_high = _mm512_adds_epi16(y_high, _mm512_maddubs_epi16(v_high0, v_cr_coeff)); - let b_high = _mm512_adds_epi16(y_high, _mm512_maddubs_epi16(u_high0, v_cb_coeff)); - let g_high = _mm512_subs_epi16( - y_high, - _mm512_adds_epi16( - _mm512_maddubs_epi16(v_high0, v_g_coeff_1), - _mm512_maddubs_epi16(u_high0, v_g_coeff_2), - ), - ); - - let y_low = _mm512_mulhi_epu16(y_lo, v_luma_coeff); - - let r_low = _mm512_adds_epi16(y_low, _mm512_maddubs_epi16(v_low0, v_cr_coeff)); - let b_low = _mm512_adds_epi16(y_low, _mm512_maddubs_epi16(u_low0, v_cb_coeff)); - let g_low = _mm512_subs_epi16( - y_low, - _mm512_adds_epi16( - _mm512_maddubs_epi16(v_low0, v_g_coeff_1), - _mm512_maddubs_epi16(u_low0, v_g_coeff_2), - ), - ); - - let r_values = avx512_pack_u16( - _mm512_srai_epi16::(r_low), - _mm512_srai_epi16::(r_high), - ); - let g_values = avx512_pack_u16( - _mm512_srai_epi16::(g_low), - _mm512_srai_epi16::(g_high), - ); - let b_values = avx512_pack_u16( - _mm512_srai_epi16::(b_low), - _mm512_srai_epi16::(b_high), - ); - - let dst_shift = cx * channels; - - let v_alpha = _mm512_set1_epi8(255u8 as i8); - - avx512_store_rgba_for_yuv_u8::( - rgba_ptr.add(dst_shift), - r_values, - g_values, - b_values, - v_alpha, - ); - - cx += 64; - - match chroma_subsampling { - YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => { - uv_x += 32; - } - YuvChromaSubsampling::Yuv444 => { - uv_x += 64; - } - } - } - - ProcessedOffset { cx, ux: uv_x } -} diff --git a/src/conversion_quality.rs b/src/conversion_quality.rs deleted file mode 100644 index 9739188f..00000000 --- a/src/conversion_quality.rs +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) Radzivon Bartoshyk, 12/2024. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, - * are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. 
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-use crate::ConversionQuality::{Fastest, Good};
-
-#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Debug, Default)]
-pub enum ConversionQuality {
-    /// The fastest conversion with a cost of precision
-    Fastest,
-    /// Fast conversion with good precision
-    #[default]
-    Good,
-}
-
-impl From<usize> for ConversionQuality {
-    fn from(quality: usize) -> Self {
-        match quality {
-            0 => Fastest,
-            1 => Good,
-            _ => unimplemented!("Conversion quality {} is not implemented yet", quality),
-        }
-    }
-}
diff --git a/src/internals.rs b/src/internals.rs
index 231b3ae1..af7f8772 100644
--- a/src/internals.rs
+++ b/src/internals.rs
@@ -33,11 +33,3 @@ pub(crate) struct ProcessedOffset {
     pub(crate) cx: usize,
     pub(crate) ux: usize,
 }
-
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-#[inline(always)]
-pub(crate) fn interleaved_epi8(lo: i8, hi: i8) -> i16 {
-    let w0 = hi as u16;
-    let w1 = lo as u16;
-    (w0 << 8 | w1) as i16
-}
diff --git a/src/lib.rs b/src/lib.rs
index 64b2e644..734207c8 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -42,7 +42,6 @@ mod avx2;
 ))]
 mod avx512bw;
 mod built_coefficients;
-mod conversion_quality;
 mod from_identity;
 mod from_identity_alpha;
 mod images;
@@ -441,7 +440,6 @@ pub use yuv_p16_rgba_p16::*;
 pub use ar30_rgb::{
     ab30_to_rgb8, ar30_to_rgb8, ar30_to_rgba8, ba30_to_rgb8, ra30_to_rgb8, ra30_to_rgba8,
 };
-pub use conversion_quality::ConversionQuality;
 pub use rgb_ar30::{rgb8_to_ar30, rgb8_to_ra30, rgba8_to_ar30, rgba8_to_ra30};
 pub use yuv_p16_ar30::{
     yuv420_p16_to_ab30, yuv420_p16_to_ar30, yuv420_p16_to_ra30, yuv422_p16_to_ab30,
diff --git a/src/neon/mod.rs b/src/neon/mod.rs
index 3a3801c8..830d7dc4 100644
--- a/src/neon/mod.rs
+++ b/src/neon/mod.rs
@@ -43,8 +43,6 @@ mod y_to_rgb;
 mod y_to_rgb_alpha;
 mod ycgco_to_rgb;
 mod ycgco_to_rgb_alpha;
-mod yuv2rgba420_fast;
-mod yuv2rgba_fast;
 mod yuv_nv_p10_to_rgba;
 mod yuv_nv_p16_to_rgb;
 mod yuv_nv_to_rgba;
@@ -76,8 +74,6 @@ pub(crate) use y_to_rgb::{neon_y_to_rgb_row, neon_y_to_rgb_row_rdm};
 pub(crate) use y_to_rgb_alpha::{neon_y_to_rgb_alpha_row, neon_y_to_rgb_row_alpha_rdm};
 pub(crate) use ycgco_to_rgb::neon_ycgco_to_rgb_row;
 pub(crate) use ycgco_to_rgb_alpha::neon_ycgco_to_rgb_alpha_row;
-pub(crate) use yuv2rgba420_fast::neon_yuv_to_rgba_row_fast420;
-pub(crate) use yuv2rgba_fast::neon_yuv_to_rgba_fast_row;
 pub(crate) use yuv_nv_p10_to_rgba::neon_yuv_nv12_p10_to_rgba_row;
 pub(crate) use yuv_nv_p16_to_rgb::{neon_yuv_nv_p16_to_rgba_row, neon_yuv_nv_p16_to_rgba_row_rdm};
 pub(crate) use yuv_nv_to_rgba::{neon_yuv_nv_to_rgba_row, neon_yuv_nv_to_rgba_row_rdm};
diff --git a/src/neon/utils.rs b/src/neon/utils.rs
index 79047abe..5c85389d 100644
--- a/src/neon/utils.rs
+++ b/src/neon/utils.rs
@@ -512,33 +512,10 @@ pub(crate) unsafe fn
vpackuq_n_shift16(a: int16x8_t) -> pub(crate) unsafe fn vexpand8_to_10(a: uint8x8_t) -> uint16x8_t { let k = vcombine_u8(a, a); vrshrq_n_u16::<6>(vreinterpretq_u16_u8(vzip1q_u8(k, k))) - // vshll_n_u8::<2>(a) } /// Expands exactly 8 bit to 10 #[inline(always)] pub(crate) unsafe fn vexpand_high_8_to_10(a: uint8x16_t) -> uint16x8_t { vrshrq_n_u16::<6>(vreinterpretq_u16_u8(vzip2q_u8(a, a))) - // vshll_high_n_u8::<2>(a) -} - -/// Expands exactly 8 bit to 16 -#[inline(always)] -pub(crate) unsafe fn vexpand8_to_16(a: uint8x8_t) -> uint16x8_t { - let k = vcombine_u8(a, a); - vreinterpretq_u16_u8(vzip1q_u8(k, k)) -} - -/// Expands exactly 8 bit to 10 -#[inline(always)] -pub(crate) unsafe fn vexpand_high_8_to_16(a: uint8x16_t) -> uint16x8_t { - vreinterpretq_u16_u8(vzip2q_u8(a, a)) -} - -/// Expands exactly 8 bit to 10 -#[inline(always)] -pub(crate) unsafe fn vumulhiq_u16(a: uint16x8_t, b: uint16x8_t) -> int16x8_t { - let hi = vmull_high_u16(a, b); - let lo = vmull_u16(vget_low_u16(a), vget_low_u16(b)); - vreinterpretq_s16_u16(vcombine_u16(vshrn_n_u32::<8>(lo), vshrn_n_u32::<8>(hi))) } diff --git a/src/neon/yuv2rgba420_fast.rs b/src/neon/yuv2rgba420_fast.rs deleted file mode 100644 index 5129fab5..00000000 --- a/src/neon/yuv2rgba420_fast.rs +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright (c) Radzivon Bartoshyk, 10/2024. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, - * are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -use crate::internals::ProcessedOffset; -use crate::neon::utils::{ - neon_store_half_rgb8, neon_store_rgb8, vexpand8_to_16, vexpand_high_8_to_16, vumulhiq_u16, -}; -use crate::yuv_support::{CbCrInverseTransform, YuvChromaRange, YuvSourceChannels}; -use std::arch::aarch64::*; - -pub(crate) unsafe fn neon_yuv_to_rgba_row_fast420( - range: &YuvChromaRange, - transform: &CbCrInverseTransform, - y_plane0: &[u8], - y_plane1: &[u8], - u_plane: &[u8], - v_plane: &[u8], - rgba0: &mut [u8], - rgba1: &mut [u8], - start_cx: usize, - start_ux: usize, - width: usize, -) -> ProcessedOffset { - let destination_channels: YuvSourceChannels = DESTINATION_CHANNELS.into(); - let channels = destination_channels.get_channels_count(); - - const PRECISION: i32 = 6; - - let mut cx = start_cx; - let mut uv_x = start_ux; - - let u_ptr = u_plane.as_ptr(); - let v_ptr = v_plane.as_ptr(); - - let y_corr = vdupq_n_u8(range.bias_y as u8); - let uv_corr = vdupq_n_s16(range.bias_uv as i16); - - let cb_coeff = vdupq_n_s16(transform.cb_coef as i16); - let cr_coeff = vdupq_n_s16(transform.cr_coef as i16); - let y_coeff = vdupq_n_u16(transform.y_coef as u16); - let g_coeff1 = vdupq_n_s16(-transform.g_coeff_1 as i16); - let g_coeff2 = vdupq_n_s16(-transform.g_coeff_2 as i16); - - let v_alpha = vdupq_n_u8(255u8); - - while cx + 16 < width { - let vl0 = vld1q_u8(y_plane0.get_unchecked(cx..).as_ptr()); - let vl1 = vld1q_u8(y_plane1.get_unchecked(cx..).as_ptr()); - let y_values0 = vqsubq_u8(vl0, y_corr); - let y_values1 = vqsubq_u8(vl1, y_corr); - - let u_values = vld1_u8(u_ptr.add(uv_x)); - let v_values = vld1_u8(v_ptr.add(uv_x)); - - let u_high_u8 = vzip2_u8(u_values, u_values); - let v_high_u8 = vzip2_u8(v_values, v_values); - let u_low_u8 = vzip1_u8(u_values, u_values); - let v_low_u8 = vzip1_u8(v_values, v_values); - - let u_high = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(u_high_u8)), uv_corr); - let v_high = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(v_high_u8)), uv_corr); - - let y_high0 = vumulhiq_u16(vexpand_high_8_to_16(y_values0), y_coeff); - let y_high1 = vumulhiq_u16(vexpand_high_8_to_16(y_values1), y_coeff); - - let g_coeff_hi = vqaddq_s16(vmulq_s16(v_high, g_coeff1), vmulq_s16(u_high, g_coeff2)); - - let r_high0 = vqrshrun_n_s16::(vqaddq_s16(y_high0, vmulq_s16(v_high, cr_coeff))); - let b_high0 = vqrshrun_n_s16::(vqaddq_s16(y_high0, vmulq_s16(u_high, cb_coeff))); - let g_high0 = vqrshrun_n_s16::(vqaddq_s16(y_high0, g_coeff_hi)); - - let r_high1 = vqrshrun_n_s16::(vqaddq_s16(y_high1, vmulq_s16(v_high, cr_coeff))); - let b_high1 = vqrshrun_n_s16::(vqaddq_s16(y_high1, vmulq_s16(u_high, cb_coeff))); - let g_high1 = vqrshrun_n_s16::(vqaddq_s16(y_high1, g_coeff_hi)); - - let u_low = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(u_low_u8)), uv_corr); - let v_low = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(v_low_u8)), uv_corr); - - let y_low0 = vumulhiq_u16(vexpand8_to_16(vget_low_u8(y_values0)), y_coeff); - let y_low1 = vumulhiq_u16(vexpand8_to_16(vget_low_u8(y_values1)), y_coeff); - - let g_coeff_lo = vqaddq_s16(vmulq_s16(v_low, g_coeff1), vmulq_s16(u_low, g_coeff2)); - - let r_low0 = vqrshrun_n_s16::(vqaddq_s16(y_low0, vmulq_s16(v_low, cr_coeff))); - let b_low0 = vqrshrun_n_s16::(vqaddq_s16(y_low0, vmulq_s16(u_low, cb_coeff))); - let g_low0 = vqrshrun_n_s16::(vqaddq_s16(y_low0, g_coeff_lo)); - - let r_low1 = vqrshrun_n_s16::(vqaddq_s16(y_low1, vmulq_s16(v_low, cr_coeff))); - let b_low1 = vqrshrun_n_s16::(vqaddq_s16(y_low1, vmulq_s16(u_low, cb_coeff))); - let g_low1 = vqrshrun_n_s16::(vqaddq_s16(y_low1, g_coeff_lo)); - - let 
r_values0 = vcombine_u8(r_low0, r_high0); - let g_values0 = vcombine_u8(g_low0, g_high0); - let b_values0 = vcombine_u8(b_low0, b_high0); - - let r_values1 = vcombine_u8(r_low1, r_high1); - let g_values1 = vcombine_u8(g_low1, g_high1); - let b_values1 = vcombine_u8(b_low1, b_high1); - - let dst_shift = cx * channels; - - neon_store_rgb8::( - rgba0.get_unchecked_mut(dst_shift..).as_mut_ptr(), - r_values0, - g_values0, - b_values0, - v_alpha, - ); - neon_store_rgb8::( - rgba1.get_unchecked_mut(dst_shift..).as_mut_ptr(), - r_values1, - g_values1, - b_values1, - v_alpha, - ); - - cx += 16; - uv_x += 8; - } - - while cx + 8 < width { - let vl0 = vld1_u8(y_plane0.get_unchecked(cx..).as_ptr()); - let vl1 = vld1_u8(y_plane1.get_unchecked(cx..).as_ptr()); - let y_values0 = vqsub_u8(vl0, vget_low_u8(y_corr)); - let y_values1 = vqsub_u8(vl1, vget_low_u8(y_corr)); - - let u_values = vreinterpret_u8_u32(vld1_dup_u32(u_ptr.add(uv_x) as *const u32)); - let v_values = vreinterpret_u8_u32(vld1_dup_u32(v_ptr.add(uv_x) as *const u32)); - - let u_low_u8 = vzip1_u8(u_values, u_values); - let v_low_u8 = vzip1_u8(v_values, v_values); - - let u_low = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(u_low_u8)), uv_corr); - let v_low = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(v_low_u8)), uv_corr); - - let y_low0 = vumulhiq_u16(vexpand8_to_16(y_values0), y_coeff); - let y_low1 = vumulhiq_u16(vexpand8_to_16(y_values1), y_coeff); - - let g_coeff_lo = vqaddq_s16(vmulq_s16(v_low, g_coeff1), vmulq_s16(u_low, g_coeff2)); - - let r0 = vqrshrun_n_s16::(vqaddq_s16(y_low0, vmulq_s16(v_low, cr_coeff))); - let b0 = vqrshrun_n_s16::(vqaddq_s16(y_low0, vmulq_s16(u_low, cb_coeff))); - let g0 = vqrshrun_n_s16::(vqaddq_s16(y_low0, g_coeff_lo)); - - let r1 = vqrshrun_n_s16::(vqaddq_s16(y_low1, vmulq_s16(v_low, cr_coeff))); - let b1 = vqrshrun_n_s16::(vqaddq_s16(y_low1, vmulq_s16(u_low, cb_coeff))); - let g1 = vqrshrun_n_s16::(vqaddq_s16(y_low1, g_coeff_lo)); - - let dst_shift = cx * channels; - - neon_store_half_rgb8::( - rgba0.get_unchecked_mut(dst_shift..).as_mut_ptr(), - r0, - g0, - b0, - vget_low_u8(v_alpha), - ); - neon_store_half_rgb8::( - rgba1.get_unchecked_mut(dst_shift..).as_mut_ptr(), - r1, - g1, - b1, - vget_low_u8(v_alpha), - ); - - cx += 8; - uv_x += 4; - } - - ProcessedOffset { cx, ux: uv_x } -} diff --git a/src/neon/yuv2rgba_fast.rs b/src/neon/yuv2rgba_fast.rs deleted file mode 100644 index 57a15c4f..00000000 --- a/src/neon/yuv2rgba_fast.rs +++ /dev/null @@ -1,366 +0,0 @@ -/* - * Copyright (c) Radzivon Bartoshyk, 12/2024. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, - * are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -use crate::internals::ProcessedOffset; -use crate::neon::utils::{ - neon_store_half_rgb8, neon_store_rgb8, vexpand8_to_16, vexpand_high_8_to_16, vumulhiq_u16, - xvld1q_u8_x2, -}; -use crate::yuv_support::{CbCrInverseTransform, YuvChromaRange, YuvSourceChannels}; -use crate::YuvChromaSubsampling; -use std::arch::aarch64::*; - -pub(crate) unsafe fn neon_yuv_to_rgba_fast_row< - const DESTINATION_CHANNELS: u8, - const SAMPLING: u8, ->( - range: &YuvChromaRange, - transform: &CbCrInverseTransform, - y_plane: &[u8], - u_plane: &[u8], - v_plane: &[u8], - rgba: &mut [u8], - start_cx: usize, - start_ux: usize, - width: usize, -) -> ProcessedOffset { - const PRECISION: i32 = 6; - let chroma_subsampling: YuvChromaSubsampling = SAMPLING.into(); - let destination_channels: YuvSourceChannels = DESTINATION_CHANNELS.into(); - let channels = destination_channels.get_channels_count(); - - let mut cx = start_cx; - let mut uv_x = start_ux; - - let y_ptr = y_plane.as_ptr(); - let u_ptr = u_plane.as_ptr(); - let v_ptr = v_plane.as_ptr(); - let rgba_ptr = rgba.as_mut_ptr(); - - let y_corr = vdupq_n_u8(range.bias_y as u8); - let uv_corr = vdupq_n_s16(range.bias_uv as i16); - - let cb_coeff = vdupq_n_s16(transform.cb_coef as i16); - let cr_coeff = vdupq_n_s16(transform.cr_coef as i16); - let y_coeff = vdupq_n_u16(transform.y_coef as u16); - let g_coeff1 = vdupq_n_s16(-transform.g_coeff_1 as i16); - let g_coeff2 = vdupq_n_s16(-transform.g_coeff_2 as i16); - - let v_alpha = vdupq_n_u8(255u8); - - while cx + 32 < width { - let mut y_set = xvld1q_u8_x2(y_ptr.add(cx)); - y_set.0 = vqsubq_u8(y_set.0, y_corr); - y_set.1 = vqsubq_u8(y_set.1, y_corr); - - let u_high_u8: uint8x16_t; - let v_high_u8: uint8x16_t; - let u_low_u8: uint8x16_t; - let v_low_u8: uint8x16_t; - - match chroma_subsampling { - YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => { - let u_values = vld1q_u8(u_ptr.add(uv_x)); - let v_values = vld1q_u8(v_ptr.add(uv_x)); - - u_high_u8 = vzip2q_u8(u_values, u_values); - v_high_u8 = vzip2q_u8(v_values, v_values); - u_low_u8 = vzip1q_u8(u_values, u_values); - v_low_u8 = vzip1q_u8(v_values, v_values); - } - YuvChromaSubsampling::Yuv444 => { - let u_values = xvld1q_u8_x2(u_ptr.add(uv_x)); - let v_values = xvld1q_u8_x2(v_ptr.add(uv_x)); - - u_high_u8 = u_values.1; - v_high_u8 = v_values.1; - u_low_u8 = u_values.0; - v_low_u8 = v_values.0; - } - } - - let u_high1 = vsubq_s16(vreinterpretq_s16_u16(vmovl_high_u8(u_high_u8)), uv_corr); - let v_high1 = vsubq_s16(vreinterpretq_s16_u16(vmovl_high_u8(v_high_u8)), uv_corr); - let y_high1 = vumulhiq_u16(vexpand_high_8_to_16(y_set.1), y_coeff); - - let r_high1 = vqaddq_s16(y_high1, vmulq_s16(v_high1, cr_coeff)); - let b_high1 = vqaddq_s16(y_high1, vmulq_s16(u_high1, cb_coeff)); - let g_high1 = vqaddq_s16( - vqaddq_s16(y_high1, vmulq_s16(v_high1, g_coeff1)), - vmulq_s16(u_high1, g_coeff2), - ); - - let u_high0 = vsubq_s16( - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(u_high_u8))), - uv_corr, - ); - let v_high0 = vsubq_s16( - 
vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(v_high_u8))), - uv_corr, - ); - let y_high0 = vumulhiq_u16(vexpand8_to_16(vget_low_u8(y_set.1)), y_coeff); - - let r_high0 = vqaddq_s16(y_high0, vmulq_s16(v_high0, cr_coeff)); - let b_high0 = vqaddq_s16(y_high0, vmulq_s16(u_high0, cb_coeff)); - let g_high0 = vqaddq_s16( - vqaddq_s16(y_high0, vmulq_s16(v_high0, g_coeff1)), - vmulq_s16(u_high0, g_coeff2), - ); - - let u_low1 = vsubq_s16(vreinterpretq_s16_u16(vmovl_high_u8(u_low_u8)), uv_corr); - let v_low1 = vsubq_s16(vreinterpretq_s16_u16(vmovl_high_u8(v_low_u8)), uv_corr); - let y_low1 = vumulhiq_u16(vexpand_high_8_to_16(y_set.0), y_coeff); - - let r_low1 = vqaddq_s16(y_low1, vmulq_s16(v_low1, cr_coeff)); - let b_low1 = vqaddq_s16(y_low1, vmulq_s16(u_low1, cb_coeff)); - let g_low1 = vqaddq_s16( - vqaddq_s16(y_low1, vmulq_s16(v_low1, g_coeff1)), - vmulq_s16(u_low1, g_coeff2), - ); - - let u_low0 = vsubq_s16( - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(u_low_u8))), - uv_corr, - ); - let v_low0 = vsubq_s16( - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(v_low_u8))), - uv_corr, - ); - let y_low0 = vumulhiq_u16(vexpand8_to_16(vget_low_u8(y_set.0)), y_coeff); - - let r_low0 = vqaddq_s16(y_low0, vmulq_s16(v_low0, cr_coeff)); - let b_low0 = vqaddq_s16(y_low0, vmulq_s16(u_low0, cb_coeff)); - let g_low0 = vqaddq_s16( - vqaddq_s16(y_low0, vmulq_s16(v_low0, g_coeff1)), - vmulq_s16(u_low0, g_coeff2), - ); - - let r_values0 = vcombine_u8( - vqrshrun_n_s16::(r_low0), - vqrshrun_n_s16::(r_low1), - ); - let g_values0 = vcombine_u8( - vqrshrun_n_s16::(g_low0), - vqrshrun_n_s16::(g_low1), - ); - let b_values0 = vcombine_u8( - vqrshrun_n_s16::(b_low0), - vqrshrun_n_s16::(b_low1), - ); - - let r_values1 = vcombine_u8( - vqrshrun_n_s16::(r_high0), - vqrshrun_n_s16::(r_high1), - ); - let g_values1 = vcombine_u8( - vqrshrun_n_s16::(g_high0), - vqrshrun_n_s16::(g_high1), - ); - let b_values1 = vcombine_u8( - vqrshrun_n_s16::(b_high0), - vqrshrun_n_s16::(b_high1), - ); - - let dst_shift = cx * channels; - - neon_store_rgb8::( - rgba_ptr.add(dst_shift), - r_values0, - g_values0, - b_values0, - v_alpha, - ); - - neon_store_rgb8::( - rgba_ptr.add(dst_shift + 16 * channels), - r_values1, - g_values1, - b_values1, - v_alpha, - ); - - cx += 32; - - match chroma_subsampling { - YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => { - uv_x += 16; - } - YuvChromaSubsampling::Yuv444 => { - uv_x += 32; - } - } - } - - while cx + 16 < width { - let y_values = vqsubq_u8(vld1q_u8(y_ptr.add(cx)), y_corr); - - let u_high_u8: uint8x8_t; - let v_high_u8: uint8x8_t; - let u_low_u8: uint8x8_t; - let v_low_u8: uint8x8_t; - - match chroma_subsampling { - YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => { - let u_values = vld1_u8(u_ptr.add(uv_x)); - let v_values = vld1_u8(v_ptr.add(uv_x)); - - u_high_u8 = vzip2_u8(u_values, u_values); - v_high_u8 = vzip2_u8(v_values, v_values); - u_low_u8 = vzip1_u8(u_values, u_values); - v_low_u8 = vzip1_u8(v_values, v_values); - } - YuvChromaSubsampling::Yuv444 => { - let u_values = vld1q_u8(u_ptr.add(uv_x)); - let v_values = vld1q_u8(v_ptr.add(uv_x)); - - u_high_u8 = vget_high_u8(u_values); - v_high_u8 = vget_high_u8(v_values); - u_low_u8 = vget_low_u8(u_values); - v_low_u8 = vget_low_u8(v_values); - } - } - - let u_high = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(u_high_u8)), uv_corr); - let v_high = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(v_high_u8)), uv_corr); - let y_high = vumulhiq_u16(vexpand_high_8_to_16(y_values), y_coeff); - - let r_high = vqaddq_s16(y_high, vmulq_s16(v_high, 
cr_coeff)); - let b_high = vqaddq_s16(y_high, vmulq_s16(u_high, cb_coeff)); - let g_high = vqaddq_s16( - vqaddq_s16(y_high, vmulq_s16(v_high, g_coeff1)), - vmulq_s16(u_high, g_coeff2), - ); - - let u_low = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(u_low_u8)), uv_corr); - let v_low = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(v_low_u8)), uv_corr); - let y_low = vumulhiq_u16(vexpand8_to_16(vget_low_u8(y_values)), y_coeff); - - let r_low = vqaddq_s16(y_low, vmulq_s16(v_low, cr_coeff)); - let b_low = vqaddq_s16(y_low, vmulq_s16(u_low, cb_coeff)); - let g_low = vqaddq_s16( - vqaddq_s16(y_low, vmulq_s16(v_low, g_coeff1)), - vmulq_s16(u_low, g_coeff2), - ); - - let r_values = vcombine_u8( - vqrshrun_n_s16::(r_low), - vqrshrun_n_s16::(r_high), - ); - let g_values = vcombine_u8( - vqrshrun_n_s16::(g_low), - vqrshrun_n_s16::(g_high), - ); - let b_values = vcombine_u8( - vqrshrun_n_s16::(b_low), - vqrshrun_n_s16::(b_high), - ); - - let dst_shift = cx * channels; - - neon_store_rgb8::( - rgba_ptr.add(dst_shift), - r_values, - g_values, - b_values, - v_alpha, - ); - - cx += 16; - - match chroma_subsampling { - YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => { - uv_x += 8; - } - YuvChromaSubsampling::Yuv444 => { - uv_x += 16; - } - } - } - - while cx + 8 < width { - let y_values = vqsub_u8(vld1_u8(y_ptr.add(cx)), vget_low_u8(y_corr)); - - let u_low_u8: uint8x8_t; - let v_low_u8: uint8x8_t; - - match chroma_subsampling { - YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => { - let u_values = vreinterpret_u8_u32(vld1_dup_u32(u_ptr.add(uv_x) as *const u32)); - let v_values = vreinterpret_u8_u32(vld1_dup_u32(v_ptr.add(uv_x) as *const u32)); - - u_low_u8 = vzip1_u8(u_values, u_values); - v_low_u8 = vzip1_u8(v_values, v_values); - } - YuvChromaSubsampling::Yuv444 => { - let u_values = vld1_u8(u_ptr.add(uv_x)); - let v_values = vld1_u8(v_ptr.add(uv_x)); - - u_low_u8 = u_values; - v_low_u8 = v_values; - } - } - - let u_low = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(u_low_u8)), uv_corr); - let v_low = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(v_low_u8)), uv_corr); - let y_low = vumulhiq_u16(vexpand8_to_16(y_values), y_coeff); - - let r_low = vqaddq_s16(y_low, vmulq_s16(v_low, cr_coeff)); - let b_low = vqaddq_s16(y_low, vmulq_s16(u_low, cb_coeff)); - let g_low = vqaddq_s16( - vqaddq_s16(y_low, vmulq_s16(v_low, g_coeff1)), - vmulq_s16(u_low, g_coeff2), - ); - - let r_values = vqrshrun_n_s16::(r_low); - let g_values = vqrshrun_n_s16::(g_low); - let b_values = vqrshrun_n_s16::(b_low); - - let dst_shift = cx * channels; - - neon_store_half_rgb8::( - rgba_ptr.add(dst_shift), - r_values, - g_values, - b_values, - vget_low_u8(v_alpha), - ); - - cx += 8; - - match chroma_subsampling { - YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => { - uv_x += 4; - } - YuvChromaSubsampling::Yuv444 => { - uv_x += 8; - } - } - } - - ProcessedOffset { cx, ux: uv_x } -} diff --git a/src/sse/mod.rs b/src/sse/mod.rs index cdcd30ce..6f79eaae 100644 --- a/src/sse/mod.rs +++ b/src/sse/mod.rs @@ -42,7 +42,6 @@ mod y_to_rgba; mod y_to_rgba_alpha; mod ycgco_to_rgb; mod ycgco_to_rgb_alpha; -mod yuv2rgba_fast; mod yuv_nv_p16_to_rgb; mod yuv_nv_to_rgba; mod yuv_nv_to_rgba420; @@ -52,7 +51,6 @@ mod yuv_p16_to_rgb8; mod yuv_p16_to_rgb8_alpha; mod yuv_to_rgba; mod yuv_to_rgba420; -mod yuv_to_rgba420_fast; mod yuv_to_rgba_alpha; mod yuv_to_yuy2; mod yuy2_to_rgb; @@ -72,7 +70,6 @@ pub(crate) use y_to_rgba::sse_y_to_rgba_row; pub(crate) use y_to_rgba_alpha::sse_y_to_rgba_alpha_row; pub(crate) use 
ycgco_to_rgb::sse_ycgco_to_rgb_row; pub(crate) use ycgco_to_rgb_alpha::sse_ycgco_to_rgb_alpha_row; -pub(crate) use yuv2rgba_fast::sse_yuv_to_rgba_fast_row; pub(crate) use yuv_nv_p16_to_rgb::sse_yuv_nv_p16_to_rgba_row; pub(crate) use yuv_nv_to_rgba::sse_yuv_nv_to_rgba; pub(crate) use yuv_nv_to_rgba420::sse_yuv_nv_to_rgba420; @@ -82,7 +79,6 @@ pub(crate) use yuv_p16_to_rgb8::sse_yuv_p16_to_rgba8_row; pub(crate) use yuv_p16_to_rgb8_alpha::sse_yuv_p16_to_rgba8_alpha_row; pub(crate) use yuv_to_rgba::sse_yuv_to_rgba_row; pub(crate) use yuv_to_rgba420::sse_yuv_to_rgba_row420; -pub(crate) use yuv_to_rgba420_fast::sse_yuv_to_rgba_fast_row420; pub(crate) use yuv_to_rgba_alpha::sse_yuv_to_rgba_alpha_row; pub(crate) use yuv_to_yuy2::yuv_to_yuy2_sse; pub(crate) use yuy2_to_rgb::yuy2_to_rgb_sse; diff --git a/src/sse/yuv2rgba_fast.rs b/src/sse/yuv2rgba_fast.rs deleted file mode 100644 index df7eded2..00000000 --- a/src/sse/yuv2rgba_fast.rs +++ /dev/null @@ -1,291 +0,0 @@ -/* - * Copyright (c) Radzivon Bartoshyk, 10/2024. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, - * are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -use crate::internals::{interleaved_epi8, ProcessedOffset}; -use crate::sse::{_mm_store_interleave_half_rgb_for_yuv, _mm_store_interleave_rgb_for_yuv}; -use crate::yuv_support::{ - CbCrInverseTransform, YuvChromaRange, YuvChromaSubsampling, YuvSourceChannels, -}; -#[cfg(target_arch = "x86")] -use std::arch::x86::*; -#[cfg(target_arch = "x86_64")] -use std::arch::x86_64::*; - -pub(crate) fn sse_yuv_to_rgba_fast_row< - const DESTINATION_CHANNELS: u8, - const SAMPLING: u8, - const PRECISION: i32, ->( - range: &YuvChromaRange, - transform: &CbCrInverseTransform, - y_plane: &[u8], - u_plane: &[u8], - v_plane: &[u8], - rgba: &mut [u8], - start_cx: usize, - start_ux: usize, - width: usize, -) -> ProcessedOffset { - unsafe { - sse_yuv_to_rgba_fast_row_impl::( - range, transform, y_plane, u_plane, v_plane, rgba, start_cx, start_ux, width, - ) - } -} - -#[target_feature(enable = "sse4.1")] -unsafe fn sse_yuv_to_rgba_fast_row_impl< - const DESTINATION_CHANNELS: u8, - const SAMPLING: u8, - const PRECISION: i32, ->( - range: &YuvChromaRange, - transform: &CbCrInverseTransform, - y_plane: &[u8], - u_plane: &[u8], - v_plane: &[u8], - rgba: &mut [u8], - start_cx: usize, - start_ux: usize, - width: usize, -) -> ProcessedOffset { - let chroma_subsampling: YuvChromaSubsampling = SAMPLING.into(); - let destination_channels: YuvSourceChannels = DESTINATION_CHANNELS.into(); - let channels = destination_channels.get_channels_count(); - - let mut cx = start_cx; - let mut uv_x = start_ux; - - let y_ptr = y_plane.as_ptr(); - let u_ptr = u_plane.as_ptr(); - let v_ptr = v_plane.as_ptr(); - let rgba_ptr = rgba.as_mut_ptr(); - - let y_corr = _mm_set1_epi8(range.bias_y as i8); - let v_luma_coeff = _mm_set1_epi16((transform.y_coef as u16 * 256) as i16); - let v_cr_coeff = _mm_set1_epi16(interleaved_epi8( - transform.cr_coef as i8, - -transform.cr_coef as i8, - )); - let v_cb_coeff = _mm_set1_epi16(interleaved_epi8( - transform.cb_coef as i8, - -transform.cb_coef as i8, - )); - let v_g_coeff_1 = _mm_set1_epi16(interleaved_epi8( - transform.g_coeff_1 as i8, - -transform.g_coeff_1 as i8, - )); - let v_g_coeff_2 = _mm_set1_epi16(interleaved_epi8( - transform.g_coeff_2 as i8, - -transform.g_coeff_2 as i8, - )); - - let u_bias_uv = _mm_set1_epi8(range.bias_uv as i8); - - while cx + 16 < width { - let y_values = _mm_subs_epu8(_mm_loadu_si128(y_ptr.add(cx) as *const __m128i), y_corr); - - let (u_high_u16, v_high_u16, u_low_u16, v_low_u16); - - match chroma_subsampling { - YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => { - let reshuffle = _mm_setr_epi8(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7); - let u_values = _mm_shuffle_epi8(_mm_loadu_si64(u_ptr.add(uv_x)), reshuffle); - let v_values = _mm_shuffle_epi8(_mm_loadu_si64(v_ptr.add(uv_x)), reshuffle); - - u_high_u16 = _mm_unpackhi_epi8(u_values, u_bias_uv); - v_high_u16 = _mm_unpackhi_epi8(v_values, u_bias_uv); - u_low_u16 = _mm_unpacklo_epi8(u_values, u_bias_uv); - v_low_u16 = _mm_unpacklo_epi8(v_values, u_bias_uv); - } - YuvChromaSubsampling::Yuv444 => { - let u_values = _mm_loadu_si128(u_ptr.add(uv_x) as *const __m128i); - let v_values = _mm_loadu_si128(v_ptr.add(uv_x) as *const __m128i); - - u_high_u16 = _mm_unpackhi_epi8(u_values, u_bias_uv); - v_high_u16 = _mm_unpackhi_epi8(v_values, u_bias_uv); - u_low_u16 = _mm_unpacklo_epi8(u_values, u_bias_uv); - v_low_u16 = _mm_unpacklo_epi8(v_values, u_bias_uv); - } - } - - let y_high = _mm_mulhi_epu16(_mm_unpackhi_epi8(y_values, y_values), v_luma_coeff); - - let r_high = _mm_adds_epi16(y_high, 
_mm_maddubs_epi16(v_high_u16, v_cr_coeff)); - let b_high = _mm_adds_epi16(y_high, _mm_maddubs_epi16(u_high_u16, v_cb_coeff)); - let g_high = _mm_subs_epi16( - y_high, - _mm_adds_epi16( - _mm_maddubs_epi16(v_high_u16, v_g_coeff_1), - _mm_maddubs_epi16(u_high_u16, v_g_coeff_2), - ), - ); - - let y_low = _mm_mulhi_epu16(_mm_unpacklo_epi8(y_values, y_values), v_luma_coeff); - - let r_low = _mm_adds_epi16(y_low, _mm_maddubs_epi16(v_low_u16, v_cr_coeff)); - - let b_low = _mm_adds_epi16(y_low, _mm_maddubs_epi16(u_low_u16, v_cb_coeff)); - let g_low = _mm_subs_epi16( - y_low, - _mm_adds_epi16( - _mm_maddubs_epi16(v_low_u16, v_g_coeff_1), - _mm_maddubs_epi16(u_low_u16, v_g_coeff_2), - ), - ); - - let r_values = _mm_packus_epi16( - _mm_srai_epi16::(r_low), - _mm_srai_epi16::(r_high), - ); - let g_values = _mm_packus_epi16( - _mm_srai_epi16::(g_low), - _mm_srai_epi16::(g_high), - ); - let b_values = _mm_packus_epi16( - _mm_srai_epi16::(b_low), - _mm_srai_epi16::(b_high), - ); - - let dst_shift = cx * channels; - - let v_alpha = _mm_set1_epi8(255u8 as i8); - - _mm_store_interleave_rgb_for_yuv::( - rgba_ptr.add(dst_shift), - r_values, - g_values, - b_values, - v_alpha, - ); - - cx += 16; - - match chroma_subsampling { - YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => { - uv_x += 8; - } - YuvChromaSubsampling::Yuv444 => { - uv_x += 16; - } - } - } - - while cx + 8 < width { - let y_values = _mm_subs_epi8(_mm_loadu_si64(y_ptr.add(cx)), y_corr); - - let (u_low_u16, v_low_u16); - - match chroma_subsampling { - YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => { - let reshuffle = _mm_setr_epi8(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7); - let u_value = (u_ptr.add(uv_x) as *const i32).read_unaligned(); - let v_value = (v_ptr.add(uv_x) as *const i32).read_unaligned(); - let u_values = _mm_shuffle_epi8( - _mm_insert_epi32::<0>(_mm_setzero_si128(), u_value), - reshuffle, - ); - let v_values = _mm_shuffle_epi8( - _mm_insert_epi32::<0>(_mm_setzero_si128(), v_value), - reshuffle, - ); - - u_low_u16 = _mm_unpacklo_epi8(u_values, u_bias_uv); - v_low_u16 = _mm_unpacklo_epi8(v_values, u_bias_uv); - } - YuvChromaSubsampling::Yuv444 => { - let u_values = _mm_loadu_si64(u_ptr.add(uv_x)); - let v_values = _mm_loadu_si64(v_ptr.add(uv_x)); - - u_low_u16 = _mm_unpacklo_epi8(u_values, u_bias_uv); - v_low_u16 = _mm_unpacklo_epi8(v_values, u_bias_uv); - } - } - - let y_low = _mm_mulhi_epu16(_mm_unpacklo_epi8(y_values, y_values), v_luma_coeff); - - let r_low = _mm_adds_epi16(y_low, _mm_maddubs_epi16(v_low_u16, v_cr_coeff)); - let b_low = _mm_adds_epi16(y_low, _mm_maddubs_epi16(u_low_u16, v_cb_coeff)); - let g_low = _mm_subs_epi16( - y_low, - _mm_adds_epi16( - _mm_maddubs_epi16(v_low_u16, v_g_coeff_1), - _mm_maddubs_epi16(u_low_u16, v_g_coeff_2), - ), - ); - - let r_values = _mm_packus_epi16(_mm_srai_epi16::(r_low), r_low); - let g_values = _mm_packus_epi16(_mm_srai_epi16::(g_low), r_low); - let b_values = _mm_packus_epi16(_mm_srai_epi16::(b_low), r_low); - - let dst_shift = cx * channels; - - let v_alpha = _mm_set1_epi8(255u8 as i8); - - _mm_store_interleave_half_rgb_for_yuv::( - rgba_ptr.add(dst_shift), - r_values, - g_values, - b_values, - v_alpha, - ); - - cx += 8; - - match chroma_subsampling { - YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => { - uv_x += 4; - } - YuvChromaSubsampling::Yuv444 => { - uv_x += 8; - } - } - } - - ProcessedOffset { cx, ux: uv_x } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_mulhi_addubs() { - unsafe { - let v_cr_coeff = 
_mm_set1_epi16(interleaved_epi8(5, -1)); - let base_val = _mm_set1_epi8(1); - let ones = _mm_set1_epi8(1); - let mul_val = _mm_unpacklo_epi8(base_val, ones); - let product = _mm_maddubs_epi16(mul_val, v_cr_coeff); - let mut rs: [i16; 8] = [0; 8]; - _mm_storeu_si128(rs.as_mut_ptr() as *mut __m128i, product); - assert_eq!(rs[0], 4); - } - } -} diff --git a/src/sse/yuv_to_rgba420_fast.rs b/src/sse/yuv_to_rgba420_fast.rs deleted file mode 100644 index a729f8eb..00000000 --- a/src/sse/yuv_to_rgba420_fast.rs +++ /dev/null @@ -1,284 +0,0 @@ -/* - * Copyright (c) Radzivon Bartoshyk, 10/2024. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, - * are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -use crate::internals::{interleaved_epi8, ProcessedOffset}; -use crate::sse::{_mm_store_interleave_half_rgb_for_yuv, _mm_store_interleave_rgb_for_yuv}; -use crate::yuv_support::{CbCrInverseTransform, YuvChromaRange, YuvSourceChannels}; -#[cfg(target_arch = "x86")] -use std::arch::x86::*; -#[cfg(target_arch = "x86_64")] -use std::arch::x86_64::*; - -pub(crate) fn sse_yuv_to_rgba_fast_row420( - range: &YuvChromaRange, - transform: &CbCrInverseTransform, - y_plane0: &[u8], - y_plane1: &[u8], - u_plane: &[u8], - v_plane: &[u8], - rgba0: &mut [u8], - rgba1: &mut [u8], - start_cx: usize, - start_ux: usize, - width: usize, -) -> ProcessedOffset { - unsafe { - sse_yuv_to_rgba_fast_row_impl420::( - range, transform, y_plane0, y_plane1, u_plane, v_plane, rgba0, rgba1, start_cx, - start_ux, width, - ) - } -} - -#[target_feature(enable = "sse4.1")] -unsafe fn sse_yuv_to_rgba_fast_row_impl420( - range: &YuvChromaRange, - transform: &CbCrInverseTransform, - y_plane0: &[u8], - y_plane1: &[u8], - u_plane: &[u8], - v_plane: &[u8], - rgba0: &mut [u8], - rgba1: &mut [u8], - start_cx: usize, - start_ux: usize, - width: usize, -) -> ProcessedOffset { - let destination_channels: YuvSourceChannels = DESTINATION_CHANNELS.into(); - let channels = destination_channels.get_channels_count(); - - let mut cx = start_cx; - let mut uv_x = start_ux; - - const PRECISION: i32 = 6; - - let u_ptr = u_plane.as_ptr(); - let v_ptr = v_plane.as_ptr(); - - let y_corr = _mm_set1_epi8(range.bias_y as i8); - let v_luma_coeff = _mm_set1_epi16((transform.y_coef as u16 * 256) as i16); - let v_cr_coeff = _mm_set1_epi16(interleaved_epi8( - transform.cr_coef as i8, - -transform.cr_coef as i8, - )); - let v_cb_coeff = _mm_set1_epi16(interleaved_epi8( - transform.cb_coef as i8, - -transform.cb_coef as i8, - )); - let v_g_coeff_1 = _mm_set1_epi16(interleaved_epi8( - transform.g_coeff_1 as i8, - -transform.g_coeff_1 as i8, - )); - let v_g_coeff_2 = _mm_set1_epi16(interleaved_epi8( - transform.g_coeff_2 as i8, - -transform.g_coeff_2 as i8, - )); - - let u_bias_uv = _mm_set1_epi8(range.bias_uv as i8); - - let reshuffle = _mm_setr_epi8(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7); - - while cx + 16 < width { - let y_values0 = _mm_subs_epu8( - _mm_loadu_si128(y_plane0.get_unchecked(cx..).as_ptr() as *const __m128i), - y_corr, - ); - let y_values1 = _mm_subs_epu8( - _mm_loadu_si128(y_plane1.get_unchecked(cx..).as_ptr() as *const __m128i), - y_corr, - ); - - let u_values = _mm_shuffle_epi8(_mm_loadu_si64(u_ptr.add(uv_x)), reshuffle); - let v_values = _mm_shuffle_epi8(_mm_loadu_si64(v_ptr.add(uv_x)), reshuffle); - - let u_high_u16 = _mm_unpackhi_epi8(u_values, u_bias_uv); - let v_high_u16 = _mm_unpackhi_epi8(v_values, u_bias_uv); - let u_low_u16 = _mm_unpacklo_epi8(u_values, u_bias_uv); - let v_low_u16 = _mm_unpacklo_epi8(v_values, u_bias_uv); - - let y_high0 = _mm_mulhi_epu16(_mm_unpackhi_epi8(y_values0, y_values0), v_luma_coeff); - let y_high1 = _mm_mulhi_epu16(_mm_unpackhi_epi8(y_values1, y_values1), v_luma_coeff); - - let g_coeff_hi = _mm_adds_epi16( - _mm_maddubs_epi16(v_high_u16, v_g_coeff_1), - _mm_maddubs_epi16(u_high_u16, v_g_coeff_2), - ); - - let r_high0 = _mm_adds_epi16(y_high0, _mm_maddubs_epi16(v_high_u16, v_cr_coeff)); - let b_high0 = _mm_adds_epi16(y_high0, _mm_maddubs_epi16(u_high_u16, v_cb_coeff)); - let g_high0 = _mm_subs_epi16(y_high0, g_coeff_hi); - - let r_high1 = _mm_adds_epi16(y_high1, _mm_maddubs_epi16(v_high_u16, v_cr_coeff)); - let b_high1 = _mm_adds_epi16(y_high1, _mm_maddubs_epi16(u_high_u16, v_cb_coeff)); - let 
g_high1 = _mm_subs_epi16(y_high1, g_coeff_hi); - - let y_low0 = _mm_mulhi_epu16(_mm_unpacklo_epi8(y_values0, y_values0), v_luma_coeff); - let y_low1 = _mm_mulhi_epu16(_mm_unpacklo_epi8(y_values1, y_values1), v_luma_coeff); - - let g_coeff_lo = _mm_adds_epi16( - _mm_maddubs_epi16(v_low_u16, v_g_coeff_1), - _mm_maddubs_epi16(u_low_u16, v_g_coeff_2), - ); - - let r_low0 = _mm_adds_epi16(y_low0, _mm_maddubs_epi16(v_low_u16, v_cr_coeff)); - let b_low0 = _mm_adds_epi16(y_low0, _mm_maddubs_epi16(u_low_u16, v_cb_coeff)); - let g_low0 = _mm_subs_epi16(y_low0, g_coeff_lo); - - let r_low1 = _mm_adds_epi16(y_low1, _mm_maddubs_epi16(v_low_u16, v_cr_coeff)); - let b_low1 = _mm_adds_epi16(y_low1, _mm_maddubs_epi16(u_low_u16, v_cb_coeff)); - let g_low1 = _mm_subs_epi16(y_low1, g_coeff_lo); - - let r_values0 = _mm_packus_epi16( - _mm_srai_epi16::(r_low0), - _mm_srai_epi16::(r_high0), - ); - let g_values0 = _mm_packus_epi16( - _mm_srai_epi16::(g_low0), - _mm_srai_epi16::(g_high0), - ); - let b_values0 = _mm_packus_epi16( - _mm_srai_epi16::(b_low0), - _mm_srai_epi16::(b_high0), - ); - - let r_values1 = _mm_packus_epi16( - _mm_srai_epi16::(r_low1), - _mm_srai_epi16::(r_high1), - ); - let g_values1 = _mm_packus_epi16( - _mm_srai_epi16::(g_low1), - _mm_srai_epi16::(g_high1), - ); - let b_values1 = _mm_packus_epi16( - _mm_srai_epi16::(b_low1), - _mm_srai_epi16::(b_high1), - ); - - let dst_shift = cx * channels; - - let v_alpha = _mm_set1_epi8(255u8 as i8); - - _mm_store_interleave_rgb_for_yuv::( - rgba0.get_unchecked_mut(dst_shift..).as_mut_ptr(), - r_values0, - g_values0, - b_values0, - v_alpha, - ); - _mm_store_interleave_rgb_for_yuv::( - rgba1.get_unchecked_mut(dst_shift..).as_mut_ptr(), - r_values1, - g_values1, - b_values1, - v_alpha, - ); - - cx += 16; - uv_x += 8; - } - - while cx + 8 < width { - let y_values0 = _mm_subs_epi8( - _mm_loadu_si64(y_plane0.get_unchecked(cx..).as_ptr()), - y_corr, - ); - let y_values1 = _mm_subs_epi8( - _mm_loadu_si64(y_plane1.get_unchecked(cx..).as_ptr()), - y_corr, - ); - - let (u_low_u16, v_low_u16); - - let reshuffle = _mm_setr_epi8(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7); - let u_value = (u_ptr.add(uv_x) as *const i32).read_unaligned(); - let v_value = (v_ptr.add(uv_x) as *const i32).read_unaligned(); - let u_values = _mm_shuffle_epi8( - _mm_insert_epi32::<0>(_mm_setzero_si128(), u_value), - reshuffle, - ); - let v_values = _mm_shuffle_epi8( - _mm_insert_epi32::<0>(_mm_setzero_si128(), v_value), - reshuffle, - ); - - u_low_u16 = _mm_unpacklo_epi8(u_values, u_bias_uv); - v_low_u16 = _mm_unpacklo_epi8(v_values, u_bias_uv); - - let y_low0 = _mm_mulhi_epu16(_mm_unpacklo_epi8(y_values0, y_values0), v_luma_coeff); - let y_low1 = _mm_mulhi_epu16(_mm_unpacklo_epi8(y_values1, y_values1), v_luma_coeff); - - let g_coeff = _mm_adds_epi16( - _mm_maddubs_epi16(v_low_u16, v_g_coeff_1), - _mm_maddubs_epi16(u_low_u16, v_g_coeff_2), - ); - - let r_low0 = _mm_adds_epi16(y_low0, _mm_maddubs_epi16(v_low_u16, v_cr_coeff)); - let b_low0 = _mm_adds_epi16(y_low0, _mm_maddubs_epi16(u_low_u16, v_cb_coeff)); - let g_low0 = _mm_subs_epi16(y_low0, g_coeff); - - let r_low1 = _mm_adds_epi16(y_low1, _mm_maddubs_epi16(v_low_u16, v_cr_coeff)); - let b_low1 = _mm_adds_epi16(y_low1, _mm_maddubs_epi16(u_low_u16, v_cb_coeff)); - let g_low1 = _mm_subs_epi16(y_low1, g_coeff); - - let zeros = _mm_setzero_si128(); - - let r_values0 = _mm_packus_epi16(_mm_srai_epi16::(r_low0), zeros); - let g_values0 = _mm_packus_epi16(_mm_srai_epi16::(g_low0), zeros); - let b_values0 = 
_mm_packus_epi16(_mm_srai_epi16::(b_low0), zeros); - - let r_values1 = _mm_packus_epi16(_mm_srai_epi16::(r_low1), zeros); - let g_values1 = _mm_packus_epi16(_mm_srai_epi16::(g_low1), zeros); - let b_values1 = _mm_packus_epi16(_mm_srai_epi16::(b_low1), zeros); - - let dst_shift = cx * channels; - - let v_alpha = _mm_set1_epi8(255u8 as i8); - - _mm_store_interleave_half_rgb_for_yuv::( - rgba0.get_unchecked_mut(dst_shift..).as_mut_ptr(), - r_values0, - g_values0, - b_values0, - v_alpha, - ); - - _mm_store_interleave_half_rgb_for_yuv::( - rgba1.get_unchecked_mut(dst_shift..).as_mut_ptr(), - r_values1, - g_values1, - b_values1, - v_alpha, - ); - - cx += 8; - uv_x += 4; - } - - ProcessedOffset { cx, ux: uv_x } -} diff --git a/src/yuv_to_rgba.rs b/src/yuv_to_rgba.rs index 1e0f2375..582de040 100644 --- a/src/yuv_to_rgba.rs +++ b/src/yuv_to_rgba.rs @@ -27,40 +27,32 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -use crate::avx2::{ - avx2_yuv_to_rgba_fast_row, avx2_yuv_to_rgba_fast_row420, avx2_yuv_to_rgba_row, - avx2_yuv_to_rgba_row420, -}; +use crate::avx2::{avx2_yuv_to_rgba_row, avx2_yuv_to_rgba_row420}; #[cfg(all( any(target_arch = "x86", target_arch = "x86_64"), feature = "nightly_avx512" ))] -use crate::avx512bw::{ - avx512_yuv_to_fast_rgba, avx512_yuv_to_rgba, avx512_yuv_to_rgba420, avx512_yuv_to_rgba_fast_420, -}; +use crate::avx512bw::{avx512_yuv_to_rgba, avx512_yuv_to_rgba420}; #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] use crate::neon::{ - neon_yuv_to_rgba_fast_row, neon_yuv_to_rgba_row, neon_yuv_to_rgba_row420, - neon_yuv_to_rgba_row_fast420, neon_yuv_to_rgba_row_rdm, neon_yuv_to_rgba_row_rdm420, + neon_yuv_to_rgba_row, neon_yuv_to_rgba_row420, neon_yuv_to_rgba_row_rdm, + neon_yuv_to_rgba_row_rdm420, }; use crate::numerics::qrshr; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -use crate::sse::{ - sse_yuv_to_rgba_fast_row, sse_yuv_to_rgba_fast_row420, sse_yuv_to_rgba_row, - sse_yuv_to_rgba_row420, -}; +use crate::sse::{sse_yuv_to_rgba_row, sse_yuv_to_rgba_row420}; #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] use crate::wasm32::{wasm_yuv_to_rgba_row, wasm_yuv_to_rgba_row420}; use crate::yuv_error::check_rgba_destination; #[allow(unused_imports)] use crate::yuv_support::*; -use crate::{ConversionQuality, YuvError, YuvPlanarImage}; +use crate::{YuvError, YuvPlanarImage}; #[cfg(feature = "rayon")] use rayon::iter::{IndexedParallelIterator, ParallelIterator}; #[cfg(feature = "rayon")] use rayon::prelude::{ParallelSlice, ParallelSliceMut}; -fn yuv_to_rgbx_impl( +fn yuv_to_rgbx( image: &YuvPlanarImage, rgba: &mut [u8], rgba_stride: u32, @@ -77,6 +69,8 @@ fn yuv_to_rgbx_impl - } else { - neon_yuv_to_rgba_row:: - } + let neon_wide_row_handler = if is_rdm_available { + neon_yuv_to_rgba_row_rdm:: } else { - neon_yuv_to_rgba_fast_row:: + neon_yuv_to_rgba_row:: }; #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] - let neon_double_row_handler = if PRECISION == 13 { - if is_rdm_available { - neon_yuv_to_rgba_row_rdm420:: - } else { - neon_yuv_to_rgba_row420:: - } + let neon_double_row_handler = if is_rdm_available { + neon_yuv_to_rgba_row_rdm420:: } else { - neon_yuv_to_rgba_row_fast420:: + neon_yuv_to_rgba_row420:: }; #[cfg(all( any(target_arch = "x86", target_arch = "x86_64"), feature = "nightly_avx512" ))] - let avx512_wide_row = if PRECISION == 13 { - if use_vbmi { - avx512_yuv_to_rgba:: - } else { - avx512_yuv_to_rgba:: - } + let avx512_wide_row = if 
use_vbmi { + avx512_yuv_to_rgba:: } else { - if use_vbmi { - avx512_yuv_to_fast_rgba:: - } else { - avx512_yuv_to_fast_rgba:: - } + avx512_yuv_to_rgba:: }; #[cfg(all( any(target_arch = "x86", target_arch = "x86_64"), feature = "nightly_avx512" ))] - let avx512_double_wide_row = if PRECISION == 13 { - if use_vbmi { - avx512_yuv_to_rgba420:: - } else { - avx512_yuv_to_rgba420:: - } + let avx512_double_wide_row = if use_vbmi { + avx512_yuv_to_rgba420:: } else { - if use_vbmi { - avx512_yuv_to_rgba_fast_420:: - } else { - avx512_yuv_to_rgba_fast_420:: - } + avx512_yuv_to_rgba420:: }; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - let sse_row_handler = if PRECISION == 13 { - sse_yuv_to_rgba_row:: - } else { - sse_yuv_to_rgba_fast_row:: - }; + let sse_row_handler = sse_yuv_to_rgba_row::; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - let avx_row_handler = if PRECISION == 13 { - avx2_yuv_to_rgba_row:: - } else { - avx2_yuv_to_rgba_fast_row:: - }; + let avx_row_handler = avx2_yuv_to_rgba_row::; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - let sse_double_row_handler = if PRECISION == 13 { - sse_yuv_to_rgba_row420:: - } else { - sse_yuv_to_rgba_fast_row420:: - }; + let sse_double_row_handler = sse_yuv_to_rgba_row420::; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - let avx_double_row_handler = if PRECISION == 13 { - avx2_yuv_to_rgba_row420:: - } else { - avx2_yuv_to_rgba_fast_row420:: - }; + let avx_double_row_handler = avx2_yuv_to_rgba_row420::; let process_wide_row = |_y_plane: &[u8], _u_plane: &[u8], _v_plane: &[u8], _rgba: &mut [u8]| { let mut _cx = 0usize; @@ -362,23 +316,21 @@ fn yuv_to_rgbx_impl( - &chroma_range, - &inverse_transform, - _y_plane0, - _y_plane1, - _u_plane, - _v_plane, - _rgba0, - _rgba1, - _cx, - _uv_x, - image.width as usize, - ); - _cx = processed.cx; - _uv_x = processed.ux; - } + let processed = wasm_yuv_to_rgba_row420::( + &chroma_range, + &inverse_transform, + _y_plane0, + _y_plane1, + _u_plane, + _v_plane, + _rgba0, + _rgba1, + _cx, + _uv_x, + image.width as usize, + ); + _cx = processed.cx; + _uv_x = processed.ux; } } @@ -708,50 +660,6 @@ fn yuv_to_rgbx_impl( - image: &YuvPlanarImage, - rgba: &mut [u8], - rgba_stride: u32, - range: YuvRange, - matrix: YuvStandardMatrix, - quality: ConversionQuality, -) -> Result<(), YuvError> { - match quality { - ConversionQuality::Fastest => { - #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] - let is_rdm_available = std::arch::is_aarch64_feature_detected!("rdm"); - - #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] - { - if is_rdm_available { - return yuv_to_rgbx_impl::( - image, - rgba, - rgba_stride, - range, - matrix, - ); - } - } - - yuv_to_rgbx_impl::( - image, - rgba, - rgba_stride, - range, - matrix, - ) - } - ConversionQuality::Good => yuv_to_rgbx_impl::( - image, - rgba, - rgba_stride, - range, - matrix, - ), - } -} - /// Convert YUV 420 planar format to RGB format. 
/// /// This function takes YUV 420 planar format data with 8-bit precision, @@ -776,7 +684,6 @@ pub fn yuv420_to_rgb( rgb_stride: u32, range: YuvRange, matrix: YuvStandardMatrix, - conversion_quality: ConversionQuality, ) -> Result<(), YuvError> { yuv_to_rgbx::<{ YuvSourceChannels::Rgb as u8 }, { YuvChromaSubsampling::Yuv420 as u8 }>( planar_image, @@ -784,7 +691,6 @@ pub fn yuv420_to_rgb( rgb_stride, range, matrix, - conversion_quality, ) } @@ -812,7 +718,6 @@ pub fn yuv420_to_bgr( bgr_stride: u32, range: YuvRange, matrix: YuvStandardMatrix, - conversion_quality: ConversionQuality, ) -> Result<(), YuvError> { yuv_to_rgbx::<{ YuvSourceChannels::Bgr as u8 }, { YuvChromaSubsampling::Yuv420 as u8 }>( planar_image, @@ -820,7 +725,6 @@ pub fn yuv420_to_bgr( bgr_stride, range, matrix, - conversion_quality, ) } @@ -848,7 +752,6 @@ pub fn yuv420_to_rgba( rgba_stride: u32, range: YuvRange, matrix: YuvStandardMatrix, - conversion_quality: ConversionQuality, ) -> Result<(), YuvError> { yuv_to_rgbx::<{ YuvSourceChannels::Rgba as u8 }, { YuvChromaSubsampling::Yuv420 as u8 }>( planar_image, @@ -856,7 +759,6 @@ pub fn yuv420_to_rgba( rgba_stride, range, matrix, - conversion_quality, ) } @@ -884,7 +786,6 @@ pub fn yuv420_to_bgra( bgra_stride: u32, range: YuvRange, matrix: YuvStandardMatrix, - conversion_quality: ConversionQuality, ) -> Result<(), YuvError> { yuv_to_rgbx::<{ YuvSourceChannels::Bgra as u8 }, { YuvChromaSubsampling::Yuv420 as u8 }>( planar_image, @@ -892,7 +793,6 @@ pub fn yuv420_to_bgra( bgra_stride, range, matrix, - conversion_quality, ) } @@ -920,7 +820,6 @@ pub fn yuv422_to_rgb( rgb_stride: u32, range: YuvRange, matrix: YuvStandardMatrix, - conversion_quality: ConversionQuality, ) -> Result<(), YuvError> { yuv_to_rgbx::<{ YuvSourceChannels::Rgb as u8 }, { YuvChromaSubsampling::Yuv422 as u8 }>( planar_image, @@ -928,7 +827,6 @@ pub fn yuv422_to_rgb( rgb_stride, range, matrix, - conversion_quality, ) } @@ -956,7 +854,6 @@ pub fn yuv422_to_bgr( bgr_stride: u32, range: YuvRange, matrix: YuvStandardMatrix, - conversion_quality: ConversionQuality, ) -> Result<(), YuvError> { yuv_to_rgbx::<{ YuvSourceChannels::Bgr as u8 }, { YuvChromaSubsampling::Yuv422 as u8 }>( planar_image, @@ -964,7 +861,6 @@ pub fn yuv422_to_bgr( bgr_stride, range, matrix, - conversion_quality, ) } @@ -992,7 +888,6 @@ pub fn yuv422_to_rgba( rgba_stride: u32, range: YuvRange, matrix: YuvStandardMatrix, - conversion_quality: ConversionQuality, ) -> Result<(), YuvError> { yuv_to_rgbx::<{ YuvSourceChannels::Rgba as u8 }, { YuvChromaSubsampling::Yuv422 as u8 }>( planar_image, @@ -1000,7 +895,6 @@ pub fn yuv422_to_rgba( rgba_stride, range, matrix, - conversion_quality, ) } @@ -1028,7 +922,6 @@ pub fn yuv422_to_bgra( bgra_stride: u32, range: YuvRange, matrix: YuvStandardMatrix, - conversion_quality: ConversionQuality, ) -> Result<(), YuvError> { yuv_to_rgbx::<{ YuvSourceChannels::Bgra as u8 }, { YuvChromaSubsampling::Yuv422 as u8 }>( planar_image, @@ -1036,7 +929,6 @@ pub fn yuv422_to_bgra( bgra_stride, range, matrix, - conversion_quality, ) } @@ -1064,7 +956,6 @@ pub fn yuv444_to_rgba( rgba_stride: u32, range: YuvRange, matrix: YuvStandardMatrix, - conversion_quality: ConversionQuality, ) -> Result<(), YuvError> { yuv_to_rgbx::<{ YuvSourceChannels::Rgba as u8 }, { YuvChromaSubsampling::Yuv444 as u8 }>( planar_image, @@ -1072,7 +963,6 @@ pub fn yuv444_to_rgba( rgba_stride, range, matrix, - conversion_quality, ) } @@ -1101,7 +991,6 @@ pub fn yuv444_to_bgra( bgra_stride: u32, range: YuvRange, matrix: YuvStandardMatrix, - 
conversion_quality: ConversionQuality, ) -> Result<(), YuvError> { yuv_to_rgbx::<{ YuvSourceChannels::Bgra as u8 }, { YuvChromaSubsampling::Yuv444 as u8 }>( planar_image, @@ -1109,7 +998,6 @@ pub fn yuv444_to_bgra( bgra_stride, range, matrix, - conversion_quality, ) } @@ -1137,7 +1025,6 @@ pub fn yuv444_to_rgb( rgb_stride: u32, range: YuvRange, matrix: YuvStandardMatrix, - conversion_quality: ConversionQuality, ) -> Result<(), YuvError> { yuv_to_rgbx::<{ YuvSourceChannels::Rgb as u8 }, { YuvChromaSubsampling::Yuv444 as u8 }>( planar_image, @@ -1145,7 +1032,6 @@ pub fn yuv444_to_rgb( rgb_stride, range, matrix, - conversion_quality, ) } @@ -1173,7 +1059,6 @@ pub fn yuv444_to_bgr( bgr_stride: u32, range: YuvRange, matrix: YuvStandardMatrix, - conversion_quality: ConversionQuality, ) -> Result<(), YuvError> { yuv_to_rgbx::<{ YuvSourceChannels::Bgr as u8 }, { YuvChromaSubsampling::Yuv444 as u8 }>( planar_image, @@ -1181,7 +1066,6 @@ pub fn yuv444_to_bgr( bgr_stride, range, matrix, - conversion_quality, ) } @@ -1193,727 +1077,709 @@ mod tests { #[test] fn test_yuv444_round_trip_full_range() { - for quality in [ConversionQuality::Fastest, ConversionQuality::Good] { - let image_width = 256usize; - let image_height = 256usize; - - let random_point_x = rand::thread_rng().gen_range(0..image_width); - let random_point_y = rand::thread_rng().gen_range(0..image_height); - - let pixel_points = [ - [0, 0], - [image_width - 1, image_height - 1], - [image_width - 1, 0], - [0, image_height - 1], - [(image_width - 1) / 2, (image_height - 1) / 2], - [image_width / 5, image_height / 5], - [0, image_height / 5], - [image_width / 5, 0], - [image_width / 5 * 3, image_height / 5], - [image_width / 5 * 3, image_height / 5 * 3], - [image_width / 5, image_height / 5 * 3], - [random_point_x, random_point_y], - ]; - let mut image_rgb = vec![0u8; image_width * image_height * 3]; - - let or = rand::thread_rng().gen_range(0..256) as u8; - let og = rand::thread_rng().gen_range(0..256) as u8; - let ob = rand::thread_rng().gen_range(0..256) as u8; - - for point in &pixel_points { - image_rgb[point[0] * 3 + point[1] * image_width * 3] = or; - image_rgb[point[0] * 3 + point[1] * image_width * 3 + 1] = og; - image_rgb[point[0] * 3 + point[1] * image_width * 3 + 2] = ob; - } + let image_width = 256usize; + let image_height = 256usize; + + let random_point_x = rand::thread_rng().gen_range(0..image_width); + let random_point_y = rand::thread_rng().gen_range(0..image_height); + + let pixel_points = [ + [0, 0], + [image_width - 1, image_height - 1], + [image_width - 1, 0], + [0, image_height - 1], + [(image_width - 1) / 2, (image_height - 1) / 2], + [image_width / 5, image_height / 5], + [0, image_height / 5], + [image_width / 5, 0], + [image_width / 5 * 3, image_height / 5], + [image_width / 5 * 3, image_height / 5 * 3], + [image_width / 5, image_height / 5 * 3], + [random_point_x, random_point_y], + ]; + let mut image_rgb = vec![0u8; image_width * image_height * 3]; + + let or = rand::thread_rng().gen_range(0..256) as u8; + let og = rand::thread_rng().gen_range(0..256) as u8; + let ob = rand::thread_rng().gen_range(0..256) as u8; + + for point in &pixel_points { + image_rgb[point[0] * 3 + point[1] * image_width * 3] = or; + image_rgb[point[0] * 3 + point[1] * image_width * 3 + 1] = og; + image_rgb[point[0] * 3 + point[1] * image_width * 3 + 2] = ob; + } - let mut planar_image = YuvPlanarImageMut::::alloc( - image_width as u32, - image_height as u32, - YuvChromaSubsampling::Yuv444, + let mut planar_image = 
YuvPlanarImageMut::::alloc( + image_width as u32, + image_height as u32, + YuvChromaSubsampling::Yuv444, + ); + + rgb_to_yuv444( + &mut planar_image, + &image_rgb, + image_width as u32 * 3, + YuvRange::Full, + YuvStandardMatrix::Bt709, + ) + .unwrap(); + + image_rgb.fill(0); + + let fixed_planar = planar_image.to_fixed(); + + yuv444_to_rgb( + &fixed_planar, + &mut image_rgb, + image_width as u32 * 3, + YuvRange::Full, + YuvStandardMatrix::Bt709, + ) + .unwrap(); + + for point in &pixel_points { + let x = point[0]; + let y = point[1]; + let r = image_rgb[x * 3 + y * image_width * 3]; + let g = image_rgb[x * 3 + y * image_width * 3 + 1]; + let b = image_rgb[x * 3 + y * image_width * 3 + 2]; + + let diff_r = (r as i32 - or as i32).abs(); + let diff_g = (g as i32 - og as i32).abs(); + let diff_b = (b as i32 - ob as i32).abs(); + + assert!( + diff_r <= 3, + "Original RGB {:?}, Round-tripped RGB {:?}", + [or, og, ob], + [r, g, b] + ); + assert!( + diff_g <= 3, + "Original RGB {:?}, Round-tripped RGB {:?}", + [or, og, ob], + [r, g, b] + ); + assert!( + diff_b <= 3, + "Original RGB {:?}, Round-tripped RGB {:?}", + [or, og, ob], + [r, g, b] ); - - rgb_to_yuv444( - &mut planar_image, - &image_rgb, - image_width as u32 * 3, - YuvRange::Full, - YuvStandardMatrix::Bt709, - ) - .unwrap(); - - image_rgb.fill(0); - - let fixed_planar = planar_image.to_fixed(); - - yuv444_to_rgb( - &fixed_planar, - &mut image_rgb, - image_width as u32 * 3, - YuvRange::Full, - YuvStandardMatrix::Bt709, - quality, - ) - .unwrap(); - - for point in &pixel_points { - let x = point[0]; - let y = point[1]; - let r = image_rgb[x * 3 + y * image_width * 3]; - let g = image_rgb[x * 3 + y * image_width * 3 + 1]; - let b = image_rgb[x * 3 + y * image_width * 3 + 2]; - - let diff_r = (r as i32 - or as i32).abs(); - let diff_g = (g as i32 - og as i32).abs(); - let diff_b = (b as i32 - ob as i32).abs(); - - assert!( - diff_r <= 3, - "Original RGB {:?}, Round-tripped RGB {:?}", - [or, og, ob], - [r, g, b] - ); - assert!( - diff_g <= 3, - "Original RGB {:?}, Round-tripped RGB {:?}", - [or, og, ob], - [r, g, b] - ); - assert!( - diff_b <= 3, - "Original RGB {:?}, Round-tripped RGB {:?}", - [or, og, ob], - [r, g, b] - ); - } } } #[test] fn test_yuv444_round_trip_limited_range() { - for quality in [ConversionQuality::Fastest, ConversionQuality::Good] { - let image_width = 256usize; - let image_height = 256usize; - - let random_point_x = rand::thread_rng().gen_range(0..image_width); - let random_point_y = rand::thread_rng().gen_range(0..image_height); - - let pixel_points = [ - [0, 0], - [image_width - 1, image_height - 1], - [image_width - 1, 0], - [0, image_height - 1], - [(image_width - 1) / 2, (image_height - 1) / 2], - [image_width / 5, image_height / 5], - [0, image_height / 5], - [image_width / 5, 0], - [image_width / 5 * 3, image_height / 5], - [image_width / 5 * 3, image_height / 5 * 3], - [image_width / 5, image_height / 5 * 3], - [random_point_x, random_point_y], - ]; - let mut image_rgb = vec![0u8; image_width * image_height * 3]; - - let or = rand::thread_rng().gen_range(0..256) as u8; - let og = rand::thread_rng().gen_range(0..256) as u8; - let ob = rand::thread_rng().gen_range(0..256) as u8; - - for point in &pixel_points { - image_rgb[point[0] * 3 + point[1] * image_width * 3] = or; - image_rgb[point[0] * 3 + point[1] * image_width * 3 + 1] = og; - image_rgb[point[0] * 3 + point[1] * image_width * 3 + 2] = ob; - } + let image_width = 256usize; + let image_height = 256usize; + + let random_point_x = 
rand::thread_rng().gen_range(0..image_width); + let random_point_y = rand::thread_rng().gen_range(0..image_height); + + let pixel_points = [ + [0, 0], + [image_width - 1, image_height - 1], + [image_width - 1, 0], + [0, image_height - 1], + [(image_width - 1) / 2, (image_height - 1) / 2], + [image_width / 5, image_height / 5], + [0, image_height / 5], + [image_width / 5, 0], + [image_width / 5 * 3, image_height / 5], + [image_width / 5 * 3, image_height / 5 * 3], + [image_width / 5, image_height / 5 * 3], + [random_point_x, random_point_y], + ]; + let mut image_rgb = vec![0u8; image_width * image_height * 3]; + + let or = rand::thread_rng().gen_range(0..256) as u8; + let og = rand::thread_rng().gen_range(0..256) as u8; + let ob = rand::thread_rng().gen_range(0..256) as u8; + + for point in &pixel_points { + image_rgb[point[0] * 3 + point[1] * image_width * 3] = or; + image_rgb[point[0] * 3 + point[1] * image_width * 3 + 1] = og; + image_rgb[point[0] * 3 + point[1] * image_width * 3 + 2] = ob; + } - let mut planar_image = YuvPlanarImageMut::::alloc( - image_width as u32, - image_height as u32, - YuvChromaSubsampling::Yuv444, + let mut planar_image = YuvPlanarImageMut::::alloc( + image_width as u32, + image_height as u32, + YuvChromaSubsampling::Yuv444, + ); + + rgb_to_yuv444( + &mut planar_image, + &image_rgb, + image_width as u32 * 3, + YuvRange::Limited, + YuvStandardMatrix::Bt709, + ) + .unwrap(); + + image_rgb.fill(0); + + let fixed_planar = planar_image.to_fixed(); + + yuv444_to_rgb( + &fixed_planar, + &mut image_rgb, + image_width as u32 * 3, + YuvRange::Limited, + YuvStandardMatrix::Bt709, + ) + .unwrap(); + + for point in &pixel_points { + let x = point[0]; + let y = point[1]; + let r = image_rgb[x * 3 + y * image_width * 3]; + let g = image_rgb[x * 3 + y * image_width * 3 + 1]; + let b = image_rgb[x * 3 + y * image_width * 3 + 2]; + + let diff_r = (r as i32 - or as i32).abs(); + let diff_g = (g as i32 - og as i32).abs(); + let diff_b = (b as i32 - ob as i32).abs(); + + assert!( + diff_r <= 20, + "Original RGB {:?}, Round-tripped RGB {:?}", + [or, og, ob], + [r, g, b] + ); + assert!( + diff_g <= 20, + "Original RGB {:?}, Round-tripped RGB {:?}", + [or, og, ob], + [r, g, b] + ); + assert!( + diff_b <= 20, + "Original RGB {:?}, Round-tripped RGB {:?}", + [or, og, ob], + [r, g, b] ); - - rgb_to_yuv444( - &mut planar_image, - &image_rgb, - image_width as u32 * 3, - YuvRange::Limited, - YuvStandardMatrix::Bt709, - ) - .unwrap(); - - image_rgb.fill(0); - - let fixed_planar = planar_image.to_fixed(); - - yuv444_to_rgb( - &fixed_planar, - &mut image_rgb, - image_width as u32 * 3, - YuvRange::Limited, - YuvStandardMatrix::Bt709, - quality, - ) - .unwrap(); - - for point in &pixel_points { - let x = point[0]; - let y = point[1]; - let r = image_rgb[x * 3 + y * image_width * 3]; - let g = image_rgb[x * 3 + y * image_width * 3 + 1]; - let b = image_rgb[x * 3 + y * image_width * 3 + 2]; - - let diff_r = (r as i32 - or as i32).abs(); - let diff_g = (g as i32 - og as i32).abs(); - let diff_b = (b as i32 - ob as i32).abs(); - - assert!( - diff_r <= 20, - "Original RGB {:?}, Round-tripped RGB {:?}", - [or, og, ob], - [r, g, b] - ); - assert!( - diff_g <= 20, - "Original RGB {:?}, Round-tripped RGB {:?}", - [or, og, ob], - [r, g, b] - ); - assert!( - diff_b <= 20, - "Original RGB {:?}, Round-tripped RGB {:?}", - [or, og, ob], - [r, g, b] - ); - } } } #[test] fn test_yuv422_round_trip_full_range() { - for quality in [ConversionQuality::Fastest, ConversionQuality::Good] { - let image_width = 256usize; 
- let image_height = 256usize; - - let random_point_x = rand::thread_rng().gen_range(0..image_width); - let random_point_y = rand::thread_rng().gen_range(0..image_height); - - const CHANNELS: usize = 3; - - let pixel_points = [ - [0, 0], - [image_width - 1, image_height - 1], - [image_width - 1, 0], - [0, image_height - 1], - [(image_width - 1) / 2, (image_height - 1) / 2], - [image_width / 5, image_height / 5], - [0, image_height / 5], - [image_width / 5, 0], - [image_width / 5 * 3, image_height / 5], - [image_width / 5 * 3, image_height / 5 * 3], - [image_width / 5, image_height / 5 * 3], - [random_point_x, random_point_y], - ]; - - let mut source_rgb = vec![0u8; image_width * image_height * CHANNELS]; - - let or = rand::thread_rng().gen_range(0..256) as u8; - let og = rand::thread_rng().gen_range(0..256) as u8; - let ob = rand::thread_rng().gen_range(0..256) as u8; - - for point in &pixel_points { - source_rgb[point[0] * CHANNELS + point[1] * image_width * CHANNELS] = or; - source_rgb[point[0] * CHANNELS + point[1] * image_width * CHANNELS + 1] = og; - source_rgb[point[0] * CHANNELS + point[1] * image_width * CHANNELS + 2] = ob; - - let nx = (point[0] + 1).min(image_width - 1); - let ny = point[1].min(image_height - 1); - - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob; - - let nx = point[0].saturating_sub(1).min(image_width - 1); - let ny = point[1].min(image_height - 1); - - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob; - } + let image_width = 256usize; + let image_height = 256usize; + + let random_point_x = rand::thread_rng().gen_range(0..image_width); + let random_point_y = rand::thread_rng().gen_range(0..image_height); + + const CHANNELS: usize = 3; + + let pixel_points = [ + [0, 0], + [image_width - 1, image_height - 1], + [image_width - 1, 0], + [0, image_height - 1], + [(image_width - 1) / 2, (image_height - 1) / 2], + [image_width / 5, image_height / 5], + [0, image_height / 5], + [image_width / 5, 0], + [image_width / 5 * 3, image_height / 5], + [image_width / 5 * 3, image_height / 5 * 3], + [image_width / 5, image_height / 5 * 3], + [random_point_x, random_point_y], + ]; + + let mut source_rgb = vec![0u8; image_width * image_height * CHANNELS]; + + let or = rand::thread_rng().gen_range(0..256) as u8; + let og = rand::thread_rng().gen_range(0..256) as u8; + let ob = rand::thread_rng().gen_range(0..256) as u8; + + for point in &pixel_points { + source_rgb[point[0] * CHANNELS + point[1] * image_width * CHANNELS] = or; + source_rgb[point[0] * CHANNELS + point[1] * image_width * CHANNELS + 1] = og; + source_rgb[point[0] * CHANNELS + point[1] * image_width * CHANNELS + 2] = ob; + + let nx = (point[0] + 1).min(image_width - 1); + let ny = point[1].min(image_height - 1); + + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or; + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og; + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob; + + let nx = point[0].saturating_sub(1).min(image_width - 1); + let ny = point[1].min(image_height - 1); + + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or; + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og; + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob; + } - let mut 
planar_image = YuvPlanarImageMut::::alloc( - image_width as u32, - image_height as u32, - YuvChromaSubsampling::Yuv422, + let mut planar_image = YuvPlanarImageMut::::alloc( + image_width as u32, + image_height as u32, + YuvChromaSubsampling::Yuv422, + ); + + rgb_to_yuv422( + &mut planar_image, + &source_rgb, + image_width as u32 * 3, + YuvRange::Full, + YuvStandardMatrix::Bt709, + ) + .unwrap(); + + let mut dest_rgb = vec![0u8; image_width * image_height * CHANNELS]; + + let fixed_planar = planar_image.to_fixed(); + + yuv422_to_rgb( + &fixed_planar, + &mut dest_rgb, + image_width as u32 * 3, + YuvRange::Full, + YuvStandardMatrix::Bt709, + ) + .unwrap(); + + for point in &pixel_points { + let x = point[0]; + let y = point[1]; + let px = x * CHANNELS + y * image_width * CHANNELS; + + let r = dest_rgb[px]; + let g = dest_rgb[px + 1]; + let b = dest_rgb[px + 2]; + + let diff_r = r as i32 - or as i32; + let diff_g = g as i32 - og as i32; + let diff_b = b as i32 - ob as i32; + + assert!( + diff_r <= 3, + "Actual diff {}, Original RGB {:?}, Round-tripped RGB {:?}", + diff_r, + [or, og, ob], + [r, g, b] + ); + assert!( + diff_g <= 3, + "Actual diff {}, Original RGB {:?}, Round-tripped RGB {:?}", + diff_g, + [or, og, ob], + [r, g, b] + ); + assert!( + diff_b <= 3, + "Actual diff {}, Original RGB {:?}, Round-tripped RGB {:?}", + diff_b, + [or, og, ob], + [r, g, b] ); - - rgb_to_yuv422( - &mut planar_image, - &source_rgb, - image_width as u32 * 3, - YuvRange::Full, - YuvStandardMatrix::Bt709, - ) - .unwrap(); - - let mut dest_rgb = vec![0u8; image_width * image_height * CHANNELS]; - - let fixed_planar = planar_image.to_fixed(); - - yuv422_to_rgb( - &fixed_planar, - &mut dest_rgb, - image_width as u32 * 3, - YuvRange::Full, - YuvStandardMatrix::Bt709, - quality, - ) - .unwrap(); - - for point in &pixel_points { - let x = point[0]; - let y = point[1]; - let px = x * CHANNELS + y * image_width * CHANNELS; - - let r = dest_rgb[px]; - let g = dest_rgb[px + 1]; - let b = dest_rgb[px + 2]; - - let diff_r = r as i32 - or as i32; - let diff_g = g as i32 - og as i32; - let diff_b = b as i32 - ob as i32; - - assert!( - diff_r <= 3, - "Actual diff {}, Original RGB {:?}, Round-tripped RGB {:?}", - diff_r, - [or, og, ob], - [r, g, b] - ); - assert!( - diff_g <= 3, - "Actual diff {}, Original RGB {:?}, Round-tripped RGB {:?}", - diff_g, - [or, og, ob], - [r, g, b] - ); - assert!( - diff_b <= 3, - "Actual diff {}, Original RGB {:?}, Round-tripped RGB {:?}", - diff_b, - [or, og, ob], - [r, g, b] - ); - } } } #[test] fn test_yuv422_round_trip_limited_range() { - for quality in [ConversionQuality::Fastest, ConversionQuality::Good] { - let image_width = 256usize; - let image_height = 256usize; - - let random_point_x = rand::thread_rng().gen_range(0..image_width); - let random_point_y = rand::thread_rng().gen_range(0..image_height); - - const CHANNELS: usize = 3; - - let pixel_points = [ - [0, 0], - [image_width - 1, image_height - 1], - [image_width - 1, 0], - [0, image_height - 1], - [(image_width - 1) / 2, (image_height - 1) / 2], - [image_width / 5, image_height / 5], - [0, image_height / 5], - [image_width / 5, 0], - [image_width / 5 * 3, image_height / 5], - [image_width / 5 * 3, image_height / 5 * 3], - [image_width / 5, image_height / 5 * 3], - [random_point_x, random_point_y], - ]; - - let mut source_rgb = vec![0u8; image_width * image_height * CHANNELS]; - - let or = rand::thread_rng().gen_range(0..256) as u8; - let og = rand::thread_rng().gen_range(0..256) as u8; - let ob = rand::thread_rng().gen_range(0..256) 
as u8; - - for point in &pixel_points { - source_rgb[point[0] * CHANNELS + point[1] * image_width * CHANNELS] = or; - source_rgb[point[0] * CHANNELS + point[1] * image_width * CHANNELS + 1] = og; - source_rgb[point[0] * CHANNELS + point[1] * image_width * CHANNELS + 2] = ob; - - let nx = (point[0] + 1).min(image_width - 1); - let ny = point[1].min(image_height - 1); - - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob; - - let nx = point[0].saturating_sub(1).min(image_width - 1); - let ny = point[1].min(image_height - 1); - - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob; - } + let image_width = 256usize; + let image_height = 256usize; + + let random_point_x = rand::thread_rng().gen_range(0..image_width); + let random_point_y = rand::thread_rng().gen_range(0..image_height); + + const CHANNELS: usize = 3; + + let pixel_points = [ + [0, 0], + [image_width - 1, image_height - 1], + [image_width - 1, 0], + [0, image_height - 1], + [(image_width - 1) / 2, (image_height - 1) / 2], + [image_width / 5, image_height / 5], + [0, image_height / 5], + [image_width / 5, 0], + [image_width / 5 * 3, image_height / 5], + [image_width / 5 * 3, image_height / 5 * 3], + [image_width / 5, image_height / 5 * 3], + [random_point_x, random_point_y], + ]; + + let mut source_rgb = vec![0u8; image_width * image_height * CHANNELS]; + + let or = rand::thread_rng().gen_range(0..256) as u8; + let og = rand::thread_rng().gen_range(0..256) as u8; + let ob = rand::thread_rng().gen_range(0..256) as u8; + + for point in &pixel_points { + source_rgb[point[0] * CHANNELS + point[1] * image_width * CHANNELS] = or; + source_rgb[point[0] * CHANNELS + point[1] * image_width * CHANNELS + 1] = og; + source_rgb[point[0] * CHANNELS + point[1] * image_width * CHANNELS + 2] = ob; + + let nx = (point[0] + 1).min(image_width - 1); + let ny = point[1].min(image_height - 1); + + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or; + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og; + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob; + + let nx = point[0].saturating_sub(1).min(image_width - 1); + let ny = point[1].min(image_height - 1); + + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or; + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og; + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob; + } - let mut planar_image = YuvPlanarImageMut::::alloc( - image_width as u32, - image_height as u32, - YuvChromaSubsampling::Yuv422, + let mut planar_image = YuvPlanarImageMut::::alloc( + image_width as u32, + image_height as u32, + YuvChromaSubsampling::Yuv422, + ); + + rgb_to_yuv422( + &mut planar_image, + &source_rgb, + image_width as u32 * 3, + YuvRange::Limited, + YuvStandardMatrix::Bt709, + ) + .unwrap(); + + let mut dest_rgb = vec![0u8; image_width * image_height * CHANNELS]; + + let fixed_planar = planar_image.to_fixed(); + + yuv422_to_rgb( + &fixed_planar, + &mut dest_rgb, + image_width as u32 * 3, + YuvRange::Limited, + YuvStandardMatrix::Bt709, + ) + .unwrap(); + + for point in pixel_points.iter() { + let x = point[0]; + let y = point[1]; + let px = x * CHANNELS + y * image_width * CHANNELS; + + let r = dest_rgb[px]; + let g = dest_rgb[px + 1]; + let b = dest_rgb[px 
+ 2]; + + let diff_r = r as i32 - or as i32; + let diff_g = g as i32 - og as i32; + let diff_b = b as i32 - ob as i32; + + assert!( + diff_r <= 10, + "Actual diff {}, Original RGB {:?}, Round-tripped RGB {:?}", + diff_r, + [or, og, ob], + [r, g, b] + ); + assert!( + diff_g <= 10, + "Actual diff {}, Original RGB {:?}, Round-tripped RGB {:?}", + diff_g, + [or, og, ob], + [r, g, b] + ); + assert!( + diff_b <= 10, + "Actual diff {}, Original RGB {:?}, Round-tripped RGB {:?}", + diff_b, + [or, og, ob], + [r, g, b] ); - - rgb_to_yuv422( - &mut planar_image, - &source_rgb, - image_width as u32 * 3, - YuvRange::Limited, - YuvStandardMatrix::Bt709, - ) - .unwrap(); - - let mut dest_rgb = vec![0u8; image_width * image_height * CHANNELS]; - - let fixed_planar = planar_image.to_fixed(); - - yuv422_to_rgb( - &fixed_planar, - &mut dest_rgb, - image_width as u32 * 3, - YuvRange::Limited, - YuvStandardMatrix::Bt709, - quality, - ) - .unwrap(); - - for point in pixel_points.iter() { - let x = point[0]; - let y = point[1]; - let px = x * CHANNELS + y * image_width * CHANNELS; - - let r = dest_rgb[px]; - let g = dest_rgb[px + 1]; - let b = dest_rgb[px + 2]; - - let diff_r = r as i32 - or as i32; - let diff_g = g as i32 - og as i32; - let diff_b = b as i32 - ob as i32; - - assert!( - diff_r <= 10, - "Actual diff {}, Original RGB {:?}, Round-tripped RGB {:?}", - diff_r, - [or, og, ob], - [r, g, b] - ); - assert!( - diff_g <= 10, - "Actual diff {}, Original RGB {:?}, Round-tripped RGB {:?}", - diff_g, - [or, og, ob], - [r, g, b] - ); - assert!( - diff_b <= 10, - "Actual diff {}, Original RGB {:?}, Round-tripped RGB {:?}", - diff_b, - [or, og, ob], - [r, g, b] - ); - } } } #[test] fn test_yuv420_round_trip_full_range() { - for quality in [ConversionQuality::Fastest, ConversionQuality::Good] { - let image_width = 256usize; - let image_height = 256usize; - - let random_point_x = rand::thread_rng().gen_range(0..image_width); - let random_point_y = rand::thread_rng().gen_range(0..image_height); - - const CHANNELS: usize = 3; - - let pixel_points = [ - [0, 0], - [image_width - 1, image_height - 1], - [image_width - 1, 0], - [0, image_height - 1], - [(image_width - 1) / 2, (image_height - 1) / 2], - [image_width / 5, image_height / 5], - [0, image_height / 5], - [image_width / 5, 0], - [image_width / 5 * 3, image_height / 5], - [image_width / 5 * 3, image_height / 5 * 3], - [image_width / 5, image_height / 5 * 3], - [random_point_x, random_point_y], - ]; - - let mut source_rgb = vec![0u8; image_width * image_height * CHANNELS]; - - let or = rand::thread_rng().gen_range(0..256) as u8; - let og = rand::thread_rng().gen_range(0..256) as u8; - let ob = rand::thread_rng().gen_range(0..256) as u8; - - for point in &pixel_points { - source_rgb[point[0] * CHANNELS + point[1] * image_width * CHANNELS] = or; - source_rgb[point[0] * CHANNELS + point[1] * image_width * CHANNELS + 1] = og; - source_rgb[point[0] * CHANNELS + point[1] * image_width * CHANNELS + 2] = ob; - - let nx = (point[0] + 1).min(image_width - 1); - let ny = point[1].min(image_height - 1); - - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob; - - let nx = (point[0] + 1).min(image_width - 1); - let ny = (point[1] + 1).min(image_height - 1); - - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og; - source_rgb[nx * CHANNELS + ny * image_width 
* CHANNELS + 2] = ob; - - let nx = point[0].min(image_width - 1); - let ny = (point[1] + 1).min(image_height - 1); - - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob; - - let nx = point[0].saturating_sub(1).min(image_width - 1); - let ny = point[1].saturating_sub(1).min(image_height - 1); - - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob; - - let nx = point[0].min(image_width - 1); - let ny = point[1].saturating_sub(1).min(image_height - 1); - - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob; - - let nx = point[0].saturating_sub(1).min(image_width - 1); - let ny = point[1].min(image_height - 1); - - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob; - } + let image_width = 256usize; + let image_height = 256usize; - let mut planar_image = YuvPlanarImageMut::::alloc( - image_width as u32, - image_height as u32, - YuvChromaSubsampling::Yuv420, - ); + let random_point_x = rand::thread_rng().gen_range(0..image_width); + let random_point_y = rand::thread_rng().gen_range(0..image_height); - rgb_to_yuv420( - &mut planar_image, - &source_rgb, - image_width as u32 * 3, - YuvRange::Full, - YuvStandardMatrix::Bt709, - ) - .unwrap(); - - let mut dest_rgb = vec![0u8; image_width * image_height * CHANNELS]; - - let fixed_planar = planar_image.to_fixed(); - - yuv420_to_rgb( - &fixed_planar, - &mut dest_rgb, - image_width as u32 * 3, - YuvRange::Full, - YuvStandardMatrix::Bt709, - quality, - ) - .unwrap(); - - for point in &pixel_points { - let x = point[0]; - let y = point[1]; - let px = x * CHANNELS + y * image_width * CHANNELS; - - let r = dest_rgb[px]; - let g = dest_rgb[px + 1]; - let b = dest_rgb[px + 2]; - - let diff_r = r as i32 - or as i32; - let diff_g = g as i32 - og as i32; - let diff_b = b as i32 - ob as i32; - - assert!( - diff_r <= 47, - "Actual diff {}, Original RGB {:?}, Round-tripped RGB {:?}", - diff_r, - [or, og, ob], - [r, g, b] - ); - assert!( - diff_g <= 47, - "Actual diff {}, Original RGB {:?}, Round-tripped RGB {:?}", - diff_g, - [or, og, ob], - [r, g, b] - ); - assert!( - diff_b <= 47, - "Actual diff {}, Original RGB {:?}, Round-tripped RGB {:?}", - diff_b, - [or, og, ob], - [r, g, b] - ); - } + const CHANNELS: usize = 3; + + let pixel_points = [ + [0, 0], + [image_width - 1, image_height - 1], + [image_width - 1, 0], + [0, image_height - 1], + [(image_width - 1) / 2, (image_height - 1) / 2], + [image_width / 5, image_height / 5], + [0, image_height / 5], + [image_width / 5, 0], + [image_width / 5 * 3, image_height / 5], + [image_width / 5 * 3, image_height / 5 * 3], + [image_width / 5, image_height / 5 * 3], + [random_point_x, random_point_y], + ]; + + let mut source_rgb = vec![0u8; image_width * image_height * CHANNELS]; + + let or = rand::thread_rng().gen_range(0..256) as u8; + let og = rand::thread_rng().gen_range(0..256) as u8; + let ob = rand::thread_rng().gen_range(0..256) as u8; + + for point in &pixel_points { + source_rgb[point[0] * CHANNELS + point[1] * image_width * CHANNELS] = or; 
+ source_rgb[point[0] * CHANNELS + point[1] * image_width * CHANNELS + 1] = og; + source_rgb[point[0] * CHANNELS + point[1] * image_width * CHANNELS + 2] = ob; + + let nx = (point[0] + 1).min(image_width - 1); + let ny = point[1].min(image_height - 1); + + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or; + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og; + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob; + + let nx = (point[0] + 1).min(image_width - 1); + let ny = (point[1] + 1).min(image_height - 1); + + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or; + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og; + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob; + + let nx = point[0].min(image_width - 1); + let ny = (point[1] + 1).min(image_height - 1); + + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or; + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og; + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob; + + let nx = point[0].saturating_sub(1).min(image_width - 1); + let ny = point[1].saturating_sub(1).min(image_height - 1); + + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or; + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og; + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob; + + let nx = point[0].min(image_width - 1); + let ny = point[1].saturating_sub(1).min(image_height - 1); + + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or; + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og; + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob; + + let nx = point[0].saturating_sub(1).min(image_width - 1); + let ny = point[1].min(image_height - 1); + + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or; + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og; + source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob; + } + + let mut planar_image = YuvPlanarImageMut::::alloc( + image_width as u32, + image_height as u32, + YuvChromaSubsampling::Yuv420, + ); + + rgb_to_yuv420( + &mut planar_image, + &source_rgb, + image_width as u32 * 3, + YuvRange::Full, + YuvStandardMatrix::Bt709, + ) + .unwrap(); + + let mut dest_rgb = vec![0u8; image_width * image_height * CHANNELS]; + + let fixed_planar = planar_image.to_fixed(); + + yuv420_to_rgb( + &fixed_planar, + &mut dest_rgb, + image_width as u32 * 3, + YuvRange::Full, + YuvStandardMatrix::Bt709, + ) + .unwrap(); + + for point in &pixel_points { + let x = point[0]; + let y = point[1]; + let px = x * CHANNELS + y * image_width * CHANNELS; + + let r = dest_rgb[px]; + let g = dest_rgb[px + 1]; + let b = dest_rgb[px + 2]; + + let diff_r = r as i32 - or as i32; + let diff_g = g as i32 - og as i32; + let diff_b = b as i32 - ob as i32; + + assert!( + diff_r <= 47, + "Actual diff {}, Original RGB {:?}, Round-tripped RGB {:?}", + diff_r, + [or, og, ob], + [r, g, b] + ); + assert!( + diff_g <= 47, + "Actual diff {}, Original RGB {:?}, Round-tripped RGB {:?}", + diff_g, + [or, og, ob], + [r, g, b] + ); + assert!( + diff_b <= 47, + "Actual diff {}, Original RGB {:?}, Round-tripped RGB {:?}", + diff_b, + [or, og, ob], + [r, g, b] + ); } } #[test] fn test_yuv420_round_trip_limited_range() { - for quality in [ConversionQuality::Fastest, ConversionQuality::Good] { - let image_width = 256usize; - let image_height = 256usize; - - let random_point_x = rand::thread_rng().gen_range(0..image_width); - let random_point_y = 
rand::thread_rng().gen_range(0..image_height); - - const CHANNELS: usize = 3; - - let pixel_points = [ - [0, 0], - [image_width - 1, image_height - 1], - [image_width - 1, 0], - [0, image_height - 1], - [(image_width - 1) / 2, (image_height - 1) / 2], - [image_width / 5, image_height / 5], - [0, image_height / 5], - [image_width / 5, 0], - [image_width / 5 * 3, image_height / 5], - [image_width / 5 * 3, image_height / 5 * 3], - [image_width / 5, image_height / 5 * 3], - [random_point_x, random_point_y], - ]; - - let mut source_rgb = vec![0u8; image_width * image_height * CHANNELS]; - - let or = rand::thread_rng().gen_range(0..256) as u8; - let og = rand::thread_rng().gen_range(0..256) as u8; - let ob = rand::thread_rng().gen_range(0..256) as u8; - - for point in &pixel_points { - source_rgb[point[0] * CHANNELS + point[1] * image_width * CHANNELS] = or; - source_rgb[point[0] * CHANNELS + point[1] * image_width * CHANNELS + 1] = og; - source_rgb[point[0] * CHANNELS + point[1] * image_width * CHANNELS + 2] = ob; - - let nx = (point[0] + 1).min(image_width - 1); - let ny = point[1].min(image_height - 1); - - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob; - - let nx = (point[0] + 1).min(image_width - 1); - let ny = (point[1] + 1).min(image_height - 1); - - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob; - - let nx = point[0].min(image_width - 1); - let ny = (point[1] + 1).min(image_height - 1); - - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob; - - let nx = point[0].saturating_sub(1).min(image_width - 1); - let ny = point[1].saturating_sub(1).min(image_height - 1); - - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob; - - let nx = point[0].min(image_width - 1); - let ny = point[1].saturating_sub(1).min(image_height - 1); - - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob; - - let nx = point[0].saturating_sub(1).min(image_width - 1); - let ny = point[1].min(image_height - 1); - - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og; - source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob; - } + let image_width = 256usize; + let image_height = 256usize; - let mut planar_image = YuvPlanarImageMut::::alloc( - image_width as u32, - image_height as u32, - YuvChromaSubsampling::Yuv420, - ); + let random_point_x = rand::thread_rng().gen_range(0..image_width); + let random_point_y = rand::thread_rng().gen_range(0..image_height); - rgb_to_yuv420( - &mut planar_image, - &source_rgb, - image_width as u32 * 3, - YuvRange::Limited, - YuvStandardMatrix::Bt709, - ) - .unwrap(); - - let mut dest_rgb = vec![0u8; image_width * image_height * CHANNELS]; - - let fixed_planar = planar_image.to_fixed(); - - yuv420_to_rgb( - &fixed_planar, - &mut dest_rgb, - image_width as u32 * 3, - 
YuvRange::Limited,
-                YuvStandardMatrix::Bt709,
-                quality,
-            )
-            .unwrap();
-
-            for point in &pixel_points {
-                let x = point[0];
-                let y = point[1];
-                let px = x * CHANNELS + y * image_width * CHANNELS;
-
-                let r = dest_rgb[px];
-                let g = dest_rgb[px + 1];
-                let b = dest_rgb[px + 2];
-
-                let diff_r = r as i32 - or as i32;
-                let diff_g = g as i32 - og as i32;
-                let diff_b = b as i32 - ob as i32;
-
-                assert!(
-                    diff_r <= 55,
-                    "Actual diff {}, Original RGB {:?}, Round-tripped RGB {:?}",
-                    diff_r,
-                    [or, og, ob],
-                    [r, g, b]
-                );
-                assert!(
-                    diff_g <= 55,
-                    "Actual diff {}, Original RGB {:?}, Round-tripped RGB {:?}",
-                    diff_g,
-                    [or, og, ob],
-                    [r, g, b]
-                );
-                assert!(
-                    diff_b <= 55,
-                    "Actual diff {}, Original RGB {:?}, Round-tripped RGB {:?}",
-                    diff_b,
-                    [or, og, ob],
-                    [r, g, b]
-                );
-            }
+        const CHANNELS: usize = 3;
+
+        let pixel_points = [
+            [0, 0],
+            [image_width - 1, image_height - 1],
+            [image_width - 1, 0],
+            [0, image_height - 1],
+            [(image_width - 1) / 2, (image_height - 1) / 2],
+            [image_width / 5, image_height / 5],
+            [0, image_height / 5],
+            [image_width / 5, 0],
+            [image_width / 5 * 3, image_height / 5],
+            [image_width / 5 * 3, image_height / 5 * 3],
+            [image_width / 5, image_height / 5 * 3],
+            [random_point_x, random_point_y],
+        ];
+
+        let mut source_rgb = vec![0u8; image_width * image_height * CHANNELS];
+
+        let or = rand::thread_rng().gen_range(0..256) as u8;
+        let og = rand::thread_rng().gen_range(0..256) as u8;
+        let ob = rand::thread_rng().gen_range(0..256) as u8;
+
+        for point in &pixel_points {
+            source_rgb[point[0] * CHANNELS + point[1] * image_width * CHANNELS] = or;
+            source_rgb[point[0] * CHANNELS + point[1] * image_width * CHANNELS + 1] = og;
+            source_rgb[point[0] * CHANNELS + point[1] * image_width * CHANNELS + 2] = ob;
+
+            let nx = (point[0] + 1).min(image_width - 1);
+            let ny = point[1].min(image_height - 1);
+
+            source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or;
+            source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og;
+            source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob;
+
+            let nx = (point[0] + 1).min(image_width - 1);
+            let ny = (point[1] + 1).min(image_height - 1);
+
+            source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or;
+            source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og;
+            source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob;
+
+            let nx = point[0].min(image_width - 1);
+            let ny = (point[1] + 1).min(image_height - 1);
+
+            source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or;
+            source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og;
+            source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob;
+
+            let nx = point[0].saturating_sub(1).min(image_width - 1);
+            let ny = point[1].saturating_sub(1).min(image_height - 1);
+
+            source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or;
+            source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og;
+            source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob;
+
+            let nx = point[0].min(image_width - 1);
+            let ny = point[1].saturating_sub(1).min(image_height - 1);
+
+            source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or;
+            source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og;
+            source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob;
+
+            let nx = point[0].saturating_sub(1).min(image_width - 1);
+            let ny = point[1].min(image_height - 1);
+
+            source_rgb[nx * CHANNELS + ny * image_width * CHANNELS] = or;
+            source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 1] = og;
+            source_rgb[nx * CHANNELS + ny * image_width * CHANNELS + 2] = ob;
+        }
+
+        let mut planar_image = YuvPlanarImageMut::<u8>::alloc(
+            image_width as u32,
+            image_height as u32,
+            YuvChromaSubsampling::Yuv420,
+        );
+
+        rgb_to_yuv420(
+            &mut planar_image,
+            &source_rgb,
+            image_width as u32 * 3,
+            YuvRange::Limited,
+            YuvStandardMatrix::Bt709,
+        )
+        .unwrap();
+
+        let mut dest_rgb = vec![0u8; image_width * image_height * CHANNELS];
+
+        let fixed_planar = planar_image.to_fixed();
+
+        yuv420_to_rgb(
+            &fixed_planar,
+            &mut dest_rgb,
+            image_width as u32 * 3,
+            YuvRange::Limited,
+            YuvStandardMatrix::Bt709,
+        )
+        .unwrap();
+
+        for point in &pixel_points {
+            let x = point[0];
+            let y = point[1];
+            let px = x * CHANNELS + y * image_width * CHANNELS;
+
+            let r = dest_rgb[px];
+            let g = dest_rgb[px + 1];
+            let b = dest_rgb[px + 2];
+
+            let diff_r = r as i32 - or as i32;
+            let diff_g = g as i32 - og as i32;
+            let diff_b = b as i32 - ob as i32;
+
+            assert!(
+                diff_r <= 55,
+                "Actual diff {}, Original RGB {:?}, Round-tripped RGB {:?}",
+                diff_r,
+                [or, og, ob],
+                [r, g, b]
+            );
+            assert!(
+                diff_g <= 55,
+                "Actual diff {}, Original RGB {:?}, Round-tripped RGB {:?}",
+                diff_g,
+                [or, og, ob],
+                [r, g, b]
+            );
+            assert!(
+                diff_b <= 55,
+                "Actual diff {}, Original RGB {:?}, Round-tripped RGB {:?}",
+                diff_b,
+                [or, og, ob],
+                [r, g, b]
+            );
+        }
         }
     }
 }
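
Below is a minimal, self-contained sketch of the round-trip pattern these tests exercise now that the ConversionQuality argument is gone. It uses only the yuvutils_rs items that appear in the added lines of this patch (YuvPlanarImageMut, rgb_to_yuv420, yuv420_to_rgb, YuvChromaSubsampling, YuvRange, YuvStandardMatrix); the helper name round_trip_420 and its signature are illustrative and not part of the patch itself.

use yuvutils_rs::{
    rgb_to_yuv420, yuv420_to_rgb, YuvChromaSubsampling, YuvPlanarImageMut, YuvRange,
    YuvStandardMatrix,
};

// Encode a packed RGB24 buffer to 4:2:0 planar YUV and decode it back,
// mirroring the limited-range BT.709 settings used by the tests above.
fn round_trip_420(rgb: &[u8], width: u32, height: u32) -> Vec<u8> {
    let mut planar_image =
        YuvPlanarImageMut::<u8>::alloc(width, height, YuvChromaSubsampling::Yuv420);

    // RGB -> YUV 4:2:0; the stride is width * 3 bytes for tightly packed RGB24.
    rgb_to_yuv420(
        &mut planar_image,
        rgb,
        width * 3,
        YuvRange::Limited,
        YuvStandardMatrix::Bt709,
    )
    .unwrap();

    // YUV 4:2:0 -> RGB into a fresh buffer of the same layout.
    let mut dest_rgb = vec![0u8; width as usize * height as usize * 3];
    let fixed_planar = planar_image.to_fixed();
    yuv420_to_rgb(
        &fixed_planar,
        &mut dest_rgb,
        width * 3,
        YuvRange::Limited,
        YuvStandardMatrix::Bt709,
    )
    .unwrap();

    dest_rgb
}

The tests then compare selected pixels of the round-tripped buffer against the original colour, within the per-channel tolerances asserted above (55 for limited-range 4:2:0).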