diff --git a/src/intel_simd_support.rs b/src/intel_simd_support.rs index 040a3503..ed51128e 100644 --- a/src/intel_simd_support.rs +++ b/src/intel_simd_support.rs @@ -183,9 +183,10 @@ pub unsafe fn store_u8_rgb_avx2( if use_transient { _mm256_storeu_si256(ptr.add(72) as *mut __m256i, rgb3); } else { - let mut transient: [u8; 32] = [0u8; 32]; - _mm256_storeu_si256(transient.as_mut_ptr() as *mut __m256i, rgb3); - std::ptr::copy_nonoverlapping(transient.as_ptr(), ptr.add(72), 24); + // let mut transient: [u8; 32] = [0u8; 32]; + // _mm256_storeu_si256(transient.as_mut_ptr() as *mut __m256i, rgb3); + // std::ptr::copy_nonoverlapping(transient.as_ptr(), ptr.add(72), 24); + std::ptr::copy_nonoverlapping(&rgb3 as *const _ as *const u8, ptr.add(72), 24); } } @@ -352,8 +353,6 @@ pub unsafe fn store_u8_rgb_sse( if use_transient { _mm_storeu_si128(ptr.add(36) as *mut __m128i, rgb3); } else { - let mut transient: [u8; 16] = [0u8; 16]; - _mm_storeu_si128(transient.as_mut_ptr() as *mut __m128i, rgb3); - std::ptr::copy(transient.as_ptr(), ptr.add(36), 12); + std::ptr::copy(&rgb3 as *const _ as *const u8, ptr.add(36), 12); } } diff --git a/src/rgba_to_yuv.rs b/src/rgba_to_yuv.rs index 4f0a7c25..4ed8d27a 100644 --- a/src/rgba_to_yuv.rs +++ b/src/rgba_to_yuv.rs @@ -119,12 +119,12 @@ unsafe fn sse_row( let cr_h = _mm_avg_epu8(cr, cr); std::ptr::copy_nonoverlapping( &cb_h as *const _ as *const u8, - u_ptr.add(uv_x) as *mut u8, + u_ptr.add(uv_x), 8, ); std::ptr::copy_nonoverlapping( &cr_h as *const _ as *const u8, - v_ptr.add(uv_x) as *mut u8, + v_ptr.add(uv_x), 8, ); uv_x += 8; @@ -190,7 +190,7 @@ fn rgbx_to_yuv8( #[cfg(target_arch = "x86_64")] { - if is_x86_feature_detected!("sse4.1") && is_x86_feature_detected!("ssse3") { + if is_x86_feature_detected!("sse4.1") { use_sse = true; } } diff --git a/src/yuv_to_rgba.rs b/src/yuv_to_rgba.rs index e7245133..063d61b1 100644 --- a/src/yuv_to_rgba.rs +++ b/src/yuv_to_rgba.rs @@ -638,8 +638,9 @@ fn yuv_to_rgbx( if chroma_subsampling == YuvChromaSample::YUV420 || chroma_subsampling == YuvChromaSample::YUV422 { - if x + 1 < width as usize { - let y_value = (y_plane[y_offset + x + 1] as i32 - bias_y) * y_coef; + let next_x = x + 1; + if next_x < width as usize { + let y_value = (y_plane[y_offset + next_x] as i32 - bias_y) * y_coef; let r = ((y_value + cr_coef * cr_value) >> 6).min(255).max(0); let b = ((y_value + cb_coef * cb_value) >> 6).min(255).max(0); @@ -647,7 +648,7 @@ fn yuv_to_rgbx( .min(255) .max(0); - let next_px = (x + 1) * channels; + let next_px = next_x * channels; let rgba_shift = rgba_offset + next_px;