Skip to content

Commit

Permalink
RGBA to YUV, SSE
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed May 22, 2024
1 parent 969013d commit 650be4d
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 12 deletions.
11 changes: 5 additions & 6 deletions src/intel_simd_support.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,9 +183,10 @@ pub unsafe fn store_u8_rgb_avx2(
if use_transient {
_mm256_storeu_si256(ptr.add(72) as *mut __m256i, rgb3);
} else {
- let mut transient: [u8; 32] = [0u8; 32];
- _mm256_storeu_si256(transient.as_mut_ptr() as *mut __m256i, rgb3);
- std::ptr::copy_nonoverlapping(transient.as_ptr(), ptr.add(72), 24);
+ // let mut transient: [u8; 32] = [0u8; 32];
+ // _mm256_storeu_si256(transient.as_mut_ptr() as *mut __m256i, rgb3);
+ // std::ptr::copy_nonoverlapping(transient.as_ptr(), ptr.add(72), 24);
+ std::ptr::copy_nonoverlapping(&rgb3 as *const _ as *const u8, ptr.add(72), 24);
}
}

Expand Down Expand Up @@ -352,8 +353,6 @@ pub unsafe fn store_u8_rgb_sse(
if use_transient {
_mm_storeu_si128(ptr.add(36) as *mut __m128i, rgb3);
} else {
- let mut transient: [u8; 16] = [0u8; 16];
- _mm_storeu_si128(transient.as_mut_ptr() as *mut __m128i, rgb3);
- std::ptr::copy(transient.as_ptr(), ptr.add(36), 12);
+ std::ptr::copy(&rgb3 as *const _ as *const u8, ptr.add(36), 12);
}
}
6 changes: 3 additions & 3 deletions src/rgba_to_yuv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,12 +119,12 @@ unsafe fn sse_row<const ORIGIN_CHANNELS: u8, const SAMPLING: u8>(
let cr_h = _mm_avg_epu8(cr, cr);
std::ptr::copy_nonoverlapping(
&cb_h as *const _ as *const u8,
- u_ptr.add(uv_x) as *mut u8,
+ u_ptr.add(uv_x),
8,
);
std::ptr::copy_nonoverlapping(
&cr_h as *const _ as *const u8,
- v_ptr.add(uv_x) as *mut u8,
+ v_ptr.add(uv_x),
8,
);
uv_x += 8;
Expand Down Expand Up @@ -190,7 +190,7 @@ fn rgbx_to_yuv8<const ORIGIN_CHANNELS: u8, const SAMPLING: u8>(

#[cfg(target_arch = "x86_64")]
{
- if is_x86_feature_detected!("sse4.1") && is_x86_feature_detected!("ssse3") {
+ if is_x86_feature_detected!("sse4.1") {
use_sse = true;
}
}
Expand Down
7 changes: 4 additions & 3 deletions src/yuv_to_rgba.rs
Original file line number Diff line number Diff line change
Expand Up @@ -638,16 +638,17 @@ fn yuv_to_rgbx<const DESTINATION_CHANNELS: u8, const SAMPLING: u8>(
if chroma_subsampling == YuvChromaSample::YUV420
|| chroma_subsampling == YuvChromaSample::YUV422
{
- if x + 1 < width as usize {
- let y_value = (y_plane[y_offset + x + 1] as i32 - bias_y) * y_coef;
+ let next_x = x + 1;
+ if next_x < width as usize {
+ let y_value = (y_plane[y_offset + next_x] as i32 - bias_y) * y_coef;

let r = ((y_value + cr_coef * cr_value) >> 6).min(255).max(0);
let b = ((y_value + cb_coef * cb_value) >> 6).min(255).max(0);
let g = ((y_value - g_coef_1 * cr_value - g_coef_2 * cb_value) >> 6)
.min(255)
.max(0);

- let next_px = (x + 1) * channels;
+ let next_px = next_x * channels;

let rgba_shift = rgba_offset + next_px;

Expand Down

0 comments on commit 650be4d

Please sign in to comment.