Skip to content

Commit

Permalink
Added yuv400 16-bit
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Oct 16, 2024
1 parent d93c890 commit 4145431
Show file tree
Hide file tree
Showing 7 changed files with 532 additions and 6 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ workspace = { members = ["app"] }

[package]
name = "yuvutils-rs"
version = "0.4.9"
version = "0.4.10"
edition = "2021"
description = "High performance utilities for YUV format handling and conversion."
readme = "README.md"
Expand Down
11 changes: 7 additions & 4 deletions app/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@ use std::io::Read;
use std::ops::Sub;
use std::time::Instant;

use yuvutils_rs::{bgra_to_yuv444_p16, rgb_to_sharp_yuv420, rgb_to_yuv420, rgb_to_yuv420_p16, rgba_to_sharp_yuv420, rgba_to_yuv420_p16, rgba_to_yuv444_p16, yuv420_p16_to_rgb16, yuv420_to_rgb, yuv420_to_yuyv422, yuyv422_to_rgb, SharpYuvGammaTransfer, YuvBytesPacking, YuvEndianness, YuvRange, YuvStandardMatrix};
use yuvutils_rs::{
bgra_to_yuv444_p16, rgb_to_sharp_yuv420, rgb_to_yuv420, rgb_to_yuv420_p16,
rgba_to_sharp_yuv420, rgba_to_yuv420_p16, rgba_to_yuv444_p16, yuv420_p16_to_rgb16,
yuv420_to_rgb, yuv420_to_yuyv422, yuyv422_to_rgb, SharpYuvGammaTransfer, YuvBytesPacking,
YuvEndianness, YuvRange, YuvStandardMatrix,
};

fn read_file_bytes(file_path: &str) -> Result<Vec<u8>, String> {
// Open the file
Expand Down Expand Up @@ -246,7 +251,7 @@ fn main() {
&y_plane,
y_stride as u32,
&u_plane,
u_stride as u32 ,
u_stride as u32,
&v_plane,
v_stride as u32,
&mut rgba,
Expand Down Expand Up @@ -291,8 +296,6 @@ fn main() {
//
// rgba = Vec::from(gbr);

println!("{}", std::mem::align_of::<f32>());

image::save_buffer(
"converted_sharp.png",
rgba.as_bytes(),
Expand Down
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ mod sse;
mod to_identity;
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
mod wasm32;
mod y_p16_to_rgb16;
mod y_to_rgb;
mod ycgco_r_to_rgb;
mod ycgco_to_rgb;
Expand Down Expand Up @@ -392,3 +393,4 @@ pub use yuv_p16_rgba::*;
pub use yuv_p16_rgba16_alpha::*;
pub use yuv_p16_rgba_alpha::*;
pub use yuv_p16_rgba_p16::*;
pub use y_p16_to_rgb16::*;
2 changes: 2 additions & 0 deletions src/neon/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ mod rgb_to_yuv_p16;
mod rgba_to_nv;
mod rgba_to_yuv;
mod to_identity;
mod y_p16_to_rgba16;
mod y_to_rgb;
mod ycgco_to_rgb;
mod ycgco_to_rgb_alpha;
Expand Down Expand Up @@ -59,3 +60,4 @@ pub use yuv_to_rgba_alpha::neon_yuv_to_rgba_alpha;
pub use yuv_to_yuy2::yuv_to_yuy2_neon_impl;
pub use yuy2_to_rgb::yuy2_to_rgb_neon;
pub use yuy2_to_yuv::yuy2_to_yuv_neon_impl;
pub use y_p16_to_rgba16::neon_y_p16_to_rgba16_row;
100 changes: 100 additions & 0 deletions src/neon/y_p16_to_rgba16.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
* // Copyright (c) the Radzivon Bartoshyk. All rights reserved.
* //
* // Use of this source code is governed by a BSD-style
* // license that can be found in the LICENSE file.
*/

use std::arch::aarch64::*;

use crate::internals::ProcessedOffset;
use crate::yuv_support::{
CbCrInverseTransform, YuvBytesPacking, YuvChromaRange, YuvEndianness,
YuvSourceChannels,
};

/// Expands a row of 16-bit luma-only samples into gray 16-bit RGB(A) pixels
/// with NEON, eight pixels per iteration.
///
/// Each sample is optionally byte-swapped (`ENDIANNESS`), optionally shifted
/// down from the most significant bits (`BYTES_POSITION`), has `range.bias_y`
/// subtracted, is multiplied by `transform.y_coef`, rounded-shifted right by 6
/// bits and clamped to `[0, (1 << bit_depth) - 1]`. The same value is written
/// to every colour channel; for 4-channel layouts the alpha lane is filled
/// with `(1 << bit_depth) - 1`.
///
/// # Parameters
/// * `y_ld_ptr` - start of the luma row, indexed in `u16` elements.
/// * `rgba` - start of the destination buffer, indexed in `u16` elements.
/// * `dst_offset` - element offset of the destination row inside `rgba`.
/// * `width` - row width in pixels.
/// * `range` - supplies the luma bias (`bias_y`).
/// * `transform` - supplies the luma gain (`y_coef`).
/// * `start_cx` - first column to process.
/// * `bit_depth` - significant bits per sample.
///
/// # Returns
/// A [`ProcessedOffset`] whose `cx` is the first column that was *not*
/// processed here and whose `ux` is always `0`.
///
/// # Safety
/// * `y_ld_ptr` must be valid for reads of at least `width` `u16` elements.
/// * `rgba` must be valid for writes of at least
///   `dst_offset + width * channels` `u16` elements.
/// * Must only be called on a target where NEON is available.
///
/// NOTE(review): the arithmetic is carried out in signed 16-bit lanes, so
/// samples are presumably expected to fit in `i16` after the optional shift
/// (i.e. `bit_depth` < 16) - confirm against callers.
#[inline(always)]
pub unsafe fn neon_y_p16_to_rgba16_row<
    const DESTINATION_CHANNELS: u8,
    const ENDIANNESS: u8,
    const BYTES_POSITION: u8,
>(
    y_ld_ptr: *const u16,
    rgba: *mut u16,
    dst_offset: usize,
    width: u32,
    range: &YuvChromaRange,
    transform: &CbCrInverseTransform<i32>,
    start_cx: usize,
    bit_depth: usize,
) -> ProcessedOffset {
    let destination_channels: YuvSourceChannels = DESTINATION_CHANNELS.into();
    let channels = destination_channels.get_channels_count();
    let endianness: YuvEndianness = ENDIANNESS.into();
    let bytes_position: YuvBytesPacking = BYTES_POSITION.into();
    let dst_ptr = rgba;

    let y_corr = vdupq_n_s16(range.bias_y as i16);
    let v_luma_coeff = vdupq_n_s16(transform.y_coef as i16);
    let v_min_values = vdupq_n_s16(0i16);
    // Largest representable value for this bit depth. Computed in u32 so that
    // `bit_depth == 16` does not overflow the shift. Used both as the alpha
    // fill and as the upper clamp for the colour lanes.
    let v_max_values = vdupq_n_u16(((1u32 << bit_depth) - 1) as u16);
    // Negative shift count: `vshlq_u16` with a negative amount shifts right,
    // moving MSB-packed samples down to the low `bit_depth` bits.
    let v_msb_shift = vdupq_n_s16(bit_depth as i16 - 16);

    let swap_bytes = matches!(endianness, YuvEndianness::BigEndian);
    let shift_from_msb = bytes_position == YuvBytesPacking::MostSignificantBytes;

    let mut cx = start_cx;

    // The strict `<` is kept from the original: the last (up to 8) pixels are
    // never handled here and are reported back through the returned `cx`.
    while cx + 8 < width as usize {
        let mut y_vl = vld1q_u16(y_ld_ptr.add(cx));
        if swap_bytes {
            // Swap the two bytes inside every 16-bit lane.
            y_vl = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(y_vl)));
        }
        if shift_from_msb {
            y_vl = vshlq_u16(y_vl, v_msb_shift);
        }
        let y_values = vsubq_s16(vreinterpretq_s16_u16(y_vl), y_corr);

        // Widening multiply by the luma gain, then rounding narrow by 6 bits.
        // NOTE(review): 6 presumably matches the fixed-point precision used to
        // build `transform.y_coef` - confirm.
        let y_high = vmull_high_s16(y_values, v_luma_coeff);
        let r_high = vrshrn_n_s32::<6>(y_high);

        let y_low = vmull_s16(vget_low_s16(y_values), vget_low_s16(v_luma_coeff));
        let r_low = vrshrn_n_s32::<6>(y_low);

        // Clamp below at 0 (signed), then above at the bit-depth maximum so
        // over-range luma cannot produce values larger than the alpha fill.
        let r_values = vminq_u16(
            vreinterpretq_u16_s16(vmaxq_s16(vcombine_s16(r_low, r_high), v_min_values)),
            v_max_values,
        );

        let dst = dst_ptr.add(dst_offset + cx * channels);
        match destination_channels {
            // Gray output: channel order is irrelevant, all lanes are equal.
            YuvSourceChannels::Rgb | YuvSourceChannels::Bgr => {
                let dst_pack = uint16x8x3_t(r_values, r_values, r_values);
                vst3q_u16(dst, dst_pack);
            }
            YuvSourceChannels::Rgba | YuvSourceChannels::Bgra => {
                let dst_pack = uint16x8x4_t(r_values, r_values, r_values, v_max_values);
                vst4q_u16(dst, dst_pack);
            }
        }

        cx += 8;
    }

    ProcessedOffset { cx, ux: 0 }
}
Loading

0 comments on commit 4145431

Please sign in to comment.