diff --git a/src/converter/mod.rs b/src/converter/mod.rs index 53f4bd4..e9a4c02 100644 --- a/src/converter/mod.rs +++ b/src/converter/mod.rs @@ -85,25 +85,35 @@ pub enum OutputFormat { NV12, /// I420: Y plane, U plane, V plane (4:2:0), 8-bit. I420, - /// YUV444: Full resolution Y, U, V planes, 8-bit. + /// YUV444 8-bit: 2-plane semi-planar (Y plane + interleaved UV) at full resolution. YUV444, /// P010: Y plane followed by interleaved UV (4:2:0), 10-bit in 16-bit words. P010, - /// YUV444 10-bit: Full resolution Y, U, V in 16-bit words. + /// YUV444 10-bit: 2-plane semi-planar (Y plane + interleaved UV) in 16-bit words. YUV444P10, } impl OutputFormat { /// Calculate output size in bytes for given dimensions. + /// + /// The returned size is always a multiple of 4, since the compute shader writes + /// to a `uint[]` buffer and `vkCmdFillBuffer` requires 4-byte aligned sizes. pub fn output_size(&self, width: u32, height: u32) -> usize { let pixel_count = (width * height) as usize; - match self { + let raw = match self { OutputFormat::NV12 | OutputFormat::I420 => pixel_count * 3 / 2, - OutputFormat::YUV444 => pixel_count * 3, + OutputFormat::YUV444 => { + // Y plane (aligned to 4 bytes) + UV interleaved plane. + crate::align4(pixel_count) + pixel_count * 2 + } // 10-bit formats use 2 bytes per sample. OutputFormat::P010 => pixel_count * 3, // Y (2 bytes) + UV (1 byte each, half res) - OutputFormat::YUV444P10 => pixel_count * 6, // Y + U + V, each 2 bytes. - } + OutputFormat::YUV444P10 => { + // Y plane (2 bytes/sample, aligned to 4 bytes) + UV interleaved (4 bytes/pixel). + crate::align4(pixel_count * 2) + pixel_count * 4 + } + }; + crate::align4(raw) } /// Get the Vulkan format for this output format. 
@@ -111,7 +121,7 @@ impl OutputFormat { match self { OutputFormat::NV12 => vk::Format::G8_B8R8_2PLANE_420_UNORM, OutputFormat::I420 => vk::Format::G8_B8_R8_3PLANE_420_UNORM, - OutputFormat::YUV444 => vk::Format::G8_B8_R8_3PLANE_444_UNORM, + OutputFormat::YUV444 => vk::Format::G8_B8R8_2PLANE_444_UNORM, OutputFormat::P010 => vk::Format::G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, OutputFormat::YUV444P10 => vk::Format::G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16, } @@ -150,6 +160,11 @@ pub struct ColorConverterConfig { /// Full range (0-255 luma) or limited/studio range (16-235 luma). /// Must match the `full_range` flag in `ColorDescription` for correct playback. pub full_range: bool, + /// SDR reference white level in nits for the sRGB→BT.2020+PQ conversion. + /// + /// Per ITU-R BT.2408 the standard value is 203 nits. + /// Only used when `color_space` is `SrgbToBt2020Pq`. + pub sdr_reference_white_nits: f32, } impl ColorConverterConfig { @@ -167,6 +182,7 @@ impl ColorConverterConfig { output_format, color_space: ColorSpace::Bt709, full_range: true, + sdr_reference_white_nits: 203.0, } } } @@ -231,6 +247,14 @@ impl ColorConverter { self.config.color_space = color_space; } + /// Set the full-range flag for subsequent conversions. + /// + /// This takes effect on the next `convert()` call without recreating the pipeline, + /// since the full-range flag is passed via push constants. + pub fn set_full_range(&mut self, full_range: bool) { + self.config.full_range = full_range; + } + /// Build buffer-to-image copy regions for multi-planar YUV formats. /// /// For multi-planar formats like NV12, I420, and YUV444, we need separate. @@ -341,8 +365,10 @@ impl ColorConverter { ] } OutputFormat::YUV444 => { - // YUV444: Y, U, V planes all at full resolution. - let plane_size = (self.config.width * self.config.height) as u64; + // YUV444 8-bit 2-plane: Y plane at full resolution, UV interleaved at full resolution. 
+ // Align Y plane size to 4 bytes for VkBufferImageCopy::bufferOffset compliance. + let y_size = + crate::align4((self.config.width * self.config.height) as usize) as u64; vec![ // Y plane. vk::BufferImageCopy { @@ -362,9 +388,9 @@ impl ColorConverter { depth: 1, }, }, - // U plane. + // UV plane (interleaved, full resolution). vk::BufferImageCopy { - buffer_offset: plane_size, + buffer_offset: y_size, buffer_row_length: 0, buffer_image_height: 0, image_subresource: vk::ImageSubresourceLayers { @@ -380,24 +406,6 @@ impl ColorConverter { depth: 1, }, }, - // V plane. - vk::BufferImageCopy { - buffer_offset: plane_size * 2, - buffer_row_length: 0, - buffer_image_height: 0, - image_subresource: vk::ImageSubresourceLayers { - aspect_mask: vk::ImageAspectFlags::PLANE_2, - mip_level: 0, - base_array_layer: 0, - layer_count: 1, - }, - image_offset: vk::Offset3D { x: 0, y: 0, z: 0 }, - image_extent: vk::Extent3D { - width: self.config.width, - height: self.config.height, - depth: 1, - }, - }, ] } OutputFormat::P010 => { @@ -446,8 +454,9 @@ impl ColorConverter { } OutputFormat::YUV444P10 => { // YUV444 10-bit: 2-plane format (Y plane, UV interleaved). - // Note: Using 2-plane format as that's what the encoder expects. - let y_size = (self.config.width * self.config.height * 2) as u64; + // Align Y plane size to 4 bytes for VkBufferImageCopy::bufferOffset compliance. + let y_size = + crate::align4((self.config.width * self.config.height * 2) as usize) as u64; vec![ // Y plane (16-bit samples). vk::BufferImageCopy { @@ -628,14 +637,15 @@ impl ColorConverter { &[], ); - // Push constants: width, height, input_format, output_format, color_space, full_range. - let push_constants: [u32; 6] = [ + // Push constants: width, height, input_format, output_format, color_space, full_range, sdr_white_nits. 
+ let push_constants: [u32; 7] = [ self.config.width, self.config.height, self.config.input_format as u32, self.config.output_format as u32, self.config.color_space as u32, self.config.full_range as u32, + self.config.sdr_reference_white_nits.to_bits(), ]; let push_constants_bytes: &[u8] = std::slice::from_raw_parts( push_constants.as_ptr() as *const u8, @@ -989,6 +999,7 @@ mod tests { output_format: OutputFormat::NV12, color_space: ColorSpace::Bt709, full_range: true, + sdr_reference_white_nits: 203.0, }; let cloned = config.clone(); @@ -1009,6 +1020,7 @@ mod tests { output_format: OutputFormat::I420, color_space: ColorSpace::Bt709, full_range: true, + sdr_reference_white_nits: 203.0, }; let debug_str = format!("{:?}", config); @@ -1050,6 +1062,7 @@ mod tests { output_format: OutputFormat::NV12, color_space: ColorSpace::Bt709, full_range: true, + sdr_reference_white_nits: 203.0, }; let result = ColorConverter::new(context, config); @@ -1084,6 +1097,7 @@ mod tests { output_format: *output_format, color_space: ColorSpace::Bt709, full_range: true, + sdr_reference_white_nits: 203.0, }; let result = ColorConverter::new(context.clone(), config); diff --git a/src/converter/pipeline.rs b/src/converter/pipeline.rs index 58ab1be..62d217b 100644 --- a/src/converter/pipeline.rs +++ b/src/converter/pipeline.rs @@ -38,7 +38,7 @@ pub fn create_converter( let push_constant_range = vk::PushConstantRange::default() .stage_flags(vk::ShaderStageFlags::COMPUTE) .offset(0) - .size(24); // 6 x u32: width, height, input_format, output_format, color_space, full_range + .size(28); // 7 x u32: width, height, input_format, output_format, color_space, full_range, sdr_white_nits(f32) let pipeline_layout_info = vk::PipelineLayoutCreateInfo::default() .set_layouts(std::slice::from_ref(&descriptor_set_layout)) diff --git a/src/converter/shader.rs b/src/converter/shader.rs index 2a7df19..c79f433 100644 --- a/src/converter/shader.rs +++ b/src/converter/shader.rs @@ -42,6 +42,7 @@ 
layout(push_constant) uniform PushConstants { uint output_format; // 0=NV12, 1=I420, 2=YUV444, 3=P010, 4=YUV444P10 uint color_space; // 0=BT.709, 1=BT.2020, 2=sRGB→BT.2020+PQ uint full_range; // 0=limited/studio range, 1=full range + float sdr_white_nits; // SDR reference white (nits), used for sRGB→BT.2020+PQ } params; // Source image sampled directly — eliminates the image-to-buffer copy. @@ -129,8 +130,9 @@ vec3 read_rgb(ivec2 coord) { // sRGB→BT.2020+PQ: decode sRGB gamma → linear BT.709 → BT.2020 gamut → PQ. vec3 linear_709 = srgb_to_linear(rgba.rgb); vec3 linear_2020 = bt709_to_bt2020(linear_709); - // SDR reference white at 203 nits (ITU-R BT.2408) → normalize to PQ's 10000 nit scale. - return linear_to_pq(linear_2020 * (203.0 / 10000.0)); + // SDR reference white (configurable, default 203 nits per ITU-R BT.2408) + // normalized to PQ's 10000 nit scale. + return linear_to_pq(linear_2020 * (params.sdr_white_nits / 10000.0)); } // BT.709 or BT.2020 passthrough: values are already properly encoded. return rgba.rgb; @@ -198,20 +200,20 @@ void main() { uint pixel_count = params.width * params.height; if (params.output_format == 2u) { - // YUV444 8-bit: Full resolution, byte-packed into uints. + // YUV444 8-bit: 2-plane semi-planar (Y plane + UV interleaved). uint y_byte_idx = pixel_idx; uint y_word_idx = y_byte_idx / 4u; uint y_byte_offset = y_byte_idx % 4u; atomicOr(output_data[y_word_idx], q8_y(yuv.x) << (y_byte_offset * 8u)); - uint u_base = pixel_count; - uint u_byte_idx = u_base + pixel_idx; - uint u_word_idx = u_byte_idx / 4u; - uint u_byte_offset = u_byte_idx % 4u; - atomicOr(output_data[u_word_idx], q8_c(yuv.y) << (u_byte_offset * 8u)); + // Align UV plane offset to 4 bytes for VkBufferImageCopy compliance. 
+ uint uv_base = (pixel_count + 3u) & ~3u; + uint uv_byte_idx = uv_base + pixel_idx * 2u; + uint uv_word_idx = uv_byte_idx / 4u; + uint uv_byte_offset = uv_byte_idx % 4u; + atomicOr(output_data[uv_word_idx], q8_c(yuv.y) << (uv_byte_offset * 8u)); - uint v_base = 2u * pixel_count; - uint v_byte_idx = v_base + pixel_idx; + uint v_byte_idx = uv_byte_idx + 1u; uint v_word_idx = v_byte_idx / 4u; uint v_byte_offset = v_byte_idx % 4u; atomicOr(output_data[v_word_idx], q8_c(yuv.z) << (v_byte_offset * 8u)); @@ -221,7 +223,8 @@ void main() { uint y_packed_idx = pixel_idx / 2u; atomicOr(output_data[y_packed_idx], q10_y(yuv.x) << (y_half_offset * 16u)); - uint uv_base_words = pixel_count / 2u; + // Align UV base to 4 bytes: Y plane is ceil(pixel_count/2) words. + uint uv_base_words = (pixel_count + 1u) / 2u; uint uv_word_idx = uv_base_words + pixel_idx; uint uv_packed = q10_c(yuv.y) | (q10_c(yuv.z) << 16u); output_data[uv_word_idx] = uv_packed; diff --git a/src/encoder/av1/api.rs b/src/encoder/av1/api.rs index 8dca6fe..224a989 100644 --- a/src/encoder/av1/api.rs +++ b/src/encoder/av1/api.rs @@ -1,7 +1,7 @@ use super::AV1Encoder; use crate::encoder::gop::{GopFrameType, GopPosition}; -use crate::encoder::EncodedPacket; +use crate::encoder::{ColorDescription, EncodedPacket}; use crate::error::{PixelForgeError, Result}; use ash::vk; use tracing::debug; @@ -207,4 +207,53 @@ impl AV1Encoder { } } } + + /// Update the color description in the AV1 sequence header. + /// + /// This recreates the video session parameters with a new sequence header + /// containing the updated color configuration. The next encoded frame will + /// be a key frame with the new sequence header prepended. + pub fn set_color_description(&mut self, desc: ColorDescription) -> Result<()> { + // Wait for any in-flight encode to complete before modifying session params. + // Do NOT reset the fence here — submit_encode_and_read_bitstream() resets it + // before queue_submit. 
Leaving the fence signaled allows consecutive + // set_color_description() calls without deadlock. + unsafe { + self.context + .device() + .wait_for_fences(&[self.encode_fence], true, u64::MAX) + .map_err(|e| { + PixelForgeError::Synchronization(format!( + "Failed to wait for encode fence: {:?}", + e + )) + })?; + } + + // Save old handle so we can destroy it after successful creation. + let old_session_params = self.session_params; + + let new_session_params = self.create_session_params(&desc)?; + + // Destroy old session parameters now that the new ones are created. + unsafe { + self.video_queue_fn + .destroy_video_session_parameters(old_session_params, None); + } + + self.session_params = new_session_params; + self.config.color_description = Some(desc); + self.header_data = None; // Invalidate cached sequence header + self.gop.request_idr(); + + debug!( + "AV1 color description updated: primaries={}, transfer={}, matrix={}, full_range={}", + desc.color_primaries, + desc.transfer_characteristics, + desc.matrix_coefficients, + desc.full_range + ); + + Ok(()) + } } diff --git a/src/encoder/av1/init.rs b/src/encoder/av1/init.rs index 3c43171..1a59b30 100644 --- a/src/encoder/av1/init.rs +++ b/src/encoder/av1/init.rs @@ -6,7 +6,7 @@ use crate::encoder::resources::{ create_dpb_images, create_image, get_video_format, make_codec_name, map_bitstream_buffer, query_supported_video_formats, ClearImageParams, }; -use crate::encoder::PixelFormat; +use crate::encoder::{ColorDescription, PixelFormat}; use crate::error::{PixelForgeError, Result}; use crate::vulkan::VideoContext; use ash::vk; @@ -276,187 +276,10 @@ impl AV1Encoder { // Allocate session memory. let session_memory = allocate_session_memory(&context, session, &video_queue_fn)?; - // Create AV1 sequence header - similar to H.265 VPS/SPS/PPS but for AV1. - // Calculate frame dimension representation bits. - // Use actual display dimensions for sequence header (not coded extent). 
- // The video session's max_coded_extent is the upper bound for alignment, - // but the sequence header and per-frame coded extents use display dimensions. - let frame_width_bits = 32 - (width - 1).leading_zeros(); - let frame_height_bits = 32 - (height - 1).leading_zeros(); - - // AV1 color configuration. - // Map ColorDescription to AV1 enum constants, defaulting to BT.709. - let (av1_color_primaries, av1_transfer, av1_matrix, av1_full_range) = if let Some(cd) = - &config.color_description - { - let primaries = match cd.color_primaries { - 9 => { - ash::vk::native::StdVideoAV1ColorPrimaries_STD_VIDEO_AV1_COLOR_PRIMARIES_BT_2020 - } - _ => { - ash::vk::native::StdVideoAV1ColorPrimaries_STD_VIDEO_AV1_COLOR_PRIMARIES_BT_709 - } - }; - let transfer = match cd.transfer_characteristics { - 16 => ash::vk::native::StdVideoAV1TransferCharacteristics_STD_VIDEO_AV1_TRANSFER_CHARACTERISTICS_SMPTE_2084, - _ => ash::vk::native::StdVideoAV1TransferCharacteristics_STD_VIDEO_AV1_TRANSFER_CHARACTERISTICS_BT_709, - }; - let matrix = match cd.matrix_coefficients { - 9 => ash::vk::native::StdVideoAV1MatrixCoefficients_STD_VIDEO_AV1_MATRIX_COEFFICIENTS_BT_2020_NCL, - _ => ash::vk::native::StdVideoAV1MatrixCoefficients_STD_VIDEO_AV1_MATRIX_COEFFICIENTS_BT_709, - }; - let full_range = if cd.full_range { 1 } else { 0 }; - (primaries, transfer, matrix, full_range) - } else { - ( - ash::vk::native::StdVideoAV1ColorPrimaries_STD_VIDEO_AV1_COLOR_PRIMARIES_BT_709, - ash::vk::native::StdVideoAV1TransferCharacteristics_STD_VIDEO_AV1_TRANSFER_CHARACTERISTICS_BT_709, - ash::vk::native::StdVideoAV1MatrixCoefficients_STD_VIDEO_AV1_MATRIX_COEFFICIENTS_BT_709, - 1, // full range for SDR - ) - }; - - let color_config_flags = ash::vk::native::StdVideoAV1ColorConfigFlags { - _bitfield_align_1: [], - _bitfield_1: ash::vk::native::StdVideoAV1ColorConfigFlags::new_bitfield_1( - 0, // mono_chrome - av1_full_range, // color_range - 0, // separate_uv_delta_q - 1, // color_description_present_flag - 0, // 
reserved - ), - }; - - // Bit depth: 8 for Eight, 10 for Ten - let bit_depth = match config.bit_depth { - crate::encoder::BitDepth::Eight => 8, - crate::encoder::BitDepth::Ten => 10, - }; - - // Chroma subsampling based on pixel format. - let (subsampling_x, subsampling_y) = match config.pixel_format { - PixelFormat::Yuv420 => (1u8, 1u8), // 4:2:0 - PixelFormat::Yuv444 => (0u8, 0u8), // 4:4:4 - _ => (1u8, 1u8), // Default to 4:2:0 - }; - - let color_config = ash::vk::native::StdVideoAV1ColorConfig { - flags: color_config_flags, - BitDepth: bit_depth, - subsampling_x, - subsampling_y, - reserved1: 0, - color_primaries: av1_color_primaries, - transfer_characteristics: av1_transfer, - matrix_coefficients: av1_matrix, - chroma_sample_position: ash::vk::native::StdVideoAV1ChromaSamplePosition_STD_VIDEO_AV1_CHROMA_SAMPLE_POSITION_UNKNOWN, - }; - - // AV1 sequence header flags - use minimal set to avoid driver issues. - // Disable features we're not providing data for (restoration, most inter-frame features). 
- let seq_flags = ash::vk::native::StdVideoAV1SequenceHeaderFlags { - _bitfield_align_1: [], - _bitfield_1: ash::vk::native::StdVideoAV1SequenceHeaderFlags::new_bitfield_1( - 0, // still_picture - 0, // reduced_still_picture_header - 0, // use_128x128_superblock (use 64x64 superblocks) - 0, // enable_filter_intra - disable for simplicity - 0, // enable_intra_edge_filter - disable for simplicity - 0, // enable_interintra_compound - 0, // enable_masked_compound - 0, // enable_warped_motion - disable for simplicity - 0, // enable_dual_filter - disable for simplicity - 1, // enable_order_hint - keep for reference frames - 0, // enable_jnt_comp - 0, // enable_ref_frame_mvs - 0, // frame_id_numbers_present_flag - 0, // enable_superres - 1, // enable_cdef - keep enabled - 0, // enable_restoration - DISABLE (we don't provide restoration data) - 0, // film_grain_params_present - 0, // timing_info_present_flag - 0, // initial_display_delay_present_flag - 0, // reserved - ), - }; - - let av1_sequence_header = ash::vk::native::StdVideoAV1SequenceHeader { - flags: seq_flags, - seq_profile: profile, - frame_width_bits_minus_1: (frame_width_bits - 1) as u8, - frame_height_bits_minus_1: (frame_height_bits - 1) as u8, - max_frame_width_minus_1: (width - 1) as u16, - max_frame_height_minus_1: (height - 1) as u16, - delta_frame_id_length_minus_2: 0, - additional_frame_id_length_minus_1: 0, - order_hint_bits_minus_1: 7, // 8 bits for order hint - seq_force_integer_mv: 0, - seq_force_screen_content_tools: 0, - reserved1: [0; 5], - pColorConfig: &color_config, - pTimingInfo: ptr::null(), // No timing info - }; + let color_desc = config + .color_description + .unwrap_or(ColorDescription::bt709()); - // Create decoder model info (zero-initialized like FFmpeg). 
- let decoder_model_info = ash::vk::native::StdVideoEncodeAV1DecoderModelInfo { - buffer_delay_length_minus_1: 0, - buffer_removal_time_length_minus_1: 0, - frame_presentation_time_length_minus_1: 0, - reserved1: 0, - num_units_in_decoding_tick: 0, - }; - - // Create operating point info (single operating point like FFmpeg). - let operating_point = ash::vk::native::StdVideoEncodeAV1OperatingPointInfo { - flags: ash::vk::native::StdVideoEncodeAV1OperatingPointInfoFlags { - _bitfield_align_1: [], - _bitfield_1: - ash::vk::native::StdVideoEncodeAV1OperatingPointInfoFlags::new_bitfield_1( - 0, // decoder_model_present_for_this_op - 0, // low_delay_mode_flag - 0, // initial_display_delay_present_for_this_op - 0, // reserved - ), - }, - operating_point_idc: 0, - seq_level_idx: 5, // Level 3.1 (encoded as: 2.0=0, 2.1=1, ... 3.0=4, 3.1=5) - seq_tier: 0, - initial_display_delay_minus_1: 0, - decoder_buffer_delay: 0, - encoder_buffer_delay: 0, - }; - - // Create session parameters with all required structures (matching FFmpeg). - let mut av1_session_params_create_info = - vk::VideoEncodeAV1SessionParametersCreateInfoKHR::default() - .std_sequence_header(&av1_sequence_header) - .std_decoder_model_info(&decoder_model_info) - .std_operating_points(std::slice::from_ref(&operating_point)); - - // Add quality level info to pNext chain (matching FFmpeg). 
- // Chain: SessionParametersCreateInfo -> QualityLevelInfo -> AV1SessionParametersCreateInfo - let mut quality_info = vk::VideoEncodeQualityLevelInfoKHR::default().quality_level(0); // Default quality level - - quality_info.p_next = (&mut av1_session_params_create_info - as *mut vk::VideoEncodeAV1SessionParametersCreateInfoKHR) - .cast(); - - let session_params_create_info = vk::VideoSessionParametersCreateInfoKHR { - video_session: session, - p_next: (&mut quality_info as *mut vk::VideoEncodeQualityLevelInfoKHR).cast(), - ..Default::default() - }; - - let session_params = unsafe { - video_queue_fn - .create_video_session_parameters(&session_params_create_info, None) - .map_err(|e| { - PixelForgeError::SessionParametersCreation(format!( - "Failed to create AV1 session parameters: {:?}", - e - )) - })? - }; // Create input image. let (input_image, input_image_memory, input_image_view) = create_image( &context, @@ -535,14 +358,14 @@ impl AV1Encoder { // Initialize GOP structure. let gop = GopStructure::new(config.gop_size, config.b_frame_count, config.gop_size); - Ok(Self { + let mut encoder = Self { context, config, gop, video_queue_fn, video_encode_fn, session, - session_params, + session_params: vk::VideoSessionParametersKHR::null(), session_memory, input_frame_num: 0, encode_frame_num: 0, @@ -571,6 +394,10 @@ impl AV1Encoder { header_data: None, current_dpb_slot: 0, references: Vec::new(), - }) + }; + + encoder.session_params = encoder.create_session_params(&color_desc)?; + + Ok(encoder) } } diff --git a/src/encoder/av1/mod.rs b/src/encoder/av1/mod.rs index 8d996de..2edcde4 100644 --- a/src/encoder/av1/mod.rs +++ b/src/encoder/av1/mod.rs @@ -5,6 +5,7 @@ mod api; mod encode; mod init; +mod session_params; use ash::vk; use tracing::debug; @@ -173,8 +174,10 @@ impl Drop for AV1Encoder { .free_memory(self.dpb_image_memories[i], None); } - self.video_queue_fn - .destroy_video_session_parameters(self.session_params, None); + if self.session_params != 
vk::VideoSessionParametersKHR::null() { + self.video_queue_fn + .destroy_video_session_parameters(self.session_params, None); + } self.video_queue_fn .destroy_video_session(self.session, None); for mem in &self.session_memory { diff --git a/src/encoder/av1/session_params.rs b/src/encoder/av1/session_params.rs new file mode 100644 index 0000000..cb517b1 --- /dev/null +++ b/src/encoder/av1/session_params.rs @@ -0,0 +1,180 @@ +use super::AV1Encoder; + +use crate::encoder::{BitDepth, ColorDescription, PixelFormat}; +use crate::error::{PixelForgeError, Result}; +use ash::vk; +use std::ptr; + +impl AV1Encoder { + /// Build AV1 sequence header and create Vulkan video session parameters. + /// + /// This is used both during initial encoder creation and by + /// `set_color_description()` to rebuild session parameters with + /// updated color configuration. Keeping a single implementation + /// ensures the sequence header stays bit-for-bit consistent. + pub(crate) fn create_session_params( + &self, + desc: &ColorDescription, + ) -> Result<vk::VideoSessionParametersKHR> { + let width = self.config.dimensions.width; + let height = self.config.dimensions.height; + + let frame_width_bits = 32 - (width - 1).leading_zeros(); + let frame_height_bits = 32 - (height - 1).leading_zeros(); + + // Map ColorDescription to AV1 enum constants. 
+ let av1_color_primaries = match desc.color_primaries { + 9 => ash::vk::native::StdVideoAV1ColorPrimaries_STD_VIDEO_AV1_COLOR_PRIMARIES_BT_2020, + _ => ash::vk::native::StdVideoAV1ColorPrimaries_STD_VIDEO_AV1_COLOR_PRIMARIES_BT_709, + }; + let av1_transfer = match desc.transfer_characteristics { + 16 => ash::vk::native::StdVideoAV1TransferCharacteristics_STD_VIDEO_AV1_TRANSFER_CHARACTERISTICS_SMPTE_2084, + _ => ash::vk::native::StdVideoAV1TransferCharacteristics_STD_VIDEO_AV1_TRANSFER_CHARACTERISTICS_BT_709, + }; + let av1_matrix = match desc.matrix_coefficients { + 9 => ash::vk::native::StdVideoAV1MatrixCoefficients_STD_VIDEO_AV1_MATRIX_COEFFICIENTS_BT_2020_NCL, + _ => ash::vk::native::StdVideoAV1MatrixCoefficients_STD_VIDEO_AV1_MATRIX_COEFFICIENTS_BT_709, + }; + let av1_full_range = if desc.full_range { 1 } else { 0 }; + + let color_config_flags = ash::vk::native::StdVideoAV1ColorConfigFlags { + _bitfield_align_1: [], + _bitfield_1: ash::vk::native::StdVideoAV1ColorConfigFlags::new_bitfield_1( + 0, // mono_chrome + av1_full_range, // color_range + 0, // separate_uv_delta_q + 1, // color_description_present_flag + 0, // reserved + ), + }; + + let bit_depth = match self.config.bit_depth { + BitDepth::Eight => 8, + BitDepth::Ten => 10, + }; + + // Chroma subsampling based on pixel format. 
+ let (subsampling_x, subsampling_y) = match self.config.pixel_format { + PixelFormat::Yuv420 => (1u8, 1u8), // 4:2:0 + PixelFormat::Yuv444 => (0u8, 0u8), // 4:4:4 + _ => (1u8, 1u8), // Default to 4:2:0 + }; + + let color_config = ash::vk::native::StdVideoAV1ColorConfig { + flags: color_config_flags, + BitDepth: bit_depth, + subsampling_x, + subsampling_y, + reserved1: 0, + color_primaries: av1_color_primaries, + transfer_characteristics: av1_transfer, + matrix_coefficients: av1_matrix, + chroma_sample_position: ash::vk::native::StdVideoAV1ChromaSamplePosition_STD_VIDEO_AV1_CHROMA_SAMPLE_POSITION_UNKNOWN, + }; + + let profile = match self.config.pixel_format { + PixelFormat::Yuv420 => ash::vk::native::StdVideoAV1Profile_STD_VIDEO_AV1_PROFILE_MAIN, + _ => ash::vk::native::StdVideoAV1Profile_STD_VIDEO_AV1_PROFILE_HIGH, + }; + + // AV1 sequence header flags - use minimal set to avoid driver issues. + let seq_flags = ash::vk::native::StdVideoAV1SequenceHeaderFlags { + _bitfield_align_1: [], + _bitfield_1: ash::vk::native::StdVideoAV1SequenceHeaderFlags::new_bitfield_1( + 0, // still_picture + 0, // reduced_still_picture_header + 0, // use_128x128_superblock (use 64x64 superblocks) + 0, // enable_filter_intra + 0, // enable_intra_edge_filter + 0, // enable_interintra_compound + 0, // enable_masked_compound + 0, // enable_warped_motion + 0, // enable_dual_filter + 1, // enable_order_hint + 0, // enable_jnt_comp + 0, // enable_ref_frame_mvs + 0, // frame_id_numbers_present_flag + 0, // enable_superres + 1, // enable_cdef + 0, // enable_restoration + 0, // film_grain_params_present + 0, // timing_info_present_flag + 0, // initial_display_delay_present_flag + 0, // reserved + ), + }; + + let av1_sequence_header = ash::vk::native::StdVideoAV1SequenceHeader { + flags: seq_flags, + seq_profile: profile, + frame_width_bits_minus_1: (frame_width_bits - 1) as u8, + frame_height_bits_minus_1: (frame_height_bits - 1) as u8, + max_frame_width_minus_1: (width - 1) as u16, + 
max_frame_height_minus_1: (height - 1) as u16, + delta_frame_id_length_minus_2: 0, + additional_frame_id_length_minus_1: 0, + order_hint_bits_minus_1: 7, + seq_force_integer_mv: 0, + seq_force_screen_content_tools: 0, + reserved1: [0; 5], + pColorConfig: &color_config, + pTimingInfo: ptr::null(), + }; + + // Create decoder model info (zero-initialized like FFmpeg). + let decoder_model_info = ash::vk::native::StdVideoEncodeAV1DecoderModelInfo { + buffer_delay_length_minus_1: 0, + buffer_removal_time_length_minus_1: 0, + frame_presentation_time_length_minus_1: 0, + reserved1: 0, + num_units_in_decoding_tick: 0, + }; + + // Create operating point info (single operating point like FFmpeg). + let operating_point = ash::vk::native::StdVideoEncodeAV1OperatingPointInfo { + flags: ash::vk::native::StdVideoEncodeAV1OperatingPointInfoFlags { + _bitfield_align_1: [], + _bitfield_1: + ash::vk::native::StdVideoEncodeAV1OperatingPointInfoFlags::new_bitfield_1( + 0, 0, 0, 0, + ), + }, + operating_point_idc: 0, + seq_level_idx: 5, // Level 3.1 + seq_tier: 0, + initial_display_delay_minus_1: 0, + decoder_buffer_delay: 0, + encoder_buffer_delay: 0, + }; + + let mut av1_session_params_create_info = + vk::VideoEncodeAV1SessionParametersCreateInfoKHR::default() + .std_sequence_header(&av1_sequence_header) + .std_decoder_model_info(&decoder_model_info) + .std_operating_points(std::slice::from_ref(&operating_point)); + + let mut quality_info = vk::VideoEncodeQualityLevelInfoKHR::default().quality_level(0); + quality_info.p_next = (&mut av1_session_params_create_info + as *mut vk::VideoEncodeAV1SessionParametersCreateInfoKHR) + .cast(); + + let session_params_create_info = vk::VideoSessionParametersCreateInfoKHR { + video_session: self.session, + p_next: (&mut quality_info as *mut vk::VideoEncodeQualityLevelInfoKHR).cast(), + ..Default::default() + }; + + let session_params = unsafe { + self.video_queue_fn + .create_video_session_parameters(&session_params_create_info, None) + .map_err(|e| 
{ + PixelForgeError::SessionParametersCreation(format!( + "Failed to create AV1 session parameters: {:?}", + e + )) + })? + }; + + Ok(session_params) + } +} diff --git a/src/encoder/h264/api.rs b/src/encoder/h264/api.rs index 2b37ac5..f41a080 100644 --- a/src/encoder/h264/api.rs +++ b/src/encoder/h264/api.rs @@ -2,7 +2,7 @@ use super::H264Encoder; use crate::encoder::dpb::{DecodedPictureBufferTrait, DpbConfig, PictureStartInfo, PictureType}; use crate::encoder::gop::{GopFrameType, GopPosition}; -use crate::encoder::EncodedPacket; +use crate::encoder::{ColorDescription, EncodedPacket}; use crate::error::Result; use crate::PixelForgeError; use ash::vk; @@ -237,4 +237,59 @@ impl H264Encoder { } } } + + /// Update the color description (VUI parameters) in the encoded stream. + /// + /// This recreates the video session parameters with a new SPS containing the + /// updated VUI color primaries, transfer characteristics, and matrix coefficients. + /// The next encoded frame will be an IDR with the new SPS/PPS prepended. + pub fn set_color_description(&mut self, desc: ColorDescription) -> Result<()> { + // Wait for any in-flight encode to complete before modifying session params. + // Do NOT reset the fence here — submit_encode_and_read_bitstream() resets it + // before queue_submit. Leaving the fence signaled allows consecutive + // set_color_description() calls without deadlock. + unsafe { + self.context + .device() + .wait_for_fences(&[self.encode_fence], true, u64::MAX) + .map_err(|e| { + PixelForgeError::Synchronization(format!( + "Failed to wait for encode fence: {:?}", + e + )) + })?; + } + + // Save old handle so we can destroy it after successful creation. + let old_session_params = self.session_params; + + let new_session_params = self.create_session_params(&desc)?; + + // Destroy old session parameters now that the new ones are created. 
+ unsafe { + (self + .video_queue_fn + .fp() + .destroy_video_session_parameters_khr)( + self.context.device().handle(), + old_session_params, + std::ptr::null(), + ); + } + + self.session_params = new_session_params; + self.config.color_description = Some(desc); + self.sps_written = false; + self.gop.request_idr(); + + debug!( + "H.264 color description updated: primaries={}, transfer={}, matrix={}, full_range={}", + desc.color_primaries, + desc.transfer_characteristics, + desc.matrix_coefficients, + desc.full_range + ); + + Ok(()) + } } diff --git a/src/encoder/h264/init.rs b/src/encoder/h264/init.rs index 5fdd5aa..1ccb373 100644 --- a/src/encoder/h264/init.rs +++ b/src/encoder/h264/init.rs @@ -403,173 +403,9 @@ impl H264Encoder { ))); } - let frame_crop_right = crop_right_pixels / crop_unit_x; - let frame_crop_bottom = crop_bottom_pixels / crop_unit_y; - - let constraint_set3_flag = 0; - - let mut sps_flags: ash::vk::native::StdVideoH264SpsFlags = unsafe { std::mem::zeroed() }; - sps_flags.set_constraint_set3_flag(constraint_set3_flag); - sps_flags.set_direct_8x8_inference_flag(1); - sps_flags.set_frame_mbs_only_flag(1); - if frame_crop_right > 0 || frame_crop_bottom > 0 { - sps_flags.set_frame_cropping_flag(1); - } - sps_flags.set_vui_parameters_present_flag(1); - - // Get chroma_format_idc based on pixel format. 
- let chroma_format_idc = match config.pixel_format { - PixelFormat::Yuv420 => { - ash::vk::native::StdVideoH264ChromaFormatIdc_STD_VIDEO_H264_CHROMA_FORMAT_IDC_420 - } - PixelFormat::Yuv444 => { - ash::vk::native::StdVideoH264ChromaFormatIdc_STD_VIDEO_H264_CHROMA_FORMAT_IDC_444 - } - _ => unreachable!("Pixel format validated above"), - }; - - let (bit_depth_luma_minus8, bit_depth_chroma_minus8) = match config.bit_depth { - crate::encoder::BitDepth::Eight => (0u8, 0u8), - crate::encoder::BitDepth::Ten => (2u8, 2u8), - }; - - let mut vui_flags: ash::vk::native::StdVideoH264SpsVuiFlags = unsafe { std::mem::zeroed() }; - vui_flags.set_aspect_ratio_info_present_flag(1); - vui_flags.set_video_signal_type_present_flag(1); let color_desc = config .color_description .unwrap_or(ColorDescription::bt709()); - vui_flags.set_video_full_range_flag(if color_desc.full_range { 1 } else { 0 }); - vui_flags.set_color_description_present_flag(1); - // Do not set HRD parameters when rate control is disabled/CQP. - // HRD with zeroed bitrate values causes device loss on some drivers (AMD). 
- vui_flags.set_nal_hrd_parameters_present_flag(0); - vui_flags.set_bitstream_restriction_flag(1); - - let vui = ash::vk::native::StdVideoH264SequenceParameterSetVui { - flags: vui_flags, - aspect_ratio_idc: - ash::vk::native::StdVideoH264AspectRatioIdc_STD_VIDEO_H264_ASPECT_RATIO_IDC_SQUARE, - sar_width: 0, - sar_height: 0, - video_format: 5, - colour_primaries: color_desc.color_primaries, - transfer_characteristics: color_desc.transfer_characteristics, - matrix_coefficients: color_desc.matrix_coefficients, - num_units_in_tick: 0, - time_scale: 0, - max_num_reorder_frames: if config.b_frame_count > 0 { 1 } else { 0 }, - max_dec_frame_buffering: (max_active_reference_pictures + 1) as u8, - chroma_sample_loc_type_top_field: 0, - chroma_sample_loc_type_bottom_field: 0, - reserved1: 0, - pHrdParameters: ptr::null(), - }; - - let sps = ash::vk::native::StdVideoH264SequenceParameterSet { - flags: sps_flags, - profile_idc, - level_idc: ash::vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_4_1, - chroma_format_idc, - seq_parameter_set_id: 0, - bit_depth_luma_minus8, - bit_depth_chroma_minus8, - log2_max_frame_num_minus4: 4, - pic_order_cnt_type: if config.b_frame_count > 0 { - ash::vk::native::StdVideoH264PocType_STD_VIDEO_H264_POC_TYPE_0 - } else { - ash::vk::native::StdVideoH264PocType_STD_VIDEO_H264_POC_TYPE_2 - }, - offset_for_non_ref_pic: 0, - offset_for_top_to_bottom_field: 0, - log2_max_pic_order_cnt_lsb_minus4: 4, - num_ref_frames_in_pic_order_cnt_cycle: 0, - max_num_ref_frames: max_active_reference_pictures as u8, - reserved1: 0, - pic_width_in_mbs_minus1: pic_width_in_mbs - 1, - pic_height_in_map_units_minus1: pic_height_in_map_units - 1, - frame_crop_left_offset: 0, - frame_crop_right_offset: frame_crop_right, - frame_crop_top_offset: 0, - frame_crop_bottom_offset: frame_crop_bottom, - reserved2: 0, - pOffsetForRefFrame: ptr::null(), - pScalingLists: ptr::null(), - pSequenceParameterSetVui: &vui, - }; - - let mut pps_flags: 
ash::vk::native::StdVideoH264PpsFlags = unsafe { std::mem::zeroed() }; - // Enable 8x8 transform for High profile and above (required by some - // drivers for High 4:4:4 Predictive SPS/PPS generation). - let transform_8x8 = - profile_idc >= ash::vk::native::StdVideoH264ProfileIdc_STD_VIDEO_H264_PROFILE_IDC_HIGH; - pps_flags.set_transform_8x8_mode_flag(transform_8x8 as u32); - // Use the driver's preferred entropy coding mode from quality level properties. - // Some drivers (e.g., NVIDIA for H.264 High 4:4:4 Predictive) require CAVLC. - pps_flags.set_entropy_coding_mode_flag(preferred_entropy_cabac as u32); - pps_flags.set_deblocking_filter_control_present_flag(1); - - // vk_video_samples sets chroma QP offsets to 6 for 4:4:4 unless lossless. - // This improves driver compatibility for SPS/PPS generation. - let (chroma_qp_index_offset, second_chroma_qp_index_offset) = match config.pixel_format { - PixelFormat::Yuv444 => (6i8, 6i8), - _ => (0i8, 0i8), - }; - - let pps = ash::vk::native::StdVideoH264PictureParameterSet { - flags: pps_flags, - seq_parameter_set_id: 0, - pic_parameter_set_id: 0, - num_ref_idx_l0_default_active_minus1: (max_active_reference_pictures as i8 - 1).max(0) as u8, - num_ref_idx_l1_default_active_minus1: 0, - weighted_bipred_idc: ash::vk::native::StdVideoH264WeightedBipredIdc_STD_VIDEO_H264_WEIGHTED_BIPRED_IDC_DEFAULT, - pic_init_qp_minus26: 0, - pic_init_qs_minus26: 0, - chroma_qp_index_offset, - second_chroma_qp_index_offset, - pScalingLists: ptr::null(), - }; - - let sps_array = [sps]; - let pps_array = [pps]; - - let h264_add_info = vk::VideoEncodeH264SessionParametersAddInfoKHR::default() - .std_sp_ss(&sps_array) - .std_pp_ss(&pps_array); - - let mut h264_params_create_info = - vk::VideoEncodeH264SessionParametersCreateInfoKHR::default() - .max_std_sps_count(1) - .max_std_pps_count(1) - .parameters_add_info(&h264_add_info); - - // Chain quality level info into session parameters creation. 
- // This is required by AMD RADV and matches FFmpeg's approach. - let mut quality_level_info = vk::VideoEncodeQualityLevelInfoKHR::default().quality_level(0); // Use quality level 0 (best quality). - quality_level_info.p_next = (&mut h264_params_create_info - as *mut vk::VideoEncodeH264SessionParametersCreateInfoKHR) - .cast(); - - let mut params_create_info = - vk::VideoSessionParametersCreateInfoKHR::default().video_session(session); - params_create_info.p_next = - (&mut quality_level_info as *mut vk::VideoEncodeQualityLevelInfoKHR).cast(); - - let mut session_params = vk::VideoSessionParametersKHR::null(); - let result = unsafe { - (video_queue_fn.fp().create_video_session_parameters_khr)( - context.device().handle(), - ¶ms_create_info, - ptr::null(), - &mut session_params, - ) - }; - if result != vk::Result::SUCCESS { - return Err(PixelForgeError::SessionParametersCreation(format!( - "{:?}", - result - ))); - } // Create profile info for images/buffers. let mut h264_profile_for_resources = @@ -713,7 +549,7 @@ impl H264Encoder { info!("H.264 encoder created successfully"); - Ok(Self { + let mut encoder = Self { context, config: config.clone(), dpb, @@ -723,7 +559,7 @@ impl H264Encoder { video_queue_fn, video_encode_fn, session, - session_params, + session_params: vk::VideoSessionParametersKHR::null(), session_memory, input_frame_num: 0, encode_frame_num: 0, @@ -760,6 +596,12 @@ impl H264Encoder { backward_reference_frame_num: 0, backward_reference_poc: 0, backward_reference_dpb_slot: 2, - }) + profile_idc, + preferred_entropy_cabac, + }; + + encoder.session_params = encoder.create_session_params(&color_desc)?; + + Ok(encoder) } } diff --git a/src/encoder/h264/mod.rs b/src/encoder/h264/mod.rs index d2c69db..3d339d3 100644 --- a/src/encoder/h264/mod.rs +++ b/src/encoder/h264/mod.rs @@ -5,6 +5,7 @@ mod api; mod encode; mod init; +mod session_params; use ash::vk; use tracing::debug; @@ -102,6 +103,10 @@ pub struct H264Encoder { l0_references: Vec, /// Number of 
active reference frames (as configured/negotiated). active_reference_count: u32, + /// H.264 profile IDC (cached from initialization for session parameter recreation). + profile_idc: u32, + /// Whether CABAC entropy coding is preferred (cached from quality level query). + preferred_entropy_cabac: bool, } impl H264Encoder { diff --git a/src/encoder/h264/session_params.rs b/src/encoder/h264/session_params.rs new file mode 100644 index 0000000..314bf7b --- /dev/null +++ b/src/encoder/h264/session_params.rs @@ -0,0 +1,210 @@ +use super::H264Encoder; + +use crate::encoder::{BitDepth, ColorDescription, PixelFormat}; +use crate::error::{PixelForgeError, Result}; +use ash::vk; +use std::ptr; + +impl H264Encoder { + /// Build SPS/PPS and create Vulkan video session parameters. + /// + /// This is used both during initial encoder creation and by + /// `set_color_description()` to rebuild session parameters with + /// updated VUI color metadata. Keeping a single implementation + /// ensures the parameter sets stay bit-for-bit consistent. + pub(crate) fn create_session_params( + &self, + desc: &ColorDescription, + ) -> Result { + let width = self.config.dimensions.width; + let height = self.config.dimensions.height; + + let pic_width_in_mbs = self.aligned_width / 16; + let pic_height_in_map_units = self.aligned_height / 16; + + // Cropping offsets are expressed in units that depend on chroma subsampling. 
+ // For progressive frames (frame_mbs_only_flag=1): + // - 4:2:0 => crop_unit_x=2, crop_unit_y=2 + // - 4:4:4 => crop_unit_x=1, crop_unit_y=1 + let (crop_unit_x, crop_unit_y) = match self.config.pixel_format { + PixelFormat::Yuv420 => (2u32, 2u32), + PixelFormat::Yuv444 => (1u32, 1u32), + _ => { + return Err(PixelForgeError::InvalidInput(format!( + "Unsupported pixel format for H.264: {:?}", + self.config.pixel_format + ))); + } + }; + + let coded_width = pic_width_in_mbs * 16; + let coded_height = pic_height_in_map_units * 16; + let frame_crop_right = coded_width.saturating_sub(width) / crop_unit_x; + let frame_crop_bottom = coded_height.saturating_sub(height) / crop_unit_y; + + let mut sps_flags: ash::vk::native::StdVideoH264SpsFlags = unsafe { std::mem::zeroed() }; + sps_flags.set_constraint_set3_flag(0); + sps_flags.set_direct_8x8_inference_flag(1); + sps_flags.set_frame_mbs_only_flag(1); + if frame_crop_right > 0 || frame_crop_bottom > 0 { + sps_flags.set_frame_cropping_flag(1); + } + sps_flags.set_vui_parameters_present_flag(1); + + let chroma_format_idc = match self.config.pixel_format { + PixelFormat::Yuv420 => { + ash::vk::native::StdVideoH264ChromaFormatIdc_STD_VIDEO_H264_CHROMA_FORMAT_IDC_420 + } + PixelFormat::Yuv444 => { + ash::vk::native::StdVideoH264ChromaFormatIdc_STD_VIDEO_H264_CHROMA_FORMAT_IDC_444 + } + _ => unreachable!("Pixel format validated above"), + }; + + let (bit_depth_luma_minus8, bit_depth_chroma_minus8) = match self.config.bit_depth { + BitDepth::Eight => (0u8, 0u8), + BitDepth::Ten => (2u8, 2u8), + }; + + let max_active = self.active_reference_count as u8; + + let mut vui_flags: ash::vk::native::StdVideoH264SpsVuiFlags = unsafe { std::mem::zeroed() }; + vui_flags.set_aspect_ratio_info_present_flag(1); + vui_flags.set_video_signal_type_present_flag(1); + vui_flags.set_video_full_range_flag(if desc.full_range { 1 } else { 0 }); + vui_flags.set_color_description_present_flag(1); + // Do not set HRD parameters when rate control is 
disabled/CQP. + // HRD with zeroed bitrate values causes device loss on some drivers (AMD). + vui_flags.set_nal_hrd_parameters_present_flag(0); + vui_flags.set_bitstream_restriction_flag(1); + + let vui = ash::vk::native::StdVideoH264SequenceParameterSetVui { + flags: vui_flags, + aspect_ratio_idc: + ash::vk::native::StdVideoH264AspectRatioIdc_STD_VIDEO_H264_ASPECT_RATIO_IDC_SQUARE, + sar_width: 0, + sar_height: 0, + video_format: 5, + colour_primaries: desc.color_primaries, + transfer_characteristics: desc.transfer_characteristics, + matrix_coefficients: desc.matrix_coefficients, + num_units_in_tick: 0, + time_scale: 0, + max_num_reorder_frames: if self.config.b_frame_count > 0 { 1 } else { 0 }, + max_dec_frame_buffering: max_active + 1, + chroma_sample_loc_type_top_field: 0, + chroma_sample_loc_type_bottom_field: 0, + reserved1: 0, + pHrdParameters: ptr::null(), + }; + + let sps = ash::vk::native::StdVideoH264SequenceParameterSet { + flags: sps_flags, + profile_idc: self.profile_idc, + level_idc: ash::vk::native::StdVideoH264LevelIdc_STD_VIDEO_H264_LEVEL_IDC_4_1, + chroma_format_idc, + seq_parameter_set_id: 0, + bit_depth_luma_minus8, + bit_depth_chroma_minus8, + log2_max_frame_num_minus4: 4, + pic_order_cnt_type: if self.config.b_frame_count > 0 { + ash::vk::native::StdVideoH264PocType_STD_VIDEO_H264_POC_TYPE_0 + } else { + ash::vk::native::StdVideoH264PocType_STD_VIDEO_H264_POC_TYPE_2 + }, + offset_for_non_ref_pic: 0, + offset_for_top_to_bottom_field: 0, + log2_max_pic_order_cnt_lsb_minus4: 4, + num_ref_frames_in_pic_order_cnt_cycle: 0, + max_num_ref_frames: max_active, + reserved1: 0, + pic_width_in_mbs_minus1: pic_width_in_mbs - 1, + pic_height_in_map_units_minus1: pic_height_in_map_units - 1, + frame_crop_left_offset: 0, + frame_crop_right_offset: frame_crop_right, + frame_crop_top_offset: 0, + frame_crop_bottom_offset: frame_crop_bottom, + reserved2: 0, + pOffsetForRefFrame: ptr::null(), + pScalingLists: ptr::null(), + pSequenceParameterSetVui: &vui, + }; + 
+ let mut pps_flags: ash::vk::native::StdVideoH264PpsFlags = unsafe { std::mem::zeroed() }; + // Enable 8x8 transform for High profile and above (required by some + // drivers for High 4:4:4 Predictive SPS/PPS generation). + let transform_8x8 = self.profile_idc + >= ash::vk::native::StdVideoH264ProfileIdc_STD_VIDEO_H264_PROFILE_IDC_HIGH; + pps_flags.set_transform_8x8_mode_flag(transform_8x8 as u32); + // Use the driver's preferred entropy coding mode from quality level properties. + // Some drivers (e.g., NVIDIA for H.264 High 4:4:4 Predictive) require CAVLC. + pps_flags.set_entropy_coding_mode_flag(self.preferred_entropy_cabac as u32); + pps_flags.set_deblocking_filter_control_present_flag(1); + + // vk_video_samples sets chroma QP offsets to 6 for 4:4:4 unless lossless. + // This improves driver compatibility for SPS/PPS generation. + let (chroma_qp_index_offset, second_chroma_qp_index_offset) = match self.config.pixel_format + { + PixelFormat::Yuv444 => (6i8, 6i8), + _ => (0i8, 0i8), + }; + + let pps = ash::vk::native::StdVideoH264PictureParameterSet { + flags: pps_flags, + seq_parameter_set_id: 0, + pic_parameter_set_id: 0, + num_ref_idx_l0_default_active_minus1: (max_active as i8 - 1).max(0) as u8, + num_ref_idx_l1_default_active_minus1: 0, + weighted_bipred_idc: + ash::vk::native::StdVideoH264WeightedBipredIdc_STD_VIDEO_H264_WEIGHTED_BIPRED_IDC_DEFAULT, + pic_init_qp_minus26: 0, + pic_init_qs_minus26: 0, + chroma_qp_index_offset, + second_chroma_qp_index_offset, + pScalingLists: ptr::null(), + }; + + let sps_array = [sps]; + let pps_array = [pps]; + + let h264_add_info = vk::VideoEncodeH264SessionParametersAddInfoKHR::default() + .std_sp_ss(&sps_array) + .std_pp_ss(&pps_array); + + let mut h264_params_create_info = + vk::VideoEncodeH264SessionParametersCreateInfoKHR::default() + .max_std_sps_count(1) + .max_std_pps_count(1) + .parameters_add_info(&h264_add_info); + + // Chain quality level info into session parameters creation. 
+ // This is required by AMD RADV and matches FFmpeg's approach. + let mut quality_level_info = vk::VideoEncodeQualityLevelInfoKHR::default().quality_level(0); + quality_level_info.p_next = (&mut h264_params_create_info + as *mut vk::VideoEncodeH264SessionParametersCreateInfoKHR) + .cast(); + + let mut params_create_info = + vk::VideoSessionParametersCreateInfoKHR::default().video_session(self.session); + params_create_info.p_next = + (&mut quality_level_info as *mut vk::VideoEncodeQualityLevelInfoKHR).cast(); + + let mut session_params = vk::VideoSessionParametersKHR::null(); + let result = unsafe { + (self.video_queue_fn.fp().create_video_session_parameters_khr)( + self.context.device().handle(), + ¶ms_create_info, + ptr::null(), + &mut session_params, + ) + }; + if result != vk::Result::SUCCESS { + return Err(PixelForgeError::SessionParametersCreation(format!( + "{:?}", + result + ))); + } + + Ok(session_params) + } +} diff --git a/src/encoder/h265/api.rs b/src/encoder/h265/api.rs index 4a90510..e52b210 100644 --- a/src/encoder/h265/api.rs +++ b/src/encoder/h265/api.rs @@ -2,7 +2,7 @@ use super::H265Encoder; use crate::encoder::dpb::{DecodedPictureBufferTrait, DpbConfig, PictureStartInfo, PictureType}; use crate::encoder::gop::{GopFrameType, GopPosition}; -use crate::encoder::EncodedPacket; +use crate::encoder::{ColorDescription, EncodedPacket}; use crate::error::Result; use crate::PixelForgeError; use ash::vk; @@ -271,4 +271,59 @@ impl H265Encoder { } } } + + /// Update the color description (VUI parameters) in the encoded stream. + /// + /// This recreates the video session parameters with a new SPS containing the + /// updated VUI color primaries, transfer characteristics, and matrix coefficients. + /// The next encoded frame will be an IDR with the new VPS/SPS/PPS prepended. + pub fn set_color_description(&mut self, desc: ColorDescription) -> Result<()> { + // Wait for any in-flight encode to complete before modifying session params. 
+ // Do NOT reset the fence here — submit_encode_and_read_bitstream() resets it + // before queue_submit. Leaving the fence signaled allows consecutive + // set_color_description() calls without deadlock. + unsafe { + self.context + .device() + .wait_for_fences(&[self.encode_fence], true, u64::MAX) + .map_err(|e| { + PixelForgeError::Synchronization(format!( + "Failed to wait for encode fence: {:?}", + e + )) + })?; + } + + // Save old handle so we can destroy it after successful creation. + let old_session_params = self.session_params; + + let new_session_params = self.create_session_params(&desc)?; + + // Destroy old session parameters now that the new ones are created. + unsafe { + (self + .video_queue_fn + .fp() + .destroy_video_session_parameters_khr)( + self.context.device().handle(), + old_session_params, + std::ptr::null(), + ); + } + + self.session_params = new_session_params; + self.config.color_description = Some(desc); + self.header_data = None; // Invalidate cached VPS/SPS/PPS header + self.gop.request_idr(); + + debug!( + "H.265 color description updated: primaries={}, transfer={}, matrix={}, full_range={}", + desc.color_primaries, + desc.transfer_characteristics, + desc.matrix_coefficients, + desc.full_range + ); + + Ok(()) + } } diff --git a/src/encoder/h265/init.rs b/src/encoder/h265/init.rs index 7241c19..12e3041 100644 --- a/src/encoder/h265/init.rs +++ b/src/encoder/h265/init.rs @@ -297,413 +297,10 @@ impl H265Encoder { // Query and allocate session memory. 
let session_memory = allocate_session_memory(&context, session, &video_queue_fn)?; - // Create VPS, SPS and PPS - // H.265 coding block sizes: - // CTB size (cuSize) = 32x32 -> log2_ctb_size = 5 -> cuSize enum = 2 - // Min CB size (cuMinSize) = 16x16 -> log2_min_cb_size = 4 -> cuMinSize enum = 1 - let ctb_log2_size_y: u8 = 5; // 32x32 CTB - let min_cb_log2_size_y: u8 = 4; // 16x16 min CB - let log2_min_transform_block_size: u8 = 2; // 4x4 min TU - let log2_max_transform_block_size: u8 = 5; // 32x32 max TU - - // Calculate SPS parameters. - let pic_width_in_luma_samples = aligned_width; - let pic_height_in_luma_samples = aligned_height; - - // Conformance window for cropping. - // SubWidthC and SubHeightC depend on chroma format: - // - YUV420: SubWidthC=2, SubHeightC=2 - // - YUV444: SubWidthC=1, SubHeightC=1 - let (sub_width_c, sub_height_c) = match config.pixel_format { - PixelFormat::Yuv420 => (2u32, 2u32), - PixelFormat::Yuv444 => (1u32, 1u32), - _ => (2u32, 2u32), // Default to 4:2:0 - }; - let conf_win_right_offset = (aligned_width - width) / sub_width_c; - let conf_win_bottom_offset = (aligned_height - height) / sub_height_c; - let conformance_window_flag = conf_win_right_offset > 0 || conf_win_bottom_offset > 0; - - // Profile tier level - Main/Main10/Main 4:4:4 profile, level 5.1 (sufficient for 4K) - let profile_tier_level = ash::vk::native::StdVideoH265ProfileTierLevel { - flags: ash::vk::native::StdVideoH265ProfileTierLevelFlags { - _bitfield_align_1: [], - _bitfield_1: ash::vk::native::StdVideoH265ProfileTierLevelFlags::new_bitfield_1( - 0, // general_tier_flag (Main tier) - 1, // general_progressive_source_flag - 0, // general_interlaced_source_flag - 0, // general_non_packed_constraint_flag - 1, // general_frame_only_constraint_flag - ), - __bindgen_padding_0: [0; 3], - }, - general_profile_idc: profile_idc, - general_level_idc: ash::vk::native::StdVideoH265LevelIdc_STD_VIDEO_H265_LEVEL_IDC_5_1, - }; - - // Decoded Picture Buffer Manager - let 
dec_pic_buf_mgr = ash::vk::native::StdVideoH265DecPicBufMgr { - max_latency_increase_plus1: [0; 7], - max_dec_pic_buffering_minus1: [(dpb_slot_count - 1) as u8, 0, 0, 0, 0, 0, 0], - max_num_reorder_pics: [0; 7], // No B-frame reordering by default - }; - - // Short-term reference picture set (in SPS for RPS in SPS mode) - // Set up a simple RPS with one reference picture - let short_term_ref_pic_set = ash::vk::native::StdVideoH265ShortTermRefPicSet { - flags: ash::vk::native::StdVideoH265ShortTermRefPicSetFlags { - _bitfield_align_1: [], - _bitfield_1: ash::vk::native::StdVideoH265ShortTermRefPicSetFlags::new_bitfield_1( - 0, // inter_ref_pic_set_prediction_flag - 0, // delta_rps_sign - ), - __bindgen_padding_0: [0; 3], - }, - delta_idx_minus1: 0, - use_delta_flag: 0, - abs_delta_rps_minus1: 0, - used_by_curr_pic_flag: 0, - used_by_curr_pic_s0_flag: 1, // First negative reference is used - used_by_curr_pic_s1_flag: 0, - reserved1: 0, - reserved2: 0, - reserved3: 0, - num_negative_pics: 1, // One backward reference - num_positive_pics: 0, - delta_poc_s0_minus1: [0; 16], - delta_poc_s1_minus1: [0; 16], - }; - - let long_term_ref_pics_sps = ash::vk::native::StdVideoH265LongTermRefPicsSps { - used_by_curr_pic_lt_sps_flag: 0, - lt_ref_pic_poc_lsb_sps: [0; 32], - }; - - // SPS flags - let sps_flags = ash::vk::native::StdVideoH265SpsFlags { - _bitfield_align_1: [], - _bitfield_1: ash::vk::native::StdVideoH265SpsFlags::new_bitfield_1( - 1, // sps_temporal_id_nesting_flag - 0, // separate_colour_plane_flag - if conformance_window_flag { 1 } else { 0 }, // conformance_window_flag - 1, // sps_sub_layer_ordering_info_present_flag - 0, // scaling_list_enabled_flag - 0, // sps_scaling_list_data_present_flag - 1, // amp_enabled_flag (asymmetric motion partitions) - 1, // sample_adaptive_offset_enabled_flag - 0, // pcm_enabled_flag - 0, // pcm_loop_filter_disabled_flag - 0, // long_term_ref_pics_present_flag - 0, // sps_temporal_mvp_enabled_flag - 0, // 
strong_intra_smoothing_enabled_flag - 1, // vui_parameters_present_flag - 0, // sps_extension_present_flag - 0, // sps_range_extension_flag - 0, // transform_skip_rotation_enabled_flag - 0, // transform_skip_context_enabled_flag - 0, // implicit_rdpcm_enabled_flag - 0, // explicit_rdpcm_enabled_flag - 0, // extended_precision_processing_flag - 0, // intra_smoothing_disabled_flag - 0, // high_precision_offsets_enabled_flag - 0, // persistent_rice_adaptation_enabled_flag - 0, // cabac_bypass_alignment_enabled_flag - 0, // sps_scc_extension_flag - 0, // sps_curr_pic_ref_enabled_flag - 0, // palette_mode_enabled_flag - 0, // sps_palette_predictor_initializers_present_flag - 0, // intra_boundary_filtering_disabled_flag - ), - }; - - // Build VUI structure. Defaults to BT.709 when no color description is set. + // Build VPS/SPS/PPS and session parameters via shared helper. let color_desc = config .color_description .unwrap_or(ColorDescription::bt709()); - let vui_flags = ash::vk::native::StdVideoH265SpsVuiFlags { - _bitfield_align_1: [], - _bitfield_1: ash::vk::native::StdVideoH265SpsVuiFlags::new_bitfield_1( - 1, // aspect_ratio_info_present_flag - 0, // overscan_info_present_flag - 0, // overscan_appropriate_flag - 1, // video_signal_type_present_flag - if color_desc.full_range { 1 } else { 0 }, // video_full_range_flag - 1, // colour_description_present_flag - 0, // chroma_loc_info_present_flag - 0, // neutral_chroma_indication_flag - 0, // field_seq_flag - 0, // frame_field_info_present_flag - 0, // default_display_window_flag - 0, // vui_timing_info_present_flag - 0, // vui_poc_proportional_to_timing_flag - 0, // vui_hrd_parameters_present_flag - 0, // bitstream_restriction_flag - 0, // tiles_fixed_structure_flag - 0, // motion_vectors_over_pic_boundaries_flag - 0, // restricted_ref_pic_lists_flag - ), - __bindgen_padding_0: 0, - }; - - let vui = ash::vk::native::StdVideoH265SequenceParameterSetVui { - flags: vui_flags, - aspect_ratio_idc: - 
ash::vk::native::StdVideoH265AspectRatioIdc_STD_VIDEO_H265_ASPECT_RATIO_IDC_SQUARE, - sar_width: 0, - sar_height: 0, - video_format: 5, - colour_primaries: color_desc.color_primaries, - transfer_characteristics: color_desc.transfer_characteristics, - matrix_coeffs: color_desc.matrix_coefficients, - chroma_sample_loc_type_top_field: 0, - chroma_sample_loc_type_bottom_field: 0, - reserved1: 0, - reserved2: 0, - def_disp_win_left_offset: 0, - def_disp_win_right_offset: 0, - def_disp_win_top_offset: 0, - def_disp_win_bottom_offset: 0, - vui_num_units_in_tick: 0, - vui_time_scale: 0, - vui_num_ticks_poc_diff_one_minus1: 0, - min_spatial_segmentation_idc: 0, - reserved3: 0, - max_bytes_per_pic_denom: 0, - max_bits_per_min_cu_denom: 0, - log2_max_mv_length_horizontal: 0, - log2_max_mv_length_vertical: 0, - pHrdParameters: ptr::null(), - }; - - // Calculate bit depth minus 8 values for SPS (0 for 8-bit, 2 for 10-bit) - let bit_depth_minus8: u8 = match config.bit_depth { - BitDepth::Eight => 0, - BitDepth::Ten => 2, - }; - - // Get chroma_format_idc based on pixel format. 
- let chroma_format_idc = match config.pixel_format { - PixelFormat::Yuv420 => { - ash::vk::native::StdVideoH265ChromaFormatIdc_STD_VIDEO_H265_CHROMA_FORMAT_IDC_420 - } - PixelFormat::Yuv444 => { - ash::vk::native::StdVideoH265ChromaFormatIdc_STD_VIDEO_H265_CHROMA_FORMAT_IDC_444 - } - _ => { - return Err(PixelForgeError::InvalidInput(format!( - "Unsupported pixel format for H.265: {:?}", - config.pixel_format - ))); - } - }; - - let sps = ash::vk::native::StdVideoH265SequenceParameterSet { - flags: sps_flags, - chroma_format_idc, - pic_width_in_luma_samples, - pic_height_in_luma_samples, - sps_video_parameter_set_id: 0, - sps_max_sub_layers_minus1: 0, - sps_seq_parameter_set_id: 0, - bit_depth_luma_minus8: bit_depth_minus8, - bit_depth_chroma_minus8: bit_depth_minus8, - log2_max_pic_order_cnt_lsb_minus4: 4, // POC LSB range = 256 (wraps every ~4s at 60fps) - log2_min_luma_coding_block_size_minus3: min_cb_log2_size_y - 3, - log2_diff_max_min_luma_coding_block_size: ctb_log2_size_y - min_cb_log2_size_y, - log2_min_luma_transform_block_size_minus2: log2_min_transform_block_size - 2, - log2_diff_max_min_luma_transform_block_size: log2_max_transform_block_size - - log2_min_transform_block_size, - max_transform_hierarchy_depth_inter: (ctb_log2_size_y - log2_min_transform_block_size) - .max(1), - max_transform_hierarchy_depth_intra: 3, - num_short_term_ref_pic_sets: 0, - num_long_term_ref_pics_sps: 0, - pcm_sample_bit_depth_luma_minus1: 7, - pcm_sample_bit_depth_chroma_minus1: 7, - log2_min_pcm_luma_coding_block_size_minus3: min_cb_log2_size_y - 3, - log2_diff_max_min_pcm_luma_coding_block_size: ctb_log2_size_y - min_cb_log2_size_y, - reserved1: 0, - reserved2: 0, - palette_max_size: 0, - delta_palette_max_predictor_size: 0, - motion_vector_resolution_control_idc: 0, - sps_num_palette_predictor_initializers_minus1: 0, - conf_win_left_offset: 0, - conf_win_right_offset, - conf_win_top_offset: 0, - conf_win_bottom_offset, - pProfileTierLevel: ptr::null(), // Will be set 
below - pDecPicBufMgr: ptr::null(), - pScalingLists: ptr::null(), - pShortTermRefPicSet: ptr::null(), - pLongTermRefPicsSps: ptr::null(), - pSequenceParameterSetVui: &vui, - pPredictorPaletteEntries: ptr::null(), - }; - - // VPS flags - let vps_flags = ash::vk::native::StdVideoH265VpsFlags { - _bitfield_align_1: [], - _bitfield_1: ash::vk::native::StdVideoH265VpsFlags::new_bitfield_1( - 1, // vps_temporal_id_nesting_flag - 1, // vps_sub_layer_ordering_info_present_flag - 0, // vps_timing_info_present_flag - 0, // vps_poc_proportional_to_timing_flag - ), - __bindgen_padding_0: [0; 3], - }; - - let vps = ash::vk::native::StdVideoH265VideoParameterSet { - flags: vps_flags, - vps_video_parameter_set_id: 0, - vps_max_sub_layers_minus1: 0, - reserved1: 0, - reserved2: 0, - vps_num_units_in_tick: 0, - vps_time_scale: 0, - vps_num_ticks_poc_diff_one_minus1: 0, - reserved3: 0, - pDecPicBufMgr: ptr::null(), - pHrdParameters: ptr::null(), - pProfileTierLevel: ptr::null(), - }; - - // PPS flags - let pps_flags = ash::vk::native::StdVideoH265PpsFlags { - _bitfield_align_1: [], - _bitfield_1: ash::vk::native::StdVideoH265PpsFlags::new_bitfield_1( - 0, // dependent_slice_segments_enabled_flag - 0, // output_flag_present_flag - 0, // sign_data_hiding_enabled_flag - 1, // cabac_init_present_flag - 0, // constrained_intra_pred_flag - 1, // transform_skip_enabled_flag - 1, // cu_qp_delta_enabled_flag - 0, // pps_slice_chroma_qp_offsets_present_flag - 0, // weighted_pred_flag - 0, // weighted_bipred_flag - 0, // transquant_bypass_enabled_flag - 0, // tiles_enabled_flag - 0, // entropy_coding_sync_enabled_flag - 0, // uniform_spacing_flag - 0, // loop_filter_across_tiles_enabled_flag - 1, // pps_loop_filter_across_slices_enabled_flag - 1, // deblocking_filter_control_present_flag - 0, // deblocking_filter_override_enabled_flag - 0, // pps_deblocking_filter_disabled_flag - 0, // pps_scaling_list_data_present_flag - 0, // lists_modification_present_flag - 0, // 
slice_segment_header_extension_present_flag - 0, // pps_extension_present_flag - 0, // cross_component_prediction_enabled_flag - 0, // chroma_qp_offset_list_enabled_flag - 0, // pps_curr_pic_ref_enabled_flag - 0, // residual_adaptive_colour_transform_enabled_flag - 0, // pps_slice_act_qp_offsets_present_flag - 0, // pps_palette_predictor_initializers_present_flag - 0, // monochrome_palette_flag - 0, // pps_range_extension_flag - ), - }; - - let pps = ash::vk::native::StdVideoH265PictureParameterSet { - flags: pps_flags, - pps_pic_parameter_set_id: 0, - pps_seq_parameter_set_id: 0, - sps_video_parameter_set_id: 0, - num_extra_slice_header_bits: 0, - num_ref_idx_l0_default_active_minus1: 0, - num_ref_idx_l1_default_active_minus1: 0, - init_qp_minus26: 0, - diff_cu_qp_delta_depth: 0, - pps_cb_qp_offset: 0, - pps_cr_qp_offset: 0, - pps_beta_offset_div2: 0, - pps_tc_offset_div2: 0, - log2_parallel_merge_level_minus2: 0, - log2_max_transform_skip_block_size_minus2: 0, - diff_cu_chroma_qp_offset_depth: 0, - chroma_qp_offset_list_len_minus1: 0, - cb_qp_offset_list: [0; 6], - cr_qp_offset_list: [0; 6], - log2_sao_offset_scale_luma: 0, - log2_sao_offset_scale_chroma: 0, - pps_act_y_qp_offset_plus5: 0, - pps_act_cb_qp_offset_plus5: 0, - pps_act_cr_qp_offset_plus3: 0, - pps_num_palette_predictor_initializers: 0, - luma_bit_depth_entry_minus8: bit_depth_minus8, - chroma_bit_depth_entry_minus8: bit_depth_minus8, - num_tile_columns_minus1: 0, - num_tile_rows_minus1: 0, - reserved1: 0, - reserved2: 0, - column_width_minus1: [0; 19], - row_height_minus1: [0; 21], - reserved3: 0, - pScalingLists: ptr::null(), - pPredictorPaletteEntries: ptr::null(), - }; - - // Box the structures so they live long enough for session parameter creation - let profile_tier_level_boxed = Box::new(profile_tier_level); - let dec_pic_buf_mgr_boxed = Box::new(dec_pic_buf_mgr); - let short_term_ref_pic_set_boxed = Box::new(short_term_ref_pic_set); - let long_term_ref_pics_sps_boxed = 
Box::new(long_term_ref_pics_sps); - - // Create mutable copies with correct pointers - let mut sps_with_ptrs = sps; - sps_with_ptrs.pProfileTierLevel = profile_tier_level_boxed.as_ref(); - sps_with_ptrs.pDecPicBufMgr = dec_pic_buf_mgr_boxed.as_ref(); - sps_with_ptrs.pShortTermRefPicSet = short_term_ref_pic_set_boxed.as_ref(); - sps_with_ptrs.pLongTermRefPicsSps = long_term_ref_pics_sps_boxed.as_ref(); - - let mut vps_with_ptrs = vps; - vps_with_ptrs.pProfileTierLevel = profile_tier_level_boxed.as_ref(); - vps_with_ptrs.pDecPicBufMgr = dec_pic_buf_mgr_boxed.as_ref(); - - let vps_array = [vps_with_ptrs]; - let sps_array = [sps_with_ptrs]; - let pps_array = [pps]; - - let h265_add_info = vk::VideoEncodeH265SessionParametersAddInfoKHR::default() - .std_vp_ss(&vps_array) - .std_sp_ss(&sps_array) - .std_pp_ss(&pps_array); - - let mut h265_params_create_info = - vk::VideoEncodeH265SessionParametersCreateInfoKHR::default() - .max_std_vps_count(1) - .max_std_sps_count(1) - .max_std_pps_count(1) - .parameters_add_info(&h265_add_info); - - // Chain quality level info into session parameters creation. - // This is required by AMD RADV and matches FFmpeg's approach. - let mut quality_level_info = vk::VideoEncodeQualityLevelInfoKHR::default().quality_level(0); // Use quality level 0 (best quality). 
- quality_level_info.p_next = (&mut h265_params_create_info - as *mut vk::VideoEncodeH265SessionParametersCreateInfoKHR) - .cast(); - - let mut params_create_info = - vk::VideoSessionParametersCreateInfoKHR::default().video_session(session); - params_create_info.p_next = - (&mut quality_level_info as *mut vk::VideoEncodeQualityLevelInfoKHR).cast(); - - let mut session_params = vk::VideoSessionParametersKHR::null(); - let result = unsafe { - (video_queue_fn.fp().create_video_session_parameters_khr)( - context.device().handle(), - ¶ms_create_info, - ptr::null(), - &mut session_params, - ) - }; - if result != vk::Result::SUCCESS { - return Err(PixelForgeError::SessionParametersCreation(format!( - "{:?}", - result - ))); - } // Create profile info for images/buffers let mut h265_profile_for_resources = @@ -848,7 +445,7 @@ impl H265Encoder { info!("H.265 encoder created successfully"); - Ok(Self { + let mut encoder = Self { context, config: config.clone(), dpb, @@ -858,7 +455,7 @@ impl H265Encoder { video_queue_fn, video_encode_fn, session, - session_params, + session_params: vk::VideoSessionParametersKHR::null(), session_memory, input_frame_num: 0, encode_frame_num: 0, @@ -888,7 +485,11 @@ impl H265Encoder { current_dpb_slot: 0, l0_references: Vec::new(), active_reference_count: max_active_reference_pictures as u32, + profile_idc, dpb_slot_active, - }) + }; + + encoder.session_params = encoder.create_session_params(&color_desc)?; + Ok(encoder) } } diff --git a/src/encoder/h265/mod.rs b/src/encoder/h265/mod.rs index c4bb371..522f262 100644 --- a/src/encoder/h265/mod.rs +++ b/src/encoder/h265/mod.rs @@ -5,6 +5,7 @@ mod api; mod encode; mod init; +mod session_params; use ash::vk; use tracing::debug; @@ -94,6 +95,8 @@ pub struct H265Encoder { l0_references: Vec, /// Number of active reference frames. active_reference_count: u32, + /// H.265 profile IDC (cached from initialization for session parameter recreation). + profile_idc: u32, // DPB slot activation tracking. 
/// Tracks which DPB slots have been activated (used at least once). diff --git a/src/encoder/h265/session_params.rs b/src/encoder/h265/session_params.rs new file mode 100644 index 0000000..e9e81c0 --- /dev/null +++ b/src/encoder/h265/session_params.rs @@ -0,0 +1,363 @@ +use super::H265Encoder; + +use crate::encoder::{BitDepth, ColorDescription, PixelFormat}; +use crate::error::{PixelForgeError, Result}; +use ash::vk; +use std::ptr; + +impl H265Encoder { + /// Build VPS/SPS/PPS and create Vulkan video session parameters. + /// + /// This is used both during initial encoder creation and by + /// `set_color_description()` to rebuild session parameters with + /// updated VUI color metadata. Keeping a single implementation + /// ensures the parameter sets stay bit-for-bit consistent. + pub(crate) fn create_session_params( + &self, + desc: &ColorDescription, + ) -> Result { + // CTB/CB size constants. + let ctb_log2_size_y: u8 = 5; + let min_cb_log2_size_y: u8 = 4; + let log2_min_transform_block_size: u8 = 2; + let log2_max_transform_block_size: u8 = 5; + + let pic_width_in_luma_samples = self.aligned_width; + let pic_height_in_luma_samples = self.aligned_height; + + let (sub_width_c, sub_height_c) = match self.config.pixel_format { + PixelFormat::Yuv420 => (2u32, 2u32), + PixelFormat::Yuv444 => (1u32, 1u32), + _ => (2u32, 2u32), + }; + let conf_win_right_offset = + (self.aligned_width - self.config.dimensions.width) / sub_width_c; + let conf_win_bottom_offset = + (self.aligned_height - self.config.dimensions.height) / sub_height_c; + let conformance_window_flag = conf_win_right_offset > 0 || conf_win_bottom_offset > 0; + + let profile_tier_level = ash::vk::native::StdVideoH265ProfileTierLevel { + flags: ash::vk::native::StdVideoH265ProfileTierLevelFlags { + _bitfield_align_1: [], + _bitfield_1: ash::vk::native::StdVideoH265ProfileTierLevelFlags::new_bitfield_1( + 0, 1, 0, 0, 1, + ), + __bindgen_padding_0: [0; 3], + }, + general_profile_idc: self.profile_idc, + 
general_level_idc: ash::vk::native::StdVideoH265LevelIdc_STD_VIDEO_H265_LEVEL_IDC_5_1, + }; + + let dec_pic_buf_mgr = ash::vk::native::StdVideoH265DecPicBufMgr { + max_latency_increase_plus1: [0; 7], + max_dec_pic_buffering_minus1: [(self.dpb_slot_count - 1) as u8, 0, 0, 0, 0, 0, 0], + max_num_reorder_pics: [0; 7], + }; + + let short_term_ref_pic_set = ash::vk::native::StdVideoH265ShortTermRefPicSet { + flags: ash::vk::native::StdVideoH265ShortTermRefPicSetFlags { + _bitfield_align_1: [], + _bitfield_1: ash::vk::native::StdVideoH265ShortTermRefPicSetFlags::new_bitfield_1( + 0, 0, + ), + __bindgen_padding_0: [0; 3], + }, + delta_idx_minus1: 0, + use_delta_flag: 0, + abs_delta_rps_minus1: 0, + used_by_curr_pic_flag: 0, + used_by_curr_pic_s0_flag: 1, + used_by_curr_pic_s1_flag: 0, + reserved1: 0, + reserved2: 0, + reserved3: 0, + num_negative_pics: 1, + num_positive_pics: 0, + delta_poc_s0_minus1: [0; 16], + delta_poc_s1_minus1: [0; 16], + }; + + let long_term_ref_pics_sps = ash::vk::native::StdVideoH265LongTermRefPicsSps { + used_by_curr_pic_lt_sps_flag: 0, + lt_ref_pic_poc_lsb_sps: [0; 32], + }; + + let sps_flags = ash::vk::native::StdVideoH265SpsFlags { + _bitfield_align_1: [], + _bitfield_1: ash::vk::native::StdVideoH265SpsFlags::new_bitfield_1( + 1, + 0, + if conformance_window_flag { 1 } else { 0 }, + 1, + 0, + 0, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ), + }; + + let vui_flags = ash::vk::native::StdVideoH265SpsVuiFlags { + _bitfield_align_1: [], + _bitfield_1: ash::vk::native::StdVideoH265SpsVuiFlags::new_bitfield_1( + 1, + 0, + 0, + 1, + if desc.full_range { 1 } else { 0 }, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ), + __bindgen_padding_0: 0, + }; + + let vui = ash::vk::native::StdVideoH265SequenceParameterSetVui { + flags: vui_flags, + aspect_ratio_idc: + ash::vk::native::StdVideoH265AspectRatioIdc_STD_VIDEO_H265_ASPECT_RATIO_IDC_SQUARE, + sar_width: 
0, + sar_height: 0, + video_format: 5, + colour_primaries: desc.color_primaries, + transfer_characteristics: desc.transfer_characteristics, + matrix_coeffs: desc.matrix_coefficients, + chroma_sample_loc_type_top_field: 0, + chroma_sample_loc_type_bottom_field: 0, + reserved1: 0, + reserved2: 0, + def_disp_win_left_offset: 0, + def_disp_win_right_offset: 0, + def_disp_win_top_offset: 0, + def_disp_win_bottom_offset: 0, + vui_num_units_in_tick: 0, + vui_time_scale: 0, + vui_num_ticks_poc_diff_one_minus1: 0, + min_spatial_segmentation_idc: 0, + reserved3: 0, + max_bytes_per_pic_denom: 0, + max_bits_per_min_cu_denom: 0, + log2_max_mv_length_horizontal: 0, + log2_max_mv_length_vertical: 0, + pHrdParameters: ptr::null(), + }; + + let bit_depth_minus8: u8 = match self.config.bit_depth { + BitDepth::Eight => 0, + BitDepth::Ten => 2, + }; + + let chroma_format_idc = match self.config.pixel_format { + PixelFormat::Yuv420 => { + ash::vk::native::StdVideoH265ChromaFormatIdc_STD_VIDEO_H265_CHROMA_FORMAT_IDC_420 + } + PixelFormat::Yuv444 => { + ash::vk::native::StdVideoH265ChromaFormatIdc_STD_VIDEO_H265_CHROMA_FORMAT_IDC_444 + } + _ => { + return Err(PixelForgeError::InvalidInput(format!( + "Unsupported pixel format for H.265: {:?}", + self.config.pixel_format + ))); + } + }; + + let sps = ash::vk::native::StdVideoH265SequenceParameterSet { + flags: sps_flags, + chroma_format_idc, + pic_width_in_luma_samples, + pic_height_in_luma_samples, + sps_video_parameter_set_id: 0, + sps_max_sub_layers_minus1: 0, + sps_seq_parameter_set_id: 0, + bit_depth_luma_minus8: bit_depth_minus8, + bit_depth_chroma_minus8: bit_depth_minus8, + log2_max_pic_order_cnt_lsb_minus4: 4, + log2_min_luma_coding_block_size_minus3: min_cb_log2_size_y - 3, + log2_diff_max_min_luma_coding_block_size: ctb_log2_size_y - min_cb_log2_size_y, + log2_min_luma_transform_block_size_minus2: log2_min_transform_block_size - 2, + log2_diff_max_min_luma_transform_block_size: log2_max_transform_block_size + - 
log2_min_transform_block_size, + max_transform_hierarchy_depth_inter: (ctb_log2_size_y - log2_min_transform_block_size) + .max(1), + max_transform_hierarchy_depth_intra: 3, + num_short_term_ref_pic_sets: 0, + num_long_term_ref_pics_sps: 0, + pcm_sample_bit_depth_luma_minus1: 7, + pcm_sample_bit_depth_chroma_minus1: 7, + log2_min_pcm_luma_coding_block_size_minus3: min_cb_log2_size_y - 3, + log2_diff_max_min_pcm_luma_coding_block_size: ctb_log2_size_y - min_cb_log2_size_y, + reserved1: 0, + reserved2: 0, + palette_max_size: 0, + delta_palette_max_predictor_size: 0, + motion_vector_resolution_control_idc: 0, + sps_num_palette_predictor_initializers_minus1: 0, + conf_win_left_offset: 0, + conf_win_right_offset, + conf_win_top_offset: 0, + conf_win_bottom_offset, + pProfileTierLevel: ptr::null(), + pDecPicBufMgr: ptr::null(), + pScalingLists: ptr::null(), + pShortTermRefPicSet: ptr::null(), + pLongTermRefPicsSps: ptr::null(), + pSequenceParameterSetVui: &vui, + pPredictorPaletteEntries: ptr::null(), + }; + + let profile_tier_level_boxed = Box::new(profile_tier_level); + let dec_pic_buf_mgr_boxed = Box::new(dec_pic_buf_mgr); + let short_term_ref_pic_set_boxed = Box::new(short_term_ref_pic_set); + let long_term_ref_pics_sps_boxed = Box::new(long_term_ref_pics_sps); + + let mut sps_with_ptrs = sps; + sps_with_ptrs.pProfileTierLevel = profile_tier_level_boxed.as_ref(); + sps_with_ptrs.pDecPicBufMgr = dec_pic_buf_mgr_boxed.as_ref(); + sps_with_ptrs.pShortTermRefPicSet = short_term_ref_pic_set_boxed.as_ref(); + sps_with_ptrs.pLongTermRefPicsSps = long_term_ref_pics_sps_boxed.as_ref(); + + let vps_flags = ash::vk::native::StdVideoH265VpsFlags { + _bitfield_align_1: [], + _bitfield_1: ash::vk::native::StdVideoH265VpsFlags::new_bitfield_1(1, 1, 0, 0), + __bindgen_padding_0: [0; 3], + }; + + let vps = ash::vk::native::StdVideoH265VideoParameterSet { + flags: vps_flags, + vps_video_parameter_set_id: 0, + vps_max_sub_layers_minus1: 0, + reserved1: 0, + reserved2: 0, + 
vps_num_units_in_tick: 0, + vps_time_scale: 0, + vps_num_ticks_poc_diff_one_minus1: 0, + reserved3: 0, + pDecPicBufMgr: dec_pic_buf_mgr_boxed.as_ref(), + pHrdParameters: ptr::null(), + pProfileTierLevel: profile_tier_level_boxed.as_ref(), + }; + + let pps_flags = ash::vk::native::StdVideoH265PpsFlags { + _bitfield_align_1: [], + _bitfield_1: ash::vk::native::StdVideoH265PpsFlags::new_bitfield_1( + 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, + ), + }; + + let pps = ash::vk::native::StdVideoH265PictureParameterSet { + flags: pps_flags, + pps_pic_parameter_set_id: 0, + pps_seq_parameter_set_id: 0, + sps_video_parameter_set_id: 0, + num_extra_slice_header_bits: 0, + num_ref_idx_l0_default_active_minus1: 0, + num_ref_idx_l1_default_active_minus1: 0, + init_qp_minus26: 0, + diff_cu_qp_delta_depth: 0, + pps_cb_qp_offset: 0, + pps_cr_qp_offset: 0, + pps_beta_offset_div2: 0, + pps_tc_offset_div2: 0, + log2_parallel_merge_level_minus2: 0, + log2_max_transform_skip_block_size_minus2: 0, + diff_cu_chroma_qp_offset_depth: 0, + chroma_qp_offset_list_len_minus1: 0, + cb_qp_offset_list: [0; 6], + cr_qp_offset_list: [0; 6], + log2_sao_offset_scale_luma: 0, + log2_sao_offset_scale_chroma: 0, + pps_act_y_qp_offset_plus5: 0, + pps_act_cb_qp_offset_plus5: 0, + pps_act_cr_qp_offset_plus3: 0, + pps_num_palette_predictor_initializers: 0, + luma_bit_depth_entry_minus8: bit_depth_minus8, + chroma_bit_depth_entry_minus8: bit_depth_minus8, + num_tile_columns_minus1: 0, + num_tile_rows_minus1: 0, + reserved1: 0, + reserved2: 0, + column_width_minus1: [0; 19], + row_height_minus1: [0; 21], + reserved3: 0, + pScalingLists: ptr::null(), + pPredictorPaletteEntries: ptr::null(), + }; + + let vps_array = [vps]; + let sps_array = [sps_with_ptrs]; + let pps_array = [pps]; + + let h265_add_info = vk::VideoEncodeH265SessionParametersAddInfoKHR::default() + .std_vp_ss(&vps_array) + .std_sp_ss(&sps_array) + .std_pp_ss(&pps_array); + + let mut 
h265_params_create_info = + vk::VideoEncodeH265SessionParametersCreateInfoKHR::default() + .max_std_vps_count(1) + .max_std_sps_count(1) + .max_std_pps_count(1) + .parameters_add_info(&h265_add_info); + + let mut quality_level_info = vk::VideoEncodeQualityLevelInfoKHR::default().quality_level(0); + quality_level_info.p_next = (&mut h265_params_create_info + as *mut vk::VideoEncodeH265SessionParametersCreateInfoKHR) + .cast(); + + let mut params_create_info = + vk::VideoSessionParametersCreateInfoKHR::default().video_session(self.session); + params_create_info.p_next = + (&mut quality_level_info as *mut vk::VideoEncodeQualityLevelInfoKHR).cast(); + + let mut session_params = vk::VideoSessionParametersKHR::null(); + let result = unsafe { + (self.video_queue_fn.fp().create_video_session_parameters_khr)( + self.context.device().handle(), + &params_create_info, + ptr::null(), + &mut session_params, + ) + }; + if result != vk::Result::SUCCESS { + return Err(PixelForgeError::SessionParametersCreation(format!( + "{:?}", + result + ))); + } + + Ok(session_params) + } +} diff --git a/src/encoder/mod.rs b/src/encoder/mod.rs index f925539..983ba12 100644 --- a/src/encoder/mod.rs +++ b/src/encoder/mod.rs @@ -496,6 +496,19 @@ impl Encoder { Encoder::AV1(encoder) => encoder.request_idr(), } } + + /// Update the color description (VUI parameters) for the encoder. + /// + /// This recreates the video session parameters with an updated SPS/VPS/sequence + /// header containing the new color description. The next frame will be encoded as + /// an IDR/key frame with the new parameters.
+ pub fn set_color_description(&mut self, desc: ColorDescription) -> Result<()> { + match self { + Encoder::H264(encoder) => encoder.set_color_description(desc), + Encoder::H265(encoder) => encoder.set_color_description(desc), + Encoder::AV1(encoder) => encoder.set_color_description(desc), + } + } } #[cfg(test)] diff --git a/src/encoder/resources.rs b/src/encoder/resources.rs index d97f0f6..98ec065 100644 --- a/src/encoder/resources.rs +++ b/src/encoder/resources.rs @@ -488,11 +488,13 @@ pub(crate) fn create_command_resources( .map_err(|e| PixelForgeError::CommandBuffer(e.to_string()))?; let upload_command_buffer = upload_command_buffers[0]; - // Create fences. + // Create fences. The encode fence is created signaled so that + // set_color_description() can safely wait on it before the first encode. let fence_create_info = vk::FenceCreateInfo::default(); let upload_fence = unsafe { context.device().create_fence(&fence_create_info, None) } .map_err(|e| PixelForgeError::CommandBuffer(e.to_string()))?; - let encode_fence = unsafe { context.device().create_fence(&fence_create_info, None) } + let signaled_fence_info = vk::FenceCreateInfo::default().flags(vk::FenceCreateFlags::SIGNALED); + let encode_fence = unsafe { context.device().create_fence(&signaled_fence_info, None) } .map_err(|e| PixelForgeError::CommandBuffer(e.to_string()))?; Ok(CommandResources { @@ -669,7 +671,14 @@ pub(crate) fn clear_input_image(context: &VideoContext, params: &ClearImageParam }; // Calculate per-plane sizes. - let plane0_size = params.width * params.height * bytes_per_component; + // For YUV444, align Y plane size to 4 bytes so the UV plane buffer offset + // meets VkBufferImageCopy::bufferOffset alignment requirements. + // YUV420/422 dimensions are always even, so alignment is naturally satisfied. 
+ let plane0_raw = (params.width * params.height * bytes_per_component) as usize; + let plane0_size = match params.pixel_format { + PixelFormat::Yuv444 => crate::align4(plane0_raw) as u32, + _ => plane0_raw as u32, + }; let plane1_size = match params.pixel_format { // YUV 4:2:0 (e.g., NV12): UV plane is half width, half height, 2 components per pixel. PixelFormat::Yuv420 => (params.width / 2) * (params.height / 2) * 2 * bytes_per_component, @@ -1147,11 +1156,13 @@ pub(crate) unsafe fn destroy_encoder_resources( device.free_memory(*memory, None); } - (video_queue_fn.fp().destroy_video_session_parameters_khr)( - device.handle(), - res.session_params, - std::ptr::null(), - ); + if res.session_params != vk::VideoSessionParametersKHR::null() { + (video_queue_fn.fp().destroy_video_session_parameters_khr)( + device.handle(), + res.session_params, + std::ptr::null(), + ); + } (video_queue_fn.fp().destroy_video_session_khr)(device.handle(), res.session, std::ptr::null()); for memory in res.session_memory { @@ -1328,12 +1339,13 @@ pub(crate) unsafe fn record_post_encode_dpb_barrier( /// Submit an encode command buffer and wait for completion. /// -/// Submits the command buffer to the encode queue, waits for the fence, resets it, +/// Submits the command buffer to the encode queue, waits for the fence, /// then reads query results and copies the encoded bitstream data. +/// The fence is reset before submission so it may be in any state on entry. /// /// # Safety /// -/// The command buffer must have been ended. The fence must be in the unsignaled state. +/// The command buffer must have been ended. /// The bitstream buffer pointer must be valid and the buffer must be persistently mapped. 
pub(crate) unsafe fn submit_encode_and_read_bitstream( device: &ash::Device, @@ -1346,6 +1358,14 @@ pub(crate) unsafe fn submit_encode_and_read_bitstream( let submit_info = vk::SubmitInfo::default().command_buffers(std::slice::from_ref(&command_buffer)); + // Reset the fence before submit (it may be signaled from a previous encode + // or from initial creation with SIGNALED_BIT). This ensures the fence is + // unsignaled for queue_submit, and after wait_for_fences it stays signaled — + // which lets set_color_description() safely wait on it between encodes. + device + .reset_fences(&[fence]) + .map_err(|e| PixelForgeError::Synchronization(e.to_string()))?; + device .queue_submit(encode_queue, &[submit_info], fence) .map_err(|e| PixelForgeError::CommandBuffer(e.to_string()))?; @@ -1354,10 +1374,6 @@ pub(crate) unsafe fn submit_encode_and_read_bitstream( .wait_for_fences(&[fence], true, u64::MAX) .map_err(|e| PixelForgeError::CommandBuffer(e.to_string()))?; - device - .reset_fences(&[fence]) - .map_err(|e| PixelForgeError::Synchronization(e.to_string()))?; - // Read query results (offset + bytes_written). #[repr(C)] struct QueryResult { diff --git a/src/image.rs b/src/image.rs index 13cc03c..23be508 100644 --- a/src/image.rs +++ b/src/image.rs @@ -136,12 +136,20 @@ impl InputImage { .map_err(|e| PixelForgeError::MemoryAllocation(e.to_string()))?; // Create staging buffer for uploads. - // Size depends on pixel format and bit depth. - let staging_size = pixel_format.frame_size(width, height) - * match bit_depth { - BitDepth::Eight => 1, - BitDepth::Ten => 2, // 2 bytes per sample for 10-bit - }; + // Size depends on pixel format, bit depth, and alignment padding between planes. + let bytes_per_sample_init = match bit_depth { + BitDepth::Eight => 1usize, + BitDepth::Ten => 2, + }; + let luma_pixels = (width * height) as usize; + let staging_size = match pixel_format { + PixelFormat::Yuv444 => { + // Y plane (aligned to 4 bytes) + UV interleaved plane. 
+ let y_bytes = luma_pixels * bytes_per_sample_init; + crate::align4(y_bytes) + luma_pixels * 2 * bytes_per_sample_init + } + _ => pixel_format.frame_size(width, height) * bytes_per_sample_init, + }; let buffer_create_info = vk::BufferCreateInfo::default() .size(staging_size as vk::DeviceSize) .usage(vk::BufferUsageFlags::TRANSFER_SRC) @@ -547,12 +555,15 @@ impl InputImage { // Copy Y plane directly. dst[..plane_size].copy_from_slice(&yuv_data[..plane_size]); + // Align UV plane offset to 4 bytes for VkBufferImageCopy compliance. + let uv_offset = crate::align4(plane_size); + // Interleave U and V planes for semi-planar format. let u_plane = &yuv_data[plane_size..plane_size * 2]; let v_plane = &yuv_data[plane_size * 2..plane_size * 3]; for i in 0..plane_size { - dst[plane_size + i * 2] = u_plane[i]; - dst[plane_size + i * 2 + 1] = v_plane[i]; + dst[uv_offset + i * 2] = u_plane[i]; + dst[uv_offset + i * 2 + 1] = v_plane[i]; } } BitDepth::Ten => { @@ -564,12 +575,16 @@ impl InputImage { for i in 0..plane_size { dst[i] = (yuv_data[i] as u16) << 8; } + + // Align UV plane offset to 4 bytes for VkBufferImageCopy compliance. + let uv_offset_u16 = crate::align4(plane_size * 2) / 2; + // Interleave and convert U and V planes. let u_plane = &yuv_data[plane_size..plane_size * 2]; let v_plane = &yuv_data[plane_size * 2..plane_size * 3]; for i in 0..plane_size { - dst[plane_size + i * 2] = (u_plane[i] as u16) << 8; - dst[plane_size + i * 2 + 1] = (v_plane[i] as u16) << 8; + dst[uv_offset_u16 + i * 2] = (u_plane[i] as u16) << 8; + dst[uv_offset_u16 + i * 2 + 1] = (v_plane[i] as u16) << 8; } } } @@ -611,7 +626,15 @@ impl InputImage { BitDepth::Eight => 1, BitDepth::Ten => 2, }; - let y_plane_size_bytes = (width * height) as usize * bytes_per_sample; + // For YUV444, align Y plane size to 4 bytes so the UV plane buffer offset + // meets VkBufferImageCopy::bufferOffset alignment requirements. 
+ // YUV420 dimensions are always even (required for 4:2:0), so alignment is + // naturally satisfied; we deliberately do not pad, to stay consistent with the upload functions. + let y_plane_bytes = (width * height) as usize * bytes_per_sample; + let y_plane_size_bytes = match self.pixel_format { + PixelFormat::Yuv444 => crate::align4(y_plane_bytes) as vk::DeviceSize, + _ => y_plane_bytes as vk::DeviceSize, + }; unsafe { device.reset_command_buffer(self.command_buffer, vk::CommandBufferResetFlags::empty()) @@ -676,7 +699,7 @@ impl InputImage { }), // UV plane (interleaved, half resolution). vk::BufferImageCopy::default() - .buffer_offset(y_plane_size_bytes as vk::DeviceSize) + .buffer_offset(y_plane_size_bytes) .buffer_row_length(0) .buffer_image_height(0) .image_subresource(vk::ImageSubresourceLayers { @@ -715,7 +738,7 @@ impl InputImage { }), // UV plane (interleaved, full resolution for 444). vk::BufferImageCopy::default() - .buffer_offset(y_plane_size_bytes as vk::DeviceSize) + .buffer_offset(y_plane_size_bytes) .buffer_row_length(0) .buffer_image_height(0) .image_subresource(vk::ImageSubresourceLayers { diff --git a/src/lib.rs b/src/lib.rs index dbe1ffa..7dfbb45 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -186,6 +186,14 @@ pub mod error; pub mod image; pub mod vulkan; +/// Align a byte size up to a multiple of 4. +/// +/// Required for `VkBufferImageCopy::bufferOffset` to meet the texel block alignment +/// of multi-component plane formats (e.g. R8G8, R16G16). +pub(crate) const fn align4(size: usize) -> usize { + (size + 3) & !3 +} + pub use converter::{ColorConverter, ColorConverterConfig, ColorSpace, InputFormat, OutputFormat}; pub use encoder::{ BitDepth as EncodeBitDepth, Codec, ColorDescription, EncodeConfig, EncodedPacket, Encoder,