Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 47 additions & 33 deletions src/converter/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,33 +85,43 @@ pub enum OutputFormat {
NV12,
/// I420: Y plane, U plane, V plane (4:2:0), 8-bit.
I420,
/// YUV444: Full resolution Y, U, V planes, 8-bit.
/// YUV444 8-bit: 2-plane semi-planar (Y plane + interleaved UV) at full resolution.
YUV444,
/// P010: Y plane followed by interleaved UV (4:2:0), 10-bit in 16-bit words.
P010,
/// YUV444 10-bit: Full resolution Y, U, V in 16-bit words.
/// YUV444 10-bit: 2-plane semi-planar (Y plane + interleaved UV) in 16-bit words.
YUV444P10,
}

impl OutputFormat {
/// Calculate output size in bytes for given dimensions.
///
/// The returned size is always a multiple of 4, since the compute shader writes
/// to a `uint[]` buffer and `vkCmdFillBuffer` requires 4-byte aligned sizes.
///
/// * `width`, `height` - frame dimensions in pixels.
///
/// Returns the total number of bytes covering every plane of this format, passed
/// through `crate::align4` (presumably rounds up to the next multiple of 4 —
/// confirm against its definition).
pub fn output_size(&self, width: u32, height: u32) -> usize {
// NOTE(review): the product is computed in `u32` and only widened afterwards, so
// very large dimensions could overflow before the cast — consider widening first.
let pixel_count = (width * height) as usize;
match self {
let raw = match self {
// 8-bit 4:2:0 layouts: 1.5 bytes per pixel.
OutputFormat::NV12 | OutputFormat::I420 => pixel_count * 3 / 2,
OutputFormat::YUV444 => pixel_count * 3,
OutputFormat::YUV444 => {
// Y plane (aligned to 4 bytes) + UV interleaved plane.
crate::align4(pixel_count) + pixel_count * 2
}
// 10-bit formats use 2 bytes per sample.
OutputFormat::P010 => pixel_count * 3, // Y (2 bytes/sample) + interleaved UV (2 bytes/sample, 4:2:0).
OutputFormat::YUV444P10 => pixel_count * 6, // Y + U + V, each 2 bytes.
}
OutputFormat::YUV444P10 => {
// Y plane (2 bytes/sample, aligned to 4 bytes) + UV interleaved (4 bytes/pixel).
crate::align4(pixel_count * 2) + pixel_count * 4
}
};
// Align the grand total as well, so the whole buffer length is 4-byte aligned.
crate::align4(raw)
}

/// Get the Vulkan format for this output format.
pub fn vulkan_format(&self) -> vk::Format {
match self {
OutputFormat::NV12 => vk::Format::G8_B8R8_2PLANE_420_UNORM,
OutputFormat::I420 => vk::Format::G8_B8_R8_3PLANE_420_UNORM,
OutputFormat::YUV444 => vk::Format::G8_B8_R8_3PLANE_444_UNORM,
OutputFormat::YUV444 => vk::Format::G8_B8R8_2PLANE_444_UNORM,
OutputFormat::P010 => vk::Format::G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16,
OutputFormat::YUV444P10 => vk::Format::G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16,
}
Expand Down Expand Up @@ -150,6 +160,11 @@ pub struct ColorConverterConfig {
/// Full range (0-255 luma) or limited/studio range (16-235 luma).
/// Must match the `full_range` flag in `ColorDescription` for correct playback.
pub full_range: bool,
/// SDR reference white level in nits for the sRGB→BT.2020+PQ conversion.
///
/// Per ITU-R BT.2408 the standard value is 203 nits.
/// Only used when `color_space` is `SrgbToBt2020Pq`.
pub sdr_reference_white_nits: f32,
}

impl ColorConverterConfig {
Expand All @@ -167,6 +182,7 @@ impl ColorConverterConfig {
output_format,
color_space: ColorSpace::Bt709,
full_range: true,
sdr_reference_white_nits: 203.0,
}
}
}
Expand Down Expand Up @@ -231,6 +247,14 @@ impl ColorConverter {
self.config.color_space = color_space;
}

/// Switch between full-range and limited (studio) range output.
///
/// Only the stored configuration is updated here. The flag is handed to the
/// shader through push constants, so the next `convert()` call picks it up and
/// the pipeline does not have to be recreated.
///
/// * `full_range` - `true` for full range, `false` for limited/studio range.
pub fn set_full_range(&mut self, full_range: bool) {
    let config = &mut self.config;
    config.full_range = full_range;
}

/// Build buffer-to-image copy regions for multi-planar YUV formats.
///
/// For multi-planar formats like NV12, I420, and YUV444, we need separate.
Expand Down Expand Up @@ -341,8 +365,10 @@ impl ColorConverter {
]
}
OutputFormat::YUV444 => {
// YUV444: Y, U, V planes all at full resolution.
let plane_size = (self.config.width * self.config.height) as u64;
// YUV444 8-bit 2-plane: Y plane at full resolution, UV interleaved at full resolution.
// Align Y plane size to 4 bytes for VkBufferImageCopy::bufferOffset compliance.
let y_size =
crate::align4((self.config.width * self.config.height) as usize) as u64;
vec![
// Y plane.
vk::BufferImageCopy {
Expand All @@ -362,9 +388,9 @@ impl ColorConverter {
depth: 1,
},
},
// U plane.
// UV plane (interleaved, full resolution).
vk::BufferImageCopy {
buffer_offset: plane_size,
buffer_offset: y_size,
buffer_row_length: 0,
buffer_image_height: 0,
image_subresource: vk::ImageSubresourceLayers {
Expand All @@ -380,24 +406,6 @@ impl ColorConverter {
depth: 1,
},
},
// V plane.
vk::BufferImageCopy {
buffer_offset: plane_size * 2,
buffer_row_length: 0,
buffer_image_height: 0,
image_subresource: vk::ImageSubresourceLayers {
aspect_mask: vk::ImageAspectFlags::PLANE_2,
mip_level: 0,
base_array_layer: 0,
layer_count: 1,
},
image_offset: vk::Offset3D { x: 0, y: 0, z: 0 },
image_extent: vk::Extent3D {
width: self.config.width,
height: self.config.height,
depth: 1,
},
},
]
}
OutputFormat::P010 => {
Expand Down Expand Up @@ -446,8 +454,9 @@ impl ColorConverter {
}
OutputFormat::YUV444P10 => {
// YUV444 10-bit: 2-plane format (Y plane, UV interleaved).
// Note: Using 2-plane format as that's what the encoder expects.
let y_size = (self.config.width * self.config.height * 2) as u64;
// Align Y plane size to 4 bytes for VkBufferImageCopy::bufferOffset compliance.
let y_size =
crate::align4((self.config.width * self.config.height * 2) as usize) as u64;
vec![
// Y plane (16-bit samples).
vk::BufferImageCopy {
Expand Down Expand Up @@ -628,14 +637,15 @@ impl ColorConverter {
&[],
);

// Push constants: width, height, input_format, output_format, color_space, full_range.
let push_constants: [u32; 6] = [
// Push constants: width, height, input_format, output_format, color_space, full_range, sdr_white_nits.
let push_constants: [u32; 7] = [
self.config.width,
self.config.height,
self.config.input_format as u32,
self.config.output_format as u32,
self.config.color_space as u32,
self.config.full_range as u32,
self.config.sdr_reference_white_nits.to_bits(),
];
let push_constants_bytes: &[u8] = std::slice::from_raw_parts(
push_constants.as_ptr() as *const u8,
Expand Down Expand Up @@ -989,6 +999,7 @@ mod tests {
output_format: OutputFormat::NV12,
color_space: ColorSpace::Bt709,
full_range: true,
sdr_reference_white_nits: 203.0,
};

let cloned = config.clone();
Expand All @@ -1009,6 +1020,7 @@ mod tests {
output_format: OutputFormat::I420,
color_space: ColorSpace::Bt709,
full_range: true,
sdr_reference_white_nits: 203.0,
};

let debug_str = format!("{:?}", config);
Expand Down Expand Up @@ -1050,6 +1062,7 @@ mod tests {
output_format: OutputFormat::NV12,
color_space: ColorSpace::Bt709,
full_range: true,
sdr_reference_white_nits: 203.0,
};

let result = ColorConverter::new(context, config);
Expand Down Expand Up @@ -1084,6 +1097,7 @@ mod tests {
output_format: *output_format,
color_space: ColorSpace::Bt709,
full_range: true,
sdr_reference_white_nits: 203.0,
};

let result = ColorConverter::new(context.clone(), config);
Expand Down
2 changes: 1 addition & 1 deletion src/converter/pipeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ pub fn create_converter(
let push_constant_range = vk::PushConstantRange::default()
.stage_flags(vk::ShaderStageFlags::COMPUTE)
.offset(0)
.size(24); // 6 x u32: width, height, input_format, output_format, color_space, full_range
.size(28); // 7 x u32: width, height, input_format, output_format, color_space, full_range, sdr_white_nits(f32)

let pipeline_layout_info = vk::PipelineLayoutCreateInfo::default()
.set_layouts(std::slice::from_ref(&descriptor_set_layout))
Expand Down
25 changes: 14 additions & 11 deletions src/converter/shader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ layout(push_constant) uniform PushConstants {
uint output_format; // 0=NV12, 1=I420, 2=YUV444, 3=P010, 4=YUV444P10
uint color_space; // 0=BT.709, 1=BT.2020, 2=sRGB→BT.2020+PQ
uint full_range; // 0=limited/studio range, 1=full range
float sdr_white_nits; // SDR reference white (nits), used for sRGB→BT.2020+PQ
} params;

// Source image sampled directly — eliminates the image-to-buffer copy.
Expand Down Expand Up @@ -129,8 +130,9 @@ vec3 read_rgb(ivec2 coord) {
// sRGB→BT.2020+PQ: decode sRGB gamma → linear BT.709 → BT.2020 gamut → PQ.
vec3 linear_709 = srgb_to_linear(rgba.rgb);
vec3 linear_2020 = bt709_to_bt2020(linear_709);
// SDR reference white at 203 nits (ITU-R BT.2408) → normalize to PQ's 10000 nit scale.
return linear_to_pq(linear_2020 * (203.0 / 10000.0));
// SDR reference white (configurable, default 203 nits per ITU-R BT.2408)
// normalized to PQ's 10000 nit scale.
return linear_to_pq(linear_2020 * (params.sdr_white_nits / 10000.0));
}
// BT.709 or BT.2020 passthrough: values are already properly encoded.
return rgba.rgb;
Expand Down Expand Up @@ -198,20 +200,20 @@ void main() {
uint pixel_count = params.width * params.height;

if (params.output_format == 2u) {
// YUV444 8-bit: Full resolution, byte-packed into uints.
// YUV444 8-bit: 2-plane semi-planar (Y plane + UV interleaved).
uint y_byte_idx = pixel_idx;
uint y_word_idx = y_byte_idx / 4u;
uint y_byte_offset = y_byte_idx % 4u;
atomicOr(output_data[y_word_idx], q8_y(yuv.x) << (y_byte_offset * 8u));

uint u_base = pixel_count;
uint u_byte_idx = u_base + pixel_idx;
uint u_word_idx = u_byte_idx / 4u;
uint u_byte_offset = u_byte_idx % 4u;
atomicOr(output_data[u_word_idx], q8_c(yuv.y) << (u_byte_offset * 8u));
// Align UV plane offset to 4 bytes for VkBufferImageCopy compliance.
uint uv_base = (pixel_count + 3u) & ~3u;
uint uv_byte_idx = uv_base + pixel_idx * 2u;
uint uv_word_idx = uv_byte_idx / 4u;
uint uv_byte_offset = uv_byte_idx % 4u;
atomicOr(output_data[uv_word_idx], q8_c(yuv.y) << (uv_byte_offset * 8u));

uint v_base = 2u * pixel_count;
uint v_byte_idx = v_base + pixel_idx;
uint v_byte_idx = uv_byte_idx + 1u;
uint v_word_idx = v_byte_idx / 4u;
uint v_byte_offset = v_byte_idx % 4u;
atomicOr(output_data[v_word_idx], q8_c(yuv.z) << (v_byte_offset * 8u));
Expand All @@ -221,7 +223,8 @@ void main() {
uint y_packed_idx = pixel_idx / 2u;
atomicOr(output_data[y_packed_idx], q10_y(yuv.x) << (y_half_offset * 16u));

uint uv_base_words = pixel_count / 2u;
// Align UV base to 4 bytes: Y plane is ceil(pixel_count/2) words.
uint uv_base_words = (pixel_count + 1u) / 2u;
uint uv_word_idx = uv_base_words + pixel_idx;
uint uv_packed = q10_c(yuv.y) | (q10_c(yuv.z) << 16u);
output_data[uv_word_idx] = uv_packed;
Expand Down
51 changes: 50 additions & 1 deletion src/encoder/av1/api.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use super::AV1Encoder;

use crate::encoder::gop::{GopFrameType, GopPosition};
use crate::encoder::EncodedPacket;
use crate::encoder::{ColorDescription, EncodedPacket};
use crate::error::{PixelForgeError, Result};
use ash::vk;
use tracing::debug;
Expand Down Expand Up @@ -207,4 +207,53 @@ impl AV1Encoder {
}
}
}

/// Replace the color description carried by the AV1 sequence header.
///
/// New video session parameters are built from `desc` and swapped in for the
/// current ones. The cached sequence header is discarded and an IDR is requested
/// from the GOP, so the next encoded frame is meant to be a key frame with the
/// rebuilt sequence header prepended.
///
/// # Errors
///
/// Returns `PixelForgeError::Synchronization` when waiting on the encode fence
/// fails, and forwards any error from `create_session_params`. In either case the
/// previously installed session parameters are left untouched.
pub fn set_color_description(&mut self, desc: ColorDescription) -> Result<()> {
    // Let any in-flight encode finish before the session parameters are touched.
    // The fence is intentionally left signaled: submit_encode_and_read_bitstream()
    // resets it right before queue_submit, so back-to-back calls to this method
    // cannot deadlock waiting on a fence that nobody will signal.
    let fence_wait = unsafe {
        self.context
            .device()
            .wait_for_fences(&[self.encode_fence], true, u64::MAX)
    };
    fence_wait.map_err(|e| {
        PixelForgeError::Synchronization(format!("Failed to wait for encode fence: {:?}", e))
    })?;

    // Build the replacement first; if this fails, `?` returns while the old
    // handle is still installed and still alive.
    let fresh_params = self.create_session_params(&desc)?;

    // Install the new handle and retire the one it replaces.
    let retired_params = std::mem::replace(&mut self.session_params, fresh_params);
    unsafe {
        self.video_queue_fn
            .destroy_video_session_parameters(retired_params, None);
    }

    self.config.color_description = Some(desc);
    // The cached sequence header no longer matches the session parameters.
    self.header_data = None;
    self.gop.request_idr();

    debug!(
        "AV1 color description updated: primaries={}, transfer={}, matrix={}, full_range={}",
        desc.color_primaries,
        desc.transfer_characteristics,
        desc.matrix_coefficients,
        desc.full_range
    );

    Ok(())
}
}
Loading
Loading