Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions codeview/src/types/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ impl<'a> TypesIter<'a> {
/// Creates an iterator that decodes type records from `buffer`.
///
/// The iterator borrows `buffer` for `'a`; it does not copy the data.
pub fn new(buffer: &'a [u8]) -> Self {
Self { buffer }
}

/// Returns the "rest" of the data that has not been parsed.
///
/// The returned slice is the iterator's current `buffer`. It is borrowed for the full `'a`
/// lifetime of the underlying data, not just for the duration of the `&self` borrow.
pub fn rest(&self) -> &'a [u8] {
self.buffer
}
}

impl<'a> HasRestLen for TypesIter<'a> {
Expand All @@ -27,6 +32,11 @@ impl<'a> HasRestLen for TypesIter<'a> {
impl<'a> Iterator for TypesIter<'a> {
type Item = TypeRecord<'a>;

/// Finds the next type record
///
/// This implementation makes an important guarantee: If it cannot decode the next record,
/// it _will not_ change `self.buffer`. This is important because it allows an application
/// to detect the exact length and contents of an unparseable record.
fn next(&mut self) -> Option<TypeRecord<'a>> {
if self.buffer.is_empty() {
return None;
Expand Down
9 changes: 9 additions & 0 deletions msf/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,15 @@ impl<F> Msf<F> {
page_to_offset(self.pages.num_pages, self.pages.page_size)
}

/// Returns the total number of pages in the file, regardless of how each page is used.
///
/// This count includes pages allocated to streams, Page 0, FPM pages, pages that are free
/// (not allocated), and pages allocated to the Stream Directory.
pub fn num_total_pages(&self) -> u32 {
self.pages.num_pages
}

/// Returns the number of free pages.
///
/// This number counts the pages that are _less than_ `num_pages`. There may be pages assigned
Expand Down
2 changes: 1 addition & 1 deletion msfz/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "ms-pdb-msfz"
version = "0.1.9"
version = "0.1.10"
edition = "2024"
description = "Reads Compressed Multi-Stream Files, which is part of the Microsoft PDB file format"
authors = ["Arlie Davis <ardavis@microsoft.com>"]
Expand Down
7 changes: 6 additions & 1 deletion msfz/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ mod stream_data;
mod tests;
mod writer;

pub use reader::*;
pub use reader::{Fragment, FragmentLocation, Msfz, StreamReader};
pub use stream_data::StreamData;
pub use writer::*;

Expand Down Expand Up @@ -100,6 +100,11 @@ pub const COMPRESSION_ZSTD: u32 = 1;
/// This uses the "raw" Deflate stream. It _does not_ use the GZIP encapsulation header.
pub const COMPRESSION_DEFLATE: u32 = 2;

/// This is the maximum file offset where an uncompressed fragment can be stored.
///
/// The MSFZ specification provides 48 bits for storing the file offset of an uncompressed fragment.
pub const MAX_UNCOMPRESSED_FILE_OFFSET: u64 = (1u64 << 48) - 1;

/// Specifies the compression algorithms that are supported by this library.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
#[non_exhaustive]
Expand Down
60 changes: 40 additions & 20 deletions msfz/src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,13 @@ pub struct Msfz<F = RandomAccessFile> {
chunk_cache: Vec<OnceLock<Arc<[u8]>>>,
}

// Describes a region within a stream.
/// Describes a region within a stream.
#[derive(Clone)]
struct Fragment {
size: u32,
location: FragmentLocation,
pub struct Fragment {
/// The size in bytes of the fragment
pub size: u32,
/// The location of the fragment
pub location: FragmentLocation,
}

impl std::fmt::Debug for Fragment {
Expand All @@ -52,25 +54,29 @@ impl std::fmt::Debug for FragmentLocation {
} else if self.is_compressed() {
write!(
f,
"uncompressed at 0x{:06x}",
self.uncompressed_file_offset()
"chunk {} : 0x{:04x}",
self.compressed_first_chunk(),
self.compressed_offset_within_chunk()
)
} else {
write!(
f,
"chunk {} : 0x{:04x}",
self.compressed_first_chunk(),
self.compressed_offset_within_chunk()
"uncompressed at 0x{:06x}",
self.uncompressed_file_offset()
)
}
}
}

const FRAGMENT_LOCATION_32BIT_IS_COMPRESSED_MASK: u32 = 1u32 << 31;

/// A bit mask for the bits within a packed `FragmentLocation` which encode
/// the file offset of an uncompressed fragment.
const UNCOMPRESSED_FRAGMENT_FILE_OFFSET_MASK: u64 = (1u64 << 48) - 1;

/// Represents the location of a fragment, either compressed or uncompressed.
#[derive(Copy, Clone)]
struct FragmentLocation {
pub struct FragmentLocation {
/// bits 0-31
lo: u32,
/// bits 32-63
Expand All @@ -89,11 +95,15 @@ impl FragmentLocation {
self.lo == u32::MAX && self.hi == u32::MAX
}

fn is_compressed(&self) -> bool {
/// Returns `true` if this is a compressed fragment
pub fn is_compressed(&self) -> bool {
(self.hi & FRAGMENT_LOCATION_32BIT_IS_COMPRESSED_MASK) != 0
}

fn compressed_first_chunk(&self) -> u32 {
/// Returns the chunk index for this compressed fragment.
///
/// You must check `is_compressed()` before calling this function.
pub fn compressed_first_chunk(&self) -> u32 {
debug_assert!(!self.is_nil());
debug_assert!(self.is_compressed());
self.hi & !FRAGMENT_LOCATION_32BIT_IS_COMPRESSED_MASK
Expand All @@ -108,7 +118,7 @@ impl FragmentLocation {
fn uncompressed_file_offset(&self) -> u64 {
debug_assert!(!self.is_nil());
debug_assert!(!self.is_compressed());
((self.hi as u64) << 32) | (self.lo as u64)
(((self.hi as u64) << 32) | (self.lo as u64)) & UNCOMPRESSED_FRAGMENT_FILE_OFFSET_MASK
}
}

Expand Down Expand Up @@ -231,7 +241,7 @@ impl<F: ReadAt> Msfz<F> {
/// Gets the fragments for a given stream.
///
/// If `stream` is out of range, returns `None`.
fn stream_fragments(&self, stream: u32) -> Option<&[Fragment]> {
pub fn stream_fragments(&self, stream: u32) -> Option<&[Fragment]> {
let i = stream as usize;
if i < self.stream_fragments.len() - 1 {
let start = self.stream_fragments[i] as usize;
Expand Down Expand Up @@ -481,10 +491,20 @@ impl<F: ReadAt> Msfz<F> {
self.fragments.len()
}

/// Raw access to the Fragments table
///
/// Returns the reader's entire fragments table as one slice, without selecting a
/// particular stream.
pub fn fragments(&self) -> &[Fragment] {
&self.fragments
}

/// The total number of compressed chunks.
///
/// This is the number of entries in the chunk table.
pub fn num_chunks(&self) -> usize {
self.chunk_table.len()
}

/// Raw access to the Chunks table
///
/// Returns the whole chunk table as a slice of [`ChunkEntry`] values; its length is the
/// same value that `num_chunks` reports.
pub fn chunks(&self) -> &[ChunkEntry] {
&self.chunk_table
}
}

/// Allows reading a stream using the [`Read`], [`Seek`], and [`ReadAt`] traits.
Expand Down Expand Up @@ -544,7 +564,9 @@ impl<'a, F: ReadAt> ReadAt for StreamReader<'a, F> {
)?;
buf_xfer.copy_from_slice(chunk_slice);
} else {
// Read the stream data directly from disk.
// Read the stream data directly from disk. We don't validate the file offset or
// the length of the transfer. Instead, we just allow the underlying file system
// to report errors.
let file_offset = fragment.location.uncompressed_file_offset();
self.msfz
.file
Expand All @@ -555,11 +577,9 @@ impl<'a, F: ReadAt> ReadAt for StreamReader<'a, F> {
break;
}

if current_offset >= num_bytes_xfer as u64 {
current_offset -= num_bytes_xfer as u64;
} else {
current_offset = 0;
}
// Since we have finished reading from one fragment and there is more data to read in the
// next fragment, the offset within the fragment becomes zero.
current_offset = 0;
}

Ok(original_buf_len - buf.len())
Expand Down
22 changes: 20 additions & 2 deletions msfz/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ use pow2::Pow2;
use std::fs::File;
use std::io::{Seek, SeekFrom, Write};
use std::path::Path;
use tracing::{debug, debug_span, trace, trace_span};
use tracing::{debug, debug_span, error, trace, trace_span};
use zerocopy::IntoBytes;

/// The default threshold for compressing a chunk of data.
pub const DEFAULT_CHUNK_THRESHOLD: u32 = 0x40_0000; // 4 MiB
pub const DEFAULT_CHUNK_THRESHOLD: u32 = 0x10_0000; // 1 MiB

/// The minimum value for the uncompressed chunk size threshold.
pub const MIN_CHUNK_SIZE: u32 = 0x1000;
Expand Down Expand Up @@ -359,6 +359,11 @@ impl<F: Write + Seek> MsfzWriterFile<F> {
})
}

/// Returns how many more bytes fit in the pending uncompressed chunk buffer before it
/// reaches `uncompressed_chunk_size_threshold`.
///
/// The subtraction saturates, so the result is zero when the buffer is already at or past
/// the threshold.
fn bytes_available_in_chunk_buffer(&self) -> usize {
    let threshold = self.uncompressed_chunk_size_threshold as usize;
    let buffered = self.uncompressed_chunk_data.len();
    threshold.saturating_sub(buffered)
}

#[inline(never)]
fn finish_current_chunk(&mut self) -> std::io::Result<()> {
let _span = debug_span!("finish_current_chunk").entered();
Expand Down Expand Up @@ -459,6 +464,12 @@ impl<'a, F: Write + Seek> StreamWriter<'a, F> {
self.file.finish_current_chunk()
}

/// The number of bytes that can be written to the current chunk buffer, without exceeding
/// the configured maximum.
///
/// This simply forwards to the same-named method on the underlying file writer.
pub fn bytes_available_in_chunk_buffer(&self) -> usize {
self.file.bytes_available_in_chunk_buffer()
}

/// Specifies whether to use chunked compression or not. The default value for this setting is
/// `true` (chunked compression is enabled).
///
Expand Down Expand Up @@ -525,6 +536,13 @@ impl<'a, F: Write + Seek> Write for StreamWriter<'a, F> {
} else {
self.file.out.stream_position()?
};

// The MSFZ spec allocates 48 bits for the file offset of uncompressed fragments.
if fragment_file_offset > MAX_UNCOMPRESSED_FILE_OFFSET {
error!("The uncompressed file fragment ");
return Err(std::io::ErrorKind::FileTooLarge.into());
};

self.file.out.write_all(buf)?;

add_fragment_uncompressed(&mut self.stream.fragments, buf_len, fragment_file_offset);
Expand Down
4 changes: 2 additions & 2 deletions pdb/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "ms-pdb"
version = "0.1.18"
version = "0.1.19"
edition = "2024"
description = "Reads Microsoft Program Database (PDB) files"
authors = ["Arlie Davis <ardavis@microsoft.com>"]
Expand Down Expand Up @@ -39,7 +39,7 @@ version = "0.1.6"
path = "../msf"

[dependencies.ms-pdb-msfz]
version = "0.1.9"
version = "0.1.10"
path = "../msfz"

[dev-dependencies]
Expand Down
4 changes: 2 additions & 2 deletions pdbtool/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "pdbtool"
version = "0.1.18"
version = "0.1.19"
edition = "2024"
description = "A tool for reading Program Database (PDB) files and displaying information about them."
authors = ["Arlie Davis <ardavis@microsoft.com>"]
Expand Down Expand Up @@ -34,5 +34,5 @@ version = "0.1.0"
path = "../coff"

[dependencies.ms-pdb]
version = "0.1.18"
version = "0.1.19"
path = "../pdb"
Loading