microsoft · sivadeilra · Feb 10, 2026 · Feb 4, 2026 · Feb 9, 2026 · Feb 10, 2026
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/codeview/src/types/iter.rs b/codeview/src/types/iter.rs
@@ -16,6 +16,11 @@ impl<'a> TypesIter<'a> {
     pub fn new(buffer: &'a [u8]) -> Self {
         Self { buffer }
     }
+
+    /// Returns the "rest" of the data that has not been parsed.
+    pub fn rest(&self) -> &'a [u8] {
+        self.buffer
+    }
 }
 
 impl<'a> HasRestLen for TypesIter<'a> {
@@ -27,6 +32,11 @@ impl<'a> HasRestLen for TypesIter<'a> {
 impl<'a> Iterator for TypesIter<'a> {
     type Item = TypeRecord<'a>;
 
+    /// Finds the next type record
+    ///
+    /// This implementation makes an important guarantee: If it cannot decode the next record,
+    /// it _will not_ change `self.buffer`. This is important because it allows an application
+    /// to detect the exact length and contents of an unparseable record.
     fn next(&mut self) -> Option<TypeRecord<'a>> {
         if self.buffer.is_empty() {
             return None;

diff --git a/msf/src/lib.rs b/msf/src/lib.rs
@@ -353,6 +353,15 @@ impl<F> Msf<F> {
         page_to_offset(self.pages.num_pages, self.pages.page_size)
     }
 
+    /// Returns the total number of pages in the file, counting every page regardless of whether
+    /// it is currently allocated.
+    ///
+    /// This count includes pages allocated to streams, Page 0, FPM pages, pages that are free
+    /// (not allocated), and pages allocated to the Stream Directory.
+    pub fn num_total_pages(&self) -> u32 {
+        self.pages.num_pages
+    }
+
     /// Returns the number of free pages.
     ///
     /// This number counts the pages that are _less than_ `num_pages`. There may be pages assigned

diff --git a/msfz/Cargo.toml b/msfz/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "ms-pdb-msfz"
-version = "0.1.9"
+version = "0.1.10"
 edition = "2024"
 description = "Reads Compressed Multi-Stream Files, which is part of the Microsoft PDB file format"
 authors = ["Arlie Davis <ardavis@microsoft.com>"]

diff --git a/msfz/src/lib.rs b/msfz/src/lib.rs
@@ -23,7 +23,7 @@ mod stream_data;
 mod tests;
 mod writer;
 
-pub use reader::*;
+pub use reader::{Fragment, FragmentLocation, Msfz, StreamReader};
 pub use stream_data::StreamData;
 pub use writer::*;
 
@@ -100,6 +100,11 @@ pub const COMPRESSION_ZSTD: u32 = 1;
 /// This uses the "raw" Deflate stream. It _does not_ use the GZIP encapsulation header.
 pub const COMPRESSION_DEFLATE: u32 = 2;
 
+/// This is the maximum file offset where an uncompressed fragment can be stored.
+///
+/// The MSFZ specification provides 48 bits for storing the file offset of an uncompressed fragment.
+pub const MAX_UNCOMPRESSED_FILE_OFFSET: u64 = (1u64 << 48) - 1;
+
 /// Specifies the compression algorithms that are supported by this library.
 #[derive(Copy, Clone, Eq, PartialEq, Debug)]
 #[non_exhaustive]

diff --git a/msfz/src/reader.rs b/msfz/src/reader.rs
@@ -32,11 +32,13 @@ pub struct Msfz<F = RandomAccessFile> {
     chunk_cache: Vec<OnceLock<Arc<[u8]>>>,
 }
 
-// Describes a region within a stream.
+/// Describes a region within a stream.
 #[derive(Clone)]
-struct Fragment {
-    size: u32,
-    location: FragmentLocation,
+pub struct Fragment {
+    /// The size in bytes of the fragment
+    pub size: u32,
+    /// The location of the fragment
+    pub location: FragmentLocation,
 }
 
 impl std::fmt::Debug for Fragment {
@@ -52,25 +54,29 @@ impl std::fmt::Debug for FragmentLocation {
         } else if self.is_compressed() {
             write!(
                 f,
-                "uncompressed at 0x{:06x}",
-                self.uncompressed_file_offset()
+                "chunk {} : 0x{:04x}",
+                self.compressed_first_chunk(),
+                self.compressed_offset_within_chunk()
             )
         } else {
             write!(
                 f,
-                "chunk {} : 0x{:04x}",
-                self.compressed_first_chunk(),
-                self.compressed_offset_within_chunk()
+                "uncompressed at 0x{:06x}",
+                self.uncompressed_file_offset()
             )
         }
     }
 }
 
 const FRAGMENT_LOCATION_32BIT_IS_COMPRESSED_MASK: u32 = 1u32 << 31;
 
+/// A bit mask for the bits within a packed `FragmentLocation` which encode
+/// the file offset of an uncompressed fragment.
+const UNCOMPRESSED_FRAGMENT_FILE_OFFSET_MASK: u64 = (1u64 << 48) - 1;
+
 /// Represents the location of a fragment, either compressed or uncompressed.
 #[derive(Copy, Clone)]
-struct FragmentLocation {
+pub struct FragmentLocation {
     /// bits 0-31
     lo: u32,
     /// bits 32-63
@@ -89,11 +95,15 @@ impl FragmentLocation {
         self.lo == u32::MAX && self.hi == u32::MAX
     }
 
-    fn is_compressed(&self) -> bool {
+    /// Returns `true` if this is a compressed fragment
+    pub fn is_compressed(&self) -> bool {
         (self.hi & FRAGMENT_LOCATION_32BIT_IS_COMPRESSED_MASK) != 0
     }
 
-    fn compressed_first_chunk(&self) -> u32 {
+    /// Returns the chunk index for this compressed fragment.
+    ///
+    /// You must check `is_compressed()` before calling this function.
+    pub fn compressed_first_chunk(&self) -> u32 {
         debug_assert!(!self.is_nil());
         debug_assert!(self.is_compressed());
         self.hi & !FRAGMENT_LOCATION_32BIT_IS_COMPRESSED_MASK
@@ -108,7 +118,7 @@ impl FragmentLocation {
     fn uncompressed_file_offset(&self) -> u64 {
         debug_assert!(!self.is_nil());
         debug_assert!(!self.is_compressed());
-        ((self.hi as u64) << 32) | (self.lo as u64)
+        (((self.hi as u64) << 32) | (self.lo as u64)) & UNCOMPRESSED_FRAGMENT_FILE_OFFSET_MASK
     }
 }
 
@@ -231,7 +241,7 @@ impl<F: ReadAt> Msfz<F> {
     /// Gets the fragments for a given stream.
     ///
     /// If `stream` is out of range, returns `None`.
-    fn stream_fragments(&self, stream: u32) -> Option<&[Fragment]> {
+    pub fn stream_fragments(&self, stream: u32) -> Option<&[Fragment]> {
         let i = stream as usize;
         if i < self.stream_fragments.len() - 1 {
             let start = self.stream_fragments[i] as usize;
@@ -481,10 +491,20 @@ impl<F: ReadAt> Msfz<F> {
         self.fragments.len()
     }
 
+    /// Raw access to the Fragments table
+    pub fn fragments(&self) -> &[Fragment] {
+        &self.fragments
+    }
+
     /// The total number of compressed chunks.
     pub fn num_chunks(&self) -> usize {
         self.chunk_table.len()
     }
+
+    /// Raw access to the Chunks table
+    pub fn chunks(&self) -> &[ChunkEntry] {
+        &self.chunk_table
+    }
 }
 
 /// Allows reading a stream using the [`Read`], [`Seek`], and [`ReadAt`] traits.
@@ -544,7 +564,9 @@ impl<'a, F: ReadAt> ReadAt for StreamReader<'a, F> {
                 )?;
                 buf_xfer.copy_from_slice(chunk_slice);
             } else {
-                // Read the stream data directly from disk.
+                // Read the stream data directly from disk.  We don't validate the file offset or
+                // the length of the transfer. Instead, we just allow the underlying file system
+                // to report errors.
                 let file_offset = fragment.location.uncompressed_file_offset();
                 self.msfz
                     .file
@@ -555,11 +577,9 @@ impl<'a, F: ReadAt> ReadAt for StreamReader<'a, F> {
                 break;
             }
 
-            if current_offset >= num_bytes_xfer as u64 {
-                current_offset -= num_bytes_xfer as u64;
-            } else {
-                current_offset = 0;
-            }
+            // Since we have finished reading from one chunk and there is more data to read in the
+            // next chunk, the "offset within chunk" becomes zero.
+            current_offset = 0;
         }
 
         Ok(original_buf_len - buf.len())

diff --git a/msfz/src/writer.rs b/msfz/src/writer.rs
@@ -4,11 +4,11 @@ use pow2::Pow2;
 use std::fs::File;
 use std::io::{Seek, SeekFrom, Write};
 use std::path::Path;
-use tracing::{debug, debug_span, trace, trace_span};
+use tracing::{debug, debug_span, error, trace, trace_span};
 use zerocopy::IntoBytes;
 
 /// The default threshold for compressing a chunk of data.
-pub const DEFAULT_CHUNK_THRESHOLD: u32 = 0x40_0000; // 4 MiB
+pub const DEFAULT_CHUNK_THRESHOLD: u32 = 0x10_0000; // 1 MiB
 
 /// The minimum value for the uncompressed chunk size threshold.
 pub const MIN_CHUNK_SIZE: u32 = 0x1000;
@@ -359,6 +359,11 @@ impl<F: Write + Seek> MsfzWriterFile<F> {
         })
     }
 
+    fn bytes_available_in_chunk_buffer(&self) -> usize {
+        (self.uncompressed_chunk_size_threshold as usize)
+            .saturating_sub(self.uncompressed_chunk_data.len())
+    }
+
     #[inline(never)]
     fn finish_current_chunk(&mut self) -> std::io::Result<()> {
         let _span = debug_span!("finish_current_chunk").entered();
@@ -459,6 +464,12 @@ impl<'a, F: Write + Seek> StreamWriter<'a, F> {
         self.file.finish_current_chunk()
     }
 
+    /// The number of bytes that can be written to the current chunk buffer, without exceeding
+    /// the configured maximum.
+    pub fn bytes_available_in_chunk_buffer(&self) -> usize {
+        self.file.bytes_available_in_chunk_buffer()
+    }
+
     /// Specifies whether to use chunked compression or not. The default value for this setting is
     /// `true` (chunked compression is enabled).
     ///
@@ -525,6 +536,13 @@ impl<'a, F: Write + Seek> Write for StreamWriter<'a, F> {
             } else {
                 self.file.out.stream_position()?
             };
+
+            // The MSFZ spec allocates 48 bits for the file offset of uncompressed fragments.
+            if fragment_file_offset > MAX_UNCOMPRESSED_FILE_OFFSET {
+                error!("The uncompressed file fragment ");
+                return Err(std::io::ErrorKind::FileTooLarge.into());
+            };
+
             self.file.out.write_all(buf)?;
 
             add_fragment_uncompressed(&mut self.stream.fragments, buf_len, fragment_file_offset);

diff --git a/pdb/Cargo.toml b/pdb/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "ms-pdb"
-version = "0.1.18"
+version = "0.1.19"
 edition = "2024"
 description = "Reads Microsoft Program Database (PDB) files"
 authors = ["Arlie Davis <ardavis@microsoft.com>"]
@@ -39,7 +39,7 @@ version = "0.1.6"
 path = "../msf"
 
 [dependencies.ms-pdb-msfz]
-version = "0.1.9"
+version = "0.1.10"
 path = "../msfz"
 
 [dev-dependencies]

diff --git a/pdbtool/Cargo.toml b/pdbtool/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "pdbtool"
-version = "0.1.18"
+version = "0.1.19"
 edition = "2024"
 description = "A tool for reading Program Database (PDB) files and displaying information about them."
 authors = ["Arlie Davis <ardavis@microsoft.com>"]
@@ -34,5 +34,5 @@ version = "0.1.0"
 path = "../coff"
 
 [dependencies.ms-pdb]
-version = "0.1.18"
+version = "0.1.19"
 path = "../pdb"