From c955216d5a5c336c9be65b661469fa80a4581285 Mon Sep 17 00:00:00 2001 From: n4n5 Date: Wed, 17 Dec 2025 20:26:01 -0700 Subject: [PATCH 01/13] git push iterable zip --- src/read.rs | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/src/read.rs b/src/read.rs index f45fb1908..dcc52401c 100644 --- a/src/read.rs +++ b/src/read.rs @@ -110,6 +110,111 @@ pub(crate) mod zip_archive { } } +/// Iterable version of ZipArchive +pub struct IterableZipArchive { + #[allow(unused)] + pub(crate) config: Config, + pub(crate) iterable_shared: IterableShared, +} +impl IterableZipArchive { + /// Try to create a new zip archive + pub fn try_new(reader: R) -> ZipResult> { + Self::with_config(Default::default(), reader) + } + + /// Read the central header + fn read_central_header( + dir_info: CentralDirectoryInfo, + reader: R, + ) -> ZipResult> { + // If the parsed number of files is greater than the offset then + // something fishy is going on and we shouldn't trust number_of_files. + let file_capacity = if dir_info.number_of_files > dir_info.directory_start as usize { + 0 + } else { + dir_info.number_of_files + }; + + if dir_info.disk_number != dir_info.disk_with_central_directory { + return unsupported_zip_error("Support for multi-disk files is not implemented"); + } + + if file_capacity.saturating_mul(size_of::()) > isize::MAX as usize { + return unsupported_zip_error("Oversized central directory"); + } + + IterableShared::try_new(reader, dir_info) + } + + fn with_config(config: Config, mut reader: R) -> ZipResult> { + let file_len = reader.seek(io::SeekFrom::End(0))?; + let mut end_exclusive = file_len; + let mut last_err = None; + + let dir_info = loop { + let cde = match spec::find_central_directory( + &mut reader, + config.archive_offset, + end_exclusive, + file_len, + ) { + Ok(cde) => cde, + Err(e) => return Err(last_err.unwrap_or(e)), + }; + + match CentralDirectoryInfo::try_from(&cde) { + Ok(info) => break info, + Err(e) => { + last_err = Some(e); + end_exclusive = cde.eocd.position; + } + } + }; + + let iterable_shared = Self::read_central_header(dir_info, reader)?; + + Ok(IterableZipArchive { + config, + iterable_shared, + }) + } + + /// Get the file as an iterator + pub fn files(&mut self) -> &mut IterableShared { + &mut self.iterable_shared + } +} + +/// Iterable Files +#[derive(Debug)] +pub struct IterableShared { + reader: R, + dir_info: CentralDirectoryInfo, +} + +impl IterableShared { + /// Try to create an iterable of files + pub(crate) fn try_new(mut reader: R, dir_info: CentralDirectoryInfo) -> ZipResult { + reader.seek(SeekFrom::Start(dir_info.directory_start))?; + Ok(Self { reader, dir_info }) + } +} + +impl Iterator for IterableShared { + // We can refer to this type using Self::Item + type Item = ZipFileData; + + // Here, we define the sequence using `.curr` and `.next`. + // The return type is `Option`: + // * When the `Iterator` is finished, `None` is returned. + // * Otherwise, the next value is wrapped in `Some` and returned. + // We use Self::Item in the return type, so we can change + // the type without having to update the function signatures. + fn next(&mut self) -> Option { + central_header_to_zip_file(&mut self.reader, &self.dir_info).ok() + } +} + #[cfg(feature = "aes-crypto")] use crate::aes::PWD_VERIFY_LENGTH; use crate::extra_fields::UnicodeExtraField; From 6cd92f6149c9a93075fcbab89468f3177657a1f9 Mon Sep 17 00:00:00 2001 From: n4n5 Date: Wed, 17 Dec 2025 20:32:06 -0700 Subject: [PATCH 02/13] check number of files --- src/read.rs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/read.rs b/src/read.rs index dcc52401c..6be400778 100644 --- a/src/read.rs +++ b/src/read.rs @@ -190,19 +190,24 @@ impl IterableZipArchive { pub struct IterableShared { reader: R, dir_info: CentralDirectoryInfo, + current_file: usize, } impl IterableShared { /// Try to create an iterable of files pub(crate) fn try_new(mut reader: R, dir_info: CentralDirectoryInfo) -> ZipResult { reader.seek(SeekFrom::Start(dir_info.directory_start))?; - Ok(Self { reader, dir_info }) + Ok(Self { + reader, + dir_info, + current_file: 0, + }) } } impl Iterator for IterableShared { // We can refer to this type using Self::Item - type Item = ZipFileData; + type Item = ZipResult; // Here, we define the sequence using `.curr` and `.next`. // The return type is `Option`: @@ -211,7 +216,11 @@ impl Iterator for IterableShared { // We use Self::Item in the return type, so we can change // the type without having to update the function signatures. fn next(&mut self) -> Option { - central_header_to_zip_file(&mut self.reader, &self.dir_info).ok() + if self.current_file >= self.dir_info.number_of_files { + return None; + } + self.current_file += 1; + Some(central_header_to_zip_file(&mut self.reader, &self.dir_info)) } } From ff180cad77a6acf5aec42fbd51557897a2d8ce0c Mon Sep 17 00:00:00 2001 From: n4n5 Date: Wed, 17 Dec 2025 20:35:26 -0700 Subject: [PATCH 03/13] cleanup --- src/read.rs | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/src/read.rs b/src/read.rs index 6be400778..d0a5e3a60 100644 --- a/src/read.rs +++ b/src/read.rs @@ -129,20 +129,14 @@ impl IterableZipArchive { ) -> ZipResult> { // If the parsed number of files is greater than the offset then // something fishy is going on and we shouldn't trust number_of_files. - let file_capacity = if dir_info.number_of_files > dir_info.directory_start as usize { - 0 - } else { - dir_info.number_of_files - }; + if dir_info.number_of_files > dir_info.directory_start as usize { + return unsupported_zip_error("Fishy error :)"); + } if dir_info.disk_number != dir_info.disk_with_central_directory { return unsupported_zip_error("Support for multi-disk files is not implemented"); } - if file_capacity.saturating_mul(size_of::()) > isize::MAX as usize { - return unsupported_zip_error("Oversized central directory"); - } - IterableShared::try_new(reader, dir_info) } @@ -206,15 +200,8 @@ impl IterableShared { } impl Iterator for IterableShared { - // We can refer to this type using Self::Item type Item = ZipResult; - // Here, we define the sequence using `.curr` and `.next`. - // The return type is `Option`: - // * When the `Iterator` is finished, `None` is returned. - // * Otherwise, the next value is wrapped in `Some` and returned. - // We use Self::Item in the return type, so we can change - // the type without having to update the function signatures. fn next(&mut self) -> Option { if self.current_file >= self.dir_info.number_of_files { return None; From 53d36f90c0b871cae65126d231103ae71447041b Mon Sep 17 00:00:00 2001 From: n4n5 Date: Wed, 7 Jan 2026 19:14:02 -0700 Subject: [PATCH 04/13] sync --- src/read.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/read.rs b/src/read.rs index d0a5e3a60..569b45756 100644 --- a/src/read.rs +++ b/src/read.rs @@ -118,8 +118,8 @@ pub struct IterableZipArchive { } impl IterableZipArchive { /// Try to create a new zip archive - pub fn try_new(reader: R) -> ZipResult> { - Self::with_config(Default::default(), reader) + pub fn try_new(reader: R, config: Config) -> ZipResult> { + Self::with_config(config, reader) } /// Read the central header From c8dd1d6523f94cfb23e8f20d8ad1ad33e882f728 Mon Sep 17 00:00:00 2001 From: n4n5 Date: Sun, 11 Jan 2026 16:35:44 -0700 Subject: [PATCH 05/13] refactor --- benches/read_entry.rs | 35 ++++++++++++++- src/read.rs | 99 +++++++++++++++++++++++++++++++------------ 2 files changed, 104 insertions(+), 30 deletions(-) diff --git a/benches/read_entry.rs b/benches/read_entry.rs index 8fd4ef3a5..927ae6e4f 100644 --- a/benches/read_entry.rs +++ b/benches/read_entry.rs @@ -3,7 +3,7 @@ use bencher::{benchmark_group, benchmark_main}; use std::io::{Cursor, Read, Write}; use bencher::Bencher; -use zip::{write::SimpleFileOptions, ZipArchive, ZipWriter}; +use zip::{read::Config, write::SimpleFileOptions, ZipArchive, ZipWriter}; fn generate_random_archive(size: usize) -> Vec { let data = Vec::new(); @@ -37,5 +37,36 @@ fn read_entry(bench: &mut Bencher) { bench.bytes = size as u64; } -benchmark_group!(benches, read_entry); +fn read_entry_iterable(bench: &mut Bencher) { + use zip::read::IterableZipArchive; + let size = 1024 * 1024; + let bytes = generate_random_archive(size); + let mut archive = + IterableZipArchive::try_new(Cursor::new(bytes.as_slice()), Config::default()).unwrap(); + + bench.iter(|| { + let file = archive + .files() + .unwrap() + .find(|f| { + let file = f.as_ref().unwrap(); + let filename = &*file.file_name; + filename == "random.dat" + }) + .unwrap() + .unwrap(); + let mut buf = [0u8; 1024]; + let mut file_reader = archive.by_file_data(&file, Default::default()).unwrap(); + loop { + let n = file_reader.read(&mut buf).unwrap(); + if n == 0 { + break; + } + } + }); + + bench.bytes = size as u64; +} + +benchmark_group!(benches, read_entry, read_entry_iterable); benchmark_main!(benches); diff --git a/src/read.rs b/src/read.rs index 2e4106a97..0991e9b64 100644 --- a/src/read.rs +++ b/src/read.rs @@ -122,30 +122,12 @@ impl IterableZipArchive { Self::with_config(config, reader) } - /// Read the central header - fn read_central_header( - dir_info: CentralDirectoryInfo, - reader: R, - ) -> ZipResult> { - // If the parsed number of files is greater than the offset then - // something fishy is going on and we shouldn't trust number_of_files. - if dir_info.number_of_files > dir_info.directory_start as usize { - return unsupported_zip_error("Fishy error :)"); - } - - if dir_info.disk_number != dir_info.disk_with_central_directory { - return unsupported_zip_error("Support for multi-disk files is not implemented"); - } - - IterableShared::try_new(reader, dir_info) - } - fn with_config(config: Config, mut reader: R) -> ZipResult> { let file_len = reader.seek(io::SeekFrom::End(0))?; let mut end_exclusive = file_len; let mut last_err = None; - let dir_info = loop { + let central_directory = loop { let cde = match spec::find_central_directory( &mut reader, config.archive_offset, @@ -165,7 +147,17 @@ impl IterableZipArchive { } }; - let iterable_shared = Self::read_central_header(dir_info, reader)?; + // If the parsed number of files is greater than the offset then + // something fishy is going on and we shouldn't trust number_of_files. + if central_directory.number_of_files > central_directory.directory_start as usize { + return unsupported_zip_error("Fishy error :)"); + } + + if central_directory.disk_number != central_directory.disk_with_central_directory { + return unsupported_zip_error("Support for multi-disk files is not implemented"); + } + + let iterable_shared = IterableShared::try_new(reader, central_directory)?; Ok(IterableZipArchive { config, @@ -174,8 +166,46 @@ impl IterableZipArchive { } /// Get the file as an iterator - pub fn files(&mut self) -> &mut IterableShared { - &mut self.iterable_shared + pub fn files(&mut self) -> ZipResult<&mut IterableShared> { + self.iterable_shared.reset()?; + Ok(&mut self.iterable_shared) + } + + /// Get a contained file by index with options. + pub fn by_file_data<'data>( + &'data mut self, + data: &'data ZipFileData, + mut options: ZipReadOptions<'_>, + ) -> ZipResult> { + if options.ignore_encryption_flag { + // Always use no password when we're ignoring the encryption flag. + options.password = None; + } else { + // Require and use the password only if the file is encrypted. + match (options.password, data.encrypted) { + (None, true) => { + return Err(ZipError::UnsupportedArchive(ZipError::PASSWORD_REQUIRED)) + } + // Password supplied, but none needed! Discard. + (Some(_), false) => options.password = None, + _ => {} + } + } + let limit_reader = find_content(data, &mut self.iterable_shared.reader)?; + + let crypto_reader = + make_crypto_reader(data, limit_reader, options.password, data.aes_mode)?; + + Ok(ZipFile { + data: Cow::Borrowed(data), + reader: make_reader( + data.compression_method, + data.uncompressed_size, + data.crc32, + crypto_reader, + data.flags, + )?, + }) } } @@ -183,31 +213,44 @@ impl IterableZipArchive { #[derive(Debug)] pub struct IterableShared { reader: R, - dir_info: CentralDirectoryInfo, + central_directory: CentralDirectoryInfo, current_file: usize, } impl IterableShared { /// Try to create an iterable of files - pub(crate) fn try_new(mut reader: R, dir_info: CentralDirectoryInfo) -> ZipResult { - reader.seek(SeekFrom::Start(dir_info.directory_start))?; + pub(crate) fn try_new( + mut reader: R, + central_directory: CentralDirectoryInfo, + ) -> ZipResult { + reader.seek(SeekFrom::Start(central_directory.directory_start))?; Ok(Self { reader, - dir_info, + central_directory, current_file: 0, }) } + + pub(crate) fn reset(&mut self) -> ZipResult<()> { + self.current_file = 0; + self.reader + .seek(SeekFrom::Start(self.central_directory.directory_start))?; + Ok(()) + } } impl Iterator for IterableShared { type Item = ZipResult; fn next(&mut self) -> Option { - if self.current_file >= self.dir_info.number_of_files { + if self.current_file >= self.central_directory.number_of_files { return None; } self.current_file += 1; - Some(central_header_to_zip_file(&mut self.reader, &self.dir_info)) + Some(central_header_to_zip_file( + &mut self.reader, + &self.central_directory, + )) } } From f4cff0d4de0dc176c25287a4954e523bfd6c765d Mon Sep 17 00:00:00 2001 From: n4n5 Date: Tue, 20 Jan 2026 18:14:39 -0700 Subject: [PATCH 06/13] mv to new file - clean --- benches/read_entry.rs | 4 +- src/read.rs | 148 +---------------------------------- src/read/iterable_zip.rs | 162 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 168 insertions(+), 146 deletions(-) create mode 100644 src/read/iterable_zip.rs diff --git a/benches/read_entry.rs b/benches/read_entry.rs index 927ae6e4f..57ac927ec 100644 --- a/benches/read_entry.rs +++ b/benches/read_entry.rs @@ -38,11 +38,11 @@ fn read_entry(bench: &mut Bencher) { } fn read_entry_iterable(bench: &mut Bencher) { - use zip::read::IterableZipArchive; + use zip::read::IterableZip; let size = 1024 * 1024; let bytes = generate_random_archive(size); let mut archive = - IterableZipArchive::try_new(Cursor::new(bytes.as_slice()), Config::default()).unwrap(); + IterableZip::try_new(Cursor::new(bytes.as_slice()), Config::default()).unwrap(); bench.iter(|| { let file = archive diff --git a/src/read.rs b/src/read.rs index 9869b4cc8..56b447dc2 100644 --- a/src/read.rs +++ b/src/read.rs @@ -34,6 +34,10 @@ pub(crate) mod stream; pub(crate) mod magic_finder; +/// Iterable zip +pub mod iterable_zip; +pub use iterable_zip::IterableZip; + /// Immutable metadata about a `ZipArchive`. #[derive(Debug)] pub struct ZipArchiveMetadata { @@ -111,150 +115,6 @@ pub(crate) mod zip_archive { } } -/// Iterable version of ZipArchive -pub struct IterableZipArchive { - #[allow(unused)] - pub(crate) config: Config, - pub(crate) iterable_shared: IterableShared, -} -impl IterableZipArchive { - /// Try to create a new zip archive - pub fn try_new(reader: R, config: Config) -> ZipResult> { - Self::with_config(config, reader) - } - - fn with_config(config: Config, mut reader: R) -> ZipResult> { - let file_len = reader.seek(io::SeekFrom::End(0))?; - let mut end_exclusive = file_len; - let mut last_err = None; - - let central_directory = loop { - let cde = match spec::find_central_directory( - &mut reader, - config.archive_offset, - end_exclusive, - file_len, - ) { - Ok(cde) => cde, - Err(e) => return Err(last_err.unwrap_or(e)), - }; - - match CentralDirectoryInfo::try_from(&cde) { - Ok(info) => break info, - Err(e) => { - last_err = Some(e); - end_exclusive = cde.eocd.position; - } - } - }; - - // If the parsed number of files is greater than the offset then - // something fishy is going on and we shouldn't trust number_of_files. - if central_directory.number_of_files > central_directory.directory_start as usize { - return unsupported_zip_error("Fishy error :)"); - } - - if central_directory.disk_number != central_directory.disk_with_central_directory { - return unsupported_zip_error("Support for multi-disk files is not implemented"); - } - - let iterable_shared = IterableShared::try_new(reader, central_directory)?; - - Ok(IterableZipArchive { - config, - iterable_shared, - }) - } - - /// Get the file as an iterator - pub fn files(&mut self) -> ZipResult<&mut IterableShared> { - self.iterable_shared.reset()?; - Ok(&mut self.iterable_shared) - } - - /// Get a contained file by index with options. - pub fn by_file_data<'data>( - &'data mut self, - data: &'data ZipFileData, - mut options: ZipReadOptions<'_>, - ) -> ZipResult> { - if options.ignore_encryption_flag { - // Always use no password when we're ignoring the encryption flag. - options.password = None; - } else { - // Require and use the password only if the file is encrypted. - match (options.password, data.encrypted) { - (None, true) => { - return Err(ZipError::UnsupportedArchive(ZipError::PASSWORD_REQUIRED)) - } - // Password supplied, but none needed! Discard. - (Some(_), false) => options.password = None, - _ => {} - } - } - let limit_reader = find_content(data, &mut self.iterable_shared.reader)?; - - let crypto_reader = - make_crypto_reader(data, limit_reader, options.password, data.aes_mode)?; - - Ok(ZipFile { - data: Cow::Borrowed(data), - reader: make_reader( - data.compression_method, - data.uncompressed_size, - data.crc32, - crypto_reader, - data.flags, - )?, - }) - } -} - -/// Iterable Files -#[derive(Debug)] -pub struct IterableShared { - reader: R, - central_directory: CentralDirectoryInfo, - current_file: usize, -} - -impl IterableShared { - /// Try to create an iterable of files - pub(crate) fn try_new( - mut reader: R, - central_directory: CentralDirectoryInfo, - ) -> ZipResult { - reader.seek(SeekFrom::Start(central_directory.directory_start))?; - Ok(Self { - reader, - central_directory, - current_file: 0, - }) - } - - pub(crate) fn reset(&mut self) -> ZipResult<()> { - self.current_file = 0; - self.reader - .seek(SeekFrom::Start(self.central_directory.directory_start))?; - Ok(()) - } -} - -impl Iterator for IterableShared { - type Item = ZipResult; - - fn next(&mut self) -> Option { - if self.current_file >= self.central_directory.number_of_files { - return None; - } - self.current_file += 1; - Some(central_header_to_zip_file( - &mut self.reader, - &self.central_directory, - )) - } -} - #[cfg(feature = "aes-crypto")] use crate::aes::PWD_VERIFY_LENGTH; use crate::extra_fields::UnicodeExtraField; diff --git a/src/read/iterable_zip.rs b/src/read/iterable_zip.rs new file mode 100644 index 000000000..7b1141736 --- /dev/null +++ b/src/read/iterable_zip.rs @@ -0,0 +1,162 @@ +//! Iterable zip reader + +use std::{ + borrow::Cow, + io::{Read, Seek, SeekFrom}, +}; + +use crate::{ + read::{ + central_header_to_zip_file, find_content, make_crypto_reader, make_reader, + unsupported_zip_error, CentralDirectoryInfo, Config, ZipFile, + }, + result::{ZipError, ZipResult}, + spec, + types::ZipFileData, + ZipReadOptions, +}; + +/// Iterable version of ZipArchive +pub struct IterableZip { + #[allow(unused)] + pub(crate) config: Config, + pub(crate) iterable_files: IterableZipFiles, +} +impl IterableZip { + /// Try to create a new zip archive + pub fn try_new(reader: R, config: Config) -> ZipResult> { + Self::with_config(config, reader) + } + + fn with_config(config: Config, mut reader: R) -> ZipResult> { + let file_len = reader.seek(SeekFrom::End(0))?; + let mut end_exclusive = file_len; + let mut last_err = None; + + let central_directory = loop { + let cde = match spec::find_central_directory( + &mut reader, + config.archive_offset, + end_exclusive, + file_len, + ) { + Ok(cde) => cde, + Err(e) => return Err(last_err.unwrap_or(e)), + }; + + match CentralDirectoryInfo::try_from(&cde) { + Ok(info) => break info, + Err(e) => { + last_err = Some(e); + end_exclusive = cde.eocd.position; + } + } + }; + + // If the parsed number of files is greater than the offset then + // something fishy is going on and we shouldn't trust number_of_files. + if central_directory.number_of_files > central_directory.directory_start as usize { + return unsupported_zip_error("Fishy error :)"); + } + + if central_directory.disk_number != central_directory.disk_with_central_directory { + return unsupported_zip_error("Support for multi-disk files is not implemented"); + } + + let iterable_shared = IterableZipFiles::try_new(reader, central_directory)?; + + Ok(IterableZip { + config, + iterable_files: iterable_shared, + }) + } + + /// Get the file as an iterator + pub fn files(&mut self) -> ZipResult<&mut IterableZipFiles> { + self.iterable_files.reset()?; + Ok(&mut self.iterable_files) + } + + /// Get a contained file by index with options. + pub fn by_file_data<'data>( + &'data mut self, + data: &'data ZipFileData, + mut options: ZipReadOptions<'_>, + ) -> ZipResult> { + if options.ignore_encryption_flag { + // Always use no password when we're ignoring the encryption flag. + options.password = None; + } else { + // Require and use the password only if the file is encrypted. + match (options.password, data.encrypted) { + (None, true) => { + return Err(ZipError::UnsupportedArchive(ZipError::PASSWORD_REQUIRED)) + } + // Password supplied, but none needed! Discard. + (Some(_), false) => options.password = None, + _ => {} + } + } + let limit_reader = find_content(data, &mut self.iterable_files.reader)?; + + let crypto_reader = + make_crypto_reader(data, limit_reader, options.password, data.aes_mode)?; + + Ok(ZipFile { + data: Cow::Borrowed(data), + reader: make_reader( + data.compression_method, + data.uncompressed_size, + data.crc32, + crypto_reader, + #[cfg(feature = "legacy-zip")] + data.flags, + )?, + }) + } +} + +/// Iterable Files +#[derive(Debug)] +pub struct IterableZipFiles { + reader: R, + central_directory: CentralDirectoryInfo, + current_file: usize, +} + +impl IterableZipFiles { + /// Try to create an iterable of files + pub(crate) fn try_new( + mut reader: R, + central_directory: CentralDirectoryInfo, + ) -> ZipResult { + reader.seek(SeekFrom::Start(central_directory.directory_start))?; + Ok(Self { + reader, + central_directory, + current_file: 0, + }) + } + + pub(crate) fn reset(&mut self) -> ZipResult<()> { + self.current_file = 0; + self.reader + .seek(SeekFrom::Start(self.central_directory.directory_start))?; + Ok(()) + } +} + +impl Iterator for IterableZipFiles { + type Item = ZipResult; + + fn next(&mut self) -> Option { + if self.current_file >= self.central_directory.number_of_files { + return None; + } + self.current_file += 1; + Some(central_header_to_zip_file( + &mut self.reader, + &self.central_directory, + )) + } +} From 0ce50f541b879dd0ffa46c85b7973ff9ea8a5b3d Mon Sep 17 00:00:00 2001 From: n4n5 Date: Tue, 20 Jan 2026 18:15:48 -0700 Subject: [PATCH 07/13] clean --- benches/read_entry.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/benches/read_entry.rs b/benches/read_entry.rs index 57ac927ec..c936ed458 100644 --- a/benches/read_entry.rs +++ b/benches/read_entry.rs @@ -3,7 +3,7 @@ use bencher::{benchmark_group, benchmark_main}; use std::io::{Cursor, Read, Write}; use bencher::Bencher; -use zip::{read::Config, write::SimpleFileOptions, ZipArchive, ZipWriter}; +use zip::{write::SimpleFileOptions, ZipArchive, ZipWriter}; fn generate_random_archive(size: usize) -> Vec { let data = Vec::new(); @@ -38,6 +38,7 @@ fn read_entry(bench: &mut Bencher) { } fn read_entry_iterable(bench: &mut Bencher) { + use zip::read::Config; use zip::read::IterableZip; let size = 1024 * 1024; let bytes = generate_random_archive(size); From e92d2cfbad0b3ff6721d9bb54ea5508120c21114 Mon Sep 17 00:00:00 2001 From: n4n5 Date: Sun, 8 Mar 2026 13:44:25 -0600 Subject: [PATCH 08/13] cargo fmt --- src/read/iterable_zip.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/read/iterable_zip.rs b/src/read/iterable_zip.rs index 7b1141736..5e593a0e5 100644 --- a/src/read/iterable_zip.rs +++ b/src/read/iterable_zip.rs @@ -6,14 +6,14 @@ use std::{ }; use crate::{ + ZipReadOptions, read::{ - central_header_to_zip_file, find_content, make_crypto_reader, make_reader, - unsupported_zip_error, CentralDirectoryInfo, Config, ZipFile, + CentralDirectoryInfo, Config, ZipFile, central_header_to_zip_file, find_content, + make_crypto_reader, make_reader, unsupported_zip_error, }, result::{ZipError, ZipResult}, spec, types::ZipFileData, - ZipReadOptions, }; /// Iterable version of ZipArchive @@ -90,7 +90,7 @@ impl IterableZip { // Require and use the password only if the file is encrypted. match (options.password, data.encrypted) { (None, true) => { - return Err(ZipError::UnsupportedArchive(ZipError::PASSWORD_REQUIRED)) + return Err(ZipError::UnsupportedArchive(ZipError::PASSWORD_REQUIRED)); } // Password supplied, but none needed! Discard. (Some(_), false) => options.password = None, From 96b1396f6bf434ac41fca7ce5efdccb3237fa353 Mon Sep 17 00:00:00 2001 From: n4n5 Date: Sun, 8 Mar 2026 18:22:21 -0600 Subject: [PATCH 09/13] add some changes --- benches/read_entry.rs | 14 +++-- src/read/iterable_zip.rs | 131 ++++++++++++++++++++++++++++++++++----- 2 files changed, 125 insertions(+), 20 deletions(-) diff --git a/benches/read_entry.rs b/benches/read_entry.rs index e031d7f32..7036dddfe 100644 --- a/benches/read_entry.rs +++ b/benches/read_entry.rs @@ -25,7 +25,7 @@ fn read_entry(bench: &mut Bencher) { let size = 1024 * 1024; let bytes = generate_random_archive(size) .expect("Failed to create a random archive for the bench read_entry()"); - let mut archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap(); + let mut archive = ZipArchive::new(Cursor::new(&bytes)).unwrap(); bench.iter(|| { let mut file = archive.by_name("random.dat").unwrap(); @@ -45,9 +45,10 @@ fn read_entry_iterable(bench: &mut Bencher) { use zip::read::Config; use zip::read::IterableZip; let size = 1024 * 1024; - let bytes = generate_random_archive(size); - let mut archive = - IterableZip::try_new(Cursor::new(bytes.as_slice()), Config::default()).unwrap(); + let bytes = generate_random_archive(size) + .expect("Failed to create a random archive for the bench read_entry()"); + let mut reader = Cursor::new(&bytes); + let mut archive = IterableZip::try_new(reader.clone(), Config::default()).unwrap(); bench.iter(|| { let file = archive @@ -55,13 +56,14 @@ fn read_entry_iterable(bench: &mut Bencher) { .unwrap() .find(|f| { let file = f.as_ref().unwrap(); - let filename = &*file.file_name; + let filename = file.file_name().unwrap(); filename == "random.dat" }) .unwrap() .unwrap(); let mut buf = [0u8; 1024]; - let mut file_reader = archive.by_file_data(&file, Default::default()).unwrap(); + let zip_data = &file.into_zip_file_data(&mut reader).unwrap(); + let mut file_reader = archive.by_file_data(&zip_data, Default::default()).unwrap(); loop { let n = file_reader.read(&mut buf).unwrap(); if n == 0 { diff --git a/src/read/iterable_zip.rs b/src/read/iterable_zip.rs index 5e593a0e5..a8e1e8c27 100644 --- a/src/read/iterable_zip.rs +++ b/src/read/iterable_zip.rs @@ -1,19 +1,21 @@ //! Iterable zip reader -use std::{ - borrow::Cow, - io::{Read, Seek, SeekFrom}, -}; - +use crate::cp437::FromCp437; +use crate::read::central_header_to_zip_file_inner; +use crate::spec::ZipFlags; use crate::{ ZipReadOptions, read::{ - CentralDirectoryInfo, Config, ZipFile, central_header_to_zip_file, find_content, - make_crypto_reader, make_reader, unsupported_zip_error, + CentralDirectoryInfo, Config, ZipFile, find_content, make_crypto_reader, make_reader, + read_variable_length_byte_field, unsupported_zip_error, }, result::{ZipError, ZipResult}, - spec, - types::ZipFileData, + spec::{self, FixedSizeBlock}, + types::{ZipCentralEntryBlock, ZipFileData}, +}; +use std::{ + borrow::Cow, + io::{Read, Seek, SeekFrom}, }; /// Iterable version of ZipArchive @@ -144,19 +146,120 @@ impl IterableZipFiles { .seek(SeekFrom::Start(self.central_directory.directory_start))?; Ok(()) } + + pub(crate) fn parse_entry(&mut self) -> ZipResult { + let central_header_start = self.reader.stream_position()?; + + // Parse central header + let block = ZipCentralEntryBlock::parse(&mut self.reader)?; + let variable_data = + ZipCentralEntryVariableDataRaw::try_from_reader(&mut self.reader, &block)?; + let file = ZipEntry::new( + self.central_directory.archive_offset, + block, + variable_data, + central_header_start, + ); + let central_header_end = self.reader.stream_position()?; + + self.reader.seek(SeekFrom::Start(central_header_end))?; + Ok(file) + } } impl Iterator for IterableZipFiles { - type Item = ZipResult; + type Item = ZipResult; fn next(&mut self) -> Option { if self.current_file >= self.central_directory.number_of_files { return None; } self.current_file += 1; - Some(central_header_to_zip_file( - &mut self.reader, - &self.central_directory, - )) + let file = self.parse_entry(); + Some(file) + } +} + +#[derive(Debug, Clone)] +pub(crate) struct ZipCentralEntryVariableDataRaw { + file_name: Box<[u8]>, + extra_fields: Box<[u8]>, + file_comment: Box<[u8]>, +} + +impl ZipCentralEntryVariableDataRaw { + fn try_from_reader(reader: &mut R, block: &ZipCentralEntryBlock) -> ZipResult { + let file_name_raw = + read_variable_length_byte_field(reader, block.file_name_length as usize)?; + let extra_field = + read_variable_length_byte_field(reader, block.extra_field_length as usize)?; + let file_comment_raw = + read_variable_length_byte_field(reader, block.file_comment_length as usize)?; + Ok(Self { + file_name: file_name_raw, + extra_fields: extra_field, + file_comment: file_comment_raw, + }) + } +} + +/// A Zip entry +#[non_exhaustive] +#[derive(Debug, Clone)] +pub struct ZipEntry { + archive_offset: u64, + central_block: ZipCentralEntryBlock, + variable_data: ZipCentralEntryVariableDataRaw, + central_block_start: u64, +} + +impl ZipEntry { + pub(crate) fn new( + archive_offset: u64, + central_block: ZipCentralEntryBlock, + variable_data: ZipCentralEntryVariableDataRaw, + start_offset: u64, + ) -> Self { + Self { + archive_offset, + central_block, + variable_data, + central_block_start: start_offset, + } + } + /// Check if the entry have the UTF-8 encoding flag + pub fn is_utf8(&self) -> bool { + // TODO + self.central_block.flags & (ZipFlags::LanguageEncoding as u16) != 0 + } + + /// Get file name + pub fn file_name(&self) -> ZipResult> { + let file_name_raw = self.file_name_raw(); + // TODO + let file_name = if self.is_utf8() { + String::from_utf8_lossy(file_name_raw) + } else { + file_name_raw.from_cp437()? + }; + Ok(file_name) + } + + /// Get raw file name + pub fn file_name_raw(&self) -> &[u8] { + &self.variable_data.file_name + } + + /// TODO convert into zip_file + pub fn into_zip_file_data( + self, + reader: &mut R, + ) -> ZipResult { + central_header_to_zip_file_inner( + reader, + self.archive_offset, + self.central_block_start, + self.central_block, + ) } } From 40f3352b2d35de3ad60b3155e082ff24397290d8 Mon Sep 17 00:00:00 2001 From: n4n5 Date: Sat, 21 Mar 2026 09:44:11 -0600 Subject: [PATCH 10/13] add benchmark --- Cargo.toml | 4 +++ benches/zip_file_list.rs | 70 ++++++++++++++++++++++++++++++++++++++++ src/read/iterable_zip.rs | 14 ++++---- 3 files changed, 81 insertions(+), 7 deletions(-) create mode 100644 benches/zip_file_list.rs diff --git a/Cargo.toml b/Cargo.toml index 1f1deae87..0a2de1c80 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -133,3 +133,7 @@ harness = false [[bench]] name = "merge_archive" harness = false + +[[bench]] +name = "zip_file_list" +harness = false diff --git a/benches/zip_file_list.rs b/benches/zip_file_list.rs new file mode 100644 index 000000000..971e3f6bc --- /dev/null +++ b/benches/zip_file_list.rs @@ -0,0 +1,70 @@ +use bencher::{benchmark_group, benchmark_main}; + +use std::io::{Cursor, Read, Write}; + +use bencher::Bencher; +use zip::{ZipArchive, ZipWriter, write::SimpleFileOptions}; + +const NB_FILES: usize = 100; +const FILENAME: &str = "bench_file_listing.zip"; + +fn generate_random_archive(size: usize) -> Result, std::io::Error> { + let data = Vec::new(); + let mut writer = ZipWriter::new(Cursor::new(data)); + let options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); + for count in 0..NB_FILES { + writer.start_file(format!("random_{}.dat", count), options)?; + let mut bytes = vec![0u8; size]; + getrandom::fill(&mut bytes) + .map_err(|e| std::io::Error::other(format!("getrandom error: {}", e)))?; + writer.write_all(&bytes)?; + } + let w = writer.finish()?; + + Ok(w.into_inner()) +} + +fn generate_random_archive_to_file(size: usize) -> Result<(), std::io::Error> { + use std::fs::File; + + let bytes = generate_random_archive(size)?; + let mut file = File::create(FILENAME)?; + file.write_all(&bytes)?; + Ok(()) +} + +fn file_listing(bench: &mut Bencher) { + let size = 1024 * 1024; + let bytes = generate_random_archive(size) + .expect("Failed to create a random archive for the bench read_entry()"); + + bench.iter(|| { + let mut archive = ZipArchive::new(Cursor::new(&bytes)).unwrap(); + let mut names = vec![]; + for idx in 0..archive.len() { + let file = archive.by_index(idx).unwrap(); + names.push(file.name().to_string()); + } + }); +} + +fn file_listing_iterable(bench: &mut Bencher) { + use zip::read::Config; + use zip::read::IterableZip; + let size = 1024 * 1024; + let bytes = generate_random_archive(size) + .expect("Failed to create a random archive for the bench read_entry()"); + + bench.iter(|| { + let mut reader = Cursor::new(&bytes); + let mut archive = IterableZip::try_new(reader.clone(), Config::default()).unwrap(); + let mut names = vec![]; + for file in archive.files().unwrap() { + let file = file.unwrap(); + names.push(file.file_name().unwrap().to_string()); + } + }); +} + +benchmark_group!(benches, file_listing, file_listing_iterable); +benchmark_main!(benches); diff --git a/src/read/iterable_zip.rs b/src/read/iterable_zip.rs index a8e1e8c27..01d551b50 100644 --- a/src/read/iterable_zip.rs +++ b/src/read/iterable_zip.rs @@ -2,16 +2,16 @@ use crate::cp437::FromCp437; use crate::read::central_header_to_zip_file_inner; -use crate::spec::ZipFlags; +use crate::spec::{FixedSizeBlock, ZipCentralEntryBlock, ZipFlags}; use crate::{ ZipReadOptions, read::{ CentralDirectoryInfo, Config, ZipFile, find_content, make_crypto_reader, make_reader, - read_variable_length_byte_field, unsupported_zip_error, + read_variable_length_byte_field }, result::{ZipError, ZipResult}, - spec::{self, FixedSizeBlock}, - types::{ZipCentralEntryBlock, ZipFileData}, + spec, + types::ZipFileData, }; use std::{ borrow::Cow, @@ -58,11 +58,11 @@ impl IterableZip { // If the parsed number of files is greater than the offset then // something fishy is going on and we shouldn't trust number_of_files. if central_directory.number_of_files > central_directory.directory_start as usize { - return unsupported_zip_error("Fishy error :)"); + return Err(ZipError::UnsupportedArchive("Fishy error :)")); } if central_directory.disk_number != central_directory.disk_with_central_directory { - return unsupported_zip_error("Support for multi-disk files is not implemented"); + return Err(ZipError::UnsupportedArchive("Support for multi-disk files is not implemented")); } let iterable_shared = IterableZipFiles::try_new(reader, central_directory)?; @@ -109,7 +109,7 @@ impl IterableZip { reader: make_reader( data.compression_method, data.uncompressed_size, - data.crc32, + Some(data.crc32), crypto_reader, #[cfg(feature = "legacy-zip")] data.flags, From e47044368325ca4a51093d224316ef148e56c859 Mon Sep 17 00:00:00 2001 From: n4n5 Date: Sat, 21 Mar 2026 09:45:30 -0600 Subject: [PATCH 11/13] cargo fmt --- src/read/iterable_zip.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/read/iterable_zip.rs b/src/read/iterable_zip.rs index 01d551b50..9764327c5 100644 --- a/src/read/iterable_zip.rs +++ b/src/read/iterable_zip.rs @@ -7,7 +7,7 @@ use crate::{ ZipReadOptions, read::{ CentralDirectoryInfo, Config, ZipFile, find_content, make_crypto_reader, make_reader, - read_variable_length_byte_field + read_variable_length_byte_field, }, result::{ZipError, ZipResult}, spec, @@ -62,7 +62,9 @@ impl IterableZip { } if central_directory.disk_number != central_directory.disk_with_central_directory { - return Err(ZipError::UnsupportedArchive("Support for multi-disk files is not implemented")); + return Err(ZipError::UnsupportedArchive( + "Support for multi-disk files is not implemented", + )); } let iterable_shared = IterableZipFiles::try_new(reader, central_directory)?; From caed92fdced07113a670a76499a7b830ad531e58 Mon Sep 17 00:00:00 2001 From: n4n5 Date: Sat, 21 Mar 2026 11:22:58 -0600 Subject: [PATCH 12/13] move to unstable --- benches/read_entry.rs | 2 +- benches/zip_file_list.rs | 57 +++++++++++++++++++++++++++++++++++----- src/read.rs | 6 ++--- src/read/iterable_zip.rs | 17 ++++++++++-- src/unstable.rs | 6 +++++ 5 files changed, 74 insertions(+), 14 deletions(-) diff --git a/benches/read_entry.rs b/benches/read_entry.rs index 7036dddfe..d2ef9f9d5 100644 --- a/benches/read_entry.rs +++ b/benches/read_entry.rs @@ -43,7 +43,7 @@ fn read_entry(bench: &mut Bencher) { fn read_entry_iterable(bench: &mut Bencher) { use zip::read::Config; - use zip::read::IterableZip; + use zip::unstable::read::IterableZip; let size = 1024 * 1024; let bytes = generate_random_archive(size) .expect("Failed to create a random archive for the bench read_entry()"); diff --git a/benches/zip_file_list.rs b/benches/zip_file_list.rs index 971e3f6bc..a57445603 100644 --- a/benches/zip_file_list.rs +++ b/benches/zip_file_list.rs @@ -1,6 +1,6 @@ use bencher::{benchmark_group, benchmark_main}; -use std::io::{Cursor, Read, Write}; +use std::io::{Cursor, Write}; use bencher::Bencher; use zip::{ZipArchive, ZipWriter, write::SimpleFileOptions}; @@ -33,7 +33,7 @@ fn generate_random_archive_to_file(size: usize) -> Result<(), std::io::Error> { Ok(()) } -fn file_listing(bench: &mut Bencher) { +fn file_listing_memory(bench: &mut Bencher) { let size = 1024 * 1024; let bytes = generate_random_archive(size) .expect("Failed to create a random archive for the bench read_entry()"); @@ -48,16 +48,53 @@ fn file_listing(bench: &mut Bencher) { }); } -fn file_listing_iterable(bench: &mut Bencher) { +fn file_listing_file(bench: &mut Bencher) { + use std::fs::File; + + let size = 1024 * 1024; + generate_random_archive_to_file(size) + .expect("Failed to create a random archive for the bench read_entry()"); + + bench.iter(|| { + let file = File::open(FILENAME).unwrap(); + let mut archive = ZipArchive::new(file).unwrap(); + let mut names = vec![]; + for idx in 0..archive.len() { + let file = archive.by_index(idx).unwrap(); + names.push(file.name().to_string()); + } + }); +} + +fn file_listing_iterable_memory(bench: &mut Bencher) { use zip::read::Config; - use zip::read::IterableZip; + use zip::unstable::read::IterableZip; let size = 1024 * 1024; let bytes = generate_random_archive(size) .expect("Failed to create a random archive for the bench read_entry()"); bench.iter(|| { - let mut reader = Cursor::new(&bytes); - let mut archive = IterableZip::try_new(reader.clone(), Config::default()).unwrap(); + let mut archive = IterableZip::try_new(Cursor::new(&bytes), Config::default()).unwrap(); + let mut names = vec![]; + for file in archive.files().unwrap() { + let file = file.unwrap(); + names.push(file.file_name().unwrap().to_string()); + } + }); +} + +fn file_listing_iterable_file(bench: &mut Bencher) { + use std::fs::File; + use zip::read::Config; + use zip::unstable::read::IterableZip; + + let size = 1024 * 1024; + generate_random_archive_to_file(size) + .expect("Failed to create a random archive for the bench read_entry()"); + + bench.iter(|| { + let file = File::open(FILENAME).unwrap(); + let mut archive = IterableZip::try_new(file, Config::default()).unwrap(); let mut names = vec![]; for file in archive.files().unwrap() { let file = file.unwrap(); @@ -66,5 +103,11 @@ fn file_listing_iterable(bench: &mut Bencher) { }); } -benchmark_group!(benches, file_listing, file_listing_iterable); +benchmark_group!( + benches, + file_listing_memory, + file_listing_iterable_memory, + file_listing_file, + file_listing_iterable_file +); benchmark_main!(benches); diff --git a/src/read.rs b/src/read.rs index 54dfedabd..a5ee204d0 100644 --- a/src/read.rs +++ b/src/read.rs @@ -33,11 +33,9 @@ pub(crate) mod stream; pub use stream::read_zipfile_from_stream; pub use stream::read_zipfile_from_stream_with_compressed_size; -pub(crate) mod magic_finder; +pub(crate) mod iterable_zip; -/// Iterable zip -pub mod iterable_zip; -pub use iterable_zip::IterableZip; +pub(crate) mod magic_finder; pub use zip_archive::ZipArchive; diff --git a/src/read/iterable_zip.rs b/src/read/iterable_zip.rs index 9764327c5..e691dfafa 100644 --- a/src/read/iterable_zip.rs +++ b/src/read/iterable_zip.rs @@ -230,9 +230,9 @@ impl ZipEntry { } } /// Check if the entry have the UTF-8 encoding flag + #[must_use] pub fn is_utf8(&self) -> bool { - // TODO - self.central_block.flags & (ZipFlags::LanguageEncoding as u16) != 0 + ZipFlags::matching(self.central_block.flags, ZipFlags::LanguageEncoding) } /// Get file name @@ -248,10 +248,23 @@ impl ZipEntry { } /// Get raw file name + #[must_use] pub fn file_name_raw(&self) -> &[u8] { &self.variable_data.file_name } + /// Get raw extra fields + #[must_use] + pub fn extra_fields(&self) -> &[u8] { + &self.variable_data.extra_fields + } + + /// Get raw comment + #[must_use] + pub fn comment(&self) -> &[u8] { + &self.variable_data.file_comment + } + /// TODO convert into zip_file pub fn into_zip_file_data( self, diff --git a/src/unstable.rs b/src/unstable.rs index 33cc4235f..97cc05a92 100644 --- a/src/unstable.rs +++ b/src/unstable.rs @@ -9,6 +9,12 @@ use std::path::{Component, MAIN_SEPARATOR, Path}; pub mod stream { pub use crate::read::stream::{ZipStreamFileMetadata, ZipStreamReader, ZipStreamVisitor}; } + +/// Iterable zip +pub mod read { + pub use crate::read::iterable_zip::IterableZip; +} + /// Types for creating ZIP archives. pub mod write { use crate::result::{ZipError, ZipResult}; From e52c920c520dd20ff06a23c8dab3722f88c12d42 Mon Sep 17 00:00:00 2001 From: n4n5 Date: Sat, 21 Mar 2026 11:26:16 -0600 Subject: [PATCH 13/13] add cleanup bench --- benches/zip_file_list.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/benches/zip_file_list.rs b/benches/zip_file_list.rs index a57445603..3c2ab971e 100644 --- a/benches/zip_file_list.rs +++ b/benches/zip_file_list.rs @@ -64,6 +64,8 @@ fn file_listing_file(bench: &mut Bencher) { names.push(file.name().to_string()); } }); + + std::fs::remove_file(FILENAME).unwrap(); } fn file_listing_iterable_memory(bench: &mut Bencher) { @@ -101,6 +103,7 @@ fn file_listing_iterable_file(bench: &mut Bencher) { names.push(file.file_name().unwrap().to_string()); } }); + std::fs::remove_file(FILENAME).unwrap(); } benchmark_group!(