diff --git a/Cargo.toml b/Cargo.toml index 1f1deae87..59f3f8e16 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -130,6 +130,10 @@ harness = false name = "read_metadata" harness = false +[[bench]] +name = "read_many_entries" +harness = false + [[bench]] name = "merge_archive" harness = false diff --git a/benches/read_entry.rs b/benches/read_entry.rs index 88be9c8ab..f8f72cd59 100644 --- a/benches/read_entry.rs +++ b/benches/read_entry.rs @@ -25,9 +25,9 @@ fn read_entry(bench: &mut Bencher) { let size = 1024 * 1024; let bytes = generate_random_archive(size) .expect("Failed to create a random archive for the bench read_entry()"); - let mut archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap(); bench.iter(|| { + let mut archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap(); let mut file = archive.by_name("random.dat").unwrap(); let mut buf = [0u8; 1024]; loop { diff --git a/benches/read_many_entries.rs b/benches/read_many_entries.rs new file mode 100644 index 000000000..5c99d1389 --- /dev/null +++ b/benches/read_many_entries.rs @@ -0,0 +1,94 @@ +use bencher::{benchmark_group, benchmark_main}; + +use std::fs::File; +use std::io::{Cursor, Write}; + +use bencher::Bencher; +use zip::{ZipArchive, ZipWriter, write::SimpleFileOptions}; + +const NB_FILES: usize = 100; +const FILENAME: &str = "bench_read_many_entries.zip"; + +fn generate_random_archive(size: usize) -> Result, std::io::Error> { + let data = Vec::new(); + let mut writer = ZipWriter::new(Cursor::new(data)); + let options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); + for count in 0..NB_FILES { + writer.start_file(format!("random_{}.dat", count), options)?; + let mut bytes = vec![0u8; size]; + getrandom::fill(&mut bytes) + .map_err(|e| std::io::Error::other(format!("getrandom error: {}", e)))?; + writer.write_all(&bytes)?; + } + let w = writer.finish()?; + + Ok(w.into_inner()) +} + +fn generate_random_archive_to_file(size: usize) -> Result<(), std::io::Error> { + let bytes = generate_random_archive(size)?; + let mut file = File::create(FILENAME)?; + file.write_all(&bytes)?; + Ok(()) +} + +fn read_many_entries_as_file(bench: &mut Bencher) { + let size = 1024 * 1024; + generate_random_archive_to_file(size) + .expect("Failed to create a random archive for the bench read_entry()"); + + bench.iter(|| { + let file = File::open(FILENAME).unwrap(); + let mut archive = ZipArchive::new(file).unwrap(); + for idx in 0..archive.len() { + let mut file = archive.by_index(idx).unwrap(); + let _n = std::io::copy(&mut file, &mut std::io::sink()).unwrap(); + } + }); + + bench.bytes = (size * NB_FILES) as u64; + std::fs::remove_file(FILENAME).unwrap(); +} + +fn read_many_entries_as_file_buf(bench: &mut Bencher) { + let size = 1024 * 1024; + generate_random_archive_to_file(size) + .expect("Failed to create a random archive for the bench read_entry()"); + + bench.iter(|| { + let file = File::open(FILENAME).unwrap(); + let file = std::io::BufReader::new(file); + let mut archive = ZipArchive::new(file).unwrap(); + for idx in 0..archive.len() { + let mut file = archive.by_index(idx).unwrap(); + let _n = std::io::copy(&mut file, &mut std::io::sink()).unwrap(); + } + }); + + bench.bytes = (size * NB_FILES) as u64; + std::fs::remove_file(FILENAME).unwrap(); +} + +fn read_many_entries(bench: &mut Bencher) { + let size = 1024 * 1024; + let bytes = generate_random_archive(size) + .expect("Failed to create a random archive for the bench read_entry()"); + + bench.iter(|| { + let mut archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap(); + for idx in 0..archive.len() { + let mut file = archive.by_index(idx).unwrap(); + let _n = std::io::copy(&mut file, &mut std::io::sink()).unwrap(); + } + }); + + bench.bytes = (size * NB_FILES) as u64; +} + +benchmark_group!( + benches, + read_many_entries, + read_many_entries_as_file, + read_many_entries_as_file_buf +); +benchmark_main!(benches); diff --git a/src/read/stream.rs b/src/read/stream.rs index eecbacc90..9d3a237a1 100644 --- a/src/read/stream.rs +++ b/src/read/stream.rs @@ -236,15 +236,18 @@ pub fn read_zipfile_from_stream(reader: &mut R) -> ZipResult()]; + reader.read_exact(&mut magic_buf)?; - match block.magic().from_le() { + match Magic::from_le_bytes(magic_buf) { Magic::LOCAL_FILE_HEADER_SIGNATURE => (), Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE => return Ok(None), _ => return Err(ZipLocalEntryBlock::WRONG_MAGIC_ERROR), } + let mut block = ZipLocalEntryBlock::zeroed(); + reader.read_exact(block.as_bytes_mut())?; + let block = block.from_le(); let mut result = ZipFileData::from_local_block(block, reader)?; @@ -284,15 +287,18 @@ pub fn read_zipfile_from_stream_with_compressed_size( reader: &mut R, compressed_size: u64, ) -> ZipResult>> { - let mut block = ZipLocalEntryBlock::zeroed(); - reader.read_exact(block.as_bytes_mut())?; + let mut magic_buf = [0; size_of::()]; + reader.read_exact(&mut magic_buf)?; - match block.magic().from_le() { + match Magic::from_le_bytes(magic_buf) { Magic::LOCAL_FILE_HEADER_SIGNATURE => (), Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE => return Ok(None), _ => return Err(ZipLocalEntryBlock::WRONG_MAGIC_ERROR), } + let mut block = ZipLocalEntryBlock::zeroed(); + reader.read_exact(block.as_bytes_mut())?; + let block = block.from_le(); let mut result = ZipFileData::from_local_block(block, reader)?; diff --git a/src/spec.rs b/src/spec.rs index 8c7920527..cba5d7bef 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -33,6 +33,7 @@ impl Magic { } #[allow(clippy::wrong_self_convention)] + #[allow(unused)] #[inline(always)] pub fn from_le(self) -> Self { Self(u32::from_le(self.0)) @@ -203,11 +204,24 @@ pub(crate) unsafe trait Pod: Copy + 'static { } } -pub(crate) trait FixedSizeBlock: Pod { - type Magic: Copy + Eq; - const MAGIC: Self::Magic; +#[derive(Copy, Clone)] +#[repr(C, packed)] +struct BlockWithMagic { + magic: Magic, + inner: T, +} +unsafe impl Pod for BlockWithMagic {} - fn magic(self) -> Self::Magic; +impl BlockWithMagic { + fn to_le(mut self) -> Self { + self.magic = self.magic.to_le(); + self.inner = self.inner.to_le(); + self + } +} + +pub(crate) trait FixedSizeBlock: Pod { + const MAGIC: Magic; const WRONG_MAGIC_ERROR: ZipError; @@ -215,16 +229,21 @@ pub(crate) trait FixedSizeBlock: Pod { fn from_le(self) -> Self; fn parse(reader: &mut R) -> ZipResult { - let mut block = Self::zeroed(); - if let Err(e) = reader.read_exact(block.as_bytes_mut()) { + let mut block_with_magic = BlockWithMagic::zeroed(); + if let Err(e) = reader.read_exact(block_with_magic.as_bytes_mut()) { if e.kind() == io::ErrorKind::UnexpectedEof { return Err(invalid!("Unexpected end of {}", type_name::())); } return Err(e.into()); } + let BlockWithMagic { + magic, + inner: block, + } = block_with_magic; + let magic = Magic::from_le(magic); let block = Self::from_le(block); - if block.magic() != Self::MAGIC { + if magic != Self::MAGIC { return Err(Self::WRONG_MAGIC_ERROR); } Ok(block) @@ -233,7 +252,11 @@ pub(crate) trait FixedSizeBlock: Pod { fn to_le(self) -> Self; fn write(self, writer: &mut T) -> ZipResult<()> { - let block = self.to_le(); + let block = BlockWithMagic { + magic: Self::MAGIC, + inner: self, + }; + let block = block.to_le(); writer.write_all(block.as_bytes())?; Ok(()) } @@ -289,7 +312,6 @@ macro_rules! to_and_from_le { #[derive(Copy, Clone, Debug)] #[repr(packed, C)] pub(crate) struct ZipCentralEntryBlock { - pub(crate) magic: Magic, pub version_made_by: u16, pub version_to_extract: u16, pub flags: u16, @@ -311,18 +333,11 @@ pub(crate) struct ZipCentralEntryBlock { unsafe impl Pod for ZipCentralEntryBlock {} impl FixedSizeBlock for ZipCentralEntryBlock { - type Magic = Magic; const MAGIC: Magic = Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE; - #[inline(always)] - fn magic(self) -> Magic { - self.magic - } - const WRONG_MAGIC_ERROR: ZipError = invalid!("Invalid Central Directory header"); to_and_from_le![ - (magic, Magic), (version_made_by, u16), (version_to_extract, u16), (flags, u16), @@ -345,7 +360,6 @@ impl FixedSizeBlock for ZipCentralEntryBlock { #[derive(Copy, Clone, Debug)] #[repr(packed, C)] pub(crate) struct ZipLocalEntryBlock { - pub(crate) magic: Magic, pub version_made_by: u16, pub flags: u16, pub compression_method: u16, @@ -361,18 +375,11 @@ pub(crate) struct ZipLocalEntryBlock { unsafe impl Pod for ZipLocalEntryBlock {} impl FixedSizeBlock for ZipLocalEntryBlock { - type Magic = Magic; const MAGIC: Magic = Magic::LOCAL_FILE_HEADER_SIGNATURE; - #[inline(always)] - fn magic(self) -> Magic { - self.magic - } - const WRONG_MAGIC_ERROR: ZipError = invalid!("Invalid local file header"); to_and_from_le![ - (magic, Magic), (version_made_by, u16), (flags, u16), (compression_method, u16), @@ -389,7 +396,6 @@ impl FixedSizeBlock for ZipLocalEntryBlock { #[derive(Copy, Clone, Debug)] #[repr(packed, C)] pub(crate) struct ZipDataDescriptorBlock { - pub(crate) magic: Magic, pub crc32: u32, pub compressed_size: u32, pub uncompressed_size: u32, @@ -398,18 +404,11 @@ pub(crate) struct ZipDataDescriptorBlock { unsafe impl Pod for ZipDataDescriptorBlock {} impl FixedSizeBlock for ZipDataDescriptorBlock { - type Magic = Magic; const MAGIC: Magic = Magic::DATA_DESCRIPTOR_SIGNATURE; - #[inline(always)] - fn magic(self) -> Magic { - self.magic - } - const WRONG_MAGIC_ERROR: ZipError = invalid!("Invalid data descriptor header"); to_and_from_le![ - (magic, Magic), (crc32, u32), (compressed_size, u32), (uncompressed_size, u32), @@ -419,7 +418,6 @@ impl FixedSizeBlock for ZipDataDescriptorBlock { #[derive(Copy, Clone, Debug)] #[repr(packed, C)] pub(crate) struct Zip64DataDescriptorBlock { - pub(crate) magic: Magic, pub crc32: u32, pub compressed_size: u64, pub uncompressed_size: u64, @@ -428,18 +426,11 @@ pub(crate) struct Zip64DataDescriptorBlock { unsafe impl Pod for Zip64DataDescriptorBlock {} impl FixedSizeBlock for Zip64DataDescriptorBlock { - type Magic = Magic; const MAGIC: Magic = Magic::DATA_DESCRIPTOR_SIGNATURE; - #[inline] - fn magic(self) -> Magic { - self.magic - } - const WRONG_MAGIC_ERROR: ZipError = invalid!("Invalid zip64 data descriptor header"); to_and_from_le![ - (magic, Magic), (crc32, u32), (compressed_size, u64), (uncompressed_size, u64), @@ -449,7 +440,6 @@ impl FixedSizeBlock for Zip64DataDescriptorBlock { #[derive(Copy, Clone, Debug)] #[repr(packed, C)] pub(crate) struct Zip32CDEBlock { - magic: Magic, pub disk_number: u16, pub disk_with_central_directory: u16, pub number_of_files_on_this_disk: u16, @@ -462,18 +452,11 @@ pub(crate) struct Zip32CDEBlock { unsafe impl Pod for Zip32CDEBlock {} impl FixedSizeBlock for Zip32CDEBlock { - type Magic = Magic; const MAGIC: Magic = Magic::CENTRAL_DIRECTORY_END_SIGNATURE; - #[inline(always)] - fn magic(self) -> Magic { - self.magic - } - const WRONG_MAGIC_ERROR: ZipError = invalid!("Invalid digital signature header"); to_and_from_le![ - (magic, Magic), (disk_number, u16), (disk_with_central_directory, u16), (number_of_files_on_this_disk, u16), @@ -507,7 +490,6 @@ impl Zip32CentralDirectoryEnd { zip_file_comment, } = self; let block = Zip32CDEBlock { - magic: Zip32CDEBlock::MAGIC, disk_number, disk_with_central_directory, number_of_files_on_this_disk, @@ -522,7 +504,6 @@ impl Zip32CentralDirectoryEnd { pub fn parse(reader: &mut T) -> ZipResult { let Zip32CDEBlock { - // magic, disk_number, disk_with_central_directory, number_of_files_on_this_disk, @@ -573,7 +554,6 @@ impl Zip32CentralDirectoryEnd { #[derive(Copy, Clone)] #[repr(packed, C)] pub(crate) struct Zip64CDELocatorBlock { - magic: Magic, pub disk_with_central_directory: u32, pub end_of_central_directory_offset: u64, pub number_of_disks: u32, @@ -582,18 +562,11 @@ pub(crate) struct Zip64CDELocatorBlock { unsafe impl Pod for Zip64CDELocatorBlock {} impl FixedSizeBlock for Zip64CDELocatorBlock { - type Magic = Magic; const MAGIC: Magic = Magic::ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE; - #[inline(always)] - fn magic(self) -> Magic { - self.magic - } - const WRONG_MAGIC_ERROR: ZipError = invalid!("Invalid zip64 locator digital signature header"); to_and_from_le![ - (magic, Magic), (disk_with_central_directory, u32), (end_of_central_directory_offset, u64), (number_of_disks, u32), @@ -609,7 +582,6 @@ pub(crate) struct Zip64CentralDirectoryEndLocator { impl Zip64CentralDirectoryEndLocator { pub fn parse(reader: &mut T) -> ZipResult { let Zip64CDELocatorBlock { - // magic, disk_with_central_directory, end_of_central_directory_offset, number_of_disks, @@ -630,7 +602,6 @@ impl Zip64CentralDirectoryEndLocator { number_of_disks, } = self; Zip64CDELocatorBlock { - magic: Zip64CDELocatorBlock::MAGIC, disk_with_central_directory, end_of_central_directory_offset, number_of_disks, @@ -645,7 +616,6 @@ impl Zip64CentralDirectoryEndLocator { #[derive(Copy, Clone)] #[repr(packed, C)] pub(crate) struct Zip64CDEBlock { - magic: Magic, pub record_size: u64, pub version_made_by: u16, pub version_needed_to_extract: u16, @@ -660,17 +630,11 @@ pub(crate) struct Zip64CDEBlock { unsafe impl Pod for Zip64CDEBlock {} impl FixedSizeBlock for Zip64CDEBlock { - type Magic = Magic; const MAGIC: Magic = Magic::ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE; - fn magic(self) -> Magic { - self.magic - } - const WRONG_MAGIC_ERROR: ZipError = invalid!("Invalid digital signature header"); to_and_from_le![ - (magic, Magic), (record_size, u64), (version_made_by, u16), (version_needed_to_extract, u16), @@ -760,7 +724,6 @@ impl Zip64CentralDirectoryEnd { ( Zip64CDEBlock { - magic: Zip64CDEBlock::MAGIC, record_size, version_made_by, version_needed_to_extract, @@ -855,11 +818,14 @@ pub(crate) fn find_central_directory( reader: &mut (impl Read + Seek + ?Sized), eocd_offset: u64, ) -> ZipResult<(u64, Zip64CentralDirectoryEndLocator)> { - if eocd_offset < mem::size_of::() as u64 { + if eocd_offset + < (mem::size_of::() + mem::size_of::()) as u64 + { return Err(invalid!("EOCD64 Locator does not fit in file")); } - let locator64_offset = eocd_offset - mem::size_of::() as u64; + let locator64_offset = eocd_offset + - (mem::size_of::() + mem::size_of::()) as u64; reader.seek(io::SeekFrom::Start(locator64_offset))?; let locator64 = Zip64CentralDirectoryEndLocator::parse(reader); @@ -987,7 +953,10 @@ pub(crate) fn find_central_directory( if eocd64_offset < eocd64 .number_of_files - .saturating_mul(mem::size_of::() as u64) + .saturating_mul( + (mem::size_of::() + mem::size_of::()) + as u64, + ) .saturating_add(eocd64.central_directory_offset) { local_error = @@ -1032,30 +1001,23 @@ mod tests { #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] #[repr(packed, C)] pub struct TestBlock { - magic: Magic, pub file_name_length: u16, } unsafe impl Pod for TestBlock {} impl FixedSizeBlock for TestBlock { - type Magic = Magic; const MAGIC: Magic = Magic::literal(0x01111); - fn magic(self) -> Magic { - self.magic - } - const WRONG_MAGIC_ERROR: ZipError = invalid!("unreachable"); - to_and_from_le![(magic, Magic), (file_name_length, u16)]; + to_and_from_le![(file_name_length, u16)]; } /// Demonstrate that a block object can be safely written to memory and deserialized back out. #[test] fn block_serde() { let block = TestBlock { - magic: TestBlock::MAGIC, file_name_length: 3, }; let mut c = Cursor::new(Vec::new()); diff --git a/src/types.rs b/src/types.rs index 28ff5c09a..e0e3cfd29 100644 --- a/src/types.rs +++ b/src/types.rs @@ -8,8 +8,8 @@ use crate::path::{enclosed_name, file_name_sanitized}; use crate::result::{ZipError, ZipResult, invalid}; use crate::spec::is_dir; use crate::spec::{ - self, FixedSizeBlock, Zip64DataDescriptorBlock, ZipCentralEntryBlock, ZipDataDescriptorBlock, - ZipFlags, ZipLocalEntryBlock, + self, FixedSizeBlock, Magic, Zip64DataDescriptorBlock, ZipCentralEntryBlock, + ZipDataDescriptorBlock, ZipFlags, ZipLocalEntryBlock, }; use crate::write::FileOptionExtension; use crate::zipcrypto::EncryptWith; @@ -246,8 +246,9 @@ impl ZipFileData { // Each of these fields must be converted to u64 before adding, as the result may // easily overflow a u16. u64::from(block.file_name_length) + u64::from(block.extra_field_length); - let data_start = - self.header_start + size_of::() as u64 + variable_fields_len; + let data_start = self.header_start + + (size_of::() + size_of::()) as u64 + + variable_fields_len; // Set the value so we don't have to read it again. match self.data_start.set(data_start) { @@ -461,7 +462,6 @@ impl ZipFileData { reader: &mut R, ) -> ZipResult { let ZipLocalEntryBlock { - // magic, version_made_by, flags, compression_method, @@ -610,7 +610,6 @@ impl ZipFileData { .last_modified_time .unwrap_or_else(DateTime::default_for_write); Ok(ZipLocalEntryBlock { - magic: ZipLocalEntryBlock::MAGIC, version_made_by: self.version_needed(), flags: self.flags(), compression_method: self.compression_method.serialize_to_u16(), @@ -664,7 +663,6 @@ impl ZipFileData { let version_to_extract = self.version_needed(); let version_made_by = u16::from(self.version_made_by).max(version_to_extract); Ok(ZipCentralEntryBlock { - magic: ZipCentralEntryBlock::MAGIC, version_made_by: ((self.system as u16) << 8) | version_made_by, version_to_extract, flags: self.flags(), @@ -717,7 +715,6 @@ impl ZipFileData { pub(crate) fn data_descriptor_block(&self) -> ZipDataDescriptorBlock { ZipDataDescriptorBlock { - magic: ZipDataDescriptorBlock::MAGIC, crc32: self.crc32, compressed_size: self.compressed_size as u32, uncompressed_size: self.uncompressed_size as u32, @@ -726,7 +723,6 @@ impl ZipFileData { pub(crate) fn zip64_data_descriptor_block(&self) -> Zip64DataDescriptorBlock { Zip64DataDescriptorBlock { - magic: Zip64DataDescriptorBlock::MAGIC, crc32: self.crc32, compressed_size: self.compressed_size, uncompressed_size: self.uncompressed_size, diff --git a/src/write.rs b/src/write.rs index ffaa998c7..b3980d919 100644 --- a/src/write.rs +++ b/src/write.rs @@ -7,7 +7,7 @@ use crate::extra_fields::UsedExtraField; use crate::extra_fields::Zip64ExtendedInformation; use crate::read::{Config, ZipArchive, ZipFile, parse_single_extra_field}; use crate::result::{ZipError, ZipResult, invalid}; -use crate::spec::{self, FixedSizeBlock, Pod, Zip32CDEBlock, ZipLocalEntryBlock}; +use crate::spec::{self, FixedSizeBlock, Magic, Pod, Zip32CDEBlock, ZipLocalEntryBlock}; use crate::types::{AesVendorVersion, MIN_VERSION, System, ZipFileData, ZipRawValues, ffi}; use core::default::Default; use core::fmt::{Debug, Formatter}; @@ -392,7 +392,7 @@ impl ExtendedFileOptions { header_id: u16, data: &[u8], ) -> Result<(), ZipError> { - vec.reserve_exact(data.len() + 4); + vec.reserve_exact(data.len() + size_of::() + size_of::()); vec.write_u16_le(header_id)?; vec.write_u16_le(data.len() as u16)?; vec.write_all(data)?; @@ -899,7 +899,7 @@ impl ZipWriter { new_data.file_name_raw = dest_name_raw.into(); new_data.header_start = write_position; let extra_data_start = write_position - + size_of::() as u64 + + (size_of::() + size_of::()) as u64 + new_data.file_name_raw.len() as u64; new_data.extra_data_start = Some(extra_data_start); if let Some(extra) = &src_data.extra_field { @@ -1206,8 +1206,9 @@ impl ZipWriter { buf, )?; } - let header_end = - header_start + size_of::() as u64 + name.to_string().len() as u64; + let header_end = header_start + + (size_of::() + size_of::()) as u64 + + name.to_string().len() as u64; if options.alignment > 1 { let extra_data_end = header_end + extra_data.len() as u64; @@ -1849,7 +1850,9 @@ impl ZipWriter { writer.seek(SeekFrom::Start(central_start))?; writer.write_u32_le(0)?; writer.seek(SeekFrom::Start( - footer_end - size_of::() as u64 - self.comment.len() as u64, + footer_end + - (size_of::() + size_of::()) as u64 + - self.comment.len() as u64, ))?; writer.write_u32_le(0)?; @@ -2400,7 +2403,7 @@ impl ZipFileData { writer: &mut T, ) -> ZipResult<()> { writer.seek(SeekFrom::Start( - self.header_start + offset_of!(ZipLocalEntryBlock, crc32) as u64, + self.header_start + (size_of::() + offset_of!(ZipLocalEntryBlock, crc32)) as u64, ))?; writer.write_u32_le(self.crc32)?; if self.large_file { @@ -2436,7 +2439,7 @@ impl ZipFileData { ))?; let zip64_extra_field_start = self.header_start - + size_of::() as u64 + + (size_of::() + size_of::()) as u64 + self.file_name_raw.len() as u64; writer.seek(SeekFrom::Start(zip64_extra_field_start))?;