From df5cb07459733033819fa9fbaa8d7713dbbf641c Mon Sep 17 00:00:00 2001 From: Xiaoge Su Date: Wed, 21 Jan 2026 16:20:24 -0800 Subject: [PATCH 1/3] Extract FileObjectSource, CacheRead and CacheWrite to cache_io.rs Also, put get_file_mode and set_file_mode to utils.rs --- src/cache/cache.rs | 286 +----------------------------------------- src/cache/cache_io.rs | 270 +++++++++++++++++++++++++++++++++++++++ src/cache/mod.rs | 3 + src/cache/utils.rs | 44 +++++++ 4 files changed, 320 insertions(+), 283 deletions(-) create mode 100644 src/cache/cache_io.rs create mode 100644 src/cache/utils.rs diff --git a/src/cache/cache.rs b/src/cache/cache.rs index 0fd91d291..37dcaf74c 100644 --- a/src/cache/cache.rs +++ b/src/cache/cache.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use super::cache_io::*; #[cfg(feature = "azure")] use crate::cache::azure::AzureBlobCache; #[cfg(feature = "cos")] @@ -46,296 +47,14 @@ use crate::config::Config; ))] use crate::config::{self, CacheType}; use async_trait::async_trait; -use fs_err as fs; use serde::{Deserialize, Serialize}; -use std::fmt; -use std::io::{self, Cursor, Read, Seek, Write}; -use std::path::{Path, PathBuf}; +use std::io; use std::sync::Arc; use std::time::Duration; -use tempfile::NamedTempFile; -use zip::write::FileOptions; -use zip::{CompressionMethod, ZipArchive, ZipWriter}; use crate::errors::*; -#[cfg(unix)] -fn get_file_mode(file: &fs::File) -> Result> { - use std::os::unix::fs::MetadataExt; - Ok(Some(file.metadata()?.mode())) -} - -#[cfg(windows)] -#[allow(clippy::unnecessary_wraps)] -fn get_file_mode(_file: &fs::File) -> Result> { - Ok(None) -} - -#[cfg(unix)] -fn set_file_mode(path: &Path, mode: u32) -> Result<()> { - use std::fs::Permissions; - use std::os::unix::fs::PermissionsExt; - let p = Permissions::from_mode(mode); - fs::set_permissions(path, p)?; - Ok(()) -} - -#[cfg(windows)] -#[allow(clippy::unnecessary_wraps)] -fn set_file_mode(_path: &Path, _mode: u32) -> Result<()> { - Ok(()) -} - -/// Cache object sourced by a file. -#[derive(Clone)] -pub struct FileObjectSource { - /// Identifier for this object. Should be unique within a compilation unit. - /// Note that a compilation unit is a single source file in C/C++ and a crate in Rust. - pub key: String, - /// Absolute path to the file. - pub path: PathBuf, - /// Whether the file must be present on disk and is essential for the compilation. - pub optional: bool, -} - -/// Result of a cache lookup. -pub enum Cache { - /// Result was found in cache. - Hit(CacheRead), - /// Result was not found in cache. - Miss, - /// Do not cache the results of the compilation. - None, - /// Cache entry should be ignored, force compilation. - Recache, -} - -impl fmt::Debug for Cache { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match *self { - Cache::Hit(_) => write!(f, "Cache::Hit(...)"), - Cache::Miss => write!(f, "Cache::Miss"), - Cache::None => write!(f, "Cache::None"), - Cache::Recache => write!(f, "Cache::Recache"), - } - } -} - -/// CacheMode is used to represent which mode we are using. -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum CacheMode { - /// Only read cache from storage. - ReadOnly, - /// Full support of cache storage: read and write. - ReadWrite, -} - -/// Trait objects can't be bounded by more than one non-builtin trait. -pub trait ReadSeek: Read + Seek + Send {} - -impl ReadSeek for T {} - -/// Data stored in the compiler cache. -pub struct CacheRead { - zip: ZipArchive>, -} - -/// Represents a failure to decompress stored object data. -#[derive(Debug)] -pub struct DecompressionFailure; - -impl std::fmt::Display for DecompressionFailure { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "failed to decompress content") - } -} - -impl std::error::Error for DecompressionFailure {} - -impl CacheRead { - /// Create a cache entry from `reader`. - pub fn from(reader: R) -> Result - where - R: ReadSeek + 'static, - { - let z = ZipArchive::new(Box::new(reader) as Box) - .context("Failed to parse cache entry")?; - Ok(CacheRead { zip: z }) - } - - /// Get an object from this cache entry at `name` and write it to `to`. - /// If the file has stored permissions, return them. - pub fn get_object(&mut self, name: &str, to: &mut T) -> Result> - where - T: Write, - { - let file = self.zip.by_name(name).or(Err(DecompressionFailure))?; - if file.compression() != CompressionMethod::Stored { - bail!(DecompressionFailure); - } - let mode = file.unix_mode(); - zstd::stream::copy_decode(file, to).or(Err(DecompressionFailure))?; - Ok(mode) - } - - /// Get the stdout from this cache entry, if it exists. - pub fn get_stdout(&mut self) -> Vec { - self.get_bytes("stdout") - } - - /// Get the stderr from this cache entry, if it exists. - pub fn get_stderr(&mut self) -> Vec { - self.get_bytes("stderr") - } - - fn get_bytes(&mut self, name: &str) -> Vec { - let mut bytes = Vec::new(); - drop(self.get_object(name, &mut bytes)); - bytes - } - - pub async fn extract_objects( - mut self, - objects: T, - pool: &tokio::runtime::Handle, - ) -> Result<()> - where - T: IntoIterator + Send + Sync + 'static, - { - pool.spawn_blocking(move || { - for FileObjectSource { - key, - path, - optional, - } in objects - { - let dir = match path.parent() { - Some(d) => d, - None => bail!("Output file without a parent directory!"), - }; - // Write the cache entry to a tempfile and then atomically - // move it to its final location so that other rustc invocations - // happening in parallel don't see a partially-written file. - let mut tmp = NamedTempFile::new_in(dir)?; - match (self.get_object(&key, &mut tmp), optional) { - (Ok(mode), _) => { - tmp.persist(&path)?; - if let Some(mode) = mode { - set_file_mode(&path, mode)?; - } - } - (Err(e), false) => return Err(e), - // skip if no object found and it's optional - (Err(_), true) => continue, - } - } - Ok(()) - }) - .await? - } -} - -/// Data to be stored in the compiler cache. -pub struct CacheWrite { - zip: ZipWriter>>, -} - -impl CacheWrite { - /// Create a new, empty cache entry. - pub fn new() -> CacheWrite { - CacheWrite { - zip: ZipWriter::new(io::Cursor::new(vec![])), - } - } - - /// Create a new cache entry populated with the contents of `objects`. - pub async fn from_objects(objects: T, pool: &tokio::runtime::Handle) -> Result - where - T: IntoIterator + Send + Sync + 'static, - { - pool.spawn_blocking(move || { - let mut entry = CacheWrite::new(); - for FileObjectSource { - key, - path, - optional, - } in objects - { - let f = fs::File::open(&path) - .with_context(|| format!("failed to open file `{:?}`", path)); - match (f, optional) { - (Ok(mut f), _) => { - let mode = get_file_mode(&f)?; - entry.put_object(&key, &mut f, mode).with_context(|| { - format!("failed to put object `{:?}` in cache entry", path) - })?; - } - (Err(e), false) => return Err(e), - (Err(_), true) => continue, - } - } - Ok(entry) - }) - .await? - } - - /// Add an object containing the contents of `from` to this cache entry at `name`. - /// If `mode` is `Some`, store the file entry with that mode. - pub fn put_object(&mut self, name: &str, from: &mut T, mode: Option) -> Result<()> - where - T: Read, - { - // We're going to declare the compression method as "stored", - // but we're actually going to store zstd-compressed blobs. - let opts = FileOptions::default().compression_method(CompressionMethod::Stored); - let opts = if let Some(mode) = mode { - opts.unix_permissions(mode) - } else { - opts - }; - self.zip - .start_file(name, opts) - .context("Failed to start cache entry object")?; - - let compression_level = std::env::var("SCCACHE_CACHE_ZSTD_LEVEL") - .ok() - .and_then(|value| value.parse::().ok()) - .unwrap_or(3); - zstd::stream::copy_encode(from, &mut self.zip, compression_level)?; - Ok(()) - } - - pub fn put_stdout(&mut self, bytes: &[u8]) -> Result<()> { - self.put_bytes("stdout", bytes) - } - - pub fn put_stderr(&mut self, bytes: &[u8]) -> Result<()> { - self.put_bytes("stderr", bytes) - } - - fn put_bytes(&mut self, name: &str, bytes: &[u8]) -> Result<()> { - if !bytes.is_empty() { - let mut cursor = Cursor::new(bytes); - return self.put_object(name, &mut cursor, None); - } - Ok(()) - } - - /// Finish writing data to the cache entry writer, and return the data. - pub fn finish(self) -> Result> { - let CacheWrite { mut zip } = self; - let cur = zip.finish().context("Failed to finish cache entry zip")?; - Ok(cur.into_inner()) - } -} - -impl Default for CacheWrite { - fn default() -> Self { - Self::new() - } -} - /// An interface to cache storage. #[async_trait] pub trait Storage: Send + Sync { @@ -817,6 +536,7 @@ pub fn storage_from_config( #[cfg(test)] mod test { use super::*; + use fs_err as fs; use crate::config::CacheModeConfig; #[test] diff --git a/src/cache/cache_io.rs b/src/cache/cache_io.rs new file mode 100644 index 000000000..c4c2fa134 --- /dev/null +++ b/src/cache/cache_io.rs @@ -0,0 +1,270 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use super::utils::{get_file_mode, set_file_mode}; +use crate::errors::*; +use fs_err as fs; +use std::fmt; +use std::io::{Cursor, Read, Seek, Write}; +use std::path::PathBuf; +use tempfile::NamedTempFile; +use zip::write::FileOptions; +use zip::{CompressionMethod, ZipArchive, ZipWriter}; + +/// Cache object sourced by a file. +#[derive(Clone)] +pub struct FileObjectSource { + /// Identifier for this object. Should be unique within a compilation unit. + /// Note that a compilation unit is a single source file in C/C++ and a crate in Rust. + pub key: String, + /// Absolute path to the file. + pub path: PathBuf, + /// Whether the file must be present on disk and is essential for the compilation. + pub optional: bool, +} + +/// Result of a cache lookup. +pub enum Cache { + /// Result was found in cache. + Hit(CacheRead), + /// Result was not found in cache. + Miss, + /// Do not cache the results of the compilation. + None, + /// Cache entry should be ignored, force compilation. + Recache, +} + +impl fmt::Debug for Cache { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self { + Cache::Hit(_) => write!(f, "Cache::Hit(...)"), + Cache::Miss => write!(f, "Cache::Miss"), + Cache::None => write!(f, "Cache::None"), + Cache::Recache => write!(f, "Cache::Recache"), + } + } +} + +/// CacheMode is used to represent which mode we are using. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum CacheMode { + /// Only read cache from storage. + ReadOnly, + /// Full support of cache storage: read and write. + ReadWrite, +} + +/// Trait objects can't be bounded by more than one non-builtin trait. +pub trait ReadSeek: Read + Seek + Send {} + +impl ReadSeek for T {} + +/// Data stored in the compiler cache. +pub struct CacheRead { + zip: ZipArchive>, +} + +/// Represents a failure to decompress stored object data. +#[derive(Debug)] +pub struct DecompressionFailure; + +impl std::fmt::Display for DecompressionFailure { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "failed to decompress content") + } +} + +impl std::error::Error for DecompressionFailure {} + +impl CacheRead { + /// Create a cache entry from `reader`. + pub fn from(reader: R) -> Result + where + R: ReadSeek + 'static, + { + let z = ZipArchive::new(Box::new(reader) as Box) + .context("Failed to parse cache entry")?; + Ok(CacheRead { zip: z }) + } + + /// Get an object from this cache entry at `name` and write it to `to`. + /// If the file has stored permissions, return them. + pub fn get_object(&mut self, name: &str, to: &mut T) -> Result> + where + T: Write, + { + let file = self.zip.by_name(name).or(Err(DecompressionFailure))?; + if file.compression() != CompressionMethod::Stored { + bail!(DecompressionFailure); + } + let mode = file.unix_mode(); + zstd::stream::copy_decode(file, to).or(Err(DecompressionFailure))?; + Ok(mode) + } + + /// Get the stdout from this cache entry, if it exists. + pub fn get_stdout(&mut self) -> Vec { + self.get_bytes("stdout") + } + + /// Get the stderr from this cache entry, if it exists. + pub fn get_stderr(&mut self) -> Vec { + self.get_bytes("stderr") + } + + fn get_bytes(&mut self, name: &str) -> Vec { + let mut bytes = Vec::new(); + drop(self.get_object(name, &mut bytes)); + bytes + } + + pub async fn extract_objects( + mut self, + objects: T, + pool: &tokio::runtime::Handle, + ) -> Result<()> + where + T: IntoIterator + Send + Sync + 'static, + { + pool.spawn_blocking(move || { + for FileObjectSource { + key, + path, + optional, + } in objects + { + let dir = match path.parent() { + Some(d) => d, + None => bail!("Output file without a parent directory!"), + }; + // Write the cache entry to a tempfile and then atomically + // move it to its final location so that other rustc invocations + // happening in parallel don't see a partially-written file. + let mut tmp = NamedTempFile::new_in(dir)?; + match (self.get_object(&key, &mut tmp), optional) { + (Ok(mode), _) => { + tmp.persist(&path)?; + if let Some(mode) = mode { + set_file_mode(&path, mode)?; + } + } + (Err(e), false) => return Err(e), + // skip if no object found and it's optional + (Err(_), true) => continue, + } + } + Ok(()) + }) + .await? + } +} + +/// Data to be stored in the compiler cache. +pub struct CacheWrite { + zip: ZipWriter>>, +} + +impl CacheWrite { + /// Create a new, empty cache entry. + pub fn new() -> CacheWrite { + CacheWrite { + zip: ZipWriter::new(Cursor::new(vec![])), + } + } + + /// Create a new cache entry populated with the contents of `objects`. + pub async fn from_objects(objects: T, pool: &tokio::runtime::Handle) -> Result + where + T: IntoIterator + Send + Sync + 'static, + { + pool.spawn_blocking(move || { + let mut entry = CacheWrite::new(); + for FileObjectSource { + key, + path, + optional, + } in objects + { + let f = fs::File::open(&path) + .with_context(|| format!("failed to open file `{:?}`", path)); + match (f, optional) { + (Ok(mut f), _) => { + let mode = get_file_mode(&f)?; + entry.put_object(&key, &mut f, mode).with_context(|| { + format!("failed to put object `{:?}` in cache entry", path) + })?; + } + (Err(e), false) => return Err(e), + (Err(_), true) => continue, + } + } + Ok(entry) + }) + .await? + } + + /// Add an object containing the contents of `from` to this cache entry at `name`. + /// If `mode` is `Some`, store the file entry with that mode. + pub fn put_object(&mut self, name: &str, from: &mut T, mode: Option) -> Result<()> + where + T: Read, + { + // We're going to declare the compression method as "stored", + // but we're actually going to store zstd-compressed blobs. + let opts = FileOptions::default().compression_method(CompressionMethod::Stored); + let opts = if let Some(mode) = mode { + opts.unix_permissions(mode) + } else { + opts + }; + self.zip + .start_file(name, opts) + .context("Failed to start cache entry object")?; + + let compression_level = std::env::var("SCCACHE_CACHE_ZSTD_LEVEL") + .ok() + .and_then(|value| value.parse::().ok()) + .unwrap_or(3); + zstd::stream::copy_encode(from, &mut self.zip, compression_level)?; + Ok(()) + } + + pub fn put_stdout(&mut self, bytes: &[u8]) -> Result<()> { + self.put_bytes("stdout", bytes) + } + + pub fn put_stderr(&mut self, bytes: &[u8]) -> Result<()> { + self.put_bytes("stderr", bytes) + } + + fn put_bytes(&mut self, name: &str, bytes: &[u8]) -> Result<()> { + if !bytes.is_empty() { + let mut cursor = Cursor::new(bytes); + return self.put_object(name, &mut cursor, None); + } + Ok(()) + } + + /// Finish writing data to the cache entry writer, and return the data. + pub fn finish(self) -> Result> { + let CacheWrite { mut zip } = self; + let cur = zip.finish().context("Failed to finish cache entry zip")?; + Ok(cur.into_inner()) + } +} + +impl Default for CacheWrite { + fn default() -> Self { + Self::new() + } +} diff --git a/src/cache/mod.rs b/src/cache/mod.rs index c23be2304..643b07333 100644 --- a/src/cache/mod.rs +++ b/src/cache/mod.rs @@ -16,6 +16,7 @@ pub mod azure; #[allow(clippy::module_inception)] pub mod cache; +pub mod cache_io; #[cfg(feature = "cos")] pub mod cos; pub mod disk; @@ -34,6 +35,7 @@ pub mod readonly; pub mod redis; #[cfg(feature = "s3")] pub mod s3; +pub(crate) mod utils; #[cfg(feature = "webdav")] pub mod webdav; @@ -49,4 +51,5 @@ pub mod webdav; pub(crate) mod http_client; pub use crate::cache::cache::*; +pub use crate::cache::cache_io::*; pub use crate::cache::lazy_disk_cache::*; diff --git a/src/cache/utils.rs b/src/cache/utils.rs new file mode 100644 index 000000000..38a8d08a0 --- /dev/null +++ b/src/cache/utils.rs @@ -0,0 +1,44 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use fs_err as fs; + +use std::path::Path; + +use crate::errors::*; + +#[cfg(unix)] +pub(in crate::cache) fn get_file_mode(file: &fs::File) -> Result> { + use std::os::unix::fs::MetadataExt; + Ok(Some(file.metadata()?.mode())) +} + +#[cfg(windows)] +#[allow(clippy::unnecessary_wraps)] +pub(in crate::cache) fn get_file_mode(_file: &fs::File) -> Result> { + Ok(None) +} + +#[cfg(unix)] +pub(in crate::cache) fn set_file_mode(path: &Path, mode: u32) -> Result<()> { + use std::fs::Permissions; + use std::os::unix::fs::PermissionsExt; + let p = Permissions::from_mode(mode); + fs::set_permissions(path, p)?; + Ok(()) +} + +#[cfg(windows)] +#[allow(clippy::unnecessary_wraps)] +pub(in crate::cache) fn set_file_mode(_path: &Path, _mode: u32) -> Result<()> { + Ok(()) +} From bac6e92738420f9a852285b1fe2d9f227a646e16 Mon Sep 17 00:00:00 2001 From: Xiaoge Su Date: Wed, 28 Jan 2026 14:05:14 -0800 Subject: [PATCH 2/3] fixup! rustfmt update --- src/cache/cache.rs | 2 +- tests/integration/randomize_readdir/src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cache/cache.rs b/src/cache/cache.rs index 37dcaf74c..d376cf687 100644 --- a/src/cache/cache.rs +++ b/src/cache/cache.rs @@ -536,8 +536,8 @@ pub fn storage_from_config( #[cfg(test)] mod test { use super::*; - use fs_err as fs; use crate::config::CacheModeConfig; + use fs_err as fs; #[test] fn test_normalize_key() { diff --git a/tests/integration/randomize_readdir/src/lib.rs b/tests/integration/randomize_readdir/src/lib.rs index b95f9d2af..a76896e28 100644 --- a/tests/integration/randomize_readdir/src/lib.rs +++ b/tests/integration/randomize_readdir/src/lib.rs @@ -46,7 +46,7 @@ use ctor::ctor; use libc::dirent as dirent64; #[cfg(not(target_vendor = "apple"))] use libc::dirent64; -use libc::{c_char, c_int, c_void, dirent, dlsym, DIR, RTLD_NEXT}; +use libc::{DIR, RTLD_NEXT, c_char, c_int, c_void, dirent, dlsym}; use log::{error, info}; use rand::seq::SliceRandom; use rand::thread_rng; From 3bb6737df91153ba18307050ef2c53c15af55303 Mon Sep 17 00:00:00 2001 From: Xiaoge Su Date: Wed, 28 Jan 2026 14:38:45 -0800 Subject: [PATCH 3/3] fixup! cargo fmt --- tests/integration/randomize_readdir/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/randomize_readdir/src/lib.rs b/tests/integration/randomize_readdir/src/lib.rs index a76896e28..b95f9d2af 100644 --- a/tests/integration/randomize_readdir/src/lib.rs +++ b/tests/integration/randomize_readdir/src/lib.rs @@ -46,7 +46,7 @@ use ctor::ctor; use libc::dirent as dirent64; #[cfg(not(target_vendor = "apple"))] use libc::dirent64; -use libc::{DIR, RTLD_NEXT, c_char, c_int, c_void, dirent, dlsym}; +use libc::{c_char, c_int, c_void, dirent, dlsym, DIR, RTLD_NEXT}; use log::{error, info}; use rand::seq::SliceRandom; use rand::thread_rng;