diff --git a/Cargo.toml b/Cargo.toml index bf8a0aeb..7b15d8d6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,4 +1,4 @@ [workspace] -members = ["akd", "akd_core", "examples", "xtask"] +members = ["akd", "akd_core", "examples", "akd_traits", "xtask"] resolver = "2" diff --git a/akd/Cargo.toml b/akd/Cargo.toml index a283701d..0e5f921f 100644 --- a/akd/Cargo.toml +++ b/akd/Cargo.toml @@ -23,7 +23,7 @@ default = [ "experimental", ] -bench = ["experimental", "public_tests", "tokio/rt-multi-thread"] +bench = ["experimental", "public_tests", "tokio/rt-multi-thread", "akd_traits/bench"] # Greedy loading of lookup proof nodes greedy_lookup_preload = [] public_auditing = ["dep:protobuf", "akd_core/protobuf"] @@ -56,6 +56,7 @@ tracing_instrument = ["tracing/attributes"] akd_core = { version = "0.12.0-pre.12", path = "../akd_core", default-features = false, features = [ "vrf", ] } +akd_traits = { path = "../akd_traits" } async-recursion = "1" async-trait = "0.1" dashmap = "5" @@ -102,3 +103,8 @@ required-features = ["bench"] name = "directory" harness = false required-features = ["bench"] + +[[bench]] +name = "kd_benches" +harness = false +required-features = ["bench"] diff --git a/akd/benches/kd_benches.rs b/akd/benches/kd_benches.rs new file mode 100644 index 00000000..9e9f855e --- /dev/null +++ b/akd/benches/kd_benches.rs @@ -0,0 +1,80 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is dual-licensed under either the MIT license found in the +// LICENSE-MIT file in the root directory of this source tree or the Apache +// License, Version 2.0 found in the LICENSE-APACHE file in the root directory +// of this source tree. You may select, at your option, one of the above-listed licenses. 
+ +use akd::append_only_zks::AzksParallelismConfig; +use akd::ecvrf::HardCodedAkdVRF; +use akd::storage::manager::StorageManager; +use akd::storage::memory::AsyncInMemoryDatabase; +use akd::{AkdLabel, AkdValue, Directory, LookupProof}; +use async_trait::async_trait; +use criterion::Criterion; +use rand::distributions::Alphanumeric; +use rand::rngs::StdRng; +use rand::{Rng, SeedableRng}; + +type Config = akd::WhatsAppV1Configuration; + +struct AkdBenchSetup; + +#[async_trait] +impl akd_traits::bench::BenchmarkSetup for AkdBenchSetup { + type Directory = Directory; + + async fn create_directory() -> Self::Directory { + let db = AsyncInMemoryDatabase::new(); + let storage = StorageManager::new_no_cache(db); + let vrf = HardCodedAkdVRF {}; + Directory::::new(storage, vrf, AzksParallelismConfig::default()) + .await + .unwrap() + } + + fn generate_test_data(num_entries: usize, seed: u64) -> Vec<(AkdLabel, AkdValue)> { + let mut rng = StdRng::seed_from_u64(seed); + (0..num_entries) + .map(|i| { + let label = format!("user_{}", i); + let value: String = (0..16) + .map(|_| rng.sample(Alphanumeric)) + .map(char::from) + .collect(); + (AkdLabel::from(&label), AkdValue::from(&value)) + }) + .collect() + } + + fn name() -> &'static str { + "AKD (WhatsAppV1)" + } +} + +/// Compute the approximate size of an AKD lookup proof in bytes. 
+fn akd_lookup_proof_size(proof: &LookupProof) -> usize { + proof.existence_vrf_proof.len() + + proof.marker_vrf_proof.len() + + proof.freshness_vrf_proof.len() + + proof.commitment_nonce.len() + + std::mem::size_of_val(&proof.existence_proof) + + std::mem::size_of_val(&proof.marker_proof) + + std::mem::size_of_val(&proof.freshness_proof) +} + +fn main() { + let mut criterion = Criterion::default().configure_from_args(); + + akd_traits::bench::bench_publish::(&mut criterion); + akd_traits::bench::bench_lookup::(&mut criterion); + akd_traits::bench::bench_lookup_verify::( + &mut criterion, + akd_lookup_proof_size, + ); + akd_traits::bench::bench_key_history::(&mut criterion); + akd_traits::bench::bench_audit::(&mut criterion); + akd_traits::bench::bench_audit_verify::(&mut criterion); + + criterion.final_summary(); +} diff --git a/akd/src/directory.rs b/akd/src/directory.rs index f72c8201..95dd8ef8 100644 --- a/akd/src/directory.rs +++ b/akd/src/directory.rs @@ -22,8 +22,11 @@ use crate::{ use crate::VersionFreshness; use akd_core::configuration::Configuration; +use akd_core::types::VerifyResult; use akd_core::utils::get_marker_versions; use akd_core::verify::history::HistoryParams; +use akd_traits::KeyDirectory; +use async_trait::async_trait; use std::collections::{HashMap, HashSet}; use std::marker::PhantomData; use std::sync::Arc; @@ -847,6 +850,102 @@ where } } +#[async_trait] +impl KeyDirectory for Directory +where + TC: Configuration, + S: Database + 'static, + V: VRFKeyStorage, +{ + type LookupProof = LookupProof; + type HistoryProof = HistoryProof; + type AuditProof = AppendOnlyProof; + type PublicKey = VRFPublicKey; + type HistoryParams = akd_core::verify::history::HistoryParams; + type HistoryVerificationParams = crate::client::HistoryVerificationParams; + type Error = AkdError; + + async fn publish(&self, updates: Vec<(AkdLabel, AkdValue)>) -> Result { + Directory::publish(self, updates).await + } + + async fn lookup(&self, label: AkdLabel) -> 
Result<(LookupProof, EpochHash), AkdError> { + Directory::lookup(self, label).await + } + + async fn batch_lookup( + &self, + labels: &[AkdLabel], + ) -> Result<(Vec, EpochHash), AkdError> { + Directory::batch_lookup(self, labels).await + } + + async fn key_history( + &self, + label: &AkdLabel, + params: akd_core::verify::history::HistoryParams, + ) -> Result<(HistoryProof, EpochHash), AkdError> { + Directory::key_history(self, label, params).await + } + + async fn audit(&self, start_epoch: u64, end_epoch: u64) -> Result { + Directory::audit(self, start_epoch, end_epoch).await + } + + async fn get_public_key(&self) -> Result { + Directory::get_public_key(self).await + } + + async fn get_epoch_hash(&self) -> Result { + Directory::get_epoch_hash(self).await + } + + fn lookup_verify( + public_key: &VRFPublicKey, + root_hash: Digest, + current_epoch: u64, + label: AkdLabel, + proof: LookupProof, + ) -> Result { + akd_core::verify::lookup::lookup_verify::( + public_key.as_bytes(), + root_hash, + current_epoch, + label, + proof, + ) + .map_err(|e| akd_traits::KeyDirectoryError::Verification(format!("{e:?}"))) + } + + fn key_history_verify( + public_key: &VRFPublicKey, + root_hash: Digest, + current_epoch: u64, + label: AkdLabel, + proof: HistoryProof, + params: crate::client::HistoryVerificationParams, + ) -> Result, akd_traits::KeyDirectoryError> { + akd_core::verify::history::key_history_verify::( + public_key.as_bytes(), + root_hash, + current_epoch, + label, + proof, + params, + ) + .map_err(|e| akd_traits::KeyDirectoryError::Verification(format!("{e:?}"))) + } + + async fn audit_verify( + hashes: Vec, + proof: AppendOnlyProof, + ) -> Result<(), akd_traits::KeyDirectoryError> { + crate::auditor::audit_verify::(hashes, proof) + .await + .map_err(|e| e.into()) + } +} + /// A thin newtype which offers read-only interactivity with a [Directory]. 
#[derive(Clone)] pub struct ReadOnlyDirectory(Directory) diff --git a/akd/src/errors.rs b/akd/src/errors.rs index cfb0aece..426d5b10 100644 --- a/akd/src/errors.rs +++ b/akd/src/errors.rs @@ -338,3 +338,18 @@ impl fmt::Display for ParallelismError { } } } + +impl From for akd_traits::KeyDirectoryError { + fn from(e: AkdError) -> Self { + match e { + AkdError::TreeNode(e) => akd_traits::KeyDirectoryError::Directory(format!("{e:?}")), + AkdError::Directory(e) => akd_traits::KeyDirectoryError::Directory(format!("{e:?}")), + AkdError::AzksErr(e) => akd_traits::KeyDirectoryError::Directory(format!("{e:?}")), + AkdError::Vrf(e) => akd_traits::KeyDirectoryError::Directory(format!("{e:?}")), + AkdError::Storage(e) => akd_traits::KeyDirectoryError::Storage(format!("{e:?}")), + AkdError::AuditErr(e) => akd_traits::KeyDirectoryError::Audit(format!("{e:?}")), + AkdError::Parallelism(e) => akd_traits::KeyDirectoryError::Other(format!("{e:?}")), + AkdError::TestErr(s) => akd_traits::KeyDirectoryError::Other(s), + } + } +} diff --git a/akd/src/helper_structs.rs b/akd/src/helper_structs.rs index 707e5329..54c9a417 100644 --- a/akd/src/helper_structs.rs +++ b/akd/src/helper_structs.rs @@ -8,24 +8,8 @@ //! Helper structs that are used for various data structures, //! to make it easier to pass arguments around. 
-use crate::Digest; use crate::{storage::types::ValueState, NodeLabel}; -/// Root hash of the tree and its associated epoch -#[derive(Debug, Clone, Hash, PartialEq, Eq)] -pub struct EpochHash(pub u64, pub Digest); - -impl EpochHash { - /// Get the contained epoch - pub fn epoch(&self) -> u64 { - self.0 - } - /// Get the contained hash - pub fn hash(&self) -> Digest { - self.1 - } -} - #[derive(Clone, Debug)] /// Info needed for a lookup of a user for an epoch pub struct LookupInfo { diff --git a/akd/src/lib.rs b/akd/src/lib.rs index 1cbb7016..cc5193ce 100644 --- a/akd/src/lib.rs +++ b/akd/src/lib.rs @@ -565,10 +565,10 @@ pub use akd_core::{ mod utils; // ========== Type re-exports which are commonly used ========== // +pub use akd_traits::KeyDirectory; pub use append_only_zks::{Azks, AzksParallelismConfig, AzksParallelismOption}; pub use client::HistoryVerificationParams; pub use directory::Directory; -pub use helper_structs::EpochHash; // ========== Constants and type aliases ========== // #[cfg(any(test, feature = "public_tests"))] diff --git a/akd_core/Cargo.toml b/akd_core/Cargo.toml index ff9f539d..b288e1ac 100644 --- a/akd_core/Cargo.toml +++ b/akd_core/Cargo.toml @@ -23,19 +23,21 @@ whatsapp_v1 = ["dep:blake3"] experimental = ["dep:blake3"] # Include the VRF verification logic vrf = ["ed25519-dalek", "curve25519-dalek"] -serde_serialization = ["dep:serde", "dep:serde_bytes", "ed25519-dalek/serde"] +serde_serialization = ["dep:serde", "dep:serde_bytes", "ed25519-dalek/serde", "akd_traits/serde"] # Parallelize VRF calculations during publish parallel_vrf = ["tokio"] bench = ["parallel_vrf", "experimental", "vrf", "tokio/rt-multi-thread"] public_tests = ["dep:paste"] protobuf = ["dep:protobuf"] +rand = ["dep:rand", "akd_traits/rand"] # Default features mix default = ["vrf", "experimental"] [dependencies] ## Required dependencies ## +akd_traits = { path = "../akd_traits", default-features = false } async-trait = "0.1" curve25519-dalek = { version = "4", optional 
= true } ed25519-dalek = { version = "2", features = [ diff --git a/akd_core/src/types/mod.rs b/akd_core/src/types/mod.rs index 5c8fdefa..4c192ae3 100644 --- a/akd_core/src/types/mod.rs +++ b/akd_core/src/types/mod.rs @@ -14,10 +14,7 @@ use crate::hash::Digest; #[cfg(feature = "serde_serialization")] -use crate::utils::serde_helpers::{ - azks_value_hex_deserialize, azks_value_hex_serialize, bytes_deserialize_hex, - bytes_serialize_hex, -}; +use crate::utils::serde_helpers::{azks_value_hex_deserialize, azks_value_hex_serialize}; use crate::ARITY; #[cfg(feature = "nostd")] @@ -26,8 +23,6 @@ use alloc::string::{String, ToString}; use alloc::vec::Vec; #[cfg(feature = "nostd")] use core::cmp::{Ord, Ordering, PartialOrd}; -#[cfg(feature = "rand")] -use rand::{CryptoRng, Rng}; #[cfg(not(feature = "nostd"))] use std::cmp::{Ord, Ordering, PartialOrd}; @@ -153,126 +148,27 @@ impl Direction { } } -/// The label of a particular entry in the AKD -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[cfg_attr( - feature = "serde_serialization", - derive(serde::Serialize, serde::Deserialize) -)] -pub struct AkdLabel( - #[cfg_attr( - feature = "serde_serialization", - serde(serialize_with = "bytes_serialize_hex") - )] - #[cfg_attr( - feature = "serde_serialization", - serde(deserialize_with = "bytes_deserialize_hex") - )] - pub Vec, -); +// Re-export types from akd_traits +/// Backward-compatible re-export for [`DirectoryLabel`] +pub use akd_traits::types::DirectoryLabel as AkdLabel; +/// Backward-compatible re-export for [`DirectoryValue`] +pub use akd_traits::types::DirectoryValue as AkdValue; +pub use akd_traits::types::{DirectoryLabel, DirectoryValue, EpochHash, VerifyResult}; -impl SizeOf for AkdLabel { +/// The label of a particular entry in the AKD +impl SizeOf for DirectoryLabel { fn size_of(&self) -> usize { self.0.len() } } -impl core::ops::Deref for AkdLabel { - type Target = Vec; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl 
core::ops::DerefMut for AkdLabel { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.0 - } -} - -impl core::convert::From<&str> for AkdLabel { - fn from(s: &str) -> Self { - Self(s.as_bytes().to_vec()) - } -} - -impl core::convert::From<&String> for AkdLabel { - fn from(s: &String) -> Self { - Self(s.as_bytes().to_vec()) - } -} - -impl AkdLabel { - #[cfg(feature = "rand")] - /// Gets a random label - pub fn random(rng: &mut R) -> Self { - let mut bytes = [0u8; 32]; - rng.fill_bytes(&mut bytes); - Self(bytes.to_vec()) - } -} - /// The value of a particular entry in the AKD -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -#[cfg_attr( - feature = "serde_serialization", - derive(serde::Serialize, serde::Deserialize) -)] -pub struct AkdValue( - #[cfg_attr( - feature = "serde_serialization", - serde(serialize_with = "bytes_serialize_hex") - )] - #[cfg_attr( - feature = "serde_serialization", - serde(deserialize_with = "bytes_deserialize_hex") - )] - pub Vec, -); - -impl SizeOf for AkdValue { +impl SizeOf for DirectoryValue { fn size_of(&self) -> usize { self.0.len() } } -impl core::ops::Deref for AkdValue { - type Target = Vec; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl core::ops::DerefMut for AkdValue { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.0 - } -} - -impl core::convert::From<&str> for AkdValue { - fn from(s: &str) -> Self { - Self(s.as_bytes().to_vec()) - } -} - -impl core::convert::From<&String> for AkdValue { - fn from(s: &String) -> Self { - Self(s.as_bytes().to_vec()) - } -} - -impl AkdValue { - #[cfg(feature = "rand")] - /// Gets a random value for a AKD - pub fn random(rng: &mut R) -> Self { - let mut bytes = [0u8; 32]; - rng.fill_bytes(&mut bytes); - Self(bytes.to_vec()) - } -} - /// The value to be hashed every time an empty node's hash is to be considered pub const EMPTY_VALUE: [u8; 1] = [0u8]; @@ -306,7 +202,7 @@ pub struct AzksValueWithEpoch(pub Digest); /// Represents an element to be 
inserted into the AZKS. This /// is a pair consisting of a label ([NodeLabel]) and a value. /// The purpose of the directory publish is to convert an -/// insertion set of ([AkdLabel], [AkdValue]) tuples into a +/// insertion set of ([DirectoryLabel], [DirectoryValue]) tuples into a /// set of [AzksElement]s, which are then inserted into /// the AZKS. #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -494,24 +390,6 @@ pub struct HistoryProof { pub non_existence_of_future_marker_proofs: Vec, } -/// The payload that is outputted as a result of successful verification of -/// a [LookupProof] or [HistoryProof]. This includes the fields containing the -/// epoch that the leaf was published in, the version corresponding to the value, -/// and the value itself. -#[derive(Debug, Clone, PartialEq, Eq)] -#[cfg_attr( - feature = "serde_serialization", - derive(serde::Deserialize, serde::Serialize) -)] -pub struct VerifyResult { - /// The epoch of this record - pub epoch: u64, - /// Version at this update - pub version: u64, - /// The plaintext value associated with the record - pub value: AkdValue, -} - /// Proof that no leaves were deleted from the initial epoch. /// This means that unchanged_nodes should hash to the initial root hash /// and the vec of inserted is the set of leaves inserted between these epochs. 
diff --git a/akd_core/src/verify/history.rs b/akd_core/src/verify/history.rs index 19e4983a..b675564d 100644 --- a/akd_core/src/verify/history.rs +++ b/akd_core/src/verify/history.rs @@ -41,7 +41,7 @@ impl Default for HistoryParams { } /// Parameters for customizing how history proof verification proceeds -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Debug)] pub enum HistoryVerificationParams { /// No customization to the verification procedure Default { diff --git a/akd_traits/Cargo.toml b/akd_traits/Cargo.toml new file mode 100644 index 00000000..224b2eb9 --- /dev/null +++ b/akd_traits/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "akd_traits" +version = "0.12.0-pre.12" +authors = ["akd contributors"] +description = "Abstract trait and framework for key directories" +license = "MIT OR Apache-2.0" +edition = "2021" +repository = "https://github.com/facebook/akd" +publish = false + +[features] +bench = ["rand", "dep:criterion", "dep:tokio"] +rand = ["dep:rand"] +serde = ["dep:serde", "dep:hex"] + +[dependencies] +async-trait = "0.1" +criterion = { version = "0.5", optional = true } +hex = { version = "0.4", optional = true } +rand = { version = "0.8", optional = true } +serde = { version = "1", features = ["derive"], optional = true } +tokio = { version = "1", features = ["rt-multi-thread"], optional = true } diff --git a/akd_traits/src/bench/criterion_benches.rs b/akd_traits/src/bench/criterion_benches.rs new file mode 100644 index 00000000..53fccb86 --- /dev/null +++ b/akd_traits/src/bench/criterion_benches.rs @@ -0,0 +1,206 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is dual-licensed under either the MIT license found in the +// LICENSE-MIT file in the root directory of this source tree or the Apache +// License, Version 2.0 found in the LICENSE-APACHE file in the root directory +// of this source tree. You may select, at your option, one of the above-listed licenses. + +//! 
Criterion benchmark functions for key directory implementations. + +use super::BenchmarkSetup; +use crate::traits::KeyDirectory; +use criterion::{BatchSize, Criterion}; + +/// Register publish benchmarks. +pub fn bench_publish(c: &mut Criterion) { + let runtime = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build() + .unwrap(); + + let num_entries = 1000; + let id = format!("KD publish ({} entries) [{}]", num_entries, S::name()); + c.bench_function(&id, |b| { + b.iter_batched( + || { + let dir = runtime.block_on(S::create_directory()); + let data = S::generate_test_data(num_entries, 42); + (dir, data) + }, + |(dir, data)| { + runtime.block_on(dir.publish(data)).unwrap(); + }, + BatchSize::PerIteration, + ); + }); +} + +/// Register lookup benchmarks. +pub fn bench_lookup(c: &mut Criterion) { + let runtime = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build() + .unwrap(); + + let num_entries = 1000; + let id = format!("KD lookup ({} entries) [{}]", num_entries, S::name()); + c.bench_function(&id, |b| { + b.iter_batched( + || { + let dir = runtime.block_on(S::create_directory()); + let data = S::generate_test_data(num_entries, 42); + runtime.block_on(dir.publish(data.clone())).unwrap(); + let label = data[0].0.clone(); + (dir, label) + }, + |(dir, label)| { + runtime.block_on(dir.lookup(label)).unwrap(); + }, + BatchSize::PerIteration, + ); + }); +} + +/// Register lookup verification benchmarks. +/// +/// The `proof_size_fn` parameter computes the size of a lookup proof in bytes. +/// This avoids orphan-rule issues by letting the caller provide the sizing logic. 
+pub fn bench_lookup_verify(c: &mut Criterion, proof_size_fn: F) +where + S: BenchmarkSetup, + F: Fn(&::LookupProof) -> usize, +{ + let runtime = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build() + .unwrap(); + + let num_entries = 1000; + let id = format!("KD lookup_verify ({} entries) [{}]", num_entries, S::name()); + c.bench_function(&id, |b| { + b.iter_batched( + || { + let dir = runtime.block_on(S::create_directory()); + let data = S::generate_test_data(num_entries, 42); + runtime.block_on(dir.publish(data.clone())).unwrap(); + let label = data[0].0.clone(); + let (proof, epoch_hash) = runtime.block_on(dir.lookup(label.clone())).unwrap(); + let pk = runtime.block_on(dir.get_public_key()).unwrap(); + + // Print proof size on first iteration + eprintln!(" Lookup proof size: {} bytes", proof_size_fn(&proof)); + + (pk, epoch_hash, label, proof) + }, + |(pk, epoch_hash, label, proof)| { + ::lookup_verify( + &pk, + epoch_hash.hash(), + epoch_hash.epoch(), + label, + proof, + ) + .unwrap(); + }, + BatchSize::PerIteration, + ); + }); +} + +/// Register key history benchmarks. 
+pub fn bench_key_history(c: &mut Criterion) { + let runtime = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build() + .unwrap(); + + let num_entries = 100; + let num_epochs = 5; + let id = format!( + "KD key_history ({} entries, {} epochs) [{}]", + num_entries, + num_epochs, + S::name() + ); + c.bench_function(&id, |b| { + b.iter_batched( + || { + let dir = runtime.block_on(S::create_directory()); + let label = { + let data = S::generate_test_data(num_entries, 42); + let label = data[0].0.clone(); + runtime.block_on(dir.publish(data)).unwrap(); + label + }; + // Publish additional epochs with updated values + for epoch in 1..num_epochs { + let data = S::generate_test_data(num_entries, 42 + epoch as u64); + runtime.block_on(dir.publish(data)).unwrap(); + } + (dir, label) + }, + |(dir, label)| { + let params = Default::default(); + runtime.block_on(dir.key_history(&label, params)).unwrap(); + }, + BatchSize::PerIteration, + ); + }); +} + +/// Register audit proof generation benchmarks. +pub fn bench_audit(c: &mut Criterion) { + let runtime = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build() + .unwrap(); + + let num_entries = 1000; + let id = format!("KD audit ({} entries) [{}]", num_entries, S::name()); + c.bench_function(&id, |b| { + b.iter_batched( + || { + let dir = runtime.block_on(S::create_directory()); + let data1 = S::generate_test_data(num_entries, 42); + runtime.block_on(dir.publish(data1)).unwrap(); + let data2 = S::generate_test_data(num_entries, 43); + runtime.block_on(dir.publish(data2)).unwrap(); + dir + }, + |dir| { + runtime.block_on(dir.audit(1, 2)).unwrap(); + }, + BatchSize::PerIteration, + ); + }); +} + +/// Register audit verification benchmarks. 
+pub fn bench_audit_verify(c: &mut Criterion) { + let runtime = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build() + .unwrap(); + + let num_entries = 1000; + let id = format!("KD audit_verify ({} entries) [{}]", num_entries, S::name()); + c.bench_function(&id, |b| { + b.iter_batched( + || { + let dir = runtime.block_on(S::create_directory()); + let data1 = S::generate_test_data(num_entries, 42); + let eh1 = runtime.block_on(dir.publish(data1)).unwrap(); + let data2 = S::generate_test_data(num_entries, 43); + let eh2 = runtime.block_on(dir.publish(data2)).unwrap(); + let proof = runtime.block_on(dir.audit(1, 2)).unwrap(); + (vec![eh1.hash(), eh2.hash()], proof) + }, + |(hashes, proof)| { + runtime + .block_on(::audit_verify(hashes, proof)) + .unwrap(); + }, + BatchSize::PerIteration, + ); + }); +} diff --git a/akd_traits/src/bench/mod.rs b/akd_traits/src/bench/mod.rs new file mode 100644 index 00000000..3dc4da08 --- /dev/null +++ b/akd_traits/src/bench/mod.rs @@ -0,0 +1,38 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is dual-licensed under either the MIT license found in the +// LICENSE-MIT file in the root directory of this source tree or the Apache +// License, Version 2.0 found in the LICENSE-APACHE file in the root directory +// of this source tree. You may select, at your option, one of the above-listed licenses. + +//! Generic benchmarking harness for key directory implementations. + +pub mod stats; + +pub use stats::ProofSizeOf; + +use crate::types::{DirectoryLabel, DirectoryValue}; +use async_trait::async_trait; + +use crate::traits::KeyDirectory; + +/// Trait that key directory implementations provide to set up benchmark fixtures. +#[async_trait] +pub trait BenchmarkSetup: 'static { + /// The key directory type being benchmarked. + type Directory: KeyDirectory; + + /// Create a fresh directory instance for benchmarking. 
+ async fn create_directory() -> Self::Directory; + + /// Generate a batch of deterministic test label-value pairs. + fn generate_test_data(num_entries: usize, seed: u64) -> Vec<(DirectoryLabel, DirectoryValue)>; + + /// A descriptive name for this implementation (used in benchmark names). + fn name() -> &'static str; +} + +#[cfg(feature = "bench")] +mod criterion_benches; +#[cfg(feature = "bench")] +pub use criterion_benches::*; diff --git a/akd_traits/src/bench/stats.rs b/akd_traits/src/bench/stats.rs new file mode 100644 index 00000000..4557a2a1 --- /dev/null +++ b/akd_traits/src/bench/stats.rs @@ -0,0 +1,14 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is dual-licensed under either the MIT license found in the +// LICENSE-MIT file in the root directory of this source tree or the Apache +// License, Version 2.0 found in the LICENSE-APACHE file in the root directory +// of this source tree. You may select, at your option, one of the above-listed licenses. + +//! Proof size measurement and benchmark statistics. + +/// Trait for measuring the size of a proof in bytes. +pub trait ProofSizeOf { + /// Returns the size of the proof in bytes. + fn proof_size(&self) -> usize; +} diff --git a/akd_traits/src/errors.rs b/akd_traits/src/errors.rs new file mode 100644 index 00000000..7affeba8 --- /dev/null +++ b/akd_traits/src/errors.rs @@ -0,0 +1,39 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is dual-licensed under either the MIT license found in the +// LICENSE-MIT file in the root directory of this source tree or the Apache +// License, Version 2.0 found in the LICENSE-APACHE file in the root directory +// of this source tree. You may select, at your option, one of the above-listed licenses. + +//! Error types for the key directory framework. + +/// Generic error type for key directory operations. 
+#[derive(Debug)] +pub enum KeyDirectoryError { + /// Server-side directory operation error + Directory(String), + /// Storage layer error + Storage(String), + /// Verification error + Verification(String), + /// Audit error + Audit(String), + /// Other error + Other(String), +} + +impl std::fmt::Display for KeyDirectoryError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + KeyDirectoryError::Directory(s) => write!(f, "Key directory error: {s}"), + KeyDirectoryError::Storage(s) => write!(f, "Key directory storage error: {s}"), + KeyDirectoryError::Verification(s) => { + write!(f, "Key directory verification error: {s}") + } + KeyDirectoryError::Audit(s) => write!(f, "Key directory audit error: {s}"), + KeyDirectoryError::Other(s) => write!(f, "Key directory error: {s}"), + } + } +} + +impl std::error::Error for KeyDirectoryError {} diff --git a/akd_traits/src/lib.rs b/akd_traits/src/lib.rs new file mode 100644 index 00000000..1a11c556 --- /dev/null +++ b/akd_traits/src/lib.rs @@ -0,0 +1,27 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is dual-licensed under either the MIT license found in the +// LICENSE-MIT file in the root directory of this source tree or the Apache +// License, Version 2.0 found in the LICENSE-APACHE file in the root directory +// of this source tree. You may select, at your option, one of the above-listed licenses. + +//! # Key Directory Framework +//! +//! This crate provides the abstract [`KeyDirectory`] trait that +//! defines the interface for any key directory implementation. +//! Both server-side operations (publish, lookup, key history, audit) and +//! client-side verification are part of the trait. + +#![warn(missing_docs)] + +pub mod bench; +pub mod errors; +pub mod traits; +pub mod types; + +/// Digest type (32-byte hash). 
+pub type Digest = [u8; 32]; + +pub use errors::KeyDirectoryError; +pub use traits::KeyDirectory; +pub use types::{DirectoryLabel, DirectoryValue, EpochHash, VerifyResult}; diff --git a/akd_traits/src/traits.rs b/akd_traits/src/traits.rs new file mode 100644 index 00000000..106d50e8 --- /dev/null +++ b/akd_traits/src/traits.rs @@ -0,0 +1,133 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is dual-licensed under either the MIT license found in the +// LICENSE-MIT file in the root directory of this source tree or the Apache +// License, Version 2.0 found in the LICENSE-APACHE file in the root directory +// of this source tree. You may select, at your option, one of the above-listed licenses. + +//! Abstract trait for a key directory. +//! +//! This trait defines both server-side operations (publish, lookup, key_history, +//! audit) and client-side verification functions. Implementations provide their +//! own proof types, public key types, and verification logic. + +use async_trait::async_trait; +use core::fmt::Debug; + +use crate::types::{DirectoryLabel, DirectoryValue, EpochHash, VerifyResult}; +use crate::{Digest, KeyDirectoryError}; + +/// Abstract Key Directory interface. +/// +/// This trait abstracts over both server-side operations and client-side +/// verification. Implementations provide their own proof types, public key +/// types, and verification logic through associated types. +/// +/// Server-side methods are instance methods (`&self`). Client-side verification +/// methods are associated functions (no `self` parameter), called as +/// `D::lookup_verify(...)`. +#[async_trait] +pub trait KeyDirectory: Send + Sync { + /// The proof type returned by a single-key lookup. + type LookupProof: Send + Sync + Debug; + /// The proof type returned by a key history query. + type HistoryProof: Send + Sync + Debug; + /// The proof type returned by an audit between two epochs. 
+ type AuditProof: Send + Sync + Debug; + /// The public key type for this directory. + type PublicKey: Send + Sync + Debug; + /// Parameters controlling how much key history to retrieve. + type HistoryParams: Send + Sync + Debug + Default; + /// Parameters controlling how key history verification proceeds. + type HistoryVerificationParams: Send + Sync + Debug + Default; + /// Implementation-specific error type. + type Error: std::error::Error + Send + Sync + Into; + + // ======================== + // Server-side operations + // ======================== + + /// Publish a batch of label-value updates to the directory. + /// Returns the new epoch hash (epoch number + root hash). + async fn publish( + &self, + updates: Vec<(DirectoryLabel, DirectoryValue)>, + ) -> Result; + + /// Generate a lookup proof for a single label at the latest epoch. + async fn lookup( + &self, + label: DirectoryLabel, + ) -> Result<(Self::LookupProof, EpochHash), Self::Error>; + + /// Generate lookup proofs for multiple labels at the latest epoch. + /// + /// The default implementation calls [`lookup`](Self::lookup) sequentially. + /// Implementations may override for efficiency (e.g., batch preloading). + async fn batch_lookup( + &self, + labels: &[DirectoryLabel], + ) -> Result<(Vec, EpochHash), Self::Error> { + if labels.is_empty() { + let epoch_hash = self.get_epoch_hash().await?; + return Ok((vec![], epoch_hash)); + } + let mut proofs = Vec::with_capacity(labels.len()); + let mut last_epoch_hash = None; + for label in labels { + let (proof, eh) = self.lookup(label.clone()).await?; + proofs.push(proof); + last_epoch_hash = Some(eh); + } + Ok((proofs, last_epoch_hash.unwrap())) + } + + /// Generate a key history proof for a label. + async fn key_history( + &self, + label: &DirectoryLabel, + params: Self::HistoryParams, + ) -> Result<(Self::HistoryProof, EpochHash), Self::Error>; + + /// Generate an audit proof between two epochs. 
+ async fn audit( + &self, + start_epoch: u64, + end_epoch: u64, + ) -> Result; + + /// Retrieve the public key for this directory. + async fn get_public_key(&self) -> Result; + + /// Retrieve the current epoch and root hash. + async fn get_epoch_hash(&self) -> Result; + + // ======================== + // Client-side verification + // ======================== + + /// Verify a lookup proof against a public key and root hash. + fn lookup_verify( + public_key: &Self::PublicKey, + root_hash: Digest, + current_epoch: u64, + label: DirectoryLabel, + proof: Self::LookupProof, + ) -> Result; + + /// Verify a key history proof against a public key and root hash. + fn key_history_verify( + public_key: &Self::PublicKey, + root_hash: Digest, + current_epoch: u64, + label: DirectoryLabel, + proof: Self::HistoryProof, + params: Self::HistoryVerificationParams, + ) -> Result, KeyDirectoryError>; + + /// Verify an audit proof given a sequence of root hashes. + async fn audit_verify( + hashes: Vec, + proof: Self::AuditProof, + ) -> Result<(), KeyDirectoryError>; +} diff --git a/akd_traits/src/types.rs b/akd_traits/src/types.rs new file mode 100644 index 00000000..3ff7573f --- /dev/null +++ b/akd_traits/src/types.rs @@ -0,0 +1,166 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is dual-licensed under either the MIT license found in the +// LICENSE-MIT file in the root directory of this source tree or the Apache +// License, Version 2.0 found in the LICENSE-APACHE file in the root directory +// of this source tree. You may select, at your option, one of the above-listed licenses. + +//! Core types for key directory implementations. 
+
+use crate::Digest;
+
+#[cfg(feature = "serde")]
+mod serde_helpers {
+    use hex::FromHex;
+
+    pub fn bytes_serialize_hex<S, T>(x: &T, s: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+        T: AsRef<[u8]>,
+    {
+        use hex::ToHex;
+        let hex_str = &x.as_ref().encode_hex_upper::<String>();
+        s.serialize_str(hex_str)
+    }
+
+    pub fn bytes_deserialize_hex<'de, D, T>(deserializer: D) -> Result<T, D::Error>
+    where
+        D: serde::Deserializer<'de>,
+        T: AsRef<[u8]> + FromHex,
+        <T as FromHex>::Error: core::fmt::Display,
+    {
+        use serde::Deserialize;
+        let hex_str = String::deserialize(deserializer)?;
+        T::from_hex(hex_str).map_err(serde::de::Error::custom)
+    }
+}
+
+/// The label of a particular entry in the key directory.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+pub struct DirectoryLabel(
+    #[cfg_attr(
+        feature = "serde",
+        serde(serialize_with = "serde_helpers::bytes_serialize_hex")
+    )]
+    #[cfg_attr(
+        feature = "serde",
+        serde(deserialize_with = "serde_helpers::bytes_deserialize_hex")
+    )]
+    pub Vec<u8>,
+);
+
+impl core::ops::Deref for DirectoryLabel {
+    type Target = Vec<u8>;
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl core::ops::DerefMut for DirectoryLabel {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
+
+impl core::convert::From<&str> for DirectoryLabel {
+    fn from(s: &str) -> Self {
+        Self(s.as_bytes().to_vec())
+    }
+}
+
+impl core::convert::From<&String> for DirectoryLabel {
+    fn from(s: &String) -> Self {
+        Self(s.as_bytes().to_vec())
+    }
+}
+
+#[cfg(feature = "rand")]
+impl DirectoryLabel {
+    /// Gets a random label
+    pub fn random<R: rand::Rng>(rng: &mut R) -> Self {
+        let mut bytes = [0u8; 32];
+        rng.fill_bytes(&mut bytes);
+        Self(bytes.to_vec())
+    }
+}
+
+/// The value of a particular entry in the key directory.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+pub struct DirectoryValue(
+    #[cfg_attr(
+        feature = "serde",
+        serde(serialize_with = "serde_helpers::bytes_serialize_hex")
+    )]
+    #[cfg_attr(
+        feature = "serde",
+        serde(deserialize_with = "serde_helpers::bytes_deserialize_hex")
+    )]
+    pub Vec<u8>,
+);
+
+impl core::ops::Deref for DirectoryValue {
+    type Target = Vec<u8>;
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl core::ops::DerefMut for DirectoryValue {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
+
+impl core::convert::From<&str> for DirectoryValue {
+    fn from(s: &str) -> Self {
+        Self(s.as_bytes().to_vec())
+    }
+}
+
+impl core::convert::From<&String> for DirectoryValue {
+    fn from(s: &String) -> Self {
+        Self(s.as_bytes().to_vec())
+    }
+}
+
+#[cfg(feature = "rand")]
+impl DirectoryValue {
+    /// Gets a random value
+    pub fn random<R: rand::Rng>(rng: &mut R) -> Self {
+        let mut bytes = [0u8; 32];
+        rng.fill_bytes(&mut bytes);
+        Self(bytes.to_vec())
+    }
+}
+
+/// Root hash of the tree and its associated epoch.
+#[derive(Debug, Clone, Hash, PartialEq, Eq)]
+pub struct EpochHash(pub u64, pub Digest);
+
+impl EpochHash {
+    /// Get the contained epoch
+    pub fn epoch(&self) -> u64 {
+        self.0
+    }
+    /// Get the contained hash
+    pub fn hash(&self) -> Digest {
+        self.1
+    }
+}
+
+/// The payload that is outputted as a result of successful verification of
+/// a lookup proof or history proof. This includes the fields containing the
+/// epoch that the leaf was published in, the version corresponding to the value,
+/// and the value itself.
+#[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct VerifyResult { + /// The epoch of this record + pub epoch: u64, + /// Version at this update + pub version: u64, + /// The plaintext value associated with the record + pub value: DirectoryValue, +} diff --git a/examples/Cargo.toml b/examples/Cargo.toml index 072d73b6..faa8f544 100644 --- a/examples/Cargo.toml +++ b/examples/Cargo.toml @@ -41,6 +41,7 @@ xml-rs = "0.8" reqwest = "0.11" regex = "1" serde_yaml = "0.9" +bincode = "1" wasm-bindgen = "0.2" akd = { path = "../akd", features = [ @@ -50,6 +51,7 @@ akd = { path = "../akd", features = [ "experimental", ] } akd_core = { path = "../akd_core" } +akd_traits = { path = "../akd_traits" } [dev-dependencies] serial_test = "2" diff --git a/examples/src/main.rs b/examples/src/main.rs index a8ea7b67..8eeb5f3f 100644 --- a/examples/src/main.rs +++ b/examples/src/main.rs @@ -9,6 +9,7 @@ mod fixture_generator; mod mysql_demo; +mod scaling_bench; mod test_vectors; mod wasm_client; mod whatsapp_kt_auditor; @@ -35,6 +36,8 @@ enum ExampleType { FixtureGenerator(fixture_generator::Args), /// Test vectors generator TestVectors(test_vectors::Args), + /// Key directory scaling benchmarks + ScalingBench(scaling_bench::Args), } // MAIN // @@ -47,6 +50,7 @@ async fn main() -> Result<()> { ExampleType::MysqlDemo(args) => mysql_demo::render_cli(args).await?, ExampleType::FixtureGenerator(args) => fixture_generator::run(args).await, ExampleType::TestVectors(args) => test_vectors::run(args).await, + ExampleType::ScalingBench(args) => scaling_bench::run(args).await, } Ok(()) diff --git a/examples/src/mysql_demo/mod.rs b/examples/src/mysql_demo/mod.rs index 29d15c7a..2052d195 100644 --- a/examples/src/mysql_demo/mod.rs +++ b/examples/src/mysql_demo/mod.rs @@ -456,7 +456,7 @@ async fn process_input( println!("Error flushing database: {error}"); } else { println!( - "Database flushed, exiting application. 
Please restart to create a new VKD" + "Database flushed, exiting application. Please restart to create a new directory" ); break; } diff --git a/examples/src/scaling_bench/akd_setup.rs b/examples/src/scaling_bench/akd_setup.rs new file mode 100644 index 00000000..3120db1e --- /dev/null +++ b/examples/src/scaling_bench/akd_setup.rs @@ -0,0 +1,162 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is dual-licensed under either the MIT license found in the +// LICENSE-MIT file in the root directory of this source tree or the Apache +// License, Version 2.0 found in the LICENSE-APACHE file in the root directory +// of this source tree. You may select, at your option, one of the above-listed licenses. + +//! AKD (WhatsAppV1) implementation of [`BenchmarkSetup`] and [`BenchCache`]. + +use std::fs; +use std::path::Path; +use std::sync::Arc; + +use akd::append_only_zks::AzksParallelismConfig; +use akd::ecvrf::HardCodedAkdVRF; +use akd::storage::manager::StorageManager; +use akd::storage::memory::AsyncInMemoryDatabase; +use akd::storage::types::DbRecord; +use akd::storage::{Database, StorageUtil}; +use akd::{AkdLabel, AkdValue, Directory, EpochHash, WhatsAppV1Configuration}; +use akd_traits::bench::BenchmarkSetup; +use async_trait::async_trait; +use serde::{Deserialize, Serialize}; + +use super::cache::BenchCache; + +pub(crate) struct AkdSetup; + +#[async_trait] +impl BenchmarkSetup for AkdSetup { + type Directory = Directory; + + async fn create_directory() -> Self::Directory { + let db = AsyncInMemoryDatabase::new(); + let storage = StorageManager::new_no_cache(db); + let vrf = HardCodedAkdVRF {}; + Directory::::new( + storage, + vrf, + AzksParallelismConfig::default(), + ) + .await + .unwrap() + } + + fn generate_test_data(num_entries: usize, seed: u64) -> Vec<(AkdLabel, AkdValue)> { + (0..num_entries) + .map(|i| { + let label = format!("user_{}", i); + let value = format!("value_{}_{}", seed, i); + (AkdLabel::from(&label), 
AkdValue::from(&value)) + }) + .collect() + } + + fn name() -> &'static str { + "AKD (WhatsAppV1)" + } +} + +/// Serializable cache data containing all database records and epoch hashes. +#[derive(Serialize, Deserialize)] +struct CacheData { + records: Vec, + epoch_hashes: Vec<(u64, [u8; 32])>, +} + +#[async_trait] +impl BenchCache for AkdSetup { + type CacheHandle = Arc; + + async fn create_directory_with_handle() -> (Self::Directory, Self::CacheHandle) { + let db = AsyncInMemoryDatabase::new(); + let storage = StorageManager::new_no_cache(db); + let db_arc = storage.get_db(); + let vrf = HardCodedAkdVRF {}; + let dir = Directory::::new( + storage, + vrf, + AzksParallelismConfig::default(), + ) + .await + .unwrap(); + (dir, db_arc) + } + + async fn save(handle: &Self::CacheHandle, epoch_hashes: &[EpochHash], path: &Path) { + let records = handle.batch_get_all_direct().await.unwrap(); + let hashes: Vec<(u64, [u8; 32])> = epoch_hashes + .iter() + .map(|eh| (eh.epoch(), eh.hash())) + .collect(); + + let data = CacheData { + records, + epoch_hashes: hashes, + }; + + let encoded = bincode::serialize(&data).unwrap(); + + if let Some(parent) = path.parent() { + fs::create_dir_all(parent).unwrap(); + } + + fs::write(path, &encoded).unwrap(); + + let size_mb = encoded.len() as f64 / (1024.0 * 1024.0); + println!(" Saved cache ({:.1} MB): {}", size_mb, path.display()); + } + + async fn load(path: &Path) -> Option<(Self::Directory, Vec)> { + let bytes = fs::read(path).ok()?; + + let data: CacheData = match bincode::deserialize(&bytes) { + Ok(d) => d, + Err(e) => { + eprintln!( + "Warning: failed to deserialize cache {}: {}", + path.display(), + e + ); + return None; + } + }; + + let db = AsyncInMemoryDatabase::new(); + if let Err(e) = db + .batch_set(data.records, akd::storage::DbSetState::General) + .await + { + eprintln!("Warning: failed to restore database from cache: {}", e); + return None; + } + + let storage = StorageManager::new_no_cache(db); + let vrf = 
HardCodedAkdVRF {}; + let akd_dir = match Directory::::new( + storage, + vrf, + AzksParallelismConfig::default(), + ) + .await + { + Ok(d) => d, + Err(e) => { + eprintln!("Warning: failed to create directory from cache: {}", e); + return None; + } + }; + + let epoch_hashes: Vec = data + .epoch_hashes + .into_iter() + .map(|(epoch, digest)| EpochHash(epoch, digest)) + .collect(); + + let size_mb = bytes.len() as f64 / (1024.0 * 1024.0); + println!(" Loaded cache ({:.1} MB): {}", size_mb, path.display()); + + Some((akd_dir, epoch_hashes)) + } +} diff --git a/examples/src/scaling_bench/cache.rs b/examples/src/scaling_bench/cache.rs new file mode 100644 index 00000000..9892297b --- /dev/null +++ b/examples/src/scaling_bench/cache.rs @@ -0,0 +1,68 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is dual-licensed under either the MIT license found in the +// LICENSE-MIT file in the root directory of this source tree or the Apache +// License, Version 2.0 found in the LICENSE-APACHE file in the root directory +// of this source tree. You may select, at your option, one of the above-listed licenses. + +//! Generic database caching trait for scaling benchmarks. +//! +//! Implementations save and load populated directories to disk so that +//! subsequent benchmark runs skip the expensive setup phase. + +use std::fs; +use std::path::{Path, PathBuf}; + +use akd_core::types::EpochHash; +use akd_traits::bench::BenchmarkSetup; +use async_trait::async_trait; + +/// Trait for saving/loading benchmark directories to/from disk. +/// +/// Implementors provide serialization logic specific to their directory type. +/// The associated `CacheHandle` type carries any extra state needed for +/// serialization (e.g. a database handle) that isn't accessible from the +/// directory alone. +#[async_trait] +pub(crate) trait BenchCache: BenchmarkSetup { + /// Extra state needed to save the directory (e.g. a database handle). 
+    type CacheHandle: Send;
+
+    /// Create a fresh directory, returning both the directory and a cache handle.
+    async fn create_directory_with_handle() -> (Self::Directory, Self::CacheHandle);
+
+    /// Save the directory state and epoch hashes to `path`.
+    async fn save(handle: &Self::CacheHandle, epoch_hashes: &[EpochHash], path: &Path);
+
+    /// Load a cached directory and epoch hashes from `path`.
+    /// Returns `None` if the file doesn't exist or deserialization fails.
+    async fn load(path: &Path) -> Option<(Self::Directory, Vec<EpochHash>)>;
+}
+
+/// Return the cache directory path.
+pub(crate) fn cache_dir() -> PathBuf {
+    Path::new(env!("CARGO_MANIFEST_DIR"))
+        .parent()
+        .unwrap()
+        .join("target")
+        .join("bench-cache")
+}
+
+/// Return the cache file path for a given directory size.
+pub(crate) fn cache_path(size: usize) -> PathBuf {
+    cache_dir().join(format!("kd-N{}.bin", size))
+}
+
+/// Clear all cached databases.
+pub(crate) fn clear() {
+    let dir = cache_dir();
+    if dir.exists() {
+        if let Err(e) = fs::remove_dir_all(&dir) {
+            eprintln!("Warning: failed to clear cache directory: {}", e);
+        } else {
+            println!("Cleared cache directory: {}", dir.display());
+        }
+    } else {
+        println!("No cache directory found at {}", dir.display());
+    }
+}
diff --git a/examples/src/scaling_bench/mod.rs b/examples/src/scaling_bench/mod.rs
new file mode 100644
index 00000000..9b966491
--- /dev/null
+++ b/examples/src/scaling_bench/mod.rs
@@ -0,0 +1,239 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+//
+// This source code is dual-licensed under either the MIT license found in the
+// LICENSE-MIT file in the root directory of this source tree or the Apache
+// License, Version 2.0 found in the LICENSE-APACHE file in the root directory
+// of this source tree. You may select, at your option, one of the above-listed licenses.
+
+//! CLI subcommand for key directory scaling benchmarks.
+//!
+//! Measures how generation times and verification times scale with directory size.
+//! +//! ```text +//! cargo run -p examples --release -- scaling-bench --sizes 10,14,17 +//! ``` +//! +//! ## Adding a new key directory implementation +//! +//! 1. Create a new file (e.g. `my_kd_setup.rs`) implementing +//! `akd_traits::bench::BenchmarkSetup` and [`cache::BenchCache`] for your +//! directory type. +//! 2. Add `mod my_kd_setup;` in this file. +//! 3. Add a match arm in `run()` for `--impl my-kd`. +//! +//! Then run: +//! ```text +//! cargo run -p examples --release -- scaling-bench --impl my-kd --sizes 10,14 +//! ``` + +mod akd_setup; +mod cache; +mod runner; +mod table; + +use clap::Parser; + +use cache::BenchCache; + +/// Operations that can be benchmarked. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) enum Op { + Publish, + PublishUpdate, + Lookup, + LookupVerify, + History, + Audit, + AuditVerify, + Setup, +} + +impl Op { + fn all() -> Vec { + vec![ + Op::Publish, + Op::PublishUpdate, + Op::Lookup, + Op::LookupVerify, + Op::History, + Op::Audit, + Op::AuditVerify, + ] + } + + fn from_str(s: &str) -> Option { + match s { + "publish" => Some(Op::Publish), + "publish_update" => Some(Op::PublishUpdate), + "lookup" => Some(Op::Lookup), + "lookup_verify" => Some(Op::LookupVerify), + "history" => Some(Op::History), + "audit" => Some(Op::Audit), + "audit_verify" => Some(Op::AuditVerify), + _ => None, + } + } +} + +/// Output format for benchmark results. +#[derive(Debug, Clone, Copy, Default)] +pub(crate) enum Format { + #[default] + Table, + Csv, + Json, +} + +#[derive(Parser, Debug, Clone)] +pub(crate) struct Args { + /// Comma-separated log2 directory sizes to benchmark (e.g. 
10,14,17 means 2^10, 2^14, 2^17) + #[clap(long, default_value = "10,14,17")] + sizes: String, + + /// Comma-separated operations (publish,publish_update,lookup,lookup_verify,history,audit,audit_verify) + #[clap(long, default_value = "all")] + ops: String, + + /// Output format: table, csv, json + #[clap(long, default_value = "table")] + format: String, + + /// Log2 of entries per update epoch (e.g. 7 means 2^7=128 entries) + #[clap(long, default_value = "7")] + updates_per_epoch: u32, + + /// Number of iterations for timing (median of N runs) + #[clap(long, default_value = "3")] + iterations: usize, + + /// Key directory implementation to benchmark (e.g. "akd") + #[clap(long = "impl", default_value = "akd")] + impl_name: String, + + /// Disable database caching (force fresh setup) + #[clap(long)] + no_cache: bool, + + /// Clear all cached databases and exit + #[clap(long)] + clear_cache: bool, + + /// Sweep mode: comma-separated log2 values for updates-per-epoch (e.g. 5,7,9,10,12) + #[clap(long)] + sweep_updates_per_epoch: Option, +} + +impl Args { + fn parse_sizes(&self) -> Vec { + self.sizes + .split(',') + .filter_map(|s| s.trim().parse::().ok()) + .map(|exp| 1usize << exp) + .collect() + } + + fn parse_ops(&self) -> Vec { + if self.ops.trim() == "all" { + return Op::all(); + } + self.ops + .split(',') + .filter_map(|s| Op::from_str(s.trim())) + .collect() + } + + fn parse_format(&self) -> Option { + match self.format.trim() { + "table" => Some(Format::Table), + "csv" => Some(Format::Csv), + "json" => Some(Format::Json), + _ => None, + } + } + + fn parse_sweep_m_values(&self) -> Option> { + self.sweep_updates_per_epoch.as_ref().map(|s| { + s.split(',') + .filter_map(|v| v.trim().parse::().ok()) + .map(|exp| 1usize << exp) + .collect() + }) + } +} + +async fn run_with(args: &Args, format: Format, sizes: &[usize], ops: &[Op]) { + let use_cache = !args.no_cache; + + // Sweep mode: vary updates-per-epoch for a fixed N + if let Some(m_values) = 
args.parse_sweep_m_values() { + if m_values.is_empty() { + eprintln!("Error: no valid M values in --sweep-updates-per-epoch"); + return; + } + let size = sizes[0]; // Use first size as fixed N + let sweep_results = + runner::run_sweep_updates::(size, &m_values, args.iterations, use_cache).await; + + match format { + Format::Table | Format::Json => { + table::print_sweep_table(&sweep_results, size, args.iterations) + } + Format::Csv => table::print_sweep_csv(&sweep_results), + } + return; + } + + let updates_per_epoch = 1usize << args.updates_per_epoch; + let results = + runner::run_benchmarks::(sizes, ops, updates_per_epoch, args.iterations, use_cache) + .await; + + match format { + Format::Table => { + println!("Key Directory Scaling Benchmarks: {}", S::name()); + println!("{}", "=".repeat(24 + S::name().len())); + println!(); + table::print_table(&results, sizes, ops); + } + Format::Csv => table::print_csv(&results, sizes, ops), + Format::Json => table::print_json(&results, sizes, ops), + } +} + +pub(crate) async fn run(args: Args) { + if args.clear_cache { + cache::clear(); + return; + } + + let sizes = args.parse_sizes(); + let ops = args.parse_ops(); + + if sizes.is_empty() { + eprintln!("Error: no valid sizes provided"); + return; + } + if ops.is_empty() { + eprintln!("Error: no valid operations provided"); + return; + } + if args.iterations < 1 { + eprintln!("Error: iterations must be at least 1"); + return; + } + let format = match args.parse_format() { + Some(f) => f, + None => { + eprintln!( + "Error: invalid format '{}' (expected: table, csv, json)", + args.format + ); + return; + } + }; + + match args.impl_name.as_str() { + "akd" => run_with::(&args, format, &sizes, &ops).await, + other => eprintln!("Unknown implementation '{other}'. 
Available: akd"), + } +} diff --git a/examples/src/scaling_bench/runner.rs b/examples/src/scaling_bench/runner.rs new file mode 100644 index 00000000..8eaa5c21 --- /dev/null +++ b/examples/src/scaling_bench/runner.rs @@ -0,0 +1,274 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is dual-licensed under either the MIT license found in the +// LICENSE-MIT file in the root directory of this source tree or the Apache +// License, Version 2.0 found in the LICENSE-APACHE file in the root directory +// of this source tree. You may select, at your option, one of the above-listed licenses. + +//! Generic benchmark runner for any [`BenchCache`] implementation. + +use std::time::{Duration, Instant}; + +use akd_core::types::{AkdLabel, EpochHash}; +use akd_traits::KeyDirectory; +use indicatif::{ProgressBar, ProgressStyle}; + +use super::cache::{self, BenchCache}; +use super::table::Results; +use super::Op; + +/// Set up a directory populated with `size` entries and 5 additional epochs +/// of `updates_per_epoch` entries each. +/// +/// When `use_cache` is true, attempts to load from disk first and saves +/// after setup. 
+async fn setup_directory( + size: usize, + updates_per_epoch: usize, + use_cache: bool, +) -> (S::Directory, Vec) { + let cache_file = cache::cache_path(size); + + if use_cache { + if let Some(cached) = S::load(&cache_file).await { + return cached; + } + } + + let num_history_epochs = 5; + let (dir, cache_handle) = S::create_directory_with_handle().await; + let data = S::generate_test_data(size, 42); + let initial_eh = dir.publish(data).await.map_err(Into::into).unwrap(); + + let mut epoch_hashes = vec![initial_eh]; + for epoch_seed in 1..=num_history_epochs { + let epoch_data = S::generate_test_data(updates_per_epoch, 100 + epoch_seed as u64); + let eh = dir.publish(epoch_data).await.map_err(Into::into).unwrap(); + epoch_hashes.push(eh); + } + + if use_cache { + S::save(&cache_handle, &epoch_hashes, &cache_file).await; + } + + (dir, epoch_hashes) +} + +/// Compute the median of a slice of durations. Panics if the slice is empty. +fn median_duration(times: &mut [Duration]) -> Duration { + assert!(!times.is_empty(), "cannot compute median of empty slice"); + times.sort(); + let mid = times.len() / 2; + if times.len().is_multiple_of(2) { + (times[mid - 1] + times[mid]) / 2 + } else { + times[mid] + } +} + +/// Create a spinner-style progress bar for a benchmark phase. +fn make_progress_bar(msg: &str, size: usize) -> ProgressBar { + let pb = ProgressBar::new_spinner(); + pb.set_style( + ProgressStyle::with_template(" {msg} (N={pos}) {spinner}") + .unwrap() + .tick_chars("⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏"), + ); + pb.set_message(msg.to_string()); + pb.set_position(size as u64); + pb.enable_steady_tick(Duration::from_millis(100)); + pb +} + +/// Run all benchmarks for the given sizes and operations. 
+pub(crate) async fn run_benchmarks( + sizes: &[usize], + ops: &[Op], + updates_per_epoch: usize, + iterations: usize, + use_cache: bool, +) -> Results { + assert!(iterations >= 1, "iterations must be at least 1"); + + let mut results = Results::new(); + + for &size in sizes { + // --- Setup: create directory and populate with `size` entries --- + let pb = make_progress_bar("Setting up directory", size); + let setup_start = Instant::now(); + + let (dir, epoch_hashes) = setup_directory::(size, updates_per_epoch, use_cache).await; + + let setup_elapsed = setup_start.elapsed(); + pb.finish_and_clear(); + + // Record setup time + results.insert((Op::Setup, size), setup_elapsed); + + let label = AkdLabel::from("user_0"); + + // --- Publish benchmark: time publishing N entries into an empty directory --- + if ops.contains(&Op::Publish) { + let pb = make_progress_bar("Benchmarking publish", size); + let mut times = Vec::with_capacity(iterations); + for iter in 0..iterations { + let fresh_dir = S::create_directory().await; + let publish_data = S::generate_test_data(size, 1000 + iter as u64); + let start = Instant::now(); + fresh_dir + .publish(publish_data) + .await + .map_err(Into::into) + .unwrap(); + times.push(start.elapsed()); + } + results.insert((Op::Publish, size), median_duration(&mut times)); + pb.finish_and_clear(); + } + + // --- Publish update benchmark: time publishing M entries into the existing directory --- + if ops.contains(&Op::PublishUpdate) { + let pb = make_progress_bar("Benchmarking publish update", size); + let mut times = Vec::with_capacity(iterations); + for iter in 0..iterations { + let update_data = S::generate_test_data(updates_per_epoch, 2000 + iter as u64); + let start = Instant::now(); + dir.publish(update_data).await.map_err(Into::into).unwrap(); + times.push(start.elapsed()); + } + results.insert((Op::PublishUpdate, size), median_duration(&mut times)); + pb.finish_and_clear(); + } + + // --- Lookup benchmark --- + if 
ops.contains(&Op::Lookup) || ops.contains(&Op::LookupVerify) { + let pb = make_progress_bar("Benchmarking lookup", size); + + if ops.contains(&Op::Lookup) { + let mut times = Vec::with_capacity(iterations); + for _ in 0..iterations { + let start = Instant::now(); + let (_proof, _eh) = + dir.lookup(label.clone()).await.map_err(Into::into).unwrap(); + times.push(start.elapsed()); + } + results.insert((Op::Lookup, size), median_duration(&mut times)); + } + + // --- Lookup verify --- + if ops.contains(&Op::LookupVerify) { + let pk = dir.get_public_key().await.map_err(Into::into).unwrap(); + let mut verify_times = Vec::with_capacity(iterations); + for _ in 0..iterations { + let (p, e) = dir.lookup(label.clone()).await.map_err(Into::into).unwrap(); + let start = Instant::now(); + ::lookup_verify( + &pk, + e.hash(), + e.epoch(), + label.clone(), + p, + ) + .unwrap(); + verify_times.push(start.elapsed()); + } + results.insert((Op::LookupVerify, size), median_duration(&mut verify_times)); + } + + pb.finish_and_clear(); + } + + // --- Key history benchmark --- + if ops.contains(&Op::History) { + let pb = make_progress_bar("Benchmarking history", size); + let mut times = Vec::with_capacity(iterations); + + for _ in 0..iterations { + let params = Default::default(); + let start = Instant::now(); + let (_proof, _eh) = dir + .key_history(&label, params) + .await + .map_err(Into::into) + .unwrap(); + times.push(start.elapsed()); + } + + results.insert((Op::History, size), median_duration(&mut times)); + pb.finish_and_clear(); + } + + // --- Audit benchmark --- + if ops.contains(&Op::Audit) || ops.contains(&Op::AuditVerify) { + let pb = make_progress_bar("Benchmarking audit", size); + + // Audit between epoch 1 and 2 + if ops.contains(&Op::Audit) { + let mut times = Vec::with_capacity(iterations); + for _ in 0..iterations { + let start = Instant::now(); + let _proof = dir.audit(1, 2).await.map_err(Into::into).unwrap(); + times.push(start.elapsed()); + } + 
results.insert((Op::Audit, size), median_duration(&mut times)); + } + + // --- Audit verify --- + if ops.contains(&Op::AuditVerify) { + let hashes = vec![epoch_hashes[0].hash(), epoch_hashes[1].hash()]; + let mut verify_times = Vec::with_capacity(iterations); + for _ in 0..iterations { + let proof = dir.audit(1, 2).await.map_err(Into::into).unwrap(); + let start = Instant::now(); + ::audit_verify(hashes.clone(), proof) + .await + .unwrap(); + verify_times.push(start.elapsed()); + } + results.insert((Op::AuditVerify, size), median_duration(&mut verify_times)); + } + + pb.finish_and_clear(); + } + } + + results +} + +/// Run a sweep of publish_update benchmarks across different M (updates-per-epoch) values +/// for a fixed directory size N. +pub(crate) async fn run_sweep_updates( + size: usize, + m_values: &[usize], + iterations: usize, + use_cache: bool, +) -> Vec<(usize, Duration)> { + assert!(iterations >= 1, "iterations must be at least 1"); + + // Use the smallest M value for initial setup + let setup_m = *m_values.iter().min().unwrap(); + + // Set up directory once + let pb = make_progress_bar("Setting up directory for sweep", size); + let (dir, _epoch_hashes) = setup_directory::(size, setup_m, use_cache).await; + pb.finish_and_clear(); + + let mut results = Vec::with_capacity(m_values.len()); + + for &m in m_values { + let pb = make_progress_bar(&format!("Sweeping M=2^{}", m.trailing_zeros()), size); + let mut times = Vec::with_capacity(iterations); + + for iter in 0..iterations { + let update_data = S::generate_test_data(m, 3000 + iter as u64); + let start = Instant::now(); + dir.publish(update_data).await.map_err(Into::into).unwrap(); + times.push(start.elapsed()); + } + + results.push((m, median_duration(&mut times))); + pb.finish_and_clear(); + } + + results +} diff --git a/examples/src/scaling_bench/table.rs b/examples/src/scaling_bench/table.rs new file mode 100644 index 00000000..966dcdd2 --- /dev/null +++ b/examples/src/scaling_bench/table.rs @@ -0,0 
+1,268 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is dual-licensed under either the MIT license found in the +// LICENSE-MIT file in the root directory of this source tree or the Apache +// License, Version 2.0 found in the LICENSE-APACHE file in the root directory +// of this source tree. You may select, at your option, one of the above-listed licenses. + +//! Table formatting and CSV/JSON output for scaling benchmark results. + +use std::collections::HashMap; +use std::fmt::Write as FmtWrite; +use std::time::Duration; + +use super::Op; + +/// Key for looking up results: (operation, directory_size). +pub(crate) type ResultKey = (Op, usize); + +/// All benchmark results: maps (op, size) to a timing duration. +pub(crate) type Results = HashMap; + +/// Row descriptor: label and the operation to look up. +struct Row { + label: &'static str, + op: Op, +} + +/// All rows to display in the table. +fn table_rows(ops: &[Op]) -> Vec { + let mut rows = Vec::new(); + + // Setup is always recorded + rows.push(Row { + label: "Setup", + op: Op::Setup, + }); + + if ops.contains(&Op::Publish) { + rows.push(Row { + label: "Publish N", + op: Op::Publish, + }); + } + if ops.contains(&Op::PublishUpdate) { + rows.push(Row { + label: "Publish update", + op: Op::PublishUpdate, + }); + } + if ops.contains(&Op::Lookup) { + rows.push(Row { + label: "Lookup gen", + op: Op::Lookup, + }); + } + if ops.contains(&Op::LookupVerify) { + rows.push(Row { + label: "Lookup verify", + op: Op::LookupVerify, + }); + } + if ops.contains(&Op::History) { + rows.push(Row { + label: "History gen (5 ep)", + op: Op::History, + }); + } + if ops.contains(&Op::Audit) { + rows.push(Row { + label: "Audit gen", + op: Op::Audit, + }); + } + if ops.contains(&Op::AuditVerify) { + rows.push(Row { + label: "Audit verify", + op: Op::AuditVerify, + }); + } + + rows +} + +/// Format a Duration for display. 
+fn format_duration(d: Duration) -> String { + let micros = d.as_micros(); + if micros < 1_000 { + format!("{} us", micros) + } else if micros < 1_000_000 { + format!("{:.1} ms", micros as f64 / 1_000.0) + } else { + format!("{:.2} s", micros as f64 / 1_000_000.0) + } +} + +/// Format a directory size as 2^k if it's a power of two, otherwise with commas. +fn format_n(n: usize) -> String { + if n.is_power_of_two() && n > 1 { + format!("2^{}", n.trailing_zeros()) + } else { + let s = n.to_string(); + let mut result = String::new(); + for (i, c) in s.chars().rev().enumerate() { + if i > 0 && i % 3 == 0 { + result.push(','); + } + result.push(c); + } + result.chars().rev().collect() + } +} + +/// Print the results as a formatted table. +pub(crate) fn print_table(results: &Results, sizes: &[usize], ops: &[Op]) { + let rows = table_rows(ops); + if rows.is_empty() { + return; + } + + // Compute column widths + let label_width = rows.iter().map(|r| r.label.len()).max().unwrap_or(0); + let col_width = 12; + + // Header + let mut header = String::new(); + write!(header, "{:width$}", "", width = label_width + 1).unwrap(); + for &size in sizes { + write!( + header, + "| N={: format_duration(*d), + None => "-".to_string(), + }; + write!(line, "| {: format!("{:.3}", d.as_secs_f64() * 1000.0), + None => String::new(), + }; + print!(",{cell}"); + } + println!(); + } +} + +/// Print the results as JSON to stdout. 
+pub(crate) fn print_json(results: &Results, sizes: &[usize], ops: &[Op]) { + let rows = table_rows(ops); + + println!("["); + for (ri, row) in rows.iter().enumerate() { + println!(" {{"); + println!(" \"metric\": \"{}\",", row.label); + println!(" \"values\": {{"); + for (si, &size) in sizes.iter().enumerate() { + let key = (row.op, size); + let value = match results.get(&key) { + Some(d) => format!("{:.3}", d.as_secs_f64() * 1000.0), + None => "null".to_string(), + }; + let comma = if si + 1 < sizes.len() { "," } else { "" }; + println!(" \"{size}\": {value}{comma}"); + } + println!(" }},"); + println!(" \"unit\": \"ms\""); + let comma = if ri + 1 < rows.len() { "," } else { "" }; + println!(" }}{comma}"); + } + println!("]"); +} + +/// Print sweep results as an ASCII bar chart table. +pub(crate) fn print_sweep_table(results: &[(usize, Duration)], size: usize, iterations: usize) { + let bar_width = 50; + let blocks = ['█', '▉', '▊', '▋', '▌', '▍', '▎', '▏']; + + let max_time = results + .iter() + .map(|(_, d)| d.as_nanos()) + .max() + .unwrap_or(1) + .max(1); + + println!( + "Publish update time vs M (N={}, median of {} iteration{})", + format_n(size), + iterations, + if iterations == 1 { "" } else { "s" } + ); + + // Header + println!(" {:>6} | {:>10} |", "M", "Time"); + println!("--------+------------+{}", "-".repeat(bar_width + 1)); + + for &(m, duration) in results { + let m_label = format_n(m); + let time_str = format_duration(duration); + let ratio = duration.as_nanos() as f64 / max_time as f64; + let bar_float = ratio * bar_width as f64; + let full_blocks = bar_float as usize; + let remainder = ((bar_float - full_blocks as f64) * 8.0).min(7.0) as usize; + + let mut bar = String::new(); + for _ in 0..full_blocks { + bar.push('█'); + } + if remainder > 0 && full_blocks < bar_width { + bar.push(blocks[8 - remainder]); + } + + println!(" {:>6} | {:>10} | {}", m_label, time_str, bar); + } +} + +/// Print sweep results as CSV. 
+pub(crate) fn print_sweep_csv(results: &[(usize, Duration)]) { + println!("m,time_ms"); + for &(m, duration) in results { + println!("{},{:.3}", m, duration.as_secs_f64() * 1000.0); + } +}