//! Shared helpers for `iobuf` benchmarks.
//!
//! The benchmark modules in this directory share the same small set of
//! threading presets and the same timing harness:
//!
//! - [`Threading`] defines the single-threaded and multi-threaded benchmark
//!   shapes used by both suites.
//! - [`Pattern`] describes the multi-threaded synchronization style:
//!   - `Lockstep`: all workers enter the hot path together, maximizing
//!     contention.
//!   - `Staggered`: workers add a small variable spin delay between iterations
//!     to decorrelate access timing.
//! - [`measure`] runs the benchmark body under those presets, including the
//!   barrier synchronization used by the multi-threaded cases.
//!
//! Keeping these helpers in one place ensures that the `pool` and `freelist`
//! benchmarks use the same contention patterns and wall-clock measurement
//! rules.

use std::{
    hint::spin_loop,
    sync::{Arc, Barrier},
    thread,
    time::{Duration, Instant},
};

/// Lower bound on the worker count chosen by [`Threading::standard`].
const MIN_BENCH_THREADS: usize = 2;

/// Upper bound on the worker count chosen by [`Threading::standard`].
const MAX_BENCH_THREADS: usize = 8;

/// Mask applied to the staggered spin count, capping it at 15 spins per
/// iteration.
const STAGGER_SPIN_MASK: usize = 0xF;

/// Synchronization style used by the multi-threaded benchmark cases.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Pattern {
    /// All workers enter the hot path together, maximizing contention.
    Lockstep,
    /// Workers add a small spin delay to decorrelate access timing.
    Staggered,
}

impl Pattern {
    /// Returns the lowercase name of this pattern (`"lockstep"` or
    /// `"staggered"`).
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::Lockstep => "lockstep",
            Self::Staggered => "staggered",
        }
    }
}

/// Threading shape under which a benchmark body is executed.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Threading {
    /// Run the body on the calling thread only.
    Single,
    /// Run the body on `threads` worker threads using `pattern`.
    Multi { threads: usize, pattern: Pattern },
}

impl Threading {
    /// Returns the standard presets: single-threaded, multi-threaded lockstep,
    /// and multi-threaded staggered.
    ///
    /// The multi-threaded worker count is the available parallelism clamped to
    /// `MIN_BENCH_THREADS..=MAX_BENCH_THREADS`; if the parallelism cannot be
    /// queried, `MIN_BENCH_THREADS` is used.
    pub fn standard() -> [Self; 3] {
        let threads = thread::available_parallelism().map_or(MIN_BENCH_THREADS, |n| {
            n.get().clamp(MIN_BENCH_THREADS, MAX_BENCH_THREADS)
        });
        [
            Self::Single,
            Self::Multi {
                threads,
                pattern: Pattern::Lockstep,
            },
            Self::Multi {
                threads,
                pattern: Pattern::Staggered,
            },
        ]
    }

    /// Returns the number of threads that execute the benchmark body: `1` for
    /// [`Threading::Single`], otherwise the configured worker count.
    pub const fn threads(self) -> usize {
        match self {
            Self::Single => 1,
            Self::Multi { threads, .. } => threads,
        }
    }
}

/// Measure `iters` repetitions of `step`.
///
/// `setup` runs per-worker before timing starts and returns state passed to
/// each `step` invocation. For multi-threaded runs, all workers synchronize
/// via a barrier after setup so timing captures concurrent execution only.
///
/// Returns the wall-clock time of the timed region. In the single-threaded
/// case that is the loop over `step` on the calling thread. In the
/// multi-threaded case every worker runs `iters` steps, and the clock runs
/// from just before the workers are released until all of them have finished.
///
/// # Panics
///
/// A panic in `step` on a worker thread is propagated by [`thread::scope`]
/// once the remaining workers have finished. A panic in `setup` on a worker
/// thread happens before that worker reaches the barriers, so the other
/// participants wait forever and this call hangs instead of returning.
pub fn measure<T>(
    iters: u64,
    threading: Threading,
    setup: impl Fn() -> T + Sync,
    step: impl Fn(&mut T) + Sync,
) -> Duration {
    let Threading::Multi { threads, pattern } = threading else {
        let mut state = setup();
        let start = Instant::now();
        for _ in 0..iters {
            step(&mut state);
        }
        return start.elapsed();
    };

    // Resolved once so the hot loop only tests a bool.
    let staggered = matches!(pattern, Pattern::Staggered);

    let start = thread::scope(|scope| {
        // Both barriers include the coordinating thread (`threads + 1`):
        // `ready` confirms every worker finished setup, `launch` releases the
        // workers after the start timestamp has been taken.
        let ready = Arc::new(Barrier::new(threads + 1));
        let launch = Arc::new(Barrier::new(threads + 1));

        for thread_id in 0..threads {
            let ready = Arc::clone(&ready);
            let launch = Arc::clone(&launch);
            let setup = &setup;
            let step = &step;
            scope.spawn(move || {
                let mut state = setup();

                // Per-thread multiplier for the stagger delay; it does not
                // depend on the iteration, so compute it before timing starts.
                let stride = thread_id
                    .wrapping_mul(MAX_BENCH_THREADS - 1)
                    .wrapping_add(1);

                ready.wait();
                launch.wait();

                for iter in 0..iters {
                    step(&mut state);
                    if staggered {
                        // Desynchronize threads so they don't all hit the
                        // allocator at once. This spreads access times apart
                        // without adding enough delay to dominate the
                        // measurement.
                        //
                        // Only the low four bits survive the mask, so the
                        // narrowing `as usize` cast cannot change the result.
                        let spins = (iter as usize).wrapping_add(1).wrapping_mul(stride)
                            & STAGGER_SPIN_MASK;
                        for _ in 0..spins {
                            spin_loop();
                        }
                    }
                }
            });
        }

        // Wait for every worker to finish setup, take the timestamp, then
        // release the workers into the timed loop.
        ready.wait();
        let start = Instant::now();
        launch.wait();
        start
    });

    // `thread::scope` has joined every worker by the time it returns, so this
    // covers the slowest worker.
    start.elapsed()
}