//! Encode data to enable recovery from a subset of fragments. //! //! # Status //! //! `commonware-coding` is **ALPHA** software and is not yet recommended for production use. Developers should //! expect breaking changes and occasional instability. #![doc( html_logo_url = "https://commonware.xyz/imgs/rustdoc_logo.svg", html_favicon_url = "https://commonware.xyz/favicon.ico" )] use bytes::Buf; use commonware_codec::{Codec, FixedSize, Read, Write}; use std::fmt::Debug; mod reed_solomon; use commonware_cryptography::Digest; pub use reed_solomon::{Error as ReedSolomonError, ReedSolomon}; mod no_coding; pub use no_coding::{Error as NoCodingError, NoCoding}; mod zoda; pub use zoda::{Error as ZodaError, Zoda}; /// Configuration common to all encoding schemes. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct Config { /// The minimum number of shards needed to encode the data. pub minimum_shards: u16, /// Extra shards beyond the minimum number. /// /// Alternatively, one can think of the configuration as having a total number /// `N = extra_shards + minimum_shards`, but by specifying the `extra_shards` /// rather than `N`, we avoid needing to check that `minimum_shards <= N`. pub extra_shards: u16, } impl Config { /// Returns the total number of shards produced by this configuration. pub fn total_shards(&self) -> u32 { u32::from(self.minimum_shards) + u32::from(self.extra_shards) } } impl FixedSize for Config { const SIZE: usize = 2 * ::SIZE; } impl Write for Config { fn write(&self, buf: &mut impl bytes::BufMut) { self.minimum_shards.write(buf); self.extra_shards.write(buf); } } impl Read for Config { type Cfg = (); fn read_cfg(buf: &mut impl Buf, cfg: &Self::Cfg) -> Result { Ok(Self { minimum_shards: u16::read_cfg(buf, cfg)?, extra_shards: u16::read_cfg(buf, cfg)?, }) } } /// The configuration for decoding shard data. #[derive(Clone, Debug)] pub struct CodecConfig { /// The maximum number of bytes a shard is expected to contain. /// /// This can be an upper bound, and only constrains the non-fixed-size portion /// of shard data. pub maximum_shard_size: usize, } /// A scheme for encoding data into pieces, and recovering the data from those pieces. /// /// # Example /// ``` /// use commonware_coding::{Config, ReedSolomon, Scheme as _}; /// use commonware_cryptography::Sha256; /// /// const CONCURRENCY: usize = 1; /// /// type RS = ReedSolomon; /// /// let config = Config { minimum_shards: 2, extra_shards: 1 }; /// let data = b"Hello!"; /// // Turn the data into shards, and a commitment to those shards. /// let (commitment, shards) = /// RS::encode(&config, data.as_slice(), CONCURRENCY).unwrap(); /// /// // Each person produces reshards, their own checked shard, and checking data /// // to check other peoples reshards. /// let (mut checking_data_w_shard, reshards): (Vec<_>, Vec<_>) = shards /// .into_iter() /// .enumerate() /// .map(|(i, shard)| { /// let (checking_data, checked_shard, reshard) = RS::reshard(&config, &commitment, i as u16, shard).unwrap(); /// ((checking_data, checked_shard), reshard) /// }) /// .collect(); /// // Let's pretend that the last item is "ours" /// let (checking_data, checked_shard) = checking_data_w_shard.pop().unwrap(); /// // We can use this checking_data to check the other shards. /// let mut checked_shards = Vec::new(); /// checked_shards.push(checked_shard); /// for (i, reshard) in reshards.into_iter().enumerate().skip(1) { /// checked_shards.push(RS::check(&config, &commitment, &checking_data, i as u16, reshard).unwrap()) /// } /// /// let data2 = RS::decode(&config, &commitment, checking_data, &checked_shards[..2], CONCURRENCY).unwrap(); /// assert_eq!(&data[..], &data2[..]); /// /// // Decoding works with different shards, with a guarantee to get the same result. /// let data3 = RS::decode(&config, &commitment, checking_data, &checked_shards[1..], CONCURRENCY).unwrap(); /// assert_eq!(&data[..], &data3[..]); /// ``` pub trait Scheme: Debug + Clone + Send + Sync + 'static { /// A commitment attesting to the shards of data. type Commitment: Digest; /// A shard of data, to be received by a participant. type Shard: Clone + Eq + Codec + Send + Sync + 'static; /// A shard shared with other participants, to aid them in reconstruction. /// /// In most cases, this will be the same as `Shard`, but some schemes might /// have extra information in `Shard` that may not be necessary to reconstruct /// the data. type ReShard: Clone + Eq + Codec + Send + Sync + 'static; /// Data which can assist in checking shards. type CheckingData: Clone + Send; /// A shard that has been checked for inclusion in the commitment. /// /// This allows excluding [Scheme::ReShard]s which are invalid, and shouldn't /// be considered as progress towards meeting the minimum number of shards. type CheckedShard; type Error: std::fmt::Debug; /// Encode a piece of data, returning a commitment, along with shards, and proofs. /// /// Each shard and proof is intended for exactly one participant. The number of shards returned /// should equal `config.minimum_shards + config.extra_shards`. #[allow(clippy::type_complexity)] fn encode( config: &Config, data: impl Buf, concurrency: usize, ) -> Result<(Self::Commitment, Vec), Self::Error>; /// Take your own shard, check it, and produce a [Scheme::ReShard] to forward to others. /// /// This takes in an index, which is the index you expect the shard to be. /// /// This will produce a [Scheme::CheckedShard] which counts towards the minimum /// number of shards you need to reconstruct the data, in [Scheme::decode]. /// /// You also get [Scheme::CheckingData], which has information you can use to check /// the shards you receive from others. #[allow(clippy::type_complexity)] fn reshard( config: &Config, commitment: &Self::Commitment, index: u16, shard: Self::Shard, ) -> Result<(Self::CheckingData, Self::CheckedShard, Self::ReShard), Self::Error>; /// Check the integrity of a reshard, producing a checked shard. /// /// This requires the [Scheme::CheckingData] produced by [Scheme::reshard]. /// /// This takes in an index, to make sure that the reshard you're checking /// is associated with the participant you expect it to be. fn check( config: &Config, commitment: &Self::Commitment, checking_data: &Self::CheckingData, index: u16, reshard: Self::ReShard, ) -> Result; /// Decode the data from shards received from other participants. /// /// The data must be decodeable with as few as `config.minimum_shards`, /// including your own shard. /// /// Calls to this function with the same commitment, but with different shards, /// or shards in a different should also result in the same output data, or in failure. /// In other words, when using the decoding function in a broader system, you /// get a guarantee that every participant decoding will see the same final /// data, even if they receive different shards, or receive them in a different order. fn decode( config: &Config, commitment: &Self::Commitment, checking_data: Self::CheckingData, shards: &[Self::CheckedShard], concurrency: usize, ) -> Result, Self::Error>; } /// A marker trait indicating that [Scheme::check] proves validity of the encoding. /// /// In more detail, this means that upon a successful call to [Scheme::check], /// guarantees that the shard results from a valid encoding of the data, and thus, /// if other participants also call check, then the data is guaranteed to be reconstructable. pub trait ValidatingScheme: Scheme {} #[cfg(test)] mod test { use super::*; use crate::reed_solomon::ReedSolomon; use commonware_codec::Encode; use commonware_cryptography::Sha256; use std::cmp::Reverse; const CONCURRENCY: usize = 1; const MAX_DATA_BYTES: usize = 1 << 31; fn general_test( name: &str, data: &[u8], min_shards: u16, total_shards: u16, indices: &[u16], ) { // If the indices reference some larger shard, use that as the total. let total_shards = indices .iter() .map(|&x| x + 1) .max() .map_or(total_shards, |x| x.max(total_shards)); assert!(min_shards >= 1, "min_shards must be at least 1"); assert!( indices.len() >= min_shards as usize, "you need to supply at least {min_shards} indices" ); assert!( total_shards >= min_shards, "total_shards ({total_shards}) must be >= min_shards ({min_shards})" ); let config = Config { minimum_shards: min_shards, extra_shards: total_shards - min_shards, }; let read_cfg = CodecConfig { maximum_shard_size: MAX_DATA_BYTES, }; let (commitment, shards) = S::encode(&config, data, CONCURRENCY).unwrap(); // Pick out the packets we want, in reverse order. let ((_, _, checking_data, my_checked_shard, _), other_packets) = { let mut out = shards .into_iter() .enumerate() .filter_map(|(i, shard)| { let shard = S::Shard::read_cfg(&mut shard.encode(), &read_cfg).unwrap(); let i = i as u16; let pos_of_i = indices.iter().position(|&x| x == i)?; let (x0, x1, x2) = S::reshard(&config, &commitment, i, shard).unwrap(); Some((pos_of_i, i, x0, x1, x2)) }) .collect::>(); out.sort_by_key(|&(pos_of_i, _, _, _, _)| Reverse(pos_of_i)); let first = out.pop().unwrap(); (first, out) }; let checked_shards = { let mut others = other_packets .into_iter() .map(|(_, i, _, _, reshard)| { let reshard = S::ReShard::read_cfg(&mut reshard.encode(), &read_cfg).unwrap(); S::check(&config, &commitment, &checking_data, i, reshard).unwrap() }) .collect::>(); others.push(my_checked_shard); others }; let decoded = S::decode( &config, &commitment, checking_data, &checked_shards, CONCURRENCY, ) .unwrap(); assert_eq!(&decoded, data, "{name} failed"); } fn test_basic() { general_test::("test_basic", b"Hello, Reed-Solomon!", 4, 7, &[0, 1, 2, 3]); } fn test_moderate() { general_test::( "test_moderate", b"Testing with more pieces than minimum", 4, 10, &[0, 1, 2, 8, 9], ); } fn test_odd_shard_len() { general_test::("test_odd_shard_len", b"?", 2, 3, &[0, 1]); } fn test_recovery() { general_test::( "test_recovery", b"Testing recovery pieces", 3, 8, &[5, 6, 7], ); } fn test_empty_data() { general_test::( "test_empty_data", b"", 30, 100, (0..30u16).collect::>().as_slice(), ); } fn test_large_data() { general_test::( "test_large_data", vec![42u8; 1000].as_slice(), 3, 4, &[0, 1, 2], ); } fn test_no_data_two_shards() { general_test::("test_no_data_one_shard", b"", 1, 2, &[0]); } // This exercises an edge case in ZODA, but is also useful for other schemes. fn test_2_pow_16_25_total_shards() { general_test::( "test_2_pow_16_25_total_shards", vec![0x67; 1 << 16].as_slice(), 8, 25, &(0..8).collect::>(), ); } fn test_suite() { test_basic::(); test_moderate::(); test_odd_shard_len::(); test_recovery::(); test_empty_data::(); test_large_data::(); test_no_data_two_shards::(); test_2_pow_16_25_total_shards::(); } #[test] fn test_suite_reed_solomon() { test_suite::>(); } #[test] fn test_suite_no_coding() { test_suite::>(); } #[test] fn test_suite_zoda() { test_suite::>(); } #[cfg(feature = "arbitrary")] mod conformance { use super::*; use commonware_codec::conformance::CodecConformance; commonware_conformance::conformance_tests! { CodecConformance, } } }