Skip to main content

tempo_xtask/
generate_state_bloat.rs

1//! State bloat generation tool for generating large TIP20 storage state files.
2//!
3//! Generates a binary file containing TIP20 storage slots (total_supply + balances)
4//! that can be loaded during genesis initialization to create a bloated state.
5//!
6//! Uses chunked streaming to keep memory bounded regardless of target file size.
7
8use alloy::{
9    primitives::{Address, U256, keccak256},
10    signers::{
11        local::coins_bip39::{English, Mnemonic},
12        utils::secret_key_to_address,
13    },
14};
15use coins_bip32::prelude::*;
16use eyre::{Context as _, ensure};
17use indicatif::{ProgressBar, ProgressStyle};
18use itertools::Itertools;
19use rayon::prelude::*;
20use std::{
21    fs::File,
22    io::{BufWriter, Write},
23    path::PathBuf,
24    sync::Arc,
25};
26use tempo_precompiles::tip20::tip20_slots;
27use tempo_primitives::transaction::TIP20_PAYMENT_PREFIX;
28
/// Eight-byte tag that opens every block header of the state bloat binary format.
const MAGIC: &[u8; 8] = b"TEMPOSB\x00";

/// Version number of the binary format, stored in every block header.
const VERSION: u16 = 1;

/// Entries processed per chunk unless overridden on the command line:
/// 256 Ki entries, i.e. roughly 16 MiB of 64-byte entries held in memory.
const DEFAULT_CHUNK_SIZE: usize = 1 << 18;
37
38/// Generate state bloat file
39#[derive(Debug, clap::Args)]
40pub(crate) struct GenerateStateBloat {
41    /// Mnemonic to use for account generation
42    #[arg(
43        short,
44        long,
45        default_value = "test test test test test test test test test test test junk"
46    )]
47    mnemonic: String,
48
49    /// Target file size in MiB
50    #[arg(short, long, default_value = "1024")]
51    size: u64,
52
53    /// Token IDs to generate storage for (can be specified multiple times)
54    /// Uses reserved TIP20 addresses: 0x20C0...{token_id}
55    #[arg(short, long, default_values_t = vec![0u64])]
56    token: Vec<u64>,
57
58    /// Output file path
59    #[arg(short, long, default_value = "state_bloat.bin")]
60    out: PathBuf,
61
62    /// Balance value to assign to each account (in smallest units)
63    #[arg(long, default_value = "1000000")]
64    balance: u64,
65
66    /// Number of addresses to derive using proper BIP32 (signable).
67    /// Remaining addresses use fast keccak-based derivation (not signable).
68    #[arg(long, default_value = "10000")]
69    signable_count: usize,
70
71    /// Number of entries to process per chunk. Controls peak memory usage.
72    #[arg(long, default_value_t = DEFAULT_CHUNK_SIZE)]
73    chunk_size: usize,
74}
75
76impl GenerateStateBloat {
77    pub(crate) async fn run(self) -> eyre::Result<()> {
78        let Self {
79            mnemonic,
80            size,
81            token: tokens,
82            out,
83            balance,
84            signable_count,
85            chunk_size,
86        } = self;
87
88        ensure!(
89            !tokens.is_empty(),
90            "at least one token ID must be specified"
91        );
92        ensure!(size > 0, "size must be greater than 0");
93        ensure!(chunk_size > 0, "chunk_size must be greater than 0");
94
95        let target_bytes = size * 1024 * 1024; // MiB to bytes
96        let num_tokens = tokens.len() as u64;
97
98        // Calculate number of accounts needed
99        // Per token: 1 header (40 bytes) + 1 total_supply (64 bytes) + N balances (64 bytes each)
100        // With chunking, each chunk gets its own header, so overhead increases slightly.
101        // We calculate based on the simple model first, then adjust.
102        let header_size = 40u64;
103        let entry_size = 64u64;
104        let overhead_per_token = header_size + entry_size; // header + total_supply
105        let available_for_balances = target_bytes.saturating_sub(num_tokens * overhead_per_token);
106        let total_balance_entries = available_for_balances / entry_size;
107        let accounts_per_token = total_balance_entries / num_tokens;
108
109        ensure!(
110            accounts_per_token > 0,
111            "target size too small for the number of tokens"
112        );
113
114        let total_accounts = accounts_per_token as usize;
115        let actual_signable = signable_count.min(total_accounts);
116
117        let estimated_size_mib =
118            (num_tokens * (overhead_per_token + accounts_per_token * entry_size)) as f64
119                / (1024.0 * 1024.0);
120        let out_display = out.display();
121        let num_chunks = total_accounts.div_ceil(chunk_size);
122        println!("State bloat generation:");
123        println!("  Target size: {size} MiB");
124        println!("  Tokens: {num_tokens}");
125        println!("  Accounts per token: {accounts_per_token}");
126        println!("  Estimated file size: {estimated_size_mib:.2} MiB");
127        println!("  Chunk size: {chunk_size} entries ({num_chunks} chunks)");
128        println!("  Output: {out_display}");
129
130        // Step 1: Derive parent key
131        let parent_key = derive_parent_key(&mnemonic)?;
132        let parent_key = Arc::new(parent_key);
133        let seed = keccak256(mnemonic.as_bytes());
134
135        // Step 2: Generate token addresses
136        let token_addresses: Vec<Address> = tokens.iter().map(|&id| token_address(id)).collect();
137
138        println!("\nToken addresses:");
139        for (id, addr) in tokens.iter().zip(&token_addresses) {
140            println!("  Token {id}: {addr}");
141        }
142
143        // Step 3: Precompute constants
144        let balance_value = U256::from(balance);
145        let total_supply = balance_value * U256::from(total_accounts);
146        let balance_bytes = balance_value.to_be_bytes::<32>();
147        let total_supply_bytes = total_supply.to_be_bytes::<32>();
148        let total_supply_slot_bytes = tip20_slots::TOTAL_SUPPLY.to_be_bytes::<32>();
149
150        // Step 4: Stream-write the binary file in chunks
151        let file = File::create(&out).wrap_err("failed to create output file")?;
152        let mut writer = BufWriter::with_capacity(64 * 1024 * 1024, file); // 64MB buffer
153
154        println!("\nGenerating and writing in {num_chunks} chunks...");
155
156        let pb = ProgressBar::new(total_accounts as u64);
157        pb.set_style(
158            ProgressStyle::default_bar()
159                .template("[{elapsed_precise}] {bar:40.cyan/blue} {pos}/{len} ({per_sec}) ({eta})")
160                .expect("valid template"),
161        );
162
163        let mut chunk_buf = Vec::with_capacity(chunk_size.min(total_accounts) * 64);
164
165        let mut is_first_chunk = true;
166
167        for chunk in &(0..total_accounts).chunks(chunk_size) {
168            let chunk_indices: Vec<_> = chunk.collect();
169            let chunk_len = chunk_indices.len();
170
171            // Derive addresses and compute slot bytes for this chunk only
172            let slot_bytes: Vec<[u8; 32]> = chunk_indices
173                .into_par_iter()
174                .map(|i| {
175                    let addr = if i < actual_signable {
176                        let child = parent_key
177                            .derive_child(i as u32)
178                            .expect("child derivation should not fail");
179                        let key: &coins_bip32::prelude::SigningKey = child.as_ref();
180                        let credential =
181                            k256::ecdsa::SigningKey::from_bytes(&key.to_bytes()).unwrap();
182                        secret_key_to_address(&credential)
183                    } else {
184                        derive_address_fast(&seed, i as u64)
185                    };
186                    compute_mapping_slot(addr, tip20_slots::BALANCES).to_be_bytes::<32>()
187                })
188                .collect();
189
190            // Write one block per token for this chunk
191            for (token_idx, token_addr) in token_addresses.iter().enumerate() {
192                let pair_count = chunk_len as u64 + if is_first_chunk { 1 } else { 0 };
193
194                write_header(&mut writer, *token_addr, pair_count)?;
195
196                // Only write total_supply in the first chunk for each token
197                if is_first_chunk {
198                    writer.write_all(&total_supply_slot_bytes)?;
199                    writer.write_all(&total_supply_bytes)?;
200                }
201
202                // Write balance entries in chunks
203                chunk_buf.clear();
204                for slot in &slot_bytes {
205                    chunk_buf.extend_from_slice(slot);
206                    chunk_buf.extend_from_slice(&balance_bytes);
207                }
208                writer.write_all(&chunk_buf)?;
209
210                // Only count progress once per chunk (on the last token)
211                if token_idx == token_addresses.len() - 1 {
212                    pb.inc(chunk_len as u64);
213                }
214            }
215
216            is_first_chunk = false;
217        }
218
219        writer.flush()?;
220        pb.finish_with_message("done");
221
222        let file_size = std::fs::metadata(&out)?.len();
223        println!(
224            "\nGenerated {} ({:.2} MiB)",
225            out.display(),
226            file_size as f64 / (1024.0 * 1024.0)
227        );
228
229        Ok(())
230    }
231}
232
233/// Compute a reserved TIP20 token address from a token ID.
234/// Reserved addresses use the TIP20 prefix with the token ID in the last 8 bytes.
235fn token_address(token_id: u64) -> Address {
236    let mut bytes = [0u8; 20];
237    bytes[..12].copy_from_slice(&TIP20_PAYMENT_PREFIX);
238    bytes[12..].copy_from_slice(&token_id.to_be_bytes());
239    Address::from(bytes)
240}
241
242/// Fast address derivation using keccak256(seed || index).
243/// This is much faster than BIP32 but the resulting addresses are NOT signable.
244/// Used for generating bloat addresses beyond the signable count.
245fn derive_address_fast(seed: &[u8; 32], index: u64) -> Address {
246    let mut buf = [0u8; 40]; // 32 bytes seed + 8 bytes index
247    buf[..32].copy_from_slice(seed);
248    buf[32..].copy_from_slice(&index.to_be_bytes());
249    let hash = keccak256(buf);
250    // Take last 20 bytes of hash as address
251    Address::from_slice(&hash[12..])
252}
253
254/// Derive the parent key for BIP44 Ethereum path: m/44'/60'/0'/0
255/// This performs PBKDF2 once, then subsequent child derivations are fast.
256fn derive_parent_key(mnemonic_phrase: &str) -> eyre::Result<XPriv> {
257    let mnemonic = Mnemonic::<English>::new_from_phrase(mnemonic_phrase)
258        .map_err(|e| eyre::eyre!("invalid mnemonic: {e}"))?;
259
260    // Derive seed from mnemonic (this is the slow PBKDF2 step)
261    let master: XPriv = mnemonic
262        .derive_key("m/44'/60'/0'/0", None)
263        .map_err(|e| eyre::eyre!("key derivation failed: {e}"))?;
264
265    Ok(master)
266}
267
268/// Compute a Solidity mapping slot: keccak256(pad32(key) || pad32(base_slot))
269fn compute_mapping_slot(key: Address, base_slot: U256) -> U256 {
270    let mut buf = [0u8; 64];
271    // Left-pad address to 32 bytes
272    buf[12..32].copy_from_slice(key.as_slice());
273    // Base slot as big-endian 32 bytes
274    buf[32..].copy_from_slice(&base_slot.to_be_bytes::<32>());
275    U256::from_be_bytes(keccak256(buf).0)
276}
277
278/// Write a block header to the output.
279/// Format: `[magic:8][version:2][flags:2][address:20][pair_count:8] = 40 bytes`
280fn write_header(writer: &mut impl Write, address: Address, pair_count: u64) -> eyre::Result<()> {
281    writer.write_all(MAGIC)?;
282    writer.write_all(&VERSION.to_be_bytes())?;
283    writer.write_all(&0u16.to_be_bytes())?; // flags (reserved)
284    writer.write_all(address.as_slice())?;
285    writer.write_all(&pair_count.to_be_bytes())?;
286    Ok(())
287}
288
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses a hex literal into an [`Address`], panicking on malformed input.
    fn addr(hex: &str) -> Address {
        hex.parse().expect("valid address literal")
    }

    #[test]
    fn test_token_address() {
        assert_eq!(
            token_address(0),
            addr("0x20C0000000000000000000000000000000000000")
        );
        assert_eq!(
            token_address(1),
            addr("0x20C0000000000000000000000000000000000001")
        );

        // The token ID occupies the last 8 bytes in big-endian order.
        let id_bytes = token_address(0x0102_0304_0506_0708);
        assert_eq!(&id_bytes.as_slice()[12..], &[1u8, 2, 3, 4, 5, 6, 7, 8][..]);
    }

    #[test]
    fn test_compute_mapping_slot() {
        let account = addr("0x1234567890123456789012345678901234567890");
        let slot = compute_mapping_slot(account, tip20_slots::BALANCES);

        // Verify against an independently assembled preimage, matching Solidity's
        // keccak256(abi.encode(key, slot)): the address left-padded to 32 bytes,
        // followed by the base slot as a 32-byte big-endian integer.
        let mut preimage = vec![0u8; 12];
        preimage.extend_from_slice(account.as_slice());
        preimage.extend_from_slice(&tip20_slots::BALANCES.to_be_bytes::<32>());
        assert_eq!(preimage.len(), 64);
        assert_eq!(slot, U256::from_be_bytes(keccak256(&preimage).0));

        // The slot should be deterministic
        assert_eq!(slot, compute_mapping_slot(account, tip20_slots::BALANCES));

        // Different addresses should produce different slots
        let other = addr("0xabcdefabcdefabcdefabcdefabcdefabcdefabcd");
        assert_ne!(slot, compute_mapping_slot(other, tip20_slots::BALANCES));

        // Different base slots should produce different slots for the same address
        assert_ne!(slot, compute_mapping_slot(account, tip20_slots::TOTAL_SUPPLY));
    }

    #[test]
    fn test_header_size() {
        let token = addr("0x1234567890123456789012345678901234567890");
        let mut buf: Vec<u8> = Vec::new();
        write_header(&mut buf, token, 100).unwrap();
        assert_eq!(buf.len(), 40);

        // Field layout: [magic:8][version:2][flags:2][address:20][pair_count:8]
        assert_eq!(&buf[..8], &MAGIC[..]);
        assert_eq!(&buf[8..10], &VERSION.to_be_bytes()[..]);
        assert_eq!(&buf[10..12], &[0u8, 0][..]);
        assert_eq!(&buf[12..32], token.as_slice());
        assert_eq!(&buf[32..], &100u64.to_be_bytes()[..]);
    }

    #[test]
    fn test_derive_address_fast() {
        let seed = [7u8; 32];
        let first = derive_address_fast(&seed, 0);

        // Deterministic, and sensitive to both the index and the seed
        assert_eq!(first, derive_address_fast(&seed, 0));
        assert_ne!(first, derive_address_fast(&seed, 1));
        assert_ne!(first, derive_address_fast(&[8u8; 32], 0));

        // Last 20 bytes of keccak256(seed || index as 8 big-endian bytes)
        let mut preimage = seed.to_vec();
        preimage.extend_from_slice(&0u64.to_be_bytes());
        let digest = keccak256(&preimage);
        assert_eq!(first.as_slice(), &digest[12..]);
    }

    #[test]
    fn test_derive_parent_key_matches_mnemonic_builder() {
        use alloy::signers::local::MnemonicBuilder;

        let mnemonic = "test test test test test test test test test test test junk";
        let parent_key = derive_parent_key(mnemonic).unwrap();

        // Verify first 10 addresses match MnemonicBuilder::from_phrase_nth
        for i in 0..10u32 {
            let expected = MnemonicBuilder::from_phrase_nth(mnemonic, i);

            let child = parent_key.derive_child(i).unwrap();
            let key: &coins_bip32::prelude::SigningKey = child.as_ref();
            let credential = k256::ecdsa::SigningKey::from_bytes(&key.to_bytes()).unwrap();
            let actual = secret_key_to_address(&credential);

            assert_eq!(actual, expected.address(), "address mismatch at index {i}");
        }
    }

    #[test]
    fn test_derive_parent_key_rejects_invalid_mnemonic() {
        assert!(derive_parent_key("definitely not a valid mnemonic").is_err());
    }

    #[test]
    fn test_entry_size() {
        let slot = U256::ZERO.to_be_bytes::<32>();
        let value = U256::from(1).to_be_bytes::<32>();
        assert_eq!(slot.len() + value.len(), 64);
    }
}