use std::fs;
use std::io::{Read, Seek, SeekFrom, Write};
use std::path::{Path, PathBuf};

use rand::Rng;
use rayon::prelude::*;
use std::os::unix::fs::PermissionsExt;

use crate::compression;
use crate::crypto;
use crate::format::{self, Header, TocEntry, HEADER_SIZE};

/// Processed file data collected during Pass 1 of pack.
///
/// One instance per archive entry; directories use zeroed crypto fields and
/// empty buffers (see `make_directory_entry`).
struct ProcessedFile {
    name: String,
    entry_type: u8,       // 0x00 = file, 0x01 = directory
    permissions: u16,     // Lower 12 bits of POSIX mode_t
    original_size: u32,   // Size of the file before compression/encryption
    compressed_size: u32, // Size after compression (== original_size when stored)
    encrypted_size: u32,  // Size of `ciphertext`, i.e. the on-disk data block
    iv: [u8; 16],
    hmac: [u8; 32],       // HMAC over IV || ciphertext (encrypt-then-MAC)
    sha256: [u8; 32],     // Hash of the original (pre-compression) data
    compression_flag: u8, // 1 = compressed before encryption, 0 = stored
    ciphertext: Vec<u8>,
    padding_after: u16,   // Number of decoy padding bytes after the data block
    padding_bytes: Vec<u8>,
}

/// Collected entry from the directory walk (before crypto processing).
///
/// Separates the fast sequential path-collection phase from the
/// parallelizable crypto-processing phase.
enum CollectedEntry {
    Dir {
        name: String,
        permissions: u16,
    },
    File {
        path: PathBuf,
        name: String,
        permissions: u16,
    },
}

/// Read and de-obfuscate archive header and TOC entries.
///
/// Handles XOR header bootstrapping (FORMAT.md Section 10 steps 1-3),
/// optional salt reading (between header and TOC), and TOC decryption
/// (Section 10 step 4) automatically.
/// Used by both unpack() and inspect().
///
/// When `key` is `None` and the TOC is encrypted, returns `Ok((header, vec![], salt))`.
/// The caller can check `header.flags & 0x02` to determine if entries were omitted.
fn read_archive_metadata(file: &mut fs::File, key: Option<&[u8; 32]>) -> anyhow::Result<(Header, Vec, Option<[u8; 16]>)> { // Step 1-3: Read header with XOR bootstrapping let header = format::read_header_auto(file)?; // Read salt if present (between header and TOC) let salt = format::read_salt(file, &header)?; // Step 4: Read TOC (possibly encrypted) file.seek(SeekFrom::Start(header.toc_offset as u64))?; let mut toc_raw = vec![0u8; header.toc_size as usize]; file.read_exact(&mut toc_raw)?; let entries = if header.flags & 0x02 != 0 { // TOC is encrypted if let Some(k) = key { // Decrypt with toc_iv, then parse let toc_plaintext = crypto::decrypt_data(&toc_raw, k, &header.toc_iv)?; format::read_toc_from_buf(&toc_plaintext, header.file_count)? } else { // No key provided: cannot decrypt TOC vec![] } } else { // TOC is plaintext: parse directly format::read_toc_from_buf(&toc_raw, header.file_count)? }; Ok((header, entries, salt)) } /// Read just the salt from an archive (for password-based key derivation before full unpack). pub fn read_archive_salt(archive: &Path) -> anyhow::Result> { let mut file = fs::File::open(archive)?; let header = format::read_header_auto(&mut file)?; format::read_salt(&mut file, &header) } /// Get Unix permission bits (lower 12 bits of mode_t) for a path. fn get_permissions(path: &Path) -> anyhow::Result { let metadata = fs::metadata(path)?; Ok((metadata.permissions().mode() & 0o7777) as u16) } /// Process a single file through the crypto pipeline, returning a ProcessedFile. /// /// Thread-safe: creates a thread-local RNG instead of accepting an external one. 
fn process_file( file_path: &Path, name: String, permissions: u16, no_compress: &[String], key: &[u8; 32], ) -> anyhow::Result { let data = fs::read(file_path)?; // Validate file size <= u32::MAX anyhow::ensure!( data.len() <= u32::MAX as usize, "File too large: {} ({} bytes exceeds 4 GB limit)", file_path.display(), data.len() ); // Step 1: SHA-256 of original data let sha256 = crypto::sha256_hash(&data); // Step 2: Determine compression and compress if needed let should_compress = compression::should_compress(&name, no_compress); let (compressed_data, compression_flag) = if should_compress { let compressed = compression::compress(&data)?; (compressed, 1u8) } else { (data.clone(), 0u8) }; let original_size = data.len() as u32; let compressed_size = compressed_data.len() as u32; // Step 3: Generate random IV let iv = crypto::generate_iv(); // Step 4: Encrypt let ciphertext = crypto::encrypt_data(&compressed_data, key, &iv); let encrypted_size = ciphertext.len() as u32; // Step 5: Compute HMAC over IV || ciphertext let hmac = crypto::compute_hmac(key, &iv, &ciphertext); // Step 6: Generate decoy padding (FORMAT.md Section 9.3) let mut rng = rand::rng(); let padding_after: u16 = rng.random_range(64..=4096); let mut padding_bytes = vec![0u8; padding_after as usize]; rand::Fill::fill(&mut padding_bytes[..], &mut rng); Ok(ProcessedFile { name, entry_type: 0, permissions, original_size, compressed_size, encrypted_size, iv, hmac, sha256, compression_flag, ciphertext, padding_after, padding_bytes, }) } /// Create a ProcessedFile for a directory entry (no data block). fn make_directory_entry(name: String, permissions: u16) -> ProcessedFile { ProcessedFile { name, entry_type: 1, permissions, original_size: 0, compressed_size: 0, encrypted_size: 0, iv: [0u8; 16], hmac: [0u8; 32], sha256: [0u8; 32], compression_flag: 0, ciphertext: Vec::new(), padding_after: 0, padding_bytes: Vec::new(), } } /// Recursively collect paths from a directory (no crypto processing). 
///
/// Entries are emitted in parent-before-child order (DFS preorder).
/// The base_name is the top-level directory name used as prefix for all relative paths.
fn collect_directory_paths(
    dir_path: &Path,
    base_name: &str,
) -> anyhow::Result<Vec<CollectedEntry>> {
    let mut entries = Vec::new();

    // Add the directory itself first (parent-before-child)
    let dir_perms = get_permissions(dir_path)?;
    entries.push(CollectedEntry::Dir {
        name: base_name.to_string(),
        permissions: dir_perms,
    });

    // Collect children sorted by name for deterministic ordering
    let mut children: Vec<fs::DirEntry> = fs::read_dir(dir_path)?
        .collect::<Result<Vec<_>, _>>()?;
    children.sort_by_key(|e| e.file_name());

    for child in children {
        let child_path = child.path();
        let child_name = format!(
            "{}/{}",
            base_name,
            child
                .file_name()
                .to_str()
                .ok_or_else(|| anyhow::anyhow!("Non-UTF-8 filename: {}", child_path.display()))?
        );

        // Use symlink_metadata to avoid following symlinks.
        // is_dir()/is_file() follow symlinks, which can cause infinite
        // recursion or massively inflated entry counts with symlink farms
        // (e.g., pnpm node_modules with hundreds of directory symlinks).
        let meta = fs::symlink_metadata(&child_path)?;
        if meta.file_type().is_symlink() {
            eprintln!("Warning: skipping symlink: {}", child_path.display());
            continue;
        } else if meta.is_dir() {
            // Recurse into real subdirectory (not a symlink)
            let sub_entries = collect_directory_paths(&child_path, &child_name)?;
            entries.extend(sub_entries);
        } else {
            // Collect file path for later parallel processing
            let file_perms = (meta.permissions().mode() & 0o7777) as u16;
            entries.push(CollectedEntry::File {
                path: child_path,
                name: child_name,
                permissions: file_perms,
            });
        }
    }

    Ok(entries)
}

/// Collect all entry paths from input paths (files and directories).
///
/// Returns a list of CollectedEntry items in deterministic order,
/// ready for parallel processing of file entries.
fn collect_paths(inputs: &[PathBuf]) -> anyhow::Result> { let mut collected = Vec::new(); for input_path in inputs { // Check for symlinks at top level too let meta = fs::symlink_metadata(input_path)?; if meta.file_type().is_symlink() { eprintln!( "Warning: skipping symlink: {}", input_path.display() ); continue; } if meta.is_dir() { // Get the directory's own name for the archive prefix let dir_name = input_path .file_name() .ok_or_else(|| anyhow::anyhow!("Invalid directory path: {}", input_path.display()))? .to_str() .ok_or_else(|| anyhow::anyhow!("Non-UTF-8 directory name: {}", input_path.display()))? .to_string(); let dir_entries = collect_directory_paths(input_path, &dir_name)?; collected.extend(dir_entries); } else { // Single file: use just the filename let name = input_path .file_name() .ok_or_else(|| anyhow::anyhow!("Invalid file path: {}", input_path.display()))? .to_str() .ok_or_else(|| anyhow::anyhow!("Non-UTF-8 filename: {}", input_path.display()))? .to_string(); let file_perms = get_permissions(input_path)?; collected.push(CollectedEntry::File { path: input_path.clone(), name, permissions: file_perms, }); } } Ok(collected) } /// Pack files and directories into an encrypted archive. /// /// Two-pass algorithm with full obfuscation and parallel file processing: /// Pass 1a: Walk directory tree sequentially, collect paths in deterministic order. /// Pass 1b: Process file entries in parallel (read, hash, compress, encrypt, padding). /// Directory entries become zero-length entries (no processing needed). /// Pass 2: Encrypt TOC, compute offsets, XOR header, write archive sequentially. 
pub fn pack(files: &[PathBuf], output: &Path, no_compress: &[String], key: &[u8; 32], salt: Option<&[u8; 16]>) -> anyhow::Result<()> { anyhow::ensure!(!files.is_empty(), "No input files specified"); // --- Pass 1a: Collect paths sequentially (fast, deterministic) --- let collected = collect_paths(files)?; anyhow::ensure!(!collected.is_empty(), "No entries to archive"); // Guard against u16 overflow: file_count field in header is u16 (max 65535) anyhow::ensure!( collected.len() <= u16::MAX as usize, "Too many entries: {} exceeds maximum of {} (u16 file_count limit)", collected.len(), u16::MAX ); // --- Pass 1b: Process files in parallel, directories inline --- // We use par_iter on the collected entries while preserving their order. // Each entry is processed independently; file entries go through the full // crypto pipeline in parallel, directory entries are trivially converted. let processed: Vec = collected .into_par_iter() .map(|entry| match entry { CollectedEntry::Dir { name, permissions } => { Ok(make_directory_entry(name, permissions)) } CollectedEntry::File { path, name, permissions } => { process_file(&path, name, permissions, no_compress, key) } }) .collect::>>()?; // Count files and directories let file_count = processed.iter().filter(|pf| pf.entry_type == 0).count(); let dir_count = processed.iter().filter(|pf| pf.entry_type == 1).count(); // --- Pass 2: Compute offsets and write archive --- // Determine flags byte: bit 0 if any file is compressed, bits 1-3 for obfuscation let any_compressed = processed.iter().any(|pf| pf.compression_flag == 1); let mut flags: u8 = if any_compressed { 0x01 } else { 0x00 }; // Enable all three obfuscation features flags |= 0x02; // bit 1: TOC encrypted flags |= 0x04; // bit 2: XOR header flags |= 0x08; // bit 3: decoy padding // Set KDF salt flag if password-derived key if salt.is_some() { flags |= format::FLAG_KDF_SALT; // bit 4: KDF salt present } // Build TOC entries (with placeholder data_offset=0, will be set after 
toc_size known) let toc_entries: Vec = processed .iter() .map(|pf| TocEntry { name: pf.name.clone(), entry_type: pf.entry_type, permissions: pf.permissions, original_size: pf.original_size, compressed_size: pf.compressed_size, encrypted_size: pf.encrypted_size, data_offset: 0, // placeholder iv: pf.iv, hmac: pf.hmac, sha256: pf.sha256, compression_flag: pf.compression_flag, padding_after: pf.padding_after, }) .collect(); // Serialize TOC to get plaintext size, then encrypt to get final toc_size let toc_plaintext = format::serialize_toc(&toc_entries)?; // Generate TOC IV and encrypt let toc_iv = crypto::generate_iv(); let encrypted_toc = crypto::encrypt_data(&toc_plaintext, key, &toc_iv); let encrypted_toc_size = encrypted_toc.len() as u32; let toc_offset = if salt.is_some() { HEADER_SIZE + format::SALT_SIZE } else { HEADER_SIZE }; // Compute data offsets (accounting for encrypted TOC size and padding) // Directory entries are skipped (no data block). let data_block_start = toc_offset + encrypted_toc_size; let mut data_offsets: Vec = Vec::with_capacity(processed.len()); let mut current_offset = data_block_start; for pf in &processed { if pf.entry_type == 1 { // Directory: no data block, offset is 0 data_offsets.push(0); } else { data_offsets.push(current_offset); current_offset += pf.encrypted_size + pf.padding_after as u32; } } // Now re-serialize TOC with correct data_offsets let final_toc_entries: Vec = processed .iter() .enumerate() .map(|(i, pf)| TocEntry { name: pf.name.clone(), entry_type: pf.entry_type, permissions: pf.permissions, original_size: pf.original_size, compressed_size: pf.compressed_size, encrypted_size: pf.encrypted_size, data_offset: data_offsets[i], iv: pf.iv, hmac: pf.hmac, sha256: pf.sha256, compression_flag: pf.compression_flag, padding_after: pf.padding_after, }) .collect(); let final_toc_plaintext = format::serialize_toc(&final_toc_entries)?; let final_encrypted_toc = crypto::encrypt_data(&final_toc_plaintext, key, &toc_iv); let 
final_encrypted_toc_size = final_encrypted_toc.len() as u32; // Sanity check: encrypted TOC size should not change (same plaintext length) assert_eq!( encrypted_toc_size, final_encrypted_toc_size, "TOC encrypted size changed unexpectedly" ); // Create header let header = Header { version: format::VERSION, flags, file_count: processed.len() as u16, toc_offset, toc_size: final_encrypted_toc_size, toc_iv, reserved: [0u8; 8], }; // Serialize header to buffer and XOR let mut header_buf = format::write_header_to_buf(&header); format::xor_header_buf(&mut header_buf); // Open output file let mut out_file = fs::File::create(output)?; // Write XOR'd header out_file.write_all(&header_buf)?; // Write salt if present (between header and TOC) if let Some(s) = salt { format::write_salt(&mut out_file, s)?; } // Write encrypted TOC out_file.write_all(&final_encrypted_toc)?; // Write data blocks with interleaved decoy padding (skip directory entries) for pf in &processed { if pf.entry_type == 1 { continue; // directories have no data block } out_file.write_all(&pf.ciphertext)?; out_file.write_all(&pf.padding_bytes)?; } let total_bytes = current_offset; println!( "Packed {} entries ({} files, {} directories) into {} ({} bytes)", processed.len(), file_count, dir_count, output.display(), total_bytes ); Ok(()) } /// Inspect archive metadata. /// /// Without a key: displays header fields only (version, flags, file_count, etc.). /// If the TOC is encrypted and no key is provided, prints a message indicating /// that a key is needed to see the entry listing. /// /// With a key: decrypts TOC and displays full entry listing (file names, sizes, etc.). 
pub fn inspect(archive: &Path, key: Option<&[u8; 32]>) -> anyhow::Result<()> { let mut file = fs::File::open(archive)?; // Read header and TOC (TOC may be empty if encrypted and no key provided) let (header, entries, _salt) = read_archive_metadata(&mut file, key)?; // Print header info let filename = archive .file_name() .map(|s| s.to_string_lossy().to_string()) .unwrap_or_else(|| archive.display().to_string()); println!("Archive: {}", filename); println!("Version: {}", header.version); println!("Flags: 0x{:02X}", header.flags); println!("Entries: {}", header.file_count); println!("TOC offset: {}", header.toc_offset); println!("TOC size: {}", header.toc_size); println!(); // Check if TOC was encrypted but we had no key if entries.is_empty() && header.file_count > 0 && header.flags & 0x02 != 0 && key.is_none() { println!("TOC is encrypted, provide a key to see entry listing"); return Ok(()); } // Print each entry let mut total_original: u64 = 0; for (i, entry) in entries.iter().enumerate() { let type_str = if entry.entry_type == 1 { "dir" } else { "file" }; let perms_str = format!("{:04o}", entry.permissions); println!("[{}] {} ({}, {})", i, entry.name, type_str, perms_str); println!(" Permissions: {}", perms_str); if entry.entry_type == 0 { // File entry: show size and crypto details let compression_str = if entry.compression_flag == 1 { "yes" } else { "no" }; println!(" Original: {} bytes", entry.original_size); println!(" Compressed: {} bytes", entry.compressed_size); println!(" Encrypted: {} bytes", entry.encrypted_size); println!(" Offset: {}", entry.data_offset); println!(" Compression: {}", compression_str); println!(" Padding after: {} bytes", entry.padding_after); println!( " IV: {}", entry.iv.iter().map(|b| format!("{:02x}", b)).collect::() ); println!( " HMAC: {}", entry.hmac.iter().map(|b| format!("{:02x}", b)).collect::() ); println!( " SHA-256: {}", entry.sha256.iter().map(|b| format!("{:02x}", b)).collect::() ); total_original += entry.original_size 
as u64; } } println!(); println!("Total original size: {} bytes", total_original); Ok(()) } /// Data read from the archive for a single entry, ready for parallel processing. enum ReadEntry { /// Directory entry: just needs creation and permission setting. Dir { name: String, permissions: u16, }, /// File entry: ciphertext has been read, ready for verify/decrypt/decompress/write. File { entry: TocEntry, ciphertext: Vec, }, /// Entry with unsafe name that was skipped during reading. Skipped { _name: String, }, } /// Result of processing a single file entry during parallel unpack. enum UnpackResult { /// File extracted successfully. Ok { name: String, original_size: u32 }, /// File had a verification error but was still written (SHA-256 mismatch). Written { name: String, original_size: u32 }, /// File processing failed (HMAC, decryption, or decompression error). Error { name: String, message: String }, } /// Unpack an encrypted archive, extracting all files and directories with /// HMAC and SHA-256 verification, and Unix permission restoration. /// /// Uses parallel processing for the verify/decrypt/decompress/write pipeline: /// 1. Read header and TOC sequentially (single file handle). /// 2. Create all directories sequentially (ensures parent dirs exist). /// 3. Read all file ciphertexts sequentially from the archive. /// 4. Process and write files in parallel (HMAC, decrypt, decompress, SHA-256, write). pub fn unpack(archive: &Path, output_dir: &Path, key: &[u8; 32]) -> anyhow::Result<()> { let mut file = fs::File::open(archive)?; // Read header and TOC with full de-obfuscation let (_header, entries, _salt) = read_archive_metadata(&mut file, Some(key))?; // Create output directory fs::create_dir_all(output_dir)?; let entry_count = entries.len(); // --- Phase 1: Sequential read of all entry data --- // Separate directories from files, read ciphertexts for files. 
let mut read_entries: Vec = Vec::with_capacity(entry_count); for entry in entries { // Sanitize filename: reject directory traversal if entry.name.starts_with('/') || entry.name.contains("..") { eprintln!( "Skipping entry with unsafe name: {} (directory traversal attempt)", entry.name ); read_entries.push(ReadEntry::Skipped { _name: entry.name.clone() }); continue; } if entry.entry_type == 1 { read_entries.push(ReadEntry::Dir { name: entry.name.clone(), permissions: entry.permissions, }); } else { // Seek to data_offset and read ciphertext into memory file.seek(SeekFrom::Start(entry.data_offset as u64))?; let mut ciphertext = vec![0u8; entry.encrypted_size as usize]; file.read_exact(&mut ciphertext)?; read_entries.push(ReadEntry::File { entry, ciphertext, }); } } // --- Phase 2: Create directories sequentially (parent-before-child order) --- let mut dir_count: usize = 0; for re in &read_entries { if let ReadEntry::Dir { name, permissions } = re { let output_path = output_dir.join(name); fs::create_dir_all(&output_path)?; fs::set_permissions( &output_path, fs::Permissions::from_mode(*permissions as u32), )?; println!("Created directory: {}", name); dir_count += 1; } } // --- Phase 3: Process and write files in parallel --- // Count skipped entries from phase 1 let skipped_count = read_entries.iter() .filter(|re| matches!(re, ReadEntry::Skipped { .. 
})) .count(); // Collect only file entries for parallel processing let file_entries: Vec<(&TocEntry, &Vec)> = read_entries.iter() .filter_map(|re| { if let ReadEntry::File { entry, ciphertext } = re { Some((entry, ciphertext)) } else { None } }) .collect(); // Process all files in parallel: HMAC verify, decrypt, decompress, SHA-256, write let results: Vec = file_entries .par_iter() .map(|(entry, ciphertext)| { let output_path = output_dir.join(&entry.name); // Create parent directories if name contains path separators if let Some(parent) = output_path.parent() { if let Err(e) = fs::create_dir_all(parent) { return UnpackResult::Error { name: entry.name.clone(), message: format!("Failed to create parent directory: {}", e), }; } } // Step 1: Verify HMAC FIRST (encrypt-then-MAC) if !crypto::verify_hmac(key, &entry.iv, ciphertext, &entry.hmac) { return UnpackResult::Error { name: entry.name.clone(), message: "HMAC verification failed".to_string(), }; } // Step 2: Decrypt let decrypted = match crypto::decrypt_data(ciphertext, key, &entry.iv) { Ok(data) => data, Err(e) => { return UnpackResult::Error { name: entry.name.clone(), message: format!("Decryption failed: {}", e), }; } }; // Step 3: Decompress if compressed let decompressed = if entry.compression_flag == 1 { match compression::decompress(&decrypted) { Ok(data) => data, Err(e) => { return UnpackResult::Error { name: entry.name.clone(), message: format!("Decompression failed: {}", e), }; } } } else { decrypted }; // Step 4: Verify SHA-256 let computed_sha256 = crypto::sha256_hash(&decompressed); let sha256_ok = computed_sha256 == entry.sha256; // Step 5: Write file (even if SHA-256 mismatch, per spec) if let Err(e) = fs::write(&output_path, &decompressed) { return UnpackResult::Error { name: entry.name.clone(), message: format!("Write failed: {}", e), }; } // Step 6: Set file permissions if let Err(e) = fs::set_permissions( &output_path, fs::Permissions::from_mode(entry.permissions as u32), ) { return 
UnpackResult::Error { name: entry.name.clone(), message: format!("Failed to set permissions: {}", e), }; } if sha256_ok { UnpackResult::Ok { name: entry.name.clone(), original_size: entry.original_size, } } else { UnpackResult::Written { name: entry.name.clone(), original_size: entry.original_size, } } }) .collect(); // --- Phase 4: Report results (sequential for deterministic output) --- let mut final_error_count = skipped_count; let mut final_success_count = dir_count; for result in &results { match result { UnpackResult::Ok { name, original_size } => { println!("Extracted: {} ({} bytes)", name, original_size); final_success_count += 1; } UnpackResult::Written { name, original_size } => { eprintln!("SHA-256 mismatch for {} (data may be corrupted)", name); println!("Extracted: {} ({} bytes)", name, original_size); final_error_count += 1; // Original code increments both error_count AND success_count for // SHA-256 mismatch (file is still written and counted as extracted). final_success_count += 1; } UnpackResult::Error { name, message } => { eprintln!("{} for {}, skipping", message, name); final_error_count += 1; } } } println!( "Extracted {}/{} entries", final_success_count, entry_count ); if final_error_count > 0 { anyhow::bail!("{} entry(ies) had verification errors", final_error_count); } Ok(()) }