diff --git a/src/format.rs b/src/format.rs
index 61e5caa..32627ad 100644
--- a/src/format.rs
+++ b/src/format.rs
@@ -1,5 +1,4 @@
-use std::io::Read;
-use std::io::Write;
+use std::io::{Cursor, Read, Write};
 
 /// Custom magic bytes: leading 0x00 signals binary, remaining bytes are unrecognized.
 pub const MAGIC: [u8; 4] = [0x00, 0xEA, 0x72, 0x63];
@@ -10,6 +9,9 @@ pub const VERSION: u8 = 1;
 /// Fixed header size in bytes.
 pub const HEADER_SIZE: u32 = 40;
 
+/// Fixed 8-byte XOR obfuscation key (FORMAT.md Section 9.1).
+pub const XOR_KEY: [u8; 8] = [0xA5, 0x3C, 0x96, 0x0F, 0xE1, 0x7B, 0x4D, 0xC8];
+
 /// Archive header (40 bytes fixed at offset 0x00).
 #[derive(Debug, Clone)]
 pub struct Header {
@@ -76,6 +78,112 @@ pub fn write_toc_entry(writer: &mut impl Write, entry: &TocEntry) -> anyhow::Res
     Ok(())
 }
 
+/// XOR-obfuscate or de-obfuscate a header buffer in-place.
+///
+/// XOR is its own inverse, so the same function encodes and decodes.
+/// Applies the 8-byte XOR_KEY cyclically across the first 40 bytes of the buffer.
+pub fn xor_header_buf(buf: &mut [u8]) {
+    assert!(buf.len() >= 40, "buffer must be at least 40 bytes");
+    for i in 0..40 {
+        buf[i] ^= XOR_KEY[i % 8];
+    }
+}
+
+/// Serialize the 40-byte archive header into a fixed buffer.
+///
+/// Returns a `[u8; 40]` buffer that can be XOR-obfuscated before writing.
+pub fn write_header_to_buf(header: &Header) -> [u8; 40] {
+    let mut buf = [0u8; 40];
+    buf[0..4].copy_from_slice(&MAGIC);
+    buf[4] = header.version;
+    buf[5] = header.flags;
+    buf[6..8].copy_from_slice(&header.file_count.to_le_bytes());
+    buf[8..12].copy_from_slice(&header.toc_offset.to_le_bytes());
+    buf[12..16].copy_from_slice(&header.toc_size.to_le_bytes());
+    buf[16..32].copy_from_slice(&header.toc_iv);
+    buf[32..40].copy_from_slice(&header.reserved);
+    buf
+}
+
+/// Parse a header from a 40-byte buffer (already validated for magic).
+///
+/// Verifies: version == 1, reserved flags bits 4-7 are zero.
+fn parse_header_from_buf(buf: &[u8; 40]) -> anyhow::Result<Header> {
+    let version = buf[4];
+    anyhow::ensure!(version == VERSION, "Unsupported version: {}", version);
+
+    let flags = buf[5];
+    anyhow::ensure!(
+        flags & 0xF0 == 0,
+        "Unknown flags set: 0x{:02X} (bits 4-7 must be zero)",
+        flags
+    );
+
+    let file_count = u16::from_le_bytes([buf[6], buf[7]]);
+    let toc_offset = u32::from_le_bytes([buf[8], buf[9], buf[10], buf[11]]);
+    let toc_size = u32::from_le_bytes([buf[12], buf[13], buf[14], buf[15]]);
+
+    let mut toc_iv = [0u8; 16];
+    toc_iv.copy_from_slice(&buf[16..32]);
+
+    let mut reserved = [0u8; 8];
+    reserved.copy_from_slice(&buf[32..40]);
+
+    Ok(Header {
+        version,
+        flags,
+        file_count,
+        toc_offset,
+        toc_size,
+        toc_iv,
+        reserved,
+    })
+}
+
+/// Read 40 raw bytes and parse the header, with XOR bootstrapping.
+///
+/// Implements FORMAT.md Section 10 steps 1-3:
+/// 1. Read 40 bytes.
+/// 2. Check magic: if match, parse normally; if no match, XOR and re-check.
+/// 3. Parse header fields from the (possibly de-XORed) buffer.
+pub fn read_header_auto(reader: &mut impl Read) -> anyhow::Result<Header> {
+    let mut buf = [0u8; 40];
+    reader.read_exact(&mut buf)?;
+
+    // Check magic bytes
+    if buf[0..4] != MAGIC {
+        // Attempt XOR de-obfuscation
+        xor_header_buf(&mut buf);
+        anyhow::ensure!(
+            buf[0..4] == MAGIC,
+            "Invalid magic bytes: expected {:02X?}, got {:02X?} (tried XOR de-obfuscation)",
+            MAGIC,
+            &buf[0..4]
+        );
+    }
+
+    parse_header_from_buf(&buf)
+}
+
+/// Serialize all TOC entries to a Vec buffer.
+///
+/// The buffer can be encrypted before writing to the archive.
+pub fn serialize_toc(entries: &[TocEntry]) -> anyhow::Result<Vec<u8>> {
+    let mut buf = Vec::new();
+    for entry in entries {
+        write_toc_entry(&mut buf, entry)?;
+    }
+    Ok(buf)
+}
+
+/// Parse TOC entries from a byte slice (using a Cursor).
+///
+/// Used for reading TOC from a decrypted buffer.
+pub fn read_toc_from_buf(buf: &[u8], file_count: u16) -> anyhow::Result<Vec<TocEntry>> {
+    let mut cursor = Cursor::new(buf);
+    read_toc(&mut cursor, file_count)
+}
+
 /// Read and parse the 40-byte archive header.
 ///
 /// Verifies: magic bytes, version == 1, reserved flags bits 4-7 are zero.
@@ -396,4 +504,159 @@ mod tests {
         // FORMAT.md worked example: 110 + 109 = 219
         assert_eq!(compute_toc_size(&[entry_hello, entry_data]), 219);
     }
+
+    #[test]
+    fn test_xor_roundtrip() {
+        let header = Header {
+            version: 1,
+            flags: 0x0F,
+            file_count: 2,
+            toc_offset: HEADER_SIZE,
+            toc_size: 256,
+            toc_iv: [0x42; 16],
+            reserved: [0u8; 8],
+        };
+
+        let original_buf = write_header_to_buf(&header);
+        let mut buf = original_buf;
+
+        // XOR once (encode)
+        xor_header_buf(&mut buf);
+        // XOR again (decode) -- must restore original
+        xor_header_buf(&mut buf);
+
+        assert_eq!(buf, original_buf);
+    }
+
+    #[test]
+    fn test_xor_changes_magic() {
+        let header = Header {
+            version: 1,
+            flags: 0x0F,
+            file_count: 2,
+            toc_offset: HEADER_SIZE,
+            toc_size: 256,
+            toc_iv: [0x42; 16],
+            reserved: [0u8; 8],
+        };
+
+        let mut buf = write_header_to_buf(&header);
+
+        // Before XOR, magic is present
+        assert_eq!(&buf[0..4], &MAGIC);
+
+        // After XOR, magic bytes must NOT be recognizable
+        xor_header_buf(&mut buf);
+        assert_ne!(&buf[0..4], &MAGIC);
+    }
+
+    #[test]
+    fn test_read_header_auto_plain() {
+        // Plain (non-XOR'd) header should be parsed correctly
+        let header = Header {
+            version: 1,
+            flags: 0x01,
+            file_count: 3,
+            toc_offset: HEADER_SIZE,
+            toc_size: 330,
+            toc_iv: [0u8; 16],
+            reserved: [0u8; 8],
+        };
+
+        let buf = write_header_to_buf(&header);
+        let mut cursor = Cursor::new(buf.as_slice());
+        let read_back = read_header_auto(&mut cursor).unwrap();
+
+        assert_eq!(read_back.version, 1);
+        assert_eq!(read_back.flags, 0x01);
+        assert_eq!(read_back.file_count, 3);
+    }
+
+    #[test]
+    fn test_read_header_auto_xored() {
+        // XOR'd header should be de-obfuscated and parsed correctly
+        let header = Header {
+            version: 1,
+            flags: 0x0F,
+            file_count: 5,
+            toc_offset: HEADER_SIZE,
+            toc_size: 512,
+            toc_iv: [0xBB; 16],
+            reserved: [0u8; 8],
+        };
+
+        let mut buf = write_header_to_buf(&header);
+        xor_header_buf(&mut buf);
+
+        let mut cursor = Cursor::new(buf.as_slice());
+        let read_back = read_header_auto(&mut cursor).unwrap();
+
+        assert_eq!(read_back.version, 1);
+        assert_eq!(read_back.flags, 0x0F);
+        assert_eq!(read_back.file_count, 5);
+        assert_eq!(read_back.toc_size, 512);
+        assert_eq!(read_back.toc_iv, [0xBB; 16]);
+    }
+
+    #[test]
+    fn test_write_header_to_buf_matches_write_header() {
+        let header = Header {
+            version: 1,
+            flags: 0x01,
+            file_count: 2,
+            toc_offset: HEADER_SIZE,
+            toc_size: 219,
+            toc_iv: [0xAA; 16],
+            reserved: [0u8; 8],
+        };
+
+        // write_header to a Vec
+        let mut vec_buf = Vec::new();
+        write_header(&mut vec_buf, &header).unwrap();
+
+        // write_header_to_buf to a fixed array
+        let arr_buf = write_header_to_buf(&header);
+
+        assert_eq!(vec_buf.as_slice(), &arr_buf[..]);
+    }
+
+    #[test]
+    fn test_serialize_toc_and_read_toc_from_buf() {
+        let entries = vec![
+            TocEntry {
+                name: "file1.txt".to_string(),
+                original_size: 100,
+                compressed_size: 80,
+                encrypted_size: 96,
+                data_offset: 300,
+                iv: [0x11; 16],
+                hmac: [0x22; 32],
+                sha256: [0x33; 32],
+                compression_flag: 1,
+                padding_after: 128,
+            },
+            TocEntry {
+                name: "file2.bin".to_string(),
+                original_size: 200,
+                compressed_size: 180,
+                encrypted_size: 192,
+                data_offset: 524,
+                iv: [0x44; 16],
+                hmac: [0x55; 32],
+                sha256: [0x66; 32],
+                compression_flag: 0,
+                padding_after: 256,
+            },
+        ];
+
+        let buf = serialize_toc(&entries).unwrap();
+        let parsed = read_toc_from_buf(&buf, 2).unwrap();
+
+        assert_eq!(parsed.len(), 2);
+        assert_eq!(parsed[0].name, "file1.txt");
+        assert_eq!(parsed[0].padding_after, 128);
+        assert_eq!(parsed[1].name, "file2.bin");
+        assert_eq!(parsed[1].data_offset, 524);
+        assert_eq!(parsed[1].padding_after, 256);
+    }
 }