// - crypto: encrypt/decrypt roundtrip, empty data, size formula, HMAC compute/verify, SHA-256 known values
// - compression: compress/decompress roundtrip, empty data, large data, should_compress heuristic
// - format: header write/read roundtrip, TOC entry roundtrip (ASCII + Cyrillic + empty name),
//   bad magic/version rejection, entry size calculation matching FORMAT.md worked example
// - Update hex-literal to v1.1
400 lines
12 KiB
Rust
400 lines
12 KiB
Rust
use std::io::Read;
|
||
use std::io::Write;
|
||
|
||
/// Custom magic bytes: the leading 0x00 signals binary content, the remaining
/// bytes are unrecognized.
pub const MAGIC: [u8; 4] = *b"\x00\xEA\x72\x63";

/// Format version for this specification (v1).
pub const VERSION: u8 = 0x01;

/// Fixed header size in bytes, spelled out as the sum of the field widths:
/// magic(4) + version(1) + flags(1) + file_count(2) + toc_offset(4) +
/// toc_size(4) + toc_iv(16) + reserved(8).
pub const HEADER_SIZE: u32 = 4 + 1 + 1 + 2 + 4 + 4 + 16 + 8;
|
||
|
||
/// Archive header (40 bytes fixed at offset 0x00).
///
/// Derives `PartialEq`/`Eq` so whole headers can be compared directly
/// (for example in roundtrip tests) instead of field by field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Header {
    /// Format version byte.
    pub version: u8,
    /// Flag bits. The meaning of individual bits is not defined in this file.
    pub flags: u8,
    /// Number of files; presumably the number of TOC entries that follow
    /// (TODO confirm against FORMAT.md).
    pub file_count: u16,
    /// Offset of the table of contents; presumably measured from the start
    /// of the archive (TODO confirm against FORMAT.md).
    pub toc_offset: u32,
    /// Size of the table of contents in bytes.
    pub toc_size: u32,
    /// 16-byte IV associated with the TOC; presumably used when the TOC is
    /// encrypted (TODO confirm against FORMAT.md).
    pub toc_iv: [u8; 16],
    /// 8 reserved bytes.
    pub reserved: [u8; 8],
}
|
||
|
||
/// File table entry (variable length: 101 + name_length bytes).
///
/// Derives `PartialEq`/`Eq` so whole entries can be compared directly
/// (for example in roundtrip tests) instead of field by field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TocEntry {
    /// File name. Its UTF-8 byte length is the `name_length` part of the
    /// entry size.
    pub name: String,
    /// Size of the original data in bytes.
    pub original_size: u32,
    /// Size of the data after compression, in bytes.
    pub compressed_size: u32,
    /// Size of the data after encryption, in bytes.
    pub encrypted_size: u32,
    /// Offset of the file's data; presumably measured from the start of the
    /// archive (TODO confirm against FORMAT.md).
    pub data_offset: u32,
    /// 16-byte IV for this file.
    pub iv: [u8; 16],
    /// 32-byte HMAC. NOTE(review): which bytes it covers is not visible
    /// here; see FORMAT.md.
    pub hmac: [u8; 32],
    /// 32-byte SHA-256 digest. NOTE(review): presumably of the original
    /// data; confirm against FORMAT.md.
    pub sha256: [u8; 32],
    /// Compression flag byte. NOTE(review): presumably 0 = stored,
    /// 1 = compressed; confirm against FORMAT.md.
    pub compression_flag: u8,
    /// Padding count stored with the entry; presumably the number of padding
    /// bytes after the file's data block (TODO confirm against FORMAT.md).
    pub padding_after: u16,
}
|
||
|
||
/// Write the 40-byte archive header to the writer.
|
||
///
|
||
/// Field order matches FORMAT.md Section 4:
|
||
/// magic(4) | version(1) | flags(1) | file_count(2 LE) | toc_offset(4 LE) |
|
||
/// toc_size(4 LE) | toc_iv(16) | reserved(8)
|
||
pub fn write_header(writer: &mut impl Write, header: &Header) -> anyhow::Result<()> {
|
||
writer.write_all(&MAGIC)?;
|
||
writer.write_all(&[header.version])?;
|
||
writer.write_all(&[header.flags])?;
|
||
writer.write_all(&header.file_count.to_le_bytes())?;
|
||
writer.write_all(&header.toc_offset.to_le_bytes())?;
|
||
writer.write_all(&header.toc_size.to_le_bytes())?;
|
||
writer.write_all(&header.toc_iv)?;
|
||
writer.write_all(&header.reserved)?;
|
||
Ok(())
|
||
}
|
||
|
||
/// Write a single TOC entry to the writer.
|
||
///
|
||
/// Field order matches FORMAT.md Section 5:
|
||
/// name_length(2 LE) | name(N) | original_size(4 LE) | compressed_size(4 LE) |
|
||
/// encrypted_size(4 LE) | data_offset(4 LE) | iv(16) | hmac(32) | sha256(32) |
|
||
/// compression_flag(1) | padding_after(2 LE)
|
||
pub fn write_toc_entry(writer: &mut impl Write, entry: &TocEntry) -> anyhow::Result<()> {
|
||
let name_bytes = entry.name.as_bytes();
|
||
writer.write_all(&(name_bytes.len() as u16).to_le_bytes())?;
|
||
writer.write_all(name_bytes)?;
|
||
writer.write_all(&entry.original_size.to_le_bytes())?;
|
||
writer.write_all(&entry.compressed_size.to_le_bytes())?;
|
||
writer.write_all(&entry.encrypted_size.to_le_bytes())?;
|
||
writer.write_all(&entry.data_offset.to_le_bytes())?;
|
||
writer.write_all(&entry.iv)?;
|
||
writer.write_all(&entry.hmac)?;
|
||
writer.write_all(&entry.sha256)?;
|
||
writer.write_all(&[entry.compression_flag])?;
|
||
writer.write_all(&entry.padding_after.to_le_bytes())?;
|
||
Ok(())
|
||
}
|
||
|
||
/// Read and parse the 40-byte archive header.
|
||
///
|
||
/// Verifies: magic bytes, version == 1, reserved flags bits 4-7 are zero.
|
||
pub fn read_header(reader: &mut impl Read) -> anyhow::Result<Header> {
|
||
let mut buf = [0u8; 40];
|
||
reader.read_exact(&mut buf)?;
|
||
|
||
// Verify magic
|
||
anyhow::ensure!(
|
||
buf[0..4] == MAGIC,
|
||
"Invalid magic bytes: expected {:02X?}, got {:02X?}",
|
||
MAGIC,
|
||
&buf[0..4]
|
||
);
|
||
|
||
let version = buf[4];
|
||
anyhow::ensure!(version == VERSION, "Unsupported version: {}", version);
|
||
|
||
let flags = buf[5];
|
||
anyhow::ensure!(
|
||
flags & 0xF0 == 0,
|
||
"Unknown flags set: 0x{:02X} (bits 4-7 must be zero)",
|
||
flags
|
||
);
|
||
|
||
let file_count = u16::from_le_bytes([buf[6], buf[7]]);
|
||
let toc_offset = u32::from_le_bytes([buf[8], buf[9], buf[10], buf[11]]);
|
||
let toc_size = u32::from_le_bytes([buf[12], buf[13], buf[14], buf[15]]);
|
||
|
||
let mut toc_iv = [0u8; 16];
|
||
toc_iv.copy_from_slice(&buf[16..32]);
|
||
|
||
let mut reserved = [0u8; 8];
|
||
reserved.copy_from_slice(&buf[32..40]);
|
||
|
||
Ok(Header {
|
||
version,
|
||
flags,
|
||
file_count,
|
||
toc_offset,
|
||
toc_size,
|
||
toc_iv,
|
||
reserved,
|
||
})
|
||
}
|
||
|
||
/// Read a single TOC entry from the reader.
|
||
///
|
||
/// Reads variable-length name first, then all fixed fields.
|
||
pub fn read_toc_entry(reader: &mut impl Read) -> anyhow::Result<TocEntry> {
|
||
// name_length (u16 LE)
|
||
let mut buf2 = [0u8; 2];
|
||
reader.read_exact(&mut buf2)?;
|
||
let name_length = u16::from_le_bytes(buf2);
|
||
|
||
// name (name_length bytes, UTF-8)
|
||
let mut name_bytes = vec![0u8; name_length as usize];
|
||
reader.read_exact(&mut name_bytes)?;
|
||
let name = String::from_utf8(name_bytes)
|
||
.map_err(|e| anyhow::anyhow!("Invalid UTF-8 filename: {}", e))?;
|
||
|
||
// original_size (u32 LE)
|
||
let mut buf4 = [0u8; 4];
|
||
reader.read_exact(&mut buf4)?;
|
||
let original_size = u32::from_le_bytes(buf4);
|
||
|
||
// compressed_size (u32 LE)
|
||
reader.read_exact(&mut buf4)?;
|
||
let compressed_size = u32::from_le_bytes(buf4);
|
||
|
||
// encrypted_size (u32 LE)
|
||
reader.read_exact(&mut buf4)?;
|
||
let encrypted_size = u32::from_le_bytes(buf4);
|
||
|
||
// data_offset (u32 LE)
|
||
reader.read_exact(&mut buf4)?;
|
||
let data_offset = u32::from_le_bytes(buf4);
|
||
|
||
// iv (16 bytes)
|
||
let mut iv = [0u8; 16];
|
||
reader.read_exact(&mut iv)?;
|
||
|
||
// hmac (32 bytes)
|
||
let mut hmac = [0u8; 32];
|
||
reader.read_exact(&mut hmac)?;
|
||
|
||
// sha256 (32 bytes)
|
||
let mut sha256 = [0u8; 32];
|
||
reader.read_exact(&mut sha256)?;
|
||
|
||
// compression_flag (u8)
|
||
let mut buf1 = [0u8; 1];
|
||
reader.read_exact(&mut buf1)?;
|
||
let compression_flag = buf1[0];
|
||
|
||
// padding_after (u16 LE)
|
||
reader.read_exact(&mut buf2)?;
|
||
let padding_after = u16::from_le_bytes(buf2);
|
||
|
||
Ok(TocEntry {
|
||
name,
|
||
original_size,
|
||
compressed_size,
|
||
encrypted_size,
|
||
data_offset,
|
||
iv,
|
||
hmac,
|
||
sha256,
|
||
compression_flag,
|
||
padding_after,
|
||
})
|
||
}
|
||
|
||
/// Read all TOC entries sequentially.
|
||
pub fn read_toc(reader: &mut impl Read, file_count: u16) -> anyhow::Result<Vec<TocEntry>> {
|
||
let mut entries = Vec::with_capacity(file_count as usize);
|
||
for _ in 0..file_count {
|
||
entries.push(read_toc_entry(reader)?);
|
||
}
|
||
Ok(entries)
|
||
}
|
||
|
||
/// Compute the serialized size of a single TOC entry.
|
||
///
|
||
/// Formula from FORMAT.md Section 5: entry_size = 101 + name_length bytes.
|
||
pub fn entry_size(entry: &TocEntry) -> u32 {
|
||
101 + entry.name.len() as u32
|
||
}
|
||
|
||
/// Compute the total serialized size of all TOC entries.
|
||
pub fn compute_toc_size(entries: &[TocEntry]) -> u32 {
|
||
entries.iter().map(entry_size).sum()
|
||
}
|
||
|
||
#[cfg(test)]
mod tests {
    //! Unit tests for header / TOC serialization.
    //!
    //! Covers write/read roundtrips, rejection of malformed headers (magic,
    //! version, reserved flag bits, truncated input), multi-entry TOC
    //! reading, and the entry-size formula from the FORMAT.md worked example.

    use super::*;
    use std::io::Cursor;

    /// Build an entry with distinct, recognisable filler in every field so a
    /// field swapped during (de)serialization would be detected.
    fn sample_entry(name: &str) -> TocEntry {
        TocEntry {
            name: name.to_string(),
            original_size: 100,
            compressed_size: 80,
            encrypted_size: 96,
            data_offset: 500,
            iv: [0x11; 16],
            hmac: [0x22; 32],
            sha256: [0x33; 32],
            compression_flag: 1,
            padding_after: 0x0102,
        }
    }

    /// Compare two entries field by field (kept explicit so a failure names
    /// the exact field that differs).
    fn assert_same_entry(actual: &TocEntry, expected: &TocEntry) {
        assert_eq!(actual.name, expected.name);
        assert_eq!(actual.original_size, expected.original_size);
        assert_eq!(actual.compressed_size, expected.compressed_size);
        assert_eq!(actual.encrypted_size, expected.encrypted_size);
        assert_eq!(actual.data_offset, expected.data_offset);
        assert_eq!(actual.iv, expected.iv);
        assert_eq!(actual.hmac, expected.hmac);
        assert_eq!(actual.sha256, expected.sha256);
        assert_eq!(actual.compression_flag, expected.compression_flag);
        assert_eq!(actual.padding_after, expected.padding_after);
    }

    /// Serialize `entry`, check the byte count against the 101 + name_length
    /// formula, and parse it back.
    fn roundtrip(entry: &TocEntry) -> TocEntry {
        let mut buf = Vec::new();
        write_toc_entry(&mut buf, entry).unwrap();
        assert_eq!(buf.len(), 101 + entry.name.len());
        assert_eq!(buf.len(), entry_size(entry) as usize);

        let mut cursor = Cursor::new(&buf);
        read_toc_entry(&mut cursor).unwrap()
    }

    /// A 40-byte buffer holding a valid magic and version, everything else zero.
    fn minimal_header_bytes() -> Vec<u8> {
        let mut buf = vec![0u8; 40];
        buf[0..4].copy_from_slice(&MAGIC);
        buf[4] = VERSION;
        buf
    }

    #[test]
    fn test_header_write_read_roundtrip() {
        let header = Header {
            version: 1,
            flags: 0x01,
            file_count: 3,
            toc_offset: HEADER_SIZE,
            toc_size: 330,
            toc_iv: [0u8; 16],
            reserved: [0u8; 8],
        };

        let mut buf = Vec::new();
        write_header(&mut buf, &header).unwrap();
        assert_eq!(buf.len(), HEADER_SIZE as usize);

        let mut cursor = Cursor::new(&buf);
        let read_back = read_header(&mut cursor).unwrap();

        assert_eq!(read_back.version, header.version);
        assert_eq!(read_back.flags, header.flags);
        assert_eq!(read_back.file_count, header.file_count);
        assert_eq!(read_back.toc_offset, header.toc_offset);
        assert_eq!(read_back.toc_size, header.toc_size);
        assert_eq!(read_back.toc_iv, header.toc_iv);
        assert_eq!(read_back.reserved, header.reserved);
    }

    #[test]
    fn test_toc_entry_roundtrip_ascii() {
        let entry = TocEntry {
            name: "hello.txt".to_string(),
            original_size: 5,
            compressed_size: 25,
            encrypted_size: 32,
            data_offset: 259,
            iv: [0xAA; 16],
            hmac: [0xBB; 32],
            sha256: [0xCC; 32],
            compression_flag: 1,
            padding_after: 0,
        };

        let mut buf = Vec::new();
        write_toc_entry(&mut buf, &entry).unwrap();
        assert_eq!(buf.len(), 101 + 9); // 101 + "hello.txt".len()

        let mut cursor = Cursor::new(&buf);
        let read_back = read_toc_entry(&mut cursor).unwrap();
        assert_same_entry(&read_back, &entry);
    }

    #[test]
    fn test_toc_entry_roundtrip_cyrillic() {
        // "тестовый_файл.txt": 12 two-byte Cyrillic letters + '_' + ".txt".
        let name = "\u{0442}\u{0435}\u{0441}\u{0442}\u{043e}\u{0432}\u{044b}\u{0439}_\u{0444}\u{0430}\u{0439}\u{043b}.txt";
        assert_eq!(name.len(), 29);

        let mut entry = sample_entry(name);
        entry.padding_after = 0;

        let read_back = roundtrip(&entry);
        assert_eq!(read_back.name, name);
        assert_same_entry(&read_back, &entry);
    }

    #[test]
    fn test_toc_entry_roundtrip_empty_name() {
        let entry = TocEntry {
            name: "".to_string(),
            original_size: 0,
            compressed_size: 0,
            encrypted_size: 16,
            data_offset: 40,
            iv: [0u8; 16],
            hmac: [0u8; 32],
            sha256: [0u8; 32],
            compression_flag: 0,
            padding_after: 0,
        };

        let read_back = roundtrip(&entry);
        assert_eq!(read_back.name, "");
        assert_same_entry(&read_back, &entry);
    }

    #[test]
    fn test_toc_entry_preserves_nonzero_padding() {
        // A multi-byte padding value exercises the little-endian encoding of
        // the trailing u16 field.
        let entry = sample_entry("padded.bin");
        let read_back = roundtrip(&entry);
        assert_eq!(read_back.padding_after, 0x0102);
    }

    #[test]
    fn test_read_toc_multiple_entries() {
        let first = sample_entry("hello.txt");
        let second = sample_entry("data.bin");

        let mut buf = Vec::new();
        write_toc_entry(&mut buf, &first).unwrap();
        write_toc_entry(&mut buf, &second).unwrap();
        assert_eq!(
            buf.len(),
            compute_toc_size(&[first.clone(), second.clone()]) as usize
        );

        let mut cursor = Cursor::new(&buf);
        let entries = read_toc(&mut cursor, 2).unwrap();
        assert_eq!(entries.len(), 2);
        assert_same_entry(&entries[0], &first);
        assert_same_entry(&entries[1], &second);

        // Asking for more entries than the buffer holds must fail, not
        // return a short list.
        let mut cursor = Cursor::new(&buf);
        assert!(read_toc(&mut cursor, 3).is_err());

        // Zero entries is valid and consumes nothing.
        let mut cursor = Cursor::new(&buf);
        assert!(read_toc(&mut cursor, 0).unwrap().is_empty());
    }

    #[test]
    fn test_header_rejects_bad_magic() {
        let mut buf = minimal_header_bytes();
        buf[0..4].copy_from_slice(&[0xFF; 4]);

        let mut cursor = Cursor::new(&buf);
        let result = read_header(&mut cursor);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("magic"));
    }

    #[test]
    fn test_header_rejects_bad_version() {
        let mut buf = minimal_header_bytes();
        buf[4] = 2;

        let mut cursor = Cursor::new(&buf);
        let result = read_header(&mut cursor);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("version"));
    }

    #[test]
    fn test_header_rejects_reserved_flag_bits() {
        // Each of bits 4-7 on its own must be refused.
        for bit in 4..8 {
            let mut buf = minimal_header_bytes();
            buf[5] = 1u8 << bit;

            let mut cursor = Cursor::new(&buf);
            let result = read_header(&mut cursor);
            assert!(result.is_err(), "flag bit {} was accepted", bit);
            assert!(result.unwrap_err().to_string().contains("flags"));
        }

        // The low nibble is allowed.
        let mut buf = minimal_header_bytes();
        buf[5] = 0x0F;
        let mut cursor = Cursor::new(&buf);
        assert_eq!(read_header(&mut cursor).unwrap().flags, 0x0F);
    }

    #[test]
    fn test_header_rejects_truncated_input() {
        let mut buf = minimal_header_bytes();
        buf.truncate(39);

        let mut cursor = Cursor::new(&buf);
        assert!(read_header(&mut cursor).is_err());
    }

    #[test]
    fn test_entry_size_calculation() {
        let entry_hello = TocEntry {
            name: "hello.txt".to_string(), // 9 bytes
            original_size: 5,
            compressed_size: 25,
            encrypted_size: 32,
            data_offset: 259,
            iv: [0u8; 16],
            hmac: [0u8; 32],
            sha256: [0u8; 32],
            compression_flag: 1,
            padding_after: 0,
        };
        assert_eq!(entry_size(&entry_hello), 110); // 101 + 9

        let entry_data = TocEntry {
            name: "data.bin".to_string(), // 8 bytes
            original_size: 32,
            compressed_size: 22,
            encrypted_size: 32,
            data_offset: 291,
            iv: [0u8; 16],
            hmac: [0u8; 32],
            sha256: [0u8; 32],
            compression_flag: 1,
            padding_after: 0,
        };
        assert_eq!(entry_size(&entry_data), 109); // 101 + 8

        // FORMAT.md worked example: 110 + 109 = 219
        assert_eq!(compute_toc_size(&[entry_hello, entry_data]), 219);

        // No entries, no bytes.
        assert_eq!(compute_toc_size(&[]), 0);
    }
}
|