feat(02-01): format types, crypto pipeline, and compression module
- Implement Header and TocEntry structs matching FORMAT.md byte layout
- Add write_header (40 bytes) and write_toc_entry (101+name_len bytes) serialization
- Add read_header, read_toc_entry, read_toc deserialization with validation
- Implement AES-256-CBC encrypt/decrypt with PKCS7 padding via cbc crate
- Add HMAC-SHA-256 compute/verify over IV||ciphertext (encrypt-then-MAC)
- Add SHA-256 hash for original file integrity
- Implement gzip compress/decompress with deterministic mtime(0)
- Add should_compress heuristic for known compressed file extensions

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,2 +1,51 @@
|
||||
// Gzip compression/decompression and compression heuristic.
|
||||
// Will be implemented in Task 2.
|
||||
use flate2::read::GzDecoder;
|
||||
use flate2::{Compression, GzBuilder};
|
||||
use std::io::{Read, Write};
|
||||
|
||||
/// Gzip-compress data with reproducible output (mtime zeroed).
|
||||
///
|
||||
/// Uses `GzBuilder::new().mtime(0)` to zero the gzip timestamp,
|
||||
/// ensuring reproducible compressed output for testing.
|
||||
pub fn compress(data: &[u8]) -> anyhow::Result<Vec<u8>> {
|
||||
let mut encoder = GzBuilder::new()
|
||||
.mtime(0)
|
||||
.write(Vec::new(), Compression::default());
|
||||
encoder.write_all(data)?;
|
||||
let compressed = encoder.finish()?;
|
||||
Ok(compressed)
|
||||
}
|
||||
|
||||
/// Gzip-decompress data.
|
||||
pub fn decompress(data: &[u8]) -> anyhow::Result<Vec<u8>> {
|
||||
let mut decoder = GzDecoder::new(data);
|
||||
let mut decompressed = Vec::new();
|
||||
decoder.read_to_end(&mut decompressed)?;
|
||||
Ok(decompressed)
|
||||
}
|
||||
|
||||
/// Decide whether a file should be gzip-compressed before being stored.
///
/// Returns `false` when:
/// - `filename` ends with any entry of `no_compress_list` (this also covers
///   an exact match), or
/// - the text after the last `.` in `filename` is, case-insensitively, one
///   of the known already-compressed extensions (apk, zip, gz, ...).
///
/// Returns `true` otherwise, including for names that contain no `.` at
/// all: a file literally called `zip` or `jar` has no extension and is
/// therefore compressed.
///
/// Note that an empty string in `no_compress_list` is a suffix of every
/// name and so disables compression for all files.
pub fn should_compress(filename: &str, no_compress_list: &[String]) -> bool {
    // Explicit exclusions win. `ends_with` is also true when the two strings
    // are equal, so no separate equality test is needed.
    if no_compress_list
        .iter()
        .any(|nc| filename.ends_with(nc.as_str()))
    {
        return false;
    }

    // Only the part after the last '.' counts as an extension. Without a dot
    // there is no extension, so the bare name must not be compared against
    // the extension table.
    let ext = match filename.rsplit_once('.') {
        Some((_, ext)) => ext.to_lowercase(),
        None => return true,
    };

    !matches!(
        ext.as_str(),
        "apk" | "zip" | "gz" | "bz2" | "xz" | "zst"
            | "png" | "jpg" | "jpeg" | "gif" | "webp"
            | "mp4" | "mp3" | "aac" | "ogg" | "flac"
            | "7z" | "rar" | "jar"
    )
}
|
||||
|
||||
@@ -1,2 +1,78 @@
|
||||
// Cryptographic operations: AES-256-CBC, HMAC-SHA-256, SHA-256.
|
||||
// Will be implemented in Task 2.
|
||||
use aes::cipher::{block_padding::Pkcs7, BlockDecryptMut, BlockEncryptMut, KeyIvInit};
|
||||
use hmac::Mac;
|
||||
type Aes256CbcEnc = cbc::Encryptor<aes::Aes256>;
|
||||
type Aes256CbcDec = cbc::Decryptor<aes::Aes256>;
|
||||
type HmacSha256 = hmac::Hmac<sha2::Sha256>;
|
||||
|
||||
/// Generate a random 16-byte initialization vector using a CSPRNG.
|
||||
pub fn generate_iv() -> [u8; 16] {
|
||||
let mut iv = [0u8; 16];
|
||||
rand::Fill::fill(&mut iv, &mut rand::rng());
|
||||
iv
|
||||
}
|
||||
|
||||
/// Encrypt plaintext with AES-256-CBC and PKCS7 padding.
|
||||
///
|
||||
/// Returns ciphertext of size `((plaintext.len() / 16) + 1) * 16`.
|
||||
/// PKCS7 always adds at least 1 byte of padding.
|
||||
pub fn encrypt_data(plaintext: &[u8], key: &[u8; 32], iv: &[u8; 16]) -> Vec<u8> {
|
||||
let encrypted_size = ((plaintext.len() / 16) + 1) * 16;
|
||||
let mut buf = vec![0u8; encrypted_size];
|
||||
buf[..plaintext.len()].copy_from_slice(plaintext);
|
||||
|
||||
let ct = Aes256CbcEnc::new(key.into(), iv.into())
|
||||
.encrypt_padded_mut::<Pkcs7>(&mut buf, plaintext.len())
|
||||
.expect("encryption buffer too small");
|
||||
|
||||
// ct is a slice into buf of length encrypted_size
|
||||
ct.to_vec()
|
||||
}
|
||||
|
||||
/// Decrypt ciphertext with AES-256-CBC and remove PKCS7 padding.
|
||||
///
|
||||
/// Returns the original plaintext data.
|
||||
pub fn decrypt_data(ciphertext: &[u8], key: &[u8; 32], iv: &[u8; 16]) -> anyhow::Result<Vec<u8>> {
|
||||
let mut buf = ciphertext.to_vec();
|
||||
|
||||
let pt = Aes256CbcDec::new(key.into(), iv.into())
|
||||
.decrypt_padded_mut::<Pkcs7>(&mut buf)
|
||||
.map_err(|_| anyhow::anyhow!("Decryption failed: invalid padding or wrong key"))?;
|
||||
|
||||
Ok(pt.to_vec())
|
||||
}
|
||||
|
||||
/// Compute HMAC-SHA-256 over IV || ciphertext.
|
||||
///
|
||||
/// HMAC input = IV (16 bytes) || ciphertext (encrypted_size bytes).
|
||||
/// Returns 32-byte HMAC tag.
|
||||
pub fn compute_hmac(key: &[u8; 32], iv: &[u8; 16], ciphertext: &[u8]) -> [u8; 32] {
|
||||
let mut mac =
|
||||
HmacSha256::new_from_slice(key).expect("HMAC can take key of any size");
|
||||
mac.update(iv);
|
||||
mac.update(ciphertext);
|
||||
mac.finalize().into_bytes().into()
|
||||
}
|
||||
|
||||
/// Verify HMAC-SHA-256 over IV || ciphertext using constant-time comparison.
|
||||
///
|
||||
/// Returns true if the computed HMAC matches the expected value.
|
||||
pub fn verify_hmac(
|
||||
key: &[u8; 32],
|
||||
iv: &[u8; 16],
|
||||
ciphertext: &[u8],
|
||||
expected: &[u8; 32],
|
||||
) -> bool {
|
||||
let mut mac =
|
||||
HmacSha256::new_from_slice(key).expect("HMAC can take key of any size");
|
||||
mac.update(iv);
|
||||
mac.update(ciphertext);
|
||||
mac.verify_slice(expected).is_ok()
|
||||
}
|
||||
|
||||
/// Compute SHA-256 hash of data.
|
||||
///
|
||||
/// Returns 32-byte digest. Used for integrity verification of original file content.
|
||||
pub fn sha256_hash(data: &[u8]) -> [u8; 32] {
|
||||
use sha2::Digest;
|
||||
sha2::Sha256::digest(data).into()
|
||||
}
|
||||
|
||||
213
src/format.rs
213
src/format.rs
@@ -1,2 +1,211 @@
|
||||
// Binary format types and serialization/deserialization.
|
||||
// Will be implemented in Task 2.
|
||||
use std::io::Read;
|
||||
use std::io::Write;
|
||||
|
||||
/// Custom magic bytes: leading 0x00 signals binary, remaining bytes are unrecognized.
|
||||
pub const MAGIC: [u8; 4] = [0x00, 0xEA, 0x72, 0x63];
|
||||
|
||||
/// Format version for this specification (v1).
|
||||
pub const VERSION: u8 = 1;
|
||||
|
||||
/// Fixed header size in bytes.
|
||||
pub const HEADER_SIZE: u32 = 40;
|
||||
|
||||
/// Archive header (40 bytes fixed at offset 0x00).
///
/// Serialized layout:
/// magic(4) | version(1) | flags(1) | file_count(2 LE) | toc_offset(4 LE) |
/// toc_size(4 LE) | toc_iv(16) | reserved(8)
///
/// The magic bytes are not stored in the struct; they are written and
/// checked by the header serialization functions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Header {
    /// Format version byte.
    pub version: u8,
    /// Feature flags; the reader rejects headers with any of bits 4-7 set.
    pub flags: u8,
    /// Number of TOC entries in the archive.
    pub file_count: u16,
    /// TOC location (presumably a byte offset from the start of the
    /// archive; confirm against FORMAT.md).
    pub toc_offset: u32,
    /// TOC size in bytes.
    pub toc_size: u32,
    /// 16-byte IV associated with the TOC.
    pub toc_iv: [u8; 16],
    /// 8 reserved bytes, carried through unchanged.
    pub reserved: [u8; 8],
}
|
||||
|
||||
/// File table entry (variable length: 101 + name_length bytes).
///
/// Serialized layout:
/// name_length(2 LE) | name(N) | original_size(4 LE) | compressed_size(4 LE) |
/// encrypted_size(4 LE) | data_offset(4 LE) | iv(16) | hmac(32) | sha256(32) |
/// compression_flag(1) | padding_after(2 LE)
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TocEntry {
    /// File name; serialized as UTF-8 bytes behind a u16 length prefix.
    pub name: String,
    /// Size field for the original data (presumably bytes before
    /// compression; confirm against FORMAT.md).
    pub original_size: u32,
    /// Size field for the compressed data.
    pub compressed_size: u32,
    /// Size field for the encrypted data.
    pub encrypted_size: u32,
    /// Location of this entry's data (presumably a byte offset within the
    /// archive; confirm against FORMAT.md).
    pub data_offset: u32,
    /// 16-byte IV for this entry.
    pub iv: [u8; 16],
    /// 32-byte HMAC tag for this entry.
    pub hmac: [u8; 32],
    /// 32-byte SHA-256 digest for this entry.
    pub sha256: [u8; 32],
    /// One-byte compression indicator; its values are defined by FORMAT.md.
    pub compression_flag: u8,
    /// Two-byte padding field; its meaning is defined by FORMAT.md.
    pub padding_after: u16,
}
|
||||
|
||||
/// Write the 40-byte archive header to the writer.
|
||||
///
|
||||
/// Field order matches FORMAT.md Section 4:
|
||||
/// magic(4) | version(1) | flags(1) | file_count(2 LE) | toc_offset(4 LE) |
|
||||
/// toc_size(4 LE) | toc_iv(16) | reserved(8)
|
||||
pub fn write_header(writer: &mut impl Write, header: &Header) -> anyhow::Result<()> {
|
||||
writer.write_all(&MAGIC)?;
|
||||
writer.write_all(&[header.version])?;
|
||||
writer.write_all(&[header.flags])?;
|
||||
writer.write_all(&header.file_count.to_le_bytes())?;
|
||||
writer.write_all(&header.toc_offset.to_le_bytes())?;
|
||||
writer.write_all(&header.toc_size.to_le_bytes())?;
|
||||
writer.write_all(&header.toc_iv)?;
|
||||
writer.write_all(&header.reserved)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Write a single TOC entry to the writer.
|
||||
///
|
||||
/// Field order matches FORMAT.md Section 5:
|
||||
/// name_length(2 LE) | name(N) | original_size(4 LE) | compressed_size(4 LE) |
|
||||
/// encrypted_size(4 LE) | data_offset(4 LE) | iv(16) | hmac(32) | sha256(32) |
|
||||
/// compression_flag(1) | padding_after(2 LE)
|
||||
pub fn write_toc_entry(writer: &mut impl Write, entry: &TocEntry) -> anyhow::Result<()> {
|
||||
let name_bytes = entry.name.as_bytes();
|
||||
writer.write_all(&(name_bytes.len() as u16).to_le_bytes())?;
|
||||
writer.write_all(name_bytes)?;
|
||||
writer.write_all(&entry.original_size.to_le_bytes())?;
|
||||
writer.write_all(&entry.compressed_size.to_le_bytes())?;
|
||||
writer.write_all(&entry.encrypted_size.to_le_bytes())?;
|
||||
writer.write_all(&entry.data_offset.to_le_bytes())?;
|
||||
writer.write_all(&entry.iv)?;
|
||||
writer.write_all(&entry.hmac)?;
|
||||
writer.write_all(&entry.sha256)?;
|
||||
writer.write_all(&[entry.compression_flag])?;
|
||||
writer.write_all(&entry.padding_after.to_le_bytes())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Read and parse the 40-byte archive header.
|
||||
///
|
||||
/// Verifies: magic bytes, version == 1, reserved flags bits 4-7 are zero.
|
||||
pub fn read_header(reader: &mut impl Read) -> anyhow::Result<Header> {
|
||||
let mut buf = [0u8; 40];
|
||||
reader.read_exact(&mut buf)?;
|
||||
|
||||
// Verify magic
|
||||
anyhow::ensure!(
|
||||
buf[0..4] == MAGIC,
|
||||
"Invalid magic bytes: expected {:02X?}, got {:02X?}",
|
||||
MAGIC,
|
||||
&buf[0..4]
|
||||
);
|
||||
|
||||
let version = buf[4];
|
||||
anyhow::ensure!(version == VERSION, "Unsupported version: {}", version);
|
||||
|
||||
let flags = buf[5];
|
||||
anyhow::ensure!(
|
||||
flags & 0xF0 == 0,
|
||||
"Unknown flags set: 0x{:02X} (bits 4-7 must be zero)",
|
||||
flags
|
||||
);
|
||||
|
||||
let file_count = u16::from_le_bytes([buf[6], buf[7]]);
|
||||
let toc_offset = u32::from_le_bytes([buf[8], buf[9], buf[10], buf[11]]);
|
||||
let toc_size = u32::from_le_bytes([buf[12], buf[13], buf[14], buf[15]]);
|
||||
|
||||
let mut toc_iv = [0u8; 16];
|
||||
toc_iv.copy_from_slice(&buf[16..32]);
|
||||
|
||||
let mut reserved = [0u8; 8];
|
||||
reserved.copy_from_slice(&buf[32..40]);
|
||||
|
||||
Ok(Header {
|
||||
version,
|
||||
flags,
|
||||
file_count,
|
||||
toc_offset,
|
||||
toc_size,
|
||||
toc_iv,
|
||||
reserved,
|
||||
})
|
||||
}
|
||||
|
||||
/// Read a single TOC entry from the reader.
|
||||
///
|
||||
/// Reads variable-length name first, then all fixed fields.
|
||||
pub fn read_toc_entry(reader: &mut impl Read) -> anyhow::Result<TocEntry> {
|
||||
// name_length (u16 LE)
|
||||
let mut buf2 = [0u8; 2];
|
||||
reader.read_exact(&mut buf2)?;
|
||||
let name_length = u16::from_le_bytes(buf2);
|
||||
|
||||
// name (name_length bytes, UTF-8)
|
||||
let mut name_bytes = vec![0u8; name_length as usize];
|
||||
reader.read_exact(&mut name_bytes)?;
|
||||
let name = String::from_utf8(name_bytes)
|
||||
.map_err(|e| anyhow::anyhow!("Invalid UTF-8 filename: {}", e))?;
|
||||
|
||||
// original_size (u32 LE)
|
||||
let mut buf4 = [0u8; 4];
|
||||
reader.read_exact(&mut buf4)?;
|
||||
let original_size = u32::from_le_bytes(buf4);
|
||||
|
||||
// compressed_size (u32 LE)
|
||||
reader.read_exact(&mut buf4)?;
|
||||
let compressed_size = u32::from_le_bytes(buf4);
|
||||
|
||||
// encrypted_size (u32 LE)
|
||||
reader.read_exact(&mut buf4)?;
|
||||
let encrypted_size = u32::from_le_bytes(buf4);
|
||||
|
||||
// data_offset (u32 LE)
|
||||
reader.read_exact(&mut buf4)?;
|
||||
let data_offset = u32::from_le_bytes(buf4);
|
||||
|
||||
// iv (16 bytes)
|
||||
let mut iv = [0u8; 16];
|
||||
reader.read_exact(&mut iv)?;
|
||||
|
||||
// hmac (32 bytes)
|
||||
let mut hmac = [0u8; 32];
|
||||
reader.read_exact(&mut hmac)?;
|
||||
|
||||
// sha256 (32 bytes)
|
||||
let mut sha256 = [0u8; 32];
|
||||
reader.read_exact(&mut sha256)?;
|
||||
|
||||
// compression_flag (u8)
|
||||
let mut buf1 = [0u8; 1];
|
||||
reader.read_exact(&mut buf1)?;
|
||||
let compression_flag = buf1[0];
|
||||
|
||||
// padding_after (u16 LE)
|
||||
reader.read_exact(&mut buf2)?;
|
||||
let padding_after = u16::from_le_bytes(buf2);
|
||||
|
||||
Ok(TocEntry {
|
||||
name,
|
||||
original_size,
|
||||
compressed_size,
|
||||
encrypted_size,
|
||||
data_offset,
|
||||
iv,
|
||||
hmac,
|
||||
sha256,
|
||||
compression_flag,
|
||||
padding_after,
|
||||
})
|
||||
}
|
||||
|
||||
/// Read all TOC entries sequentially.
|
||||
pub fn read_toc(reader: &mut impl Read, file_count: u16) -> anyhow::Result<Vec<TocEntry>> {
|
||||
let mut entries = Vec::with_capacity(file_count as usize);
|
||||
for _ in 0..file_count {
|
||||
entries.push(read_toc_entry(reader)?);
|
||||
}
|
||||
Ok(entries)
|
||||
}
|
||||
|
||||
/// Compute the serialized size of a single TOC entry.
|
||||
///
|
||||
/// Formula from FORMAT.md Section 5: entry_size = 101 + name_length bytes.
|
||||
pub fn entry_size(entry: &TocEntry) -> u32 {
|
||||
101 + entry.name.len() as u32
|
||||
}
|
||||
|
||||
/// Compute the total serialized size of all TOC entries.
|
||||
pub fn compute_toc_size(entries: &[TocEntry]) -> u32 {
|
||||
entries.iter().map(entry_size).sum()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user