diff --git a/src/compression.rs b/src/compression.rs
index 6a384d2..1022ee2 100644
--- a/src/compression.rs
+++ b/src/compression.rs
@@ -1,2 +1,51 @@
-// Gzip compression/decompression and compression heuristic.
-// Will be implemented in Task 2.
+use flate2::read::GzDecoder;
+use flate2::{Compression, GzBuilder};
+use std::io::{Read, Write};
+
+/// Gzip-compress data with reproducible output (mtime zeroed).
+///
+/// Uses `GzBuilder::new().mtime(0)` to zero the gzip timestamp,
+/// ensuring reproducible compressed output for testing.
+pub fn compress(data: &[u8]) -> anyhow::Result<Vec<u8>> {
+    let mut encoder = GzBuilder::new()
+        .mtime(0)
+        .write(Vec::new(), Compression::default());
+    encoder.write_all(data)?;
+    let compressed = encoder.finish()?;
+    Ok(compressed)
+}
+
+/// Gzip-decompress data.
+pub fn decompress(data: &[u8]) -> anyhow::Result<Vec<u8>> {
+    let mut decoder = GzDecoder::new(data);
+    let mut decompressed = Vec::new();
+    decoder.read_to_end(&mut decompressed)?;
+    Ok(decompressed)
+}
+
+/// Determine if a file should be compressed based on filename and exclusion list.
+///
+/// Returns false for:
+/// - Files matching any entry in `no_compress_list` (by suffix or exact match)
+/// - Files with known compressed extensions (apk, zip, gz, etc.)
+///
+/// Returns true otherwise.
+pub fn should_compress(filename: &str, no_compress_list: &[String]) -> bool {
+    // Check explicit exclusion list
+    if no_compress_list
+        .iter()
+        .any(|nc| filename.ends_with(nc) || filename == nc)
+    {
+        return false;
+    }
+
+    // Check known compressed extensions
+    let ext = filename.rsplit('.').next().unwrap_or("").to_lowercase();
+    !matches!(
+        ext.as_str(),
+        "apk" | "zip" | "gz" | "bz2" | "xz" | "zst"
+            | "png" | "jpg" | "jpeg" | "gif" | "webp"
+            | "mp4" | "mp3" | "aac" | "ogg" | "flac"
+            | "7z" | "rar" | "jar"
+    )
+}
diff --git a/src/crypto.rs b/src/crypto.rs
index 00f2fa9..2e4cf8d 100644
--- a/src/crypto.rs
+++ b/src/crypto.rs
@@ -1,2 +1,78 @@
-// Cryptographic operations: AES-256-CBC, HMAC-SHA-256, SHA-256.
-// Will be implemented in Task 2.
+use aes::cipher::{block_padding::Pkcs7, BlockDecryptMut, BlockEncryptMut, KeyIvInit};
+use hmac::Mac;
+type Aes256CbcEnc = cbc::Encryptor<aes::Aes256>;
+type Aes256CbcDec = cbc::Decryptor<aes::Aes256>;
+type HmacSha256 = hmac::Hmac<sha2::Sha256>;
+
+/// Generate a random 16-byte initialization vector using a CSPRNG.
+pub fn generate_iv() -> [u8; 16] {
+    let mut iv = [0u8; 16];
+    rand::Fill::fill(&mut iv, &mut rand::rng());
+    iv
+}
+
+/// Encrypt plaintext with AES-256-CBC and PKCS7 padding.
+///
+/// Returns ciphertext of size `((plaintext.len() / 16) + 1) * 16`.
+/// PKCS7 always adds at least 1 byte of padding.
+pub fn encrypt_data(plaintext: &[u8], key: &[u8; 32], iv: &[u8; 16]) -> Vec<u8> {
+    let encrypted_size = ((plaintext.len() / 16) + 1) * 16;
+    let mut buf = vec![0u8; encrypted_size];
+    buf[..plaintext.len()].copy_from_slice(plaintext);
+
+    let ct = Aes256CbcEnc::new(key.into(), iv.into())
+        .encrypt_padded_mut::<Pkcs7>(&mut buf, plaintext.len())
+        .expect("encryption buffer too small");
+
+    // ct is a slice into buf of length encrypted_size
+    ct.to_vec()
+}
+
+/// Decrypt ciphertext with AES-256-CBC and remove PKCS7 padding.
+///
+/// Returns the original plaintext data.
+pub fn decrypt_data(ciphertext: &[u8], key: &[u8; 32], iv: &[u8; 16]) -> anyhow::Result<Vec<u8>> {
+    let mut buf = ciphertext.to_vec();
+
+    let pt = Aes256CbcDec::new(key.into(), iv.into())
+        .decrypt_padded_mut::<Pkcs7>(&mut buf)
+        .map_err(|_| anyhow::anyhow!("Decryption failed: invalid padding or wrong key"))?;
+
+    Ok(pt.to_vec())
+}
+
+/// Compute HMAC-SHA-256 over IV || ciphertext.
+///
+/// HMAC input = IV (16 bytes) || ciphertext (encrypted_size bytes).
+/// Returns 32-byte HMAC tag.
+pub fn compute_hmac(key: &[u8; 32], iv: &[u8; 16], ciphertext: &[u8]) -> [u8; 32] {
+    let mut mac =
+        HmacSha256::new_from_slice(key).expect("HMAC can take key of any size");
+    mac.update(iv);
+    mac.update(ciphertext);
+    mac.finalize().into_bytes().into()
+}
+
+/// Verify HMAC-SHA-256 over IV || ciphertext using constant-time comparison.
+///
+/// Returns true if the computed HMAC matches the expected value.
+pub fn verify_hmac(
+    key: &[u8; 32],
+    iv: &[u8; 16],
+    ciphertext: &[u8],
+    expected: &[u8; 32],
+) -> bool {
+    let mut mac =
+        HmacSha256::new_from_slice(key).expect("HMAC can take key of any size");
+    mac.update(iv);
+    mac.update(ciphertext);
+    mac.verify_slice(expected).is_ok()
+}
+
+/// Compute SHA-256 hash of data.
+///
+/// Returns 32-byte digest. Used for integrity verification of original file content.
+pub fn sha256_hash(data: &[u8]) -> [u8; 32] {
+    use sha2::Digest;
+    sha2::Sha256::digest(data).into()
+}
diff --git a/src/format.rs b/src/format.rs
index b3b1193..544e5a5 100644
--- a/src/format.rs
+++ b/src/format.rs
@@ -1,2 +1,211 @@
-// Binary format types and serialization/deserialization.
-// Will be implemented in Task 2.
+use std::io::Read;
+use std::io::Write;
+
+/// Custom magic bytes: leading 0x00 signals binary, remaining bytes are unrecognized.
+pub const MAGIC: [u8; 4] = [0x00, 0xEA, 0x72, 0x63];
+
+/// Format version for this specification (v1).
+pub const VERSION: u8 = 1;
+
+/// Fixed header size in bytes.
+pub const HEADER_SIZE: u32 = 40;
+
+/// Archive header (40 bytes fixed at offset 0x00).
+#[derive(Debug, Clone)]
+pub struct Header {
+    pub version: u8,
+    pub flags: u8,
+    pub file_count: u16,
+    pub toc_offset: u32,
+    pub toc_size: u32,
+    pub toc_iv: [u8; 16],
+    pub reserved: [u8; 8],
+}
+
+/// File table entry (variable length: 101 + name_length bytes).
+#[derive(Debug, Clone)]
+pub struct TocEntry {
+    pub name: String,
+    pub original_size: u32,
+    pub compressed_size: u32,
+    pub encrypted_size: u32,
+    pub data_offset: u32,
+    pub iv: [u8; 16],
+    pub hmac: [u8; 32],
+    pub sha256: [u8; 32],
+    pub compression_flag: u8,
+    pub padding_after: u16,
+}
+
+/// Write the 40-byte archive header to the writer.
+///
+/// Field order matches FORMAT.md Section 4:
+/// magic(4) | version(1) | flags(1) | file_count(2 LE) | toc_offset(4 LE) |
+/// toc_size(4 LE) | toc_iv(16) | reserved(8)
+pub fn write_header(writer: &mut impl Write, header: &Header) -> anyhow::Result<()> {
+    writer.write_all(&MAGIC)?;
+    writer.write_all(&[header.version])?;
+    writer.write_all(&[header.flags])?;
+    writer.write_all(&header.file_count.to_le_bytes())?;
+    writer.write_all(&header.toc_offset.to_le_bytes())?;
+    writer.write_all(&header.toc_size.to_le_bytes())?;
+    writer.write_all(&header.toc_iv)?;
+    writer.write_all(&header.reserved)?;
+    Ok(())
+}
+
+/// Write a single TOC entry to the writer.
+///
+/// Field order matches FORMAT.md Section 5:
+/// name_length(2 LE) | name(N) | original_size(4 LE) | compressed_size(4 LE) |
+/// encrypted_size(4 LE) | data_offset(4 LE) | iv(16) | hmac(32) | sha256(32) |
+/// compression_flag(1) | padding_after(2 LE)
+pub fn write_toc_entry(writer: &mut impl Write, entry: &TocEntry) -> anyhow::Result<()> {
+    let name_bytes = entry.name.as_bytes();
+    writer.write_all(&(name_bytes.len() as u16).to_le_bytes())?;
+    writer.write_all(name_bytes)?;
+    writer.write_all(&entry.original_size.to_le_bytes())?;
+    writer.write_all(&entry.compressed_size.to_le_bytes())?;
+    writer.write_all(&entry.encrypted_size.to_le_bytes())?;
+    writer.write_all(&entry.data_offset.to_le_bytes())?;
+    writer.write_all(&entry.iv)?;
+    writer.write_all(&entry.hmac)?;
+    writer.write_all(&entry.sha256)?;
+    writer.write_all(&[entry.compression_flag])?;
+    writer.write_all(&entry.padding_after.to_le_bytes())?;
+    Ok(())
+}
+
+/// Read and parse the 40-byte archive header.
+///
+/// Verifies: magic bytes, version == 1, reserved flags bits 4-7 are zero.
+pub fn read_header(reader: &mut impl Read) -> anyhow::Result<Header> {
+    let mut buf = [0u8; 40];
+    reader.read_exact(&mut buf)?;
+
+    // Verify magic
+    anyhow::ensure!(
+        buf[0..4] == MAGIC,
+        "Invalid magic bytes: expected {:02X?}, got {:02X?}",
+        MAGIC,
+        &buf[0..4]
+    );
+
+    let version = buf[4];
+    anyhow::ensure!(version == VERSION, "Unsupported version: {}", version);
+
+    let flags = buf[5];
+    anyhow::ensure!(
+        flags & 0xF0 == 0,
+        "Unknown flags set: 0x{:02X} (bits 4-7 must be zero)",
+        flags
+    );
+
+    let file_count = u16::from_le_bytes([buf[6], buf[7]]);
+    let toc_offset = u32::from_le_bytes([buf[8], buf[9], buf[10], buf[11]]);
+    let toc_size = u32::from_le_bytes([buf[12], buf[13], buf[14], buf[15]]);
+
+    let mut toc_iv = [0u8; 16];
+    toc_iv.copy_from_slice(&buf[16..32]);
+
+    let mut reserved = [0u8; 8];
+    reserved.copy_from_slice(&buf[32..40]);
+
+    Ok(Header {
+        version,
+        flags,
+        file_count,
+        toc_offset,
+        toc_size,
+        toc_iv,
+        reserved,
+    })
+}
+
+/// Read a single TOC entry from the reader.
+///
+/// Reads variable-length name first, then all fixed fields.
+pub fn read_toc_entry(reader: &mut impl Read) -> anyhow::Result<TocEntry> {
+    // name_length (u16 LE)
+    let mut buf2 = [0u8; 2];
+    reader.read_exact(&mut buf2)?;
+    let name_length = u16::from_le_bytes(buf2);
+
+    // name (name_length bytes, UTF-8)
+    let mut name_bytes = vec![0u8; name_length as usize];
+    reader.read_exact(&mut name_bytes)?;
+    let name = String::from_utf8(name_bytes)
+        .map_err(|e| anyhow::anyhow!("Invalid UTF-8 filename: {}", e))?;
+
+    // original_size (u32 LE)
+    let mut buf4 = [0u8; 4];
+    reader.read_exact(&mut buf4)?;
+    let original_size = u32::from_le_bytes(buf4);
+
+    // compressed_size (u32 LE)
+    reader.read_exact(&mut buf4)?;
+    let compressed_size = u32::from_le_bytes(buf4);
+
+    // encrypted_size (u32 LE)
+    reader.read_exact(&mut buf4)?;
+    let encrypted_size = u32::from_le_bytes(buf4);
+
+    // data_offset (u32 LE)
+    reader.read_exact(&mut buf4)?;
+    let data_offset = u32::from_le_bytes(buf4);
+
+    // iv (16 bytes)
+    let mut iv = [0u8; 16];
+    reader.read_exact(&mut iv)?;
+
+    // hmac (32 bytes)
+    let mut hmac = [0u8; 32];
+    reader.read_exact(&mut hmac)?;
+
+    // sha256 (32 bytes)
+    let mut sha256 = [0u8; 32];
+    reader.read_exact(&mut sha256)?;
+
+    // compression_flag (u8)
+    let mut buf1 = [0u8; 1];
+    reader.read_exact(&mut buf1)?;
+    let compression_flag = buf1[0];
+
+    // padding_after (u16 LE)
+    reader.read_exact(&mut buf2)?;
+    let padding_after = u16::from_le_bytes(buf2);
+
+    Ok(TocEntry {
+        name,
+        original_size,
+        compressed_size,
+        encrypted_size,
+        data_offset,
+        iv,
+        hmac,
+        sha256,
+        compression_flag,
+        padding_after,
+    })
+}
+
+/// Read all TOC entries sequentially.
+pub fn read_toc(reader: &mut impl Read, file_count: u16) -> anyhow::Result<Vec<TocEntry>> {
+    let mut entries = Vec::with_capacity(file_count as usize);
+    for _ in 0..file_count {
+        entries.push(read_toc_entry(reader)?);
+    }
+    Ok(entries)
+}
+
+/// Compute the serialized size of a single TOC entry.
+///
+/// Formula from FORMAT.md Section 5: entry_size = 101 + name_length bytes.
+pub fn entry_size(entry: &TocEntry) -> u32 {
+    101 + entry.name.len() as u32
+}
+
+/// Compute the total serialized size of all TOC entries.
+pub fn compute_toc_size(entries: &[TocEntry]) -> u32 {
+    entries.iter().map(entry_size).sum()
+}