feat(02-01): format types, crypto pipeline, and compression module

- Implement Header and TocEntry structs matching FORMAT.md byte layout
- Add write_header (40 bytes) and write_toc_entry (101+name_len bytes) serialization
- Add read_header, read_toc_entry, read_toc deserialization with validation
- Implement AES-256-CBC encrypt/decrypt with PKCS7 padding via cbc crate
- Add HMAC-SHA-256 compute/verify over IV||ciphertext (encrypt-then-MAC)
- Add SHA-256 hash for original file integrity
- Implement gzip compress/decompress with deterministic mtime(0)
- Add should_compress heuristic for known compressed file extensions

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
NikitolProject
2026-02-24 23:58:08 +03:00
parent c647f3a90e
commit 6292b41159
3 changed files with 340 additions and 6 deletions

View File

@@ -1,2 +1,51 @@
// Gzip compression/decompression and compression heuristic.
// Will be implemented in Task 2.
use flate2::read::GzDecoder;
use flate2::{Compression, GzBuilder};
use std::io::{Read, Write};
/// Compress `data` into a gzip stream whose header timestamp is zero.
///
/// The stream is built with `GzBuilder::new().mtime(0)` at the default
/// compression level, so the gzip header carries no wall-clock time.
///
/// # Errors
///
/// Propagates any I/O error reported by the encoder while writing or
/// finishing the stream.
pub fn compress(data: &[u8]) -> anyhow::Result<Vec<u8>> {
    let sink = Vec::new();
    let mut gz = GzBuilder::new()
        .mtime(0)
        .write(sink, Compression::default());
    gz.write_all(data)?;
    Ok(gz.finish()?)
}
/// Inflate a gzip stream and return the decoded bytes.
///
/// The whole output is read into memory in one go.
/// NOTE(review): the output size is not capped here; callers that handle
/// untrusted input may want to enforce a limit themselves.
///
/// # Errors
///
/// Propagates any error the decoder reports while reading `data`.
pub fn decompress(data: &[u8]) -> anyhow::Result<Vec<u8>> {
    let mut out = Vec::new();
    GzDecoder::new(data).read_to_end(&mut out)?;
    Ok(out)
}
/// Decide whether a file should be gzip-compressed, based on its name.
///
/// Returns `false` when either of the following holds:
/// - `filename` ends with a non-empty entry of `no_compress_list` (an exact
///   match is just a special case of a suffix match);
/// - the text after the last `.` in `filename` is, case-insensitively, one of
///   the built-in already-compressed extensions (apk, zip, gz, ...).
///
/// Returns `true` otherwise.
///
/// Edge cases:
/// - Empty entries in `no_compress_list` are ignored. Every string ends with
///   `""`, so honoring them would disable compression for all files.
/// - A name without any `.` has no extension, so a file literally called
///   `zip` is still compressed.
pub fn should_compress(filename: &str, no_compress_list: &[String]) -> bool {
    // Explicit exclusion list: suffix match against each non-empty entry.
    let excluded = no_compress_list
        .iter()
        .any(|nc| !nc.is_empty() && filename.ends_with(nc.as_str()));
    if excluded {
        return false;
    }

    // Only the part after the last dot counts as an extension.
    let ext = match filename.rsplit_once('.') {
        Some((_, ext)) => ext.to_lowercase(),
        None => return true,
    };

    // Formats that are already compressed gain nothing from gzip.
    !matches!(
        ext.as_str(),
        "apk" | "zip" | "gz" | "bz2" | "xz" | "zst"
            | "png" | "jpg" | "jpeg" | "gif" | "webp"
            | "mp4" | "mp3" | "aac" | "ogg" | "flac"
            | "7z" | "rar" | "jar"
    )
}

View File

@@ -1,2 +1,78 @@
// Cryptographic operations: AES-256-CBC, HMAC-SHA-256, SHA-256.
// Will be implemented in Task 2.
use aes::cipher::{block_padding::Pkcs7, BlockDecryptMut, BlockEncryptMut, KeyIvInit};
use hmac::Mac;
/// AES-256 in CBC mode, encrypting direction (used by `encrypt_data`).
type Aes256CbcEnc = cbc::Encryptor<aes::Aes256>;
/// AES-256 in CBC mode, decrypting direction (used by `decrypt_data`).
type Aes256CbcDec = cbc::Decryptor<aes::Aes256>;
/// HMAC instantiated with SHA-256 (used by `compute_hmac` and `verify_hmac`).
type HmacSha256 = hmac::Hmac<sha2::Sha256>;
/// Produce a fresh random 16-byte initialization vector.
///
/// The bytes are drawn from `rand::rng()`, which this module relies on as
/// its CSPRNG.
pub fn generate_iv() -> [u8; 16] {
    let mut rng = rand::rng();
    let mut iv = [0u8; 16];
    rand::Fill::fill(&mut iv, &mut rng);
    iv
}
/// Encrypt `plaintext` with AES-256-CBC and PKCS7 padding.
///
/// Returns the ciphertext, whose length is `((plaintext.len() / 16) + 1) * 16`:
/// PKCS7 always appends between 1 and 16 padding bytes, so an input that is
/// already block-aligned grows by one full block.
///
/// # Panics
///
/// Panics only if the internally sized buffer were too small for the padded
/// message, which would be a bug in the size formula above.
pub fn encrypt_data(plaintext: &[u8], key: &[u8; 32], iv: &[u8; 16]) -> Vec<u8> {
    let encrypted_size = ((plaintext.len() / 16) + 1) * 16;

    // Encryption happens in place: the buffer starts as plaintext followed by
    // zeroed room for the padding.
    let mut buf = vec![0u8; encrypted_size];
    buf[..plaintext.len()].copy_from_slice(plaintext);

    let written = Aes256CbcEnc::new(key.into(), iv.into())
        .encrypt_padded_mut::<Pkcs7>(&mut buf, plaintext.len())
        .expect("encryption buffer too small")
        .len();

    // The ciphertext is a prefix of `buf`, so hand the buffer back directly
    // instead of copying it into a second allocation.
    buf.truncate(written);
    buf
}
/// Decrypt AES-256-CBC `ciphertext` and strip its PKCS7 padding.
///
/// Returns the recovered plaintext.
///
/// # Errors
///
/// Returns an error when unpadding fails, which is what a wrong key, a wrong
/// IV, or a corrupted or mis-sized ciphertext looks like from here.
pub fn decrypt_data(ciphertext: &[u8], key: &[u8; 32], iv: &[u8; 16]) -> anyhow::Result<Vec<u8>> {
    // Decrypt in place on a private copy so the caller's slice is untouched.
    let mut buf = ciphertext.to_vec();

    let plaintext_len = Aes256CbcDec::new(key.into(), iv.into())
        .decrypt_padded_mut::<Pkcs7>(&mut buf)
        .map_err(|_| anyhow::anyhow!("Decryption failed: invalid padding or wrong key"))?
        .len();

    // The plaintext is a prefix of `buf`; dropping the padding bytes avoids a
    // second allocation and copy.
    buf.truncate(plaintext_len);
    Ok(buf)
}
/// Produce the HMAC-SHA-256 tag for an encrypted blob.
///
/// The MAC input is the 16-byte `iv` immediately followed by `ciphertext`.
/// Returns the 32-byte tag.
pub fn compute_hmac(key: &[u8; 32], iv: &[u8; 16], ciphertext: &[u8]) -> [u8; 32] {
    let mut mac = HmacSha256::new_from_slice(key).expect("HMAC can take key of any size");
    // Feed IV first, then ciphertext; the order is part of the tag definition.
    for chunk in [&iv[..], ciphertext] {
        mac.update(chunk);
    }
    let tag: [u8; 32] = mac.finalize().into_bytes().into();
    tag
}
/// Check an HMAC-SHA-256 tag computed over IV || ciphertext.
///
/// The comparison is delegated to the MAC's `verify_slice`, which the
/// original note describes as constant-time.
///
/// Returns `true` when the tag computed here equals `expected`.
pub fn verify_hmac(
    key: &[u8; 32],
    iv: &[u8; 16],
    ciphertext: &[u8],
    expected: &[u8; 32],
) -> bool {
    let mut mac = HmacSha256::new_from_slice(key).expect("HMAC can take key of any size");
    // Same input order as `compute_hmac`: IV first, then ciphertext.
    for chunk in [&iv[..], ciphertext] {
        mac.update(chunk);
    }
    matches!(mac.verify_slice(expected), Ok(()))
}
/// Compute SHA-256 hash of data.
///
/// Returns 32-byte digest. Used for integrity verification of original file content.
pub fn sha256_hash(data: &[u8]) -> [u8; 32] {
use sha2::Digest;
sha2::Sha256::digest(data).into()
}

View File

@@ -1,2 +1,211 @@
// Binary format types and serialization/deserialization.
// Will be implemented in Task 2.
use std::io::Read;
use std::io::Write;
/// Four-byte magic that `write_header` emits first and `read_header` checks.
///
/// The leading 0x00 signals binary content; the remaining three bytes are
/// custom values that do not correspond to a recognized file signature.
pub const MAGIC: [u8; 4] = [0x00, 0xEA, 0x72, 0x63];
/// Format version for this specification (v1); `read_header` rejects any other value.
pub const VERSION: u8 = 1;
/// Fixed header size in bytes:
/// magic(4) + version(1) + flags(1) + file_count(2) + toc_offset(4) +
/// toc_size(4) + toc_iv(16) + reserved(8) = 40.
pub const HEADER_SIZE: u32 = 40;
/// Archive header (40 bytes fixed at offset 0x00).
///
/// On disk the header is laid out as
/// magic(4) | version(1) | flags(1) | file_count(2 LE) | toc_offset(4 LE) |
/// toc_size(4 LE) | toc_iv(16) | reserved(8).
/// The magic bytes are not stored in this struct; `write_header` emits them
/// and `read_header` checks them.
///
/// `PartialEq`/`Eq` are derived so parsed headers can be compared directly,
/// e.g. in write/read round-trip checks.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Header {
    /// Format version byte; `read_header` only accepts `VERSION`.
    pub version: u8,
    /// Feature flag bits; `read_header` requires bits 4-7 to be zero.
    pub flags: u8,
    /// Number of files, stored as u16 little-endian
    /// (presumably the number of TOC entries; confirm against FORMAT.md).
    pub file_count: u16,
    /// Offset of the table of contents, stored as u32 little-endian.
    pub toc_offset: u32,
    /// Size of the table of contents in bytes, stored as u32 little-endian.
    pub toc_size: u32,
    /// 16-byte IV associated with the TOC
    /// (presumably used when the TOC itself is encrypted; confirm against FORMAT.md).
    pub toc_iv: [u8; 16],
    /// 8 reserved bytes, carried through unchanged by the reader and writer.
    pub reserved: [u8; 8],
}
/// File table entry (variable length: 101 + name_length bytes).
///
/// On disk an entry is laid out as
/// name_length(2 LE) | name(N) | original_size(4 LE) | compressed_size(4 LE) |
/// encrypted_size(4 LE) | data_offset(4 LE) | iv(16) | hmac(32) | sha256(32) |
/// compression_flag(1) | padding_after(2 LE).
/// The name length is derived from `name` when writing and is not stored as a
/// separate field.
///
/// `PartialEq`/`Eq` are derived so parsed entries can be compared directly,
/// e.g. in write/read round-trip checks.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TocEntry {
    /// File name; serialized as UTF-8 bytes behind a u16 length prefix.
    pub name: String,
    /// Size of the original file content in bytes.
    pub original_size: u32,
    /// Size of the data after the compression step, in bytes.
    pub compressed_size: u32,
    /// Size of the stored ciphertext in bytes.
    pub encrypted_size: u32,
    /// Offset of this file's data block (presumably from the start of the
    /// archive; confirm against FORMAT.md).
    pub data_offset: u32,
    /// 16-byte IV for this file's data block.
    pub iv: [u8; 16],
    /// 32-byte HMAC-SHA-256 tag for this file's data block.
    pub hmac: [u8; 32],
    /// 32-byte SHA-256 digest (presumably of the original content; confirm
    /// against FORMAT.md).
    pub sha256: [u8; 32],
    /// Compression marker byte (presumably non-zero means gzip was applied;
    /// confirm against FORMAT.md).
    pub compression_flag: u8,
    /// Padding length recorded after this entry's data (presumably in bytes;
    /// confirm against FORMAT.md).
    pub padding_after: u16,
}
/// Serialize the 40-byte archive header into `writer`.
///
/// Fields are emitted in the order given by FORMAT.md Section 4:
/// magic(4) | version(1) | flags(1) | file_count(2 LE) | toc_offset(4 LE) |
/// toc_size(4 LE) | toc_iv(16) | reserved(8)
///
/// # Errors
///
/// Propagates the first I/O error returned by `writer`.
pub fn write_header(writer: &mut impl Write, header: &Header) -> anyhow::Result<()> {
    // Materialize the scalar fields as byte arrays so every field can be
    // treated uniformly as a slice below.
    let version = [header.version];
    let flags = [header.flags];
    let file_count = header.file_count.to_le_bytes();
    let toc_offset = header.toc_offset.to_le_bytes();
    let toc_size = header.toc_size.to_le_bytes();

    let fields: [&[u8]; 8] = [
        &MAGIC,
        &version,
        &flags,
        &file_count,
        &toc_offset,
        &toc_size,
        &header.toc_iv,
        &header.reserved,
    ];
    for field in fields {
        writer.write_all(field)?;
    }
    Ok(())
}
/// Write a single TOC entry to the writer.
///
/// Field order matches FORMAT.md Section 5:
/// name_length(2 LE) | name(N) | original_size(4 LE) | compressed_size(4 LE) |
/// encrypted_size(4 LE) | data_offset(4 LE) | iv(16) | hmac(32) | sha256(32) |
/// compression_flag(1) | padding_after(2 LE)
///
/// # Errors
///
/// Returns an error if the UTF-8 encoded name is longer than `u16::MAX`
/// bytes, because the on-disk length prefix cannot represent it. Also
/// propagates the first I/O error returned by `writer`.
pub fn write_toc_entry(writer: &mut impl Write, entry: &TocEntry) -> anyhow::Result<()> {
    let name_bytes = entry.name.as_bytes();

    // name_length is a u16 on disk. A bare `as u16` cast would silently wrap
    // for longer names and emit a prefix that disagrees with the bytes that
    // follow, so reject such names before writing anything.
    anyhow::ensure!(
        name_bytes.len() <= u16::MAX as usize,
        "Filename too long: {} bytes (maximum is {})",
        name_bytes.len(),
        u16::MAX
    );
    let name_length = name_bytes.len() as u16;

    writer.write_all(&name_length.to_le_bytes())?;
    writer.write_all(name_bytes)?;
    writer.write_all(&entry.original_size.to_le_bytes())?;
    writer.write_all(&entry.compressed_size.to_le_bytes())?;
    writer.write_all(&entry.encrypted_size.to_le_bytes())?;
    writer.write_all(&entry.data_offset.to_le_bytes())?;
    writer.write_all(&entry.iv)?;
    writer.write_all(&entry.hmac)?;
    writer.write_all(&entry.sha256)?;
    writer.write_all(&[entry.compression_flag])?;
    writer.write_all(&entry.padding_after.to_le_bytes())?;
    Ok(())
}
/// Read and parse the 40-byte archive header.
///
/// Verifies: magic bytes, version == 1, reserved flags bits 4-7 are zero.
pub fn read_header(reader: &mut impl Read) -> anyhow::Result<Header> {
let mut buf = [0u8; 40];
reader.read_exact(&mut buf)?;
// Verify magic
anyhow::ensure!(
buf[0..4] == MAGIC,
"Invalid magic bytes: expected {:02X?}, got {:02X?}",
MAGIC,
&buf[0..4]
);
let version = buf[4];
anyhow::ensure!(version == VERSION, "Unsupported version: {}", version);
let flags = buf[5];
anyhow::ensure!(
flags & 0xF0 == 0,
"Unknown flags set: 0x{:02X} (bits 4-7 must be zero)",
flags
);
let file_count = u16::from_le_bytes([buf[6], buf[7]]);
let toc_offset = u32::from_le_bytes([buf[8], buf[9], buf[10], buf[11]]);
let toc_size = u32::from_le_bytes([buf[12], buf[13], buf[14], buf[15]]);
let mut toc_iv = [0u8; 16];
toc_iv.copy_from_slice(&buf[16..32]);
let mut reserved = [0u8; 8];
reserved.copy_from_slice(&buf[32..40]);
Ok(Header {
version,
flags,
file_count,
toc_offset,
toc_size,
toc_iv,
reserved,
})
}
/// Parse one TOC entry from `reader`.
///
/// The u16 little-endian length prefix and the UTF-8 name are read first,
/// followed by the fixed-size fields in their on-disk order.
///
/// # Errors
///
/// Fails if the reader runs out of bytes or the name is not valid UTF-8.
pub fn read_toc_entry(reader: &mut impl Read) -> anyhow::Result<TocEntry> {
    /// Pull exactly `N` bytes off the reader into a fixed-size array.
    fn take<R: Read, const N: usize>(src: &mut R) -> std::io::Result<[u8; N]> {
        let mut bytes = [0u8; N];
        src.read_exact(&mut bytes)?;
        Ok(bytes)
    }

    // Variable-length part: name_length (u16 LE) followed by the name bytes.
    let name_length = u16::from_le_bytes(take::<_, 2>(reader)?);
    let mut raw_name = vec![0u8; usize::from(name_length)];
    reader.read_exact(&mut raw_name)?;
    let name = match String::from_utf8(raw_name) {
        Ok(text) => text,
        Err(e) => return Err(anyhow::anyhow!("Invalid UTF-8 filename: {}", e)),
    };

    // Fixed-length part. Struct-literal fields are evaluated in the order
    // written, which is exactly the on-disk field order.
    Ok(TocEntry {
        name,
        original_size: u32::from_le_bytes(take::<_, 4>(reader)?),
        compressed_size: u32::from_le_bytes(take::<_, 4>(reader)?),
        encrypted_size: u32::from_le_bytes(take::<_, 4>(reader)?),
        data_offset: u32::from_le_bytes(take::<_, 4>(reader)?),
        iv: take::<_, 16>(reader)?,
        hmac: take::<_, 32>(reader)?,
        sha256: take::<_, 32>(reader)?,
        compression_flag: take::<_, 1>(reader)?[0],
        padding_after: u16::from_le_bytes(take::<_, 2>(reader)?),
    })
}
/// Parse `file_count` consecutive TOC entries from `reader`.
///
/// # Errors
///
/// Stops at, and returns, the first error produced by `read_toc_entry`.
pub fn read_toc(reader: &mut impl Read, file_count: u16) -> anyhow::Result<Vec<TocEntry>> {
    let expected = usize::from(file_count);
    let mut toc = Vec::with_capacity(expected);
    while toc.len() < expected {
        let entry = read_toc_entry(reader)?;
        toc.push(entry);
    }
    Ok(toc)
}
/// Serialized length of one TOC entry, in bytes.
///
/// FORMAT.md Section 5 gives entry_size = 101 + name_length, where 101 is the
/// sum of the fixed-width fields: 2 + 4 + 4 + 4 + 4 + 16 + 32 + 32 + 1 + 2.
pub fn entry_size(entry: &TocEntry) -> u32 {
    const FIXED_FIELDS_LEN: u32 = 101;
    let name_len = entry.name.len() as u32;
    FIXED_FIELDS_LEN + name_len
}
/// Compute the total serialized size of all TOC entries.
pub fn compute_toc_size(entries: &[TocEntry]) -> u32 {
entries.iter().map(entry_size).sum()
}