import java.io.ByteArrayInputStream
import java.io.EOFException
import java.io.File
import java.io.RandomAccessFile
import java.nio.ByteBuffer
import java.nio.ByteOrder
import java.security.MessageDigest
import java.util.zip.GZIPInputStream
import javax.crypto.Cipher
import javax.crypto.Mac
import javax.crypto.spec.IvParameterSpec
import javax.crypto.spec.SecretKeySpec
import kotlin.system.exitProcess

// ---------------------------------------------------------------------------
// Constants (matching FORMAT.md Section 4 and src/key.rs)
// ---------------------------------------------------------------------------

/** Custom magic bytes: 0x00 0xEA 0x72 0x63 (FORMAT.md Section 4). */
val MAGIC = byteArrayOf(0x00, 0xEA.toByte(), 0x72, 0x63)

/** Fixed header size in bytes (FORMAT.md Section 4). */
const val HEADER_SIZE = 40

/**
 * Size in bytes of a TOC entry excluding its variable-length name:
 * name_length (2) + four u32 fields (16) + iv (16) + hmac (32) + sha256 (32)
 * + compression_flag (1) + padding_after (2) = 101.
 */
private const val TOC_ENTRY_FIXED_SIZE = 101

/**
 * Hardcoded 32-byte AES-256 key.
 * Same key is used for AES-256-CBC encryption and HMAC-SHA-256 authentication (v1).
 * Matches src/key.rs exactly.
 */
val KEY = byteArrayOf(
    0x7A, 0x35, 0xC1.toByte(), 0xD9.toByte(), 0x4F, 0xE8.toByte(), 0x2B, 0x6A,
    0x91.toByte(), 0x0D, 0xF3.toByte(), 0x58, 0xBC.toByte(), 0x74, 0xA6.toByte(), 0x1E,
    0x42, 0x8F.toByte(), 0xD0.toByte(), 0x63, 0xE5.toByte(), 0x17, 0x9B.toByte(), 0x2C,
    0xFA.toByte(), 0x84.toByte(), 0x06, 0xCD.toByte(), 0x3E, 0x79, 0xB5.toByte(), 0x50,
)

// ---------------------------------------------------------------------------
// Data classes
// ---------------------------------------------------------------------------

/**
 * Archive header (40 bytes fixed at offset 0x00). FORMAT.md Section 4.
 *
 * [equals] and [hashCode] are overridden because the generated data-class versions
 * compare [tocIv] by reference, which would make two identically parsed headers unequal.
 */
data class ArchiveHeader(
    val version: Int,
    val flags: Int,
    val fileCount: Int,
    val tocOffset: Long,
    val tocSize: Long,
    val tocIv: ByteArray,
) {
    override fun equals(other: Any?): Boolean =
        this === other ||
            (
                other is ArchiveHeader &&
                    version == other.version &&
                    flags == other.flags &&
                    fileCount == other.fileCount &&
                    tocOffset == other.tocOffset &&
                    tocSize == other.tocSize &&
                    tocIv.contentEquals(other.tocIv)
                )

    override fun hashCode(): Int {
        var result = version
        result = 31 * result + flags
        result = 31 * result + fileCount
        result = 31 * result + tocOffset.hashCode()
        result = 31 * result + tocSize.hashCode()
        result = 31 * result + tocIv.contentHashCode()
        return result
    }
}

/**
 * File table entry (variable length: 101 + name_length bytes). FORMAT.md Section 5.
 *
 * [equals] and [hashCode] are overridden so that the byte-array fields
 * ([iv], [hmac], [sha256]) are compared by content rather than by reference.
 */
data class TocEntry(
    val name: String,
    val originalSize: Long,
    val compressedSize: Long,
    val encryptedSize: Int,
    val dataOffset: Long,
    val iv: ByteArray,
    val hmac: ByteArray,
    val sha256: ByteArray,
    val compressionFlag: Int,
    val paddingAfter: Int,
) {
    override fun equals(other: Any?): Boolean =
        this === other ||
            (
                other is TocEntry &&
                    name == other.name &&
                    originalSize == other.originalSize &&
                    compressedSize == other.compressedSize &&
                    encryptedSize == other.encryptedSize &&
                    dataOffset == other.dataOffset &&
                    iv.contentEquals(other.iv) &&
                    hmac.contentEquals(other.hmac) &&
                    sha256.contentEquals(other.sha256) &&
                    compressionFlag == other.compressionFlag &&
                    paddingAfter == other.paddingAfter
                )

    override fun hashCode(): Int {
        var result = name.hashCode()
        result = 31 * result + originalSize.hashCode()
        result = 31 * result + compressedSize.hashCode()
        result = 31 * result + encryptedSize
        result = 31 * result + dataOffset.hashCode()
        result = 31 * result + iv.contentHashCode()
        result = 31 * result + hmac.contentHashCode()
        result = 31 * result + sha256.contentHashCode()
        result = 31 * result + compressionFlag
        result = 31 * result + paddingAfter
        return result
    }
}

// ---------------------------------------------------------------------------
// Little-endian integer helpers (using ByteBuffer)
// ---------------------------------------------------------------------------

/** Read an unsigned 16-bit little-endian integer from [data] at [offset]. */
fun readLeU16(data: ByteArray, offset: Int): Int =
    ByteBuffer.wrap(data, offset, 2)
        .order(ByteOrder.LITTLE_ENDIAN)
        .short.toInt() and 0xFFFF

/** Read an unsigned 32-bit little-endian integer from [data] at [offset]. */
fun readLeU32(data: ByteArray, offset: Int): Long =
    ByteBuffer.wrap(data, offset, 4)
        .order(ByteOrder.LITTLE_ENDIAN)
        .int.toLong() and 0xFFFFFFFFL

// ---------------------------------------------------------------------------
// Header parsing (FORMAT.md Section 4)
// ---------------------------------------------------------------------------

/**
 * Parse the 40-byte archive header.
 *
 * Verifies: magic bytes, version == 1, reserved flag bits 4-7 are zero.
 *
 * @param data At least [HEADER_SIZE] bytes, starting at the beginning of the archive.
 * @return The parsed header fields.
 * @throws IllegalArgumentException if [data] is too short or any verification fails.
 */
fun parseHeader(data: ByteArray): ArchiveHeader {
    require(data.size >= HEADER_SIZE) { "Header too short: ${data.size} bytes" }

    // Verify magic bytes
    require(MAGIC.indices.all { data[it] == MAGIC[it] }) { "Invalid magic bytes" }

    // Version check
    val version = data[4].toInt() and 0xFF
    require(version == 1) { "Unsupported version: $version" }

    // Flags validation
    val flags = data[5].toInt() and 0xFF
    require((flags and 0xF0) == 0) { "Unknown flags set: 0x${flags.toString(16)} (bits 4-7 must be zero)" }

    // Read remaining fields
    return ArchiveHeader(
        version = version,
        flags = flags,
        fileCount = readLeU16(data, 6),
        tocOffset = readLeU32(data, 8),
        tocSize = readLeU32(data, 12),
        tocIv = data.copyOfRange(16, 32),
    )
}

// ---------------------------------------------------------------------------
// TOC parsing (FORMAT.md Section 5)
// ---------------------------------------------------------------------------

/**
 * Parse a single TOC entry from [data] starting at [offset].
 *
 * Entry size formula: 101 + name_length bytes.
 *
 * @return A Pair of the parsed entry and the new offset after the entry.
 * @throws IllegalArgumentException if the entry does not fit in [data], or if
 *   encrypted_size does not fit in a non-negative Int.
 */
fun parseTocEntry(data: ByteArray, offset: Int): Pair<TocEntry, Int> {
    // Validate before touching the buffer so a truncated TOC yields a clear error
    // instead of an index/buffer-underflow exception from deep inside the parser.
    require(offset >= 0 && data.size - offset >= 2) {
        "Truncated TOC entry at offset $offset: missing name_length"
    }

    // name_length (u16 LE)
    val nameLength = readLeU16(data, offset)
    val entrySize = TOC_ENTRY_FIXED_SIZE + nameLength
    require(data.size - offset >= entrySize) {
        "Truncated TOC entry at offset $offset: need $entrySize bytes, have ${data.size - offset}"
    }
    var pos = offset + 2

    // name (UTF-8 bytes)
    val name = String(data, pos, nameLength, Charsets.UTF_8)
    pos += nameLength

    // Fixed fields: original_size, compressed_size, encrypted_size, data_offset (all u32 LE)
    val originalSize = readLeU32(data, pos); pos += 4
    val compressedSize = readLeU32(data, pos); pos += 4
    val encryptedSizeRaw = readLeU32(data, pos); pos += 4
    val dataOffset = readLeU32(data, pos); pos += 4

    // A u32 above Int.MAX_VALUE would silently turn negative when narrowed to Int.
    require(encryptedSizeRaw <= Int.MAX_VALUE.toLong()) {
        "encrypted_size too large for $name: $encryptedSizeRaw"
    }
    val encryptedSize = encryptedSizeRaw.toInt()

    // iv (16 bytes)
    val iv = data.copyOfRange(pos, pos + 16); pos += 16

    // hmac (32 bytes)
    val hmac = data.copyOfRange(pos, pos + 32); pos += 32

    // sha256 (32 bytes)
    val sha256 = data.copyOfRange(pos, pos + 32); pos += 32

    // compression_flag (u8)
    val compressionFlag = data[pos].toInt() and 0xFF; pos += 1

    // padding_after (u16 LE)
    val paddingAfter = readLeU16(data, pos); pos += 2

    val entry = TocEntry(
        name, originalSize, compressedSize, encryptedSize,
        dataOffset, iv, hmac, sha256, compressionFlag, paddingAfter,
    )
    return Pair(entry, pos)
}

/**
 * Parse all TOC entries sequentially.
 *
 * @param data The complete TOC bytes.
 * @param fileCount Number of entries to parse.
 * @return The parsed entries in TOC order.
 * @throws IllegalArgumentException if an entry is truncated, or if the cursor does not
 *   equal [data].size after parsing all [fileCount] entries.
 */
fun parseToc(data: ByteArray, fileCount: Int): List<TocEntry> {
    val entries = mutableListOf<TocEntry>()
    var pos = 0
    repeat(fileCount) {
        val (entry, newPos) = parseTocEntry(data, pos)
        entries += entry
        pos = newPos
    }
    require(pos == data.size) {
        "TOC parsing error: consumed $pos bytes but TOC size is ${data.size}"
    }
    return entries
}

// ---------------------------------------------------------------------------
// Crypto utility functions (FORMAT.md Section 7, Section 13.6)
// ---------------------------------------------------------------------------

/**
 * Verify HMAC-SHA-256 over IV || ciphertext.
 *
 * The comparison uses [MessageDigest.isEqual] rather than a plain array comparison so
 * that the time taken does not reveal how many leading bytes of the tag matched.
 *
 * @param iv The 16-byte IV from the TOC entry.
 * @param ciphertext The encrypted data (encrypted_size bytes).
 * @param key The 32-byte key (same as AES key in v1).
 * @param expectedHmac The 32-byte HMAC from the TOC entry.
 * @return true if HMAC matches.
 */
fun verifyHmac(iv: ByteArray, ciphertext: ByteArray, key: ByteArray, expectedHmac: ByteArray): Boolean {
    val mac = Mac.getInstance("HmacSHA256")
    mac.init(SecretKeySpec(key, "HmacSHA256"))
    mac.update(iv)
    mac.update(ciphertext)
    return MessageDigest.isEqual(mac.doFinal(), expectedHmac)
}

/**
 * Decrypt AES-256-CBC ciphertext.
 *
 * Uses PKCS5Padding which is functionally identical to PKCS7 for 16-byte AES blocks.
 * cipher.doFinal() automatically removes PKCS7 padding.
 *
 * @param ciphertext The encrypted data.
 * @param iv The 16-byte IV.
 * @param key The 32-byte AES key.
 * @return Decrypted plaintext (padding already removed).
 */
fun decryptAesCbc(ciphertext: ByteArray, iv: ByteArray, key: ByteArray): ByteArray {
    val cipher = Cipher.getInstance("AES/CBC/PKCS5Padding")
    cipher.init(Cipher.DECRYPT_MODE, SecretKeySpec(key, "AES"), IvParameterSpec(iv))
    return cipher.doFinal(ciphertext)
}

/**
 * Decompress gzip data.
 *
 * The stream is closed after reading, including when decompression fails.
 *
 * @param compressed Gzip-compressed data.
 * @return Decompressed data.
 */
fun decompressGzip(compressed: ByteArray): ByteArray =
    GZIPInputStream(ByteArrayInputStream(compressed)).use { it.readBytes() }

/**
 * Verify SHA-256 checksum of data.
 *
 * @param data The decompressed file content.
 * @param expectedSha256 The 32-byte SHA-256 from the TOC entry.
 * @return true if checksum matches.
 */
fun verifySha256(data: ByteArray, expectedSha256: ByteArray): Boolean =
    MessageDigest.getInstance("SHA-256").digest(data).contentEquals(expectedSha256)

// ---------------------------------------------------------------------------
// Main decode orchestration (FORMAT.md Section 10)
// ---------------------------------------------------------------------------

/**
 * Read [size] bytes at [offset] from [raf].
 *
 * The range is checked against the file length before the buffer is allocated, so a
 * corrupt size field cannot trigger a huge allocation. [what] names the region in the error.
 *
 * @throws EOFException if the region extends past the end of the file.
 */
private fun readRegion(raf: RandomAccessFile, offset: Long, size: Int, what: String): ByteArray {
    val fileLength = raf.length()
    if (offset + size > fileLength) {
        throw EOFException(
            "$what extends past end of archive (offset=$offset, size=$size, file length=$fileLength)"
        )
    }
    raf.seek(offset)
    return ByteArray(size).also { raf.readFully(it) }
}

/**
 * Resolve [entryName] under [outputRoot], or return null when the resolved path would
 * fall outside [outputRoot] (e.g. a name containing ".." segments) or is the root itself.
 */
private fun resolveOutputFile(outputRoot: File, entryName: String): File? {
    val root = outputRoot.canonicalFile
    val target = File(root, entryName).canonicalFile
    val inside = target != root && target.toPath().startsWith(root.toPath())
    return if (inside) target else null
}

/**
 * Decode an encrypted archive, extracting all files to [outputDir].
 *
 * Follows FORMAT.md Section 10 decode order:
 *   1. Read and parse 40-byte header
 *   2. Seek to tocOffset, read and parse TOC entries
 *   3. For each file: verify HMAC, decrypt, decompress, verify SHA-256, write
 *
 * Entries whose HMAC does not verify, or whose name would resolve outside [outputDir],
 * are reported on stderr and skipped. A SHA-256 mismatch only produces a warning; the
 * file is still written. The archive file is closed even if decoding fails part-way.
 *
 * @throws IllegalArgumentException if the header or TOC is malformed.
 * @throws EOFException if the TOC or an entry's data extends past the end of the archive.
 */
fun decode(archivePath: String, outputDir: String) {
    val outputRoot = File(outputDir)
    var successCount = 0

    RandomAccessFile(archivePath, "r").use { raf ->
        // Read 40-byte header
        val headerBytes = ByteArray(HEADER_SIZE)
        raf.readFully(headerBytes)
        val header = parseHeader(headerBytes)

        // tocSize is an unsigned 32-bit value; it must fit in an Int to be used as an array size.
        require(header.tocSize <= Int.MAX_VALUE.toLong()) { "TOC too large: ${header.tocSize} bytes" }

        // Seek to TOC and read all TOC bytes
        // NOTE(review): header.flags and header.tocIv are parsed but not consulted here; if
        // FORMAT.md defines an encrypted-TOC flag, the TOC would need decrypting first -- confirm.
        val tocBytes = readRegion(raf, header.tocOffset, header.tocSize.toInt(), "TOC")

        // Parse all TOC entries
        val entries = parseToc(tocBytes, header.fileCount)

        for (entry in entries) {
            // Step 1: Seek to data_offset and read ciphertext
            val ciphertext = readRegion(raf, entry.dataOffset, entry.encryptedSize, "Data for ${entry.name}")

            // Step 2: Verify HMAC FIRST (Encrypt-then-MAC -- FORMAT.md Section 7)
            if (!verifyHmac(entry.iv, ciphertext, KEY, entry.hmac)) {
                System.err.println("HMAC failed for ${entry.name}, skipping")
                continue
            }

            // Step 3: Decrypt (PKCS5Padding auto-removes PKCS7 padding)
            val decrypted = decryptAesCbc(ciphertext, entry.iv, KEY)

            // Step 4: Decompress if compression_flag == 1
            val original = if (entry.compressionFlag == 1) decompressGzip(decrypted) else decrypted

            // Step 5: Verify SHA-256 checksum
            if (!verifySha256(original, entry.sha256)) {
                System.err.println("WARNING: SHA-256 mismatch for ${entry.name}")
                // Still write the file (matching Rust behavior)
            }

            // Step 6: Write output file. The name comes from the archive, so it must not be
            // allowed to point outside the output directory.
            val outFile = resolveOutputFile(outputRoot, entry.name)
            if (outFile == null) {
                System.err.println("Unsafe path for ${entry.name}, skipping")
                continue
            }
            outFile.parentFile?.mkdirs() // entry names may contain sub-directories
            outFile.writeBytes(original)
            println("Extracted: ${entry.name} (${original.size} bytes)")
            successCount++
        }
    }

    println("Done: $successCount files extracted")
}

// ---------------------------------------------------------------------------
// CLI entry point
// ---------------------------------------------------------------------------

/**
 * Command-line entry point: expects exactly two arguments, the archive path and the
 * output directory. Prints usage and exits with status 1 otherwise.
 */
fun main(args: Array<String>) {
    if (args.size != 2) {
        System.err.println("Usage: java -jar ArchiveDecoder.jar <archive> <output_dir>")
        exitProcess(1)
    }

    val (archivePath, outputDir) = args

    // Validate archive exists
    require(File(archivePath).exists()) { "Archive not found: $archivePath" }

    // Create output directory if needed
    File(outputDir).mkdirs()

    decode(archivePath, outputDir)
}