feat(04-01): implement Kotlin archive decoder with full decode pipeline
- 40-byte header parsing with magic byte, version, and flags validation
- Variable-length TOC entry parsing with little-endian ByteBuffer helpers
- HMAC-SHA-256 verification BEFORE decryption (Encrypt-then-MAC)
- AES-256-CBC decryption via javax.crypto with PKCS5Padding
- Gzip decompression via java.util.zip when compression_flag == 1
- SHA-256 checksum verification after decompression
- Hardcoded 32-byte key matching src/key.rs exactly
- CLI main() for standalone JAR usage
- Zero third-party dependencies (only Android SDK / JVM stdlib)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
336
kotlin/ArchiveDecoder.kt
Normal file
336
kotlin/ArchiveDecoder.kt
Normal file
@@ -0,0 +1,336 @@
|
||||
import java.io.ByteArrayInputStream
|
||||
import java.io.File
|
||||
import java.io.RandomAccessFile
|
||||
import java.nio.ByteBuffer
|
||||
import java.nio.ByteOrder
|
||||
import java.security.MessageDigest
|
||||
import java.util.zip.GZIPInputStream
|
||||
import javax.crypto.Cipher
|
||||
import javax.crypto.Mac
|
||||
import javax.crypto.spec.IvParameterSpec
|
||||
import javax.crypto.spec.SecretKeySpec
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants (matching FORMAT.md Section 4 and src/key.rs)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Archive magic: the four bytes 0x00 0xEA 0x72 0x63 checked at the start of the header (FORMAT.md Section 4). */
val MAGIC: ByteArray = intArrayOf(0x00, 0xEA, 0x72, 0x63)
    .map { it.toByte() }
    .toByteArray()

/** Size in bytes of the fixed archive header (FORMAT.md Section 4). */
const val HEADER_SIZE = 40

/**
 * Hardcoded 32-byte key.
 *
 * In format v1 the same key serves both as the AES-256-CBC decryption key and as
 * the HMAC-SHA-256 authentication key. The value must stay in sync with src/key.rs.
 */
val KEY: ByteArray = intArrayOf(
    0x7A, 0x35, 0xC1, 0xD9, 0x4F, 0xE8, 0x2B, 0x6A,
    0x91, 0x0D, 0xF3, 0x58, 0xBC, 0x74, 0xA6, 0x1E,
    0x42, 0x8F, 0xD0, 0x63, 0xE5, 0x17, 0x9B, 0x2C,
    0xFA, 0x84, 0x06, 0xCD, 0x3E, 0x79, 0xB5, 0x50,
).map { it.toByte() }.toByteArray()
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Data classes
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Archive header (40 bytes fixed at offset 0x00). FORMAT.md Section 4.
 *
 * Equality and hashing are content-based: [tocIv] is compared byte-by-byte rather
 * than by array identity, so two headers parsed from identical bytes are equal.
 *
 * @property version Format version byte.
 * @property flags Flags byte.
 * @property fileCount Number of TOC entries (unsigned 16-bit value).
 * @property tocOffset Absolute file offset of the TOC (unsigned 32-bit value).
 * @property tocSize Size of the TOC in bytes (unsigned 32-bit value).
 * @property tocIv 16-byte IV field stored in the header.
 */
data class ArchiveHeader(
    val version: Int,
    val flags: Int,
    val fileCount: Int,
    val tocOffset: Long,
    val tocSize: Long,
    val tocIv: ByteArray,
) {
    // The generated data-class equals() compares arrays by reference, so it is
    // replaced with a structural comparison.
    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (other !is ArchiveHeader) return false
        return version == other.version &&
            flags == other.flags &&
            fileCount == other.fileCount &&
            tocOffset == other.tocOffset &&
            tocSize == other.tocSize &&
            tocIv.contentEquals(other.tocIv)
    }

    // Must agree with equals(): hash the array contents, not its identity.
    override fun hashCode(): Int {
        var result = version
        result = 31 * result + flags
        result = 31 * result + fileCount
        result = 31 * result + tocOffset.hashCode()
        result = 31 * result + tocSize.hashCode()
        result = 31 * result + tocIv.contentHashCode()
        return result
    }
}
|
||||
|
||||
/**
 * File table entry (variable length: 101 + name_length bytes). FORMAT.md Section 5.
 *
 * Equality and hashing are content-based: the [iv], [hmac] and [sha256] arrays are
 * compared byte-by-byte rather than by array identity.
 *
 * @property name File name decoded as UTF-8.
 * @property originalSize original_size field (unsigned 32-bit value).
 * @property compressedSize compressed_size field (unsigned 32-bit value).
 * @property encryptedSize Number of ciphertext bytes stored for this file.
 * @property dataOffset Absolute file offset of the ciphertext (unsigned 32-bit value).
 * @property iv 16-byte AES-CBC IV.
 * @property hmac 32-byte HMAC-SHA-256 tag.
 * @property sha256 32-byte SHA-256 checksum.
 * @property compressionFlag compression_flag byte; 1 means the payload is gzip-compressed.
 * @property paddingAfter padding_after field (unsigned 16-bit value).
 */
data class TocEntry(
    val name: String,
    val originalSize: Long,
    val compressedSize: Long,
    val encryptedSize: Int,
    val dataOffset: Long,
    val iv: ByteArray,
    val hmac: ByteArray,
    val sha256: ByteArray,
    val compressionFlag: Int,
    val paddingAfter: Int,
) {
    // The generated data-class equals() compares arrays by reference, so it is
    // replaced with a structural comparison.
    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (other !is TocEntry) return false
        return name == other.name &&
            originalSize == other.originalSize &&
            compressedSize == other.compressedSize &&
            encryptedSize == other.encryptedSize &&
            dataOffset == other.dataOffset &&
            iv.contentEquals(other.iv) &&
            hmac.contentEquals(other.hmac) &&
            sha256.contentEquals(other.sha256) &&
            compressionFlag == other.compressionFlag &&
            paddingAfter == other.paddingAfter
    }

    // Must agree with equals(): hash array contents, not identities.
    override fun hashCode(): Int {
        var result = name.hashCode()
        result = 31 * result + originalSize.hashCode()
        result = 31 * result + compressedSize.hashCode()
        result = 31 * result + encryptedSize
        result = 31 * result + dataOffset.hashCode()
        result = 31 * result + iv.contentHashCode()
        result = 31 * result + hmac.contentHashCode()
        result = 31 * result + sha256.contentHashCode()
        result = 31 * result + compressionFlag
        result = 31 * result + paddingAfter
        return result
    }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Little-endian integer helpers (using ByteBuffer)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Decode the two bytes of [data] starting at [offset] as a little-endian
 * unsigned 16-bit integer.
 *
 * @return The value widened to a non-negative [Int] in 0..0xFFFF.
 */
fun readLeU16(data: ByteArray, offset: Int): Int {
    val window = ByteBuffer.wrap(data, offset, 2)
    window.order(ByteOrder.LITTLE_ENDIAN)
    val signed: Short = window.short
    // Mask off the sign extension introduced by Short -> Int widening.
    return signed.toInt() and 0xFFFF
}
|
||||
|
||||
/**
 * Decode the four bytes of [data] starting at [offset] as a little-endian
 * unsigned 32-bit integer.
 *
 * @return The value widened to a non-negative [Long] in 0..0xFFFFFFFF.
 */
fun readLeU32(data: ByteArray, offset: Int): Long {
    val window = ByteBuffer.wrap(data, offset, 4)
    window.order(ByteOrder.LITTLE_ENDIAN)
    val signed: Int = window.int
    // Mask off the sign extension introduced by Int -> Long widening.
    return signed.toLong() and 0xFFFFFFFFL
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Header parsing (FORMAT.md Section 4)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Parse the fixed-size archive header from the start of [data].
 *
 * Checks, in order: that at least [HEADER_SIZE] bytes are present, that bytes 0..3
 * equal [MAGIC], that the version byte (offset 4) is 1, and that bits 4-7 of the
 * flags byte (offset 5) are zero. The remaining fields are read from offsets
 * 6 (file count, u16 LE), 8 (TOC offset, u32 LE), 12 (TOC size, u32 LE) and
 * 16..31 (TOC IV). Bytes 32..39 of the header are not read here.
 *
 * @throws IllegalArgumentException if any of the checks fails.
 */
fun parseHeader(data: ByteArray): ArchiveHeader {
    require(data.size >= HEADER_SIZE) { "Header too short: ${data.size} bytes" }

    // Magic: the first four bytes must match the expected signature.
    val magicMatches = (0..3).all { i -> data[i] == MAGIC[i] }
    require(magicMatches) { "Invalid magic bytes" }

    // Only format version 1 is understood.
    val version = data[4].toInt() and 0xFF
    require(version == 1) { "Unsupported version: $version" }

    // The upper nibble of the flags byte is reserved and must be clear.
    val flags = data[5].toInt() and 0xFF
    val reservedBits = flags and 0xF0
    require(reservedBits == 0) { "Unknown flags set: 0x${flags.toString(16)} (bits 4-7 must be zero)" }

    return ArchiveHeader(
        version = version,
        flags = flags,
        fileCount = readLeU16(data, 6),
        tocOffset = readLeU32(data, 8),
        tocSize = readLeU32(data, 12),
        tocIv = data.copyOfRange(16, 32),
    )
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// TOC parsing (FORMAT.md Section 5)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Parse a single TOC entry from [data] starting at [offset].
 *
 * Layout (all integers little-endian): name_length (u16), name (UTF-8),
 * original_size, compressed_size, encrypted_size, data_offset (u32 each),
 * iv (16 bytes), hmac (32 bytes), sha256 (32 bytes), compression_flag (u8),
 * padding_after (u16). Total entry size: 101 + name_length bytes.
 *
 * @param data The raw TOC bytes.
 * @param offset Index in [data] at which the entry begins.
 * @return A Pair of the parsed entry and the offset of the first byte after it.
 * @throws IllegalArgumentException if the entry does not fit inside [data], or if
 *   encrypted_size does not fit in a non-negative [Int].
 */
fun parseTocEntry(data: ByteArray, offset: Int): Pair<TocEntry, Int> {
    require(offset >= 0 && data.size - offset >= 2) {
        "Truncated TOC entry at offset $offset: missing name_length"
    }
    var pos = offset

    // name_length (u16 LE)
    val nameLength = readLeU16(data, pos)
    pos += 2

    // Everything after name_length: name + 4 * u32 (16) + iv (16) + hmac (32)
    // + sha256 (32) + compression_flag (1) + padding_after (2) = name + 99 bytes.
    val needed = nameLength + 99
    val remaining = data.size - pos
    require(remaining >= needed) {
        "Truncated TOC entry at offset $offset: need $needed more bytes, have $remaining"
    }

    // name (UTF-8 bytes)
    val name = String(data, pos, nameLength, Charsets.UTF_8)
    pos += nameLength

    // Fixed fields (all u32 LE)
    val originalSize = readLeU32(data, pos)
    pos += 4
    val compressedSize = readLeU32(data, pos)
    pos += 4
    val encryptedSizeRaw = readLeU32(data, pos)
    pos += 4
    // A u32 above Int.MAX_VALUE would wrap to a negative Int and later be used
    // as an array size, so reject it here with a clear message.
    require(encryptedSizeRaw <= Int.MAX_VALUE.toLong()) {
        "Encrypted size $encryptedSizeRaw of '$name' exceeds the supported maximum"
    }
    val encryptedSize = encryptedSizeRaw.toInt()
    val dataOffset = readLeU32(data, pos)
    pos += 4

    // iv (16 bytes)
    val iv = data.copyOfRange(pos, pos + 16)
    pos += 16

    // hmac (32 bytes)
    val hmac = data.copyOfRange(pos, pos + 32)
    pos += 32

    // sha256 (32 bytes)
    val sha256 = data.copyOfRange(pos, pos + 32)
    pos += 32

    // compression_flag (u8)
    val compressionFlag = data[pos].toInt() and 0xFF
    pos += 1

    // padding_after (u16 LE)
    val paddingAfter = readLeU16(data, pos)
    pos += 2

    val entry = TocEntry(
        name, originalSize, compressedSize, encryptedSize,
        dataOffset, iv, hmac, sha256, compressionFlag, paddingAfter,
    )
    return Pair(entry, pos)
}
|
||||
|
||||
/**
 * Parse [fileCount] consecutive TOC entries from the beginning of [data].
 *
 * Entries are read back to back; once the last one has been parsed the cursor
 * must sit exactly at the end of [data].
 *
 * @return The parsed entries in the order they appear in the TOC.
 * @throws IllegalArgumentException if the entries do not consume [data] exactly.
 */
fun parseToc(data: ByteArray, fileCount: Int): List<TocEntry> {
    val parsed = ArrayList<TocEntry>()
    var cursor = 0

    repeat(fileCount) {
        val step = parseTocEntry(data, cursor)
        parsed += step.first
        cursor = step.second
    }

    require(cursor == data.size) {
        "TOC parsing error: consumed $cursor bytes but TOC size is ${data.size}"
    }
    return parsed
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Crypto utility functions (FORMAT.md Section 7, Section 13.6)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Verify HMAC-SHA-256 over IV || ciphertext.
 *
 * The computed tag is compared with [expectedHmac] using [MessageDigest.isEqual]
 * instead of an early-exit array comparison, so the time taken does not reveal
 * how many leading bytes of a forged tag were correct.
 *
 * @param iv The 16-byte IV from the TOC entry.
 * @param ciphertext The encrypted data (encrypted_size bytes).
 * @param key The 32-byte key (same as AES key in v1).
 * @param expectedHmac The 32-byte HMAC from the TOC entry.
 * @return true if HMAC matches.
 */
fun verifyHmac(iv: ByteArray, ciphertext: ByteArray, key: ByteArray, expectedHmac: ByteArray): Boolean {
    val mac = Mac.getInstance("HmacSHA256")
    mac.init(SecretKeySpec(key, "HmacSHA256"))
    // The MAC input is the IV followed by the ciphertext.
    mac.update(iv)
    mac.update(ciphertext)
    val computed = mac.doFinal()
    return MessageDigest.isEqual(computed, expectedHmac)
}
|
||||
|
||||
/**
 * Decrypt [ciphertext] with AES in CBC mode.
 *
 * The JCE transformation name is "PKCS5Padding"; for AES's 16-byte blocks this is
 * the same scheme as PKCS7, and the padding is stripped by the final cipher call.
 *
 * @param ciphertext The encrypted data.
 * @param iv The 16-byte IV.
 * @param key The 32-byte AES key.
 * @return Decrypted plaintext (padding already removed).
 */
fun decryptAesCbc(ciphertext: ByteArray, iv: ByteArray, key: ByteArray): ByteArray =
    Cipher.getInstance("AES/CBC/PKCS5Padding").run {
        init(Cipher.DECRYPT_MODE, SecretKeySpec(key, "AES"), IvParameterSpec(iv))
        doFinal(ciphertext)
    }
|
||||
|
||||
/**
 * Decompress gzip data.
 *
 * The gzip stream is closed once the data has been read (or if reading fails),
 * instead of being left open.
 *
 * @param compressed Gzip-compressed data.
 * @return Decompressed data.
 */
fun decompressGzip(compressed: ByteArray): ByteArray {
    return GZIPInputStream(ByteArrayInputStream(compressed)).use { gzip ->
        gzip.readBytes()
    }
}
|
||||
|
||||
/**
 * Check that the SHA-256 digest of [data] equals [expectedSha256].
 *
 * @param data The decompressed file content.
 * @param expectedSha256 The 32-byte SHA-256 from the TOC entry.
 * @return true if checksum matches.
 */
fun verifySha256(data: ByteArray, expectedSha256: ByteArray): Boolean =
    MessageDigest.getInstance("SHA-256")
        .digest(data)
        .contentEquals(expectedSha256)
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Main decode orchestration (FORMAT.md Section 10)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Decode an encrypted archive, extracting all files to [outputDir].
 *
 * Follows FORMAT.md Section 10 decode order:
 * 1. Read and parse the 40-byte header.
 * 2. Seek to tocOffset, read and parse the TOC entries.
 * 3. For each file: verify HMAC, decrypt, decompress, verify SHA-256, write.
 *
 * Per-file behavior:
 * - An entry whose HMAC does not verify is reported on stderr and skipped.
 * - A SHA-256 mismatch is reported on stderr but the file is still written.
 * - An entry whose name would resolve outside [outputDir] (for example via "..")
 *   is reported on stderr and skipped.
 * - Missing parent directories of the output file are created.
 *
 * The archive file is closed on every exit path, including when an exception
 * is thrown.
 *
 * @param archivePath Path of the archive to read.
 * @param outputDir Directory that receives the extracted files.
 * @throws IllegalArgumentException if the header or TOC is malformed, or a size
 *   field cannot be represented as an array length.
 */
fun decode(archivePath: String, outputDir: String) {
    // Canonical root used to confine every extracted file to outputDir.
    val outputRoot = File(outputDir).canonicalFile
    val rootPrefix = if (outputRoot.path.endsWith(File.separator)) {
        outputRoot.path
    } else {
        outputRoot.path + File.separator
    }

    var successCount = 0

    RandomAccessFile(archivePath, "r").use { raf ->
        // Read 40-byte header
        val headerBytes = ByteArray(HEADER_SIZE)
        raf.readFully(headerBytes)
        val header = parseHeader(headerBytes)

        // tocSize is an unsigned 32-bit value; narrowing anything above
        // Int.MAX_VALUE would produce a negative array size.
        require(header.tocSize <= Int.MAX_VALUE.toLong()) {
            "TOC size ${header.tocSize} exceeds the supported maximum"
        }

        // Seek to TOC and read all TOC bytes
        raf.seek(header.tocOffset)
        val tocBytes = ByteArray(header.tocSize.toInt())
        raf.readFully(tocBytes)

        // Parse all TOC entries
        val entries = parseToc(tocBytes, header.fileCount)

        for (entry in entries) {
            require(entry.encryptedSize >= 0) {
                "Invalid encrypted size ${entry.encryptedSize} for ${entry.name}"
            }

            // Step 1: Seek to data_offset and read ciphertext
            raf.seek(entry.dataOffset)
            val ciphertext = ByteArray(entry.encryptedSize)
            raf.readFully(ciphertext)

            // Step 2: Verify HMAC FIRST (Encrypt-then-MAC -- FORMAT.md Section 7)
            if (!verifyHmac(entry.iv, ciphertext, KEY, entry.hmac)) {
                System.err.println("HMAC failed for ${entry.name}, skipping")
                continue
            }

            // Step 3: Decrypt (PKCS5Padding auto-removes PKCS7 padding)
            val decrypted = decryptAesCbc(ciphertext, entry.iv, KEY)

            // Step 4: Decompress if compression_flag == 1
            val original = if (entry.compressionFlag == 1) {
                decompressGzip(decrypted)
            } else {
                decrypted
            }

            // Step 5: Verify SHA-256 checksum
            if (!verifySha256(original, entry.sha256)) {
                System.err.println("WARNING: SHA-256 mismatch for ${entry.name}")
                // Still write the file (matching Rust behavior)
            }

            // Step 6: Write output file. The name comes from the archive, so
            // resolve it and refuse anything that lands outside the output root.
            val outFile = File(outputRoot, entry.name).canonicalFile
            if (!outFile.path.startsWith(rootPrefix)) {
                System.err.println("Unsafe path for ${entry.name}, skipping")
                continue
            }
            // Names may contain subdirectories that do not exist yet.
            outFile.parentFile?.mkdirs()
            outFile.writeBytes(original)
            println("Extracted: ${entry.name} (${original.size} bytes)")
            successCount++
        }
    }

    println("Done: $successCount files extracted")
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// CLI entry point
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Command-line entry point: `<archive> <output_dir>`.
 *
 * Prints a usage line to stderr and exits with status 1 unless exactly two
 * arguments are given. Otherwise checks that the archive exists, creates the
 * output directory (including parents) and runs [decode].
 *
 * @throws IllegalArgumentException if the archive path does not exist.
 */
fun main(args: Array<String>) {
    val argumentCountOk = args.size == 2
    if (!argumentCountOk) {
        System.err.println("Usage: java -jar ArchiveDecoder.jar <archive> <output_dir>")
        System.exit(1)
    }

    val (archivePath, outputDir) = args

    // Fail early with a readable message when the input file is missing.
    val archiveFile = File(archivePath)
    require(archiveFile.exists()) { "Archive not found: $archivePath" }

    // Make sure the destination directory is present before extraction starts.
    val destination = File(outputDir)
    destination.mkdirs()

    decode(archivePath, outputDir)
}
|
||||
Reference in New Issue
Block a user