You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
427 lines
9.4 KiB
427 lines
9.4 KiB
//! DEFLATE loader. |
|
|
|
use crate::{ |
|
data::read, |
|
ffi, |
|
types::{stkvec, Cast, StkVec}, |
|
}; |
|
use std::cmp::Ordering; |
|
|
|
type AlphabetTable = StkVec<[u16; 320]>; |
|
|
|
#[derive(thiserror::Error, Debug)] |
|
pub enum Err { |
|
#[error("Bad stream block type")] |
|
BlockType, |
|
#[error("Not enough bits for block")] |
|
BlockBits, |
|
#[error("Not enough bits for 0-repeat")] |
|
Bits0, |
|
#[error("Not enough bits for code-repeat")] |
|
BitsCode, |
|
#[error("Couldn't decode from table")] |
|
TableDecode, |
|
#[error("Bad distance in pair")] |
|
Distance, |
|
#[error("Couldn't read header for dynamic stream")] |
|
DynStream, |
|
#[error("Couldn't decode length/distance pair")] |
|
Pair, |
|
#[error("Not enough bytes for literal")] |
|
Literal, |
|
#[error("Bad symbol in alphabet")] |
|
Symbol, |
|
#[error("Couldn't read table")] |
|
Table, |
|
} |
|
|
|
/// Validates a zlib stream header and strips it.
///
/// The header is the first two bytes of `b`. It is accepted when the
/// method nibble is 8, the window nibble is at most 7, the two bytes
/// read as a big-endian `u16` are a multiple of 31, and the preset
/// dictionary flag is clear.
///
/// Returns the bytes following the header, or `None` if the header does
/// not match the specification or is unsupported.
pub fn read_zlib(b: &[u8]) -> Option<&[u8]> {
    const METHOD_MASK: u8 = 0b0000_1111;
    const WINDOW_MASK: u8 = 0b1111_0000;
    const PRESET_DICT: u8 = 0b0010_0000;

    // fails for inputs shorter than the two header bytes
    let payload = b.get(2..)?;
    let (cmf, flg) = (b[0], b[1]);

    let method_ok = (cmf & METHOD_MASK) == 8;
    let window_ok = (cmf & WINDOW_MASK) <= (7 << 4);
    let check_ok = u16::from_be_bytes([cmf, flg]) % 31 == 0;
    let no_dict = (flg & PRESET_DICT) == 0;

    if method_ok && window_ok && check_ok && no_dict {
        Some(payload)
    } else {
        None
    }
}
|
|
|
/// Loads a gzip file header. |
|
/// |
|
/// Returns `None` if the header does not match the specification or |
|
/// is unsupported. |
|
pub fn read_gzip(b: &[u8]) -> Option<&[u8]> { |
|
const FCRC: u8 = 1 << 1; |
|
const FEXT: u8 = 1 << 2; |
|
const FNAM: u8 = 1 << 3; |
|
const FCOM: u8 = 1 << 4; |
|
|
|
if b.len() >= 10 && b[0..3] == [0x1f, 0x8b, 8] && b[3] & 0xE0 == 0 { |
|
let fl = b[3]; |
|
|
|
let b = b.get(10..)?; |
|
let b = if fl & FEXT == 0 { |
|
b |
|
} else { |
|
b.get(usize::from(read::u16le(b.get(0..2)?, 0)) + 2..)? |
|
}; |
|
let b = |
|
if fl & FNAM == 0 { b } else { b.get(ffi::end_of_cstr(b) + 1..)? }; |
|
let b = |
|
if fl & FCOM == 0 { b } else { b.get(ffi::end_of_cstr(b) + 1..)? }; |
|
let b = if fl & FCRC == 0 { b } else { b.get(2..)? }; |
|
Some(b) |
|
} else { |
|
None |
|
} |
|
} |
|
|
|
/// Decompresses a DEFLATE compressed bitstream starting at the `p`-th bit. |
|
/// |
|
/// # Errors |
|
/// |
|
/// Returns `Err` if the bit-stream fails to parse. |
|
pub fn read_deflate_from(b: &[u8], p: &mut usize) -> Result<Vec<u8>, Err> { |
|
let mut v = Vec::new(); |
|
|
|
loop { |
|
let bfinal = read::ble_u8(b, *p, 1).ok_or(Err::BlockBits)?; |
|
let bstype = read::ble_u8(b, *p + 1, 2).ok_or(Err::BlockBits)?; |
|
*p += 3; |
|
|
|
match bstype { |
|
| 0b10 => st_dynamic(b, p, &mut v)?, |
|
| 0b01 => st_s_table(b, p, &mut v)?, |
|
| 0b00 => st_literal(b, p, &mut v)?, |
|
| _ => return Err(Err::BlockType), |
|
} |
|
|
|
if bfinal == 1 { |
|
return Ok(v); |
|
} |
|
} |
|
} |
|
|
|
/// Decompresses a DEFLATE compressed bitstream. |
|
/// |
|
/// # Errors |
|
/// |
|
/// Returns `Err` if the bit-stream fails to parse. |
|
pub fn read_deflate(b: &[u8]) -> Result<Vec<u8>, Err> { |
|
read_deflate_from(b, &mut 0) |
|
} |
|
|
|
fn st_dynamic(b: &[u8], p: &mut usize, v: &mut Vec<u8>) -> Result<(), Err> { |
|
const CODE_ORDERING: [usize; 19] = |
|
[16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15]; |
|
|
|
const NHLITS: usize = 257; |
|
const NHDIST: usize = 1; |
|
const NHCLEN: usize = 4; |
|
|
|
// read the header: number of literal alphabet codes, number of |
|
// distance alphabet codes, and number of lengths for decoding the |
|
// alphabet |
|
let hlits = read::ble_usz(b, *p, 5).ok_or(Err::DynStream)?; |
|
let hdist = read::ble_usz(b, *p + 5, 5).ok_or(Err::DynStream)?; |
|
let hclen = read::ble_usz(b, *p + 10, 4).ok_or(Err::DynStream)?; |
|
*p += 14; |
|
|
|
let hlits = NHLITS + hlits; |
|
let hdist = NHDIST + hdist; |
|
let hclen = NHCLEN + hclen; |
|
|
|
// first, get the huffman coding for the alphabet, which is also |
|
// compressed |
|
let mut table = [0; 19]; |
|
for &i in CODE_ORDERING.iter().take(hclen) { |
|
table[i] = read::ble_u16(b, *p, 3).ok_or(Err::DynStream)?; |
|
*p += 3; |
|
} |
|
|
|
// then, we decode the alphabet, doing both types at the same time, |
|
// because they're encoded the same anyways |
|
let table = HuffmanTable::read(&table)?; |
|
let alpha = read_alphabet(b, p, hlits + hdist, &table)?; |
|
|
|
if alpha[256] == 0 { |
|
Err(Err::DynStream) |
|
} else { |
|
// build the length and distance tables from this information |
|
let table_len = HuffmanTable::read(&alpha[0..hlits])?; |
|
let table_dst = HuffmanTable::read(&alpha[hlits..hlits + hdist])?; |
|
|
|
output_tables(b, p, v, &table_len, &table_dst) |
|
} |
|
} |
|
|
|
fn st_s_table(b: &[u8], p: &mut usize, v: &mut Vec<u8>) -> Result<(), Err> { |
|
let mut len = [0; 288]; |
|
|
|
for (i, len) in len.iter_mut().enumerate() { |
|
*len = match i { |
|
| 144..=255 => 9, |
|
| 256..=279 => 7, |
|
| _ => 8, |
|
}; |
|
} |
|
|
|
let dst = [5; 30]; |
|
|
|
let table_len = HuffmanTable::read(&len)?; |
|
let table_dst = HuffmanTable::read(&dst)?; |
|
|
|
output_tables(b, p, v, &table_len, &table_dst) |
|
} |
|
|
|
fn st_literal(b: &[u8], p: &mut usize, v: &mut Vec<u8>) -> Result<(), Err> { |
|
// copy data directly from byte boundary |
|
let bound = *p / 8 + 1; |
|
let b = &b[bound..]; |
|
let len = read::u16le(b.get(0..2).ok_or(Err::Literal)?, 0).into(); |
|
let b = &b[4..]; |
|
*p += bound * 8 + 4 * 8 + len * 8; |
|
|
|
if len > b.len() { |
|
Err(Err::Literal) |
|
} else { |
|
v.extend(&b[..len]); |
|
Ok(()) |
|
} |
|
} |
|
|
|
fn next_alpha( |
|
alpha: &mut AlphabetTable, i: &mut usize, n: u16, |
|
) -> Result<(), Err> { |
|
alpha[*i] = n; |
|
*i += 1; |
|
Ok(()) |
|
} |
|
|
|
fn read_alphabet( |
|
b: &[u8], p: &mut usize, n: usize, table: &HuffmanTable, |
|
) -> Result<AlphabetTable, Err> { |
|
let mut alpha = stkvec![0; n]; |
|
let mut i = 0; |
|
|
|
while i < alpha.len() { |
|
let ty = table.decode(b, p)?; |
|
|
|
match ty { |
|
| 0..=15 => { |
|
// raw code |
|
next_alpha(&mut alpha, &mut i, ty)?; |
|
} |
|
| 16 => { |
|
// copy previous code 3-6 times |
|
let lst = alpha[i - 1]; |
|
let len = read::ble_u8(b, *p, 2).ok_or(Err::BitsCode)? + 3; |
|
*p += 2; |
|
|
|
for _ in 0..len { |
|
next_alpha(&mut alpha, &mut i, lst)?; |
|
} |
|
} |
|
| 17 => { |
|
// repeat '0' 3-10 times |
|
let len = read::ble_u8(b, *p, 3).ok_or(Err::Bits0)? + 3; |
|
*p += 3; |
|
|
|
for _ in 0..len { |
|
next_alpha(&mut alpha, &mut i, 0)?; |
|
} |
|
} |
|
| 18 => { |
|
// repeat '0' 11-138 times |
|
let len = read::ble_u8(b, *p, 7).ok_or(Err::Bits0)? + 11; |
|
*p += 7; |
|
|
|
for _ in 0..len { |
|
next_alpha(&mut alpha, &mut i, 0)?; |
|
} |
|
} |
|
| _ => { |
|
return Err(Err::Symbol); |
|
} |
|
} |
|
} |
|
|
|
Ok(alpha) |
|
} |
|
|
|
fn output_tables( |
|
b: &[u8], p: &mut usize, v: &mut Vec<u8>, table_len: &HuffmanTable, |
|
table_dst: &HuffmanTable, |
|
) -> Result<(), Err> { |
|
const LEN_BASE: [usize; 29] = [ |
|
3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, |
|
67, 83, 99, 115, 131, 163, 195, 227, 258, |
|
]; |
|
|
|
const LEN_EXTRA_BITS: [usize; 29] = [ |
|
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, |
|
5, 5, 5, 5, 0, |
|
]; |
|
|
|
const DST_BASE: [usize; 30] = [ |
|
1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, |
|
513, 769, 1025, 1537, 2049, 3073, 4097, 0x1801, 0x2001, 0x3001, 0x4001, |
|
0x6001, |
|
]; |
|
|
|
const DST_EXTRA_BITS: [usize; 30] = [ |
|
0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, |
|
10, 11, 11, 12, 12, 13, 13, |
|
]; |
|
|
|
loop { |
|
let ty = table_len.decode(b, p)?; |
|
|
|
match ty.cmp(&256) { |
|
| Ordering::Less => { |
|
// direct byte |
|
v.push(ty as u8); |
|
} |
|
| Ordering::Equal => { |
|
// finished |
|
return Ok(()); |
|
} |
|
| Ordering::Greater => { |
|
// <len, dst> pair |
|
|
|
// decode the length (plus extra bits) |
|
let len = { |
|
let sym = usize::from(ty - 257); |
|
|
|
if sym > 29 { |
|
return Err(Err::Pair); |
|
} |
|
|
|
let bit = LEN_EXTRA_BITS[sym]; |
|
let len = read::ble_usz(b, *p, bit).ok_or(Err::Pair)?; |
|
|
|
*p += bit; |
|
|
|
LEN_BASE[sym] + len |
|
}; |
|
|
|
// decode the distance with its alphabet (plus extra bits) |
|
let dst = { |
|
let sym = usize::from(table_dst.decode(b, p)?); |
|
|
|
let bit = DST_EXTRA_BITS[sym]; |
|
let dst = read::ble_usz(b, *p, bit).ok_or(Err::Pair)?; |
|
|
|
*p += bit; |
|
|
|
DST_BASE[sym] + dst |
|
}; |
|
|
|
if dst > v.len() { |
|
return Err(Err::Distance); |
|
} |
|
|
|
// copy bytes from earlier |
|
for _ in 0..len { |
|
v.push(v[v.len() - dst]); |
|
} |
|
} |
|
} |
|
} |
|
} |
|
|
|
impl HuffmanTable { |
|
fn read(table: &[u16]) -> Result<Self, Err> { |
|
let mut syms = stkvec![0; table.len()]; |
|
let mut nums = [0; 16]; |
|
|
|
// count the number of symbols for each bit length |
|
for &length in table { |
|
nums[usize::from(length)] += 1; |
|
} |
|
|
|
if nums[0] == table.len() { |
|
return Err(Err::Table); |
|
} |
|
|
|
// make offsets into the symbol table for each bit count |
|
let mut ofs = [0; 16]; |
|
|
|
for i in 1..=14 { |
|
ofs[i + 1] = ofs[i] + nums[i]; |
|
} |
|
|
|
// make the actual bit pattern table |
|
for (&length, n) in table.iter().zip(0..table.len().cast()) { |
|
// length 0 means this code isn't used, so only try to make bit |
|
// patterns for codes that actually exist |
|
if length != 0 { |
|
// make sure to make each offset unique |
|
let offset = &mut ofs[usize::from(length)]; |
|
syms[*offset] = n; |
|
*offset += 1; |
|
} |
|
} |
|
|
|
Ok(Self { nums, syms }) |
|
} |
|
|
|
fn decode(&self, b: &[u8], p: &mut usize) -> Result<u16, Err> { |
|
let mut codes = 0; |
|
let mut first = 0; |
|
let mut index = 0; |
|
let mut i = 1; |
|
|
|
while i < 16 { |
|
// add bit from file |
|
codes |= read::ble_usz(b, *p, 1).ok_or(Err::Table)?; |
|
*p += 1; |
|
|
|
// check our symbol table for this one (quick tree check) |
|
let count = self.nums[i]; |
|
i += 1; |
|
|
|
if (codes as isize) - (count as isize) < (first as isize) { |
|
return Ok(self.syms[index + codes - first]); |
|
} |
|
|
|
// continue on, trying to find the correct sequence |
|
index += count; |
|
first += count; |
|
|
|
first <<= 1; |
|
codes <<= 1; |
|
} |
|
|
|
Err(Err::TableDecode) |
|
} |
|
} |
|
|
|
#[derive(Debug)] |
|
struct HuffmanTable { |
|
nums: [usize; 16], |
|
syms: StkVec<[u16; 288]>, |
|
} |
|
|
|
// EOF
|
|
|