You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

413 lines
9.2 KiB

//! DEFLATE loader.
use crate::{
data::{bit::ReadBits, read},
ffi,
};
use std::cmp::Ordering;
/// Errors that can occur while decoding a DEFLATE bit-stream.
#[derive(Error, Debug)]
pub enum Err {
/// The 2-bit block type in a block header was not stored, fixed or dynamic.
#[error("Bad stream block type")]
BlockType,
/// The 3-bit block header could not be read from the input.
#[error("Not enough bits for block")]
BlockBits,
/// The repeat count following a "repeat zero" code-length symbol
/// (17 or 18) could not be read.
#[error("Not enough bits for 0-repeat")]
Bits0,
/// The "repeat previous length" symbol (16) could not be processed:
/// its repeat count could not be read or there was no previous length.
#[error("Not enough bits for code-repeat")]
BitsCode,
/// A code length would have been written past the end of the alphabet.
#[error("Invalid alphabet index")]
AlphaIndex,
/// No symbol matched after consuming the maximum code length (15 bits).
#[error("Couldn't decode from table")]
TableDecode,
/// The distance of a length/distance pair was not usable, for example
/// because it reaches back before the start of the output.
#[error("Bad distance in pair")]
Distance,
/// The header of a dynamic block could not be read, or the block
/// defines no code for the end-of-block symbol.
#[error("Couldn't read header for dynamic stream")]
DynStream,
/// A length/distance pair could not be decoded.
#[error("Couldn't decode length/distance pair")]
Pair,
/// A stored block's header or payload extends past the end of the input.
#[error("Not enough bytes for literal")]
Literal,
/// The code-length alphabet produced a symbol outside `0..=18`.
#[error("Bad symbol in alphabet")]
Symbol,
/// A Huffman table could not be built (every code length was zero), or
/// the input ran out while reading a code from it.
#[error("Couldn't read table")]
Table,
}
/// Validates a zlib stream header and strips it from the input.
///
/// The header is accepted when the compression method is 8 (DEFLATE),
/// the window-size field is at most 7, the two header bytes read as a
/// big-endian `u16` are a multiple of 31, and no preset dictionary is
/// requested.
///
/// Returns the bytes following the 2-byte header, or `None` if the
/// header does not match the specification or is unsupported.
pub fn read_zlib(b: &[u8]) -> Option<&[u8]> {
    const METHOD_MASK: u8 = 0x0F;
    const WINDOW_MASK: u8 = 0xF0;
    const PRESET_DICT: u8 = 0x20;
    const DEFLATE: u8 = 8;
    const MAX_WINDOW: u8 = 7 << 4;

    let cmf = *b.first()?;
    let flg = *b.get(1)?;

    if (cmf & METHOD_MASK) != DEFLATE || (cmf & WINDOW_MASK) > MAX_WINDOW {
        return None;
    }
    // the header bytes double as their own checksum
    if u16::from_be_bytes([cmf, flg]) % 31 != 0 {
        return None;
    }
    // preset dictionaries are not supported
    if (flg & PRESET_DICT) != 0 {
        return None;
    }
    b.get(2..)
}
/// Validates a gzip member header and strips it from the input.
///
/// After the fixed 10-byte header, the optional extra field, file name,
/// comment and header checksum are skipped, in that order, according to
/// the flag byte.
///
/// Returns the bytes following the header, or `None` if the header does
/// not match the specification or is unsupported.
pub fn read_gzip(b: &[u8]) -> Option<&[u8]> {
    const FLAG_HCRC: u8 = 0x02;
    const FLAG_EXTRA: u8 = 0x04;
    const FLAG_NAME: u8 = 0x08;
    const FLAG_COMMENT: u8 = 0x10;
    const FLAG_RESERVED: u8 = 0xE0;
    const FIXED_LEN: usize = 10;

    // magic number followed by the DEFLATE method identifier
    if b.len() < FIXED_LEN || b[0..3] != [0x1f, 0x8b, 8] {
        return None;
    }
    let flags = b[3];
    if flags & FLAG_RESERVED != 0 {
        return None;
    }

    let mut rest = &b[FIXED_LEN..];
    if flags & FLAG_EXTRA != 0 {
        // 2-byte little-endian length, then that many bytes
        let xlen = read::u16le_sz(rest.get(0..2)?, 0);
        rest = rest.get(xlen + 2..)?;
    }
    if flags & FLAG_NAME != 0 {
        rest = rest.get(ffi::end_of_cstr(rest) + 1..)?;
    }
    if flags & FLAG_COMMENT != 0 {
        rest = rest.get(ffi::end_of_cstr(rest) + 1..)?;
    }
    if flags & FLAG_HCRC != 0 {
        rest = rest.get(2..)?;
    }
    Some(rest)
}
/// Decompresses a DEFLATE compressed bitstream.
///
/// `b` is the compressed input and `p` is the current position in it,
/// counted in bits. Blocks are decoded one after another, each
/// appending to the output, until a block flagged as final has been
/// processed. `p` is advanced as bits are consumed.
///
/// # Errors
///
/// Returns `Err` if the bit-stream fails to parse.
pub fn read_defl(b: &[u8], p: &mut usize) -> Result<Vec<u8>, Err> {
    let mut out = Vec::new();
    let mut last_block = false;

    while !last_block {
        // 3-bit block header: final flag, then block type
        let is_final = u8::read_bits_le(b, *p, 1).ok_or(Err::BlockBits)?;
        let kind = u8::read_bits_le(b, *p + 1, 2).ok_or(Err::BlockBits)?;
        *p += 3;

        match kind {
            0b00 => st_literal(b, p, &mut out)?,
            0b01 => st_s_table(b, p, &mut out)?,
            0b10 => st_dynamic(b, p, &mut out)?,
            _ => return Err(Err::BlockType),
        }

        last_block = is_final == 1;
    }

    Ok(out)
}
/// Decodes one block compressed with dynamic Huffman tables.
///
/// Reads the block header and the code-length table, uses that table to
/// decode the code lengths of the literal/length and distance
/// alphabets, then decodes the block's data into `v`.
///
/// Returns `Err::DynStream` if the header cannot be read or the
/// end-of-block symbol has no code; other errors are passed through
/// from the table and data decoders.
fn st_dynamic(b: &[u8], p: &mut usize, v: &mut Vec<u8>) -> Result<(), Err> {
    // order in which the code-length code lengths are stored
    const CLEN_ORDER: [usize; 19] =
        [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15];
    const MIN_LITERALS: usize = 257;
    const MIN_DISTANCES: usize = 1;
    const MIN_CODE_LENS: usize = 4;
    const END_OF_BLOCK: usize = 256;

    // header: three counts, each stored relative to its minimum
    let raw_lit = usize::read_bits_le(b, *p, 5).ok_or(Err::DynStream)?;
    let raw_dst = usize::read_bits_le(b, *p + 5, 5).ok_or(Err::DynStream)?;
    let raw_len = usize::read_bits_le(b, *p + 10, 4).ok_or(Err::DynStream)?;
    *p += 14;

    let n_lit = MIN_LITERALS + raw_lit;
    let n_dst = MIN_DISTANCES + raw_dst;
    let n_len = MIN_CODE_LENS + raw_len;

    // the code lengths are themselves Huffman coded; read the lengths
    // of that code first, 3 bits each, in their permuted order
    let mut code_lens = [0; 19];
    for &slot in CLEN_ORDER.iter().take(n_len) {
        code_lens[slot] = u16::read_bits_le(b, *p, 3).ok_or(Err::DynStream)?;
        *p += 3;
    }
    let code_table = HuffmanTable::read(&code_lens)?;

    // both alphabets are stored back to back as one run of lengths
    let lengths = read_alphabet(b, p, n_lit + n_dst, &code_table)?;
    if lengths[END_OF_BLOCK] == 0 {
        return Err(Err::DynStream);
    }

    let table_len = HuffmanTable::read(&lengths[0..n_lit])?;
    let table_dst = HuffmanTable::read(&lengths[n_lit..n_lit + n_dst])?;
    output_tables(b, p, v, &table_len, &table_dst)
}
/// Decodes one block compressed with the fixed Huffman tables.
///
/// Builds the predefined literal/length table (288 symbols) and
/// distance table (30 symbols, 5 bits each), then decodes the block's
/// data into `v`.
fn st_s_table(b: &[u8], p: &mut usize, v: &mut Vec<u8>) -> Result<(), Err> {
    // symbols 0..=143 and 280..=287 use 8 bits; the two ranges in
    // between are overridden below
    let mut lit_lens = [8u16; 288];
    for slot in lit_lens[144..256].iter_mut() {
        *slot = 9;
    }
    for slot in lit_lens[256..280].iter_mut() {
        *slot = 7;
    }
    let dst_lens = [5u16; 30];

    let table_len = HuffmanTable::read(&lit_lens)?;
    let table_dst = HuffmanTable::read(&dst_lens)?;
    output_tables(b, p, v, &table_len, &table_dst)
}
/// Copies one stored (uncompressed) block into `v`.
///
/// On entry `p` is the bit position just past the 3-bit block header.
/// The block starts at the next byte boundary and consists of a 2-byte
/// little-endian length, 2 further bytes (the length's complement,
/// which is skipped without being validated), and then the data bytes.
/// On success `p` is set to the bit position just past the data.
///
/// # Errors
///
/// Returns `Err::Literal` if the header or the data extends past the
/// end of `b`; `p` and `v` are left untouched in that case.
fn st_literal(b: &[u8], p: &mut usize, v: &mut Vec<u8>) -> Result<(), Err> {
    const HEADER_LEN: usize = 4;

    // round up to the next byte boundary; a position that is already
    // aligned does not skip anything
    let start = (*p + 7) / 8;
    let block = b.get(start..).ok_or(Err::Literal)?;
    let len = read::u16le_sz(block.get(0..2).ok_or(Err::Literal)?, 0);
    let data = block
        .get(HEADER_LEN..)
        .and_then(|d| d.get(..len))
        .ok_or(Err::Literal)?;

    v.extend_from_slice(data);
    // `p` is an absolute bit position, so it is assigned rather than
    // incremented
    *p = (start + HEADER_LEN + len) * 8;
    Ok(())
}
/// Stores `n` at position `*i` of `alpha` and advances `*i` by one.
///
/// Returns `Err::AlphaIndex`, leaving `*i` unchanged, if `*i` is past
/// the end of `alpha`.
fn next_alpha(alpha: &mut [u16], i: &mut usize, n: u16) -> Result<(), Err> {
    match alpha.get_mut(*i) {
        Some(slot) => {
            *slot = n;
            *i += 1;
            Ok(())
        }
        None => Err(Err::AlphaIndex),
    }
}
/// Decodes `n` code lengths using the code-length Huffman table.
///
/// Each decoded symbol is either a literal code length (`0..=15`) or a
/// run instruction followed by a repeat count read directly from the
/// stream: 16 repeats the previous length 3-6 times, 17 repeats zero
/// 3-10 times and 18 repeats zero 11-138 times.
///
/// # Errors
///
/// - `Err::BitsCode` if symbol 16 appears with no previous length to
///   repeat, or its repeat count cannot be read.
/// - `Err::Bits0` if the repeat count of symbol 17 or 18 cannot be read.
/// - `Err::AlphaIndex` if a run extends past `n` entries.
/// - `Err::Symbol` if the table yields a symbol above 18.
/// - Any error from `table.decode`.
fn read_alphabet(
    b: &[u8], p: &mut usize, n: usize, table: &HuffmanTable,
) -> Result<Vec<u16>, Err> {
    let mut alpha = vec![0; n];
    let mut i = 0;
    while i < alpha.len() {
        let ty = table.decode(b, p)?;
        // work out which length to store and how many times
        let (val, reps) = match ty {
            0..=15 => {
                // raw code
                (ty, 1)
            }
            16 => {
                // copy previous code 3-6 times; at the very start there
                // is no previous code, which must be an error rather
                // than an index underflow
                let lst = i
                    .checked_sub(1)
                    .and_then(|prev| alpha.get(prev).copied())
                    .ok_or(Err::BitsCode)?;
                let len = u8::read_bits_le(b, *p, 2).ok_or(Err::BitsCode)? + 3;
                *p += 2;
                (lst, len)
            }
            17 => {
                // repeat '0' 3-10 times
                let len = u8::read_bits_le(b, *p, 3).ok_or(Err::Bits0)? + 3;
                *p += 3;
                (0, len)
            }
            18 => {
                // repeat '0' 11-138 times
                let len = u8::read_bits_le(b, *p, 7).ok_or(Err::Bits0)? + 11;
                *p += 7;
                (0, len)
            }
            _ => {
                return Err(Err::Symbol);
            }
        };
        for _ in 0..reps {
            next_alpha(&mut alpha, &mut i, val)?;
        }
    }
    Ok(alpha)
}
/// Decodes the data of one Huffman-coded block into `v`.
///
/// Symbols are decoded with `table_len` until the end-of-block symbol
/// (256) is found. Symbols below 256 are literal bytes; symbols above
/// it start a length/distance pair, whose distance is decoded with
/// `table_dst` and which copies already-produced output.
///
/// # Errors
///
/// - `Err::Pair` if a length symbol is outside the defined range or the
///   extra bits of a length or distance cannot be read.
/// - `Err::Distance` if a distance symbol is outside the defined range
///   or the distance reaches back before the start of the output.
/// - Any error from the Huffman decoders.
fn output_tables(
    b: &[u8], p: &mut usize, v: &mut Vec<u8>, table_len: &HuffmanTable,
    table_dst: &HuffmanTable,
) -> Result<(), Err> {
    const LEN_BASE: [usize; 29] = [
        3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
        67, 83, 99, 115, 131, 163, 195, 227, 258,
    ];
    const LEN_EXTRA_BITS: [usize; 29] = [
        0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4,
        5, 5, 5, 5, 0,
    ];
    const DST_BASE: [usize; 30] = [
        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385,
        513, 769, 1025, 1537, 2049, 3073, 4097, 0x1801, 0x2001, 0x3001, 0x4001,
        0x6001,
    ];
    const DST_EXTRA_BITS: [usize; 30] = [
        0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
        10, 11, 11, 12, 12, 13, 13,
    ];
    loop {
        let ty = table_len.decode(b, p)?;
        match ty.cmp(&256) {
            Ordering::Less => {
                // direct byte; the value is below 256 so nothing is lost
                v.push(ty as u8);
            }
            Ordering::Equal => {
                // finished
                return Ok(());
            }
            Ordering::Greater => {
                // <len, dst> pair
                // decode the length (plus extra bits); the alphabet can
                // hold symbols that have no table entry, so the lookup
                // is checked
                let len = {
                    let sym = usize::from(ty - 257);
                    let bit = *LEN_EXTRA_BITS.get(sym).ok_or(Err::Pair)?;
                    let len =
                        usize::read_bits_le(b, *p, bit).ok_or(Err::Pair)?;
                    *p += bit;
                    LEN_BASE[sym] + len
                };
                // decode the distance with its alphabet (plus extra bits),
                // again rejecting symbols without a table entry
                let dst = {
                    let sym = usize::from(table_dst.decode(b, p)?);
                    let bit = *DST_EXTRA_BITS.get(sym).ok_or(Err::Distance)?;
                    let dst =
                        usize::read_bits_le(b, *p, bit).ok_or(Err::Pair)?;
                    *p += bit;
                    DST_BASE[sym] + dst
                };
                if dst > v.len() {
                    return Err(Err::Distance);
                }
                // copy bytes from earlier, one at a time because the
                // source and destination ranges may overlap
                let from = v.len() - dst;
                v.reserve(len);
                for k in from..from + len {
                    let byte = v[k];
                    v.push(byte);
                }
            }
        }
    }
}
impl HuffmanTable {
    /// Builds a decoding table from a list of code lengths.
    ///
    /// `table[n]` is the bit length of the code for symbol `n`, with 0
    /// meaning the symbol is unused. Symbols are stored sorted by code
    /// length and, within one length, by symbol value.
    ///
    /// Returns `Err::Table` if every length is zero.
    fn read(table: &[u16]) -> Result<Self, Err> {
        // how many symbols use each bit length
        let mut nums = [0; 16];
        for &bits in table {
            nums[usize::from(bits)] += 1;
        }
        if nums[0] == table.len() {
            return Err(Err::Table);
        }

        // where the symbols of each bit length start in the sorted list
        let mut next = [0usize; 16];
        let mut running = 0usize;
        for bits in 1..15 {
            running += nums[bits];
            next[bits + 1] = running;
        }

        // place every used symbol into the next free slot for its length
        let mut syms = vec![0; table.len()];
        let used = table.iter().enumerate().filter(|&(_, &bits)| bits != 0);
        for (sym, &bits) in used {
            let slot = &mut next[usize::from(bits)];
            syms[*slot] = sym as u16;
            *slot += 1;
        }

        Ok(Self { nums, syms })
    }

    /// Decodes one symbol from `b` starting at bit position `*p`.
    ///
    /// Bits are consumed one at a time, advancing `*p`, until the bits
    /// read so far form a code of the current length.
    ///
    /// Returns `Err::Table` if the input runs out and `Err::TableDecode`
    /// if no code matched after 15 bits.
    fn decode(&self, b: &[u8], p: &mut usize) -> Result<u16, Err> {
        let mut code = 0;
        let mut first = 0;
        let mut index = 0;

        // one pass per code length, 1 to 15 bits
        for &count in &self.nums[1..] {
            code |= usize::read_bits_le(b, *p, 1).ok_or(Err::Table)?;
            *p += 1;

            // codes of this length occupy `first..first + count`
            if code < first + count {
                return Ok(self.syms[index + code - first]);
            }

            // no match: move on to the next, longer code length
            index += count;
            first = (first + count) << 1;
            code <<= 1;
        }

        Err(Err::TableDecode)
    }
}
/// A Huffman decoding table built from a list of code lengths.
#[derive(Debug)]
struct HuffmanTable {
/// Number of symbols using each code length, indexed by length in bits.
nums: [usize; 16],
/// Symbols sorted by code length, then by symbol value; unused trailing
/// entries are zero.
syms: Vec<u16>,
}
// EOF