//! DEFLATE loader.
|
|
|
|
use crate::{bin::check_data, bit::*, err::*};
|
|
use std::cmp::Ordering;
|
|
|
|
/// Loads a ZLIB file header.
|
|
pub fn load_zlib_header(b: &[u8]) -> ResultS<usize>
|
|
{
|
|
const CM: u8 = 0b0000_1111;
|
|
const CINFO: u8 = 0b1111_0000;
|
|
const FDICT: u8 = 0b0010_0000;
|
|
|
|
read_data! {
|
|
endian: BIG, buf: b, size: 2, start: 0, data {
|
|
let fcheck = u16[0];
|
|
let cmf = u8[0];
|
|
let flg = u8[1];
|
|
}
|
|
}
|
|
|
|
let cm = cmf & CM;
|
|
let cinfo = cmf & CINFO;
|
|
|
|
if cm != 8 {
|
|
bail!("unknown compression method");
|
|
}
|
|
|
|
if cinfo > 7 << 4 {
|
|
bail!("lz77 window size logarithm is invalid");
|
|
}
|
|
|
|
if fcheck % 31 != 0 {
|
|
bail!("invalid fcheck");
|
|
}
|
|
|
|
if flg & FDICT != 0 {
|
|
bail!("dictionary not supported");
|
|
}
|
|
|
|
Ok(2)
|
|
}
|
|
|
|
/// Loads a GZIP file header.
|
|
pub fn load_gzip_header(b: &[u8]) -> ResultS<usize>
|
|
{
|
|
const FHCRC: u8 = 1 << 1;
|
|
const FEXTRA: u8 = 1 << 2;
|
|
const FNAME: u8 = 1 << 3;
|
|
const FCOMMENT: u8 = 1 << 4;
|
|
const FRESERVED: u8 = 0xe0;
|
|
|
|
read_data! {
|
|
endian: LITTLE, buf: b, size: 10, start: 0, data {
|
|
let id = u16[0];
|
|
let cm = u8[2];
|
|
let fl = u8[3];
|
|
}
|
|
}
|
|
|
|
if id != 0x8b1f || cm != 8 {
|
|
bail!("not gzip format");
|
|
}
|
|
|
|
let mut p = 10;
|
|
|
|
if fl & FRESERVED != 0 {
|
|
bail!("reserved flags set");
|
|
}
|
|
|
|
if fl & FEXTRA != 0 {
|
|
read_data! {
|
|
endian: LITTLE, buf: b, size: 2, start: p, data {
|
|
let xlen = u16[0] usize;
|
|
}
|
|
}
|
|
|
|
p += 2 + xlen;
|
|
|
|
check_data(b, p)?;
|
|
}
|
|
|
|
if fl & FNAME != 0 {
|
|
p += skip_zero_terminated_item(&b[p..])?;
|
|
}
|
|
|
|
if fl & FCOMMENT != 0 {
|
|
p += skip_zero_terminated_item(&b[p..])?;
|
|
}
|
|
|
|
if fl & FHCRC != 0 {
|
|
p += 2;
|
|
|
|
check_data(b, p)?;
|
|
}
|
|
|
|
Ok(p)
|
|
}
|
|
|
|
fn skip_zero_terminated_item(b: &[u8]) -> ResultS<usize>
|
|
{
|
|
if let Some(i) = b.iter().position(|&n| n == 0) {
|
|
Ok(i + 1)
|
|
} else {
|
|
bail!("no end of zero terminated item");
|
|
}
|
|
}
|
|
|
|
/// Decompresses a DEFLATE compressed bitstream.
|
|
pub fn load_deflate(b: &[u8]) -> ResultS<(usize, Vec<u8>)>
|
|
{
|
|
let mut v = Vec::new();
|
|
let mut p = 0;
|
|
|
|
loop {
|
|
let bfinal = read_bits_l(b, p, 1)?;
|
|
p += 1;
|
|
let btype = read_bits_l(b, p, 2)?;
|
|
p += 2;
|
|
|
|
match btype {
|
|
0b10 => p = stream_dynamic(&mut v, b, p)?,
|
|
0b01 => p = stream_s_table(&mut v, b, p)?,
|
|
0b00 => p = stream_literal(&mut v, b, p)?,
|
|
_ => bail!("bad btype"),
|
|
}
|
|
|
|
if bfinal == 1 {
|
|
return Ok((p / 8, v));
|
|
}
|
|
}
|
|
}
|
|
|
|
fn stream_dynamic(v: &mut Vec<u8>, b: &[u8], mut p: usize) -> ResultS<usize>
|
|
{
|
|
const CODE_ORDERING: [usize; 19] =
|
|
[16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15];
|
|
|
|
// read header (number of literal alphabet codes, number of distance
|
|
// alphabet codes, and number of lengths for decoding the alphabet)
|
|
let hlit = read_bits_l(b, p, 5)?;
|
|
p += 5;
|
|
let hdist = read_bits_l(b, p, 5)?;
|
|
p += 5;
|
|
let hclen = read_bits_l(b, p, 4)?;
|
|
p += 4;
|
|
|
|
let hlit = 257 + hlit as usize;
|
|
let hdist = 1 + hdist as usize;
|
|
let hclen = 4 + hclen as usize;
|
|
|
|
// first, get the huffman coding for the alphabet (which is also compressed)
|
|
let mut code_table = [0; 19];
|
|
|
|
for i in 0..hclen {
|
|
let len = read_bits_l(b, p, 3)? as u16;
|
|
p += 3;
|
|
|
|
code_table[CODE_ORDERING[i]] = len;
|
|
}
|
|
|
|
// then, we decode the alphabet (doing both types at the same time, because
|
|
// they're encoded the same anyways)
|
|
let code_table = HuffmanTable::new(&code_table)?;
|
|
let mut alphabet = vec![0; hlit + hdist];
|
|
|
|
p = read_alphabet(b, p, &mut alphabet, code_table)?;
|
|
|
|
if alphabet[256] == 0 {
|
|
bail!("no way to end block");
|
|
}
|
|
|
|
// build the length and distance tables from this information
|
|
let table_len = HuffmanTable::new(&alphabet[0..hlit])?;
|
|
let table_dst = HuffmanTable::new(&alphabet[hlit..hlit + hdist])?;
|
|
|
|
output_tables(v, b, p, table_len, table_dst)
|
|
}
|
|
|
|
#[allow(clippy::needless_range_loop)]
|
|
fn stream_s_table(v: &mut Vec<u8>, b: &[u8], p: usize) -> ResultS<usize>
|
|
{
|
|
let mut len = [0; 288];
|
|
|
|
for i in 0..144 {len[i] = 8;}
|
|
for i in 144..256 {len[i] = 9;}
|
|
for i in 256..280 {len[i] = 7;}
|
|
for i in 280..288 {len[i] = 8;}
|
|
|
|
let dst = [5; 30];
|
|
|
|
let table_len = HuffmanTable::new(&len)?;
|
|
let table_dst = HuffmanTable::new(&dst)?;
|
|
|
|
output_tables(v, b, p, table_len, table_dst)
|
|
}
|
|
|
|
fn stream_literal(v: &mut Vec<u8>, b: &[u8], p: usize) -> ResultS<usize>
|
|
{
|
|
// copy data directly from byte boundary
|
|
let mut p = p / 8 + 1;
|
|
|
|
read_data! {
|
|
endian: LITTLE, buf: b, size: 4, start: p, data {
|
|
let len = u16[0] usize;
|
|
}
|
|
}
|
|
|
|
p += 4;
|
|
v.extend(ok!(b.get(p..p + len), "not enough data")?);
|
|
|
|
Ok((p + len) * 8)
|
|
}
|
|
|
|
fn read_alphabet(b: &[u8],
|
|
mut p: usize,
|
|
alphabet: &mut [u16],
|
|
code_table: HuffmanTable) -> ResultS<usize>
|
|
{
|
|
let mut i = 0;
|
|
while i < alphabet.len() {
|
|
let (bits, sym) = code_table.decode(b, p)?;
|
|
p += bits;
|
|
|
|
match sym {
|
|
0..=15 => {
|
|
// raw code
|
|
alphabet[i] = sym;
|
|
i += 1;
|
|
}
|
|
16 => {
|
|
// copy previous code 3-6 times
|
|
if i == 0 {
|
|
bail!("cannot copy on first alphabet code");
|
|
}
|
|
|
|
let len = usize::from(read_bits_l(b, p, 2)? as u8 + 3);
|
|
let lst = alphabet[i - 1];
|
|
p += 2;
|
|
|
|
for _ in 0..len {
|
|
alphabet[i] = lst;
|
|
i += 1;
|
|
}
|
|
}
|
|
17 => {
|
|
// repeat '0' 3-10 times
|
|
let len = usize::from(read_bits_l(b, p, 3)? as u8 + 3);
|
|
p += 3;
|
|
|
|
for _ in 0..len {
|
|
alphabet[i] = 0;
|
|
i += 1;
|
|
}
|
|
}
|
|
18 => {
|
|
// repeat '0' 11-138 times
|
|
let len = usize::from(read_bits_l(b, p, 7)? as u8 + 11);
|
|
p += 7;
|
|
|
|
for _ in 0..len {
|
|
alphabet[i] = 0;
|
|
i += 1;
|
|
}
|
|
}
|
|
_ => {
|
|
bail!("bad symbol in alphabet");
|
|
}
|
|
}
|
|
|
|
if i > alphabet.len() {
|
|
bail!("too many codes");
|
|
}
|
|
}
|
|
|
|
Ok(p)
|
|
}
|
|
|
|
fn output_tables(v: &mut Vec<u8>,
|
|
b: &[u8],
|
|
mut p: usize,
|
|
table_len: HuffmanTable,
|
|
table_dst: HuffmanTable) -> ResultS<usize>
|
|
{
|
|
const LEN_BASE: [usize; 29] = [3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19,
|
|
23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115,
|
|
131, 163, 195, 227, 258];
|
|
|
|
const LEN_EXTRA_BITS: [u8; 29] = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2,
|
|
2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5,
|
|
0];
|
|
|
|
const DST_BASE: [usize; 30] = [1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65,
|
|
97, 129, 193, 257, 385, 513, 769, 1025,
|
|
1537, 2049, 3073, 4097, 0x1801, 0x2001,
|
|
0x3001, 0x4001, 0x6001];
|
|
|
|
const DST_EXTRA_BITS: [u8; 30] = [0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5,
|
|
6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
|
|
12, 12, 13, 13];
|
|
|
|
loop {
|
|
let (bits, sym) = table_len.decode(b, p)?;
|
|
p += bits;
|
|
|
|
match sym.cmp(&256) {
|
|
Ordering::Less => {
|
|
// direct byte
|
|
v.push(sym as u8);
|
|
}
|
|
Ordering::Equal => {
|
|
return Ok(p);
|
|
}
|
|
Ordering::Greater => {
|
|
// this is a <len, dst> pair
|
|
let sym = sym - 257;
|
|
|
|
if sym > 29 {
|
|
bail!("invalid fixed code");
|
|
}
|
|
|
|
let sym = usize::from(sym);
|
|
|
|
// first get the actual length and any extra bits it may have
|
|
let bits = LEN_EXTRA_BITS[sym];
|
|
let leng = LEN_BASE[sym] + read_bits_l(b, p, bits)? as usize;
|
|
p += usize::from(bits);
|
|
|
|
// decode the distance with its alphabet
|
|
let (bits, sym) = table_dst.decode(b, p)?;
|
|
p += bits;
|
|
|
|
let sym = usize::from(sym);
|
|
|
|
// get the actual distance and any extra bits it may have
|
|
let bits = DST_EXTRA_BITS[sym];
|
|
let dist = DST_BASE[sym] + read_bits_l(b, p, bits)? as usize;
|
|
p += usize::from(bits);
|
|
|
|
if dist > v.len() {
|
|
bail!("bad distance");
|
|
}
|
|
|
|
// copy bytes from earlier
|
|
for _ in 0..leng {
|
|
v.push(v[v.len() - dist]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl HuffmanTable
|
|
{
|
|
fn new(table: &[u16]) -> ResultS<Self>
|
|
{
|
|
let mut syms = vec![0; table.len()];
|
|
let mut nums = [0; 16];
|
|
|
|
// count the number of symbols for each bit length
|
|
for &length in table {
|
|
nums[usize::from(length)] += 1;
|
|
}
|
|
|
|
if usize::from(nums[0]) == table.len() {
|
|
bail!("bad table lengths");
|
|
}
|
|
|
|
// make offsets into the symbol table for each bit count
|
|
let mut ofs = [0; 16];
|
|
|
|
for i in 1..=14 {
|
|
ofs[i + 1] = ofs[i] + usize::from(nums[i]);
|
|
}
|
|
|
|
// make the actual bit pattern table
|
|
for (n, &length) in table.iter().enumerate() {
|
|
// length 0 means this code isn't used, so only try to make bit
|
|
// patterns for codes that actually exist
|
|
if length != 0 {
|
|
// make sure to make each offset unique
|
|
let offset = &mut ofs[usize::from(length)];
|
|
syms[*offset] = n as u16;
|
|
*offset += 1;
|
|
}
|
|
}
|
|
|
|
Ok(Self{nums, syms})
|
|
}
|
|
|
|
fn decode(&self, b: &[u8], mut p: usize) -> ResultS<(usize, u16)>
|
|
{
|
|
let mut code = 0_u16;
|
|
let mut first = 0_u16;
|
|
let mut index = 0_u16;
|
|
|
|
for i in 1..=15 {
|
|
// add bit from file
|
|
code |= read_bits_l(b, p, 1)? as u16;
|
|
p += 1;
|
|
|
|
// check our symbol table for this one (quick tree check)
|
|
let count = self.nums[i];
|
|
|
|
if i32::from(code) - i32::from(count) < i32::from(first) {
|
|
return Ok((i, self.syms[usize::from(index + code - first)]));
|
|
}
|
|
|
|
// continue on, trying to find the correct sequence
|
|
index += count;
|
|
first += count;
|
|
|
|
first <<= 1;
|
|
code <<= 1;
|
|
}
|
|
|
|
Err(repr_error(code))
|
|
}
|
|
}
|
|
|
|
/// Lookup data for decoding one Huffman-coded alphabet.
struct HuffmanTable
{
    /// Number of symbols using each code length, indexed by length.
    nums: [u16; 16],
    /// Symbols grouped by code length, in symbol order within each group.
    syms: Vec<u16>,
}
|
|
|
|
// EOF
|