Maraiah/source/marathon/defl.rs

407 lines
9.8 KiB
Rust

//! DEFLATE loader.
use crate::durandal::{bit::*, err::*};
use std::cmp::Ordering;
/// Loads a ZLIB file header.
///
/// Reads the two header bytes (CMF and FLG) from the start of `b` and
/// validates them: the compression method must be 8, the compression info
/// nibble must not exceed 7, and the two bytes read as a big-endian 16-bit
/// value must be a multiple of 31.
///
/// # Errors
///
/// Fails if `b` is too short for the header, if the header fails validation
/// ("not zlib format"), or if the preset dictionary flag is set
/// ("dictionary not supported").
///
/// Returns `2` on success.
pub fn load_zlib_header(b: &[u8]) -> ResultS<usize>
{
    const CM: u8 = 0b0000_1111;
    const CINFO: u8 = 0b1111_0000;
    const FDICT: u8 = 0b0010_0000;

    read_data! {
        2, BE in b =>
        fcheck = u16[0];
        cmf = u8[0];
        flg = u8[1];
    }

    let cm = cmf & CM;
    // `>>` binds tighter than `&`, so the mask has to be parenthesized;
    // otherwise this would compute `cmf & 0x0F` and yield `cm` again
    let cinfo = (cmf & CINFO) >> 4;

    if cm != 8 || fcheck % 31 != 0 || cinfo > 7 {
        bail!("not zlib format");
    }

    if flg & FDICT != 0 {
        bail!("dictionary not supported");
    }

    Ok(2)
}
/// Loads a GZIP file header.
///
/// Validates the magic number and compression method in the fixed 10-byte
/// header, then walks over the optional fields selected by the flag byte
/// (extra field, file name, comment, header CRC) without interpreting them.
///
/// # Errors
///
/// Fails if `b` is too short for any field that is present, if the magic
/// number or compression method is wrong ("not gzip format"), if any
/// reserved flag bit is set ("reserved flags set"), or if a zero-terminated
/// field has no terminator.
///
/// Returns the byte offset just past the last header field.
pub fn load_gzip_header(b: &[u8]) -> ResultS<usize>
{
    const FHCRC: u8 = 1 << 1;
    const FEXTRA: u8 = 1 << 2;
    const FNAME: u8 = 1 << 3;
    const FCOMMENT: u8 = 1 << 4;
    const FRESERVED: u8 = 0xE0;

    read_data! {
        10, LE in b =>
        id = u16[0];
        cm = u8[2];
        fl = u8[3];
    }

    if id != 0x8B1F || cm != 8 {
        bail!("not gzip format");
    }

    // cursor over the optional fields following the fixed header
    let mut p = 10;

    if fl & FRESERVED != 0 {
        bail!("reserved flags set");
    }

    if fl & FEXTRA != 0 {
        read_data!(p + 2, LE in b => xlen = u16[p] usize;);
        check_data!(p + 2 + xlen, b);

        // skip the 2-byte length field itself as well as its payload
        p += 2 + xlen;
    }

    if fl & FNAME != 0 {
        p += skip_zero_terminated_item(&b[p..])?;
    }

    if fl & FCOMMENT != 0 {
        p += skip_zero_terminated_item(&b[p..])?;
    }

    if fl & FHCRC != 0 {
        check_data!(p + 2, b);
        p += 2;
    }

    Ok(p)
}
/// Measures a zero-terminated item at the start of `b`.
///
/// Returns the number of bytes up to and including the first zero byte, or
/// an error if `b` contains no zero byte at all.
fn skip_zero_terminated_item(b: &[u8]) -> ResultS<usize>
{
    match b.iter().position(|&byte| byte == 0) {
        Some(terminator) => Ok(terminator + 1),
        None => bail!("no end of zero terminated item"),
    }
}
/// Decompresses a DEFLATE compressed bitstream.
///
/// Decoding starts at bit 0 of `b`. On success, returns the final bit
/// position divided by 8 (rounded down) together with the decompressed
/// bytes.
pub fn load_deflate(b: &[u8]) -> ResultS<(usize, Vec<u8>)>
{
    let mut output = Vec::new();

    stream_deflate(&mut output, b, 0).map(|bit_pos| (bit_pos / 8, output))
}
/// Decodes consecutive DEFLATE blocks starting at bit position `p`.
///
/// Each block starts with a 1-bit "final" flag and a 2-bit block type which
/// selects the decoder for the block body. Decoded bytes are appended to
/// `v`. Blocks are processed until one with the final flag set has been
/// decoded; the bit position following that block is returned.
fn stream_deflate(v: &mut Vec<u8>, b: &[u8], mut p: usize) -> ResultS<usize>
{
    loop {
        let is_last = read_bits_l(b, p, 1)? != 0;
        p += 1;

        let kind = read_bits_l(b, p, 2)?;
        p += 2;

        p = match kind {
            0b00 => stream_literal(v, b, p)?,
            0b01 => stream_s_table(v, b, p)?,
            0b10 => stream_dynamic(v, b, p)?,
            _ => bail!("bad btype"),
        };

        if is_last {
            return Ok(p);
        }
    }
}
fn stream_dynamic(v: &mut Vec<u8>, b: &[u8], mut p: usize) -> ResultS<usize>
{
const CODE_ORDERING: [usize; NUM_CODES] = [
16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
];
const NUM_CODES: usize = 19;
const MAX_LIT_CODES: usize = 286;
const MAX_DST_CODES: usize = 30;
// read header (number of literal alphabet codes, number of distance
// alphabet codes, and number of lengths for decoding the alphabet)
let hlit = read_bits_l(b, p, 5)?; p += 5;
let hdist = read_bits_l(b, p, 5)?; p += 5;
let hclen = read_bits_l(b, p, 4)?; p += 4;
let hlit = 257 + hlit as usize;
let hdist = 1 + hdist as usize;
let hclen = 4 + hclen as usize;
let alphabet_total = hlit + hdist;
// first, get the huffman coding for the alphabet (which is also compressed)
let mut code_lengths = [0; NUM_CODES];
for i in 0..hclen {
let len = read_bits_l(b, p, 3)? as u16;
p += 3;
code_lengths[CODE_ORDERING[i]] = len;
}
let code_lengths = HuffmanTable::new(&code_lengths)?;
let mut alphabet = [0; MAX_LIT_CODES + MAX_DST_CODES];
// then, we decode the alphabet (doing both types at the same time, because
// they're encoded the same anyways)
let mut i = 0;
while i < alphabet_total {
let (bits, sym) = code_lengths.decode(b, p)?;
p += bits;
match sym {
0..=15 => {
// raw code
alphabet[i] = sym;
i += 1;
}
16 => {
// copy previous code 3-6 times
if i == 0 {bail!("cannot copy on first alphabet code");}
let len = usize::from(read_bits_l(b, p, 2)? as u8 + 3);
let lst = alphabet[i - 1];
p += 2;
for _ in 0..len {alphabet[i] = lst; i += 1;}
}
17 => {
// repeat '0' 3-10 times
let len = usize::from(read_bits_l(b, p, 3)? as u8 + 3);
p += 3;
for _ in 0..len {alphabet[i] = 0; i += 1;}
}
18 => {
// repeat '0' 11-138 times
let len = usize::from(read_bits_l(b, p, 7)? as u8 + 11);
p += 7;
for _ in 0..len {alphabet[i] = 0; i += 1;}
}
_ => {
bail!("bad symbol in alphabet");
}
}
if i > alphabet_total {
bail!("too many codes");
}
}
if alphabet[256] == 0 {
bail!("no way to end block");
}
let len_sta = 0;
let len_end = hlit;
let dst_sta = len_end;
let dst_end = dst_sta + hdist;
// build the length and distance tables from this information
let table_len = HuffmanTable::new(&alphabet[len_sta..len_end])?;
let table_dst = HuffmanTable::new(&alphabet[dst_sta..dst_end])?;
output_tables(v, b, p, table_len, table_dst)
}
/// Decodes a block compressed with the fixed (static) Huffman tables.
///
/// The literal/length table uses the fixed code lengths of the format:
/// symbols 0..=143 are 8 bits, 144..=255 are 9 bits, 256..=279 are 7 bits
/// and 280..=287 are 8 bits. All 30 distance symbols are 5 bits.
///
/// `p` is the bit position just past the block header. Decoded bytes are
/// appended to `v`; the bit position after the block is returned.
fn stream_s_table(v: &mut Vec<u8>, b: &[u8], p: usize) -> ResultS<usize>
{
    let mut len = [0_u16; 288];

    for l in &mut len[0..144] {*l = 8;}
    for l in &mut len[144..256] {*l = 9;}
    for l in &mut len[256..280] {*l = 7;}
    // the last eight symbols must be given a length too, otherwise the
    // canonical codes assigned to every 8 and 9 bit symbol come out wrong
    for l in &mut len[280..288] {*l = 8;}

    let dst = [5_u16; 30];

    let table_len = HuffmanTable::new(&len)?;
    let table_dst = HuffmanTable::new(&dst)?;

    output_tables(v, b, p, table_len, table_dst)
}
/// Copies a stored (uncompressed) block.
///
/// `p` is the bit position just past the block header. Stored data starts
/// at the next byte boundary with a 4-byte header whose first two bytes are
/// the little-endian payload length; the remaining two header bytes are
/// skipped without being checked. The payload is appended to `v` verbatim.
///
/// Returns the bit position just past the payload, or an error if `b` does
/// not contain the whole header and payload.
fn stream_literal(v: &mut Vec<u8>, b: &[u8], p: usize) -> ResultS<usize>
{
    // round up to the next byte boundary; if we are already sitting on one
    // there is nothing to skip, so a plain `p / 8 + 1` would drop a byte
    let mut p = (p + 7) / 8;

    read_data! {
        p + 4, LE in b =>
        len = u16[p] usize;
    }

    p += 4;

    // copy data directly from byte boundary
    v.extend_from_slice(ok!(b.get(p..p + len), "not enough data")?);

    Ok((p + len) * 8)
}
/// Decodes Huffman-coded block data using the given tables.
///
/// Symbols are decoded from `b` starting at bit position `p` with
/// `table_len`. Symbols below 256 are literal bytes and are appended to `v`;
/// symbol 256 ends the block; anything above is a length code, which is
/// followed by a distance code decoded with `table_dst` and makes up a
/// `<length, distance>` back-reference into the bytes already in `v`.
///
/// Returns the bit position just past the end-of-block symbol.
///
/// # Errors
///
/// Fails on truncated input, on a symbol that cannot be decoded, on a
/// length or distance symbol outside the defined range, or on a distance
/// that reaches back before the start of `v`.
fn output_tables(v: &mut Vec<u8>,
                 b: &[u8],
                 mut p: usize,
                 table_len: HuffmanTable,
                 table_dst: HuffmanTable)
                 -> ResultS<usize>
{
    const LEN_BASE: [usize; 29] = [
        3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51,
        59, 67, 83, 99, 115, 131, 163, 195, 227, 258
    ];

    const LEN_EXTRA_BITS: [u8; 29] = [
        0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
        4, 5, 5, 5, 5, 0
    ];

    const DST_BASE: [usize; 30] = [
        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385,
        513, 769, 1025, 1537, 2049, 3073, 4097, 0x1801, 0x2001, 0x3001,
        0x4001, 0x6001
    ];

    const DST_EXTRA_BITS: [u8; 30] = [
        0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
        10, 11, 11, 12, 12, 13, 13
    ];

    // iterate rather than recurse once per symbol, so that stack usage does
    // not grow with the amount of data in the block
    loop {
        let (bits, sym) = table_len.decode(b, p)?;
        p += bits;

        match sym.cmp(&256) {
            Ordering::Less => {
                // direct byte
                v.push(sym as u8);
            }
            Ordering::Equal => {
                // end of block
                return Ok(p);
            }
            Ordering::Greater => {
                // this is a <len, dst> pair
                let sym = usize::from(sym - 257);

                // the tables may define symbols past the last valid length
                // code, which have no entry in the base tables
                if sym >= LEN_BASE.len() {
                    bail!("invalid fixed code");
                }

                // first get the actual length and any extra bits it may have
                let bits = LEN_EXTRA_BITS[sym];
                let leng = LEN_BASE[sym] + read_bits_l(b, p, bits)? as usize;
                p += usize::from(bits);

                // decode the distance with its alphabet
                let (bits, sym) = table_dst.decode(b, p)?;
                p += bits;

                let sym = usize::from(sym);

                if sym >= DST_BASE.len() {
                    bail!("invalid distance code");
                }

                // get the actual distance and any extra bits it may have
                let bits = DST_EXTRA_BITS[sym];
                let dist = DST_BASE[sym] + read_bits_l(b, p, bits)? as usize;
                p += usize::from(bits);

                if dist > v.len() {
                    bail!("bad distance");
                }

                // copy bytes from earlier; this goes one byte at a time
                // because the source range may overlap the bytes being
                // written when the length exceeds the distance
                for _ in 0..leng {
                    v.push(v[v.len() - dist]);
                }
            }
        }
    }
}
impl HuffmanTable
{
    /// Builds a canonical Huffman decoding table from per-symbol code
    /// lengths.
    ///
    /// `table[n]` is the code length in bits of symbol `n`, with `0` meaning
    /// the symbol is unused. Lengths must not exceed 15, since they are used
    /// directly as indices into the 16-entry count array.
    ///
    /// Fails with "bad table lengths" if every entry of `table` is zero
    /// (which includes an empty `table`).
    fn new(table: &[u16]) -> ResultS<Self>
    {
        let mut syms = vec![0; table.len()];
        // counts are 16 bits wide: a table may well hold more than 255
        // symbols of one length (most commonly length 0)
        let mut nums = [0_u16; 16];

        // count the number of symbols for each bit length
        for &length in table {
            nums[usize::from(length)] += 1;
        }

        if usize::from(nums[0]) == table.len() {
            bail!("bad table lengths");
        }

        // make offsets into the symbol table for each bit count
        let mut ofs = [0; 16];

        for i in 1..=14 {
            ofs[i + 1] = ofs[i] + usize::from(nums[i]);
        }

        // make the actual bit pattern table
        for (n, &length) in table.iter().enumerate() {
            // length 0 means this code isn't used, so only try to make bit
            // patterns for codes that actually exist
            if length != 0 {
                // make sure to make each offset unique
                let offset = &mut ofs[usize::from(length)];
                syms[*offset] = n as u16;
                *offset += 1;
            }
        }

        Ok(Self{nums, syms})
    }

    /// Decodes one symbol from `b` starting at bit position `p`.
    ///
    /// Bits are consumed one at a time, up to 15 of them, until the bits
    /// gathered so far fall within the range of codes that exist for the
    /// current length.
    ///
    /// Returns the number of bits consumed along with the decoded symbol, or
    /// an error if the input ends early or no code matches.
    fn decode(&self, b: &[u8], mut p: usize) -> ResultS<(usize, u16)>
    {
        let mut code = 0_u16; // bits gathered so far
        let mut first = 0_u16; // first code of the current length
        let mut index = 0_u16; // index into `syms` of that first code

        for i in 1..=15 {
            // add bit from file
            code |= read_bits_l(b, p, 1)? as u16;
            p += 1;

            // check our symbol table for this one (quick tree check)
            let count = self.nums[i];

            if i32::from(code) - i32::from(count) < i32::from(first) {
                return Ok((i, self.syms[usize::from(index + code - first)]));
            }

            // continue on, trying to find the correct sequence
            index += count;
            first += count;
            first <<= 1;
            code <<= 1;
        }

        Err(err_msg("code not found in symbol tree"))
    }
}

/// A canonical Huffman decoding table.
struct HuffmanTable
{
    /// Number of symbols for each code length, indexed by length in bits.
    nums: [u16; 16],
    /// Symbols ordered by code length, then by symbol value.
    syms: Vec<u16>,
}
// EOF