//! DEFLATE loader. use crate::durandal::{bit::*, err::*}; use std::cmp::Ordering; /// Loads a ZLIB file header. pub fn load_zlib_header(b: &[u8]) -> ResultS { const CM: u8 = 0b0000_1111; const CINFO: u8 = 0b1111_0000; const FDICT: u8 = 0b0010_0000; read_data! { 2, BE in b => fcheck = u16[0]; cmf = u8[0]; flg = u8[1]; } let cm = cmf & CM; let cinfo = cmf & CINFO >> 4; if cm != 8 || fcheck % 31 != 0 || cinfo > 7 { bail!("not zlib format"); } if flg & FDICT != 0 { bail!("dictionary not supported"); } Ok(2) } /// Loads a GZIP file header. pub fn load_gzip_header(b: &[u8]) -> ResultS { const FHCRC: u8 = 1 << 1; const FEXTRA: u8 = 1 << 2; const FNAME: u8 = 1 << 3; const FCOMMENT: u8 = 1 << 4; const FRESERVED: u8 = 0xE0; read_data! { 10, LE in b => id = u16[0]; cm = u8[2]; fl = u8[3]; } if id != 0x8B1F || cm != 8 { bail!("not gzip format"); } let mut p = 10; if fl & FRESERVED != 0 { bail!("reserved flags set"); } if fl & FEXTRA != 0 { read_data!(p + 2, LE in b => xlen = u16[p] usize;); check_data!(p + 2 + xlen, b); p += xlen; } if fl & FNAME != 0 { p += skip_zero_terminated_item(&b[p..])?; } if fl & FCOMMENT != 0 { p += skip_zero_terminated_item(&b[p..])?; } if fl & FHCRC != 0 { check_data!(p + 2, b); p += 2; } Ok(p) } fn skip_zero_terminated_item(b: &[u8]) -> ResultS { if let Some(i) = b.iter().position(|&n| n == 0) { Ok(i + 1) } else { bail!("no end of zero terminated item"); } } /// Decompresses a DEFLATE compressed bitstream. pub fn load_deflate(b: &[u8]) -> ResultS<(usize, Vec)> { let mut v = Vec::new(); let p = stream_deflate(&mut v, b, 0)?; Ok((p / 8, v)) } fn stream_deflate(v: &mut Vec, b: &[u8], mut p: usize) -> ResultS { let bfinal = read_bits_l(b, p, 1)?; p += 1; let btype = read_bits_l(b, p, 2)?; p += 2; let p = match btype { 0b10 => stream_dynamic(v, b, p)?, 0b01 => stream_s_table(v, b, p)?, 0b00 => stream_literal(v, b, p)?, _ => bail!("bad btype"), }; if bfinal == 0 { stream_deflate(v, b, p) } else { Ok(p) } } fn stream_dynamic(v: &mut Vec, b: &[u8], mut p: usize) -> ResultS { const CODE_ORDERING: [usize; NUM_CODES] = [ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 ]; const NUM_CODES: usize = 19; const MAX_LIT_CODES: usize = 286; const MAX_DST_CODES: usize = 30; // read header (number of literal alphabet codes, number of distance // alphabet codes, and number of lengths for decoding the alphabet) let hlit = read_bits_l(b, p, 5)?; p += 5; let hdist = read_bits_l(b, p, 5)?; p += 5; let hclen = read_bits_l(b, p, 4)?; p += 4; let hlit = 257 + hlit as usize; let hdist = 1 + hdist as usize; let hclen = 4 + hclen as usize; let alphabet_total = hlit + hdist; // first, get the huffman coding for the alphabet (which is also compressed) let mut code_lengths = [0; NUM_CODES]; for i in 0..hclen { let len = read_bits_l(b, p, 3)? as u16; p += 3; code_lengths[CODE_ORDERING[i]] = len; } let code_lengths = HuffmanTable::new(&code_lengths)?; let mut alphabet = [0; MAX_LIT_CODES + MAX_DST_CODES]; // then, we decode the alphabet (doing both types at the same time, because // they're encoded the same anyways) let mut i = 0; while i < alphabet_total { let (bits, sym) = code_lengths.decode(b, p)?; p += bits; match sym { 0..=15 => { // raw code alphabet[i] = sym; i += 1; } 16 => { // copy previous code 3-6 times if i == 0 {bail!("cannot copy on first alphabet code");} let len = usize::from(read_bits_l(b, p, 2)? as u8 + 3); let lst = alphabet[i - 1]; p += 2; for _ in 0..len {alphabet[i] = lst; i += 1;} } 17 => { // repeat '0' 3-10 times let len = usize::from(read_bits_l(b, p, 3)? as u8 + 3); p += 3; for _ in 0..len {alphabet[i] = 0; i += 1;} } 18 => { // repeat '0' 11-138 times let len = usize::from(read_bits_l(b, p, 7)? as u8 + 11); p += 7; for _ in 0..len {alphabet[i] = 0; i += 1;} } _ => { bail!("bad symbol in alphabet"); } } if i > alphabet_total { bail!("too many codes"); } } if alphabet[256] == 0 { bail!("no way to end block"); } let len_sta = 0; let len_end = hlit; let dst_sta = len_end; let dst_end = dst_sta + hdist; // build the length and distance tables from this information let table_len = HuffmanTable::new(&alphabet[len_sta..len_end])?; let table_dst = HuffmanTable::new(&alphabet[dst_sta..dst_end])?; output_tables(v, b, p, table_len, table_dst) } fn stream_s_table(v: &mut Vec, b: &[u8], p: usize) -> ResultS { let mut len = [0; 288]; for len in len.iter_mut().take(144) {*len = 8;} for len in len.iter_mut().take(256).skip(144) {*len = 9;} for len in len.iter_mut().take(280).skip(256) {*len = 7;} for len in len.iter_mut().take(280).skip(288) {*len = 8;} let dst = [5; 30]; let table_len = HuffmanTable::new(&len)?; let table_dst = HuffmanTable::new(&dst)?; output_tables(v, b, p, table_len, table_dst) } fn stream_literal(v: &mut Vec, b: &[u8], p: usize) -> ResultS { // copy data directly from byte boundary let mut p = p / 8 + 1; read_data! { p + 4, LE in b => len = u16[p] usize; } p += 4; v.extend_from_slice(ok!(b.get(p..p + len), "not enough data")?); Ok((p + len) * 8) } fn output_tables(v: &mut Vec, b: &[u8], mut p: usize, table_len: HuffmanTable, table_dst: HuffmanTable) -> ResultS { const LEN_BASE: [usize; 29] = [ 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258 ]; const LEN_EXTRA_BITS: [u8; 29] = [ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 ]; const DST_BASE: [usize; 30] = [ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001 ]; const DST_EXTRA_BITS: [u8; 30] = [ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13 ]; let (bits, sym) = table_len.decode(b, p)?; p += bits; match sym.cmp(&256) { Ordering::Less => { // direct byte v.push(sym as u8); output_tables(v, b, p, table_len, table_dst) } Ordering::Equal => { Ok(p) } Ordering::Greater => { // this is a pair let sym = sym - 257; if sym > 29 { bail!("invalid fixed code"); } let sym = usize::from(sym); // first get the actual length and any extra bits it may have let bits = LEN_EXTRA_BITS[sym]; let leng = LEN_BASE[sym] + read_bits_l(b, p, bits)? as usize; p += usize::from(bits); // decode the distance with its alphabet let (bits, sym) = table_dst.decode(b, p)?; p += bits; let sym = usize::from(sym); // get the actual distance and any extra bits it may have let bits = DST_EXTRA_BITS[sym]; let dist = DST_BASE[sym] + read_bits_l(b, p, bits)? as usize; p += usize::from(bits); if dist > v.len() { bail!("bad distance"); } // copy bytes from earlier for _ in 0..leng { v.push(v[v.len() - dist]); } output_tables(v, b, p, table_len, table_dst) } } } impl HuffmanTable { fn new(table: &[u16]) -> ResultS { let mut syms = vec![0; table.len()]; let mut nums = [0; 16]; // count the number of symbols for each bit length for &length in table { nums[usize::from(length)] += 1; } if usize::from(nums[0]) == table.len() { bail!("bad table lengths"); } // make offsets into the symbol table for each bit count let mut ofs = [0; 16]; for i in 1..=14 { ofs[i + 1] = ofs[i] + usize::from(nums[i]); } // make the actual bit pattern table for (n, &length) in table.iter().enumerate() { // length 0 means this code isn't used, so only try to make bit // patterns for codes that actually exist if length != 0 { // make sure to make each offset unique let offset = &mut ofs[usize::from(length)]; syms[*offset] = n as u16; *offset += 1; } } Ok(Self{nums, syms}) } fn decode(&self, b: &[u8], mut p: usize) -> ResultS<(usize, u16)> { let mut code = 0_u16; let mut first = 0_u16; let mut index = 0_u16; for i in 1..=15 { // add bit from file code |= read_bits_l(b, p, 1)? as u16; p += 1; // check our symbol table for this one (quick tree check) let count = u16::from(self.nums[i]); if i32::from(code) - i32::from(count) < i32::from(first) { return Ok((i, self.syms[usize::from(index + code - first)])); } // continue on, trying to find the correct sequence index += count; first += count; first <<= 1; code <<= 1; } Err(err_msg("code not found in symbol tree")) } } struct HuffmanTable { nums: [u8; 16], syms: Vec, } // EOF