diff --git a/source/marathon/defl.rs b/source/marathon/defl.rs index b9f4ace..72b9404 100644 --- a/source/marathon/defl.rs +++ b/source/marathon/defl.rs @@ -94,41 +94,31 @@ fn skip_zero_terminated_item(b: &[u8]) -> ResultS pub fn load_deflate(b: &[u8]) -> ResultS<(usize, Vec)> { let mut v = Vec::new(); - let p = stream_deflate(&mut v, b, 0)?; + let mut p = 0; - Ok((p / 8, v)) -} + loop { + let bfinal = read_bits_l(b, p, 1)?; p += 1; + let btype = read_bits_l(b, p, 2)?; p += 2; -fn stream_deflate(v: &mut Vec, b: &[u8], mut p: usize) -> ResultS -{ - let bfinal = read_bits_l(b, p, 1)?; p += 1; - let btype = read_bits_l(b, p, 2)?; p += 2; + match btype { + 0b10 => p = stream_dynamic(&mut v, b, p)?, + 0b01 => p = stream_s_table(&mut v, b, p)?, + 0b00 => p = stream_literal(&mut v, b, p)?, + _ => bail!("bad btype"), + } - let p = match btype { - 0b10 => stream_dynamic(v, b, p)?, - 0b01 => stream_s_table(v, b, p)?, - 0b00 => stream_literal(v, b, p)?, - _ => bail!("bad btype"), - }; - - if bfinal == 0 { - stream_deflate(v, b, p) - } else { - Ok(p) + if bfinal == 1 { + return Ok((p / 8, v)); + } } } fn stream_dynamic(v: &mut Vec, b: &[u8], mut p: usize) -> ResultS { - const CODE_ORDERING: [usize; NUM_CODES] = [ + const CODE_ORDERING: [usize; 19] = [ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 ]; - const NUM_CODES: usize = 19; - - const MAX_LIT_CODES: usize = 286; - const MAX_DST_CODES: usize = 30; - // read header (number of literal alphabet codes, number of distance // alphabet codes, and number of lengths for decoding the alphabet) let hlit = read_bits_l(b, p, 5)?; p += 5; @@ -139,80 +129,30 @@ fn stream_dynamic(v: &mut Vec, b: &[u8], mut p: usize) -> ResultS let hdist = 1 + hdist as usize; let hclen = 4 + hclen as usize; - let alphabet_total = hlit + hdist; - // first, get the huffman coding for the alphabet (which is also compressed) - let mut code_lengths = [0; NUM_CODES]; + let mut code_table = [0; 19]; for i in 0..hclen { let len = read_bits_l(b, p, 3)? as u16; p += 3; - code_lengths[CODE_ORDERING[i]] = len; + code_table[CODE_ORDERING[i]] = len; } - let code_lengths = HuffmanTable::new(&code_lengths)?; - let mut alphabet = [0; MAX_LIT_CODES + MAX_DST_CODES]; - // then, we decode the alphabet (doing both types at the same time, because // they're encoded the same anyways) - let mut i = 0; - while i < alphabet_total { - let (bits, sym) = code_lengths.decode(b, p)?; - p += bits; + let code_table = HuffmanTable::new(&code_table)?; + let mut alphabet = vec![0; hlit + hdist]; - match sym { - 0..=15 => { - // raw code - alphabet[i] = sym; - i += 1; - } - 16 => { - // copy previous code 3-6 times - if i == 0 {bail!("cannot copy on first alphabet code");} - - let len = usize::from(read_bits_l(b, p, 2)? as u8 + 3); - let lst = alphabet[i - 1]; - p += 2; - - for _ in 0..len {alphabet[i] = lst; i += 1;} - } - 17 => { - // repeat '0' 3-10 times - let len = usize::from(read_bits_l(b, p, 3)? as u8 + 3); - p += 3; - - for _ in 0..len {alphabet[i] = 0; i += 1;} - } - 18 => { - // repeat '0' 11-138 times - let len = usize::from(read_bits_l(b, p, 7)? as u8 + 11); - p += 7; - - for _ in 0..len {alphabet[i] = 0; i += 1;} - } - _ => { - bail!("bad symbol in alphabet"); - } - } - - if i > alphabet_total { - bail!("too many codes"); - } - } + p = read_alphabet(b, p, &mut alphabet, code_table)?; if alphabet[256] == 0 { bail!("no way to end block"); } - let len_sta = 0; - let len_end = hlit; - let dst_sta = len_end; - let dst_end = dst_sta + hdist; - // build the length and distance tables from this information - let table_len = HuffmanTable::new(&alphabet[len_sta..len_end])?; - let table_dst = HuffmanTable::new(&alphabet[dst_sta..dst_end])?; + let table_len = HuffmanTable::new(&alphabet[ 0..hlit ])?; + let table_dst = HuffmanTable::new(&alphabet[hlit..hlit + hdist])?; output_tables(v, b, p, table_len, table_dst) } @@ -251,6 +191,60 @@ fn stream_literal(v: &mut Vec, b: &[u8], p: usize) -> ResultS Ok((p + len) * 8) } +fn read_alphabet(b: &[u8], + mut p: usize, + alphabet: &mut [u16], + code_table: HuffmanTable) + -> ResultS +{ + let mut i = 0; + while i < alphabet.len() { + let (bits, sym) = code_table.decode(b, p)?; + p += bits; + + match sym { + 0..=15 => { + // raw code + alphabet[i] = sym; + i += 1; + } + 16 => { + // copy previous code 3-6 times + if i == 0 {bail!("cannot copy on first alphabet code");} + + let len = usize::from(read_bits_l(b, p, 2)? as u8 + 3); + let lst = alphabet[i - 1]; + p += 2; + + for _ in 0..len {alphabet[i] = lst; i += 1;} + } + 17 => { + // repeat '0' 3-10 times + let len = usize::from(read_bits_l(b, p, 3)? as u8 + 3); + p += 3; + + for _ in 0..len {alphabet[i] = 0; i += 1;} + } + 18 => { + // repeat '0' 11-138 times + let len = usize::from(read_bits_l(b, p, 7)? as u8 + 11); + p += 7; + + for _ in 0..len {alphabet[i] = 0; i += 1;} + } + _ => { + bail!("bad symbol in alphabet"); + } + } + + if i > alphabet.len() { + bail!("too many codes"); + } + } + + Ok(p) +} + fn output_tables(v: &mut Vec, b: &[u8], mut p: usize, @@ -278,54 +272,53 @@ fn output_tables(v: &mut Vec, 11, 11, 12, 12, 13, 13 ]; - let (bits, sym) = table_len.decode(b, p)?; - p += bits; + loop { + let (bits, sym) = table_len.decode(b, p)?; + p += bits; - match sym.cmp(&256) { - Ordering::Less => { - // direct byte - v.push(sym as u8); - output_tables(v, b, p, table_len, table_dst) - } - Ordering::Equal => { - Ok(p) - } - Ordering::Greater => { - // this is a pair - let sym = sym - 257; - - if sym > 29 { - bail!("invalid fixed code"); + match sym.cmp(&256) { + Ordering::Less => { + // direct byte + v.push(sym as u8); } - - let sym = usize::from(sym); - - // first get the actual length and any extra bits it may have - let bits = LEN_EXTRA_BITS[sym]; - let leng = LEN_BASE[sym] + read_bits_l(b, p, bits)? as usize; - p += usize::from(bits); - - // decode the distance with its alphabet - let (bits, sym) = table_dst.decode(b, p)?; - p += bits; - - let sym = usize::from(sym); - - // get the actual distance and any extra bits it may have - let bits = DST_EXTRA_BITS[sym]; - let dist = DST_BASE[sym] + read_bits_l(b, p, bits)? as usize; - p += usize::from(bits); - - if dist > v.len() { - bail!("bad distance"); + Ordering::Equal => { + return Ok(p); } + Ordering::Greater => { + // this is a pair + let sym = sym - 257; - // copy bytes from earlier - for _ in 0..leng { - v.push(v[v.len() - dist]); + if sym > 29 { + bail!("invalid fixed code"); + } + + let sym = usize::from(sym); + + // first get the actual length and any extra bits it may have + let bits = LEN_EXTRA_BITS[sym]; + let leng = LEN_BASE[sym] + read_bits_l(b, p, bits)? as usize; + p += usize::from(bits); + + // decode the distance with its alphabet + let (bits, sym) = table_dst.decode(b, p)?; + p += bits; + + let sym = usize::from(sym); + + // get the actual distance and any extra bits it may have + let bits = DST_EXTRA_BITS[sym]; + let dist = DST_BASE[sym] + read_bits_l(b, p, bits)? as usize; + p += usize::from(bits); + + if dist > v.len() { + bail!("bad distance"); + } + + // copy bytes from earlier + for _ in 0..leng { + v.push(v[v.len() - dist]); + } } - - output_tables(v, b, p, table_len, table_dst) } } }