1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
use std::error::Error;
use std::fmt;
use std::io;
use std::marker;
use std::slice;

use libc::{c_int, c_uint};

use Compression;
use ffi;

/// Raw in-memory compression stream for blocks of data.
///
/// This type is the building block for the I/O streams in the rest of this
/// crate. It requires more management than the [`Read`]/[`Write`] API but is
/// maximally flexible in terms of accepting input from any source and being
/// able to produce output to any memory location.
///
/// It is recommended to use the I/O stream adaptors over this type as they're
/// easier to use.
///
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
#[derive(Debug)]
pub struct Compress {
    inner: Stream<DirCompress>,
}

/// Raw in-memory decompression stream for blocks of data.
///
/// This type is the building block for the I/O streams in the rest of this
/// crate. It requires more management than the [`Read`]/[`Write`] API but is
/// maximally flexible in terms of accepting input from any source and being
/// able to produce output to any memory location.
///
/// It is recommended to use the I/O stream adaptors over this type as they're
/// easier to use.
///
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
#[derive(Debug)]
pub struct Decompress {
    inner: Stream<DirDecompress>,
}

#[derive(Debug)]
struct Stream<D: Direction> {
    stream_wrapper: ffi::StreamWrapper,
    total_in: u64,
    total_out: u64,
    _marker: marker::PhantomData<D>,
}

unsafe impl<D: Direction> Send for Stream<D> {}
unsafe impl<D: Direction> Sync for Stream<D> {}

trait Direction {
    unsafe fn destroy(stream: *mut ffi::mz_stream) -> c_int;
}

#[derive(Debug)]
enum DirCompress {}
#[derive(Debug)]
enum DirDecompress {}

#[derive(Copy, Clone, PartialEq, Eq, Debug)]
/// Values which indicate the form of flushing to be used when compressing
/// in-memory data.
pub enum FlushCompress {
    /// A typical parameter for passing to compression/decompression functions,
    /// this indicates that the underlying stream to decide how much data to
    /// accumulate before producing output in order to maximize compression.
    None = ffi::MZ_NO_FLUSH as isize,

    /// All pending output is flushed to the output buffer and the output is
    /// aligned on a byte boundary so that the decompressor can get all input
    /// data available so far.
    ///
    /// Flushing may degrade compression for some compression algorithms and so
    /// it should only be used when necessary. This will complete the current
    /// deflate block and follow it with an empty stored block.
    Sync = ffi::MZ_SYNC_FLUSH as isize,

    /// All pending output is flushed to the output buffer, but the output is
    /// not aligned to a byte boundary.
    ///
    /// All of the input data so far will be available to the decompressor (as
    /// with `Flush::Sync`. This completes the current deflate block and follows
    /// it with an empty fixed codes block that is 10 bites long, and it assures
    /// that enough bytes are output in order for the decompessor to finish the
    /// block before the empty fixed code block.
    Partial = ffi::MZ_PARTIAL_FLUSH as isize,

    /// All output is flushed as with `Flush::Sync` and the compression state is
    /// reset so decompression can restart from this point if previous
    /// compressed data has been damaged or if random access is desired.
    ///
    /// Using this option too often can seriously degrade compression.
    Full = ffi::MZ_FULL_FLUSH as isize,

    /// Pending input is processed and pending output is flushed.
    ///
    /// The return value may indicate that the stream is not yet done and more
    /// data has yet to be processed.
    Finish = ffi::MZ_FINISH as isize,

    #[doc(hidden)]
    _Nonexhaustive
}

#[derive(Copy, Clone, PartialEq, Eq, Debug)]
/// Values which indicate the form of flushing to be used when
/// decompressing in-memory data.
pub enum FlushDecompress {
    /// A typical parameter for passing to compression/decompression functions,
    /// this indicates that the underlying stream to decide how much data to
    /// accumulate before producing output in order to maximize compression.
    None = ffi::MZ_NO_FLUSH as isize,

    /// All pending output is flushed to the output buffer and the output is
    /// aligned on a byte boundary so that the decompressor can get all input
    /// data available so far.
    ///
    /// Flushing may degrade compression for some compression algorithms and so
    /// it should only be used when necessary. This will complete the current
    /// deflate block and follow it with an empty stored block.
    Sync = ffi::MZ_SYNC_FLUSH as isize,

    /// Pending input is processed and pending output is flushed.
    ///
    /// The return value may indicate that the stream is not yet done and more
    /// data has yet to be processed.
    Finish = ffi::MZ_FINISH as isize,

    #[doc(hidden)]
    _Nonexhaustive
}

/// Error returned when a decompression object finds that the input stream of
/// bytes was not a valid input stream of bytes.
#[derive(Debug)]
pub struct DecompressError(());

/// Error returned when a compression object is used incorrectly or otherwise
/// generates an error.
#[derive(Debug)]
pub struct CompressError(());

/// Possible status results of compressing some data or successfully
/// decompressing a block of data.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum Status {
    /// Indicates success.
    ///
    /// Means that more input may be needed but isn't available
    /// and/or there's more output to be written but the output buffer is full.
    Ok,

    /// Indicates that forward progress is not possible due to input or output
    /// buffers being empty.
    ///
    /// For compression it means the input buffer needs some more data or the
    /// output buffer needs to be freed up before trying again.
    ///
    /// For decompression this means that more input is needed to continue or
    /// the output buffer isn't large enough to contain the result. The function
    /// can be called again after fixing both.
    BufError,

    /// Indicates that all input has been consumed and all output bytes have
    /// been written. Decompression/compression should not be called again.
    ///
    /// For decompression with zlib streams the adler-32 of the decompressed
    /// data has also been verified.
    StreamEnd,
}

impl Compress {
    /// Creates a new object ready for compressing data that it's given.
    ///
    /// The `level` argument here indicates what level of compression is going
    /// to be performed, and the `zlib_header` argument indicates whether the
    /// output data should have a zlib header or not.
    pub fn new(level: Compression, zlib_header: bool) -> Compress {
        unsafe {
            let mut state = ffi::StreamWrapper::default();
            let ret = ffi::mz_deflateInit2(&mut *state,
                                           level.0 as c_int,
                                           ffi::MZ_DEFLATED,
                                           if zlib_header {
                                               ffi::MZ_DEFAULT_WINDOW_BITS
                                           } else {
                                               -ffi::MZ_DEFAULT_WINDOW_BITS
                                           },
                                           9,
                                           ffi::MZ_DEFAULT_STRATEGY);
            debug_assert_eq!(ret, 0);
            Compress {
                inner: Stream {
                    stream_wrapper: state,
                    total_in: 0,
                    total_out: 0,
                    _marker: marker::PhantomData,
                },
            }
        }
    }

    /// Returns the total number of input bytes which have been processed by
    /// this compression object.
    pub fn total_in(&self) -> u64 {
        self.inner.total_in
    }

    /// Returns the total number of output bytes which have been produced by
    /// this compression object.
    pub fn total_out(&self) -> u64 {
        self.inner.total_out
    }

    /// Quickly resets this compressor without having to reallocate anything.
    ///
    /// This is equivalent to dropping this object and then creating a new one.
    pub fn reset(&mut self) {
        let rc = unsafe { ffi::mz_deflateReset(&mut *self.inner.stream_wrapper) };
        assert_eq!(rc, ffi::MZ_OK);

        self.inner.total_in = 0;
        self.inner.total_out = 0;
    }

    /// Compresses the input data into the output, consuming only as much
    /// input as needed and writing as much output as possible.
    ///
    /// The flush option can be any of the available `FlushCompress` parameters.
    ///
    /// To learn how much data was consumed or how much output was produced, use
    /// the `total_in` and `total_out` functions before/after this is called.
    pub fn compress(&mut self,
                    input: &[u8],
                    output: &mut [u8],
                    flush: FlushCompress)
                    -> Result<Status, CompressError> {
        let raw = &mut *self.inner.stream_wrapper;
        raw.next_in = input.as_ptr() as *mut _;
        raw.avail_in = input.len() as c_uint;
        raw.next_out = output.as_mut_ptr();
        raw.avail_out = output.len() as c_uint;

        let rc = unsafe { ffi::mz_deflate(raw, flush as c_int) };

        // Unfortunately the total counters provided by zlib might be only
        // 32 bits wide and overflow while processing large amounts of data.
        self.inner.total_in += (raw.next_in as usize -
                                input.as_ptr() as usize) as u64;
        self.inner.total_out += (raw.next_out as usize -
                                 output.as_ptr() as usize) as u64;

        match rc {
            ffi::MZ_OK => Ok(Status::Ok),
            ffi::MZ_BUF_ERROR => Ok(Status::BufError),
            ffi::MZ_STREAM_END => Ok(Status::StreamEnd),
            ffi::MZ_STREAM_ERROR => Err(CompressError(())),
            c => panic!("unknown return code: {}", c),
        }
    }

    /// Compresses the input data into the extra space of the output, consuming
    /// only as much input as needed and writing as much output as possible.
    ///
    /// This function has the same semantics as `compress`, except that the
    /// length of `vec` is managed by this function. This will not reallocate
    /// the vector provided or attempt to grow it, so space for the output must
    /// be reserved in the output vector by the caller before calling this
    /// function.
    pub fn compress_vec(&mut self,
                        input: &[u8],
                        output: &mut Vec<u8>,
                        flush: FlushCompress)
                        -> Result<Status, CompressError> {
        let cap = output.capacity();
        let len = output.len();

        unsafe {
            let before = self.total_out();
            let ret = {
                let ptr = output.as_mut_ptr().offset(len as isize);
                let out = slice::from_raw_parts_mut(ptr, cap - len);
                self.compress(input, out, flush)
            };
            output.set_len((self.total_out() - before) as usize + len);
            return ret
        }
    }
}

impl Decompress {
    /// Creates a new object ready for decompressing data that it's given.
    ///
    /// The `zlib_header` argument indicates whether the input data is expected
    /// to have a zlib header or not.
    pub fn new(zlib_header: bool) -> Decompress {
        unsafe {
            let mut state = ffi::StreamWrapper::default();
            let ret = ffi::mz_inflateInit2(&mut *state,
                                           if zlib_header {
                                               ffi::MZ_DEFAULT_WINDOW_BITS
                                           } else {
                                               -ffi::MZ_DEFAULT_WINDOW_BITS
                                           });
            debug_assert_eq!(ret, 0);
            Decompress {
                inner: Stream {
                    stream_wrapper: state,
                    total_in: 0,
                    total_out: 0,
                    _marker: marker::PhantomData,
                },
            }
        }
    }

    /// Returns the total number of input bytes which have been processed by
    /// this decompression object.
    pub fn total_in(&self) -> u64 {
        self.inner.total_in
    }

    /// Returns the total number of output bytes which have been produced by
    /// this decompression object.
    pub fn total_out(&self) -> u64 {
        self.inner.total_out
    }

    /// Decompresses the input data into the output, consuming only as much
    /// input as needed and writing as much output as possible.
    ///
    /// The flush option can be any of the available `FlushDecompress` parameters.
    ///
    /// If the first call passes `FlushDecompress::Finish` it is assumed that
    /// the input and output buffers are both sized large enough to decompress
    /// the entire stream in a single call.
    ///
    /// A flush value of `FlushDecompress::Finish` indicates that there are no
    /// more source bytes available beside what's already in the input buffer,
    /// and the output buffer is large enough to hold the rest of the
    /// decompressed data.
    ///
    /// To learn how much data was consumed or how much output was produced, use
    /// the `total_in` and `total_out` functions before/after this is called.
    ///
    /// # Errors
    ///
    /// If the input data to this instance of `Decompress` is not a valid
    /// zlib/deflate stream then this function may return an instance of
    /// `DecompressError` to indicate that the stream of input bytes is corrupted.
    pub fn decompress(&mut self,
                      input: &[u8],
                      output: &mut [u8],
                      flush: FlushDecompress)
                      -> Result<Status, DecompressError> {
        let raw = &mut *self.inner.stream_wrapper;
        raw.next_in = input.as_ptr() as *mut u8;
        raw.avail_in = input.len() as c_uint;
        raw.next_out = output.as_mut_ptr();
        raw.avail_out = output.len() as c_uint;

        let rc = unsafe { ffi::mz_inflate(raw, flush as c_int) };

        // Unfortunately the total counters provided by zlib might be only
        // 32 bits wide and overflow while processing large amounts of data.
        self.inner.total_in += (raw.next_in as usize -
                                input.as_ptr() as usize) as u64;
        self.inner.total_out += (raw.next_out as usize -
                                 output.as_ptr() as usize) as u64;

        match rc {
            ffi::MZ_DATA_ERROR |
            ffi::MZ_STREAM_ERROR => Err(DecompressError(())),
            ffi::MZ_OK => Ok(Status::Ok),
            ffi::MZ_BUF_ERROR => Ok(Status::BufError),
            ffi::MZ_STREAM_END => Ok(Status::StreamEnd),
            c => panic!("unknown return code: {}", c),
        }
    }

    /// Decompresses the input data into the extra space in the output vector
    /// specified by `output`.
    ///
    /// This function has the same semantics as `decompress`, except that the
    /// length of `vec` is managed by this function. This will not reallocate
    /// the vector provided or attempt to grow it, so space for the output must
    /// be reserved in the output vector by the caller before calling this
    /// function.
    ///
    /// # Errors
    ///
    /// If the input data to this instance of `Decompress` is not a valid
    /// zlib/deflate stream then this function may return an instance of
    /// `DecompressError` to indicate that the stream of input bytes is corrupted.
    pub fn decompress_vec(&mut self,
                          input: &[u8],
                          output: &mut Vec<u8>,
                          flush: FlushDecompress)
                          -> Result<Status, DecompressError> {
        let cap = output.capacity();
        let len = output.len();

        unsafe {
            let before = self.total_out();
            let ret = {
                let ptr = output.as_mut_ptr().offset(len as isize);
                let out = slice::from_raw_parts_mut(ptr, cap - len);
                self.decompress(input, out, flush)
            };
            output.set_len((self.total_out() - before) as usize + len);
            return ret
        }
    }

    /// Performs the equivalent of replacing this decompression state with a
    /// freshly allocated copy.
    ///
    /// This function may not allocate memory, though, and attempts to reuse any
    /// previously existing resources.
    ///
    /// The argument provided here indicates whether the reset state will
    /// attempt to decode a zlib header first or not.
    pub fn reset(&mut self, zlib_header: bool) {
        self._reset(zlib_header);
    }

    #[cfg(feature = "zlib")]
    fn _reset(&mut self, zlib_header: bool) {
        let bits = if zlib_header {
            ffi::MZ_DEFAULT_WINDOW_BITS
        } else {
            -ffi::MZ_DEFAULT_WINDOW_BITS
        };
        unsafe {
            ffi::inflateReset2(&mut *self.inner.stream_wrapper, bits);
        }
        self.inner.total_out = 0;
        self.inner.total_in = 0;
    }

    #[cfg(not(feature = "zlib"))]
    fn _reset(&mut self, zlib_header: bool) {
        *self = Decompress::new(zlib_header);
    }
}

impl Error for DecompressError {
    fn description(&self) -> &str { "deflate decompression error" }
}

impl From<DecompressError> for io::Error {
    fn from(data: DecompressError) -> io::Error {
        io::Error::new(io::ErrorKind::Other, data)
    }
}

impl fmt::Display for DecompressError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.description().fmt(f)
    }
}

impl Error for CompressError {
    fn description(&self) -> &str { "deflate compression error" }
}

impl From<CompressError> for io::Error {
    fn from(data: CompressError) -> io::Error {
        io::Error::new(io::ErrorKind::Other, data)
    }
}

impl fmt::Display for CompressError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.description().fmt(f)
    }
}

impl Direction for DirCompress {
    unsafe fn destroy(stream: *mut ffi::mz_stream) -> c_int {
        ffi::mz_deflateEnd(stream)
    }
}
impl Direction for DirDecompress {
    unsafe fn destroy(stream: *mut ffi::mz_stream) -> c_int {
        ffi::mz_inflateEnd(stream)
    }
}

impl<D: Direction> Drop for Stream<D> {
    fn drop(&mut self) {
        unsafe {
            let _ = D::destroy(&mut *self.stream_wrapper);
        }
    }
}

#[cfg(test)]
mod tests {
    use std::io::Write;

    use write;
    use {Compression, Decompress, FlushDecompress};

    #[test]
    fn issue51() {
        let data = vec![
            0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0xb3, 0xc9,
            0x28, 0xc9, 0xcd, 0xb1, 0xe3, 0xe5, 0xb2, 0xc9, 0x48, 0x4d, 0x4c, 0xb1,
            0xb3, 0x29, 0xc9, 0x2c, 0xc9, 0x49, 0xb5, 0x33, 0x31, 0x30, 0x51, 0xf0,
            0xcb, 0x2f, 0x51, 0x70, 0xcb, 0x2f, 0xcd, 0x4b, 0xb1, 0xd1, 0x87, 0x08,
            0xda, 0xe8, 0x83, 0x95, 0x00, 0x95, 0x26, 0xe5, 0xa7, 0x54, 0x2a, 0x24,
            0xa5, 0x27, 0xe7, 0xe7, 0xe4, 0x17, 0xd9, 0x2a, 0x95, 0x67, 0x64, 0x96,
            0xa4, 0x2a, 0x81, 0x8c, 0x48, 0x4e, 0xcd, 0x2b, 0x49, 0x2d, 0xb2, 0xb3,
            0xc9, 0x30, 0x44, 0x37, 0x01, 0x28, 0x62, 0xa3, 0x0f, 0x95, 0x06, 0xd9,
            0x05, 0x54, 0x04, 0xe5, 0xe5, 0xa5, 0x67, 0xe6, 0x55, 0xe8, 0x1b, 0xea,
            0x99, 0xe9, 0x19, 0x21, 0xab, 0xd0, 0x07, 0xd9, 0x01, 0x32, 0x53, 0x1f,
            0xea, 0x3e, 0x00, 0x94, 0x85, 0xeb, 0xe4, 0xa8, 0x00, 0x00, 0x00
        ];

        let mut decoded = Vec::with_capacity(data.len()*2);

        let mut d = Decompress::new(false);
        // decompressed whole deflate stream
        assert!(d.decompress_vec(&data[10..], &mut decoded, FlushDecompress::Finish).is_ok());

        // decompress data that has nothing to do with the deflate stream (this
        // used to panic)
        drop(d.decompress_vec(&[0], &mut decoded, FlushDecompress::None));
    }

    #[test]
    fn reset() {
        let string = "hello world".as_bytes();
        let mut zlib = Vec::new();
        let mut deflate = Vec::new();

        let comp = Compression::default();
        write::ZlibEncoder::new(&mut zlib, comp).write_all(string).unwrap();
        write::DeflateEncoder::new(&mut deflate, comp).write_all(string).unwrap();

        let mut dst = [0; 1024];
        let mut decoder = Decompress::new(true);
        decoder.decompress(&zlib, &mut dst, FlushDecompress::Finish).unwrap();
        assert_eq!(decoder.total_out(), string.len() as u64);
        assert!(dst.starts_with(string));

        decoder.reset(false);
        decoder.decompress(&deflate, &mut dst, FlushDecompress::Finish).unwrap();
        assert_eq!(decoder.total_out(), string.len() as u64);
        assert!(dst.starts_with(string));
    }
}