From b77106b526930990f51a306fd70cd00856f481e8 Mon Sep 17 00:00:00 2001 From: Igor Tolmachev Date: Fri, 12 Jul 2024 21:40:08 +0900 Subject: Add zstd compression and fix bugs --- src/utils.rs | 23 ----------- src/utils/cursor.rs | 60 ++++++++++++++++++++++++++++ src/utils/mod.rs | 5 +++ src/utils/read.rs | 23 +++++++++++ src/zip/driver.rs | 9 ++--- src/zip/error.rs | 2 - src/zip/file/info.rs | 7 ++-- src/zip/file/read.rs | 106 +++++++++++++++++++++++--------------------------- src/zip/file/write.rs | 4 +- 9 files changed, 147 insertions(+), 92 deletions(-) delete mode 100644 src/utils.rs create mode 100644 src/utils/cursor.rs create mode 100644 src/utils/mod.rs create mode 100644 src/utils/read.rs (limited to 'src') diff --git a/src/utils.rs b/src/utils.rs deleted file mode 100644 index 185758a..0000000 --- a/src/utils.rs +++ /dev/null @@ -1,23 +0,0 @@ -use std::io::{Read, Result as IOResult}; - -pub trait ReadUtils { - fn read_arr(&mut self) -> IOResult<[u8; S]>; - - fn read_vec(&mut self, size: usize) -> IOResult>; -} - -impl ReadUtils for R { - #[inline] - fn read_arr(&mut self) -> Result<[u8; S], std::io::Error> { - let mut arr = [0; S]; - self.read(&mut arr)?; - Ok(arr) - } - - #[inline] - fn read_vec(&mut self, size: usize) -> Result, std::io::Error> { - let mut vec = vec![0; size]; - self.read(&mut vec)?; - Ok(vec) - } -} diff --git a/src/utils/cursor.rs b/src/utils/cursor.rs new file mode 100644 index 0000000..c41270a --- /dev/null +++ b/src/utils/cursor.rs @@ -0,0 +1,60 @@ +use std::io::{Error, ErrorKind, Read, Result, Seek, SeekFrom, Write}; + +pub struct IoCursor { + io: Io, + cursor: u64, + bounds: (u64, u64), +} + +impl IoCursor { + pub fn new(mut io: Io, start: u64, end: u64) -> Result { + let cursor = io.seek(SeekFrom::Start(start))?; + Ok(Self { + io, + cursor, + bounds: (cursor, end), + }) + } +} + +impl Read for IoCursor { + fn read(&mut self, buf: &mut [u8]) -> Result { + let upper = buf.len().min((self.bounds.1 - self.cursor) as usize); + let bytes = self.io.read(&mut buf[..upper])?; + self.cursor += bytes as u64; + Ok(bytes) + } +} + +impl Write for IoCursor { + fn write(&mut self, buf: &[u8]) -> Result { + let upper = buf.len().min((self.bounds.1 - self.cursor) as usize); + let bytes = self.io.write(&buf[..upper])?; + self.cursor += bytes as u64; + Ok(bytes) + } + + #[inline] + fn flush(&mut self) -> Result<()> { + self.io.flush() + } +} + +impl Seek for IoCursor { + fn seek(&mut self, pos: SeekFrom) -> Result { + self.cursor = match pos { + SeekFrom::Start(0) => return Ok(self.cursor - self.bounds.0), + SeekFrom::Start(offset) => self.bounds.0.checked_add(offset), + SeekFrom::End(offset) => self.bounds.1.checked_add_signed(offset), + SeekFrom::Current(offset) => self.cursor.checked_add_signed(offset), + } + .filter(|v| *v >= self.bounds.0) + .ok_or(Error::new( + ErrorKind::InvalidInput, + "Invalid seek to a negative or overflowing position", + ))? + .min(self.bounds.1); + + Ok(self.io.seek(SeekFrom::Start(self.cursor))? - self.bounds.0) + } +} diff --git a/src/utils/mod.rs b/src/utils/mod.rs new file mode 100644 index 0000000..99a4e13 --- /dev/null +++ b/src/utils/mod.rs @@ -0,0 +1,5 @@ +mod cursor; +mod read; + +pub use cursor::IoCursor; +pub use read::ReadUtils; diff --git a/src/utils/read.rs b/src/utils/read.rs new file mode 100644 index 0000000..185758a --- /dev/null +++ b/src/utils/read.rs @@ -0,0 +1,23 @@ +use std::io::{Read, Result as IOResult}; + +pub trait ReadUtils { + fn read_arr(&mut self) -> IOResult<[u8; S]>; + + fn read_vec(&mut self, size: usize) -> IOResult>; +} + +impl ReadUtils for R { + #[inline] + fn read_arr(&mut self) -> Result<[u8; S], std::io::Error> { + let mut arr = [0; S]; + self.read(&mut arr)?; + Ok(arr) + } + + #[inline] + fn read_vec(&mut self, size: usize) -> Result, std::io::Error> { + let mut vec = vec![0; size]; + self.read(&mut vec)?; + Ok(vec) + } +} diff --git a/src/zip/driver.rs b/src/zip/driver.rs index 3793e31..99b409d 100644 --- a/src/zip/driver.rs +++ b/src/zip/driver.rs @@ -5,7 +5,7 @@ use crate::zip::{ BitFlag, CompressionMethod, ZipError, ZipFileInfo, ZipFileReader, ZipFileWriter, ZipResult, }; use chrono::{DateTime, Local, NaiveDate, NaiveDateTime, NaiveTime}; -use std::collections::HashMap as Map; +use std::collections::BTreeMap as Map; use std::fs::File; use std::io::{Read, Seek, SeekFrom, Write}; @@ -105,7 +105,7 @@ impl ArchiveRead for Zip { }; // Read cd records - let mut files = Map::with_capacity(cd_records as usize); + let mut files = Map::new(); io.seek(SeekFrom::Start(cd_pointer))?; let buf = io.read_vec(cd_size as usize)?; @@ -200,6 +200,7 @@ impl ArchiveRead for Zip { ))?; ep += header.size as usize } + // Skip unrecognized header _ => ep += header.size as usize, } } @@ -226,9 +227,7 @@ impl ArchiveRead for Zip { } fn files(&self) -> Vec<&Self::FileInfo> { - let mut files: Vec<&Self::FileInfo> = self.files.values().collect(); - files.sort_by_key(|f| &f.name); - files + self.files.values().collect() } fn get_file_info(&self, name: &str) -> ZipResult<&Self::FileInfo> { diff --git a/src/zip/error.rs b/src/zip/error.rs index 963daad..c77370b 100644 --- a/src/zip/error.rs +++ b/src/zip/error.rs @@ -19,7 +19,6 @@ pub enum ZipError { InvalidFileName, InvalidFileComment, - NegativeFileOffset, FileNotFound, CompressedDataIsUnseekable, } @@ -58,7 +57,6 @@ impl Display for ZipError { Self::InvalidFileName => write!(f, "Invalid file name"), Self::InvalidFileComment => write!(f, "Invalid file comment"), - Self::NegativeFileOffset => write!(f, "Negative file offset"), Self::FileNotFound => write!(f, "File not found"), Self::CompressedDataIsUnseekable => write!(f, "Compressed data is unseekable"), } diff --git a/src/zip/file/info.rs b/src/zip/file/info.rs index bfec0b7..4e1b293 100644 --- a/src/zip/file/info.rs +++ b/src/zip/file/info.rs @@ -8,7 +8,9 @@ pub enum CompressionMethod { Deflate, BZip2, Lzma, + Zstd, Xz, + Unsupported, } impl CompressionMethod { @@ -18,10 +20,9 @@ impl CompressionMethod { 8 => Ok(Self::Deflate), 12 => Ok(Self::BZip2), 14 => Ok(Self::Lzma), + 93 => Ok(Self::Zstd), 95 => Ok(Self::Xz), - 1..=7 | 9..=11 | 13 | 15..=20 | 93..=94 | 96..=99 => { - Err(ZipError::UnsupportedCompressionMethod.into()) - } + 1..=7 | 9..=11 | 13 | 15..=20 | 94 | 96..=99 => Ok(Self::Unsupported), 21..=92 | 100.. => Err(ZipError::InvalidCompressionMethod.into()), } } diff --git a/src/zip/file/read.rs b/src/zip/file/read.rs index 7d683db..f5a54f3 100644 --- a/src/zip/file/read.rs +++ b/src/zip/file/read.rs @@ -1,27 +1,26 @@ use crate::driver::FileDriver; -use crate::utils::ReadUtils; +use crate::utils::{IoCursor, ReadUtils}; use crate::zip::{CompressionMethod, ZipError, ZipFileInfo, ZipResult}; use bzip2::read::BzDecoder; use flate2::read::DeflateDecoder; use liblzma::read::XzDecoder; use liblzma::stream::{Filters, LzmaOptions, Stream}; use std::io::{ - Error as IoError, ErrorKind as IoErrorKind, Read, Result as IoResult, Seek, SeekFrom, + BufReader, Error as IoError, ErrorKind as IoErrorKind, Read, Result as IoResult, Seek, SeekFrom, }; +use zstd::stream::Decoder as ZstdDecoder; -enum IoProxy { +enum Compression { Store(Io), Deflate(DeflateDecoder), BZip2(BzDecoder), + Zstd(ZstdDecoder<'static, BufReader>), Xz(XzDecoder), } pub struct ZipFileReader<'d, Io: Read> { - io: IoProxy<&'d mut Io>, + io: Compression>, info: &'d ZipFileInfo, - - bounds: (u64, u64), - cursor: u64, } impl<'d, Io: Read> FileDriver for ZipFileReader<'d, Io> { @@ -36,8 +35,8 @@ impl<'d, Io: Read> FileDriver for ZipFileReader<'d, Io> { impl<'d, Io: Read + Seek> ZipFileReader<'d, Io> { pub fn new(io: &'d mut Io, info: &'d ZipFileInfo) -> ZipResult { io.seek(SeekFrom::Start(info.header_pointer))?; - let buf = io.read_arr::<30>()?; + let buf = io.read_arr::<30>()?; if u32::from_le_bytes(buf[..4].try_into().unwrap()) != 0x04034b50 { return Err(ZipError::InvalidFileHeaderSignature.into()); } @@ -45,18 +44,28 @@ impl<'d, Io: Read + Seek> ZipFileReader<'d, Io> { + 30 + u16::from_le_bytes(buf[26..28].try_into().unwrap()) as u64 + u16::from_le_bytes(buf[28..30].try_into().unwrap()) as u64; - let mut cursor = io.seek(SeekFrom::Start(data_pointer))?; Ok(Self { io: match info.compression_method { - CompressionMethod::Store => IoProxy::Store(io), - CompressionMethod::Deflate => IoProxy::Deflate(DeflateDecoder::new(io)), - CompressionMethod::BZip2 => IoProxy::BZip2(BzDecoder::new(io)), + CompressionMethod::Store => Compression::Store(IoCursor::new( + io, + data_pointer, + data_pointer + info.compressed_size, + )?), + CompressionMethod::Deflate => Compression::Deflate(DeflateDecoder::new( + IoCursor::new(io, data_pointer, data_pointer + info.compressed_size)?, + )), + CompressionMethod::BZip2 => Compression::BZip2(BzDecoder::new(IoCursor::new( + io, + data_pointer, + data_pointer + info.compressed_size, + )?)), CompressionMethod::Lzma => { + io.seek(SeekFrom::Start(data_pointer))?; let buf = io.read_arr::<9>()?; - cursor += 9; - IoProxy::Xz(XzDecoder::new_stream( - io, + + Compression::Xz(XzDecoder::new_stream( + IoCursor::new(io, data_pointer + 9, data_pointer + info.compressed_size)?, Stream::new_raw_decoder( Filters::new().lzma1( LzmaOptions::new() @@ -71,18 +80,30 @@ impl<'d, Io: Read + Seek> ZipFileReader<'d, Io> { .unwrap(), )) } - CompressionMethod::Xz => IoProxy::Xz(XzDecoder::new(io)), + CompressionMethod::Zstd => Compression::Zstd( + ZstdDecoder::new(IoCursor::new( + io, + data_pointer, + data_pointer + info.compressed_size, + )?) + .unwrap(), + ), + CompressionMethod::Xz => Compression::Xz(XzDecoder::new(IoCursor::new( + io, + data_pointer, + data_pointer + info.compressed_size, + )?)), + CompressionMethod::Unsupported => { + return Err(ZipError::UnsupportedCompressionMethod.into()) + } }, info, - - bounds: (cursor, data_pointer + info.compressed_size), - cursor, }) } pub fn seekable(&self) -> bool { match self.io { - IoProxy::Store(..) => true, + Compression::Store(..) => true, _ => false, } } @@ -90,49 +111,20 @@ impl<'d, Io: Read + Seek> ZipFileReader<'d, Io> { impl<'d, Io: Read> Read for ZipFileReader<'d, Io> { fn read(&mut self, buf: &mut [u8]) -> IoResult { - let upper = buf.len().min((self.bounds.1 - self.cursor) as usize); - let bytes = match &mut self.io { - IoProxy::Store(io) => io.read(&mut buf[..upper]), - IoProxy::Deflate(io) => io.read(&mut buf[..upper]), - IoProxy::BZip2(io) => io.read(&mut buf[..upper]), - IoProxy::Xz(io) => io.read(&mut buf[..upper]), - }?; - self.cursor += upper as u64; - Ok(bytes) + match &mut self.io { + Compression::Store(io) => io.read(buf), + Compression::Deflate(io) => io.read(buf), + Compression::BZip2(io) => io.read(buf), + Compression::Zstd(io) => io.read(buf), + Compression::Xz(io) => io.read(buf), + } } } impl<'d, Io: Read + Seek> Seek for ZipFileReader<'d, Io> { fn seek(&mut self, pos: SeekFrom) -> IoResult { match &mut self.io { - IoProxy::Store(io) => { - self.cursor = match pos { - SeekFrom::Start(offset) => self.bounds.0 + offset, - SeekFrom::End(offset) => { - let cursor = self.bounds.1.saturating_add_signed(offset); - if cursor < self.bounds.0 { - return Err(IoError::new( - IoErrorKind::InvalidInput, - ZipError::NegativeFileOffset, - )); - } - cursor - } - SeekFrom::Current(offset) => { - let cursor = self.cursor.saturating_add_signed(offset); - if cursor < self.bounds.0 { - return Err(IoError::new( - IoErrorKind::InvalidInput, - ZipError::NegativeFileOffset, - )); - } - cursor - } - } - .min(self.bounds.1); - - Ok(io.seek(SeekFrom::Start(self.cursor))? - self.bounds.0) - } + Compression::Store(io) => io.seek(pos), _ => Err(IoError::new( IoErrorKind::Unsupported, ZipError::CompressedDataIsUnseekable, diff --git a/src/zip/file/write.rs b/src/zip/file/write.rs index 6f5756a..d20c378 100644 --- a/src/zip/file/write.rs +++ b/src/zip/file/write.rs @@ -5,7 +5,7 @@ use flate2::write::DeflateEncoder; use liblzma::write::XzEncoder; use std::io::Write; -enum IoProxy { +enum Compression { Store(Io), Deflate(DeflateEncoder), BZip2(BzEncoder), @@ -13,7 +13,7 @@ enum IoProxy { } pub struct ZipFileWriter<'d, Io: Write> { - io: IoProxy<&'d mut Io>, + io: Compression<&'d mut Io>, info: &'d ZipFileInfo, bounds: (u64, u64), -- cgit v1.2.3