From 51694e1f0b2730915e0a57ec6d8de503cf06ef9a Mon Sep 17 00:00:00 2001 From: Igor Tolmachev Date: Thu, 27 Jun 2024 16:15:00 +0900 Subject: Create file driver and implement file reader --- src/archive.rs | 20 +++--- src/driver/driver.rs | 27 +++++--- src/driver/file.rs | 9 ++- src/driver/mod.rs | 2 +- src/error.rs | 6 +- src/file.rs | 38 ++++++++++++ src/lib.rs | 3 + src/zip/archive.rs | 4 +- src/zip/driver.rs | 38 ++++++------ src/zip/error.rs | 8 +++ src/zip/file.rs | 160 ------------------------------------------------ src/zip/file_driver.rs | 99 ++++++++++++++++++++++++++++++ src/zip/file_info.rs | 163 +++++++++++++++++++++++++++++++++++++++++++++++++ src/zip/mod.rs | 6 +- src/zip/tests.rs | 2 +- 15 files changed, 380 insertions(+), 205 deletions(-) create mode 100644 src/file.rs delete mode 100644 src/zip/file.rs create mode 100644 src/zip/file_driver.rs create mode 100644 src/zip/file_info.rs (limited to 'src') diff --git a/src/archive.rs b/src/archive.rs index e635007..4a70867 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -1,5 +1,5 @@ use crate::driver::{ArchiveRead, ArchiveWrite, Driver}; -use crate::ArchiveResult; +use crate::{ArchiveFile, ArchiveResult}; use std::fs::File; use std::io::{Read, Write}; use std::path::Path; @@ -10,9 +10,9 @@ pub struct Archive { impl Archive where - D::IO: std::io::Read, + D::Io: std::io::Read, { - pub fn read(io: D::IO) -> ArchiveResult { + pub fn read(io: D::Io) -> ArchiveResult { Ok(Self { driver: D::read(io)?, }) @@ -20,18 +20,22 @@ where pub fn read_from_file(path: impl AsRef) -> ArchiveResult where - D: ArchiveRead, + D: ArchiveRead, { Self::read(File::open(path)?) } - pub fn files(&self) -> Vec<&D::File> { + pub fn files(&self) -> Vec<&D::FileInfo> { self.driver.files() } - pub fn get_file(&self, name: &str) -> Option<&D::File> { - self.driver.get_file(name) + pub fn get_file_info(&self, name: &str) -> Option<&D::FileInfo> { + self.driver.get_file_info(name) + } + + pub fn get_file_reader<'d>(&'d mut self, name: &str) -> Option>> { + Some(ArchiveFile::new(self.driver.get_file_reader(name)?)) } } -impl Archive where D::IO: Read + Write {} +impl Archive where D::Io: Read + Write {} diff --git a/src/driver/driver.rs b/src/driver/driver.rs index 9c18e1f..5bd2319 100644 --- a/src/driver/driver.rs +++ b/src/driver/driver.rs @@ -1,4 +1,4 @@ -use crate::driver::ArchiveFile; +use crate::driver::{ArchiveFileInfo, FileDriver}; use crate::ArchiveResult; use std::error::Error; use std::io::{Read, Write}; @@ -6,26 +6,33 @@ use std::io::{Read, Write}; pub trait Driver: Sized { type Error: Error; - type IO; - type File: ArchiveFile; + type Io; + type FileInfo: ArchiveFileInfo; + type FileDriver<'d>: FileDriver + where + Self::FileInfo: 'd, + Self::Io: 'd; } pub trait ArchiveRead: Driver where - Self::IO: Read, + Self::Io: Read, { // Create driver instance - fn read(io: Self::IO) -> ArchiveResult; + fn read(io: Self::Io) -> ArchiveResult; // Return vec of files (sorted by name) - fn files(&self) -> Vec<&Self::File>; + fn files(&self) -> Vec<&Self::FileInfo>; - // Return file by name - fn get_file(&self, name: &str) -> Option<&Self::File>; + // Return file info by name + fn get_file_info(&self, name: &str) -> Option<&Self::FileInfo>; + + // Return file reader by name + fn get_file_reader<'d>(&'d mut self, name: &str) -> Option>; } -pub trait ArchiveWrite: ArchiveRead +pub trait ArchiveWrite: Driver where - Self::IO: Read + Write, + Self::Io: Read + Write, { } diff --git a/src/driver/file.rs b/src/driver/file.rs index a4974f3..125c9c3 100644 --- a/src/driver/file.rs +++ b/src/driver/file.rs @@ -1 +1,8 @@ -pub trait ArchiveFile {} +pub trait ArchiveFileInfo {} + +pub trait FileDriver { + type Io; + type FileInfo: ArchiveFileInfo; + + fn info(&self) -> &Self::FileInfo; +} diff --git a/src/driver/mod.rs b/src/driver/mod.rs index 36ee6b5..b637a34 100644 --- a/src/driver/mod.rs +++ b/src/driver/mod.rs @@ -2,4 +2,4 @@ mod driver; mod file; pub use driver::{ArchiveRead, ArchiveWrite, Driver}; -pub use file::ArchiveFile; +pub use file::{ArchiveFileInfo, FileDriver}; diff --git a/src/error.rs b/src/error.rs index 7172d04..97a4e62 100644 --- a/src/error.rs +++ b/src/error.rs @@ -6,21 +6,21 @@ pub type ArchiveResult = Result>; #[derive(Debug)] pub enum ArchiveError { - IO { error: io::Error }, + Io { error: io::Error }, Serde { message: String }, Archivator { module: String, error: E }, } impl From for ArchiveError { fn from(value: io::Error) -> Self { - Self::IO { error: value } + Self::Io { error: value } } } impl Display for ArchiveError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::IO { error } => writeln!(f, "IO: {error}"), + Self::Io { error } => writeln!(f, "IO: {error}"), Self::Serde { message } => writeln!(f, "Serde: {message}"), Self::Archivator { module, error } => writeln!(f, "{module}: {error}"), } diff --git a/src/file.rs b/src/file.rs new file mode 100644 index 0000000..f284b98 --- /dev/null +++ b/src/file.rs @@ -0,0 +1,38 @@ +use crate::driver::FileDriver; +use std::io::{Read, Result as IoResult, Seek, Write}; + +pub struct ArchiveFile { + pub(crate) driver: D, +} + +impl ArchiveFile { + pub fn new(driver: D) -> Self { + Self { driver } + } + + pub fn info(&self) -> &D::FileInfo { + self.driver.info() + } +} + +impl Read for ArchiveFile { + fn read(&mut self, buf: &mut [u8]) -> IoResult { + self.driver.read(buf) + } +} + +impl Write for ArchiveFile { + fn write(&mut self, buf: &[u8]) -> IoResult { + self.driver.write(buf) + } + + fn flush(&mut self) -> IoResult<()> { + self.driver.flush() + } +} + +impl Seek for ArchiveFile { + fn seek(&mut self, pos: std::io::SeekFrom) -> IoResult { + self.driver.seek(pos) + } +} diff --git a/src/lib.rs b/src/lib.rs index c9cd1ad..26722c2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ mod archive; mod error; +mod file; pub mod driver; pub mod structs; @@ -7,4 +8,6 @@ pub mod zip; pub use archive::Archive; pub use error::{ArchiveError, ArchiveResult}; +pub use file::ArchiveFile; + pub use zip::Zip; diff --git a/src/zip/archive.rs b/src/zip/archive.rs index 9a244fc..79e8ca1 100644 --- a/src/zip/archive.rs +++ b/src/zip/archive.rs @@ -1,10 +1,10 @@ use crate::{Archive, Zip}; use std::io::{Read, Seek, Write}; -impl Archive> { +impl Archive> { pub fn comment(&self) -> &String { self.driver.comment() } } -impl Archive> {} +impl Archive> {} diff --git a/src/zip/driver.rs b/src/zip/driver.rs index 650344e..0db845d 100644 --- a/src/zip/driver.rs +++ b/src/zip/driver.rs @@ -1,27 +1,27 @@ use crate::driver::{ArchiveRead, ArchiveWrite, Driver}; -use crate::zip::file::{BitFlag, CompressionMethod}; use crate::zip::structs::{deserialize, EOCDR64Locator, ExtraHeader, CDR, EOCDR, EOCDR64}; -use crate::zip::{ZipError, ZipFile, ZipResult}; +use crate::zip::{BitFlag, CompressionMethod, ZipError, ZipFile, ZipFileInfo, ZipResult}; use chrono::{Local, NaiveDate, NaiveDateTime, NaiveTime}; use std::collections::HashMap as Map; use std::io::{Read, Seek, SeekFrom, Write}; -pub struct Zip { - io: IO, +pub struct Zip { + io: Io, - files: Map, + files: Map, comment: String, } -impl Driver for Zip { +impl Driver for Zip { type Error = ZipError; - type IO = IO; - type File = ZipFile; + type Io = Io; + type FileDriver<'d> = ZipFile<'d, Self::Io> where Self::Io: 'd; + type FileInfo = ZipFileInfo; } -impl ArchiveRead for Zip { - fn read(mut io: Self::IO) -> ZipResult { +impl ArchiveRead for Zip { + fn read(mut io: Self::Io) -> ZipResult { // Search eocdr let limit = 65557.min(io.seek(SeekFrom::End(0))?) as i64; let start = io.seek(SeekFrom::End(-limit))?; @@ -137,7 +137,7 @@ impl ArchiveRead for Zip { files.insert( name.clone(), - ZipFile::new( + ZipFileInfo::new( CompressionMethod::from_struct_id(cdr.compression_method)?, BitFlag::new(cdr.bit_flag), NaiveDateTime::new( @@ -169,23 +169,27 @@ impl ArchiveRead for Zip { Ok(Self { io, files, comment }) } - fn files(&self) -> Vec<&Self::File> { - let mut files: Vec<&Self::File> = self.files.values().collect(); + fn files(&self) -> Vec<&Self::FileInfo> { + let mut files: Vec<&Self::FileInfo> = self.files.values().collect(); files.sort_by_key(|f| &f.name); files } - fn get_file(&self, name: &str) -> Option<&Self::File> { + fn get_file_info(&self, name: &str) -> Option<&Self::FileInfo> { self.files.get(name) } + + fn get_file_reader<'d>(&'d mut self, name: &str) -> Option> { + Some(ZipFile::new(&mut self.io, self.files.get(name)?).unwrap()) + } } -impl Zip { +impl Zip { pub fn comment(&self) -> &String { &self.comment } } -impl ArchiveWrite for Zip {} +impl ArchiveWrite for Zip {} -impl Zip {} +impl Zip {} diff --git a/src/zip/error.rs b/src/zip/error.rs index 1c5527c..d82de78 100644 --- a/src/zip/error.rs +++ b/src/zip/error.rs @@ -8,6 +8,7 @@ pub type ZipResult = ArchiveResult; pub enum ZipError { EOCDRNotFound, InvalidEOCDR64Signature, + InvalidFileHeaderSignature, InvalidCDRSignature, InvalidArchiveComment, @@ -17,6 +18,8 @@ pub enum ZipError { InvalidTime, InvalidFileName, InvalidFileComment, + + NegativeFileOffset, } impl From for ArchiveError { @@ -38,6 +41,9 @@ impl Display for ZipError { "Invalid signature of zip64 end of central directory record" ) } + Self::InvalidFileHeaderSignature => { + write!(f, "Invalid file header signature") + } Self::InvalidCDRSignature => { write!(f, "Invalid signature of central directory record") } @@ -49,6 +55,8 @@ impl Display for ZipError { Self::InvalidTime => write!(f, "Invalid time"), Self::InvalidFileName => write!(f, "Invalid file name"), Self::InvalidFileComment => write!(f, "Invalid file comment"), + + Self::NegativeFileOffset => write!(f, "Negative file offset"), } } } diff --git a/src/zip/file.rs b/src/zip/file.rs deleted file mode 100644 index 5b0723f..0000000 --- a/src/zip/file.rs +++ /dev/null @@ -1,160 +0,0 @@ -use crate::driver::ArchiveFile; -use crate::zip::{ZipError, ZipResult}; -use chrono::{DateTime, Local}; - -pub enum CompressionMethod { - Store, - Deflate, - BZIP2, - LZMA, - ZStd, - XZ, -} - -impl CompressionMethod { - pub(crate) fn from_struct_id(id: u16) -> ZipResult { - match id { - 0 => Ok(Self::Store), - 8 => Ok(Self::Deflate), - 12 => Ok(Self::BZIP2), - 14 => Ok(Self::LZMA), - 93 => Ok(Self::ZStd), - 95 => Ok(Self::XZ), - 1..=7 | 9..=11 | 13 | 15..=20 | 94 | 96..=99 => { - Err(ZipError::UnsupportedCompressionMethod.into()) - } - 21..=92 | 100.. => Err(ZipError::InvalidCompressionMethod.into()), - } - } -} - -pub struct BitFlag { - flag: u16, -} - -pub mod bit { - #[derive(Debug, PartialEq, Eq)] - pub enum DeflateMode { - Normal, - Maximum, - Fast, - SuperFast, - } -} - -macro_rules! get_set_bit_flag { - {$($get:ident $set:ident $bit:expr)+} => { - $( - pub fn $get(&self) -> bool { - self.get_bit($bit) - } - - pub fn $set(&mut self, enable: bool) { - self.set_bit($bit, enable); - } - )* - }; -} - -impl BitFlag { - pub fn new(flag: u16) -> Self { - Self { flag } - } - - #[inline] - fn get_bit(&self, bit: u32) -> bool { - (self.flag & 2u16.pow(bit)) > 0 - } - - #[inline] - fn set_bit(&mut self, bit: u32, enable: bool) { - if enable { - self.flag |= 2u16.pow(bit); - } else { - self.flag &= !2u16.pow(bit); - } - } - - pub fn deflate_mode(&self) -> bit::DeflateMode { - match self.flag & 6 { - 0 => bit::DeflateMode::Normal, - 2 => bit::DeflateMode::Maximum, - 4 => bit::DeflateMode::Fast, - 6 => bit::DeflateMode::SuperFast, - _ => panic!("impossible"), - } - } - - pub fn set_deflate_mode(&mut self, mode: bit::DeflateMode) { - match mode { - bit::DeflateMode::Normal => { - self.set_bit(1, false); - self.set_bit(2, false); - } - bit::DeflateMode::Maximum => { - self.set_bit(1, true); - self.set_bit(2, false); - } - bit::DeflateMode::Fast => { - self.set_bit(1, false); - self.set_bit(2, true); - } - bit::DeflateMode::SuperFast => { - self.set_bit(1, true); - self.set_bit(2, true); - } - } - } - - get_set_bit_flag! { - is_encrypted set_encrypted 0 - is_imploding_8k set_imploding_8k 1 - is_imploding_3sf_trees set_imploding_3sf_trees 2 - is_lzma_has_eos_marker set_lzma_has_eos_marker 1 - is_has_data_descriptor set_has_data_descriptor 3 - is_patched_data set_patched_data 5 - is_strong_encryption set_strong_encryption 6 - is_utf8 set_utf8 11 - is_cd_encryption set_cd_encryption 13 - } -} - -pub struct ZipFile { - pub compression_method: CompressionMethod, - pub bit_flag: BitFlag, - pub datetime: DateTime, - pub crc: u32, - pub compressed_size: u64, - pub size: u64, - pub header_pointer: u64, - pub name: String, - pub comment: String, -} - -impl ZipFile { - pub fn new( - compression_method: CompressionMethod, - bit_flag: BitFlag, - datetime: DateTime, - crc: u32, - compressed_size: u64, - size: u64, - header_pointer: u64, - name: String, - comment: String, - ) -> Self { - Self { - compression_method, - bit_flag, - datetime, - crc, - compressed_size, - size, - header_pointer, - name, - comment, - } - } -} - -impl ArchiveFile for ZipFile {} diff --git a/src/zip/file_driver.rs b/src/zip/file_driver.rs new file mode 100644 index 0000000..47b4242 --- /dev/null +++ b/src/zip/file_driver.rs @@ -0,0 +1,99 @@ +use crate::driver::FileDriver; +use crate::zip::{ZipError, ZipFileInfo, ZipResult}; +use std::io::{ + Error as IoError, ErrorKind as IoErrorKind, Read, Result as IoResult, Seek, SeekFrom, Take, + Write, +}; + +pub struct ZipFile<'d, Io> { + io: &'d mut Io, + info: &'d ZipFileInfo, + + bounds: (u64, u64), + cursor: u64, +} + +impl<'d, Io> FileDriver for ZipFile<'d, Io> { + type Io = Io; + type FileInfo = ZipFileInfo; + + fn info(&self) -> &Self::FileInfo { + self.info + } +} + +impl<'d, Io: Read + Seek> ZipFile<'d, Io> { + pub fn new(io: &'d mut Io, info: &'d ZipFileInfo) -> ZipResult { + io.seek(SeekFrom::Start(info.header_pointer))?; + let buf = { + let mut buf = [0; 30]; + io.read(&mut buf)?; + buf + }; + if u32::from_le_bytes(buf[..4].try_into().unwrap()) != 0x04034b50 { + return Err(ZipError::InvalidFileHeaderSignature.into()); + } + let data_pointer = info.header_pointer + + 30 + + u16::from_le_bytes(buf[26..28].try_into().unwrap()) as u64 + + u16::from_le_bytes(buf[28..30].try_into().unwrap()) as u64; + io.seek(SeekFrom::Start(data_pointer))?; + + Ok(Self { + io, + info, + + bounds: (data_pointer, data_pointer + info.compressed_size), + cursor: data_pointer, + }) + } +} + +impl<'d, Io: Read> Read for ZipFile<'d, Io> { + fn read(&mut self, buf: &mut [u8]) -> IoResult { + let upper = buf.len().min((self.bounds.1 - self.cursor) as usize); + self.cursor += upper as u64; + self.io.read(&mut buf[..upper]) + } +} + +impl<'d, Io: Write> Write for ZipFile<'d, Io> { + fn write(&mut self, buf: &[u8]) -> IoResult { + todo!() + } + + fn flush(&mut self) -> IoResult<()> { + todo!() + } +} + +impl<'d, Io: Seek> Seek for ZipFile<'d, Io> { + fn seek(&mut self, pos: SeekFrom) -> IoResult { + self.cursor = match pos { + SeekFrom::Start(offset) => self.bounds.0 + offset, + SeekFrom::End(offset) => { + let cursor = self.bounds.1.saturating_add_signed(offset); + if cursor < self.bounds.0 { + return Err(IoError::new( + IoErrorKind::InvalidInput, + ZipError::NegativeFileOffset, + )); + } + cursor + } + SeekFrom::Current(offset) => { + let cursor = self.cursor.saturating_add_signed(offset); + if cursor < self.bounds.0 { + return Err(IoError::new( + IoErrorKind::InvalidInput, + ZipError::NegativeFileOffset, + )); + } + cursor + } + } + .min(self.bounds.1); + + Ok(self.io.seek(SeekFrom::Start(self.cursor))? - self.bounds.0) + } +} diff --git a/src/zip/file_info.rs b/src/zip/file_info.rs new file mode 100644 index 0000000..88322be --- /dev/null +++ b/src/zip/file_info.rs @@ -0,0 +1,163 @@ +use crate::driver::ArchiveFileInfo; +use crate::zip::{ZipError, ZipResult}; +use chrono::{DateTime, Local}; + +#[derive(Debug)] +pub enum CompressionMethod { + Store, + Deflate, + BZIP2, + LZMA, + ZStd, + XZ, +} + +impl CompressionMethod { + pub(crate) fn from_struct_id(id: u16) -> ZipResult { + match id { + 0 => Ok(Self::Store), + 8 => Ok(Self::Deflate), + 12 => Ok(Self::BZIP2), + 14 => Ok(Self::LZMA), + 93 => Ok(Self::ZStd), + 95 => Ok(Self::XZ), + 1..=7 | 9..=11 | 13 | 15..=20 | 94 | 96..=99 => { + Err(ZipError::UnsupportedCompressionMethod.into()) + } + 21..=92 | 100.. => Err(ZipError::InvalidCompressionMethod.into()), + } + } +} + +#[derive(Debug)] +pub struct BitFlag { + flag: u16, +} + +pub mod bit { + #[derive(Debug, PartialEq, Eq)] + pub enum DeflateMode { + Normal, + Maximum, + Fast, + SuperFast, + } +} + +macro_rules! get_set_bit_flag { + {$($get:ident $set:ident $bit:expr)+} => { + $( + pub fn $get(&self) -> bool { + self.get_bit($bit) + } + + pub fn $set(&mut self, enable: bool) { + self.set_bit($bit, enable); + } + )* + }; +} + +impl BitFlag { + pub fn new(flag: u16) -> Self { + Self { flag } + } + + #[inline] + fn get_bit(&self, bit: u32) -> bool { + (self.flag & 2u16.pow(bit)) > 0 + } + + #[inline] + fn set_bit(&mut self, bit: u32, enable: bool) { + if enable { + self.flag |= 2u16.pow(bit); + } else { + self.flag &= !2u16.pow(bit); + } + } + + pub fn deflate_mode(&self) -> bit::DeflateMode { + match self.flag & 6 { + 0 => bit::DeflateMode::Normal, + 2 => bit::DeflateMode::Maximum, + 4 => bit::DeflateMode::Fast, + 6 => bit::DeflateMode::SuperFast, + _ => panic!("impossible"), + } + } + + pub fn set_deflate_mode(&mut self, mode: bit::DeflateMode) { + match mode { + bit::DeflateMode::Normal => { + self.set_bit(1, false); + self.set_bit(2, false); + } + bit::DeflateMode::Maximum => { + self.set_bit(1, true); + self.set_bit(2, false); + } + bit::DeflateMode::Fast => { + self.set_bit(1, false); + self.set_bit(2, true); + } + bit::DeflateMode::SuperFast => { + self.set_bit(1, true); + self.set_bit(2, true); + } + } + } + + get_set_bit_flag! { + is_encrypted set_encrypted 0 + is_imploding_8k set_imploding_8k 1 + is_imploding_3sf_trees set_imploding_3sf_trees 2 + is_lzma_has_eos_marker set_lzma_has_eos_marker 1 + is_has_data_descriptor set_has_data_descriptor 3 + is_patched_data set_patched_data 5 + is_strong_encryption set_strong_encryption 6 + is_utf8 set_utf8 11 + is_cd_encryption set_cd_encryption 13 + } +} + +#[derive(Debug)] +pub struct ZipFileInfo { + pub compression_method: CompressionMethod, + pub bit_flag: BitFlag, + pub datetime: DateTime, + pub crc: u32, + pub compressed_size: u64, + pub size: u64, + pub header_pointer: u64, + pub name: String, + pub comment: String, +} + +impl ZipFileInfo { + pub fn new( + compression_method: CompressionMethod, + bit_flag: BitFlag, + datetime: DateTime, + crc: u32, + compressed_size: u64, + size: u64, + header_pointer: u64, + name: String, + comment: String, + ) -> Self { + Self { + compression_method, + bit_flag, + datetime, + crc, + compressed_size, + size, + header_pointer, + name, + comment, + } + } +} + +impl ArchiveFileInfo for ZipFileInfo {} diff --git a/src/zip/mod.rs b/src/zip/mod.rs index 89d748b..3fe8384 100644 --- a/src/zip/mod.rs +++ b/src/zip/mod.rs @@ -1,12 +1,14 @@ mod archive; mod driver; mod error; -mod file; +mod file_driver; +mod file_info; mod structs; pub use driver::Zip; pub use error::{ZipError, ZipResult}; -pub use file::{bit, BitFlag, CompressionMethod, ZipFile}; +pub use file_driver::ZipFile; +pub use file_info::{bit, BitFlag, CompressionMethod, ZipFileInfo}; #[cfg(test)] mod tests; diff --git a/src/zip/tests.rs b/src/zip/tests.rs index d64e626..05e076d 100644 --- a/src/zip/tests.rs +++ b/src/zip/tests.rs @@ -1,4 +1,4 @@ -use crate::zip::file::{bit::DeflateMode, BitFlag}; +use crate::zip::file_info::{bit::DeflateMode, BitFlag}; #[test] fn test_bit_flag() { -- cgit v1.2.3