From f8c3c93824645a807d28b760855b4676ea479720 Mon Sep 17 00:00:00 2001 From: Igor Tolmachev Date: Sat, 15 Jun 2024 03:30:50 +0900 Subject: Add simple zip reader --- .vscode/settings.json | 7 +-- Cargo.toml | 2 + src/archive.rs | 20 +++++++ src/driver/driver.rs | 25 +++++++++ src/driver/file.rs | 1 + src/driver/mod.rs | 5 ++ src/error.rs | 35 ++++++++++++ src/lib.rs | 10 ++++ src/structs/de.rs | 0 src/structs/mod.rs | 2 + src/structs/ser.rs | 6 +++ src/zip/driver.rs | 143 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/zip/error.rs | 58 ++++++++++++++++++++ src/zip/file.rs | 45 ++++++++++++++++ src/zip/mod.rs | 7 +++ src/zip/structs.rs | 52 ++++++++++++++++++ tests/usage.rs | 19 +++++++ 17 files changed, 434 insertions(+), 3 deletions(-) create mode 100644 src/archive.rs create mode 100644 src/driver/driver.rs create mode 100644 src/driver/file.rs create mode 100644 src/driver/mod.rs create mode 100644 src/error.rs create mode 100644 src/structs/de.rs create mode 100644 src/structs/mod.rs create mode 100644 src/structs/ser.rs create mode 100644 src/zip/driver.rs create mode 100644 src/zip/error.rs create mode 100644 src/zip/file.rs create mode 100644 src/zip/mod.rs create mode 100644 src/zip/structs.rs create mode 100644 tests/usage.rs diff --git a/.vscode/settings.json b/.vscode/settings.json index c36ecad..193c892 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,11 +1,12 @@ { "cSpell.words": [ "archivator", + "bincode", "chrono", "datatypes", - "eocd", - "LZMA", - "Zstandard" + "datetime", + "eocdr", + "rposition" ], "rust-analyzer.linkedProjects": ["./Cargo.toml", "./Cargo.toml"] } diff --git a/Cargo.toml b/Cargo.toml index 93d39e4..d511387 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,4 +12,6 @@ categories = ["compression", "filesystem"] exclude = [".vscode/"] [dependencies] +bincode = "1.3.3" chrono = "0.4.29" +serde = { version = "1.0.203", features = ["derive"] } diff --git a/src/archive.rs b/src/archive.rs new file mode 100644 index 0000000..a422f9e --- /dev/null +++ b/src/archive.rs @@ -0,0 +1,20 @@ +use crate::driver::{ArchiveRead, ArchiveWrite, Driver}; +use crate::ArchiveResult; +use std::io::{Read, Write}; + +pub struct Archive { + pub(crate) driver: D, +} + +impl Archive +where + D::IO: std::io::Read, +{ + pub fn new(io: D::IO) -> ArchiveResult { + Ok(Self { + driver: D::read(io)?, + }) + } +} + +impl Archive where D::IO: Read + Write {} diff --git a/src/driver/driver.rs b/src/driver/driver.rs new file mode 100644 index 0000000..3a8ed16 --- /dev/null +++ b/src/driver/driver.rs @@ -0,0 +1,25 @@ +use crate::driver::ArchiveFile; +use crate::ArchiveResult; +use std::error::Error; +use std::io::{Read, Write}; + +pub trait Driver: Sized { + type Error: Error; + + type IO; + type File: ArchiveFile; +} + +pub trait ArchiveRead: Driver +where + Self::IO: Read, +{ + // Create driver instance + fn read(io: Self::IO) -> ArchiveResult; +} + +pub trait ArchiveWrite: ArchiveRead +where + Self::IO: Read + Write, +{ +} diff --git a/src/driver/file.rs b/src/driver/file.rs new file mode 100644 index 0000000..a4974f3 --- /dev/null +++ b/src/driver/file.rs @@ -0,0 +1 @@ +pub trait ArchiveFile {} diff --git a/src/driver/mod.rs b/src/driver/mod.rs new file mode 100644 index 0000000..36ee6b5 --- /dev/null +++ b/src/driver/mod.rs @@ -0,0 +1,5 @@ +mod driver; +mod file; + +pub use driver::{ArchiveRead, ArchiveWrite, Driver}; +pub use file::ArchiveFile; diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..6d7aba4 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,35 @@ +use std::error::Error; +use std::fmt::Display; +use std::io; + +pub type ArchiveResult = Result>; + +#[derive(Debug)] +pub enum ArchiveError { + IO(io::Error), + Driver { name: &'static str, error: E }, +} + +impl From for ArchiveError { + fn from(value: io::Error) -> Self { + Self::IO(value) + } +} + +impl Display for ArchiveError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ArchiveError::IO(error) => write!(f, "{error}"), + ArchiveError::Driver { name, error } => write!(f, "{name}: {error}"), + } + } +} + +impl Error for ArchiveError { + fn source(&self) -> Option<&(dyn Error + 'static)> { + match self { + Self::IO(error) => Some(error), + _ => None, + } + } +} diff --git a/src/lib.rs b/src/lib.rs index e69de29..236c58d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -0,0 +1,10 @@ +mod archive; +mod error; +mod structs; + +pub mod driver; +pub mod zip; + +pub use archive::Archive; +pub use error::{ArchiveError, ArchiveResult}; +pub use zip::Zip; diff --git a/src/structs/de.rs b/src/structs/de.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/structs/mod.rs b/src/structs/mod.rs new file mode 100644 index 0000000..d7e9daa --- /dev/null +++ b/src/structs/mod.rs @@ -0,0 +1,2 @@ +mod de; +mod ser; diff --git a/src/structs/ser.rs b/src/structs/ser.rs new file mode 100644 index 0000000..a3fe291 --- /dev/null +++ b/src/structs/ser.rs @@ -0,0 +1,6 @@ +use crate::ArchiveError; +use serde::{ser, Serialize}; + +pub struct ArchiveSerializer { + bin: Vec, +} diff --git a/src/zip/driver.rs b/src/zip/driver.rs new file mode 100644 index 0000000..733d44b --- /dev/null +++ b/src/zip/driver.rs @@ -0,0 +1,143 @@ +use crate::driver::{ArchiveRead, ArchiveWrite, Driver}; +use crate::zip::error::{ZipError, ZipResult}; +use crate::zip::structs::{EOCDR64Locator, CDR, EOCDR, EOCDR64}; +use crate::zip::ZipFile; +use std::collections::HashMap as Map; +use std::fs::File; +use std::io::{Read, Seek, SeekFrom, Write}; + +pub struct Zip { + io: IO, + + files: Map, + comment: String, +} + +impl Driver for Zip { + type Error = ZipError; + + type IO = IO; + type File = ZipFile; +} + +impl ArchiveRead for Zip { + fn read(mut io: Self::IO) -> ZipResult { + // Search eocdr + let limit = 65557.min(io.seek(SeekFrom::End(0))?) as i64; + let start = io.seek(SeekFrom::End(-limit))?; + let pos = start + { + let mut buf = vec![0; limit as usize]; + io.read(&mut buf)?; + buf[..buf.len() - 18] + .windows(4) + .rposition(|v| u32::from_le_bytes(v.try_into().unwrap()) == 0x06054b50) + .ok_or(ZipError::EOCDRNotFound)? as u64 + }; + + // Read eocdr + io.seek(SeekFrom::Start(pos + 4))?; + let buf = { + let mut buf = [0; 18]; + io.read(&mut buf)?; + buf + }; + let eocdr: EOCDR = bincode::deserialize(&buf).map_err(|_| ZipError::InvalidEOCDR)?; + let comment = { + let mut buf = vec![0; eocdr.comment_len as usize]; + io.read(&mut buf)?; + String::from_utf8(buf).map_err(|_| ZipError::InvalidArchiveComment)? + }; + + // Try to find eocdr64locator + io.seek(SeekFrom::Start(pos - 20))?; + let buf = { + let mut buf = [0; 20]; + io.read(&mut buf)?; + buf + }; + let (cd_pointer, cd_size, cd_records) = + if u32::from_le_bytes(buf[0..4].try_into().unwrap()) == 0x07064b50 { + let eocdr64locator: EOCDR64Locator = + bincode::deserialize(&buf[4..]).map_err(|_| ZipError::InvalidEOCDR64Locator)?; + + io.seek(SeekFrom::Start(eocdr64locator.eocdr64_pointer))?; + let buf = { + let mut buf = [0; 56]; + io.read(&mut buf)?; + buf + }; + if u32::from_le_bytes(buf[0..4].try_into().unwrap()) != 0x06064b50 { + return Err(ZipError::InvalidEOCDR64Signature.into()); + } + let eocdr64: EOCDR64 = + bincode::deserialize(&buf[4..]).map_err(|_| ZipError::InvalidEOCDR64)?; + + (eocdr64.cd_pointer, eocdr64.cd_size, eocdr64.cd_records) + } else { + ( + eocdr.cd_pointer as u64, + eocdr.cd_size as u64, + eocdr.cd_records as u64, + ) + }; + + // Read cd records + let mut files = Map::with_capacity(cd_records as usize); + io.seek(SeekFrom::Start(cd_pointer))?; + let buf = { + let mut buf = vec![0; cd_size as usize]; + io.read(&mut buf)?; + buf + }; + let mut records = buf.as_slice(); + + for _ in 0..cd_records { + let buf = { + let mut buf = [0; 46]; + records.read(&mut buf)?; + buf + }; + + if u32::from_le_bytes(buf[0..4].try_into().unwrap()) != 0x02014b50 { + return Err(ZipError::InvalidCDRSignature.into()); + } + let cdr: CDR = bincode::deserialize(&buf[4..]).map_err(|_| ZipError::InvalidCDR)?; + let name = { + let mut buf = vec![0; cdr.name_len as usize]; + records.read(&mut buf)?; + String::from_utf8(buf).map_err(|_| ZipError::InvalidFileName)? + }; + let extra_fields = { + let mut buf = vec![0; cdr.extra_field_len as usize]; + records.read(&mut buf)?; + buf + }; + let comment = { + let mut buf = vec![0; cdr.comment_len as usize]; + records.read(&mut buf)?; + String::from_utf8(buf).map_err(|_| ZipError::InvalidFileComment)? + }; + + files.insert( + name.clone(), + ZipFile::new( + name, + cdr.dos_date, + cdr.dos_time, + cdr.compression_method, + cdr.compressed_size as u64, + cdr.size as u64, + comment, + ), + ); + } + + Ok(Self { io, files, comment }) + } +} + +impl Zip {} + +impl ArchiveWrite for Zip {} + +impl Zip {} diff --git a/src/zip/error.rs b/src/zip/error.rs new file mode 100644 index 0000000..ad1989a --- /dev/null +++ b/src/zip/error.rs @@ -0,0 +1,58 @@ +use crate::{ArchiveError, ArchiveResult}; +use std::error::Error; +use std::fmt::Display; + +pub type ZipResult = ArchiveResult; + +#[derive(Debug)] +pub enum ZipError { + EOCDRNotFound, + InvalidEOCDR, + InvalidArchiveComment, + + InvalidEOCDR64Locator, + InvalidEOCDR64Signature, + InvalidEOCDR64, + + InvalidCDRSignature, + InvalidCDR, + InvalidFileName, + InvalidFileComment, +} + +impl From for ArchiveError { + fn from(value: ZipError) -> Self { + return ArchiveError::Driver { + name: "Zip", + error: value, + }; + } +} + +impl Display for ZipError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ZipError::EOCDRNotFound => write!(f, "End of central directory record not found"), + ZipError::InvalidEOCDR => write!(f, "Invalid end of central directory record"), + ZipError::InvalidArchiveComment => write!(f, "Invalid archive comment"), + ZipError::InvalidEOCDR64Locator => { + write!(f, "Invalid zip64 end of central directory locator") + } + ZipError::InvalidEOCDR64Signature => { + write!( + f, + "Invalid signature of zip64 end of central directory record" + ) + } + ZipError::InvalidEOCDR64 => write!(f, "Invalid zip64 end of central directory record"), + ZipError::InvalidCDRSignature => { + write!(f, "Invalid signature of central directory record") + } + ZipError::InvalidCDR => write!(f, "Invalid central directory record"), + ZipError::InvalidFileName => write!(f, "Invalid file name"), + ZipError::InvalidFileComment => write!(f, "Invalid file comment"), + } + } +} + +impl Error for ZipError {} diff --git a/src/zip/file.rs b/src/zip/file.rs new file mode 100644 index 0000000..3b63c2a --- /dev/null +++ b/src/zip/file.rs @@ -0,0 +1,45 @@ +use crate::driver::ArchiveFile; +use chrono::{NaiveDate, NaiveDateTime, NaiveTime}; + +pub struct ZipFile { + pub name: String, + pub datetime: NaiveDateTime, + pub compression_method: u16, + pub compressed_size: u64, + pub size: u64, + pub comment: String, +} + +impl ArchiveFile for ZipFile {} + +impl ZipFile { + pub fn new( + name: String, + dos_date: u16, + dos_time: u16, + compression_method: u16, + compressed_size: u64, + size: u64, + comment: String, + ) -> Self { + let year = (dos_date >> 9 & 0x7F) + 1980; + let month = dos_date >> 5 & 0xF; + let day = dos_date & 0x1F; + + let hour = (dos_time >> 11) & 0x1F; + let minute = (dos_time >> 5) & 0x3F; + let seconds = (dos_time & 0x1F) * 2; + + Self { + name, + datetime: NaiveDateTime::new( + NaiveDate::from_ymd_opt(year as i32, month as u32, day as u32).unwrap(), + NaiveTime::from_hms_opt(hour as u32, minute as u32, seconds as u32).unwrap(), + ), + compression_method, + compressed_size, + size, + comment, + } + } +} diff --git a/src/zip/mod.rs b/src/zip/mod.rs new file mode 100644 index 0000000..612a946 --- /dev/null +++ b/src/zip/mod.rs @@ -0,0 +1,7 @@ +mod driver; +mod error; +mod file; +mod structs; + +pub use driver::Zip; +pub use file::ZipFile; diff --git a/src/zip/structs.rs b/src/zip/structs.rs new file mode 100644 index 0000000..e38f9f0 --- /dev/null +++ b/src/zip/structs.rs @@ -0,0 +1,52 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize)] +pub struct EOCDR { + pub eocdr_disk: u16, + pub cd_disk: u16, + pub cd_disk_records: u16, + pub cd_records: u16, + pub cd_size: u32, + pub cd_pointer: u32, + pub comment_len: u16, +} + +#[derive(Serialize, Deserialize)] +pub struct EOCDR64Locator { + pub eocdr64_disk: u32, + pub eocdr64_pointer: u64, + pub disks: u32, +} + +#[derive(Serialize, Deserialize)] +pub struct EOCDR64 { + pub eocdr64_size: u64, + pub version: u16, + pub version_needed: u16, + pub eocdr64_disk: u32, + pub cd_disk: u32, + pub cd_disk_records: u64, + pub cd_records: u64, + pub cd_size: u64, + pub cd_pointer: u64, +} + +#[derive(Serialize, Deserialize)] +pub struct CDR { + pub version: u16, + pub version_needed: u16, + pub bit_flag: u16, + pub compression_method: u16, + pub dos_time: u16, + pub dos_date: u16, + pub crc32: u32, + pub compressed_size: u32, + pub size: u32, + pub name_len: u16, + pub extra_field_len: u16, + pub comment_len: u16, + pub disk: u16, + pub internal_attributes: u16, + pub external_attributes: u32, + pub header_pointer: u32, +} diff --git a/tests/usage.rs b/tests/usage.rs new file mode 100644 index 0000000..64f7050 --- /dev/null +++ b/tests/usage.rs @@ -0,0 +1,19 @@ +use archivator::{Archive, Zip}; +use std::fs::File; +use std::time::{SystemTime, UNIX_EPOCH}; + +fn time() -> f64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs_f64() +} + +#[test] +fn time_test() { + let file = File::open("tests/files/1M.zip").unwrap(); + + let start = time(); + let archive = Archive::::new(file).unwrap(); + println!("{}", time() - start); +} -- cgit v1.2.3