From 9003b81813ff171edfc6101868c226c5c7d1957c Mon Sep 17 00:00:00 2001 From: Igor Tolmachov Date: Fri, 8 Sep 2023 17:33:59 +0900 Subject: Add basic zip reader --- src/zip/datatypes.rs | 278 ++++++++++++++++----------------------------------- src/zip/file.rs | 100 ++++++++++++++++-- src/zip/io.rs | 127 +++++++++++++++++++++-- 3 files changed, 293 insertions(+), 212 deletions(-) (limited to 'src/zip') diff --git a/src/zip/datatypes.rs b/src/zip/datatypes.rs index b280e5d..5ce1045 100644 --- a/src/zip/datatypes.rs +++ b/src/zip/datatypes.rs @@ -1,9 +1,11 @@ -use crate::datatypes::{utils, ArchiveDatatype}; +pub use crate::datatypes::ArchiveDatatype; use crate::result::{ArchiveError, ArchiveResult}; -use std::io::{Read, Write}; +use crate::utils::{archive_datatype, ReadHelper}; +use std::io::{Read, Seek, SeekFrom}; -utils::create_archive_datatype! { +archive_datatype! { pub struct LocalFileHeader { + [const] SIGNATURE: u32 = 0x04034b50, signature: u32, version_needed: u16, general_purpose_bit_flag: u16, @@ -16,109 +18,28 @@ utils::create_archive_datatype! { file_name_length: u16, extra_field_length: u16, } - - let file_name: String { - size: file_name_length, - read: utils::read_string, - write: utils::write_string, - } - - let extra_field: Vec { - size: file_name_length, - read: utils::vec, - write: utils::vec, - } - - const { - SIGNATURE: u32 = 0x04034b50; - } - - read { - if signature != Self::SIGNATURE { - return Err(ArchiveError::WrongSignature { expected: Self::SIGNATURE, received: signature }) - } - } } -pub struct DataDescriptor { - pub signature: Option, - pub crc32: u32, - pub compressed_size: u32, - pub uncompressed_size: u32, -} - -impl DataDescriptor { - const SIGNATURE: u32 = 0x04034b50; -} - -impl ArchiveDatatype for DataDescriptor { - const SIZE: usize = 12; - - fn read(mut reader: impl Read) -> ArchiveResult { - let mut buf = [0; Self::SIZE]; - reader.read(&mut buf)?; - - let signature = u32::from_le_bytes([buf[0], buf[1], buf[2], buf[3]]); - if signature == Self::SIGNATURE { - return Ok(Self { - signature: Some(signature), - crc32: u32::from_le_bytes([buf[4], buf[5], buf[6], buf[7]]), - compressed_size: u32::from_le_bytes([buf[8], buf[9], buf[10], buf[11]]), - uncompressed_size: { - let mut buf = [0; 4]; - reader.read(&mut buf)?; - u32::from_le_bytes([buf[0], buf[1], buf[2], buf[3]]) - }, - }); - } else { - return Ok(Self { - signature: None, - crc32: signature, - compressed_size: u32::from_le_bytes([buf[4], buf[5], buf[6], buf[7]]), - uncompressed_size: u32::from_le_bytes([buf[8], buf[9], buf[10], buf[11]]), - }); - } - } - - fn write(&self, mut writer: impl Write) -> ArchiveResult<()> { - writer.write( - &[ - Self::SIGNATURE.to_le_bytes(), - self.crc32.to_le_bytes(), - self.compressed_size.to_le_bytes(), - self.uncompressed_size.to_le_bytes(), - ] - .concat(), - )?; - Ok(()) +archive_datatype! { + pub struct DataDescriptor { + [const] SIGNATURE: u32 = 0x08074b50, + crc32: u32, + compressed_size: u32, + uncompressed_size: u32, } } -utils::create_archive_datatype! { +archive_datatype! { pub struct ArchiveExtraDataRecord { + [const] SIGNATURE: u32 = 0x08064b50, signature: u32, extra_field_length: u32, } - - let extra_field: Vec { - size: extra_field_length, - read: utils::vec, - write: utils::vec, - } - - const { - SIGNATURE: u32 = 0x08064b50; - } - - read { - if signature != Self::SIGNATURE { - return Err(ArchiveError::WrongSignature { expected: Self::SIGNATURE, received: signature }) - } - } } -utils::create_archive_datatype! { +archive_datatype! { pub struct CentralDirectoryRecord { + [const] SIGNATURE: u32 = 0x02014b50, signature: u32, version_made_by: u16, version_needed: u16, @@ -135,136 +56,111 @@ utils::create_archive_datatype! { disk_number: u16, internal_file_attributes: u16, external_file_attributes: u32, - relative_header_offset: u32, - } - - let file_name: String { - size: file_name_length, - read: utils::read_string, - write: utils::write_string, - } - - let extra_field: Vec { - size: external_file_attributes, - read: utils::vec, - write: utils::vec, - } - - let file_comment: String { - size: file_comment_length, - read: utils::read_string, - write: utils::write_string, - } - - const { - SIGNATURE: u32 = 0x02014b50; - } - - read { - if signature != Self::SIGNATURE { - return Err(ArchiveError::WrongSignature { expected: Self::SIGNATURE, received: signature }) - } + header_offset: u32, } } -utils::create_archive_datatype! { +archive_datatype! { pub struct DigitalSignature{ + [const] SIGNATURE: u32 = 0x05054b50, signature: u32, data_size: u16, } - - let data: Vec { - size: data_size, - read: utils::vec, - write: utils::vec, - } - - const { - SIGNATURE: u32 = 0x05054b50; - } - - read { - if signature != Self::SIGNATURE { - return Err(ArchiveError::WrongSignature { expected: Self::SIGNATURE, received: signature }) - } - } } -utils::create_archive_datatype! { +archive_datatype! { pub struct Zip64EndOfCentralDirectoryRecord { + [const] SIGNATURE: u32 = 0x06064b50, signature: u32, - size_of_struct: u64, + size_of_zip64_eocd: u64, version_made_by: u16, version_needed: u16, - disk_number: u32, - disk_number_where_starts_central_directory: u32, - entries_in_central_directory_on_disk: u64, - entries_in_central_directory: u64, - size_of_central_directory_records: u64, - offset_of_central_directory_entries: u64, - } - - let extensible_data: Vec { - size: size_of_struct - Self::SIZE as u64 + 12, - read: utils::vec, - write: utils::vec, + disk: u32, + disk_where_starts_cd: u32, + entries_in_cd_on_disk: u64, + entries_in_cd: u64, + size_of_cd_records: u64, + offset_of_cd_entries: u64, } +} - const { - SIGNATURE: u32 = 0x08064b50; - } +impl Zip64EndOfCentralDirectoryRecord { + pub fn find( + mut reader: impl Read + Seek, + eocd_offset: u64, + ) -> ArchiveResult)>> { + let locator_offset = eocd_offset - Zip64EndOfCentralDirectoryLocator::SIZE as u64; + reader.seek(SeekFrom::Start(locator_offset))?; + + let locator = Zip64EndOfCentralDirectoryLocator::read(&mut reader)?; + if locator.signature != Zip64EndOfCentralDirectoryLocator::SIGNATURE { + return Ok(None); + } - read { - if signature != Self::SIGNATURE { - return Err(ArchiveError::WrongSignature { expected: Self::SIGNATURE, received: signature }) + reader.seek(SeekFrom::Start(locator.offset_of_zip64_eocd))?; + let eocd64 = Self::read(&mut reader)?; + if eocd64.signature != Self::SIGNATURE { + return Err(ArchiveError::IncorrectSignature { + expected: Self::SIGNATURE, + received: eocd64.signature, + }); } + + let ext_data = reader.read2vec(eocd64.size_of_zip64_eocd as usize + 12 - Self::SIZE)?; + Ok(Some((locator.offset_of_zip64_eocd, eocd64, ext_data))) } } -utils::create_archive_datatype! { +archive_datatype! { pub struct Zip64EndOfCentralDirectoryLocator { + [const] SIGNATURE: u32 = 0x07064b50, signature: u32, - disk_number_where_starts_zip64_eocd: u32, - relative_offset_of_zip64_eocd: u64, + disk_where_starts_zip64_eocd: u32, + offset_of_zip64_eocd: u64, total_disks_number: u32, } - - const { - SIGNATURE: u32 = 0x07064b50; - } - - read { - if signature != Self::SIGNATURE { - return Err(ArchiveError::WrongSignature { expected: Self::SIGNATURE, received: signature }) - } - } } -utils::create_archive_datatype! { +archive_datatype! { pub struct EndOfCentralDirectoryRecord { + [const] SIGNATURE: u32 = 0x06054b50, signature: u32, - disk_number: u16, - disk_number_where_starts_central_directory: u16, - entries_in_central_directory_on_disk: u16, - entries_in_central_directory: u16, - size_of_central_directory_records: u32, - offset_of_central_directory_entries: u32, + disk: u16, + disk_where_starts_cd: u16, + entries_in_cd_on_disk: u16, + entries_in_cd: u16, + size_of_cd_records: u32, + offset_of_cd_entries: u32, comment_length: u16, } +} - let comment: String { - size: comment_length, - read: utils::read_string, - write: utils::write_string, - } +impl EndOfCentralDirectoryRecord { + pub fn find(mut reader: impl Read + Seek) -> ArchiveResult<(u64, Self, String)> { + let file_size = reader.seek(SeekFrom::End(0))? as usize; + let limit: usize = (u16::MAX as usize + Self::SIZE).min(file_size); + let mut buf = vec![0; limit]; - const { - SIGNATURE: u32 = 0x06054b50; - } + reader.seek(SeekFrom::End(-(limit as i64)))?; + reader.read(&mut buf)?; + + for byte in 0..limit - 4 { + if u32::from_le_bytes(buf[byte..byte + 4].try_into().unwrap()) == Self::SIGNATURE { + let eocd = Self::parse(buf[byte..byte + Self::SIZE].try_into().unwrap()); - read { - if signature != Self::SIGNATURE { - return Err(ArchiveError::WrongSignature { expected: Self::SIGNATURE, received: signature }) + let comment = String::from_utf8( + buf[byte + Self::SIZE..byte + Self::SIZE + eocd.comment_length as usize].into(), + ) + .map_err(|_| ArchiveError::IncorrectString { + location: "archive_comment", + })?; + + return Ok(((file_size - limit + byte) as u64, eocd, comment)); + } } + + Err(ArchiveError::BadArchive { + reason: "end of central directory record not found", + }) } } diff --git a/src/zip/file.rs b/src/zip/file.rs index dba8d06..261390f 100644 --- a/src/zip/file.rs +++ b/src/zip/file.rs @@ -1,44 +1,124 @@ use crate::file::{ArchiveFile, ArchiveFileRead, ArchiveFileWrite}; +use crate::result::{ArchiveError, ArchiveResult}; +use chrono::NaiveDateTime; use std::io::{Read, Write}; -pub struct FileInfo {} +pub struct GeneralPurposeBitFlag {} -pub struct FileReader {} +pub enum CompressionMethod { + Store, + Deflate, + Bzip2, + LZMA, + Zstandard, +} + +impl TryFrom for CompressionMethod { + type Error = ArchiveError; + + fn try_from(value: u16) -> ArchiveResult { + Ok(match value { + 0 => Self::Store, + 8 => Self::Deflate, + 12 => Self::Bzip2, + 14 => Self::LZMA, + 93 | 20 => Self::Zstandard, + _ => return Err(ArchiveError::UnsupportedCompressionMethod { method: value }), + }) + } +} -pub struct FileWriter {} +pub struct FileInfo { + number: u64, + version_made_by: u16, + version_needed: u16, + general_purpose_bit_flag: GeneralPurposeBitFlag, + compression_method: CompressionMethod, + file_modified_at: NaiveDateTime, + crc32: u32, + compressed_size: u64, + uncompressed_size: u64, + name: String, + comment: String, + header_offset: u64, +} + +impl FileInfo { + pub fn new( + number: u64, + version_made_by: u16, + version_needed: u16, + general_purpose_bit_flag: GeneralPurposeBitFlag, + compression_method: CompressionMethod, + file_modified_at: NaiveDateTime, + crc32: u32, + compressed_size: u64, + uncompressed_size: u64, + name: String, + comment: String, + header_offset: u64, + ) -> Self { + Self { + number, + version_made_by, + version_needed, + general_purpose_bit_flag, + compression_method, + file_modified_at, + crc32, + compressed_size, + uncompressed_size, + name, + comment, + header_offset, + } + } +} + +pub struct FileReader { + info: FileInfo, +} impl ArchiveFile for FileReader { type Info = FileInfo; - fn info() -> Self::Info { - Self::Info {} + fn new(info: Self::Info) -> Self { + Self { info } } } impl Read for FileReader { fn read(&mut self, buf: &mut [u8]) -> std::io::Result { - return Ok(0); + todo!() } } impl ArchiveFileRead for FileReader {} +impl FileReader {} + +pub struct FileWriter { + info: FileInfo, +} + impl ArchiveFile for FileWriter { type Info = FileInfo; - fn info() -> Self::Info { - Self::Info {} + fn new(info: Self::Info) -> Self { + Self { info } } } impl Write for FileWriter { fn write(&mut self, buf: &[u8]) -> std::io::Result { - return Ok(0); + todo!() } fn flush(&mut self) -> std::io::Result<()> { - Ok(()) + todo!() } } impl ArchiveFileWrite for FileWriter {} + +impl FileWriter {} diff --git a/src/zip/io.rs b/src/zip/io.rs index b79ad0d..c41607f 100644 --- a/src/zip/io.rs +++ b/src/zip/io.rs @@ -1,12 +1,18 @@ +use super::datatypes::*; use super::file::{FileInfo, FileReader, FileWriter}; use crate::io::{ArchiveRead, ArchiveWrite}; -use crate::result::ArchiveResult; +use crate::result::{ArchiveError, ArchiveResult}; +use crate::utils::ReadHelper; +use chrono::{NaiveDate, NaiveDateTime, NaiveTime}; +use std::collections::HashMap; use std::fs::File; use std::io::Read; -use std::io::{Seek, Write}; +use std::io::{Seek, SeekFrom, Write}; pub struct Reader { reader: R, + files: HashMap, + comment: String, } impl ArchiveRead for Reader { @@ -14,16 +20,115 @@ impl ArchiveRead for Reader { type FileInfo = FileInfo; type FileReader = FileReader; - fn new(reader: Self::Reader) -> ArchiveResult { - Ok(Self { reader }) + fn new(mut reader: Self::Reader) -> ArchiveResult { + let cd_size: u64; + let cd_offset: u64; + let cd_entries: u64; + + let (eocd_offset, eocd, comment) = EndOfCentralDirectoryRecord::find(&mut reader)?; + if let Some((eocd64_offset, eocd64, eocd64_extensible)) = + Zip64EndOfCentralDirectoryRecord::find(&mut reader, eocd_offset)? + { + cd_size = eocd64.size_of_cd_records; + cd_offset = eocd64.offset_of_cd_entries; + cd_entries = eocd64.entries_in_cd; + } else { + cd_size = eocd.size_of_cd_records as u64; + cd_offset = eocd.offset_of_cd_entries as u64; + cd_entries = eocd.entries_in_cd as u64; + } + + let mut buf = vec![0; cd_size as usize]; + reader.seek(SeekFrom::Start(cd_offset))?; + reader.read(&mut buf)?; + + let mut buf: &[u8] = &buf; + let mut files = HashMap::with_capacity(cd_entries as usize); + + for entry_number in 0..cd_entries { + let cd = CentralDirectoryRecord::read(&mut buf)?; + + let file_name = String::from_utf8(buf.read2vec(cd.file_name_length as usize)?).or( + Err(ArchiveError::IncorrectString { + location: "file_name", + }), + )?; + let mut extra_field: &[u8] = &buf.read2vec(cd.extra_field_length as usize)?; + let file_comment = String::from_utf8(buf.read2vec(cd.file_comment_length as usize)?) + .or(Err(ArchiveError::IncorrectString { + location: "file_comment", + }))?; + + let mut uncompressed_size: u64 = cd.uncompressed_size as u64; + let mut compressed_size: u64 = cd.compressed_size as u64; + let mut header_offset: u64 = cd.header_offset as u64; + + while extra_field.len() > 0 { + let header = u16::from_le_bytes(extra_field.read2buf()?); + let size = u16::from_le_bytes(extra_field.read2buf()?); + let mut data: &[u8] = &extra_field.read2vec(size as usize)?; + + match header { + 0x0001 => { + if uncompressed_size == 0xFFFFFFFF { + uncompressed_size = u64::from_le_bytes(data.read2buf()?); + } + if compressed_size == 0xFFFFFFFF { + compressed_size = u64::from_le_bytes(data.read2buf()?); + } + if header_offset == 0xFFFFFFFF { + header_offset = u64::from_le_bytes(data.read2buf()?); + } + } + _ => {} + }; + } + + let year = ((cd.last_mod_file_date >> 9) & 0x7F) + 1980; + let month = (cd.last_mod_file_date >> 5) & 0xF; + let day = cd.last_mod_file_date & 0x1F; + + let hour = (cd.last_mod_file_time >> 11) & 0x1F; + let min = (cd.last_mod_file_time >> 5) & 0x3F; + let sec = (cd.last_mod_file_time & 0x1F) * 2; + + files.insert( + file_name.clone(), + FileInfo::new( + entry_number, + cd.version_made_by, + cd.version_needed, + super::file::GeneralPurposeBitFlag {}, + cd.compression_method.try_into()?, + NaiveDateTime::new( + NaiveDate::from_ymd_opt(year as i32, month as u32, day as u32) + .ok_or(ArchiveError::IncorrectDate { year, month, day })?, + NaiveTime::from_hms_opt(hour as u32, min as u32, sec as u32) + .ok_or(ArchiveError::IncorrectTime { hour, min, sec })?, + ), + cd.crc32, + compressed_size, + uncompressed_size, + file_name, + file_comment, + header_offset, + ), + ); + } + + Ok(Self { + reader, + files, + comment, + }) } fn files(&self) -> ArchiveResult> { - Ok(Vec::new()) + todo!() } - fn file_reader(&self, name: &str) -> ArchiveResult { - Ok(Self::FileReader {}) + fn open_file(&self, name: &str) -> ArchiveResult { + todo!() } } @@ -33,17 +138,17 @@ pub struct Writer { writer: W, } -impl ArchiveWrite for Writer { +impl ArchiveWrite for Writer { type Writer = W; type FileInfo = FileInfo; type FileWriter = FileWriter; fn new(writer: Self::Writer) -> ArchiveResult { - Ok(Self { writer }) + todo!() } - fn file_writer(&self, name: &str) -> ArchiveResult { - Ok(Self::FileWriter {}) + fn create_file(&self, name: &str) -> ArchiveResult { + todo!() } } -- cgit v1.2.3