From 75f4a84e977a1f409e6580056dc31343e15bbf3e Mon Sep 17 00:00:00 2001
From: Igor Tolmachev
Date: Sun, 14 Jul 2024 16:01:10 +0900
Subject: Add support of cp437 encoding

---
 src/zip/cp437.rs  | 376 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/zip/driver.rs |  22 +++-
 src/zip/error.rs  |   2 -
 src/zip/mod.rs    |   1 +
 src/zip/tests.rs  |  34 +++++
 5 files changed, 427 insertions(+), 8 deletions(-)
 create mode 100644 src/zip/cp437.rs

(limited to 'src')

diff --git a/src/zip/cp437.rs b/src/zip/cp437.rs
new file mode 100644
index 0000000..e290ea6
--- /dev/null
+++ b/src/zip/cp437.rs
@@ -0,0 +1,376 @@
+pub trait FromCp437<T>: Sized {
+    type Value;
+
+    fn from_cp437(value: T) -> Self::Value;
+}
+
+pub fn to_char(byte: u8) -> char {
+    char::from_u32(match byte {
+        0..=127 => byte as u32,
+        128 => 199,
+        129 => 252,
+        130 => 233,
+        131 => 226,
+        132 => 228,
+        133 => 224,
+        134 => 229,
+        135 => 231,
+        136 => 234,
+        137 => 235,
+        138 => 232,
+        139 => 239,
+        140 => 238,
+        141 => 236,
+        142 => 196,
+        143 => 197,
+        144 => 201,
+        145 => 230,
+        146 => 198,
+        147 => 244,
+        148 => 246,
+        149 => 242,
+        150 => 251,
+        151 => 249,
+        152 => 255,
+        153 => 214,
+        154 => 220,
+        155 => 162,
+        156 => 163,
+        157 => 165,
+        158 => 8359,
+        159 => 402,
+        160 => 225,
+        161 => 237,
+        162 => 243,
+        163 => 250,
+        164 => 241,
+        165 => 209,
+        166 => 170,
+        167 => 186,
+        168 => 191,
+        169 => 8976,
+        170 => 172,
+        171 => 189,
+        172 => 188,
+        173 => 161,
+        174 => 171,
+        175 => 187,
+        176 => 9617,
+        177 => 9618,
+        178 => 9619,
+        179 => 9474,
+        180 => 9508,
+        181 => 9569,
+        182 => 9570,
+        183 => 9558,
+        184 => 9557,
+        185 => 9571,
+        186 => 9553,
+        187 => 9559,
+        188 => 9565,
+        189 => 9564,
+        190 => 9563,
+        191 => 9488,
+        192 => 9492,
+        193 => 9524,
+        194 => 9516,
+        195 => 9500,
+        196 => 9472,
+        197 => 9532,
+        198 => 9566,
+        199 => 9567,
+        200 => 9562,
+        201 => 9556,
+        202 => 9577,
+        203 => 9574,
+        204 => 9568,
+        205 => 9552,
+        206 => 9580,
+        207 => 9575,
+        208 => 9576,
+        209 => 9572,
+        210 => 9573,
+        211 => 9561,
+        212 => 9560,
+        213 => 9554,
+        214 => 9555,
+        215 => 9579,
+        216 => 9578,
+        217 => 9496,
+        218 => 9484,
+        219 => 9608,
+        220 => 9604,
+        221 => 9612,
+        222 => 9616,
+        223 => 9600,
+        224 => 945,
+        225 => 223,
+        226 => 915,
+        227 => 960,
+        228 => 931,
+        229 => 963,
+        230 => 181,
+        231 => 964,
+        232 => 934,
+        233 => 920,
+        234 => 937,
+        235 => 948,
+        236 => 8734,
+        237 => 966,
+        238 => 949,
+        239 => 8745,
+        240 => 8801,
+        241 => 177,
+        242 => 8805,
+        243 => 8804,
+        244 => 8992,
+        245 => 8993,
+        246 => 247,
+        247 => 8776,
+        248 => 176,
+        249 => 8729,
+        250 => 183,
+        251 => 8730,
+        252 => 8319,
+        253 => 178,
+        254 => 9632,
+        255 => 160,
+    })
+    .unwrap()
+}
+
+pub fn from_char(char: char) -> Option<u8> {
+    Some(match char as u32 {
+        0..=127 => char as u8,
+        160 => 255,
+        161 => 173,
+        162 => 155,
+        163 => 156,
+        165 => 157,
+        170 => 166,
+        171 => 174,
+        172 => 170,
+        176 => 248,
+        177 => 241,
+        178 => 253,
+        181 => 230,
+        183 => 250,
+        186 => 167,
+        187 => 175,
+        188 => 172,
+        189 => 171,
+        191 => 168,
+        196 => 142,
+        197 => 143,
+        198 => 146,
+        199 => 128,
+        201 => 144,
+        209 => 165,
+        214 => 153,
+        220 => 154,
+        223 => 225,
+        224 => 133,
+        225 => 160,
+        226 => 131,
+        228 => 132,
+        229 => 134,
+        230 => 145,
+        231 => 135,
+        232 => 138,
+        233 => 130,
+        234 => 136,
+        235 => 137,
+        236 => 141,
+        237 => 161,
+        238 => 140,
+        239 => 139,
+        241 => 164,
+        242 => 149,
+        243 => 162,
+        244 => 147,
+        246 => 148,
+        247 => 246,
+        249 => 151,
+        250 => 163,
+        251 => 150,
+        252 => 129,
+        255 => 152,
+        402 => 159,
+        915 => 226,
+        920 => 233,
+        931 => 228,
+        934 => 232,
+        937 => 234,
+        945 => 224,
+        948 => 235,
+        949 => 238,
+        960 => 227,
+        963 => 229,
+        964 => 231,
+        966 => 237,
+        8319 => 252,
+        8359 => 158,
+        8729 => 249,
+        8730 => 251,
+        8734 => 236,
+        8745 => 239,
+        8776 => 247,
+        8801 => 240,
+        8804 => 243,
+        8805 => 242,
+        8976 => 169,
+        8992 => 244,
+        8993 => 245,
+        9472 => 196,
+        9474 => 179,
+        9484 => 218,
+        9488 => 191,
+        9492 => 192,
+        9496 => 217,
+        9500 => 195,
+        9508 => 180,
+        9516 => 194,
+        9524 => 193,
+        9532 => 197,
+        9552 => 205,
+        9553 => 186,
+        9554 => 213,
+        9555 => 214,
+        9556 => 201,
+        9557 => 184,
+        9558 => 183,
+        9559 => 187,
+        9560 => 212,
+        9561 => 211,
+        9562 => 200,
+        9563 => 190,
+        9564 => 189,
+        9565 => 188,
+        9566 => 198,
+        9567 => 199,
+        9568 => 204,
+        9569 => 181,
+        9570 => 182,
+        9571 => 185,
+        9572 => 209,
+        9573 => 210,
+        9574 => 203,
+        9575 => 207,
+        9576 => 208,
+        9577 => 202,
+        9578 => 216,
+        9579 => 215,
+        9580 => 206,
+        9600 => 223,
+        9604 => 220,
+        9608 => 219,
+        9612 => 221,
+        9616 => 222,
+        9617 => 176,
+        9618 => 177,
+        9619 => 178,
+        9632 => 254,
+        _ => return None,
+    })
+}
+
+pub fn is_cp437(char: char) -> bool {
+    match char as u32 {
+        0..=127 => true,
+        160..=163 => true,
+        165 => true,
+        170..=172 => true,
+        176..=178 => true,
+        181 => true,
+        183 => true,
+        186..=189 => true,
+        191 => true,
+        196..=199 => true,
+        201 => true,
+        209 => true,
+        214 => true,
+        220 => true,
+        223..=226 => true,
+        228..=239 => true,
+        241..=244 => true,
+        246..=247 => true,
+        249..=252 => true,
+        255 => true,
+        402 => true,
+        915 => true,
+        920 => true,
+        931 => true,
+        934 => true,
+        937 => true,
+        945 => true,
+        948..=949 => true,
+        960 => true,
+        963..=964 => true,
+        966 => true,
+        8319 => true,
+        8359 => true,
+        8729..=8730 => true,
+        8734 => true,
+        8745 => true,
+        8776 => true,
+        8801 => true,
+        8804..=8805 => true,
+        8976 => true,
+        8992..=8993 => true,
+        9472 => true,
+        9474 => true,
+        9484 => true,
+        9488 => true,
+        9492 => true,
+        9496 => true,
+        9500 => true,
+        9508 => true,
+        9516 => true,
+        9524 => true,
+        9532 => true,
+        9552..=9580 => true,
+        9600 => true,
+        9604 => true,
+        9608 => true,
+        9612 => true,
+        9616..=9619 => true,
+        9632 => true,
+        _ => false,
+    }
+}
+
+impl FromCp437<Vec<u8>> for String {
+    type Value = Self;
+
+    fn from_cp437(bytes: Vec<u8>) -> Self {
+        Self::from_cp437(bytes.as_slice())
+    }
+}
+
+impl<const S: usize> FromCp437<[u8; S]> for String {
+    type Value = Self;
+
+    fn from_cp437(bytes: [u8; S]) -> Self {
+        Self::from_cp437(bytes.as_slice())
+    }
+}
+
+impl FromCp437<&[u8]> for String {
+    type Value = Self;
+
+    fn from_cp437(bytes: &[u8]) -> Self {
+        bytes.iter().copied().map(to_char).collect()
+    }
+}
+
+impl FromCp437<&str> for Vec<u8> {
+    type Value = Option<Self>;
+
+    fn from_cp437(text: &str) -> Option<Self> {
+        let mut bytes = Vec::with_capacity(text.chars().count());
+        for c in text.chars() {
+            bytes.push(from_char(c)?);
+        }
+        Some(bytes)
+    }
+}
diff --git a/src/zip/driver.rs b/src/zip/driver.rs
index 99b409d..4782e65 100644
--- a/src/zip/driver.rs
+++ b/src/zip/driver.rs
@@ -1,5 +1,6 @@
 use crate::driver::{ArchiveRead, ArchiveWrite, Driver};
 use crate::utils::ReadUtils;
+use crate::zip::cp437::FromCp437;
 use crate::zip::structs::{deserialize, Cdr, Eocdr, Eocdr64, Eocdr64Locator, ExtraHeader};
 use crate::zip::{
     BitFlag, CompressionMethod, ZipError, ZipFileInfo, ZipFileReader, ZipFileWriter, ZipResult,
@@ -77,7 +78,7 @@ impl ArchiveRead for Zip {
         let comment = {
             let mut buf: Vec<u8> = vec![0; eocdr.comment_len as usize];
             io.read(&mut buf)?;
-            String::from_utf8(buf).map_err(|_| ZipError::InvalidArchiveComment)?
+            String::from_cp437(buf)
         };
 
         // Try to find eocdr64locator
@@ -116,14 +117,23 @@ impl ArchiveRead for Zip {
             }
             p += 4;
             let cdr: Cdr = deserialize(&buf[p..p + 42]).unwrap();
+            let bit_flag = BitFlag::new(cdr.bit_flag);
             p += 42;
-            let name = String::from_utf8(buf[p..p + cdr.name_len as usize].into())
-                .map_err(|_| ZipError::InvalidFileName)?;
+            let name = if bit_flag.is_utf8() {
+                String::from_utf8(buf[p..p + cdr.name_len as usize].to_vec())
+                    .map_err(|_| ZipError::InvalidFileName)?
+            } else {
+                String::from_cp437(&buf[p..p + cdr.name_len as usize])
+            };
             p += cdr.name_len as usize;
             let extra_fields: Vec<u8> = buf[p..p + cdr.extra_field_len as usize].into();
             p += cdr.extra_field_len as usize;
-            let comment = String::from_utf8(buf[p..p + cdr.comment_len as usize].into())
-                .map_err(|_| ZipError::InvalidFileComment)?;
+            let comment = if bit_flag.is_utf8() {
+                String::from_utf8(buf[p..p + cdr.comment_len as usize].to_vec())
+                    .map_err(|_| ZipError::InvalidFileComment)?
+            } else {
+                String::from_cp437(&buf[p..p + cdr.comment_len as usize])
+            };
             p += cdr.comment_len as usize;
 
             let mut compressed_size = cdr.compressed_size as u64;
@@ -209,7 +219,7 @@ impl ArchiveRead for Zip {
                 name.clone(),
                 ZipFileInfo::new(
                     CompressionMethod::from_struct_id(cdr.compression_method)?,
-                    BitFlag::new(cdr.bit_flag),
+                    bit_flag,
                     mtime,
                     atime,
                     ctime,
diff --git a/src/zip/error.rs b/src/zip/error.rs
index c77370b..3eb68b8 100644
--- a/src/zip/error.rs
+++ b/src/zip/error.rs
@@ -11,7 +11,6 @@ pub enum ZipError {
     InvalidFileHeaderSignature,
     InvalidCDRSignature,
 
-    InvalidArchiveComment,
     InvalidCompressionMethod,
     UnsupportedCompressionMethod,
     InvalidDate,
@@ -49,7 +48,6 @@ impl Display for ZipError {
                 write!(f, "Invalid signature of central directory record")
             }
 
-            Self::InvalidArchiveComment => write!(f, "Invalid archive comment"),
             Self::InvalidCompressionMethod => writeln!(f, "Invalid compression method"),
             Self::UnsupportedCompressionMethod => writeln!(f, "Unsupported compression method"),
             Self::InvalidDate => write!(f, "Invalid date"),
diff --git a/src/zip/mod.rs b/src/zip/mod.rs
index 0f19824..bcc34ed 100644
--- a/src/zip/mod.rs
+++ b/src/zip/mod.rs
@@ -1,4 +1,5 @@
 mod archive;
+mod cp437;
 mod driver;
 mod error;
 mod file;
diff --git a/src/zip/tests.rs b/src/zip/tests.rs
index 92a9c3f..e24cdfe 100644
--- a/src/zip/tests.rs
+++ b/src/zip/tests.rs
@@ -1,3 +1,4 @@
+use crate::zip::cp437::{from_char, is_cp437, to_char, FromCp437};
 use crate::zip::{bit::DeflateMode, BitFlag};
 
 #[test]
@@ -40,3 +41,36 @@ fn test_bit_flag() {
     bit_flag.set_deflate_mode(DeflateMode::Normal);
     assert_eq!(bit_flag.deflate_mode(), DeflateMode::Normal);
 }
+
+#[test]
+fn test_cp437() {
+    for b in 0..=255 {
+        assert_eq!(from_char(to_char(b)).unwrap(), b);
+        assert!(is_cp437(to_char(b)), "byte: {}", b);
+    }
+
+    assert_eq!(from_char('Σ'), Some(228));
+    assert_eq!(from_char('§'), None);
+
+    assert!(is_cp437('Σ'));
+    assert!(!is_cp437('§'));
+
+    assert_eq!(
+        Vec::from_cp437("hello world").unwrap(),
+        [104, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100]
+    );
+    assert_eq!(
+        String::from_cp437([104, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100]),
+        "hello world"
+    );
+
+    assert_eq!(
+        Vec::from_cp437("ABCDEFGHIJKLMNOPQRSTUVWXYZ").unwrap(),
+        (65..=90).collect::<Vec<u8>>()
+    );
+
+    assert_eq!(
+        String::from_cp437((97..=122).collect::<Vec<u8>>()),
+        "abcdefghijklmnopqrstuvwxyz"
+    );
+}
-- 
cgit v1.2.3