From 1ff222bee406711c1b72498539a0344c41263703 Mon Sep 17 00:00:00 2001 From: poly000 <1348292515@qq.com> Date: Fri, 22 Mar 2024 11:20:48 +0800 Subject: [PATCH] feat: support GBK encoding for 'zh' locale --- Cargo.toml | 1 + src/game/scripting/tsc/bytecode_utils.rs | 27 +++++++-------- src/game/scripting/tsc/text_script.rs | 14 ++++++-- src/util/encoding.rs | 42 ------------------------ 4 files changed, 24 insertions(+), 60 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 18efbbd..b5dae93 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -65,6 +65,7 @@ cpal = { git = "https://github.com/doukutsu-rs/cpal", rev = "9d269d8724102404e73 directories = "3" discord-rich-presence = { version = "0.2", optional = true } downcast = "0.11" +encoding_rs = "0.8.33" fern = "0.6.2" glutin = { git = "https://github.com/doukutsu-rs/glutin.git", rev = "2dd95f042e6e090d36f577cbea125560dd99bd27", optional = true, default_features = false, features = ["x11"] } imgui = "0.8" diff --git a/src/game/scripting/tsc/bytecode_utils.rs b/src/game/scripting/tsc/bytecode_utils.rs index 6046ea5..aabd101 100644 --- a/src/game/scripting/tsc/bytecode_utils.rs +++ b/src/game/scripting/tsc/bytecode_utils.rs @@ -3,7 +3,6 @@ use std::io::{Cursor, Read}; use crate::framework::error::GameError::ParseError; use crate::framework::error::GameResult; use crate::game::scripting::tsc::text_script::TextScriptEncoding; -use crate::util::encoding::{read_cur_shift_jis, read_cur_wtf8}; pub fn put_varint(val: i32, out: &mut Vec) { let mut x = ((val as u32) >> 31) ^ ((val as u32) << 1); @@ -43,7 +42,7 @@ pub fn read_cur_varint(cursor: &mut Cursor<&[u8]>) -> GameResult { } #[allow(unused)] -pub fn read_varint>(iter: &mut I) -> GameResult { +pub fn read_varint>(iter: &mut I) -> GameResult { let mut result = 0u32; for o in 0..5 { @@ -62,27 +61,25 @@ pub fn put_string(buffer: &mut Vec, out: &mut Vec, encoding: TextScriptE if buffer.is_empty() { return; } + let mut chars_count = 0; - let mut cursor: Cursor<&Vec> = Cursor::new(buffer); let mut tmp_buf = Vec::new(); - let mut remaining = buffer.len() as u32; - let mut chars = 0; - while remaining > 0 { - let (consumed, chr) = match encoding { - TextScriptEncoding::UTF8 => read_cur_wtf8(&mut cursor, remaining), - TextScriptEncoding::ShiftJIS => read_cur_shift_jis(&mut cursor, remaining), - }; + let encoding = match encoding { + TextScriptEncoding::UTF8 => encoding_rs::UTF_8, + TextScriptEncoding::ShiftJIS => encoding_rs::SHIFT_JIS, + TextScriptEncoding::GBK => encoding_rs::GBK, + }; - remaining -= consumed; - chars += 1; - - put_varint(chr as i32, &mut tmp_buf); + let decoded_text = encoding.decode_without_bom_handling(&buffer).0; + for chr in decoded_text.chars() { + chars_count += 1; + put_varint(chr as _, &mut tmp_buf); } buffer.clear(); - put_varint(chars, out); + put_varint(chars_count, out); out.append(&mut tmp_buf); } diff --git a/src/game/scripting/tsc/text_script.rs b/src/game/scripting/tsc/text_script.rs index 354c9eb..2e115a1 100644 --- a/src/game/scripting/tsc/text_script.rs +++ b/src/game/scripting/tsc/text_script.rs @@ -48,12 +48,15 @@ bitfield! { pub enum TextScriptEncoding { UTF8 = 0, ShiftJIS, + GBK, } impl From<&str> for TextScriptEncoding { fn from(s: &str) -> Self { match s { "utf-8" => Self::UTF8, + // GBK is a superset to GB2312 + "gbk" | "gb2312" => Self::GBK, _ => Self::ShiftJIS, } } @@ -64,7 +67,10 @@ impl TextScriptEncoding { let required_encoding = if (state.loc.code == "jp" || state.loc.code == "en") && state.constants.is_base() { TextScriptEncoding::ShiftJIS } else { - TextScriptEncoding::UTF8 + match state.loc.code.as_str() { + "zh" => TextScriptEncoding::GBK, + _ => TextScriptEncoding::UTF8, + } }; encoding != required_encoding @@ -798,8 +804,10 @@ impl TextScriptVM { // The vanilla game treats this as a 1-byte value lol //if npc.event_num == (new_direction & 0xFF) as u16 { if npc.event_num == new_direction as u16 { - game_scene.player1.direction = if game_scene.player1.x > npc.x { Direction::Left } else { Direction::Right }; - game_scene.player2.direction = if game_scene.player2.x > npc.x { Direction::Left } else { Direction::Right }; + game_scene.player1.direction = + if game_scene.player1.x > npc.x { Direction::Left } else { Direction::Right }; + game_scene.player2.direction = + if game_scene.player2.x > npc.x { Direction::Left } else { Direction::Right }; } } } diff --git a/src/util/encoding.rs b/src/util/encoding.rs index b549044..f9984fb 100644 --- a/src/util/encoding.rs +++ b/src/util/encoding.rs @@ -2,48 +2,6 @@ use std::io::Cursor; use byteorder::ReadBytesExt; -/// Decodes UTF-8 character in a less strict way. -/// http://simonsapin.github.io/wtf-8/#decoding-wtf-8 -pub fn read_cur_wtf8>(cursor: &mut Cursor, max_bytes: u32) -> (u32, char) { - let result: u32; - let consumed: u32; - - if max_bytes == 0 { - return (0, '\u{fffd}'); - } - - match cursor.read_u8() { - Ok(byte @ 0x00..=0x7f) => { - consumed = 1; - result = byte as u32; - } - Ok(byte @ 0xc2..=0xdf) if max_bytes >= 2 => { - let byte2 = { if let Ok(n) = cursor.read_u8() { n } else { return (1, '\u{fffd}'); } }; - - consumed = 2; - result = (byte as u32 & 0x1f) << 6 | (byte2 as u32 & 0x3f); - } - Ok(byte @ 0xe0..=0xef) if max_bytes >= 3 => { - let byte2 = { if let Ok(n) = cursor.read_u8() { n } else { return (1, '\u{fffd}'); } }; - let byte3 = { if let Ok(n) = cursor.read_u8() { n } else { return (2, '\u{fffd}'); } }; - - consumed = 3; - result = (byte as u32 & 0x0f) << 12 | (byte2 as u32 & 0x3f) << 6 | (byte3 as u32 & 0x3f); - } - Ok(byte @ 0xf0..=0xf4) if max_bytes >= 4 => { - let byte2 = { if let Ok(n) = cursor.read_u8() { n } else { return (1, '\u{fffd}'); } }; - let byte3 = { if let Ok(n) = cursor.read_u8() { n } else { return (2, '\u{fffd}'); } }; - let byte4 = { if let Ok(n) = cursor.read_u8() { n } else { return (3, '\u{fffd}'); } }; - - consumed = 4; - result = (byte as u32 & 0x07) << 18 | (byte2 as u32 & 0x3f) << 12 | (byte3 as u32 & 0x3f) << 6 | (byte4 as u32 & 0x3f); - } - _ => { return (1, '\u{fffd}'); } - } - - (consumed, std::char::from_u32(result).unwrap_or('\u{fffd}')) -} - /// Shift-JIS -> Unicode converter. pub fn read_cur_shift_jis>(cursor: &mut Cursor, max_bytes: u32) -> (u32, char) { let result: u32;