diff --git a/Cargo.toml b/Cargo.toml
index b5dae93..f456094 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -60,6 +60,7 @@ android = []
 #cpal = { path = "./3rdparty/cpal" }
 byteorder = "1.4"
 case_insensitive_hashmap = "1.0.0"
+charset-normalizer-rs = "1.0.6"
 chrono = { version = "0.4", default-features = false, features = ["clock", "std"] }
 cpal = { git = "https://github.com/doukutsu-rs/cpal", rev = "9d269d8724102404e73a61e9def0c0cbc921b676" }
 directories = "3"
diff --git a/src/game/scripting/tsc/bytecode_utils.rs b/src/game/scripting/tsc/bytecode_utils.rs
index aabd101..06eb749 100644
--- a/src/game/scripting/tsc/bytecode_utils.rs
+++ b/src/game/scripting/tsc/bytecode_utils.rs
@@ -66,9 +66,24 @@ pub fn put_string(buffer: &mut Vec, out: &mut Vec, encoding: TextScriptE
     let mut tmp_buf = Vec::new();
 
     let encoding = match encoding {
-        TextScriptEncoding::UTF8 => encoding_rs::UTF_8,
         TextScriptEncoding::ShiftJIS => encoding_rs::SHIFT_JIS,
-        TextScriptEncoding::GBK => encoding_rs::GBK,
+        // Scripts that are not explicitly Shift_JIS get their encoding detected from the raw bytes.
+        // The arm names the variant (instead of `_`) so a new variant forces a decision here.
+        TextScriptEncoding::UTF8 => {
+            let guess = charset_normalizer_rs::from_bytes(&buffer, None);
+
+            // Resolve the detected name through encoding_rs's own label lookup rather than a
+            // hand-maintained table, so encodings such as Shift_JIS or IBM866 are not missed.
+            // `for_label_no_replacement` keeps labels that map to the replacement encoding
+            // from turning the whole string into U+FFFD.
+            // NOTE(review): assumes charset-normalizer-rs reports WHATWG-compatible names - confirm.
+            let detected = guess
+                .get_best()
+                .and_then(|best| encoding_rs::Encoding::for_label_no_replacement(best.encoding().as_bytes()));
+
+            // Fall back to UTF-8 when nothing usable was detected instead of dropping the string.
+            detected.unwrap_or(encoding_rs::UTF_8)
+        }
     };
 
     let decoded_text = encoding.decode_without_bom_handling(&buffer).0;
diff --git a/src/game/scripting/tsc/text_script.rs b/src/game/scripting/tsc/text_script.rs
index 2e115a1..9862f75 100644
--- a/src/game/scripting/tsc/text_script.rs
+++ b/src/game/scripting/tsc/text_script.rs
@@ -48,15 +48,12 @@ bitfield! {
 pub enum TextScriptEncoding {
     UTF8 = 0,
     ShiftJIS,
-    GBK,
 }
 impl From<&str> for TextScriptEncoding {
     fn from(s: &str) -> Self {
         match s {
             "utf-8" => Self::UTF8,
-            // GBK is a superset to GB2312
-            "gbk" | "gb2312" => Self::GBK,
             _ => Self::ShiftJIS,
         }
     }
 }
@@ -67,10 +64,7 @@ impl TextScriptEncoding {
         let required_encoding = if (state.loc.code == "jp" || state.loc.code == "en") && state.constants.is_base() {
             TextScriptEncoding::ShiftJIS
         } else {
-            match state.loc.code.as_str() {
-                "zh" => TextScriptEncoding::GBK,
-                _ => TextScriptEncoding::UTF8,
-            }
+            TextScriptEncoding::UTF8
         };
 
         encoding != required_encoding