feat: support optional custom encoding

This commit is contained in:
poly000 2024-03-22 11:20:48 +08:00
parent 7630a9b60e
commit 42e1e5c31d
No known key found for this signature in database
5 changed files with 134 additions and 61 deletions

View File

@ -65,6 +65,7 @@ cpal = { git = "https://github.com/doukutsu-rs/cpal", rev = "9d269d8724102404e73
directories = "3"
discord-rich-presence = { version = "0.2", optional = true }
downcast = "0.11"
encoding_rs = "0.8.33"
fern = "0.6.2"
glutin = { git = "https://github.com/doukutsu-rs/glutin.git", rev = "2dd95f042e6e090d36f577cbea125560dd99bd27", optional = true, default_features = false, features = ["x11"] }
imgui = "0.8"

View File

@ -3,7 +3,6 @@ use std::io::{Cursor, Read};
use crate::framework::error::GameError::ParseError;
use crate::framework::error::GameResult;
use crate::game::scripting::tsc::text_script::TextScriptEncoding;
use crate::util::encoding::{read_cur_shift_jis, read_cur_wtf8};
pub fn put_varint(val: i32, out: &mut Vec<u8>) {
let mut x = ((val as u32) >> 31) ^ ((val as u32) << 1);
@ -43,7 +42,7 @@ pub fn read_cur_varint(cursor: &mut Cursor<&[u8]>) -> GameResult<i32> {
}
#[allow(unused)]
pub fn read_varint<I: Iterator<Item=u8>>(iter: &mut I) -> GameResult<i32> {
pub fn read_varint<I: Iterator<Item = u8>>(iter: &mut I) -> GameResult<i32> {
let mut result = 0u32;
for o in 0..5 {
@ -62,27 +61,57 @@ pub fn put_string(buffer: &mut Vec<u8>, out: &mut Vec<u8>, encoding: TextScriptE
if buffer.is_empty() {
return;
}
let mut chars_count = 0;
let mut cursor: Cursor<&Vec<u8>> = Cursor::new(buffer);
let mut tmp_buf = Vec::new();
let mut remaining = buffer.len() as u32;
let mut chars = 0;
while remaining > 0 {
let (consumed, chr) = match encoding {
TextScriptEncoding::UTF8 => read_cur_wtf8(&mut cursor, remaining),
TextScriptEncoding::ShiftJIS => read_cur_shift_jis(&mut cursor, remaining),
};
let encoding = match encoding {
TextScriptEncoding::ShiftJIS => encoding_rs::SHIFT_JIS,
TextScriptEncoding::UTF8 => encoding_rs::UTF_8,
TextScriptEncoding::UTF16BE => encoding_rs::UTF_16BE,
TextScriptEncoding::UTF16LE => encoding_rs::UTF_16LE,
TextScriptEncoding::ISO_2022_JP => encoding_rs::ISO_2022_JP,
TextScriptEncoding::ISO_8859_2 => encoding_rs::ISO_8859_2,
TextScriptEncoding::ISO_8859_3 => encoding_rs::ISO_8859_3,
TextScriptEncoding::ISO_8859_4 => encoding_rs::ISO_8859_4,
TextScriptEncoding::ISO_8859_5 => encoding_rs::ISO_8859_5,
TextScriptEncoding::ISO_8859_6 => encoding_rs::ISO_8859_6,
TextScriptEncoding::ISO_8859_7 => encoding_rs::ISO_8859_7,
TextScriptEncoding::ISO_8859_8 => encoding_rs::ISO_8859_8,
TextScriptEncoding::ISO_8859_8_I => encoding_rs::ISO_8859_8_I,
TextScriptEncoding::ISO_8859_10 => encoding_rs::ISO_8859_10,
TextScriptEncoding::ISO_8859_13 => encoding_rs::ISO_8859_13,
TextScriptEncoding::ISO_8859_14 => encoding_rs::ISO_8859_14,
TextScriptEncoding::ISO_8859_15 => encoding_rs::ISO_8859_15,
TextScriptEncoding::ISO_8859_16 => encoding_rs::ISO_8859_16,
TextScriptEncoding::KOI8_R => encoding_rs::KOI8_R,
TextScriptEncoding::KOI8_U => encoding_rs::KOI8_U,
TextScriptEncoding::MACINTOSH => encoding_rs::MACINTOSH,
TextScriptEncoding::EUC_JP => encoding_rs::EUC_JP,
TextScriptEncoding::EUC_KR => encoding_rs::EUC_KR,
TextScriptEncoding::GB18030 => encoding_rs::GB18030,
TextScriptEncoding::GBK => encoding_rs::GBK,
TextScriptEncoding::BIG5 => encoding_rs::BIG5,
TextScriptEncoding::WINDOWS_1250 => encoding_rs::WINDOWS_1250,
TextScriptEncoding::WINDOWS_1251 => encoding_rs::WINDOWS_1251,
TextScriptEncoding::WINDOWS_1252 => encoding_rs::WINDOWS_1252,
TextScriptEncoding::WINDOWS_1253 => encoding_rs::WINDOWS_1253,
TextScriptEncoding::WINDOWS_1254 => encoding_rs::WINDOWS_1254,
TextScriptEncoding::WINDOWS_1255 => encoding_rs::WINDOWS_1255,
TextScriptEncoding::WINDOWS_1256 => encoding_rs::WINDOWS_1256,
TextScriptEncoding::WINDOWS_1257 => encoding_rs::WINDOWS_1257,
TextScriptEncoding::WINDOWS_1258 => encoding_rs::WINDOWS_1258,
};
remaining -= consumed;
chars += 1;
put_varint(chr as i32, &mut tmp_buf);
let decoded_text = encoding.decode_without_bom_handling(&buffer).0;
for chr in decoded_text.chars() {
chars_count += 1;
put_varint(chr as _, &mut tmp_buf);
}
buffer.clear();
put_varint(chars, out);
put_varint(chars_count, out);
out.append(&mut tmp_buf);
}

View File

@ -44,16 +44,91 @@ bitfield! {
}
/// Identifies the text encoding used for text-script data.
///
/// The enum is represented as a `u8`. `UTF8` is pinned to 0 and every following
/// variant implicitly takes the next value, so inserting or reordering variants
/// changes the numeric value of everything after the edit.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
// Variant names contain underscores (e.g. `ISO_8859_2`), which the default
// camel-case lint would otherwise reject.
#[allow(non_camel_case_types)]
#[repr(u8)]
pub enum TextScriptEncoding {
UTF8 = 0,
ShiftJIS,
UTF16BE,
UTF16LE,
ISO_2022_JP,
ISO_8859_2,
ISO_8859_3,
ISO_8859_4,
ISO_8859_5,
ISO_8859_6,
ISO_8859_7,
ISO_8859_8,
ISO_8859_8_I,
ISO_8859_10,
ISO_8859_13,
ISO_8859_14,
ISO_8859_15,
ISO_8859_16,
KOI8_R,
KOI8_U,
MACINTOSH,
EUC_JP,
EUC_KR,
GB18030,
GBK,
BIG5,
WINDOWS_1250,
WINDOWS_1251,
WINDOWS_1252,
WINDOWS_1253,
WINDOWS_1254,
WINDOWS_1255,
WINDOWS_1256,
WINDOWS_1257,
WINDOWS_1258,
}
impl From<&str> for TextScriptEncoding {
/// Resolves an encoding label (e.g. `"utf-8"`, `"windows-1252"`) to its variant.
///
/// Matching ignores ASCII case and surrounding whitespace, so `"UTF-8"` and
/// `" gbk "` resolve the same way as their canonical lowercase spellings.
/// Any label that is not listed below - including every spelling of
/// Shift-JIS - falls back to `Self::ShiftJIS`.
fn from(s: &str) -> Self {
    // Normalize first: without this, a label that differs only in case or
    // padding would silently fall through to the Shift-JIS default.
    let label = s.trim().to_ascii_lowercase();

    match label.as_str() {
        "utf-8" => Self::UTF8,
        "iso-2022-jp" => Self::ISO_2022_JP,
        "iso-8859-2" => Self::ISO_8859_2,
        "iso-8859-3" => Self::ISO_8859_3,
        "iso-8859-4" => Self::ISO_8859_4,
        "iso-8859-5" => Self::ISO_8859_5,
        "iso-8859-6" => Self::ISO_8859_6,
        "iso-8859-7" => Self::ISO_8859_7,
        "iso-8859-8" => Self::ISO_8859_8,
        "iso-8859-8-i" => Self::ISO_8859_8_I,
        "iso-8859-10" => Self::ISO_8859_10,
        "iso-8859-13" => Self::ISO_8859_13,
        "iso-8859-14" => Self::ISO_8859_14,
        "iso-8859-15" => Self::ISO_8859_15,
        "iso-8859-16" => Self::ISO_8859_16,
        "koi8-r" => Self::KOI8_R,
        "koi8-u" => Self::KOI8_U,
        "macintosh" => Self::MACINTOSH,
        "euc-jp" => Self::EUC_JP,
        "euc-kr" => Self::EUC_KR,
        "gb18030" => Self::GB18030,
        "gbk" => Self::GBK,
        "big5" => Self::BIG5,
        "windows-1250" => Self::WINDOWS_1250,
        "windows-1251" => Self::WINDOWS_1251,
        "windows-1252" => Self::WINDOWS_1252,
        "windows-1253" => Self::WINDOWS_1253,
        "windows-1254" => Self::WINDOWS_1254,
        "windows-1255" => Self::WINDOWS_1255,
        "windows-1256" => Self::WINDOWS_1256,
        "windows-1257" => Self::WINDOWS_1257,
        "windows-1258" => Self::WINDOWS_1258,
        "utf-16be" => Self::UTF16BE,
        "utf-16le" => Self::UTF16LE,
        // Unknown labels keep the historical default.
        _ => Self::ShiftJIS,
    }
}
@ -61,6 +136,9 @@ impl From<&str> for TextScriptEncoding {
impl TextScriptEncoding {
pub fn invalid_encoding(encoding: TextScriptEncoding, state: &SharedGameState) -> bool {
if state.loc.encoding.as_ref().is_some_and(|s| TextScriptEncoding::from(s.as_str()) == encoding) {
return true;
}
let required_encoding = if (state.loc.code == "jp" || state.loc.code == "en") && state.constants.is_base() {
TextScriptEncoding::ShiftJIS
} else {
@ -798,8 +876,10 @@ impl TextScriptVM {
// The vanilla game treats this as a 1-byte value lol
//if npc.event_num == (new_direction & 0xFF) as u16 {
if npc.event_num == new_direction as u16 {
game_scene.player1.direction = if game_scene.player1.x > npc.x { Direction::Left } else { Direction::Right };
game_scene.player2.direction = if game_scene.player2.x > npc.x { Direction::Left } else { Direction::Right };
game_scene.player1.direction =
if game_scene.player1.x > npc.x { Direction::Left } else { Direction::Right };
game_scene.player2.direction =
if game_scene.player2.x > npc.x { Direction::Left } else { Direction::Right };
}
}
}

View File

@ -1,4 +1,5 @@
use std::collections::HashMap;
use std::string;
use crate::framework::context::Context;
use crate::framework::filesystem;
@ -9,6 +10,7 @@ pub struct Locale {
pub code: String,
pub name: String,
pub font: FontData,
pub encoding: Option<String>,
strings: HashMap<String, String>,
}
@ -22,6 +24,7 @@ impl Default for Locale {
scale: 1.0,
space_offset: 0.0
},
encoding: None,
strings: HashMap::new(),
}
}
@ -29,7 +32,7 @@ impl Default for Locale {
impl Locale {
pub fn new(ctx: &mut Context, base_paths: &Vec<String>, code: &str) -> Locale {
let file = filesystem::open_find(ctx, base_paths, &format!("locale/{}.json", code)).unwrap();
let file = filesystem::open_find(ctx, base_paths, &format!("locale/{code}.json")).unwrap();
let json: serde_json::Value = serde_json::from_reader(file).unwrap();
let strings = Locale::flatten(&json);
@ -39,8 +42,10 @@ impl Locale {
let font_name = strings["font"].clone();
let font_scale = strings["font_scale"].parse::<f32>().unwrap_or(1.0);
let font = FontData::new(font_name, font_scale, 0.0);
let encoding = strings.get("encoding").cloned();
Locale { code: code.to_string(), name, font, strings }
Locale { code: code.to_string(), name, font, encoding, strings }
}
fn flatten(json: &serde_json::Value) -> HashMap<String, String> {

View File

@ -2,48 +2,6 @@ use std::io::Cursor;
use byteorder::ReadBytesExt;
/// Leniently decodes one UTF-8 encoded character from `cursor`.
/// See http://simonsapin.github.io/wtf-8/#decoding-wtf-8
///
/// At most `max_bytes` bytes are considered part of the character. Returns the
/// number of bytes accounted for together with the decoded character; any
/// malformed, truncated or unrepresentable sequence yields U+FFFD instead.
/// Continuation bytes are masked, not validated.
pub fn read_cur_wtf8<T: AsRef<[u8]>>(cursor: &mut Cursor<T>, max_bytes: u32) -> (u32, char) {
    const REPLACEMENT: char = '\u{fffd}';

    if max_bytes == 0 {
        return (0, REPLACEMENT);
    }

    let lead = match cursor.read_u8() {
        Ok(byte) => byte,
        Err(_) => return (1, REPLACEMENT),
    };

    // Classify the lead byte: total sequence length and the payload bits it carries.
    let (length, lead_bits): (u32, u32) = match lead {
        0x00..=0x7f => (1, lead as u32),
        0xc2..=0xdf => (2, lead as u32 & 0x1f),
        0xe0..=0xef => (3, lead as u32 & 0x0f),
        0xf0..=0xf4 => (4, lead as u32 & 0x07),
        _ => return (1, REPLACEMENT),
    };

    // The sequence does not fit in the allowed window: report only the lead byte.
    if max_bytes < length {
        return (1, REPLACEMENT);
    }

    let mut code_point = lead_bits;
    for already_read in 1..length {
        match cursor.read_u8() {
            Ok(continuation) => code_point = (code_point << 6) | (continuation as u32 & 0x3f),
            // Ran out of input mid-sequence: report how many bytes were read so far.
            Err(_) => return (already_read, REPLACEMENT),
        }
    }

    (length, std::char::from_u32(code_point).unwrap_or(REPLACEMENT))
}
/// Shift-JIS -> Unicode converter.
pub fn read_cur_shift_jis<T: AsRef<[u8]>>(cursor: &mut Cursor<T>, max_bytes: u32) -> (u32, char) {
let result: u32;