shift-jis encoding support

This commit is contained in:
Alula 2020-09-13 05:30:56 +02:00
parent 9a7ea5be42
commit 83dc7893f4
No known key found for this signature in database
GPG Key ID: 3E00485503A1D8BA
5 changed files with 7412 additions and 85 deletions

View File

@ -29,7 +29,7 @@ impl BulletManager {
bullet.cond.set_alive(false);
continue;
}
bullet.tick(state, player);
bullet.hit_flags.0 = 0;
bullet.tick_map_collisions(state, stage);

7371
src/encoding.rs Normal file

File diff suppressed because it is too large Load Diff

View File

@ -50,6 +50,7 @@ mod builtin_fs;
mod bullet;
mod caret;
mod common;
mod encoding;
mod engine_constants;
mod entity;
mod frame;

View File

@ -9,6 +9,7 @@ use crate::ggez::{Context, filesystem, GameResult};
use crate::ggez::GameError::ResourceLoadError;
use crate::map::{Map, NPCData};
use crate::text_script::TextScript;
use crate::encoding::read_cur_shift_jis;
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct NpcType {
@ -177,6 +178,20 @@ fn zero_index(s: &[u8]) -> usize {
s.iter().position(|&c| c == b'\0').unwrap_or(s.len())
}
/// Decodes a Shift-JIS encoded byte slice into an owned `String`.
///
/// Characters are pulled one at a time through `read_cur_shift_jis`, which is handed the
/// number of bytes still available and returns a `(consumed, chr)` pair. Decoding stops once
/// every input byte has been accounted for.
///
/// The caller is expected to have already trimmed any NUL padding from `s`.
fn from_shift_jis(s: &[u8]) -> String {
    let mut cursor = Cursor::new(s);
    // The input length is only a starting hint for the output buffer; multi-byte
    // characters may make the UTF-8 result longer, in which case the string just grows.
    let mut decoded = String::with_capacity(s.len());
    let mut remaining = s.len() as u32;

    while remaining > 0 {
        let (consumed, chr) = read_cur_shift_jis(&mut cursor, remaining);
        decoded.push(chr);

        // A decoder that reports no progress would otherwise keep this loop spinning
        // forever while the output grows without bound.
        if consumed == 0 {
            break;
        }

        // Saturate instead of subtracting directly: if the decoder ever reports more bytes
        // than were left, a plain `-=` would panic in debug builds and wrap in release builds.
        remaining = remaining.saturating_sub(consumed);
    }

    decoded
}
impl StageData {
// todo: refactor to make it less repetitive.
pub fn load_stage_table(ctx: &mut Context, root: &str) -> GameResult<Vec<Self>> {
@ -214,24 +229,12 @@ impl StageData {
f.read_exact(&mut name_jap_buf)?;
f.read_exact(&mut name_buf)?;
let tileset = from_utf8(&ts_buf[0..zero_index(&ts_buf)])
.map_err(|_| ResourceLoadError("UTF-8 error in tileset field".to_string()))?
.to_owned();
let map = from_utf8(&map_buf[0..zero_index(&map_buf)])
.map_err(|_| ResourceLoadError("UTF-8 error in map field".to_string()))?
.to_owned();
let background = from_utf8(&back_buf[0..zero_index(&back_buf)])
.map_err(|_| ResourceLoadError("UTF-8 error in background field".to_string()))?
.to_owned();
let npc1 = from_utf8(&npc1_buf[0..zero_index(&npc1_buf)])
.map_err(|_| ResourceLoadError("UTF-8 error in npc1 field".to_string()))?
.to_owned();
let npc2 = from_utf8(&npc2_buf[0..zero_index(&npc2_buf)])
.map_err(|_| ResourceLoadError("UTF-8 error in npc2 field".to_string()))?
.to_owned();
let name = from_utf8(&name_buf[0..zero_index(&name_buf)])
.map_err(|_| ResourceLoadError("UTF-8 error in name field".to_string()))?
.to_owned();
let tileset = from_shift_jis(&ts_buf[0..zero_index(&ts_buf)]);
let map = from_shift_jis(&map_buf[0..zero_index(&map_buf)]);
let background = from_shift_jis(&back_buf[0..zero_index(&back_buf)]);
let npc1 = from_shift_jis(&npc1_buf[0..zero_index(&npc1_buf)]);
let npc2 = from_shift_jis(&npc2_buf[0..zero_index(&npc2_buf)]);
let name = from_shift_jis(&name_buf[0..zero_index(&name_buf)]);
let stage = StageData {
name: name.clone(),
@ -281,24 +284,13 @@ impl StageData {
let boss_no = f.read_u8()? as usize;
f.read_exact(&mut name_buf)?;
let tileset = from_utf8(&ts_buf[0..zero_index(&ts_buf)])
.map_err(|_| ResourceLoadError("UTF-8 error in tileset field".to_string()))?
.to_owned();
let map = from_utf8(&map_buf)
.map_err(|_| ResourceLoadError("UTF-8 error in map field".to_string()))?
.trim_matches('\0').to_owned();
let background = from_utf8(&back_buf)
.map_err(|_| ResourceLoadError("UTF-8 error in background field".to_string()))?
.trim_matches('\0').to_owned();
let npc1 = from_utf8(&npc1_buf)
.map_err(|_| ResourceLoadError("UTF-8 error in npc1 field".to_string()))?
.trim_matches('\0').to_owned();
let npc2 = from_utf8(&npc2_buf)
.map_err(|_| ResourceLoadError("UTF-8 error in npc2 field".to_string()))?
.trim_matches('\0').to_owned();
let name = from_utf8(&name_buf)
.map_err(|_| ResourceLoadError("UTF-8 error in name field".to_string()))?
.trim_matches('\0').to_owned();
let tileset = from_shift_jis(&ts_buf[0..zero_index(&ts_buf)]);
let map = from_shift_jis(&map_buf[0..zero_index(&map_buf)]);
let background = from_shift_jis(&back_buf[0..zero_index(&back_buf)]);
let npc1 = from_shift_jis(&npc1_buf[0..zero_index(&npc1_buf)]);
let npc2 = from_shift_jis(&npc2_buf[0..zero_index(&npc2_buf)]);
let name = from_shift_jis(&name_buf[0..zero_index(&name_buf)]);
println!("bg type: {}", bg_type);

View File

@ -15,6 +15,7 @@ use num_traits::{clamp, FromPrimitive};
use crate::{SharedGameState, str};
use crate::bitfield;
use crate::common::{Direction, FadeDirection, FadeState};
use crate::encoding::{read_cur_shift_jis, read_cur_wtf8};
use crate::entity::GameEntity;
use crate::ggez::{Context, GameResult};
use crate::ggez::GameError::ParseError;
@ -278,48 +279,6 @@ fn read_cur_varint(cursor: &mut Cursor<&Vec<u8>>) -> GameResult<i32> {
Ok(((result << 31) ^ (result >> 1)) as i32)
}
/// Reads a single UTF-8 encoded character from `cursor`, tolerating malformed input.
/// See http://simonsapin.github.io/wtf-8/#decoding-wtf-8
///
/// `max_bytes` caps how many bytes the character may occupy. The return value is
/// `(consumed, chr)`. `chr` is U+FFFD when `max_bytes` is zero, when the lead byte does not
/// start a sequence that fits into `max_bytes`, when a read fails, or when the assembled
/// value is not a valid `char`. Continuation bytes are masked to their low six bits and are
/// not otherwise validated.
fn read_cur_wtf8(cursor: &mut Cursor<&Vec<u8>>, max_bytes: u32) -> (u32, char) {
    const REPLACEMENT: char = '\u{fffd}';

    if max_bytes == 0 {
        return (0, REPLACEMENT);
    }

    let lead = match cursor.read_u8() {
        Ok(byte) => byte,
        Err(_) => return (1, REPLACEMENT),
    };

    // Sequence length implied by the lead byte, together with the payload bits it carries.
    let (length, payload): (u32, u32) = match lead {
        0x00..=0x7f => (1, lead as u32),
        0xc2..=0xdf => (2, lead as u32 & 0x1f),
        0xe0..=0xef => (3, lead as u32 & 0x0f),
        0xf0..=0xf4 => (4, lead as u32 & 0x07),
        _ => return (1, REPLACEMENT),
    };

    // A sequence that does not fit is rejected before any continuation byte is read.
    if max_bytes < length {
        return (1, REPLACEMENT);
    }

    let mut code_point = payload;
    for already_read in 1..length {
        match cursor.read_u8() {
            // Each continuation byte contributes its low six bits.
            Ok(byte) => code_point = (code_point << 6) | (byte as u32 & 0x3f),
            // On a failed read, report the number of bytes obtained so far.
            Err(_) => return (already_read, REPLACEMENT),
        }
    }

    (length, std::char::from_u32(code_point).unwrap_or(REPLACEMENT))
}
impl TextScriptVM {
pub fn new() -> Self {
Self {
@ -1214,7 +1173,7 @@ impl TextScript {
}
}
let bytecode = TextScript::compile_event(&mut iter, strict)?;
let bytecode = TextScript::compile_event(&mut iter, strict, TextScriptEncoding::ShiftJIS)?;
log::info!("Successfully compiled event #{} ({} bytes generated).", event_num, bytecode.len());
event_map.insert(event_num, bytecode);
}
@ -1238,7 +1197,7 @@ impl TextScript {
})
}
fn compile_event<I: Iterator<Item=u8>>(iter: &mut Peekable<I>, strict: bool) -> GameResult<Vec<u8>> {
fn compile_event<I: Iterator<Item=u8>>(iter: &mut Peekable<I>, strict: bool, encoding: TextScriptEncoding) -> GameResult<Vec<u8>> {
let mut bytecode = Vec::new();
let mut char_buf = Vec::with_capacity(16);
@ -1246,7 +1205,7 @@ impl TextScript {
match chr {
b'#' => {
if !char_buf.is_empty() {
TextScript::put_string(&mut char_buf, &mut bytecode);
TextScript::put_string(&mut char_buf, &mut bytecode, encoding);
}
// some events end without <END marker.
@ -1255,7 +1214,7 @@ impl TextScript {
}
b'<' => {
if !char_buf.is_empty() {
TextScript::put_string(&mut char_buf, &mut bytecode);
TextScript::put_string(&mut char_buf, &mut bytecode, encoding);
}
iter.next();
@ -1278,14 +1237,18 @@ impl TextScript {
Ok(bytecode)
}
fn put_string(buffer: &mut Vec<u8>, out: &mut Vec<u8>) {
fn put_string(buffer: &mut Vec<u8>, out: &mut Vec<u8>, encoding: TextScriptEncoding) {
let mut cursor: Cursor<&Vec<u8>> = Cursor::new(buffer);
let mut tmp_buf = Vec::new();
let mut remaining = buffer.len() as u32;
let mut chars = 0;
while remaining > 0 {
let (consumed, chr) = read_cur_wtf8(&mut cursor, remaining);
let (consumed, chr) = if encoding == TextScriptEncoding::UTF8 {
read_cur_wtf8(&mut cursor, remaining)
} else {
read_cur_shift_jis(&mut cursor, remaining)
};
remaining -= consumed;
chars += 1;