From ab965820916d3536dd914484b8ffb9c2b4450312 Mon Sep 17 00:00:00 2001 From: Alula Date: Wed, 26 Aug 2020 03:06:21 +0200 Subject: [PATCH] complete the textscript bytecode compiler --- src/engine_constants.rs | 4 + src/text_script.rs | 290 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 288 insertions(+), 6 deletions(-) diff --git a/src/engine_constants.rs b/src/engine_constants.rs index d97a9dc..c8131f3 100644 --- a/src/engine_constants.rs +++ b/src/engine_constants.rs @@ -5,6 +5,7 @@ use maplit::hashmap; use crate::common::{Direction, Rect}; use crate::str; +use crate::text_script::TextScriptEncoding; #[derive(Debug, Copy, Clone)] pub struct PhysicsConsts { @@ -92,6 +93,7 @@ pub struct EngineConstants { pub caret: CaretConsts, pub world: WorldConsts, pub tex_sizes: HashMap, + pub tsc_encoding: TextScriptEncoding, } impl Clone for EngineConstants { @@ -103,6 +105,7 @@ impl Clone for EngineConstants { caret: self.caret.clone(), world: self.world.clone(), tex_sizes: self.tex_sizes.clone(), + tsc_encoding: self.tsc_encoding, } } } @@ -346,6 +349,7 @@ impl EngineConstants { str!("TextBox") => (244, 144), str!("Title") => (320, 48), }, + tsc_encoding: TextScriptEncoding::UTF8, } } diff --git a/src/text_script.rs b/src/text_script.rs index 360e93f..e28cbf0 100644 --- a/src/text_script.rs +++ b/src/text_script.rs @@ -1,13 +1,23 @@ +use std::collections::HashMap; use std::io; +use std::iter::Peekable; +use std::slice::Iter; +use std::str::FromStr; +use itertools::Itertools; + +use crate::ggez::GameError::ParseError; use crate::ggez::GameResult; +use crate::str; -/// Engine's internal text script VM operation codes. -/// Based on https://www.cavestory.org/guides/basicmodding/guide/tscnotes.htm and game reverse engineering. +/// Engine's text script VM operation codes. 
+#[derive(EnumString, Debug)]
 pub enum OpCode {
     // ---- Internal opcodes (used by bytecode, no TSC representation)
     /// internal: no operation
-    NOP,
+    _NOP,
+    /// internal: unimplemented
+    _UNI,
 
     // ---- Official opcodes ----
     /// >,
+}
 
 impl TextScript {
     /// Loads, decrypts and compiles a text script from specified stream.
@@ -150,7 +198,237 @@ impl TextScript {
     pub fn compile(data: &[u8]) -> GameResult<TextScript> {
         println!("data: {}", String::from_utf8(data.to_vec())?);
 
-        let tsc = TextScript {};
-        Ok(tsc)
+        let mut event_map = HashMap::new();
+        let mut iter = data.iter().peekable();
+        while let Some(&&chr) = iter.peek() {
+            match chr {
+                // "#NNNN" opens an event; the rest of that line is ignored.
+                b'#' => {
+                    iter.next();
+                    let event_num = TextScript::read_number(&mut iter)? as u16;
+                    TextScript::skip_until(b'\n', &mut iter)?;
+
+                    if event_map.contains_key(&event_num) {
+                        return Err(ParseError(format!("Event {} has been defined twice.", event_num)));
+                    }
+
+                    let bytecode = TextScript::compile_event(&mut iter)?;
+                    log::info!("Successfully compiled event #{} ({} bytes generated).", event_num, bytecode.len());
+                    println!("{:x?}", &bytecode);
+                    event_map.insert(event_num, bytecode);
+                }
+                // Line breaks between events carry no meaning.
+                b'\r' | b'\n' => {
+                    iter.next();
+                }
+                n => {
+                    return Err(ParseError(format!("Unexpected token: {}", n as char)));
+                }
+            }
+        }
+
+        Ok(TextScript {
+            event_map
+        })
+    }
+
+    /// Compiles the body of a single event into bytecode.
+    ///
+    /// Plain text is collected in a buffer and written as a varint length followed by the raw
+    /// bytes; three-character opcodes are handed to `compile_code`.
+    fn compile_event(iter: &mut Peekable<Iter<u8>>) -> GameResult<Vec<u8>> {
+        let mut bytecode = Vec::new();
+
+        let mut char_buf = Vec::with_capacity(16);
+
+        while let Some(&&chr) = iter.peek() {
+            match chr {
+                b'#' => {
+                    if !char_buf.is_empty() {
+                        TextScript::put_varint(char_buf.len() as i32, &mut bytecode);
+                        bytecode.append(&mut char_buf);
+                    }
+
+                    // NOTE(review): this copy of the patch has lost everything between a '<' and the
+                    // following '>', so the end of this arm and the header of the next arm
+                    // (presumably `b'<' => {`) are missing after the comment below. Restore them
+                    // from the upstream commit before applying this patch.
+                    // some events end without {
+                    if !char_buf.is_empty() {
+                        TextScript::put_varint(char_buf.len() as i32, &mut bytecode);
+                        bytecode.append(&mut char_buf);
+                    }
+
+                    iter.next();
+                    let n = iter.next_tuple::<(&u8, &u8, &u8)>()
+                        .map(|t| [*t.0, *t.1, *t.2])
+                        .ok_or_else(|| ParseError(str!("Script unexpectedly ended.")))?;
+
+                    // Script bytes are not guaranteed to be valid UTF-8, so decode them safely;
+                    // malformed bytes cannot match any opcode and end up as an "Unknown opcode" error.
+                    let code = String::from_utf8_lossy(&n);
+
+                    TextScript::compile_code(&code, iter, &mut bytecode)?;
+                }
+                _ => {
+                    char_buf.push(chr);
+
+                    iter.next();
+                }
+            }
+        }
+
+        Ok(bytecode)
+    }
+
+    /// Appends `val` to `out` as a little-endian base-128 varint.
+    ///
+    /// The value is first rotated left by one bit so the sign bit lands in bit 0, which keeps
+    /// small non-negative numbers short. Every byte except the last has its high bit set.
+    fn put_varint(val: i32, out: &mut Vec<u8>) {
+        let mut x = ((val as u32) >> 31) ^ ((val as u32) << 1);
+
+        // `>=` is required: a remaining value of exactly 0x80 does not fit in 7 bits and would
+        // otherwise be written as a final byte that still carries the continuation flag.
+        while x >= 0x80 {
+            out.push((x & 0x7f) as u8 | 0x80);
+            x >>= 7;
+        }
+
+        out.push(x as u8);
+    }
+
+    /// Reads a varint written by `put_varint`, consuming at most five bytes.
+    ///
+    /// Returns a parse error if the iterator runs out before a byte without the continuation
+    /// flag has been read.
+    fn read_varint(iter: &mut Peekable<Iter<u8>>) -> GameResult<i32> {
+        let mut result = 0u32;
+
+        for o in 0..5 {
+            let &n = iter.next().ok_or_else(|| ParseError(str!("Script unexpectedly ended.")))?;
+            result |= (n as u32 & 0x7f) << (o * 7);
+
+            if n & 0x80 == 0 {
+                break;
+            }
+        }
+
+        // Rotate right by one bit, undoing the rotation applied by `put_varint`.
+        Ok(((result << 31) ^ (result >> 1)) as i32)
+    }
+
+    /// Emits the bytecode for one opcode: the opcode number followed by its operands, each as a
+    /// varint. Operands are 4-character numbers separated by ':' and are read from `iter`.
+    ///
+    /// Opcodes that are not listed below are replaced by `OpCode::_UNI` and logged.
+    fn compile_code(code: &str, iter: &mut Peekable<Iter<u8>>, out: &mut Vec<u8>) -> GameResult {
+        let instr = OpCode::from_str(code).map_err(|_| ParseError(format!("Unknown opcode: {}", code)))?;
+
+        match instr {
+            // Zero operand codes
+            OpCode::AEp | OpCode::CAT | OpCode::CIL | OpCode::CLO | OpCode::CLR | OpCode::CPS |
+            OpCode::CRE | OpCode::CSS | OpCode::END | OpCode::ESC | OpCode::FLA | OpCode::FMU |
+            OpCode::FRE | OpCode::HMC | OpCode::INI | OpCode::KEY | OpCode::LDP | OpCode::MLP |
+            OpCode::MM0 | OpCode::MNA | OpCode::MS2 | OpCode::MS3 | OpCode::MSG | OpCode::NOD |
+            OpCode::PRI | OpCode::RMU | OpCode::SAT | OpCode::SLP | OpCode::SMC | OpCode::SPS |
+            OpCode::STC | OpCode::SVP | OpCode::TUR | OpCode::WAS | OpCode::ZAM => {
+                TextScript::put_varint(instr as i32, out);
+            }
+            // One operand codes
+            OpCode::BOA | OpCode::BSL | OpCode::FOB | OpCode::FOM | OpCode::QUA | OpCode::UNI |
+            OpCode::MYB | OpCode::MYD | OpCode::FAI | OpCode::FAO | OpCode::WAI | OpCode::FAC |
+            OpCode::GIT | OpCode::NUM | OpCode::DNA | OpCode::DNP | OpCode::FLm | OpCode::FLp |
+            OpCode::MPp | OpCode::SKm | OpCode::SKp | OpCode::EQp | OpCode::EQm | OpCode::MLp |
+            OpCode::ITp | OpCode::ITm | OpCode::AMm | OpCode::UNJ | OpCode::MPJ | OpCode::YNJ |
+            OpCode::EVE | OpCode::XX1 | OpCode::SIL | OpCode::LIp | OpCode::SOU | OpCode::CMU |
+            OpCode::SSS | OpCode::ACH => {
+                let operand = TextScript::read_number(iter)?;
+                TextScript::put_varint(instr as i32, out);
+                TextScript::put_varint(operand as i32, out);
+            }
+            // Two operand codes
+            OpCode::FON | OpCode::MOV | OpCode::AMp | OpCode::NCJ | OpCode::ECJ | OpCode::FLJ |
+            OpCode::ITJ | OpCode::SKJ | OpCode::AMJ | OpCode::SMP | OpCode::PSp => {
+                let operand_a = TextScript::read_number(iter)?;
+                TextScript::expect_char(b':', iter)?;
+                let operand_b = TextScript::read_number(iter)?;
+
+                TextScript::put_varint(instr as i32, out);
+                TextScript::put_varint(operand_a as i32, out);
+                TextScript::put_varint(operand_b as i32, out);
+            }
+            // Three operand codes
+            OpCode::ANP | OpCode::CNP | OpCode::INP | OpCode::TAM | OpCode::CMP => {
+                let operand_a = TextScript::read_number(iter)?;
+                TextScript::expect_char(b':', iter)?;
+                let operand_b = TextScript::read_number(iter)?;
+                TextScript::expect_char(b':', iter)?;
+                let operand_c = TextScript::read_number(iter)?;
+
+                TextScript::put_varint(instr as i32, out);
+                TextScript::put_varint(operand_a as i32, out);
+                TextScript::put_varint(operand_b as i32, out);
+                TextScript::put_varint(operand_c as i32, out);
+            }
+            // Four operand codes
+            OpCode::TRA | OpCode::MNP | OpCode::SNP => {
+                let operand_a = TextScript::read_number(iter)?;
+                TextScript::expect_char(b':', iter)?;
+                let operand_b = TextScript::read_number(iter)?;
+                TextScript::expect_char(b':', iter)?;
+                let operand_c = TextScript::read_number(iter)?;
+                TextScript::expect_char(b':', iter)?;
+                let operand_d = TextScript::read_number(iter)?;
+
+                TextScript::put_varint(instr as i32, out);
+                TextScript::put_varint(operand_a as i32, out);
+                TextScript::put_varint(operand_b as i32, out);
+                TextScript::put_varint(operand_c as i32, out);
+                TextScript::put_varint(operand_d as i32, out);
+            }
+            _ => {
+                TextScript::put_varint(OpCode::_UNI as i32, out);
+                log::warn!("Unimplemented opcode: {:?}", instr);
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Consumes a line break: an optional '\r' followed by a mandatory '\n'.
+    fn expect_newline(iter: &mut Peekable<Iter<u8>>) -> GameResult {
+        if let Some(b'\r') = iter.peek() {
+            iter.next();
+        }
+
+        TextScript::expect_char(b'\n', iter)
+    }
+
+    /// Consumes one byte and fails unless it equals `expect`.
+    fn expect_char(expect: u8, iter: &mut Peekable<Iter<u8>>) -> GameResult {
+        match iter.next() {
+            Some(&n) if n == expect => {
+                Ok(())
+            }
+            Some(&n) => {
+                Err(ParseError(format!("Expected {}, found {}", expect as char, n as char)))
+            }
+            None => {
+                Err(ParseError(str!("Script unexpectedly ended.")))
+            }
+        }
+    }
+
+    /// Consumes bytes up to and including the first occurrence of `expect`.
+    /// Fails if the input ends before `expect` is found.
+    fn skip_until(expect: u8, iter: &mut Peekable<Iter<u8>>) -> GameResult {
+        while let Some(&chr) = iter.next() {
+            if chr == expect {
+                return Ok(());
+            }
+        }
+
+        Err(ParseError(str!("Script unexpectedly ended.")))
+    }
+
+    /// Reads a 4 digit TSC formatted number from iterator.
+    /// Intentionally does no '0'..'9' range checking, since it was often exploited by modders.
+    ///
+    /// The subtraction is done in `i32` so that bytes below b'0' yield negative digits instead
+    /// of overflowing `u8` (a panic in debug builds, a wrapped value in release builds).
+    fn read_number(iter: &mut Peekable<Iter<u8>>) -> GameResult<i32> {
+        Some(0)
+            .and_then(|result| iter.next().map(|&v| result + 1000 * (v as i32 - b'0' as i32)))
+            .and_then(|result| iter.next().map(|&v| result + 100 * (v as i32 - b'0' as i32)))
+            .and_then(|result| iter.next().map(|&v| result + 10 * (v as i32 - b'0' as i32)))
+            .and_then(|result| iter.next().map(|&v| result + (v as i32 - b'0' as i32)))
+            .ok_or_else(|| ParseError(str!("Script unexpectedly ended.")))
+    }
+
+
+    /// Returns true if an event with the given number was compiled into this script.
+    pub fn has_event(&self, id: u16) -> bool {
+        self.event_map.contains_key(&id)
+    }
+}
+
+#[test]
+fn test_varint() {
+    // 64 and 8192 leave exactly 0x80 in the encoder's accumulator; keep them as regression cases.
+    for &n in [0_i32, 1, 23, 64, 456, 7890, 8192, 12345, -1, -23, -456, i32::MAX, i32::MIN].iter() {
+        let mut out = Vec::new();
+        TextScript::put_varint(n, &mut out);
+        let result = TextScript::read_varint(&mut out.iter().peekable()).unwrap();
+        assert_eq!(result, n);
     }
 }