commit 99921e9f2a7b2b9ec1fae277228b6b56dd2c31b7 Author: KitsuneCafe <10284516+kitsunecafe@users.noreply.github.com> Date: Tue Jan 30 06:59:27 2024 -0500 initial diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..5cedfcf --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,16 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "charon" +version = "0.1.0" +dependencies = [ + "streaming-iterator", +] + +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..6090861 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "charon" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +streaming-iterator = "0.1.9" diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..c111ae4 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,49 @@ +#![feature(bufread_skip_until)] +mod tokenizer; +mod reader; + +use std::io::{BufRead, BufReader, Read}; + +pub struct Attribute<'a> { + pub name: &'a str, + pub value: &'a str, +} + +//impl<'a> Attribute<'a> { +// pub fn new(&[u8]) -> Self { +// Self { +// +// } +// } +//} + +pub struct Element<'a> { + pub tag: &'a str, + attributes: Vec>, +} + +pub enum Node<'a> { + Text(&'a [u8]), + Tag(&'a Element<'a>), + EndTag(&'a Element<'a>), +} + +pub struct Charon { + reader: BufReader, +} + +impl Charon { + pub fn new(reader: BufReader) -> Self { + Self { reader } + } +} + +impl Charon {} + +#[cfg(test)] +mod tests { + #[test] + fn reader() { + } +} + diff --git a/src/reader.rs 
// ---- src/reader.rs ----
use std::io::{BufRead, Read};

/// Look-ahead access to upcoming items without consuming them.
pub trait Peek {
    /// Element type exposed by the look-ahead.
    type Item;

    /// Returns a reference to the next item without consuming it.
    ///
    /// Delegates to [`Peek::peek_n`] with `n == 1`, so it is `None` whenever
    /// that call cannot supply a single item.
    fn peek(&mut self) -> Option<&Self::Item> {
        self.peek_n(1)?.first()
    }

    /// Returns the next `n` items without consuming them, or `None` when `n`
    /// items cannot be supplied.
    fn peek_n(&mut self, n: usize) -> Option<&[Self::Item]>;
}

/// Wrapper around an inner reader that adds [`Peek`] and byte-wise iteration
/// while forwarding [`Read`] and [`BufRead`] to the wrapped value.
pub struct Reader<R> {
    inner: R,
}

impl<R> Reader<R> {
    /// Wraps `inner` without reading from it.
    pub fn new(inner: R) -> Self {
        Self { inner }
    }
}

impl<R: BufRead> Peek for Reader<R> {
    type Item = u8;

    /// Returns the first `n` bytes of the inner reader's buffer without
    /// consuming them.
    ///
    /// Returns `None` if `fill_buf` fails or if it yields fewer than `n`
    /// bytes (for example at end of input). Only what a single `fill_buf`
    /// call exposes is visible, so a large `n` can be `None` even when the
    /// underlying stream still holds more data.
    fn peek_n(&mut self, n: usize) -> Option<&[Self::Item]> {
        // `get` instead of `[..n]`: a short buffer must not panic.
        self.inner.fill_buf().ok()?.get(..n)
    }
}

impl<R: Read> Iterator for Reader<R> {
    type Item = u8;

    /// Consumes and returns the next byte.
    ///
    /// Returns `None` at end of input and on any I/O error (errors are not
    /// distinguishable from exhaustion through this interface).
    fn next(&mut self) -> Option<Self::Item> {
        let mut byte = [0u8; 1];
        // `read_exact` fails with `UnexpectedEof` when no byte is left, so an
        // exhausted reader ends the iteration instead of yielding the
        // untouched zero from `byte`.
        self.inner.read_exact(&mut byte).ok()?;
        Some(byte[0])
    }
}

impl<R: Read> Read for Reader<R> {
    /// Forwards to the inner reader.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        self.inner.read(buf)
    }
}

impl<R: BufRead> BufRead for Reader<R> {
    /// Forwards to the inner reader.
    fn fill_buf(&mut self) -> std::io::Result<&[u8]> {
        self.inner.fill_buf()
    }

    /// Forwards to the inner reader.
    fn consume(&mut self, amt: usize) {
        self.inner.consume(amt)
    }
}

#[cfg(test)]
mod tests {
    use std::io::BufReader;

    use super::{Peek, Reader};

    #[test]
    fn basic_read() {
        let mut reader = Reader::new(BufReader::new(b"this is a test string".as_slice()));
        assert_eq!(Some(b't'), reader.next());
        assert_eq!(Some(b'h'), reader.next());
        assert_eq!(Some(b'i'), reader.next());
        assert_eq!(Some(b's'), reader.next());
    }

    #[test]
    fn basic_peek() {
        let mut reader = Reader::new(BufReader::new(b"this is a test string".as_slice()));
        assert_eq!(Some(&b't'), reader.peek());
        assert_eq!(Some(&b't'), reader.peek());
        assert_eq!("this", String::from_utf8_lossy(reader.peek_n(4).unwrap()));
        assert_eq!("this", String::from_utf8_lossy(reader.peek_n(4).unwrap()));
    }

    #[test]
    fn read_stops_at_end_of_input() {
        let mut reader = Reader::new(BufReader::new(b"x".as_slice()));
        assert_eq!(Some(b'x'), reader.next());
        assert_eq!(None, reader.next());
    }

    #[test]
    fn peek_beyond_input_is_none() {
        let mut reader = Reader::new(BufReader::new(b"x".as_slice()));
        assert_eq!(None, reader.peek_n(2));
    }
}

// ---- src/tokenizer.rs ----

/// Token kinds produced by [`Tokenizer`].
///
/// NOTE(review): the variant meanings are not exercised anywhere yet; the
/// names suggest markup tags with quoted attribute values — confirm once the
/// tokenizer is implemented.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token<'a> {
    Space,
    TagOpen,
    TagClose,
    QuoteOpen,
    QuoteClose,
    Equals,
    Tag(&'a str),
    Attribute(&'a str),
    Value(&'a str),
}

/// Namespace type for the tokenizer entry point.
pub struct Tokenizer;

impl Tokenizer {
    /// Tokenizes everything readable from `reader`.
    ///
    /// Not implemented yet.
    ///
    /// NOTE(review): the reader is taken by value, so there is no input
    /// buffer for the tokens to borrow from and `'static` is the only
    /// lifetime that type-checks here. The signature will likely need to
    /// change (borrowed input or owned token payloads) when this is written.
    ///
    /// # Panics
    ///
    /// Always panics via `todo!`.
    pub fn parse<R: Read>(_reader: R) -> Vec<Token<'static>> {
        todo!("Tokenizer::parse is not implemented yet")
    }
}