Add additional documentation
Adds documentation for TokenStream's chars and skip_whitespace; for OffsetStr, in particular describing why we need that structure and can't just use Rust's built-in &str; for OffsetStr::from, explaining the unsafety behind calling this function; and for the Display and Debug implementations of OffsetStr, describing how unsafe they are.
This commit is contained in:
parent
c776f79f82
commit
d90f6763d5
24
src/main.rs
24
src/main.rs
|
@ -277,11 +277,17 @@ impl<'a> TokenStream<'a> {
|
|||
};
|
||||
}
|
||||
|
||||
// Utility function for creating an iterator over characters of the current source,
|
||||
// starting at the cursor position, as we use this function in quite a few places.
|
||||
#[inline(always)]
|
||||
fn chars(&'a self) -> std::str::Chars<'a> {
|
||||
return self.source[self.cursor..].chars();
|
||||
}
|
||||
|
||||
// Advances the current cursor position by skipping all whitespace characters, defined as
|
||||
// those having the White_Space property in Unicode [PropList.txt].
|
||||
//
|
||||
// [PropList.txt]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
|
||||
fn skip_whitespace(&mut self) {
|
||||
let mut chars = self.chars().peekable();
|
||||
let mut length = 0;
|
||||
|
@ -348,6 +354,7 @@ impl<'a> TokenStream<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
// Returns char representation in a way that is friendly for displaying in terminals.
|
||||
fn natural_char_representation(c: char) -> char {
|
||||
return match c {
|
||||
' ' => '␣',
|
||||
|
@ -357,6 +364,15 @@ fn natural_char_representation(c: char) -> char {
|
|||
};
|
||||
}
|
||||
|
||||
// This struct is a raw representation of Rust's &str, but one that doesn't have to
|
||||
// keep track of its lifetime. This allows us to express a notion of string slice that
|
||||
// lives only as long as the underlying string does, although in a much more unsafe way.
|
||||
// Currently this is the only way I found possible to implement parse_next and next
|
||||
// functions in TokenStream the way they are now (though it might be possible to change
|
||||
// them and implement them differently, without having to sidestep the borrow checker).
|
||||
//
|
||||
// TODO: Do we really need this struct? Is there a way to make the borrow checker accept what
|
||||
// we want to convey? Or is it impossible due to how it currently works/is implemented?
|
||||
#[derive(Copy, Clone)]
|
||||
struct OffsetStr {
|
||||
data: *const u8,
|
||||
|
@ -365,17 +381,25 @@ struct OffsetStr {
|
|||
|
||||
impl std::fmt::Display for OffsetStr {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
// SAFETY: Safety of this function relies on the caller to ensure that the data it wants
|
||||
// to print actually exists, is readable, and is a string. In other words, entirety
|
||||
// of this function is unsafe.
|
||||
return write!(f, "{}", std::str::from_utf8(unsafe { std::slice::from_raw_parts(self.data, self.length) }).unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for OffsetStr {
|
||||
// SAFETY: Since it relies on the Display implementation, it inherits the same SAFETY note
|
||||
// as Display::fmt, and is similarly unsafe.
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
return write!(f, r#""{}""#, self);
|
||||
}
|
||||
}
|
||||
|
||||
impl OffsetStr {
|
||||
// Constructs raw string from a string slice. It is up to the caller to ensure that,
|
||||
// should it want to do anything with it, the underlying data is not dropped or used
|
||||
// for another purpose.
|
||||
pub fn from(s: &str) -> Self {
|
||||
return Self {
|
||||
data: s.as_ptr(),
|
||||
|
|
Loading…
Reference in a new issue