Add additional documentation

Adds documentation for TokenStream's chars and skip_whitespace; for
OffsetStr, in particular describing why we need that structure and
can't just use Rust's builtin &str; for OffsetStr::from, explaining the
unsafety behind calling this function; and for the implementations of
Display and Debug for OffsetStr, describing how unsafe they are.
This commit is contained in:
Aodhnait Étaín 2021-05-23 10:13:52 +01:00
parent c776f79f82
commit d90f6763d5

View file

@ -277,11 +277,17 @@ impl<'a> TokenStream<'a> {
};
}
// Helper that yields the characters of the current source, beginning at the cursor
// position. It exists because this exact iterator is needed in quite a few places.
#[inline(always)]
fn chars(&'a self) -> std::str::Chars<'a> {
    // Slice off everything before the cursor, then iterate over what is left.
    let remaining = &self.source[self.cursor..];
    remaining.chars()
}
// Advances the current cursor position by skipping all whitespace characters, defined as
// having the White_Space property in Unicode [PropList.txt].
//
// [PropList.txt]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
fn skip_whitespace(&mut self) {
let mut chars = self.chars().peekable();
let mut length = 0;
@ -348,6 +354,7 @@ impl<'a> TokenStream<'a> {
}
}
// Returns char representation in a way that is friendly for displaying in terminals.
fn natural_char_representation(c: char) -> char {
return match c {
' ' => '␣',
@ -357,6 +364,15 @@ fn natural_char_representation(c: char) -> char {
};
}
// This struct is a raw representation of Rust's &str, but one that doesn't have to
// keep track of its lifetime. This allows us to express the notion of a string slice that
// lives only as long as the underlying string does, although in a much more unsafe way.
// Currently this is the only way I have found to implement the parse_next and next
// functions in TokenStream the way they are now (though it might be possible to change
// them and implement them differently, without having to sidestep the borrow checker).
//
// TODO: Do we really need this struct? Is there a way to make borrow checker accept what
// we want to convey? Or is it impossible due to how it currently works/is implemented?
#[derive(Copy, Clone)]
struct OffsetStr {
data: *const u8,
@ -365,17 +381,25 @@ struct OffsetStr {
impl std::fmt::Display for OffsetStr {
    // Writes the text this raw string points at. Panics if the bytes it refers to are
    // not valid UTF-8.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // SAFETY: Safety of this function relies on the caller to ensure that the data it
        // wants to print actually exists, is readable, and is a string. In other words,
        // the entirety of this function is unsafe.
        let bytes = unsafe { std::slice::from_raw_parts(self.data, self.length) };
        let text = std::str::from_utf8(bytes).unwrap();
        write!(f, "{}", text)
    }
}
impl std::fmt::Debug for OffsetStr {
    // Prints the Display form of the value wrapped in double quotes (no escaping).
    //
    // SAFETY: Since it relies on the Display implementation, it inherits the same SAFETY
    // note as Display::fmt, and is similarly unsafe.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!(r#""{}""#, self))
    }
}
impl OffsetStr {
// Constructs a raw string from a string slice. It is up to the caller to ensure that,
// should it want to do anything with it, the underlying data is not dropped or used
// for another purpose.
pub fn from(s: &str) -> Self {
return Self {
data: s.as_ptr(),