Add comment skipping to lexer

* Comment and whitespace skipping is now handled by a regex
* Remove skip_whitespace and adv_char since they aren't used anymore
* Fix pos_text() to return a reference with the correct lifetime
* Add comment skipping tests

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2020-05-08 12:34:51 -04:00
parent e7e8690463
commit 378bcef40b

View File

@@ -37,7 +37,7 @@ impl<'t> Lexer<'t> {
self.pos_text().chars() self.pos_text().chars()
} }
fn pos_text(&self) -> &str { fn pos_text(&self) -> &'t str {
&self.text[self.pos.byte..] &self.text[self.pos.byte..]
} }
@@ -45,22 +45,6 @@ impl<'t> Lexer<'t> {
self.chars().next() self.chars().next()
} }
fn skip_whitespace(&mut self) {
while let Some(c) = self.curr_char() {
if !c.is_whitespace() || c == '\n' {
break;
} else {
self.adv_char();
}
}
}
fn adv_char(&mut self) -> Option<char> {
let c = self.curr_char()?;
self.pos = self.pos.next_char(c);
Some(c)
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Tokens // Tokens
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
@@ -107,6 +91,10 @@ impl<'t> Lexer<'t> {
.ignore_whitespace(true) .ignore_whitespace(true)
.build() .build()
.unwrap(); .unwrap();
static ref SKIP_REGEX: Regex = Regex::new(
r"^((#[^\n]*)\n?|[ \t\r]+)+"
).unwrap();
} }
const CAPTURES: &[(&str, TokenKind)] = &[ const CAPTURES: &[(&str, TokenKind)] = &[
@@ -143,7 +131,9 @@ impl<'t> Lexer<'t> {
("newline", TokenKind::Newline), ("newline", TokenKind::Newline),
]; ];
self.skip_whitespace(); if let Some(skip) = SKIP_REGEX.find(self.pos_text()) {
self.pos.adv_str(skip.as_str());
}
if self.curr_char().is_none() { if self.curr_char().is_none() {
return Ok(None); return Ok(None);
@@ -151,7 +141,7 @@ impl<'t> Lexer<'t> {
let caps = let caps =
REGEX REGEX
.captures(&self.text[self.pos.byte..]) .captures(self.pos_text())
.ok_or_else(|| Error::Unexpected { .ok_or_else(|| Error::Unexpected {
what: "EOF".to_string(), what: "EOF".to_string(),
pos: self.pos, pos: self.pos,
@@ -219,6 +209,11 @@ mod test {
let mut lexer = Lexer::new(" \t \r \r\r\t\t "); let mut lexer = Lexer::new(" \t \r \r\r\t\t ");
assert!(matches!(lexer.next_token(), Ok(None))); assert!(matches!(lexer.next_token(), Ok(None)));
assert!(lexer.is_eof()); assert!(lexer.is_eof());
let mut lexer = Lexer::new(r"#comment
#another comment");
assert!(matches!(lexer.next_token(), Ok(None)));
assert!(lexer.is_eof());
} }
#[test] #[test]