Add comment skipping to lexer

* Comment and whitespace skipping is now handled by a regex
* Remove skip_whitespace and adv_char since they aren't used anymore
* Fix pos_text() to return a reference with the correct lifetime
* Add comment skipping tests

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2020-05-08 12:34:51 -04:00
parent e7e8690463
commit 378bcef40b

View File

@@ -37,7 +37,7 @@ impl<'t> Lexer<'t> {
self.pos_text().chars()
}
/// Returns the slice of `self.text` that runs from the current byte
/// position (`self.pos.byte`) to the end of the text.
fn pos_text(&self) -> &str {
fn pos_text(&self) -> &'t str {
&self.text[self.pos.byte..]
}
@@ -45,22 +45,6 @@ impl<'t> Lexer<'t> {
self.chars().next()
}
/// Advances past consecutive whitespace characters, stopping at the first
/// non-whitespace character, at a `'\n'` (which is left in place), or at
/// the end of input.
fn skip_whitespace(&mut self) {
    loop {
        match self.curr_char() {
            // Newlines are whitespace too, but they must not be consumed here.
            Some(ch) if ch != '\n' && ch.is_whitespace() => {
                self.adv_char();
            }
            _ => return,
        }
    }
}
/// Consumes the current character, moving `self.pos` forward via
/// `next_char`, and returns it. Returns `None` (leaving the position
/// untouched) when there is no current character.
fn adv_char(&mut self) -> Option<char> {
    self.curr_char().map(|ch| {
        self.pos = self.pos.next_char(ch);
        ch
    })
}
////////////////////////////////////////////////////////////////////////////////
// Tokens
////////////////////////////////////////////////////////////////////////////////
@@ -107,6 +91,10 @@ impl<'t> Lexer<'t> {
.ignore_whitespace(true)
.build()
.unwrap();
static ref SKIP_REGEX: Regex = Regex::new(
r"^((#[^\n]*)\n?|[ \t\r]+)+"
).unwrap();
}
const CAPTURES: &[(&str, TokenKind)] = &[
@@ -143,7 +131,9 @@ impl<'t> Lexer<'t> {
("newline", TokenKind::Newline),
];
self.skip_whitespace();
if let Some(skip) = SKIP_REGEX.find(self.pos_text()) {
self.pos.adv_str(skip.as_str());
}
if self.curr_char().is_none() {
return Ok(None);
@@ -151,7 +141,7 @@ impl<'t> Lexer<'t> {
let caps =
REGEX
.captures(&self.text[self.pos.byte..])
.captures(self.pos_text())
.ok_or_else(|| Error::Unexpected {
what: "EOF".to_string(),
pos: self.pos,
@@ -219,6 +209,11 @@ mod test {
let mut lexer = Lexer::new(" \t \r \r\r\t\t ");
assert!(matches!(lexer.next_token(), Ok(None)));
assert!(lexer.is_eof());
let mut lexer = Lexer::new(r"#comment
#another comment");
assert!(matches!(lexer.next_token(), Ok(None)));
assert!(lexer.is_eof());
}
#[test]