From 378bcef40b9b6aeebac8a817973de294c6f8fd1e Mon Sep 17 00:00:00 2001 From: Alek Ratzloff Date: Fri, 8 May 2020 12:34:51 -0400 Subject: [PATCH] Add comment skipping to lexer * Comment and whitespace skipping is now handled by a regex * Remove skip_whitespace and adv_char since they aren't used anymore * Fix pos_text() to return a reference with the correct lifetime * Add comment skipping tests Signed-off-by: Alek Ratzloff --- src/syn/lexer.rs | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/src/syn/lexer.rs b/src/syn/lexer.rs index 7c70065..9e38dfe 100644 --- a/src/syn/lexer.rs +++ b/src/syn/lexer.rs @@ -37,7 +37,7 @@ impl<'t> Lexer<'t> { self.pos_text().chars() } - fn pos_text(&self) -> &str { + fn pos_text(&self) -> &'t str { &self.text[self.pos.byte..] } @@ -45,22 +45,6 @@ impl<'t> Lexer<'t> { self.chars().next() } - fn skip_whitespace(&mut self) { - while let Some(c) = self.curr_char() { - if !c.is_whitespace() || c == '\n' { - break; - } else { - self.adv_char(); - } - } - } - - fn adv_char(&mut self) -> Option { - let c = self.curr_char()?; - self.pos = self.pos.next_char(c); - Some(c) - } - //////////////////////////////////////////////////////////////////////////////// // Tokens //////////////////////////////////////////////////////////////////////////////// @@ -107,6 +91,10 @@ impl<'t> Lexer<'t> { .ignore_whitespace(true) .build() .unwrap(); + + static ref SKIP_REGEX: Regex = Regex::new( + r"^((#[^\n]*)\n?|[ \t\r]+)+" + ).unwrap(); } const CAPTURES: &[(&str, TokenKind)] = &[ @@ -143,7 +131,9 @@ impl<'t> Lexer<'t> { ("newline", TokenKind::Newline), ]; - self.skip_whitespace(); + if let Some(skip) = SKIP_REGEX.find(self.pos_text()) { + self.pos.adv_str(skip.as_str()); + } if self.curr_char().is_none() { return Ok(None); @@ -151,7 +141,7 @@ impl<'t> Lexer<'t> { let caps = REGEX - .captures(&self.text[self.pos.byte..]) + .captures(self.pos_text()) .ok_or_else(|| Error::Unexpected { what: "EOF".to_string(), 
pos: self.pos, @@ -219,6 +209,11 @@ mod test { let mut lexer = Lexer::new(" \t \r \r\r\t\t "); assert!(matches!(lexer.next_token(), Ok(None))); assert!(lexer.is_eof()); + + let mut lexer = Lexer::new(r"#comment + #another comment"); + assert!(matches!(lexer.next_token(), Ok(None))); + assert!(lexer.is_eof()); } #[test]