Add comment skipping to lexer

* Comment and whitespace skipping is now handled by a regex * Remove skip_whitespace and adv_char since they aren't used anymore * Fix pos_text() to return a reference with the correct lifetime * Add comment skipping tests Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
2020-05-08 12:34:51 -04:00
parent e7e8690463
commit 378bcef40b
1 changed files with 14 additions and 19 deletions
--- a/src/syn/lexer.rs
+++ b/src/syn/lexer.rs
@@ -37,7 +37,7 @@ impl<'t> Lexer<'t> {
        self.pos_text().chars()
    }
-    fn pos_text(&self) -> &str {
+    fn pos_text(&self) -> &'t str {
        &self.text[self.pos.byte..]
    }
@@ -45,22 +45,6 @@ impl<'t> Lexer<'t> {
        self.chars().next()
    }
    /// Advances the lexer past any run of whitespace at the current position.
    ///
    /// Stops at the first character that is not whitespace, at a `'\n'`
    /// (which is deliberately left unconsumed), or at the end of the input.
    fn skip_whitespace(&mut self) {
        while let Some(c) = self.curr_char() {
            // `'\n'` counts as whitespace for `char::is_whitespace`, so it is
            // excluded explicitly here — presumably because newlines are lexed
            // as their own token; confirm against the token table.
            if !c.is_whitespace() || c == '\n' {
                break;
            } else {
                self.adv_char();
            }
        }
    }
    /// Consumes the character at the current position and returns it.
    ///
    /// Returns `None` without changing the position when there is no current
    /// character; otherwise moves `self.pos` forward via `next_char` and
    /// returns the consumed character.
    fn adv_char(&mut self) -> Option<char> {
        // `?` bails out early with `None` when `curr_char()` yields nothing.
        let c = self.curr_char()?;
        self.pos = self.pos.next_char(c);
        Some(c)
    }
    ////////////////////////////////////////////////////////////////////////////////
    // Tokens
    ////////////////////////////////////////////////////////////////////////////////
@@ -107,6 +91,10 @@ impl<'t> Lexer<'t> {
            .ignore_whitespace(true)
            .build()
            .unwrap();
            static ref SKIP_REGEX: Regex = Regex::new(
                r"^((#[^\n]*)\n?|[ \t\r]+)+"
            ).unwrap();
        }
        const CAPTURES: &[(&str, TokenKind)] = &[
@@ -143,7 +131,9 @@ impl<'t> Lexer<'t> {
            ("newline", TokenKind::Newline),
        ];
-        self.skip_whitespace();
+        if let Some(skip) = SKIP_REGEX.find(self.pos_text()) {
            self.pos.adv_str(skip.as_str());
        }
        if self.curr_char().is_none() {
            return Ok(None);
@@ -151,7 +141,7 @@ impl<'t> Lexer<'t> {
        let caps =
            REGEX
-                .captures(&self.text[self.pos.byte..])
+                .captures(self.pos_text())
                .ok_or_else(|| Error::Unexpected {
                    what: "EOF".to_string(),
                    pos: self.pos,
@@ -219,6 +209,11 @@ mod test {
        let mut lexer = Lexer::new("   \t \r \r\r\t\t    ");
        assert!(matches!(lexer.next_token(), Ok(None)));
        assert!(lexer.is_eof());
        let mut lexer = Lexer::new(r"#comment
            #another comment");
        assert!(matches!(lexer.next_token(), Ok(None)));
        assert!(lexer.is_eof());
    }
    #[test]