use crate::syn::{error::*, span::*, token::*};
use lazy_static::lazy_static;
use regex::{Regex, RegexBuilder};
use std::str::Chars;

pub struct Lexer<'t> {
    text: &'t str,
    pos: Pos,
}

impl<'t> Lexer<'t> {
    /// Creates a new lexer that tokenizes the given text.
    pub fn new(text: &'t str) -> Self {
        // Load the initial position from the first character, if any.
        let pos = if let Some(c) = text.chars().next() {
            Pos::from_char(c, 0, 0, 0, 0)
        } else {
            Default::default()
        };

        Lexer { text, pos }
    }

    /// Gets whether this lexer has reached the EOF.
    pub fn is_eof(&self) -> bool {
        self.chars().next().is_none()
    }

    ////////////////////////////////////////////////////////////////////////////////
    // Character advancement
    ////////////////////////////////////////////////////////////////////////////////

    // Returns an iterator over the characters remaining at the current position.
    // The iterator borrows from `'t` (the input text), not from `self`, so it
    // stays usable while the lexer's position advances.
    fn chars(&self) -> Chars<'t> {
        self.pos_text().chars()
    }

    // The unlexed remainder of the input.
    fn pos_text(&self) -> &'t str {
        &self.text[self.pos.byte..]
    }

    // The character at the current position, if any.
    fn curr_char(&self) -> Option<char> {
        self.chars().next()
    }

    // Skips insignificant whitespace. Newlines are significant (they lex as
    // `Newline` tokens), so stop at them even though they are whitespace.
    fn skip_whitespace(&mut self) {
        while let Some(c) = self.curr_char() {
            if !c.is_whitespace() || c == '\n' {
                break;
            } else {
                self.adv_char();
            }
        }
    }

    // Advances past the current character, updating the position tracker.
    fn adv_char(&mut self) -> Option<char> {
        let c = self.curr_char()?;
        self.pos = self.pos.next_char(c);
        Some(c)
    }

    ////////////////////////////////////////////////////////////////////////////////
    // Tokens
    ////////////////////////////////////////////////////////////////////////////////

    /// Lexes the next token, or returns `Ok(None)` at EOF.
    pub fn next_token(&mut self) -> Result<Option<Token>> {
        // Constants and statics
        //
        // The whole alternation is anchored and matched leftmost-first, so
        // longer operators (`==`, `->`, ...) must precede their prefixes
        // (`=`, `-`, ...) and `hex_num` must precede `dec_num`. The `\b`
        // keeps `return` from matching the front of a longer identifier.
        lazy_static! {
            static ref REGEX: Regex = RegexBuilder::new(r#"
                ^(?:
                    (?P<kw_return>return\b)
                    |(?P<ident>[a-zA-Z_][a-zA-Z0-9_]*)
                    |(?P<sym>:[a-zA-Z_][a-zA-Z0-9_]*)
                    |(?P<hex_num>0[xX][0-9a-fA-F]+)
                    |(?P<dec_num>[0-9]+)
                    |(?P<dq_str>"([^\\"]|\\[ntr0"'])*")
                    |(?P<sq_str>'([^\\']|\\[ntr0"'])*')

                    |(?P<arrow>->)
                    |(?P<eqeq>==)
                    |(?P<bangeq>!=)
                    |(?P<lteq><=)
                    |(?P<gteq>>=)
                    |(?P<lt><)
                    |(?P<gt>>)

                    |(?P<eq>=)
                    |(?P<plus>\+)
                    |(?P<minus>-)
                    |(?P<splat>\*)
                    |(?P<fslash>/)
                    |(?P<bang>!)
                    |(?P<lparen>\()
                    |(?P<rparen>\))
                    |(?P<lbracket>\[)
                    |(?P<rbracket>\])
                    |(?P<lbrace>\{)
                    |(?P<rbrace>\})
                    |(?P<comma>,)
                    |(?P<eol>;)
                    |(?P<newline>\n)
                )
            "#).ignore_whitespace(true)
                .build()
                .unwrap();
        }

        // Maps each named capture group to its token kind. Only one branch of
        // the alternation can participate in a match, so at most one of these
        // names is captured.
        const CAPTURES: &[(&str, TokenKind)] = &[
            ("kw_return", TokenKind::KwReturn),

            ("ident", TokenKind::Ident),
            ("sym", TokenKind::Sym),
            ("dec_num", TokenKind::Num),
            ("hex_num", TokenKind::Num),
            ("dq_str", TokenKind::Str),
            ("sq_str", TokenKind::Str),

            ("lparen", TokenKind::LParen),
            ("rparen", TokenKind::RParen),
            ("lbracket", TokenKind::LBracket),
            ("rbracket", TokenKind::RBracket),
            ("lbrace", TokenKind::LBrace),
            ("rbrace", TokenKind::RBrace),
            ("comma", TokenKind::Comma),
            ("plus", TokenKind::Plus),
            ("minus", TokenKind::Minus),
            ("splat", TokenKind::Splat),
            ("fslash", TokenKind::FSlash),
            ("bang", TokenKind::Bang),

            ("arrow", TokenKind::Arrow),
            ("eqeq", TokenKind::EqEq),
            ("bangeq", TokenKind::BangEq),
            ("lteq", TokenKind::LtEq),
            ("gteq", TokenKind::GtEq),
            ("lt", TokenKind::Lt),
            ("gt", TokenKind::Gt),

            ("eq", TokenKind::Eq),
            ("eol", TokenKind::Eol),
            ("newline", TokenKind::Newline),
        ];

        self.skip_whitespace();

        if self.curr_char().is_none() {
            return Ok(None);
        }

        let caps = REGEX.captures(self.pos_text())
            .ok_or_else(|| Error::Unexpected {
                what: format!("character {}", self.curr_char().unwrap()),
                pos: self.pos,
            })?;

        // Get the first (and only) named capture that participated in the match.
        let capture_kind = CAPTURES.iter()
            .filter_map(|(name, kind)|
                caps.name(name)
                    .map(|cap| (cap, kind)))
            .next();

        let (token_text, kind) = if let Some((capture, kind)) = capture_kind {
            (capture.as_str(), *kind)
        } else {
            return Err(Error::Unexpected {
                what: format!("character {}", self.curr_char().unwrap()),
                pos: self.pos,
            });
        };

        // Consume the token's text and record its span.
        let start = self.pos;
        self.pos.adv_str(token_text);
        let end = self.pos;

        Ok(Some(Token::new(kind, Span { start, end })))
    }
}

#[cfg(test)]
mod test {
    use super::*;

    /// Lexes `$text` and asserts each expected (kind, text) pair in order,
    /// then asserts the lexer is at EOF.
    macro_rules! test_token {
        ($text:expr, $($token_kind:expr, $token_text:expr),+ $(,)?) => {{
            let text = $text;
            let mut lexer = Lexer::new(text);

            $(
                let token = lexer.next_token().expect("no lex error").expect("a token");
                assert_eq!(token.kind(), $token_kind);
                assert_eq!(token.text_at(text), $token_text);
            )+

            assert!(lexer.is_eof());
        }};

        ($text:expr, $token_kind:expr) => {{
            test_token!($text, $token_kind, $text);
        }};
    }

    #[test]
    fn test_next_token_eof() {
        let mut lexer = Lexer::new("");
        assert!(matches!(lexer.next_token(), Ok(None)));
        assert!(lexer.is_eof());

        let mut lexer = Lexer::new(" ");
        assert!(matches!(lexer.next_token(), Ok(None)));
        assert!(lexer.is_eof());

        let mut lexer = Lexer::new(" \t \r \r\r\t\t ");
        assert!(matches!(lexer.next_token(), Ok(None)));
        assert!(lexer.is_eof());
    }
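
    // A minimal usage sketch of the public API: pull tokens until `next_token`
    // returns `Ok(None)`. Relies only on `Lexer::new`, `next_token`, `is_eof`,
    // and `Token::kind`, all exercised by the other tests here.
    #[test]
    fn test_drive_to_eof() {
        let mut lexer = Lexer::new("return x;");
        let mut kinds = Vec::new();
        while let Some(token) = lexer.next_token().expect("no lex error") {
            kinds.push(token.kind());
        }
        assert!(lexer.is_eof());
        assert_eq!(kinds, [TokenKind::KwReturn, TokenKind::Ident, TokenKind::Eol]);
    }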

    #[test]
    fn test_ident_token() {
        test_token!(
            "ident OtherIdent other_ident ident1234 RETURN",
            TokenKind::Ident, "ident",
            TokenKind::Ident, "OtherIdent",
            TokenKind::Ident, "other_ident",
            TokenKind::Ident, "ident1234",
            TokenKind::Ident, "RETURN",
        );
    }

    #[test]
    fn test_keywords() {
        test_token!("return", TokenKind::KwReturn);
    }
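
    // A sketch of the keyword/identifier boundary: `return\b` in the pattern
    // above cannot match the front of a longer word, so this lexes as a
    // single identifier rather than `return` plus a trailing fragment.
    #[test]
    fn test_keyword_prefix_is_ident() {
        test_token!("returning", TokenKind::Ident);
    }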

    #[test]
    fn test_num_token() {
        test_token!(
            "1234 4321 123498765 432156789 0xdcbaBEEF 0xabcdFEED 0XdcbaBEEF 0XabcdFEED 0X123456789DCBAbeef 0xABCDfeed192837465",
            TokenKind::Num, "1234",
            TokenKind::Num, "4321",
            TokenKind::Num, "123498765",
            TokenKind::Num, "432156789",
            TokenKind::Num, "0xdcbaBEEF",
            TokenKind::Num, "0xabcdFEED",
            TokenKind::Num, "0XdcbaBEEF",
            TokenKind::Num, "0XabcdFEED",
            TokenKind::Num, "0X123456789DCBAbeef",
            TokenKind::Num, "0xABCDfeed192837465",
        );
    }
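
    // Alternation order is significant under leftmost-first matching:
    // `hex_num` is tried before `dec_num`, so `0x10` lexes as one hex literal
    // rather than a decimal `0` followed by something else. A small sketch:
    #[test]
    fn test_num_alternation_order() {
        test_token!("0x10 10",
            TokenKind::Num, "0x10",
            TokenKind::Num, "10",
        );
    }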

    #[test]
    fn test_str_token() {
        test_token!(r#""this is a string""#, TokenKind::Str);
        test_token!(r#"'this is a string'"#, TokenKind::Str);
        test_token!(r#"'this is a string\nwith escapes'"#, TokenKind::Str);
        test_token!(r#""this is a string\nwith escapes""#, TokenKind::Str);
    }
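
    // The `sq_str` class excludes `'`, so a greedy match cannot run across a
    // closing quote; adjacent single-quoted strings lex as two tokens.
    #[test]
    fn test_adjacent_sq_strings() {
        test_token!("'a' 'b'",
            TokenKind::Str, "'a'",
            TokenKind::Str, "'b'",
        );
    }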

    #[test]
    fn test_sym_token() {
        test_token!(":symbol :OtherSymbol :other_symbol :symbol1234",
            TokenKind::Sym, ":symbol",
            TokenKind::Sym, ":OtherSymbol",
            TokenKind::Sym, ":other_symbol",
            TokenKind::Sym, ":symbol1234",
        );
    }

    #[test]
    fn test_eol() {
        test_token!("\n;",
            TokenKind::Newline, "\n",
            TokenKind::Eol, ";",
        );
    }
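
    // `skip_whitespace` deliberately stops at `\n`, so a newline survives as a
    // token while the blanks and tabs around it are discarded.
    #[test]
    fn test_newline_survives_whitespace() {
        test_token!("  \t\n  x",
            TokenKind::Newline, "\n",
            TokenKind::Ident, "x",
        );
    }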

    #[test]
    fn test_symbols() {
        test_token!("(", TokenKind::LParen);
        test_token!(")", TokenKind::RParen);
        test_token!("{", TokenKind::LBrace);
        test_token!("}", TokenKind::RBrace);
        test_token!("[", TokenKind::LBracket);
        test_token!("]", TokenKind::RBracket);
        test_token!(",", TokenKind::Comma);
        test_token!("+", TokenKind::Plus);
        test_token!("-", TokenKind::Minus);
        test_token!("*", TokenKind::Splat);
        test_token!("/", TokenKind::FSlash);
        test_token!("!", TokenKind::Bang);

        test_token!("=", TokenKind::Eq);
        test_token!("!=", TokenKind::BangEq);
        test_token!("==", TokenKind::EqEq);
        test_token!("<=", TokenKind::LtEq);
        test_token!(">=", TokenKind::GtEq);
        test_token!("<", TokenKind::Lt);
        test_token!(">", TokenKind::Gt);
        test_token!("->", TokenKind::Arrow);
    }
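
    // A hedged end-to-end sketch mixing several token classes, to show how a
    // realistic expression walks through `next_token`; it uses only kinds
    // asserted elsewhere in this module.
    #[test]
    fn test_mixed_token_stream() {
        test_token!("x = 1 + 0x2A -> y;",
            TokenKind::Ident, "x",
            TokenKind::Eq, "=",
            TokenKind::Num, "1",
            TokenKind::Plus, "+",
            TokenKind::Num, "0x2A",
            TokenKind::Arrow, "->",
            TokenKind::Ident, "y",
            TokenKind::Eol, ";",
        );
    }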
}