Add parser and AST, remove some stuff from lexer
Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
@@ -1,46 +1,7 @@
|
||||
use crate::{
|
||||
syn::{error::*, span::*, token::*},
|
||||
util::LazyString,
|
||||
};
|
||||
use crate::syn::{error::*, span::*, token::*};
|
||||
use lazy_static::lazy_static;
|
||||
use maplit::hashmap;
|
||||
use regex::{Regex, RegexBuilder};
|
||||
use std::{collections::HashMap, mem, str::Chars};
|
||||
|
||||
const IDENT_START_CHARS: &'static [char] = &[
|
||||
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's',
|
||||
't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L',
|
||||
'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '_',
|
||||
];
|
||||
const IDENT_CHARS: &'static [char] = &[
|
||||
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
|
||||
'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B',
|
||||
'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U',
|
||||
'V', 'W', 'X', 'Y', 'Z', '_',
|
||||
];
|
||||
|
||||
const DEC_NUM_CHARS: &'static [char] = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
|
||||
|
||||
const HEX_NUM_CHARS: &'static [char] = &[
|
||||
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C',
|
||||
'D', 'E', 'F',
|
||||
];
|
||||
|
||||
const STR_QUOTE_CHARS: &'static [char] = &['"', '\''];
|
||||
|
||||
const OP_CHARS: &'static [char] = &['=', '+', '*', '-', '/', '>', '<', '~', '!', '%', '^'];
|
||||
|
||||
lazy_static! {
|
||||
static ref OPS: HashMap<&'static str, TokenKind> = hashmap! {
|
||||
"=" => TokenKind::Eq,
|
||||
"->" => TokenKind::Arrow,
|
||||
};
|
||||
|
||||
static ref KEYWORDS: HashMap<&'static str, TokenKind> = hashmap! {
|
||||
"return" => TokenKind::KwReturn,
|
||||
};
|
||||
|
||||
}
|
||||
use std::str::Chars;
|
||||
|
||||
pub struct Lexer<'t> {
|
||||
text: &'t str,
|
||||
@@ -120,6 +81,10 @@ impl<'t> Lexer<'t> {
|
||||
|(?P<comma>,)
|
||||
|(?P<arrow>->)
|
||||
|(?P<eq>=)
|
||||
|(?P<plus>\+)
|
||||
|(?P<minus>-)
|
||||
|(?P<splat>\*)
|
||||
|(?P<fslash>/)
|
||||
|(?P<dq_str>"([^\\"]|\\[ntr0"'])*")
|
||||
|(?P<sq_str>'([^\\"]|\\[ntr0"'])*')
|
||||
"#).ignore_whitespace(true)
|
||||
@@ -129,12 +94,14 @@ impl<'t> Lexer<'t> {
|
||||
|
||||
const CAPTURES: &[(&str, TokenKind)] = &[
|
||||
("kw_return", TokenKind::KwReturn),
|
||||
|
||||
("ident", TokenKind::Ident),
|
||||
("sym", TokenKind::Sym),
|
||||
("dec_num", TokenKind::Num),
|
||||
("hex_num", TokenKind::Num),
|
||||
("dq_str", TokenKind::Str),
|
||||
("sq_str", TokenKind::Str),
|
||||
|
||||
("lparen", TokenKind::LParen),
|
||||
("rparen", TokenKind::RParen),
|
||||
("lbracket", TokenKind::LBracket),
|
||||
@@ -142,15 +109,18 @@ impl<'t> Lexer<'t> {
|
||||
("lbrace", TokenKind::LBrace),
|
||||
("rbrace", TokenKind::RBrace),
|
||||
("comma", TokenKind::Comma),
|
||||
("plus", TokenKind::Plus),
|
||||
("minus", TokenKind::Minus),
|
||||
("splat", TokenKind::Splat),
|
||||
("fslash", TokenKind::FSlash),
|
||||
|
||||
("arrow", TokenKind::Arrow),
|
||||
("eq", TokenKind::Eq),
|
||||
];
|
||||
|
||||
self.skip_whitespace();
|
||||
|
||||
let curr = if let Some(curr) = self.curr_char() {
|
||||
curr
|
||||
} else {
|
||||
if self.curr_char().is_none() {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
@@ -272,6 +242,10 @@ mod test {
|
||||
test_token!("[", TokenKind::LBracket);
|
||||
test_token!("]", TokenKind::RBracket);
|
||||
test_token!(",", TokenKind::Comma);
|
||||
test_token!("+", TokenKind::Plus);
|
||||
test_token!("-", TokenKind::Minus);
|
||||
test_token!("*", TokenKind::Splat);
|
||||
test_token!("/", TokenKind::FSlash);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
Reference in New Issue
Block a user