@@ -11,16 +11,10 @@ pub enum Error {
|
|||||||
},
|
},
|
||||||
|
|
||||||
#[snafu(display("unexpected {}", what))]
|
#[snafu(display("unexpected {}", what))]
|
||||||
Unexpected {
|
Unexpected { what: String, pos: Pos },
|
||||||
what: String,
|
|
||||||
pos: Pos,
|
|
||||||
},
|
|
||||||
|
|
||||||
#[snafu(display("unknown {}", what))]
|
#[snafu(display("unknown {}", what))]
|
||||||
Unknown {
|
Unknown { what: String, pos: Pos },
|
||||||
what: String,
|
|
||||||
pos: Pos,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||||
|
|||||||
@@ -18,10 +18,7 @@ impl<'t> Lexer<'t> {
|
|||||||
Default::default()
|
Default::default()
|
||||||
};
|
};
|
||||||
|
|
||||||
Lexer {
|
Lexer { text, pos }
|
||||||
text,
|
|
||||||
pos,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Gets whether this lexer has reached the EOF.
|
/// Gets whether this lexer has reached the EOF.
|
||||||
@@ -66,7 +63,8 @@ impl<'t> Lexer<'t> {
|
|||||||
pub fn next_token(&mut self) -> Result<Option<Token>> {
|
pub fn next_token(&mut self) -> Result<Option<Token>> {
|
||||||
// Constants and statics
|
// Constants and statics
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
static ref REGEX: Regex = RegexBuilder::new(r#"
|
static ref REGEX: Regex = RegexBuilder::new(
|
||||||
|
r#"
|
||||||
^(?P<kw_return>return)
|
^(?P<kw_return>return)
|
||||||
|(?P<ident>[a-zA-Z_][a-zA-Z0-9_]*)
|
|(?P<ident>[a-zA-Z_][a-zA-Z0-9_]*)
|
||||||
|(?P<sym>:[a-zA-Z_][a-zA-Z0-9_]*)
|
|(?P<sym>:[a-zA-Z_][a-zA-Z0-9_]*)
|
||||||
@@ -99,21 +97,21 @@ impl<'t> Lexer<'t> {
|
|||||||
|(?P<comma>,)
|
|(?P<comma>,)
|
||||||
|(?P<eol>;)
|
|(?P<eol>;)
|
||||||
|(?P<newline>\n)
|
|(?P<newline>\n)
|
||||||
"#).ignore_whitespace(true)
|
"#
|
||||||
|
)
|
||||||
|
.ignore_whitespace(true)
|
||||||
.build()
|
.build()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
const CAPTURES: &[(&str, TokenKind)] = &[
|
const CAPTURES: &[(&str, TokenKind)] = &[
|
||||||
("kw_return", TokenKind::KwReturn),
|
("kw_return", TokenKind::KwReturn),
|
||||||
|
|
||||||
("ident", TokenKind::Ident),
|
("ident", TokenKind::Ident),
|
||||||
("sym", TokenKind::Sym),
|
("sym", TokenKind::Sym),
|
||||||
("dec_num", TokenKind::Num),
|
("dec_num", TokenKind::Num),
|
||||||
("hex_num", TokenKind::Num),
|
("hex_num", TokenKind::Num),
|
||||||
("dq_str", TokenKind::Str),
|
("dq_str", TokenKind::Str),
|
||||||
("sq_str", TokenKind::Str),
|
("sq_str", TokenKind::Str),
|
||||||
|
|
||||||
("lparen", TokenKind::LParen),
|
("lparen", TokenKind::LParen),
|
||||||
("rparen", TokenKind::RParen),
|
("rparen", TokenKind::RParen),
|
||||||
("lbracket", TokenKind::LBracket),
|
("lbracket", TokenKind::LBracket),
|
||||||
@@ -127,7 +125,6 @@ impl<'t> Lexer<'t> {
|
|||||||
("splat", TokenKind::Splat),
|
("splat", TokenKind::Splat),
|
||||||
("fslash", TokenKind::FSlash),
|
("fslash", TokenKind::FSlash),
|
||||||
("bang", TokenKind::Bang),
|
("bang", TokenKind::Bang),
|
||||||
|
|
||||||
("arrow", TokenKind::Arrow),
|
("arrow", TokenKind::Arrow),
|
||||||
("eqeq", TokenKind::EqEq),
|
("eqeq", TokenKind::EqEq),
|
||||||
("bangeq", TokenKind::BangEq),
|
("bangeq", TokenKind::BangEq),
|
||||||
@@ -135,7 +132,6 @@ impl<'t> Lexer<'t> {
|
|||||||
("gteq", TokenKind::GtEq),
|
("gteq", TokenKind::GtEq),
|
||||||
("lt", TokenKind::Lt),
|
("lt", TokenKind::Lt),
|
||||||
("gt", TokenKind::Gt),
|
("gt", TokenKind::Gt),
|
||||||
|
|
||||||
("eq", TokenKind::Eq),
|
("eq", TokenKind::Eq),
|
||||||
("eol", TokenKind::Eol),
|
("eol", TokenKind::Eol),
|
||||||
("newline", TokenKind::Newline),
|
("newline", TokenKind::Newline),
|
||||||
@@ -147,28 +143,30 @@ impl<'t> Lexer<'t> {
|
|||||||
return Ok(None);
|
return Ok(None);
|
||||||
};
|
};
|
||||||
|
|
||||||
let caps = REGEX.captures(&self.text[self.pos.byte..])
|
let caps =
|
||||||
|
REGEX
|
||||||
|
.captures(&self.text[self.pos.byte..])
|
||||||
.ok_or_else(|| Error::Unexpected {
|
.ok_or_else(|| Error::Unexpected {
|
||||||
what: "EOF".to_string(),
|
what: "EOF".to_string(),
|
||||||
pos: self.pos,
|
pos: self.pos,
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
// Get first capture
|
// Get first capture
|
||||||
let capture_kind = CAPTURES.iter()
|
let capture_kind = CAPTURES
|
||||||
.filter_map(|(name, kind)|
|
.iter()
|
||||||
caps.name(name)
|
.filter_map(|(name, kind)| caps.name(name).map(|cap| (cap, kind)))
|
||||||
.map(|cap| (cap, kind)))
|
|
||||||
.next();
|
.next();
|
||||||
|
|
||||||
let (token_text, kind) = if let Some((capture, kind)) = capture_kind {
|
let (token_text, kind) = if let Some((capture, kind)) = capture_kind {
|
||||||
(capture.as_str(), *kind)
|
(capture.as_str(), *kind)
|
||||||
} else {
|
} else {
|
||||||
return Err(
|
return Err(Error::Unexpected {
|
||||||
Error::Unexpected {
|
what: format!(
|
||||||
what: format!("character {}", (&self.text[self.pos.byte..]).chars().next().unwrap()),
|
"character {}",
|
||||||
|
(&self.text[self.pos.byte..]).chars().next().unwrap()
|
||||||
|
),
|
||||||
pos: self.pos,
|
pos: self.pos,
|
||||||
}
|
});
|
||||||
);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let start = self.pos;
|
let start = self.pos;
|
||||||
@@ -221,11 +219,16 @@ mod test {
|
|||||||
fn test_ident_token() {
|
fn test_ident_token() {
|
||||||
test_token!(
|
test_token!(
|
||||||
"ident OtherIdent other_ident ident1234 RETURN",
|
"ident OtherIdent other_ident ident1234 RETURN",
|
||||||
TokenKind::Ident, "ident",
|
TokenKind::Ident,
|
||||||
TokenKind::Ident, "OtherIdent",
|
"ident",
|
||||||
TokenKind::Ident, "other_ident",
|
TokenKind::Ident,
|
||||||
TokenKind::Ident, "ident1234",
|
"OtherIdent",
|
||||||
TokenKind::Ident, "RETURN",
|
TokenKind::Ident,
|
||||||
|
"other_ident",
|
||||||
|
TokenKind::Ident,
|
||||||
|
"ident1234",
|
||||||
|
TokenKind::Ident,
|
||||||
|
"RETURN",
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -261,20 +264,22 @@ mod test {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_sym_token() {
|
fn test_sym_token() {
|
||||||
test_token!(":symbol :OtherSymbol :other_symbol :symbol1234",
|
test_token!(
|
||||||
TokenKind::Sym, ":symbol",
|
":symbol :OtherSymbol :other_symbol :symbol1234",
|
||||||
TokenKind::Sym, ":OtherSymbol",
|
TokenKind::Sym,
|
||||||
TokenKind::Sym, ":other_symbol",
|
":symbol",
|
||||||
TokenKind::Sym, ":symbol1234",
|
TokenKind::Sym,
|
||||||
|
":OtherSymbol",
|
||||||
|
TokenKind::Sym,
|
||||||
|
":other_symbol",
|
||||||
|
TokenKind::Sym,
|
||||||
|
":symbol1234",
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_eol() {
|
fn test_eol() {
|
||||||
test_token!("\n;",
|
test_token!("\n;", TokenKind::Newline, "\n", TokenKind::Eol, ";");
|
||||||
TokenKind::Newline, "\n",
|
|
||||||
TokenKind::Eol, ";"
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|||||||
@@ -179,7 +179,8 @@ impl<'t> Parser<'t> {
|
|||||||
TokenKind::ObjBrace => {
|
TokenKind::ObjBrace => {
|
||||||
let prev_skip = self.set_skip_newlines(true)?;
|
let prev_skip = self.set_skip_newlines(true)?;
|
||||||
let object = self.next_obj_list()?;
|
let object = self.next_obj_list()?;
|
||||||
let end_token = self.expect_token_kind(TokenKind::RBrace, "end of object (right curly brace)")?;
|
let end_token =
|
||||||
|
self.expect_token_kind(TokenKind::RBrace, "end of object (right curly brace)")?;
|
||||||
let span = token.span().union(end_token.span());
|
let span = token.span().union(end_token.span());
|
||||||
Expr::Base(BaseExpr {
|
Expr::Base(BaseExpr {
|
||||||
kind: BaseExprKind::Object(object),
|
kind: BaseExprKind::Object(object),
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
use std::fmt::{Display, Formatter, self};
|
use std::fmt::{self, Display, Formatter};
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, Eq)]
|
#[derive(Debug, Clone, Copy, Eq)]
|
||||||
#[cfg_attr(not(test), derive(PartialEq))]
|
#[cfg_attr(not(test), derive(PartialEq))]
|
||||||
@@ -38,7 +38,11 @@ impl PartialEq for Pos {
|
|||||||
impl Pos {
|
impl Pos {
|
||||||
pub fn from_char(c: char, source: usize, line: usize, col: usize, byte: usize) -> Self {
|
pub fn from_char(c: char, source: usize, line: usize, col: usize, byte: usize) -> Self {
|
||||||
Pos {
|
Pos {
|
||||||
source, line, col, byte, len: c.len_utf8(),
|
source,
|
||||||
|
line,
|
||||||
|
col,
|
||||||
|
byte,
|
||||||
|
len: c.len_utf8(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -98,9 +102,20 @@ impl Display for Span {
|
|||||||
if self.start == self.end {
|
if self.start == self.end {
|
||||||
Display::fmt(&self.start, fmt)
|
Display::fmt(&self.start, fmt)
|
||||||
} else if self.start.line == self.end.line {
|
} else if self.start.line == self.end.line {
|
||||||
write!(fmt, "line {} at {}-{}", self.start.line + 1, self.start.col + 1, self.end.col + 1)
|
write!(
|
||||||
|
fmt,
|
||||||
|
"line {} at {}-{}",
|
||||||
|
self.start.line + 1,
|
||||||
|
self.start.col + 1,
|
||||||
|
self.end.col + 1
|
||||||
|
)
|
||||||
} else {
|
} else {
|
||||||
write!(fmt, "lines {} to {}", self.start.line + 1, self.end.line + 1)
|
write!(
|
||||||
|
fmt,
|
||||||
|
"lines {} to {}",
|
||||||
|
self.start.line + 1,
|
||||||
|
self.end.line + 1
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -178,7 +193,7 @@ mod test {
|
|||||||
col: 15,
|
col: 15,
|
||||||
byte: 15,
|
byte: 15,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
}
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
let second = Span {
|
let second = Span {
|
||||||
@@ -193,7 +208,7 @@ mod test {
|
|||||||
col: 27,
|
col: 27,
|
||||||
byte: 27,
|
byte: 27,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
}
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
let expected = Span {
|
let expected = Span {
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
use crate::syn::span::*;
|
use crate::syn::span::*;
|
||||||
use std::fmt::{Display, Formatter, self};
|
use std::fmt::{self, Display, Formatter};
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
pub enum TokenKind {
|
pub enum TokenKind {
|
||||||
@@ -84,7 +84,7 @@ pub struct Token {
|
|||||||
|
|
||||||
impl Token {
|
impl Token {
|
||||||
pub fn new(kind: TokenKind, span: Span) -> Self {
|
pub fn new(kind: TokenKind, span: Span) -> Self {
|
||||||
Token { kind, span, }
|
Token { kind, span }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn kind(&self) -> TokenKind {
|
pub fn kind(&self) -> TokenKind {
|
||||||
@@ -93,5 +93,7 @@ impl Token {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Spanned for Token {
|
impl Spanned for Token {
|
||||||
fn span(&self) -> Span { self.span }
|
fn span(&self) -> Span {
|
||||||
|
self.span
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user