@@ -11,16 +11,10 @@ pub enum Error {
|
||||
},
|
||||
|
||||
#[snafu(display("unexpected {}", what))]
|
||||
Unexpected {
|
||||
what: String,
|
||||
pos: Pos,
|
||||
},
|
||||
Unexpected { what: String, pos: Pos },
|
||||
|
||||
#[snafu(display("unknown {}", what))]
|
||||
Unknown {
|
||||
what: String,
|
||||
pos: Pos,
|
||||
}
|
||||
Unknown { what: String, pos: Pos },
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
|
||||
@@ -18,10 +18,7 @@ impl<'t> Lexer<'t> {
|
||||
Default::default()
|
||||
};
|
||||
|
||||
Lexer {
|
||||
text,
|
||||
pos,
|
||||
}
|
||||
Lexer { text, pos }
|
||||
}
|
||||
|
||||
/// Gets whether this lexer has reached the EOF.
|
||||
@@ -66,7 +63,8 @@ impl<'t> Lexer<'t> {
|
||||
pub fn next_token(&mut self) -> Result<Option<Token>> {
|
||||
// Constants and statics
|
||||
lazy_static! {
|
||||
static ref REGEX: Regex = RegexBuilder::new(r#"
|
||||
static ref REGEX: Regex = RegexBuilder::new(
|
||||
r#"
|
||||
^(?P<kw_return>return)
|
||||
|(?P<ident>[a-zA-Z_][a-zA-Z0-9_]*)
|
||||
|(?P<sym>:[a-zA-Z_][a-zA-Z0-9_]*)
|
||||
@@ -99,21 +97,21 @@ impl<'t> Lexer<'t> {
|
||||
|(?P<comma>,)
|
||||
|(?P<eol>;)
|
||||
|(?P<newline>\n)
|
||||
"#).ignore_whitespace(true)
|
||||
"#
|
||||
)
|
||||
.ignore_whitespace(true)
|
||||
.build()
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
const CAPTURES: &[(&str, TokenKind)] = &[
|
||||
("kw_return", TokenKind::KwReturn),
|
||||
|
||||
("ident", TokenKind::Ident),
|
||||
("sym", TokenKind::Sym),
|
||||
("dec_num", TokenKind::Num),
|
||||
("hex_num", TokenKind::Num),
|
||||
("dq_str", TokenKind::Str),
|
||||
("sq_str", TokenKind::Str),
|
||||
|
||||
("lparen", TokenKind::LParen),
|
||||
("rparen", TokenKind::RParen),
|
||||
("lbracket", TokenKind::LBracket),
|
||||
@@ -127,7 +125,6 @@ impl<'t> Lexer<'t> {
|
||||
("splat", TokenKind::Splat),
|
||||
("fslash", TokenKind::FSlash),
|
||||
("bang", TokenKind::Bang),
|
||||
|
||||
("arrow", TokenKind::Arrow),
|
||||
("eqeq", TokenKind::EqEq),
|
||||
("bangeq", TokenKind::BangEq),
|
||||
@@ -135,7 +132,6 @@ impl<'t> Lexer<'t> {
|
||||
("gteq", TokenKind::GtEq),
|
||||
("lt", TokenKind::Lt),
|
||||
("gt", TokenKind::Gt),
|
||||
|
||||
("eq", TokenKind::Eq),
|
||||
("eol", TokenKind::Eol),
|
||||
("newline", TokenKind::Newline),
|
||||
@@ -147,28 +143,30 @@ impl<'t> Lexer<'t> {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let caps = REGEX.captures(&self.text[self.pos.byte..])
|
||||
let caps =
|
||||
REGEX
|
||||
.captures(&self.text[self.pos.byte..])
|
||||
.ok_or_else(|| Error::Unexpected {
|
||||
what: "EOF".to_string(),
|
||||
pos: self.pos,
|
||||
})?;
|
||||
|
||||
// Get first capture
|
||||
let capture_kind = CAPTURES.iter()
|
||||
.filter_map(|(name, kind)|
|
||||
caps.name(name)
|
||||
.map(|cap| (cap, kind)))
|
||||
let capture_kind = CAPTURES
|
||||
.iter()
|
||||
.filter_map(|(name, kind)| caps.name(name).map(|cap| (cap, kind)))
|
||||
.next();
|
||||
|
||||
let (token_text, kind) = if let Some((capture, kind)) = capture_kind {
|
||||
(capture.as_str(), *kind)
|
||||
} else {
|
||||
return Err(
|
||||
Error::Unexpected {
|
||||
what: format!("character {}", (&self.text[self.pos.byte..]).chars().next().unwrap()),
|
||||
return Err(Error::Unexpected {
|
||||
what: format!(
|
||||
"character {}",
|
||||
(&self.text[self.pos.byte..]).chars().next().unwrap()
|
||||
),
|
||||
pos: self.pos,
|
||||
}
|
||||
);
|
||||
});
|
||||
};
|
||||
|
||||
let start = self.pos;
|
||||
@@ -221,11 +219,16 @@ mod test {
|
||||
fn test_ident_token() {
|
||||
test_token!(
|
||||
"ident OtherIdent other_ident ident1234 RETURN",
|
||||
TokenKind::Ident, "ident",
|
||||
TokenKind::Ident, "OtherIdent",
|
||||
TokenKind::Ident, "other_ident",
|
||||
TokenKind::Ident, "ident1234",
|
||||
TokenKind::Ident, "RETURN",
|
||||
TokenKind::Ident,
|
||||
"ident",
|
||||
TokenKind::Ident,
|
||||
"OtherIdent",
|
||||
TokenKind::Ident,
|
||||
"other_ident",
|
||||
TokenKind::Ident,
|
||||
"ident1234",
|
||||
TokenKind::Ident,
|
||||
"RETURN",
|
||||
);
|
||||
}
|
||||
|
||||
@@ -261,20 +264,22 @@ mod test {
|
||||
|
||||
#[test]
|
||||
fn test_sym_token() {
|
||||
test_token!(":symbol :OtherSymbol :other_symbol :symbol1234",
|
||||
TokenKind::Sym, ":symbol",
|
||||
TokenKind::Sym, ":OtherSymbol",
|
||||
TokenKind::Sym, ":other_symbol",
|
||||
TokenKind::Sym, ":symbol1234",
|
||||
test_token!(
|
||||
":symbol :OtherSymbol :other_symbol :symbol1234",
|
||||
TokenKind::Sym,
|
||||
":symbol",
|
||||
TokenKind::Sym,
|
||||
":OtherSymbol",
|
||||
TokenKind::Sym,
|
||||
":other_symbol",
|
||||
TokenKind::Sym,
|
||||
":symbol1234",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_eol() {
|
||||
test_token!("\n;",
|
||||
TokenKind::Newline, "\n",
|
||||
TokenKind::Eol, ";"
|
||||
);
|
||||
test_token!("\n;", TokenKind::Newline, "\n", TokenKind::Eol, ";");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -179,7 +179,8 @@ impl<'t> Parser<'t> {
|
||||
TokenKind::ObjBrace => {
|
||||
let prev_skip = self.set_skip_newlines(true)?;
|
||||
let object = self.next_obj_list()?;
|
||||
let end_token = self.expect_token_kind(TokenKind::RBrace, "end of object (right curly brace)")?;
|
||||
let end_token =
|
||||
self.expect_token_kind(TokenKind::RBrace, "end of object (right curly brace)")?;
|
||||
let span = token.span().union(end_token.span());
|
||||
Expr::Base(BaseExpr {
|
||||
kind: BaseExprKind::Object(object),
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use std::fmt::{Display, Formatter, self};
|
||||
use std::fmt::{self, Display, Formatter};
|
||||
|
||||
#[derive(Debug, Clone, Copy, Eq)]
|
||||
#[cfg_attr(not(test), derive(PartialEq))]
|
||||
@@ -38,7 +38,11 @@ impl PartialEq for Pos {
|
||||
impl Pos {
|
||||
pub fn from_char(c: char, source: usize, line: usize, col: usize, byte: usize) -> Self {
|
||||
Pos {
|
||||
source, line, col, byte, len: c.len_utf8(),
|
||||
source,
|
||||
line,
|
||||
col,
|
||||
byte,
|
||||
len: c.len_utf8(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -98,9 +102,20 @@ impl Display for Span {
|
||||
if self.start == self.end {
|
||||
Display::fmt(&self.start, fmt)
|
||||
} else if self.start.line == self.end.line {
|
||||
write!(fmt, "line {} at {}-{}", self.start.line + 1, self.start.col + 1, self.end.col + 1)
|
||||
write!(
|
||||
fmt,
|
||||
"line {} at {}-{}",
|
||||
self.start.line + 1,
|
||||
self.start.col + 1,
|
||||
self.end.col + 1
|
||||
)
|
||||
} else {
|
||||
write!(fmt, "lines {} to {}", self.start.line + 1, self.end.line + 1)
|
||||
write!(
|
||||
fmt,
|
||||
"lines {} to {}",
|
||||
self.start.line + 1,
|
||||
self.end.line + 1
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -110,7 +125,7 @@ pub trait Spanned {
|
||||
|
||||
fn text_at<'t>(&self, text: &'t str) -> &'t str {
|
||||
let Span { start, end } = self.span();
|
||||
&text[start.byte .. end.byte]
|
||||
&text[start.byte..end.byte]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -178,7 +193,7 @@ mod test {
|
||||
col: 15,
|
||||
byte: 15,
|
||||
..Default::default()
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
let second = Span {
|
||||
@@ -193,7 +208,7 @@ mod test {
|
||||
col: 27,
|
||||
byte: 27,
|
||||
..Default::default()
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
let expected = Span {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use crate::syn::span::*;
|
||||
use std::fmt::{Display, Formatter, self};
|
||||
use std::fmt::{self, Display, Formatter};
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum TokenKind {
|
||||
@@ -84,7 +84,7 @@ pub struct Token {
|
||||
|
||||
impl Token {
|
||||
pub fn new(kind: TokenKind, span: Span) -> Self {
|
||||
Token { kind, span, }
|
||||
Token { kind, span }
|
||||
}
|
||||
|
||||
pub fn kind(&self) -> TokenKind {
|
||||
@@ -93,5 +93,7 @@ impl Token {
|
||||
}
|
||||
|
||||
impl Spanned for Token {
|
||||
fn span(&self) -> Span { self.span }
|
||||
fn span(&self) -> Span {
|
||||
self.span
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user