Add binary and hex number parsing
Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
115
src/parser.rs
115
src/parser.rs
@@ -43,6 +43,7 @@ const NAME_START_CHARS: &str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUV
|
||||
/// Letters, digits, `_` and `-`. The number lexers (`number`, `hex_number`, `bin_number`) test
/// the character right after a literal against this set to reject input such as `12ab`.
// NOTE(review): presumably also the set of characters allowed inside a name after its first
// character -- the `name()` lexer is outside this view, confirm there.
const NAME_CHARS: &str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789-";
|
||||
/// A token whose first character is one of these decimal digits is lexed by `number()`.
const NUMBER_START_CHARS: &str = "0123456789";
|
||||
/// Characters consumed by `number()`: decimal digits plus the decimal point.
const NUMBER_CHARS: &str = "0123456789.";
|
||||
/// Digits accepted by `hex_number()` after the `0x`/`0X` leader, in either letter case.
const NUMBER_HEX_CHARS: &str = "0123456789ABCDEFabcdef";
|
||||
/// Quote characters (`'` and `"`); a token starting with one of them is lexed by `string()`.
const STRING_START_CHARS: &str = "'\"";
|
||||
// NOTE(review): presumably the characters that may follow a backslash inside a string literal
// (n, r, t, backslash, both quotes) -- its use is not visible in this view, confirm in `string()`.
const STRING_ESCAPES: &str = "nrt\\\"'";
|
||||
|
||||
@@ -62,7 +63,7 @@ pub struct Lexer {
|
||||
}
|
||||
|
||||
impl Lexer {
|
||||
pub fn new(text: String, path: &dyn AsRef<Path>) -> Self {
|
||||
pub fn new(text: String, path: impl AsRef<Path>) -> Self {
|
||||
Self {
|
||||
line: 1,
|
||||
index: 1,
|
||||
@@ -79,7 +80,12 @@ impl Lexer {
|
||||
}
|
||||
|
||||
pub fn lexeme(&self) -> &str {
|
||||
&self.text[self.start..self.index - 1]
|
||||
if self.is_eof() {
|
||||
// if we're at EOF, the index should not be cut off at the very end
|
||||
&self.text[self.start..self.index]
|
||||
} else {
|
||||
&self.text[self.start..self.index - 1]
|
||||
}
|
||||
}
|
||||
|
||||
pub fn was_error(&self) -> bool {
|
||||
@@ -164,8 +170,16 @@ impl Lexer {
|
||||
return Ok(self.make_token(TokenKind::Eof));
|
||||
} else if NAME_START_CHARS.contains(self.current()) {
|
||||
return Ok(self.name());
|
||||
} else if self.mat('0') {
|
||||
return if self.mat('x') || self.mat('X') {
|
||||
self.hex_number()
|
||||
} else if self.mat('b') || self.mat('B') {
|
||||
self.bin_number()
|
||||
} else {
|
||||
self.number()
|
||||
};
|
||||
} else if NUMBER_START_CHARS.contains(self.current()) {
|
||||
return Ok(self.number());
|
||||
return self.number();
|
||||
} else if STRING_START_CHARS.contains(self.current()) {
|
||||
return self.string();
|
||||
} else if self.mat('+') {
|
||||
@@ -291,11 +305,53 @@ impl Lexer {
|
||||
}
|
||||
}
|
||||
|
||||
fn number(&mut self) -> Token {
|
||||
fn number(&mut self) -> Result<Token> {
|
||||
let mut was_decimal = false;
|
||||
while NUMBER_CHARS.contains(self.current()) {
|
||||
// this allows some weird syntax, you're allowed to do e.g. `1.0.to_int()` (usually
|
||||
// written as `(1.0).to_int()` but I don't see a problem with it)
|
||||
if self.current() == '.' {
|
||||
if was_decimal {
|
||||
break;
|
||||
} else {
|
||||
was_decimal = true;
|
||||
}
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
self.make_token(TokenKind::Number)
|
||||
if NAME_CHARS.contains(self.current()) {
|
||||
Err(self.error(format!("invalid digit '{}'", self.current())))
|
||||
} else {
|
||||
Ok(self.make_token(TokenKind::Number))
|
||||
}
|
||||
}
|
||||
|
||||
fn hex_number(&mut self) -> Result<Token> {
|
||||
if !NUMBER_HEX_CHARS.contains(self.current()) {
|
||||
return Err(self.error("expected hex digit after '0x' leader"));
|
||||
}
|
||||
while NUMBER_HEX_CHARS.contains(self.current()) {
|
||||
self.advance();
|
||||
}
|
||||
if NAME_CHARS.contains(self.current()) {
|
||||
Err(self.error(format!("invalid hex digit '{}'", self.current())))
|
||||
} else {
|
||||
Ok(self.make_token(TokenKind::Number))
|
||||
}
|
||||
}
|
||||
|
||||
fn bin_number(&mut self) -> Result<Token> {
|
||||
if self.current() != '0' && self.current() != '1' {
|
||||
return Err(self.error("expected binary digit after '0b' leader"));
|
||||
}
|
||||
while self.current() == '0' || self.current() == '1' {
|
||||
self.advance();
|
||||
}
|
||||
if NAME_CHARS.contains(self.current()) {
|
||||
Err(self.error(format!("invalid binary digit '{}'", self.current())))
|
||||
} else {
|
||||
Ok(self.make_token(TokenKind::Number))
|
||||
}
|
||||
}
|
||||
|
||||
fn string(&mut self) -> Result<Token> {
|
||||
@@ -799,3 +855,52 @@ impl Parser {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
// Test helper: pulls the next token out of `$lexer` (panicking via `unwrap` if there is none)
// and asserts that its `kind` equals `$kind` and its `text` equals `$text`.
#[cfg(test)]
macro_rules! lexer_check {
    ($lexer:expr, $kind:expr, $text:expr) => {{
        let token = $lexer.next().unwrap();
        assert_eq!(token.kind, $kind);
        assert_eq!(token.text, $text);
    }};
}
|
||||
|
||||
// Names may mix letters, `_` and `-`; each whitespace-separated word must come back as exactly
// one `Name` token, after which the lexer must be at EOF.
#[test]
fn test_lexer_names() {
    let source = "asdf fdsa the-quick-brown-fox jumped_over-the-lazy_dogs";
    let mut lexer = Lexer::new(source.to_string(), ":testing:");

    let expected_names = [
        "asdf",
        "fdsa",
        "the-quick-brown-fox",
        "jumped_over-the-lazy_dogs",
    ];
    for &expected in &expected_names {
        lexer_check!(lexer, TokenKind::Name, expected);
    }
    assert!(lexer.is_eof());
}
|
||||
|
||||
// Decimal, hexadecimal (`0x` / `0X`) and binary (`0b` / `0B`) literals must each come back as a
// single `Number` token whose text is the literal exactly as written.
#[test]
fn test_lexer_numbers() {
    let source = "1 2 3 0 0.0 1.0 2.0 3.0 0x1 0xa 0xff 0xabcd 0xabcdef 0XDEADBEEF 0XDECAFDAD 0b0 0b1 0b010101 0B101010 0B00000";
    let mut lexer = Lexer::new(source.to_string(), ":testing:");

    let expected_literals = [
        // decimal integers and decimals
        "1", "2", "3", "0", "0.0", "1.0", "2.0", "3.0",
        // hexadecimal, both leader cases and both digit cases
        "0x1", "0xa", "0xff", "0xabcd", "0xabcdef", "0XDEADBEEF", "0XDECAFDAD",
        // binary, both leader cases
        "0b0", "0b1", "0b010101", "0B101010", "0B00000",
    ];
    for &expected in &expected_literals {
        lexer_check!(lexer, TokenKind::Number, expected);
    }
    assert!(lexer.is_eof());
}
|
||||
|
||||
Reference in New Issue
Block a user