Add binary and hex number parsing
Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
@@ -889,6 +889,10 @@ impl ExprVisitor for Compiler {
|
|||||||
TokenKind::Number => {
|
TokenKind::Number => {
|
||||||
let obj = if expr.token.text.contains('.') {
|
let obj = if expr.token.text.contains('.') {
|
||||||
FloatInst::create(expr.token.text.parse().unwrap())
|
FloatInst::create(expr.token.text.parse().unwrap())
|
||||||
|
} else if expr.token.text.starts_with("0x") || expr.token.text.starts_with("0X") {
|
||||||
|
IntInst::create(i64::from_str_radix(&expr.token.text[2..], 16).unwrap())
|
||||||
|
} else if expr.token.text.starts_with("0b") || expr.token.text.starts_with("0B") {
|
||||||
|
IntInst::create(i64::from_str_radix(&expr.token.text[2..], 2).unwrap())
|
||||||
} else {
|
} else {
|
||||||
IntInst::create(expr.token.text.parse().unwrap())
|
IntInst::create(expr.token.text.parse().unwrap())
|
||||||
};
|
};
|
||||||
|
|||||||
115
src/parser.rs
115
src/parser.rs
@@ -43,6 +43,7 @@ const NAME_START_CHARS: &str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUV
|
|||||||
const NAME_CHARS: &str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789-";
|
const NAME_CHARS: &str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789-";
|
||||||
const NUMBER_START_CHARS: &str = "0123456789";
|
const NUMBER_START_CHARS: &str = "0123456789";
|
||||||
const NUMBER_CHARS: &str = "0123456789.";
|
const NUMBER_CHARS: &str = "0123456789.";
|
||||||
|
const NUMBER_HEX_CHARS: &str = "0123456789ABCDEFabcdef";
|
||||||
const STRING_START_CHARS: &str = "'\"";
|
const STRING_START_CHARS: &str = "'\"";
|
||||||
const STRING_ESCAPES: &str = "nrt\\\"'";
|
const STRING_ESCAPES: &str = "nrt\\\"'";
|
||||||
|
|
||||||
@@ -62,7 +63,7 @@ pub struct Lexer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Lexer {
|
impl Lexer {
|
||||||
pub fn new(text: String, path: &dyn AsRef<Path>) -> Self {
|
pub fn new(text: String, path: impl AsRef<Path>) -> Self {
|
||||||
Self {
|
Self {
|
||||||
line: 1,
|
line: 1,
|
||||||
index: 1,
|
index: 1,
|
||||||
@@ -79,7 +80,12 @@ impl Lexer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn lexeme(&self) -> &str {
|
pub fn lexeme(&self) -> &str {
|
||||||
&self.text[self.start..self.index - 1]
|
if self.is_eof() {
|
||||||
|
// if we're at EOF, the index should not be cut off at the very end
|
||||||
|
&self.text[self.start..self.index]
|
||||||
|
} else {
|
||||||
|
&self.text[self.start..self.index - 1]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn was_error(&self) -> bool {
|
pub fn was_error(&self) -> bool {
|
||||||
@@ -164,8 +170,16 @@ impl Lexer {
|
|||||||
return Ok(self.make_token(TokenKind::Eof));
|
return Ok(self.make_token(TokenKind::Eof));
|
||||||
} else if NAME_START_CHARS.contains(self.current()) {
|
} else if NAME_START_CHARS.contains(self.current()) {
|
||||||
return Ok(self.name());
|
return Ok(self.name());
|
||||||
|
} else if self.mat('0') {
|
||||||
|
return if self.mat('x') || self.mat('X') {
|
||||||
|
self.hex_number()
|
||||||
|
} else if self.mat('b') || self.mat('B') {
|
||||||
|
self.bin_number()
|
||||||
|
} else {
|
||||||
|
self.number()
|
||||||
|
};
|
||||||
} else if NUMBER_START_CHARS.contains(self.current()) {
|
} else if NUMBER_START_CHARS.contains(self.current()) {
|
||||||
return Ok(self.number());
|
return self.number();
|
||||||
} else if STRING_START_CHARS.contains(self.current()) {
|
} else if STRING_START_CHARS.contains(self.current()) {
|
||||||
return self.string();
|
return self.string();
|
||||||
} else if self.mat('+') {
|
} else if self.mat('+') {
|
||||||
@@ -291,11 +305,53 @@ impl Lexer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn number(&mut self) -> Token {
|
fn number(&mut self) -> Result<Token> {
|
||||||
|
let mut was_decimal = false;
|
||||||
while NUMBER_CHARS.contains(self.current()) {
|
while NUMBER_CHARS.contains(self.current()) {
|
||||||
|
// this allows some weird syntax, you're allowed to do e.g. `1.0.to_int()` (usually
|
||||||
|
// written as `(1.0).to_int()` but I don't see a problem with it)
|
||||||
|
if self.current() == '.' {
|
||||||
|
if was_decimal {
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
was_decimal = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
self.advance();
|
self.advance();
|
||||||
}
|
}
|
||||||
self.make_token(TokenKind::Number)
|
if NAME_CHARS.contains(self.current()) {
|
||||||
|
Err(self.error(format!("invalid digit '{}'", self.current())))
|
||||||
|
} else {
|
||||||
|
Ok(self.make_token(TokenKind::Number))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn hex_number(&mut self) -> Result<Token> {
|
||||||
|
if !NUMBER_HEX_CHARS.contains(self.current()) {
|
||||||
|
return Err(self.error("expected hex digit after '0x' leader"));
|
||||||
|
}
|
||||||
|
while NUMBER_HEX_CHARS.contains(self.current()) {
|
||||||
|
self.advance();
|
||||||
|
}
|
||||||
|
if NAME_CHARS.contains(self.current()) {
|
||||||
|
Err(self.error(format!("invalid hex digit '{}'", self.current())))
|
||||||
|
} else {
|
||||||
|
Ok(self.make_token(TokenKind::Number))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bin_number(&mut self) -> Result<Token> {
|
||||||
|
if self.current() != '0' && self.current() != '1' {
|
||||||
|
return Err(self.error("expected binary digit after '0b' leader"));
|
||||||
|
}
|
||||||
|
while self.current() == '0' || self.current() == '1' {
|
||||||
|
self.advance();
|
||||||
|
}
|
||||||
|
if NAME_CHARS.contains(self.current()) {
|
||||||
|
Err(self.error(format!("invalid binary digit '{}'", self.current())))
|
||||||
|
} else {
|
||||||
|
Ok(self.make_token(TokenKind::Number))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn string(&mut self) -> Result<Token> {
|
fn string(&mut self) -> Result<Token> {
|
||||||
@@ -799,3 +855,52 @@ impl Parser {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Test helper: pulls the next token from `$lexer` (panicking through `unwrap` if `next()`
/// fails) and asserts that its `kind` equals `$kind` and its `text` equals `$text`.
#[cfg(test)]
macro_rules! lexer_check {
    ($lexer:expr, $kind:expr, $text:expr) => {{
        let token = $lexer.next().unwrap();
        assert_eq!(token.kind, $kind);
        assert_eq!(token.text, $text);
    }};
}
|
||||||
|
|
||||||
|
/// Lexes a line of whitespace-separated identifiers (including ones containing `-` and `_`)
/// and checks that each comes back as a single `TokenKind::Name` token, after which the
/// lexer reports EOF.
#[test]
fn test_lexer_names() {
    let mut lexer = Lexer::new(
        "asdf fdsa the-quick-brown-fox jumped_over-the-lazy_dogs".to_string(),
        ":testing:",
    );

    let expected_names = [
        "asdf",
        "fdsa",
        "the-quick-brown-fox",
        "jumped_over-the-lazy_dogs",
    ];
    for expected in expected_names.iter() {
        lexer_check!(lexer, TokenKind::Name, *expected);
    }

    assert!(lexer.is_eof());
}
|
||||||
|
|
||||||
|
/// Lexes decimal integers, floats, hex literals (`0x` / `0X`) and binary literals
/// (`0b` / `0B`) and checks that each comes back as a single `TokenKind::Number` token with
/// its text unchanged, after which the lexer reports EOF.
#[test]
fn test_lexer_numbers() {
    let input = "1 2 3 0 0.0 1.0 2.0 3.0 0x1 0xa 0xff 0xabcd 0xabcdef 0XDEADBEEF 0XDECAFDAD 0b0 0b1 0b010101 0B101010 0B00000";
    let mut lexer = Lexer::new(input.to_string(), ":testing:");

    let expected_numbers = [
        // decimal integers
        "1",
        "2",
        "3",
        "0",
        // floats
        "0.0",
        "1.0",
        "2.0",
        "3.0",
        // hexadecimal, both leader cases
        "0x1",
        "0xa",
        "0xff",
        "0xabcd",
        "0xabcdef",
        "0XDEADBEEF",
        "0XDECAFDAD",
        // binary, both leader cases
        "0b0",
        "0b1",
        "0b010101",
        "0B101010",
        "0B00000",
    ];
    for expected in expected_numbers.iter() {
        lexer_check!(lexer, TokenKind::Number, *expected);
    }

    assert!(lexer.is_eof());
}
|
||||||
|
|||||||
@@ -16,6 +16,8 @@ println(a + b)
|
|||||||
println(b + a)
|
println(b + a)
|
||||||
println(a + -b)
|
println(a + -b)
|
||||||
println(-a + b)
|
println(-a + b)
|
||||||
|
println(0xa + 1)
|
||||||
|
println(0b10 + 0b10)
|
||||||
|
|
||||||
# __sub__
|
# __sub__
|
||||||
println("__sub__")
|
println("__sub__")
|
||||||
@@ -120,3 +122,4 @@ println(----1)
|
|||||||
println(10 - -20)
|
println(10 - -20)
|
||||||
println(-10 - 20)
|
println(-10 - 20)
|
||||||
println(-10 - -20)
|
println(-10 - -20)
|
||||||
|
println(-0xff)
|
||||||
|
|||||||
@@ -8,6 +8,8 @@ __add__
|
|||||||
30
|
30
|
||||||
-10
|
-10
|
||||||
10
|
10
|
||||||
|
11
|
||||||
|
4
|
||||||
__sub__
|
__sub__
|
||||||
-1
|
-1
|
||||||
1
|
1
|
||||||
@@ -90,3 +92,4 @@ __neg__
|
|||||||
30
|
30
|
||||||
-30
|
-30
|
||||||
10
|
10
|
||||||
|
-255
|
||||||
|
|||||||
Reference in New Issue
Block a user