2024-09-20 16:04:30 -07:00
|
|
|
use common_macros::hash_map;
|
|
|
|
|
use thiserror::Error;
|
|
|
|
|
|
|
|
|
|
use std::collections::HashMap;
|
|
|
|
|
use std::fmt::{self, Display};
|
|
|
|
|
use std::path::{Path, PathBuf};
|
|
|
|
|
use std::sync::OnceLock;
|
|
|
|
|
|
|
|
|
|
use crate::ast::*;
|
|
|
|
|
use crate::token::{Token, TokenKind};
|
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
// ParseError
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
#[derive(Error, Debug)]
|
|
|
|
|
pub struct ParseError {
|
|
|
|
|
pub message: String,
|
|
|
|
|
pub line: usize,
|
|
|
|
|
pub path: PathBuf,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Result alias used throughout the lexer and parser; the error is always a
/// [`ParseError`].
pub type Result<T> = std::result::Result<T, ParseError>;
|
|
|
|
|
|
|
|
|
|
impl Display for ParseError {
|
|
|
|
|
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
|
|
|
|
write!(
|
|
|
|
|
fmt,
|
|
|
|
|
"in {} at line {}: {}",
|
|
|
|
|
self.path.as_os_str().to_str().unwrap(),
|
|
|
|
|
self.line,
|
|
|
|
|
self.message
|
|
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
// Constants
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
/// Characters skipped as insignificant whitespace. Newlines are handled
/// separately because they can be significant.
const WHITESPACE: &str = " \t\r";

/// Characters that may begin a name.
const NAME_START_CHARS: &str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_";

/// Characters that may continue a name.
const NAME_CHARS: &str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789";

/// Characters that may begin a number literal.
const NUMBER_START_CHARS: &str = "0123456789";

/// Characters that may continue a decimal number literal.
const NUMBER_CHARS: &str = "0123456789.";

/// Digits accepted in a hexadecimal literal.
const NUMBER_HEX_CHARS: &str = "0123456789ABCDEFabcdef";

/// Characters that open (and close) a string literal.
const STRING_START_CHARS: &str = "'\"";

/// Characters allowed directly after a backslash inside a string literal.
const STRING_ESCAPES: &str = "nrt\\\"'";
|
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
// Lexer
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
/// Converts source text into tokens, one call at a time.
#[derive(Debug)]
pub struct Lexer {
    /// Current line number; bumped each time a `'\n'` is consumed.
    line: usize,
    /// Cursor into `text`, kept one greater than the byte offset of the
    /// current character.
    index: usize,
    /// Byte offset at which the lexeme currently being scanned begins.
    start: usize,
    /// The complete source text.
    text: String,
    /// Path attached to every error this lexer creates.
    path: PathBuf,
    /// Closing delimiters expected for the currently open `(`, `{` and `[`.
    paren_stack: Vec<char>,
    /// Set as soon as any error has been created by this lexer.
    was_error: bool,
}
|
|
|
|
|
|
|
|
|
|
impl Lexer {
|
2024-09-26 10:03:54 -07:00
|
|
|
pub fn new(text: String, path: impl AsRef<Path>) -> Self {
|
2024-09-20 16:04:30 -07:00
|
|
|
Self {
|
|
|
|
|
line: 1,
|
|
|
|
|
index: 1,
|
|
|
|
|
start: 0,
|
|
|
|
|
text,
|
|
|
|
|
path: path.as_ref().into(),
|
|
|
|
|
paren_stack: Vec::new(),
|
|
|
|
|
was_error: false,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn is_eof(&self) -> bool {
|
2024-10-15 14:04:15 -07:00
|
|
|
self.index > self.text.len()
|
2024-09-20 16:04:30 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn lexeme(&self) -> &str {
|
WIP: Add imports and modules
This is a big change because it touches a lot of stuff, but here is the
overview:
* Import syntax:
```
import foo
import bar from foo
import bar from "foo.npp"
import bar, baz from foo
import * from foo
import "foo.npp"
```
* These are all valid imports. They should be pretty
straightforward, maybe with exception of the last item. If you are
importing a path directly, but not importing any members from it,
it does not insert anything into the current namespace, and just
executes the file. This is probably going to be unused but I want
to include it for completeness. We can always remove it later
before a hypothetical 1.0 release.
* The "from" keyword is only ever used as a keyword here, and I am
allowing it to be used as an identifier elsewhere. Don't export
it, because that's weird and wrong and won't work.
* Modules:
* Doing an `import foo` will look for "foo.npp" at compile-time,
relative to the importer's directory, parse it, and compile it.
The importer will then attempt to execute the module with the new
`EnterModule` op. This instruction will execute the module kind of
like a function, assigning the module's global namespace to an
object that you can pass around.
* `import bar from foo` and `import bar from "foo.npp"` et al syntax
is not currently implemented in the compiler.
* There is a new "Module" object that represents a potentially
un-initialized module. This can't be referred to directly in code.
* VM:
* The VM operates around Module objects now. If you want to "call" a
new module, you should call `enter_module`. This is how the main
chunk is invoked.
* TODOs:
* `exit_module` function in the VM
* Finish up module implementation in compiler
* Built-in modules
* Sub-modules - e.g. `import foo.bar` - how does naming work for
this?
* Module directories. In Python you have `foo/__init__.py` and in
Rust you have `foo/mod.rs`.
* Probably a "Namespace" object that explicitly denotes "this is an
imported module that you're dealing with"
* Tests, tests, tests
Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
2024-10-04 10:11:49 -07:00
|
|
|
&self.text[self.start..self.index - 1]
|
2024-09-20 16:04:30 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn was_error(&self) -> bool {
|
|
|
|
|
self.was_error
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn current(&self) -> char {
|
|
|
|
|
if self.is_eof() {
|
|
|
|
|
return '\0';
|
|
|
|
|
}
|
|
|
|
|
self.text[self.index - 1..].chars().nth(0).unwrap()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn ignore_newlines(&self) -> bool {
|
|
|
|
|
self.paren_stack.len() > 0 && self.paren_stack.last() != Some(&'}')
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn error(&mut self, message: impl ToString) -> ParseError {
|
|
|
|
|
self.was_error = true;
|
|
|
|
|
ParseError {
|
|
|
|
|
message: message.to_string(),
|
|
|
|
|
line: self.line,
|
|
|
|
|
path: self.path.clone(),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn advance(&mut self) {
|
|
|
|
|
if self.is_eof() {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
if self.current() == '\n' {
|
|
|
|
|
self.line += 1;
|
|
|
|
|
}
|
|
|
|
|
self.index += self.text[self.index - 1..]
|
|
|
|
|
.chars()
|
|
|
|
|
.nth(0)
|
|
|
|
|
.unwrap()
|
|
|
|
|
.len_utf8();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn mat(&mut self, c: char) -> bool {
|
|
|
|
|
if self.current() == c {
|
|
|
|
|
self.advance();
|
|
|
|
|
return true;
|
|
|
|
|
} else {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn skip_whitespace(&mut self) {
|
|
|
|
|
while WHITESPACE.contains(self.current())
|
|
|
|
|
|| (self.current() == '\n' && self.ignore_newlines())
|
|
|
|
|
|| self.current() == '#'
|
|
|
|
|
{
|
|
|
|
|
if self.current() == '#' {
|
|
|
|
|
self.advance();
|
|
|
|
|
while self.current() != '\n' && !self.is_eof() {
|
|
|
|
|
self.advance();
|
|
|
|
|
}
|
|
|
|
|
self.mat('\n');
|
|
|
|
|
} else {
|
|
|
|
|
self.advance();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
self.start = self.index - 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn make_token(&mut self, kind: TokenKind) -> Token {
|
|
|
|
|
let token = Token {
|
|
|
|
|
line: self.line,
|
|
|
|
|
//index: self.start,
|
|
|
|
|
text: self.lexeme().to_string(),
|
|
|
|
|
kind,
|
|
|
|
|
};
|
|
|
|
|
self.start = self.index - 1;
|
|
|
|
|
token
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn next(&mut self) -> Result<Token> {
|
|
|
|
|
self.skip_whitespace();
|
|
|
|
|
if self.is_eof() {
|
|
|
|
|
return Ok(self.make_token(TokenKind::Eof));
|
|
|
|
|
} else if NAME_START_CHARS.contains(self.current()) {
|
|
|
|
|
return Ok(self.name());
|
2024-09-26 10:03:54 -07:00
|
|
|
} else if self.mat('0') {
|
|
|
|
|
return if self.mat('x') || self.mat('X') {
|
|
|
|
|
self.hex_number()
|
|
|
|
|
} else if self.mat('b') || self.mat('B') {
|
|
|
|
|
self.bin_number()
|
|
|
|
|
} else {
|
|
|
|
|
self.number()
|
|
|
|
|
};
|
2024-09-20 16:04:30 -07:00
|
|
|
} else if NUMBER_START_CHARS.contains(self.current()) {
|
2024-09-26 10:03:54 -07:00
|
|
|
return self.number();
|
2024-09-20 16:04:30 -07:00
|
|
|
} else if STRING_START_CHARS.contains(self.current()) {
|
|
|
|
|
return self.string();
|
|
|
|
|
} else if self.mat('+') {
|
2024-10-07 10:23:15 -07:00
|
|
|
if self.mat('=') {
|
|
|
|
|
return Ok(self.make_token(TokenKind::PlusEq));
|
|
|
|
|
} else {
|
|
|
|
|
return Ok(self.make_token(TokenKind::Plus));
|
|
|
|
|
}
|
2024-09-20 16:04:30 -07:00
|
|
|
} else if self.mat('-') {
|
|
|
|
|
if self.mat('>') {
|
|
|
|
|
return Ok(self.make_token(TokenKind::Arrow));
|
2024-10-07 10:23:15 -07:00
|
|
|
} else if self.mat('=') {
|
|
|
|
|
return Ok(self.make_token(TokenKind::MinusEq));
|
2024-09-20 16:04:30 -07:00
|
|
|
} else {
|
|
|
|
|
return Ok(self.make_token(TokenKind::Minus));
|
|
|
|
|
}
|
|
|
|
|
} else if self.mat('*') {
|
2024-10-07 10:23:15 -07:00
|
|
|
if self.mat('=') {
|
|
|
|
|
return Ok(self.make_token(TokenKind::StarEq));
|
|
|
|
|
} else {
|
|
|
|
|
return Ok(self.make_token(TokenKind::Star));
|
|
|
|
|
}
|
2024-09-20 16:04:30 -07:00
|
|
|
} else if self.mat('/') {
|
2024-10-07 10:23:15 -07:00
|
|
|
if self.mat('=') {
|
|
|
|
|
return Ok(self.make_token(TokenKind::SlashEq));
|
|
|
|
|
} else {
|
|
|
|
|
return Ok(self.make_token(TokenKind::Slash));
|
|
|
|
|
}
|
2024-09-20 16:04:30 -07:00
|
|
|
} else if self.mat('&') {
|
|
|
|
|
if self.mat('&') {
|
|
|
|
|
return Ok(self.make_token(TokenKind::And));
|
|
|
|
|
}
|
|
|
|
|
} else if self.mat('|') {
|
|
|
|
|
if self.mat('|') {
|
|
|
|
|
return Ok(self.make_token(TokenKind::Or));
|
|
|
|
|
}
|
|
|
|
|
} else if self.mat('!') {
|
|
|
|
|
if self.mat('=') {
|
|
|
|
|
return Ok(self.make_token(TokenKind::BangEq));
|
|
|
|
|
} else {
|
|
|
|
|
return Ok(self.make_token(TokenKind::Bang));
|
|
|
|
|
}
|
|
|
|
|
} else if self.mat('=') {
|
|
|
|
|
if self.mat('=') {
|
|
|
|
|
return Ok(self.make_token(TokenKind::EqEq));
|
|
|
|
|
} else {
|
|
|
|
|
return Ok(self.make_token(TokenKind::Eq));
|
|
|
|
|
}
|
|
|
|
|
} else if self.mat('<') {
|
|
|
|
|
if self.mat('=') {
|
|
|
|
|
return Ok(self.make_token(TokenKind::LessEq));
|
|
|
|
|
} else {
|
|
|
|
|
return Ok(self.make_token(TokenKind::Less));
|
|
|
|
|
}
|
|
|
|
|
} else if self.mat('>') {
|
|
|
|
|
if self.mat('=') {
|
|
|
|
|
return Ok(self.make_token(TokenKind::GreaterEq));
|
2024-09-24 17:16:47 -07:00
|
|
|
} else {
|
|
|
|
|
return Ok(self.make_token(TokenKind::Greater));
|
2024-09-20 16:04:30 -07:00
|
|
|
}
|
|
|
|
|
} else if self.mat('(') {
|
|
|
|
|
self.paren_stack.push(')');
|
|
|
|
|
return Ok(self.make_token(TokenKind::LParen));
|
|
|
|
|
} else if self.mat(')') {
|
|
|
|
|
return match self.paren_stack.last() {
|
|
|
|
|
None => Err(self.error("')' has unmatched '('")),
|
|
|
|
|
Some(')') => {
|
|
|
|
|
self.paren_stack.pop();
|
|
|
|
|
Ok(self.make_token(TokenKind::RParen))
|
|
|
|
|
}
|
|
|
|
|
Some(c) => Err(self.error(format!("mismatched ')' (expected {:?})", c))),
|
|
|
|
|
};
|
|
|
|
|
} else if self.mat('{') {
|
|
|
|
|
self.paren_stack.push('}');
|
|
|
|
|
return Ok(self.make_token(TokenKind::LBrace));
|
|
|
|
|
} else if self.mat('}') {
|
|
|
|
|
return match self.paren_stack.last() {
|
|
|
|
|
None => Err(self.error("'}' has unmatched '{'")),
|
|
|
|
|
Some('}') => {
|
|
|
|
|
self.paren_stack.pop();
|
|
|
|
|
Ok(self.make_token(TokenKind::RBrace))
|
|
|
|
|
}
|
|
|
|
|
Some(c) => Err(self.error(format!("mismatched '}}' (expected {:?})", c))),
|
|
|
|
|
};
|
|
|
|
|
} else if self.mat('[') {
|
|
|
|
|
self.paren_stack.push(']');
|
|
|
|
|
return Ok(self.make_token(TokenKind::LBracket));
|
|
|
|
|
} else if self.mat(']') {
|
|
|
|
|
return match self.paren_stack.last() {
|
|
|
|
|
None => Err(self.error("']' has unmatched '['")),
|
|
|
|
|
Some(']') => {
|
|
|
|
|
self.paren_stack.pop();
|
|
|
|
|
Ok(self.make_token(TokenKind::RBracket))
|
|
|
|
|
}
|
|
|
|
|
Some(c) => Err(self.error(format!("mismatched ']' (expected {:?})", c))),
|
|
|
|
|
};
|
|
|
|
|
} else if self.mat('.') {
|
|
|
|
|
return Ok(self.make_token(TokenKind::Dot));
|
|
|
|
|
} else if self.mat(',') {
|
|
|
|
|
return Ok(self.make_token(TokenKind::Comma));
|
|
|
|
|
} else if self.mat(':') {
|
|
|
|
|
return Ok(self.make_token(TokenKind::Colon));
|
|
|
|
|
} else if self.mat('\n') {
|
|
|
|
|
assert!(!self.ignore_newlines());
|
|
|
|
|
// fix the line number since it will have already advanced when we make the token
|
|
|
|
|
self.line -= 1;
|
|
|
|
|
let token = self.make_token(TokenKind::Eol);
|
|
|
|
|
self.line += 1;
|
|
|
|
|
return Ok(token);
|
|
|
|
|
} else if self.mat(';') {
|
|
|
|
|
return Ok(self.make_token(TokenKind::Eol));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Err(self.error(format!("unexpected character: {:?}", self.current())))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn name(&mut self) -> Token {
|
|
|
|
|
static KEYWORDS: OnceLock<HashMap<&'static str, TokenKind>> = OnceLock::new();
|
|
|
|
|
let keywords = KEYWORDS.get_or_init(|| {
|
|
|
|
|
hash_map! {
|
|
|
|
|
"return" => TokenKind::Return,
|
|
|
|
|
"if" => TokenKind::If,
|
|
|
|
|
"else" => TokenKind::Else,
|
|
|
|
|
"true" => TokenKind::True,
|
|
|
|
|
"false" => TokenKind::False,
|
|
|
|
|
"nil" => TokenKind::Nil,
|
WIP: Add imports and modules
This is a big change because it touches a lot of stuff, but here is the
overview:
* Import syntax:
```
import foo
import bar from foo
import bar from "foo.npp"
import bar, baz from foo
import * from foo
import "foo.npp"
```
* These are all valid imports. They should be pretty
straightforward, maybe with exception of the last item. If you are
importing a path directly, but not importing any members from it,
it does not insert anything into the current namespace, and just
executes the file. This is probably going to be unused but I want
to include it for completeness. We can always remove it later
before a hypothetical 1.0 release.
* The "from" keyword is only ever used as a keyword here, and I am
allowing it to be used as an identifier elsewhere. Don't export
it, because that's weird and wrong and won't work.
* Modules:
* Doing an `import foo` will look for "foo.npp" at compile-time,
relative to the importer's directory, parse it, and compile it.
The importer will then attempt to execute the module with the new
`EnterModule` op. This instruction will execute the module kind of
like a function, assigning the module's global namespace to an
object that you can pass around.
* `import bar from foo` and `import bar from "foo.npp"` et al syntax
is not currently implemented in the compiler.
* There is a new "Module" object that represents a potentially
un-initialized module. This can't be referred to directly in code.
* VM:
* The VM operates around Module objects now. If you want to "call" a
new module, you should call `enter_module`. This is how the main
chunk is invoked.
* TODOs:
* `exit_module` function in the VM
* Finish up module implementation in compiler
* Built-in modules
* Sub-modules - e.g. `import foo.bar` - how does naming work for
this?
* Module directories. In Python you have `foo/__init__.py` and in
Rust you have `foo/mod.rs`.
* Probably a "Namespace" object that explicitly denotes "this is an
imported module that you're dealing with"
* Tests, tests, tests
Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
2024-10-04 10:11:49 -07:00
|
|
|
"import" => TokenKind::Import,
|
|
|
|
|
"from" => TokenKind::From,
|
2024-09-20 16:04:30 -07:00
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
while NAME_CHARS.contains(self.current()) {
|
|
|
|
|
self.advance();
|
|
|
|
|
}
|
|
|
|
|
if let Some(kind) = keywords.get(self.lexeme()) {
|
|
|
|
|
self.make_token(*kind)
|
|
|
|
|
} else {
|
|
|
|
|
self.make_token(TokenKind::Name)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-09-26 10:03:54 -07:00
|
|
|
fn number(&mut self) -> Result<Token> {
|
|
|
|
|
let mut was_decimal = false;
|
2024-09-20 16:04:30 -07:00
|
|
|
while NUMBER_CHARS.contains(self.current()) {
|
2024-09-26 10:03:54 -07:00
|
|
|
// this allows some weird syntax, you're allowed to do e.g. `1.0.to_int()` (usually
|
|
|
|
|
// written as `(1.0).to_int()` but I don't see a problem with it)
|
|
|
|
|
if self.current() == '.' {
|
|
|
|
|
if was_decimal {
|
|
|
|
|
break;
|
|
|
|
|
} else {
|
|
|
|
|
was_decimal = true;
|
|
|
|
|
}
|
|
|
|
|
}
|
2024-09-20 16:04:30 -07:00
|
|
|
self.advance();
|
|
|
|
|
}
|
2024-09-26 10:03:54 -07:00
|
|
|
if NAME_CHARS.contains(self.current()) {
|
|
|
|
|
Err(self.error(format!("invalid digit '{}'", self.current())))
|
|
|
|
|
} else {
|
|
|
|
|
Ok(self.make_token(TokenKind::Number))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn hex_number(&mut self) -> Result<Token> {
|
|
|
|
|
if !NUMBER_HEX_CHARS.contains(self.current()) {
|
|
|
|
|
return Err(self.error("expected hex digit after '0x' leader"));
|
|
|
|
|
}
|
|
|
|
|
while NUMBER_HEX_CHARS.contains(self.current()) {
|
|
|
|
|
self.advance();
|
|
|
|
|
}
|
|
|
|
|
if NAME_CHARS.contains(self.current()) {
|
|
|
|
|
Err(self.error(format!("invalid hex digit '{}'", self.current())))
|
|
|
|
|
} else {
|
|
|
|
|
Ok(self.make_token(TokenKind::Number))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn bin_number(&mut self) -> Result<Token> {
|
|
|
|
|
if self.current() != '0' && self.current() != '1' {
|
|
|
|
|
return Err(self.error("expected binary digit after '0b' leader"));
|
|
|
|
|
}
|
|
|
|
|
while self.current() == '0' || self.current() == '1' {
|
|
|
|
|
self.advance();
|
|
|
|
|
}
|
|
|
|
|
if NAME_CHARS.contains(self.current()) {
|
|
|
|
|
Err(self.error(format!("invalid binary digit '{}'", self.current())))
|
|
|
|
|
} else {
|
|
|
|
|
Ok(self.make_token(TokenKind::Number))
|
|
|
|
|
}
|
2024-09-20 16:04:30 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn string(&mut self) -> Result<Token> {
|
|
|
|
|
let terminator = self.current();
|
|
|
|
|
self.advance();
|
|
|
|
|
|
|
|
|
|
while self.current() != terminator && !self.is_eof() {
|
|
|
|
|
if self.current() == '\\' {
|
|
|
|
|
self.advance();
|
|
|
|
|
if STRING_ESCAPES.contains(self.current()) {
|
|
|
|
|
self.advance();
|
|
|
|
|
} else {
|
|
|
|
|
return Err(self.error(format!("unknown string escape {:?}", self.current())));
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
self.advance();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if self.current() == terminator {
|
|
|
|
|
self.advance();
|
|
|
|
|
Ok(self.make_token(TokenKind::String))
|
|
|
|
|
} else {
|
|
|
|
|
Err(self.error("unterminated string"))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
// Parser
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
/// Tries each token kind in order against `$parser.mat(..)`, stopping at the
/// first match; evaluates to `true` if one matched. Uses `?`, so it must be
/// invoked inside a function that returns this module's `Result`.
macro_rules! mat {
    ($parser:expr, $($kind:expr),+ $(,)?) => {
        $($parser.mat($kind)?)||+
    };
}
|
|
|
|
|
|
|
|
|
|
/// Like `mat!`, but evaluates to a `Result`: `Ok` with a clone of the token
/// that was just consumed, or `Err` built from `$msg` when none of the
/// kinds matched.
macro_rules! expect {
    ($parser:expr, $msg:expr, $($kind:expr),+ $(,)?) => {{
        if mat!($parser, $($kind),+) {
            Ok($parser.prev.clone().unwrap())
        } else {
            Err($parser.error($msg))
        }
    }};
}
|
|
|
|
|
|
|
|
|
|
/// Generates a parser method `$method` for one left-associative binary
/// operator level: operands are parsed with `$operand`, and any of the
/// listed token kinds is accepted as the operator between them.
macro_rules! bin_expr {
    ($method:ident, $operand:ident, $($kind:expr),+ $(,)?) => {
        fn $method(&mut self) -> Result<ExprP> {
            let mut lhs = self.$operand()?;
            while $(self.mat($kind)?)||+ {
                let op = self.prev.clone().unwrap();
                let rhs = self.$operand()?;
                // Fold to the left: the tree built so far becomes the lhs.
                lhs = Box::new(BinaryExpr { lhs, op, rhs });
            }
            Ok(lhs)
        }
    };
}
|
|
|
|
|
|
|
|
|
|
pub struct Parser {
|
|
|
|
|
lexer: Lexer,
|
|
|
|
|
prev: Option<Token>,
|
|
|
|
|
current: Token,
|
|
|
|
|
next: Token,
|
|
|
|
|
was_error: bool,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl Parser {
|
|
|
|
|
pub fn new(text: String, path: &dyn AsRef<Path>) -> Result<Self> {
|
|
|
|
|
let mut lexer = Lexer::new(text, path);
|
|
|
|
|
let prev = None;
|
|
|
|
|
let current = lexer.next()?;
|
|
|
|
|
let next = lexer.next()?;
|
|
|
|
|
Ok(Self {
|
|
|
|
|
lexer,
|
|
|
|
|
prev,
|
|
|
|
|
current,
|
|
|
|
|
next,
|
|
|
|
|
was_error: false,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Parses statements until end of input and returns them in source order.
///
/// Statements that `stmt` reports as `None` contribute nothing to the
/// result.
pub fn parse_all(&mut self) -> Result<Vec<StmtP>> {
    let mut stmts = Vec::new();
    while !self.is_eof() {
        // `Option` iterates over zero or one item, so `None` adds nothing.
        stmts.extend(self.stmt()?);
    }
    Ok(stmts)
}
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
|
// Properties
|
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
fn line(&self) -> usize {
|
|
|
|
|
self.lexer.line
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn path(&self) -> &Path {
|
|
|
|
|
&self.lexer.path
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn is_eof(&self) -> bool {
|
2024-10-04 11:07:42 -07:00
|
|
|
self.current.kind == TokenKind::Eof
|
2024-09-20 16:04:30 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn was_error(&self) -> bool {
|
|
|
|
|
self.was_error || self.lexer.was_error()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
|
// Parser primitives
|
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
/// Shifts the token window forward by one: `current` becomes `prev`,
/// `next` becomes `current`, and a new `next` is read from the lexer.
///
/// # Errors
///
/// Propagates the lexer's error. In that case `prev` and `current` have
/// already been shifted and `next` still holds its old value.
fn advance(&mut self) -> Result<()> {
    let consumed = std::mem::replace(&mut self.current, self.next.clone());
    self.prev = Some(consumed);
    self.next = self.lexer.next()?;
    Ok(())
}
|
|
|
|
|
|
|
|
|
|
/// Returns `true` if the current token is of kind `what`; consumes nothing.
fn check(&self, what: TokenKind) -> bool {
    let current_kind = self.current.kind;
    current_kind == what
}
|
|
|
|
|
|
|
|
|
|
/// Consumes the current token if it is of kind `what`; returns whether it
/// did.
///
/// # Errors
///
/// Propagates any error raised while advancing.
fn mat(&mut self, what: TokenKind) -> Result<bool> {
    let matched = self.check(what);
    if matched {
        self.advance()?;
    }
    Ok(matched)
}
|
|
|
|
|
|
|
|
|
|
/// Consumes a token of kind `what` and returns a reference to it.
///
/// # Errors
///
/// If the current token is of a different kind, fails with `message`
/// followed by a note naming the kind and text of the token found instead.
fn expect(&mut self, message: impl Display, what: TokenKind) -> Result<&Token> {
    if !self.mat(what)? {
        let detail = format!(
            "{message} (NOTE: got {:?} {:?})",
            self.current.kind, self.current.text
        );
        return Err(self.error(detail));
    }
    // `mat` succeeded, so `prev` now holds the token just consumed.
    Ok(self.prev.as_ref().unwrap())
}
|
|
|
|
|
|
|
|
|
|
fn error(&mut self, message: impl ToString) -> ParseError {
|
|
|
|
|
self.was_error = true;
|
|
|
|
|
ParseError {
|
|
|
|
|
message: message.to_string(),
|
|
|
|
|
line: self.line(),
|
|
|
|
|
path: self.path().into(),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn synchronize(&mut self) -> Result<()> {
|
|
|
|
|
while !self.is_eof() {
|
|
|
|
|
match self.current.kind {
|
|
|
|
|
TokenKind::Return | TokenKind::If | TokenKind::LBrace => {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
_ => self.advance()?,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
|
// Statements
|
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
fn stmt(&mut self) -> Result<Option<StmtP>> {
|
|
|
|
|
// skip past end-lines to get to the good stuff
|
|
|
|
|
while self.mat(TokenKind::Eol)? {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// nothing left after EOLs
|
|
|
|
|
if self.is_eof() {
|
|
|
|
|
return Ok(None);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
match self.stmt_wrapped() {
|
|
|
|
|
Ok(result) => Ok(Some(result)),
|
|
|
|
|
Err(e) => {
|
|
|
|
|
eprintln!("{}", e);
|
|
|
|
|
self.synchronize()?;
|
|
|
|
|
Ok(None)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn stmt_wrapped(&mut self) -> Result<StmtP> {
|
|
|
|
|
if self.mat(TokenKind::Return)? {
|
|
|
|
|
self.return_stmt()
|
|
|
|
|
} else if self.mat(TokenKind::If)? {
|
|
|
|
|
self.if_stmt()
|
WIP: Add imports and modules
This is a big change because it touches a lot of stuff, but here is the
overview:
* Import syntax:
```
import foo
import bar from foo
import bar from "foo.npp"
import bar, baz from foo
import * from foo
import "foo.npp"
```
* These are all valid imports. They should be pretty
straightforward, maybe with exception of the last item. If you are
importing a path directly, but not importing any members from it,
it does not insert anything into the current namespace, and just
executes the file. This is probably going to be unused but I want
to include it for completeness. We can always remove it later
before a hypothetical 1.0 release.
* The "from" keyword is only ever used as a keyword here, and I am
allowing it to be used as an identifier elsewhere. Don't export
it, because that's weird and wrong and won't work.
* Modules:
* Doing an `import foo` will look for "foo.npp" at compile-time,
relative to the importer's directory, parse it, and compile it.
The importer will then attempt to execute the module with the new
`EnterModule` op. This instruction will execute the module kind of
like a function, assigning the module's global namespace to an
object that you can pass around.
* `import bar from foo` and `import bar from "foo.npp"` et al syntax
is not currently implemented in the compiler.
* There is a new "Module" object that represents a potentially
un-initialized module. This can't be referred to directly in code.
* VM:
* The VM operates around Module objects now. If you want to "call" a
new module, you should call `enter_module`. This is how the main
chunk is invoked.
* TODOs:
* `exit_module` function in the VM
* Finish up module implementation in compiler
* Built-in modules
* Sub-modules - e.g. `import foo.bar` - how does naming work for
this?
* Module directories. In Python you have `foo/__init__.py` and in
Rust you have `foo/mod.rs`.
* Probably a "Namespace" object that explicitly denotes "this is an
imported module that you're dealing with"
* Tests, tests, tests
Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
2024-10-04 10:11:49 -07:00
|
|
|
} else if self.mat(TokenKind::Import)? {
|
|
|
|
|
self.import_stmt()
|
2024-09-20 16:04:30 -07:00
|
|
|
} else if self.mat(TokenKind::LBrace)? {
|
|
|
|
|
let lbrace = self.prev.clone().unwrap();
|
|
|
|
|
let stmts = self.block()?;
|
|
|
|
|
let rbrace = self.prev.clone().unwrap();
|
|
|
|
|
Ok(Box::new(BlockStmt {
|
|
|
|
|
lbrace,
|
|
|
|
|
stmts,
|
|
|
|
|
rbrace,
|
|
|
|
|
}) as Box<dyn Stmt + 'static>)
|
2024-10-07 10:23:15 -07:00
|
|
|
} else if self.current.kind == TokenKind::Name
|
|
|
|
|
&& (self.next.kind == TokenKind::Eq
|
|
|
|
|
|| self.next.kind == TokenKind::PlusEq
|
|
|
|
|
|| self.next.kind == TokenKind::MinusEq
|
|
|
|
|
|| self.next.kind == TokenKind::StarEq
|
|
|
|
|
|| self.next.kind == TokenKind::SlashEq)
|
|
|
|
|
{
|
2024-09-20 16:04:30 -07:00
|
|
|
self.assign_stmt()
|
|
|
|
|
} else {
|
|
|
|
|
let expr = self.expr()?;
|
|
|
|
|
let stmt: StmtP;
|
|
|
|
|
|
2024-10-18 22:03:10 -07:00
|
|
|
// TODO Parser::stmt_wrapped - complex assign statements could probably be cleaner and
|
|
|
|
|
// probably need their own function at this point.
|
|
|
|
|
|
|
|
|
|
let is_get_expr = expr.as_any_ref().downcast_ref::<GetExpr>().is_some();
|
|
|
|
|
let is_index_expr = expr.as_any_ref().downcast_ref::<IndexExpr>().is_some();
|
|
|
|
|
|
|
|
|
|
if (is_get_expr || is_index_expr)
|
2024-10-07 11:05:39 -07:00
|
|
|
&& mat!(
|
|
|
|
|
self,
|
|
|
|
|
TokenKind::Eq,
|
|
|
|
|
TokenKind::PlusEq,
|
|
|
|
|
TokenKind::MinusEq,
|
|
|
|
|
TokenKind::StarEq,
|
|
|
|
|
TokenKind::SlashEq
|
|
|
|
|
)
|
|
|
|
|
{
|
|
|
|
|
let op = self.prev.clone().unwrap();
|
2024-09-20 16:04:30 -07:00
|
|
|
let rhs = self.expr()?;
|
2024-10-18 22:03:10 -07:00
|
|
|
|
|
|
|
|
// unpack the GetExpr or IndexExpr and turn it into a SetExpr instead
|
|
|
|
|
if is_get_expr {
|
|
|
|
|
let expr = expr.as_any().downcast::<GetExpr>().unwrap();
|
|
|
|
|
stmt = Box::new(SetStmt {
|
|
|
|
|
expr: expr.expr,
|
|
|
|
|
name: expr.name,
|
|
|
|
|
op,
|
|
|
|
|
rhs,
|
|
|
|
|
});
|
|
|
|
|
} else if is_index_expr {
|
|
|
|
|
let expr = expr.as_any().downcast::<IndexExpr>().unwrap();
|
|
|
|
|
stmt = Box::new(IndexAssignStmt {
|
|
|
|
|
expr: expr.expr,
|
|
|
|
|
index: expr.index,
|
|
|
|
|
op,
|
|
|
|
|
rhs,
|
|
|
|
|
});
|
|
|
|
|
} else {
|
|
|
|
|
unreachable!()
|
|
|
|
|
}
|
2024-09-20 16:04:30 -07:00
|
|
|
} else {
|
|
|
|
|
stmt = Box::new(ExprStmt { expr });
|
|
|
|
|
}
|
|
|
|
|
expect!(
|
|
|
|
|
self,
|
|
|
|
|
"expect end of line after expression",
|
|
|
|
|
TokenKind::Eol,
|
|
|
|
|
TokenKind::Eof,
|
|
|
|
|
)?;
|
|
|
|
|
Ok(stmt)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn return_stmt(&mut self) -> Result<StmtP> {
|
|
|
|
|
let return_kw = self.prev.clone().unwrap();
|
|
|
|
|
let mut expr = None;
|
|
|
|
|
|
|
|
|
|
if !self.check(TokenKind::Eol) && !self.check(TokenKind::RBrace) {
|
|
|
|
|
expr = Some(self.expr()?);
|
|
|
|
|
}
|
|
|
|
|
if !self.check(TokenKind::RBrace) {
|
|
|
|
|
expect!(
|
|
|
|
|
self,
|
|
|
|
|
"expected end of line after return statement",
|
|
|
|
|
TokenKind::Eol,
|
|
|
|
|
TokenKind::Eof,
|
|
|
|
|
)?;
|
|
|
|
|
}
|
|
|
|
|
Ok(Box::new(ReturnStmt { return_kw, expr }))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn if_stmt(&mut self) -> Result<StmtP> {
|
|
|
|
|
let if_kw = self.prev.clone().unwrap();
|
|
|
|
|
let condition = self.expr()?;
|
|
|
|
|
self.expect("expect '{' after 'if' condition", TokenKind::LBrace)?;
|
|
|
|
|
let then_branch = self.block_stmt()?;
|
|
|
|
|
let mut else_branch = Vec::new();
|
|
|
|
|
if self.mat(TokenKind::Else)? {
|
|
|
|
|
if self.mat(TokenKind::If)? {
|
|
|
|
|
else_branch.push(self.if_stmt()?);
|
|
|
|
|
} else {
|
|
|
|
|
self.expect("expect '{' after else statement", TokenKind::LBrace)?;
|
|
|
|
|
else_branch = self.block()?;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
Ok(Box::new(IfStmt {
|
|
|
|
|
if_kw,
|
|
|
|
|
condition,
|
|
|
|
|
then_branch,
|
|
|
|
|
else_branch,
|
|
|
|
|
}))
|
|
|
|
|
}
|
|
|
|
|
|
WIP: Add imports and modules
This is a big change because it touches a lot of stuff, but here is the
overview:
* Import syntax:
```
import foo
import bar from foo
import bar from "foo.npp"
import bar, baz from foo
import * from foo
import "foo.npp"
```
* These are all valid imports. They should be pretty
straightforward, maybe with exception of the last item. If you are
importing a path directly, but not importing any members from it,
it does not insert anything into the current namespace, and just
executes the file. This is probably going to be unused but I want
to include it for completeness. We can always remove it later
before a hypothetical 1.0 release.
* The "from" keyword is only ever used as a keyword here, and I am
allowing it to be used as an identifier elsewhere. Don't export
it, because that's weird and wrong and won't work.
* Modules:
* Doing an `import foo` will look for "foo.npp" at compile-time,
relative to the importer's directory, parse it, and compile it.
The importer will then attempt to execute the module with the new
`EnterModule` op. This instruction will execute the module kind of
like a function, assigning the module's global namespace to an
object that you can pass around.
* `import bar from foo` and `import bar from "foo.npp"` et al syntax
is not currently implemented in the compiler.
* There is a new "Module" object that represents a potentially
un-initialized module. This can't be referred to directly in code.
* VM:
* The VM operates around Module objects now. If you want to "call" a
new module, you should call `enter_module`. This is how the main
chunk is invoked.
* TODOs:
* `exit_module` function in the VM
* Finish up module implementation in compiler
* Built-in modules
* Sub-modules - e.g. `import foo.bar` - how does naming work for
this?
* Module directories. In Python you have `foo/__init__.py` and in
Rust you have `foo/mod.rs`.
* Probably a "Namespace" object that explicitly denotes "this is an
imported module that you're dealing with"
* Tests, tests, tests
Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
2024-10-04 10:11:49 -07:00
|
|
|
fn import_stmt(&mut self) -> Result<StmtP> {
|
|
|
|
|
let import_kw = self.prev.clone().unwrap();
|
|
|
|
|
|
|
|
|
|
let name = expect!(
|
|
|
|
|
self,
|
|
|
|
|
"expect name, string, or '*' after import keyword",
|
|
|
|
|
TokenKind::String,
|
|
|
|
|
TokenKind::Name,
|
|
|
|
|
TokenKind::Star
|
|
|
|
|
)?;
|
|
|
|
|
|
|
|
|
|
let import_stmt = if self.mat(TokenKind::From)? {
|
|
|
|
|
if name.kind == TokenKind::String {
|
|
|
|
|
return Err(self.error("expect name before 'from' keyword"));
|
|
|
|
|
}
|
|
|
|
|
let what = vec![name];
|
|
|
|
|
let module = expect!(
|
|
|
|
|
self,
|
|
|
|
|
"expect name or string after 'from' keyword",
|
|
|
|
|
TokenKind::Name,
|
|
|
|
|
TokenKind::String
|
|
|
|
|
)?;
|
|
|
|
|
ImportStmt {
|
|
|
|
|
import_kw,
|
|
|
|
|
what,
|
|
|
|
|
module,
|
|
|
|
|
}
|
|
|
|
|
} else if self.check(TokenKind::Comma) {
|
|
|
|
|
if name.kind == TokenKind::String {
|
|
|
|
|
return Err(self.error("expect name in import list, not a string"));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let mut what = vec![name];
|
|
|
|
|
while self.mat(TokenKind::Comma)? {
|
|
|
|
|
let name = self
|
|
|
|
|
.expect("expect name after comma in import list", TokenKind::Name)?
|
|
|
|
|
.clone();
|
|
|
|
|
what.push(name);
|
|
|
|
|
}
|
|
|
|
|
self.expect("expect 'from' keyword after import list", TokenKind::From)?;
|
|
|
|
|
let module = expect!(
|
|
|
|
|
self,
|
|
|
|
|
"expect name or string after 'from' keyword",
|
|
|
|
|
TokenKind::Name,
|
|
|
|
|
TokenKind::String
|
|
|
|
|
)?;
|
|
|
|
|
ImportStmt {
|
|
|
|
|
import_kw,
|
|
|
|
|
what,
|
|
|
|
|
module,
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
if name.kind == TokenKind::Star {
|
|
|
|
|
return Err(
|
|
|
|
|
self.error("'import *' does not make any sense without a 'from' keyword")
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ImportStmt {
|
|
|
|
|
import_kw,
|
|
|
|
|
what: vec![],
|
|
|
|
|
module: name,
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
expect!(
|
|
|
|
|
self,
|
|
|
|
|
"expected end of line after import statement",
|
|
|
|
|
TokenKind::Eol,
|
|
|
|
|
TokenKind::Eof
|
|
|
|
|
)?;
|
|
|
|
|
|
|
|
|
|
Ok(Box::new(import_stmt))
|
|
|
|
|
}
|
|
|
|
|
|
2024-09-20 16:04:30 -07:00
|
|
|
fn block_stmt(&mut self) -> Result<BlockStmt> {
|
|
|
|
|
let lbrace = self.prev.clone().unwrap();
|
|
|
|
|
assert_eq!(lbrace.kind, TokenKind::LBrace);
|
|
|
|
|
let stmts = self.block()?;
|
|
|
|
|
let rbrace = self.prev.clone().unwrap();
|
|
|
|
|
assert_eq!(rbrace.kind, TokenKind::RBrace);
|
|
|
|
|
Ok(BlockStmt {
|
|
|
|
|
lbrace,
|
|
|
|
|
stmts,
|
|
|
|
|
rbrace,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn block(&mut self) -> Result<Vec<StmtP>> {
|
|
|
|
|
let mut stmts = Vec::new();
|
|
|
|
|
// the stmt rule is skipping past EOLs too. however if there's nothing *except* for EOLs
|
|
|
|
|
// remaining for the rest of the block, we want to know about that head of time rather than
|
|
|
|
|
// let the statement rule handle it.
|
|
|
|
|
// so we handle a bunch of EOLs right here and now.
|
|
|
|
|
while self.mat(TokenKind::Eol)? {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
while !self.check(TokenKind::RBrace) && !self.is_eof() {
|
|
|
|
|
let s = self.stmt()?;
|
|
|
|
|
if let Some(s) = s {
|
|
|
|
|
stmts.push(s);
|
|
|
|
|
} else {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
while self.mat(TokenKind::Eol)? {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
self.expect("expect '}' after statement block", TokenKind::RBrace)?;
|
|
|
|
|
Ok(stmts)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn assign_stmt(&mut self) -> Result<StmtP> {
|
|
|
|
|
let name = self
|
|
|
|
|
.expect("expect name for assign statement", TokenKind::Name)?
|
|
|
|
|
.clone();
|
2024-10-07 10:23:15 -07:00
|
|
|
let op = expect!(
|
|
|
|
|
self,
|
|
|
|
|
"expected '=' or augmented assign after name",
|
|
|
|
|
TokenKind::Eq,
|
|
|
|
|
TokenKind::PlusEq,
|
|
|
|
|
TokenKind::MinusEq,
|
|
|
|
|
TokenKind::StarEq,
|
|
|
|
|
TokenKind::SlashEq
|
|
|
|
|
)?;
|
2024-09-20 16:04:30 -07:00
|
|
|
let expr = self.expr()?;
|
|
|
|
|
if !self.check(TokenKind::RBrace) {
|
|
|
|
|
expect!(
|
|
|
|
|
self,
|
|
|
|
|
"expected end of line after assign statement",
|
|
|
|
|
TokenKind::Eol,
|
|
|
|
|
TokenKind::Eof
|
|
|
|
|
)?;
|
|
|
|
|
}
|
|
|
|
|
Ok(Box::new(AssignStmt {
|
|
|
|
|
lhs: name,
|
2024-10-07 10:23:15 -07:00
|
|
|
op,
|
2024-09-20 16:04:30 -07:00
|
|
|
rhs: expr,
|
|
|
|
|
}))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
|
// Expressions
|
|
|
|
|
//
|
|
|
|
|
/// Entry point for expression parsing; delegates to `logical_or_expr`.
fn expr(&mut self) -> Result<ExprP> {
    let parsed = self.logical_or_expr()?;
    Ok(parsed)
}
|
|
|
|
|
|
|
|
|
|
bin_expr!(logical_or_expr, logical_and_expr, TokenKind::Or);
|
|
|
|
|
|
|
|
|
|
bin_expr!(logical_and_expr, equality_expr, TokenKind::And);
|
|
|
|
|
|
|
|
|
|
bin_expr!(
|
|
|
|
|
equality_expr,
|
|
|
|
|
compare_expr,
|
|
|
|
|
TokenKind::BangEq,
|
|
|
|
|
TokenKind::EqEq
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
bin_expr!(
|
|
|
|
|
compare_expr,
|
|
|
|
|
binary_term,
|
|
|
|
|
TokenKind::Less,
|
|
|
|
|
TokenKind::LessEq,
|
|
|
|
|
TokenKind::Greater,
|
|
|
|
|
TokenKind::GreaterEq
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
bin_expr!(
|
|
|
|
|
binary_term,
|
|
|
|
|
binary_factor,
|
|
|
|
|
TokenKind::Plus,
|
|
|
|
|
TokenKind::Minus
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
bin_expr!(binary_factor, unary_expr, TokenKind::Star, TokenKind::Slash);
|
|
|
|
|
|
|
|
|
|
fn unary_expr(&mut self) -> Result<ExprP> {
|
|
|
|
|
if mat!(self, TokenKind::Bang, TokenKind::Minus, TokenKind::Plus) {
|
|
|
|
|
let op = self.prev.clone().unwrap();
|
|
|
|
|
let expr = self.unary_expr()?;
|
|
|
|
|
Ok(Box::new(UnaryExpr { op, expr }))
|
|
|
|
|
} else {
|
|
|
|
|
self.call_expr()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn call_expr(&mut self) -> Result<ExprP> {
|
|
|
|
|
let mut expr = self.primary_expr()?;
|
|
|
|
|
loop {
|
|
|
|
|
if self.mat(TokenKind::LParen)? {
|
|
|
|
|
expr = self.finish_call_expr(expr)?;
|
|
|
|
|
} else if self.mat(TokenKind::Dot)? {
|
|
|
|
|
let name = self
|
|
|
|
|
.expect("expect name after '.'", TokenKind::Name)?
|
|
|
|
|
.clone();
|
|
|
|
|
expr = Box::new(GetExpr { expr, name });
|
2024-09-30 16:33:58 -07:00
|
|
|
} else if self.mat(TokenKind::LBracket)? {
|
|
|
|
|
let index = self.expr()?;
|
|
|
|
|
let rbracket = self
|
|
|
|
|
.expect("expect ']' after index expression", TokenKind::RBracket)?
|
|
|
|
|
.clone();
|
|
|
|
|
expr = Box::new(IndexExpr {
|
|
|
|
|
expr,
|
|
|
|
|
index,
|
|
|
|
|
rbracket,
|
|
|
|
|
})
|
2024-09-20 16:04:30 -07:00
|
|
|
} else {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
Ok(expr)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn finish_call_expr(&mut self, callee: ExprP) -> Result<ExprP> {
|
|
|
|
|
let mut args = Vec::new();
|
|
|
|
|
if !self.check(TokenKind::RParen) {
|
|
|
|
|
args.push(self.expr()?);
|
|
|
|
|
while self.mat(TokenKind::Comma)? {
|
|
|
|
|
// this allows a trailing comma
|
|
|
|
|
if self.check(TokenKind::RParen) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
args.push(self.expr()?);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
let rparen = self
|
|
|
|
|
.expect("expect ')' after function arguments", TokenKind::RParen)?
|
|
|
|
|
.clone();
|
|
|
|
|
Ok(Box::new(CallExpr {
|
|
|
|
|
expr: callee,
|
|
|
|
|
args,
|
|
|
|
|
rparen,
|
|
|
|
|
}))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn primary_expr(&mut self) -> Result<ExprP> {
|
|
|
|
|
if mat!(
|
|
|
|
|
self,
|
|
|
|
|
TokenKind::Name,
|
|
|
|
|
TokenKind::Number,
|
|
|
|
|
TokenKind::String,
|
|
|
|
|
TokenKind::True,
|
|
|
|
|
TokenKind::False,
|
|
|
|
|
TokenKind::Nil
|
|
|
|
|
) {
|
|
|
|
|
Ok(Box::new(PrimaryExpr {
|
|
|
|
|
token: self.prev.clone().unwrap(),
|
|
|
|
|
}))
|
WIP: Add imports and modules
This is a big change because it touches a lot of stuff, but here is the
overview:
* Import syntax:
```
import foo
import bar from foo
import bar from "foo.npp"
import bar, baz from foo
import * from foo
import "foo.npp"
```
* These are all valid imports. They should be pretty
straightforward, maybe with exception of the last item. If you are
importing a path directly, but not importing any members from it,
it does not insert anything into the current namespace, and just
executes the file. This is probably going to be unused but I want
to include it for completeness. We can always remove it later
before a hypothetical 1.0 release.
* The "from" keyword is only ever used as a keyword here, and I am
allowing it to be used as an identifier elsewhere. Don't export
it, because that's weird and wrong and won't work.
* Modules:
* Doing an `import foo` will look for "foo.npp" at compile-time,
relative to the importer's directory, parse it, and compile it.
The importer will then attempt to execute the module with the new
`EnterModule` op. This instruction will execute the module kind of
like a function, assigning the module's global namespace to an
object that you can pass around.
* `import bar from foo` and `import bar from "foo.npp"` et al syntax
is not currently implemented in the compiler.
* There is a new "Module" object that represents a potentially
un-initialized module. This can't be referred to directly in code.
* VM:
* The VM operates around Module objects now. If you want to "call" a
new module, you should call `enter_module`. This is how the main
chunk is invoked.
* TODOs:
* `exit_module` function in the VM
* Finish up module implementation in compiler
* Built-in modules
* Sub-modules - e.g. `import foo.bar` - how does naming work for
this?
* Module directories. In Python you have `foo/__init__.py` and in
Rust you have `foo/mod.rs`.
* Probably a "Namespace" object that explicitly denotes "this is an
imported module that you're dealing with"
* Tests, tests, tests
Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
2024-10-04 10:11:49 -07:00
|
|
|
} else if mat!(self, TokenKind::From) {
|
|
|
|
|
// any other keywords that are acceptable as names in the primary expression context
|
|
|
|
|
let mut token = self.prev.clone().unwrap();
|
|
|
|
|
token.kind = TokenKind::Name;
|
|
|
|
|
Ok(Box::new(PrimaryExpr { token }))
|
2024-09-20 16:04:30 -07:00
|
|
|
} else if self.mat(TokenKind::LParen)? {
|
|
|
|
|
let expr: ExprP;
|
|
|
|
|
// check if we're defining a function
|
|
|
|
|
if self.check(TokenKind::RParen) {
|
|
|
|
|
expr = self.finish_function_expr()?;
|
|
|
|
|
} else if self.current.kind == TokenKind::Name
|
|
|
|
|
&& (self.next.kind == TokenKind::RParen
|
|
|
|
|
|| self.next.kind == TokenKind::Colon
|
|
|
|
|
|| self.next.kind == TokenKind::Comma)
|
|
|
|
|
{
|
|
|
|
|
expr = self.finish_function_expr()?;
|
|
|
|
|
} else {
|
|
|
|
|
expr = self.expr()?;
|
|
|
|
|
self.expect("expect ')' after expression", TokenKind::RParen)?;
|
|
|
|
|
}
|
|
|
|
|
Ok(expr)
|
2024-09-30 16:33:58 -07:00
|
|
|
} else if self.mat(TokenKind::LBracket)? {
|
2024-10-15 19:01:21 -07:00
|
|
|
self.list_or_map()
|
2024-09-20 16:04:30 -07:00
|
|
|
} else {
|
|
|
|
|
Err(self.error(format!("unexpected token {:?}", self.current.kind)))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn finish_function_expr(&mut self) -> Result<ExprP> {
|
|
|
|
|
let lparen = self.prev.clone().unwrap();
|
|
|
|
|
|
|
|
|
|
let mut params = Vec::new();
|
|
|
|
|
if !self.check(TokenKind::RParen) {
|
|
|
|
|
self.parse_param(&mut params)?;
|
|
|
|
|
while self.mat(TokenKind::Comma)? {
|
|
|
|
|
if self.check(TokenKind::RParen) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
self.parse_param(&mut params)?;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
self.expect(
|
|
|
|
|
"expect ')' after function definition parameters",
|
|
|
|
|
TokenKind::RParen,
|
|
|
|
|
)?;
|
|
|
|
|
|
|
|
|
|
let mut return_type = None;
|
|
|
|
|
if self.mat(TokenKind::Arrow)? {
|
|
|
|
|
return_type = Some(self.expr()?);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
self.expect("expect '{' after function signature", TokenKind::LBrace)?;
|
|
|
|
|
let body = self.block()?;
|
|
|
|
|
let rbrace = self.prev.clone().unwrap();
|
|
|
|
|
|
|
|
|
|
Ok(Box::new(FunctionExpr {
|
|
|
|
|
lparen,
|
|
|
|
|
params,
|
|
|
|
|
return_type,
|
|
|
|
|
body,
|
|
|
|
|
rbrace,
|
|
|
|
|
}))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn parse_param(&mut self, params: &mut Vec<(Token, Option<ExprP>)>) -> Result<()> {
|
|
|
|
|
let name = self
|
|
|
|
|
.expect("expect name after function declaration", TokenKind::Name)?
|
|
|
|
|
.clone();
|
|
|
|
|
let mut ty = None;
|
|
|
|
|
if self.mat(TokenKind::Colon)? {
|
|
|
|
|
ty = Some(self.expr()?);
|
|
|
|
|
}
|
|
|
|
|
params.push((name, ty));
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
2024-09-30 16:33:58 -07:00
|
|
|
|
2024-10-15 19:01:21 -07:00
|
|
|
fn list_or_map(&mut self) -> Result<ExprP> {
|
2024-09-30 16:33:58 -07:00
|
|
|
let lbracket = self.prev.clone().unwrap();
|
|
|
|
|
let mut exprs = Vec::new();
|
|
|
|
|
|
2024-10-15 19:01:21 -07:00
|
|
|
// check if it's a map
|
|
|
|
|
if self.mat(TokenKind::Colon)? {
|
|
|
|
|
let rbracket = self
|
|
|
|
|
.expect("expected ']' after empty map body", TokenKind::RBracket)?
|
|
|
|
|
.clone();
|
|
|
|
|
return Ok(Box::new(MapExpr {
|
|
|
|
|
lbracket,
|
|
|
|
|
pairs: vec![],
|
|
|
|
|
rbracket,
|
|
|
|
|
}));
|
|
|
|
|
}
|
|
|
|
|
|
2024-09-30 16:33:58 -07:00
|
|
|
if !self.check(TokenKind::RBracket) {
|
2024-10-15 19:01:21 -07:00
|
|
|
let expr = self.expr()?;
|
|
|
|
|
|
|
|
|
|
// check if it's a map
|
|
|
|
|
if self.mat(TokenKind::Colon)? {
|
|
|
|
|
let value = self.expr()?;
|
|
|
|
|
let pairs = vec![(expr, value)];
|
|
|
|
|
return self.finish_map(lbracket, pairs);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
exprs.push(expr);
|
|
|
|
|
|
2024-09-30 16:33:58 -07:00
|
|
|
while self.mat(TokenKind::Comma)? {
|
|
|
|
|
// allow trailing comma
|
|
|
|
|
if self.check(TokenKind::RBracket) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
exprs.push(self.expr()?);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let rbracket = self
|
|
|
|
|
.expect("expect ']' after list items", TokenKind::RBracket)?
|
|
|
|
|
.clone();
|
|
|
|
|
|
|
|
|
|
Ok(Box::new(ListExpr {
|
|
|
|
|
lbracket,
|
|
|
|
|
exprs,
|
|
|
|
|
rbracket,
|
|
|
|
|
}))
|
|
|
|
|
}
|
2024-10-15 19:01:21 -07:00
|
|
|
|
|
|
|
|
fn finish_map(&mut self, lbracket: Token, mut pairs: Vec<(ExprP, ExprP)>) -> Result<ExprP> {
|
|
|
|
|
while self.mat(TokenKind::Comma)? {
|
|
|
|
|
// trailing comma
|
|
|
|
|
if self.check(TokenKind::RBracket) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
let key = self.expr()?;
|
|
|
|
|
self.expect("expected ':' after map key", TokenKind::Colon)?;
|
|
|
|
|
let value = self.expr()?;
|
|
|
|
|
pairs.push((key, value));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let rbracket = self
|
|
|
|
|
.expect("expect ']' after map pairs", TokenKind::RBracket)?
|
|
|
|
|
.clone();
|
|
|
|
|
|
|
|
|
|
Ok(Box::new(MapExpr {
|
|
|
|
|
lbracket,
|
|
|
|
|
pairs,
|
|
|
|
|
rbracket,
|
|
|
|
|
}))
|
|
|
|
|
}
|
2024-09-20 16:04:30 -07:00
|
|
|
}
|
2024-09-26 10:03:54 -07:00
|
|
|
|
|
|
|
|
// Test helper: takes the next item from `$lexer` (panicking via `unwrap` if
// that fails) and asserts that its `kind` and `text` equal `$kind` and `$text`.
#[cfg(test)]
macro_rules! lexer_check {
    ($lexer:expr, $kind:expr, $text:expr) => {{
        let token = $lexer.next().unwrap();
        assert_eq!(token.kind, $kind);
        assert_eq!(token.text, $text);
    }};
}
|
|
|
|
|
|
|
|
|
|
// Space-separated identifiers, including ones containing underscores, must
// each lex as a single Name token, after which the lexer reports EOF.
#[test]
fn test_lexer_names() {
    let input = "asdf fdsa the_quick_brown_fox jumped_over_the_lazy_dogs";
    let mut lexer = Lexer::new(input.to_string(), ":testing:");

    let expected_names = [
        "asdf",
        "fdsa",
        "the_quick_brown_fox",
        "jumped_over_the_lazy_dogs",
    ];
    for expected in expected_names {
        lexer_check!(lexer, TokenKind::Name, expected);
    }
    assert!(lexer.is_eof());
}
|
|
|
|
|
|
|
|
|
|
// Integer, decimal, hexadecimal (0x / 0X) and binary (0b / 0B) literals must
// each lex as a single Number token whose text is the literal as written,
// after which the lexer reports EOF.
#[test]
fn test_lexer_numbers() {
    let input = "1 2 3 0 0.0 1.0 2.0 3.0 0x1 0xa 0xff 0xabcd 0xabcdef 0XDEADBEEF 0XDECAFDAD 0b0 0b1 0b010101 0B101010 0B00000";
    let mut lexer = Lexer::new(input.to_string(), ":testing:");

    let expected_numbers = [
        // integers
        "1",
        "2",
        "3",
        "0",
        // decimals
        "0.0",
        "1.0",
        "2.0",
        "3.0",
        // hexadecimal, both prefix cases
        "0x1",
        "0xa",
        "0xff",
        "0xabcd",
        "0xabcdef",
        "0XDEADBEEF",
        "0XDECAFDAD",
        // binary, both prefix cases
        "0b0",
        "0b1",
        "0b010101",
        "0B101010",
        "0B00000",
    ];
    for expected in expected_numbers {
        lexer_check!(lexer, TokenKind::Number, expected);
    }
    assert!(lexer.is_eof());
}
|