WIP: Add imports and modules

This is a big change because it touches a lot of stuff, but here is the
overview:

* Import syntax:
    ```
    import foo
    import bar from foo
    import bar from "foo.npp"
    import bar, baz from foo
    import * from foo
    import "foo.npp"
    ```
    * These are all valid imports. They should be pretty
      straightforward, with the possible exception of the last item. If
      you import a path directly without importing any members from it,
      nothing is inserted into the current namespace; the file is just
      executed. This is probably going to be unused, but I want to
      include it for completeness. We can always remove it later,
      before a hypothetical 1.0 release.
    * The "from" keyword is only ever used as a keyword here, and I am
      allowing it to be used as an identifier elsewhere. Don't export
      it, because that's weird and wrong and won't work.
* Modules:
    * Doing an `import foo` will look for "foo.npp" at compile-time,
      relative to the importer's directory, parse it, and compile it.
      The importer will then attempt to execute the module with the new
      `EnterModule` op. This instruction will execute the module kind of
      like a function, assigning the module's global namespace to an
      object that you can pass around.
    * The `import bar from foo`, `import bar from "foo.npp"`, and
      related "from" forms are not yet implemented in the compiler.
    * There is a new "Module" object that represents a potentially
      un-initialized module. This can't be referred to directly in code.
* VM:
    * The VM operates around Module objects now. If you want to "call" a
      new module, you should call `enter_module`. This is how the main
      chunk is invoked.
* TODOs:
    * `exit_module` function in the VM
    * Finish up module implementation in compiler
    * Built-in modules
    * Sub-modules - e.g. `import foo.bar` - how does naming work for
      this?
    * Module directories. In Python you have `foo/__init__.py` and in
      Rust you have `foo/mod.rs`.
    * Probably a "Namespace" object that explicitly denotes "this is an
      imported module that you're dealing with"
    * Tests, tests, tests

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2024-10-04 10:11:49 -07:00
parent 4a7644b84a
commit f0de5f7850
12 changed files with 753 additions and 268 deletions

View File

@@ -1,5 +1,8 @@
use std::collections::{HashMap, HashSet};
use std::fmt::{self, Display};
use std::fs::File;
use std::io::Read;
use std::path::{Path, PathBuf};
use std::rc::Rc;
use std::sync::LazyLock;
@@ -9,7 +12,9 @@ use thiserror::Error;
use crate::ast::*;
use crate::obj::prelude::*;
use crate::obj::Ptr;
use crate::obj::BUILTINS;
use crate::parser::Parser;
use crate::token::TokenKind;
use crate::vm::*;
@@ -40,6 +45,12 @@ impl LineNumber {
}
impl StmtVisitor for LineNumber {
/// Records the line span of an `import` statement: it begins on the line of the `import`
/// keyword and ends on the line of the module token.
fn visit_import_stmt(&mut self, stmt: &ImportStmt) -> Result<()> {
    let first_line = stmt.import_kw.line;
    let last_line = stmt.module.line;
    self.update_start(first_line);
    self.update_end(last_line);
    Ok(())
}
fn visit_expr_stmt(&mut self, stmt: &ExprStmt) -> Result<()> {
stmt.expr.accept(self).unwrap();
Ok(())
@@ -165,6 +176,25 @@ impl LocalAssignCollector {
}
impl StmtVisitor for LocalAssignCollector {
/// Collects the names that an `import` statement assigns in the current scope.
///
/// `import foo` assigns `foo`, while `import a, b from foo` assigns `a` and `b`. Tokens that
/// are not plain names (the string in `import "my_file.ext"`, or the `*` in
/// `import * from foo`) assign nothing.
fn visit_import_stmt(&mut self, stmt: &ImportStmt) -> Result<()> {
    if stmt.what.is_empty() {
        // `import foo` / `import "my_file.ext"`: only the bare-name form binds the module.
        let module = &stmt.module;
        if module.kind == TokenKind::Name {
            self.names.insert(module.text.to_string());
        }
        return Ok(());
    }

    // `import foo, bar from baz`: bind every listed member, skipping the `*` wildcard.
    let members = stmt
        .what
        .iter()
        .filter(|member| member.kind == TokenKind::Name);
    for member in members {
        self.names.insert(member.text.to_string());
    }
    Ok(())
}
fn visit_expr_stmt(&mut self, stmt: &ExprStmt) -> Result<()> {
stmt.expr.accept(self)?;
Ok(())
@@ -285,6 +315,25 @@ impl LocalNameCollector {
}
impl StmtVisitor for LocalNameCollector {
/// Collects the local names introduced by an `import` statement.
///
/// With a member list (`import foo, bar from baz`) every member that is a plain name is
/// recorded; without one (`import foo`) the module name itself is recorded. String paths and
/// the `*` wildcard are not name tokens and are therefore skipped.
fn visit_import_stmt(&mut self, stmt: &ImportStmt) -> Result<()> {
    if !stmt.what.is_empty() {
        // `import foo, bar from baz`
        for member in &stmt.what {
            if member.kind != TokenKind::Name {
                // `import * from foo` contributes no name
                continue;
            }
            self.names.insert(member.text.to_string());
        }
    } else if stmt.module.kind == TokenKind::Name {
        // `import foo` - but not `import "my_file.ext"`
        self.names.insert(stmt.module.text.to_string());
    }
    Ok(())
}
fn visit_expr_stmt(&mut self, stmt: &ExprStmt) -> Result<()> {
stmt.expr.accept(self)?;
Ok(())
@@ -441,19 +490,27 @@ impl Display for CompileError {
////////////////////////////////////////////////////////////////////////////////
#[derive(Debug)]
pub struct Compiler {
pub struct Compiler<'c> {
path: PathBuf,
chunks: Vec<Chunk>,
scopes: Vec<Scope>,
constants: Vec<ObjP>,
constants: &'c mut Vec<ObjP>,
imported: &'c mut HashMap<String, Ptr<Module>>,
globals: Vec<String>,
}
impl Compiler {
pub fn new() -> Self {
impl<'c> Compiler<'c> {
pub fn new(
path: PathBuf,
constants: &'c mut Vec<ObjP>,
imported: &'c mut HashMap<String, Ptr<Module>>,
) -> Self {
Compiler {
path,
chunks: Default::default(),
scopes: Default::default(),
constants: Default::default(),
constants,
imported,
globals: BUILTINS
.with_borrow(|builtins| builtins.keys().map(ToString::to_string).collect()),
}
@@ -479,10 +536,33 @@ impl Compiler {
self.scopes.is_empty()
}
/// Reads, parses, and compiles the source file at `path` into a module.
///
/// # Errors
///
/// Returns an error if `path` is not valid UTF-8, if the file cannot be opened or read, if
/// the parser reports an error, or if compilation fails.
pub fn compile_path(self, path: impl AsRef<Path>) -> Result<Ptr<Module>> {
    let path: &Path = path.as_ref();
    // The parser and the compiled module both receive the path as text, so reject paths that
    // cannot be represented as UTF-8 instead of panicking on them.
    let path_str = path.to_str().ok_or_else(|| CompileError {
        line: None,
        message: format!("path is not valid UTF-8: {}", path.display()),
    })?;

    let mut file = File::open(path).map_err(|e| CompileError {
        line: None,
        message: format!("could not open {}: {}", path.display(), e),
    })?;
    let mut contents = String::new();
    file.read_to_string(&mut contents)?;

    let mut parser = Parser::new(contents, &path_str)?;
    let ast = parser.parse_all()?;
    // The parser can finish while still having recorded errors; treat that as a failure.
    if parser.was_error() {
        return Err(CompileError {
            line: None,
            message: format!("error in '{}'", path.display()),
        }
        .into());
    }

    self.compile(&path_str, &ast)
}
/// Compiles a body of code.
///
/// This returns a tuple of `Chunk`, the constants table, and the list of globals.
pub fn compile(mut self, body: &Vec<StmtP>) -> Result<(Chunk, Vec<ObjP>, Vec<String>)> {
/// This returns the module of the compiled program.
pub fn compile(mut self, path: impl ToString, body: &Vec<StmtP>) -> Result<Ptr<Module>> {
self.chunks.push(Chunk::default());
for stmt in body {
@@ -494,11 +574,19 @@ impl Compiler {
if let Some(last) = body.last() {
last_line = stmt_line_number(last.as_ref());
}
self.emit(last_line, Op::Halt);
self.emit(last_line, Op::ExitModule);
let chunk = self.chunks.pop().expect("no chunk");
Ok((chunk, self.constants, self.globals))
// This is allowed because obviously it is a pointer to a Module. We can upcast later.
let module = Module::create(path.to_string(), Rc::new(chunk), self.globals);
let module = unsafe {
let ptr = Ptr::into_raw(module) as *const gc::GcCell<Module>;
Ptr::from_raw(ptr)
};
Ok(module)
}
fn compile_stmt(&mut self, stmt: &StmtP) -> Result<()> {
@@ -673,21 +761,13 @@ impl Compiler {
chunk.code.push(op);
chunk.lines.push(line);
}
}
impl StmtVisitor for Compiler {
fn visit_expr_stmt(&mut self, stmt: &ExprStmt) -> Result<()> {
self.compile_expr(&stmt.expr)?;
self.emit(stmt_line_number(stmt), Op::Pop);
Ok(())
}
fn visit_assign_stmt(&mut self, stmt: &AssignStmt) -> Result<()> {
let name = &stmt.lhs.text;
/// Emit an assign statement based on the current scope - i.e. `Op::SetGlobal` if we're
/// in global scope, or `Op::SetLocal` if we're in a function or local scope.
fn emit_assign(&mut self, line: LineRange, name: &str) -> Result<()> {
if self.is_global_scope() {
let global = self.insert_global(name)?;
self.compile_expr(&stmt.rhs)?;
self.emit(stmt_line_number(stmt), Op::SetGlobal(global));
self.emit(line, Op::SetGlobal(global));
} else {
let mut declare = false;
let local = if let Some(local) = self.get_local(name) {
@@ -696,12 +776,93 @@ impl StmtVisitor for Compiler {
declare = true;
self.insert_local(name.to_string())?
}
.clone(); // gotta clone so we can borrow self as mutable for compile_expr
self.compile_expr(&stmt.rhs)?;
.clone();
if !declare {
self.emit(stmt_line_number(stmt), Op::SetLocal(local.index));
self.emit(line, Op::SetLocal(local.index));
}
}
Ok(())
}
/// Returns the directory that relative imports are resolved against: the parent directory of
/// the file being compiled.
///
/// If the compiler's path has no parent (for example an empty path), this falls back to the
/// empty path instead of panicking. Joining onto an empty path yields the joined path
/// unchanged, which is the same result `Path::parent` gives for a bare file name.
fn search_dir(&self) -> &Path {
    self.path.parent().unwrap_or_else(|| Path::new(""))
}
}
impl StmtVisitor for Compiler<'_> {
/// Compiles an `import` statement.
///
/// The module file is resolved relative to the importing file's directory, compiled (or
/// fetched from the `imported` cache if this compile session has already seen it), pushed as
/// a constant, and entered with `Op::EnterModule`. For `import foo` the resulting value is
/// assigned to `foo`; for `import "foo.npp"` it is popped and nothing is assigned.
///
/// # Errors
///
/// Returns an error if the statement imports individual names (`import a from b`, which is
/// not implemented yet), if the resolved path is not valid UTF-8, or if the imported module
/// fails to compile.
fn visit_import_stmt(&mut self, stmt: &ImportStmt) -> Result<()> {
    const EXT: &str = "npp";
    let line = stmt_line_number(stmt);

    // TODO Compiler::visit_import_stmt - visit names from module
    // Report the unimplemented form as a compile error up front, before any module is
    // compiled or any code is emitted, rather than panicking on valid syntax.
    if !stmt.what.is_empty() {
        return Err(CompileError {
            line: Some(line),
            message: format!(
                "while importing module '{}': importing names from a module is not implemented yet",
                stmt.module.text
            ),
        }
        .into());
    }

    // resolve filename and get full filepath
    let requested = match stmt.module.kind {
        TokenKind::Name => PathBuf::from(format!("{}.{EXT}", stmt.module.text)),
        TokenKind::String => PathBuf::from(unescape(&stmt.module.text)),
        _ => unreachable!(),
    };
    // Make every import path absolute so that `import foo` and `import "foo.npp"` map to the
    // same cache key and the file is not compiled twice.
    let path = if requested.is_absolute() {
        requested
    } else {
        std::path::absolute(self.search_dir().join(requested))?
    };

    // check if this has already been registered with our compile session and just use that if
    // so
    let path_str = path.to_str().ok_or_else(|| CompileError {
        line: Some(line),
        message: format!(
            "while importing module '{}': path '{}' is not valid UTF-8",
            stmt.module.text,
            path.display()
        ),
    })?;
    let module = if let Some(imported) = self.imported.get(path_str) {
        // use the imported module
        imported.clone()
    } else {
        // otherwise compile and create a new Module object and insert it as a constant and
        // also into the modules cache
        //
        // NOTE(review): the module is only added to `imported` after it has finished
        // compiling, so a module that (transitively) imports itself looks like it would
        // recurse without bound - confirm and add a guard.
        let module = Compiler::new(path.clone(), self.constants, self.imported)
            .compile_path(&path)
            .map_err(|e| CompileError {
                line: Some(line),
                message: format!("while importing module '{}': {}", stmt.module.text, e),
            })?;
        self.imported.insert(path_str.to_string(), module.clone());
        module
    };

    let module_constant = self.insert_constant(upcast_obj(module))?;
    self.emit(line, Op::PushConstant(module_constant));

    // evaluate the module, and then assign the resulting object to the module name as
    // appropriate
    self.emit(line, Op::EnterModule);
    // only assign if it's a name, if it's a string we don't assign anything
    if stmt.module.kind == TokenKind::Name {
        self.emit_assign(line, &stmt.module.text)?;
    } else {
        self.emit(line, Op::Pop);
    }
    Ok(())
}
/// Compiles an expression statement: the expression is compiled and then an `Op::Pop` is
/// emitted for it, attributed to the statement's line range.
fn visit_expr_stmt(&mut self, stmt: &ExprStmt) -> Result<()> {
    self.compile_expr(&stmt.expr)?;
    let line = stmt_line_number(stmt);
    self.emit(line, Op::Pop);
    Ok(())
}
fn visit_assign_stmt(&mut self, stmt: &AssignStmt) -> Result<()> {
// compile RHS
self.compile_expr(&stmt.rhs)?;
let name = &stmt.lhs.text;
// If the last value that was assigned to is a function, set its name here
// TODO - maybe this would be smarter to set up in the AST. I'm 99% sure that the last
@@ -713,6 +874,9 @@ impl StmtVisitor for Compiler {
}
}
// compile LHS
self.emit_assign(stmt_line_number(stmt), name)?;
Ok(())
}
@@ -794,7 +958,7 @@ impl StmtVisitor for Compiler {
}
}
impl ExprVisitor for Compiler {
impl ExprVisitor for Compiler<'_> {
fn visit_binary_expr(&mut self, expr: &BinaryExpr) -> Result<()> {
static OP_NAMES: LazyLock<HashMap<TokenKind, &'static str>> = LazyLock::new(|| {
hash_map! {
@@ -1024,7 +1188,11 @@ impl ExprVisitor for Compiler {
// create the function
let chunk = self.chunks.pop().unwrap();
let fun = UserFunction::create(chunk, expr.params.len() as Argc);
let fun = UserFunction::create(
&self.path.as_os_str().to_str().unwrap(),
chunk,
expr.params.len() as Argc,
);
// register the function as a constant
let fun_constant = self.insert_constant(fun)?;