From 58421a0469dfade4fc21c14eaf1710c6ccda7524 Mon Sep 17 00:00:00 2001
From: Alek Ratzloff
Date: Mon, 27 Apr 2020 12:42:17 -0400
Subject: [PATCH] Initial commit with lexer

Signed-off-by: Alek Ratzloff
---
 .gitignore       |   1 +
 Cargo.lock       |  84 +++++++++++
 Cargo.toml       |  12 ++
 src/main.rs      |   6 +
 src/syn/error.rs |  36 +++++
 src/syn/lexer.rs | 360 +++++++++++++++++++++++++++++++++++++++++++++++
 src/syn/mod.rs   |   4 +
 src/syn/span.rs  | 150 ++++++++++++++++++++
 src/syn/token.rs |  68 +++++++++
 src/util.rs      |  38 +++++
 10 files changed, 759 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Cargo.lock
 create mode 100644 Cargo.toml
 create mode 100644 src/main.rs
 create mode 100644 src/syn/error.rs
 create mode 100644 src/syn/lexer.rs
 create mode 100644 src/syn/mod.rs
 create mode 100644 src/syn/span.rs
 create mode 100644 src/syn/token.rs
 create mode 100644 src/util.rs

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ea8c4bf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..b3d3405
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,84 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+[[package]]
+name = "doc-comment"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
+
+[[package]]
+name = "lazy_static"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
+
+[[package]]
+name = "maplit"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
+
+[[package]]
+name = "not-python"
+version = "0.1.0"
+dependencies = [
+ "lazy_static",
+ "maplit",
+ "snafu",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df246d292ff63439fea9bc8c0a270bed0e390d5ebd4db4ba15aba81111b5abe3"
+dependencies = [
+ "unicode-xid",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2bdc6c187c65bca4260c9011c9e3132efe4909da44726bad24cf7572ae338d7f"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "snafu"
+version = "0.6.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1ec0ae2ed980f26e1ad62e717feb01df90731df56887b5391a2c79f9f6805be"
+dependencies = [
+ "doc-comment",
+ "snafu-derive",
+]
+
+[[package]]
+name = "snafu-derive"
+version = "0.6.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ec32ba84a7a86aeb0bc32fd0c46d31b0285599f68ea72e87eff6127889d99e1"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "syn"
+version = "1.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "410a7488c0a728c7ceb4ad59b9567eb4053d02e8cc7f5c0e0eeeb39518369213"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-xid",
+]
+
+[[package]]
+name = "unicode-xid"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..21e1da6
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "not-python"
+version = "0.1.0"
+authors = ["Alek Ratzloff"]
+edition = "2018"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+snafu = "0.6.6"
+lazy_static = "1.4.0"
+maplit = "1.0.2"
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..c082f31
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,6 @@
+mod syn;
+mod util;
+
+fn main() {
+    println!("Hello, world!");
+}
diff --git a/src/syn/error.rs b/src/syn/error.rs
new file mode 100644
index 0000000..f43b214
--- /dev/null
+++ b/src/syn/error.rs
@@ -0,0 +1,36 @@
+use crate::syn::span::*;
+use snafu::Snafu;
+
+#[derive(Debug, Snafu)]
+pub enum Error {
+    #[snafu(display("expected {}, but got {} instead", expected, got))]
+    ExpectedGot {
+        expected: String,
+        got: String,
+        span: Span,
+    },
+
+    #[snafu(display("unexpected {}", what))]
+    Unexpected {
+        what: String,
+        span: Span,
+    },
+
+    #[snafu(display("unknown {}", what))]
+    Unknown {
+        what: String,
+        span: Span,
+    }
+}
+
+impl Spanned for Error {
+    fn span(&self) -> Span {
+        match self {
+            Error::ExpectedGot { span, .. }
+            | Error::Unknown { span, .. }
+            | Error::Unexpected { span, .. } => { *span }
+        }
+    }
+}
+
+pub type Result<T, E = Error> = std::result::Result<T, E>;
diff --git a/src/syn/lexer.rs b/src/syn/lexer.rs
new file mode 100644
index 0000000..5504e3e
--- /dev/null
+++ b/src/syn/lexer.rs
@@ -0,0 +1,360 @@
+use crate::{
+    syn::{error::*, span::*, token::*},
+    util::LazyString,
+};
+use lazy_static::lazy_static;
+use maplit::hashmap;
+use std::{collections::HashMap, mem, str::Chars};
+
+const IDENT_START_CHARS: &'static [char] = &[
+    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's',
+    't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L',
+    'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '_',
+];
+const IDENT_CHARS: &'static [char] = &[
+    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
+    'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B',
+    'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U',
+    'V', 'W', 'X', 'Y', 'Z', '_',
+];
+
+const DEC_NUM_CHARS: &'static [char] = &['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
+
+const HEX_NUM_CHARS: &'static [char] = &[
+    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C',
+    'D', 'E', 'F',
+];
+
+const STR_QUOTE_CHARS: &'static [char] = &['"', '\''];
+
+const OP_CHARS: &'static [char] = &['=', '+', '*', '-', '/', '>', '<', '~', '!', '%', '^'];
+
+lazy_static! {
+    static ref OPS: HashMap<&'static str, TokenKind> = hashmap! {
+        "=" => TokenKind::Eq,
+        "->" => TokenKind::Arrow,
+    };
+
+    static ref KEYWORDS: HashMap<&'static str, TokenKind> = hashmap! {
+        "return" => TokenKind::KwReturn,
+    };
+}
+
+pub struct Lexer<'t> {
+    chars: Chars<'t>,
+    text: &'t str,
+    start: Pos,
+    end: Pos,
+}
+
+impl<'t> Lexer<'t> {
+    /// Creates a new lexer that tokenizes the given text.
+    pub fn new(text: &'t str) -> Self {
+        // load the first position into the start/end position trackers
+        let pos = if let Some(c) = text.chars().next() {
+            Pos::from_char(c, 0, 0, 0, 0)
+        } else {
+            Default::default()
+        };
+
+        Lexer {
+            chars: text.chars(),
+            text,
+            start: pos,
+            end: pos,
+        }
+    }
+
+    /// Gets whether this lexer has reached the EOF.
+    pub fn is_eof(&self) -> bool {
+        self.chars.clone().next().is_none()
+    }
+
+    ////////////////////////////////////////////////////////////////////////////////
+    // Character advancement
+    ////////////////////////////////////////////////////////////////////////////////
+    fn curr_char(&self) -> Option<char> {
+        self.chars.clone().next()
+    }
+
+    fn adv_char(&mut self) -> Option<char> {
+        let c = self.chars.next()?;
+        self.end = self.end.next_char(c);
+        Some(c)
+    }
+
+    fn skip_whitespace(&mut self) {
+        self.match_while(|c| c.is_whitespace());
+    }
+
+    fn catchup(&mut self) -> Span {
+        let start = mem::replace(&mut self.start, self.end);
+        Span {
+            start,
+            end: self.end,
+        }
+    }
+
+    fn make_token(&mut self, kind: TokenKind) -> Token {
+        let span = self.catchup();
+        Token::new(kind, span)
+    }
+
+    ////////////////////////////////////////////////////////////////////////////////
+    // Tokens
+    ////////////////////////////////////////////////////////////////////////////////
+    pub fn next_token(&mut self) -> Result<Option<Token>> {
+        self.skip_whitespace();
+
+        let curr = if let Some(curr) = self.curr_char() {
+            curr
+        } else {
+            return Ok(None);
+        };
+
+        let token = match curr {
+            c if IDENT_START_CHARS.contains(&c) => self.next_ident_or_kw()?,
+            c if DEC_NUM_CHARS.contains(&c) => self.next_num()?,
+            '"' | '\'' => self.next_str()?,
+            ':' => self.next_sym()?,
+            '(' => self.next_char_token('(', TokenKind::LParen)?,
+            ')' => self.next_char_token(')', TokenKind::RParen)?,
+            '{' => self.next_char_token('{', TokenKind::LBrace)?,
+            '}' => self.next_char_token('}', TokenKind::RBrace)?,
+            '[' => self.next_char_token('[', TokenKind::LBracket)?,
+            ']' => self.next_char_token(']', TokenKind::RBracket)?,
+            ',' => self.next_char_token(',', TokenKind::Comma)?,
+            c if OP_CHARS.contains(&c) => self.next_op()?,
+            c => return Err(Error::Unexpected {
+                what: format!("character {}", c.escape_debug()),
+                span: self.span(),
+            })
+        };
+        Ok(Some(token))
+    }
+
+    fn next_ident_or_kw(&mut self) -> Result<Token> {
+        let ident = self.expect_ident("identifier")?;
+        let kind = KEYWORDS.get(ident).copied()
+            .unwrap_or(TokenKind::Ident);
+        Ok(self.make_token(kind))
+    }
+
+    fn next_num(&mut self) -> Result<Token> {
+        let first = self.expect_any(DEC_NUM_CHARS, "number")?;
+        let alphabet = if first == '0' && matches!(self.curr_char(), Some('x') | Some('X')) {
+            self.adv_char().unwrap();
+            self.expect_any(HEX_NUM_CHARS, "hex number")?;
+            HEX_NUM_CHARS
+        } else {
+            DEC_NUM_CHARS
+        };
+
+        self.match_while(|c| alphabet.contains(&c));
+        Ok(self.make_token(TokenKind::Num))
+    }
+
+    fn next_str(&mut self) -> Result<Token> {
+        let start_char = self.expect_any(STR_QUOTE_CHARS, "string")?;
+        while let Some(c) = self.match_where(|curr| curr != start_char) {
+            if c == '\\' {
+                // Match escapes
+                self.expect_any(&['n', 't', 'r', '\\', '\'', '\"', '0'], "escape character")?;
+            }
+        }
+        self.expect_char(start_char, "end of string")?;
+        Ok(self.make_token(TokenKind::Str))
+    }
+
+    fn next_sym(&mut self) -> Result<Token> {
+        self.expect_char(':', "symbol")?;
+        self.expect_ident("symbol")?;
+        Ok(self.make_token(TokenKind::Sym))
+    }
+
+    fn next_op(&mut self) -> Result<Token> {
+        self.expect_any(OP_CHARS, "operator")?;
+        let op_text = self.match_while(|c| OP_CHARS.contains(&c));
+        if let Some(kind) = OPS.get(op_text).copied() {
+            Ok(self.make_token(kind))
+        } else {
+            Err(Error::Unknown {
+                what: format!("operator {}", op_text.escape_debug()),
+                span: self.span(),
+            })
+        }
+    }
+
+    fn next_char_token(&mut self, c: char, kind: TokenKind) -> Result<Token> {
+        self.expect_char(c, LazyString::new(|| format!("{} token", kind)))?;
+        Ok(self.make_token(kind))
+    }
+
+    ////////////////////////////////////////////////////////////////////////////////
+    // Character pattern matching
+    ////////////////////////////////////////////////////////////////////////////////
+    fn match_where<P>(&mut self, p: P) -> Option<char>
+    where
+        P: Fn(char) -> bool,
+    {
+        if (p)(self.curr_char()?) {
+            self.adv_char()
+        } else {
+            None
+        }
+    }
+
+    fn match_while<P>(&mut self, p: P) -> &str
+    where
+        P: Fn(char) -> bool + Copy,
+    {
+        while self.match_where(p).is_some() {}
+        self.text_at(self.text)
+    }
+
+    fn expect_where<P>(&mut self, p: P, expected: impl ToString) -> Result<char>
+    where
+        P: Fn(char) -> bool,
+    {
+        // Check EOF
+        self.curr_char().ok_or_else(|| Error::ExpectedGot {
+            expected: expected.to_string(),
+            got: "EOF".to_string(),
+            span: self.span(),
+        })?;
+
+        // Match
+        self.match_where(p).ok_or_else(|| Error::ExpectedGot {
+            expected: expected.to_string(),
+            got: format!("{} character", self.curr_char().unwrap().escape_debug()),
+            span: self.span(),
+        })
+    }
+
+    fn expect_char(&mut self, c: char, expected: impl ToString) -> Result<char> {
+        self.expect_where(|curr| curr == c, expected)
+    }
+
+    fn expect_any(&mut self, chars: &[char], expected: impl ToString) -> Result<char> {
+        self.expect_where(|curr| chars.contains(&curr), expected)
+    }
+
+    fn expect_ident(&mut self, expected: impl ToString) -> Result<&str> {
+        self.expect_any(IDENT_START_CHARS, expected)?;
+        Ok(self.match_while(|curr| IDENT_CHARS.contains(&curr)))
+    }
+}
+
+impl Spanned for Lexer<'_> {
+    fn span(&self) -> Span {
+        Span {
+            start: self.start,
+            end: self.end,
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_next_token_eof() {
+        let mut lexer = Lexer::new("");
+        assert!(matches!(lexer.next_token(), Ok(None)));
+        assert!(lexer.is_eof());
+
+        let mut lexer = Lexer::new(" ");
+        assert!(matches!(lexer.next_token(), Ok(None)));
+        assert!(lexer.is_eof());
+
+        let mut lexer = Lexer::new(" \n \n \n\r\n\t ");
+        assert!(matches!(lexer.next_token(), Ok(None)));
+        assert!(lexer.is_eof());
+    }
+
+    macro_rules! test_token {
+        ($text:expr, $token_kind:expr, $token_text:expr) => {{
+            let text = $text;
+            let mut lexer = Lexer::new(text);
+            let token = lexer.next_token().expect("token").expect("token");
+            assert_eq!(token.kind(), $token_kind);
+            assert_eq!(token.text_at(text), $token_text);
+        }};
+
+        ($text:expr, $token_kind:expr) => {{
+            test_token!($text, $token_kind, $text);
+        }};
+    }
+
+    #[test]
+    fn test_ident_token() {
+        test_token!("ident", TokenKind::Ident);
+        test_token!("OtherIdent", TokenKind::Ident);
+        test_token!("other_ident", TokenKind::Ident);
+        test_token!("ident1234", TokenKind::Ident);
+        test_token!("RETURN", TokenKind::Ident);
+    }
+
+    #[test]
+    fn test_keywords() {
+        test_token!("return", TokenKind::KwReturn);
+    }
+
+    #[test]
+    fn test_num_token() {
+        test_token!("1234", TokenKind::Num);
+        test_token!("4321", TokenKind::Num);
+        test_token!("123498765", TokenKind::Num);
+        test_token!("432156789", TokenKind::Num);
+        test_token!("0xdcbaBEEF", TokenKind::Num);
+        test_token!("0xabcdFEED", TokenKind::Num);
+        test_token!("0XdcbaBEEF", TokenKind::Num);
+        test_token!("0XabcdFEED", TokenKind::Num);
+        test_token!("0X123456789DCBAbeef", TokenKind::Num);
+        test_token!("0xABCDfeed192837465", TokenKind::Num);
+    }
+
+    #[test]
+    fn test_str_token() {
+        test_token!(r#""this is a string""#, TokenKind::Str);
+        test_token!(r#"'this is a string'"#, TokenKind::Str);
+        test_token!(r#"'this is a string\nwith escapes'"#, TokenKind::Str);
+        test_token!(r#""this is a string\nwith escapes""#, TokenKind::Str);
+    }
+
+    #[test]
+    fn test_sym_token() {
+        test_token!(":symbol", TokenKind::Sym);
+        test_token!(":OtherSymbol", TokenKind::Sym);
+        test_token!(":other_symbol", TokenKind::Sym);
+        test_token!(":symbol1234", TokenKind::Sym);
+    }
+
+    #[test]
+    fn test_single_char_symbols() {
+        test_token!("(", TokenKind::LParen);
+        test_token!(")", TokenKind::RParen);
+        test_token!("{", TokenKind::LBrace);
+        test_token!("}", TokenKind::RBrace);
+        test_token!("[", TokenKind::LBracket);
+        test_token!("]", TokenKind::RBracket);
+        test_token!(",", TokenKind::Comma);
+    }
+
+    #[test]
+    fn test_op_tokens() {
+        test_token!("=", TokenKind::Eq);
+        test_token!("->", TokenKind::Arrow);
+    }
+
+    #[test]
+    fn test_expect_char() {
+        let mut lexer = Lexer::new("asdf");
+        assert!(matches!(lexer.expect_char('a', "a"), Ok('a')));
+        assert!(matches!(lexer.expect_char('s', "s"), Ok('s')));
+        assert!(matches!(lexer.expect_char('d', "d"), Ok('d')));
+        assert!(matches!(lexer.expect_char('f', "f"), Ok('f')));
+        assert!(lexer.is_eof());
+    }
+}
diff --git a/src/syn/mod.rs b/src/syn/mod.rs
new file mode 100644
index 0000000..9959f63
--- /dev/null
+++ b/src/syn/mod.rs
@@ -0,0 +1,4 @@
+pub mod error;
+pub mod lexer;
+pub mod span;
+pub mod token;
diff --git a/src/syn/span.rs b/src/syn/span.rs
new file mode 100644
index 0000000..c744e86
--- /dev/null
+++ b/src/syn/span.rs
@@ -0,0 +1,150 @@
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct Pos {
+    pub source: usize,
+    pub line: usize,
+    pub col: usize,
+    pub byte: usize,
+    pub len: usize,
+}
+
+impl Default for Pos {
+    fn default() -> Self {
+        Pos {
+            source: 0,
+            line: 0,
+            col: 0,
+            byte: 0,
+            len: 1,
+        }
+    }
+}
+
+impl Pos {
+    pub fn from_char(c: char, source: usize, line: usize, col: usize, byte: usize) -> Self {
+        Pos {
+            source, line, col, byte, len: c.len_utf8(),
+        }
+    }
+
+    pub fn next_char(&self, c: char) -> Self {
+        Pos {
+            source: self.source + 1,
+            line: self.line,
+            col: self.col + 1,
+            byte: self.byte + self.len,
+            len: c.len_utf8(),
+        }
+    }
+
+    pub fn min(self, other: Self) -> Self {
+        if self.byte < other.byte {
+            self
+        } else {
+            other
+        }
+    }
+
+    pub fn max(self, other: Self) -> Self {
+        if self.byte > other.byte {
+            self
+        } else {
+            other
+        }
+    }
+}
+
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
+pub struct Span {
+    pub start: Pos,
+    pub end: Pos,
+}
+
+impl Span {
+    pub fn union(self, other: Self) -> Self {
+        let start = self.start.min(other.start);
+        let end = self.end.max(other.end);
+        Span { start, end }
+    }
+}
+
+pub trait Spanned {
+    fn span(&self) -> Span;
+
+    fn text_at<'t>(&self, text: &'t str) -> &'t str {
+        let Span { start, end } = self.span();
+        &text[start.byte .. end.byte]
+    }
+}
+
+impl Spanned for Span {
+    fn span(&self) -> Span {
+        *self
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_pos_min() {
+        let small = Pos::default();
+        let large = Pos {
+            source: 1,
+            byte: 1,
+            ..Default::default()
+        };
+
+        assert_eq!(small.min(large), small);
+        assert_eq!(large.min(small), small);
+    }
+
+    #[test]
+    fn test_pos_max() {
+        let small = Pos::default();
+        let large = Pos {
+            source: 1,
+            byte: 1,
+            ..Default::default()
+        };
+
+        assert_eq!(small.max(large), large);
+        assert_eq!(large.max(small), large);
+    }
+
+    #[test]
+    fn test_span_union() {
+        let first = Span {
+            start: Pos::default(),
+            end: Pos {
+                source: 15,
+                col: 15,
+                byte: 15,
+                ..Default::default()
+            }
+        };
+
+        let second = Span {
+            start: Pos {
+                source: 25,
+                col: 25,
+                byte: 25,
+                ..Default::default()
+            },
+            end: Pos {
+                source: 27,
+                col: 27,
+                byte: 27,
+                ..Default::default()
+            }
+        };
+
+        let expected = Span {
+            start: first.start,
+            end: second.end,
+        };
+
+        assert_eq!(first.union(second), expected);
+        assert_eq!(second.union(first), expected);
+    }
+}
diff --git a/src/syn/token.rs b/src/syn/token.rs
new file mode 100644
index 0000000..de9e7d3
--- /dev/null
+++ b/src/syn/token.rs
@@ -0,0 +1,68 @@
+use crate::syn::span::*;
+use std::fmt::{Display, Formatter, self};
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum TokenKind {
+    Ident,
+    Num,
+    Str,
+    Sym,
+
+    LParen,
+    RParen,
+    LBrace,
+    RBrace,
+    LBracket,
+    RBracket,
+    Comma,
+
+    Eq,
+    Arrow,
+
+    KwReturn,
+}
+
+impl Display for TokenKind {
+    fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
+        use TokenKind::*;
+        let s = match self {
+            Ident => "identifier",
+            Num => "number",
+            Str => "string",
+            Sym => "symbol",
+
+            LParen => "left paren",
+            RParen => "right paren",
+            LBrace => "left brace",
+            RBrace => "right brace",
+            LBracket => "left bracket",
+            RBracket => "right bracket",
+            Comma => "comma",
+
+            Eq => "equals",
+            Arrow => "arrow",
+
+            KwReturn => "return keyword",
+        };
+        Display::fmt(s, fmt)
+    }
+}
+
+pub struct Token {
+    kind: TokenKind,
+    span: Span,
+}
+
+impl Token {
+    pub fn new(kind: TokenKind, span: Span) -> Self {
+        Token { kind, span, }
+    }
+
+    pub fn kind(&self) -> TokenKind {
+        self.kind
+    }
+}
+
+impl Spanned for Token {
+    fn span(&self) -> Span { self.span }
+}
diff --git a/src/util.rs b/src/util.rs
new file mode 100644
index 0000000..a50e422
--- /dev/null
+++ b/src/util.rs
@@ -0,0 +1,38 @@
+use std::{
+    fmt::{Display, Formatter, self},
+};
+
+pub struct LazyString<'f, F>
+    where F: Fn() -> String + 'f
+{
+    source: F,
+    _lifetime: std::marker::PhantomData<&'f ()>,
+}
+
+impl<'f, F> LazyString<'f, F>
+    where F: Fn() -> String + 'f
+{
+    pub fn new(source: F) -> Self {
+        LazyString {
+            source,
+            _lifetime: Default::default(),
+        }
+    }
+}
+
+impl<'f, F> Display for LazyString<'f, F>
+    where F: Fn() -> String + 'f
+{
+    fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
+        let s = (self.source)();
+        Display::fmt(&s, fmt)
+    }
+}
+
+#[test]
+fn test_lazy_string() {
+    let i = 10;
+
+    let lzstr = LazyString::new(|| format!("the value is {}", i));
+    assert_eq!(lzstr.to_string(), "the value is 10");
+}