Fix a bug with how line/col positions are calculated

Positions were being calculated by looking at the *first* character,
which would cause the same character to be visited twice. Now, if the
character is a null byte (\0), it won't advance the position at all.

The null byte should be a safe sentinel value to use because it
(hypothetically) shouldn't be showing up in a source file. This will
probably cause a hang if a file starts with null bytes, but that's
a problem for future me.

Signed-off-by: Alek Ratzloff <alekratz@gmail.com>
This commit is contained in:
2022-01-16 18:49:38 -08:00
parent 1eb7eb73cb
commit dfac970cc7
2 changed files with 17 additions and 7 deletions

View File

@@ -33,12 +33,11 @@ pub struct Lexer<'t> {
impl<'t> Lexer<'t> {
pub fn new(source: impl ToString, text: &'t str) -> Self {
let c = text.chars().next().unwrap_or('\0');
Self {
source: Rc::new(source.to_string()),
text,
start: Pos::new(c),
end: Pos::new(c),
start: Pos::new('\0'),
end: Pos::new('\0'),
}
}
@@ -89,7 +88,7 @@ impl<'t> Lexer<'t> {
return Ok(None);
}
if let Some(cap) = LEX_PAT.captures(&self.text[self.end.byte..]) {
if let Some(cap) = LEX_PAT.captures(&self.text[self.start.byte..]) {
self.end = self.end.next_str(cap.get(0).unwrap().as_str());
let sp_token = if let Some(_) = cap.name("assign") {
self.make_token(Token::Assign)

View File

@@ -37,8 +37,15 @@ impl Pos {
}
pub fn next(self, c: char) -> Self {
let (line, col) = if self.c == '\n' {
(self.line + 1, 0)
// catch the "pre-scan" case
if c == '\0' {
let mut next = self;
next.c = c;
return next;
}
let (line, col) = if c == '\n' {
(self.line + 1, 1)
} else {
(self.line, self.col + 1)
};
@@ -99,7 +106,11 @@ impl Span {
impl Display for Span {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
if self.start.line == self.end.line {
write!(fmt, "line {} in {}", self.start.line, self.source)
write!(
fmt,
"line {}, column {} in {}",
self.start.line, self.start.col, self.source
)
} else {
write!(
fmt,