Browse Source

fix number and symbol parsing

master
Alison Watson 2 years ago
parent
commit
d142632914
  1. 143
      source/framework/vire/parser/tok.rs

143
source/framework/vire/parser/tok.rs

@ -193,17 +193,14 @@ fn integer_rad(rd: &mut PosReader, rad: u32) -> Result<Token, Err> {
c if is_delim(c) => break,
Some(c) if c.is_digit(rad) => {
rd.next();
let cn = unsafe { text::radix(c) }.into();
n =
n.checked_mul(rad.into()).ok_or_else(|| Err::Numb(rd.pos()))?;
n = if sign {
n.checked_sub(cn).ok_or_else(|| Err::Numb(rd.pos()))?
} else {
n.checked_add(cn).ok_or_else(|| Err::Numb(rd.pos()))?
};
let c = unsafe { text::radix(c) }.into();
let res = n.checked_mul(rad.into());
n = res.ok_or_else(|| Err::Numb(rd.pos()))?;
let res = if sign { n.checked_sub(c) } else { n.checked_add(c) };
n = res.ok_or_else(|| Err::Numb(rd.pos()))?;
}
Some(c) => return Err(Err::Delim(rd.pos(), c)),
None => unsafe { std::hint::unreachable_unchecked() },
None => return Err(Err::Eof),
}
}
@ -219,12 +216,13 @@ fn is_sym_init(c: char) -> bool {
| '/' | ':' | '<'
| '=' | '>' | '?'
| '~' | '_' | '^'
| '+' | '-'
)
}
// Character predicate: true when `c` passes `is_sym_init`, when
// `c.is_numeric()` holds, or when `c` is one of the literal characters
// listed in the `matches!` below.
//
// NOTE(review): two forms of the body appear back to back here (a one-line
// expression, then a three-line expression that also lists '+' and '-').
// This looks like the before/after of a single change rendered without diff
// markers -- confirm which form is live before editing.
// NOTE(review): `char::is_numeric` already accepts ASCII digits, so the
// '0'..='9' range inside `matches!` appears to be redundant.
fn is_sym_subs(c: char) -> bool {
is_sym_init(c) || c.is_numeric() || matches!(c, '0'..='9' | '.' | '@')
is_sym_init(c)
|| c.is_numeric()
|| matches!(c, '0'..='9' | '.' | '@' | '+' | '-')
}
fn symbol(rd: &mut PosReader, c: char) -> Result<Token, Err> {
@ -285,75 +283,92 @@ fn block_comment(rd: &mut PosReader) -> Result<(), Err> {
}
impl Token {
pub fn read_all(name: &str, data: &str) -> Result<Vec<Self>, Err> {
fn read_from_char(
rd: &mut PosReader,
c: char,
) -> Result<Option<Self>, Err> {
use self::Type::*;
let mut tokens = Vec::new();
let rd = &mut PosReader::new(data, text::ellipsize_small_str(name));
let tk = match c {
// line comments
';' => {
line_comment(rd)?;
return Ok(None);
}
while let Some(c) = rd.next() {
let tk = match c {
// line comments
';' => {
line_comment(rd)?;
continue;
// basic tokens
'(' => new_tok(rd, Br1O, Data::None),
')' => new_tok(rd, Br1C, Data::None),
'[' => new_tok(rd, Br3O, Data::None),
']' => new_tok(rd, Br3C, Data::None),
'.' => new_tok(delim_end(rd)?, Peri, Data::None),
// quote abbreviations
'\'' => new_tok(rd, Quot, Data::None),
'`' => new_tok(rd, QQuo, Data::None),
',' => unquote(rd, UnQS, UnQu),
// tokens preceded by #
'#' => match rd.next().ok_or(Err::Eof)? {
// block comments
'|' => {
block_comment(rd)?;
return Ok(None);
}
// basic tokens
'(' => new_tok(rd, Br1O, Data::None),
')' => new_tok(rd, Br1C, Data::None),
'[' => new_tok(rd, Br3O, Data::None),
']' => new_tok(rd, Br3C, Data::None),
'.' => new_tok(delim_end(rd)?, Peri, Data::None),
// integers
'b' | 'B' => integer_rad(rd, 2)?,
'o' | 'O' => integer_rad(rd, 8)?,
'd' | 'D' => integer_rad(rd, 10)?,
'x' | 'X' => integer_rad(rd, 16)?,
// quote abbreviations
'\'' => new_tok(rd, Quot, Data::None),
'`' => new_tok(rd, QQuo, Data::None),
',' => unquote(rd, UnQS, UnQu),
// booleans
't' | 'T' => new_tok(delim_end(rd)?, Bool, Data::Bool(true)),
'f' | 'F' => new_tok(delim_end(rd)?, Bool, Data::Bool(false)),
// tokens preceded by #
'#' => match rd.next().ok_or(Err::Eof)? {
// block comments
'|' => {
block_comment(rd)?;
continue;
}
// syntax abbreviations
'\'' => new_tok(rd, Synt, Data::None),
'`' => new_tok(rd, QSyn, Data::None),
',' => unquote(rd, UnSS, UnSy),
// integers
'b' | 'B' => integer_rad(rd, 2)?,
'o' | 'O' => integer_rad(rd, 8)?,
'd' | 'D' => integer_rad(rd, 10)?,
'x' | 'X' => integer_rad(rd, 16)?,
// character literals
'\\' => char_lit(rd)?,
// booleans
't' | 'T' => new_tok(delim_end(rd)?, Bool, Data::Bool(true)),
'f' | 'F' => new_tok(delim_end(rd)?, Bool, Data::Bool(false)),
// syntax abbreviations
'\'' => new_tok(rd, Synt, Data::None),
'`' => new_tok(rd, QSyn, Data::None),
',' => unquote(rd, UnSS, UnSy),
c => return Err(Err::Char(rd.pos(), c)),
},
// character literals
'\\' => char_lit(rd)?,
// strings
'"' => strn_lit(rd)?,
c => return Err(Err::Char(rd.pos(), c)),
},
// symbols
c if is_sym_init(c) => symbol(rd, c)?,
// strings
'"' => strn_lit(rd)?,
// skip whitespace
c if c.is_whitespace() => return Ok(None),
// integers without prefixes
c if c.is_digit(10) => integer_rad(rd, 10)?,
c => return Err(Err::Char(rd.pos(), c)),
};
// symbols
c if is_sym_init(c) => symbol(rd, c)?,
Ok(Some(tk))
}
// skip whitespace
c if c.is_whitespace() => continue,
pub fn read_all(name: &str, data: &str) -> Result<Vec<Self>, Err> {
let mut tokens = Vec::new();
let rd = &mut PosReader::new(data, text::ellipsize_small_str(name));
c => return Err(Err::Char(rd.pos(), c)),
while let Some(c) = rd.peek() {
let tk = if c.is_digit(10) || c == '-' {
// integers without prefixes are a special case and must
// be handled before actually parsing a token
integer_rad(rd, 10)?
} else {
rd.next();
if let Some(tk) = Self::read_from_char(rd, c)? {
tk
} else {
continue;
}
};
tokens.push(tk);

Loading…
Cancel
Save