Browse Source

implement most of the Vire parser

master
Alison Watson 3 years ago
parent
commit
1c21693bd4
  1. 20
      source/main.rs
  2. 6
      source/vire.rs
  3. 70
      source/vire/parser.rs
  4. 22
      source/vire/parser/boolv.rs
  5. 30
      source/vire/parser/chara.rs
  6. 69
      source/vire/parser/cment.rs
  7. 53
      source/vire/parser/datum.rs
  8. 67
      source/vire/parser/integ.rs
  9. 121
      source/vire/parser/listv.rs
  10. 56
      source/vire/parser/space.rs
  11. 41
      source/vire/parser/strng.rs
  12. 15
      source/vire/parser/symbl.rs
  13. 54
      source/vire/parser/token.rs

20
source/main.rs

@ -202,6 +202,26 @@ fn fallback_main(
conf: &conf::Conf,
lg: &log::Log,
) -> Result<(), Box<dyn std::error::Error>> {
let main = {
use std::io::prelude::*;
let mut file = std::fs::File::open("main.vire")?;
let mut data = String::new();
file.read_to_string(&mut data)?;
match vire::parser::datum::Datum::parse(&data) {
Ok(datum) => datum,
Err(res) => {
lg!(
lg, log::Level::Warning, "`main.vire': {}",
vire::parser::printable_error(&data, res),
);
Vec::new()
}
}
};
trace!(lg, main);
unimplemented!();
let concur_frames = conf.render.concurrent_frames.into();
let hal = hal::Context::new()?;

6
source/vire.rs

@ -1,7 +1,3 @@
mod parser;
pub use self::{
parser::Datum,
};
pub mod parser;
// EOF

70
source/vire/parser.rs

@ -1,53 +1,29 @@
mod boolv;
mod chara;
mod integ;
mod listv;
mod strng;
mod symbl;
pub mod boolv;
pub mod chara;
pub mod cment;
pub mod datum;
pub mod integ;
pub mod listv;
pub mod space;
pub mod strng;
pub mod symbl;
pub mod token;
use nom::{
IResult,
branch::alt,
combinator::{all_consuming, complete, cut},
error::ErrorKind,
};
pub type IResult<I, O> = nom::IResult<I, O, nom::error::VerboseError<I>>;
#[derive(Debug, PartialEq)]
pub enum Datum {
BoolF,
BoolT,
Chara(char),
Integ(i64),
Listv(Box<Datum>, Box<Datum>),
Nullv,
Strng(String),
Symbl(String),
}
fn datum(inp: &str) -> IResult<&str, Datum> {
// <datum> ->
// | <boolean>
// | <character>
// | <symbol>
// | <string>
// | <integer>
// | <list>
alt((
boolv::parse,
chara::parse,
symbl::parse,
strng::parse,
integ::parse,
listv::parse,
))(inp)
}
impl Datum {
pub fn parse(inp: &str) -> Result<Self, nom::Err<(&str, ErrorKind)>> {
match all_consuming(cut(complete(datum)))(inp) {
Ok((_, data)) => Ok(data),
Err(res) => Err(res),
pub fn printable_error(
inp: &str,
res: nom::Err<nom::error::VerboseError<&str>>,
) -> String {
match res {
nom::Err::Incomplete(need) => {
match need {
nom::Needed::Unknown => "data was needed".to_owned(),
nom::Needed::Size(sz) => format!("{} bytes were needed", sz),
}
}
nom::Err::Error(res) => nom::error::convert_error(inp, res),
nom::Err::Failure(res) => nom::error::convert_error(inp, res),
}
}

22
source/vire/parser/boolv.rs

@ -1,17 +1,12 @@
use nom::{
IResult,
branch::alt,
bytes::complete::tag,
combinator::map,
};
use super::Datum;
use super::{datum::Datum, IResult};
use nom::{branch::alt, bytes::complete::tag, combinator::map};
pub fn parse(inp: &str) -> IResult<&str, Datum> {
// <boolean> -> `#f' | `#t'
alt((
// <* boolean> -> `#f' | `#t'
super::token::delim(alt((
map(tag("#f"), |_| Datum::BoolF),
map(tag("#t"), |_| Datum::BoolT),
))(inp)
)))(inp)
}
#[test]
@ -19,11 +14,8 @@ fn test() {
use nom::error::ErrorKind;
assert_eq!(parse("#t"), Ok(("", Datum::BoolT)));
assert_eq!(parse("#f"), Ok(("", Datum::BoolF)));
assert_eq!(
parse("garbage"),
Err(nom::Err::Error(("garbage", ErrorKind::Tag))),
);
assert_eq!(parse(""), Err(nom::Err::Error(("", ErrorKind::Tag))));
assert!(parse("garbage").is_err());
assert!(parse("").is_err());
}
// EOF

30
source/vire/parser/chara.rs

@ -1,5 +1,5 @@
use super::{datum::Datum, IResult};
use nom::{
IResult,
branch::alt,
bytes::complete::tag,
character::complete::{anychar, hex_digit1},
@ -7,7 +7,6 @@ use nom::{
sequence::preceded,
};
use std::convert::TryFrom;
use super::Datum;
#[derive(Debug)]
enum ErrUnicodeParse {
@ -45,13 +44,13 @@ pub fn unicode(inp: &str) -> IResult<&str, char> {
}
pub fn parse(inp: &str) -> IResult<&str, Datum> {
// <character> ->
// <* character> ->
// | `#\U+' <digit 16>+
// | `#\' <any character>
alt((
super::token::delim(alt((
map(preceded(tag(r"#\U+"), cut(unicode)), |t| Datum::Chara(t)),
map(preceded(tag(r"#\"), cut(anychar)), |t| Datum::Chara(t)),
))(inp)
)))(inp)
}
#[test]
@ -64,21 +63,12 @@ fn test() {
assert_eq!(parse(r"#\U+fe0f"), Ok(("", Datum::Chara('\u{fe0f}'))));
assert_eq!(parse(r"#\U+FE0F"), Ok(("", Datum::Chara('\u{fe0f}'))));
assert_eq!(parse(r"#\bb"), Ok(("b", Datum::Chara('b'))));
assert_eq!(
parse(r"#\U+1234567890"),
Err(nom::Err::Failure(("1234567890", ErrorKind::MapRes))),
);
assert_eq!(
parse(r"#\U+g"),
Err(nom::Err::Failure(("g", ErrorKind::HexDigit))),
);
assert_eq!(parse(r"#U+a"), Err(nom::Err::Error(("#U+a", ErrorKind::Tag))));
assert_eq!(parse(r"#b"), Err(nom::Err::Error(("#b", ErrorKind::Tag))));
assert_eq!(
parse("garbage"),
Err(nom::Err::Error(("garbage", ErrorKind::Tag))),
);
assert_eq!(parse(""), Err(nom::Err::Error(("", ErrorKind::Tag))));
assert!(parse(r"#\U+1234567890").is_err());
assert!(parse(r"#\U+g").is_err());
assert!(parse(r"#U+a").is_err());
assert!(parse(r"#b").is_err());
assert!(parse("garbage").is_err());
assert!(parse("").is_err());
}
// EOF

69
source/vire/parser/cment.rs

@ -0,0 +1,69 @@
use super::{space::{eof, line_ending, not_line_ending}, IResult};
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{anychar, char},
combinator::recognize,
multi::many_till,
sequence::{pair, tuple},
};
pub fn line(inp: &str) -> IResult<&str, &str> {
// <line comment> ->
// | `;' <not line ending>* <line ending>
recognize(tuple((char(';'), not_line_ending, alt((line_ending, eof)))))(inp)
}
/// Recognizes a (possibly nested) block comment and returns the whole
/// matched slice, including the `#|` and `|#` markers.
pub fn block(inp: &str) -> IResult<&str, &str> {
    // <block comment> ->
    //   | `#|' <block comment inner>* `|#'
    // <block comment inner> ->
    //   | <not `|#'>
    //   | <block comment>

    // One step of the comment body: a nested block comment if one starts
    // here, otherwise a single character of any kind.
    let inner = alt((block, recognize(anychar)));
    // Repeat body steps until the closing marker is found.
    let body = many_till(inner, tag("|#"));
    recognize(pair(tag("#|"), body))(inp)
}
/// Recognizes a datum comment: the marker `#;` followed by one complete
/// datum. The parsed datum is discarded; only the matched slice is
/// returned.
pub fn datum(inp: &str) -> IResult<&str, &str> {
    // <datum comment> -> `#;' <datum>
    let commented = pair(tag("#;"), super::datum::parse);
    recognize(commented)(inp)
}
/// Recognizes any one comment, trying the line, block and datum forms in
/// that order, and returns the matched slice.
pub fn any(inp: &str) -> IResult<&str, &str> {
    // <comment> ->
    //   | <line comment>
    //   | <block comment>
    //   | <datum comment>
    let comment = alt((line, block, datum));
    comment(inp)
}
#[test]
fn line_test() {
    // A comment that runs to the end of input.
    let inp = "; test";
    assert_eq!(line(inp), Ok(("", inp)));

    // A comment followed by a run of line-ending characters, all of which
    // are consumed.
    let inp = "; test\n\r\n\r\n";
    assert_eq!(line(inp), Ok(("", inp)));

    // Anything after the line ending is left for the next parser.
    assert_eq!(line("; a\nrest"), Ok(("rest", "; a\n")));

    // Without the leading `;' this is not a comment.
    let inp = " test\n\r\n\r\n";
    assert!(line(inp).is_err());
}
#[test]
fn block_test() {
    // A simple block comment.
    let inp = "#| test |#";
    assert_eq!(block(inp), Ok(("", inp)));

    // Block comments nest.
    let inp = "#| #| test |# |#";
    assert_eq!(block(inp), Ok(("", inp)));

    // Input after the closing marker is left untouched.
    assert_eq!(block("#| a |# rest"), Ok((" rest", "#| a |#")));

    // A missing opening marker is rejected.
    let inp = "#test |# |#";
    assert!(block(inp).is_err());

    // An unterminated comment is rejected.
    let inp = "#| test #";
    assert!(block(inp).is_err());
}
// TODO: datum test
// EOF

53
source/vire/parser/datum.rs

@ -0,0 +1,53 @@
use super::IResult;
use nom::{
branch::alt,
combinator::all_consuming,
error::VerboseError,
multi::many0,
sequence::delimited,
};
/// A single parsed Vire datum.
#[derive(Debug, PartialEq)]
pub enum Datum {
    /// The boolean written `#f`.
    BoolF,
    /// The boolean written `#t`.
    BoolT,
    /// A character literal.
    Chara(char),
    /// An integer literal.
    Integ(i64),
    /// A pair of data; lists are built as chains of pairs ending in
    /// `Nullv`.
    Listv(Box<Datum>, Box<Datum>),
    /// The empty list, which also terminates a list.
    Nullv,
    /// A string literal.
    Strng(String),
    /// A symbol.
    Symbl(String),
}
impl Datum {
pub fn parse(inp: &str) -> Result<Vec<Self>, nom::Err<VerboseError<&str>>> {
match all_consuming(many0(self::parse))(inp) {
Ok((_, data)) => Ok(data),
Err(res) => Err(res),
}
}
}
/// Parses one datum, skipping any space or comments on either side of it.
pub fn parse(inp: &str) -> IResult<&str, Datum> {
    // <datum> ->
    //   | <boolean>
    //   | <character>
    //   | <symbol>
    //   | <string>
    //   | <integer>
    //   | <list>

    // The alternatives are tried in this order and the first match wins.
    // NOTE(review): symbols are tried before integers; confirm that a
    // leading `-' on an integer is not claimed by the symbol parser.
    let kind = alt((
        super::boolv::parse,
        super::chara::parse,
        super::symbl::parse,
        super::strng::parse,
        super::integ::parse,
        super::listv::parse,
    ));
    delimited(super::space::any0, kind, super::space::any0)(inp)
}
// EOF

67
source/vire/parser/integ.rs

@ -1,12 +1,11 @@
use super::{datum::Datum, IResult};
use nom::{
IResult,
branch::alt,
bytes::complete::{tag, take_while1},
character::complete::char,
combinator::{flat_map, map, map_parser, opt},
error::ErrorKind,
error::{ErrorKind, VerboseErrorKind},
};
use super::Datum;
#[derive(Clone, Copy)]
enum Radix {
@ -71,14 +70,12 @@ fn radix(inp: &str) -> IResult<&str, Radix> {
// <radix 10> -> `#d' | <empty>
// <radix 16> -> `#x'
map(
opt(
alt((
map(tag("#b"), |_| Radix::Base2),
map(tag("#o"), |_| Radix::Base8),
map(tag("#d"), |_| Radix::Base10),
map(tag("#x"), |_| Radix::Base16),
)),
),
opt(alt((
map(tag("#b"), |_| Radix::Base2),
map(tag("#o"), |_| Radix::Base8),
map(tag("#d"), |_| Radix::Base10),
map(tag("#x"), |_| Radix::Base16),
))),
|v| v.unwrap_or(Radix::Base10),
)(inp)
}
@ -87,7 +84,7 @@ fn signed(inp: &str) -> IResult<&str, i8> {
// <sign> ->
// | <empty>
// | `-'
map(opt(char('-')), |v| if v.is_none() {1} else {-1})(inp)
map(opt(char('-')), |v| if v.is_none() { 1 } else { -1 })(inp)
}
fn int_parse(
@ -96,27 +93,27 @@ fn int_parse(
sign: i8,
) -> Result<Datum, std::num::ParseIntError> {
Ok(Datum::Integ(
i64::from_str_radix(inp, rad.into())?.wrapping_mul(sign.into())
i64::from_str_radix(inp, rad.into())?.wrapping_mul(sign.into()),
))
}
pub fn parse(inp: &str) -> IResult<&str, Datum> {
// <integer r> -> <radix r> <sign> <digit r>+
flat_map(
radix,
|rad| flat_map(
signed,
move |sign| map_parser(
take_while1(move |ch| rad.matches(ch)),
move |inp| {
match int_parse(inp, rad, sign) {
Ok(v) => Ok(("", v)),
Err(_) => Err(nom::Err::Failure((inp, ErrorKind::MapRes))),
}
},
),
),
)(inp)
// <* integer r> -> <radix r> <sign> <digit r>+
super::token::delim(flat_map(radix, |rad| {
flat_map(signed, move |sign| {
map_parser(take_while1(move |ch| rad.matches(ch)), move |inp| {
match int_parse(inp, rad, sign) {
Ok(v) => Ok(("", v)),
Err(_) => Err(nom::Err::Error(nom::error::VerboseError {
errors: vec![(
inp,
VerboseErrorKind::Nom(ErrorKind::MapRes),
)],
}))
}
})
})
}))(inp)
}
#[test]
@ -141,15 +138,9 @@ fn test() {
parse("#x-4000000000000000"),
Ok(("", Datum::Integ(-4611686018427387904))),
);
assert_eq!(
parse("#xFFFFFFFFFFFFFFFF"),
Err(nom::Err::Failure(("FFFFFFFFFFFFFFFF", ErrorKind::MapRes))),
);
assert_eq!(
parse("garbage"),
Err(nom::Err::Error(("garbage", ErrorKind::TakeWhile1))),
);
assert_eq!(parse(""), Err(nom::Err::Error(("", ErrorKind::TakeWhile1))));
assert!(parse("#xFFFFFFFFFFFFFFFF").is_err());
assert!(parse("garbage").is_err());
assert!(parse("").is_err());
}
// EOF

121
source/vire/parser/listv.rs

@ -1,72 +1,65 @@
use nom::{
use super::{
datum::{self, Datum},
token::{brk1c, brk1o, brk3c, brk3o, dotls},
IResult,
};
use nom::{
branch::alt,
character::complete::{char, multispace0, multispace1},
combinator::map,
multi::separated_list,
multi::many0,
sequence::{delimited, separated_pair},
};
use super::{Datum, datum};
fn skip_space<F, I, O, E>(sep: F) -> impl Fn(I) -> IResult<I, O, E>
fn sexpr<'a, F, G>(
dl_st: F,
dl_en: G,
) -> impl Fn(&'a str) -> IResult<&'a str, Datum>
where
F: Fn(I) -> IResult<I, O, E>,
I: nom::InputTakeAtPosition,
<I as nom::InputTakeAtPosition>::Item: nom::AsChar + Clone,
E: nom::error::ParseError<I>,
F: Fn(&'a str) -> IResult<&'a str, char>,
G: Fn(&'a str) -> IResult<&'a str, char>,
{
delimited(multispace0, sep, multispace0)
}
// <* dot> -> `.'
fn sexpr<'a>(
dl_st: char,
dl_en: char,
) -> impl Fn(&'a str) -> IResult<&'a str, Datum> {
// <s-expression> ->
// | `(' <datum> <dot> <datum> `)'
// | `[' <datum> <dot> <datum> `]'
map(
delimited(
skip_space(char(dl_st)),
separated_pair(
datum,
delimited(multispace1, char('.'), multispace1),
datum,
),
skip_space(char(dl_en)),
dl_st,
separated_pair(datum::parse, dotls, datum::parse),
dl_en,
),
|(lhs, rhs)| Datum::Listv(Box::new(lhs), Box::new(rhs)),
)
}
fn list<'a>(
dl_st: char,
dl_en: char,
) -> impl Fn(&'a str) -> IResult<&'a str, Datum> {
map(
delimited(
char(dl_st),
separated_list(multispace1, datum),
char(dl_en),
),
|vec| {
let mut data = Datum::Nullv;
for lhs in vec.into_iter().rev() {
data = Datum::Listv(Box::new(lhs), Box::new(data));
}
data
},
)
fn list<'a, F, G>(
dl_st: F,
dl_en: G,
) -> impl Fn(&'a str) -> IResult<&'a str, Datum>
where
F: Fn(&'a str) -> IResult<&'a str, char>,
G: Fn(&'a str) -> IResult<&'a str, char>,
{
// <lisp list> ->
// | `(' <datum>* `)'
// | `[' <datum>* `]'
map(delimited(dl_st, many0(datum::parse), dl_en), |vec| {
let mut data = Datum::Nullv;
for lhs in vec.into_iter().rev() {
data = Datum::Listv(Box::new(lhs), Box::new(data));
}
data
})
}
pub fn parse(inp: &str) -> IResult<&str, Datum> {
// <list> ->
// | `(' <datum> `.' <datum> `)'
// | `[' <datum> `.' <datum> `]'
// | `(' <datum>* `)'
// | `[' <datum>* `]'
// <list> -> <s-expression> | <lisp list>
alt((
sexpr('(', ')'),
sexpr('[', ']'),
list('(', ')'),
list('[', ']'),
sexpr(brk1o, brk1c),
sexpr(brk3o, brk3c),
list(brk1o, brk1c),
list(brk3o, brk3c),
))(inp)
}
@ -111,6 +104,36 @@ fn test() {
)),
),
);
let in_sexpr = parse(
r#"(define . ((main . ()) . ((println . ("hello, world" . ())) . ())))"#
).unwrap();
let in_listv =
parse(r#"(define (main) (println "hello, world"))"#).unwrap();
assert_eq!(in_sexpr, in_listv);
assert_eq!(in_sexpr.0, "");
let in_sexpr = in_sexpr.1;
assert_eq!(
in_sexpr,
Datum::Listv(
Box::new(Datum::Symbl("define".to_owned())),
Box::new(Datum::Listv(
Box::new(Datum::Listv(
Box::new(Datum::Symbl("main".to_owned())),
Box::new(Datum::Nullv),
)),
Box::new(Datum::Listv(
Box::new(Datum::Listv(
Box::new(Datum::Symbl("println".to_owned())),
Box::new(Datum::Listv(
Box::new(Datum::Strng("hello, world".to_owned())),
Box::new(Datum::Nullv),
)),
)),
Box::new(Datum::Nullv),
)),
)),
),
);
}
// EOF

56
source/vire/parser/space.rs

@ -0,0 +1,56 @@
use super::IResult;
use nom::{
branch::alt,
bytes::complete::{is_a, is_not},
combinator::recognize,
error::{ErrorKind, VerboseErrorKind},
multi::many0_count,
sequence::terminated,
};
/// Recognizes a run of whitespace characters (U+9 through U+D, and U+20).
fn space(inp: &str) -> IResult<&str, &str> {
    // <space> -> <U+9> ... <U+D> | <U+20>
    let blanks = is_a("\x09\x0a\x0b\x0c\x0d\x20");
    blanks(inp)
}
/// Recognizes a run of line-ending characters (U+A and U+D).
///
/// End of input is not matched here; callers that want to accept it
/// combine this parser with `eof`.
pub fn line_ending(inp: &str) -> IResult<&str, &str> {
    // <line ending> -> <U+A> | <U+D>
    let newlines = is_a("\x0a\x0d");
    newlines(inp)
}
/// Recognizes the text up to, but not including, the next line-ending
/// character (U+A or U+D).
pub fn not_line_ending(inp: &str) -> IResult<&str, &str> {
    // <not line ending> -> <any character except U+A or U+D>
    let until_newline = is_not("\x0a\x0d");
    until_newline(inp)
}
/// Recognizes one piece of skippable input: whitespace, line endings, or
/// a comment.
fn any(inp: &str) -> IResult<&str, &str> {
    // <any space kind> ->
    //   | <space>
    //   | <line ending>
    //   | <comment>
    let kind = alt((space, line_ending, super::cment::any));
    kind(inp)
}
/// Recognizes zero or more pieces of skippable input and returns the
/// matched slice, which is empty when nothing is skipped.
pub fn any0(inp: &str) -> IResult<&str, &str> {
    // <any space> -> <any space kind>*

    // End of input is checked first; otherwise count off as many pieces
    // of space as are present.
    let run = recognize(many0_count(any));
    alt((eof, run))(inp)
}
/// Wraps `mid` so that any space following it is consumed and discarded.
///
/// The returned parser yields the output of `mid`.
pub fn delim<'a, F, O>(mid: F) -> impl Fn(&'a str) -> IResult<&'a str, O>
where
    F: Fn(&'a str) -> IResult<&'a str, O>,
{
    // <delimited> -> <mid> <any space>
    let trailing = any0;
    terminated(mid, trailing)
}
pub fn eof(inp: &str) -> IResult<&str, &str> {
// <end of file>
if inp.is_empty() {
Ok((inp, inp))
} else {
Err(nom::Err::Error(nom::error::VerboseError {
errors: vec![(inp, VerboseErrorKind::Nom(ErrorKind::Eof))],
}))
}
}
// EOF

41
source/vire/parser/strng.rs

@ -1,36 +1,31 @@
use super::{chara::unicode, datum::Datum, IResult};
use nom::{
IResult,
branch::alt,
bytes::complete::{escaped_transform, is_not, tag},
character::complete::char,
combinator::{complete, cut, map},
sequence::delimited,
};
use super::{Datum, chara::unicode};
pub fn parse(inp: &str) -> IResult<&str, Datum> {
// <string> -> `"' <string character>* `"'
// <string> -> `"' <string character>* `"' <any space>
// <string character> ->
// | <any character except `"' or `\'>
// | `\\' | `\"'
// | `\U{' <digit 16>+ `}'
map(
complete(delimited(
super::space::delim(complete(delimited(
char('"'),
cut(escaped_transform(
is_not(r#"\""#),
'\\',
|inp| {
alt((
char('\\'),
char('"'),
delimited(tag("U{"), unicode, char('}')),
))(inp)
},
)),
cut(escaped_transform(is_not(r#"\""#), '\\', |inp| {
alt((
char('\\'),
char('"'),
delimited(tag("U{"), unicode, char('}')),
))(inp)
})),
char('"'),
)),
))),
|t| Datum::Strng(t),
)(inp)
}
@ -43,16 +38,10 @@ fn test() {
parse(r#""\U{d}\U{a}b\\\"\U{a}\U{9}b\U{d}ab\U{2764}""#),
Ok(("", Datum::Strng("\r\nb\\\"\n\tb\rab❤".to_owned()))),
);
assert_eq!(parse(""), Err(nom::Err::Error(("", ErrorKind::Char))));
assert_eq!(parse("\""), Err(nom::Err::Error(("", ErrorKind::Char))));
assert_eq!(
parse(r#""test\r\n""#),
Err(nom::Err::Failure((r#"r\n""#, ErrorKind::Tag))),
);
assert_eq!(
parse(r#""\U{1234567890}""#),
Err(nom::Err::Failure((r#"1234567890}""#, ErrorKind::MapRes))),
);
assert!(parse("").is_err());
assert!(parse("\"").is_err());
assert!(parse(r#""test\r\n""#).is_err());
assert!(parse(r#""\U{1234567890}""#).is_err());
}
// EOF

15
source/vire/parser/symbl.rs

@ -1,12 +1,11 @@
use super::{datum::Datum, IResult};
use nom::{
IResult,
branch::alt,
bytes::complete::{tag, take_while},
character::complete::anychar,
character::complete::{anychar, char},
combinator::{map, map_opt, recognize},
sequence::pair,
};
use super::Datum;
fn init_c(ch: char) -> Option<char> {
// <symbol initial> ->
@ -50,15 +49,15 @@ pub fn parse(inp: &str) -> IResult<&str, Datum> {
// <symbol> ->
// | `+' | `-' | `...'
// | <symbol initial> <symbol subsequent>*
map(
super::token::delim(map(
alt((
tag("+"),
tag("-"),
recognize(char('+')),
recognize(char('-')),
tag("..."),
recognize(pair(init, subs)),
)),
|t| Datum::Symbl(t.to_owned()),
)(inp)
))(inp)
}
#[test]
@ -73,7 +72,7 @@ fn test() {
assert_eq!(parse("a-b"), Ok(("", Datum::Symbl("a-b".to_owned()))));
assert_eq!(parse("a@b"), Ok(("", Datum::Symbl("a@b".to_owned()))));
assert_eq!(parse("❤"), Ok(("", Datum::Symbl("❤".to_owned()))));
assert_eq!(parse("@a"), Err(nom::Err::Error(("@a", ErrorKind::MapOpt))));
assert!(parse("@a").is_err());
}
// EOF

54
source/vire/parser/token.rs

@ -0,0 +1,54 @@
use super::{space, IResult};
use nom::{
branch::alt,
character::complete::char,
combinator::{cut, peek, recognize},
sequence::terminated,
};
/// Parses an opening parenthesis `(` and skips any space after it.
pub fn brk1o(inp: &str) -> IResult<&str, char> {
    let open_paren = space::delim(char('('));
    open_paren(inp)
}
/// Parses a closing parenthesis `)` and skips any space after it.
pub fn brk1c(inp: &str) -> IResult<&str, char> {
    let close_paren = space::delim(char(')'));
    close_paren(inp)
}
/// Parses an opening square bracket `[` and skips any space after it.
pub fn brk3o(inp: &str) -> IResult<&str, char> {
    let open_square = space::delim(char('['));
    open_square(inp)
}
/// Parses a closing square bracket `]` and skips any space after it.
pub fn brk3c(inp: &str) -> IResult<&str, char> {
    let close_square = space::delim(char(']'));
    close_square(inp)
}
/// Parses the `.` that separates the two halves of a dotted pair.
///
/// Unlike the bracket parsers, this goes through the token-level `delim`
/// of this module rather than `space::delim`.
pub fn dotls(inp: &str) -> IResult<&str, char> {
    // <* dot> -> `.'
    let dot = delim(char('.'));
    dot(inp)
}
/// Wraps the token parser `mid` so that what follows the token is checked
/// and any trailing space is consumed.
///
/// The returned parser yields the output of `mid`. The check on what
/// follows is wrapped in `cut`, so a failure there is not recoverable.
pub fn delim<'a, F, O>(mid: F) -> impl Fn(&'a str) -> IResult<&'a str, O>
where
    F: Fn(&'a str) -> IResult<&'a str, O>,
{
    // <* any> ->
    //   | <any> <delimiter (not token yet)>
    //   | <any> <any space>
    // <delimiter> -> `(' | `)' | `[' | `]' | `"' | `#'

    // A delimiter may directly follow a token; it is only peeked at, so
    // it stays in the input for the next parser.
    let delimiter = alt((
        char('('),
        char(')'),
        char('['),
        char(']'),
        char('"'),
        char('#'),
    ));
    // NOTE(review): `space::any0` accepts zero pieces of space, so it
    // appears to succeed before the delimiter `peek` is ever tried; a
    // token directly followed by other text would then not be rejected.
    // Confirm whether that leniency is intended.
    let follow = alt((space::eof, space::any0, peek(recognize(delimiter))));
    terminated(mid, cut(follow))
}
// EOF
Loading…
Cancel
Save